def predict(self, data):
    """
    Run the wrapped model on ``data`` and return its prediction.

    A pandas DataFrame is passed straight to the underlying pyfunc object and
    whatever it returns (typically a pandas Series or a numpy array) is handed
    back unchanged.

    A koalas DataFrame is scored through the model UDF applied to its data
    Spark columns, and the result is returned as a koalas Series anchored to
    ``data``.

    Any other input type matches neither branch and yields ``None``.
    """
    if isinstance(data, pd.DataFrame):
        return self._model.predict(data)
    if not isinstance(data, DataFrame):
        return None

    internal = data._internal
    prediction = self._model_udf(*internal.data_spark_columns)
    # TODO: the columns should be named according to the mlflow spec
    # However, this is only possible with spark >= 3.0
    # s = F.struct(*data.columns)
    # return_col = self._model_udf(s)
    # Label the result with the name Spark assigns to the UDF output column.
    labels = [
        (name,) for name in internal.spark_frame.select(prediction).columns
    ]
    scored = internal.copy(spark_column=prediction, column_labels=labels)
    return Series(scored, anchor=data)
def align_diff_series(func, this_series, *args, how="full"):
    """
    Apply ``func`` to ``this_series`` and ``args`` after combining them into one frame.

    The ``IndexOpsMixin`` arguments are combined with ``this_series`` via
    ``combine_frames``; ``func`` is then called with the Spark column of
    ``this_series`` followed by one positional argument per entry of ``args``.
    Arguments that are not ``IndexOpsMixin`` instances (e.g. scalars) are
    forwarded to ``func`` unchanged, in their original position, instead of
    being dropped.

    :param func: callable receiving the "this" Spark column followed by the
        resolved arguments; its return value becomes the result column.
    :param this_series: the series whose column labels the result keeps.
    :param args: further operands; ``IndexOpsMixin`` instances are combined,
        anything else is passed through as-is.
    :param how: forwarded to ``combine_frames`` (presumably the join type --
        confirm against ``combine_frames``).
    :return: a ``Series`` built from the combined frame's internal state with
        the computed column, anchored to the combined frame.
    """
    from databricks.koalas.base import IndexOpsMixin
    from databricks.koalas.series import Series

    cols = [arg for arg in args if isinstance(arg, IndexOpsMixin)]
    combined = combine_frames(this_series.to_frame(), *cols, how=how)

    this_scol = combined["this"]._internal.column_scols[0]

    # Assumes the "that" side holds one column per IndexOpsMixin argument, in
    # argument order (the same assumption positional unpacking relied on).
    that_scols = iter(combined["that"]._internal.column_scols)
    func_args = [
        next(that_scols) if isinstance(arg, IndexOpsMixin) else arg
        for arg in args
    ]
    # Forward any remaining "that" columns so an all-series call still passes
    # every combined column to ``func``.
    func_args.extend(that_scols)

    scol = func(this_scol, *func_args)

    return Series(
        combined._internal.copy(
            scol=scol, column_labels=this_series._internal.column_labels
        ),
        anchor=combined,
    )
def align_diff_series(func, this_series, *args, how="full"):
    """
    Apply ``func`` to ``this_series`` and ``args`` after combining them into one frame.

    Every ``IndexOpsMixin`` argument is combined with ``this_series`` through
    ``combine_frames`` and replaced by its Spark column looked up under the
    ``'that'`` side of the combined frame; all other arguments are handed to
    ``func`` untouched. ``func`` receives the ``'this'`` column first, then
    the resolved arguments in their original order.

    :param func: callable producing the result Spark column.
    :param this_series: the series whose column index the result keeps.
    :param args: further operands (series-like objects or plain values).
    :param how: forwarded to ``combine_frames``.
    :return: a ``Series`` anchored to the combined frame.
    """
    from databricks.koalas.base import IndexOpsMixin
    from databricks.koalas.series import Series

    series_args = [
        candidate for candidate in args if isinstance(candidate, IndexOpsMixin)
    ]
    combined = combine_frames(this_series.to_frame(), *series_args, how=how)

    resolved_args = []
    for arg in args:
        if isinstance(arg, IndexOpsMixin):
            # Look up the argument's column on the 'that' side by its first
            # column index entry.
            label = arg._internal.column_index[0]
            resolved_args.append(combined['that'][label]._scol)
        else:
            resolved_args.append(arg)

    this_index = this_series._internal.column_index
    this_scol = combined['this'][this_index[0]]._scol
    result_scol = func(this_scol, *resolved_args)

    new_internal = combined._internal.copy(
        scol=result_scol, column_index=this_index
    )
    return Series(new_internal, anchor=combined)