Exemple #1
0
    def predict(self, data):
        """
        Returns a prediction on the data.

        If the data is a koalas DataFrame, the return is a Koalas Series.

        If the data is a pandas Dataframe, the return is the expected output of the underlying
        pyfunc object (typically a pandas Series or a numpy array).

        Parameters
        ----------
        data : pandas.DataFrame or koalas DataFrame
            The input rows to run the model on.

        Returns
        -------
        The result of ``self._model.predict(data)`` for pandas input, or a Koalas Series
        anchored on ``data`` for Koalas input.

        Raises
        ------
        ValueError
            If ``data`` is neither a pandas DataFrame nor a Koalas DataFrame. Without this
            check the method would fall off the end and silently return ``None``.
        """
        if isinstance(data, pd.DataFrame):
            # Local pandas input: hand it directly to the wrapped model.
            return self._model.predict(data)
        if isinstance(data, DataFrame):
            # Apply the model UDF to every data column of the frame, passed positionally.
            return_col = self._model_udf(*data._internal.data_spark_columns)
            # TODO: the columns should be named according to the mlflow spec
            # However, this is only possible with spark >= 3.0
            # s = F.struct(*data.columns)
            # return_col = self._model_udf(s)
            # Select the prediction column to find out the name Spark gives it, and wrap
            # that name in a 1-tuple to use it as the column label of the result.
            column_labels = [
                (col,)
                for col in data._internal.spark_frame.select(return_col).columns
            ]
            return Series(
                data._internal.copy(spark_column=return_col,
                                    column_labels=column_labels),
                anchor=data,
            )
        # Fail loudly on unsupported input instead of implicitly returning None.
        raise ValueError("unknown data type: {}".format(type(data).__name__))
Exemple #2
0
def align_diff_series(func, this_series, *args, how="full"):
    """
    Apply ``func`` to ``this_series`` and other series-like arguments after combining them.

    ``this_series`` (converted to a frame) is combined with every positional argument that
    is an ``IndexOpsMixin`` via ``combine_frames``; arguments of any other type are not
    forwarded to ``func``. ``func`` then receives the first column of the ``"this"`` side
    followed by all columns of the ``"that"`` side.

    :param func: callable taking the "this" column followed by the "that" columns and
        returning the resulting column.
    :param this_series: the series whose column labels are kept for the result.
    :param args: additional operands; only ``IndexOpsMixin`` instances take part.
    :param how: forwarded unchanged to ``combine_frames`` (presumably the join type;
        verify against ``combine_frames``).
    :return: a Series anchored on the combined frame.
    """
    from databricks.koalas.base import IndexOpsMixin
    from databricks.koalas.series import Series

    # Keep only the operands that can be combined with this_series.
    series_like = []
    for candidate in args:
        if isinstance(candidate, IndexOpsMixin):
            series_like.append(candidate)

    joined = combine_frames(this_series.to_frame(), *series_like, how=how)

    this_scol = joined["this"]._internal.column_scols[0]
    that_scols = joined["that"]._internal.column_scols
    result_scol = func(this_scol, *that_scols)

    # The result keeps the labels of the original series, not those of the combined frame.
    result_internal = joined._internal.copy(
        scol=result_scol,
        column_labels=this_series._internal.column_labels,
    )
    return Series(result_internal, anchor=joined)
Exemple #3
0
def align_diff_series(func, this_series, *args, how="full"):
    """
    Apply ``func`` to ``this_series`` and the given operands after combining them.

    ``this_series`` (converted to a frame) is combined with every positional argument that
    is an ``IndexOpsMixin`` via ``combine_frames``. ``func`` is then called with the column
    of ``this_series`` looked up on the ``'this'`` side, followed by one value per
    positional argument: the matching ``'that'``-side column for ``IndexOpsMixin``
    arguments, and the argument itself for anything else.

    :param func: callable taking the "this" column followed by one operand per argument
        and returning the resulting column.
    :param this_series: the series whose column index is kept for the result.
    :param args: additional operands, either ``IndexOpsMixin`` instances or other values.
    :param how: forwarded unchanged to ``combine_frames`` (presumably the join type;
        verify against ``combine_frames``).
    :return: a Series anchored on the combined frame.
    """
    from databricks.koalas.base import IndexOpsMixin
    from databricks.koalas.series import Series

    series_like = tuple(filter(lambda operand: isinstance(operand, IndexOpsMixin), args))
    joined = combine_frames(this_series.to_frame(), *series_like, how=how)

    # Resolve the operands in order; the 'that' side is only looked up for
    # series-like arguments, everything else is handed to func as-is.
    operands = []
    for operand in args:
        if isinstance(operand, IndexOpsMixin):
            that_key = operand._internal.column_index[0]
            operands.append(joined['that'][that_key]._scol)
        else:
            operands.append(operand)

    this_key = this_series._internal.column_index[0]
    result_scol = func(joined['this'][this_key]._scol, *operands)

    # The result keeps the column index of the original series.
    result_internal = joined._internal.copy(
        scol=result_scol,
        column_index=this_series._internal.column_index,
    )
    return Series(result_internal, anchor=joined)