def _predict(self, X, y, objective=None, pad=False):
        """Predict labels from the estimator features, thresholding if configured.

        Arguments:
            X: Input data passed to ``compute_estimator_features``.
            y: Target values passed to ``compute_estimator_features``; rows are
                dropped from it together with the NaN feature rows.
            objective (Object or string, optional): Objective used to turn
                probabilities into labels when ``self.threshold`` is set.
                Defaults to None.
            pad (bool): If True, pad the predictions with NaNs up to the number
                of rows in the computed features. Defaults to False.

        Returns:
            The estimator predictions (or thresholded probabilities), padded
            when ``pad`` is True.

        Raises:
            ValueError: If ``objective`` is not defined for ``self.problem_type``.
        """
        all_features = self.compute_estimator_features(X, y)
        clean_features, clean_y = drop_rows_with_nans(all_features, y)

        # Resolve and validate the objective up front, even if no threshold is set.
        if objective is not None:
            objective = get_objective(objective, return_instance=True)
            supported = objective.is_defined_for_problem_type(self.problem_type)
            if not supported:
                raise ValueError(
                    f"Objective {objective.name} is not defined for time series binary classification."
                )

        if self.threshold is not None:
            # Only the second probability column takes part in thresholding.
            positive_proba = self._estimator_predict_proba(
                clean_features, clean_y).iloc[:, 1]
            if objective is not None:
                predictions = objective.decision_function(
                    positive_proba, threshold=self.threshold, X=clean_features)
            else:
                predictions = positive_proba > self.threshold
        else:
            predictions = self._estimator_predict(clean_features, clean_y)

        if not pad:
            return predictions
        n_missing = max(0, all_features.shape[0] - predictions.shape[0])
        return pad_with_nans(predictions, n_missing)
Exemple #2
0
    def predict(self, X, y=None, objective=None):
        """Predict target values from the estimator features.

        Arguments:
            X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
                ``None`` is replaced by an empty DataFrame.
            y (ww.DataColumn, pd.Series, np.ndarray, None): Target values of length [n_samples].
            objective (Object or string): Accepted but not used by this method.

        Returns:
            pd.Series: Predictions named ``self.input_target_name`` and padded
            with NaNs up to the number of rows in the computed features.
        """
        # NOTE(review): y defaults to None, yet ``.to_series()`` is called on the
        # converted value unconditionally - confirm None is handled upstream.
        if X is None:
            X = pd.DataFrame()
        X_ww = _convert_to_woodwork_structure(X)
        y_ww = _convert_to_woodwork_structure(y)
        X_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
        y_series = _convert_woodwork_types_wrapper(y_ww.to_series())

        all_features = self.compute_estimator_features(X_df, y_series)
        clean_features, clean_y = drop_rows_with_nans(all_features, y_series)

        # The target is handed to the estimator only if it declares that it uses it.
        y_arg = clean_y if self.estimator.predict_uses_y else None
        predictions = self.estimator.predict(clean_features, y_arg)
        predictions = predictions.rename(self.input_target_name)

        n_missing = max(0, all_features.shape[0] - predictions.shape[0])
        return pad_with_nans(predictions, n_missing)
 def _predict(self, X, y, objective=None, pad=False):
     """Predict from the estimator features, optionally padding the result.

     Arguments:
         X: Input data passed to ``compute_estimator_features``.
         y: Target values; rows are dropped from it together with the NaN
             feature rows.
         objective: Accepted but not used by this method.
         pad (bool): If True, pad the predictions with NaNs up to the number
             of feature rows and convert the result back to a woodwork
             structure. Defaults to False.

     Returns:
         The raw estimator predictions, or the padded woodwork structure when
         ``pad`` is True.
     """
     raw_features = self.compute_estimator_features(X, y)
     features = _convert_woodwork_types_wrapper(raw_features.to_dataframe())
     clean_features, clean_y = drop_rows_with_nans(features, y)
     predictions = self._estimator_predict(clean_features, clean_y)
     if not pad:
         return predictions
     as_series = predictions.to_series()
     n_missing = max(0, features.shape[0] - predictions.shape[0])
     return _convert_to_woodwork_structure(pad_with_nans(as_series, n_missing))
    def _predict(self, X, y, objective=None, pad=False):
        """Predict from the estimator features, optionally padding the result.

        Arguments:
            X: Input data passed to ``compute_estimator_features``.
            y: Target values; rows are dropped from it together with the NaN
                feature rows.
            objective: Accepted but not used by this method.
            pad (bool): If True, pad the predictions with NaNs up to the number
                of rows in the computed features. Defaults to False.

        Returns:
            The estimator predictions, padded when ``pad`` is True.
        """
        all_features = self.compute_estimator_features(X, y)
        clean_features, clean_y = drop_rows_with_nans(all_features, y)
        predictions = self._estimator_predict(clean_features, clean_y)
        if not pad:
            return predictions
        n_missing = max(0, all_features.shape[0] - predictions.shape[0])
        return pad_with_nans(predictions, n_missing)
 def predict_proba(self, X, y=None):
     """Build one-hot probability rows from this estimator's predictions.

     Arguments:
         X: Input data forwarded to ``predict``.
         y: Target values. Required despite the ``None`` default; also used
             to size the output (``y.max() + 1`` columns).

     Returns:
         pd.DataFrame: One row per non-NaN prediction with a 1 in the column
         of the predicted integer label, padded with NaNs by the number of
         predictions that were dropped.

     Raises:
         ValueError: If ``y`` is None.
     """
     if y is None:
         raise ValueError(
             "Cannot predict Time Series Baseline Estimator if y is None")
     y = _convert_to_woodwork_structure(y)
     y = _convert_woodwork_types_wrapper(y.to_series())
     # NaN predictions are dropped so the remainder can be used as integer
     # column indices.
     preds = self.predict(X, y).dropna(axis=0, how='any').astype('int')
     proba_arr = np.zeros((len(preds), y.max() + 1))
     proba_arr[np.arange(len(preds)), preds] = 1
     # Clamp at zero like the other prediction methods so a pad count can
     # never be negative.
     n_missing = max(0, len(y) - len(preds))
     return pad_with_nans(pd.DataFrame(proba_arr), n_missing)
    def predict_proba(self, X, y=None):
        """Estimate class probabilities for each row.

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
            y: Target values, converted alongside ``X`` and passed to
                ``compute_estimator_features``.

        Returns:
            pd.DataFrame: Probability estimates with columns labelled by
            ``self._encoder.classes_``, padded with NaNs up to the number of
            rows in the computed features.
        """
        X_ww, y_ww = self._convert_to_woodwork(X, y)
        X_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
        y_series = _convert_woodwork_types_wrapper(y_ww.to_series())

        all_features = self.compute_estimator_features(X_df, y_series)
        clean_features, clean_y = drop_rows_with_nans(all_features, y_series)

        proba = self._estimator_predict_proba(clean_features, clean_y)
        # Relabel the probability columns with the encoder's classes.
        proba.columns = self._encoder.classes_

        n_missing = max(0, all_features.shape[0] - proba.shape[0])
        return pad_with_nans(proba, n_missing)
    def predict(self, X, y=None, objective=None):
        """Predict decoded target labels for each row.

        Arguments:
            X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features]
            y (ww.DataColumn, pd.Series, np.ndarray, None): Target values of length [n_samples]
            objective (Object or string): The objective forwarded to ``_predict``

        Returns:
            ww.DataColumn: Decoded predictions named ``self.input_target_name``,
            padded with NaNs up to the larger of ``len(y)`` and the number of
            rows in ``X``.
        """
        X_ww, y_ww = self._convert_to_woodwork(X, y)
        X_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
        y_series = _convert_woodwork_types_wrapper(y_ww.to_series())
        n_rows = max(len(y_series), X_df.shape[0])

        raw = self._predict(X_df, y_series, objective=objective, pad=False)
        encoded = _convert_woodwork_types_wrapper(raw.to_series())
        # NaNs are dropped before decoding: with gap 0 on a baseline pipeline
        # the predictions may still contain them.
        decoded = pd.Series(
            self._decode_targets(encoded.dropna()),
            name=self.input_target_name,
        )

        n_missing = max(0, n_rows - decoded.shape[0])
        return _convert_to_woodwork_structure(pad_with_nans(decoded, n_missing))
Exemple #8
0
def test_pad_with_nans_with_series_name():
    """Padding a named Series keeps its name and prepends the NaN."""
    series_name = "data to pad"
    original = pd.Series([1, 2, 3], name=series_name)
    result = pad_with_nans(original, 1)
    expected = pd.Series([np.nan, 1, 2, 3], name=series_name, dtype="Float64")
    _check_equality(result, expected)
Exemple #9
0
def test_pad_with_nans(data, num_to_pad, expected):
    """Check that padding ``data`` by ``num_to_pad`` yields ``expected``."""
    _check_equality(pad_with_nans(data, num_to_pad), expected)