Ejemplo n.º 1
0
    def predict(self, X, y=None, objective=None):
        """Predict target values from the estimator features computed for X and y.

        The computed features are passed through ``drop_rows_with_nans`` before the
        estimator is called, and the predictions are then padded with
        ``pad_with_nans`` by the number of rows by which they fall short of the
        computed features (never a negative amount).

        Arguments:
            X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
                An empty pd.DataFrame is substituted when X is None.
            y (ww.DataColumn, pd.Series, np.ndarray, None): Target values of length [n_samples].
            objective (Object or string): Accepted by the signature but not read by this method.

        Returns:
            ww.DataColumn: Predicted values.
        """
        # NOTE(review): y defaults to None but is converted unconditionally below;
        # confirm that infer_feature_types accepts None.
        X = infer_feature_types(pd.DataFrame() if X is None else X)
        y = infer_feature_types(y)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        y = _convert_woodwork_types_wrapper(y.to_series())

        estimator_features = _convert_woodwork_types_wrapper(
            self.compute_estimator_features(X, y).to_dataframe())
        clean_features, y = drop_rows_with_nans(estimator_features, y)

        # Only hand the target to the estimator when it declares that it needs it.
        target_for_estimator = y if self.estimator.predict_uses_y else None
        raw_predictions = self.estimator.predict(clean_features,
                                                 target_for_estimator)
        named_predictions = raw_predictions.to_series().rename(
            self.input_target_name)

        n_missing = max(
            0, estimator_features.shape[0] - named_predictions.shape[0])
        return infer_feature_types(pad_with_nans(named_predictions, n_missing))
    def _predict(self, X, y, objective=None, pad=False):
        """Compute predictions, applying the pipeline threshold when one is set.

        When ``self.threshold`` is None the estimator's predictions are used
        directly. Otherwise the second column of the predicted probabilities is
        either compared against the threshold (no objective given) or handed to
        the objective's ``decision_function``.

        Arguments:
            X: Input data forwarded to ``compute_estimator_features``.
            y: Target data forwarded to ``compute_estimator_features``.
            objective (Object or string, None): Objective resolved via ``get_objective``
                and used for the thresholded decision when provided.
            pad (bool): If True, pad the predictions with ``pad_with_nans`` by the
                number of rows by which they fall short of the computed features.

        Returns:
            The predictions passed through ``infer_feature_types``.

        Raises:
            ValueError: If the objective is not defined for ``self.problem_type``.
        """
        estimator_features = _convert_woodwork_types_wrapper(
            self.compute_estimator_features(X, y).to_dataframe())
        clean_features, clean_y = drop_rows_with_nans(estimator_features, y)

        if objective is not None:
            objective = get_objective(objective, return_instance=True)
            supported = objective.is_defined_for_problem_type(self.problem_type)
            if not supported:
                raise ValueError(
                    f"Objective {objective.name} is not defined for time series binary classification."
                )

        if self.threshold is not None:
            proba_frame = self._estimator_predict_proba(
                clean_features, clean_y).to_dataframe()
            # Column index 1 -- presumably the positive class; confirm with the estimator.
            selected_proba = proba_frame.iloc[:, 1]
            if objective is not None:
                predictions = objective.decision_function(
                    selected_proba, threshold=self.threshold, X=clean_features)
            else:
                predictions = selected_proba > self.threshold
        else:
            predictions = self._estimator_predict(
                clean_features, clean_y).to_series()

        if pad:
            n_missing = max(
                0, estimator_features.shape[0] - predictions.shape[0])
            predictions = pad_with_nans(predictions, n_missing)
        return infer_feature_types(predictions)
Ejemplo n.º 3
0
    def predict(self, X, y=None, objective=None):
        """Predict decoded target labels for the given data.

        Targets are encoded before being handed to ``_predict``; the resulting
        predictions are stripped of NaNs, decoded, and finally padded with
        ``pad_with_nans`` up to the larger of ``len(y)`` and the row count of X.

        Arguments:
            X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
            y (ww.DataColumn, pd.Series, np.ndarray, None): Target values of length [n_samples].
            objective (Object or string): The objective forwarded to ``_predict``.

        Returns:
            ww.DataColumn: Predicted values.
        """
        X, y = self._convert_to_woodwork(X, y)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        y = self._encode_targets(
            _convert_woodwork_types_wrapper(y.to_series()))
        n_rows = max(len(y), X.shape[0])

        raw_predictions = self._predict(X, y, objective=objective, pad=False)
        encoded_predictions = _convert_woodwork_types_wrapper(
            raw_predictions.to_series())
        # With gap 0 on a baseline pipeline the predictions can contain NaNs,
        # so they are removed before the labels are decoded.
        decoded_predictions = pd.Series(
            self._decode_targets(encoded_predictions.dropna()),
            name=self.input_target_name)

        n_missing = max(0, n_rows - decoded_predictions.shape[0])
        return infer_feature_types(
            pad_with_nans(decoded_predictions, n_missing))
 def _predict(self, X, y, objective=None, pad=False):
     """Run the estimator on the computed features, optionally padding the result.

     Arguments:
         X: Input data forwarded to ``compute_estimator_features``.
         y: Target data forwarded to ``compute_estimator_features``.
         objective: Accepted by the signature but not read by this method.
         pad (bool): If True, convert the predictions to a series, pad them with
             ``pad_with_nans`` by the number of rows by which they fall short of
             the computed features, and pass the result through
             ``infer_feature_types``.

     Returns:
         The unmodified output of ``_estimator_predict`` when ``pad`` is False,
         otherwise the padded predictions.
     """
     estimator_features = _convert_woodwork_types_wrapper(
         self.compute_estimator_features(X, y).to_dataframe())
     clean_features, clean_y = drop_rows_with_nans(estimator_features, y)
     predictions = self._estimator_predict(clean_features, clean_y)

     if not pad:
         return predictions

     prediction_series = predictions.to_series()
     n_missing = max(0, estimator_features.shape[0] - predictions.shape[0])
     return infer_feature_types(pad_with_nans(prediction_series, n_missing))
Ejemplo n.º 5
0
 def predict_proba(self, X, y=None):
     """Build a one-hot probability table from the estimator's predictions.

     Each non-NaN prediction is cast to int and gets probability 1 in the
     column matching its value and 0 everywhere else. The table has
     ``y.max() + 1`` columns and is padded with ``pad_with_nans`` by
     ``len(y)`` minus the number of non-NaN predictions.

     Arguments:
         X: Input data forwarded to ``predict``.
         y: Target values; required.

     Returns:
         The padded table passed through ``infer_feature_types``.

     Raises:
         ValueError: If y is None.
     """
     if y is None:
         raise ValueError("Cannot predict Time Series Baseline Estimator if y is None")

     y = _convert_woodwork_types_wrapper(infer_feature_types(y).to_series())
     predicted = self.predict(X, y).to_series()
     labels = predicted.dropna(axis=0, how='any').astype('int')

     n_preds = len(labels)
     one_hot = np.zeros((n_preds, y.max() + 1))
     # Row i gets a 1 in the column indexed by its predicted label.
     one_hot[np.arange(n_preds), labels] = 1

     n_missing = len(y) - n_preds
     return infer_feature_types(pad_with_nans(pd.DataFrame(one_hot), n_missing))
    def predict_proba(self, X, y=None):
        """Estimate class probabilities for the given data.

        The probability columns are labelled with ``self._encoder.classes_`` and
        the table is padded with ``pad_with_nans`` by the number of rows by which
        it falls short of the computed features (never a negative amount).

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
            y (ww.DataColumn, pd.Series, np.ndarray, None): Target values; encoded
                with ``_encode_targets`` before the features are computed.

        Returns:
            ww.DataTable: Probability estimates
        """
        X, y = self._convert_to_woodwork(X, y)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        y = self._encode_targets(
            _convert_woodwork_types_wrapper(y.to_series()))

        estimator_features = _convert_woodwork_types_wrapper(
            self.compute_estimator_features(X, y).to_dataframe())
        clean_features, clean_y = drop_rows_with_nans(estimator_features, y)

        probabilities = self._estimator_predict_proba(
            clean_features, clean_y).to_dataframe()
        probabilities.columns = self._encoder.classes_

        n_missing = max(
            0, estimator_features.shape[0] - probabilities.shape[0])
        return infer_feature_types(pad_with_nans(probabilities, n_missing))