def _predict(self, X, y, objective=None, pad=False):
    """Predict from the estimator features, applying the threshold if one is set.

    Arguments:
        X: Input data forwarded to ``compute_estimator_features``.
        y: Target data forwarded to ``compute_estimator_features``; rows are
            dropped from it together with the NaN feature rows.
        objective (Object or string, None): If given, it is resolved to an
            instance with ``get_objective`` and, when a threshold is set, its
            ``decision_function`` produces the predictions.
        pad (bool): If True, the predictions are passed through
            ``pad_with_nans`` with the number of rows by which they fall
            short of the computed features.

    Returns:
        The predictions, padded when ``pad`` is True.

    Raises:
        ValueError: If ``objective`` is not defined for this pipeline's
            problem type.
    """
    features = self.compute_estimator_features(X, y)
    features_no_nan, y_no_nan = drop_rows_with_nans(features, y)

    if objective is not None:
        objective = get_objective(objective, return_instance=True)
        if not objective.is_defined_for_problem_type(self.problem_type):
            raise ValueError(
                f"Objective {objective.name} is not defined for time series binary classification."
            )

    if self.threshold is not None:
        # Only the second probability column is used (presumably the positive
        # class -- confirm against the estimator's column order).
        second_col_proba = self._estimator_predict_proba(features_no_nan, y_no_nan)
        second_col_proba = second_col_proba.iloc[:, 1]
        if objective is not None:
            predictions = objective.decision_function(
                second_col_proba, threshold=self.threshold, X=features_no_nan
            )
        else:
            predictions = second_col_proba > self.threshold
    else:
        predictions = self._estimator_predict(features_no_nan, y_no_nan)

    if not pad:
        return predictions

    n_missing = max(0, features.shape[0] - predictions.shape[0])
    return pad_with_nans(predictions, n_missing)
def predict(self, X, y=None, objective=None):
    """Make predictions using selected features.

    Arguments:
        X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape
            [n_samples, n_features]. ``None`` is replaced by an empty
            ``pd.DataFrame``.
        y (ww.DataColumn, pd.Series, np.ndarray, None): Target values of
            length [n_samples].
        objective (Object or string): Part of the signature, but not
            referenced anywhere in this method's body.

    Returns:
        pd.Series: Predicted values, renamed to ``self.input_target_name`` and
        passed through ``pad_with_nans`` with the number of rows by which
        they fall short of the computed features.
    """
    if X is None:
        X = pd.DataFrame()

    X_ww = _convert_to_woodwork_structure(X)
    y_ww = _convert_to_woodwork_structure(y)
    X_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
    y_series = _convert_woodwork_types_wrapper(y_ww.to_series())

    features = self.compute_estimator_features(X_df, y_series)
    features_no_nan, y_no_nan = drop_rows_with_nans(features, y_series)

    # The target is only handed to the estimator when it declares it uses it.
    y_arg = y_no_nan if self.estimator.predict_uses_y else None

    predictions = self.estimator.predict(features_no_nan, y_arg)
    predictions = predictions.rename(self.input_target_name)

    n_missing = max(0, features.shape[0] - predictions.shape[0])
    return pad_with_nans(predictions, n_missing)
def _predict(self, X, y, objective=None, pad=False):
    """Predict from the estimator features, optionally padding the result.

    Arguments:
        X: Input data forwarded to ``compute_estimator_features``.
        y: Target data forwarded to ``compute_estimator_features``; rows are
            dropped from it together with the NaN feature rows.
        objective: Part of the signature, but not referenced in this body.
        pad (bool): If True, the predictions are converted with
            ``to_series()``, padded via ``pad_with_nans`` and wrapped again
            with ``_convert_to_woodwork_structure``.

    Returns:
        The value returned by ``_estimator_predict`` when ``pad`` is False,
        otherwise the padded predictions as a woodwork structure.
    """
    ww_features = self.compute_estimator_features(X, y)
    features = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
    features_no_nan, y_no_nan = drop_rows_with_nans(features, y)
    predictions = self._estimator_predict(features_no_nan, y_no_nan)

    if not pad:
        return predictions

    predictions_series = predictions.to_series()
    n_missing = max(0, features.shape[0] - predictions.shape[0])
    padded = pad_with_nans(predictions_series, n_missing)
    return _convert_to_woodwork_structure(padded)
def _predict(self, X, y, objective=None, pad=False):
    """Predict from the estimator features, optionally padding the result.

    Arguments:
        X: Input data forwarded to ``compute_estimator_features``.
        y: Target data forwarded to ``compute_estimator_features``; rows are
            dropped from it together with the NaN feature rows.
        objective: Part of the signature, but not referenced in this body.
        pad (bool): If True, the predictions are passed through
            ``pad_with_nans`` with the number of rows by which they fall
            short of the computed features.

    Returns:
        The predictions, padded when ``pad`` is True.
    """
    features = self.compute_estimator_features(X, y)
    features_no_nan, y_no_nan = drop_rows_with_nans(features, y)
    predictions = self._estimator_predict(features_no_nan, y_no_nan)

    if not pad:
        return predictions

    n_missing = max(0, features.shape[0] - predictions.shape[0])
    return pad_with_nans(predictions, n_missing)
def predict_proba(self, X, y=None):
    """Build one-hot probability rows from this estimator's predictions.

    Each non-NaN prediction is cast to ``int`` and becomes a row with a 1 in
    the column indexed by that value and 0 elsewhere. The matrix has
    ``y.max() + 1`` columns.

    Arguments:
        X: Input data forwarded to ``self.predict``.
        y: Target values; required.

    Returns:
        The result of ``pad_with_nans`` applied to the one-hot
        ``pd.DataFrame``, with ``len(y) - len(preds)`` rows of padding
        requested.

    Raises:
        ValueError: If ``y`` is None.
    """
    if y is None:
        raise ValueError("Cannot predict Time Series Baseline Estimator if y is None")

    y = _convert_woodwork_types_wrapper(_convert_to_woodwork_structure(y).to_series())

    preds = self.predict(X, y)
    preds = preds.dropna(axis=0, how='any').astype('int')
    n_preds = len(preds)

    one_hot = np.zeros((n_preds, y.max() + 1))
    row_index = np.arange(n_preds)
    one_hot[row_index, preds] = 1

    return pad_with_nans(pd.DataFrame(one_hot), len(y) - n_preds)
def predict_proba(self, X, y=None):
    """Make probability estimates for labels.

    Arguments:
        X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape
            [n_samples, n_features]
        y: Target data; converted together with ``X`` by
            ``self._convert_to_woodwork`` and used when computing the
            estimator features.

    Returns:
        pd.DataFrame: Probability estimates, with columns set to
        ``self._encoder.classes_`` and passed through ``pad_with_nans`` with
        the number of rows by which they fall short of the computed features.
    """
    X_ww, y_ww = self._convert_to_woodwork(X, y)
    X_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
    y_series = _convert_woodwork_types_wrapper(y_ww.to_series())

    features = self.compute_estimator_features(X_df, y_series)
    features_no_nan, y_no_nan = drop_rows_with_nans(features, y_series)

    proba = self._estimator_predict_proba(features_no_nan, y_no_nan)
    # Label the probability columns with the encoder's classes.
    proba.columns = self._encoder.classes_

    n_missing = max(0, features.shape[0] - proba.shape[0])
    return pad_with_nans(proba, n_missing)
def predict(self, X, y=None, objective=None):
    """Make predictions using selected features.

    Arguments:
        X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape
            [n_samples, n_features]
        y (ww.DataColumn, pd.Series, np.ndarray, None): Target values of
            length [n_samples]
        objective (Object or string): The objective to use to make
            predictions; forwarded to ``self._predict``.

    Returns:
        ww.DataColumn: Predicted values, decoded with
        ``self._decode_targets``, named ``self.input_target_name`` and padded
        to the larger of ``len(y)`` and the number of rows in ``X``.
    """
    X_ww, y_ww = self._convert_to_woodwork(X, y)
    X_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
    y_series = _convert_woodwork_types_wrapper(y_ww.to_series())

    # Target output length: the longer of the target and the feature rows.
    n_rows = max(len(y_series), X_df.shape[0])

    raw_predictions = self._predict(X_df, y_series, objective=objective, pad=False)
    raw_predictions = _convert_woodwork_types_wrapper(raw_predictions.to_series())

    # NaNs are dropped before decoding; per the original author's note this
    # matters when gap is 0 and this is a baseline pipeline.
    decoded = pd.Series(
        self._decode_targets(raw_predictions.dropna()),
        name=self.input_target_name,
    )

    n_missing = max(0, n_rows - decoded.shape[0])
    return _convert_to_woodwork_structure(pad_with_nans(decoded, n_missing))
def test_pad_with_nans_with_series_name():
    """Padding a named Series by one row prepends a NaN and keeps the name."""
    series_name = "data to pad"
    actual = pad_with_nans(pd.Series([1, 2, 3], name=series_name), 1)
    expected = pd.Series([np.nan, 1, 2, 3], name=series_name, dtype="Float64")
    _check_equality(actual, expected)
def test_pad_with_nans(data, num_to_pad, expected):
    """Check that ``pad_with_nans(data, num_to_pad)`` matches ``expected``."""
    _check_equality(pad_with_nans(data, num_to_pad), expected)