def score(self, X, y, objectives):
    """Score the pipeline's predictions against the gap-shifted target.

    Arguments:
        X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features].
            ``None`` is replaced by an empty DataFrame.
        y (pd.Series, ww.DataColumn): True labels of length [n_samples]
        objectives (list): Non-empty list of objectives to score on

    Returns:
        dict: Ordered dictionary of objective scores
    """
    # X is converted to pandas because it is handed to _score_all_objectives.
    features = pd.DataFrame() if X is None else X
    features = _convert_woodwork_types_wrapper(
        infer_feature_types(features).to_dataframe()
    )
    target = _convert_woodwork_types_wrapper(infer_feature_types(y).to_series())

    predicted = _convert_woodwork_types_wrapper(
        self.predict(features, target).to_series()
    )

    # The truth is compared after shifting the target by ``-gap`` rows.
    shifted_target = target.shift(-self.gap)
    objective_instances = self.create_objectives(objectives)

    # Rows that are NaN in either series cannot be scored; drop them jointly.
    shifted_target, predicted = drop_rows_with_nans(shifted_target, predicted)
    return self._score_all_objectives(
        features,
        shifted_target,
        predicted,
        y_pred_proba=None,
        objectives=objective_instances,
    )
def fit(self, X, y):
    """Fit a time series regression pipeline.

    The component graph computes the estimator features, the target is
    shifted by ``-gap`` rows, rows containing NaN are dropped, and the final
    estimator is fit on what remains.

    Arguments:
        X (ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape
            [n_samples, n_features]. ``None`` is replaced by an empty DataFrame.
        y (ww.DataColumn, pd.Series, np.ndarray): The target training targets of length [n_samples]

    Returns:
        self
    """
    raw_features = pd.DataFrame() if X is None else X
    features_ww = infer_feature_types(raw_features)
    target_ww = infer_feature_types(y)
    features = _convert_woodwork_types_wrapper(features_ww.to_dataframe())
    target = _convert_woodwork_types_wrapper(target_ww.to_series())

    estimator_features = self._compute_features_during_fit(
        features, target
    ).to_dataframe()

    shifted_target = target.shift(-self.gap)
    estimator_features, shifted_target = drop_rows_with_nans(
        estimator_features, shifted_target
    )
    self.estimator.fit(estimator_features, shifted_target)

    # Record the feature names reported by the component graph after fitting.
    self.input_feature_names = self._component_graph.input_feature_names
    return self
def predict(self, X, y=None, objective=None):
    """Make predictions using selected features.

    Predictions are made only on feature rows without NaN; the result is then
    padded with NaN so that it has as many rows as the computed features.

    Arguments:
        X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
            ``None`` is replaced by an empty DataFrame.
        y (ww.DataColumn, pd.Series, np.ndarray, None): The target training targets of length [n_samples]
        objective (Object or string): The objective to use to make predictions.
            Not read by this method.

    Returns:
        ww.DataColumn: Predicted values.
    """
    # NOTE(review): y defaults to None but is passed to infer_feature_types and
    # then has .to_series() called on it - confirm None is actually supported.
    raw_features = pd.DataFrame() if X is None else X
    features_ww = infer_feature_types(raw_features)
    target_ww = infer_feature_types(y)
    X_df = _convert_woodwork_types_wrapper(features_ww.to_dataframe())
    y_series = _convert_woodwork_types_wrapper(target_ww.to_series())

    all_features = _convert_woodwork_types_wrapper(
        self.compute_estimator_features(X_df, y_series).to_dataframe()
    )
    clean_features, clean_target = drop_rows_with_nans(all_features, y_series)

    # Only hand the target to the estimator when it declares that it uses it.
    estimator_target = clean_target if self.estimator.predict_uses_y else None
    predictions = (
        self.estimator.predict(clean_features, estimator_target)
        .to_series()
        .rename(self.input_target_name)
    )

    # Restore the row count lost by dropping NaN rows.
    n_missing = max(0, all_features.shape[0] - predictions.shape[0])
    return infer_feature_types(pad_with_nans(predictions, n_missing))
def _predict(self, X, y, objective=None, pad=False):
    """Predict binary labels, applying the pipeline threshold when one is set.

    Arguments:
        X: Input data passed to ``compute_estimator_features``.
        y: Target passed to ``compute_estimator_features`` and, with NaN rows
            dropped, to the estimator.
        objective (Object or string, optional): Objective used to validate the
            problem type and, when a threshold is set, to turn probabilities
            into labels through its ``decision_function``.
        pad (bool): If True, pad the predictions with NaN up to the number of
            rows of the computed features.

    Returns:
        The predictions passed through ``infer_feature_types``.

    Raises:
        ValueError: If ``objective`` is not defined for this pipeline's problem type.
    """
    all_features = _convert_woodwork_types_wrapper(
        self.compute_estimator_features(X, y).to_dataframe()
    )
    clean_features, clean_target = drop_rows_with_nans(all_features, y)

    if objective is not None:
        objective = get_objective(objective, return_instance=True)
        if not objective.is_defined_for_problem_type(self.problem_type):
            raise ValueError(
                f"Objective {objective.name} is not defined for time series binary classification."
            )

    if self.threshold is None:
        # No threshold: use the estimator's own label predictions.
        predictions = self._estimator_predict(clean_features, clean_target).to_series()
    else:
        # Threshold set: decide from the second probability column.
        positive_proba = (
            self._estimator_predict_proba(clean_features, clean_target)
            .to_dataframe()
            .iloc[:, 1]
        )
        predictions = (
            positive_proba > self.threshold
            if objective is None
            else objective.decision_function(
                positive_proba, threshold=self.threshold, X=clean_features
            )
        )

    if not pad:
        return infer_feature_types(predictions)

    n_missing = max(0, all_features.shape[0] - predictions.shape[0])
    return infer_feature_types(pad_with_nans(predictions, n_missing))
def score(self, X, y, objectives):
    """Evaluate model performance on current and additional objectives.

    The encoded target is shifted by ``-gap`` rows and compared with the
    predictions / predicted probabilities after rows containing NaN are
    dropped jointly.

    Arguments:
        X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
        y (ww.DataColumn, pd.Series): True labels of length [n_samples]
        objectives (list): Non-empty list of objectives to score on

    Returns:
        dict: Ordered dictionary of objective scores
    """
    X_ww, y_ww = self._convert_to_woodwork(X, y)
    features = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
    target = _convert_woodwork_types_wrapper(y_ww.to_series())

    objective_instances = []
    for spec in objectives:
        objective_instances.append(get_objective(spec, return_instance=True))

    shifted_target = self._encode_targets(target).shift(-self.gap)

    # The un-encoded target is what gets passed on to _compute_predictions.
    predicted, predicted_proba = self._compute_predictions(
        features, target, objective_instances, time_series=True
    )
    # Either output may be None; convert only what is present.
    if predicted is not None:
        predicted = _convert_woodwork_types_wrapper(predicted.to_series())
    if predicted_proba is not None:
        predicted_proba = _convert_woodwork_types_wrapper(
            predicted_proba.to_dataframe()
        )

    shifted_target, predicted, predicted_proba = drop_rows_with_nans(
        shifted_target, predicted, predicted_proba
    )
    return self._score_all_objectives(
        features,
        shifted_target,
        predicted,
        y_pred_proba=predicted_proba,
        objectives=objective_instances,
    )
def _predict(self, X, y, objective=None, pad=False):
    """Predict on the NaN-free rows of the computed estimator features.

    Arguments:
        X: Input data passed to ``compute_estimator_features``.
        y: Target passed to ``compute_estimator_features`` and, with NaN rows
            dropped, to the estimator.
        objective: Accepted for interface compatibility; not read here.
        pad (bool): If True, pad the predictions with NaN up to the number of
            rows of the computed features and re-infer their types.

    Returns:
        The estimator predictions, unpadded and returned as-is when ``pad`` is
        False, otherwise the padded predictions passed through
        ``infer_feature_types``.
    """
    all_features = _convert_woodwork_types_wrapper(
        self.compute_estimator_features(X, y).to_dataframe()
    )
    clean_features, clean_target = drop_rows_with_nans(all_features, y)
    predictions = self._estimator_predict(clean_features, clean_target)

    if not pad:
        return predictions

    prediction_series = predictions.to_series()
    n_missing = max(0, all_features.shape[0] - predictions.shape[0])
    return infer_feature_types(pad_with_nans(prediction_series, n_missing))
def predict_proba(self, X, y=None):
    """Make probability estimates for labels.

    Probabilities are computed on feature rows without NaN, labelled with the
    encoder's classes, and padded with NaN up to the number of rows of the
    computed features.

    Arguments:
        X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
        y: Target, converted and encoded before features are computed.

    Returns:
        ww.DataTable: Probability estimates
    """
    X_ww, y_ww = self._convert_to_woodwork(X, y)
    features_df = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
    encoded_target = self._encode_targets(
        _convert_woodwork_types_wrapper(y_ww.to_series())
    )

    all_features = _convert_woodwork_types_wrapper(
        self.compute_estimator_features(features_df, encoded_target).to_dataframe()
    )
    clean_features, clean_target = drop_rows_with_nans(all_features, encoded_target)

    proba = self._estimator_predict_proba(clean_features, clean_target).to_dataframe()
    # Name the probability columns after the encoder's classes.
    proba.columns = self._encoder.classes_

    n_missing = max(0, all_features.shape[0] - proba.shape[0])
    return infer_feature_types(pad_with_nans(proba, n_missing))
def fit(self, X, y):
    """Fit a time series classification pipeline.

    The target encoder is fit on the raw target, the component graph computes
    the estimator features from the encoded target, the encoded target is
    shifted by ``-gap`` rows, and the estimator is fit on the rows that
    contain no NaN.

    Arguments:
        X (ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features]
        y (ww.DataColumn, pd.Series, np.ndarray): The target training targets of length [n_samples]

    Returns:
        self
    """
    X_ww, y_ww = self._convert_to_woodwork(X, y)
    features = _convert_woodwork_types_wrapper(X_ww.to_dataframe())
    raw_target = _convert_woodwork_types_wrapper(y_ww.to_series())

    # The encoder must be fit before the target can be encoded.
    self._encoder.fit(raw_target)
    target = self._encode_targets(raw_target)

    estimator_features = _convert_woodwork_types_wrapper(
        self._compute_features_during_fit(features, target).to_dataframe()
    )
    shifted_target = target.shift(-self.gap)
    estimator_features, shifted_target = drop_rows_with_nans(
        estimator_features, shifted_target
    )
    self.estimator.fit(estimator_features, shifted_target)
    return self