Python drop_rows_with_nans 예제들, evalml.utils.drop_rows_with_nans Python 예제들

예제 #1

0

파일 보기

    def score(self, X, y, objectives):
        """Score this pipeline's predictions against the gap-shifted target.

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features].
                ``None`` is replaced by an empty DataFrame.
            y (pd.Series, ww.DataColumn): True labels of length [n_samples]
            objectives (list): Non-empty list of objectives to score on

        Returns:
            dict: Ordered dictionary of objective scores
        """
        # X is converted here only so it can be handed to _score_all_objectives.
        features = pd.DataFrame() if X is None else X
        features = _convert_woodwork_types_wrapper(
            infer_feature_types(features).to_dataframe())
        target = _convert_woodwork_types_wrapper(
            infer_feature_types(y).to_series())

        predicted = self.predict(features, target)
        predicted = _convert_woodwork_types_wrapper(predicted.to_series())

        # Shift the target by -gap so each row is compared with the value
        # `gap` rows later, then pass truth and predictions through
        # drop_rows_with_nans together.
        shifted_target = target.shift(-self.gap)
        objectives = self.create_objectives(objectives)
        shifted_target, predicted = drop_rows_with_nans(shifted_target,
                                                        predicted)
        return self._score_all_objectives(
            features,
            shifted_target,
            predicted,
            y_pred_proba=None,
            objectives=objectives,
        )

예제 #2

0

파일 보기

    def fit(self, X, y):
        """Fit a time series regression pipeline.

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features].
                ``None`` is replaced by an empty DataFrame.
            y (ww.DataColumn, pd.Series, np.ndarray): The target training targets of length [n_samples]

        Returns:
            self
        """
        features = pd.DataFrame() if X is None else X

        features = infer_feature_types(features)
        target = infer_feature_types(y)
        features = _convert_woodwork_types_wrapper(features.to_dataframe())
        target = _convert_woodwork_types_wrapper(target.to_series())

        transformed = self._compute_features_during_fit(features, target)
        transformed = transformed.to_dataframe()

        # The estimator is trained on the target shifted by -gap; features and
        # shifted target go through drop_rows_with_nans together before fitting.
        train_features, train_target = drop_rows_with_nans(
            transformed, target.shift(-self.gap))
        self.estimator.fit(train_features, train_target)
        self.input_feature_names = self._component_graph.input_feature_names

        return self

예제 #3

0

파일 보기

    def predict(self, X, y=None, objective=None):
        """Make predictions using selected features.

        Arguments:
            X (ww.DataTable, pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
                ``None`` is replaced by an empty DataFrame.
            y (ww.DataColumn, pd.Series, np.ndarray, None): The target training targets of length [n_samples]
            objective (Object or string): The objective to use to make predictions.
                This method body does not read it.

        Returns:
            ww.DataColumn: Predicted values.
        """
        # NOTE(review): y defaults to None but is passed straight to
        # infer_feature_types and then `.to_series()` -- confirm that helper
        # accepts None.
        raw_features = pd.DataFrame() if X is None else X
        ww_features = infer_feature_types(raw_features)
        ww_target = infer_feature_types(y)
        features_df = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
        target = _convert_woodwork_types_wrapper(ww_target.to_series())

        features = _convert_woodwork_types_wrapper(
            self.compute_estimator_features(features_df, target).to_dataframe())
        features_no_nan, target_no_nan = drop_rows_with_nans(features, target)

        # The target is forwarded only to estimators that declare they use it.
        estimator_target = target_no_nan if self.estimator.predict_uses_y else None
        predictions = self.estimator.predict(features_no_nan,
                                             estimator_target).to_series()
        predictions = predictions.rename(self.input_target_name)

        # Pad by the number of rows by which the predictions fall short of the
        # full feature table (never a negative amount).
        n_missing = max(0, features.shape[0] - predictions.shape[0])
        return infer_feature_types(pad_with_nans(predictions, n_missing))

예제 #4

0

파일 보기

파일: time_series_classification_pipelines.py 프로젝트: sparkpoints/evalml

    def _predict(self, X, y, objective=None, pad=False):
        """Predict with the estimator, applying the pipeline threshold if set.

        Arguments:
            X: Input data forwarded to compute_estimator_features.
            y: Target data forwarded to compute_estimator_features and
                drop_rows_with_nans.
            objective (Object, string or None): When given, it is resolved with
                get_objective; if a threshold is set, its decision_function
                produces the predictions.
            pad (bool): When True, the predictions are passed through
                pad_with_nans with the row-count difference between the
                feature table and the predictions.

        Returns:
            The result of infer_feature_types applied to the predictions.

        Raises:
            ValueError: If the resolved objective is not defined for this
                pipeline's problem type.
        """
        features = _convert_woodwork_types_wrapper(
            self.compute_estimator_features(X, y).to_dataframe())
        features_no_nan, y_no_nan = drop_rows_with_nans(features, y)

        if objective is not None:
            objective = get_objective(objective, return_instance=True)
            supported = objective.is_defined_for_problem_type(self.problem_type)
            if not supported:
                raise ValueError(
                    f"Objective {objective.name} is not defined for time series binary classification."
                )

        if self.threshold is not None:
            proba_table = self._estimator_predict_proba(
                features_no_nan, y_no_nan).to_dataframe()
            # Only the column at position 1 of the probability table is used.
            proba_column = proba_table.iloc[:, 1]
            if objective is not None:
                predictions = objective.decision_function(
                    proba_column, threshold=self.threshold, X=features_no_nan)
            else:
                predictions = proba_column > self.threshold
        else:
            predictions = self._estimator_predict(features_no_nan,
                                                  y_no_nan).to_series()

        if pad:
            n_missing = max(0, features.shape[0] - predictions.shape[0])
            predictions = pad_with_nans(predictions, n_missing)
        return infer_feature_types(predictions)

예제 #5

0

파일 보기

파일: time_series_classification_pipelines.py 프로젝트: sparkpoints/evalml

    def score(self, X, y, objectives):
        """Evaluate model performance on current and additional objectives.

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
            y (ww.DataColumn, pd.Series): True labels of length [n_samples]
            objectives (list): Non-empty list of objectives to score on

        Returns:
            dict: Ordered dictionary of objective scores
        """
        ww_features, ww_target = self._convert_to_woodwork(X, y)
        features = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
        target = _convert_woodwork_types_wrapper(ww_target.to_series())
        objective_instances = [
            get_objective(candidate, return_instance=True)
            for candidate in objectives
        ]

        # Ground truth is the encoded target shifted by -gap; predictions are
        # computed from the un-encoded target.
        shifted_target = self._encode_targets(target).shift(-self.gap)
        predicted, predicted_proba = self._compute_predictions(
            features, target, objective_instances, time_series=True)

        # Either output may be None; only convert the ones that were produced.
        if predicted is not None:
            predicted = _convert_woodwork_types_wrapper(predicted.to_series())
        if predicted_proba is not None:
            predicted_proba = _convert_woodwork_types_wrapper(
                predicted_proba.to_dataframe())

        shifted_target, predicted, predicted_proba = drop_rows_with_nans(
            shifted_target, predicted, predicted_proba)
        return self._score_all_objectives(
            features,
            shifted_target,
            predicted,
            y_pred_proba=predicted_proba,
            objectives=objective_instances,
        )

예제 #6

0

파일 보기

파일: time_series_classification_pipelines.py 프로젝트: sujala/evalml

 def _predict(self, X, y, objective=None, pad=False):
     """Predict with the estimator on the feature rows kept by drop_rows_with_nans.

     Arguments:
         X: Input data forwarded to compute_estimator_features.
         y: Target data forwarded to compute_estimator_features and
             drop_rows_with_nans.
         objective: Not read by this method body.
         pad (bool): When True, the predictions are converted to a series,
             passed through pad_with_nans with the row-count difference
             between the feature table and the predictions, and then through
             infer_feature_types.

     Returns:
         The estimator's predictions as returned by _estimator_predict when
         ``pad`` is False; otherwise the padded, type-inferred predictions.
     """
     features = _convert_woodwork_types_wrapper(
         self.compute_estimator_features(X, y).to_dataframe())
     features_no_nan, y_no_nan = drop_rows_with_nans(features, y)
     predictions = self._estimator_predict(features_no_nan, y_no_nan)
     if not pad:
         return predictions
     prediction_series = predictions.to_series()
     n_missing = max(0, features.shape[0] - predictions.shape[0])
     return infer_feature_types(pad_with_nans(prediction_series, n_missing))

예제 #7

0

파일 보기

파일: time_series_classification_pipelines.py 프로젝트: sujala/evalml

    def predict_proba(self, X, y=None):
        """Make probability estimates for labels.

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
            y: Target data; it is converted, encoded with _encode_targets and
                used when computing the estimator features.
                NOTE(review): defaults to None, yet `.to_series()` is called on
                the converted value -- confirm _convert_to_woodwork handles None.

        Returns:
            ww.DataTable: Probability estimates
        """
        ww_features, ww_target = self._convert_to_woodwork(X, y)
        features_df = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
        target = _convert_woodwork_types_wrapper(ww_target.to_series())
        encoded_target = self._encode_targets(target)

        features = _convert_woodwork_types_wrapper(
            self.compute_estimator_features(features_df,
                                            encoded_target).to_dataframe())
        features_no_nan, target_no_nan = drop_rows_with_nans(features,
                                                             encoded_target)

        proba = self._estimator_predict_proba(features_no_nan,
                                              target_no_nan).to_dataframe()
        # Label the probability columns with the encoder's classes.
        proba.columns = self._encoder.classes_

        # Pad by the number of rows by which the probabilities fall short of
        # the full feature table (never a negative amount).
        n_missing = max(0, features.shape[0] - proba.shape[0])
        return infer_feature_types(pad_with_nans(proba, n_missing))

예제 #8

0

파일 보기

파일: time_series_classification_pipelines.py 프로젝트: baagie7/evalml

    def fit(self, X, y):
        """Fit a time series classification pipeline.

        Arguments:
            X (ww.DataTable, pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features]
            y (ww.DataColumn, pd.Series, np.ndarray): The target training targets of length [n_samples]

        Returns:
            self
        """
        ww_features, ww_target = self._convert_to_woodwork(X, y)
        features = _convert_woodwork_types_wrapper(ww_features.to_dataframe())
        target = _convert_woodwork_types_wrapper(ww_target.to_series())

        # Fit the encoder on the raw target before encoding it.
        self._encoder.fit(target)
        encoded_target = self._encode_targets(target)

        transformed = _convert_woodwork_types_wrapper(
            self._compute_features_during_fit(features,
                                              encoded_target).to_dataframe())

        # The estimator is trained on the encoded target shifted by -gap;
        # features and shifted target go through drop_rows_with_nans together.
        train_features, train_target = drop_rows_with_nans(
            transformed, encoded_target.shift(-self.gap))
        self.estimator.fit(train_features, train_target)
        return self