Example #1
0
    def find_best_ensemble_method(self, df, actuals):
        """Print scoring diagnostics for each ensemble aggregation method.

        For every aggregation name ('min', 'max', 'average', 'median') that
        appears in a summary-stats column name, score that column against
        *actuals* using the scorer that matches this estimator's type.
        Returns nothing; output is printed.
        """
        all_predictions = self.get_all_predictions(df)
        stats = self.get_summary_stats(all_predictions)

        is_regressor = self.type_of_estimator == 'regressor'
        for agg_name in ('min', 'max', 'average', 'median'):
            print(agg_name)
            # A column belongs to an aggregation when its name contains it.
            matching = [c for c in stats.columns if agg_name in c]
            for col in matching:
                if is_regressor:
                    advanced_scoring_regressors(stats[col], actuals, name=agg_name)
                else:
                    advanced_scoring_classifiers(stats[col], actuals)
Example #2
0
    def score(self, X_test, y_test, advanced_scoring=True, verbose=2):
        """Score the trained pipeline on a held-out test set.

        Parameters
        ----------
        X_test : pd.DataFrame or list
            Test features; a plain list is converted to a DataFrame.
        y_test : iterable
            Ground-truth target values; materialized to a list.
        advanced_scoring : bool
            For classifiers, when True also print detailed per-class
            diagnostics via ``utils_scoring.advanced_scoring_classifiers``.
        verbose : int
            Verbosity level forwarded to the regressor scorer.

        Returns
        -------
        The scalar score from the configured scorer, or from the pipeline's
        own ``.score`` when no custom scorer is set.
        """
        if isinstance(X_test, list):
            X_test = pd.DataFrame(X_test)
        y_test = list(y_test)

        # Rows whose target is missing cannot be scored — drop them up front.
        X_test, y_test = utils.drop_missing_y_vals(X_test, y_test,
                                                   self.output_column)

        if self._scorer is not None:
            if self.type_of_estimator == 'regressor':
                return self._scorer.score(self.trained_pipeline,
                                          X_test,
                                          y_test,
                                          self.took_log_of_y,
                                          advanced_scoring=advanced_scoring,
                                          verbose=verbose,
                                          name=self.name)

            elif self.type_of_estimator == 'classifier':
                # TODO: can probably refactor accuracy score now that we've turned scoring into it's own class
                if self._scorer == accuracy_score:
                    # FIX: accuracy_score is a plain metric function, not a
                    # scorer object — it has no ``.score`` attribute, so the
                    # previous ``self._scorer.score(y_test, predictions)``
                    # raised AttributeError on this branch. Call it directly,
                    # matching the sibling implementation of this method.
                    predictions = self.trained_pipeline.predict(X_test)
                    return self._scorer(y_test, predictions)
                elif advanced_scoring:
                    score, probas = self._scorer.score(
                        self.trained_pipeline,
                        X_test,
                        y_test,
                        advanced_scoring=advanced_scoring)
                    utils_scoring.advanced_scoring_classifiers(probas,
                                                               y_test,
                                                               name=self.name)
                    return score
                else:
                    return self._scorer.score(
                        self.trained_pipeline,
                        X_test,
                        y_test,
                        advanced_scoring=advanced_scoring)
        else:
            # No custom scorer configured: defer to the pipeline's default.
            return self.trained_pipeline.score(X_test, y_test)
Example #3
0
    def score(self, X_test, y_test, advanced_scoring=True, verbose=2):
        """Evaluate the trained pipeline against held-out data.

        Uses the configured scorer callable when one is set (with
        estimator-type-specific arguments); otherwise falls back to the
        pipeline's built-in ``.score`` method.
        """
        if isinstance(X_test, list):
            X_test = pd.DataFrame(X_test)
        y_test = list(y_test)

        scorer = self._scorer
        if scorer is None:
            # No custom scorer configured — defer to the pipeline itself.
            return self.trained_pipeline.score(X_test, y_test)

        if self.type_of_estimator == 'regressor':
            return scorer(self.trained_pipeline,
                          X_test,
                          y_test,
                          self.took_log_of_y,
                          advanced_scoring=advanced_scoring,
                          verbose=verbose,
                          name=self.name)

        if self.type_of_estimator == 'classifier':
            if scorer == accuracy_score:
                # Plain metric function: compare labels against predictions.
                predicted_labels = self.trained_pipeline.predict(X_test)
                return scorer(y_test, predicted_labels)
            if advanced_scoring:
                score, probas = scorer(self.trained_pipeline,
                                       X_test,
                                       y_test,
                                       advanced_scoring=advanced_scoring)
                utils_scoring.advanced_scoring_classifiers(probas,
                                                           y_test,
                                                           name=self.name)
                return score
            return scorer(self.trained_pipeline,
                          X_test,
                          y_test,
                          advanced_scoring=advanced_scoring)
Example #4
0
 def getScore(self, df_test):
     """Print Brier-score diagnostics for the trained random-forest model.

     Parameters
     ----------
     df_test : test feature rows accepted by the model's ``predict_proba``.

     Returns
     -------
     The Brier score produced by ``advanced_scoring_classifiers``
     (0 is a perfect forecast; 1 is a wholly inaccurate one).
     """
     pred_proba = self.trained_model.predict_proba(df_test)
     # pred_proba = [proba[1] for proba in pred_proba]
     print("Random Forest Probability -->", pred_proba)
     # NOTE(review): actuals are hard-coded to [0, 1] — this only makes sense
     # for a fixed two-row test set with known labels; confirm against caller.
     actuals = [0, 1]
     brier_score = advanced_scoring_classifiers(pred_proba, actuals)
     # FIX: corrected user-facing messages — "Randon" -> "Random", and the
     # worst Brier score is 1 (the highest, since smaller is better), so
     # "The lowest possible score is 1" was self-contradictory.
     print(
         "*********** Accuracy of Random Forest Model ***********************"
     )
     print("*** The best possible Brier score is 0, for total accuracy.")
     print(
         "*** The worst possible score is 1, which means the forecast was wholly inaccurate."
     )
     print(
         "Smaller scores (closer to zero) indicate better forecasts. Scores in the middle (e.g. 0.44, 0.69) can be hard to interpret as good or bad"
     )
     print("Brier Score for this test is -->", brier_score)
     print(
         "*********** Accuracy of Random Forest Model ***********************"
     )
     return brier_score