Example 1
    def score(self, X: DataFrame, y=None, model=None):
        # If the label column is still inside X, split it out.
        if y is None:
            X, y = self.extract_label(X)
        self._validate_class_labels(y)
        w = None
        # Optionally pull sample weights out of X for weighted evaluation.
        if self.weight_evaluation:
            X, w = extract_column(X, self.sample_weight)
        if self.eval_metric.needs_pred:
            # Metric operates on class/regression predictions.
            y_pred = self.predict(X=X, model=model, as_pandas=False)
            if self.problem_type == BINARY:
                # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
                y_pred = self.label_cleaner.transform(y_pred)
                y = self.label_cleaner.transform(y)
        elif self.eval_metric.needs_quantile:
            y_pred = self.predict(X=X, model=model, as_pandas=False)
        else:
            # Metric operates on predicted probabilities.
            y_pred = self.predict_proba(X=X,
                                        model=model,
                                        as_pandas=False,
                                        as_multiclass=False)
            y = self.label_cleaner.transform(y)
        return compute_weighted_metric(
            y,
            y_pred,
            self.eval_metric,
            w,
            weight_evaluation=self.weight_evaluation,
            quantile_levels=self.quantile_levels)
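
A minimal usage sketch (not from the source): it assumes `learner` is an already-fit instance of the class that defines `score`, and that `test_df` still contains the label column so `extract_label` can split it out. The model name is illustrative.

# Hypothetical usage; `learner`, `test_df`, and the model name are illustrative.
overall_score = learner.score(X=test_df)
single_model_score = learner.score(X=test_df, model='WeightedEnsemble_L2')
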
Example 2
    def score_debug(self, X: DataFrame, y=None, extra_info=False, compute_oracle=False, extra_metrics=None, silent=False):
        leaderboard_df = self.leaderboard(extra_info=extra_info, silent=silent)
        if y is None:
            X, y = self.extract_label(X)
        if extra_metrics is None:
            extra_metrics = []
        self._validate_class_labels(y)
        w = None
        if self.weight_evaluation:
            X, w = extract_column(X, self.sample_weight)

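        # Transform X with the fitted feature pipeline and encode y into the
        # internal label space; labels unseen at fit time become -1.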
        X = self.transform_features(X)
        y_internal = self.label_cleaner.transform(y)
        y_internal = y_internal.fillna(-1)

        trainer = self.load_trainer()
        scores = {}
        all_trained_models = trainer.get_model_names()
        all_trained_models_can_infer = trainer.get_model_names(can_infer=True)
        all_trained_models_original = all_trained_models.copy()
        model_pred_proba_dict, pred_time_test_marginal = trainer.get_model_pred_proba_dict(X=X, models=all_trained_models_can_infer, fit=False, record_pred_time=True)

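        # Optionally fit a post-hoc "oracle" weighted ensemble on the test labels
        # themselves, giving an upper-bound estimate of the achievable ensemble score.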
        if compute_oracle:
            pred_probas = list(model_pred_proba_dict.values())
            ensemble_selection = EnsembleSelection(ensemble_size=100, problem_type=trainer.problem_type, metric=self.eval_metric, quantile_levels=self.quantile_levels)
            ensemble_selection.fit(predictions=pred_probas, labels=y_internal, identifiers=None, sample_weight=w)  # TODO: Only fit non-nan

            oracle_weights = ensemble_selection.weights_
            oracle_pred_time_start = time.time()
            oracle_pred_proba_norm = [pred * weight for pred, weight in zip(pred_probas, oracle_weights)]
            oracle_pred_proba_ensemble = np.sum(oracle_pred_proba_norm, axis=0)
            oracle_pred_time = time.time() - oracle_pred_time_start
            model_pred_proba_dict['OracleEnsemble'] = oracle_pred_proba_ensemble
            pred_time_test_marginal['OracleEnsemble'] = oracle_pred_time
            all_trained_models.append('OracleEnsemble')

        scoring_args = dict(
            y=y,
            y_internal=y_internal,
            sample_weight=w
        )

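        # Score every model's internal prediction probabilities with the primary
        # eval_metric, plus any requested extra metrics.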
        extra_scores = {}
        for model_name, y_pred_proba_internal in model_pred_proba_dict.items():
            scores[model_name] = self._score_with_pred_proba(
                y_pred_proba_internal=y_pred_proba_internal,
                metric=self.eval_metric,
                **scoring_args
            )
            for metric in extra_metrics:
                metric = get_metric(metric, self.problem_type, 'leaderboard_metric')
                if metric.name not in extra_scores:
                    extra_scores[metric.name] = {}
                extra_scores[metric.name][model_name] = self._score_with_pred_proba(
                    y_pred_proba_internal=y_pred_proba_internal,
                    metric=metric,
                    **scoring_args
                )

        if extra_scores:
            series = []
            for metric in extra_scores:
                series.append(pd.Series(extra_scores[metric], name=metric))
            df_extra_scores = pd.concat(series, axis=1)
            extra_metrics_names = list(df_extra_scores.columns)
            df_extra_scores['model'] = df_extra_scores.index
            df_extra_scores = df_extra_scores.reset_index(drop=True)
        else:
            df_extra_scores = None
            extra_metrics_names = None

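        # Convert marginal prediction times into full prediction times by summing
        # over each model's minimum set of required base models.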
        pred_time_test = {}
        # TODO: Add support for calculating pred_time_test_full for oracle_ensemble, need to copy graph from trainer and add oracle_ensemble to it with proper edges.
        for model in model_pred_proba_dict.keys():
            if model in all_trained_models_original:
                base_model_set = trainer.get_minimum_model_set(model)
                if len(base_model_set) == 1:
                    pred_time_test[model] = pred_time_test_marginal[base_model_set[0]]
                else:
                    pred_time_test_full_num = 0
                    for base_model in base_model_set:
                        pred_time_test_full_num += pred_time_test_marginal[base_model]
                    pred_time_test[model] = pred_time_test_full_num
            else:
                pred_time_test[model] = None

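        # Models that could not be used for inference were never scored; keep them
        # in the output with None for score and prediction times.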
        scored_models = list(scores.keys())
        for model in all_trained_models:
            if model not in scored_models:
                scores[model] = None
                pred_time_test[model] = None
                pred_time_test_marginal[model] = None

        logger.debug('Model scores:')
        logger.debug(str(scores))
        model_names_final = list(scores.keys())
        df = pd.DataFrame(
            data={
                'model': model_names_final,
                'score_test': list(scores.values()),
                'pred_time_test': [pred_time_test[model] for model in model_names_final],
                'pred_time_test_marginal': [pred_time_test_marginal[model] for model in model_names_final],
            }
        )
        if df_extra_scores is not None:
            df = pd.merge(df, df_extra_scores, on='model', how='left')

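        # Join the test-set scores with the validation leaderboard and put the most
        # relevant columns first.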
        df_merged = pd.merge(df, leaderboard_df, on='model', how='left')
        df_merged = df_merged.sort_values(by=['score_test', 'pred_time_test', 'score_val', 'pred_time_val', 'model'], ascending=[False, True, False, True, False]).reset_index(drop=True)
        df_columns_lst = df_merged.columns.tolist()
        explicit_order = [
            'model',
            'score_test',
        ]
        if extra_metrics_names is not None:
            explicit_order += extra_metrics_names
        explicit_order += [
            'score_val',
            'pred_time_test',
            'pred_time_val',
            'fit_time',
            'pred_time_test_marginal',
            'pred_time_val_marginal',
            'fit_time_marginal',
            'stack_level',
            'can_infer',
            'fit_order',
        ]
        df_columns_other = [column for column in df_columns_lst if column not in explicit_order]
        df_columns_new = explicit_order + df_columns_other
        df_merged = df_merged[df_columns_new]

        return df_merged
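
For reference, a small sketch (not from the source) of how the oracle-ensemble combination above works: each model's prediction probabilities are scaled by its selection weight and the results are summed element-wise.

# Illustrative numbers only; the weights come from EnsembleSelection.weights_ in the code above.
import numpy as np

pred_probas = [np.array([0.2, 0.8]), np.array([0.6, 0.4])]   # two models, binary problem
oracle_weights = [0.75, 0.25]
oracle_pred_proba = np.sum([p * w for p, w in zip(pred_probas, oracle_weights)], axis=0)
# -> array([0.3, 0.7])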