def score(self, X: DataFrame, y=None, model=None):
    if y is None:
        X, y = self.extract_label(X)
    self._validate_class_labels(y)
    w = None
    if self.weight_evaluation:
        X, w = extract_column(X, self.sample_weight)

    if self.eval_metric.needs_pred:
        y_pred = self.predict(X=X, model=model, as_pandas=False)
        if self.problem_type == BINARY:
            # Use 1 and 0, otherwise f1 can crash due to unknown pos_label.
            y_pred = self.label_cleaner.transform(y_pred)
            y = self.label_cleaner.transform(y)
    elif self.eval_metric.needs_quantile:
        y_pred = self.predict(X=X, model=model, as_pandas=False)
    else:
        y_pred = self.predict_proba(X=X, model=model, as_pandas=False, as_multiclass=False)
        y = self.label_cleaner.transform(y)

    return compute_weighted_metric(
        y,
        y_pred,
        self.eval_metric,
        w,
        weight_evaluation=self.weight_evaluation,
        quantile_levels=self.quantile_levels,
    )
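# Illustrative sketch (not part of this module): compute_weighted_metric above
# falls back to an unweighted score when w is None. A minimal standalone
# analogue, assuming a plain accuracy metric (all names here are hypothetical):
#
#     import numpy as np
#
#     def weighted_accuracy(y_true, y_pred, sample_weight=None):
#         # Per-sample correctness, then a plain or weighted mean.
#         correct = (np.asarray(y_true) == np.asarray(y_pred)).astype(float)
#         if sample_weight is None:
#             return float(correct.mean())
#         w = np.asarray(sample_weight, dtype=float)
#         return float((correct * w).sum() / w.sum())
#
#     weighted_accuracy([0, 1, 1], [0, 1, 0])                           # ~0.667
#     weighted_accuracy([0, 1, 1], [0, 1, 0], sample_weight=[1, 1, 4])  # ~0.333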
def score_debug(self, X: DataFrame, y=None, extra_info=False, compute_oracle=False, extra_metrics=None, silent=False):
    leaderboard_df = self.leaderboard(extra_info=extra_info, silent=silent)
    if y is None:
        X, y = self.extract_label(X)
    if extra_metrics is None:
        extra_metrics = []
    self._validate_class_labels(y)
    w = None
    if self.weight_evaluation:
        X, w = extract_column(X, self.sample_weight)

    X = self.transform_features(X)
    y_internal = self.label_cleaner.transform(y)
    y_internal = y_internal.fillna(-1)

    trainer = self.load_trainer()
    scores = {}
    all_trained_models = trainer.get_model_names()
    all_trained_models_can_infer = trainer.get_model_names(can_infer=True)
    all_trained_models_original = all_trained_models.copy()
    model_pred_proba_dict, pred_time_test_marginal = trainer.get_model_pred_proba_dict(
        X=X, models=all_trained_models_can_infer, fit=False, record_pred_time=True
    )

    if compute_oracle:
        pred_probas = list(model_pred_proba_dict.values())
        ensemble_selection = EnsembleSelection(
            ensemble_size=100,
            problem_type=trainer.problem_type,
            metric=self.eval_metric,
            quantile_levels=self.quantile_levels,
        )
        ensemble_selection.fit(
            predictions=pred_probas,
            labels=y_internal,
            identifiers=None,
            sample_weight=w,
        )  # TODO: Only fit non-nan
        oracle_weights = ensemble_selection.weights_
        oracle_pred_time_start = time.time()
        oracle_pred_proba_norm = [pred * weight for pred, weight in zip(pred_probas, oracle_weights)]
        oracle_pred_proba_ensemble = np.sum(oracle_pred_proba_norm, axis=0)
        oracle_pred_time = time.time() - oracle_pred_time_start
        model_pred_proba_dict['OracleEnsemble'] = oracle_pred_proba_ensemble
        pred_time_test_marginal['OracleEnsemble'] = oracle_pred_time
        all_trained_models.append('OracleEnsemble')

    scoring_args = dict(
        y=y,
        y_internal=y_internal,
        sample_weight=w,
    )

    extra_scores = {}
    for model_name, y_pred_proba_internal in model_pred_proba_dict.items():
        scores[model_name] = self._score_with_pred_proba(
            y_pred_proba_internal=y_pred_proba_internal,
            metric=self.eval_metric,
            **scoring_args
        )
        for metric in extra_metrics:
            metric = get_metric(metric, self.problem_type, 'leaderboard_metric')
            if metric.name not in extra_scores:
                extra_scores[metric.name] = {}
            extra_scores[metric.name][model_name] = self._score_with_pred_proba(
                y_pred_proba_internal=y_pred_proba_internal,
                metric=metric,
                **scoring_args
            )

    if extra_scores:
        series = []
        for metric in extra_scores:
            series.append(pd.Series(extra_scores[metric], name=metric))
        df_extra_scores = pd.concat(series, axis=1)
        extra_metrics_names = list(df_extra_scores.columns)
        df_extra_scores['model'] = df_extra_scores.index
        df_extra_scores = df_extra_scores.reset_index(drop=True)
    else:
        df_extra_scores = None
        extra_metrics_names = None

    pred_time_test = {}
    # TODO: Add support for calculating pred_time_test_full for oracle_ensemble, need to copy graph from trainer and add oracle_ensemble to it with proper edges.
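    # Illustrative note: the loop below reports full inference time as the sum
    # of marginal times over a model's minimum model set (the model itself plus
    # every base model it depends on). E.g., for hypothetical marginal times
    # {'M1': 0.10, 'M2': 0.20, 'WE': 0.01} where 'WE' stacks on 'M1' and 'M2',
    # the full time of 'WE' would be 0.10 + 0.20 + 0.01 = 0.31.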
    for model in model_pred_proba_dict.keys():
        if model in all_trained_models_original:
            base_model_set = trainer.get_minimum_model_set(model)
            if len(base_model_set) == 1:
                pred_time_test[model] = pred_time_test_marginal[base_model_set[0]]
            else:
                pred_time_test_full_num = 0
                for base_model in base_model_set:
                    pred_time_test_full_num += pred_time_test_marginal[base_model]
                pred_time_test[model] = pred_time_test_full_num
        else:
            pred_time_test[model] = None

    scored_models = list(scores.keys())
    for model in all_trained_models:
        if model not in scored_models:
            scores[model] = None
            pred_time_test[model] = None
            pred_time_test_marginal[model] = None

    logger.debug('Model scores:')
    logger.debug(str(scores))
    model_names_final = list(scores.keys())
    df = pd.DataFrame(
        data={
            'model': model_names_final,
            'score_test': list(scores.values()),
            'pred_time_test': [pred_time_test[model] for model in model_names_final],
            'pred_time_test_marginal': [pred_time_test_marginal[model] for model in model_names_final],
        }
    )

    if df_extra_scores is not None:
        df = pd.merge(df, df_extra_scores, on='model', how='left')

    df_merged = pd.merge(df, leaderboard_df, on='model', how='left')
    df_merged = df_merged.sort_values(
        by=['score_test', 'pred_time_test', 'score_val', 'pred_time_val', 'model'],
        ascending=[False, True, False, True, False],
    ).reset_index(drop=True)
    df_columns_lst = df_merged.columns.tolist()
    explicit_order = [
        'model',
        'score_test',
    ]
    if extra_metrics_names is not None:
        explicit_order += extra_metrics_names
    explicit_order += [
        'score_val',
        'pred_time_test',
        'pred_time_val',
        'fit_time',
        'pred_time_test_marginal',
        'pred_time_val_marginal',
        'fit_time_marginal',
        'stack_level',
        'can_infer',
        'fit_order',
    ]
    df_columns_other = [column for column in df_columns_lst if column not in explicit_order]
    df_columns_new = explicit_order + df_columns_other
    df_merged = df_merged[df_columns_new]
    return df_merged
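# Illustrative sketch (not part of this module): the compute_oracle branch in
# score_debug combines base model predictions as a weighted average using the
# weights found by EnsembleSelection. A minimal numpy analogue (weights here
# are made up for illustration):
#
#     import numpy as np
#
#     pred_probas = [np.array([0.9, 0.2, 0.6]), np.array([0.7, 0.4, 0.8])]
#     oracle_weights = [0.75, 0.25]  # stand-in for ensemble_selection.weights_
#     oracle_pred_proba = np.sum(
#         [pred * weight for pred, weight in zip(pred_probas, oracle_weights)],
#         axis=0,
#     )
#     # -> array([0.85, 0.25, 0.65])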