def run():
    # init
    dataset = CaliforniaHousing()
    regressor = dataset.get_model()
    train, test = dataset.as_dmd()

    test_1st_half, test_2nd_half = test.split(ratio=0.2)

    metric = Metrics.mae
    pytrust = PyTrust(
        model=regressor,
        xtest=test_1st_half,
        metric=metric)

    xtest2, ytest2 = test_2nd_half.values, test_2nd_half.target

    method = 'mae'  # or 'rmse'
    uncertainty_model = pytrust.create_uncertainty_model(method=method)
    yp = uncertainty_model.predict(xtest2)  # same as model.predict
    uncertainty = uncertainty_model.uncertainty(xtest2)  # uncertainty value

    print('y_true, y_pred, uncertainty')
    print(numpy.concatenate(
        [ytest2.reshape(-1, 1), yp.reshape(-1, 1), uncertainty.reshape(-1, 1)],
        axis=1)[:10])

    # example
    plt.figure()

    uncertainty_levels = numpy.array([0, 0.2, 0.4, 0.6, 0.8, 1.0001])
    mn, mx = 1, 0

    # The uncertainty model may be based on 'confidence' or 'probability' for
    # classification, and on 'mae' or 'rmse' for regression.
    for method in ['mae', 'rmse']:
        # train uncertainty model
        uncertainty_model = pytrust.create_uncertainty_model(method=method)
        yp = uncertainty_model.predict(xtest2)  # same as model.predict
        uncertainty = uncertainty_model.uncertainty(xtest2)  # uncertainty value

        level_inds = numpy.digitize(uncertainty.ravel(), uncertainty_levels)

        performance = []
        for ibin in range(len(uncertainty_levels) - 1):
            inds = level_inds == ibin + 1
            if not any(inds):
                performance.append(0)
            else:
                subset_score = metric.function(y_true=ytest2[inds],
                                               y_pred=yp[inds])
                performance.append(subset_score)

        uncertainty_levels_middle = (uncertainty_levels[1:]
                                     + uncertainty_levels[:-1]) / 2

        plt.figure(1)
        plt.plot(uncertainty_levels_middle, performance,
                 '*-b' if method == 'mae' else '*-r')
        plt.xlabel("Uncertainty level")
        plt.ylabel("{} Score".format(metric.name))
        plt.title("{} score vs uncertainty level".format(metric.name))
        plt.legend(['method=mae', 'method=rmse'], loc='upper right')

        print(uncertainty_levels_middle)
        print(GeneralUtils.f3(performance))

        mn = min(min(performance), mn)
        mx = max(max(performance), mx)

        uncertainty_model.plot_calibration_curve()

    plt.figure(1)
    # emphasize bins
    for level in uncertainty_levels:
        plt.plot([level, level], [mn, mx], '-k')
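
# The binning in run() above hinges on numpy.digitize returning 1-based bin
# indices (hence the `ibin + 1` comparison). Below is a minimal, self-contained
# sketch of the same bin-and-score pattern, using synthetic data and sklearn's
# mean_absolute_error as hypothetical stand-ins for the pytolemaic objects.

import numpy
from sklearn.metrics import mean_absolute_error


def digitize_and_score_sketch():
    rng = numpy.random.RandomState(0)
    y_true = rng.rand(1000)
    y_pred = y_true + rng.normal(scale=0.1, size=1000)
    # fake "uncertainty": normalized absolute error, purely for illustration
    uncertainty = numpy.abs(y_true - y_pred)
    uncertainty = uncertainty / uncertainty.max()

    levels = numpy.array([0, 0.2, 0.4, 0.6, 0.8, 1.0001])
    bin_ids = numpy.digitize(uncertainty, levels)  # 1-based bin indices

    for ibin in range(len(levels) - 1):
        mask = bin_ids == ibin + 1
        score = mean_absolute_error(y_true[mask], y_pred[mask]) if mask.any() else 0
        print("bin [{:.1f}, {:.1f}): MAE={:.3f} ({} samples)".format(
            levels[ibin], levels[ibin + 1], score, int(mask.sum())))

# digitize_and_score_sketch()  # prints per-bin MAE, higher for noisier bins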
def plot(self, ax=None):
    if ax is None:
        fig, ax = plt.subplots(1)

    ci_low = GeneralUtils.f5(self.ci_low)
    ci_high = GeneralUtils.f5(self.ci_high)
    value = GeneralUtils.f5(self.value)

    if ci_high == ci_low:
        n_digits = 5
    else:
        n_digits = -int(numpy.log10(ci_high - ci_low)) + 1  # 0.0011 --> -(-2) + 1 = 3

    ax.plot([ci_low, ci_high], [1, 1], '-b',
            ci_low, 1, '|b',
            ci_high, 1, '|b',
            value, 1, 'or')

    delta = (ci_high - ci_low) * 1e-1 + 10 ** -n_digits / 2
    metric_obj = Metrics.supported_metrics()[self.metric]
    r_lim = 1e100 if metric_obj.is_loss else 1
    l_lim = 0 if metric_obj.is_loss else -1e100

    l_lim = max(l_lim, numpy.round(ci_low - delta, n_digits))
    r_lim = min(r_lim, numpy.round(ci_high + delta, n_digits))
    ax.set_xlim(l_lim, r_lim)

    n_points = 1 + int(numpy.round(r_lim - l_lim, n_digits) / 10 ** -n_digits) % 10
    x = numpy.linspace(l_lim, r_lim, num=n_points)
    xlabels = ["%.5g" % numpy.round(k, n_digits) for k in x]
    ax.set(xticks=x.tolist(),
           xticklabels=xlabels,
           yticklabels=[''],
           title='Confidence intervals for metric {}'.format(self.metric),
           ylabel='',
           xlabel='{}'.format(self.metric))

    # Annotate the interval edges and the metric value.
    for x, label in [(ci_low, 'ci_low (25%)'),
                     (value, '{} value'.format(self.metric)),
                     (ci_high, 'ci_high (75%)')]:
        y = 1.01 + 0.01 * (x == value)
        ax.text(x, y, label, ha="center", va="center")

    plt.draw()
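
# The n_digits heuristic in plot() picks tick-label precision from the width of
# the confidence interval; a tiny worked check with made-up interval values:

import numpy

_width = 0.0011                              # ci_high - ci_low
_n_digits = -int(numpy.log10(_width)) + 1    # log10(0.0011) ~ -2.96 -> int -> -2 -> 3
assert _n_digits == 3                        # tick labels keep ~3 decimal digits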
def normalized_confusion_matrix(self):
    cm = numpy.array(self.confusion_matrix)
    cm = cm / cm.sum(axis=1)[:, numpy.newaxis]
    return GeneralUtils.f3(cm).tolist()
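
# A minimal sketch of the row normalization above with a made-up 2x2 confusion
# matrix (the GeneralUtils.f3 rounding step is omitted here):

import numpy

_cm = numpy.array([[8, 2],
                   [1, 9]])
_normalized = _cm / _cm.sum(axis=1)[:, numpy.newaxis]
print(_normalized.tolist())  # each row (true class) sums to 1: [[0.8, 0.2], [0.1, 0.9]]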
def __init__(self):
    self._xtrain, self._ytrain = None, None
    self._xtest, self._ytest = None, None

    self.model = GeneralUtils.simple_imputation_pipeline(
        RandomForestClassifier(random_state=0, n_estimators=100, n_jobs=3))
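
# GeneralUtils.simple_imputation_pipeline presumably wraps the estimator so that
# missing values are imputed before fitting; a plain-sklearn sketch of such a
# pipeline (an illustration, not the library's actual helper):

from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline

model_sketch = make_pipeline(
    SimpleImputer(strategy='median'),  # fill missing values with column medians
    RandomForestClassifier(random_state=0, n_estimators=100, n_jobs=3))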
def score_value_report(self, model, dmd_test: DMD,
                       labels=None,
                       y_proba: numpy.ndarray = None,
                       y_pred: numpy.ndarray = None) -> [ScoringMetricReport]:
    '''
    :param model: model of interest
    :param dmd_test: test set
    :param labels: class labels for the classification reports (optional)
    :param y_proba: pre-calculated predicted probabilities for test set, if available
    :param y_pred: pre-calculated model predictions for test set, if available
    :return: scoring report
    '''
    score_report = []

    model_support_dmd = GeneralUtils.dmd_supported(model, dmd_test)

    x_test = dmd_test if model_support_dmd else dmd_test.values
    y_true = dmd_test.target

    is_classification = GeneralUtils.is_classification(model)

    confusion_matrix, scatter, classification_report = None, None, None
    if is_classification:
        y_proba = y_proba if y_proba is not None else model.predict_proba(x_test)
        y_pred = y_pred if y_pred is not None else numpy.argmax(y_proba, axis=1)

        confusion_matrix = ConfusionMatrixReport(
            y_true=y_true, y_pred=y_pred,
            labels=labels if labels is not None else unique_labels(y_true, y_pred))

        classification_report = SklearnClassificationReport(
            y_true=y_true, y_pred=y_pred, y_proba=y_proba, labels=labels)

        for metric in self.metrics:
            if not metric.ptype == CLASSIFICATION:
                continue

            yp = y_proba if metric.is_proba else y_pred

            score = metric.function(y_true, yp)
            ci_low, ci_high = Metrics.confidence_interval(metric,
                                                          y_true=y_true,
                                                          y_pred=y_pred,
                                                          y_proba=y_proba)
            score_report.append(ScoringMetricReport(metric=metric.name,
                                                    value=score,
                                                    ci_low=ci_low,
                                                    ci_high=ci_high))
    else:
        y_pred = y_pred if y_pred is not None else model.predict(x_test)

        error_bars = self._calc_error_bars(dmd_test, model)
        scatter = ScatterReport(y_true=y_true, y_pred=y_pred, error_bars=error_bars)

        for metric in self.metrics:
            if not metric.ptype == REGRESSION:
                continue

            score = metric.function(y_true, y_pred)
            ci_low, ci_high = Metrics.confidence_interval(metric,
                                                          y_true=y_true,
                                                          y_pred=y_pred)
            ci_low = GeneralUtils.f5(ci_low)
            ci_high = GeneralUtils.f5(ci_high)
            score = GeneralUtils.f5(score)
            score_report.append(ScoringMetricReport(metric=metric.name,
                                                    value=score,
                                                    ci_low=ci_low,
                                                    ci_high=ci_high))

    return score_report, confusion_matrix, scatter, classification_report
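
# The internals of Metrics.confidence_interval are not shown here; one common
# way to obtain such bounds is bootstrap resampling of the test predictions,
# sketched below with sklearn's mean_absolute_error. This is an illustration
# only -- the 25%/75% quantiles simply mirror the ci_low/ci_high labels used in
# ScoringMetricReport.plot above, not the library's actual implementation.

import numpy
from sklearn.metrics import mean_absolute_error


def bootstrap_ci_sketch(y_true, y_pred, metric=mean_absolute_error,
                        n_resamples=200, quantiles=(0.25, 0.75), seed=0):
    # Resample (y_true, y_pred) pairs with replacement and take quantiles of
    # the resulting metric distribution.
    rs = numpy.random.RandomState(seed)
    n = len(y_true)
    scores = [metric(y_true[idx], y_pred[idx])
              for idx in (rs.randint(0, n, n) for _ in range(n_resamples))]
    return numpy.quantile(scores, quantiles)

# Example usage with toy arrays:
# ci_low, ci_high = bootstrap_ci_sketch(numpy.array([3.0, 2.5, 4.0, 1.0]),
#                                       numpy.array([2.8, 2.7, 3.6, 1.4]))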
def sensitivity_analysis(self, model, dmd_test: DMD, metric,
                         dmd_train=None,
                         method=SensitivityTypes.shuffled,
                         raw_scores=False,
                         y_pred=None) -> SensitivityOfFeaturesReport:
    self.model_support_dmd = GeneralUtils.dmd_supported(model, dmd_test)

    x = dmd_test if self.model_support_dmd else dmd_test.values
    # 'y_pred or ...' is unsafe with numpy arrays (ambiguous truth value), so test for None
    y_pred = y_pred if y_pred is not None else model.predict(x)
    ytest = dmd_test.target

    score_function = self.metrics[metric].function
    if metric in ['auc', 'logloss'] and ytest is not None:
        base_score = score_function(ytest, model.predict_proba(x))
        y_pred = ytest
    else:
        base_score = 0

    predict_function = model.predict_proba if self.metrics[metric].is_proba \
        else model.predict

    scores = {}
    for i, name in enumerate(dmd_test.feature_names):
        if dmd_test.n_samples > self.max_samples_to_use:
            rs = numpy.random.RandomState(i)
            subset = rs.permutation(dmd_test.n_samples)[:self.max_samples_to_use]
            dmd_test_ = dmd_test.split_by_indices(subset)
            y_pred_ = y_pred[subset]
        else:
            dmd_test_ = dmd_test
            y_pred_ = y_pred

        shuffled_x = self.get_shuffled_x(dmd_test_, i,
                                         dmd_train=dmd_train,
                                         method=method,
                                         model_support_dmd=self.model_support_dmd)
        shuffled_pred = predict_function(shuffled_x)
        if base_score > 0:
            # larger difference means more impact, hence the leading '1 -'
            scores[name] = 1 - abs(base_score - score_function(y_pred_, shuffled_pred))
        else:
            # higher score means less impact
            scores[name] = score_function(y_pred_, shuffled_pred)

    if raw_scores:
        # description = "The raw scores of how each feature affects the model's predictions."
        return SensitivityOfFeaturesReport(
            method=method,
            sensitivities=scores,
            stats_report=self._sensitivity_stats_report(scores))

    # A higher score / lower loss means shuffling the feature had less impact.
    if self.metrics[metric].is_loss:
        impact = scores
    else:
        impact = {name: 1 - score for name, score in scores.items()}

    total_impact = sum(impact.values())
    impact = {name: float(score / total_impact) for name, score in impact.items()}
    impact = GeneralUtils.round_values(impact)

    # description = "The impact of each feature on the model's predictions. "
    #               "Higher values mean larger impact (0 means no impact at all). "
    #               "Values are normalized to 1."
    return SensitivityOfFeaturesReport(
        method=method,
        sensitivities=impact,
        stats_report=self._sensitivity_stats_report(sensitivities=impact))
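
# The loop above is a variant of permutation importance: shuffle one feature at
# a time and measure how much the predictions change. Below is a self-contained
# sketch of that core idea on a toy dataset; it deliberately skips the subset
# sampling, the dmd_train-based shuffling method, and the predict_proba handling
# that sensitivity_analysis supports.

import numpy
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


def permutation_sensitivity_sketch():
    X, y = load_iris(return_X_y=True)
    model = RandomForestClassifier(random_state=0).fit(X, y)
    base = accuracy_score(y, model.predict(X))

    rs = numpy.random.RandomState(0)
    impact = {}
    for i in range(X.shape[1]):
        X_shuffled = X.copy()
        X_shuffled[:, i] = rs.permutation(X_shuffled[:, i])
        # larger score drop -> the shuffled feature mattered more
        impact[i] = base - accuracy_score(y, model.predict(X_shuffled))

    total = sum(impact.values()) or 1.0  # avoid division by zero
    return {i: round(v / total, 3) for i, v in impact.items()}  # normalized to ~1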