import numpy as np
from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold
from betacal import BetaCalibration

# `calibrate`, `beta_test`, `cross_entropy`, `brier_score`, `_DummyCalibration`,
# `fit_beta_moments`, `fit_beta_midpoint` and the module-level `seed` are
# project-local and assumed to be imported elsewhere; their import paths are
# not shown in this excerpt.


def cv_p_improvement_correct(base_classifier, x_train, y_train, x_test, y_test,
                             cv=2, score_type=None):
    # Histogram the goodness-of-fit p-values (computed on the *calibration*
    # scores) against whether beta calibration improved the test log-loss.
    bins = np.linspace(0, 1, 101)   # 100 equal-width p-value bins on [0, 1]
    improv_counts = np.zeros(100)
    total_counts = np.zeros(100)
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
    for train, cali in skf.split(X=x_train, y=y_train):
        x_t, y_t = x_train[train], y_train[train]
        x_c, y_c = x_train[cali], y_train[cali]
        classifier = clone(base_classifier)
        classifier.fit(x_t, y_t)
        ccv = calibrate(classifier, x_c, y_c, method="beta",
                        score_type=score_type)
        # Positive-class scores only ([:, 1]), so that beta_test and
        # cross_entropy receive 1-D arrays, as they do elsewhere in this module.
        scores_c = ccv.base_estimator.predict_proba(x_c)[:, 1]
        scores = ccv.base_estimator.predict_proba(x_test)[:, 1]
        scores_beta = ccv.predict_proba(x_test)[:, 1]
        ll_before = cross_entropy(scores, y_test)
        ll_after = cross_entropy(scores_beta, y_test)
        # The test is run on the calibration-fold scores; this is what makes
        # this variant "correct" (cf. cv_p_improvement below, which uses the
        # test-set scores).
        test = beta_test(ccv.calibrator.calibrator_.map_, test_type="adev",
                         scores=scores_c)
        idx = np.digitize(test["p-value"], bins) - 1
        if idx == 100:  # a p-value of exactly 1.0 falls past the last edge
            idx = 99
        total_counts[idx] += 1
        if ll_after < ll_before:
            improv_counts[idx] += 1
    return {"improv": improv_counts, "total": total_counts}
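

# The helper below is a sketch and an assumption (it is not part of the
# original module): a small convenience for consuming the dict returned by
# cv_p_improvement_correct.
def improvement_rate(counts):
    # Empirical probability that beta calibration improves test log-loss,
    # per p-value bin, guarding against division by zero in empty bins.
    return counts["improv"] / np.maximum(counts["total"], 1)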


def cv_p_improvement(base_classifier, x_train, y_train, x_test, y_test,
                     cv=2, score_type=None):
    # Collect goodness-of-fit p-values over the folds where beta calibration
    # improved the test log-loss. Note that, unlike cv_p_improvement_correct
    # above, the test here is run on the *test-set* scores rather than the
    # calibration-fold scores.
    p_values = np.array([])
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
    for train, cali in skf.split(X=x_train, y=y_train):
        x_t, y_t = x_train[train], y_train[train]
        x_c, y_c = x_train[cali], y_train[cali]
        classifier = clone(base_classifier)
        classifier.fit(x_t, y_t)
        ccv = calibrate(classifier, x_c, y_c, method="beta",
                        score_type=score_type)
        scores = ccv.base_estimator.predict_proba(x_test)[:, 1]  # positive class only
        scores_beta = ccv.predict_proba(x_test)[:, 1]
        ll_before = cross_entropy(scores, y_test)
        ll_after = cross_entropy(scores_beta, y_test)
        if ll_after < ll_before:
            test = beta_test(ccv.calibrator.calibrator_.map_, test_type="adev",
                             scores=scores)
            p_values = np.append(p_values, test["p-value"])
    return p_values


def fit(self, X, y, sample_weight=None):
    """Fit beta calibration only if the classifier is considered uncalibrated.

    Method of a calibration-wrapper class (the class definition is not shown
    in this excerpt).

    Parameters
    ----------
    X : array-like, shape (n_samples,)
        Training scores (classifier outputs on the calibration set).

    y : array-like, shape (n_samples,)
        Training target.

    sample_weight : array-like, shape = [n_samples] or None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    # Fit the full three-parameter ("abm") beta map, then test whether it
    # deviates significantly from the identity map.
    self._calibrator = BetaCalibration(parameters="abm").fit(X, y)
    test = beta_test(self._calibrator.calibrator_.map_, test_type="adev",
                     scores=X)
    if test["p-value"] >= 0.05:
        # No significant miscalibration detected: keep the scores as they are.
        self._calibrator = _DummyCalibration().fit(X, y)
    return self
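

# Illustrative, self-contained sketch (an assumption, not part of the original
# repo): the same betacal call pattern used by fit() above, applied to
# synthetic, deliberately miscalibrated scores.
def _betacal_demo(n=200, rng_seed=0):
    rng = np.random.default_rng(rng_seed)
    raw = rng.uniform(size=n)                               # raw scores in [0, 1]
    labels = (rng.uniform(size=n) < raw ** 2).astype(int)   # true P(y=1|s) = s**2
    bc = BetaCalibration(parameters="abm").fit(raw, labels)
    return bc.predict(raw)                                  # calibrated probabilities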


def cv_confidence_intervals(base_classifier, x_train, y_train, x_test, y_test,
                            cv=2, score_type=None):
    # Use the first fold only: train the base classifier, fit a beta map on
    # the held-out calibration scores, and return the beta test output
    # together with the change in test log-loss and Brier score.
    intervals = None
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
    for train, cali in skf.split(X=x_train, y=y_train):
        x_t, y_t = x_train[train], y_train[train]
        x_c, y_c = x_train[cali], y_train[cali]
        classifier = clone(base_classifier)
        classifier.fit(x_t, y_t)
        # With method=None, predict_proba is used here as the raw
        # (uncalibrated) scores.
        ccv = calibrate(classifier, x_c, y_c, method=None,
                        score_type=score_type)
        scores = ccv.predict_proba(x_c)[:, 1]
        scores_test = ccv.predict_proba(x_test)[:, 1]
        ll_before = cross_entropy(scores_test, y_test)
        brier_before = brier_score(scores_test, y_test)
        calibrator = BetaCalibration(parameters="abm").fit(scores, y_c)
        ll_after = cross_entropy(calibrator.predict(scores_test), y_test)
        brier_after = brier_score(calibrator.predict(scores_test), y_test)
        original_map = calibrator.calibrator_.map_
        intervals = beta_test(original_map, test_type="adev", scores=scores)
        intervals["ll_diff"] = ll_after - ll_before
        intervals["bs_diff"] = brier_after - brier_before
        break  # only the first fold is used
    return intervals


def cv_all_p(base_classifier, x_train, y_train, x_test, y_test, cv=2,
             score_type=None):
    # For every fold, record the beta-test p-value (on the calibration-fold
    # scores) and whether beta calibration improved the test log-loss.
    p_values = np.zeros(cv)
    improvements = np.zeros(cv)
    # p_values_dist is only filled by the disabled block below and is
    # currently returned as zeros.
    p_values_dist = np.zeros(cv)
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=seed)
    for i, (train, cali) in enumerate(skf.split(X=x_train, y=y_train)):
        x_t, y_t = x_train[train], y_train[train]
        x_c, y_c = x_train[cali], y_train[cali]
        classifier = clone(base_classifier)
        classifier.fit(x_t, y_t)
        ccv = calibrate(classifier, x_c, y_c, method="beta",
                        score_type=score_type)
        scores_c = ccv.base_estimator.predict_proba(x_c)[:, 1]
        scores = ccv.base_estimator.predict_proba(x_test)[:, 1]
        test = beta_test(ccv.calibrator.calibrator_.map_, test_type="adev",
                         scores=scores_c)
        p_values[i] = test["p-value"]
        scores_beta = ccv.predict_proba(x_test)[:, 1]
        ll_before = cross_entropy(scores, y_test)
        ll_after = cross_entropy(scores_beta, y_test)
        improvements[i] = ll_after < ll_before
        # Disabled alternative: derive the beta map directly from
        # method-of-moments fits to the per-class score distributions.
        # df_pos = scores_c[y_c == 1]
        # df_neg = scores_c[y_c == 0]
        # alpha_pos, beta_pos = fit_beta_moments(df_pos)
        # alpha_neg, beta_neg = fit_beta_moments(df_neg)
        # a = alpha_pos - alpha_neg
        # if np.isnan(a):
        #     a = 0
        # if a > 100:
        #     a = 100
        # b = beta_neg - beta_pos
        # if np.isnan(b):
        #     b = 0
        # if b > 100:
        #     b = 100
        # prior_pos = len(df_pos) / len(scores_c)
        # prior_neg = len(df_neg) / len(scores_c)
        # m = fit_beta_midpoint(prior_pos, alpha_pos, beta_pos,
        #                       prior_neg, alpha_neg, beta_neg)
        # map_ = [a, b, m]  # renamed from `map` to avoid shadowing the builtin
        # test = beta_test(map_, test_type="adev", scores=scores_c)
        # p_values_dist[i] = test["p-value"]
    return p_values, improvements, p_values_dist
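

if __name__ == "__main__":
    # Illustrative driver (an assumption, not part of the original
    # experiments): run cv_all_p on a synthetic binary task with a naive
    # Bayes base model. Requires the project-local helpers (`calibrate`,
    # `beta_test`, `cross_entropy`) and the module-level `seed`.
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    x, y = make_classification(n_samples=1000, n_features=20, random_state=seed)
    x_tr, x_te, y_tr, y_te = train_test_split(
        x, y, test_size=0.3, stratify=y, random_state=seed)
    p_vals, improved, _ = cv_all_p(GaussianNB(), x_tr, y_tr, x_te, y_te, cv=5)
    print("p-values per fold:", p_vals)
    print("log-loss improved per fold:", improved.astype(bool))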