def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    y = np.array([10, 0, 2])
    y_ = np.array([4, 4, 4])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
def bench_isotonic_regression(Y):
    """
    Runs a single iteration of isotonic regression on the input data,
    and reports the total time taken (in seconds).
    """
    gc.collect()

    tstart = datetime.now()
    isotonic_regression(Y)
    return (datetime.now() - tstart).total_seconds()
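# A hedged usage sketch for the benchmark above (sizes and seed are
# illustrative, not from the source; assumes numpy is imported as np
# alongside the function's gc/datetime dependencies):
def bench_isotonic_sizes(sizes=(10**3, 10**4, 10**5), seed=0):
    rng = np.random.RandomState(seed)
    for n in sizes:
        Y = rng.randn(n).cumsum()  # noisy, roughly increasing input
        print(n, bench_isotonic_regression(Y))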
def prox_owl(v, w):
    r"""OWL norm proximal operator

    From pyowl: https://github.com/vene/pyowl/
    Author: Vlad Niculae <*****@*****.**>

    The weights of the OWL norm can change its behavior:
        - For l1, \lambda_1 = w_1 = w_2 = ... = w_n
        - For l∞, \lambda_1 = w_1 > w_2 = w_3 ... = w_n = 0
        - For OSCAR, w_i = λ1 + λ2(n - i), for i = 1, ..., n, λ1 > 0, λ2 > 0

    References
    ----------
    X. Zeng, M. A. T. Figueiredo,
    The Ordered Weighted $l_1$ Norm: Atomic Formulation, Projections,
    and Algorithms.

    M. Bogdan, E. van den Berg, W. Su, and E. Candès,
    Statistical Estimation and Testing via the Ordered $l_1$ Norm.
    """
    # === remove signs ===
    s = np.abs(v)
    # === sort permutation matrix ===
    ix = np.argsort(s)[::-1]
    # === u = sorted s ===
    u = s[ix]
    # === projection onto the monotone, non-negative decreasing cone ===
    x = isotonic_regression(u - w, y_min=0, increasing=False)
    # === unsort ===
    inv_ix = np.zeros_like(ix)
    inv_ix[ix] = np.arange(len(v))
    x = x[inv_ix]
    # === restore signs ===
    res = np.sign(v) * x
    return res
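# A hedged usage sketch: OSCAR weights w_i = lam1 + lam2 * (n - i) are
# non-increasing, as the OWL prox requires. The lam1/lam2 values below are
# illustrative, not from the source.
def _example_prox_owl_oscar(seed=0):
    rng = np.random.RandomState(seed)
    v = rng.randn(6)
    n = len(v)
    lam1, lam2 = 0.1, 0.05
    w = lam1 + lam2 * (n - np.arange(1, n + 1))  # w_1 >= ... >= w_n = lam1
    return prox_owl(v, w)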
def test_isotonic_regression_sample_weight_not_overwritten():
    """Check that calling fitting function of isotonic regression will not
    overwrite `sample_weight`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20508
    """
    X, y = make_regression(n_samples=10, n_features=1, random_state=41)
    sample_weight_original = np.ones_like(y)
    sample_weight_original[0] = 10
    sample_weight_fit = sample_weight_original.copy()

    isotonic_regression(y, sample_weight=sample_weight_fit)
    assert_allclose(sample_weight_fit, sample_weight_original)

    IsotonicRegression().fit(X, y, sample_weight=sample_weight_fit)
    assert_allclose(sample_weight_fit, sample_weight_original)
def test_l2_agrees_with_sklearn(self):
    rng = np.random.RandomState(0)
    y = rng.randn(10) * rng.randint(1, 5)
    sol = np.zeros_like(y)
    isotonic.isotonic_l2(y, sol)
    sol_skl = isotonic_regression(y, increasing=False)
    np.testing.assert_array_almost_equal(sol, sol_skl)
def kfold_cv(X, K=10, isotonic=True):
    """K-fold cross-validated eigenvalues for LW nonlinear shrinkage"""
    S = empirical_covariance(X)
    lam, U = np.linalg.eigh(S)
    d = _nls_cv(X, S, K)
    if isotonic:
        d = isotonic_regression(d, increasing=True)
    return U @ np.diag(d) @ U.T
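# Why the isotonic step in kfold_cv: np.linalg.eigh returns eigenvalues in
# ascending order, but the cross-validated estimates d can come out
# non-monotone. A small made-up illustration of the projection:
def _example_isotonic_eigenvalues():
    d_noisy = np.array([0.2, 0.9, 0.7, 1.5, 1.4, 2.0])
    # pool-adjacent-violators averages the out-of-order pairs:
    # -> [0.2, 0.8, 0.8, 1.45, 1.45, 2.0]
    return isotonic_regression(d_noisy, increasing=True)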
def test_isotonic_regression(self):
    data = np.abs(np.random.randn(100))
    data = data.cumsum()
    df = pdml.ModelFrame(np.arange(len(data)), target=data)
    result = df.isotonic.isotonic_regression()
    expected = isotonic.isotonic_regression(data)

    self.assertIsInstance(result, pdml.ModelSeries)
    self.assert_index_equal(result.index, df.index)
    self.assert_numpy_array_equal(result.values, expected)
def test_l2_agrees_with_sklearn(self):
    rng = np.random.RandomState(0)
    y = rng.randn(10) * rng.randint(1, 5)

    sol = torch.zeros_like(torch.tensor(y, device='cpu'))
    _isotonic_l2(torch.tensor(y), sol, 'cpu')

    sol_pkg = np.zeros_like(y)
    isotonic_l2(y, sol_pkg)

    sol_skl = isotonic_regression(y, increasing=False)
    np.testing.assert_array_almost_equal(sol_pkg, sol_skl)
    np.testing.assert_array_almost_equal(sol.detach().numpy(), sol_pkg)
def test_isotonic_ymin_ymax():
    # Test from @NelleV's issue:
    # https://github.com/scikit-learn/scikit-learn/issues/6921
    x = np.array([1.263, 1.318, -0.572, 0.307, -0.707, -0.176, -1.599,
                  1.059, 1.396, 1.906, 0.210, 0.028, -0.081, 0.444,
                  0.018, -0.377, -0.896, -0.377, -1.327, 0.180])
    y = isotonic_regression(x, y_min=0.0, y_max=0.1)
    assert np.all(y >= 0)
    assert np.all(y <= 0.1)

    # Also test decreasing case since the logic there is different
    y = isotonic_regression(x, y_min=0.0, y_max=0.1, increasing=False)
    assert np.all(y >= 0)
    assert np.all(y <= 0.1)

    # Finally, test with only one bound
    y = isotonic_regression(x, y_min=0.0, increasing=False)
    assert np.all(y >= 0)
def proxOWL(beta, weights):
    p = len(beta)
    abs_beta = np.abs(beta)
    ix = np.argsort(abs_beta)[::-1]
    abs_beta = abs_beta[ix]
    iso_input = abs_beta - weights
    abs_beta = isotonic_regression(iso_input, y_min=0, increasing=False)
    idxs = np.zeros_like(ix)
    idxs[ix] = np.arange(p)
    abs_beta = abs_beta[idxs]
    beta = np.sign(beta) * abs_beta
    return beta
def _op_method(self, input_data, extra_factor=1.0):
    """Operator.

    This method returns the input data after a clustering and a
    thresholding. Implements (Eq 24) in :cite:`figueiredo2014`.

    Parameters
    ----------
    input_data : numpy.ndarray
        Input data array
    extra_factor : float
        Additional multiplication factor (default is ``1.0``)

    Returns
    -------
    numpy.ndarray
        Thresholded data
    """
    # Update threshold with extra factor.
    threshold = self.weights * extra_factor

    # Squeezing the data
    data_squeezed = np.squeeze(input_data)

    # Sorting (non-increasing order) input vector's absolute values
    data_abs = np.abs(data_squeezed)
    data_abs_sort_idx = np.argsort(data_abs)[::-1]
    data_abs = data_abs[data_abs_sort_idx]

    # Projection onto the monotone non-negative cone using
    # isotonic_regression
    data_abs = isotonic_regression(
        data_abs - threshold,
        y_min=0,
        increasing=False,
    )

    # Unsorting the data
    data_abs_unsorted = np.empty_like(data_abs)
    data_abs_unsorted[data_abs_sort_idx] = data_abs

    # Putting the sign back
    with np.errstate(invalid='ignore'):
        sign_data = data_squeezed / np.abs(data_squeezed)

    # Removing NaN caused by the sign
    sign_data[np.isnan(sign_data)] = 0

    return np.reshape(sign_data * data_abs_unsorted, input_data.shape)
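# Hedged cross-check (not from the source): for 1-D input, _op_method
# computes the same OWL proximal map as prox_owl defined earlier in this
# section. The SimpleNamespace stub below is hypothetical, standing in for
# `self` with the only attribute the method uses, `weights`.
def _check_op_method_matches_prox_owl():
    from types import SimpleNamespace
    rng = np.random.RandomState(0)
    v = rng.randn(8)
    w = np.linspace(0.5, 0.1, 8)  # non-increasing OWL weights
    stub = SimpleNamespace(weights=w)
    np.testing.assert_allclose(_op_method(stub, v), prox_owl(v, w))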
def _isotonic_fit(self, X):
    cons = ({'type': 'ineq', 'fun': lambda x: np.diff(x)},)
    # Kyle's idea: use the non-regularized isotonic regression as a first
    # guess. That fit is O(n), so the extra cost is minimal.
    x0 = isotonic_regression(X)
    if self.do_smoothing:
        return minimize(self._ls_min_func, x0=x0,
                        args=(X, self.isotonic_lambda),
                        method='COBYLA', constraints=cons).x
    else:
        return x0
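# A self-contained sketch of the same idea, since _ls_min_func is not shown
# here: start COBYLA from the plain isotonic fit and smooth it under the
# monotonicity constraint diff(x) >= 0. The second-difference roughness
# penalty in `obj` is a hypothetical stand-in for the real objective.
from scipy.optimize import minimize
from sklearn.isotonic import isotonic_regression

def smooth_isotonic(y, lam=1.0):
    x0 = isotonic_regression(y)
    cons = ({'type': 'ineq', 'fun': lambda x: np.diff(x)},)

    def obj(x):
        # least squares fit plus a roughness penalty on second differences
        return np.sum((x - y) ** 2) + lam * np.sum(np.diff(x, 2) ** 2)

    return minimize(obj, x0=x0, method='COBYLA', constraints=cons).x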
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])
def test_l2_agrees_with_sklearn(self):
    rng = np.random.RandomState(0)
    y_numpy = rng.randn(10) * rng.randint(1, 5)
    y_pytorch = torch.from_numpy(y_numpy)

    sol_numpy = np.zeros_like(y_numpy)
    isotonic_numpy.isotonic_l2(y_numpy, sol_numpy)

    sol_pytorch = torch.zeros_like(y_pytorch)
    isotonic_pytorch.isotonic_l2(y_pytorch, sol_pytorch)

    sol_skl = isotonic_regression(y_numpy, increasing=False)
    np.testing.assert_array_almost_equal(sol_skl, sol_numpy)
    np.testing.assert_array_almost_equal(sol_skl, sol_pytorch)
    np.testing.assert_array_almost_equal(sol_pytorch, sol_numpy)
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = as_float_array(y_np).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert_equal(res.dtype, expected_dtype)

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert_equal(res.dtype, expected_dtype)
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = check_array(
                y_np, dtype=[np.float64, np.float32], ensure_2d=False
            ).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert res.dtype == expected_dtype

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert res.dtype == expected_dtype
def prox(self, beta, weights):
    """
    X. Zeng, M. Figueiredo,
    The ordered weighted L1 norm: Atomic formulation, dual norm, and
    projections. eprint http://arxiv.org/abs/1409.4271
    """
    p = len(beta)
    abs_beta = np.abs(beta)
    # indices that would sort the array, reversed to descending order
    ix = np.argsort(abs_beta)[::-1]
    abs_beta = abs_beta[ix]
    iso_input = abs_beta - weights
    abs_beta = isotonic_regression(iso_input, y_min=0, increasing=False)
    idxs = np.zeros_like(ix)
    idxs[ix] = np.arange(p)
    abs_beta = abs_beta[idxs]
    beta = np.sign(beta) * abs_beta
    return beta
def prox_owl(v, w):
    """Proximal operator of the OWL norm dot(w, reversed(sort(v)))

    Follows description and notation from:
    X. Zeng, M. Figueiredo,
    The ordered weighted L1 norm: Atomic formulation, dual norm, and
    projections. eprint http://arxiv.org/abs/1409.4271
    """
    # wlog operate on absolute values
    v_abs = np.abs(v)
    ix = np.argsort(v_abs)[::-1]
    v_abs = v_abs[ix]

    # project to K+ (monotone non-negative decreasing cone)
    v_abs = isotonic_regression(v_abs - w, y_min=0, increasing=False)

    # undo the sorting
    inv_ix = np.zeros_like(ix)
    inv_ix[ix] = np.arange(len(v))
    v_abs = v_abs[inv_ix]

    return np.sign(v) * v_abs
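# Hedged sanity check (not from the source): with constant weights
# w_i = lam, the OWL norm reduces to lam * ||.||_1, so prox_owl should
# coincide with elementwise soft-thresholding.
def _check_prox_owl_soft_threshold(lam=0.3, seed=0):
    rng = np.random.RandomState(seed)
    v = rng.randn(20)
    soft = np.sign(v) * np.maximum(np.abs(v) - lam, 0)
    np.testing.assert_allclose(prox_owl(v, np.full_like(v, lam)), soft)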
def _Euclidean_project(self, theta):
    """
    Efficient Bregman projections onto the permutahedron and related
    polytopes. C. H. Lim and S. J. Wright.
    In Proc. of AISTATS, pages 1205–1213, 2016.
    """
    from sklearn.isotonic import isotonic_regression
    n_classes = len(theta)
    w = self._get_w(n_classes)
    perm = np.argsort(theta)[::-1]
    theta = theta[perm]
    dual_sol = isotonic_regression(theta - w, increasing=False)
    # Or equivalently
    # dual_sol = -isotonic_regression(w - theta, increasing=True)
    primal_sol = theta - dual_sol
    return primal_sol[inv_permutation(perm)]
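# A standalone sketch of the same Lim & Wright reduction, with the inverse
# permutation inlined (the method above relies on the external helpers
# _get_w and inv_permutation); `project_permutahedron` is a hypothetical
# name, not part of the class.
from sklearn.isotonic import isotonic_regression

def project_permutahedron(theta, w):
    """Euclidean projection of theta onto the permutahedron generated by w,
    where w is sorted in non-increasing order."""
    perm = np.argsort(theta)[::-1]
    dual = isotonic_regression(theta[perm] - w, increasing=False)
    primal = theta[perm] - dual
    inv = np.empty_like(perm)
    inv[perm] = np.arange(len(perm))
    return primal[inv]

# e.g. with w = (1, 0) the permutahedron is the segment from (1, 0) to
# (0, 1), and theta = (2, 2) projects onto its midpoint (0.5, 0.5).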
def _Euclidean_project(self, theta):
    from sklearn.isotonic import isotonic_regression
    return isotonic_regression(theta, y_min=0, y_max=1, increasing=False)
def plot_calibration_curve(classifier_name, pred_csv_file, fig_index):
    """Plot calibration curve for an estimator with and without calibration.

    cf http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_curve.html#sphx-glr-auto-examples-calibration-plot-calibration-curve-py
    """
    from sklearn.metrics import brier_score_loss, precision_score, \
        recall_score, f1_score
    from sklearn.calibration import CalibratedClassifierCV, calibration_curve
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from sklearn.isotonic import isotonic_regression
    from sklearn.metrics import roc_auc_score, roc_curve, auc

    # # Calibrated with isotonic calibration
    # isotonic = CalibratedClassifierCV(base_estimator=None, cv="prefit",
    #                                   method='isotonic')
    # # Calibrated with sigmoid calibration
    # sigmoid = CalibratedClassifierCV(base_estimator=None, cv="prefit",
    #                                  method='sigmoid')
    # # Logistic regression with no calibration as baseline
    # lr = LogisticRegression(C=1., solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    # for name in [classifier_name, classifier_name + ' + Isotonic',
    #              classifier_name + ' + Sigmoid']:
    for name in [classifier_name, classifier_name + ' + Sigmoid']:
        # for name in [classifier_name]:
        y_test, prob_pos, y_pred, _, _ = read_pred_csv_file_to_arrays(
            pred_csv_file)

        if name == classifier_name + ' + Sigmoid':
            a, b = sigmoid_calibration(prob_pos, y_test, sample_weight=None)
            prob_pos = predict_sigmoid(a, b, prob_pos)
            print(a, b)
            y_pred = binary_predict(prob_pos, threshold=0.5)

        if name == classifier_name + ' + Isotonic':
            prob_pos = isotonic_regression(prob_pos, sample_weight=None,
                                           y_min=None, y_max=None,
                                           increasing=True)
            y_pred = binary_predict(prob_pos, threshold=0.5)

        # print(prob_pos[:20])

        # # plot roc curve for test: class 1 only
        # fpr, tpr, _ = roc_curve(y_test, prob_pos)
        # lw = 2
        # plt.plot(fpr, tpr, color='darkorange', lw=lw,
        #          label='ROC curve (area = %0.2f)'
        #                % (roc_auc_score(y_test, prob_pos, average='macro')))
        # plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        # plt.xlim([0.0, 1.0])
        # plt.ylim([0.0, 1.05])
        # plt.xlabel('False Positive Rate')
        # plt.ylabel('True Positive Rate')
        # plt.title('Receiver operating characteristic example')
        # plt.legend(loc="lower right")
        # plt.savefig('plots/roc_%s.png' % (name))
        # plt.clf()

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=1)
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f" % f1_score(y_test, y_pred))
        print("\tROC: %1.3f\n" % roc_auc_score(y_test, prob_pos,
                                               average='macro'))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (name, clf_score))
        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
    plt.savefig('plots/calibration.png')
    plt.clf()