Example #1
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    y = np.array([10, 0, 2])
    y_ = np.array([4, 4, 4])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
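
The [10, 0, 2] case above is easy to verify by hand: isotonic regression (PAVA) repeatedly pools adjacent violators into their mean. A minimal sketch, assuming only NumPy and scikit-learn:

import numpy as np
from sklearn.isotonic import isotonic_regression

y = np.array([10, 0, 2])
# 10 > 0 violates monotonicity, so the pair is pooled to its mean, 5;
# the pooled block (5) still exceeds the next value (2), so all three
# entries merge into one block with mean (10 + 0 + 2) / 3 = 4.
print(isotonic_regression(y))  # [4. 4. 4.]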
Example #2
def bench_isotonic_regression(Y):
    """
    Runs a single iteration of isotonic regression on the input data,
    and reports the total time taken (in seconds).
    """
    gc.collect()

    tstart = datetime.now()
    isotonic_regression(Y)
    return (datetime.now() - tstart).total_seconds()
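
A hedged sketch of how this benchmark might be driven over growing problem sizes (the driver loop below is an assumption, not part of the original script):

import numpy as np

rng = np.random.RandomState(0)
for n in (10_000, 100_000, 1_000_000):
    # cumulative Gaussian noise: a noisy, roughly monotone signal
    Y = rng.randn(n).cumsum()
    print("n=%d: %.4fs" % (n, bench_isotonic_regression(Y)))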
Example #3
def prox_owl(v, w):
    r"""
    OWL norm proximal operator

    From pyowl: https://github.com/vene/pyowl/
    Author: Vlad Niculae <*****@*****.**>

    The weights of the OWL norm determine its behavior:
        - For l1: \lambda_1 = w_1 = w_2 = ... = w_n
        - For l∞: \lambda_1 = w_1 > w_2 = w_3 = ... = w_n = 0
        - For OSCAR: w_i = \lambda_1 + \lambda_2 (n - i), for i = 1, ..., n,
          with \lambda_1 > 0, \lambda_2 > 0

    References
    ----------
    X. Zeng, M. A. T. Figueiredo, The Ordered Weighted $l_1$ Norm:
    Atomic Formulation, Projections, and Algorithms.

    M. Bogdan, E. van den Berg, W. Su, and E. Candes, Statistical Estimation
    and Testing via the Ordered $l_1$ Norm.
    """
    # === remove signs ===
    s = np.abs(v)
    # === sort permutation matrix ===
    ix = np.argsort(s)[::-1]
    # === u = sorted s ===
    u = s[ix]
    # === projection on the monotone, non-negative decreasing cone ===
    x = isotonic_regression(u - w, y_min=0, increasing=False)
    # === unsort ===
    inv_ix = np.zeros_like(ix)
    inv_ix[ix] = np.arange(len(v))
    x = x[inv_ix]
    # === restore signs ===
    res = np.sign(v) * x
    return res
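
A brief usage sketch with OSCAR-style weights w_i = \lambda_1 + \lambda_2 (n - i) from the docstring (the values of \lambda_1 and \lambda_2 are arbitrary):

import numpy as np

rng = np.random.RandomState(0)
v = rng.randn(6)

lam1, lam2 = 0.1, 0.05
n = len(v)
# OSCAR weights: non-increasing, heaviest penalty on the largest |v_i|
w = lam1 + lam2 * (n - 1 - np.arange(n))
print(prox_owl(v, w))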
Example #4
def test_isotonic_regression_sample_weight_not_overwritten():
    """Check that calling fitting function of isotonic regression will not
    overwrite `sample_weight`.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20508
    """
    X, y = make_regression(n_samples=10, n_features=1, random_state=41)
    sample_weight_original = np.ones_like(y)
    sample_weight_original[0] = 10
    sample_weight_fit = sample_weight_original.copy()

    isotonic_regression(y, sample_weight=sample_weight_fit)
    assert_allclose(sample_weight_fit, sample_weight_original)

    IsotonicRegression().fit(X, y, sample_weight=sample_weight_fit)
    assert_allclose(sample_weight_fit, sample_weight_original)
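
Beyond not being overwritten, sample_weight changes the pooling itself: adjacent violators are replaced by their weighted mean rather than their plain mean. A small check:

import numpy as np
from sklearn.isotonic import isotonic_regression

y = np.array([10.0, 0.0])
# unweighted: pooled to the plain mean -> [5. 5.]
print(isotonic_regression(y))
# weight the second point 3x: pooled to (1*10 + 3*0) / 4 -> [2.5 2.5]
print(isotonic_regression(y, sample_weight=np.array([1.0, 3.0])))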
Example #5
    def test_l2_agrees_with_sklearn(self):
        rng = np.random.RandomState(0)
        y = rng.randn(10) * rng.randint(1, 5)
        sol = np.zeros_like(y)
        isotonic.isotonic_l2(y, sol)
        sol_skl = isotonic_regression(y, increasing=False)
        np.testing.assert_array_almost_equal(sol, sol_skl)
Example #6
def kfold_cv(X, K=10, isotonic=True):
    """K-fold cross-validated eigenvalues for LW nonlinear shrinkage"""
    S = empirical_covariance(X)
    lam, U = np.linalg.eigh(S)
    d = _nls_cv(X, S, K)
    if isotonic:
        d = isotonic_regression(d, increasing=True)
    return U @ np.diag(d) @ U.T
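
The increasing=True step matters because np.linalg.eigh returns eigenvalues in ascending order, while the cross-validated shrunk eigenvalues d may come back non-monotone; isotonic regression restores the ordering while staying as close to d as possible in least squares. A toy illustration (the values are made up):

import numpy as np
from sklearn.isotonic import isotonic_regression

d = np.array([0.5, 0.4, 0.9])  # non-monotone CV estimates
print(isotonic_regression(d, increasing=True))  # [0.45 0.45 0.9 ]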
Example #7
    def test_isotonic_regression(self):
        data = np.abs(np.random.randn(100))
        data = data.cumsum()
        df = pdml.ModelFrame(np.arange(len(data)), target=data)

        result = df.isotonic.isotonic_regression()
        expected = isotonic.isotonic_regression(data)
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_index_equal(result.index, df.index)
        self.assert_numpy_array_equal(result.values, expected)
Example #8
    def test_isotonic_regression(self):
        data = np.abs(np.random.randn(100))
        data = data.cumsum()
        df = pdml.ModelFrame(np.arange(len(data)), target=data)

        result = df.isotonic.isotonic_regression()
        expected = isotonic.isotonic_regression(data)
        self.assertTrue(isinstance(result, pdml.ModelSeries))
        self.assert_index_equal(result.index, df.index)
        self.assert_numpy_array_equal(result.values, expected)
Example #9
    def test_l2_agrees_with_sklearn(self):
        rng = np.random.RandomState(0)
        y = rng.randn(10) * rng.randint(1, 5)
        sol = torch.zeros_like(torch.tensor(y, device='cpu'))
        _isotonic_l2(torch.tensor(y), sol, 'cpu')
        sol_pkg = np.zeros_like(y)
        isotonic_l2(y, sol_pkg)
        sol_skl = isotonic_regression(y, increasing=False)
        np.testing.assert_array_almost_equal(sol_pkg, sol_skl)
        np.testing.assert_array_almost_equal(sol.detach().numpy(), sol_pkg)
Example #10
def test_isotonic_ymin_ymax():
    # Test from @NelleV's issue:
    # https://github.com/scikit-learn/scikit-learn/issues/6921
    x = np.array(
        [
            1.263,
            1.318,
            -0.572,
            0.307,
            -0.707,
            -0.176,
            -1.599,
            1.059,
            1.396,
            1.906,
            0.210,
            0.028,
            -0.081,
            0.444,
            0.018,
            -0.377,
            -0.896,
            -0.377,
            -1.327,
            0.180,
        ]
    )
    y = isotonic_regression(x, y_min=0.0, y_max=0.1)

    assert np.all(y >= 0)
    assert np.all(y <= 0.1)

    # Also test decreasing case since the logic there is different
    y = isotonic_regression(x, y_min=0.0, y_max=0.1, increasing=False)

    assert np.all(y >= 0)
    assert np.all(y <= 0.1)

    # Finally, test with only one bound
    y = isotonic_regression(x, y_min=0.0, increasing=False)

    assert np.all(y >= 0)
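
With uniform bounds, the constrained fit coincides with clipping the unconstrained isotonic fit, since clipping a monotone sequence keeps it monotone; a quick numerical check of that equivalence:

import numpy as np
from sklearn.isotonic import isotonic_regression

rng = np.random.RandomState(0)
x = rng.randn(20)

bounded = isotonic_regression(x, y_min=0.0, y_max=0.1)
clipped = np.clip(isotonic_regression(x), 0.0, 0.1)
np.testing.assert_allclose(bounded, clipped)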
Example #11
def test_isotonic_ymin_ymax():
    # Test from @NelleV's issue:
    # https://github.com/scikit-learn/scikit-learn/issues/6921
    x = np.array([1.263, 1.318, -0.572, 0.307, -0.707, -0.176, -1.599, 1.059,
                  1.396, 1.906, 0.210, 0.028, -0.081, 0.444, 0.018, -0.377,
                  -0.896, -0.377, -1.327, 0.180])
    y = isotonic_regression(x, y_min=0., y_max=0.1)

    assert(np.all(y >= 0))
    assert(np.all(y <= 0.1))

    # Also test decreasing case since the logic there is different
    y = isotonic_regression(x, y_min=0., y_max=0.1, increasing=False)

    assert(np.all(y >= 0))
    assert(np.all(y <= 0.1))

    # Finally, test with only one bound
    y = isotonic_regression(x, y_min=0., increasing=False)

    assert(np.all(y >= 0))
Example #12
def proxOWL(beta, weights):
    p = len(beta)
    abs_beta = np.abs(beta)
    ix = np.argsort(abs_beta)[::-1]
    abs_beta = abs_beta[ix]
    iso_input = abs_beta - weights
    abs_beta = isotonic_regression(iso_input, y_min=0, increasing=False)

    idxs = np.zeros_like(ix)
    idxs[ix] = np.arange(p)
    abs_beta = abs_beta[idxs]

    beta = np.sign(beta) * abs_beta
    return beta
Example #13
    def _op_method(self, input_data, extra_factor=1.0):
        """Operator.

        This method returns the input data after a clustering and a
        thresholding step. Implements (Eq 24) in :cite:`figueiredo2014`.

        Parameters
        ----------
        input_data : numpy.ndarray
            Input data array
        extra_factor : float
            Additional multiplication factor (default is ``1.0``)

        Returns
        -------
        numpy.ndarray
            Thresholded data

        """
        # Update threshold with extra factor.
        threshold = self.weights * extra_factor

        # Squeezing the data
        data_squeezed = np.squeeze(input_data)

        # Sorting (non increasing order) input vector's absolute values
        data_abs = np.abs(data_squeezed)
        data_abs_sort_idx = np.argsort(data_abs)[::-1]
        data_abs = data_abs[data_abs_sort_idx]

        # Projection onto the monotone non-negative cone using
        # isotonic_regression
        data_abs = isotonic_regression(
            data_abs - threshold,
            y_min=0,
            increasing=False,
        )

        # Unsorting the data
        data_abs_unsorted = np.empty_like(data_abs)
        data_abs_unsorted[data_abs_sort_idx] = data_abs

        # Putting the sign back
        with np.errstate(invalid='ignore'):
            sign_data = data_squeezed / np.abs(data_squeezed)

        # Removing NAN caused by the sign
        sign_data[np.isnan(sign_data)] = 0

        return np.reshape(sign_data * data_abs_unsorted, input_data.shape)
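
In symbols, the operator implemented above is (paraphrasing Eq. 24 of :cite:`figueiredo2014` in the notation of this method, where $P_{K_+}$ is the isotonic projection onto the monotone non-negative cone and $|v|_\downarrow$ the absolute values sorted in non-increasing order):

$$\operatorname{prox}_{\Omega_w}(v) = \operatorname{sign}(v) \odot \Pi^{-1}\big(P_{K_+}(|v|_\downarrow - w)\big),$$

with $\Pi^{-1}$ undoing the sorting permutation.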
Example #14
    def _isotonic_fit(self, X):
        cons = ({'type': 'ineq', 'fun': lambda x: np.diff(x)})

        # Kyle's idea: use as a first guess the non-regularized isotonic regression.
        # This implementation is O(n) complexity, so the cost is minimal.
        x0 = isotonic_regression(X)

        if self.do_smoothing:
            return minimize(self._ls_min_func,
                            x0=x0,
                            args=(X, self.isotonic_lambda),
                            method='COBYLA',
                            constraints=cons).x
        else:
            return x0
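
The helper _ls_min_func is not shown in this snippet. A hypothetical shape for such an objective, purely as an assumption about what a smoothed isotonic fit could minimize (least squares plus a lambda-weighted roughness penalty):

import numpy as np

def _ls_min_func(x, X, lam):
    # HYPOTHETICAL objective, not the original helper: data fit plus a
    # second-difference smoothness penalty weighted by lam.
    return np.sum((x - X) ** 2) + lam * np.sum(np.diff(x, n=2) ** 2)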
Example #15
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]), ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])
Example #16
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])
Example #17
    def test_l2_agrees_with_sklearn(self):
        rng = np.random.RandomState(0)

        y_numpy = rng.randn(10) * rng.randint(1, 5)
        y_pytorch = torch.from_numpy(y_numpy)

        sol_numpy = np.zeros_like(y_numpy)
        isotonic_numpy.isotonic_l2(y_numpy, sol_numpy)

        sol_pytorch = torch.zeros_like(y_pytorch)
        isotonic_pytorch.isotonic_l2(y_pytorch, sol_pytorch)

        sol_skl = isotonic_regression(y_numpy, increasing=False)

        np.testing.assert_array_almost_equal(sol_skl, sol_numpy)
        np.testing.assert_array_almost_equal(sol_skl, sol_pytorch)
        np.testing.assert_array_almost_equal(sol_pytorch, sol_numpy)
Example #18
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = as_float_array(y_np).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert_equal(res.dtype, expected_dtype)

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert_equal(res.dtype, expected_dtype)
Example #19
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = \
                check_array(y_np, dtype=[np.float64, np.float32],
                            ensure_2d=False).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert res.dtype == expected_dtype

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert res.dtype == expected_dtype
Example #20
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = \
                check_array(y_np, dtype=[np.float64, np.float32],
                            ensure_2d=False).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert_equal(res.dtype, expected_dtype)

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert_equal(res.dtype, expected_dtype)
Example #21
    def prox(self, beta, weights):
        """
        X. Zeng, M. Figueiredo,
        The ordered weighted L1 norm: Atomic formulation, dual norm,
        and projections.
        eprint http://arxiv.org/abs/1409.4271
        """
        p = len(beta)
        abs_beta = np.abs(beta)
        # indices that sort |beta| in descending order
        ix = np.argsort(abs_beta)[::-1]
        abs_beta = abs_beta[ix]
        iso_input = abs_beta - weights
        abs_beta = isotonic_regression(iso_input, y_min=0, increasing=False)

        idxs = np.zeros_like(ix)
        idxs[ix] = np.arange(p)
        abs_beta = abs_beta[idxs]

        beta = np.sign(beta) * abs_beta
        return beta
Example #22
def prox_owl(v, w):
    """Proximal operator of the OWL norm dot(w, reversed(sort(v)))
    Follows description and notation from:
    X. Zeng, M. Figueiredo,
    The ordered weighted L1 norm: Atomic formulation, dual norm,
    and projections.
    eprint http://arxiv.org/abs/1409.4271
    """

    # wlog operate on absolute values
    v_abs = np.abs(v)
    ix = np.argsort(v_abs)[::-1]
    v_abs = v_abs[ix]
    # project to K+ (monotone non-negative decreasing cone)
    v_abs = isotonic_regression(v_abs - w, y_min=0, increasing=False)

    # undo the sorting
    inv_ix = np.zeros_like(ix)
    inv_ix[ix] = np.arange(len(v))
    v_abs = v_abs[inv_ix]

    return np.sign(v) * v_abs
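
A sanity check worth knowing: with all weights equal, the OWL norm reduces to a scaled l1 norm, so prox_owl must agree with elementwise soft thresholding:

import numpy as np

rng = np.random.RandomState(0)
v = rng.randn(8)
lam = 0.3

owl = prox_owl(v, np.full_like(v, lam))
soft = np.sign(v) * np.maximum(np.abs(v) - lam, 0.0)
np.testing.assert_allclose(owl, soft)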
Example #23
    def _Euclidean_project(self, theta):
        """
        Efficient Bregman projections onto the permutahedron and
        related polytopes.
        C. H. Lim and S. J. Wright.
        In Proc. of AISTATS, pages 1205–1213, 2016
        """
        from sklearn.isotonic import isotonic_regression

        n_classes = len(theta)
        w = self._get_w(n_classes)

        perm = np.argsort(theta)[::-1]
        theta = theta[perm]

        dual_sol = isotonic_regression(theta - w, increasing=False)

        # Or equivalently
        #dual_sol = -isotonic_regression(w - theta, increasing=True)

        primal_sol = theta - dual_sol

        return primal_sol[inv_permutation(perm)]
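
inv_permutation is assumed to invert the argsort permutation, mirroring the inv_ix trick used in prox_owl above; a minimal version consistent with its use:

import numpy as np

def inv_permutation(perm):
    # inverse permutation: inv[perm[i]] = i, so a[perm][inv] == a
    inv = np.zeros_like(perm)
    inv[perm] = np.arange(len(perm))
    return inv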
Example #24
 def _Euclidean_project(self, theta):
     from sklearn.isotonic import isotonic_regression
     return isotonic_regression(theta, y_min=0, y_max=1, increasing=False)

def plot_calibration_curve(classifier_name, pred_csv_file, fig_index):
    """Plot calibration curve for est w/o and with calibration.
        cf http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_curve.html#sphx-glr-auto-examples-calibration-plot-calibration-curve-py
    """

    from sklearn.metrics import brier_score_loss, precision_score, recall_score, f1_score
    from sklearn.calibration import CalibratedClassifierCV, calibration_curve
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from sklearn.isotonic import isotonic_regression
    from sklearn.metrics import roc_auc_score, roc_curve, auc

    # # Calibrated with isotonic calibration
    # isotonic = CalibratedClassifierCV(base_estimator=None, cv="prefit", method='isotonic')

    # # Calibrated with sigmoid calibration
    # sigmoid = CalibratedClassifierCV(base_estimator=None, cv="prefit", method='sigmoid')

    # # Logistic regression with no calibration as baseline
    # lr = LogisticRegression(C=1., solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    # for name in [classifier_name, classifier_name + ' + Isotonic',  classifier_name + ' + Sigmoid']:
    for name in [classifier_name, classifier_name + ' + Sigmoid']:
        # for name in [classifier_name]:

        y_test, prob_pos, y_pred, _, _ = read_pred_csv_file_to_arrays(
            pred_csv_file)

        if name == classifier_name + ' + Sigmoid':
            a, b = sigmoid_calibration(prob_pos, y_test, sample_weight=None)
            prob_pos = predict_sigmoid(a, b, prob_pos)
            print(a, b)
            y_pred = binary_predict(prob_pos, threshold=0.5)

        if name == classifier_name + ' + Isotonic':
            prob_pos = isotonic_regression(prob_pos,
                                           sample_weight=None,
                                           y_min=None,
                                           y_max=None,
                                           increasing=True)
            y_pred = binary_predict(prob_pos, threshold=0.5)

        # print prob_pos[:20]
        # # plot roc curve for test: class 1 only
        # fpr, tpr, _ = roc_curve(y_test, prob_pos)
        # lw = 2
        # plt.plot(fpr, tpr, color='darkorange',
        #                        lw=lw, label='ROC curve (area = %0.2f)' %(roc_auc_score(y_test, prob_pos, average='macro')))
        # plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
        # plt.xlim([0.0, 1.0])
        # plt.ylim([0.0, 1.05])
        # plt.xlabel('False Positive Rate')
        # plt.ylabel('True Positive Rate')
        # plt.title('Receiver operating characteristic example')
        # plt.legend(loc="lower right")
        # plt.savefig('plots/roc_%s.png'%(name))
        # plt.clf()

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=1)
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f" % f1_score(y_test, y_pred))
        print("\tROC: %1.3f\n" %
              roc_auc_score(y_test, prob_pos, average='macro'))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value,
                 fraction_of_positives,
                 "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(prob_pos,
                 range=(0, 1),
                 bins=10,
                 label=name,
                 histtype="step",
                 lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots  (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
    plt.savefig('plots/calibration.png')
    plt.clf()