Example #1
def homotopy_path(X, Y, lambda_, coef, y_t, epsilon=1e-3, nu=1.):

    eps_0 = epsilon / 10.
    step_size = np.sqrt(2. * (epsilon - eps_0) / nu)
    Y_t = np.array(list(Y[:-1]) + [y_t], order='F')
    y_stop = Y[-1]

    while y_t < y_stop:

        y_t = min(y_t + step_size, y_stop)
        Y_t[-1] = y_t
        tol = eps_0 / np.linalg.norm(Y_t) ** 2
        alpha = [lambda_ / X.shape[0]]
        res = lasso_path(X, Y_t, alphas=alpha, coef_init=coef, eps=tol)
        coef = res[1].ravel()

    while y_t > y_stop:

        y_t = max(y_t - step_size, y_stop)
        Y_t[-1] = y_t
        tol = eps_0 / np.linalg.norm(Y_t) ** 2
        alpha = [lambda_ / X.shape[0]]
        res = lasso_path(X, Y_t, alphas=alpha, coef_init=coef, eps=tol)
        coef = res[1].ravel()

    return coef
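
A minimal usage sketch, not part of the original example: the synthetic X and Y, the value of lambda_, and the starting y_t below are illustrative assumptions.

import numpy as np
from sklearn.linear_model import lasso_path

rng = np.random.RandomState(0)
X = np.asfortranarray(rng.randn(50, 20))
Y = rng.randn(50)

# warm-start from zero coefficients and move the last response value up to Y[-1]
coef0 = np.zeros(X.shape[1])
coef = homotopy_path(X, Y, lambda_=1.0, coef=coef0, y_t=Y[-1] - 1.0)
print(coef.shape)  # (20,)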
Example #2
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    f = assert_warns_message

    def in_warn_message(msg):
        return 'Early stopping' in msg or 'Dropping a regressor' in msg

    lars_alphas, _, lars_coef = f(ConvergenceWarning,
                                  in_warn_message,
                                  linear_model.lars_path,
                                  X,
                                  y,
                                  method='lasso')

    with ignore_warnings():
        _, lasso_coef2, _ = linear_model.lasso_path(X,
                                                    y,
                                                    alphas=lars_alphas,
                                                    tol=1e-6,
                                                    fit_intercept=False)

        lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
        iter_models = enumerate(
            linear_model.lasso_path(X,
                                    y,
                                    alphas=lars_alphas,
                                    tol=1e-6,
                                    return_models=True,
                                    fit_intercept=False))
        for i, model in iter_models:
            lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
    np.testing.assert_array_almost_equal(lasso_coef, lasso_coef2, decimal=1)
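Example #3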
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 80, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        lars_alphas, _, lars_coef = linear_model.lars_path(X,
                                                           y,
                                                           method='lasso')

    assert_true(len(warning_list) > 0)
    assert_true(('Dropping a regressor' in warning_list[0].message.args[0])
                or ('Early stopping' in warning_list[0].message.args[0]))

    _, lasso_coef2, _ = linear_model.lasso_path(X,
                                                y,
                                                alphas=lars_alphas,
                                                tol=1e-6,
                                                fit_intercept=False)

    lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", DeprecationWarning)
        for i, model in enumerate(
                linear_model.lasso_path(X,
                                        y,
                                        alphas=lars_alphas,
                                        tol=1e-6,
                                        return_models=True,
                                        fit_intercept=False)):
            lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
    np.testing.assert_array_almost_equal(lasso_coef, lasso_coef2, decimal=1)
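Example #4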
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 80, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        lars_alphas, _, lars_coef = linear_model.lars_path(X, y,
                                                           method='lasso')

    assert_true(len(warning_list) > 0)
    assert_true(('Dropping a regressor' in warning_list[0].message.args[0])
                or ('Early stopping' in warning_list[0].message.args[0]))

    _, lasso_coef2, _ = linear_model.lasso_path(X, y,
                                                alphas=lars_alphas, tol=1e-6,
                                                fit_intercept=False)

    lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", DeprecationWarning)
        for i, model in enumerate(linear_model.lasso_path(X, y,
                                                          alphas=lars_alphas,
                                                          tol=1e-6,
                                                          return_models=True,
                                                          fit_intercept=False)):
            lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
    np.testing.assert_array_almost_equal(lasso_coef, lasso_coef2, decimal=1)
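Example #5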
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    f = assert_warns_message

    def in_warn_message(msg):
        return 'Early stopping' in msg or 'Dropping a regressor' in msg
    lars_alphas, _, lars_coef = f(ConvergenceWarning,
                                  in_warn_message,
                                  linear_model.lars_path, X, y, method='lasso')

    with ignore_warnings():
        _, lasso_coef2, _ = linear_model.lasso_path(X, y,
                                                    alphas=lars_alphas,
                                                    tol=1e-6,
                                                    fit_intercept=False)

        lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
        iter_models = enumerate(linear_model.lasso_path(X, y,
                                                        alphas=lars_alphas,
                                                        tol=1e-6,
                                                        return_models=True,
                                                        fit_intercept=False))
        for i, model in iter_models:
            lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
    np.testing.assert_array_almost_equal(lasso_coef, lasso_coef2, decimal=1)
Example #6
def fit_model(X, Y_t, coef, lambda_, eps_0, method="lasso"):

    if coef is None:
        coef = np.zeros(X.shape[1])

    if method is "lasso":
        tol = eps_0 / np.linalg.norm(Y_t)**2
        lmd = [lambda_ / X.shape[0]]
        res = lasso_path(X,
                         Y_t,
                         alphas=lmd,
                         coef_init=coef,
                         eps=tol,
                         max_iter=int(1e8))
        coef = res[1].ravel()

    elif method is "ridge":
        reg = Ridge(alpha=lambda_, fit_intercept=False, solver="auto")
        reg.fit(X, Y_t)
        coef = reg.coef_

    elif method is "logcosh":
        # I cannot early stop scipy.minimize with duality gap :-/
        coef = logcosh_reg(X, Y_t, lambda_, coef)

    elif method is "linex":
        # I cannot early stop scipy.minimize with duality gap :-/
        coef = linex_reg(X, Y_t, lambda_, coef=coef)

    mu = X.dot(coef)

    return mu, np.abs(Y_t - mu), coef
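Example #7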
def plot(X, y):
    
    #X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)
    
    # Compute paths
    
    eps = 5e-3  # the smaller it is the longer is the path

    alphas_lasso, coefs_lasso, _ = linear_model.lasso_path(X, y, eps)
    print alphas_lasso
    print coefs_lasso.shape
    
    for i in coefs_lasso.T:
        print i
    
    plt.figure(1)
    ax = plt.gca()
    
    colors = cycle(['r', 'g', 'b', 'c', 'm', 'y'])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)

    for coef_l, c in zip(coefs_lasso, colors):
        l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)
 
    
    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    #plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
    plt.axis('tight')
    plt.show()
Example #8
def LassoDiff(clf, A, time, p, delta_p, SNR, i=None):
    flux = np.sin(2 * np.pi / p * time) + np.sin(2 * np.pi /
                                                 (p + delta_p) * time)
    flux += np.random.normal(0, flux.std() / np.sqrt(SNR), flux.size)

    convWarning = False
    with warnings.catch_warnings(record=True) as w:
        # clf.fit(A, flux)
        clf = linear_model.lasso_path(A, flux)

        if len(w) == 1 and type(w[0].message) is ConvergenceWarning:
            convWarning = True

    # ipdb.set_trace()
    coeffs = clf[1][:, 99]

    power = coeffs[:A.shape[1] // 2]**2 + coeffs[A.shape[1] // 2:A.shape[1] //
                                                 2 * 2]**2

    peaks = findPeaks(power)
    if peaks.size > 1:
        d2P_dp2 = np.gradient(np.gradient(power))
        p1_i, p2_i = peaks[-4:][d2P_dp2[peaks[-4:]].argsort()][:2]
    else:
        p1_i, p2_i = power.argsort()[-2:]

    return i, power, p1_i, p2_i, convWarning
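Example #9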
 def plot_results(self):
     """Create the base regression plots as well as a regularization path plot."""
     rc.REG.plot_results(self)
     path = linear_model.lasso_path(self.independentVar, self.dependentVar, return_models=False, fit_intercept=False)
     alphas = path[0] #Vector of alphas
     coefs = (path[1]).T #Array of coefficients for each alpha
     viz.plot_regPath(alphas, coefs).plot()
Example #10
def alpha_choice_fig(x, y, my_alphas, nb_features, train_size):
    '''
    Parameters
    ----------
    x : data.
    y : desired output.
    my_alphas : array of different values for alpha.
    nb_features : number of features.
    train_size : number of train points.

    Returns
    -------
    Representation of the lasso path.
    '''
    X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=train_size)
    alpha_for_path, coefs_lasso, _ = lasso_path(X_train[:, 0:nb_features-1],
                                                X_train[:, nb_features-1],alphas=my_alphas)
    for i in range(coefs_lasso.shape[0]):
        plt.plot(alpha_for_path, coefs_lasso[i, :])

    plt.xlabel('Alpha')
    plt.ylabel('Coefficients')
    plt.title('Lasso path')
    plt.show()
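
An illustrative call, not from the original source; the synthetic data, the alpha grid, and the split sizes are assumptions. With nb_features equal to the number of columns, the function regresses the last column of X_train on the remaining columns, and y is only consumed by the split.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import lasso_path

rng = np.random.RandomState(0)
x = rng.randn(200, 10)
x[:, -1] = 2 * x[:, 0] - x[:, 3] + 0.1 * rng.randn(200)  # last column acts as the target inside the function
y = x[:, -1]
my_alphas = np.logspace(-2, 0, 20)                        # arbitrary alpha grid

alpha_choice_fig(x, y, my_alphas, nb_features=10, train_size=150)

Example #11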
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()
    lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method='lasso')

    _, lasso_coef2, _ = linear_model.lasso_path(X, y,
                                                alphas=lars_alphas,
                                                tol=1e-6,
                                                fit_intercept=False)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
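Example #12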
    def meta_algorithm(XTX, XTXi, sampler):

        min_success = 6
        ntries = 10

        def _alpha_grid(X, y, center, XTX):
            n, p = X.shape
            alphas, coefs, _ = lasso_path(X.copy(),
                                          y.copy(),
                                          Xy=center.copy(),
                                          precompute=XTX.copy())
            nselected = np.count_nonzero(coefs, axis=0)
            alphas = alphas[nselected < 20]
            return alphas

        alpha_grid = _alpha_grid(X, y, sampler.center, XTX)
        success = np.zeros((p, alpha_grid.shape[0]))

        for _ in range(ntries):
            scale = 1.  # corresponds to sub-samples of 50%
            noisy_S = sampler(scale=scale)
            _, coefs, _ = lasso_path(X,
                                     y,
                                     Xy=noisy_S,
                                     precompute=XTX,
                                     alphas=alpha_grid)
            success += np.abs(np.sign(coefs))

        selected = np.apply_along_axis(
            lambda row: any(x > min_success for x in row), 1, success)
        vars = set(np.nonzero(selected)[0])
        return vars
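Example #13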
    def test_Lasso_Path(self):
        diabetes = datasets.load_diabetes()
        X = diabetes.data
        y = diabetes.target
        X /= X.std(axis=0)

        df = pdml.ModelFrame(diabetes)
        df.data /= df.data.std(axis=0, ddof=False)

        self.assert_numpy_array_almost_equal(df.data.values, X)

        eps = 5e-3
        expected = lm.lasso_path(X, y, eps, fit_intercept=False)
        result = df.lm.lasso_path(eps=eps, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.lars_path(X, y, method='lasso', verbose=True)
        result = df.lm.lars_path(method='lasso', verbose=True)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])
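Example #14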
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()
    lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method='lasso')

    _, lasso_coef2, _ = linear_model.lasso_path(X,
                                                y,
                                                alphas=lars_alphas,
                                                tol=1e-6,
                                                fit_intercept=False)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
Example #15
def sklearn_lasso_coefs_plot(X, y):
    """ Show the path taken by coefficients as Lasso shrinks them towards zero.

    Adapted from: https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_coordinate_descent_path.html """

    from itertools import cycle
    from sklearn.linear_model import lasso_path

    eps = 5e-3  # the smaller it is the longer is the path

    print("Computing regularization path using the lasso...")
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=True)

    # Display results

    fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)
    for coef, label, c in zip(coefs_lasso, X.columns, colors):
        _ = plt.plot(neg_log_alphas_lasso, coef, label=label, c=c)
    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso path')
    plt.legend(loc='upper left')
    plt.axis('tight')

    plt.show()
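Example #16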
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 80, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method="lasso")
    assert_true(len(warning_list) > 0)
    assert_true(
        ("Dropping a regressor" in warning_list[0].message.args[0])
        or ("Early stopping" in warning_list[0].message.args[0])
    )

    lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
    for i, model in enumerate(linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6)):
        lasso_coef[:, i] = model.coef_
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
Example #17
    def test_Lasso_Path(self):
        diabetes = datasets.load_diabetes()
        X = diabetes.data
        y = diabetes.target
        X /= X.std(axis=0)

        df = pdml.ModelFrame(diabetes)
        df.data /= df.data.std(axis=0, ddof=False)

        self.assert_numpy_array_almost_equal(df.data.values, X)

        eps = 5e-3
        expected = lm.lasso_path(X, y, eps, fit_intercept=False)
        result = df.lm.lasso_path(eps=eps, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        result = df.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])

        expected = lm.lars_path(X, y, method='lasso', verbose=True)
        result = df.lm.lars_path(method='lasso', verbose=True)
        self.assert_numpy_array_almost_equal(expected[0], result[0])
        self.assert_numpy_array_almost_equal(expected[1], result[1])
        self.assert_numpy_array_almost_equal(expected[2], result[2])
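Example #18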
def _fit_bootstrap_sample(X, y, func, L):
    """ Computes the regularization path for the regression y ~ X.
    
    Parameters
    ----------
    X : array, shape (n_samples , n_features)

    y : array, shape (n_samples)

    func : string
         the function used for computing the regularization path
         (either 'lasso', 'elasticnet', or 'lars').
        
    L : int
        length of the path.

    Returns
    -------
    array, shape (n_features , L) 
        0 if the coefficient is null and 1 otherwise.

    """
    if func == 'lasso':
        _, coef_path, _ = lasso_path(X, y, n_alphas=L)
    elif func == 'elasticnet':
        _, coef_path, _ = enet_path(X, y, n_alphas=L)
    elif func == 'lars':
        _, _, coef_path = lars_path(X, y, max_iter=L - 1)

    return 1 * (coef_path != 0)
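
A sketch of how the 0/1 support paths returned by _fit_bootstrap_sample could be aggregated over bootstrap resamples into per-feature selection frequencies; the synthetic data, the path length L, and the number of resamples are assumptions, not part of the original source.

import numpy as np
from sklearn.linear_model import lasso_path, enet_path, lars_path

rng = np.random.RandomState(0)
X = rng.randn(100, 30)
y = X[:, :3].sum(axis=1) + 0.1 * rng.randn(100)

L = 20        # path length (hypothetical choice)
n_boot = 25   # number of bootstrap resamples (hypothetical choice)
freq = np.zeros((X.shape[1], L))
for _ in range(n_boot):
    idx = rng.randint(0, X.shape[0], X.shape[0])        # bootstrap resample
    freq += _fit_bootstrap_sample(X[idx], y[idx], func='lasso', L=L)
freq /= n_boot  # selection frequency of each feature at each alpha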
Example #19
def conf_pred(X, Y_seen, lambda_, Y_range, alpha=0.1, method="lasso"):

    X_train, X_test, Y_train, Y_test = train_test_split(X[:-1, :],
                                                        Y_seen,
                                                        test_size=0.5,
                                                        random_state=414)

    # Training
    if method is "lasso":
        lmd = [lambda_ / X_train.shape[0]]
        res = lasso_path(X_train, Y_train, alphas=lmd, eps=1e-12)
        coef = res[1].ravel()

    elif method is "logcosh":
        coef = logcosh_reg(X_train, Y_train, lambda_)

    elif method is "linex":
        coef = linex_reg(X_train, Y_train, lambda_)

    # Ranking on the test
    mu = X_test.dot(coef)
    sorted_residual = np.sort(np.abs(Y_test - mu))
    index = int((X.shape[0] / 2 + 1) * (1 - alpha))
    quantile = sorted_residual[index]

    mu = X[-1, :].dot(coef)

    return intervals.closed(mu - quantile, mu + quantile)
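Example #20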
    def LassoPath(self,
                  alpha,
                  eps=1e-2):  # eps the smaller it is the longer is the path
        sel_log_LassoAlpha = -log10(alpha)
        alphas_lasso, coefs_lasso, _ = lasso_path(self.X,
                                                  self.y,
                                                  eps,
                                                  fit_intercept=False)
        colors = cycle(['b', 'r', 'g', 'c', 'k'])
        neg_log_LassoAlphas = -log10(alphas_lasso)

        fig, ax = plt.subplots()
        for coef_l, c in zip(coefs_lasso, colors):
            ax.plot(neg_log_LassoAlphas, coef_l, c=c)
        ax.axvline(sel_log_LassoAlpha,
                   linestyle='--',
                   color='k',
                   label='$\\alpha$: CV estimate')
        xunit, yunit = plot_unit([ax.get_xlim(), ax.get_ylim()])
        ax.text(sel_log_LassoAlpha - xunit,
                ax.get_ylim()[1] + yunit, round(sel_log_LassoAlpha, 2))
        ax.set_xlabel(
            '-log($\\alpha$)')  # or # ax.set_xlabel(r'-log($\alpha$)')
        ax.set_ylabel('Coefficients')
        ax.set_title('Lasso Path')
        fig.savefig(
            os.path.join(self.fpath, '_'.join([self.name, 'X_LassoPath.png'])))
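Example #21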
 def _gen_cv_paths(self, alphas):
     """Helper function to generate lasso paths."""
     self.alphas, self.coefs_cv, _ = linear_model.lasso_path(
         self.x_train,
         self.y_train,
         fit_intercept=self.intercept,
         alphas=alphas)
     self.coefs_cv = self.coefs_cv.T
Example #22
def cross_val(X, y, lambdas, sigma_0, eps=1e-4, method="lasso", KF=None):
    """
        Perform a 5-fold cross-validation and return the mean square errors for
        different parameters lambdas. 
    """

    n_samples, n_features = X.shape
    n_lambdas = len(lambdas)
    if KF is None:
        KF = KFold(n_samples, 5, shuffle=True, random_state=42)
    n_folds = KF.n_folds
    errors = np.zeros((n_lambdas, n_folds))
    i_fold = 0

    for train_index, test_index in KF:

        X_train = X[train_index]
        X_test = X[test_index]
        y_train = y[train_index]
        y_test = y[test_index]

        if method == "smoothed_concomitant":
            betas, sigmas, gaps, n_iters, _ = \
                SC_path(X_train, y_train, lambdas, eps=eps,
                        sigma_0=sigma_0)

        elif method == "lasso":
            betas = lasso_path(X_train, y_train, alphas=lambdas, tol=eps)[1]
            betas = betas.T

        elif method in ["ls_smoothed_concomitant", "ls_lasso"]:
            betas = estimator_LS(X_train,
                                 y_train,
                                 lambdas,
                                 sigma_0=sigma_0,
                                 eps=eps,
                                 method=method)

        elif method == "SZ_path":
            betas, sigmas = SZ_path(X_train, y_train, eps, lambdas)

        elif method == "SBvG":
            betas, sigmas = SBvG_path(X_train, y_train, lambdas)

        elif method == "belloni":
            betas, sigmas = belloni_path(X_train, y_train, lambdas)

        else:
            1 / 0  # BOOM !

        for l in range(n_lambdas):
            y_pred = np.dot(X_test, betas[l, :])
            errors[l, i_fold] = np.mean((y_pred - y_test)**2)

        i_fold += 1

    return np.mean(errors, axis=1)
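Example #23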
 def _alpha_grid(X, y, center, XTX):
     n, p = X.shape
     alphas, coefs, _ = lasso_path(X.copy(),
                                   y.copy(),
                                   Xy=center.copy(),
                                   precompute=XTX.copy())
     nselected = np.count_nonzero(coefs, axis=0)
     alphas = alphas[nselected < 20]
     return alphas
Example #24
def fit_lfm_lasso_path(x, z, p=None, lambdas=None, fit_intercept=True):
    """For details, see here.

    Parameters
    ----------
        x : array, shape (t_, n_)
        z : array, shape (t_, k_)
        p : array, optional, shape (t_,)
        lambdas : array, shape(i_,), optional
        fit_intercept : bool, optional

    Returns
    -------
        alpha : array, shape (i_, n_)
        beta : array, shape (i_, n_, k_)

    """

    if len(x.shape) == 1:
        x = x.reshape(-1, 1)

    if len(z.shape) == 1:
        z = z.reshape(-1, 1)

    t_, n_ = x.shape
    k_ = z.shape[1]

    if lambdas is None:
        lambdas = np.array([0, 0.1, 0.2])
    i_ = lambdas.shape[0]

    if p is None:
        p = np.ones(t_) / t_

    if fit_intercept is True:
        m_x = p @ x
        m_z = p @ z
    else:
        m_x = np.zeros(n_, )
        m_z = np.zeros(k_, )

    x_p = ((x - m_x).T * np.sqrt(p)).T
    z_p = ((z - m_z).T * np.sqrt(p)).T

    _, coeff_, _ = lasso_path(z_p,
                              x_p,
                              alphas=lambdas / (2 * t_),
                              fit_intercept=False)

    # lasso_path automatically sorts lambdas from the largest to the smallest,
    # so we have to revert order of coeff_ back to the original lambdas
    idx = np.argsort(lambdas)[::-1]
    betas = np.zeros((i_, n_, k_))
    betas[idx, :, :] = coeff_.transpose((2, 0, 1))
    alphas = m_x - betas @ m_z

    return alphas, betas
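
A small check of the behaviour the comment above relies on: scikit-learn's lasso_path sorts the supplied alphas from largest to smallest, which is why the coefficients are re-indexed back to the original lambdas order. The data below are synthetic.

import numpy as np
from sklearn.linear_model import lasso_path

rng = np.random.RandomState(0)
Z = rng.randn(60, 4)
x = Z @ np.array([1.0, -2.0, 0.0, 0.5]) + 0.1 * rng.randn(60)

lambdas = np.array([0.05, 0.5, 0.01])            # deliberately unsorted
alphas_out, coefs, _ = lasso_path(Z, x, alphas=lambdas)
idx = np.argsort(lambdas)[::-1]                  # maps the descending output back to the input order
print(np.allclose(alphas_out, lambdas[idx]))     # True: returned alphas are in decreasing order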
Example #25
 def plot_process(self,
                  eps=5e-3,
                  title="Lasso coef Plot",
                  save=False,
                  file_path=None):
     '''
         Plotting the process of finding the best features
     :param eps: float
         Length of the path
     :param title: string
         The title of the plot
     :param save: boolean, default = False
         If this parameter is set to False the plot will not be saved
         If it is set to True the plot will be saved using :param file_path
     :param file_path: string, default = None
         The file path where the plot will be saved
         If :param save is set to False it is not used
     :return:
         Plots the process of the algorithm
     '''
     X = self.dataframe[self.X_columns].values
     y = self.dataframe[self.y_column].values
     alphas = np.linspace(self.lasso_cv.alpha_ - 0.1,
                          self.lasso_cv.alpha_ + 0.1,
                          self.n_alphas,
                          endpoint=True)
     alphas_lasso, coefs_lasso, _ = lasso_path(X,
                                               y,
                                               eps,
                                               fit_intercept=False,
                                               alphas=alphas)
     neg_log_alphas_lasso = alphas_lasso
     max_coef = coefs_lasso[0][0]
     min_coef = coefs_lasso[0][0]
     for i in range(len(coefs_lasso)):
         line_style = lambda col: '-' if col in self.choosed_cols else '--'
         plt.plot(neg_log_alphas_lasso,
                  coefs_lasso[i],
                  line_style(self.X_columns[i]),
                  label=self.X_columns[i])
         if max(coefs_lasso[i]) > max_coef:
             max_coef = max(coefs_lasso[i])
         if min(coefs_lasso[i]) < min_coef:
             min_coef = min(coefs_lasso[i])
     plt.vlines(self.lasso_cv.alpha_,
                min_coef,
                max_coef,
                linestyles='dashed')
     plt.xlabel('alpha')
     plt.ylabel('coefficients')
     plt.title(title)
     plt.axis('tight')
     plt.legend()
     if save:
         plt.savefig(file_path)
     plt.show()
Example #26
def test_compute_alo_path_fast(method):
    X, y = make_test_case(50, 20, 10)

    alphas, beta_hats, _ = linear_model.lasso_path(X, y)

    alo = lasso.compute_alo_lasso_reference(X, y, beta_hats)
    alo_fast = method(X, y, beta_hats)

    assert np.all(np.isfinite(alo) == np.isfinite(alo_fast))
    assert np.square(alo[np.isfinite(alo)] -
                     alo_fast[np.isfinite(alo_fast)]).mean() < 1e-3
Example #27
def path_calc(X, y, X_holdout, y_holdout, alphas, paramgrid, colname = 'CV', yname = '', method = 'Elastic Net'):
    #make a copy of the parameters before popping things off
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # this code adapted from sklearn ElasticNet fit function, which unfortunately doesn't accept multiple alphas at once
    X, y = check_X_y(X, y, accept_sparse='csc',
                     order='F', dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True, y_numeric=True)
    y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                    ensure_2d=False)

    #this is the step that gives the data to find intercept if fit_intercept is true.
    X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit(X, y, None, precompute, normalize,
                                                                 fit_intercept, copy=False)
    y = np.squeeze(y)

    #do the path calculation, and tell how long it took
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(X, y, alphas=alphas, return_n_iter = True,
                                                   **copy_params)
    if method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(X, y, alphas=alphas, return_n_iter=True,
                                                                   **copy_params)
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    #create some empty arrays to store the result
    y_pred_holdouts = np.empty(shape=(len(alphas),len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []
    for j in list(range(len(path_alphas))):

        coef_temp = path_coefs[:, j]

        if fit_intercept:
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        y_pred_holdouts[j,:] = np.dot(X_holdout, path_coefs[:, j]) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j,:], y_holdout)
        cvcols.append(('predict','"'+ method + ' - ' + yname + ' - ' + colname + ' - Alpha:' + str(path_alphas[j]) + ' - ' + str(paramgrid) + '"'))

    return path_alphas, path_coefs, intercepts, path_iters, y_pred_holdouts, rmses, cvcols
Example #28
def test_mtl_path():
    X, Y = build_dataset(n_targets=3)
    tol = 1e-10
    params = dict(eps=0.01, tol=tol, n_alphas=10)
    alphas, coefs, gaps = mtl_path(X, Y, **params)
    np.testing.assert_array_less(gaps, tol)

    sk_alphas, sk_coefs, sk_gaps = lasso_path(X, Y, **params, max_iter=10000)
    np.testing.assert_array_less(sk_gaps, tol * np.linalg.norm(Y, 'fro')**2)
    np.testing.assert_array_almost_equal(coefs, sk_coefs, decimal=5)
    np.testing.assert_allclose(alphas, sk_alphas)
Example #29
def test_mtl():
    # n_samples, n_features = 30, 70
    # X, Y, _, _ = build_dataset(n_samples, n_features, n_targets=10)
    X, Y, _, _ = build_dataset(n_targets=10)
    tol = 1e-9
    alphas, coefs, gaps = mtl_path(X, Y, eps=1e-2, tol=tol)
    np.testing.assert_array_less(gaps, tol)

    sk_alphas, sk_coefs, sk_gaps = lasso_path(X, Y, eps=1e-2, tol=tol)
    np.testing.assert_array_less(sk_gaps, tol * np.linalg.norm(Y, 'fro')**2)
    np.testing.assert_array_almost_equal(coefs, sk_coefs, decimal=5)
    np.testing.assert_allclose(alphas, sk_alphas)
Example #30
 def Plot_Lasso_Path(self, path_lenght=5e-3, alphas=None):
     import matplotlib.pyplot as plt
     print("Computing regularization path using the lasso...")
     alphas_lasso, coefs_lasso, _ = lasso_path(
         self.X, self.Y, path_lenght, fit_intercept=False, alphas=alphas)
     print("Computing regularization path using the positive lasso...")
     alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
         self.X, self.Y, path_lenght, positive=True, fit_intercept=False, alphas=alphas)
     plt.figure()
     ax = plt.gca()
     ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
     l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
     l2 = plt.plot(-np.log10(alphas_positive_lasso), coefs_positive_lasso.T,
                   linestyle='--')
     plt.xlabel('-Log(alpha)')
     plt.ylabel('coefficients')
     plt.title('Lasso and positive Lasso')
     plt.legend(
         (l1[-1], l2[-1]), ('Lasso', 'positive Lasso'), loc='lower left')
     plt.axis('tight')
     plt.show()
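Example #31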
    def test_lasso_path(self):
        diabetes = datasets.load_diabetes()
        df = pdml.ModelFrame(diabetes)

        result = df.linear_model.lasso_path()
        expected = lm.lasso_path(diabetes.data, diabetes.target)

        self.assertEqual(len(result), 3)
        tm.assert_numpy_array_equal(result[0], expected[0])
        self.assertIsInstance(result[1], pdml.ModelFrame)
        tm.assert_index_equal(result[1].index, df.data.columns)
        self.assert_numpy_array_almost_equal(result[1].values, expected[1])
        self.assert_numpy_array_almost_equal(result[2], expected[2])

        result = df.linear_model.lasso_path(return_models=True)
        expected = lm.lasso_path(diabetes.data, diabetes.target, return_models=True)
        self.assertEqual(len(result), len(expected))
        self.assertIsInstance(result, tuple)
        tm.assert_numpy_array_equal(result[0], result[0])
        tm.assert_numpy_array_equal(result[1], result[1])
        tm.assert_numpy_array_equal(result[2], result[2])
Example #32
 def train(self, features, labels):
     from sklearn import linear_model
     betas = []
     xs = []
     for ci,ells in enumerate(labels.T):
         active = ~np.isnan(ells)
         fi = features[active]
         ells = ells[active]
         fits = linear_model.lasso_path(fi, ells)
         xs.append(fits[-1].coef_.T.copy())
         betas.append(fits[-1].intercept_.copy())
     return product_intercept_predictor(np.array(xs).T, np.array(betas))
Example #33
def test_celer_path_vs_lasso_path(sparse_X, prune):
    """Test that celer_path matches sklearn lasso_path."""
    X, y, _, _ = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    params = dict(eps=1e-2, n_alphas=10, tol=1e-14)
    alphas1, coefs1, gaps1 = celer_path(
        X, y, return_thetas=False, verbose=1, prune=prune, **params)

    alphas2, coefs2, gaps2 = lasso_path(X, y, verbose=False, **params)

    np.testing.assert_allclose(alphas1, alphas2)
    np.testing.assert_allclose(coefs1, coefs2, rtol=1e-05, atol=1e-6)
Example #34
    def test_lasso_path(self):
        diabetes = datasets.load_diabetes()
        df = pdml.ModelFrame(diabetes)

        result = df.linear_model.lasso_path()
        expected = lm.lasso_path(diabetes.data, diabetes.target)

        self.assertEqual(len(result), 3)
        tm.assert_numpy_array_equal(result[0], expected[0])
        self.assertIsInstance(result[1], pdml.ModelFrame)
        tm.assert_index_equal(result[1].index, df.data.columns)
        self.assert_numpy_array_almost_equal(result[1].values, expected[1])
        self.assert_numpy_array_almost_equal(result[2], expected[2])

        result = df.linear_model.lasso_path(return_models=True)
        expected = lm.lasso_path(diabetes.data, diabetes.target, return_models=True)
        self.assertEqual(len(result), len(expected))
        self.assertIsInstance(result, tuple)
        tm.assert_numpy_array_equal(result[0], result[0])
        tm.assert_numpy_array_equal(result[1], result[1])
        tm.assert_numpy_array_equal(result[2], result[2])
Example #35
def do_lasso_simulation(data, NUM_LAMBDA_SPLITS=3000):
    # Make lasso path
    lasso_path, coefs, _ = linear_model.lasso_path(
        data.X_train,
        np.array(data.y_train.flatten().tolist()[0]), # reshape appropriately
        method='lasso'
    )
    prob = LassoProblemWrapper(
        data.X_train,
        data.y_train
    )

    # print "lasso_path", lasso_path

    val_errors = []
    for i, l in enumerate(lasso_path):
        beta = prob.solve(np.array([l]))
        val_error = testerror_lasso(data.X_validate, data.y_validate, beta)
        val_errors.append(val_error)
    sorted_idx = np.argsort(val_errors)

    max_lam = lasso_path[np.min(sorted_idx[:3])]
    min_lam = lasso_path[np.max(sorted_idx[:3])]
    print "min_lam", min_lam, "max_lam", max_lam
    print "lasso_path[sorted_idx[:3]]", lasso_path[sorted_idx[:3]]

    finer_lam_range = []
    for i, l_idx in enumerate(range(np.min(sorted_idx[:3]) - 1, np.max(sorted_idx[:3]) + 1)):
        fudge = 0
        if i == 0:
            fudge = 1e-10
        l_min = lasso_path[l_idx + 1] if lasso_path.size - 1 >= l_idx + 1 else 0
        l_max = lasso_path[l_idx] if l_idx >= 0 else lasso_path[0] + 0.1

        add_l = np.arange(start=l_min, stop=l_max + fudge, step=(l_max - l_min)/NUM_LAMBDA_SPLITS)
        finer_lam_range.append(add_l)
    finer_lam_range = np.concatenate(finer_lam_range)
    print "finer_lam_range min", np.min(finer_lam_range), "max", np.max(finer_lam_range)

    fine_val_errors = []
    for i, l in enumerate(finer_lam_range):
        beta = prob.solve(np.array([l]))
        val_error = testerror_lasso(data.X_validate, data.y_validate, beta)
        fine_val_errors.append(val_error)

    fine_sorted_idx = np.argsort(fine_val_errors)
    best_lam = finer_lam_range[fine_sorted_idx[0]]
    print "best_lam", best_lam

    min_dist, idx = get_dist_of_closest_lambda(best_lam, lasso_path)
    print "min_dist", min_dist
    return min_dist
Example #36
def test_compute_alo_path():
    X, y = make_test_case(50, 20, 10)

    alphas, beta_hats, _ = linear_model.lasso_path(X, y)
    alo = lasso.compute_alo_lasso_reference(X, y, beta_hats)

    beta_hat = beta_hats[:, 5]
    h_5 = lasso.compute_h_lasso(X, np.abs(beta_hat) > 1e-5)
    r_5 = y - np.dot(X, beta_hat)
    alo_5 = np.square(r_5 / (1 - h_5)).mean()

    assert len(alo) == len(alphas)
    assert np.allclose(alo_5, alo[5])
Example #37
def get_coef_range(X, y):
    print "starting experiment"
    #with stopwatch("lasso paths"):
    alphas, coefs, dual_gaps = lasso_path(
        X,
        y,
        #l1_ratio=1.0,
        verbose=True,
        #return_models=False,
        positive=False,
        max_iter=1000)

    return alphas, coefs, dual_gaps
Example #38
def my_lasso_path(U, F):

    alphas = np.logspace(-10, -3, num=50)
    print 'alphas'
    print alphas

    a, b, c = lasso_path(U,
                         F,
                         alphas=alphas,
                         fit_intercept=False,
                         normalize=False,
                         max_iter=1000,
                         tol=0.000001,
                         selection='random')

    print 'alpha n_nonzero'
    t = np.sum(b != 0, 0)
    for aa, tt in zip(a.tolist(), t.tolist()):
        print str(aa) + '\t' + str(tt)

    print

    a_st, b_st, c_st = lasso_path(U,
                                  F,
                                  alphas=alphas,
                                  fit_intercept=True,
                                  normalize=True,
                                  max_iter=1000,
                                  tol=0.000001,
                                  selection='random')

    print 'alpha n_nonzero'
    t_st = np.sum(b_st != 0, 0)
    for aa, tt, tt_st in zip(a.tolist(), t.tolist(), t_st.tolist()):
        print[aa, tt, tt_st]

    print

    return a, b, c
Example #39
def test_compute_alo_leverage():
    X, y = make_test_case(50, 20, 10)

    alphas, beta_hats, _ = linear_model.lasso_path(X, y)

    alo_fast, leverage = native_impl.lasso_compute_alo(X,
                                                       y,
                                                       beta_hats,
                                                       return_leverage=True)

    assert alo_fast.shape == (beta_hats.shape[1], )
    assert leverage.shape == (50, beta_hats.shape[1])
    assert np.all(leverage >= 0)
    assert np.all(leverage <= 1)
Example #40
def plot_lasso_path(df, resp_var, exp_vars):
    """ Plot the lasso path. Both response and explanatory
        variables are standardised first.
    
    Args:
        df:       Dataframe
        resp_var: String. Response variable
        exp_vars: List of strings. Explanatory variables
    
    Returns:
        Dataframe of path and matplotlib figure with tooltip-
        labelled lines. To view this figure in a notebook, use
        mpld3.display(f) on the returned figure object, f.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import mpld3
    from mpld3 import plugins
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import lasso_path

    # Standardise the feature data and response
    feat_std = StandardScaler().fit_transform(df[[resp_var,] + exp_vars])
    
    # Calculate lasso path
    alphas, coefs, _ = lasso_path(feat_std[:, 1:],         # X
                                  feat_std[:, 0],          # y
                                  eps=1e-3,                # Path length
                                  fit_intercept=False)     # Already centred

    # -Log(alphas) is easier for display
    neg_log_alphas = -np.log10(alphas)

    # Build df of results
    res_df = pd.DataFrame(data=coefs.T, index=alphas, columns=exp_vars)
    
    # Plot
    fig, ax = plt.subplots()

    for coef, name in zip(coefs, exp_vars):
        line = ax.plot(neg_log_alphas, coef, label=name)
        plugins.connect(fig, plugins.LineLabelTooltip(line[0], label=name))

    plt.xlabel('-Log(alpha)')
    plt.ylabel('Coefficients')
    plt.title('Lasso paths')
    plt.legend(loc='best', title='', ncol=3)    
    
    return res_df, fig
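
A hypothetical usage example, assuming mpld3 is installed and a scikit-learn version whose lasso_path still accepts fit_intercept (as the function above expects); the DataFrame and its column names are made up.

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(100, 4), columns=['resp', 'x1', 'x2', 'x3'])
df['resp'] = 2 * df['x1'] - df['x3'] + 0.1 * rng.randn(100)

res_df, f = plot_lasso_path(df, resp_var='resp', exp_vars=['x1', 'x2', 'x3'])
print(res_df.head())
# in a notebook, mpld3.display(f) shows the tooltip-labelled figure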
Example #41
def L_U(X, y, eps=1e-4, max_iter=5000):
    n_samples, n_features = X.shape
    # Lasso with universal lambda
    univ_lambda = np.sqrt(2. * np.log(n_features) / float(n_samples))
    beta_ulasso = lasso_path(X,
                             y,
                             alphas=[univ_lambda],
                             tol=eps,
                             max_iter=max_iter)[1]
    beta_ulasso = beta_ulasso.ravel()
    size = np.sum(beta_ulasso != 0)
    sigma_ulasso = np.linalg.norm(y - np.dot(X, beta_ulasso))
    sigma_ulasso /= np.sqrt(n_samples - size)
    return beta_ulasso, sigma_ulasso
Example #42
def solvePenalizedLSQ(A, b, eps, B, c):
    """
    Solves an  unconstrained linear least squares problem

    minimize   2-norm (A*x-b)^2 + eps * 2-norm (Bx-c)^2 + tau 1-norm(x)
    """
    X = numpy.vstack((A, eps * B))
    y = numpy.concatenate((b, c))

    print "Computing regularization path using the lasso..."
    models = lasso_path(X, y)
    alphas_lasso = numpy.array([model.alpha for model in models])
    coefs_lasso = numpy.array([model.coef_ for model in models])

    print alphas_lasso
    print coefs_lasso
Example #43
    def fit(self, X, y, feature_labels, estimator_params=None):
        """Computes the Lasso path using Sklearn's lasso_path method.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data (the independent variables).

        y : array-like, shape (n_samples, n_outputs)
            Training data (the output/target values).

        feature_labels : array-like, shape (n_features)
                         Labels for each of the features in X.

        estimator_params : dict, optional
                           The parameters to pass to Sklearn's Lasso estimator.


        Returns
        -------
        self
        """
        self._reset()
        if estimator_params is None:
            estimator_params = {}
        self.feature_labels_ = feature_labels

        alphas, coefs, _ = lasso_path(X, y, **estimator_params)
        self.alphas_ = alphas.copy()
        self.coefs_ = coefs.copy()

        # Rank the features in X by order of importance. This ranking is based
        # on how early a given features enter the regression (the earlier a
        # feature enters the regression, the MORE important it is).
        feature_rankings = [[] for _ in range(X.shape[1])]
        for target_coef_paths in self.coefs_:
            for i, feature_path in enumerate(target_coef_paths):
                entrance_step = 1
                for val_at_step in feature_path:
                    if val_at_step == 0:
                        entrance_step += 1
                    else:
                        break
                feature_rankings[i].append(entrance_step)
        self.rankings_ = np.array([np.mean(ranks) for ranks in feature_rankings])
        return self
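
A standalone sketch of the ranking idea described above (the earlier a feature's coefficient becomes non-zero along the lasso path, the more important it is), using synthetic single-output data instead of the class wrapper; everything below is an illustrative assumption.

import numpy as np
from sklearn.linear_model import lasso_path

rng = np.random.RandomState(0)
X = rng.randn(120, 6)
y = 3 * X[:, 0] + 1.5 * X[:, 2] + 0.1 * rng.randn(120)

alphas, coefs, _ = lasso_path(X, y)            # coefs has shape (n_features, n_alphas)
entrance_step = np.argmax(coefs != 0, axis=1)  # first alpha index where a coefficient is non-zero
never_active = ~np.any(coefs != 0, axis=1)
entrance_step[never_active] = coefs.shape[1]   # features that never enter rank last
print(entrance_step)                           # features 0 and 2 should enter earliest

Example #44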
from scipy.io import mmwrite, mmread
from sklearn.linear_model.coordinate_descent import _alpha_grid
from sklearn.linear_model import lasso_path
import numpy as np
from sklearn.utils.extmath import safe_sparse_dot
from time import time

# Newsgroup datasets
X_new = mmread("X_new.mtx")
y_new = mmread("y_new.mtx")
y_new = y_new.toarray()[0]
#alphas = _alpha_grid(X_new, y_new, eps=1e-3, fit_intercept=True, normalize=False, n_alphas=100)
t = time()
lasso_path(X_new, y_new, eps=1e-3, precompute=False, fit_intercept=True, normalize=False, n_alphas=100)
print time() - t

X_new = mmread("haxby_X.mtx").toarray()
y_new = mmread("haxby_y.mtx").toarray()[0]
#print y_new.shape
#y_new = y_new.toarray()[0]
alphas = _alpha_grid(X_new, y_new, eps=1e-3, fit_intercept=True, normalize=False, n_alphas=100)

t = time()
coef = lasso_path(X_new, y_new, eps=1e-3, precompute=False, fit_intercept=True, normalize=False, alphas=alphas)#n_alphas=100)
coef_ = coef[1]
sq_loss = np.sum(0.5*(y_new[:, np.newaxis] - safe_sparse_dot(X_new, coef_))**2, axis=0)
l2_penalty = np.sum(coef_**2, axis=0)
l1_penalty = np.sum(np.abs(coef_), axis=0)

print time() - t
Example #45
    def source(self, numpoints=100, L=0.5, por=0.0, LocR=None, srcTime=None):
        event = self.event
        # preconditions

        assert(0 <= por)
        
        assert(L > 0)
        
        assert(numpoints == int(numpoints))
    
        if LocR is None:
            LocX, LocY, LocZ = (event.LocX, event.LocY, event.LocZ)
        else:
            LocX, LocY, LocZ = LocR
            
        if srcTime is None:
            self.srcTime = dateTime2Num(event.origin_time) + linspace(-por * L, (1 - por) * L, numpoints)
        
        '''
        Reconstruction of the source
        :param event: object of type event
        :param numpoints=100: number of points in the discretization of the estimated source
        :param L: length of the time window of the estimated source
        :param por: fraction of time before the time estimated by Codelco
        '''
        
        dt = self.srcTime[1] - self.srcTime[0]
        
        """
            se requiere resolver un sistema lineal del tipo A*alphas = U
        """
        A, U = ([], [])
    
        # add the displacement field to the response vector
        # the ids are the same!
        for gs in event.seismograms:
    
            # add all the dimensions that hold valid measurements
            
            data = gs.data.values
            
            if gs.X_enabled == 1:
                U = hstack((U, data[:, 0].T))
    
            if gs.Y_enabled == 1:
                U = hstack((U, data[:, 1].T))
    
            if gs.Z_enabled == 1:
                U = hstack((U, data[:, 2].T))
    
        for G in event.seismograms:
    
            # sampling rate
            hsr = G.hardware_sampling_rate
    
            # the relation dt*hsr > 1
            deltat = dt * hsr
            #assert dt * hsr <= 1 , 'Warning: the product dt * hsr should be greater than 1'
    
            R = (G.x_coord - LocX, G.y_coord - LocY, G.z_coord - LocZ)
    
            # Green's function
            # t = G.timevector - dateTime2Num(date=event.origin_time)
            t = G.timevector - self.srcTime[0]
            alpha = G.P_velocity
            beta = G.S_velocity
            rho = G.RockDensity
    
            Gk = GreenKernel(R=R, time=t, alpha=alpha, beta=beta, rho=rho)
            assert(not any(isnan(Gk[:])))
            # integration of the Green's function
            dtdomain = t[1] - t[0]
    
            F = cumsum(Gk, axis=2) * dtdomain
            FF = zeros(shape(F))
    
            # auxiliary matrix in which all the convolutions produced
            # by a single sensor will be stored.
            B = []
    
            for jj in xrange(numpoints):
                # for every element of the basis
                ii = xrange(size(F, 2))
    
                # indices for the shifts in the convolution between the basis
                # and the Green's function
    
                tf = map(lambda I: int(max(I - floor(jj * deltat), 0)), ii)
                ti = map(lambda I: int(max(I - floor((jj + 1) * deltat), 0)), ii)
    
                # convolution with respect to the selected basis
                FF[:, :, ii] = F[:, :, tf] - F[:, :, ti]
    
                # convolution for one element of the basis
                C = []
                if G.X_enabled:
                    if C == []:
                        C = FF[0, :, :].copy()
                    else:
                        C = hstack((C, FF[0, :, :].copy()))
                    assert(not any(isnan(C[:])))
                if G.Y_enabled:
                    if C == []:
                        C = FF[1, :, :].copy()
                    else:
                        C = hstack((C, FF[1, :, :].copy()))
                    assert(not any(isnan(C[:])))
                if G.Z_enabled:
                    if C == []:
                        C = FF[2, :, :].copy()
                    else:
                        C = hstack((C, FF[2, :, :].copy()))
                    assert(not any(isnan(C[:])))
    
                if B == []:
                    B = C.copy()
                else:
                    B = vstack((B, C.copy()))
    
            if A == []:
                A = B.copy()
            else:
                A = hstack((A, B.copy()))
    
        # @todo: minimize the 1-norm to make the estimation more robust
        # solve the linear system that minimizes the 2-norm of the error
        assert(not any(isnan(A[:])))
        #from scipy.sparse import csr_matrix
        #from scipy.sparse.linalg import lsqr
        #matrix = csr_matrix(A.T)
        # X = numpy.linalg.lstsq(A.T, U)[0]
        # linear regression
        
        lasso = None
        if lasso == True:
            
            _, lasso_path, _ = lasso_path(A.T, U)
            rgr_lasso = Lasso()
            rgr_lasso.fit(A.T, U)
            X = rgr_lasso.coef_
            
            pass
        if det(dot(A, A.T)) != 0:
            #invertible
            X = dot(dot(U, A.T), inv(dot(A, A.T)))
        else:
            # not invertible
            X = dot(dot(U, A.T), pinv(dot(A, A.T)))
    
        src = zip(self.srcTime.T,
                  X[range(0, 3 * numpoints, 3)].T,
                  X[range(1, 3 * numpoints, 3)].T,
                  X[range(2, 3 * numpoints, 3)].T
                  )
        # postconditions
        assert(shape(src) == (numpoints, 4))
    
        # estimation error
        error = norm(U - dot(X, A), 2)
        src = array(src)
        rot, vec, val = _rotate(src)
    
        # necessary ordering conditions on the eigenvalues and the coordinates of
        # the source
        order = sorted(range(3), key=lambda k:val[k])
        val = val[order]
    
        #assert(val[0] <= val[1] <= val[2])
        rot[:, 1:4] = rot[:, 1:4][:, order]
        
        
        return(src, error, rot, vec, val)
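Example #46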
plt.subplot(121)
m_log_alphas = -np.log10(lasso.steps[1][1].alphas_)
plt.plot(m_log_alphas, lasso.steps[1][1].mse_path_, ':')
plt.plot(m_log_alphas, lasso.steps[1][1].mse_path_.mean(axis=-1), 'k',
         label='Average across the folds', linewidth=2)
plt.axvline(-np.log10(lasso.steps[1][1].alpha_), linestyle='--', color='k',
            label='alpha: CV estimate')
plt.legend(loc = 'upper left')
plt.xlabel('-Log(alpha)')
plt.ylabel('Mean square error')
plt.title('Mean square error on each fold')
plt.axis('tight')

# Plot lasso paths 
plt.subplot(122)
alphas_lasso, coefs_lasso, _ = lasso_path(features, labels, alphas = alphas)
plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
plt.axvline(-np.log10(lasso.steps[1][1].alpha_), linestyle='--', color='k',
            label='alpha: CV estimate')
plt.xlabel('-Log(alpha)')
plt.ylabel('Coefficients')
plt.title('Lasso Paths')
plt.axis('tight')

lasso_coef = pd.DataFrame({'coef': lasso.steps[1][1].coef_.tolist()}, features_list[1:]).round(3) 
lasso_selection = lasso_coef[lasso_coef['coef']!=0]
lasso_selection

print "Lasso selected an alpha of %.2f with %d features:" % (alpha_lasso, len(lasso_selection)) 
lasso_selection.sort('coef', ascending=False)
Beispiel #47
0
def lasso_coefs(X, y):
    _, coefs, _ = lasso_path(X, y)
    return coefs.T
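# Hypothetical usage of lasso_coefs above (X_demo / y_demo are made-up data,
# and the imports are repeated here only so the snippet stands alone). With
# scikit-learn's defaults lasso_path evaluates 100 alphas, so the transposed
# result has shape (n_alphas, n_features).
import numpy as np
from sklearn.linear_model import lasso_path

rng = np.random.RandomState(0)
X_demo = rng.randn(50, 8)
y_demo = X_demo[:, 0] - 2.0 * X_demo[:, 3] + 0.1 * rng.randn(50)
coefs_T = lasso_coefs(X_demo, y_demo)
print(coefs_T.shape)   # (100, 8): one coefficient vector per alpha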
Beispiel #48
0
def main():

    path = '/users/davecwright/documents/kaggle/liberty_fire_cost/'
    path = 'c:\\users\\dwright\\code\\'
    train_name = path + 'train.csv'
    test_name = path + 'test.csv'
    # f = open(path + 'train.csv', 'rb')

    readRows = 2000 #None for all
    print 'loading train_data'
    train_data = pd.read_csv(train_name, nrows=readRows)

    print 'train_data loaded'
    y_data = train_data['target'].values
    train_data = train_data.drop('target', 1)  # drop returns a new frame, so re-assign
    train_data = scrub(train_data)
    y_data = y_data.astype(float)

    eps = 5e-3
    X = train_data
    y = y_data
    folds = 10
    kf = cross_validation.KFold(y_data.shape[0], n_folds=folds)
    alphas = range(folds)
    k = 0
    for train, test in kf:
        # l1_ratio sweeps from 1/folds up to 1.0 across the folds
        penalty = float(alphas[k] + 1) / folds
        print penalty
        clf = ElasticNet(l1_ratio=penalty)  # ElasticNet has no eps argument
        clf.fit(X, y)
        k += 1
        # doing our own cv parametrization
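    # A hedged alternative to the hand-rolled loop above: ElasticNetCV (or
    # LassoCV) performs the same cross-validated parameter search directly.
    # The l1_ratio grid below is illustrative, not taken from the original.
    from sklearn.linear_model import ElasticNetCV
    cv_clf = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, 1.0], eps=eps, cv=folds)
    cv_clf.fit(X, y)
    print 'CV-selected alpha:', cv_clf.alpha_, 'l1_ratio:', cv_clf.l1_ratio_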


    print 'computing lasso path'
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept = False)
    print 'computing enet path'
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)

    plt.figure(1)
    ax = plt.gca()
    ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
    l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
    l2 = plt.plot(-np.log10(alphas_enet), coefs_enet.T, linestyle='--')

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
    plt.axis('tight')
    plt.show()
    sys.exit()



    for train_index, test_index in kf:
        #print train_index, test_index
        print train_data.iloc[train_index], train_data.iloc[test_index]


    # I am implementing the lasso reduction here...
    #for train, test in kf:

    fig = plt.figure(figsize=(12, 9))
    #ax = fig.add_subplot(111)

    #ax.plot(np.sort(y_data))
    #plt.xlabel("Number of Features")
    #plt.ylabel("claims cost")
    #plt.title("claims_cost")
    #ax.set_xscale("log")

    #ax.set_position([box.x0, box.y0 + box.height * 0.3, box.width, box.height * 0.7])
    #ax.legend(**_PLT_LEGEND_OPTIONS)
    #plt.show()


    train_data = train_data.drop('target', 1)

    # A - preprocessing
    # encode the text variables, var1-var9, Z values are NaN

    # fill in missing values in the text variables and in the continuous variables
    # skip continuous for now

    # A3. build new features through the interactions of various items
    # skip for now

    #  A4. dimensionality reduction to take the feature set back down to something more manageable.

    est_clf = svm.SVR(kernel='linear', C=1)
    rks = select_ests(X_train, y_data, 100, est_clf)
    X_train = X_train[:, rks]

    clf = svm.SVR(kernel='linear', C=1)
    acy = cv(X_train, y_data, clf, None, estimator_name(clf))

    # the point here is to understand what accuracy is

    print 'accuracy:', acy

    #select_model(X_train, y_data)

    # B. split out test and fit sets

    test_data = pd.read_csv(test_name, nrows=readRows)
    test_data = encode_impute(test_data)
    X_test = scaler.transform(test_data)
Beispiel #49
0
for alpha in alphas:
    simpLasCoefs, Yhat = simpleLasso(X,y,alpha,True)
    # print "alpha = ", alpha
    # print simpLasCoefs
    # print Yhat
    simpLasCoefsCR, YhatCR = simpleLasso(X_cr,y_cr,alpha,False)
    # print simpLasCoefsCR
    # print YhatCR



################------ Exercise 2.4 ------###############################
print "Ex. 2.4: Figure"
#_, theta_lasso, _ =lasso_path(np.array(X), np.array(y), alphas=alphas,  fit_intercept=True, return_models=False)
_, theta_lasso_CR, _ =lasso_path(np.array(X_cr), np.array(y_cr), alphas=alphas,  fit_intercept=False, return_models=False)

# plot lasso path
# fig1=plt.figure(figsize=(12,8))
# plt.title("Chemin du Lasso: "+ r"$p={0}, n={1} $".format(nfeatures,nsamples),fontsize = 16)
# ax1 = fig1.add_subplot(111)
# ax1.plot(alphas,np.transpose(theta_lasso),linewidth=3)
# ax1.set_xscale('log')
# ax1.set_xlabel(r"$\lambda$")
# ax1.set_ylabel("Coefficient amplitude")
# ax1.set_ylim([-2,0.5])
# ax1.set_xlim([lstart,lend])
# plt.show(block=False)

fig2=plt.figure(figsize=(12,8))
plt.title("Chemin du Lasso vars_CR: "+ r"$p={0}, n={1} $".format(nfeatures,nsamples),fontsize = 16)
# Convert list of lists to np array for input to sklearn packages

#Unnormalized labels
Y = numpy.array(labels)

# normalized labels
Y = numpy.array(labelNormalized)

#Unnormalized X's
X = numpy.array(xList)

# Normalized X's
X = numpy.array(xNormalized)

alphas, coefs, _  = linear_model.lasso_path(X, Y,  return_models=False)


plot.plot(alphas,coefs.T)

plot.xlabel('alpha')
plot.ylabel('Coefficients')
plot.axis('tight')
plot.semilogx()
ax = plot.gca()
ax.invert_xaxis()
plot.show()

nattr, nalpha = coefs.shape

#find coefficient ordering
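# A sketch of one way the "coefficient ordering" could be computed here (not
# taken from the original listing): walk the columns of coefs, which
# lasso_path returns ordered from largest to smallest alpha, and record the
# order in which each attribute first becomes non-zero.
nzList = []
for iAlpha in range(nalpha):
    newNz = [i for i in range(nattr) if coefs[i, iAlpha] != 0.0 and i not in nzList]
    nzList.extend(newNz)
print(nzList)   # attribute indices in the order they enter the model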
Beispiel #51
0
data = load_mnist()
pos_ind = 6
neg_ind = 5
sig_D = 100
# lmda_list = [0.0005, 0.001, 0.01, 0.1, 0.3]
x, y = convert_binary(data, pos_ind, neg_ind)
n, p = x.shape
x = x.astype(float)
y = y.astype(float)
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
x = min_max_scaler.fit_transform(x)
# xtest = min_max_scaler.transform(x)
# ntrain = ytrain.size

alphas, coefs, gaps = linear_model.lasso_path(x, y, n_alphas=5, return_models=False, fit_intercept=False)
lmda_list = alphas[::-1]  # lasso_path returns alphas in decreasing order; reverse to increasing
n_iter = 10
ss = cv.StratifiedShuffleSplit(y=y, n_iter=n_iter, test_size=0.3, random_state=5)
nzs_scg_T = np.zeros((n_iter, len(lmda_list)))
nzs_scg_bar = np.zeros((n_iter, len(lmda_list)))
nzs_rda_T = np.zeros((n_iter, len(lmda_list)))
nzs_rda_bar = np.zeros((n_iter, len(lmda_list)))
nzs_rda2_T = np.zeros((n_iter, len(lmda_list)))
nzs_rda2_bar = np.zeros((n_iter, len(lmda_list)))
nzs_cd_T = np.zeros((n_iter, len(lmda_list)))
nzs_cd_bar = np.zeros((n_iter, len(lmda_list)))
nzs_sgd = np.zeros((n_iter, len(lmda_list)))
nsweep = 5
b = 5
c = 1
Beispiel #52
0
xsum = x.sum(axis=0)
# ind = np.where(xsum>0)  # return object is tuple
# x = x[:, ind[0]]
x = x.astype(float)
y = y.astype(float)
n, p = x.shape
random_state = 21
lmda = 0.01
nsweep = 1
xtrain, xtest, ytrain, ytest = cv.train_test_split(x, y, test_size=0.2, random_state=random_state)
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
xtrain = min_max_scaler.fit_transform(xtrain)
xtest = min_max_scaler.transform(xtest)
ntrain = ytrain.size

alphas, coefs, gaps = linear_model.lasso_path(xtrain, ytrain,n_alphas=10, return_models=False, fit_intercept=False)
alphas = alphas[::-1]
# zs = (coefs==0).sum(axis=0)
# zs = zs[::-1]
# gaps = gaps[::-1]

obj = np.zeros(len(alphas))
zs2 = np.zeros(len(alphas))
obj2 = np.zeros(len(alphas))
zs3 = np.zeros(len(alphas))
zs = np.zeros(len(alphas))
obj3 = np.zeros(len(alphas))
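# Aside (not part of the original script): the commented-out lines above hint
# at measuring sparsity along the sklearn baseline path. Since lasso_path
# returns coefs with shape (n_features, n_alphas) in decreasing-alpha order,
# the zero counts can be read off directly and reversed to line up with the
# reversed alphas.
zs_baseline = (coefs == 0).sum(axis=0)[::-1]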
for i, alpha in enumerate(alphas):
    print "alpha: %f" % alpha
    print 'cd with random permutation'
    clf = LassoLI(lmda=alpha, algo='cd', cd_ord='rand', T=6000)
Beispiel #53
0
from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(0) # Standardize data (easier to set the rho parameter)

################################################################################
# Compute paths

eps = 5e-3 # the smaller it is the longer is the path

print "Computing regularization path using the lasso..."
models = lasso_path(X, y, eps=eps)
alphas_lasso = np.array([model.alpha for model in models])
coefs_lasso = np.array([model.coef_ for model in models])

print "Computing regularization path using the elastic net..."
models = enet_path(X, y, eps=eps, rho=0.8)
alphas_enet = np.array([model.alpha for model in models])
coefs_enet = np.array([model.coef_ for model in models])

################################################################################
# Display results

ax = pl.gca()
ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
l1 = pl.plot(coefs_lasso)
l2 = pl.plot(coefs_enet, linestyle='--')
plt.axvline(BLLModel.alpha_, linestyle='--', label='CV Estimate of Best alpha')
plt.semilogx()
plt.legend()
ax = plt.gca()
ax.invert_xaxis()
plt.xlabel('alpha')
plt.ylabel('Mean Square Error')
plt.axis('tight')
plt.title('Determining alpha via LASSO 10-fold CV')
plt.show()

print "alpha Value that Minimizes CV Error ", BLLModel.alpha_
print "Minimum MSE ", min(BLLModel.mse_path_.mean(axis=-1))
bestAlpha = BLLModel.alpha_

alphas, coefs, _ = lasso_path(X, y, return_models=False)
plt.plot(alphas,coefs.T)
plt.xlabel('alpha')
plt.ylabel('Coefficients')
plt.axis('tight')
plt.title('Variables as they enter the model')
plt.semilogx()
plt.legend(loc='upper left')
ax = plt.gca()
ax.invert_xaxis()
plt.show()

nattr, nalpha = coefs.shape	

#find coefficient ordering
nzList = []
print("Computing regularization path using the ridge...")
n_alphas = 200
alphas = np.logspace(-4, 2, n_alphas)
clf = linear_model.Ridge(fit_intercept=False)

coefs = []
for a in alphas:
    clf.set_params(alpha=a, max_iter=1000)
    clf.fit(xtrain, ytrain)
    coefs.append(clf.coef_)

# lasso and elastic net path
eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(xtrain, ytrain, eps, fit_intercept=False)


print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    xtrain, ytrain, eps=eps, l1_ratio=0.8, fit_intercept=False)

# Display results

plt.figure(1)
ax = plt.gca()
ax.set_color_cycle(['b', 'r', 'g', 'c', 'k', 'y', 'm'])

ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

# Compute paths

eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
# The return_models parameter sets that lasso_path will return
# the alphas and the coefficients as output, instead of a list
# of models as it does by default. Returning the list of models
# is deprecated and will eventually be removed in 0.15
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, return_models=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(X, y, eps,
                                                        positive=True,
                                                        return_models=False)
print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8,
                                    return_models=False)

print("Computing regularization path using the positve elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(X, y, eps=eps,
                                                      l1_ratio=0.8,
                                                      positive=True,
                                                      return_models=False)
from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(0)  # Standardize data (easier to set the l1_ratio parameter)

###############################################################################
# Compute paths

eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
models = lasso_path(X, y, eps=eps)
alphas_lasso = np.array([model.alpha for model in models])
coefs_lasso = np.array([model.coef_ for model in models])

print("Computing regularization path using the positive lasso...")
models = lasso_path(X, y, eps=eps, positive=True)
alphas_positive_lasso = np.array([model.alpha for model in models])
coefs_positive_lasso = np.array([model.coef_ for model in models])

print("Computing regularization path using the elastic net...")
models = enet_path(X, y, eps=eps, l1_ratio=0.8)
alphas_enet = np.array([model.alpha for model in models])
coefs_enet = np.array([model.coef_ for model in models])

print("Computing regularization path using the positve elastic net...")
models = enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True)
def compute_bench(samples_range, features_range):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            dataset_kwargs = {
                'n_samples': n_samples,
                'n_features': n_features,
                'n_informative': n_features // 10,
                'effective_rank': min(n_samples, n_features) / 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            X, y = make_regression(**dataset_kwargs)

            gc.collect()
            print("benchmarking lars_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lars_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (without Gram)'].append(delta)

    return results
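# Hypothetical driver for compute_bench above (grid sizes are illustrative,
# not the benchmark's original settings; numpy and the path functions are
# assumed to be imported as in the original benchmark script): time the four
# path variants over a small grid and print the collected results.
if __name__ == '__main__':
    samples_range = np.linspace(100, 500, 3).astype(int)
    features_range = np.linspace(100, 500, 3).astype(int)
    timings = compute_bench(samples_range, features_range)
    for name in sorted(timings):
        print('%-28s %s' % (name, ' '.join('%0.3fs' % t for t in timings[name])))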
from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

# Compute paths

eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X, y, eps, positive=True, fit_intercept=False)
print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)

print("Computing regularization path using the positve elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)

# Display results

plt.figure(1)