def homotopy_path(X, Y, lambda_, coef, y_t, epsilon=1e-3, nu=1.):
    eps_0 = epsilon / 10.
    step_size = np.sqrt(2. * (epsilon - eps_0) / nu)
    Y_t = np.array(list(Y[:-1]) + [y_t], order='F')
    y_stop = Y[-1]

    while y_t < y_stop:
        y_t = min(y_t + step_size, y_stop)
        Y_t[-1] = y_t
        # tol is the coordinate-descent duality-gap tolerance; with explicit
        # alphas, lasso_path's eps (grid length) parameter has no effect
        tol = eps_0 / np.linalg.norm(Y_t) ** 2
        alpha = [lambda_ / X.shape[0]]
        res = lasso_path(X, Y_t, alphas=alpha, coef_init=coef, tol=tol)
        coef = res[1].ravel()

    while y_t > y_stop:
        y_t = max(y_t - step_size, y_stop)
        Y_t[-1] = y_t
        tol = eps_0 / np.linalg.norm(Y_t) ** 2
        alpha = [lambda_ / X.shape[0]]
        res = lasso_path(X, Y_t, alphas=alpha, coef_init=coef, tol=tol)
        coef = res[1].ravel()

    return coef
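# --- Hypothetical usage sketch for homotopy_path (not from the original
# source). It assumes numpy as np and sklearn.linear_model.lasso_path are
# available at module level, as the function body requires; lambda_ and the
# warm-start coef below are illustrative values.
import numpy as np

rng = np.random.RandomState(0)
X = np.asfortranarray(rng.randn(20, 5))
Y = rng.randn(20)
coef0 = np.zeros(X.shape[1])
# track the lasso solution while the last response moves from 0.0 to Y[-1]
coef_final = homotopy_path(X, Y, lambda_=1.0, coef=coef0, y_t=0.0)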
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    f = assert_warns_message

    def in_warn_message(msg):
        return 'Early stopping' in msg or 'Dropping a regressor' in msg

    lars_alphas, _, lars_coef = f(ConvergenceWarning, in_warn_message,
                                  linear_model.lars_path, X, y,
                                  method='lasso')
    with ignore_warnings():
        _, lasso_coef2, _ = linear_model.lasso_path(X, y,
                                                    alphas=lars_alphas,
                                                    tol=1e-6,
                                                    fit_intercept=False)

        lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
        iter_models = enumerate(
            linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6,
                                    return_models=True,
                                    fit_intercept=False))
        for i, model in iter_models:
            lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
    np.testing.assert_array_almost_equal(lasso_coef, lasso_coef2, decimal=1)
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 80, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        lars_alphas, _, lars_coef = linear_model.lars_path(X, y,
                                                           method='lasso')

    assert_true(len(warning_list) > 0)
    assert_true(('Dropping a regressor' in warning_list[0].message.args[0])
                or ('Early stopping' in warning_list[0].message.args[0]))

    _, lasso_coef2, _ = linear_model.lasso_path(X, y,
                                                alphas=lars_alphas,
                                                tol=1e-6,
                                                fit_intercept=False)

    lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", DeprecationWarning)
        for i, model in enumerate(
                linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6,
                                        return_models=True,
                                        fit_intercept=False)):
            lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
    np.testing.assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
    np.testing.assert_array_almost_equal(lasso_coef, lasso_coef2, decimal=1)
def fit_model(X, Y_t, coef, lambda_, eps_0, method="lasso"):
    if coef is None:
        coef = np.zeros(X.shape[1])
    if method == "lasso":
        # tol is the duality-gap tolerance; with explicit alphas,
        # lasso_path's eps (grid length) parameter would have no effect
        tol = eps_0 / np.linalg.norm(Y_t) ** 2
        lmd = [lambda_ / X.shape[0]]
        res = lasso_path(X, Y_t, alphas=lmd, coef_init=coef, tol=tol,
                         max_iter=int(1e8))
        coef = res[1].ravel()
    elif method == "ridge":
        reg = Ridge(alpha=lambda_, fit_intercept=False, solver="auto")
        reg.fit(X, Y_t)
        coef = reg.coef_
    elif method == "logcosh":
        # I cannot early stop scipy.minimize with duality gap :-/
        coef = logcosh_reg(X, Y_t, lambda_, coef)
    elif method == "linex":
        # I cannot early stop scipy.minimize with duality gap :-/
        coef = linex_reg(X, Y_t, lambda_, coef=coef)
    mu = X.dot(coef)
    return mu, np.abs(Y_t - mu), coef
def plot(X, y):
    # X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

    # Compute paths
    eps = 5e-3  # the smaller it is the longer is the path

    alphas_lasso, coefs_lasso, _ = linear_model.lasso_path(X, y, eps)
    print alphas_lasso
    print coefs_lasso.shape
    for i in coefs_lasso.T:
        print i

    plt.figure(1)
    ax = plt.gca()

    colors = cycle(['r', 'g', 'b', 'c', 'm', 'y'])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)
    for coef_l, c in zip(coefs_lasso, colors):
        l1 = plt.plot(neg_log_alphas_lasso, coef_l, c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    # plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'), loc='lower left')
    plt.axis('tight')
    plt.show()
def LassoDiff(clf, A, time, p, delta_p, SNR, i=None):
    flux = np.sin(2 * np.pi / p * time) + np.sin(2 * np.pi / (p + delta_p) * time)
    flux += np.random.normal(0, flux.std() / np.sqrt(SNR), flux.size)
    convWarning = False
    with warnings.catch_warnings(record=True) as w:
        # clf.fit(A, flux)
        clf = linear_model.lasso_path(A, flux)
        if len(w) == 1 and type(w[0].message) is ConvergenceWarning:
            convWarning = True
            # ipdb.set_trace()
    coeffs = clf[1][:, 99]
    power = (coeffs[:A.shape[1] // 2] ** 2
             + coeffs[A.shape[1] // 2:A.shape[1] // 2 * 2] ** 2)
    peaks = findPeaks(power)
    if peaks.size > 1:
        d2P_dp2 = np.gradient(np.gradient(power))
        p1_i, p2_i = peaks[-4:][d2P_dp2[peaks[-4:]].argsort()][:2]
    else:
        p1_i, p2_i = power.argsort()[-2:]
    return i, power, p1_i, p2_i, convWarning
def plot_results(self):
    """Create the base regression plots as well as a regularization path plot."""
    rc.REG.plot_results(self)
    path = linear_model.lasso_path(self.independentVar, self.dependentVar,
                                   return_models=False, fit_intercept=False)
    alphas = path[0]  # Vector of alphas
    coefs = (path[1]).T  # Array of coefficients for each alpha
    viz.plot_regPath(alphas, coefs).plot()
def alpha_choice_fig(x, y, my_alphas, nb_features, train_size):
    '''
    Parameters
    ----------
    x : data.
    y : desired output.
    my_alphas : array of different values for alpha.
    nb_features : number of features.
    train_size : number of train points.

    Returns
    -------
    Representation of the lasso path.
    '''
    X_train, X_test, y_train, y_test = train_test_split(x, y,
                                                        train_size=train_size)
    alpha_for_path, coefs_lasso, _ = lasso_path(
        X_train[:, 0:nb_features - 1], X_train[:, nb_features - 1],
        alphas=my_alphas)
    for i in range(coefs_lasso.shape[0]):
        plt.plot(alpha_for_path, coefs_lasso[i, :])
    plt.xlabel('Alpha')
    plt.ylabel('Coefficients')
    plt.title('Lasso path')
    plt.show()
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method='lasso')
    _, lasso_coef2, _ = linear_model.lasso_path(X, y, alphas=lars_alphas,
                                                tol=1e-6,
                                                fit_intercept=False)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
def meta_algorithm(XTX, XTXi, sampler):

    min_success = 6
    ntries = 10

    def _alpha_grid(X, y, center, XTX):
        n, p = X.shape
        alphas, coefs, _ = lasso_path(X.copy(), y.copy(), Xy=center.copy(),
                                      precompute=XTX.copy())
        nselected = np.count_nonzero(coefs, axis=0)
        alphas = alphas[nselected < 20]
        return alphas

    alpha_grid = _alpha_grid(X, y, sampler.center, XTX)
    success = np.zeros((p, alpha_grid.shape[0]))

    for _ in range(ntries):
        scale = 1.  # corresponds to sub-samples of 50%
        noisy_S = sampler(scale=scale)
        _, coefs, _ = lasso_path(X, y, Xy=noisy_S, precompute=XTX,
                                 alphas=alpha_grid)
        success += np.abs(np.sign(coefs))

    selected = np.apply_along_axis(
        lambda row: any(x > min_success for x in row), 1, success)
    vars = set(np.nonzero(selected)[0])
    return vars
def test_Lasso_Path(self):
    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    X /= X.std(axis=0)

    df = pdml.ModelFrame(diabetes)
    df.data /= df.data.std(axis=0, ddof=False)
    self.assert_numpy_array_almost_equal(df.data.values, X)

    eps = 5e-3

    expected = lm.lasso_path(X, y, eps, fit_intercept=False)
    result = df.lm.lasso_path(eps=eps, fit_intercept=False)
    self.assert_numpy_array_almost_equal(expected[0], result[0])
    self.assert_numpy_array_almost_equal(expected[1], result[1])
    self.assert_numpy_array_almost_equal(expected[2], result[2])

    expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8,
                            fit_intercept=False)
    result = df.lm.enet_path(eps=eps, l1_ratio=0.8, fit_intercept=False)
    self.assert_numpy_array_almost_equal(expected[0], result[0])
    self.assert_numpy_array_almost_equal(expected[1], result[1])
    self.assert_numpy_array_almost_equal(expected[2], result[2])

    expected = lm.enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True,
                            fit_intercept=False)
    result = df.lm.enet_path(eps=eps, l1_ratio=0.8, positive=True,
                             fit_intercept=False)
    self.assert_numpy_array_almost_equal(expected[0], result[0])
    self.assert_numpy_array_almost_equal(expected[1], result[1])
    self.assert_numpy_array_almost_equal(expected[2], result[2])

    expected = lm.lars_path(X, y, method='lasso', verbose=True)
    result = df.lm.lars_path(method='lasso', verbose=True)
    self.assert_numpy_array_almost_equal(expected[0], result[0])
    self.assert_numpy_array_almost_equal(expected[1], result[1])
    self.assert_numpy_array_almost_equal(expected[2], result[2])
def sklearn_lasso_coefs_plot(X, y):
    """
    Show the path taken by coefficients as Lasso shrinks them towards zero.

    Adapted from:
    https://scikit-learn.org/stable/auto_examples/linear_model/plot_lasso_coordinate_descent_path.html
    """
    from itertools import cycle
    from sklearn.linear_model import lasso_path

    eps = 5e-3  # the smaller it is the longer is the path

    print("Computing regularization path using the lasso...")
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=True)

    # Display results
    fig, ax = plt.subplots(figsize=(10, 5), dpi=200)
    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)
    for coef, label, c in zip(coefs_lasso, X.columns, colors):
        _ = plt.plot(neg_log_alphas_lasso, coef, label=label, c=c)

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso path')
    plt.legend(loc='upper left')
    plt.axis('tight')
    plt.show()
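# --- Hypothetical call of sklearn_lasso_coefs_plot (not in the original
# source). The function reads X.columns for legend labels, so X must be a
# pandas DataFrame; it also assumes matplotlib.pyplot as plt and numpy as
# np exist at module level. Sketch on the diabetes data:
import pandas as pd
from sklearn import datasets

diabetes = datasets.load_diabetes()
X_df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
sklearn_lasso_coefs_plot(X_df, diabetes.target)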
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 80, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", UserWarning)
        lars_alphas, _, lars_coef = linear_model.lars_path(X, y,
                                                           method="lasso")

    assert_true(len(warning_list) > 0)
    assert_true(
        ("Dropping a regressor" in warning_list[0].message.args[0])
        or ("Early stopping" in warning_list[0].message.args[0])
    )

    lasso_coef = np.zeros((w.shape[0], len(lars_alphas)))
    for i, model in enumerate(linear_model.lasso_path(X, y,
                                                      alphas=lars_alphas,
                                                      tol=1e-6)):
        lasso_coef[:, i] = model.coef_

    np.testing.assert_array_almost_equal(lars_coef, lasso_coef, decimal=1)
def _fit_bootstrap_sample(X, y, func, L):
    """
    Computes the regularization path for the regression y ~ X.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
    y : array, shape (n_samples)
    func : string
        The function used for computing the regularization path
        (either 'lasso', 'elasticnet', or 'lars').
    L : int
        Length of the path.

    Returns
    -------
    array, shape (n_features, L)
        0 if the coefficient is null and 1 otherwise.
    """
    if func == 'lasso':
        _, coef_path, _ = lasso_path(X, y, n_alphas=L)
    elif func == 'elasticnet':
        _, coef_path, _ = enet_path(X, y, n_alphas=L)
    elif func == 'lars':
        _, _, coef_path = lars_path(X, y, max_iter=L - 1)
    return 1 * (coef_path != 0)
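# --- Illustrative aggregation of _fit_bootstrap_sample into selection
# frequencies, stability-selection style (the resampling loop below is a
# sketch, not part of the original source):
import numpy as np
from sklearn.linear_model import lasso_path, enet_path, lars_path

rng = np.random.RandomState(0)
X = rng.randn(40, 10)
y = X[:, 0] - 2 * X[:, 3] + 0.1 * rng.randn(40)

n_boot, L = 20, 15
freq = np.zeros((X.shape[1], L))
for _ in range(n_boot):
    idx = rng.randint(0, X.shape[0], X.shape[0])  # bootstrap resample
    freq += _fit_bootstrap_sample(X[idx], y[idx], 'lasso', L)
freq /= n_boot  # fraction of resamples in which each coefficient is active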
def conf_pred(X, Y_seen, lambda_, Y_range, alpha=0.1, method="lasso"):
    X_train, X_test, Y_train, Y_test = train_test_split(X[:-1, :], Y_seen,
                                                        test_size=0.5,
                                                        random_state=414)
    # Training
    if method == "lasso":
        lmd = [lambda_ / X_train.shape[0]]
        # tol is the duality-gap tolerance (with explicit alphas,
        # lasso_path's eps grid parameter would be ignored)
        res = lasso_path(X_train, Y_train, alphas=lmd, tol=1e-12)
        coef = res[1].ravel()
    elif method == "logcosh":
        coef = logcosh_reg(X_train, Y_train, lambda_)
    elif method == "linex":
        coef = linex_reg(X_train, Y_train, lambda_)

    # Ranking on the test
    mu = X_test.dot(coef)
    sorted_residual = np.sort(np.abs(Y_test - mu))
    index = int((X.shape[0] / 2 + 1) * (1 - alpha))
    quantile = sorted_residual[index]
    mu = X[-1, :].dot(coef)
    return intervals.closed(mu - quantile, mu + quantile)
def LassoPath(self, alpha, eps=1e-2):
    # eps: the smaller it is the longer is the path
    sel_log_LassoAlpha = -log10(alpha)
    alphas_lasso, coefs_lasso, _ = lasso_path(self.X, self.y, eps,
                                              fit_intercept=False)
    colors = cycle(['b', 'r', 'g', 'c', 'k'])
    neg_log_LassoAlphas = -log10(alphas_lasso)
    fig, ax = plt.subplots()
    for coef_l, c in zip(coefs_lasso, colors):
        ax.plot(neg_log_LassoAlphas, coef_l, c=c)
    ax.axvline(sel_log_LassoAlpha, linestyle='--', color='k',
               label='$\\alpha$: CV estimate')
    xunit, yunit = plot_unit([ax.get_xlim(), ax.get_ylim()])
    ax.text(sel_log_LassoAlpha - xunit, ax.get_ylim()[1] + yunit,
            round(sel_log_LassoAlpha, 2))
    ax.set_xlabel('-log($\\alpha$)')  # or: ax.set_xlabel(r'-log($\alpha$)')
    ax.set_ylabel('Coefficients')
    ax.set_title('Lasso Path')
    fig.savefig(os.path.join(self.fpath,
                             '_'.join([self.name, 'X_LassoPath.png'])))
def _gen_cv_paths(self, alphas):
    """Helper function to generate lasso paths."""
    self.alphas, self.coefs_cv, _ = linear_model.lasso_path(
        self.x_train, self.y_train, fit_intercept=self.intercept,
        alphas=alphas)
    self.coefs_cv = self.coefs_cv.T
def cross_val(X, y, lambdas, sigma_0, eps=1e-4, method="lasso", KF=None):
    """ Perform a 5-fold cross-validation and return the mean square errors
    for different parameters lambdas.
    """
    n_samples, n_features = X.shape
    n_lambdas = len(lambdas)

    if KF is None:
        KF = KFold(n_samples, 5, shuffle=True, random_state=42)

    n_folds = KF.n_folds
    errors = np.zeros((n_lambdas, n_folds))
    i_fold = 0

    for train_index, test_index in KF:
        X_train = X[train_index]
        X_test = X[test_index]
        y_train = y[train_index]
        y_test = y[test_index]

        if method == "smoothed_concomitant":
            betas, sigmas, gaps, n_iters, _ = \
                SC_path(X_train, y_train, lambdas, eps=eps, sigma_0=sigma_0)
        elif method == "lasso":
            betas = lasso_path(X_train, y_train, alphas=lambdas, tol=eps)[1]
            betas = betas.T
        elif method in ["ls_smoothed_concomitant", "ls_lasso"]:
            betas = estimator_LS(X_train, y_train, lambdas, sigma_0=sigma_0,
                                 eps=eps, method=method)
        elif method == "SZ_path":
            betas, sigmas = SZ_path(X_train, y_train, eps, lambdas)
        elif method == "SBvG":
            betas, sigmas = SBvG_path(X_train, y_train, lambdas)
        elif method == "belloni":
            betas, sigmas = belloni_path(X_train, y_train, lambdas)
        else:
            1 / 0  # BOOM !

        for l in range(n_lambdas):
            y_pred = np.dot(X_test, betas[l, :])
            errors[l, i_fold] = np.mean((y_pred - y_test) ** 2)

        i_fold += 1

    return np.mean(errors, axis=1)
def _alpha_grid(X, y, center, XTX):
    n, p = X.shape
    alphas, coefs, _ = lasso_path(X.copy(), y.copy(), Xy=center.copy(),
                                  precompute=XTX.copy())
    nselected = np.count_nonzero(coefs, axis=0)
    alphas = alphas[nselected < 20]
    return alphas
def fit_lfm_lasso_path(x, z, p=None, lambdas=None, fit_intercept=True):
    """For details, see here.

    Parameters
    ----------
    x : array, shape (t_, n_)
    z : array, shape (t_, k_)
    p : array, optional, shape (t_,)
    lambdas : array, shape (i_,), optional
    fit_intercept : bool, optional

    Returns
    -------
    alpha : array, shape (i_, n_)
    beta : array, shape (i_, n_, k_)
    """
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    if len(z.shape) == 1:
        z = z.reshape(-1, 1)

    t_, n_ = x.shape
    k_ = z.shape[1]
    if lambdas is None:
        lambdas = np.array([0, 0.1, 0.2])
    i_ = lambdas.shape[0]
    if p is None:
        p = np.ones(t_) / t_

    if fit_intercept is True:
        m_x = p @ x
        m_z = p @ z
    else:
        m_x = np.zeros(n_,)
        m_z = np.zeros(k_,)

    x_p = ((x - m_x).T * np.sqrt(p)).T
    z_p = ((z - m_z).T * np.sqrt(p)).T

    _, coeff_, _ = lasso_path(z_p, x_p, alphas=lambdas / (2 * t_),
                              fit_intercept=False)

    # lasso_path automatically sorts lambdas from the largest to the
    # smallest, so we have to revert the order of coeff_ back to the
    # original lambdas
    idx = np.argsort(lambdas)[::-1]
    betas = np.zeros((i_, n_, k_))
    betas[idx, :, :] = coeff_.transpose((2, 0, 1))

    alphas = m_x - betas @ m_z

    return alphas, betas
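# --- Illustrative call of fit_lfm_lasso_path on synthetic data (shapes
# only; not from the original source):
import numpy as np

rng = np.random.RandomState(0)
t_, n_, k_ = 200, 3, 4
z = rng.randn(t_, k_)
x = z @ rng.randn(k_, n_) + 0.1 * rng.randn(t_, n_)
lambdas = np.array([0.1, 0.5, 1.0])
alpha, beta = fit_lfm_lasso_path(x, z, lambdas=lambdas)
# alpha has shape (3, n_) and beta (3, n_, k_): one slice per lambda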
def plot_process(self, eps=5e-3, title="Lasso coef Plot", save=False,
                 file_path=None):
    '''
    Plotting the process of finding the best features

    :param eps: float
        Length of the path
    :param title: string
        The title of the plot
    :param save: boolean, default = False
        If this parameter is set to False the plot will not be saved;
        if it is set to True the plot will be saved using :param file_path
    :param file_path: string, default = None
        The file path where the plot will be saved.
        If :param save is set to False it is not used.
    :return: Plots the process of the algorithm
    '''
    X = self.dataframe[self.X_columns].values
    y = self.dataframe[self.y_column].values

    alphas = np.linspace(self.lasso_cv.alpha_ - 0.1,
                         self.lasso_cv.alpha_ + 0.1,
                         self.n_alphas, endpoint=True)
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps,
                                              fit_intercept=False,
                                              alphas=alphas)

    # note: the alphas are plotted directly, despite the axis label below
    neg_log_alphas_lasso = alphas_lasso
    max_coef = coefs_lasso[0][0]
    min_coef = coefs_lasso[0][0]
    for i in range(len(coefs_lasso)):
        line_style = lambda col: '-' if col in self.choosed_cols else '--'
        plt.plot(neg_log_alphas_lasso, coefs_lasso[i],
                 line_style(self.X_columns[i]), label=self.X_columns[i])
        if max(coefs_lasso[i]) > max_coef:
            max_coef = max(coefs_lasso[i])
        if min(coefs_lasso[i]) < min_coef:
            min_coef = min(coefs_lasso[i])

    plt.vlines(self.lasso_cv.alpha_, min_coef, max_coef,
               linestyles='dashed')
    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title(title)
    plt.axis('tight')
    plt.legend()
    if save:
        plt.savefig(file_path)
    plt.show()
def test_compute_alo_path_fast(method):
    X, y = make_test_case(50, 20, 10)
    alphas, beta_hats, _ = linear_model.lasso_path(X, y)
    alo = lasso.compute_alo_lasso_reference(X, y, beta_hats)
    alo_fast = method(X, y, beta_hats)

    assert np.all(np.isfinite(alo) == np.isfinite(alo_fast))
    assert np.square(alo[np.isfinite(alo)]
                     - alo_fast[np.isfinite(alo_fast)]).mean() < 1e-3
def path_calc(X, y, X_holdout, y_holdout, alphas, paramgrid, colname='CV',
              yname='', method='Elastic Net'):
    # make a copy of the parameters before popping things off
    copy_params = copy.deepcopy(paramgrid)
    fit_intercept = copy_params.pop('fit_intercept')
    precompute = copy_params.pop('precompute')
    copy_X = copy_params.pop('copy_X')
    normalize = False

    # this code adapted from sklearn ElasticNet fit function, which
    # unfortunately doesn't accept multiple alphas at once
    X, y = check_X_y(X, y, accept_sparse='csc',
                     order='F', dtype=[np.float64, np.float32],
                     copy=copy_X and fit_intercept,
                     multi_output=True, y_numeric=True)
    y = check_array(y, order='F', copy=False, dtype=X.dtype.type,
                    ensure_2d=False)

    # this is the step that gives the data to find the intercept if
    # fit_intercept is true.
    X, y, X_offset, y_offset, X_scale, precompute, Xy = \
        _pre_fit(X, y, None, precompute, normalize, fit_intercept,
                 copy=False)

    y = np.squeeze(y)

    # do the path calculation, and tell how long it took
    print('Calculating path...')
    start_t = time.time()
    if method == 'Elastic Net':
        path_alphas, path_coefs, path_gaps, path_iters = enet_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    if method == 'LASSO':
        path_alphas, path_coefs, path_gaps, path_iters = lasso_path(
            X, y, alphas=alphas, return_n_iter=True, **copy_params)
    dt = time.time() - start_t
    print('Took ' + str(dt) + ' seconds')

    # create some empty arrays to store the result
    y_pred_holdouts = np.empty(shape=(len(alphas), len(y_holdout)))
    intercepts = np.empty(shape=(len(alphas)))
    rmses = np.empty(shape=(len(alphas)))
    cvcols = []
    for j in list(range(len(path_alphas))):
        coef_temp = path_coefs[:, j]

        if fit_intercept:
            coef_temp = coef_temp / X_scale
            intercept = y_offset - np.dot(X_offset, coef_temp.T)
        else:
            intercept = 0.

        y_pred_holdouts[j, :] = np.dot(X_holdout, path_coefs[:, j]) + intercept
        intercepts[j] = intercept
        rmses[j] = RMSE(y_pred_holdouts[j, :], y_holdout)
        cvcols.append(('predict',
                       '"' + method + ' - ' + yname + ' - ' + colname
                       + ' - Alpha:' + str(path_alphas[j]) + ' - '
                       + str(paramgrid) + '"'))

    return path_alphas, path_coefs, intercepts, path_iters, \
        y_pred_holdouts, rmses, cvcols
def test_mtl_path():
    X, Y = build_dataset(n_targets=3)
    tol = 1e-10
    params = dict(eps=0.01, tol=tol, n_alphas=10)
    alphas, coefs, gaps = mtl_path(X, Y, **params)
    np.testing.assert_array_less(gaps, tol)

    sk_alphas, sk_coefs, sk_gaps = lasso_path(X, Y, **params,
                                              max_iter=10000)
    np.testing.assert_array_less(sk_gaps, tol * np.linalg.norm(Y, 'fro') ** 2)
    np.testing.assert_array_almost_equal(coefs, sk_coefs, decimal=5)
    np.testing.assert_allclose(alphas, sk_alphas)
def test_mtl():
    # n_samples, n_features = 30, 70
    # X, Y, _, _ = build_dataset(n_samples, n_features, n_targets=10)
    X, Y, _, _ = build_dataset(n_targets=10)
    tol = 1e-9
    alphas, coefs, gaps = mtl_path(X, Y, eps=1e-2, tol=tol)
    np.testing.assert_array_less(gaps, tol)

    sk_alphas, sk_coefs, sk_gaps = lasso_path(X, Y, eps=1e-2, tol=tol)
    np.testing.assert_array_less(sk_gaps, tol * np.linalg.norm(Y, 'fro') ** 2)
    np.testing.assert_array_almost_equal(coefs, sk_coefs, decimal=5)
    np.testing.assert_allclose(alphas, sk_alphas)
def Plot_Lasso_Path(self, path_length=5e-3, alphas=None):
    import matplotlib.pyplot as plt

    print("Computing regularization path using the lasso...")
    alphas_lasso, coefs_lasso, _ = lasso_path(
        self.X, self.Y, path_length, fit_intercept=False, alphas=alphas)

    print("Computing regularization path using the positive lasso...")
    alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
        self.X, self.Y, path_length, positive=True, fit_intercept=False,
        alphas=alphas)

    plt.figure()
    ax = plt.gca()
    ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
    l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
    l2 = plt.plot(-np.log10(alphas_positive_lasso),
                  coefs_positive_lasso.T, linestyle='--')

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and positive Lasso')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'positive Lasso'),
               loc='lower left')
    plt.axis('tight')
    plt.show()
def test_lasso_path(self):
    diabetes = datasets.load_diabetes()
    df = pdml.ModelFrame(diabetes)

    result = df.linear_model.lasso_path()
    expected = lm.lasso_path(diabetes.data, diabetes.target)

    self.assertEqual(len(result), 3)
    tm.assert_numpy_array_equal(result[0], expected[0])
    self.assertIsInstance(result[1], pdml.ModelFrame)
    tm.assert_index_equal(result[1].index, df.data.columns)
    self.assert_numpy_array_almost_equal(result[1].values, expected[1])
    self.assert_numpy_array_almost_equal(result[2], expected[2])

    result = df.linear_model.lasso_path(return_models=True)
    expected = lm.lasso_path(diabetes.data, diabetes.target,
                             return_models=True)
    self.assertEqual(len(result), len(expected))
    self.assertIsInstance(result, tuple)
    tm.assert_numpy_array_equal(result[0], expected[0])
    tm.assert_numpy_array_equal(result[1], expected[1])
    tm.assert_numpy_array_equal(result[2], expected[2])
def train(self, features, labels):
    from sklearn import linear_model
    betas = []
    xs = []
    for ci, ells in enumerate(labels.T):
        active = ~np.isnan(ells)
        fi = features[active]
        ells = ells[active]
        fits = linear_model.lasso_path(fi, ells)
        xs.append(fits[-1].coef_.T.copy())
        betas.append(fits[-1].intercept_.copy())
    return product_intercept_predictor(np.array(xs).T, np.array(betas))
def test_celer_path_vs_lasso_path(sparse_X, prune):
    """Test that celer_path matches sklearn lasso_path."""
    X, y, _, _ = build_dataset(n_samples=30, n_features=50,
                               sparse_X=sparse_X)

    params = dict(eps=1e-2, n_alphas=10, tol=1e-14)
    alphas1, coefs1, gaps1 = celer_path(
        X, y, return_thetas=False, verbose=1, prune=prune, **params)
    alphas2, coefs2, gaps2 = lasso_path(X, y, verbose=False, **params)

    np.testing.assert_allclose(alphas1, alphas2)
    np.testing.assert_allclose(coefs1, coefs2, rtol=1e-05, atol=1e-6)
def do_lasso_simulation(data, NUM_LAMBDA_SPLITS=3000):
    # Make lasso path
    lasso_path, coefs, _ = linear_model.lasso_path(
        data.X_train,
        np.array(data.y_train.flatten().tolist()[0]),  # reshape appropriately
        method='lasso'
    )
    prob = LassoProblemWrapper(
        data.X_train,
        data.y_train
    )
    # print "lasso_path", lasso_path
    val_errors = []
    for i, l in enumerate(lasso_path):
        beta = prob.solve(np.array([l]))
        val_error = testerror_lasso(data.X_validate, data.y_validate, beta)
        val_errors.append(val_error)

    sorted_idx = np.argsort(val_errors)
    max_lam = lasso_path[np.min(sorted_idx[:3])]
    min_lam = lasso_path[np.max(sorted_idx[:3])]
    print "min_lam", min_lam, "max_lam", max_lam
    print "lasso_path[sorted_idx[:3]]", lasso_path[sorted_idx[:3]]

    finer_lam_range = []
    for i, l_idx in enumerate(range(np.min(sorted_idx[:3]) - 1,
                                    np.max(sorted_idx[:3]) + 1)):
        fudge = 0
        if i == 0:
            fudge = 1e-10
        l_min = lasso_path[l_idx + 1] if lasso_path.size - 1 >= l_idx + 1 else 0
        l_max = lasso_path[l_idx] if l_idx >= 0 else lasso_path[0] + 0.1
        add_l = np.arange(start=l_min, stop=l_max + fudge,
                          step=(l_max - l_min) / NUM_LAMBDA_SPLITS)
        finer_lam_range.append(add_l)
    finer_lam_range = np.concatenate(finer_lam_range)
    print "finer_lam_range min", np.min(finer_lam_range), "max", np.max(finer_lam_range)

    fine_val_errors = []
    for i, l in enumerate(finer_lam_range):
        beta = prob.solve(np.array([l]))
        val_error = testerror_lasso(data.X_validate, data.y_validate, beta)
        fine_val_errors.append(val_error)

    fine_sorted_idx = np.argsort(fine_val_errors)
    best_lam = finer_lam_range[fine_sorted_idx[0]]
    print "best_lam", best_lam

    min_dist, idx = get_dist_of_closest_lambda(best_lam, lasso_path)
    print "min_dist", min_dist
    return min_dist
def test_compute_alo_path():
    X, y = make_test_case(50, 20, 10)
    alphas, beta_hats, _ = linear_model.lasso_path(X, y)
    alo = lasso.compute_alo_lasso_reference(X, y, beta_hats)

    beta_hat = beta_hats[:, 5]
    h_5 = lasso.compute_h_lasso(X, np.abs(beta_hat) > 1e-5)
    r_5 = y - np.dot(X, beta_hat)
    alo_5 = np.square(r_5 / (1 - h_5)).mean()

    assert len(alo) == len(alphas)
    assert np.allclose(alo_5, alo[5])
def get_coef_range(X, y):
    print "starting experiment"
    # with stopwatch("lasso paths"):
    alphas, coefs, dual_gaps = lasso_path(
        X, y,
        # l1_ratio=1.0,
        verbose=True,
        # return_models=False,
        positive=False,
        max_iter=1000)
    return alphas, coefs, dual_gaps
def my_lasso_path(U, F):
    alphas = np.logspace(-10, -3, num=50)
    print 'alphas'
    print alphas
    a, b, c = lasso_path(U, F, alphas=alphas, fit_intercept=False,
                         normalize=False, max_iter=1000, tol=0.000001,
                         selection='random')
    print 'alpha n_nonzero'
    t = np.sum(b != 0, 0)
    for aa, tt in zip(a.tolist(), t.tolist()):
        print str(aa) + '\t' + str(tt)
    print
    a_st, b_st, c_st = lasso_path(U, F, alphas=alphas, fit_intercept=True,
                                  normalize=True, max_iter=1000,
                                  tol=0.000001, selection='random')
    print 'alpha n_nonzero'
    t_st = np.sum(b_st != 0, 0)
    for aa, tt, tt_st in zip(a.tolist(), t.tolist(), t_st.tolist()):
        print [aa, tt, tt_st]
    print
    return a, b, c
def test_compute_alo_leverage():
    X, y = make_test_case(50, 20, 10)
    alphas, beta_hats, _ = linear_model.lasso_path(X, y)
    alo_fast, leverage = native_impl.lasso_compute_alo(X, y, beta_hats,
                                                       return_leverage=True)
    assert alo_fast.shape == (beta_hats.shape[1],)
    assert leverage.shape == (50, beta_hats.shape[1])
    assert np.all(leverage >= 0)
    assert np.all(leverage <= 1)
def plot_lasso_path(df, resp_var, exp_vars):
    """ Plot the lasso path. Both response and explanatory variables
        are standardised first.

    Args:
        df:       Dataframe
        resp_var: String. Response variable
        exp_vars: List of strings. Explanatory variables

    Returns:
        Dataframe of path and matplotlib figure with tooltip-
        labelled lines. To view this figure in a notebook, use
        mpld3.display(f) on the returned figure object, f.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import mpld3
    from mpld3 import plugins
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import lasso_path

    # Standardise the feature data and response
    feat_std = StandardScaler().fit_transform(df[[resp_var, ] + exp_vars])

    # Calculate lasso path
    alphas, coefs, _ = lasso_path(feat_std[:, 1:],      # X
                                  feat_std[:, 0],       # y
                                  eps=1e-3,             # Path length
                                  fit_intercept=False)  # Already centred

    # -Log(alphas) is easier for display
    neg_log_alphas = -np.log10(alphas)

    # Build df of results
    res_df = pd.DataFrame(data=coefs.T, index=alphas, columns=exp_vars)

    # Plot
    fig, ax = plt.subplots()
    for coef, name in zip(coefs, exp_vars):
        line = ax.plot(neg_log_alphas, coef, label=name)
        plugins.connect(fig, plugins.LineLabelTooltip(line[0], label=name))

    plt.xlabel('-Log(alpha)')
    plt.ylabel('Coefficients')
    plt.title('Lasso paths')
    plt.legend(loc='best', title='', ncol=3)

    return res_df, fig
def L_U(X, y, eps=1e-4, max_iter=5000):
    n_samples, n_features = X.shape

    # Lasso with universal lambda
    univ_lambda = np.sqrt(2. * np.log(n_features) / float(n_samples))
    beta_ulasso = lasso_path(X, y, alphas=[univ_lambda], tol=eps,
                             max_iter=max_iter)[1]
    beta_ulasso = beta_ulasso.ravel()
    size = np.sum(beta_ulasso != 0)
    sigma_ulasso = np.linalg.norm(y - np.dot(X, beta_ulasso))
    sigma_ulasso /= np.sqrt(n_samples - size)

    return beta_ulasso, sigma_ulasso
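# --- Small synthetic check of L_U (illustrative, not from the original
# source). The "universal" level sqrt(2 log(p) / n) is the classical
# regularization choice under i.i.d. Gaussian noise; sigma is estimated by
# rescaling the residual norm by sqrt(n - support size).
import numpy as np

rng = np.random.RandomState(0)
n, p = 100, 50
X = rng.randn(n, p)
y = X[:, :3].sum(axis=1) + 0.5 * rng.randn(n)
beta_hat, sigma_hat = L_U(X, y)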
def solvePenalizedLSQ(A, b, eps, B, c):
    """
    Solves an unconstrained linear least squares problem

        minimize 2-norm(A*x - b)^2 + eps * 2-norm(B*x - c)^2 + tau * 1-norm(x)
    """
    X = numpy.vstack((A, eps * B))
    y = numpy.concatenate((b, c))

    print "Computing regularization path using the lasso..."
    models = lasso_path(X, y)
    alphas_lasso = numpy.array([model.alpha for model in models])
    coefs_lasso = numpy.array([model.coef_ for model in models])
    print alphas_lasso
    print coefs_lasso
def fit(self, X, y, feature_labels, estimator_params=None):
    """Computes the Lasso path using Sklearn's lasso_path method.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data (the independent variables).
    y : array-like, shape (n_samples, n_outputs)
        Training data (the output/target values).
    feature_labels : array-like, shape (n_features)
        Labels for each of the features in X.
    estimator_params : dict, optional
        The parameters to pass to Sklearn's Lasso estimator.

    Returns
    -------
    self
    """
    self._reset()
    if estimator_params is None:
        estimator_params = {}
    self.feature_labels_ = feature_labels

    alphas, coefs, _ = lasso_path(X, y, **estimator_params)
    self.alphas_ = alphas.copy()
    self.coefs_ = coefs.copy()

    # Rank the features in X by order of importance. This ranking is based
    # on how early a given feature enters the regression (the earlier a
    # feature enters the regression, the MORE important it is).
    feature_rankings = [[] for _ in range(X.shape[1])]
    for target_coef_paths in self.coefs_:
        for i, feature_path in enumerate(target_coef_paths):
            entrance_step = 1
            for val_at_step in feature_path:
                if val_at_step == 0:
                    entrance_step += 1
                else:
                    break
            feature_rankings[i].append(entrance_step)
    self.rankings_ = np.array([np.mean(ranks)
                               for ranks in feature_rankings])
    return self
import numpy as np
from scipy.io import mmwrite, mmread
from sklearn.linear_model.coordinate_descent import _alpha_grid
from sklearn.linear_model import lasso_path
from sklearn.utils.extmath import safe_sparse_dot
from time import time

# Newsgroup datasets
X_new = mmread("X_new.mtx")
y_new = mmread("y_new.mtx")
y_new = y_new.toarray()[0]

# alphas = _alpha_grid(X_new, y_new, eps=1e-3, fit_intercept=True,
#                      normalize=False, n_alphas=100)
t = time()
lasso_path(X_new, y_new, eps=1e-3, precompute=False, fit_intercept=True,
           normalize=False, n_alphas=100)
print time() - t

X_new = mmread("haxby_X.mtx").toarray()
y_new = mmread("haxby_y.mtx").toarray()[0]
# print y_new.shape
# y_new = y_new.toarray()[0]
alphas = _alpha_grid(X_new, y_new, eps=1e-3, fit_intercept=True,
                     normalize=False, n_alphas=100)
t = time()
coef = lasso_path(X_new, y_new, eps=1e-3, precompute=False,
                  fit_intercept=True, normalize=False,
                  alphas=alphas)  # n_alphas=100
coef_ = coef[1]
sq_loss = np.sum(0.5 * (y_new[:, np.newaxis]
                        - safe_sparse_dot(X_new, coef_)) ** 2, axis=0)
l2_penalty = np.sum(coef_ ** 2, axis=0)
l1_penalty = np.sum(np.abs(coef_), axis=0)
print time() - t
def source(self, numpoints=100, L=0.5, por=0.0, LocR=None, srcTime=None):
    event = self.event
    # preconditions
    assert(0 <= por)
    assert(L > 0)
    assert(numpoints == int(numpoints))
    if LocR is None:
        LocX, LocY, LocZ = (event.LocX, event.LocY, event.LocZ)
    else:
        LocX, LocY, LocZ = LocR
    if srcTime is None:
        self.srcTime = dateTime2Num(event.origin_time) + \
            linspace(-por * L, (1 - por) * L, numpoints)
    '''
    source reconstruction
    :param event: an event object
    :param numpoints=100: number of points in the discretization of the
        estimated source
    :param L: length of the time window of the estimated source
    :param por: fraction of time before the time estimated by Codelco
    '''
    dt = self.srcTime[1] - self.srcTime[0]
    """
    we need to solve a linear system of the form A*alphas = U
    """
    A, U = ([], [])
    # append the displacement field to the response vector
    # (the ids are the same!)
    for gs in event.seismograms:
        # add every dimension that holds valid measurements
        data = gs.data.values
        if gs.X_enabled == 1:
            U = hstack((U, data[:, 0].T))
        if gs.Y_enabled == 1:
            U = hstack((U, data[:, 1].T))
        if gs.Z_enabled == 1:
            U = hstack((U, data[:, 2].T))
    for G in event.seismograms:
        # sampling rate
        hsr = G.hardware_sampling_rate
        # the ratio dt*hsr > 1
        deltat = dt * hsr
        # assert dt * hsr <= 1, 'Warning: the product dt * hsr should be greater than 1'
        R = (G.x_coord - LocX, G.y_coord - LocY, G.z_coord - LocZ)
        # Green's function
        # t = G.timevector - dateTime2Num(date=event.origin_time)
        t = G.timevector - self.srcTime[0]
        alpha = G.P_velocity
        beta = G.S_velocity
        rho = G.RockDensity
        Gk = GreenKernel(R=R, time=t, alpha=alpha, beta=beta, rho=rho)
        assert(not any(isnan(Gk[:])))
        # integration of the Green's function
        dtdomain = t[1] - t[0]
        F = cumsum(Gk, axis=2) * dtdomain
        FF = zeros(shape(F))
        # auxiliary matrix where all the convolutions produced in a
        # single sensor will be stored
        B = []
        for jj in xrange(numpoints):  # for every element of the basis
            ii = xrange(size(F, 2))
            # indices for the jumps in the convolution between the basis
            # and the Green's function
            tf = map(lambda I: int(max(I - floor(jj * deltat), 0)), ii)
            ti = map(lambda I: int(max(I - floor((jj + 1) * deltat), 0)), ii)
            # convolution with respect to the selected basis
            FF[:, :, ii] = F[:, :, tf] - F[:, :, ti]
            # convolution for one element of the basis
            C = []
            if G.X_enabled:
                if C == []:
                    C = FF[0, :, :].copy()
                else:
                    C = hstack((C, FF[0, :, :].copy()))
                assert(not any(isnan(C[:])))
            if G.Y_enabled:
                if C == []:
                    C = FF[1, :, :].copy()
                else:
                    C = hstack((C, FF[1, :, :].copy()))
                assert(not any(isnan(C[:])))
            if G.Z_enabled:
                if C == []:
                    C = FF[2, :, :].copy()
                else:
                    C = hstack((C, FF[2, :, :].copy()))
                assert(not any(isnan(C[:])))
            if B == []:
                B = C.copy()
            else:
                B = vstack((B, C.copy()))
        if A == []:
            A = B.copy()
        else:
            A = hstack((A, B.copy()))
    # @todo: minimize the 1-norm to make the estimation more robust
    # solve the linear system that minimizes the sum of the 2-norm of
    # the error
    assert(not any(isnan(A[:])))
    # from scipy.sparse import csr_matrix
    # from scipy.sparse.linalg import lsqr
    # matrix = csr_matrix(A.T)
    # X = numpy.linalg.lstsq(A.T, U)[0]

    # linear regression
    lasso = None
    if lasso == True:
        # note: this assignment shadows the lasso_path function
        _, lasso_path, _ = lasso_path(A.T, U)
        rgr_lasso = Lasso()
        rgr_lasso.fit(A.T, U)
        X = rgr_lasso.coef_
        pass
    if det(dot(A, A.T)) != 0:
        # invertible
        X = dot(dot(U, A.T), inv(dot(A, A.T)))
    else:
        # not invertible
        X = dot(dot(U, A.T), pinv(dot(A, A.T)))
    src = zip(self.srcTime.T,
              X[range(0, 3 * numpoints, 3)].T,
              X[range(1, 3 * numpoints, 3)].T,
              X[range(2, 3 * numpoints, 3)].T)
    # post conditions
    assert(shape(src) == (numpoints, 4))
    # estimation error
    error = norm(U - dot(X, A), 2)
    src = array(src)
    rot, vec, val = _rotate(src)
    # necessary ordering conditions on the eigenvalues and the source
    # coordinates
    order = sorted(range(3), key=lambda k: val[k])
    val = val[order]
    # assert(val[0] <= val[1] <= val[2])
    rot[:, 1:4] = rot[:, 1:4][:, order]
    return (src, error, rot, vec, val)
plt.subplot(121)
m_log_alphas = -np.log10(lasso.steps[1][1].alphas_)
plt.plot(m_log_alphas, lasso.steps[1][1].mse_path_, ':')
plt.plot(m_log_alphas, lasso.steps[1][1].mse_path_.mean(axis=-1), 'k',
         label='Average across the folds', linewidth=2)
plt.axvline(-np.log10(lasso.steps[1][1].alpha_), linestyle='--', color='k',
            label='alpha: CV estimate')
plt.legend(loc='upper left')
plt.xlabel('-Log(alpha)')
plt.ylabel('Mean square error')
plt.title('Mean square error on each fold')
plt.axis('tight')

# Plot lasso paths
plt.subplot(122)
alphas_lasso, coefs_lasso, _ = lasso_path(features, labels, alphas=alphas)
plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
plt.axvline(-np.log10(lasso.steps[1][1].alpha_), linestyle='--', color='k',
            label='alpha: CV estimate')
plt.xlabel('-Log(alpha)')
plt.ylabel('Coefficients')
plt.title('Lasso Paths')
plt.axis('tight')

lasso_coef = pd.DataFrame({'coef': lasso.steps[1][1].coef_.tolist()},
                          features_list[1:]).round(3)
lasso_selection = lasso_coef[lasso_coef['coef'] != 0]
lasso_selection

print "Lasso selected an alpha of %.2f with %d features:" % (alpha_lasso,
                                                             len(lasso_selection))
lasso_selection.sort('coef', ascending=False)
def lasso_coefs(X, y):
    _, coefs, _ = lasso_path(X, y)
    return coefs.T
def main():
    path = '/users/davecwright/documents/kaggle/liberty_fire_cost/'
    path = 'c:\\users\\dwright\\code\\'
    train_name = path + 'train.csv'
    test_name = path + 'test.csv'
    # f = open(path + 'train.csv', 'rb')
    readRows = 2000  # None for all

    print 'loading train_data'
    train_data = pd.read_csv(train_name, nrows=readRows)
    print 'train_data loaded'
    y_data = train_data['target'].values
    train_data.drop('target', 1)
    train_data = scrub(train_data)
    y_data = y_data.astype(float)

    eps = 5e-3
    X = train_data
    y = y_data
    folds = 10
    kf = cross_validation.KFold(y_data.shape[0], n_folds=folds)
    alphas = range(folds)
    k = 0
    for test, train in kf:
        penalty = (alphas[k] + 1) * 1 / folds
        print penalty
        clf = ElasticNet(l1_ratio=penalty, eps=eps)
        clf.fit(X, y)
        k += 1

    # doing our own cv parametarization
    print 'computing lasso path'
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps,
                                              fit_intercept=False)
    print 'computing enet path'
    alphas_enet, coefs_enet, _ = enet_path(X, y, eps=eps, l1_ratio=0.8,
                                           fit_intercept=False)

    plt.figure(1)
    ax = plt.gca()
    ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
    l1 = plt.plot(-np.log10(alphas_lasso), coefs_lasso.T)
    l2 = plt.plot(-np.log10(alphas_enet), coefs_enet.T, linestyle='--')

    plt.xlabel('-Log(alpha)')
    plt.ylabel('coefficients')
    plt.title('Lasso and Elastic-Net Paths')
    plt.legend((l1[-1], l2[-1]), ('Lasso', 'Elastic-Net'),
               loc='lower left')
    plt.axis('tight')
    plt.show()
    sys.exit()

    for train_index, test_index in kf:
        # print train_index, test_index
        print train_data.iloc[train_index], train_data.iloc[test_index]

    # I am implementing the lasso reduction here...
    # for train, test in kf:
    fig = plt.figure(figsize=(12, 9))
    # ax = fig.add_subplot(111)
    # ax.plot(np.sort(y_data))
    # plt.xlabel("Number of Features")
    # plt.ylabel("claims cost")
    # plt.title("claims_cost")
    # ax.set_xscale("log")
    # ax.set_position([box.x0, box.y0 + box.height * 0.3, box.width, box.height * 0.7])
    # ax.legend(**_PLT_LEGEND_OPTIONS)
    # plt.show()

    train_data = train_data.drop('target', 1)

    # A - preprocessing
    # encode the text variables, var1-var9, Z values are NaN
    # fill in missing values in the text variables and in the continuous
    # variables; skip continuous for now
    # A3. build new features through the interactions of various items
    # skip for now
    # A4. dimensionality reduction to take the feature set back down to
    # something more manageable.
    est_clf = svm.SVR(kernel='linear', C=1)
    rks = select_ests(X_train, y_data, 100, est_clf)
    X_train = X_train[:, rks]

    clf = svm.SVR(kernel='linear', C=1)
    acy = cv(X_train, y_data, clf, None, estimator_name(clf))
    # the point here is to understand what accuracy is
    print 'accuracy:', acy
    # select_model(X_train, y_data)

    # B. split out test and fit sets
    test_data = pd.read_csv(test_name, nrows=readRows)
    test_data = encode_impute(test_data)
    X_test = scaler.transform(test_data)
for alpha in alphas:
    simpLasCoefs, Yhat = simpleLasso(X, y, alpha, True)
    # print "alpha = ", alpha
    # print simpLasCoefs
    # print Yhat
    simpLasCoefsCR, YhatCR = simpleLasso(X_cr, y_cr, alpha, False)
    # print simpLasCoefsCR
    # print YhatCR

################------ Exercice 2.4 ------###############################
print "Ex. 2.4: Figure"

# _, theta_lasso, _ = lasso_path(np.array(X), np.array(y), alphas=alphas,
#                                fit_intercept=True, return_models=False)
_, theta_lasso_CR, _ = lasso_path(np.array(X_cr), np.array(y_cr),
                                  alphas=alphas, fit_intercept=False,
                                  return_models=False)

# plot lasso path
# fig1 = plt.figure(figsize=(12, 8))
# plt.title("Chemin du Lasso: " + r"$p={0}, n={1} $".format(nfeatures, nsamples),
#           fontsize=16)
# ax1 = fig1.add_subplot(111)
# ax1.plot(alphas, np.transpose(theta_lasso), linewidth=3)
# ax1.set_xscale('log')
# ax1.set_xlabel(r"$\lambda$")
# ax1.set_ylabel("Amplitude des coefficients")
# ax1.set_ylim([-2, 0.5])
# ax1.set_xlim([lstart, lend])
# plt.show(block=False)

fig2 = plt.figure(figsize=(12, 8))
plt.title("Chemin du Lasso vars_CR: "
          + r"$p={0}, n={1} $".format(nfeatures, nsamples), fontsize=16)
# Convert list of lists to np array for input to sklearn packages
# Unnormalized labels
Y = numpy.array(labels)
# Normalized labels
Y = numpy.array(labelNormalized)
# Unnormalized X's
X = numpy.array(xList)
# Normalized X's
X = numpy.array(xNormalized)

alphas, coefs, _ = linear_model.lasso_path(X, Y, return_models=False)

plot.plot(alphas, coefs.T)
plot.xlabel('alpha')
plot.ylabel('Coefficients')
plot.axis('tight')
plot.semilogx()
ax = plot.gca()
ax.invert_xaxis()
plot.show()

nattr, nalpha = coefs.shape

# find coefficient ordering
data = load_mnist()
pos_ind = 6
neg_ind = 5
sig_D = 100
# lmda_list = [0.0005, 0.001, 0.01, 0.1, 0.3]
x, y = convert_binary(data, pos_ind, neg_ind)
n, p = x.shape
x = x.astype(float)
y = y.astype(float)
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
x = min_max_scaler.fit_transform(x)
# xtest = min_max_scaler.transform(x)
# ntrain = ytrain.size
alphas, coefs, gaps = linear_model.lasso_path(x, y, n_alphas=5,
                                              return_models=False,
                                              fit_intercept=False)
lmda_list = alphas[::-1]
n_iter = 10
ss = cv.StratifiedShuffleSplit(y=y, n_iter=n_iter, test_size=0.3,
                               random_state=5)
nzs_scg_T = np.zeros((n_iter, len(lmda_list)))
nzs_scg_bar = np.zeros((n_iter, len(lmda_list)))
nzs_rda_T = np.zeros((n_iter, len(lmda_list)))
nzs_rda_bar = np.zeros((n_iter, len(lmda_list)))
nzs_rda2_T = np.zeros((n_iter, len(lmda_list)))
nzs_rda2_bar = np.zeros((n_iter, len(lmda_list)))
nzs_cd_T = np.zeros((n_iter, len(lmda_list)))
nzs_cd_bar = np.zeros((n_iter, len(lmda_list)))
nzs_sgd = np.zeros((n_iter, len(lmda_list)))
nsweep = 5
b = 5
c = 1
xsum = x.sum(axis=0)
# ind = np.where(xsum > 0)  # return object is tuple
# x = x[:, ind[0]]
x = x.astype(float)
y = y.astype(float)
n, p = x.shape
random_state = 21
lmda = 0.01
nsweep = 1
xtrain, xtest, ytrain, ytest = cv.train_test_split(
    x, y, test_size=0.2, random_state=random_state)
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
xtrain = min_max_scaler.fit_transform(xtrain)
xtest = min_max_scaler.transform(xtest)
ntrain = ytrain.size
alphas, coefs, gaps = linear_model.lasso_path(xtrain, ytrain, n_alphas=10,
                                              return_models=False,
                                              fit_intercept=False)
alphas = alphas[::-1]
# zs = (coefs == 0).sum(axis=0)
# zs = zs[::-1]
# gaps = gaps[::-1]
obj = np.zeros(len(alphas))
zs2 = np.zeros(len(alphas))
obj2 = np.zeros(len(alphas))
zs3 = np.zeros(len(alphas))
zs = np.zeros(len(alphas))
obj3 = np.zeros(len(alphas))
for i, alpha in enumerate(alphas):
    print "alpha: %f" % alpha
    print 'cd with random permutation'
    clf = LassoLI(lmda=alpha, algo='cd', cd_ord='rand', T=6000)
import numpy as np
import pylab as pl

from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(0)  # Standardize data (easier to set the rho parameter)

################################################################################
# Compute paths

eps = 5e-3  # the smaller it is the longer is the path

print "Computing regularization path using the lasso..."
models = lasso_path(X, y, eps=eps)
alphas_lasso = np.array([model.alpha for model in models])
coefs_lasso = np.array([model.coef_ for model in models])

print "Computing regularization path using the elastic net..."
models = enet_path(X, y, eps=eps, rho=0.8)
alphas_enet = np.array([model.alpha for model in models])
coefs_enet = np.array([model.coef_ for model in models])

################################################################################
# Display results

ax = pl.gca()
ax.set_color_cycle(2 * ['b', 'r', 'g', 'c', 'k'])
l1 = pl.plot(coefs_lasso)
l2 = pl.plot(coefs_enet, linestyle='--')
plt.axvline(BLLModel.alpha_, linestyle='--',
            label='CV Estimate of Best alpha')
plt.semilogx()
plt.legend()
ax = plt.gca()
ax.invert_xaxis()
plt.xlabel('alpha')
plt.ylabel('Mean Square Error')
plt.axis('tight')
plt.title('Determining alpha via LASSO 10-fold CV')
plt.show()

print "alpha Value that Minimizes CV Error ", BLLModel.alpha_
print "Minimum MSE ", min(BLLModel.mse_path_.mean(axis=-1))

bestAlpha = BLLModel.alpha_

alphas, coefs, _ = lasso_path(X, y, return_models=False)

plt.plot(alphas, coefs.T)
plt.xlabel('alpha')
plt.ylabel('Coefficients')
plt.axis('tight')
plt.title('Variables as they enter the model')
plt.semilogx()
plt.legend(loc='upper left')
ax = plt.gca()
ax.invert_xaxis()
plt.show()

nattr, nalpha = coefs.shape

# find coefficient ordering
nzList = []
print("Computing regularization path using the ridge...") n_alphas = 200 alphas = np.logspace(-4, 2, n_alphas) clf = linear_model.Ridge(fit_intercept=False) coefs = [] for a in alphas: clf.set_params(alpha=a, max_iter=1000) clf.fit(xtrain, ytrain) coefs.append(clf.coef_) # lasso and elastic net path eps = 5e-3 # the smaller it is the longer is the path print("Computing regularization path using the lasso...") alphas_lasso, coefs_lasso, _ = lasso_path(xtrain, ytrain, eps, fit_intercept=False) print("Computing regularization path using the elastic net...") alphas_enet, coefs_enet, _ = enet_path( xtrain, ytrain, eps=eps, l1_ratio=0.8, fit_intercept=False) # Display results plt.figure(1) ax = plt.gca() ax.set_color_cycle(['b', 'r', 'g', 'c', 'k', 'y', 'm']) ax.plot(alphas, coefs) ax.set_xscale('log') ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

# Compute paths
eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
# The return_models parameter sets that lasso_path will return
# the alphas and the coefficients as output, instead of a list
# of models as it does by default. Returning the list of models
# is deprecated and will eventually be removed in 0.15
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, return_models=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X, y, eps, positive=True, return_models=False)

print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, return_models=False)

print("Computing regularization path using the positive elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, positive=True, return_models=False)
import numpy as np

from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(0)  # Standardize data (easier to set the l1_ratio parameter)

###############################################################################
# Compute paths

eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
models = lasso_path(X, y, eps=eps)
alphas_lasso = np.array([model.alpha for model in models])
coefs_lasso = np.array([model.coef_ for model in models])

print("Computing regularization path using the positive lasso...")
models = lasso_path(X, y, eps=eps, positive=True)
alphas_positive_lasso = np.array([model.alpha for model in models])
coefs_positive_lasso = np.array([model.coef_ for model in models])

print("Computing regularization path using the elastic net...")
models = enet_path(X, y, eps=eps, l1_ratio=0.8)
alphas_enet = np.array([model.alpha for model in models])
coefs_enet = np.array([model.coef_ for model in models])

print("Computing regularization path using the positive elastic net...")
models = enet_path(X, y, eps=eps, l1_ratio=0.8, positive=True)
def compute_bench(samples_range, features_range):

    it = 0
    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            dataset_kwargs = {
                'n_samples': n_samples,
                'n_features': n_features,
                'n_informative': n_features // 10,
                'effective_rank': min(n_samples, n_features) / 10,
                # 'effective_rank': None,
                'bias': 0.0,
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            X, y = make_regression(**dataset_kwargs)

            gc.collect()
            print("benchmarking lars_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lars_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (without Gram)'].append(delta)

    return results
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import lasso_path, enet_path
from sklearn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X /= X.std(axis=0)  # Standardize data (easier to set the l1_ratio parameter)

# Compute paths
eps = 5e-3  # the smaller it is the longer is the path

print("Computing regularization path using the lasso...")
alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps, fit_intercept=False)

print("Computing regularization path using the positive lasso...")
alphas_positive_lasso, coefs_positive_lasso, _ = lasso_path(
    X, y, eps, positive=True, fit_intercept=False)

print("Computing regularization path using the elastic net...")
alphas_enet, coefs_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, fit_intercept=False)

print("Computing regularization path using the positive elastic net...")
alphas_positive_enet, coefs_positive_enet, _ = enet_path(
    X, y, eps=eps, l1_ratio=0.8, positive=True, fit_intercept=False)

# Display results
plt.figure(1)