def test_enet_path():
    """Check the alpha and l1_ratio chosen by ElasticNetCV on dense data.

    The same checks run twice: once without passing ``precompute`` and once
    with ``precompute=True``.
    """
    # Many samples and many informative features, so that the selected
    # l1_ratio leans toward ridge rather than lasso.
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    # The iteration budget is deliberately small to keep the test fast, so
    # ElasticNet might not converge; the resulting warnings are ignored.
    for extra_params in ({}, {'precompute': True}):
        clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], cv=3,
                           max_iter=max_iter, **extra_params)
        ignore_warnings(clf.fit)(X, y)
        # Well-conditioned settings: the smallest penalty should be selected.
        assert_almost_equal(clf.alpha_, min(clf.alphas_))
        # Non-sparse ground truth: the selected elastic-net should be the
        # candidate closest to ridge.
        assert_equal(clf.l1_ratio_, min(clf.l1_ratio))

    # Well-conditioned settings with low noise: the last estimator (the
    # precompute=True one) should score well on the test set.
    assert_greater(clf.score(X_test, y_test), 0.99)
def runPrintResults(X, y, alpha, name):
    """Fit an ElasticNetCV on optionally rescaled features and print a summary.

    When ``alpha`` is not None, ``X`` is divided by it before fitting and the
    fitted coefficients are divided by it afterwards; otherwise both are used
    unchanged. The R2 score is computed on the same data used for fitting.

    Returns a tuple ``(best_alpha, coefficients)``: the fitted model's
    ``alpha_`` and its (possibly rescaled) ``coef_``.
    """
    print(name+":\n=========")

    rescale = alpha is not None
    features = np.divide(X, alpha) if rescale else X

    model = ElasticNetCV(l1_ratio=0.8, fit_intercept=False)
    model.fit(features, y)

    predictions = model.predict(features)
    print("R2= ", r2_score(y, predictions))

    coefficients = np.divide(model.coef_, alpha) if rescale else model.coef_

    print("Best Alpha: {}".format(model.alpha_))
    print("coefs_/alpha: {}".format(coefficients))

    return model.alpha_, coefficients
def test_path_parameters():
    """ElasticNetCV exposes the path parameters it was constructed with."""
    X, y, _, _ = build_dataset()
    n_alphas, l1_ratio = 50, 0.5

    clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=50,
                       l1_ratio=l1_ratio)
    clf.fit(X, y)  # new params
    assert_almost_equal(l1_ratio, clf.l1_ratio)
    assert_equal(n_alphas, clf.n_alphas)
    # The fitted alpha grid has one entry per requested alpha.
    assert_equal(n_alphas, len(clf.alphas_))
def test_path_parameters():
    """ElasticNetCV keeps its path parameters when fitted on sparse data."""
    X, y = make_sparse_data()
    expected_n_alphas = 10
    expected_l1_ratio = 0.5

    clf = ElasticNetCV(n_alphas=expected_n_alphas, eps=1e-3, max_iter=50,
                       l1_ratio=expected_l1_ratio, fit_intercept=False)
    clf.fit(X, y)  # new params

    assert_almost_equal(expected_l1_ratio, clf.l1_ratio)
    assert_equal(expected_n_alphas, clf.n_alphas)
    # The fitted alpha grid has one entry per requested alpha.
    assert_equal(expected_n_alphas, len(clf.alphas_))
def test_1d_multioutput_enet_and_multitask_enet_cv():
    """A single-column multi-task fit matches the plain single-output fit."""
    X, y, _, _ = build_dataset(n_features=10)
    # Give the target an explicit second axis for the multi-task estimator.
    y_2d = y[:, np.newaxis]

    single = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    single.fit(X, y_2d[:, 0])

    multi = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    multi.fit(X, y_2d)

    assert_almost_equal(single.l1_ratio_, multi.l1_ratio_)
    assert_almost_equal(single.alpha_, multi.alpha_)
    # Compare against the first (only) task of the multi-task model.
    assert_almost_equal(single.coef_, multi.coef_[0])
    assert_almost_equal(single.intercept_, multi.intercept_[0])
def test_1d_multioutput_enet_and_multitask_enet_cv():
    """ElasticNetCV and one-task MultiTaskElasticNetCV agree on the same data."""
    X, y, _, _ = build_dataset(n_features=10)
    # Reshape the target into a single-column 2-D array.
    y_column = y[:, np.newaxis]

    enet_cv = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    enet_cv.fit(X, y_column[:, 0])

    multitask_cv = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3,
                                         l1_ratio=[0.5, 0.7])
    multitask_cv.fit(X, y_column)

    assert_almost_equal(enet_cv.l1_ratio_, multitask_cv.l1_ratio_)
    assert_almost_equal(enet_cv.alpha_, multitask_cv.alpha_)
    # Index 0 selects the only task fitted by the multi-task estimator.
    assert_almost_equal(enet_cv.coef_, multitask_cv.coef_[0])
    assert_almost_equal(enet_cv.intercept_, multitask_cv.intercept_[0])
Example #7
0
def test_path_parameters():
    """ElasticNetCV exposes the path parameters it was constructed with."""
    X, y, _, _ = build_dataset()
    n_alphas = 50
    l1_ratio = 0.5

    clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=100,
                       l1_ratio=l1_ratio, tol=1e-3)
    clf.fit(X, y)  # new params

    assert_almost_equal(l1_ratio, clf.l1_ratio)
    assert_equal(n_alphas, clf.n_alphas)
    # The fitted alpha grid has one entry per requested alpha.
    assert_equal(n_alphas, len(clf.alphas_))
def test_path_parameters():
    """Check path parameters and that sparse and dense fits share an MSE path."""
    X, y = make_sparse_data()
    expected_n_alphas = 10

    clf = ElasticNetCV(n_alphas=expected_n_alphas, eps=1e-3, max_iter=50,
                       l1_ratio=0.5, fit_intercept=False)
    clf.fit(X, y)  # new params
    assert_almost_equal(0.5, clf.l1_ratio)
    assert_equal(expected_n_alphas, clf.n_alphas)
    assert_equal(expected_n_alphas, len(clf.alphas_))

    # Remember the sparse result, then refit the same estimator on the
    # dense version of the data and compare.
    mse_path_from_sparse = clf.mse_path_
    clf.fit(X.toarray(), y)
    assert_almost_equal(clf.mse_path_, mse_path_from_sparse)
Example #9
0
def test_path_parameters():
    """ElasticNetCV keeps its ``rho``-style path parameters on sparse data."""
    # NOTE(review): `rho` looks like the legacy spelling of `l1_ratio` used
    # by other tests -- confirm against the targeted scikit-learn version.
    X, y = make_sparse_data()
    expected_n_alphas = 10
    expected_rho = 0.5

    clf = ElasticNetCV(n_alphas=expected_n_alphas, eps=1e-3, max_iter=50,
                       rho=expected_rho, fit_intercept=False)
    clf.fit(X, y)  # new params

    assert_almost_equal(expected_rho, clf.rho)
    assert_equal(expected_n_alphas, clf.n_alphas)
    assert_equal(expected_n_alphas, len(clf.alphas_))
Example #10
0
    def train_all(self):
        """Fit one ElasticNetCV model per position file and save the results.

        For every position CSV in ``self.cleaned_data_directory_path`` the
        rows are parsed (all columns but the last as floats; the last column
        is the name), a model is fitted with the second-to-last column as
        target, and:

        * the coefficients are appended as a row to ``self.models_file_path``
          (preceded once by the feature headers), and
        * per-row residuals are written to ``resid_<filename>`` inside
          ``self.residual_data_path``.

        The sum of the residuals of each position is printed. The code is
        written to run unchanged on both Python 2 and Python 3.
        """
        positions = ['PG.csv', 'SG.csv', 'SF.csv', 'PF.csv', 'C.csv']
        with open(self.models_file_path, 'w') as model_file:
            model_file_writer = csv.writer(model_file)
            for (index, filename) in enumerate(positions):
                cleaned_path = os.path.join(self.cleaned_data_directory_path,
                                            filename)
                with open(cleaned_path, 'r') as cleaned_data:
                    cleaned_data_reader = csv.reader(cleaned_data)
                    # next() works on both Python 2 and 3 iterators.
                    cleaned_data_headers = next(cleaned_data_reader)
                    # A list comprehension (rather than map) keeps the
                    # concatenation valid on Python 3 as well.
                    lines = [
                        [float(value) for value in line[:-1]] + line[-1:]
                        for line in cleaned_data_reader if len(line) >= 2
                    ]

                # convert lines to numpy arrays
                num_data = len(lines)
                num_features = len(lines[0]) - 2

                X = np.zeros((num_data, num_features))
                Y = np.zeros((num_data))

                for (i, data) in enumerate(lines):
                    for (ii, feature) in enumerate(data[:-2]):
                        X[i][ii] = feature
                    Y[i] = data[-2]  # last one is name

                # create an instance of elasticnet
                net = ElasticNetCV(alphas=[0.01, 0.05, 0.1],
                                   eps=2e-3,
                                   l1_ratio=[0.5, 0.7, 1],
                                   cv=3,
                                   normalize=True)

                # create a model based on our data
                net.fit(X, Y)
                # The header row is written only once, before the first model.
                if index == 0:
                    model_file_writer.writerow(cleaned_data_headers[:-2])
                model_file_writer.writerow(net.coef_)

                resid_path = os.path.join(self.residual_data_path,
                                          '_'.join(('resid', filename)))
                with open(resid_path, 'w') as resid_file:
                    resid_file_writer = csv.writer(resid_file)
                    # NOTE(review): residuals use the coefficients only (no
                    # net.intercept_) -- confirm this is intended.
                    resid = X.dot(net.coef_) - Y
                    for (data, value) in zip(lines, resid):
                        resid_file_writer.writerow((data[-1], value))
                    print(sum(resid))
def test_uniform_targets():
    """CV estimators fitted on constant targets predict that constant."""
    single_task_models = (
        ElasticNetCV(fit_intercept=True, n_alphas=3),
        LassoCV(fit_intercept=True, n_alphas=3),
    )
    multi_task_models = (
        MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3),
        MultiTaskLassoCV(fit_intercept=True, n_alphas=3),
    )

    rng = np.random.RandomState(0)
    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    # With constant targets the alpha grid is expected to collapse to the
    # float resolution, repeated once per requested alpha.
    expected_alphas = [np.finfo(float).resolution]*3
    constants = (0, 5)

    y1 = np.empty(10)
    for model in single_task_models:
        for constant in constants:
            y1.fill(constant)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, expected_alphas)

    y2 = np.empty((10, 2))
    for model in multi_task_models:
        for constant in constants:
            # Second task is constant too, at twice the value of the first.
            y2[:, 0].fill(constant)
            y2[:, 1].fill(2 * constant)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, expected_alphas)
def test_precompute_invalid_argument():
    """An unsupported ``precompute`` value makes ``fit`` raise ValueError."""
    X, y, _, _ = build_dataset()
    for estimator_class in (ElasticNetCV, LassoCV):
        clf = estimator_class(precompute="invalid")
        assert_raises(ValueError, clf.fit, X, y)
Example #13
0
 def fit(self, X, y=None):
     """Build a fresh ``SKLModel`` from the stored hyperparameters and fit it.

     ``y`` is forwarded to the underlying ``fit`` only when it is not None.
     Returns ``self``.
     """
     model = SKLModel(**self._hyperparams)
     self._sklearn_model = model
     if y is None:
         model.fit(X)
     else:
         model.fit(X, y)
     return self
def test_path_parameters():
    """ElasticNetCV keeps its path parameters on an ill-posed problem."""
    # NOTE(review): `rho` / `alphas` look like legacy spellings of
    # `l1_ratio` / `alphas_` used by other tests -- confirm against the
    # targeted scikit-learn version.

    # Ill-posed linear regression: many noisy features, comparatively few
    # samples, and only the first 10 features influence the target.
    n_samples, n_features = 50, 200
    rng = np.random.RandomState(0)
    true_coef = rng.randn(n_features)
    true_coef[10:] = 0.0
    X = rng.randn(n_samples, n_features)
    y = np.dot(X, true_coef)

    n_alphas = 50
    clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=50, rho=0.5)
    clf.fit(X, y)  # new params
    assert_almost_equal(0.5, clf.rho)
    assert_equal(n_alphas, clf.n_alphas)
    assert_equal(n_alphas, len(clf.alphas))
    def connectWidgets(self):
        """Initialise the ElasticNet and ElasticNetCV widgets.

        The ElasticNetCV group box is hidden, then every input widget is
        populated from the attributes of freshly constructed ``ElasticNet``
        and ``ElasticNetCV`` instances (the CV spin box is set to 3 and the
        alphas line edit to the text ``'None'``).
        """
        self.elasticNetCVGroupBox.setHidden(True)
        plain_defaults = ElasticNet()
        cv_defaults = ElasticNetCV()

        # --- widgets for the plain ElasticNet (prefixed with "en") ---
        self.alpha_text.setText(str(plain_defaults.alpha))
        self.enl1_ratioDoubleSpinBox.setValue(plain_defaults.l1_ratio)
        self.enfit_interceptCheckBox.setChecked(plain_defaults.fit_intercept)
        self.ennormalizeCheckBox.setChecked(plain_defaults.normalize)
        self.enprecomputeCheckBox.setChecked(plain_defaults.precompute)
        self.enmax_iterSpinBox.setValue(plain_defaults.max_iter)
        self.encopy_XCheckBox.setChecked(plain_defaults.copy_X)
        self.entolDoubleSpinBox.setValue(plain_defaults.tol)
        self.enwarm_startCheckBox.setChecked(plain_defaults.warm_start)
        self.enpositiveCheckBox.setChecked(plain_defaults.positive)
        self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
        self.defaultComboItem(self.enselectionComboBox,
                              plain_defaults.selection)

        # --- widgets for the cross-validated ElasticNetCV ---
        self.l1_ratioDoubleSpinBox.setValue(cv_defaults.l1_ratio)
        self.epsDoubleSpinBox.setValue(cv_defaults.eps)
        self.n_alphasSpinBox.setValue(cv_defaults.n_alphas)
        self.alphasLineEdit.setText('None')
        self.fit_interceptCheckBox.setChecked(cv_defaults.fit_intercept)
        self.normalizeCheckBox.setChecked(cv_defaults.normalize)
        self.setComboBox(self.precomputeComboBox,
                         ['True', 'False', 'auto', 'array-like'])
        self.defaultComboItem(self.precomputeComboBox, cv_defaults.precompute)
        self.max_iterSpinBox.setValue(cv_defaults.max_iter)
        self.tolDoubleSpinBox.setValue(cv_defaults.tol)
        # The number of folds is hard-coded rather than read from the model.
        self.cVSpinBox.setValue(3)
        self.copy_XCheckBox.setChecked(cv_defaults.copy_X)
        self.verboseCheckBox.setChecked(cv_defaults.verbose)
        self.n_jobsSpinBox.setValue(cv_defaults.n_jobs)
        self.positiveCheckBox.setChecked(cv_defaults.positive)
        self.setComboBox(self.selectionComboBox, ['cyclic', 'random'])
        self.defaultComboItem(self.selectionComboBox, cv_defaults.selection)
Example #16
0
class ElasticNetCVImpl():
    """Thin wrapper that builds an ``Op`` model from keyword hyperparameters.

    The hyperparameters are kept in ``self._hyperparams`` and the model
    built from them in ``self._wrapped_model``; ``fit`` and ``predict``
    delegate to that model.
    """

    def __init__(self,
                 l1_ratio=0.5,
                 eps=0.001,
                 n_alphas=100,
                 alphas=None,
                 fit_intercept=True,
                 normalize=False,
                 precompute='auto',
                 max_iter=1000,
                 tol=0.0001,
                 cv=3,
                 copy_X=True,
                 verbose=0,
                 n_jobs=None,
                 positive=False,
                 random_state=None,
                 selection='cyclic'):
        """Store the hyperparameters and construct the wrapped model."""
        self._hyperparams = dict(
            l1_ratio=l1_ratio,
            eps=eps,
            n_alphas=n_alphas,
            alphas=alphas,
            fit_intercept=fit_intercept,
            normalize=normalize,
            precompute=precompute,
            max_iter=max_iter,
            tol=tol,
            cv=cv,
            copy_X=copy_X,
            verbose=verbose,
            n_jobs=n_jobs,
            positive=positive,
            random_state=random_state,
            selection=selection,
        )
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when given; return self."""
        model = self._wrapped_model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        model = self._wrapped_model
        return model.predict(X)
def test_path_parameters():
    """Path parameters given to ElasticNetCV are readable after fitting."""
    # NOTE(review): `rho` / `alphas` look like legacy spellings of
    # `l1_ratio` / `alphas_` used by other tests -- confirm against the
    # targeted scikit-learn version.

    # Build an ill-posed linear regression problem: many noisy features,
    # comparatively few samples.
    n_samples, n_features, max_iter = 50, 200, 50
    rng = np.random.RandomState(0)
    weights = rng.randn(n_features)
    weights[10:] = 0.0  # only the first 10 features affect the target
    X = rng.randn(n_samples, n_features)
    y = np.dot(X, weights)

    expected_n_alphas = 50
    expected_rho = 0.5
    clf = ElasticNetCV(n_alphas=expected_n_alphas, eps=1e-3,
                       max_iter=max_iter, rho=expected_rho)
    clf.fit(X, y)  # new params

    assert_almost_equal(expected_rho, clf.rho)
    assert_equal(expected_n_alphas, clf.n_alphas)
    assert_equal(expected_n_alphas, len(clf.alphas))
def test_enet_path():
    """ElasticNetCV picks alpha close to 0.002 and generalises well.

    The fit is checked twice: without passing ``precompute`` and with
    ``precompute=True``.
    """
    # NOTE(review): `rho` / `alpha` look like legacy spellings of
    # `l1_ratio` / `alpha_` used by other tests -- confirm against the
    # targeted scikit-learn version.

    # Build an ill-posed linear regression problem: many noisy features,
    # comparatively few samples.
    n_samples, n_features, max_iter = 50, 200, 50
    rng = np.random.RandomState(0)
    weights = rng.randn(n_features)
    weights[10:] = 0.0  # only the first 10 features affect the target
    X = rng.randn(n_samples, n_features)
    y = np.dot(X, weights)

    for extra_params in ({}, {'precompute': True}):
        clf = ElasticNetCV(n_alphas=10, eps=1e-3, rho=0.95, cv=5,
                           max_iter=max_iter, **extra_params)
        clf.fit(X, y)
        assert_almost_equal(clf.alpha, 0.002, 2)

    # Fresh test set drawn from the same generator and the same weights;
    # scored with the last (precompute=True) estimator.
    X_test = rng.randn(n_samples, n_features)
    y_test = np.dot(X_test, weights)
    assert clf.score(X_test, y_test) > 0.99
def test_enet_path():
    """Check ElasticNetCV / MultiTaskElasticNetCV model selection.

    Covers three scenarios: single-output selection on a fixed alpha grid
    (with and without ``precompute=True``), a three-target multi-task fit,
    and agreement between the single-output and one-column multi-task
    estimators.
    """
    # Many samples and many informative features, so that the selected
    # l1_ratio leans toward ridge rather than lasso.
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    # The iteration budget is deliberately small to keep the test fast, so
    # ElasticNet might not converge; the resulting warnings are ignored.
    for extra_params in ({}, {'precompute': True}):
        clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                           l1_ratio=[0.5, 0.7], cv=3,
                           max_iter=max_iter, **extra_params)
        ignore_warnings(clf.fit)(X, y)
        # Well-conditioned settings: the smallest penalty should be selected.
        assert_almost_equal(clf.alpha_, min(clf.alphas_))
        # Non-sparse ground truth: the selected elastic-net should be the
        # candidate closest to ridge.
        assert_equal(clf.l1_ratio_, min(clf.l1_ratio))

    # Well-conditioned settings with low noise: good test-set performance
    # is expected from the last (precompute=True) estimator.
    assert_greater(clf.score(X_test, y_test), 0.99)

    # Multi-output/target case
    n_features, n_targets = 10, 3
    X, y, X_test, y_test = build_dataset(n_features=n_features,
                                         n_targets=n_targets)
    clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                                cv=3, max_iter=max_iter)
    ignore_warnings(clf.fit)(X, y)
    # Again well-conditioned with low noise: expect a good test-set score.
    assert_greater(clf.score(X_test, y_test), 0.99)
    assert_equal(clf.coef_.shape, (n_targets, n_features))

    # Mono-output: the single-output and multi-task estimators should
    # agree on the cross-validated alpha_ and l1_ratio_.
    X, y, _, _ = build_dataset(n_features=10)
    single = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    single.fit(X, y)
    multi = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    multi.fit(X, y[:, np.newaxis])
    assert_almost_equal(single.l1_ratio_, multi.l1_ratio_)
    assert_almost_equal(single.alpha_, multi.alpha_)
Example #20
0
    def train_all(self):
        """Train an ElasticNetCV model for each position and persist the output.

        Each position CSV found in ``self.cleaned_data_directory_path`` is
        read with its first row as headers. In every data row all columns
        except the last are parsed as floats and the last column is kept as
        the name; the second-to-last column is the regression target and the
        columns before it are the features.

        Coefficients are written row by row to ``self.models_file_path``
        (the feature headers are written once, before the first model), and
        ``(name, residual)`` pairs go to ``resid_<filename>`` under
        ``self.residual_data_path``. The residual sum of each position is
        printed. Runs unchanged on Python 2 and Python 3.
        """
        positions = ['PG.csv', 'SG.csv', 'SF.csv', 'PF.csv', 'C.csv']
        with open(self.models_file_path, 'w') as model_file:
            model_file_writer = csv.writer(model_file)
            headers_written = False
            for filename in positions:
                with open(os.path.join(self.cleaned_data_directory_path, filename),
                          'r') as cleaned_data:
                    cleaned_data_reader = csv.reader(cleaned_data)
                    # Built-in next() is valid on both Python 2 and 3.
                    cleaned_data_headers = next(cleaned_data_reader)
                    # map() returns an iterator on Python 3, so build the
                    # float list explicitly before concatenating the name.
                    lines = [[float(value) for value in line[:-1]] + line[-1:]
                             for line in cleaned_data_reader
                             if len(line) >= 2]

                # convert lines to numpy arrays
                num_data = len(lines)
                num_features = len(lines[0]) - 2

                X = np.zeros((num_data, num_features))
                Y = np.zeros((num_data))

                for (i, data) in enumerate(lines):
                    for (ii, feature) in enumerate(data[:-2]):
                        X[i][ii] = feature
                    Y[i] = data[-2]  # last one is name

                # create an instance of elasticnet
                net = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                                   l1_ratio=[0.5, 0.7, 1], cv=3, normalize=True)

                # create a model based on our data
                net.fit(X, Y)
                if not headers_written:
                    model_file_writer.writerow(cleaned_data_headers[:-2])
                    headers_written = True
                model_file_writer.writerow(net.coef_)

                with open(os.path.join(
                        self.residual_data_path,
                        '_'.join(('resid', filename))), 'w') as resid_file:
                    resid_file_writer = csv.writer(resid_file)
                    # NOTE(review): residuals use the coefficients only (no
                    # net.intercept_) -- confirm this is intended.
                    resid = X.dot(net.coef_) - Y
                    for (data, value) in zip(lines, resid):
                        resid_file_writer.writerow((data[-1], value))
                    print(sum(resid))
def test_precompute_invalid_argument():
    """Invalid ``precompute`` values are rejected with a descriptive error."""
    X, y, _, _ = build_dataset()

    cv_message = (".*should be.*True.*False.*auto.*"
                  "array-like.*Got 'invalid'")
    for estimator_class in (ElasticNetCV, LassoCV):
        clf = estimator_class(precompute="invalid")
        assert_raises_regex(ValueError, cv_message, clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    enet_message = (".*should be.*True.*False.*array-like.*"
                    "Got 'auto'")
    enet = ElasticNet(precompute='auto')
    assert_raises_regex(ValueError, enet_message, enet.fit, X, y)
Example #22
0
def test_same_output_sparse_dense_lasso_and_enet_cv():
    """Sparse and dense inputs give the same CV results for both estimators.

    ElasticNetCV is run with 5 folds and LassoCV with 4, each with
    ``normalize`` on and off.
    """
    X, y = make_sparse_data(n_samples=40, n_features=10)
    X_dense = None
    for normalize in [True, False]:
        for estimator_class, n_folds in ((ElasticNetCV, 5), (LassoCV, 4)):
            clfs = estimator_class(max_iter=100, cv=n_folds,
                                   normalize=normalize)
            ignore_warnings(clfs.fit)(X, y)

            clfd = estimator_class(max_iter=100, cv=n_folds,
                                   normalize=normalize)
            # Densify afresh for every fit, exactly one conversion per fit.
            X_dense = X.toarray()
            ignore_warnings(clfd.fit)(X_dense, y)

            assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
            assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
            assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
            assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
def test_enet_path():
    """ElasticNetCV selects the smallest alpha and the smallest l1_ratio.

    Checked for a fit without ``precompute`` and a fit with
    ``precompute=True``; the latter is also scored on held-out data.
    """
    # Many samples and many informative features, so that the selected
    # l1_ratio leans toward ridge rather than lasso.
    X, y, X_test, y_test = build_dataset(n_samples=200,
                                         n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    def fit_and_check(**extra_params):
        # The iteration budget is small to speed up the test, so ElasticNet
        # might not converge; the resulting warnings are ignored.
        model = ElasticNetCV(n_alphas=5,
                             eps=2e-3,
                             l1_ratio=[0.5, 0.7],
                             cv=3,
                             max_iter=max_iter,
                             **extra_params)
        ignore_warnings(model.fit)(X, y)
        # Well-conditioned settings: the smallest penalty should be selected.
        assert_almost_equal(model.alpha_, min(model.alphas_))
        # Non-sparse ground truth: the selected elastic-net should be the
        # candidate closest to ridge.
        assert_equal(model.l1_ratio_, min(model.l1_ratio))
        return model

    fit_and_check()
    clf = fit_and_check(precompute=True)

    # Well-conditioned settings with low noise: good test-set performance
    # is expected.
    assert_greater(clf.score(X_test, y_test), 0.99)
def test_path_parameters():
    """Check path parameters and sparse/dense agreement of the MSE path."""
    X, y = make_sparse_data()
    expected_n_alphas = 10

    clf = ElasticNetCV(n_alphas=expected_n_alphas, eps=1e-3, max_iter=50,
                       l1_ratio=0.5, fit_intercept=False)
    fit_quietly = ignore_warnings(clf.fit)
    fit_quietly(X, y)  # new params
    assert_almost_equal(0.5, clf.l1_ratio)
    assert expected_n_alphas == clf.n_alphas
    assert expected_n_alphas == len(clf.alphas_)

    # Keep the sparse result, refit the same estimator on dense data and
    # compare the two MSE paths.
    mse_path_from_sparse = clf.mse_path_
    ignore_warnings(clf.fit)(X.toarray(), y)
    assert_almost_equal(clf.mse_path_, mse_path_from_sparse)
def test_enet_cv_positive_constraint():
    """``positive=True`` forces every ElasticNetCV coefficient to be >= 0."""
    X, y, X_test, y_test = build_dataset()
    common_params = dict(n_alphas=3, eps=1e-1, max_iter=500, cv=2, n_jobs=1)

    # Sanity check: without the constraint at least one coefficient is
    # negative, so the constrained check below is meaningful.
    enetcv_unconstrained = ElasticNetCV(**common_params)
    enetcv_unconstrained.fit(X, y)
    assert_true(min(enetcv_unconstrained.coef_) < 0)

    # Same data, constrained fit: no coefficient may be negative.
    enetcv_constrained = ElasticNetCV(positive=True, **common_params)
    enetcv_constrained.fit(X, y)
    assert_true(min(enetcv_constrained.coef_) >= 0)
Example #26
0
    def run(self):
        """Collect the estimator parameters currently selected in the UI.

        When the CV check box is ticked the ElasticNetCV widgets are read,
        otherwise the plain ElasticNet ("en"-prefixed) widgets are read.

        Returns a tuple ``(params, changed)`` where ``params`` is the dict of
        values read from the widgets and ``changed`` is the result of
        ``self.getChangedValues`` called with ``params`` and a freshly
        constructed estimator of the matching class.
        """
        # TODO Add back the random state later.

        if self.CVCheckBox.isChecked():
            params = {
                'l1_ratio': self.l1_ratioDoubleSpinBox.value(),
                'eps': self.epsDoubleSpinBox.value(),
                'n_alphas': self.n_alphasSpinBox.value(),
                # Only the text 'None' is recognised; since .get() falls back
                # to None, any other text also yields None.
                'alphas': {
                    'None': None
                }.get(self.alphasLineEdit.text()),
                'fit_intercept': self.fit_interceptCheckBox.isChecked(),
                'normalize': self.normalizeCheckBox.isChecked(),
                # NOTE(review): this is the combo box text (a string such as
                # 'True'), not a bool -- confirm the consumer expects that.
                'precompute': self.precomputeComboBox.currentText(),
                'max_iter': self.max_iterSpinBox.value(),
                # Read the tolerance from its own widget (previously this
                # mistakenly reused the max_iter spin box).
                'tol': self.tolDoubleSpinBox.value(),
                'cv': self.cVSpinBox.value(),
                'copy_X': self.copy_XCheckBox.isChecked(),
                'verbose': self.verboseCheckBox.isChecked(),
                'n_jobs': self.n_jobsSpinBox.value(),
                'positive': self.positiveCheckBox.isChecked(),
                'selection': self.selectionComboBox.currentText()
            }
            return params, self.getChangedValues(params, ElasticNetCV())

        params = {
            'alpha': self.alpha_text.text(),
            'l1_ratio': self.enl1_ratioDoubleSpinBox.value(),
            'fit_intercept': self.enfit_interceptCheckBox.isChecked(),
            'normalize': self.ennormalizeCheckBox.isChecked(),
            'precompute': self.enprecomputeCheckBox.isChecked(),
            'max_iter': self.enmax_iterSpinBox.value(),
            'copy_X': self.encopy_XCheckBox.isChecked(),
            'tol': self.entolDoubleSpinBox.value(),
            'warm_start': self.enwarm_startCheckBox.isChecked(),
            'positive': self.enpositiveCheckBox.isChecked(),
            # Use the ElasticNet selection combo box, like every other
            # "en"-prefixed widget in this branch (previously the
            # ElasticNetCV combo box was read here).
            'selection': self.enselectionComboBox.currentText()
        }
        return params, self.getChangedValues(params, ElasticNet())
def test_enet_path():
    """ElasticNetCV selects alpha close to 0.002 and l1_ratio 0.95.

    Checked for a fit without ``precompute`` and a fit with
    ``precompute=True``; the latter is also scored on the test set.
    """
    X, y, X_test, y_test = build_dataset()
    max_iter = 150

    def check_selection(model):
        assert_almost_equal(model.alpha_, 0.002, 2)
        assert_equal(model.l1_ratio_, 0.95)

    with warnings.catch_warnings():
        # The iteration budget is small to speed up the test, so ElasticNet
        # might not converge; silence the resulting UserWarning.
        warnings.simplefilter("ignore", UserWarning)
        clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.9, 0.95], cv=3,
                           max_iter=max_iter)
        clf.fit(X, y)
        check_selection(clf)

        clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.9, 0.95], cv=3,
                           max_iter=max_iter, precompute=True)
        clf.fit(X, y)
    check_selection(clf)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
def test_enet_path():
    """ElasticNetCV selects alpha close to 0.002 with and without precompute."""
    # NOTE(review): `rho` / `alpha` look like legacy spellings of
    # `l1_ratio` / `alpha_` used by other tests -- confirm against the
    # targeted scikit-learn version.
    X, y, X_test, y_test = build_dataset()
    max_iter = 50

    for extra_params in ({}, {'precompute': True}):
        clf = ElasticNetCV(n_alphas=10, eps=1e-3, rho=0.95, cv=5,
                           max_iter=max_iter, **extra_params)
        clf.fit(X, y)
        assert_almost_equal(clf.alpha, 0.002, 2)

    # test set, scored with the last (precompute=True) estimator
    test_score = clf.score(X_test, y_test)
    assert test_score > 0.99
def test_enet_cv_positive_constraint():
    """The ``positive`` option removes negative ElasticNetCV coefficients."""
    X, y, X_test, y_test = build_dataset()
    shared = dict(n_alphas=3, eps=1e-1, max_iter=500, cv=2, n_jobs=1)

    # Ensure the unconstrained fit has a negative coefficient, otherwise
    # the constrained check would be vacuous.
    enetcv_unconstrained = ElasticNetCV(**shared)
    enetcv_unconstrained.fit(X, y)
    smallest_unconstrained = min(enetcv_unconstrained.coef_)
    assert_true(smallest_unconstrained < 0)

    # On the same data, the constrained fit has non-negative coefficients.
    enetcv_constrained = ElasticNetCV(positive=True, **shared)
    enetcv_constrained.fit(X, y)
    smallest_constrained = min(enetcv_constrained.coef_)
    assert_true(smallest_constrained >= 0)
def test_enet_path():
    """ElasticNetCV selects alpha close to 0.002 and rho 0.95.

    Checked for a fit without ``precompute`` and a fit with
    ``precompute=True``; the latter is also scored on the test set.
    """
    # NOTE(review): `rho` / `alpha` look like legacy spellings of
    # `l1_ratio` / `alpha_` used by other tests -- confirm against the
    # targeted scikit-learn version.
    X, y, X_test, y_test = build_dataset()
    max_iter = 150

    def check_selection(model):
        assert_almost_equal(model.alpha, 0.002, 2)
        assert_equal(model.rho_, 0.95)

    with warnings.catch_warnings():
        # The iteration budget is small to speed up the test, so ElasticNet
        # might not converge; silence the resulting UserWarning.
        warnings.simplefilter("ignore", UserWarning)
        clf = ElasticNetCV(n_alphas=5, eps=2e-3, rho=[0.9, 0.95], cv=3,
                           max_iter=max_iter)
        clf.fit(X, y)
        check_selection(clf)

        clf = ElasticNetCV(n_alphas=5, eps=2e-3, rho=[0.9, 0.95], cv=3,
                           max_iter=max_iter, precompute=True)
        clf.fit(X, y)
    check_selection(clf)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #31
0
 def __init__(self,
              l1_ratio=0.5,
              eps=0.001,
              n_alphas=100,
              alphas=None,
              fit_intercept=True,
              normalize=False,
              precompute='auto',
              max_iter=1000,
              tol=0.0001,
              cv=3,
              copy_X=True,
              verbose=0,
              n_jobs=None,
              positive=False,
              random_state=None,
              selection='cyclic'):
     """Record the hyperparameters and build the wrapped ``SKLModel``.

     Every argument is stored under its own name in ``self._hyperparams``
     and the whole dict is forwarded as keyword arguments to ``SKLModel``.
     """
     self._hyperparams = dict(
         l1_ratio=l1_ratio,
         eps=eps,
         n_alphas=n_alphas,
         alphas=alphas,
         fit_intercept=fit_intercept,
         normalize=normalize,
         precompute=precompute,
         max_iter=max_iter,
         tol=tol,
         cv=cv,
         copy_X=copy_X,
         verbose=verbose,
         n_jobs=n_jobs,
         positive=positive,
         random_state=random_state,
         selection=selection,
     )
     self._wrapped_model = SKLModel(**self._hyperparams)
def test_enet_l1_ratio():
    """Check how the CV estimators handle ``l1_ratio=0``.

    Without an explicit alpha grid a ValueError with a specific message is
    expected; with a grid, the result must match a fit with a tiny non-zero
    l1_ratio.
    """
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])
    # The multi-task estimator gets the target as a single-column 2-D array.
    cases = ((ElasticNetCV, y), (MultiTaskElasticNetCV, y[:, None]))

    # An estimator relying on the automatic alpha grid must refuse
    # l1_ratio=0.
    for estimator_class, target in cases:
        failing = estimator_class(l1_ratio=0, random_state=42)
        assert_raise_message(ValueError, msg, failing.fit, X, target)

    # l1_ratio=0 is allowed once a grid is supplied manually.
    alphas = [0.1, 10]
    estkwds = {'alphas': alphas, 'random_state': 42}
    for estimator_class, target in cases:
        est_desired = estimator_class(l1_ratio=0.00001, **estkwds)
        est = estimator_class(l1_ratio=0, **estkwds)
        with ignore_warnings():
            est_desired.fit(X, target)
            est.fit(X, target)
        assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
Example #33
0
def build_auto(regressor, name):
    """Fit ``regressor`` on the auto data and persist model and predictions.

    The estimator is fitted on the module-level ``auto_X`` / ``auto_y``,
    stored with ``store_pkl`` under ``name + ".pkl"``, and its predictions
    on ``auto_X`` are stored with ``store_csv`` under ``name + ".csv"`` as
    a single-column frame named ``mpg``.
    """
    regressor = regressor.fit(auto_X, auto_y)
    store_pkl(regressor, name + ".pkl")
    predictions = regressor.predict(auto_X)
    store_csv(DataFrame(predictions, columns=["mpg"]), name + ".csv")


# Build one model per regressor type through build_auto; the second argument
# is the base name under which build_auto stores its outputs.
# random_state=13 is used wherever a seed is passed explicitly.
build_auto(DecisionTreeRegressor(random_state=13, min_samples_leaf=5),
           "DecisionTreeAuto")
# Bagged ensemble of three decision trees, each using half of the features.
build_auto(
    BaggingRegressor(DecisionTreeRegressor(random_state=13,
                                           min_samples_leaf=5),
                     random_state=13,
                     n_estimators=3,
                     max_features=0.5), "DecisionTreeEnsembleAuto")
build_auto(ElasticNetCV(random_state=13), "ElasticNetAuto")
build_auto(ExtraTreesRegressor(random_state=13, min_samples_leaf=5),
           "ExtraTreesAuto")
build_auto(GradientBoostingRegressor(random_state=13, init=None),
           "GradientBoostingAuto")
build_auto(LassoCV(random_state=13), "LassoAuto")
build_auto(LinearRegression(), "LinearRegressionAuto")
# Bagged linear regressions, each using half of the features.
build_auto(
    BaggingRegressor(LinearRegression(), random_state=13, max_features=0.5),
    "LinearRegressionEnsembleAuto")
build_auto(RandomForestRegressor(random_state=13, min_samples_leaf=5),
           "RandomForestAuto")
build_auto(RidgeCV(), "RidgeAuto")
# NOTE(review): newer XGBoost releases name this objective
# "reg:squarederror" -- confirm against the installed version before changing.
build_auto(XGBRegressor(objective="reg:linear"), "XGBAuto")

# Load the housing data set (presumably consumed by code past this point).
housing_df = load_csv("Housing.csv")
def test_sparse_input_dtype_enet_and_lassocv():
    """Check that a float32 sparse input does not change the CV fit.

    For both ``ElasticNetCV`` and ``LassoCV``, fitting on a CSR matrix built
    from ``X`` and on the same matrix built with ``dtype=np.float32`` must
    give ``alpha_`` and ``coef_`` that agree to six decimals.
    """
    X, y, _, _ = build_dataset(n_features=10)

    for estimator_cls in (ElasticNetCV, LassoCV):
        # Fit on the CSR matrix with the dtype X already has.
        baseline = estimator_cls(n_alphas=5)
        baseline.fit(sparse.csr_matrix(X), y)

        # Fit on the same data stored as float32.
        single_precision = estimator_cls(n_alphas=5)
        single_precision.fit(sparse.csr_matrix(X, dtype=np.float32), y)

        assert_almost_equal(baseline.alpha_, single_precision.alpha_,
                            decimal=6)
        assert_almost_equal(baseline.coef_, single_precision.coef_,
                            decimal=6)
def test_enet_l1_ratio():
    """Verify the ``l1_ratio=0`` contract of the CV elastic-net estimators.

    ``fit`` must raise ``ValueError`` with the expected message when no
    ``alphas`` grid is given, and must succeed when one is supplied, giving
    coefficients that match a near-zero ``l1_ratio`` fit to five decimals.
    """
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])

    # (estimator class, target to fit on); the multi-task estimator is
    # given y reshaped to one column.
    scenarios = ((ElasticNetCV, y), (MultiTaskElasticNetCV, y[:, None]))

    # Automatic grid generation must be rejected for l1_ratio=0.
    for cv_class, target in scenarios:
        unfittable = cv_class(l1_ratio=0, random_state=42)
        assert_raise_message(ValueError, msg, unfittable.fit, X, target)

    # Test that l1_ratio=0 is allowed if we supply a grid manually
    estkwds = {'alphas': [0.1, 10], 'random_state': 42}
    for cv_class, target in scenarios:
        est_desired = cv_class(l1_ratio=0.00001, **estkwds)
        est = cv_class(l1_ratio=0, **estkwds)
        with ignore_warnings():
            est_desired.fit(X, target)
            est.fit(X, target)
        assert_array_almost_equal(est.coef_, est_desired.coef_, decimal=5)
Example #36
0
        f = pl.figure()

    for i in range(n_rows):
        a = f.add_subplot(n_rows, n_rows, (n_rows) * (j % n_rows) + (i + 1))
        title = node_names[indexes[j][0]] + ' -- ' + node_names[indexes[j][1]]
        pl.scatter(x[groups == i],
                   y[groups == i],
                   c=color[i],
                   s=40,
                   label=labels_group[i])
        a.set_title(title)
        pl.legend()

    j += 1
######################################################
# Cross-validated ElasticNet and Lasso over 50 alphas spaced linearly from 1
# down to 0.05, with a ShuffleSplit of 50 iterations and test_size=0.25
# sized to the full target vector y.
# NOTE(review): ShuffleSplit(n, n_iter=...) looks like the legacy
# sklearn.cross_validation signature -- confirm which module it comes from.
enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50),
                      cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))

lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50),
                  cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
# Per-group pass: enetcv / lassocv are rebound on every iteration, so the
# two estimators created above are replaced as soon as the loop runs.
for i in range(n_rows):

    # Keep only the rows and targets that belong to group i.
    X_ = conn_data[groups == i, :]
    y_ = y[groups == i]

    # Same alpha grid as above, but the splitter is sized to this group.
    enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50),
                          cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))

    lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50),
                      cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))

    # Only the Lasso model is fitted in the portion of the loop shown here.
    lassocv.fit(X_, y_)
def test_sparse_input_dtype_enet_and_lassocv():
    """Compare CV fits on a sparse input and on its float32 counterpart.

    ``ElasticNetCV`` and ``LassoCV`` are each fitted twice: once on
    ``csr_matrix(X)`` and once on ``csr_matrix(X, dtype=np.float32)``.  The
    selected ``alpha_`` and the fitted ``coef_`` must agree to six decimals.
    """
    X, y, _, _ = build_dataset(n_features=10)

    def fit_on(estimator_cls, sparse_X):
        # Fresh estimator for every fit so that no state is shared.
        model = estimator_cls(n_alphas=5)
        model.fit(sparse_X, y)
        return model

    for estimator_cls in (ElasticNetCV, LassoCV):
        clf = fit_on(estimator_cls, sparse.csr_matrix(X))
        clf1 = fit_on(estimator_cls, sparse.csr_matrix(X, dtype=np.float32))
        assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
        assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)
                               ('logistic', LogisticRegression())]))],
                   'hard',
                   weights=[1.01, 1.01]), ['predict'],
  create_weird_classification_problem_1()),
 (GradientBoostingClassifier(max_depth=10,
                             n_estimators=10), ['predict_proba', 'predict'],
  create_weird_classification_problem_1()),
 (LogisticRegression(), ['predict_proba', 'predict'],
  create_weird_classification_problem_1()),
 (IsotonicRegression(out_of_bounds='clip'), ['predict'],
  create_isotonic_regression_problem_1()),
 (Earth(), ['predict', 'transform'], create_regression_problem_1()),
 (Earth(allow_missing=True), ['predict', 'transform'],
  create_regression_problem_with_missingness_1()),
 (ElasticNet(), ['predict'], create_regression_problem_1()),
 (ElasticNetCV(), ['predict'], create_regression_problem_1()),
 (LassoCV(), ['predict'], create_regression_problem_1()),
 (Ridge(), ['predict'], create_regression_problem_1()),
 (RidgeCV(), ['predict'], create_regression_problem_1()),
 (SGDRegressor(), ['predict'], create_regression_problem_1()),
 (Lasso(), ['predict'], create_regression_problem_1()),
 (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]),
  ['predict', 'predict_proba'], create_weird_classification_problem_1()),
 (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))],
               transformer_weights={
                   'earth': 1,
                   'earth2': 2
               }), ['transform'], create_weird_classification_problem_1()),
 (RandomForestRegressor(), ['predict'], create_regression_problem_1()),
 (CalibratedClassifierCV(LogisticRegression(),
                         'isotonic'), ['predict_proba'],
def test_multioutput_enetcv_error():
    """ElasticNetCV.fit must raise ValueError for a two-column target."""
    # Seeded generator so that the random data is the same on every run.
    generator = np.random.RandomState(0)
    features = generator.randn(10, 2)
    two_column_target = generator.randn(10, 2)
    assert_raises(ValueError, ElasticNetCV().fit, features,
                  two_column_target)
def test_multioutput_enetcv_error():
    """ElasticNetCV.fit must raise ValueError for a two-column target.

    The data comes from a locally seeded ``RandomState`` rather than the
    global ``np.random`` functions, so the inputs are identical on every run
    and the global random state seen by other tests is left untouched.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    # Two target columns: the multi-output case ElasticNetCV is expected
    # to reject.
    y = rng.randn(10, 2)
    clf = ElasticNetCV()
    assert_raises(ValueError, clf.fit, X, y)
Example #41
0
# Legend label for each group, indexed by the group id compared against
# `groups` below.
labels_group = ['elderly', 'mci', 'young']
# j is the running index of the column of X being plotted; the index that
# enumerate yields is discarded and j is advanced by hand at the loop end.
j = 0
for _, x in enumerate(X.T):
    # Start a new figure every n_rows columns.
    if (j%n_rows) == 0:
        f = pl.figure()
    
    for i in range(n_rows):
        # Subplot in row (j % n_rows), column i of an n_rows x n_rows grid.
        a = f.add_subplot(n_rows, n_rows,(n_rows)*(j%n_rows)+(i+1))
        # Title names the two nodes that column j refers to.
        title = node_names[indexes[j][0]]+' -- '+node_names[indexes[j][1]]
        # Scatter this column against y for the samples of group i only
        # (pl.scatter draws on the current axes -- presumably the subplot
        # that was just added).
        pl.scatter(x[groups==i], y[groups==i], c=color[i], s=40, label=labels_group[i])
        a.set_title(title)
        pl.legend()
    
    j+=1
######################################################
# Cross-validated ElasticNet and Lasso over 50 alphas spaced linearly from 1
# down to 0.05, with a ShuffleSplit of 50 iterations and test_size=0.25
# sized to the full target vector y.
# NOTE(review): ShuffleSplit(n, n_iter=...) looks like the legacy
# sklearn.cross_validation signature -- confirm which module it comes from.
enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), 
                          cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))

lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), 
                          cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
# Per-group pass: enetcv / lassocv are rebound on every iteration, so the
# two estimators created above are replaced as soon as the loop runs.
for i in range(n_rows):
    
    # Keep only the rows and targets that belong to group i.
    X_ = conn_data[groups==i,:]
    y_ = y[groups==i]
    
    # Same alpha grid as above, but the splitter is sized to this group.
    # No fit call is visible in this portion of the loop.
    enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), 
                          cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))

    lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), 
                          cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))
    
    
def test_enet_path():
    """Exercise cross-validated elastic-net model selection.

    Three scenarios are covered:

    * ``ElasticNetCV`` on a fixed alpha grid, with and without
      ``precompute=True``: the smallest alpha and the smallest l1_ratio
      must be selected, and the test-set score must exceed 0.99.
    * ``MultiTaskElasticNetCV`` on a three-target problem: the test-set
      score must exceed 0.99 and ``coef_`` must have shape ``(3, 10)``.
    * A single-target problem fitted with both estimators must give the
      same ``l1_ratio_`` and ``alpha_``.
    """
    # Many samples and many informative features, so that the selected
    # l1_ratio leans toward ridge rather than lasso.
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    max_iter = 150

    # The small iteration budget keeps the test fast; ElasticNet might not
    # converge with it, which is why fit is wrapped in ignore_warnings.
    for precompute_kwargs in ({}, {'precompute': True}):
        clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                           l1_ratio=[0.5, 0.7], cv=3,
                           max_iter=max_iter, **precompute_kwargs)
        ignore_warnings(clf.fit)(X, y)
        # Well-conditioned settings: the smallest penalty should win.
        assert_almost_equal(clf.alpha_, min(clf.alphas_))
        # Non-sparse ground truth: the elastic-net closest to ridge
        # should win.
        assert_equal(clf.l1_ratio_, min(clf.l1_ratio))

    # Well-conditioned, low-noise settings: the last model fitted above
    # (the precompute=True one) should perform well on the test set.
    assert_greater(clf.score(X_test, y_test), 0.99)

    # Multi-output/target case
    X_multi, y_multi, X_multi_test, y_multi_test = build_dataset(
        n_features=10, n_targets=3)
    multi_clf = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3,
                                      l1_ratio=[0.5, 0.7], cv=3,
                                      max_iter=max_iter)
    ignore_warnings(multi_clf.fit)(X_multi, y_multi)
    # Same low-noise reasoning: good test-set performance is expected.
    assert_greater(multi_clf.score(X_multi_test, y_multi_test), 0.99)
    assert_equal(multi_clf.coef_.shape, (3, 10))

    # Mono-output should have same cross-validated alpha_ and l1_ratio_
    # in both cases.
    X_mono, y_mono, _, _ = build_dataset(n_features=10)
    single_task = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    single_task.fit(X_mono, y_mono)
    multi_task = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3,
                                       l1_ratio=[0.5, 0.7])
    multi_task.fit(X_mono, y_mono[:, np.newaxis])
    assert_almost_equal(single_task.l1_ratio_, multi_task.l1_ratio_)
    assert_almost_equal(single_task.alpha_, multi_task.alpha_)
Example #43
0
			'BaggingRegressor':BaggingRegressor(),
			'BayesianGaussianMixture':BayesianGaussianMixture(),
			'BayesianRidge':BayesianRidge(),
			'BernoulliNB':BernoulliNB(),
			'BernoulliRBM':BernoulliRBM(),
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),
			'DBSCAN':DBSCAN(),
			'DPGMM':DPGMM(),
			'DecisionTreeClassifier':DecisionTreeClassifier(),
			'DecisionTreeRegressor':DecisionTreeRegressor(),
			'DictionaryLearning':DictionaryLearning(),
			'ElasticNet':ElasticNet(),
			'ElasticNetCV':ElasticNetCV(),
			'EmpiricalCovariance':EmpiricalCovariance(),
			'ExtraTreeClassifier':ExtraTreeClassifier(),
			'ExtraTreeRegressor':ExtraTreeRegressor(),
			'ExtraTreesClassifier':ExtraTreesClassifier(),
			'ExtraTreesRegressor':ExtraTreesRegressor(),
			'FactorAnalysis':FactorAnalysis(),
			'FastICA':FastICA(),
			'FeatureAgglomeration':FeatureAgglomeration(),
			'FunctionTransformer':FunctionTransformer(),
			'GMM':GMM(),
			'GaussianMixture':GaussianMixture(),
			'GaussianNB':GaussianNB(),
			'GaussianProcess':GaussianProcess(),
			'GaussianProcessClassifier':GaussianProcessClassifier(),
			'GaussianProcessRegressor':GaussianProcessRegressor(),