Example #1
class LassoCVImpl:

    def __init__(self, eps=0.001, n_alphas=100, alphas=None, fit_intercept=True, normalize=False, precompute='auto', max_iter=1000, tol=0.0001, copy_X=True, cv=3, verbose=False, n_jobs=None, positive=False, random_state=None, selection='cyclic'):
        self._hyperparams = {
            'eps': eps,
            'n_alphas': n_alphas,
            'alphas': alphas,
            'fit_intercept': fit_intercept,
            'normalize': normalize,
            'precompute': precompute,
            'max_iter': max_iter,
            'tol': tol,
            'copy_X': copy_X,
            'cv': cv,
            'verbose': verbose,
            'n_jobs': n_jobs,
            'positive': positive,
            'random_state': random_state,
            'selection': selection}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)
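
A minimal usage sketch for the wrapper above. Assumptions: SKLModel is an alias for sklearn.linear_model.LassoCV (the listing does not show the import), and the scikit-learn version still accepts the normalize parameter (removed in 1.2).

# Sketch only: SKLModel is assumed to alias sklearn.linear_model.LassoCV,
# and normalize is assumed to still be a valid LassoCV parameter (pre-1.2).
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV as SKLModel

X, y = make_regression(n_samples=100, n_features=20, noise=0.1,
                       random_state=0)
model = LassoCVImpl(n_alphas=20, cv=5).fit(X, y)
print(model.predict(X[:5]))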
Example #2
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    clf = LassoCV(n_alphas=5, eps=2e-3)
    clf.fit(X, y[:, 0])
    clf1 = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    clf1.fit(X, y)
    assert_almost_equal(clf.alpha_, clf1.alpha_)
    assert_almost_equal(clf.coef_, clf1.coef_[0])
    assert_almost_equal(clf.intercept_, clf1.intercept_[0])
Example #3
def test_sparse_input_dtype_enet_and_lassocv():
    X, y, _, _ = build_dataset(n_features=10)
    clf = ElasticNetCV(n_alphas=5)
    clf.fit(sparse.csr_matrix(X), y)
    clf1 = ElasticNetCV(n_alphas=5)
    clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
    assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
    assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)

    clf = LassoCV(n_alphas=5)
    clf.fit(sparse.csr_matrix(X), y)
    clf1 = LassoCV(n_alphas=5)
    clf1.fit(sparse.csr_matrix(X, dtype=np.float32), y)
    assert_almost_equal(clf.alpha_, clf1.alpha_, decimal=6)
    assert_almost_equal(clf.coef_, clf1.coef_, decimal=6)
Example #4
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [
            ElasticNetCV(precompute="invalid"),
            LassoCV(precompute="invalid")
    ]:
        assert_raises(ValueError, clf.fit, X, y)
Example #5
def test_uniform_targets():
    enet = ElasticNetCV(fit_intercept=True, n_alphas=3)
    m_enet = MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3)
    lasso = LassoCV(fit_intercept=True, n_alphas=3)
    m_lasso = MultiTaskLassoCV(fit_intercept=True, n_alphas=3)

    models_single_task = (enet, lasso)
    models_multi_task = (m_enet, m_lasso)

    rng = np.random.RandomState(0)

    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))

    y1 = np.empty(10)
    y2 = np.empty((10, 2))

    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)

    for model in models_multi_task:
        for y_values in (0, 5):
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution]*3)
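
The resolution-valued grid asserted above falls out of the alpha-grid construction: with fit_intercept=True the target is centered, a constant target centers to all zeros, so the data-driven alpha_max is 0 and scikit-learn clips the whole grid to np.finfo(float).resolution. A sketch of that computation follows (the clipping lives in a private helper, _alpha_grid, so details may vary across versions):

# Reproduce why the alpha grid collapses for a uniform target (sketch).
import numpy as np

rng = np.random.RandomState(0)
X = rng.random_sample(size=(10, 3))
y = np.full(10, 5.0)

y_centered = y - y.mean()                  # all zeros for a constant target
alpha_max = np.abs(X.T @ y_centered).max() / len(y)
print(alpha_max)                           # 0.0
print(np.finfo(float).resolution)          # 1e-15, the fallback grid value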
Example #6
 def __init__(self,
              eps=0.001,
              n_alphas=100,
              alphas=None,
              fit_intercept=True,
              normalize=False,
              precompute='auto',
              max_iter=1000,
              tol=0.0001,
              copy_X=True,
              cv=3,
              verbose=False,
              n_jobs=None,
              positive=False,
              random_state=None,
              selection='cyclic'):
     self._hyperparams = {
         'eps': eps,
         'n_alphas': n_alphas,
         'alphas': alphas,
         'fit_intercept': fit_intercept,
         'normalize': normalize,
         'precompute': precompute,
         'max_iter': max_iter,
         'tol': tol,
         'copy_X': copy_X,
         'cv': cv,
         'verbose': verbose,
         'n_jobs': n_jobs,
         'positive': positive,
         'random_state': random_state,
         'selection': selection
     }
     self._wrapped_model = Op(**self._hyperparams)
Example #7
 def fit(self, X, y=None):
     self._sklearn_model = SKLModel(**self._hyperparams)
     if y is not None:
         self._sklearn_model.fit(X, y)
     else:
         self._sklearn_model.fit(X)
     return self
Example #8
@pytest.mark.parametrize("precompute, inner_precompute", [
    (True, True),
    ('auto', False),
    (False, False),
])
def test_lassoCV_does_not_set_precompute(monkeypatch, precompute,
                                         inner_precompute):
    X, y, _, _ = build_dataset()
    calls = 0

    class LassoMock(Lasso):
        def fit(self, X, y):
            super().fit(X, y)
            nonlocal calls
            calls += 1
            assert self.precompute == inner_precompute

    monkeypatch.setattr("sklearn.linear_model.coordinate_descent.Lasso",
                        LassoMock)
    clf = LassoCV(precompute=precompute)
    clf.fit(X, y)
    assert calls > 0
Example #9
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
Example #10
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [ElasticNetCV(precompute="invalid"),
                LassoCV(precompute="invalid")]:
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
Example #11
def test_same_output_sparse_dense_lasso_and_enet_cv():
    X, y = make_sparse_data(n_samples=40, n_features=10)
    for normalize in [True, False]:
        clfs = ElasticNetCV(max_iter=100, cv=5, normalize=normalize)
        ignore_warnings(clfs.fit)(X, y)
        clfd = ElasticNetCV(max_iter=100, cv=5, normalize=normalize)
        ignore_warnings(clfd.fit)(X.toarray(), y)
        assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
        assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
        assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
        assert_array_almost_equal(clfs.alphas_, clfd.alphas_)

        clfs = LassoCV(max_iter=100, cv=4, normalize=normalize)
        ignore_warnings(clfs.fit)(X, y)
        clfd = LassoCV(max_iter=100, cv=4, normalize=normalize)
        ignore_warnings(clfd.fit)(X.toarray(), y)
        assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
        assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
        assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
        assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
Example #12
def test_lasso_cv_with_some_model_selection():
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import StratifiedKFold
    from sklearn import datasets
    from sklearn.linear_model import LassoCV

    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target

    pipe = make_pipeline(StandardScaler(), LassoCV(cv=StratifiedKFold()))
    pipe.fit(X, y)
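
One caveat: StratifiedKFold expects discrete class labels, so stratifying on the continuous diabetes target is questionable, and newer scikit-learn versions warn or error here (version-dependent behavior, not verified against a specific release). With a regression target, KFold is the usual splitter:

# Regression-appropriate variant of the pipeline above (sketch).
from sklearn.model_selection import KFold

pipe = make_pipeline(StandardScaler(), LassoCV(cv=KFold(n_splits=5)))
pipe.fit(X, y)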
Example #13
def test_lasso_cv_positive_constraint():
    X, y, X_test, y_test = build_dataset()
    max_iter = 500

    # Ensure the unconstrained fit has a negative coefficient
    clf_unconstrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                                n_jobs=1)
    clf_unconstrained.fit(X, y)
    assert_true(min(clf_unconstrained.coef_) < 0)

    # On same data, constrained fit has non-negative coefficients
    clf_constrained = LassoCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                              positive=True, cv=2, n_jobs=1)
    clf_constrained.fit(X, y)
    assert_true(min(clf_constrained.coef_) >= 0)
Example #14
def test_lasso_path():

    # build an ill-posed linear regression problem with many noisy features and
    # comparatively few samples
    n_samples, n_features, max_iter = 50, 200, 30
    random_state = np.random.RandomState(0)
    w = random_state.randn(n_features)
    w[10:] = 0.0  # only the top 10 features are impacting the model
    X = random_state.randn(n_samples, n_features)
    y = np.dot(X, w)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.011, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.011, 2)

    # test set
    X_test = random_state.randn(n_samples, n_features)
    y_test = np.dot(X_test, w)
    assert clf.score(X_test, y_test) > 0.85
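
As a follow-up sketch, not part of the original test: since w[10:] = 0.0, the support of the fitted model should concentrate on the first ten features, which is easy to eyeball:

# Inspect which features the fitted LassoCV kept (sketch).
support = np.flatnonzero(clf.coef_)
print(support)       # indices should mostly fall below 10
print(clf.alpha_)    # the selected regularization strength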
Example #15
        a = f.add_subplot(n_rows, n_rows, (n_rows) * (j % n_rows) + (i + 1))
        title = node_names[indexes[j][0]] + ' -- ' + node_names[indexes[j][1]]
        pl.scatter(x[groups == i],
                   y[groups == i],
                   c=color[i],
                   s=40,
                   label=labels_group[i])
        a.set_title(title)
        pl.legend()

    j += 1
######################################################
enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50),
                      cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))

lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50),
                  cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
for i in range(n_rows):

    X_ = conn_data[groups == i, :]
    y_ = y[groups == i]

    enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50),
                          cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))

    lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50),
                      cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))

    lassocv.fit(X_, y_)
    enetcv.fit(X_, y_)
    f = pl.figure()
    a = f.add_subplot(211)
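
Note that this fragment uses the pre-0.18 sklearn.cross_validation API, where ShuffleSplit took the sample count and an n_iter argument. A rough modern equivalent, assuming scikit-learn >= 0.18 where the splitter moved to sklearn.model_selection:

# Modern splitter: n_splits replaces n_iter and the sample count is inferred.
from sklearn.model_selection import ShuffleSplit

cv = ShuffleSplit(n_splits=50, test_size=0.25, random_state=0)
enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=cv)
lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=cv)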
Example #16
			'IsolationForest':IsolationForest(),
			'Isomap':Isomap(),
			'KMeans':KMeans(),
			'KNeighborsClassifier':KNeighborsClassifier(),
			'KNeighborsRegressor':KNeighborsRegressor(),
			'KernelCenterer':KernelCenterer(),
			'KernelDensity':KernelDensity(),
			'KernelPCA':KernelPCA(),
			'KernelRidge':KernelRidge(),
			'LSHForest':LSHForest(),
			'LabelPropagation':LabelPropagation(),
			'LabelSpreading':LabelSpreading(),
			'Lars':Lars(),
			'LarsCV':LarsCV(),
			'Lasso':Lasso(),
			'LassoCV':LassoCV(),
			'LassoLars':LassoLars(),
			'LassoLarsCV':LassoLarsCV(),
			'LassoLarsIC':LassoLarsIC(),
			'LatentDirichletAllocation':LatentDirichletAllocation(),
			'LedoitWolf':LedoitWolf(),
			'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(),
			'LinearRegression':LinearRegression(),
			'LinearSVC':LinearSVC(),
			'LinearSVR':LinearSVR(),
			'LocallyLinearEmbedding':LocallyLinearEmbedding(),
			'LogisticRegression':LogisticRegression(),
			'LogisticRegressionCV':LogisticRegressionCV(),
			'MDS':MDS(),
			'MLPClassifier':MLPClassifier(),
			'MLPRegressor':MLPRegressor(),
Example #17
                   'hard',
                   weights=[1.01, 1.01]), ['predict'],
  create_weird_classification_problem_1()),
 (GradientBoostingClassifier(max_depth=10,
                             n_estimators=10), ['predict_proba', 'predict'],
  create_weird_classification_problem_1()),
 (LogisticRegression(), ['predict_proba', 'predict'],
  create_weird_classification_problem_1()),
 (IsotonicRegression(out_of_bounds='clip'), ['predict'],
  create_isotonic_regression_problem_1()),
 (Earth(), ['predict', 'transform'], create_regression_problem_1()),
 (Earth(allow_missing=True), ['predict', 'transform'],
  create_regression_problem_with_missingness_1()),
 (ElasticNet(), ['predict'], create_regression_problem_1()),
 (ElasticNetCV(), ['predict'], create_regression_problem_1()),
 (LassoCV(), ['predict'], create_regression_problem_1()),
 (Ridge(), ['predict'], create_regression_problem_1()),
 (RidgeCV(), ['predict'], create_regression_problem_1()),
 (SGDRegressor(), ['predict'], create_regression_problem_1()),
 (Lasso(), ['predict'], create_regression_problem_1()),
 (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]),
  ['predict', 'predict_proba'], create_weird_classification_problem_1()),
 (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))],
               transformer_weights={
                   'earth': 1,
                   'earth2': 2
               }), ['transform'], create_weird_classification_problem_1()),
 (RandomForestRegressor(), ['predict'], create_regression_problem_1()),
 (CalibratedClassifierCV(LogisticRegression(),
                         'isotonic'), ['predict_proba'],
  create_weird_classification_problem_1()),
Example #18

build_auto(DecisionTreeRegressor(random_state=13, min_samples_leaf=5),
           "DecisionTreeAuto")
build_auto(
    BaggingRegressor(DecisionTreeRegressor(random_state=13,
                                           min_samples_leaf=5),
                     random_state=13,
                     n_estimators=3,
                     max_features=0.5), "DecisionTreeEnsembleAuto")
build_auto(ElasticNetCV(random_state=13), "ElasticNetAuto")
build_auto(ExtraTreesRegressor(random_state=13, min_samples_leaf=5),
           "ExtraTreesAuto")
build_auto(GradientBoostingRegressor(random_state=13, init=None),
           "GradientBoostingAuto")
build_auto(LassoCV(random_state=13), "LassoAuto")
build_auto(LinearRegression(), "LinearRegressionAuto")
build_auto(
    BaggingRegressor(LinearRegression(), random_state=13, max_features=0.5),
    "LinearRegressionEnsembleAuto")
build_auto(RandomForestRegressor(random_state=13, min_samples_leaf=5),
           "RandomForestAuto")
build_auto(RidgeCV(), "RidgeAuto")
build_auto(XGBRegressor(objective="reg:linear"), "XGBAuto")

housing_df = load_csv("Housing.csv")

print(housing_df.dtypes)

housing_df["CHAS"] = housing_df["CHAS"].astype(float)
housing_df["RAD"] = housing_df["RAD"].astype(float)
Example #19
def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the 
    configuration file for running the learning method.
    
    TODO: use reflection to instantiate the classes
    
    @param config: configuration object
    @return: an estimator with fit() and predict() methods
    """
    estimator = None
    scorers = None  # avoid UnboundLocalError when no "learning" section is set

    learning_cfg = config.get("learning", None)
    if learning_cfg:
        p = learning_cfg.get("parameters", None)
        o = learning_cfg.get("optimize", None)
        scorers = set_scorer_functions(
            learning_cfg.get("scorer", ['mae', 'rmse']))

        method_name = learning_cfg.get("method", None)
        if method_name == "SVR":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVR(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            elif p:
                estimator = SVR(C=p.get("C", 10),
                                epsilon=p.get('epsilon', 0.01),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0034),
                                tol=p.get('tol', 1e-3),
                                verbose=False)
            else:
                estimator = SVR()

        elif method_name == "RandomForestRegressor":
            if o:
                tune_params = set_optimization_params(o)
                print(tune_params)
                estimator = optimize_model(RandomForestRegressor(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            elif p:
                estimator = RandomForestRegressor(
                    n_estimators=p.get("n_estimators", 100),
                    criterion=p.get("criterion", 'mse'),
                    n_jobs=p.get("n_jobs", -1),
                    random_state=p.get("random_state", 0),
                    max_features=p.get("max_features", 'auto'))

        elif method_name == "SVC":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(SVC(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get('cv', 5),
                                           o.get('verbose', True),
                                           o.get('n_jobs', 1))

            elif p:
                estimator = SVC(C=p.get('C', 1.0),
                                kernel=p.get('kernel', 'rbf'),
                                degree=p.get('degree', 3),
                                gamma=p.get('gamma', 0.0),
                                coef0=p.get('coef0', 0.0),
                                tol=p.get('tol', 1e-3),
                                verbose=p.get('verbose', False))
            else:
                estimator = SVC()

        elif method_name == "LassoCV":
            if p:
                estimator = LassoCV(eps=p.get('eps', 1e-3),
                                    n_alphas=p.get('n_alphas', 100),
                                    normalize=p.get('normalize', False),
                                    precompute=p.get('precompute', 'auto'),
                                    max_iter=p.get('max_iter', 1000),
                                    tol=p.get('tol', 1e-4),
                                    cv=p.get('cv', 10),
                                    verbose=False)
            else:
                estimator = LassoCV()

        elif method_name == "LassoLars":
            if o:
                tune_params = set_optimization_params(o)
                estimator = optimize_model(LassoLars(), X_train,
                                           y_train, tune_params, scorers,
                                           o.get("cv", 5),
                                           o.get("verbose", True),
                                           o.get("n_jobs", 1))

            if p:
                estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                      fit_intercept=p.get(
                                          'fit_intercept', True),
                                      verbose=p.get('verbose', False),
                                      normalize=p.get('normalize', True),
                                      max_iter=p.get('max_iter', 500),
                                      fit_path=p.get('fit_path', True))
            else:
                estimator = LassoLars()

        elif method_name == "LassoLarsCV":
            if p:
                estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                        normalize=p.get('normalize', True),
                                        max_n_alphas=p.get(
                                            'max_n_alphas', 1000),
                                        n_jobs=p.get('n_jobs', 1),
                                        cv=p.get('cv', 10),
                                        verbose=False)
            else:
                estimator = LassoLarsCV()

    return estimator, scorers
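
A hypothetical call for illustration; the real configuration file format is not shown in this example, so the dict below merely mirrors the keys the function reads. X_train and y_train are only consumed on the optimize paths, so placeholders suffice for the LassoCV branch:

# Hypothetical config; keys mirror the lookups in set_learning_method.
config = {
    "learning": {
        "method": "LassoCV",
        "parameters": {"n_alphas": 50, "cv": 5},
        "scorer": ["mae", "rmse"],
    }
}
estimator, scorers = set_learning_method(config, X_train=None, y_train=None)
print(estimator)     # LassoCV(cv=5, n_alphas=50, ...)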