def test_enet_path():
    # Use many samples and many informative features so that the
    # selected l1_ratio leans toward ridge rather than lasso.
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    # Few iterations keeps the test fast; ElasticNet may not converge,
    # so convergence warnings are ignored.
    max_iter = 150
    for extra in ({}, {'precompute': True}):
        model = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                             cv=3, max_iter=max_iter, **extra)
        ignore_warnings(model.fit)(X, y)
        # Well-conditioned settings: the smallest penalty should win.
        assert_almost_equal(model.alpha_, min(model.alphas_))
        # Non-sparse ground truth: expect the elastic-net mix closest to
        # ridge, i.e. the smallest l1_ratio of the grid.
        assert_equal(model.l1_ratio_, min(model.l1_ratio))
    # Low noise and good conditioning: the test-set score must be high.
    assert_greater(model.score(X_test, y_test), 0.99)
def runPrintResults(X, y, alpha, name):
    """Fit an ElasticNetCV on X (column-scaled by ``alpha`` when given),
    print the R2 score and the rescaled coefficients, and return
    ``(best_alpha, rescaled_coefs)``."""
    print(name+":\n=========")
    X_new = X if alpha is None else np.divide(X, alpha)
    enetCV = ElasticNetCV(l1_ratio=0.8, fit_intercept=False)
    enetCV.fit(X_new, y)
    y_pred_enet = enetCV.predict(X_new)
    r2_score_enet = r2_score(y, y_pred_enet)
    print("R2= ", r2_score_enet)
    # Undo the column scaling so the coefficients refer to the raw X.
    enetCV_coef = enetCV.coef_ if alpha is None else np.divide(enetCV.coef_, alpha)
    print("Best Alpha: {}".format(enetCV.alpha_))
    print("coefs_/alpha: {}".format(enetCV_coef))
    return enetCV.alpha_, enetCV_coef
def test_path_parameters():
    # The CV estimator must keep the path parameters it was given and
    # generate exactly the requested number of alphas.
    X, y, _, _ = build_dataset()
    model = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=50, l1_ratio=0.5)
    model.fit(X, y)
    assert_almost_equal(0.5, model.l1_ratio)
    assert_equal(50, model.n_alphas)
    assert_equal(50, len(model.alphas_))
def test_path_parameters():
    # Sparse-input variant: path parameters survive fitting.
    X, y = make_sparse_data()
    n_alphas = 10
    model = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=50,
                         l1_ratio=0.5, fit_intercept=False)
    model.fit(X, y)
    assert_almost_equal(0.5, model.l1_ratio)
    assert_equal(n_alphas, model.n_alphas)
    assert_equal(n_alphas, len(model.alphas_))
def test_1d_multioutput_enet_and_multitask_enet_cv():
    # Fitting a single-column multi-task problem must agree with the
    # plain single-output fit on the same data.
    X, y, _, _ = build_dataset(n_features=10)
    y = y[:, np.newaxis]
    single = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    single.fit(X, y[:, 0])
    multi = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
    multi.fit(X, y)
    assert_almost_equal(single.l1_ratio_, multi.l1_ratio_)
    assert_almost_equal(single.alpha_, multi.alpha_)
    assert_almost_equal(single.coef_, multi.coef_[0])
    assert_almost_equal(single.intercept_, multi.intercept_[0])
def test_path_parameters():
    # Variant with a looser tolerance and more iterations; the path
    # parameters must still be reflected on the fitted estimator.
    X, y, _, _ = build_dataset()
    model = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=100,
                         l1_ratio=0.5, tol=1e-3)
    model.fit(X, y)
    assert_almost_equal(0.5, model.l1_ratio)
    assert_equal(50, model.n_alphas)
    assert_equal(50, len(model.alphas_))
def test_path_parameters():
    # Path parameters survive fitting, and the MSE path must be the same
    # whether the data is given sparse or dense.
    X, y = make_sparse_data()
    n_alphas = 10
    model = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=50,
                         l1_ratio=0.5, fit_intercept=False)
    model.fit(X, y)
    assert_almost_equal(0.5, model.l1_ratio)
    assert_equal(n_alphas, model.n_alphas)
    assert_equal(n_alphas, len(model.alphas_))
    sparse_mse_path = model.mse_path_
    model.fit(X.toarray(), y)  # refit on the dense equivalent
    assert_almost_equal(model.mse_path_, sparse_mse_path)
def test_path_parameters():
    # NOTE: uses the historical `rho` spelling of l1_ratio, so this
    # snippet targets an old scikit-learn release.
    X, y = make_sparse_data()
    n_alphas = 10
    model = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=50,
                         rho=0.5, fit_intercept=False)
    model.fit(X, y)
    assert_almost_equal(0.5, model.rho)
    assert_equal(n_alphas, model.n_alphas)
    assert_equal(n_alphas, len(model.alphas_))
def train_all(self):
    """Train one ElasticNetCV model per position file, write the model
    coefficients to ``self.models_file_path`` and per-player residuals to
    per-position files under ``self.residual_data_path``.

    NOTE(review): Python 2 code (`print` statement, `izip`/`imap`,
    ``reader.next()``) -- it will not run under Python 3.
    """
    positions = ['PG.csv', 'SG.csv', 'SF.csv', 'PF.csv', 'C.csv']
    with open(self.models_file_path, 'w') as model_file:
        model_file_writer = csv.writer(model_file)
        # `first` is True only on the first iteration, so the header row
        # is written exactly once.
        for (first, filename) in izip(chain((True, ), repeat(False)), positions):
            with open(
                    os.path.join(self.cleaned_data_directory_path, filename),
                    'r') as cleaned_data:
                cleaned_data_reader = csv.reader(cleaned_data)
                cleaned_data_headers = cleaned_data_reader.next()
                # All columns but the last are numeric; the final column
                # (the name) is kept as a string.
                lines = [
                    map(float, line[:-1]) + line[-1:]
                    for line in cleaned_data_reader if len(line) >= 2
                ]
                # convert lines to numpy arrays
                num_data = len(lines)
                num_features = len(lines[0]) - 2
                X = np.zeros((num_data, num_features))
                Y = np.zeros((num_data))
                for (i, data) in enumerate(lines):
                    for (ii, feature) in enumerate(data[:-2]):
                        X[i][ii] = feature
                    Y[i] = lines[i][-2]  # last column is the name; the target is second-to-last
                # create an instance of elasticnet
                net = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                                   l1_ratio=[0.5, 0.7, 1], cv=3,
                                   normalize=True)
                # create a model based on our data
                net.fit(X, Y)
                if first:
                    model_file_writer.writerow(cleaned_data_headers[:-2])
                model_file_writer.writerow(net.coef_)
                with open(
                        os.path.join(self.residual_data_path, '_'.join(
                            ('resid', filename))), 'w') as resid_file:
                    resid_file_writer = csv.writer(resid_file)
                    # get the residuals
                    resid = X.dot(net.coef_) - Y
                    for (name, row) in izip(imap(lambda l: l[-1], lines), resid):
                        resid_file_writer.writerow((name, row))
                print sum(resid)
def test_uniform_targets():
    # Constant targets must be predicted exactly, and the alpha grid
    # collapses to the float resolution.
    models_single_task = (ElasticNetCV(fit_intercept=True, n_alphas=3),
                          LassoCV(fit_intercept=True, n_alphas=3))
    models_multi_task = (MultiTaskElasticNetCV(fit_intercept=True, n_alphas=3),
                         MultiTaskLassoCV(fit_intercept=True, n_alphas=3))
    rng = np.random.RandomState(0)
    X_train = rng.random_sample(size=(10, 3))
    X_test = rng.random_sample(size=(10, 3))
    y1 = np.empty(10)
    y2 = np.empty((10, 2))
    for model in models_single_task:
        for y_values in (0, 5):
            y1.fill(y_values)
            assert_array_equal(model.fit(X_train, y1).predict(X_test), y1)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution] * 3)
    for model in models_multi_task:
        for y_values in (0, 5):
            # Second target is a scaled copy of the first.
            y2[:, 0].fill(y_values)
            y2[:, 1].fill(2 * y_values)
            assert_array_equal(model.fit(X_train, y2).predict(X_test), y2)
            assert_array_equal(model.alphas_, [np.finfo(float).resolution] * 3)
def test_precompute_invalid_argument():
    # An unrecognized `precompute` value must raise ValueError at fit time.
    X, y, _, _ = build_dataset()
    for estimator in (ElasticNetCV(precompute="invalid"),
                      LassoCV(precompute="invalid")):
        assert_raises(ValueError, estimator.fit, X, y)
def fit(self, X, y=None):
    """Instantiate the wrapped sklearn model from the stored
    hyperparameters and fit it on X (and y when supplied)."""
    self._sklearn_model = SKLModel(**self._hyperparams)
    if y is None:
        self._sklearn_model.fit(X)
    else:
        self._sklearn_model.fit(X, y)
    return self
def test_path_parameters():
    # Ill-posed regression: many noisy features, few samples; only the
    # first 10 features carry signal. Uses the historical `rho` API.
    n_samples, n_features, max_iter = 50, 200, 50
    random_state = np.random.RandomState(0)
    w = random_state.randn(n_features)
    w[10:] = 0.0  # only the top 10 features are impacting the model
    X = random_state.randn(n_samples, n_features)
    y = np.dot(X, w)
    model = ElasticNetCV(n_alphas=50, eps=1e-3, max_iter=max_iter, rho=0.5)
    model.fit(X, y)
    assert_almost_equal(0.5, model.rho)
    assert_equal(50, model.n_alphas)
    assert_equal(50, len(model.alphas))
def connectWidgets(self):
    """Initialize the ElasticNet / ElasticNetCV parameter widgets with
    the default values of freshly constructed estimators."""
    self.elasticNetCVGroupBox.setHidden(True)
    # Default-constructed estimators supply the library defaults.
    en = ElasticNet()
    encv = ElasticNetCV()
    # --- plain ElasticNet controls (en* widgets) ---
    self.alpha_text.setText(str(en.alpha))
    self.enl1_ratioDoubleSpinBox.setValue(en.l1_ratio)
    self.enfit_interceptCheckBox.setChecked(en.fit_intercept)
    self.ennormalizeCheckBox.setChecked(en.normalize)
    self.enprecomputeCheckBox.setChecked(en.precompute)
    self.enmax_iterSpinBox.setValue(en.max_iter)
    self.encopy_XCheckBox.setChecked(en.copy_X)
    self.entolDoubleSpinBox.setValue(en.tol)
    self.enwarm_startCheckBox.setChecked(en.warm_start)
    self.enpositiveCheckBox.setChecked(en.positive)
    self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
    self.defaultComboItem(self.enselectionComboBox, en.selection)
    # --- ElasticNetCV controls ---
    self.l1_ratioDoubleSpinBox.setValue(encv.l1_ratio)
    self.epsDoubleSpinBox.setValue(encv.eps)
    self.n_alphasSpinBox.setValue(encv.n_alphas)
    self.alphasLineEdit.setText('None')
    self.fit_interceptCheckBox.setChecked(encv.fit_intercept)
    self.normalizeCheckBox.setChecked(encv.normalize)
    self.setComboBox(self.precomputeComboBox,
                     ['True', 'False', 'auto', 'array-like'])
    self.defaultComboItem(self.precomputeComboBox, encv.precompute)
    self.max_iterSpinBox.setValue(encv.max_iter)
    self.tolDoubleSpinBox.setValue(encv.tol)
    # cv default is hard-coded rather than read from the estimator.
    self.cVSpinBox.setValue(3)
    self.copy_XCheckBox.setChecked(encv.copy_X)
    self.verboseCheckBox.setChecked(encv.verbose)
    self.n_jobsSpinBox.setValue(encv.n_jobs)
    self.positiveCheckBox.setChecked(encv.positive)
    self.setComboBox(self.selectionComboBox, ['cyclic', 'random'])
    self.defaultComboItem(self.selectionComboBox, encv.selection)
class ElasticNetCVImpl():
    """Thin wrapper that forwards all constructor arguments to an
    underlying ElasticNetCV operator (`Op`)."""

    def __init__(self, l1_ratio=0.5, eps=0.001, n_alphas=100, alphas=None,
                 fit_intercept=True, normalize=False, precompute='auto',
                 max_iter=1000, tol=0.0001, cv=3, copy_X=True, verbose=0,
                 n_jobs=None, positive=False, random_state=None,
                 selection='cyclic'):
        # Keep every argument so the wrapped model can be rebuilt or
        # inspected later.
        self._hyperparams = {
            'l1_ratio': l1_ratio, 'eps': eps, 'n_alphas': n_alphas,
            'alphas': alphas, 'fit_intercept': fit_intercept,
            'normalize': normalize, 'precompute': precompute,
            'max_iter': max_iter, 'tol': tol, 'cv': cv, 'copy_X': copy_X,
            'verbose': verbose, 'n_jobs': n_jobs, 'positive': positive,
            'random_state': random_state, 'selection': selection}
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model on X (and y when supplied)."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self._wrapped_model.predict(X)
def test_enet_path():
    # Ill-posed problem: 200 noisy features, 50 samples, of which only
    # the first 10 features carry signal. Uses the historical `rho` API.
    n_samples, n_features, max_iter = 50, 200, 50
    random_state = np.random.RandomState(0)
    w = random_state.randn(n_features)
    w[10:] = 0.0
    X = random_state.randn(n_samples, n_features)
    y = np.dot(X, w)
    # Fit once with the default precompute and once with precompute=True;
    # both must land on the same alpha.
    for extra in ({}, {'precompute': True}):
        clf = ElasticNetCV(n_alphas=10, eps=1e-3, rho=0.95, cv=5,
                           max_iter=max_iter, **extra)
        clf.fit(X, y)
        assert_almost_equal(clf.alpha, 0.002, 2)
    # Noise-free test set generated from the same ground truth.
    X_test = random_state.randn(n_samples, n_features)
    y_test = np.dot(X_test, w)
    assert clf.score(X_test, y_test) > 0.99
def test_enet_path():
    # Many samples and informative features push the selected l1_ratio
    # toward the ridge end of the grid.
    X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100,
                                         n_informative_features=100)
    # Few iterations keeps the test fast; convergence warnings ignored.
    max_iter = 150
    for extra in ({}, {'precompute': True}):
        model = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                             l1_ratio=[0.5, 0.7], cv=3, max_iter=max_iter,
                             **extra)
        ignore_warnings(model.fit)(X, y)
        # Well-conditioned settings: the smallest penalty should win.
        assert_almost_equal(model.alpha_, min(model.alphas_))
        # Non-sparse ground truth: expect the mix closest to ridge.
        assert_equal(model.l1_ratio_, min(model.l1_ratio))
    # Low noise: generalization should be excellent.
    assert_greater(model.score(X_test, y_test), 0.99)

    # Multi-output/target case.
    X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3)
    model = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7],
                                  cv=3, max_iter=max_iter)
    ignore_warnings(model.fit)(X, y)
    assert_greater(model.score(X_test, y_test), 0.99)
    assert_equal(model.coef_.shape, (3, 10))
    # Mono-output should have same cross-validated alpha_ and l1_ratio_
    # in both cases.
# Mono-output fit and its one-column multi-task counterpart must agree
# on the cross-validated l1_ratio_ and alpha_.
X, y, _, _ = build_dataset(n_features=10)
clf1 = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
clf1.fit(X, y)
clf2 = MultiTaskElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7])
clf2.fit(X, y[:, np.newaxis])
assert_almost_equal(clf1.l1_ratio_, clf2.l1_ratio_)
assert_almost_equal(clf1.alpha_, clf2.alpha_)
def train_all(self):
    """Train one ElasticNetCV model per position file, write the model
    coefficients to ``self.models_file_path`` and per-player residuals to
    per-position files under ``self.residual_data_path``.

    NOTE(review): Python 2 code (`print` statement, `izip`/`imap`,
    ``reader.next()``) -- it will not run under Python 3.
    """
    positions = ['PG.csv', 'SG.csv', 'SF.csv', 'PF.csv', 'C.csv']
    with open(self.models_file_path, 'w') as model_file:
        model_file_writer = csv.writer(model_file)
        # `first` is True only on the first iteration, so the header row
        # is written exactly once.
        for (first, filename) in izip(chain((True,), repeat(False)), positions):
            with open(os.path.join(self.cleaned_data_directory_path,
                                   filename), 'r') as cleaned_data:
                cleaned_data_reader = csv.reader(cleaned_data)
                cleaned_data_headers = cleaned_data_reader.next()
                # All columns but the last are numeric; the final column
                # (the name) is kept as a string.
                lines = [map(float, line[:-1]) + line[-1:]
                         for line in cleaned_data_reader if len(line) >= 2]
                # convert lines to numpy arrays
                num_data = len(lines)
                num_features = len(lines[0]) - 2
                X = np.zeros((num_data, num_features))
                Y = np.zeros((num_data))
                for (i, data) in enumerate(lines):
                    for (ii, feature) in enumerate(data[:-2]):
                        X[i][ii] = feature
                    Y[i] = lines[i][-2]  # last column is the name; the target is second-to-last
                # create an instance of elasticnet
                net = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3,
                                   l1_ratio=[0.5, 0.7, 1], cv=3,
                                   normalize=True)
                # create a model based on our data
                net.fit(X, Y)
                if first:
                    model_file_writer.writerow(cleaned_data_headers[:-2])
                model_file_writer.writerow(net.coef_)
                with open(os.path.join(
                        self.residual_data_path,
                        '_'.join(('resid', filename))), 'w') as resid_file:
                    resid_file_writer = csv.writer(resid_file)
                    # get the residuals
                    resid = X.dot(net.coef_) - Y
                    for (name, row) in izip(imap(lambda l: l[-1], lines), resid):
                        resid_file_writer.writerow((name, row))
                print sum(resid)
def test_precompute_invalid_argument():
    # CV estimators must reject a bogus `precompute` string with a
    # message that lists the accepted values.
    X, y, _, _ = build_dataset()
    for estimator in (ElasticNetCV(precompute="invalid"),
                      LassoCV(precompute="invalid")):
        assert_raises_regex(ValueError, ".*should be.*True.*False.*auto.*"
                            "array-like.*Got 'invalid'", estimator.fit, X, y)
    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'", ElasticNet(precompute='auto').fit, X, y)
def test_same_output_sparse_dense_lasso_and_enet_cv():
    # Sparse and dense representations of the same data must yield
    # identical CV results for both ElasticNetCV and LassoCV.
    X, y = make_sparse_data(n_samples=40, n_features=10)
    for normalize in (True, False):
        sparse_enet = ElasticNetCV(max_iter=100, cv=5, normalize=normalize)
        ignore_warnings(sparse_enet.fit)(X, y)
        dense_enet = ElasticNetCV(max_iter=100, cv=5, normalize=normalize)
        ignore_warnings(dense_enet.fit)(X.toarray(), y)
        assert_almost_equal(sparse_enet.alpha_, dense_enet.alpha_, 7)
        assert_almost_equal(sparse_enet.intercept_, dense_enet.intercept_, 7)
        assert_array_almost_equal(sparse_enet.mse_path_, dense_enet.mse_path_)
        assert_array_almost_equal(sparse_enet.alphas_, dense_enet.alphas_)

        sparse_lasso = LassoCV(max_iter=100, cv=4, normalize=normalize)
        ignore_warnings(sparse_lasso.fit)(X, y)
        dense_lasso = LassoCV(max_iter=100, cv=4, normalize=normalize)
        ignore_warnings(dense_lasso.fit)(X.toarray(), y)
        assert_almost_equal(sparse_lasso.alpha_, dense_lasso.alpha_, 7)
        assert_almost_equal(sparse_lasso.intercept_, dense_lasso.intercept_, 7)
        assert_array_almost_equal(sparse_lasso.mse_path_, dense_lasso.mse_path_)
        assert_array_almost_equal(sparse_lasso.alphas_, dense_lasso.alphas_)
def test_path_parameters():
    # Path parameters survive fitting, and the MSE path matches between
    # sparse and dense input.
    X, y = make_sparse_data()
    n_alphas = 10
    model = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=50,
                         l1_ratio=0.5, fit_intercept=False)
    ignore_warnings(model.fit)(X, y)
    assert_almost_equal(0.5, model.l1_ratio)
    assert n_alphas == model.n_alphas
    assert n_alphas == len(model.alphas_)
    sparse_mse_path = model.mse_path_
    ignore_warnings(model.fit)(X.toarray(), y)  # refit on dense data
    assert_almost_equal(model.mse_path_, sparse_mse_path)
def test_enet_cv_positive_constraint():
    # positive=True must eliminate the negative coefficient that the
    # unconstrained fit produces on the same data.
    X, y, X_test, y_test = build_dataset()
    max_iter = 500
    unconstrained = ElasticNetCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                                 cv=2, n_jobs=1)
    unconstrained.fit(X, y)
    assert_true(min(unconstrained.coef_) < 0)
    constrained = ElasticNetCV(n_alphas=3, eps=1e-1, max_iter=max_iter,
                               cv=2, positive=True, n_jobs=1)
    constrained.fit(X, y)
    assert_true(min(constrained.coef_) >= 0)
def run(self):
    """Read the hyperparameters currently set in the UI.

    Returns
    -------
    (params, changed) : tuple
        `params` maps parameter names to current widget values for
        ElasticNetCV (when the CV box is checked) or ElasticNet;
        `changed` holds only the values that differ from the
        estimator defaults.
    """
    # Unused lookup tables (p_attrib/r_attrib) and the duplicate
    # precompute combo-box read were removed; `currentText()` is used
    # directly below.
    # TODO Add back the random state later.
    # try:
    #     r_state = int(self.randomStateLineEdit.text())
    # except:
    #     r_state = None if self.randomStateLineEdit.text() == 'None' else None
    if self.CVCheckBox.isChecked():
        params = {
            'l1_ratio': self.l1_ratioDoubleSpinBox.value(),
            'eps': self.epsDoubleSpinBox.value(),
            'n_alphas': self.n_alphasSpinBox.value(),
            'alphas': {'None': None}.get(self.alphasLineEdit.text()),
            'fit_intercept': self.fit_interceptCheckBox.isChecked(),
            'normalize': self.normalizeCheckBox.isChecked(),
            'precompute': self.precomputeComboBox.currentText(),
            'max_iter': self.max_iterSpinBox.value(),
            # BUG FIX: `tol` was read from max_iterSpinBox (copy-paste
            # error); the tolerance spin box is the correct source.
            'tol': self.tolDoubleSpinBox.value(),
            'cv': self.cVSpinBox.value(),
            'copy_X': self.copy_XCheckBox.isChecked(),
            'verbose': self.verboseCheckBox.isChecked(),
            'n_jobs': self.n_jobsSpinBox.value(),
            'positive': self.positiveCheckBox.isChecked(),
            'selection': self.selectionComboBox.currentText(),
        }
        return params, self.getChangedValues(params, ElasticNetCV())
    else:
        params = {
            'alpha': self.alpha_text.text(),
            'l1_ratio': self.enl1_ratioDoubleSpinBox.value(),
            'fit_intercept': self.enfit_interceptCheckBox.isChecked(),
            'normalize': self.ennormalizeCheckBox.isChecked(),
            'precompute': self.enprecomputeCheckBox.isChecked(),
            'max_iter': self.enmax_iterSpinBox.value(),
            'copy_X': self.encopy_XCheckBox.isChecked(),
            'tol': self.entolDoubleSpinBox.value(),
            'warm_start': self.enwarm_startCheckBox.isChecked(),
            'positive': self.enpositiveCheckBox.isChecked(),
            'selection': self.selectionComboBox.currentText(),
        }
        return params, self.getChangedValues(params, ElasticNet())
def test_enet_path():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    with warnings.catch_warnings():
        # Few iterations: ElasticNet may not converge; silence the noise
        # to keep the test fast.
        warnings.simplefilter("ignore", UserWarning)
        for extra in ({}, {'precompute': True}):
            clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.9, 0.95],
                               cv=3, max_iter=max_iter, **extra)
            clf.fit(X, y)
            assert_almost_equal(clf.alpha_, 0.002, 2)
            assert_equal(clf.l1_ratio_, 0.95)
    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
def test_enet_path():
    # Historical `rho` API; fit with and without precompute=True.
    X, y, X_test, y_test = build_dataset()
    max_iter = 50
    for extra in ({}, {'precompute': True}):
        clf = ElasticNetCV(n_alphas=10, eps=1e-3, rho=0.95, cv=5,
                           max_iter=max_iter, **extra)
        clf.fit(X, y)
        assert_almost_equal(clf.alpha, 0.002, 2)
    # test set
    assert clf.score(X_test, y_test) > 0.99
def test_enet_path():
    # Historical `rho` API variant of the path test.
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    with warnings.catch_warnings():
        # Few iterations: ElasticNet may not converge; silence warnings.
        warnings.simplefilter("ignore", UserWarning)
        for extra in ({}, {'precompute': True}):
            clf = ElasticNetCV(n_alphas=5, eps=2e-3, rho=[0.9, 0.95],
                               cv=3, max_iter=max_iter, **extra)
            clf.fit(X, y)
            assert_almost_equal(clf.alpha, 0.002, 2)
            assert_equal(clf.rho_, 0.95)
    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
def __init__(self, l1_ratio=0.5, eps=0.001, n_alphas=100, alphas=None,
             fit_intercept=True, normalize=False, precompute='auto',
             max_iter=1000, tol=0.0001, cv=3, copy_X=True, verbose=0,
             n_jobs=None, positive=False, random_state=None,
             selection='cyclic'):
    """Store every constructor argument and build the wrapped model."""
    self._hyperparams = {
        'l1_ratio': l1_ratio, 'eps': eps, 'n_alphas': n_alphas,
        'alphas': alphas, 'fit_intercept': fit_intercept,
        'normalize': normalize, 'precompute': precompute,
        'max_iter': max_iter, 'tol': tol, 'cv': cv, 'copy_X': copy_X,
        'verbose': verbose, 'n_jobs': n_jobs, 'positive': positive,
        'random_state': random_state, 'selection': selection}
    self._wrapped_model = SKLModel(**self._hyperparams)
def test_enet_l1_ratio():
    # Estimators relying on _alpha_grid must refuse l1_ratio=0 unless an
    # explicit alpha grid is supplied.
    msg = ("Automatic alpha grid generation is not supported for l1_ratio=0. "
           "Please supply a grid by providing your estimator with the "
           "appropriate `alphas=` argument.")
    X = np.array([[1, 2, 4, 5, 8], [3, 5, 7, 7, 8]]).T
    y = np.array([12, 10, 11, 21, 5])
    assert_raise_message(ValueError, msg,
                         ElasticNetCV(l1_ratio=0, random_state=42).fit, X, y)
    assert_raise_message(ValueError, msg,
                         MultiTaskElasticNetCV(l1_ratio=0,
                                               random_state=42).fit,
                         X, y[:, None])
    # With a manual grid, l1_ratio=0 is allowed and must behave like a
    # vanishingly small positive ratio.
    estkwds = {'alphas': [0.1, 10], 'random_state': 42}
    with ignore_warnings():
        baseline = ElasticNetCV(l1_ratio=0.00001, **estkwds).fit(X, y)
        est = ElasticNetCV(l1_ratio=0, **estkwds).fit(X, y)
    assert_array_almost_equal(est.coef_, baseline.coef_, decimal=5)
    with ignore_warnings():
        est = MultiTaskElasticNetCV(l1_ratio=0, **estkwds).fit(X, y[:, None])
        baseline = MultiTaskElasticNetCV(l1_ratio=0.00001,
                                         **estkwds).fit(X, y[:, None])
    assert_array_almost_equal(est.coef_, baseline.coef_, decimal=5)
def build_auto(regressor, name):
    """Fit `regressor` on the auto dataset, pickle the fitted model, and
    store its predictions as a one-column CSV."""
    regressor = regressor.fit(auto_X, auto_y)
    store_pkl(regressor, name + ".pkl")
    mpg = DataFrame(regressor.predict(auto_X), columns=["mpg"])
    store_csv(mpg, name + ".csv")

# Train one model per algorithm on the auto dataset.
build_auto(DecisionTreeRegressor(random_state=13, min_samples_leaf=5),
           "DecisionTreeAuto")
build_auto(BaggingRegressor(DecisionTreeRegressor(random_state=13,
                                                  min_samples_leaf=5),
                            random_state=13, n_estimators=3,
                            max_features=0.5),
           "DecisionTreeEnsembleAuto")
build_auto(ElasticNetCV(random_state=13), "ElasticNetAuto")
build_auto(ExtraTreesRegressor(random_state=13, min_samples_leaf=5),
           "ExtraTreesAuto")
build_auto(GradientBoostingRegressor(random_state=13, init=None),
           "GradientBoostingAuto")
build_auto(LassoCV(random_state=13), "LassoAuto")
build_auto(LinearRegression(), "LinearRegressionAuto")
build_auto(BaggingRegressor(LinearRegression(), random_state=13,
                            max_features=0.5),
           "LinearRegressionEnsembleAuto")
build_auto(RandomForestRegressor(random_state=13, min_samples_leaf=5),
           "RandomForestAuto")
build_auto(RidgeCV(), "RidgeAuto")
build_auto(XGBRegressor(objective="reg:linear"), "XGBAuto")

housing_df = load_csv("Housing.csv")
def test_sparse_input_dtype_enet_and_lassocv():
    # float32 and float64 sparse input must give (nearly) identical fits
    # for both CV estimators.
    X, y, _, _ = build_dataset(n_features=10)
    for cv_cls in (ElasticNetCV, LassoCV):
        clf64 = cv_cls(n_alphas=5)
        clf64.fit(sparse.csr_matrix(X), y)
        clf32 = cv_cls(n_alphas=5)
        clf32.fit(sparse.csr_matrix(X, dtype=np.float32), y)
        assert_almost_equal(clf64.alpha_, clf32.alpha_, decimal=6)
        assert_almost_equal(clf64.coef_, clf32.coef_, decimal=6)
# NOTE(review): this fragment appears to be the tail of a per-feature
# plotting loop (a fuller version exists elsewhere in this file); the
# leading `if (j % n_rows) == 0:` guard and the enclosing loop over
# features are not visible here -- indentation below is reconstructed
# to match that fuller version and should be confirmed.
f = pl.figure()
for i in range(n_rows):
    # One subplot per group; the title names the connected node pair.
    a = f.add_subplot(n_rows, n_rows, (n_rows) * (j % n_rows) + (i + 1))
    title = node_names[indexes[j][0]] + ' -- ' + node_names[indexes[j][1]]
    pl.scatter(x[groups == i], y[groups == i], c=color[i], s=40,
               label=labels_group[i])
    a.set_title(title)
pl.legend()
j += 1
######################################################
# 50-alpha grid from 1 down to 0.05; ShuffleSplit CV with 50 splits.
enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50),
                      cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50),
                  cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
# Rebuild the CV estimators per group so the split size matches the
# group's sample count; only the lasso is actually fitted here.
for i in range(n_rows):
    X_ = conn_data[groups == i, :]
    y_ = y[groups == i]
    enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50),
                          cv=ShuffleSplit(len(y_), n_iter=50,
                                          test_size=0.25))
    lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50),
                      cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))
    lassocv.fit(X_, y_)
('logistic', LogisticRegression())]))], 'hard', weights=[1.01, 1.01]), ['predict'], create_weird_classification_problem_1()), (GradientBoostingClassifier(max_depth=10, n_estimators=10), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (LogisticRegression(), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (IsotonicRegression(out_of_bounds='clip'), ['predict'], create_isotonic_regression_problem_1()), (Earth(), ['predict', 'transform'], create_regression_problem_1()), (Earth(allow_missing=True), ['predict', 'transform'], create_regression_problem_with_missingness_1()), (ElasticNet(), ['predict'], create_regression_problem_1()), (ElasticNetCV(), ['predict'], create_regression_problem_1()), (LassoCV(), ['predict'], create_regression_problem_1()), (Ridge(), ['predict'], create_regression_problem_1()), (RidgeCV(), ['predict'], create_regression_problem_1()), (SGDRegressor(), ['predict'], create_regression_problem_1()), (Lasso(), ['predict'], create_regression_problem_1()), (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]), ['predict', 'predict_proba'], create_weird_classification_problem_1()), (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))], transformer_weights={ 'earth': 1, 'earth2': 2 }), ['transform'], create_weird_classification_problem_1()), (RandomForestRegressor(), ['predict'], create_regression_problem_1()), (CalibratedClassifierCV(LogisticRegression(), 'isotonic'), ['predict_proba'],
def test_multioutput_enetcv_error():
    # ElasticNetCV is single-output only: a 2-D y must raise ValueError.
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    y = rng.randn(10, 2)
    assert_raises(ValueError, ElasticNetCV().fit, X, y)
def test_multioutput_enetcv_error():
    # Multi-target y is not supported by ElasticNetCV.
    X = np.random.randn(10, 2)
    y = np.random.randn(10, 2)
    assert_raises(ValueError, ElasticNetCV().fit, X, y)
# Scatter the connectivity features by group (one figure per n_rows
# features, one subplot per (feature, group) pair), then set up
# ElasticNetCV/LassoCV estimators over a fixed alpha grid.
labels_group = ['elderly', 'mci', 'young']
j = 0
for _, x in enumerate(X.T):
    # Start a new figure every n_rows features.
    if (j%n_rows) == 0:
        f = pl.figure()
    for i in range(n_rows):
        a = f.add_subplot(n_rows, n_rows,(n_rows)*(j%n_rows)+(i+1))
        # Title names the connected node pair for this feature.
        title = node_names[indexes[j][0]]+' -- '+node_names[indexes[j][1]]
        pl.scatter(x[groups==i], y[groups==i], c=color[i], s=40, label=labels_group[i])
        a.set_title(title)
    pl.legend()
    j+=1
######################################################
# 50-alpha grid from 1 down to 0.05; ShuffleSplit CV with 50 splits.
enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25))
# Rebuild the estimators per group so the CV split size matches the
# group's sample count. NOTE(review): nothing is fitted in this loop as
# shown -- presumably a fit call follows outside this chunk.
for i in range(n_rows):
    X_ = conn_data[groups==i,:]
    y_ = y[groups==i]
    enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))
    lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25))
'BaggingRegressor':BaggingRegressor(), 'BayesianGaussianMixture':BayesianGaussianMixture(), 'BayesianRidge':BayesianRidge(), 'BernoulliNB':BernoulliNB(), 'BernoulliRBM':BernoulliRBM(), 'Binarizer':Binarizer(), 'Birch':Birch(), 'CCA':CCA(), 'CalibratedClassifierCV':CalibratedClassifierCV(), 'DBSCAN':DBSCAN(), 'DPGMM':DPGMM(), 'DecisionTreeClassifier':DecisionTreeClassifier(), 'DecisionTreeRegressor':DecisionTreeRegressor(), 'DictionaryLearning':DictionaryLearning(), 'ElasticNet':ElasticNet(), 'ElasticNetCV':ElasticNetCV(), 'EmpiricalCovariance':EmpiricalCovariance(), 'ExtraTreeClassifier':ExtraTreeClassifier(), 'ExtraTreeRegressor':ExtraTreeRegressor(), 'ExtraTreesClassifier':ExtraTreesClassifier(), 'ExtraTreesRegressor':ExtraTreesRegressor(), 'FactorAnalysis':FactorAnalysis(), 'FastICA':FastICA(), 'FeatureAgglomeration':FeatureAgglomeration(), 'FunctionTransformer':FunctionTransformer(), 'GMM':GMM(), 'GaussianMixture':GaussianMixture(), 'GaussianNB':GaussianNB(), 'GaussianProcess':GaussianProcess(), 'GaussianProcessClassifier':GaussianProcessClassifier(), 'GaussianProcessRegressor':GaussianProcessRegressor(),