def test_lasso_fit_intercept():
    """Check that ``coef_`` has shape ``(1,)`` with and without an intercept.

    A single-feature Lasso is fitted on three points, once with
    ``fit_intercept=False`` and once with ``fit_intercept=True``; the shape
    of the coefficient vector is asserted for each estimator.
    """
    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]

    clf = Lasso(fit_intercept=False)
    clf.fit(X, Y)
    assert_equal(clf.coef_.shape, (1,))

    clf2 = Lasso(fit_intercept=True)
    clf2.fit(X, Y)
    # Inspect the second estimator: re-checking ``clf`` here would leave the
    # ``fit_intercept=True`` case unverified.
    assert_equal(clf2.coef_.shape, (1,))
def test_lasso_positive_constraint():
    """With ``positive=True`` the smallest fitted coefficient must be >= 0.

    The check is run twice on the same data: once without an explicit
    ``precompute`` argument and once with ``precompute=True``.
    """
    X = [[-1], [0], [1]]
    y = [1, 0, -1]  # just a straight line with negative slope

    for extra_params in ({}, {'precompute': True}):
        lasso = Lasso(alpha=0.1, max_iter=1000, positive=True, **extra_params)
        lasso.fit(X, y)
        assert_true(min(lasso.coef_) >= 0)
def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0,
             docker_containers: Dict[str, DockerContainer] = None) -> None:
    """Create the wrapped Lasso estimator and reset all fit-related state.

    The arguments are passed unchanged to the parent constructor. Only the
    ``alpha`` hyperparameter and the random seed are forwarded to Lasso.
    """
    super().__init__(hyperparams=hyperparams, random_seed=random_seed,
                     docker_containers=docker_containers)

    # The following hyperparameters are currently not forwarded (their
    # keyword arguments are disabled), so Lasso falls back to its own
    # defaults for them:
    #   fit_intercept, normalize, precompute, max_iter, tol,
    #   warm_start, positive, selection
    lasso_kwargs = {
        'alpha': self.hyperparams['alpha'],
        'random_state': self.random_seed,
    }
    self._clf = Lasso(**lasso_kwargs)

    # self._F = None
    # self._F_inv = None

    # Training data and target bookkeeping start out empty.
    self._training_inputs = None
    self._training_outputs = None
    self._target_names = None
    self._training_indices = None
    self._target_column_indices = None
    self._target_columns_metadata: List[OrderedDict] = None

    self._fitted = False
def test_deprection_precompute_enet():
    """Fitting with ``precompute="auto"`` must emit a DeprecationWarning.

    The same expectation is checked for ElasticNet and then for Lasso.
    """
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)

    for estimator_cls in (ElasticNet, Lasso):
        clf = estimator_cls(precompute="auto")
        assert_warns(DeprecationWarning, clf.fit, X, y)
def test_sparse_enet_coordinate_descent():
    """A ConvergenceWarning must be issued when the model does not converge."""
    n_samples, n_features = 5, 2

    # Sparse CSC design matrix scaled by a huge factor, with all-ones targets.
    X = sp.csc_matrix((n_samples, n_features)) * 1e50
    y = np.ones(n_samples)

    # Only two iterations are allowed for the fit.
    clf = Lasso(max_iter=2)
    assert_warns(ConvergenceWarning, clf.fit, X, y)
def test_fit_simple_backupsklearn(backend='auto'):
    """Fit h2o4gpu's Lasso and scikit-learn's Lasso on the same data and compare.

    The data are read from ``./open_data/simple.txt``; every column except
    the last is used as a feature and the last column is the target.

    Three estimators are fitted, each printing its predictions and score:
    ``h2o4gpu.Lasso`` with ``glm_stop_early=False``, ``h2o4gpu.Lasso`` with
    ``positive=True, random_state=1234``, and scikit-learn's ``Lasso`` with
    the same ``positive``/``random_state``.

    When ``backend`` is not ``'h2o4gpu'`` the fitted attributes are printed
    and ``coef_``, ``intercept_`` and ``n_iter_`` of the second h2o4gpu
    estimator are asserted to be close to scikit-learn's.

    :param backend: forwarded as ``backend=`` to both h2o4gpu estimators.
    """
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    # Features: all columns but the last; target: the last column.
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    Solver = h2o4gpu.Lasso

    # First h2o4gpu estimator: early stopping disabled.
    enet = Solver(glm_stop_early=False, backend=backend)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    # Second h2o4gpu estimator: same keyword arguments as the scikit-learn
    # reference below, plus the backend selector.
    enet_wrapper = Solver(positive=True, random_state=1234, backend=backend)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    # Reference estimator, imported by its full module path.
    from sklearn.linear_model.coordinate_descent import Lasso
    enet_sk = Lasso(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    # Round-trip the reference coefficients through a float32 CSR matrix to
    # obtain dense float32 arrays.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    enet_sk_sparse_coef = csr_matrix(enet_sk.sparse_coef_, dtype=np.float32).toarray()

    # NOTE(review): everything below is assumed to belong to this branch,
    # i.e. the comparison is skipped for the 'h2o4gpu' backend -- confirm
    # the intended extent of the branch.
    if backend != 'h2o4gpu':
        print(enet_sk.coef_)
        print(enet_sk.sparse_coef_)

        print(enet_sk_coef)
        print(enet_sk_sparse_coef)

        print(enet_wrapper.coef_)
        print(enet_wrapper.sparse_coef_)

        print(enet_sk.intercept_)
        print(enet_wrapper.intercept_)

        print(enet_sk.n_iter_)
        print(enet_wrapper.n_iter_)

        print(enet_wrapper.time_prepare)
        print(enet_wrapper.time_upload_data)
        print(enet_wrapper.time_fitonly)

        assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
        assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
        assert np.allclose(enet_wrapper.n_iter_, enet_sk.n_iter_)
def run(self):
    """Read the Lasso settings from the widgets.

    Returns a tuple of the parameter dict and the result of
    ``self.getChangedValues`` called with that dict and a
    default-constructed ``Lasso``.
    """
    alpha = float(self.alpha_text.text())
    fit_intercept = self.fitInterceptCheckBox.isChecked()
    max_iter = int(self.maxNumOfIterationsSpinBox.value())
    tol = self.toleranceDoubleSpinBox.value()
    positive = self.forcePositiveCoefficientsCheckBox.isChecked()

    params = {
        'alpha': alpha,
        'fit_intercept': fit_intercept,
        'max_iter': max_iter,
        'tol': tol,
        'positive': positive,
        'selection': 'random',
    }
    # A 'CV' entry read from self.optimizeWCrossValidaitonCheckBox is
    # currently disabled.

    changed = self.getChangedValues(params, Lasso())
    return params, changed
def test_lasso_zero():
    """The lasso must handle all-zero data without crashing."""
    zero_features = [[0], [0], [0]]
    zero_targets = [0, 0, 0]

    clf = Lasso(alpha=0.1)
    clf.fit(zero_features, zero_targets)
    pred = clf.predict([[1], [2], [3]])

    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)
def test_lasso_zero():
    """The sparse lasso must handle all-zero data without crashing."""
    empty_X = sp.csc_matrix((3, 1))
    zero_y = [0, 0, 0]
    T = np.array([[1], [2], [3]])

    clf = Lasso()
    clf.fit(empty_X, zero_y)
    pred = clf.predict(T)

    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)
def test_lasso_readonly_data():
    """Fit Lasso on data obtained from ``TempMemmap`` and check the results.

    The coefficient, the predictions on the test sample and the dual gap
    are asserted after fitting with ``alpha=0.5``.
    """
    X = np.array([[-1], [0], [1]])
    Y = np.array([-1, 0, 1])  # just a straight line
    T = np.array([[2], [3], [4]])  # test sample

    with TempMemmap((X, Y)) as (X, Y):
        clf = Lasso(alpha=0.5)
        clf.fit(X, Y)
        pred = clf.predict(T)

        expected_coef = [.25]
        expected_pred = [0.5, 0.75, 1.]
        assert_array_almost_equal(clf.coef_, expected_coef)
        assert_array_almost_equal(pred, expected_pred)
        assert_almost_equal(clf.dual_gap_, 0)
def test_lasso_alpha_warning():
    """Fitting a Lasso with ``alpha=0`` must produce at least one warning."""
    check_warnings()  # Skip if unsupported Python version

    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]  # just a straight line

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones that would be filtered out.
        warnings.simplefilter('always')

        Lasso(alpha=0).fit(X, Y)

        assert_greater(len(caught), 0)  # warnings should be raised
def test_sparse_input_convergence_warning():
    """Check convergence warnings when fitting on float32 sparse input.

    An ElasticNet limited to one iteration with ``tol=0`` must emit a
    ConvergenceWarning; a Lasso allowed 1000 iterations must fit without
    emitting any warning at all.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import warnings

    X, y, _, _ = build_dataset(n_samples=1000, n_features=500)

    with pytest.warns(ConvergenceWarning):
        ElasticNet(max_iter=1, tol=0).fit(
            sparse.csr_matrix(X, dtype=np.float32), y)

    # check that the model converges w/o warnings
    # ``pytest.warns(None)`` is deprecated since pytest 7 and rejected by
    # pytest 8, so the warnings are recorded with the standard library.
    with warnings.catch_warnings(record=True) as record:
        # Record everything, so that any warning makes the assertion fail.
        warnings.simplefilter("always")
        Lasso(max_iter=1000).fit(sparse.csr_matrix(X, dtype=np.float32), y)

    assert not record
def test_lasso_toy():
    """
    Test Lasso on a toy example for various values of alpha.

    When validating this against glmnet notice that glmnet divides it
    against nobs.
    """
    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]  # just a straight line
    T = [[2], [3], [4]]  # test sample

    # (alpha, expected coef_, expected predictions on T)
    cases = (
        (1e-8, [1], [2, 3, 4]),
        (0.1, [.85], [1.7, 2.55, 3.4]),
        (0.5, [.25], [0.5, 0.75, 1.]),
        (1, [.0], [0, 0, 0]),
    )

    for alpha, expected_coef, expected_pred in cases:
        clf = Lasso(alpha=alpha)
        clf.fit(X, Y)
        pred = clf.predict(T)
        assert_array_almost_equal(clf.coef_, expected_coef)
        assert_array_almost_equal(pred, expected_pred)
        assert_almost_equal(clf.dual_gap_, 0)
def test_sparse_lasso_not_as_toy_dataset():
    """Compare Lasso fitted on ``X_train`` and on ``X_train.toarray()``.

    The second half of the samples is used for training and the first half
    for testing. Both fits must reach a dual gap of ~0 (4 decimals) and a
    test score above 0.85, and the first fit must have exactly
    ``n_informative`` non-zero coefficients.
    """
    n_samples = 100
    max_iter = 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)

    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    # Both estimators share exactly the same settings.
    lasso_params = dict(alpha=0.1, fit_intercept=False,
                        max_iter=max_iter, tol=1e-7)

    s_clf = Lasso(**lasso_params)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = Lasso(**lasso_params)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    # check that the coefs are sparse
    n_nonzero = np.sum(s_clf.coef_ != 0.0)
    assert_equal(n_nonzero, n_informative)
def K_fold_CrossValidation(k , dataFrame , target , regressorType):
    """Pick the best of ``k`` contiguous folds and return a refitted regressor.

    The rows are split into ``k`` contiguous folds of
    ``floor(len(dataFrame) / k)`` rows each. For every fold the regressor is
    fitted on the remaining rows and the root of the summed squared error
    divided by the fold size is computed on the held-out fold. The regressor
    is finally refitted on all rows except the fold with the smallest error.

    Parameters:
        k: number of folds.
        dataFrame: feature data; wrapped in ``pd.DataFrame``.
        target: pandas object holding the targets, row-aligned with
            ``dataFrame`` (``.iloc`` and ``.values`` are used).
        regressorType: ``"GDB"``, ``"LN"``, ``"SVR"`` or ``"LS"``.

    Returns:
        The regressor fitted on every row outside the best fold.

    Progress (per-sample predictions, per-fold error) is printed to stdout.
    """
    trainDataSet = pd.DataFrame(dataFrame)

    # NOTE(review): ``Regression`` is the fallback for an unrecognised
    # ``regressorType``; its definition is not visible here -- confirm that
    # it supports ``fit``/``predict``.
    regressor = Regression
    if regressorType == "GDB":
        regressor = ensemble.GradientBoostingRegressor(
            n_estimators=1000, max_depth=4, min_samples_split=2,
            learning_rate=0.001, loss='ls')
    elif regressorType == "LN":
        regressor = LinearRegression()
    elif regressorType == "SVR":
        regressor = SVR(kernel='linear', C=1e3)
    elif regressorType == "LS":
        regressor = Lasso(alpha=0.001, normalize=True)

    part_size = int(np.floor(len(trainDataSet) / float(k)))

    def _without_fold(frame, fold):
        # Rows of ``frame`` outside fold number ``fold``. ``pd.concat`` is
        # used because ``DataFrame.append`` no longer exists in pandas 2.
        start = fold * part_size
        stop = start + part_size
        return pd.concat([frame.iloc[0:start], frame.iloc[stop:]])

    best_part = 0
    # Start from infinity so that the first fold always becomes the initial
    # best, however large its error is.
    min_error = float('inf')

    for fold in range(0, k):
        trainSubSet = _without_fold(trainDataSet, fold)
        testSubSet = trainDataSet[fold * part_size:(fold + 1) * part_size]
        targetSubSet = _without_fold(target, fold)
        desireValue = target[fold * part_size:(fold + 1) * part_size]

        regressor.fit(trainSubSet, targetSubSet.values.ravel())
        predictedValue = regressor.predict(testSubSet)

        value = 0.00
        # The per-sample loop has its own variables: reusing the fold index
        # here would make ``best_part`` record a sample index instead.
        for predicted, desired in zip(predictedValue, desireValue.values):
            print(predicted)
            print(desired)
            value += ((predicted - desired) ** 2)

        print("value --  %s" % (value,))
        error = math.sqrt(value / part_size)
        print("error =  %s" % (error,))

        if error < min_error:
            min_error = error
            best_part = fold

    print("min_error = " , min_error )

    # Final model: trained on everything except the best-scoring fold.
    trainSubSet = _without_fold(trainDataSet, best_part)
    targetSubSet = _without_fold(target, best_part)
    regressor.fit(trainSubSet, targetSubSet.values.ravel())
    return regressor
# Quick look at the data: shape of the test targets and one training row.
print y_test.shape
print X_train[123, :]
# Disabled normalisation of the targets by the norm of y_train, kept as an
# inert string literal.
''' norm1 = np.linalg.norm(y_train) if norm1 != 0: y_train, y_test = y_train/norm1, y_test/norm1 print norm1 '''
print y_train.shape
# NOTE(review): ``model`` is bound twice in a row (the SVR instance is
# discarded immediately) and is not used in the visible code -- confirm
# whether later code relies on it.
model = SVR(C=1.0, gamma=1.0)
model = LinearRegression()
# Fit Lasso and ElasticNet with the same alpha on the training data.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train)
# Only the Lasso predictions are scored below.
y_pred = lasso.predict(X_test)
print "MSE", mean_squared_error(y_test, y_pred)
# Baseline: error of always predicting the mean of the test targets.
m = np.mean(y_test)
print "MSE (Mean)", mean_squared_error(y_test, m * np.ones(len(y_test)))
print "r^2 on test data", r2_score(y_test, y_pred)
# Plot the fitted coefficients of both models on the same axes.
plt.plot(enet.coef_, label='Elastic net coefficients')
plt.plot(lasso.coef_, label='Lasso coefficients')
plt.legend(loc='best')
# The title reports the test-set R^2 of each model.
plt.title("Lasso R^2: %f, Elastic Net R^2: %f" % (r2_score(
    y_test, lasso.predict(X_test)), r2_score(y_test, enet.predict(X_test))))
'IncrementalPCA':IncrementalPCA(), 'IsolationForest':IsolationForest(), 'Isomap':Isomap(), 'KMeans':KMeans(), 'KNeighborsClassifier':KNeighborsClassifier(), 'KNeighborsRegressor':KNeighborsRegressor(), 'KernelCenterer':KernelCenterer(), 'KernelDensity':KernelDensity(), 'KernelPCA':KernelPCA(), 'KernelRidge':KernelRidge(), 'LSHForest':LSHForest(), 'LabelPropagation':LabelPropagation(), 'LabelSpreading':LabelSpreading(), 'Lars':Lars(), 'LarsCV':LarsCV(), 'Lasso':Lasso(), 'LassoCV':LassoCV(), 'LassoLars':LassoLars(), 'LassoLarsCV':LassoLarsCV(), 'LassoLarsIC':LassoLarsIC(), 'LatentDirichletAllocation':LatentDirichletAllocation(), 'LedoitWolf':LedoitWolf(), 'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(), 'LinearRegression':LinearRegression(), 'LinearSVC':LinearSVC(), 'LinearSVR':LinearSVR(), 'LocallyLinearEmbedding':LocallyLinearEmbedding(), 'LogisticRegression':LogisticRegression(), 'LogisticRegressionCV':LogisticRegressionCV(), 'MDS':MDS(), 'MLPClassifier':MLPClassifier(),
def test_lasso_alpha_warning():
    """Fitting a Lasso with ``alpha=0`` must raise a UserWarning."""
    X = [[-1], [0], [1]]
    Y = [-1, 0, 1]  # just a straight line

    unpenalized = Lasso(alpha=0)
    assert_warns(UserWarning, unpenalized.fit, X, Y)
n_estimators=10), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (LogisticRegression(), ['predict_proba', 'predict'], create_weird_classification_problem_1()), (IsotonicRegression(out_of_bounds='clip'), ['predict'], create_isotonic_regression_problem_1()), (Earth(), ['predict', 'transform'], create_regression_problem_1()), (Earth(allow_missing=True), ['predict', 'transform'], create_regression_problem_with_missingness_1()), (ElasticNet(), ['predict'], create_regression_problem_1()), (ElasticNetCV(), ['predict'], create_regression_problem_1()), (LassoCV(), ['predict'], create_regression_problem_1()), (Ridge(), ['predict'], create_regression_problem_1()), (RidgeCV(), ['predict'], create_regression_problem_1()), (SGDRegressor(), ['predict'], create_regression_problem_1()), (Lasso(), ['predict'], create_regression_problem_1()), (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]), ['predict', 'predict_proba'], create_weird_classification_problem_1()), (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))], transformer_weights={ 'earth': 1, 'earth2': 2 }), ['transform'], create_weird_classification_problem_1()), (RandomForestRegressor(), ['predict'], create_regression_problem_1()), (CalibratedClassifierCV(LogisticRegression(), 'isotonic'), ['predict_proba'], create_weird_classification_problem_1()), (AdaBoostRegressor(), ['predict'], create_regression_problem_1()), (BaggingRegressor(), ['predict'], create_regression_problem_1()), (BaggingClassifier(), ['predict_proba'], create_weird_classification_problem_1()),
def test_coef_shape_not_zero():
    """``coef_`` must have shape ``(1,)`` after an intercept-free fit.

    The estimator is fitted on a single all-ones feature column with
    all-ones targets.
    """
    features = np.c_[np.ones(3)]
    targets = np.ones(3)

    est_no_intercept = Lasso(fit_intercept=False)
    est_no_intercept.fit(features, targets)

    assert est_no_intercept.coef_.shape == (1, )
K_N_N = KNeighborsClassifier()
SUPPORT_VECTOR = svm.SVC(kernel="linear")

# Ensemble classifiers
RANDOM_FOREST = RandomForestClassifier(n_estimators=100)
GRADIENT_BOOST_CL = GradientBoostingClassifier(n_estimators=100)
ADA_BOOST = AdaBoostClassifier(n_estimators=100)
EXTRA_TREE = ExtraTreesClassifier(n_estimators=100)

# Regressors
GRADIENT_BOOST_RG = GradientBoostingRegressor(n_estimators=100,
                                              learning_rate=0.1)
LINEAR_RG = LinearRegression()
RIDGE_RG = Ridge()
LASSO_RG = Lasso()
SVR_RG = SVR()


def getClassifierMap():
    """Return a dict mapping classifier names to the module-level instances.

    A new dict is built on every call, but its values are the shared
    estimator objects defined at module level.
    """
    CLASSIFIER_MAP = {
        "DECISION_TREE": DECISION_TREE,
        "LOGISTIC_REGRESSION": LOGISTIC_REGRESSION,
        "NAIVE_BAYS": NAIVE_BAYS,
        "K_N_N": K_N_N,
        "SUPPORT_VECTOR": SUPPORT_VECTOR,
        "RANDOM_FOREST": RANDOM_FOREST,
        "GRADIENT_BOOST": GRADIENT_BOOST_CL,
        # Map to the AdaBoost instance; this key previously pointed at the
        # gradient boosting classifier.
        "ADA_BOOST": ADA_BOOST,
        "EXTRA_TREE": EXTRA_TREE,
    }
    return CLASSIFIER_MAP