def test_rank_deficient_design():
    # consistency test that checks that LARS Lasso is handling rank
    # deficient input data (with rank < n_features) in the same way
    # as coordinate descent Lasso
    y = [5, 0, 5]
    for X in ([[5, 0], [0, 5], [10, 10]],
              [[10, 10, 0], [1e-32, 0, 0], [0, 0, 1]]):
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(.1, normalize=False)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = (1. / (2. * 3.) * linalg.norm(y - np.dot(X, coef_lars_)) ** 2
                    + .1 * linalg.norm(coef_lars_, 1))
        coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
                  + .1 * linalg.norm(coef_cd_, 1))
        assert_less(obj_lars, obj_cd * (1. + 1e-8))
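# The test above scores both solutions with the same Lasso objective,
# (1 / (2 * n_samples)) * ||y - Xw||_2^2 + alpha * ||w||_1, written inline.
# A minimal sketch of that objective as a reusable helper (the name
# `lasso_objective` is ours, not sklearn's):
import numpy as np

def lasso_objective(X, y, coef, alpha):
    """Value of the Lasso objective that both LARS and coordinate descent minimize."""
    n_samples = len(y)
    residual = np.asarray(y) - np.dot(X, coef)
    return (1. / (2. * n_samples)) * np.dot(residual, residual) \
        + alpha * np.abs(coef).sum()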
def test_multitarget():
    # Assure that estimators receiving multidimensional y do the right thing
    X = diabetes.data
    Y = np.vstack([diabetes.target, diabetes.target ** 2]).T
    n_targets = Y.shape[1]
    for estimator in (linear_model.LassoLars(), linear_model.Lars()):
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        Y_dec = assert_warns(DeprecationWarning, estimator.decision_function, X)
        assert_array_almost_equal(Y_pred, Y_dec)
        alphas, active, coef, path = (estimator.alphas_, estimator.active_,
                                      estimator.coef_, estimator.coef_path_)
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            assert_array_almost_equal(alphas[k], estimator.alphas_)
            assert_array_almost_equal(active[k], estimator.active_)
            assert_array_almost_equal(coef[k], estimator.coef_)
            assert_array_almost_equal(path[k], estimator.coef_path_)
            assert_array_almost_equal(Y_pred[:, k], y_pred)
def dtc07(self):
    # Flatten y into 1-D form: self.y_train, self.y_test
    self.y01_train = list()
    self.y01_test = list()
    for a in range(len(self.y_train)):
        self.y01_train.append(self.y_train[a][0])
    for b in range(len(self.y_test)):
        self.y01_test.append(self.y_test[b][0])
    if not self.lar_edit.text().strip():
        self.lar_alpha = 1.0
    else:
        self.lar_alpha = float(self.lar_edit.text())
    # Fit the LassoLars model
    self.clf_lar = linear_model.LassoLars(alpha=self.lar_alpha)
    self.clf_lar.fit(self.x_train, self.y01_train)
    self.y_pred = self.clf_lar.predict(self.x_test)
    self.x_pred = self.clf_lar.predict(self.x_train)
    # Populate the result tables
    self.stab(self.lar_table02, self.lar_table03)
    self.eetab(self.lar_table01)
def models_evaluation(self):
    classifiers = [  # Allows for easy selection for SMVI testing
        svm.SVR(),
        linear_model.SGDRegressor(),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(),
        linear_model.ARDRegression(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.TheilSenRegressor(),
        linear_model.LinearRegression()
    ]
    prediction_length = 10000
    trainingData_stock, trainingScores_stock, predictionData_stock = self.get_model_data(
        prediction_length,
        self.joint_data_frame['# of Tweets'].tolist(),
        self.joint_data_frame['Stock Volume'].tolist())
    trainingData_base, trainingScores_base, predictionData_base = self.get_model_data(
        prediction_length,
        self.joint_data_frame['# of Tweets'].tolist(),
        self.joint_data_frame['Base Volume'].tolist())
    # classifiers[2] is BayesianRidge
    predicted_stock = classifiers[2].fit(
        trainingData_stock, trainingScores_stock).predict(predictionData_stock)
    predicted_base = classifiers[2].fit(
        trainingData_base, trainingScores_base).predict(predictionData_base)
    Stock_SMVI = (sum(predicted_stock) / prediction_length) / len(trainingData_stock)
    Base_SMVI = (sum(predicted_base) / prediction_length) / len(trainingData_base)
    os.system('clear')
    print('Stock SMVI: ', Stock_SMVI)
    print('Base SMVI: ', Base_SMVI)
    # Taking the difference between the stock SMVI and the base SMVI removes
    # market-wide effects (e.g. a crash) that would move both alike.
    self.SMVI = abs(abs(Stock_SMVI) - abs(Base_SMVI))
    print('Real SMVI (Unscaled): ', self.SMVI)
def train_models(mod, save=True, cutoff=0.999, percent=50, plot=True, scale=False):
    if mod == 'linear':
        clf = linear_model.LinearRegression(n_jobs=-1)
    elif mod == 'lasso':
        clf = linear_model.Lasso(alpha=1000, max_iter=10000, tol=0.001,
                                 normalize=True, positive=True)
    elif mod == 'lassolars':
        clf = linear_model.LassoLars(alpha=0.001)
    elif mod == 'multilasso':
        clf = linear_model.MultiTaskLasso(alpha=0.1)
    elif mod == 'ridgeCV':
        clf = linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
    elif mod == 'ridge':
        clf = linear_model.Ridge(alpha=1000)  # alpha is a scalar, not a list
    elif mod == 'bayes':
        clf = linear_model.BayesianRidge()
    elif mod == 'huber':
        clf = linear_model.HuberRegressor()
    elif mod == 'poly':
        # clf = poly_clf()
        clf = PolynomialFeatures(degree=2)
    clf, continuum = train(clf, mod, save=save, cutoff=cutoff,
                           percent=percent, plot=plot, scale=scale)
    return clf, continuum
def regression(y_arr, ALPHA):
    x = np.array(range(1, 22)).reshape((21, 1))
    x_predict = np.array(range(22, 32)).reshape(10, 1)
    y_predict = np.zeros((66, 10))
    #####
    ## Different regression methods can be tried here; only the single
    ## model line below needs to change.
    ##
    # clf = linear_model.Ridge(alpha=ALPHA)   ## limit; MAPE=2.0 ALPHA = 20000
    # clf = linear_model.Lasso(alpha=ALPHA)   ## limit; MAPE=2.0 ALPHA = 2000
    clf = linear_model.LassoLars(alpha=ALPHA)  ## limit; MAPE=2.0 ALPHA = 20
    # clf = linear_model.BayesianRidge(alpha_1=ALPHA, alpha_2=ALPHA)  ## limit; MAPE=2.8774 ALPHA = 0.0002
    # clf = LinearRegression()                ## MAPE=3.7188
    for k in range(0, 66):
        clf.fit(x, y_arr[k])
        y_predict[k, :] = clf.predict(x_predict)
    return y_predict
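# The MAPE figures quoted in the comments above are mean absolute percentage
# errors; the original file does not show how they were computed. A minimal
# sketch of the usual definition (the helper name `mape` is ours):
import numpy as np

def mape(y_true, y_pred):
    """Mean absolute percentage error, in percent; assumes y_true has no zeros."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return 100.0 * np.mean(np.abs((y_true - y_pred) / y_true))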
def compute_rmse_regressors():
    x_train, x_test, y_train, y_test = adjust_training_sets(load_dataset())
    classifiers = {
        'SVM.Svr': svm.SVR(),
        'Bayesian Ridge': linear_model.BayesianRidge(),
        'LassoLars': linear_model.LassoLars(),
        'ARDRegression': linear_model.ARDRegression(),
        'PassiveAggressiveRegressor': linear_model.PassiveAggressiveRegressor(),
        'TheilSenRegressor': linear_model.TheilSenRegressor(),
        'LinearRegression': linear_model.LinearRegression()
    }
    response = {}
    for classifier_name, classifier in classifiers.items():
        classifier.fit(x_train, y_train)
        y_pred = classifier.predict(x_test)
        response[classifier_name] = sqrt(mean_squared_error(y_pred, y_test))
    return response
def getFileModel(hold_out_feature, average, perct, idx, mdl):
    """
    Return the path the result is output to and the machine learning model.
    """
    # cat = ''
    # if hold_out_feature and not average:
    #     cat = 'sub_0'
    # elif hold_out_feature and average:
    #     cat = 'sub_avg'
    # elif not hold_out_feature and not average:
    #     cat = 'full_0'
    # else:
    #     cat = 'full_avg'
    cat = 'per_{}'.format(perct)
    print('method: ' + cat + '\tmodel: ' + mdl)
    models = {
        'svr': svm.SVR(),
        'lsl': linear_model.LassoLars(),
        'lr': linear_model.LinearRegression(),
        'dt': DecisionTreeRegressor()
    }
    filename = 'output_2/' + cat + '/' + mdl + '_' + str(idx) + '.txt'
    return filename, models[mdl]
def linear_regression_diabetes(test_set_size=-20):
    alphas = numpy.logspace(-4, -1, 6)
    diabetes = datasets.load_diabetes()
    x_diabetes = diabetes.data
    y_diabetes = diabetes.target
    x_diabetes_train, x_diabetes_test, y_diabetes_train, y_diabetes_test = \
        utils.split_train_test_data(x_diabetes, y_diabetes,
                                    test_set_size=test_set_size)
    # LinearRegression has no alpha parameter, so this prints the same score
    # once per alpha; it serves as the unregularized baseline.
    regression = linear_model.LinearRegression()
    print([
        regression.fit(x_diabetes_train, y_diabetes_train)
                  .score(x_diabetes_test, y_diabetes_test) for alpha in alphas
    ])
    regression = linear_model.Ridge()
    print([
        regression.set_params(alpha=alpha)
                  .fit(x_diabetes_train, y_diabetes_train)
                  .score(x_diabetes_test, y_diabetes_test) for alpha in alphas
    ])
    regression = linear_model.Lasso()
    print([
        regression.set_params(alpha=alpha)
                  .fit(x_diabetes_train, y_diabetes_train)
                  .score(x_diabetes_test, y_diabetes_test) for alpha in alphas
    ])
    regression = linear_model.LassoLars()
    print([
        regression.set_params(alpha=alpha)
                  .fit(x_diabetes_train, y_diabetes_train)
                  .score(x_diabetes_test, y_diabetes_test) for alpha in alphas
    ])
def test_all_classifiers(dataframe, feature_to_predict):
    X_train, X_test, y_train, y_test = create_dataset(dataframe, feature_to_predict)
    classifiers = [
        svm.SVR(),
        RandomForestRegressor(max_depth=2, random_state=0),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(),
        linear_model.TheilSenRegressor()
    ]
    df_result_metric = pd.DataFrame(
        index=[item.__str__().split("(")[0] for item in classifiers] + ["NeuralNetwork"],
        columns=['mse', 'rmse', 'r2', 'correlation'])
    for item in classifiers:
        clf = item
        clf.fit(pd.np.array(X_train), pd.np.array(y_train))
        pred = clf.predict(pd.np.array(X_test))
        predicted_df = pd.DataFrame({
            'observed': pd.np.array(y_test[feature_to_predict]),
            'predicted': pred
        })
        # Metrics for regression
        mse = mean_squared_error(predicted_df.observed, predicted_df.predicted)
        rmse = sqrt(mean_squared_error(predicted_df.observed, predicted_df.predicted))
        r2 = r2_score(predicted_df.observed, predicted_df.predicted)
        correlation = pd.np.corrcoef(pd.np.array(y_test[feature_to_predict]), pred)
        # Store metrics for regression
        name = item.__str__().split("(")[0]
        df_result_metric.loc[name]['mse'] = mse
        df_result_metric.loc[name]['rmse'] = rmse
        df_result_metric.loc[name]['r2'] = r2
        df_result_metric.loc[name]['correlation'] = correlation[0, 1]
    return df_result_metric
def __init__(self, data, classifier='linear', save=True, load=False,
             fname='FASMA_ML.pkl'):
    self.classifier = classifier
    self.data = data
    self.save = save
    self.load = load
    self.fname = fname
    self.X_train, self.y_train = data.X, data.y

    if self.classifier == 'linear':
        self.clf = linear_model.LinearRegression(n_jobs=-1)
    elif self.classifier == 'lasso':
        self.clf = linear_model.Lasso(alpha=0.00001)
    elif self.classifier == 'lassolars':
        self.clf = linear_model.LassoLars(alpha=1000)
    elif self.classifier == 'multilasso':
        self.clf = linear_model.MultiTaskLasso(alpha=1000)
    elif self.classifier == 'ridgeCV':
        self.clf = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0, 100])
    elif self.classifier == 'ridge':
        self.clf = linear_model.Ridge(alpha=10)
    elif self.classifier == 'bayes':
        self.clf = linear_model.BayesianRidge()
    elif self.classifier == 'huber':
        self.clf = linear_model.HuberRegressor()

    # Train the classifier
    if not self.load:
        t = time()
        self.train_classifier()
        print('Trained classifier in {}s'.format(round(time() - t, 2)))
    else:
        with open(self.fname, 'rb') as f:
            self.clf = cPickle.load(f)
def test_lasso_lars_vs_lasso_cd(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results.
    """
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2):
        clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8,
                                  normalize=False).fit(X, y)
        err = np.linalg.norm(clf1.coef_ - clf2.coef_)
        assert_less(err, 1e-3)

    # same test, with normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
                                  tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
def test_lars_drop_for_good():
    # Create an ill-conditioned situation in which the LARS has to go
    # far in the path to converge, and check that LARS and coordinate
    # descent give the same answers
    X = [[1e20, 1e20, 0],
         [-1e-32, 0, 0],
         [1, 1, 1]]
    y = [10, 10, 1]
    alpha = .0001

    def objective_function(coef):
        return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2
                + alpha * linalg.norm(coef, 1))

    lars = linear_model.LassoLars(alpha=alpha, normalize=False)
    assert_warns(ConvergenceWarning, lars.fit, X, y)
    lars_coef_ = lars.coef_
    lars_obj = objective_function(lars_coef_)

    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-10, normalize=False)
    with ignore_warnings():
        cd_coef_ = coord_descent.fit(X, y).coef_
    cd_obj = objective_function(cd_coef_)

    assert_less(lars_obj, cd_obj * (1. + 1e-8))
def run_all_models(dataframe):
    X_train, X_test, y_train, y_test = create_dataset(dataframe)
    classifiers = [
        svm.SVR(),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(),
        linear_model.ARDRegression(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.TheilSenRegressor()
    ]
    for item in classifiers:
        print("##################")
        print(item.__str__().split("(")[0])
        clf = item
        clf.fit(pd.np.array(X_train), pd.np.array(y_train))
        pred = clf.predict(pd.np.array(X_test))
        rms = sqrt(mean_squared_error(pd.np.array(y_test), pred))
        prediction_to_plot = pd.DataFrame({
            'observed': pd.np.array(y_test[pr.PowerPV]),
            'predicted': pred
        })
        x = prediction_to_plot[:48].index
        fig = plt.figure()
        for i_feature in prediction_to_plot.columns:
            plt.plot(x, prediction_to_plot[i_feature][:48],
                     label=str(i_feature))
        plt.title(item.__str__().split("(")[0])
        plt.legend(loc='best')
        file_name = 'results/' + item.__str__().split("(")[0]
        plt.savefig(file_name)
        plt.close(fig)
        print("the rmse : " + str(rms))
        print("##################")
    print("END")
def CrimePipeline(train, test):
    preds = []
    tr_data = train[:, 1:]
    target = train[:, 0]
    test = test[:, 1:]

    clf = ensemble.RandomForestRegressor(n_estimators=101, random_state=0)
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    clf = linear_model.LassoLars(alpha=0.0002)
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    clf = linear_model.ElasticNet(alpha=0.002, l1_ratio=0.6)
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    clf = linear_model.BayesianRidge()
    clf.fit(tr_data, target)
    preds.append(clf.predict(test))

    # average the four models' predictions
    return np.mean(np.array(preds), axis=0)
    if save:
        with open('FASMA_ML.pkl', 'wb') as f:
            cPickle.dump(clf, f)
    return clf


if __name__ == '__main__':
    args = _parser()
    if args.train:
        if args.classifier == 'linear':
            clf = linear_model.LinearRegression()
        elif args.classifier == 'ridge':
            clf = linear_model.RidgeCV(alphas=[100.0, 0.01, 0.1, 1.0, 10.0])
        elif args.classifier == 'lasso':
            clf = linear_model.LassoLars(alpha=0.001)
        clf = train(clf, save=args.save, plot=args.plot)
    else:
        with open('FASMA_ML.pkl', 'rb') as f:
            clf = cPickle.load(f)

    if args.spectrum:
        raise SystemExit('Please run ARES yourself. This is difficult enough')
    elif args.linelist:
        df = pd.read_csv('combined.csv')
        df.dropna(axis=1, inplace=True)
        wavelengths = np.array(
            map(lambda x: round(float(x), 2), df.columns[1:-4]))
        x = prepare_linelist(args.linelist, wavelengths=wavelengths)
        p = clf.predict(x)[0]
        print('\nStellar atmospheric parameters:')
def train_test_all_regressors_with_cross_validation(X, y, seed=SEED):
    """
    Train, test and print the results of most available regressors
    presented in sklearn using cross validation.

    Args:
        X (DataFrame): matrix with the features
        y (Series): values of the target
        seed (int): random seed for the ensemble and tree regressors
    """
    assert isinstance(X, pd.core.frame.DataFrame)
    assert isinstance(y, pd.core.series.Series)
    assert isinstance(seed, int)

    from sklearn import linear_model
    from sklearn import tree
    from sklearn import ensemble
    from sklearn import neighbors
    from sklearn import neural_network
    from sklearn.model_selection import cross_val_score

    models = []
    models.append(("BayesianRidge", linear_model.BayesianRidge()))
    models.append(("ElasticNet", linear_model.ElasticNet()))
    models.append(("HuberRegressor", linear_model.HuberRegressor()))
    models.append(("Lars", linear_model.Lars()))
    models.append(("Lasso", linear_model.Lasso()))
    models.append(("LassoLars", linear_model.LassoLars()))
    models.append(("LinearRegression", linear_model.LinearRegression()))
    models.append(("OrthogonalMatchingPursuit",
                   linear_model.OrthogonalMatchingPursuit()))
    models.append(("PassiveAggressiveRegressor",
                   linear_model.PassiveAggressiveRegressor()))
    models.append(("Ridge", linear_model.Ridge()))
    models.append(("SGDRegressor", linear_model.SGDRegressor()))
    models.append(("AdaBoostRegressor",
                   ensemble.AdaBoostRegressor(random_state=seed)))
    models.append(("BaggingRegressor",
                   ensemble.BaggingRegressor(random_state=seed)))
    models.append(("ExtraTreesRegressor",
                   ensemble.ExtraTreesRegressor(random_state=seed)))
    models.append(("GradientBoostingRegressor",
                   ensemble.GradientBoostingRegressor(random_state=seed)))
    models.append(("RandomForestRegressor",
                   ensemble.RandomForestRegressor(random_state=seed)))
    models.append(("DecisionTreeRegressor",
                   tree.DecisionTreeRegressor(random_state=seed)))
    models.append(("KNeighborsRegressor", neighbors.KNeighborsRegressor()))
    models.append(("MLPRegressor", neural_network.MLPRegressor()))

    best_rmse = 1000000000.0
    best_model = ''
    for name, model in models:
        print('-' * 78)
        print(name)
        print('-' * 78)
        scores = cross_val_score(model, X, y,
                                 scoring='neg_root_mean_squared_error', cv=5)
        scores = -scores
        scores_mean = scores.mean()
        scores_std = scores.std()
        print("RMSE: %0.3f (+/- %0.2f)" % (scores_mean, scores_std * 2))
        if scores_mean < best_rmse:
            best_rmse = scores_mean
            best_model = name

    print('-' * 78)
    print('Best model: ' + best_model)
    print('Best RMSE: ' + str(best_rmse))
    print('-' * 78)
def train_test_all_regressors(X_train, X_test, y_train, y_test, seed=SEED):
    """
    Train, test and print the results of most available regressors
    presented in sklearn.

    Args:
        X_train (DataFrame): matrix with features of the training set
        y_train (Series): values of the target of the training set
        X_test (DataFrame): matrix with features of the test set
        y_test (Series): values of the target of the test set
        seed (int): random seed for the ensemble and tree regressors
    """
    assert isinstance(X_train, pd.core.frame.DataFrame)
    assert isinstance(X_test, pd.core.frame.DataFrame)
    assert isinstance(y_train, pd.core.series.Series)
    assert isinstance(y_test, pd.core.series.Series)
    assert isinstance(seed, int)

    from sklearn import linear_model
    from sklearn import tree
    from sklearn import ensemble
    from sklearn import neighbors
    from sklearn import neural_network

    models = []
    models.append(("BayesianRidge", linear_model.BayesianRidge()))
    models.append(("ElasticNet", linear_model.ElasticNet()))
    models.append(("HuberRegressor", linear_model.HuberRegressor()))
    models.append(("Lars", linear_model.Lars()))
    models.append(("Lasso", linear_model.Lasso()))
    models.append(("LassoLars", linear_model.LassoLars()))
    models.append(("LinearRegression", linear_model.LinearRegression()))
    models.append(("OrthogonalMatchingPursuit",
                   linear_model.OrthogonalMatchingPursuit()))
    models.append(("PassiveAggressiveRegressor",
                   linear_model.PassiveAggressiveRegressor()))
    models.append(("Ridge", linear_model.Ridge()))
    models.append(("SGDRegressor", linear_model.SGDRegressor()))
    models.append(("AdaBoostRegressor",
                   ensemble.AdaBoostRegressor(random_state=seed)))
    models.append(("BaggingRegressor",
                   ensemble.BaggingRegressor(random_state=seed)))
    models.append(("ExtraTreesRegressor",
                   ensemble.ExtraTreesRegressor(random_state=seed)))
    models.append(("GradientBoostingRegressor",
                   ensemble.GradientBoostingRegressor(random_state=seed)))
    models.append(("RandomForestRegressor",
                   ensemble.RandomForestRegressor(random_state=seed)))
    models.append(("DecisionTreeRegressor",
                   tree.DecisionTreeRegressor(random_state=seed)))
    models.append(("KNeighborsRegressor", neighbors.KNeighborsRegressor()))
    models.append(("MLPRegressor", neural_network.MLPRegressor()))

    best_mean_absolute_percentage_error = 100
    best_model = ''
    for name, model in models:
        print('-' * 78)
        print(name)
        print('-' * 78)
        model.fit(X_train, y_train)
        print('Training Set')
        y_pred = model.predict(X_train)
        print_results(y_train, y_pred)
        print('Testing Set')
        y_pred = model.predict(X_test)
        print_results(y_test, y_pred)
        mean_absolute_percentage_error_value = mean_absolute_percentage_error(
            y_test, y_pred)
        if mean_absolute_percentage_error_value < best_mean_absolute_percentage_error:
            # was "= mean_absolute_percentage_error", which stored the function
            # object itself instead of its value
            best_mean_absolute_percentage_error = mean_absolute_percentage_error_value
            best_model = name

    print('-' * 78)
    print('Best model: ' + best_model)
    print('Best mean absolute percentage error: ' +
          str(best_mean_absolute_percentage_error))
    print('-' * 78)
from math import sqrt
import seaborn as sns
import matplotlib.pyplot as plt

data = pd.read_csv('C:/Users/vishnu.sk/Desktop/LifeCycleSavings.csv')
target = "sr"
columns = data.columns.tolist()
columns.remove('sr')
columns.remove('country')

train = data.sample(frac=0.7, random_state=0)
test = data.loc[~data.index.isin(train.index)]

regressor = [
    SVR(kernel='rbf', gamma=0.7, C=1),
    linear_model.Ridge(alpha=.5),
    linear_model.Lasso(alpha=0.1),
    linear_model.LassoLars(alpha=.1),
    linear_model.BayesianRidge(),
    MLPRegressor(),
    DecisionTreeRegressor(),
    KernelRidge(),
    PassiveAggressiveRegressor(),
    RANSACRegressor(),
    TheilSenRegressor(),
]

result_cols = ["Regressor", "Accuracy"]
result_frame = pd.DataFrame(columns=result_cols)

for model in regressor:
    name = model.__class__.__name__
    model.fit(train[columns], train[target])
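# The excerpt's loop is cut off after the fit, so result_frame is declared
# but never filled above. A minimal sketch of the evaluation step that the
# "Regressor"/"Accuracy" columns suggest (scoring with R^2 via .score is our
# assumption, not taken from the original):
for model in regressor:
    name = model.__class__.__name__
    model.fit(train[columns], train[target])
    accuracy = model.score(test[columns], test[target])  # R^2 on the hold-out split
    row = pd.DataFrame([[name, accuracy]], columns=result_cols)
    result_frame = pd.concat([result_frame, row], ignore_index=True)
print(result_frame)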
neigh = KNeighborsRegressor(n_neighbors=2)
neighFit = neigh.fit(x_train, y_train)

mlp = MLPRegressor()
mlpFit = mlp.fit(x_train, y_train)

regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regrFit = regr.fit(x_train, y_train)

clfRidge = Ridge(alpha=1.0)
clfRidgeFit = clfRidge.fit(x_train, y_train)

clfBayesian = linear_model.BayesianRidge()
clfBayesianFit = clfBayesian.fit(x_train, y_train)

reg = linear_model.LassoLars(alpha=0.01)
regFit = reg.fit(x_train, y_train)

bag = BaggingRegressor()
bagFit = bag.fit(x_train, y_train)

DT_MAD = mean_absolute_error(y_test, DT_regressionFit.predict(x_test))
SVR_MAD = mean_absolute_error(y_test, svr_regressionFit.predict(x_test))
KNN_MAD = mean_absolute_error(y_test, neighFit.predict(x_test))
MLP_MAD = mean_absolute_error(y_test, mlpFit.predict(x_test))
# was mlpFit.predict: the AdaBoost score must come from regrFit
regr_MAD = mean_absolute_error(y_test, regrFit.predict(x_test))
clfRidge_MAD = mean_absolute_error(y_test, clfRidgeFit.predict(x_test))
clfBayesian_MAD = mean_absolute_error(y_test, clfBayesianFit.predict(x_test))
def trainingMethod(self):
    self.model = linear_model.LassoLars()
    self.lassoLarsModel = self.model.fit(self.dataset, self.target)
    self.predictions = self.lassoLarsModel.predict(self.dataset)
    self.r_score = self.lassoLarsModel.score(self.dataset, self.target)
def solve(self, results, gradient_results=None, solver=None, settings=None,
          matrix=None, verbose=False):
    """
    Determines gPC coefficients

    Parameters
    ----------
    results : [n_grid x n_out] np.ndarray of float
        Results from simulations with n_out output quantities
    gradient_results : ndarray of float [n_gradient x n_out x dim], optional, default: None
        Gradient of results in original parameter space in specific grid points
    solver : str
        Solver to determine the gPC coefficients
        - 'Moore-Penrose' ... Pseudoinverse of gPC matrix (SGPC.Reg, EGPC)
        - 'OMP' ... Orthogonal Matching Pursuit, sparse recovery approach (SGPC.Reg, EGPC)
        - 'LarsLasso' ... Least-Angle Regression using Lasso model (SGPC.Reg, EGPC)
        - 'NumInt' ... Numerical integration, spectral projection (SGPC.Quad)
    settings : dict
        Solver settings
        - 'Moore-Penrose' ... None
        - 'OMP' ... {"n_coeffs_sparse": int} Number of gPC coefficients != 0,
          or {"sparsity": float 0...1}
        - 'LarsLasso' ... {"alpha": float 0...1} Regularization parameter
        - 'NumInt' ... None
    matrix : ndarray of float, optional, default: self.gpc_matrix or
             [self.gpc_matrix, self.gpc_matrix_gradient]
        Matrix to invert. Depending on gradient_enhanced option, this matrix consists
        of the standard gPC matrix and its derivatives.
    verbose : bool
        Boolean value to determine if to print out the progress into the standard output

    Returns
    -------
    coeffs : ndarray of float [n_coeffs x n_out]
        gPC coefficients
    """
    ge_str = ""
    if matrix is None:
        matrix = self.gpc_matrix
        if self.gradient is False:
            matrix = self.gpc_matrix
            ge_str = ""
        else:
            if not solver == 'NumInt':
                if self.gpc_matrix_gradient is not None:
                    matrix = np.vstack((self.gpc_matrix,
                                        self.gpc_matrix_gradient))
                else:
                    matrix = self.gpc_matrix
                ge_str = "(gradient enhanced)"
            else:
                # was a bare Warning(...) call, which builds an exception
                # object and discards it without warning anyone
                warnings.warn("Gradient enhanced version not applicable in "
                              "case of numerical integration (quadrature).")

    # use default solver if not specified
    if solver is None:
        solver = self.solver

    # use default solver settings if not specified
    # (was "if solver is None", which can never be true at this point)
    if settings is None:
        settings = self.settings

    iprint("Determine gPC coefficients using '{}' solver {}...".format(solver, ge_str),
           tab=0, verbose=verbose)

    # construct results array
    if not solver == 'NumInt' and gradient_results is not None:
        # transform gradient of results according to projection
        if self.p_matrix is not None:
            gradient_results = np.matmul(gradient_results,
                                         self.p_matrix.transpose() *
                                         self.p_matrix_norm[np.newaxis, :])
        results_complete = np.vstack((results, ten2mat(gradient_results)))
    else:
        results_complete = results

    #################
    # Moore-Penrose #
    #################
    if solver == 'Moore-Penrose':
        # determine pseudoinverse of gPC matrix
        self.matrix_inv = np.linalg.pinv(matrix)
        try:
            coeffs = np.matmul(self.matrix_inv, results_complete)
        except ValueError:
            raise AttributeError("Please check format of parameter sim_results: "
                                 "[n_grid (* dim) x n_out] np.ndarray.")

    ###############################
    # Orthogonal Matching Pursuit #
    ###############################
    elif solver == 'OMP':
        # transform gPC matrix to fastmat format
        matrix_fm = fm.Matrix(matrix)

        if results_complete.ndim == 1:
            results_complete = results_complete[:, np.newaxis]

        # determine gPC-coefficients of extended basis using OMP
        if "n_coeffs_sparse" in settings.keys():
            n_coeffs_sparse = int(settings["n_coeffs_sparse"])
        elif "sparsity" in settings.keys():
            n_coeffs_sparse = int(np.ceil(matrix.shape[1] * settings["sparsity"]))
        else:
            raise AttributeError("Please specify 'n_coeffs_sparse' or "
                                 "'sparsity' in solver settings dictionary!")

        coeffs = fm.algs.OMP(matrix_fm, results_complete, n_coeffs_sparse)

    ################################
    # Least-Angle Regression Lasso #
    ################################
    elif solver == 'LarsLasso':
        if results_complete.ndim == 1:
            results_complete = results_complete[:, np.newaxis]

        # determine gPC-coefficients of extended basis using LarsLasso
        reg = linear_model.LassoLars(alpha=settings["alpha"], fit_intercept=False)
        reg.fit(matrix, results_complete)
        coeffs = reg.coef_

        if coeffs.ndim == 1:
            coeffs = coeffs[:, np.newaxis]
        else:
            coeffs = coeffs.transpose()

    # TODO: @Lucas: Please add GPU support
    #########################
    # Numerical Integration #
    #########################
    elif solver == 'NumInt':
        # check if quadrature rule (grid) fits to the probability density
        # distribution (pdf)
        grid_pdf_fit = True
        for i_p, p in enumerate(self.problem.parameters_random):
            if self.problem.parameters_random[p].pdf_type == 'beta':
                if not (self.grid.grid_type[i_p] == 'jacobi'):
                    grid_pdf_fit = False
                    break
            elif self.problem.parameters_random[p].pdf_type in ['norm', 'normal']:
                if not (self.grid.grid_type[i_p] == 'hermite'):
                    grid_pdf_fit = False
                    break

        # if not, calculate joint pdf
        if not grid_pdf_fit:
            joint_pdf = np.ones(self.grid.coords_norm.shape)
            for i_p, p in enumerate(self.problem.parameters_random):
                joint_pdf[:, i_p] = \
                    self.problem.parameters_random[p].pdf_norm(
                        x=self.grid.coords_norm[:, i_p])
            joint_pdf = np.array([np.prod(joint_pdf, axis=1)]).transpose()

            # weight sim_results with the joint pdf
            results_complete = results_complete * joint_pdf * 2 ** self.problem.dim

        # scale rows of gpc matrix with quadrature weights
        matrix_weighted = np.matmul(np.diag(self.grid.weights), matrix)

        # determine gpc coefficients [n_coeffs x n_output]
        coeffs = np.matmul(results_complete.transpose(),
                           matrix_weighted).transpose()

    else:
        # was missing the .format(solver) for the '{}' placeholder
        raise AttributeError("Unknown solver: '{}'!".format(solver))

    return coeffs
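# A minimal usage sketch of solve() with the LarsLasso backend, assuming a
# fitted regression gPC object `gpc` and simulation results `res` shaped
# [n_grid x n_out] (both names are hypothetical, not from the excerpt):
coeffs = gpc.solve(results=res,
                   solver='LarsLasso',
                   settings={"alpha": 1e-3},  # LassoLars regularization strength
                   verbose=True)
# coeffs has shape [n_coeffs x n_out]: one column of gPC coefficients per output.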
predicted_medv = br_reg.predict(df_test)

# 3.3.2 Model performance
br_mse = round(mean_squared_error(expected_medv, predicted_medv), 3)
br_r2 = round(r2_score(expected_medv, predicted_medv), 5)

plt.subplot(2, 2, 2)
sns.regplot(expected_medv, predicted_medv, color='red')
plt.title(
    'Bayesian Ridge Linear Regression.\nMSE= {0} , R-Squared= {1}'.format(
        br_mse, br_r2))

# 3.4 Lasso (fitted with the LARS algorithm)
# 3.4.1 Creating a model and fit it
lasso_reg = linear_model.LassoLars(alpha=.1)
lasso_reg.fit(df_train, medv_train)
predicted_medv = lasso_reg.predict(df_test)

# 3.4.2 Model performance
lasso_mse = round(mean_squared_error(expected_medv, predicted_medv), 3)
lasso_r2 = round(r2_score(expected_medv, predicted_medv), 5)

plt.subplot(2, 2, 3)
sns.regplot(expected_medv, predicted_medv, color='orange')
plt.xlabel('Expected Value')
plt.ylabel('Predicted Value')
plt.title('Lasso Linear Regression.\nMSE= {0} , R-Squared= {1}'.format(
    lasso_mse, lasso_r2))
def test_lasso_lars_vs_R_implementation():
    # Test that sklearn LassoLars implementation agrees with the LassoLars
    # implementation available in R (lars library) under the following
    # scenarios:
    # 1) fit_intercept=False and normalize=False
    # 2) fit_intercept=True and normalize=True

    # Let's generate the data used in the bug report 7778
    y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822,
                  -19.42109366])
    x = np.array([[0.47299829, 0, 0, 0, 0],
                  [0.08239882, 0.85784863, 0, 0, 0],
                  [0.30114139, -0.07501577, 0.80895216, 0, 0],
                  [-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
                  [-0.69363927, 0.06754067, 0.18064514, -0.0803561,
                   0.40427291]])

    X = x.T

    ###########################################################################
    # Scenario 1: Let's compare R vs sklearn when fit_intercept=False and
    # normalize=False
    ###########################################################################
    #
    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
    #                         trace=TRUE, normalize=FALSE)
    # r = t(model_lasso_lars$beta)
    #
    r = np.array([[0, 0, 0, 0, 0, -79.810362809499026, -83.528788732782829,
                   -83.777653739190711, -83.784156932888934,
                   -84.033390591756657],
                  [0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0,
                   0.025219751009936],
                  [0, -3.577397088285891, -4.702795355871871,
                   -7.016748621359461, -7.614898471899412, -0.336938391359179,
                   0, 0, 0.001213370600853, 0.048162321585148],
                  [0, 0, 0, 2.231558436628169, 2.723267514525966,
                   2.811549786389614, 2.813766976061531, 2.817462468949557,
                   2.817368178703816, 2.816221090636795],
                  [0, 0, -1.218422599914637, -3.457726183014808,
                   -4.021304522060710, -45.827461592423745,
                   -47.776608869312305, -47.911561610746404,
                   -47.914845922736234, -48.039562334265717]])

    model_lasso_lars = linear_model.LassoLars(alpha=0, fit_intercept=False,
                                              normalize=False)
    model_lasso_lars.fit(X, y)
    skl_betas = model_lasso_lars.coef_path_

    assert_array_almost_equal(r, skl_betas, decimal=12)
    ###########################################################################

    ###########################################################################
    # Scenario 2: Let's compare R vs sklearn when fit_intercept=True and
    # normalize=True
    #
    # Note: When normalize is equal to True, R returns the coefficients in
    # their original units, that is, they are rescaled back, whereas sklearn
    # does not do that, therefore, we need to do this step before comparing
    # their results.
    ###########################################################################
    #
    # The R result was obtained using the following code:
    #
    # library(lars)
    # model_lasso_lars2 = lars(X, t(y), type="lasso", intercept=TRUE,
    #                          trace=TRUE, normalize=TRUE)
    # r2 = t(model_lasso_lars2$beta)

    r2 = np.array([[0, 0, 0, 0, 0],
                   [0, 0, 0, 8.371887668009453, 19.463768371044026],
                   [0, 0, 0, 0, 9.901611055290553],
                   [0, 7.495923132833733, 9.245133544334507,
                    17.389369207545062, 26.971656815643499],
                   [0, 0, -1.569380717440311, -5.924804108067312,
                    -7.996385265061972]])

    model_lasso_lars2 = linear_model.LassoLars(alpha=0, fit_intercept=True,
                                               normalize=True)
    model_lasso_lars2.fit(X, y)
    skl_betas2 = model_lasso_lars2.coef_path_

    # Let's rescale back the coefficients returned by sklearn before comparing
    # against the R result (read the note above)
    temp = X - np.mean(X, axis=0)
    normx = np.sqrt(np.sum(temp ** 2, axis=0))
    skl_betas2 /= normx[:, np.newaxis]

    assert_array_almost_equal(r2, skl_betas2, decimal=12)
def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when using the positive option

    # This test is basically a copy of the above with the additional positive
    # option. However, for the middle part, the comparison of coefficient
    # values for a range of alphas, we had to make an adaptation. See below.

    # not normalized data
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)

    # The range of alphas chosen for coefficient comparison here is restricted
    # as compared with the above test without the positive option. This is due
    # to the circumstance that the Lars-Lasso algorithm does not converge to
    # the least-squares-solution for small alphas, see 'Least Angle Regression'
    # by Efron et al 2004. The coefficients are typically in congruence up to
    # the smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(fit_intercept=False, alpha=alpha,
                                      normalize=False, positive=True).fit(X, y)
        clf2 = linear_model.Lasso(fit_intercept=False, alpha=alpha, tol=1e-8,
                                  normalize=False, positive=True).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert_less(err, 1e-3)

    # normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
                                  tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T[:-1], alphas[:-1]):  # don't include alpha=0
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert_less(error, 0.01)
# features is the cols - 1 (the 1 is the output label)
numFeatures = dataframe.shape[1] - 1
print(numFeatures)

X = dataframe[features].values
Y = dataframe[output_label]

# prepare configuration for cross validation test harness
num_folds = 10
seed = 7

# prepare models
models = []
models.append(('LR', LinearRegression()))
models.append(('Ridge', Ridge()))
# models.append(('ARDRegression', linear_model.ARDRegression()))
models.append(('Lasso', linear_model.Lasso()))
models.append(('LassoCV', linear_model.LassoCV()))
models.append(('LassoLars', linear_model.LassoLars()))
# Decision tree
models.append(('Dec tree', tree.DecisionTreeRegressor()))
# sanity check
models.append(('Dummy', DummyRegressor("median")))


def keras_baseline_model():
    # create model
    model = Sequential()
    model.add(
        Dense(128, input_dim=numFeatures, init='normal', activation='relu'))
    model.add(Dense(1, init='normal', activation="relu"))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    # the excerpt ends here; a baseline builder must hand the model back
    return model
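# The "cross validation test harness" configured above (num_folds, seed) is
# not shown in the excerpt. A minimal sketch of how such a harness typically
# evaluates the models list (the MSE scoring choice is our assumption):
from sklearn.model_selection import KFold, cross_val_score

for name, model in models:
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
    scores = cross_val_score(model, X, Y, cv=kfold,
                             scoring='neg_mean_squared_error')
    print('%s: MSE %.3f (+/- %.3f)' % (name, -scores.mean(), scores.std()))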
def regression_ipyparallel(pars):
    """update spatial footprints and background through Basis Pursuit Denoising

    for each pixel i solve the problem
        [A(i,:),b(i)] = argmin sum(A(i,:))
    subject to
        || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T);

    for each pixel the search is limited to a few spatial components

    Parameters:
    ----------
    C_name: string
        memmap C

    Y_name: string
        memmap Y

    idxs_Y: np.array
        indices of the pixels processed by this call

    idxs_C: np.array
        for each pixel, indices of the components whose calcium traces
        are considered

    method_least_square:
        method to perform the regression for the basis pursuit denoising.
             'nnls_L0'. Nonnegative least square with L0 penalty
             'lasso_lars' lasso lars function from scikit learn
             'lasso_lars_old' lasso lars from old implementation, will be deprecated

    Returns:
    --------
    px: np.ndarray
        positions of the regression

    idxs_C: np.ndarray
        indices of the calcium traces for each computed component

    a: learned weight

    Raises:
    -------
    Exception('Least Square Method not found!') if method_least_square is unknown
    """
    # /!\ need to import since it is run from within the server
    import numpy as np
    import sys
    import gc
    from sklearn import linear_model

    Y_name, C_name, noise_sn, idxs_C, idxs_Y, method_least_square, cct = pars

    # we load from the memmap file
    if isinstance(Y_name, basestring):
        Y, _, _ = load_memmap(Y_name)
        Y = np.array(Y[idxs_Y, :])
    else:
        Y = Y_name[idxs_Y, :]

    if isinstance(C_name, basestring):
        C = np.load(C_name, mmap_mode='r')
        C = np.array(C)
    else:
        C = C_name

    _, T = np.shape(C)

    # initialize values
    As = []

    for y, px in zip(Y, idxs_Y):
        c = C[idxs_C[px], :]
        idx_only_neurons = idxs_C[px]
        if len(idx_only_neurons) > 0:
            cct_ = cct[idx_only_neurons[idx_only_neurons < len(cct)]]
        else:
            cct_ = []

        if np.size(c) > 0:
            sn = noise_sn[px] ** 2 * T

            if method_least_square == 'lasso_lars_old':
                # lasso lars from old implementation, will be deprecated
                a = lars_regression_noise_old(y, c.T, 1, sn)[2]

            elif method_least_square == 'nnls_L0':
                # Nonnegative least square with L0 penalty
                a = nnls_L0(c.T, y, 1.2 * sn)

            elif method_least_square == 'lasso_lars':
                # lasso lars function from scikit learn
                lambda_lasso = 0 if np.size(cct_) == 0 else \
                    .5 * noise_sn[px] * np.sqrt(np.max(cct_)) / T
                clf = linear_model.LassoLars(alpha=lambda_lasso, positive=True)
                a_lrs = clf.fit(np.array(c.T), np.ravel(y))
                a = a_lrs.coef_

            else:
                raise Exception(
                    'Least Square Method not found!' + method_least_square)

            if not np.isscalar(a):
                a = a.T

            As.append((px, idxs_C[px], a))

    if isinstance(Y_name, basestring):
        del Y
    if isinstance(C_name, basestring):
        del C
    if isinstance(Y_name, basestring):
        gc.collect()

    return As
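# A self-contained sketch of the per-pixel regression used above: recover
# sparse, nonnegative weights of one pixel's trace onto the component traces
# with LassoLars, using the same lambda heuristic. All data here is synthetic
# and the variable names are ours.
import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(0)
T, n_comp = 500, 4
C = rng.rand(n_comp, T)                    # temporal traces of the components
a_true = np.array([0.8, 0.0, 0.3, 0.0])   # sparse nonnegative weights
y = a_true.dot(C) + 0.01 * rng.randn(T)   # one pixel's trace, plus noise

noise_sn = 0.01                            # noise std of this pixel (assumed known)
cct = np.sum(C * C, axis=1)                # trace energies, as in the code above
lambda_lasso = .5 * noise_sn * np.sqrt(np.max(cct)) / T

clf = linear_model.LassoLars(alpha=lambda_lasso, positive=True)
a_hat = clf.fit(C.T, y).coef_              # zeroed entries drop the component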
from sklearn import ensemble
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error
# from feature_selection.MultiModelTest import multiModelTest

ESTIMATORS = {
    "Linear Regression": linear_model.LinearRegression(),
    "Lasso Regression": linear_model.Lasso(alpha=0.5),
    "Elastic Net": linear_model.ElasticNet(alpha=0.5, l1_ratio=0.7),
    "Ridge": linear_model.Ridge(fit_intercept=False),
    "Lasso Lars": linear_model.LassoLars(alpha=0.5),
    "Bayesian Ridge": linear_model.BayesianRidge(compute_score=True),
    "AdaBoost": ensemble.AdaBoostRegressor(),
    "Bagging": ensemble.BaggingRegressor(),
    "Extra trees": ensemble.ExtraTreesRegressor(n_estimators=10,
                                                max_features=32,
                                                random_state=0),
    "K-nn": KNeighborsRegressor(),
}

ESTIMATORS_SINGLE = {
DTR = DTR.fit(X_train, y_train)
ranks["DTR"] = ranking(np.abs(DTR.feature_importances_), colnames)
Y_target_DTR = DTR.predict(X_test)

# Decision Tree Classifier
DTC = DecisionTreeClassifier(max_depth=None, min_samples_split=2,
                             random_state=0)
DTC = DTC.fit(X_train, y_train)
ranks["DTC"] = ranking(np.abs(DTC.feature_importances_), colnames)
Y_target_DTC = DTC.predict(X_test)

# LARS Lasso
LARS_L = linear_model.LassoLars(alpha=.4)
LARS_L = LARS_L.fit(X_train, y_train)
ranks["LARS_L"] = ranking(np.abs(LARS_L.coef_), colnames)
Y_target_lars_l = LARS_L.predict(X_test)

# Bayesian Ridge
BR = linear_model.BayesianRidge()
BR = BR.fit(X_train, y_train)
ranks["BR"] = ranking(np.abs(BR.coef_), colnames)
Y_target_BR = BR.predict(X_test)

# Random Forest Regressor
RFR = RandomForestRegressor(n_jobs=-1, n_estimators=50, verbose=0)
RFR = RFR.fit(X_train, y_train)
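# `ranking` is called above but not defined in this excerpt. A plausible
# minimal sketch (hypothetical, not from the original): min-max scale the
# scores to [0, 1] and pair them with the feature names.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def ranking(scores, names, order=1):
    """Scale feature scores to [0, 1] and map them onto the feature names."""
    minmax = MinMaxScaler()
    scaled = minmax.fit_transform(order * np.array([scores]).T).T[0]
    return dict(zip(names, (round(float(s), 2) for s in scaled)))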
def regression_ipyparallel(pars):
    # need to import since it is run from within the server
    import numpy as np
    import sys
    import gc
    from sklearn import linear_model

    Y_name, C_name, noise_sn, idxs_C, idxs_Y, method_least_square, cct, rank_f = pars

    if isinstance(Y_name, basestring):
        # print("Reloading Y")
        Y, _, _ = load_memmap(Y_name)
        Y = np.array(Y[idxs_Y, :])
    else:
        Y = Y_name[idxs_Y, :]

    if isinstance(C_name, basestring):
        # print("Reloading C")
        C = np.load(C_name, mmap_mode='r')
        C = np.array(C)
    else:
        C = C_name

    _, T = np.shape(C)
    # sys.stdout = open(str(os.getpid()) + ".out", "w")
    As = []
    # print "*****************:" + str(idxs_Y[0]) + ',' + str(idxs_Y[-1])
    print('updating lars')
    # import os
    # print('**' + str(os.environ['OPENBLAS_NUM_THREADS']))
    for y, px in zip(Y, idxs_Y):
        # print str(time.time() - st) + ": Pixel" + str(px)
        # print px, len(idxs_C), C.shape
        c = C[idxs_C[px], :]
        idx_only_neurons = idxs_C[px]
        cct_ = cct[idx_only_neurons[:-rank_f]]
        if np.size(c) > 0:
            sn = noise_sn[px] ** 2 * T

            if method_least_square == 'lasso_lars_old':
                # lasso lars from old implementation, will be deprecated
                a = lars_regression_noise_old(y, c.T, 1, sn)[2]

            elif method_least_square == 'nnls_L0':
                # Nonnegative least square with L0 penalty
                a = nnls_L0(c.T, y, 1.2 * sn)

            elif method_least_square == 'lasso_lars':
                # lasso lars function from scikit learn
                # a, RSS = scipy.optimize.nnls(c.T, np.ravel(y))
                # RSS = RSS * RSS
                # if RSS <= 2 * sn:  # hard noise constraint hardly feasible
                lambda_lasso = .5 * noise_sn[px] * np.sqrt(np.max(cct_)) / T
                # lambda_lasso = 1
                clf = linear_model.LassoLars(alpha=lambda_lasso, positive=True)
                a_lrs = clf.fit(np.array(c.T), np.ravel(y))
                a = a_lrs.coef_
                # else:
                #     print 'Problem infeasible'
                #     pl.cla()
                #     pl.plot(a.T.dot(c))
                #     pl.plot(y)
                #     pl.pause(3)

            else:
                raise Exception('Least Square Method not found!'
                                + method_least_square)

            if not np.isscalar(a):
                a = a.T

            As.append((px, idxs_C[px], a))

    print('clearing variables')
    if isinstance(Y_name, basestring):
        # print("deleting Y")
        del Y
    if isinstance(C_name, basestring):
        del C
    if isinstance(Y_name, basestring):
        gc.collect()
    print('done!')
    return As