mod = lm.LinearRegression()
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[0], 'Regression')

# %% L2 regularization
mod = lm.Ridge(alpha=10)  # lambda is alpha!
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[1], 'Ridge')

# %% L1 regularization
mod = lm.Lasso(alpha=.1)  # lambda is alpha!
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[2], 'Lasso')

# %% L1-L2 regularization
mod = lm.ElasticNet(alpha=.5, l1_ratio=.5)
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[3], 'ElasticNet')

plt.tight_layout()
axis[3].set_xlabel("Number of input features", fontsize=16)
plt.savefig(
    "/home/ed203246/git/pystatsml/images/linear_regression_penalties.png")
# X = df.loc[0:, ['lights', 'T1', 'RH_1', 'T2', 'RH_2', 'T3', 'RH_3', 'T4', 'RH_4', 'T5', 'RH_5', 'T6', 'RH_6', 'T7', 'RH_7', 'T8', 'RH_8', 'T9', 'RH_9', 'T_out', 'Press_mm_hg', 'RH_out', 'Windspeed', 'Visibility', 'Tdewpoint', 'rv1', 'rv2', 'nsm']]
y = df.loc[0:, 'Appliances']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)
print("shape of X2", X.shape)

# REGRESSION MODELS - define the model, fit it, predict, then take the R2 score of the prediction

# Lasso Regression
laso = linear_model.Lasso(alpha=0.2)
laso.fit(X_train, y_train)
y_predict_laso = laso.predict(X_test)
print("R2 Score for Lasso Regression: ", r2_score(y_test, y_predict_laso))

# Ridge Regression
ridge = linear_model.Ridge(alpha=.5)
ridge.fit(X_train, y_train)
y_predict_ridge = ridge.predict(X_test)
print("R2 Score for Ridge Regression: ", r2_score(y_test, y_predict_ridge))

# LassoLars Regression
lassoLars = linear_model.LassoLars(alpha=.1)
lassoLars.fit(X_train, y_train)
def graph():
    if request.method == 'POST':
        f = request.files['file']
        path = os.path.join(app.config['UPLOAD_FOLDER'], f.filename)
        df = pd.read_csv(path)
        df.head()

        close_px = df['Adj Close']
        mavg = close_px.rolling(window=100).mean()

        # Setting Features
        dfreg = df.loc[:, ['Adj Close', 'Volume']]
        dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
        dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

        # Drop missing values
        dfreg.fillna(value=-99999, inplace=True)

        # We want to separate 1 percent of the data to forecast
        forecast_out = int(math.ceil(0.01 * len(dfreg)))

        # Separating the label here, we want to predict the Adj Close
        forecast_col = 'Adj Close'
        dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
        X = np.array(dfreg.drop(['label'], 1))

        # Scale X so that every feature has the same distribution for linear regression
        X = preprocessing.scale(X)

        # Finally we want to find the data series of late X and early X (train) for model generation and evaluation
        X_lately = X[-forecast_out:]
        X = X[:-forecast_out]

        # Separate the label and identify it as y
        y = np.array(dfreg['label'])
        y = y[:-forecast_out]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

        # Linear regression
        clfreg = LinearRegression(n_jobs=-1)
        clfreg.fit(X_train, y_train)

        # Lasso regression
        clflasreg = linear_model.Lasso(alpha=0.1)
        clflasreg.fit(X_train, y_train)

        # Quadratic Regression 2
        clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
        clfpoly2.fit(X_train, y_train)

        # Quadratic Regression 3
        clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
        clfpoly3.fit(X_train, y_train)

        # Get scores
        confidencereg = clfreg.score(X_test, y_test)
        confidencelassref = clflasreg.score(X_test, y_test)
        confidencepoly2 = clfpoly2.score(X_test, y_test)
        confidencepoly3 = clfpoly3.score(X_test, y_test)

        # Prediction values here
        forecast_set_reg = clfreg.predict(X_lately)
        forecast_set_las_reg = clflasreg.predict(X_lately)
        forecast_set_poly_2_reg = clfpoly2.predict(X_lately)
        forecast_set_poly_3_reg = clfpoly3.predict(X_lately)
        # dfreg['Forecast'] = np.nan

        json_data = {
            'Linear': forecast_set_reg,
            'Lasso': forecast_set_las_reg,
            'QRidge': forecast_set_poly_2_reg,
            'QRidge3': forecast_set_poly_3_reg
        }
        # json_data = json.dumps(data)
        print(json_data)

        return render_template('graph.html', title='Graph', prediction=json_data)
    else:
        return render_template('graph.html', title='Graph')
# Cross Validation Classification LogLoss
import numpy
from pandas import read_table
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import sklearn.linear_model as lm

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
         'acceleration', 'model year', 'origin', 'car name']
dataframe = read_table(url, names=names, delimiter=r"\s+", na_values="?")
dataframe = dataframe[(~numpy.isnan(dataframe['horsepower']))]
array = dataframe.values
X = array[:, 1:8]
Y = array[:, 0]

linregression = lm.LinearRegression()
ridge = lm.Ridge()
lasso = lm.Lasso()
lars = lm.Lars()
omp = lm.OrthogonalMatchingPursuit()
br = lm.BayesianRidge()

model = ridge
model = model.fit(X, Y)
prediction = model.predict(X)
r2 = r2_score(Y, prediction)
mse = mean_squared_error(Y, prediction)
print("R2: %.3f, MSE: %.3f" % (r2, mse))
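# A hedged sketch (not part of the original script): the snippet above scores a single
# model in-sample, so a cross-validated comparison of the estimators it defines may be
# more informative. Assumes X, Y and the model objects above are still in scope.
from sklearn.model_selection import cross_val_score

for name, est in [("linear", linregression), ("ridge", ridge), ("lasso", lasso),
                  ("lars", lars), ("omp", omp), ("bayesian_ridge", br)]:
    scores = cross_val_score(est, X, Y, cv=5, scoring="r2")
    print("%-15s mean R2 = %.3f (std %.3f)" % (name, scores.mean(), scores.std()))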
encoded = Dense(20, activation='relu')(input_img)
decoded = Dense(X.shape[1], activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(X, X, epochs=20, batch_size=32, shuffle=True, validation_split=0.8)
return autoencoder.predict(X)


models = {
    'ridge ': linear_model.Ridge(alpha=0.1, normalize=False),
    'lasso ': linear_model.Lasso(alpha=1e-6, max_iter=1e8),
    'lr ': linear_model.LogisticRegression(solver='lbfgs', warm_start=True, max_iter=1e4),
    'lrCV ': linear_model.LogisticRegressionCV(solver='lbfgs', max_iter=1e4, cv=5),
    'mlp_clf': neural_network.MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(256, 64), random_state=1),
    'mlp_reg': neural_network.MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(256, 64, 32, 32, 32), random_state=1),
    'svc ': svm.SVC(C=10, kernel='rbf'),
    'rfreg ': ensemble.RandomForestRegressor(max_depth=15),
    'rfclf ': ensemble.RandomForestClassifier(max_depth=12),
    'lgbclf ': lgb.LGBMClassifier(gamma='auto', num_leaves=4, learning_rate=0.001, n_estimators=2000, verbose=100),
    'lgbreg ': lgb.LGBMRegressor(gamma='auto', num_leaves=31, learning_rate=0.001, n_estimators=20000, verbose=100),
    'knn ': neighbors.KNeighborsClassifier(n_neighbors=30, n_jobs=15),
def LassoRegressor(self, noOfTrials, diagram, gateName):
    from sklearn import linear_model
    model = linear_model.Lasso()
    return self.RegressionTemplate(model, noOfTrials, diagram, gateName)
def Regression(z, p=3, lamb=0, model='OLS', resampling='kfold', error='MSE',
               x=x_array, y=y_array, intercept=True):
    X = CreateDesignMatrix(x, y, p)
    # print("polynomial: ", p, " Determinant: ", np.linalg.det(X.T.dot(X)))

    if (model == 'OLS' or model == 'Ridge'):
        if model == 'OLS':
            lamb = 0
        if resampling == 'none':
            if (intercept):
                betas = beta(X, z, lamb)
            else:
                X_no = X[:, 1:]
                betas = beta(X_no, z, lamb)
                betas = np.insert(betas, 0, np.mean(z))
            z_tilde = X.dot(betas)
            MSE, R2 = Error_Analysis(z, z_tilde, "MSE and R2")
            var_betas = Variance_Bias_Analysis(X, z, z_tilde, lamb, result='Variance', betas=betas)
            return MSE, R2, var_betas, betas, z_tilde
        elif resampling == 'kfold':
            return Kfold(X, z, lamb, intercept=intercept)

    if model == 'Lasso':
        # Here we used scikit-learn
        if intercept:
            fit_intercept_bool = False
        else:
            fit_intercept_bool = True
        model_lasso = skl.Lasso(alpha=lamb, fit_intercept=fit_intercept_bool, normalize=True, tol=6000)
        if resampling == 'none':
            if (intercept):
                model_lasso.fit(X, z)
                betas = model_lasso.coef_
            else:
                X_no = X[:, 1:]
                model_lasso.fit(X_no, z)
                betas = np.insert(model_lasso.coef_, 0, model_lasso.intercept_)
            z_tilde = X.dot(betas)
            MSE, R2 = Error_Analysis(z, z_tilde, "MSE and R2")
            var_betas = Variance_Bias_Analysis(X, z, z_tilde, lamb, result='Variance', betas=betas)
            return MSE, R2, var_betas, betas, z_tilde
        elif resampling == 'kfold':
            return Kfold(X, z, lamb, model_lasso, intercept=intercept)
    return
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Args:
        P (Poly) : Polynomial expansion with `P.shape=(M,)` and `P.dim=D`.
        x (array_like) : Collocation nodes with `x.shape=(D,K)`.
        u (array_like) : Model evaluations with `len(u)=K`.
        retall (bool) : If True return Fourier coefficients in addition to R.
        rule (str) : Regression method used.

    Returns:
        (Poly, np.ndarray) : Fitted polynomial with `R.shape=u.shape[1:]` and
        `R.dim=D`. The Fourier coefficients in the estimation.

    Examples:
        >>> x, y = cp.variable(2)
        >>> P = cp.Poly([1, x, y])
        >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
        >>> u = [0,1,1,2]
        >>> print(cp.around(fit_regression(P, s, u), 14))
        0.5q0+0.5q1+1.0
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = linalg.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:
        # Scikit-learn wrapper
        try:
            _ = linear_model
        except:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = linear_model.ARDRegression(fit_intercept=False, copy_X=False, **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = linear_model.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = cp.poly.sum((P * uhat), -1)
    R = cp.poly.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q
    return R
## Step 2: split the image into blocks and build the regression data
yreg = []
cdat = []
aa = ImageWavelet(np.zeros((nV, nH)))  # ImageWavelet object for a zero-filled image
X = aa.BaseMat()  # yreg will hold the basis

for ytop in range(0, lobs.shape[0], stride):
    for xlft in range(0, lobs.shape[1], stride):
        ll = lobs[ytop:ytop + nV, xlft:xlft + nH]
        yreg.append(ll.reshape((nH * nV, )) - ll.mean())  # subtract the mean beforehand
        cdat.append(aa.Wv2coeff(ll))
yreg = np.array(yreg)
cdat = np.array(cdat)

## Step 3: run the LASSO regression
reg = linear_model.Lasso(alpha=5e-4, fit_intercept=False, tol=1e-6)
reg.fit(X[:, 1:], yreg.T)  # the mean component is not estimated, since it is set to 0

## Step 4: evaluate the result
lrec = np.zeros((imgsize, imgsize))
cnt = 0
recdat = np.hstack((cdat[:, 0].reshape(reg.coef_.shape[0], 1), reg.coef_))
for ytop in range(0, lrec.shape[0], stride):
    for xlft in range(0, lrec.shape[1], stride):
        lrec[ytop:ytop + nV, xlft:xlft + nH] = aa.Coeff2Wv(recdat[cnt, :])
        cnt += 1

vmin = np.min((lobs, lrec, ltrue))
vmax = np.max((lobs, lrec, ltrue))
plt.figure()
# 1(c3). Train and evaluate the predictive performance for polynomial (to the 4th degree)
# with Ridge regularization (regularization strength set to 0.5)
ridge_model = linear_model.Ridge(alpha=0.5)
ridge_model.fit(x_polyTrain, y_polyTrain)
y_predicted_pl_ridge = ridge_model.predict(x_polyTest)
plt.plot(x_test, y_test, "r.")
plt.plot(x_test, y_predicted_pl_ridge, "b.")

from sklearn.metrics import mean_squared_error
evaluation_pl_ridge = mean_squared_error(y_polyTest, y_predicted_pl_ridge)
print("The mean squared error for the polynomial with Ridge regularization is:", evaluation_pl_ridge)

# 1(c4). Train and evaluate the predictive performance for polynomial (to the 4th degree)
# with Lasso regularization (regularization strength set to 0.5)
lasso_model = linear_model.Lasso(alpha=0.5)
lasso_model.fit(x_polyTrain, y_polyTrain)
y_predicted_pl_lasso = lasso_model.predict(x_polyTest)
plt.plot(x_test, y_test, "r.")
plt.plot(x_test, y_predicted_pl_lasso, "b.")

from sklearn.metrics import mean_squared_error
evaluation_pl_lasso = mean_squared_error(y_polyTest, y_predicted_pl_lasso)
print("The mean squared error for the polynomial with Lasso regularization is:", evaluation_pl_lasso)

# 1(c5). Evaluate using mean squared error. Report all values in a single table. (Code and Write-up)
fig = plt.figure(dpi=120)
ax = fig.add_subplot(1, 1, 1)
table_data = [
    ["The mean squared error of linear regression", evaluation_lr],
house.all = pd.get_dummies(house.all, drop_first=True, dummy_na=False)
# house.train().sample(10)
x = house.train().drop(['SalePrice', 'test'], axis=1)
y = house.train().SalePrice
# x_train, x_test, y_train, y_test = train_test_split(x, y)
x_train = x.as_matrix().astype(np.float)
np.any(np.isnan(x_train))
y_train = y.as_matrix().astype(np.float)

from sklearn.model_selection import GridSearchCV
alpha_100 = [{'alpha': np.logspace(-4, 2, 100)}]
lasso = linear_model.Lasso(normalize=True)
# lasso.fit(x_train, y_train)  # fit data
para_search = GridSearchCV(estimator=lasso, param_grid=alpha_100,
                           scoring='neg_mean_squared_log_error', cv=5)
para_search.fit(x_train, y_train)
rmsle(para_search.predict(x_test), y_test)

# R^2 coefficient
lasso.score(x_train, y_train)
rmse_cv(lasso, x_train, y_train)
rmsle(lasso.predict(x_test), y_test)

# %% Compare with Add features
# del house_sf
# house_sf = House('data/train.csv', 'data/test.csv')
#
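# Hedged follow-up sketch (not in the original notebook): after the alpha grid search
# above, the selected penalty and the refit estimator can be inspected directly
# through the standard GridSearchCV attributes.
best_alpha = para_search.best_params_['alpha']
best_lasso = para_search.best_estimator_
print("best alpha:", best_alpha)
print("CV score (neg MSLE):", para_search.best_score_)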
def main():
    # --------------- LOAD PARAMETERS, INITIALIZE VARS ---------------
    results = []
    para = params_setup("lasso")  # "lasso" for lasso VAR

    # Which variables are > thresh?
    thresh = para.variable_threshold
    # How far in the future are we predicting?
    offset = para.horizon
    # What is the window of previous timesteps we're looking at?
    attn_length = para.attention_len

    # --------------- PREPARE DATA ------------------
    (X_cut, y_cut) = load_data(para)
    num_timesteps = X_cut.shape[0]
    num_features = X_cut.shape[1]

    X = list()
    for i in range(attn_length, num_timesteps - offset):
        # must flatten (timesteps, features) into 1D b/c model only takes up to 2D
        X.append((X_cut[i - attn_length:i, :]).flatten())
    X = np.array(X)
    y = y_cut[attn_length + offset:num_timesteps]

    # Split into training and testing
    cutoff_idx = math.floor(X.shape[0] * 0.8)
    X_train = X[0:cutoff_idx, :]
    y_train = y[0:cutoff_idx]
    X_test = X[cutoff_idx:, :]
    y_test = y[cutoff_idx:]
    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_test shape:", y_test.shape)

    for aa in ALPHAS:
        clf = linear_model.Lasso(alpha=aa, normalize=True)
        clf.fit(X_train, y_train)

        # 0      rated emotion signal
        # 1-13   MFCCs
        # 14-26  dMFCCs
        # 27-39  ddMFCCs
        # 40     clarity
        # 41     brightness
        # 42     key_strength
        # 43     rms
        # 44     centroid
        # 45     spread
        # 46     skewness
        # 47     kurtosis
        # 48-59  chroma
        # 60     mode
        # 61     compress
        # 62     hcdf
        # 63     flux
        # 64-74  lpcs

        # Determine which variables (timesteps, since this is AR) are most important
        inds = [j for (i, j) in zip(clf.coef_, range(len(clf.coef_))) if abs(i) >= thresh]
        inds_mod = [x % num_features for x in inds]

        # Get RMSE
        RMSE = ((len(y_test)**-1) * sum((clf.predict(X_test) - y_test)**2))**.5
        # RMSE = ((len(y_test) ** -1) * sum((clf.predict(X_test[:][:-2]) - y_test[:-2]) ** 2))**.5
        # RMSE = (len(y_test) ** -1) * sum((clf.predict(X_test) - y_test) ** 2)
        print("RMSE for alpha " + str(aa) + ": " + (str)(RMSE))

        plt.plot(range(len(y_test[:-2])), clf.predict(X_test[:][:-2]))
        plt.plot(range(len(y_test[:-2])), y_test[:-2])
        # plt.plot(range(len(y_test)), clf.predict(X_test))
        # plt.plot(range(len(y_test)), y_test)
        # plt.show()
        # plt.pause(3)
        # plt.close()

        results.append(Result(aa, RMSE, clf.coef_, sorted(inds_mod), clf.intercept_))

    min_rmse = results[0].RMSE
    min_idx = 0
    for i in range(0, len(results)):
        if (results[i].RMSE < min_rmse):
            min_rmse = results[i].RMSE
            min_idx = i
    print("Minimum RMSE: " + str(min_rmse) + " for alpha=" + str(results[min_idx].alpha))

    # ----------------- WRITE RESULT OF BEST ALPHA TO FILE -------------------
    best_result = results[min_idx]
    with open(para.output_filename, 'w') as f:
        f.write("RMSE: " + (str)(best_result.RMSE))
        f.write("\nAlpha: " + (str)(best_result.alpha))
        f.write("\nCoefficients:\n")
        f.write(np.array2string(best_result.coefs, threshold=np.nan))
        f.write("\nCoefficient indices over threshold " + str(para.variable_threshold) + ":\n")
        f.write(' '.join(str(x) for x in best_result.coef_indices))
        f.write("\nTotal number of coefficients over threshold: " + str(len(best_result.coef_indices)))
        f.write("\nRegression bias term: " + str(best_result.bias))

        # Write overall RMSE for each alpha as well
        f.write("\n\nRMSEs for each alpha:\n")
        for result in results:
            f.write("Alpha: " + str(result.alpha) + ",\tRMSE: " + str(result.RMSE) + "\n")
    print("Successfully wrote results to file " + para.output_filename)
def lasso_regression(df2, results):
    rlm = linear_model.Lasso(alpha=1)
    model = rlm.fit(df2, results)
    return model
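# Hedged usage sketch (not in the original module): df2 would be a feature DataFrame
# and results the target values; the column names and numbers below are illustrative
# assumptions only.
import pandas as pd
from sklearn import linear_model

df2_demo = pd.DataFrame({"x1": [0.0, 1.0, 2.0, 3.0, 4.0],
                         "x2": [1.0, 0.0, 1.0, 0.0, 1.0]})
results_demo = pd.Series([0.4, 1.6, 2.5, 3.3, 4.6])
demo_model = lasso_regression(df2_demo, results_demo)
print(demo_model.coef_, demo_model.intercept_)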
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    self.m = linear_model.Lasso(alpha=self.params.get("alpha", 0.1))
def lasso(alpha=1.0):
    reg = linear_model.Lasso(alpha=alpha)
    return reg
import numpy as np
train_X = np.array(data[train_idxs, 1:-1], dtype=float)
train_y = np.array(data[train_idxs, -1], dtype=float)
test_X = np.array(data[test_idxs, 1:-1], dtype=float)
test_y = np.array(data[test_idxs, -1], dtype=float)

# uncomment these lines to only include certain parts of the data
# only use BST
# train_X, test_X = [arr[:, :6] for arr in [train_X, test_X]]
# only use atk, spa, spe
# train_X, test_X = [arr[:, [1, 3, 5]] for arr in [train_X, test_X]]

# use 4 different fitting models to compare accuracy
x = 2638952
models = [svm.LinearSVC(random_state=x, max_iter=1e5),
          linear_model.Lasso(random_state=x, alpha=0.1),
          neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance'),
          svm.SVR(kernel='linear')]
titles = ['Linear Classification', 'Lasso Regression',
          'K-Neighbors Classification', 'Linear Regression']

# train & test the models
# plot the deviations from the correct values of each model
fig = plt.figure()
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.xlabel('Error', fontsize=16)
plt.ylabel('Density', fontsize=16)

for i in range(len(titles)):
    ax = fig.add_subplot(2, 2, i + 1)
    clf = models[i]
    clf.fit(train_X, train_y)
def estimate_ddag_skeleton(X1, X2, candidate_edges, changed_nodes, alpha=.1,
                           lasso_alpha=None, max_set_size=None, verbose=False):
    """
    :param X1: (n1 x p) data matrix from first context
    :param X2: (n2 x p) data matrix from second context
    :param candidate_edges: set of edges that could possibly be in the skeleton
    :param changed_nodes: nodes adjacent to a candidate edge or with changed variance
    :param alpha: significance level to reject null hypothesis b1 = b2. Lower alpha makes it
        easier to accept the null hypothesis, so more edges will be deleted.
    :param lasso_alpha:
    :param max_set_size:
    :param verbose:
    :return:
    """
    n1, p1 = X1.shape
    n2, p2 = X2.shape
    if p1 != p2:
        raise ValueError("X1 and X2 must have the same number of dimensions")
    if isinstance(alpha, Iterable):
        alpha_ = max(alpha)
    else:
        alpha_ = alpha

    retained_edges = set()
    retained_edges_with_p = {}
    deleted_edges = set()
    deleted_edges_with_p = {}
    candidate_edges = {tuple(sorted((i, j))) for i, j in candidate_edges}
    printv = print if verbose else (lambda x: 0)

    S1 = X1.T @ X1
    S2 = X2.T @ X2
    for i, j in candidate_edges:
        printv("Checking edge (%d, %d)" % (i, j))
        not_ij = changed_nodes - {i, j}
        is_regression_invariant = False
        max_p = float('-inf')

        X1_j = X1[:, j]
        X2_j = X2[:, j]
        X1_i = X1[:, i]
        X2_i = X2[:, i]
        for cond_set in math_utils.powerset(not_ij, max_set_size=max_set_size):
            m_i = cond_set + (i, )
            m_j = cond_set + (j, )
            X1_mi = X1[:, m_i]
            X2_mi = X2[:, m_i]
            X1_mj = X1[:, m_j]
            X2_mj = X2[:, m_j]

            # marginal precision matrices
            if lasso_alpha is None:
                K1_ij = np.linalg.inv(S1[np.ix_(m_i, m_i)])
                K2_ij = np.linalg.inv(S2[np.ix_(m_i, m_i)])
                K1_ji = np.linalg.inv(S1[np.ix_(m_j, m_j)])
                K2_ji = np.linalg.inv(S2[np.ix_(m_j, m_j)])

                b1_mij = K1_ij @ S1[j, m_i].T
                b2_mij = K2_ij @ S2[j, m_i].T
                b1_mji = K1_ji @ S1[i, m_j].T
                b2_mji = K2_ji @ S2[i, m_j].T
            else:
                clf = linear_model.Lasso(alpha=lasso_alpha)
                clf.fit(X1_mi, X1_j)
                b1_mij = clf.coef_
                clf.fit(X2_mi, X2_j)
                b2_mij = clf.coef_
                clf.fit(X1_mj, X1_i)
                b1_mji = clf.coef_
                clf.fit(X2_mj, X2_i)
                b2_mji = clf.coef_

            # calculate t_ij (regressing j on i) and find its p-value
            ssr1_ij = np.sum(np.square(X1_j - X1_mi @ b1_mij.T))
            ssr2_ij = np.sum(np.square(X2_j - X2_mi @ b2_mij.T))
            var1_ij = ssr1_ij / (n1 - len(m_i))
            var2_ij = ssr2_ij / (n2 - len(m_i))
            b1_ij = b1_mij[-1]
            b2_ij = b2_mij[-1]
            t_ij = (b1_ij - b2_ij)**2 * np.linalg.inv(var1_ij * K1_ij + var2_ij * K2_ij)[-1, -1]
            p_ij = 1 - stats.f.cdf(t_ij, 1, n1 + n2 - len(m_i) - len(m_j))
            if p_ij > alpha_:
                # accept hypothesis that b1_ij = b2_ij, delete edge
                is_regression_invariant = True
                deleted_edges.add((i, j))
                deleted_edges_with_p[(i, j)] = p_ij
                printv("deleted")
                break

            # calculate t_ji (regressing i on j) and find its p-value
            ssr1_ji = np.sum(np.square(X1_i - X1_mj @ b1_mji.T))
            ssr2_ji = np.sum(np.square(X2_i - X2_mj @ b2_mji.T))
            var1_ji = ssr1_ji / (n1 - len(m_i))
            var2_ji = ssr2_ji / (n2 - len(m_i))
            b1_ji = b1_mji[-1]
            b2_ji = b2_mji[-1]
            t_ji = (b1_ji - b2_ji)**2 * np.linalg.inv(var1_ji * K1_ji + var2_ji * K2_ji)[-1, -1]
            p_ji = 1 - stats.f.cdf(t_ji, 1, n1 + n2 - len(m_i) - len(m_j))
            if p_ji > alpha_:
                # accept hypothesis that b1_ji = b2_ji, delete edge
                is_regression_invariant = True
                deleted_edges.add((i, j))
                deleted_edges_with_p[(i, j)] = p_ji
                printv("deleted")
                break

            max_p = max(max_p, p_ij, p_ji)
        # end of inner loop over the powerset

        if not is_regression_invariant:
            printv("retained")
            retained_edges.add((i, j))
            retained_edges_with_p[(i, j)] = max_p

    if isinstance(alpha, Iterable):
        retained_edges_dict = {alpha_: retained_edges}
        deleted_edges_dict = {alpha_: deleted_edges}
        for a in set(alpha) - {alpha_}:
            # if edge was deleted for highest alpha, it would have been deleted for lower alphas.
            deleted_edges_dict[a] = deleted_edges.copy()
            retained_edges_dict[a] = set()
            for (i, j), p in retained_edges_with_p.items():
                if p > a:
                    deleted_edges_dict[a].add((i, j))
                else:
                    retained_edges_dict[a].add((i, j))
        printv("Retained edges: %s" % {k: sorted(r) for k, r in retained_edges_dict.items()})
        return retained_edges_dict, deleted_edges_dict
    else:
        printv("Retained edges: %s" % sorted(retained_edges))
        return retained_edges, retained_edges_with_p, deleted_edges, deleted_edges_with_p
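# Hedged usage sketch (not in the original file): two synthetic contexts that share the
# same variables but differ in the strength of the 0 -> 1 edge. It assumes this module's
# own imports (numpy as np, scipy.stats, math_utils) are available, as the function
# itself requires; passing lasso_alpha would switch the inner regressions to sklearn's
# Lasso instead of the exact least-squares solution used here.
rng = np.random.RandomState(0)
n, p = 500, 3
X1_demo = rng.randn(n, p)
X1_demo[:, 1] += 2.0 * X1_demo[:, 0]   # edge 0 -> 1 in context 1
X2_demo = rng.randn(n, p)
X2_demo[:, 1] += 0.5 * X2_demo[:, 0]   # same edge, different weight, in context 2

candidate_demo = {(0, 1), (0, 2), (1, 2)}
changed_demo = {0, 1, 2}
retained, retained_p, deleted, deleted_p = estimate_ddag_skeleton(
    X1_demo, X2_demo, candidate_demo, changed_demo, alpha=0.1)
print("retained:", retained, "deleted:", deleted)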
        coef0=2.0,
        random_state=RANDOM_SEED)),
    classification_binary(
        light_clf.KernelSVC(kernel="sigmoid", random_state=RANDOM_SEED)),
    classification_binary(
        light_clf.KernelSVC(kernel="cosine", random_state=RANDOM_SEED)),

    # Sklearn Linear Regression
    regression(linear_model.ARDRegression()),
    regression(linear_model.BayesianRidge()),
    regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
    regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
    regression(linear_model.HuberRegressor()),
    regression(linear_model.Lars()),
    regression(linear_model.LarsCV()),
    regression(linear_model.Lasso(random_state=RANDOM_SEED)),
    regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
    regression(linear_model.LassoLars()),
    regression(linear_model.LassoLarsCV()),
    regression(linear_model.LassoLarsIC()),
    regression(linear_model.LinearRegression()),
    regression(linear_model.OrthogonalMatchingPursuit()),
    regression(linear_model.OrthogonalMatchingPursuitCV()),
    regression(
        linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
    regression(
        linear_model.RANSACRegressor(
            base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
            random_state=RANDOM_SEED)),
    regression(linear_model.Ridge(random_state=RANDOM_SEED)),
    regression(linear_model.RidgeCV()),
test_data = pd.read_csv('testing_data_set.csv')
train_data = pd.read_csv('training_data_set.csv')

# In[16]:

x_train = train_data['Father'].values.reshape(-1, 1)
y_train = train_data['Son'].values.reshape(-1, 1)
x_test = test_data['Father'].values.reshape(-1, 1)
y_test = test_data['Son'].values.reshape(-1, 1)

# In[17]:

poly = PolynomialFeatures(degree=10)
X_modified_train = poly.fit_transform(x_train)
X_modified_test = poly.fit_transform(x_test)
model1 = linear_model.Lasso(alpha=0.5)
model1.fit(X_modified_train, y_train)
y_predicted_test = model1.predict(X_modified_test)
y_predicted_train = model1.predict(X_modified_train)
a = sqrt(mean_squared_error(y_train, y_predicted_train))
b = sqrt(mean_squared_error(y_test, y_predicted_test))
print(a)
print(b)

# In[18]:

train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = linear_model.Lasso(alpha=alpha_v)
### Make Train and Test from the Data
msk = np.random.rand(len(convertedX)) < 0.8
train = convertedX[msk]
test = convertedX[~msk]

train_y = train.MarketShare_total
train = train.drop('MarketShare_total', axis=1)
test_y = test.MarketShare_total
test = test.drop('MarketShare_total', axis=1)

### Feature Selection and Transform the Data
clf = linear_model.Lasso(alpha=0.1).fit(train, train_y)
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(train)
print(X_new.shape)
test_new = model.transform(test)

### Make the Dense Network Model and Evaluation
model = baseline_model(train)

# fit model
history = model.fit(train, train_y, batch_size=100,
                    validation_data=(test, test_y), epochs=100, verbose=1)

# evaluate the model
train_mse = model.evaluate(train, train_y, verbose=0)
test_mse = model.evaluate(test, test_y, verbose=0)
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Parameters
    ----------
    P : Poly
        Polynomial chaos expansion with `P.shape=(M,)` and `P.dim=D`.
    x : array_like
        Collocation nodes with `x.shape=(D,K)`.
    u : array_like
        Model evaluations with `len(u)=K`.
    retall : bool
        If True return uhat in addition to R
    rule : str
        Regression method used.

        The following methods use scikit-learn as backend.
        See `sklearn.linear_model` for more details.

        Key     Scikit-learn    Description
        ---     ------------    -----------
                Parameters      Description
                ----------      -----------

        "BARD"  ARDRegression   Bayesian ARD Regression
                n_iter=300      Maximum iterations
                tol=1e-3        Optimization tolerance
                alpha_1=1e-6    Gamma scale parameter
                alpha_2=1e-6    Gamma inverse scale parameter
                lambda_1=1e-6   Gamma shape parameter
                lambda_2=1e-6   Gamma inverse scale parameter
                threshold_lambda=1e-4   Upper pruning threshold

        "BR"    BayesianRidge   Bayesian Ridge Regression
                n_iter=300      Maximum iterations
                tol=1e-3        Optimization tolerance
                alpha_1=1e-6    Gamma scale parameter
                alpha_2=1e-6    Gamma inverse scale parameter
                lambda_1=1e-6   Gamma shape parameter
                lambda_2=1e-6   Gamma inverse scale parameter

        "EN"    ElasticNet      Elastic Net
                alpha=1.0       Dampening parameter
                rho             Mixing parameter in [0,1]
                max_iter=300    Maximum iterations
                tol             Optimization tolerance

        "ENC"   ElasticNetCV    EN w/Cross Validation
                rho             Dampening parameter(s)
                eps=1e-3        min(alpha)/max(alpha)
                n_alphas        Number of alphas
                alphas          List of alphas
                max_iter        Maximum iterations
                tol             Optimization tolerance
                cv=3            Cross validation folds

        "LA"    Lars            Least Angle Regression
                n_nonzero_coefs Number of non-zero coefficients
                eps             Cholesky regularization

        "LAC"   LarsCV          LAR w/Cross Validation
                max_iter        Maximum iterations
                cv=5            Cross validation folds
                max_n_alphas    Max points for residuals in cv

        "LAS"   Lasso           Least Absolute Shrinkage and
                                Selection Operator
                alpha=1.0       Dampening parameter
                max_iter        Maximum iterations
                tol             Optimization tolerance

        "LASC"  LassoCV         LAS w/Cross Validation
                eps=1e-3        min(alpha)/max(alpha)
                n_alphas        Number of alphas
                alphas          List of alphas
                max_iter        Maximum iterations
                tol             Optimization tolerance
                cv=3            Cross validation folds

        "LL"    LassoLars       Lasso and Lars model
                max_iter        Maximum iterations
                eps             Cholesky regularization

        "LLC"   LassoLarsCV     LL w/Cross Validation
                max_iter        Maximum iterations
                cv=5            Cross validation folds
                max_n_alphas    Max points for residuals in cv
                eps             Cholesky regularization

        "LLIC"  LassoLarsIC     LL w/AIC or BIC
                criterion       "AIC" or "BIC" criterion
                max_iter        Maximum iterations
                eps             Cholesky regularization

        "OMP"   OrthogonalMatchingPursuit
                n_nonzero_coefs Number of non-zero coefficients
                tol             Max residual norm (instead of non-zero coef)

        Local methods

        Key     Description
        ---     -----------
        "LS"    Ordinary Least Squares

        "T"     Ridge Regression/Tikhonov Regularization
                order           Order of regularization (or custom matrix)
                alpha           Dampening parameter (else estimated from gcv)

        "TC"    T w/Cross Validation
                order           Order of regularization (or custom matrix)
                alpha           Dampening parameter (else estimated from gcv)

    Returns
    -------
    R[, uhat]

    R : Poly
        Fitted polynomial with `R.shape=u.shape[1:]` and `R.dim=D`.
    uhat : np.ndarray
        The Fourier coefficients in the estimation.
    Examples
    --------
    >>> P = cp.Poly([1, x, y])
    >>> x = [[-1,-1,1,1], [-1,1,-1,1]]
    >>> u = [0,1,1,2]
    >>> print fit_regression(P, x, u)
    0.5q1+0.5q0+1.0
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], np.prod(u.shape[1:]))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = la.lstsq(Q, u)[0]

    elif rule == "T":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), False)

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)

    else:
        # Scikit-learn wrapper
        try:
            _ = lm
        except:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = lm.ARDRegression(fit_intercept=False, copy_X=False, **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNetCV(**kws)

        elif rule == "LA":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = lm.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = po.sum((P * uhat.T), -1)
    R = po.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        return R, uhat, Q
    return R
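# Hedged sketch (not from the original module): what the "LAS" rule in the table above
# boils down to, written directly against scikit-learn. Q stands in for the expansion
# evaluated at the collocation nodes (Q = P(*x).T) and u for the model evaluations;
# the values below are illustrative and reuse the toy problem from the docstring.
import numpy as np
from sklearn import linear_model

Q = np.array([[1., -1., -1.],
              [1., -1.,  1.],
              [1.,  1., -1.],
              [1.,  1.,  1.]])   # columns: 1, q0, q1 evaluated at the four nodes
u = np.array([0., 1., 1., 2.])
solver = linear_model.Lasso(alpha=1e-3, fit_intercept=False, max_iter=10**5)
uhat = solver.fit(Q, u).coef_    # Fourier coefficients, one per polynomial term
print(uhat)                      # close to [1.0, 0.5, 0.5] for a small alpha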
from sklearn import metrics


def plot(y_pred, y_test):
    plt.plot(y_test, y_pred, '.')  # actual values of x and y
    plt.title('LASSO Least Angled Regression')
    plt.xlabel('Y_test')
    plt.ylabel('Y_pred')
    plt.show()


path = '/scratch/Trainee_DATA/Juhi/cyclone_files/'
aa3 = np.loadtxt(path + 'mix_training_cyclone_set.txt', unpack=True)
data3 = aa3.T
x_train = np.squeeze(data3[:, 4:])
y_train = np.squeeze(data3[:, 3])

aa4 = np.loadtxt(path + 'mix_testing_cyclone_set.txt', unpack=True)
data4 = aa4.T
x_test = np.squeeze(data4[:, 4:])
y_test = np.squeeze(data4[:, 3])

reg = linear_model.Lasso(alpha=.5)
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)

print(metrics.mean_absolute_error(y_test, y_pred))
print(metrics.mean_squared_error(y_test, y_pred))
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
plot(y_pred, y_test)
    getCategoricalColumns(all_houses))
all_houses_onehot.info()
train_updtd, test_updtd = fileSplit(all_houses_onehot, train.shape[0])

y_train = train_updtd['SalePrice']
filterFeatures(train_updtd, ['SalePrice', 'log_sale_price'])
X_train = train_updtd
X_train.info()


def rmse(y_orig, y_pred):
    return math.sqrt(metrics.mean_squared_error(y_orig, y_pred))


lasso_estimator = linear_model.Lasso(random_state=2017)
lasso_grid = {'alpha': [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]}
grid_lasso_estimator = model_selection.GridSearchCV(
    lasso_estimator, lasso_grid, scoring=metrics.make_scorer(rmse), cv=10)
grid_lasso_estimator.fit(X_train, y_train)
print(grid_lasso_estimator.grid_scores_)
print(grid_lasso_estimator.best_params_)
print(grid_lasso_estimator.best_score_)
print(grid_lasso_estimator.score(X_train, y_train))
estimator = grid_lasso_estimator.best_estimator_
estimator.coef_
estimator.intercept_

################## Final Predictions Preparation
# total_missing_test = test_updtd.isnull().sum()
coeff_names_w = weather_generation_dep.columns.values
y_pos = np.arange(len(coeff_names_w))
fig = plt.figure()
plt.barh(coeff_names_w, scores, alpha=0.5)
plt.xlabel('R-squared value')
plt.title('Coeff of Determination')
plt.show()

# Lasso Regression
# Looking at magnitude of coefficients as a measure of predictive power
clf = linear_model.Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=100,
                         normalize=True, positive=False, random_state=None,
                         selection='cyclic', tol=0.0001, warm_start=False)
clf.fit(weather_generation_dep, gen_sum_target)

# plotting coefficient magnitudes
coeff_names = weather_generation_dep.columns.values
y_pos = np.arange(len(coeff_names))
fig = plt.figure()
plt.barh(coeff_names, clf.coef_, align='center', alpha=0.5)
plt.xlabel('coeff magnitude')
plt.title('LASSO : Predicting generation from weather coeff magnitudes')
def LASSO(A, y, w):
    # scikit-learn Lasso
    lasso = lm.Lasso(alpha=w)
    lasso.fit(A, y)
    return lasso.coef_
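# Hedged usage sketch (not in the original source): recovering a sparse coefficient
# vector with the wrapper above on a small synthetic system. The data below are
# illustrative; it assumes numpy and `import sklearn.linear_model as lm`, which the
# wrapper itself relies on.
import numpy as np

rng = np.random.RandomState(0)
A_demo = rng.randn(60, 10)
coef_true = np.zeros(10)
coef_true[[2, 7]] = [1.5, -2.0]
y_demo = A_demo @ coef_true + 0.01 * rng.randn(60)
print(LASSO(A_demo, y_demo, w=0.05))  # most entries shrink to (near) zero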
# TODO: Add import statements
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model

# Assign the data to predictor and outcome variables
# TODO: Load the data
train_data = pd.read_csv('data.csv')
X = train_data.iloc[:, :-1]
y = train_data.iloc[:, -1]

# TODO: Create the standardization scaling object.
scaler = StandardScaler()

# TODO: Fit the standardization parameters and scale the data.
X_scaled = scaler.fit_transform(X)

# TODO: Create the linear regression model with lasso regularization.
lasso_reg = linear_model.Lasso()

# TODO: Fit the model.
lasso_reg.fit(X_scaled, y)

# TODO: Retrieve and print out the coefficients from the regression model.
reg_coef = lasso_reg.coef_
print(reg_coef)
# new array with reduced number of features to store the small size images
sX = np.empty((0, 484), int)
ss = 42000

# Perform convolve on all images
for img in X[0:ss, :]:
    img2D = np.reshape(img, (28, 28))
    nImg = convolve2D(img2D, filter)
    nImg1D = np.reshape(nImg, (-1, 484))
    sX = np.append(sX, nImg1D, axis=0)

Y = Y.to_numpy()
sY = Y[0:ss]

# train and test model
sXTrain, sXTest, yTrain, yTest = train_test_split(sX, sY, test_size=0.2, random_state=0)
clf = linear_model.Lasso(alpha=1)
clf.fit(sXTrain, yTrain)

# Printing our score and creating predictions
print(clf.score(sXTest, yTest))
prediction = clf.predict(sXTest)

# Reading our sample submission file and updating it
submissionFile = pd.read_csv('sample_submission.csv')
submissionFile['Label'] = prediction
submissionFile.to_csv('Linear7x7.csv', index=False)
def k_fold_x_validation(x, y, k, mode, *param):
    idx_shuffle = np.random.permutation(np.size(x, 0))
    if len(x.shape) == 2:
        x = x[idx_shuffle, :]
    else:
        x = x[idx_shuffle]
    y = y[idx_shuffle]
    test_size = int(np.size(x, 0) / k)
    test_MSE = np.zeros((k))

    for ii in range(k):
        test_x = x[ii * test_size:(ii + 1) * test_size]
        test_y = y[ii * test_size:(ii + 1) * test_size]
        train_x = np.delete(x, np.arange(ii * test_size, (ii + 1) * test_size), 0)
        train_y = np.delete(y, np.arange(ii * test_size, (ii + 1) * test_size))

        if mode == 'Linear':
            if param:
                alpha = param[0]
            else:
                alpha = 0.01
            beta, train_MSE, test_MSE[ii], y_etim = linear_regression(
                train_x, train_y, alpha, test_x, test_y)
        elif mode == 'RandomForest':
            if param:
                depth = param[0]
            else:
                depth = 4
            regressor = RandomForestRegressor(n_estimators=20, max_depth=depth)
            regressor.fit(train_x, train_y)
            test_y_estim = regressor.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'NeuralNetwork':
            if param:
                h_layer_sizes = param
            else:
                h_layer_sizes = (30)
            neural_net = MLPRegressor(hidden_layer_sizes=h_layer_sizes, solver='sgd')
            neural_net.fit(train_x, train_y)
            test_y_estim = neural_net.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'Polynomial':
            if param:
                deg = param[0]
            else:
                deg = 2
            poly = PolynomialFeatures(degree=deg)
            train_x_poly = poly.fit_transform(train_x)
            test_x_poly = poly.fit_transform(test_x)
            regressor = linear_model.Ridge(alpha=0.01)
            regressor.fit(train_x_poly, train_y)
            test_y_estim = regressor.predict(test_x_poly)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'Lasso':
            if param:
                a = param[0]
            else:
                a = 0.1
            regressor = linear_model.Lasso(alpha=a)
            regressor.fit(train_x, train_y)
            test_y_estim = regressor.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'Ridge':
            if param:
                a = param[0]
            else:
                a = 0.1
            regressor = linear_model.Ridge(alpha=a)
            regressor.fit(train_x, train_y)
            test_y_estim = regressor.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        else:
            raise ValueError('Regression mode used is not defined!')

    if mode == 'Linear':
        beta, train_MSE, y_estim = linear_regression(x, y, alpha)
        return beta, train_MSE, test_MSE, y_estim
    elif mode == 'RandomForest':
        regressor = RandomForestRegressor(n_estimators=20, max_depth=depth)
        regressor.fit(x, y)
        y_estim = regressor.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))
        plt.figure()
        plt.scatter(y_estim, y - y_estim, alpha=1, marker='o')
        plt.xlabel('Fitted Value', fontsize=30)
        plt.ylabel('Residual', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        plt.figure()
        plt.scatter(y, y_estim, alpha=1, marker='o')
        plt.xlabel('Data', fontsize=30)
        plt.ylabel('Fitted Value', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        return regressor, train_MSE, test_MSE, y_estim
    elif mode == 'NeuralNetwork':
        neural_net = MLPRegressor(hidden_layer_sizes=h_layer_sizes, solver='sgd')
        neural_net.fit(x, y)
        y_estim = neural_net.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))
        return neural_net, train_MSE, test_MSE, y_estim
    elif mode == 'Polynomial':
        poly = PolynomialFeatures(degree=deg)
        x_poly = poly.fit_transform(x)
        regressor = linear_model.Ridge(alpha=0.01)
        regressor.fit(x_poly, y)
        y_estim = regressor.predict(x_poly)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))
        return regressor, train_MSE, test_MSE, y_estim
    elif mode == 'Lasso':
        regressor = linear_model.Lasso(alpha=a)
        regressor.fit(x, y)
        y_estim = regressor.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))
        plt.figure()
        plt.scatter(y_estim, y - y_estim, alpha=1, marker='o')
        plt.xlabel('Fitted Value', fontsize=30)
        plt.ylabel('Residual', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        plt.figure()
        plt.scatter(y, y_estim, alpha=1, marker='o')
        plt.xlabel('Data', fontsize=30)
        plt.ylabel('Fitted Value', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        return regressor.coef_, train_MSE, test_MSE, y_estim
    elif mode == 'Ridge':
        regressor = linear_model.Ridge(alpha=a)
        regressor.fit(x, y)
        y_estim = regressor.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))
        plt.figure()
        plt.scatter(y_estim, y - y_estim, alpha=1, marker='o')
        plt.xlabel('Fitted Value', fontsize=30)
        plt.ylabel('Residual', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        plt.figure()
        plt.scatter(y, y_estim, alpha=1, marker='o')
        plt.xlabel('Data', fontsize=30)
        plt.ylabel('Fitted Value', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        return regressor.coef_, train_MSE, test_MSE, y_estim
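# Hedged usage sketch (not in the original file): running 5-fold cross-validation with
# the 'Lasso' branch above on synthetic data. Assumes the numpy, matplotlib and sklearn
# imports used by k_fold_x_validation are already in scope; the data and alpha value
# below are illustrative only.
x_demo = np.random.randn(200, 5)
w_true = np.array([1.0, 0.0, -2.0, 0.0, 0.5])
y_demo = x_demo @ w_true + 0.1 * np.random.randn(200)
coef_demo, train_rmse, test_rmse, y_fit = k_fold_x_validation(x_demo, y_demo, 5, 'Lasso', 0.05)
print("train RMSE:", train_rmse, "mean test RMSE:", test_rmse.mean())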
# feature_model = SelectFromModel(llas, prefit=True)
# F = feature_model.transform(Features)

# Use principal component analysis for best feature selection
random.seed(10)  # set random starting point
pca = PCA(n_components=num_best_features)
pcam = pca.fit(Features, y)
F = pcam.transform(Features)
plt.figure(1)
plt.plot(np.cumsum(pcam.explained_variance_ratio_))
plt.xlabel('Principal Component')
plt.ylabel('Cumulative Explained Variance')

llas = linear_model.Lasso(alpha=0.1).fit(F, y)
feature_model = SelectFromModel(llas, prefit=True)
F = feature_model.transform(F)

# split F and y into training and testing sets
F_train, F_test, y_train, y_test = train_test_split(
    F, y, test_size=test_size)  # use non-random data splitting

## Run best features on the Machine learning classifier model ##
x_train = F_train  # X_new_train
y_train = y_train
x_test = F_test  # X_new_test
y_test = y_test

# Define model: uncomment the model of interest below