def elasticNet(X, y):
    print("\n### ~~~~~~~~~~~~~~~~~~~~ ###")
    print("Elastic Net Regression")
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    myDegree = 40
    polynomialFeatures = PolynomialFeatures(degree=myDegree, include_bias=False)
    Xp = polynomialFeatures.fit_transform(X)
    myScaler = StandardScaler()
    scaled_Xp = myScaler.fit_transform(Xp)
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    elasticNet = ElasticNet(alpha=1e-7, l1_ratio=0.5)
    elasticNet.fit(scaled_Xp, y)
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    dummyX = np.arange(0, 2, 0.01)
    dummyX = dummyX.reshape((dummyX.shape[0], 1))
    dummyXp = polynomialFeatures.transform(dummyX)  # reuse the fitted transformer
    scaled_dummyXp = myScaler.transform(dummyXp)
    dummyY = elasticNet.predict(scaled_dummyXp)
    outputFILE = 'plot-elasticNet.png'
    fig, ax = plt.subplots()
    fig.set_size_inches(h=6.0, w=10.0)
    ax.axis([0, 2, 0, 15])
    ax.scatter(X, y, color="black", s=10.0)
    ax.plot(dummyX, dummyY, color='red', linewidth=1.5)
    plt.savefig(outputFILE, bbox_inches='tight', pad_inches=0.2, dpi=600)
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    return None
def enet(a):
    print("Doing elastic net")
    clf3 = ElasticNet(alpha=a)
    clf3.fit(base_X, base_Y)
    print("Score = %f" % clf3.score(base_X, base_Y))
    clf3_pred = clf3.predict(X_test)
    write_to_file("elastic.csv", clf3_pred)
def check_ElasticNet(X, y, pred, tol, reg_alpha, reg_lambda, weights):
    enet = ElasticNet(alpha=reg_alpha + reg_lambda,
                      l1_ratio=reg_alpha / (reg_alpha + reg_lambda))
    enet.fit(X, y)
    enet_pred = enet.predict(X)
    assert np.isclose(weights, enet.coef_, rtol=tol, atol=tol).all()
    assert np.isclose(enet_pred, pred, rtol=tol, atol=tol).all()
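# The conversion used above is worth spelling out: XGBoost-style penalties come
# as separate L1 (`reg_alpha`) and L2 (`reg_lambda`) weights, while sklearn's
# ElasticNet wants a total strength `alpha` and an L1 mixing fraction `l1_ratio`.
# A minimal sketch of that mapping (the helper name `xgb_to_enet_params` is
# illustrative, not from the original code):
def xgb_to_enet_params(reg_alpha, reg_lambda):
    """Convert separate L1/L2 penalty weights into the (alpha, l1_ratio)
    parameterisation that the check above passes to ElasticNet."""
    alpha = reg_alpha + reg_lambda   # total penalty strength
    l1_ratio = reg_alpha / alpha     # fraction of the penalty that is L1
    return alpha, l1_ratio

# e.g. reg_alpha=0.1, reg_lambda=0.9  ->  alpha=1.0, l1_ratio=0.1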
def report_ff_en():
    # Fastfood approximation of Gaussian kernel
    para = FastfoodPara(n, d)
    st = time()
    PHI_train, _ = FastfoodForKernel(trainData, para, sgm)
    elapsed_ff_kern_train = time() - st
    st = time()
    PHI_valid, _ = FastfoodForKernel(validationData, para, sgm)
    elapsed_ff_kern_valid = time() - st
    # Train elastic net on projected training data
    en = ElasticNet()
    st = time()
    en.fit(PHI_train.T, trainLabels)
    elapsed_en_fit = time() - st
    # Predict labels for projected validation data
    st = time()
    y_pred = en.predict(PHI_valid.T)
    elapsed_en_pred = time() - st
    # Report performance
    mse_proj = metrics.mean_squared_error(validationLabels, y_pred)
    # print("For projected data, MSE = {:0.4g}.".format(mse_proj))
    return mse_proj, elapsed_en_fit, elapsed_ff_kern_train
def enet_granger_causality_test(X_t, y_t, top_df, max_iter=10000000):
    """
    Refit the CV-selected parameters on the whole data.

    :param X_t: predictor matrix
    :param y_t: response vector
    :param top_df: DataFrame with one row per candidate model, holding the
        CV-selected "alpha" (used as l1_ratio) and "lambda.min" (used as alpha)
    :return: top_df (with test_err, score and df columns added), test_coefs
    """
    test_errs = np.zeros(len(top_df))
    scores = np.zeros(len(top_df))
    dfs = np.zeros(len(top_df))
    test_coefs = np.zeros((len(top_df), X_t.shape[1]))
    for i in range(len(top_df)):
        alpha = top_df.iloc[i]["alpha"]
        lambda_min = top_df.iloc[i]["lambda.min"]
        enet = ElasticNet(l1_ratio=alpha, alpha=lambda_min, max_iter=max_iter)
        enet.fit(X_t, y_t)
        y_pred = enet.predict(X_t)
        test_errs[i] = np.average((y_t - y_pred) ** 2)
        scores[i] = enet.score(X_t, y_t)
        test_coefs[i] = enet.coef_
        dfs[i] = len(np.where(enet.coef_)[0])  # number of nonzero coefficients
    top_df["test_err"] = test_errs
    top_df["score"] = scores
    top_df["df"] = dfs
    return top_df, test_coefs
def elastic_net(self):
    # features = ['season', 'holiday', 'workingday', 'weather', 'humidity', 'temp', 'windspeed', 'hour', 'month', 'year', 'day_of_week']
    features = ['season', 'workingday', 'weather', 'humidity', 'windspeed',
                'hour', 'month', 'year', 'day_of_week']
    enet = ElasticNetCV()  # CV variant picks alpha itself
    enet.fit(self.train[features], self.train['log-count'])
    return self.predict(enet, "Elastic Net", features)
def train_model(features_filename):
    training_data = np.loadtxt(features_filename, delimiter=",")
    X = training_data[:, :-1]
    y = training_data[:, -1]
    # The deprecated `rho` parameter was renamed to `l1_ratio`, and
    # precompute='auto' is no longer accepted by ElasticNet.
    model = ElasticNet(alpha=1.0, l1_ratio=0.5, fit_intercept=True)
    model.fit(X, y)
    return model
def __init__(self, Dict_TrainingData, Flt_Lambda, Flt_L1):
    # Only for two classes.
    # Dict_TrainingData -- Key: 0, 1; Row: data
    self.Data1 = Dict_TrainingData[0]  # N by 256 matrix
    self.Data2 = Dict_TrainingData[1]  # V by 256 matrix
    self.Dim = len(self.Data1[0])  # 256
    self.X = np.concatenate((self.Data1, self.Data2), axis=0)  # N / V augmented matrix
    self.X = self.X - np.mean(self.X, axis=0)
    self.NumClass1 = len(self.Data1)  # N
    self.NumClass2 = len(self.Data2)  # V
    self.TotalNum = self.NumClass1 + self.NumClass2
    self.Y = self.Construct_Y()
    self.D = np.dot(np.transpose(self.Y), self.Y) / float(self.TotalNum)  # P
    self.Q = np.ones((2, 1))

    InitialTheta = np.array([2, 5])
    I = np.eye(2)
    Theta = np.dot(I - np.dot(np.dot(self.Q, np.transpose(self.Q)), self.D), InitialTheta)
    Theta /= np.sqrt(np.dot(np.dot(np.transpose(Theta), self.D), Theta))

    MaxIter = 10000
    PrevTheta = InitialTheta
    PrevB = np.ones(self.Dim)
    for idx in range(MaxIter):
        NewResp = np.dot(self.Y, Theta)
        elas = ElasticNet(alpha=Flt_Lambda, l1_ratio=Flt_L1)
        # Compute the coefficient
        B = elas.fit(X=self.X, y=NewResp).coef_
        # New optimal score
        Part1 = I - np.dot(np.dot(self.Q, np.transpose(self.Q)), self.D)
        Part2 = np.dot(Part1, np.linalg.inv(self.D))
        Part3 = np.dot(Part2, np.transpose(self.Y))
        WaveTheta = np.dot(np.dot(Part3, self.X), B)
        Theta = WaveTheta / np.sqrt(np.dot(np.dot(np.transpose(WaveTheta), self.D), WaveTheta))
        if np.sum(np.abs(B - PrevB)) < 1e-6:
            break
        else:
            PrevB = B
    self.B = B
def fit_model_12(self, toWrite=False):
    model = ElasticNet(alpha=1.0)
    for data in self.cv_data:
        X_train, X_test, Y_train, Y_test = data
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        print("Model 12 score %f" % (logloss(Y_test, pred),))
    if toWrite:
        with open('model12/model.pkl', 'wb') as f2:  # binary mode for pickle
            pickle.dump(model, f2)
def predict_linear(self, enet=True):
    """How well can we do on this SRFF with a linear regression
    (with optional elastic-net regularisation)?"""
    if enet:
        clf = ElasticNet()
    else:
        clf = LinearRegression()
    # We have to transpose X here because sklearn uses the opposite order
    # (rows v columns). Maybe this is a sign that I'm using the wrong order.
    clf.fit(self.train_X.T, self.train_y)
    yhat = clf.predict(self.test_X.T)
    err = self.defn(self.test_y, yhat)
    return clf.intercept_, clf.coef_, err
def sklean_linear_model_elastic_net():
    en = ElasticNet(fit_intercept=True, alpha=0.5)
    boston = load_boston()
    x = boston.data
    y = boston.target
    kf = KFold(n_splits=10)
    err = 0
    for train, test in kf.split(x):
        en.fit(x[train], y[train])
        # predict the whole test block at once; mapping predict over rows
        # breaks, since predict expects a 2-D array
        p = en.predict(x[test])
        e = p - y[test]
        err += np.sum(e * e)
    rmse_10cv = np.sqrt(err / len(x))
    print("RMSE on 10-fold CV: {}".format(rmse_10cv))
def report_orig_en():
    # Train elastic net on original training data
    en = ElasticNet()
    st = time()
    en.fit(trainData.T, trainLabels)
    elapsed_en_fit = time() - st
    # Predict labels for original validation data
    st = time()
    y_pred = en.predict(validationData.T)
    elapsed_en_pred = time() - st
    # Report performance
    mse_orig = metrics.mean_squared_error(validationLabels, y_pred)
    return mse_orig, elapsed_en_fit, 0.
def fit_enet(train_X, train_y, test_X):
    """Use linear regression to predict. Elastic net is LR with L1 and L2
    regularisation.

    :param train_X: training predictors
    :param train_y: training target
    :param test_X: test predictors
    :return: model description string, train predictions, test predictions
    """
    enet = ElasticNet()
    enet.fit(train_X, train_y)
    # pprint() returns None, so use pformat() to embed the coefficients
    model = "ElasticNet int %.2f coefs %s" % (enet.intercept_, pformat(enet.coef_))
    yhat_train = enet.predict(train_X)
    yhat_test = enet.predict(test_X)
    return model, yhat_train, yhat_test
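# The docstring's claim that the elastic net is linear regression with L1 plus
# L2 regularisation can be sanity-checked at the endpoints of l1_ratio: at 1.0
# the penalty is pure L1, so the fit should coincide with Lasso at the same
# alpha. A small self-contained check on synthetic data (the data below are
# made up purely for illustration):
import numpy as np
from sklearn.linear_model import ElasticNet, Lasso

rng = np.random.RandomState(0)
X = rng.randn(50, 10)
y = X @ rng.randn(10) + 0.1 * rng.randn(50)

# l1_ratio=1.0 turns the elastic-net penalty into a pure lasso penalty
enet = ElasticNet(alpha=0.1, l1_ratio=1.0).fit(X, y)
lasso = Lasso(alpha=0.1).fit(X, y)
print(np.allclose(enet.coef_, lasso.coef_))  # expected: True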
def create_ml_classifier(df):
    import operator
    X = np.array(df.drop('base_ip_release', 1))
    y = np.array(df['base_ip_release'])
    # clf = LinearRegression()
    clf = ElasticNet(alpha=1, l1_ratio=0.5)
    # clf = Ridge(alpha=2)

    # train_X, test_X, train_y, test_y = cross_validation.train_test_split(X, y, train_size=0.9)
    # sc = StandardScaler()
    # sc.fit(train_X)
    # X_train_std = sc.transform(train_X)
    # X_test_std = sc.transform(test_X)
    # clf.fit(X_train_std, train_y)
    # print(clf.predict(X_test_std))
    # print(accuracy_score(test_y, clf.predict(X_test_std)))

    kf = k(len(y), n_folds=10)  # `k` is the KFold class
    c = 0
    min_dict = {}
    get_error = []
    for train, test in kf:
        get_clif = clf.fit(X[train], y[train])
        p = clf.predict(X[test])
        e = p - y[test]
        t = np.dot(e, e)
        c += t
        min_dict[t] = get_clif
        get_error.append(t)
    min_error = min(get_error)
    print(sorted(min_dict.items(), key=operator.itemgetter(0)))
    print(min_dict[min_error])
    print(c)
    print(np.sqrt(c / len(X)))
    return min_dict[min_error]
def ElasticNetRegression(input_dict):
    from sklearn.datasets import load_diabetes
    dta = load_diabetes()
    samples = dta.data
    target = dta.target
    print("*******SAMPLES********")
    print(samples)
    print("*******TARGETS********")
    print(target)
    from sklearn.linear_model import ElasticNet
    rgs = ElasticNet().fit(samples, target)
    print(rgs)
    print(rgs.predict(samples))
def main():
    seq = [[(i * .1, k * .1) for i in range(1, 3)] for k in range(1, 3)]
    seq = list(itertools.chain.from_iterable(seq))
    counter = 0  # start at the first (alpha, l1_ratio) pair
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    kfolds = KFold(X.shape[0], n_folds=4)
    for traini, testi in kfolds:
        alpha, l1 = seq[counter]
        print(alpha, l1)
        enet = ElasticNet(alpha=alpha, l1_ratio=l1)
        y_pred = enet.fit(X[traini], y[traini]).predict(X[testi])
        score = r2_score(y[testi], y_pred)
        print(score)
        counter += 1  # advance to the next pair each fold
def testLasso():
    # The objective adds a penalty on w, scaled by the number of samples.
    # Linear fits work well here because the underlying model is sparse.
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.metrics import r2_score

    # First generate some sparse data by hand
    np.random.seed(42)
    n_samples, n_features = 50, 200
    X = np.random.randn(n_samples, n_features)
    coef = 3 * np.random.randn(n_features)  # the true coefficients
    inds = np.arange(n_features)
    np.random.shuffle(inds)  # shuffle
    coef[inds[10:]] = 0  # sparsify: keep only 10 nonzero coefficients
    y = np.dot(X, coef)  # dot the data with the coefficients
    # add a little noise
    y += 0.01 * np.random.normal(size=(n_samples,))

    X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
    X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

    from sklearn.linear_model import Lasso
    alpha = 0.1
    lasso = Lasso(alpha=alpha)
    y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
    r2_score_lasso = r2_score(y_test, y_pred_lasso)  # about 0.38
    print(lasso)
    print("r2_score's result is %f" % r2_score_lasso)

    from sklearn.linear_model import ElasticNet
    enet = ElasticNet(alpha=alpha, l1_ratio=0.7)
    y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
    r2_score_enet = r2_score(y_test, y_pred_enet)  # about 0.24, worse than lasso
    print(enet)
    print("enet's result is %f" % r2_score_enet)

    plt.plot(enet.coef_, label='Elastic net coefficients')
    plt.plot(lasso.coef_, label='Lasso coefficients')
    plt.plot(coef, '--', label='original coefficients')
    plt.legend(loc="best")
    plt.title("Lasso R^2: %f, Elastic Net R^2: %f" % (r2_score_lasso, r2_score_enet))
    plt.show()
def assert_regression_result(results, tol):
    regression_results = [r for r in results
                          if r["param"]["objective"] == "reg:linear"]
    for res in regression_results:
        X = scale(res["dataset"].X,
                  with_mean=isinstance(res["dataset"].X, np.ndarray))
        y = res["dataset"].y
        reg_alpha = res["param"]["alpha"]
        reg_lambda = res["param"]["lambda"]
        pred = res["bst"].predict(xgb.DMatrix(X))
        weights = xgb_get_weights(res["bst"])[1:]
        enet = ElasticNet(alpha=reg_alpha + reg_lambda,
                          l1_ratio=reg_alpha / (reg_alpha + reg_lambda))
        enet.fit(X, y)
        enet_pred = enet.predict(X)
        assert np.isclose(weights, enet.coef_, rtol=tol,
                          atol=tol).all(), (weights, enet.coef_)
        assert np.isclose(enet_pred, pred, rtol=tol, atol=tol).all(), (
            res["dataset"].name, enet_pred[:5], pred[:5])
def imputer_train(col_ind):
    import pandas as pd
    # DataFrame.from_csv was removed; read_csv is the modern equivalent
    data = pd.read_csv("/Users/DboyLiao/Documents/kaggle/data/Display_Advertising_Challenge/complete_train.csv")
    print("[" + str(col_ind) + "th column] " + "Loading data.")
    data = data.set_index("Id")
    data = data.drop("Label", 1)
    col_name = data.columns[col_ind]
    col_classes = ["numeric" if ind <= 12 else "categorical" for ind in range(39)]
    col_class = col_classes[col_ind]
    print("[" + str(col_ind) + "th column] " + "Processing.")
    Y = data[col_name]
    X = data.drop(col_name, 1)
    if col_class == 'categorical':
        svc = SVC(C=10)
        imputer = svc.fit(X, Y)
    elif col_class == 'numeric':
        EN = ElasticNet()
        imputer = EN.fit(X, Y)
    else:
        imputer = None  # avoid returning an unbound name
    return imputer
def __init__(self, model_name, model_type, n_clusters=None, n_components=None,
             n_lag=None, regularisation=None):
    self.n_lag = n_lag
    self.model_name = model_name
    self.model_type = model_type
    # self.n_clusters = n_clusters
    # self.clustering = NeuronClustering(self.n_clusters, signal_correlation)
    if model_name == 'cca':
        self.n_components = n_components
        self.model = CCA(n_components=self.n_components)
    elif model_name == 'linear-regression':
        if regularisation is None:
            self.model = LinearRegression()
        elif regularisation == 'l1':
            self.model = Lasso()
        elif regularisation == 'l2':
            self.model = Ridge()
        elif regularisation == 'l1l2':
            self.model = ElasticNet()
        else:
            raise NotImplementedError
def Lasso():
    from sklearn.linear_model import Lasso
    from sklearn.metrics import r2_score
    alpha = 0.1
    lasso = Lasso(alpha=alpha)
    trainDat = shortData
    trainLab = shortLabels
    lassoPred = lasso.fit(trainDat, trainLab)
    labPredict = lassoPred.predict(testDat)
    r2val = r2_score(testLab, labPredict)
    print(lasso)
    print("r^2 for lasso testing is: ", r2val)

    from sklearn.linear_model import ElasticNet
    enet = ElasticNet(alpha=alpha, l1_ratio=0.7)
    enetPred = enet.fit(trainDat, trainLab)
    labPredict_enet = enet.predict(testDat)
    r2val_enet = r2_score(testLab, labPredict_enet)
    print(enet)
    print("r^2 for enet testing is: ", r2val_enet)
def lasso(filename, x_train_orig, x_devel_orig, x_test_orig,
          lab_train_orig, lab_devel_orig, lab_test_orig):
    # Normalize the data
    scaler_data = preprocessing.StandardScaler().fit(x_train_orig.toarray())
    x_train = scaler_data.transform(x_train_orig.toarray())
    x_devel = scaler_data.transform(x_devel_orig.toarray())
    x_test = scaler_data.transform(x_test_orig.toarray())
    scaler_lab = preprocessing.StandardScaler().fit(lab_train_orig)
    lab_train = scaler_lab.transform(lab_train_orig)
    lab_devel = scaler_lab.transform(lab_devel_orig)
    lab_test = scaler_lab.transform(lab_test_orig)
    # Elastic Net
    clf = ElasticNet(alpha=0.025, l1_ratio=0.7)
    clf.fit(x_train, lab_train)
    nz = (clf.coef_ != 0)
    # Save the resulting feature-selected files
    dump_svmlight_file(x_train_orig[:, nz], lab_train_orig,
                       filename + "_elasso.train.libsvm",
                       zero_based=False, comment=None, query_id=None)
    dump_svmlight_file(x_devel_orig[:, nz], lab_devel_orig,
                       filename + "_elasso.devel.libsvm",
                       zero_based=False, comment=None, query_id=None)
    dump_svmlight_file(x_test_orig[:, nz], lab_test_orig,
                       filename + "_elasso.test.libsvm",
                       zero_based=False, comment=None, query_id=None)
# Different from Lasso: it penalises, but beta for most features stays > 0
print("Ridge regression")
for alpha in range(1, 5):
    ridge = Ridge(alpha)
    ridge_scores = cross_val_score(ridge, x, y, cv=5)
    print("alpha={a}".format(a=alpha))
    print(ridge_scores.mean())
    print(ridge_scores)

# Combination of ridge and Lasso
print("Elastic net regularization")
for alpha in range(1, 5):
    elastic_net = ElasticNet(alpha)
    elastic_net_scores = cross_val_score(elastic_net, x, y, cv=5)
    print("alpha={a}".format(a=alpha))
    print(elastic_net_scores.mean())
    print(elastic_net_scores)

# The best performing regressor for this data set was elastic net with
# alpha=1, with score = 0.472705248975.
# Draw a scatter plot for values predicted with this regressor.
print("Showing scatter plot for elastic net with alpha = 1")
elastic_net = ElasticNet(1)
elastic_net.fit(x, y)
predicted_y = elastic_net.predict(x)
pc_file = "/home/pokoro/data/mesa_models/" + pop.lower() + "/" + pop.upper() + "_3_PCs.txt"
gene_annotation_file = "/home/pokoro/data/mesa_models/gencode.v18.annotation.parsed.txt"
snp_annotation_file = "/home/pokoro/data/mesa_models/" + pop.lower() + "/" + pop.upper() + "_" + chrom + "_annot.txt"

# parse the files
snpannot = get_filtered_snp_annot(snp_annotation_file)
geneannot = get_gene_annotation(gene_annotation_file, chrom)
cov = get_covariates(pc_file)
expr_df = get_gene_expression(gene_expression_file, geneannot)
genes = list(expr_df.columns)
gt_df = get_maf_filtered_genotype(snp_dosage_file)
en = ElasticNet(max_iter=10000, random_state=1234)

# where to write out the result
open("/home/pokoro/data/mesa_models/en_R_Python_compare/" + pop + "_en_py_chr" + chrom + ".txt",
     "w").write("gene_id" + "\t" + "gene_name" + "\t" + "chr" + "\t" + "cvr2")

# go through all protein-coding genes
for gene in genes:
    coords = get_gene_coords(geneannot, gene)
    gene_name = get_gene_name(geneannot, gene)
    expr_vec = expr_df[gene]
    adj_exp = adjust_for_covariates(list(expr_vec), cov)
    cis_gt = get_cis_genotype(gt_df, snpannot, coords)
reduce_cmd = "%s -v --reduce %s/config.json" % (exec_path, WD)
os.system(map_cmd)
os.system(reduce_cmd)

###########################################################################
## Do it without mapreduce
res = list()
for i, (tr, te) in enumerate(cv):
    y_true = list()
    y_pred = list()
    for key in params:
        Xtrain = X[tr, :]
        Xtest = X[te, :]
        ytrain = y[tr, :].ravel()
        ytest = y[te, :].ravel()
        mod = ElasticNet(alpha=key[0], l1_ratio=key[1])
        y_pred.append(mod.fit(Xtrain, ytrain).predict(Xtest))
        y_true.append(ytest)
    y_true = np.hstack(y_true)
    y_pred = np.hstack(y_pred)
    res.append([i, r2_score(y_true, y_pred)])

true = pd.DataFrame(res, columns=["resample_key", "r2"])
mr = pd.read_csv(os.path.join(WD, 'results.csv'))
# Check same keys
assert np.all(np.sort(true.resample_key) == np.sort(mr.resample_key))
m = pd.merge(true, mr, on="resample_key", suffixes=["_true", "_mr"])
# Check same scores
assert np.allclose(m.r2_true, m.r2_mr)
# cv_mse = numpy.append(cv_mse, [mse_10cv])
# print('{:.3f}\t {:.4f}\t\t {:.4f}'.format(a, mse_train, mse_10cv))
# pl.plot(alpha, t_mse, label='MSE_train')
# pl.plot(alpha, cv_mse, label='MSE_CrossVal')
# pl.legend(('MSE_train', 'MSE_CrossVal'))
# pl.ylabel('MSE')
# pl.xlabel('alpha')
# pl.show()

a = 0.5
for name, met in [
        ('linear_regression', LinearRegression()),
        ('lasso', Lasso(fit_intercept=True, alpha=a, normalize=True)),
        ('ridge', Ridge(fit_intercept=True, alpha=a, normalize=True)),
        ('elastic_net', ElasticNet(fit_intercept=True, alpha=a, normalize=True)),
]:
    met.fit(x, y)
    with open('../data/' + name + str(i) + '.txt', 'wb') as model_file:
        pickle.dump(met, model_file)
    # p = np.array([met.predict(xi) for xi in x])
    p = met.predict(x)
    e = bound(p) - y
    total_error = numpy.dot(e, e)
    mse_train = total_error / len(p)
    # kf = KFold(10)
    # err = 0
def get_gridsearch(
    frequency,
    horizon=10,
    n_splits=5,
    between_split_lag=None,
    scoring="neg_mean_absolute_error",
    country_code_column=None,
    country_code=None,
    sklearn_models=False,
    sklearn_models_optimize_for_horizon=False,
    autosarimax_models=False,
    autoarima_dict=None,
    prophet_models=True,
    tbats_models=False,
    exp_smooth_models=False,
    average_ensembles=False,
    stacking_ensembles=False,
    stacking_ensembles_train_horizon=10,
    stacking_ensembles_train_n_splits=20,
    clip_predictions_lower=None,
    clip_predictions_upper=None,
    exog_cols=None,
):
    """Get grid search object based on selection criteria.

    Parameters
    ----------
    frequency : str
        Frequency of timeseries. Pandas compatible frequencies
    horizon : int
        How many units of frequency (e.g. 4 quarters) should be used to find the best models
    n_splits : int
        How many cross-validation folds should be used in model selection
    between_split_lag : int
        How big a lag of observations cv_splits should have.
        If kept as None, horizon is used, resulting in non-overlapping cv_splits
    scoring : str, callable
        String of sklearn regression metric name, or hcrystalball compatible scorer.
        For creation of an hcrystalball compatible scorer use the `make_ts_scorer` function.
    country_code_column : str
        Column in data that contains country code as str (e.g. 'DE'). Used in holiday transformer.
        Only one of `country_code_column` or `country_code` can be set.
    country_code : str
        Country code as str (e.g. 'DE'). Used in holiday transformer.
        Only one of `country_code_column` or `country_code` can be set.
    sklearn_models : bool
        Whether to consider sklearn models
    sklearn_models_optimize_for_horizon : bool
        Whether to add to the default sklearn behavior also models that optimize
        predictions for each horizon
    autosarimax_models : bool
        Whether to consider auto sarimax models
    autoarima_dict : dict
        Specification of the pmdautoarima search space
    prophet_models : bool
        Whether to consider FB prophet models
    tbats_models : bool
        Whether to consider TBATS models
    exp_smooth_models : bool
        Whether to consider exponential smoothing models
    average_ensembles : bool
        Whether to consider average ensemble models
    stacking_ensembles : bool
        Whether to consider stacking ensemble models
    stacking_ensembles_train_horizon : int
        Which horizon should be used in the meta model in stacking ensembles
    stacking_ensembles_train_n_splits : int
        Number of splits used in the meta model in stacking ensembles
    clip_predictions_lower : float, int
        Minimal number allowed in the predictions
    clip_predictions_upper : float, int
        Maximal number allowed in the predictions
    exog_cols : list
        List of columns to be used as exogenous variables

    Returns
    -------
    sklearn.model_selection.GridSearchCV
        CV / Model selection configuration
    """
    exog_cols = exog_cols if exog_cols is not None else []
    # ensures only exogenous columns and country code column will be passed to model if provided
    # and column names will be stored in TSColumnTransformer
    if exog_cols:
        cols = exog_cols + [country_code_column] if country_code_column else exog_cols
        exog_passthrough = TSColumnTransformer(transformers=[("raw_cols", "passthrough", cols)])
    else:
        exog_passthrough = "passthrough"
    # ensures holiday transformer is added to the pipeline if requested
    if country_code or country_code_column:
        from hcrystalball.feature_extraction import HolidayTransformer

        holiday = HolidayTransformer(country_code=country_code,
                                     country_code_column=country_code_column)
    else:
        holiday = "passthrough"

    estimator = Pipeline([
        ("exog_passthrough", exog_passthrough),
        ("holiday", holiday),
        ("model", "passthrough"),
    ])

    scoring = get_scorer(scoring)
    cv = FinerTimeSplit(n_splits=n_splits, horizon=horizon,
                        between_split_lag=between_split_lag)

    grid_search = GridSearchCV(
        estimator=estimator,
        param_grid=[],
        scoring=scoring,
        cv=cv,
        refit=False,
        error_score=np.nan,
    )

    if autosarimax_models:
        # adding autosarimax to param_grid might cause differently found models
        # for different splits and raise inconsistency based errors.
        # sarimax pipeline is added to new grid_search's attribute (`grid_search.autosarimax`)
        # and handled in `hcrystalball.model_selection.select_model` function in following way
        # 1. get best model for the data part on last split
        # 2. append this best model to original `param_grid`
        # 3. run full grid search with `param_grid` containing
        #    sarimax model selected from autosarimax in point 1
        from hcrystalball.wrappers import SarimaxWrapper

        if autoarima_dict is None:
            autoarima_dict = {}
        if "error_action" not in autoarima_dict:
            autoarima_dict.update({"error_action": "raise"})

        grid_search.autosarimax = Pipeline(estimator.steps[:-1])
        grid_search.autosarimax.steps.append((
            "model",
            SarimaxWrapper(
                init_with_autoarima=True,
                autoarima_dict=autoarima_dict,
                clip_predictions_lower=clip_predictions_lower,
                clip_predictions_upper=clip_predictions_upper,
            ),
        ))

    if stacking_ensembles or average_ensembles or sklearn_models:
        from sklearn.linear_model import ElasticNet
        from sklearn.ensemble import RandomForestRegressor

        # TODO when scoring time is fixed, add HistGradientBoostingRegressor
        # from sklearn.experimental import enable_hist_gradient_boosting
        # from sklearn.ensemble import HistGradientBoostingRegressor
        from hcrystalball.wrappers import get_sklearn_wrapper
        from hcrystalball.feature_extraction import SeasonalityTransformer

        sklearn_model = get_sklearn_wrapper(
            RandomForestRegressor,
            clip_predictions_lower=clip_predictions_lower,
            clip_predictions_upper=clip_predictions_upper,
        )
        sklearn_model_pipeline = Pipeline([
            ("seasonality", SeasonalityTransformer(auto=True, freq=frequency)),
            ("model", sklearn_model),
        ])
        # TODO make sure naming here works as expected
        sklearn_model_pipeline.name = f"seasonality_{sklearn_model.name}"

    if sklearn_models:
        classes = [ElasticNet, RandomForestRegressor]
        models = {
            model_class.__name__: get_sklearn_wrapper(
                model_class,
                clip_predictions_lower=clip_predictions_lower,
                clip_predictions_upper=clip_predictions_upper,
            )
            for model_class in classes
        }
        optimize_for_horizon = [False, True] if sklearn_models_optimize_for_horizon else [False]

        grid_search.param_grid.append({
            "model": [sklearn_model_pipeline],
            "model__seasonality__weekly": [True, False],
            "model__model": list(models.values()),
            # TODO change add once HistGradientBoostingRegressor is back
            # "model__model": list(models.values()) + [sklearn_model]
            "model__model__optimize_for_horizon": optimize_for_horizon,
            "model__model__lags": [3, 7, 10, 14],
        })

        grid_search.param_grid.append({
            "model": [sklearn_model_pipeline],
            "model__seasonality__weekly": [True, False],
            "model__model__optimize_for_horizon": optimize_for_horizon,
            "model__model": [sklearn_model],
            "model__model__max_depth": [6],
        })

    if prophet_models:
        from hcrystalball.wrappers import ProphetWrapper

        extra_regressors = [None] if exog_cols is None else [None, exog_cols]

        grid_search.param_grid.append({
            "model": [
                ProphetWrapper(
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__seasonality_mode": ["multiplicative", "additive"],
            "model__extra_regressors": extra_regressors,
        })

        grid_search.param_grid.append({
            "model": [
                ProphetWrapper(
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__extra_seasonalities": [[{
                "name": "quarterly",
                "period": 90.0625,
                "fourier_order": 5,
                "prior_scale": 15.0,
                "mode": None,
            }]],
            "model__extra_regressors": extra_regressors,
        })

    if exp_smooth_models:
        from hcrystalball.wrappers import ExponentialSmoothingWrapper
        from hcrystalball.wrappers import HoltSmoothingWrapper
        from hcrystalball.wrappers import SimpleSmoothingWrapper

        # commented options show non-deterministic behavior
        grid_search.param_grid.append({
            "model": [
                ExponentialSmoothingWrapper(
                    freq=frequency,
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__trend": ["add"],
            "model__seasonal": [None, "add"],
            "model__damped": [True, False],
            "model__fit_params": [
                {"use_boxcox": True, "use_basinhopping": False},
                # {'use_boxcox': True, 'use_basinhopping': True},
                {"use_boxcox": False, "use_basinhopping": False},
                # {'use_boxcox': False, 'use_basinhopping': True},
            ],
        })

        grid_search.param_grid.append({
            "model": [
                ExponentialSmoothingWrapper(
                    freq=frequency,
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__trend": ["add"],
            "model__seasonal": ["mul"],
            "model__damped": [True, False],
            "model__fit_params": [
                {"use_boxcox": False, "use_basinhopping": False},
                # {'use_boxcox': False, 'use_basinhopping': True},
            ],
        })

        grid_search.param_grid.append({
            "model": [
                ExponentialSmoothingWrapper(
                    freq=frequency,
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__trend": [None],
            "model__seasonal": [None, "add", "mul"],
            "model__damped": [False],
            "model__fit_params": [
                {"use_boxcox": False, "use_basinhopping": False},
                # {'use_boxcox': False, 'use_basinhopping': True},
            ],
        })

        grid_search.param_grid.append({
            "model": [
                SimpleSmoothingWrapper(
                    freq=frequency,
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                ),
                HoltSmoothingWrapper(
                    freq=frequency,
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                ),
            ]
        })

    if tbats_models:
        from hcrystalball.wrappers import TBATSWrapper

        grid_search.param_grid.append({
            "model": [
                TBATSWrapper(
                    use_arma_errors=False,
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ]
        })

    if stacking_ensembles:
        from hcrystalball.ensemble import StackingEnsemble
        from hcrystalball.wrappers import ProphetWrapper
        from sklearn.ensemble import RandomForestRegressor

        grid_search.param_grid.append({
            "model": [
                StackingEnsemble(
                    train_n_splits=stacking_ensembles_train_n_splits,
                    train_horizon=stacking_ensembles_train_horizon,
                    meta_model=ElasticNet(),
                    horizons_as_features=True,
                    weekdays_as_features=True,
                    base_learners=[],
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__meta_model": [ElasticNet(), RandomForestRegressor()],
            "model__base_learners": [
                [
                    ProphetWrapper(
                        clip_predictions_lower=clip_predictions_lower,
                        clip_predictions_upper=clip_predictions_upper,
                    ),
                    sklearn_model_pipeline,
                ],
            ],
        })

    if average_ensembles:
        from hcrystalball.ensemble import SimpleEnsemble
        from hcrystalball.wrappers import ProphetWrapper

        grid_search.param_grid.append({
            "model": [
                SimpleEnsemble(
                    base_learners=[],
                    clip_predictions_lower=clip_predictions_lower,
                    clip_predictions_upper=clip_predictions_upper,
                )
            ],
            "model__base_learners": [
                [
                    ProphetWrapper(
                        clip_predictions_lower=clip_predictions_lower,
                        clip_predictions_upper=clip_predictions_upper,
                    ),
                    sklearn_model_pipeline,
                ],
            ],
        })

    return grid_search
np.sqrt(np.mean((pred_train_ridge - y_train) ** 2))  # 7582
np.sqrt(np.mean((pred_test_ridge - y_test) ** 2))  # 13435

# Important coefficient plot
important_coff = pd.Series(RidgeM1.coef_, index=X.columns)
important_coff.plot(kind='barh', color='g')

##################################### - Elastic Net Regression - ##########################################
### Run an elastic net regressor over a set of alpha values and observe how
### R-squared, train_rmse and test_rmse change with alpha.
train_rmse = []
test_rmse = []
R_sqrd = []
alphas = np.arange(0, 1, 0.01)
for i in alphas:
    EN = ElasticNet(alpha=i, normalize=True, max_iter=500)
    EN.fit(X_train, y_train)
    R_sqrd.append(EN.score(X_train, y_train))
    train_rmse.append(np.sqrt(np.mean((EN.predict(X_train) - y_train) ** 2)))
    test_rmse.append(np.sqrt(np.mean((EN.predict(X_test) - y_test) ** 2)))

# Plot alpha vs R-squared, then train and test RMSE.
plt.scatter(x=alphas, y=R_sqrd)
plt.xlabel("alpha")
plt.ylabel("R_Squared")

plt.scatter(x=alphas, y=train_rmse)
plt.xlabel("alpha")
plt.ylabel("RMSE")

plt.scatter(x=alphas, y=test_rmse)
plt.xlabel("alpha")
plt.ylabel("RMSE")
def get_linear_model():
    elastic_net = ElasticNet()
    return [elastic_net], ['Elastic Net']
def run_stack(SEED, col):
    dset = "4"
    trainBaseTarget = pd.read_csv('../preprocess/pre_shuffled_target_' + col + '.csv')
    trainBase = pd.read_csv('../models/Lasso' + dset + '_train_' + col + '.csv')
    # trainBase = pd.read_csv('../preprocess/pre_shuffled_train' + dset + '.csv')
    trainBase.drop(['PIDN'], axis=1, inplace=True)
    # test = pd.read_csv('../data/pre_shuffled_test.csv')
    columns = trainBase.columns
    columnsHighScore = trainBase.columns
    print(trainBase.columns)

    trainBase = np.nan_to_num(np.array(trainBase))
    targetBase = np.nan_to_num(np.array(trainBaseTarget))
    # test = np.nan_to_num(np.array(test))
    gc.collect()

    avg = 1.0
    avgLast = avg
    bestAvg = avg
    bestAlpha = 0
    NumFolds = 5

    print("Data size: " + str(len(trainBase)))
    print("Begin Training")
    lenTrainBase = len(trainBase)
    gc.collect()

    # best alpha is 0.00040
    for a in np.logspace(-8, -.5, 50):  # best values seem to be slightly greater than 0
        clf = ElasticNet(alpha=a)
        avg = 0
        coef_dataset = np.zeros((len(columns), NumFolds))
        foldCount = 0
        Folds = cross_validation.KFold(lenTrainBase, n_folds=NumFolds, indices=True)
        for train_index, test_index in Folds:
            target = [targetBase[i] for i in train_index]
            train = [trainBase[i] for i in train_index]
            targetTest = [targetBase[i] for i in test_index]
            trainTest = [trainBase[i] for i in test_index]

            target = np.array(np.reshape(target, (-1, 1)))
            targetTest = np.array(np.reshape(targetTest, (-1, 1)))

            clf.fit(train, target)
            predicted = clf.predict(trainTest)
            avg += math.sqrt(mean_squared_error(targetTest, predicted)) / NumFolds
            coef_dataset[:, foldCount] = clf.coef_
            foldCount = foldCount + 1

        coefs = coef_dataset.mean(1)
        sorted_coefs = sorted(coefs)
        coefsAboveZero = [i for i in coefs if i > 0.0]

        print("------------------------Average: " + str(avg))
        if avg < bestAvg:
            bestAvg = avg
            bestAlpha = a

    print("bestAvg: " + str(bestAvg))
    print("bestAlpha: " + str(bestAlpha))
grid_search = GridSearchCV(estimator=lasso,
                           param_grid=parameters,
                           cv=5,
                           scoring='neg_mean_squared_error',
                           n_jobs=-1)
grid_search = grid_search.fit(X_poly[:, 1:], y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_

# Using elastic net regression in combination with polynomial regression at degree 2
from sklearn.linear_model import ElasticNet
elastReg = ElasticNet(normalize=True, warm_start=True, random_state=True,
                      precompute=False, selection='cyclic')
parameters = [{
    'alpha': [1, 0.99, 0.98],
    'tol': [1e+2, 1e-6, 1e-7, 1e-4, 1e-3, 1e-2, 1e-1, 1e-0],
    'max_iter': [3000, 6000, 10000, 15000],
    'l1_ratio': [0.99, 0.98, 0.95, 1]
}]
grid_search = GridSearchCV(estimator=elastReg,
                           param_grid=parameters,
                           cv=5,
                           n_jobs=-1,
                           scoring='neg_mean_squared_error')
grid_search = grid_search.fit(X_poly[:, 1:], y_train)
def rmsle_cv(model):
    # Pass the KFold object itself to cv; calling get_n_splits() reduces it
    # to a plain integer and silently drops the shuffling.
    kf = KFold(n_folds, shuffle=True, random_state=42)
    rmse = np.sqrt(-cross_val_score(model, train.values, y_train,
                                    scoring="neg_mean_squared_error", cv=kf))
    print("rmse", rmse)
    return rmse


# Models
# LASSO regression:
lasso = make_pipeline(RobustScaler(), Lasso(alpha=0.0005, random_state=1))

# Elastic net regression:
ENet = make_pipeline(RobustScaler(),
                     ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3))

# Kernel ridge regression:
KRR = KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5)

# Gradient boosting regression:
GBoost = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.05,
                                   max_depth=4, max_features='sqrt',
                                   min_samples_leaf=15, min_samples_split=10,
                                   loss='huber', random_state=5)

# XGBoost:
model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603,
                          cv=nr_cv, verbose=1, scoring=score_calc)
grid_lasso.fit(X, y)
sc_lasso = get_best_score(grid_lasso)
pred_lasso = grid_lasso.predict(X_test)

# ### Elastic Net

# In[ ]:

from sklearn.linear_model import ElasticNet
enet = ElasticNet()
parameters = {
    'alpha': [0.1, 1.0, 10],
    'max_iter': [1000000],
    'l1_ratio': [0.04, 0.05],
    'fit_intercept': [False, True],
    'normalize': [True, False],
    'tol': [1e-02, 1e-03, 1e-04]
}
grid_enet = GridSearchCV(enet, parameters, cv=nr_cv,
                         verbose=1, scoring=score_calc)
grid_enet.fit(X_sc, y_sc)
def _select_estimator(estimator, n_jobs, n_estimators, random_state=None):
    '''Select estimator and parameters from argument name.'''
    # Regressors
    if estimator == 'RandomForestRegressor':
        param_dist = {**parameters['ensemble'], **parameters['bootstrap']}
        estimator = RandomForestRegressor(
            n_jobs=n_jobs, n_estimators=n_estimators, random_state=random_state)
    elif estimator == 'ExtraTreesRegressor':
        param_dist = {**parameters['ensemble'], **parameters['bootstrap']}
        estimator = ExtraTreesRegressor(
            n_jobs=n_jobs, n_estimators=n_estimators, random_state=random_state)
    elif estimator == 'GradientBoostingRegressor':
        param_dist = parameters['ensemble']
        estimator = GradientBoostingRegressor(
            n_estimators=n_estimators, random_state=random_state)
    elif estimator == 'SVR':
        param_dist = {**parameters['svm'], 'epsilon': [0.0, 0.1]}
        estimator = SVR(kernel='rbf', gamma='scale')
    elif estimator == 'LinearSVR':
        param_dist = {**parameters['svm'], 'epsilon': [0.0, 0.1]}
        estimator = SVR(kernel='linear')
    elif estimator == 'Ridge':
        param_dist = parameters['linear']
        estimator = Ridge(solver='auto', random_state=random_state)
    elif estimator == 'Lasso':
        param_dist = parameters['linear']
        estimator = Lasso(random_state=random_state)
    elif estimator == 'ElasticNet':
        param_dist = parameters['linear']
        estimator = ElasticNet(random_state=random_state)
    elif estimator == 'KNeighborsRegressor':
        param_dist = parameters['kneighbors']
        estimator = KNeighborsRegressor(algorithm='auto')
    # Classifiers
    elif estimator == 'RandomForestClassifier':
        param_dist = {**parameters['ensemble'], **parameters['bootstrap'],
                      **parameters['criterion']}
        estimator = RandomForestClassifier(
            n_jobs=n_jobs, n_estimators=n_estimators, random_state=random_state)
    elif estimator == 'ExtraTreesClassifier':
        param_dist = {**parameters['ensemble'], **parameters['bootstrap'],
                      **parameters['criterion']}
        estimator = ExtraTreesClassifier(
            n_jobs=n_jobs, n_estimators=n_estimators, random_state=random_state)
    elif estimator == 'GradientBoostingClassifier':
        param_dist = parameters['ensemble']
        estimator = GradientBoostingClassifier(
            n_estimators=n_estimators, random_state=random_state)
    elif estimator == 'LinearSVC':
        param_dist = parameters['linear_svm']
        estimator = LinearSVC(random_state=random_state)
    elif estimator == 'SVC':
        param_dist = parameters['svm']
        estimator = SVC(kernel='rbf', random_state=random_state, gamma='scale')
    elif estimator == 'KNeighborsClassifier':
        param_dist = parameters['kneighbors']
        estimator = KNeighborsClassifier(algorithm='auto')
    return param_dist, estimator
LSTM_params = {'learning_rate': [1e-4, 1e-5, 1e-4, 1e-6],
               'depth': [2, 2, 1, 2],
               'hidden_number': [256] * 4}
RNN_params = {'learning_rate': [0.1, 0.1, 0.1, 0.001],
              'depth': [1, 1, 2, 1],
              'hidden_number': [256] * 4}

# ********** 2. Run the rolling-window functions (3/12/24/36 months) on the full sample **********
path = r'..\DataBase\factor'  # directory holding the 96 factors
factorname = [x[1:-4] for x in os.listdir(path)]
riskfree, timeseries, factor = datatransfrom(path)[0], datatransfrom(path)[1], datatransfrom(path)[2]
timeseries2, index = datatransfrom2(path)[0], datatransfrom2(path)[1]

for i in range(4):
    output(window[i], LinearRegression(), 'OLS' + str(window[i]), riskfree[i], timeseries)
    FC(window[i], riskfree[i], timeseries, 96, 'FC')
    output(window[i], PLSRegression(PLS_params[i]), 'PLS' + str(window[i]), riskfree[i], timeseries)
    output(window[i], Lasso(alpha=lasso_params[i]), 'Lasso' + str(window[i]), riskfree[i], timeseries)
    output(window[i], Ridge(alpha=ridge_params[i]), 'Ridge' + str(window[i]), riskfree[i], timeseries)
    output(window[i],
           ElasticNet(alpha=elasticnet_params['alpha'][i],
                      l1_ratio=elasticnet_params['l1_ratio'][i]),
           'ElasticNet' + str(window[i]), riskfree[i], timeseries)
    output(window[i],
           SVR(kernel=SVR_params['kernel'][i], gamma=SVR_params['gamma'][i],
               C=SVR_params['C'][i]),
           'SVR' + str(window[i]), riskfree[i], timeseries)
    output(window[i],
           GradientBoostingRegressor(n_estimators=GBDT_params['n_estimators'][i],
                                     max_depth=GBDT_params['maxdepth'][i],
                                     learning_rate=GBDT_params['learning_rate'][i]),
           'GBDT' + str(window[i]), riskfree[i], timeseries)
    output(window[i],
           XGBRegressor(n_estimators=GBDT_params['n_estimators'][i],
                        max_depth=GBDT_params['maxdepth'][i],
                        learning_rate=GBDT_params['learning_rate'][i]),
           'XGBOOST' + str(window[i]), riskfree[i], timeseries)
    output(window[i],
           ensemblenn(5, modeluse=MLPRegressor(solver='lbfgs',
                                               max_iter=ENANN_params['max_iter'][i]),
                      pickpercent=ENANN_params['p'][i]),
           'ENANN' + str(window[i]), riskfree[i], timeseries)
    output(window[i],
           DFN.DFN(outputdim=1, neuralset=[96, 50, 25, 10, 5, 2], ctx=gpu(0), epoch=10,
                   batch_size=DFN_params['batch'][i], lr=DFN_params['learning_rate'][i]),
           'DFN' + str(window[i]), riskfree[i], timeseries)
    output2(window[i],
            rm.lstmmodule(96, LSTM_params['hidden_number'][i], LSTM_params['depth'][i],
                          100, 3571, lr=LSTM_params['learning_rate'][i]),
            'LSTM' + str(window[i]), riskfree[i], timeseries2)
    output2(window[i],
            rm.lstmmodule(96, RNN_params['hidden_number'][i], RNN_params['depth'][i],
                          100, 3571, lr=RNN_params['learning_rate'][i], ntype='RNN'),
            'RNN' + str(window[i]), riskfree[i], timeseries2)

modellist = [DFN.DFN(outputdim=1, neuralset=[96, 50, 25, 10, 5, 2], ctx=gpu(0), epoch=10,
                     batch_size=DFN_params['batch'][i], lr=DFN_params['learning_rate'][i]),
             ensemblenn(5, modeluse=MLPRegressor(solver='lbfgs',
                                                 max_iter=ENANN_params['max_iter'][i]),
                        pickpercent=ENANN_params['p'][i]),
             XGBRegressor(n_estimators=GBDT_params['n_estimators'][i],
                          max_depth=GBDT_params['maxdepth'][i],
                          learning_rate=GBDT_params['learning_rate'][i]),
             GradientBoostingRegressor(n_estimators=GBDT_params['n_estimators'][i],
                                       max_depth=GBDT_params['maxdepth'][i],
                                       learning_rate=GBDT_params['learning_rate'][i]),
             PLSRegression(PLS_params[i]),
             Ridge(alpha=ridge_params[i]),
             SVR(kernel=SVR_params['kernel'][i], gamma=SVR_params['gamma'][i],
                 C=SVR_params['C'][i])]  # PLS must be third from last
nmolist = [rm.lstmmodule(96, LSTM_params['hidden_number'][i], LSTM_params['depth'][i],
                         100, 3571, lr=LSTM_params['learning_rate'][i]),
X_net_holdout[:, n_pow] = temp[1]
X_net_test[:, n_pow] = temp[2]

####################################################################################
# Elastic net blender
####################################################################################
from sklearn.linear_model import ElasticNet

# objective function: 1 / (2 * n_samples) * ||y - Xw||^2_2
#                     + alpha * l1_ratio * ||w||_1
#                     + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2
enet = ElasticNet(alpha=1.0, l1_ratio=0.5, fit_intercept=False, normalize=False,
                  precompute=False, max_iter=1000, copy_X=True, tol=0.0001,
                  warm_start=False, positive=False)
enet_mod = enet.fit(X_net_valid, valid_Y)
pred_holdout = enet_mod.predict(X_net_holdout)
holdout_gini = Gini(holdout_Y, pred_holdout)
valid_rmse = np.sqrt(sum((pred_holdout[m] - holdout_Y[m]) ** 2
                         for m in range(len(holdout_Y))) / float(len(holdout_Y)))
print(valid_rmse, holdout_gini)

pred_test = enet_mod.predict(X_net_test)
df = pd.DataFrame(pred_test)
df.columns = ['Hazard']
indices = np.loadtxt("X_test_indices.gz", delimiter=",").astype('int32')
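# The comment block above spells out the objective sklearn minimises. As a
# quick numeric spot-check on synthetic data (everything below is illustrative,
# not part of the blender), the fitted coefficients should score no worse on
# that objective than a slightly perturbed copy:
import numpy as np
from sklearn.linear_model import ElasticNet

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = X @ np.array([1.0, -2.0, 0.0, 0.5, 0.0]) + 0.1 * rng.randn(200)

alpha, l1_ratio = 0.1, 0.5
w = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False).fit(X, y).coef_

def enet_objective(w):
    n_samples = X.shape[0]
    resid = y - X @ w
    return (resid @ resid / (2 * n_samples)
            + alpha * l1_ratio * np.abs(w).sum()
            + 0.5 * alpha * (1 - l1_ratio) * (w @ w))

print(enet_objective(w) <= enet_objective(w + 0.01))  # expected: True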
pred_train_l1 = l1Regr.predict(X_train)
pred_test_l1 = l1Regr.predict(X_test)

# GBR
myGBR = GradientBoostingRegressor(n_estimators=3000, learning_rate=0.02,
                                  max_depth=4, max_features='sqrt',
                                  min_samples_leaf=15, min_samples_split=50,
                                  loss='huber', random_state=5)
myGBR.fit(X_train, y_train)
pred_train_GBR = myGBR.predict(X_train)
pred_test_GBR = myGBR.predict(X_test)

# ENet
ENet = make_pipeline(RobustScaler(),
                     ElasticNet(alpha=4.0, l1_ratio=0.005, random_state=3))
ENet.fit(X_train, y_train)
pred_train_ENet = ENet.predict(X_train)
pred_test_ENet = ENet.predict(X_test)

# LGB
myLGB = lgb.LGBMRegressor(objective='regression', num_leaves=5,
                          learning_rate=0.05, n_estimators=600, max_bin=50,
                          bagging_fraction=0.6, bagging_freq=5,
                          feature_fraction=0.25, feature_fraction_seed=9,
                          bagging_seed=9, min_data_in_leaf=6,
                          min_sum_hessian_in_leaf=11)
myLGB.fit(X_train, y_train)
pred_train_LGB = myLGB.predict(X_train)
pred_test_LGB = myLGB.predict(X_test)
def BuildModel(self, data, labels):
    # Create and train the regressor.
    elasticNet = SElasticNet(alpha=self.rho, l1_ratio=self.alpha)
    elasticNet.fit(data, labels)
    return elasticNet
import pandas
from sklearn import model_selection
from sklearn.linear_model import ElasticNet

url = "https://goo.gl/sXleFv"
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
dataframe = pandas.read_csv(url, delim_whitespace=True, names=names)
array = dataframe.values
X = array[:, 0:13]
Y = array[:, 13]
num_folds = 10
seed = 7
# sklearn.cross_validation was removed; model_selection.KFold takes n_splits,
# random_state requires shuffle=True, and the MSE scorer is now named
# 'neg_mean_squared_error'.
kfold = model_selection.KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = ElasticNet()
scoring = 'neg_mean_squared_error'
results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print(results.mean())
from math import sqrt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

pipe = Pipeline([('sc', StandardScaler()),
                 ('poly', PolynomialFeatures(degree=2, include_bias=True)),
                 ('en', ElasticNet())])
model = GridSearchCV(pipe,
                     param_grid={
                         'en__alpha': [0.005, 0.01, 0.05, 0.1],
                         'en__l1_ratio': [0.1, 0.4, 0.8]
                     })
model.fit(train_[columns], train_['log_price'])
best_params = model.best_params_
print(best_params)

pred = np.exp(model.predict(test_))
Accuracy = sqrt(mse(pred, test['price']))
print('\nRMSE for elastic net regression : ', Accuracy)
RMSE.append(Accuracy)
Models.append('ElasticNet Regression')
options = ["0.1", "0.2", "0.3", "0.4", "0.5", "5.0", "10.0", "15.0"] signals = [options[0], options[1], options[3], options[5], options[7]] #5 MAX x_vals = [] x_real_vals = [] """ ELASTIC NET LEARNED PARAMETERS Learned from dl=10.0nm """ amax = 0.0464158883361 l1max = 2.53536449397 for i in xrange(len(signals)): yfile = path + '/Narrowband_2laser_data/2laser_dlambda=' + signals[ i] + 'nm_v1.txt' yf = pd.read_csv(yfile, sep='\t', usecols=[0, 1]) yval, OPL = yf.values[:, 1], yf.values[:, 0] enet = ElasticNet(alpha=amax, l1_ratio=l1max, positive=True) y_pred_enet = enet.fit(A1, yval).predict(A1) x_vals.append(enet.coef_) x_real_validate = np.zeros(len(A1[0])) x_real_validate[get_index(1560 + float(signals[i]) / 2.0, wavelengths)] = 0.8 x_real_validate[get_index(1560 - float(signals[i]) / 2.0, wavelengths)] = 1.0 x_real_vals.append(x_real_validate) """ --------------------------------------------------------------------------- PLOT THE RESULTS FROM x_vals & x_real_vals BELOW --------------------------------------------------------------------------- """ font = {'size': 16} matplotlib.rc('font', **font)
def Regression(train_data, train_solution, test_data, test_solution, method):
    ## Fix Data Structure ##
    train_data = train_data.values
    train_solution = train_solution.values
    test_data = test_data.values
    test_solution = test_solution.values

    ## List of Method Options with Initialization ##
    if method == 'lin_reg':  # linear regression
        from sklearn.linear_model import LinearRegression
        reg = LinearRegression()
    elif method == 'ply_reg':  # polynomial regression
        from sklearn.linear_model import LinearRegression
        reg = LinearRegression()
        poly_features = PolynomialFeatures(degree=2)
    elif method == 'rdg_reg':  # ridge regression
        from sklearn.linear_model import Ridge
        reg = Ridge()
    elif method == 'lso_reg':  # lasso regression
        from sklearn.linear_model import Lasso
        reg = Lasso(alpha=0.00001)
    elif method == 'ela_net':  # elastic net regression
        from sklearn.linear_model import ElasticNet
        reg = ElasticNet()
    elif method == 'svr_lin':  # SVM regression
        from sklearn.svm import LinearSVR
        reg = LinearSVR(epsilon=0.01, max_iter=10000)
    elif method == 'svr_2nd':  # SVR regression
        from sklearn.svm import SVR
        reg = SVR(kernel='poly', degree=2, epsilon=0.01)  # C=100
    elif method == 'svr_3rd':  # SVR regression
        from sklearn.svm import SVR
        reg = SVR(kernel='poly', degree=3, epsilon=0.01)  # C=100
    elif method == 'dcn_tre':  # decision tree
        from sklearn.tree import DecisionTreeRegressor
        reg = DecisionTreeRegressor()
    elif method == 'rdm_for':  # random forests
        from sklearn.ensemble import RandomForestRegressor
        reg = RandomForestRegressor(n_estimators=100, random_state=3)
    elif method == 'ada_bst':  # AdaBoost regressor
        from sklearn.ensemble import AdaBoostRegressor
        reg = AdaBoostRegressor(n_estimators=100, random_state=3)
    elif method == 'grd_bst':  # gradient boosting regressor
        from sklearn.ensemble import GradientBoostingRegressor
        reg = GradientBoostingRegressor(random_state=3)
    elif method == 'gss_prc':  # Gaussian process regressor
        from sklearn.gaussian_process import GaussianProcessRegressor
        reg = GaussianProcessRegressor(random_state=3)
    elif method == 'knl_rdg':  # kernel ridge regression
        from sklearn.kernel_ridge import KernelRidge
        reg = KernelRidge()
    elif method == 'nst_nbr_uni':  # k nearest neighbors regressor
        from sklearn.neighbors import KNeighborsRegressor
        reg = KNeighborsRegressor(weights='uniform')
    elif method == 'nst_nbr_dst':  # k nearest neighbors regressor
        from sklearn.neighbors import KNeighborsRegressor
        reg = KNeighborsRegressor(weights='distance')
    elif method == 'rad_nbr_uni':  # radius neighbors regressor
        from sklearn.neighbors import RadiusNeighborsRegressor
        reg = RadiusNeighborsRegressor(weights='uniform')
    elif method == 'rad_nbr_dst':  # radius neighbors regressor
        from sklearn.neighbors import RadiusNeighborsRegressor
        reg = RadiusNeighborsRegressor(weights='distance')
    elif method == 'mlp_reg':
        from sklearn.neural_network import MLPRegressor
        reg = MLPRegressor(random_state=3)
    else:
        print('Error: Regression method not recognized.\n'
              'Please pick a valid method key (example: xxx_xxx).')

    ## Preprocessing and Setup ##
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    data = scaler.fit_transform(train_data)
    # Reuse the scaler fitted on the training data; fitting a second scaler
    # on the test data would leak test statistics into the preprocessing.
    test_data = scaler.transform(test_data)
    solution = train_solution.reshape(-1, )
    if method == 'ply_reg':
        data = poly_features.fit_transform(data)
    reg.fit(data, solution)
    if len(test_data) < 5:
        predictions = reg.predict(data)
    elif len(test_data) > 5:
        if method == 'ply_reg':
            test_data = poly_features.transform(test_data)
        test_solution = test_solution.reshape(-1, )
        predictions_test = reg.predict(test_data)
        solution = test_solution
        predictions = predictions_test
    else:
        print('Error: test_set undetermined.')

    Matrix_to_save = pd.DataFrame()
    Matrix_to_save['Solution'] = solution
    Matrix_to_save['Predictions'] = predictions
    return Matrix_to_save
importances = pd.DataFrame()
oof_reg_preds = np.zeros(df_train.shape[0])
sub_reg_preds = np.zeros(df_test.shape[0])
df_test_fullvisitorid_str = df_test["fullVisitorId"].copy()
df_test["fullVisitorId"] = df_test["fullVisitorId"].astype(float)
for fold_, (trn_, val_) in enumerate(folds):
    trn_x, trn_y = df_train.iloc[trn_], y_reg.iloc[trn_]
    val_x, val_y = df_train.iloc[val_], y_reg.iloc[val_]
    trn_x["fullVisitorId"] = trn_x["fullVisitorId"].astype(float)
    val_x["fullVisitorId"] = val_x["fullVisitorId"].astype(float)
    reg = ElasticNet(random_state=0)
    reg.fit(trn_x, np.log1p(trn_y))
    oof_reg_preds[val_] = reg.predict(val_x)
    oof_reg_preds[oof_reg_preds < 0] = 0
    _preds = reg.predict(df_test)
    _preds[_preds < 0] = 0
    sub_reg_preds += np.expm1(_preds) / len(folds)

mean_squared_error(np.log1p(y_reg), oof_reg_preds) ** .5
plt.show()

pic = []
test = [0.1, 0.3, 0.5, 0.7, 0.9]
fig, axarr = plt.subplots(5, 5, figsize=(15, 15), sharex=True, sharey=True)
a = int(0)
b = int(0)
for i in test_array:
    for j in test:
        enet = ElasticNet(alpha=i, l1_ratio=j)
        # Fit the regressor to the training data
        enet.fit(X_train, y_train)
        # Predict on the test data: y_pred
        y_pred = enet.predict(X_test)
        print("Here is the ElasticNet regression with alpha = ", i,
              " and L1 ratio = ", j, " stat data:")
        print("coef: ", enet.coef_)
        print("intercept: ", enet.intercept_)
        # Compute and print R^2 and RMSE
        print("R^2: {}".format(round(enet.score(X_test, y_test), 4)))
data = pd.read_csv(wine_path)

# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)

# The predicted column is "quality", a scalar in [3, 9].
train_x = train.drop(["quality"], axis=1)
test_x = test.drop(["quality"], axis=1)
train_y = train[["quality"]]
test_y = test[["quality"]]

alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

with mlflow.start_run():
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
def mult_reg(p_x, p_y):
    """Fit several linear models.

    Parameters
    ----------
    p_x : pd.DataFrame with regressors or predictor variables
    p_y : pd.DataFrame with the variable to predict

    Returns
    -------
    r_models : dict
        Dictionary with the fitted models
    """
    xtrain, xtest, ytrain, ytest = train_test_split(p_x, p_y, test_size=.8,
                                                    random_state=455)
    # Fit linear regression
    linreg = LinearRegression(normalize=False, fit_intercept=False)
    linreg.fit(xtrain, ytrain)
    y_p_linear = linreg.predict(xtest)
    # Fit RIDGE regression
    ridgereg = Ridge(normalize=True)
    model = ridgereg.fit(xtrain, ytrain)
    y_p_ridge = model.predict(xtest)
    # Fit LASSO regression
    lassoreg = Lasso(normalize=True)
    lassoreg.fit(xtrain, ytrain)
    y_p_lasso = lassoreg.predict(xtest)
    # Fit ElasticNet regression
    enetreg = ElasticNet(normalize=True)
    enetreg.fit(xtrain, ytrain)
    y_p_enet = enetreg.predict(xtest)
    # RSS = residual sum of squares
    r_models = {
        "summary": {
            "linear rss": sum((y_p_linear - ytest) ** 2),
            "Ridge rss": sum((y_p_ridge - ytest) ** 2),
            "lasso rss": sum((y_p_lasso - ytest) ** 2),
            "elasticnet rss": sum((y_p_enet - ytest) ** 2)
        },
        "test": ytest,
        'linear': {
            'rss': sum((y_p_linear - ytest) ** 2),
            'predict': y_p_linear,
            'model': linreg,
            'intercept': linreg.intercept_,
            'coef': linreg.coef_
        },
        'ridge': {
            'rss': sum((y_p_ridge - ytest) ** 2),
            'predict': y_p_ridge,
            'model': ridgereg,
            'intercept': ridgereg.intercept_,
            'coef': ridgereg.coef_
        },
        'lasso': {
            'rss': sum((y_p_lasso - ytest) ** 2),
            'predict': y_p_lasso,
            'model': lassoreg,
            'intercept': lassoreg.intercept_,
            'coef': lassoreg.coef_
        },
        'elasticnet': {
            'rss': sum((y_p_enet - ytest) ** 2),
            'predict': y_p_enet,
            'model': enetreg,
            'intercept': enetreg.intercept_,
            'coef': enetreg.coef_
        }
    }
    return r_models
def quantify_isoforms(genes, genome, reads):
    """
    :param genes: the list of gene tuples generated by the parser
    :param genome: the full genome sequence
    :param reads: the collection of shuffled reads
    :return: a list of tuples, where the first element of the tuple is the
        transcript sequence (the isoform in terms of the exon sequences that
        form it in the genome), and the second element of the tuple is the
        abundance of that specific isoform.

    NOTE: this skeleton is built assuming the return value exists like this,
    but as long as you change the way the output file is generated, this can
    be in whatever form you like.

    Within this function, you should go through most of the process of
    quantifying isoforms given the data. This can be broken down into the
    following few steps:
      1. Align reads to the genome, exome, or isoforms; your choice of
         method, but note the length of the genome.
      2. Use the generated alignment to get exon counts.
      3. Formulate your RNA-seq problem using the isoforms and exon counts
         (linear algebra).
      4. Compute the isoform abundances based on your above formulation.
    """
    # Create phonebook mapping k-mers to (gene, exon) pairs
    pattern_length = 10
    num_genes = len(genes)
    phonebook = {}
    exon_counts = [None] * num_genes
    for i in range(num_genes):
        num_exons = len(genes[i][0])
        exon_counts[i] = [0] * num_exons
        for j in range(num_exons):
            start = genes[i][0][j][0]
            end = genes[i][0][j][1]
            for k in range(start, end - pattern_length + 2):
                pattern = genome[k:k + pattern_length]
                if pattern in phonebook:
                    phonebook[pattern].append((i, j))
                else:
                    phonebook[pattern] = [(i, j)]

    # Count exon hits, splitting credit across ambiguous k-mers
    for read in reads:
        ll = range(len(read) - pattern_length + 1)
        read_portions = [read[i:i + pattern_length] for i in ll[::pattern_length]]
        for read_portion in read_portions:
            if read_portion in phonebook:
                for gene_id, exon_id in phonebook[read_portion]:
                    exon_counts[gene_id][exon_id] += \
                        1 / len(phonebook[read_portion]) / (50 / pattern_length)

    isoforms = []
    abundances = []
    for i in range(num_genes):
        M = np.zeros((len(genes[i][0]), len(genes[i][1])))
        for j in range(len(genes[i][1])):
            isoforms.append('')
            for k in genes[i][1][j]:
                start = genes[i][0][k][0]
                end = genes[i][0][k][1]
                isoforms[len(isoforms) - 1] += genome[start:end + 1]
                M[k][j] = (end - start + 1) / 50
        print(len(isoforms[len(isoforms) - 1]))
        b = np.array(exon_counts[i])
        # Non-negative elastic net keeps the estimated abundances >= 0
        regr = ElasticNet(alpha=1.5, positive=True)
        # regr = linear_model.LassoLars(alpha=0.01, positive=True)
        regr.fit(M, b)
        x = regr.coef_
        # x = np.linalg.lstsq(M, b)[0]
        x = x / sum(x)
        print(x)
        abundances.extend(x)

    iso_abund = [(isoforms[i], abundances[i]) for i in range(len(isoforms))]
    return iso_abund
    def fit(self, *args, **kwargs):
        return ElasticNet.fit(self, *args, **kwargs)
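A delegation like this only makes sense inside an ElasticNet subclass. A minimal self-contained sketch of that pattern; the class name LoggingElasticNet and the logging behavior are assumptions for illustration, not the original class:

# Hypothetical subclass illustrating the delegation pattern above.
import numpy as np
from sklearn.linear_model import ElasticNet


class LoggingElasticNet(ElasticNet):
    """Logs each fit call, then defers to the parent implementation."""

    def fit(self, *args, **kwargs):
        print("fitting ElasticNet with alpha={}".format(self.alpha))
        return ElasticNet.fit(self, *args, **kwargs)


X = np.random.rand(20, 3)
y = X @ np.array([1.0, 2.0, 3.0])
LoggingElasticNet(alpha=0.01).fit(X, y)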
reg_1 = Lasso()
reg_1.fit(X_train, y_train)
print("Lasso Score:", reg_1.score(X_test, y_test))

# Ridge Regressor
reg_2 = Ridge()
reg_2.fit(X_train, y_train)
print("Ridge Score:", reg_2.score(X_test, y_test))

# Bayesian Ridge Regressor
reg_3 = BayesianRidge()
reg_3.fit(X_train, y_train)
print("BayesianRidge Score:", reg_3.score(X_test, y_test))

# ElasticNet Regressor
reg_4 = ElasticNet()
reg_4.fit(X_train, y_train)
print("ElasticNet Score:", reg_4.score(X_test, y_test))

# Let us predict the stock market for the next `days` days
days = 20
data_seed = df['Adj Close'].values[-window_size:][None]
input_values = {'Lasso': data_seed, 'Ridge': data_seed,
                'BayesianRidge': data_seed, 'ElasticNet': data_seed}
values = {'Lasso': [], 'Ridge': [], 'BayesianRidge': [], 'ElasticNet': []}
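The seeded windows above imply a rolling forecast loop that is not shown. A hedged sketch of one way it could proceed; the loop body is an assumption, only the variable names come from the snippet:

# Sketch of the rolling forecast the seeded dictionaries imply: each step,
# predict one value, then slide the window forward over the prediction.
import numpy as np

models = {'Lasso': reg_1, 'Ridge': reg_2,
          'BayesianRidge': reg_3, 'ElasticNet': reg_4}
for _ in range(days):
    for name, model in models.items():
        next_value = model.predict(input_values[name])[0]
        values[name].append(next_value)
        # drop the oldest point, append the prediction, keep shape (1, w)
        window = np.append(input_values[name][0][1:], next_value)
        input_values[name] = window[None]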
print(X_test.shape)
y_train = df_train["Purchase"]
df_train = df_train.drop("Purchase", axis=1)

# from sklearn.feature_selection import SelectKBest
# from sklearn.feature_selection import f_regression
# sel = SelectKBest(f_regression, k=10)
# X_tr = pd.DataFrame(sel.fit_transform(X_train, y_train))
# X_tst = pd.DataFrame(sel.transform(X_test))
# print(X_tr.shape)
# print(X_tst.shape)

from sklearn.linear_model import ElasticNet

model = ElasticNet(alpha=0.001)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

out = pd.DataFrame()
out["User_ID"] = key1
out["Product_ID"] = key2
out["Purchase"] = y_pred
out.to_csv('outavb.csv', index=False)
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.cross_validation import KFold
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

data, target = load_svmlight_file('data/E2006.train')

# Edit the lines below if you want to switch method:
# met = LinearRegression(fit_intercept=True)
met = ElasticNet(fit_intercept=True, alpha=.1)

kf = KFold(len(target), n_folds=5)
pred = np.zeros_like(target)
for train, test in kf:
    met.fit(data[train], target[train])
    pred[test] = met.predict(data[test])

print('[EN 0.1] RMSE on testing (5 fold), {:.2}'.format(
    np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
print('')

met.fit(data, target)
pred = met.predict(data)
print('[EN 0.1] RMSE on training, {:.2}'.format(
    np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on training, {:.2}'.format(r2_score(target, pred)))
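The sklearn.cross_validation module used above was removed in scikit-learn 0.20. A sketch of the same 5-fold loop under the current sklearn.model_selection API, where KFold takes n_splits and is split on the data rather than on its length:

# The same cross-validated loop under scikit-learn >= 0.18.
from sklearn.model_selection import KFold

kf = KFold(n_splits=5)
pred = np.zeros_like(target)
for train, test in kf.split(data):
    met.fit(data[train], target[train])
    pred[test] = met.predict(data[test])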
reg = linear_model.LinearRegression()

# Ridge regression
from sklearn import linear_model
reg = linear_model.Ridge(alpha=.5)

# Logistic regression
from sklearn.linear_model import LogisticRegression
clf_l1_LR = LogisticRegression(C=C, penalty='l1', tol=0.01)

# Kernel ridge regression
from sklearn.kernel_ridge import KernelRidge
KernelRidge(kernel='rbf', alpha=0.1, gamma=10)

# Lasso
from sklearn import linear_model
reg = linear_model.Lasso(alpha=0.1)

# Elastic Net
from sklearn.linear_model import ElasticNet
regr = ElasticNet(random_state=0)

# Bayesian regression
from sklearn import linear_model
reg = linear_model.BayesianRidge()

# Polynomial regression (regression on polynomial basis features)
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
poly.fit_transform(X)

# Partial least squares regression (PLS)
from sklearn.cross_decomposition import PLSCanonical
PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
             scale=True, tol=1e-06)
# Lasso
from sklearn.linear_model import Lasso

alpha = 0.1
lasso = Lasso(alpha=alpha)

y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
r2_score_lasso = r2_score(y_test, y_pred_lasso)
print(lasso)
print("r^2 on test data : %f" % r2_score_lasso)

# #############################################################################
# ElasticNet
from sklearn.linear_model import ElasticNet

enet = ElasticNet(alpha=alpha, l1_ratio=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
r2_score_enet = r2_score(y_test, y_pred_enet)
print(enet)
print("r^2 on test data : %f" % r2_score_enet)

plt.plot(enet.coef_, color='lightgreen', linewidth=2,
         label='Elastic net coefficients')
plt.plot(lasso.coef_, color='gold', linewidth=2,
         label='Lasso coefficients')
plt.plot(coef, '--', color='navy', label='original coefficients')
plt.legend(loc='best')
plt.title("Lasso R^2: %f, Elastic Net R^2: %f" %
          (r2_score_lasso, r2_score_enet))
plt.show()
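In scikit-learn the elastic net penalty with l1_ratio=1.0 reduces to the lasso penalty, so the two estimators should agree at the same alpha. A small sanity-check sketch on synthetic data (the data here is made up for illustration):

# Sketch: with l1_ratio=1.0 the elastic net coefficients should
# coincide with the lasso coefficients at the same alpha.
import numpy as np
from sklearn.linear_model import ElasticNet, Lasso

rng = np.random.RandomState(0)
Xc = rng.randn(50, 10)
yc = Xc[:, 0] - 2 * Xc[:, 1] + 0.1 * rng.randn(50)
l = Lasso(alpha=0.1).fit(Xc, yc)
e = ElasticNet(alpha=0.1, l1_ratio=1.0).fit(Xc, yc)
assert np.allclose(l.coef_, e.coef_, atol=1e-6)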
    'C': np.arange(0.25, 2, 0.25)
}

knn_params = {
    'n_neighbors': np.arange(3, 10, 2),
    'weights': ['uniform', 'distance'],
    'p': np.arange(1, 2, 0.25)
}

dt_params = {
    'criterion': ['mse', 'friedman_mse', 'mae'],
    'max_depth': np.arange(1, 50, 5)
}

models_list = [('LR', LinearRegression(), {}),
               ('Ridge', Ridge(), ridge_params),
               ('Lasso', Lasso(), lasso_params),
               ('ElasticNet', ElasticNet(), elasticnet_params),
               ('SGDRegressor', SGDRegressor(), sgdregressor_params),
               ('SVR', SVR(), svr_params),
               ('KNN', KNeighborsRegressor(), knn_params),
               ('GaussianProcess', GaussianProcessRegressor(), {}),
               ('DTree', DecisionTreeRegressor(), dt_params)]

rmsle_scores = []
r2_scores = []
model_names = []
best_estimators = []

for name, model, model_params in list(models_list):
    print('-' * 100)
    print('Fitting ', name)
    model_names.append(name)
# Lasso
from sklearn.linear_model import Lasso

alpha = 0.1
lasso = Lasso(alpha=alpha)

y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
r2_score_lasso = r2_score(y_test, y_pred_lasso)
print(lasso)
print("r^2 on test data : %f" % r2_score_lasso)

# #############################################################################
# ElasticNet
from sklearn.linear_model import ElasticNet

enet = ElasticNet(alpha=alpha, l1_ratio=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
r2_score_enet = r2_score(y_test, y_pred_enet)
print(enet)
print("r^2 on test data : %f" % r2_score_enet)

m, s, _ = plt.stem(
    np.where(enet.coef_)[0],
    enet.coef_[enet.coef_ != 0],
    markerfmt="x",
    label="Elastic net coefficients",
    use_line_collection=True,
)
plt.setp([m, s], color="#2ca02c")
m, s, _ = plt.stem(
preds = np.zeros(XALL.shape[0])
feature_importance = []
test_preds = np.zeros((test.shape[0], 5))

for cv_idx, (train_idx, valid_idx) in enumerate(kf.split(XALL)):
    print('CV epoch[{0:2d}]:'.format(cv_idx))
    train_dat = lgb.Dataset(XALL.iloc[train_idx], yALL.iloc[train_idx])
    valid_dat = lgb.Dataset(XALL.iloc[valid_idx], yALL.iloc[valid_idx])
    gbm = lgb.train(variables.lgb_params, train_dat,
                    num_boost_round=variables.num_boost_round,
                    valid_sets=valid_dat,
                    verbose_eval=100,
                    early_stopping_rounds=variables.early_stopping_rounds,
                    feval=mse)

    # Use the leaf indices of the boosted trees as features for an elastic net
    tree_feature_train = gbm.predict(XALL.iloc[train_idx],
                                     num_iteration=gbm.best_iteration,
                                     pred_leaf=True)
    regr = ElasticNet(**variables.ElasticNetParams)
    regr.fit(tree_feature_train, yALL.iloc[train_idx])

    test_feature = gbm.predict(test[predictor], pred_leaf=True,
                               num_iteration=gbm.best_iteration)
    test_preds[:, cv_idx] = regr.predict(test_feature)

preds = test_preds.mean(axis=1)
submission = pd.DataFrame({'preds': preds})
submission.to_csv('../submission/result_lgb_en.csv', index=False, header=False)
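The loop above feeds raw leaf indices straight into the elastic net, which treats leaf IDs as ordinal magnitudes. A common variant, sketched here as an assumption rather than what this script does, one-hot encodes the leaves first:

# Sketch: one-hot encode leaf indices before the linear model, so leaf
# IDs act as categorical features rather than ordinal values.
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(handle_unknown='ignore')
leaf_train = enc.fit_transform(tree_feature_train)
regr = ElasticNet(**variables.ElasticNetParams)
regr.fit(leaf_train, yALL.iloc[train_idx])
leaf_test = enc.transform(test_feature)
test_preds[:, cv_idx] = regr.predict(leaf_test)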
""" # Import necessary modules from sklearn.linear_model import ElasticNet from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split, GridSearchCV # Create train and test sets X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42) # Create the hyperparameter grid l1_space = np.linspace(0, 1, 30) param_grid = {'l1_ratio': l1_space} # Instantiate the ElasticNet regressor: elastic_net elastic_net = ElasticNet() # Setup the GridSearchCV object: gm_cv gm_cv = GridSearchCV(elastic_net, param_grid, cv=5) # Fit it to the training data gm_cv.fit(X_train, y_train) # Predict on the test set and compute metrics y_pred = gm_cv.predict(X_test) r2 = gm_cv.score(X_test, y_test) mse = mean_squared_error(y_test, y_pred) print("Tuned ElasticNet l1 ratio: {}".format(gm_cv.best_params_)) print("Tuned ElasticNet R squared: {}".format(r2)) print("Tuned ElasticNet MSE: {}".format(mse))
plt.xlim([-10, 50])
plt.show()

# Mean squared error and R^2
print('MSE train: %.3f, test: %.3f' %
      (mean_squared_error(y_train, y_train_pred),
       mean_squared_error(y_test, y_test_pred)))
print('R^2 train: %.3f, test: %.3f' %
      (r2_score(y_train, y_train_pred),
       r2_score(y_test, y_test_pred)))

# 4. Elastic Net regression model
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values  # target is the last column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8,
                                                    random_state=42)

elanet = ElasticNet(alpha=1.0, l1_ratio=0.5)
elanet.fit(X_train, y_train)
y_train_pred = elanet.predict(X_train)
y_test_pred = elanet.predict(X_test)

# Residual plot
plt.scatter(y_train_pred, y_train_pred - y_train,
            c='blue', marker='o', edgecolor='white',
            label='Training data')
plt.scatter(y_test_pred, y_test_pred - y_test,
            c='green', marker='s', edgecolor='white',
X_train = X_train[idx]
y_train = y_train[idx]

std = X_train.std(axis=0)
mean = X_train.mean(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

std = y_train.std(axis=0)
mean = y_train.mean(axis=0)
y_train = (y_train - mean) / std
y_test = (y_test - mean) / std

gc.collect()
print("- benching ElasticNet")
# ElasticNet's `rho` parameter was renamed `l1_ratio` in scikit-learn 0.13
clf = ElasticNet(alpha=alpha, l1_ratio=0.5, fit_intercept=False)
tstart = time()
clf.fit(X_train, y_train)
elnet_results[i, j, 0] = mean_squared_error(clf.predict(X_test), y_test)
elnet_results[i, j, 1] = time() - tstart

gc.collect()
print("- benching SGD")
# SGDRegressor's `n_iter` parameter is `max_iter` in current scikit-learn
max_iter = int(np.ceil(10 ** 4.0 / n_train))
clf = SGDRegressor(alpha=alpha, fit_intercept=False, max_iter=max_iter,
                   learning_rate="invscaling", eta0=.01, power_t=0.25)
tstart = time()
clf.fit(X_train, y_train)
        sum1 = err[i] + sum1
    mse1 = sum1 / len(err)
    print('mse=' + str(mse1))


def plot(y_pred, y_test):
    plt.plot(y_test, y_pred, '.')  # actual values of x and y
    plt.title('Elastic Net Regression')
    plt.xlabel('Y_test')
    plt.ylabel('Y_pred')
    plt.show()


aa = np.loadtxt('database.dat', unpack=True)
data = aa.T
data2 = data[~np.isnan(data).any(axis=1)]
y = np.squeeze(data2[:, 3])
x = np.squeeze(data2[:, 4:])

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1,
                                                    random_state=0)

model = ElasticNet()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

accuracy(y_pred, y_test)
mse(y_pred, y_test)
plot(y_pred, y_test)
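The truncated mse helper above accumulates squared errors by hand. A sketch of an equivalent built on sklearn.metrics, assuming (as the surviving loop suggests) that err holds squared residuals; only y_pred and y_test come from the snippet:

# Equivalent of the hand-rolled MSE via sklearn.metrics.
from sklearn.metrics import mean_squared_error


def mse(y_pred, y_test):
    mse1 = mean_squared_error(y_test, y_pred)
    print('mse=' + str(mse1))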