def save_elastic_net_regression():
    """Grid-search ElasticNet's alpha on the global ``df`` and save a report.

    Scans alpha = 0.1 * k for k in 1..999, keeps the alpha with the best
    in-sample R^2, refits at that alpha, and writes alpha, R^2, and the
    formatted prediction frame to ./results/elasticNetRegression.txt.
    """
    features = df.columns.tolist()
    # Drop columns by position; index 10 is removed first so the later
    # deletion of index 0 does not shift it.
    del features[10]
    del features[0]
    X = df[features]
    y = df["class"]

    best_alpha = 0.1
    base = ElasticNet(random_state=0, alpha=best_alpha)
    base.fit(X, y)
    best_score = base.score(X, y, sample_weight=None)

    for k in range(1, 1000):
        alpha = 0.1 * k
        model = ElasticNet(random_state=0, alpha=alpha)
        model.fit(X, y)
        # Fixed: compute the score once per candidate (the original called
        # .score() up to three times per iteration).
        score = model.score(X, y, sample_weight=None)
        if score > best_score:
            best_alpha = alpha
            best_score = score

    # Refit at the winning alpha for the final report.
    model = ElasticNet(random_state=0, alpha=best_alpha)
    model.fit(X, y)
    params = np.round(np.append(model.intercept_, model.coef_), 4)
    predictions = model.predict(X)
    myDF3 = get_formatted_data_frame_from_predictions(
        X, y, predictions, params, features)

    # Fixed: the original leaked the file handle; the context manager
    # guarantees it is closed even on error.
    with open("./results/elasticNetRegression.txt", "w") as f:
        f.write("Alpha = " + str(best_alpha) + "\n\n")
        f.write("R-squared = " + str(best_score) + "\n\n")
        f.write(str(myDF3))
def linear_regression1():
    """ElasticNet demo on the scikit-learn diabetes dataset (prints metrics)."""
    # Load the diabetes dataset once and split it with a fixed seed.
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=8)

    elastic_net = ElasticNet(alpha=1, l1_ratio=1, max_iter=100000)
    elastic_net.fit(X_train, y_train)

    print('--------------')
    print('elastic_net.coef_:{}'.format(elastic_net.coef_[:]))
    print('elastic_net.intercept_:{}'.format(elastic_net.intercept_))
    print('----------')
    print('训练数据集得分:{:.2f}'.format(elastic_net.score(X_train, y_train)))
    print('测试数据集得分:{:.2f}'.format(elastic_net.score(X_test, y_test)))
    print('弹性网回归使用的特征数:{}'.format(np.sum(elastic_net.coef_ != 0)))
def elastic_net_model(self, X_train, y_train, X_test, y_test):
    """Fit an ElasticNet (alpha=0.253) and print train/test R^2 and MSE."""
    model = ElasticNet(alpha=.253)
    model.fit(X_train, y_train)
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    # Scoring the model
    print(model.score(X_train, y_train))
    print(model.score(X_test, y_test))
    print('MSE train: %.6f, MSE test: %.6f' % (
        mean_squared_error(y_train, pred_train),
        mean_squared_error(y_test, pred_test)))
    print('R^2 train: %.6f, R^2 test: %.6f' % (
        r2_score(y_train, pred_train),
        r2_score(y_test, pred_test)))
def enet_granger_causality_test(X_t, y_t, top_df, max_iter=10000000):
    """Refit the CV-selected elastic-net parameter pairs on the whole data.

    :param X_t: predictor matrix, shape (n_samples, n_features)
    :param y_t: response vector
    :param top_df: DataFrame with one row per candidate; must contain columns
        "alpha" (used as l1_ratio) and "lambda.min" (penalty strength) —
        the R glmnet naming convention
    :param max_iter: maximum coordinate-descent iterations
    :return: (top_df augmented with "test_err"/"score"/"df" columns,
        coefficient matrix of shape (len(top_df), n_features))
    """
    n = len(top_df)
    test_errs = np.zeros(n)
    scores = np.zeros(n)
    dfs = np.zeros(n)
    test_coefs = np.zeros((n, X_t.shape[1]))
    for i in range(n):
        alpha = top_df.iloc[i]["alpha"]
        lambda_min = top_df.iloc[i]["lambda.min"]
        enet = ElasticNet(l1_ratio=alpha, alpha=lambda_min, max_iter=max_iter)
        enet.fit(X_t, y_t)
        y_pred = enet.predict(X_t)
        test_errs[i] = np.average((y_t - y_pred)**2)
        scores[i] = enet.score(X_t, y_t)
        test_coefs[i] = enet.coef_
        # Degrees of freedom = number of nonzero coefficients.
        # Fixed: np.count_nonzero is the direct idiom for len(np.where(c)[0]).
        dfs[i] = np.count_nonzero(enet.coef_)
    top_df["test_err"] = test_errs
    top_df["score"] = scores
    top_df["df"] = dfs
    return top_df, test_coefs
def EN(data=data, city='all', label="label_activity_density"):
    """Fit ElasticNet over a grid of alphas (in-sample) and return the best.

    :param data: source DataFrame (default bound at definition time)
    :param city: 'all', or a substring matched against "city_district"
    :param label: target column name
    :return: tuple (alpha, r2, mse, rmse) of the best-scoring fit
    """
    if city == 'all':
        data2 = data.copy()
    else:
        data2 = data[data["city_district"].str.contains(city)].copy()
    target = data2[["city_district", label]]
    features = data2[features_columns]
    X = features.values
    y = target[label].values
    # Fixed: the original listed 1e-7 twice; each alpha now appears once.
    alphas = [0.0000001, 0.000001, 0.00001,
              0.0001, 0.001, 0.01,
              0.03, 0.05, 0.07, 0.1]
    best_res = (0, 0, 0, 0)
    for a in alphas:
        model = ElasticNet(alpha=a).fit(X, y)
        score = model.score(X, y)
        pred_y = model.predict(X)
        mse = mean_squared_error(y, pred_y)
        print("Alpha:{0:.5f}, R2:{1:.2f}, MSE:{2:.2f}, RMSE:{3:.2f}".format(
            a, score, mse, np.sqrt(mse)))
        if score > best_res[1]:
            best_res = (a, score, mse, np.sqrt(mse))
    return best_res
def elasticRegression(self):
    """Fit an ElasticNet on self.train, score it, and apply it to self.erTest.

    Side effects: sets self.erScore/erMse/erMae/erCoef/erNIter/er, replaces
    self.erTest with a DataFrame that gains an 'er_Amount' prediction column,
    and writes 'Elastic_Net_Apply.csv' to the working directory.
    """
    # NOTE(review): alpha=0.0 makes ElasticNet an unpenalized least-squares
    # fit; scikit-learn advises LinearRegression instead — confirm intent.
    er = ElasticNet(alpha=0.0)
    X = self.train[:, :-1]  # all columns but the last are features
    y = self.train[:, -1:]  # last column is the target
    er.fit(X, y)
    self.erScore = er.score(X, y)
    X = self.erTest[:, :-1]
    preds = er.predict(X)
    self.erTest = pd.DataFrame(self.erTest)
    self.erTest['er_Amount'] = preds
    self.erTest.to_csv('Elastic_Net_Apply.csv')
    # Column 9 presumably holds the true amount — TODO confirm upstream layout.
    yTrue = self.erTest.loc[:, 9]
    yPred = self.erTest.loc[:, 'er_Amount']
    self.erMse = sklearn.metrics.mean_squared_error(yTrue, yPred)
    self.erMae = sklearn.metrics.mean_absolute_error(yTrue, yPred)
    self.erCoef = er.coef_
    self.erNIter = er.n_iter_
    self.er = er
def elastic_net(problem, **kwargs):
    r"""Select the data series that survive an elastic-net fit.

    Parameters
    ----------
    problem : type
        Object with a ``data`` list of series and a ``goal`` series.

    kwargs['elastic_net_reg_coef'] must be a nonnegative float. This is
    the multiplier for the penalty term
    kwargs['elastic_net_ratio'] must be between 0 and 1
    kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple
        (optimum, maximum): the selected data entries and the model's R^2.
    """
    stacked = numpy.array([d['data']['values'] for d in problem.data])
    goal_values = problem.goal['data']['values']
    model = ElasticNet(alpha=kwargs['elastic_net_reg_coef'],
                       l1_ratio=kwargs['elastic_net_ratio'])
    # Series are rows in `stacked`, so fit on the transpose.
    model.fit(stacked.T, goal_values)
    tolerance = kwargs['coef_tolerance']
    optimum = [problem.data[i]
               for i, c in enumerate(model.coef_)
               if abs(c) > tolerance]
    maximum = model.score(stacked.T, goal_values)
    return (optimum, maximum)
def LinearRegression_Elastic_Net(self, X_train, X_test, Y_train, Y_test,
                                 list_of_columns, colslist, alpha_val):
    """Fit an ElasticNet on the selected feature columns and report metrics."""
    print('---------------------------------------------')
    print('LinearRegression Elastic Net')
    # Restrict both splits to the requested feature columns.
    x_train = X_train[list_of_columns]
    x_test = X_test[list_of_columns]
    y_train = Y_train
    y_test = Y_test

    regressor = ElasticNet(fit_intercept=True, alpha=alpha_val)
    regressor.fit(x_train, y_train)
    Y_pred = regressor.predict(x_test)

    print('---------------------------------------------')
    print('Coeff :', regressor.coef_)
    print('Intercept', regressor.intercept_)
    print('LScore', regressor.score(x_test, y_test))
    print('---------------------------------------------')
    print('Evaluation of Test Data')
    y_test_pred = regressor.predict(x_test)
    # Delegate detailed error reporting to the shared helper.
    self.FindErrors(x_test, y_test, y_test_pred,
                    'Linear Regressor Elastic Net', colslist, alpha_val)
def test_ElasticNet_alpha_beta(*data):
    """Surface plot of ElasticNet test scores over an (alpha, l1_ratio) grid."""
    train_x, test_x, train_y, test_y = data
    alphas = np.logspace(0, 2)
    betas = np.linspace(0.01, 1)
    # One fitted score per (alpha, beta) pair, alpha-major order.
    scores_ElasticNet = [
        ElasticNet(alpha=alpha, l1_ratio=beta)
        .fit(train_x, train_y)
        .score(test_x, test_y)
        for alpha in alphas
        for beta in betas
    ]
    # Plotting
    alphas1, betas1 = np.meshgrid(alphas, betas)
    scores = np.array(scores_ElasticNet).reshape(alphas1.shape)
    fig = plt.figure()
    ax = Axes3D(fig)
    surf = ax.plot_surface(alphas1, betas1, scores, rstride=1, cstride=1,
                           cmap=cm.jet, antialiased=False)
    fig.colorbar(surf)
    ax.set_xlabel(r'$\alpha$', fontproperties=myfont)
    ax.set_ylabel(r'$\beta$', fontproperties=myfont)
    ax.set_zlabel(r'score', fontproperties=myfont)
    ax.set_title('ElasticNet回归', fontproperties=myfont)
    plt.show()
    return
def enet_granger_causality_test(X_t, y_t, top_df, max_iter=10000000):
    """Refit the CV-selected elastic-net parameter pairs on the whole data.

    :param X_t: predictor matrix, shape (n_samples, n_features)
    :param y_t: response vector
    :param top_df: DataFrame with one row per candidate; must contain columns
        "alpha" (used as l1_ratio) and "lambda.min" (penalty strength) —
        the R glmnet naming convention
    :param max_iter: maximum coordinate-descent iterations
    :return: (top_df augmented with "test_err"/"score"/"df" columns,
        coefficient matrix of shape (len(top_df), n_features))
    """
    n = len(top_df)
    test_errs = np.zeros(n)
    scores = np.zeros(n)
    dfs = np.zeros(n)
    test_coefs = np.zeros((n, X_t.shape[1]))
    for i in range(n):
        alpha = top_df.iloc[i]["alpha"]
        lambda_min = top_df.iloc[i]["lambda.min"]
        enet = ElasticNet(l1_ratio=alpha, alpha=lambda_min, max_iter=max_iter)
        enet.fit(X_t, y_t)
        y_pred = enet.predict(X_t)
        test_errs[i] = np.average((y_t - y_pred)**2)
        scores[i] = enet.score(X_t, y_t)
        test_coefs[i] = enet.coef_
        # Degrees of freedom = number of nonzero coefficients.
        # Fixed: np.count_nonzero is the direct idiom for len(np.where(c)[0]).
        dfs[i] = np.count_nonzero(enet.coef_)
    top_df["test_err"] = test_errs
    top_df["score"] = scores
    top_df["df"] = dfs
    return top_df, test_coefs
def enet(a):
    """Fit ElasticNet(alpha=a) on the base data and write test predictions."""
    print ("Doing elastic net")
    model = ElasticNet(alpha=a)
    model.fit(base_X, base_Y)
    # Report the in-sample R^2.
    print ("Score = %f" % model.score(base_X, base_Y))
    predictions = model.predict(X_test)
    write_to_file("elastic.csv", predictions)
def test_Ridge_lasso_alpha(*data):
    """Plot Ridge / Lasso / ElasticNet test scores across a log grid of alphas.

    :param data: (train_x, test_x, train_y, test_y) splits
    """
    train_x, test_x, train_y, test_y = data
    alphas = np.logspace(0, 3, num=10)
    scores_Ridge = []
    scores_lasso = []
    scores_ElasticNet = []
    # Fixed: the index from enumerate() was never used.
    for alpha in alphas:
        regr_ridge = Ridge(alpha=alpha)
        regr_ridge.fit(train_x, train_y)
        scores_Ridge.append(regr_ridge.score(test_x, test_y))
        regr_lasso = Lasso(alpha=alpha)
        regr_lasso.fit(train_x, train_y)
        scores_lasso.append(regr_lasso.score(test_x, test_y))
        regr_ElasticNet = ElasticNet(alpha=alpha)
        regr_ElasticNet.fit(train_x, train_y)
        scores_ElasticNet.append(regr_ElasticNet.score(test_x, test_y))
    ax3 = plt_helper('ax3', 'alph参数与回归性能', xlabel=r'$\alpha$取值', ylabel='归模型的预测性能')
    ax3.plot(alphas, scores_Ridge, label='岭回归')
    ax3.plot(alphas, scores_lasso, label='Lasso回归')
    ax3.plot(alphas, scores_ElasticNet, label='ElasticNet回归')
    ax3.legend(loc='best', prop=myfont)
    # Alphas span three decades, so use a log x-axis.
    ax3.set_xscale('log')
    return
def perform_elastinet_regression(df_X, df_Y, test_X, test_Y):
    """Train ElasticNet(alpha=0.1, l1_ratio=0.7) and report the test R^2.

    :param df_X, df_Y: training split
    :param test_X, test_Y: evaluation split
    :return: dict with the model name and the rounded r2_score on test data
    """
    clf = ElasticNet(alpha=0.1, l1_ratio=0.7)
    clf.fit(df_X, df_Y)
    pred_Y = clf.predict(test_X)
    r2_score_rr = round(r2_score(test_Y, pred_Y), 3)
    # Fixed: a train-set "accuracy" percentage was computed here but never
    # used or returned; the dead computation is removed.
    returnval = {'model': 'ElasticNet', 'r2_score': r2_score_rr}
    return returnval
def _elastic_net_regularization(X_train, X_test, y_train, y_test):
    """Fit an ElasticNet and return (test R^2, test MSE).

    Fixed: the original passed l1_ratio=2, which is outside the documented
    [0, 1] range and is rejected by scikit-learn's parameter validation.
    l1_ratio=1.0 (pure L1 penalty) is the nearest valid value.
    """
    ela = ElasticNet(alpha=1.0, l1_ratio=1.0)
    ela.fit(X_train, y_train)
    y_pred = ela.predict(X_test)
    score = ela.score(X_test, y_test)
    err = mean_squared_error(y_test, y_pred)
    return score, err
def ElasticModel(X, Y):
    """Fit ElasticNet(alpha=0.05, l1_ratio=0.5) and print training MSE / R^2.

    Fixed: dropped ``normalize=False`` — it was the default value and the
    parameter was removed in scikit-learn 1.2, where passing it raises a
    TypeError. Behavior is unchanged on older versions.
    """
    elastic = ElasticNet(alpha=0.05, l1_ratio=0.5)
    elastic.fit(X, Y)
    y_predict_elastic = elastic.predict(X)
    # calculating mse
    print('For our Elastic model, are values are:')
    print('Training MSE:', mean_squared_error(Y, y_predict_elastic))
    print('Training r^2:', elastic.score(X, Y))
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Check that sparse and dense ElasticNet agree on a non-trivial dataset."""
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10
    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    # Identical hyperparameters for both solvers.
    common = dict(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                  max_iter=max_iter, tol=1e-7, positive=positive,
                  warm_start=True)

    s_clf = ElasticNet(**common)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(**common)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
def test_model(X_test, y_test, l1_ratio=.5, alpha=.5, X_train=None,
               y_train=None):
    """Train an ElasticNet with the given hyperparameters and score it.

    :param X_test, y_test: evaluation split
    :param l1_ratio, alpha: ElasticNet hyperparameters
    :param X_train, y_train: training split; required in practice (defaults
        of None are kept only to preserve the original keyword interface)
    :raises ValueError: if the training split is not supplied
    :return: R^2 of the fitted model on (X_test, y_test)
    """
    # Fail fast with a clear message instead of an opaque error inside fit().
    if X_train is None or y_train is None:
        raise ValueError("X_train and y_train must be provided")
    reg = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=50000)
    reg.fit(X_train, y_train)
    score = reg.score(X_test, y_test)
    return score
def Elastic():
    """Fit a default ElasticNet on the global split and plot its results."""
    global x1, x2, y1, y2, dict1
    model = ElasticNet()
    name = "Elastic Net"
    model.fit(x1, y1)
    y_pred = model.predict(x2)
    error = mean_squared_error(y2, y_pred)
    # Fixed: score() expects (X, y_true); the original called
    # model.score(y2, y_pred), passing targets where features belong.
    score = model.score(x2, y2)
    plotgraph(y_pred, name, error, score)
def enet_train(alpha, l1_ratio, x, y):
    """Return the mean 3-fold CV R^2 of an ElasticNet, clipping negatives to 0.

    Fixes: removed the ``normalize`` argument (deleted in scikit-learn 1.2,
    and False was the default anyway) and the unreachable second ``return``
    statement that followed the first one.
    """
    clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=1000)
    cval = cross_val_score(clf, x, y, scoring='r2', cv=3)
    # Clip negative fold scores to zero before averaging.
    cval[np.where(cval < 0)[0]] = 0
    return cval.mean()
def elastic_net(X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled):
    """Fit ElasticNet(alpha=.01) on scaled data; return predictions, MSE, R^2."""
    from sklearn.linear_model import ElasticNet
    model = ElasticNet(alpha=.01)
    model.fit(X_train_scaled, y_train_scaled)
    elasticnet_predictions = model.predict(X_test_scaled)
    MSE_elastic = mean_squared_error(y_test_scaled, elasticnet_predictions)
    r2_elastic = model.score(X_test_scaled, y_test_scaled)
    return elasticnet_predictions, MSE_elastic, r2_elastic
def enet(a):
    """Elastic net on the base data; predict on X_test2.csv, save to elastic.csv."""
    print ("Doing elastic net")
    model = ElasticNet(alpha=a)
    model.fit(base_X, base_Y)
    print ("Score = %f" % model.score(base_X, base_Y))
    # Load the held-out feature matrix from disk.
    test_frame = pd.read_csv("X_test2.csv")
    predictions = model.predict(test_frame.values)
    write_to_file("elastic.csv", predictions)
def model_el_net(args, y):
    """Fit ElasticNet(alpha=0.1, l1_ratio=0.7) and return (R^2, params, coefs).

    coefs is [intercept, coef_0, coef_1, ...].
    """
    enet = ElasticNet(alpha=0.1, l1_ratio=0.7)
    enet.fit(args, y)
    res = enet.score(args, y)
    params = enet.get_params()
    # Prepend the intercept to the coefficient list.
    coefs = [enet.intercept_, *enet.coef_]
    return res, params, coefs
def linlasso(alpha):
    """Plot train/test ElasticNet scores for integer penalties 0..alpha-1."""
    grid = np.linspace(0, np.pi, 100)
    noise = np.random.random(100)
    # Two synthetic features per sample; single-column target.
    X = [[grid[i]**2, np.exp(np.sin(noise[i]))] for i in range(len(grid))]
    y = [[0.1 * noise[i] + grid[i]] for i in range(len(grid))]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    train_scores = []
    test_scores = []
    # L1 & L2 regularization (hyperparameter): as alpha increases the model
    # underfits.
    for i in range(alpha):
        lr = ElasticNet(alpha=float(i), l1_ratio=float(i) / 10,
                        max_iter=1e5).fit(X_train, y_train)
        train_scores.append(lr.score(X_train, y_train))
        test_scores.append(lr.score(X_test, y_test))

    indices = list(range(alpha))
    plt.plot(indices, train_scores, 'r', indices, test_scores, 'k')
    plt.show()
def analyze_ElasticNet(l1_ratio, concrete_train_X, concrete_test_X,
                       concrete_train_y, concrete_test_y):
    """Fit an ElasticNet with the given l1_ratio and package the analysis."""
    model = ElasticNet(l1_ratio=l1_ratio)
    model.fit(concrete_train_X, concrete_train_y)
    predict = model.predict(concrete_test_X)
    score = model.score(concrete_test_X, concrete_test_y)
    label = "ElasticNet L1_RATIO_%s" % (l1_ratio)
    return make_analyze_response(label, concrete_test_X, concrete_test_y,
                                 predict, score)
def train(training_pandas_data, test_pandas_data, label_col, feat_cols, alpha,
          l1_ratio, training_data_path, test_data_path):
    """Train an ElasticNet, print/log RMSE and R^2 metrics, and log the model.

    :param training_pandas_data, test_pandas_data: DataFrames with label and
        feature columns
    :param label_col: name of the target column
    :param feat_cols: list of feature column names
    :param alpha, l1_ratio: ElasticNet hyperparameters
    :param training_data_path, test_data_path: paths echoed for the log only
    Side effects: logs metrics and the model to the active MLflow run.
    """
    print("train: " + training_data_path)
    print("test: " + test_data_path)
    print("alpha: ", alpha)
    print("l1-ratio: ", l1_ratio)
    print("label-col: " + label_col)
    for col in feat_cols:
        print("feat-cols: " + col)
    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values
    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values
    # We will use a linear Elastic Net model.
    en = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)
    # Calculating the scores of the model.
    test_rmse = mean_squared_error(testLabels, en.predict(testFeatures))**0.5
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    r2_score_test = en.score(testFeatures, testLabels)
    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)
    # Logging the RMSE and r2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)
    # Saving the model as an artifact.
    # NOTE(review): `sklearn` here is presumably mlflow's sklearn module
    # (e.g. `from mlflow import sklearn`) — confirm the file's imports.
    sklearn.log_model(en, "model")
    run_id = mlflow.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
def ElasticNetPrediction(X_train, y_train, X_test, y_test):
    """Fit a default ElasticNet; print RMSE/R^2, plot, and return (score, rmse).

    :return: tuple (test R^2, test RMSE)
    """
    elasticnet = ElasticNet()
    elasticnet.fit(X_train, y_train)
    elasticnet_score = elasticnet.score(X_test, y_test)
    # Fixed: removed a bare `elasticnet_score` expression that had no effect.
    elasticnet_pred = elasticnet.predict(X_test)
    # The root mean squared error
    elasticnetRMSE = sqrt(mean_squared_error(y_test, elasticnet_pred))
    print("Root mean squared error: %.2f" % elasticnetRMSE)
    print('R-squared elasticnet: %.2f' % r2_score(y_test, elasticnet_pred))
    chart_regression(elasticnet_pred, y_test, 'ElasticNetPrediction')
    return elasticnet_score, elasticnetRMSE
def runElasticNetRegressor(self):
    """Fit an ElasticNet on the instance's train split and display results.

    Fits on self.m_X_train/self.m_y_train, predicts and scores on
    self.m_X_test/self.m_y_test, then hands everything to the instance's
    plotting/reporting helpers.
    """
    # NOTE(review): `normalize=True` was deprecated in scikit-learn 1.0 and
    # removed in 1.2; on current versions this call raises a TypeError.
    # Porting to a StandardScaler pipeline preserves behavior — confirm the
    # pinned scikit-learn version before changing.
    lm = ElasticNet(fit_intercept=True, normalize=True)
    print("ElasticNet Regressor\n")
    reg = lm.fit(self.m_X_train, self.m_y_train)
    predictY = lm.predict(self.m_X_test)
    score = lm.score(self.m_X_test, self.m_y_test)
    predictTraingY = lm.predict(self.m_X_train)
    self.displayPredictPlot(predictY)
    self.displayResidualPlot(predictY, predictTraingY)
    # (method name has a typo in the class: "dispalyModelResult")
    self.dispalyModelResult(lm, predictY, score)
class boroReg:
    """Elastic-net regression over one borough's rows, with grid search.

    X/y are pre-transformed arrays; `idx` selects this borough's rows. The
    feature/target pipelines are kept so predictions can be mapped back to
    the original target scale.
    """

    def __init__(self, X, y, idx, pipe_X, pipe_y):
        self.X = X[idx, :]
        # shift to fix 1 indexing using np broadcasting
        self.y = y[idx, :]
        self._gridSearch = None
        self.pipeline_X = pipe_X
        self.pipeline_y = pipe_y
        self._searchSpace = None
        self._params = None
        self.lm = ElasticNet()

    def __imputeVals(self, in_df):
        # Delegate to the module-level imputation helper.
        return imputeVals(in_df)

    def gridSearch(self, params, cv=5, njobs=-1, verbose=50):
        """Run GridSearchCV over `params`, minimizing mean squared error."""
        self._searchSpace = params
        self._gridSearch = GridSearchCV(self.lm, params, cv=cv,
                                        scoring="neg_mean_squared_error",
                                        n_jobs=njobs, verbose=verbose)
        self._gridSearch.fit(self.X, self.y)

    def getBestParams(self):
        """Return the best parameter dict found by gridSearch().

        :raises ValueError: if gridSearch() has not been run yet
        """
        if self._gridSearch is not None:
            return self._gridSearch.best_params_
        # Fixed: raise with an explanatory message instead of bare ValueError().
        raise ValueError("gridSearch() must be run before getBestParams()")

    def getBestScore(self):
        """Return the best CV score found by gridSearch().

        :raises ValueError: if gridSearch() has not been run yet
        """
        if self._gridSearch is not None:
            return self._gridSearch.best_score_
        raise ValueError("gridSearch() must be run before getBestScore()")

    def fitModel(self, params):
        """Set `params` on the estimator and fit it on this borough's data."""
        self._params = params
        self.lm.set_params(**params)
        self.lm.fit(self.X, self.y)

    def __invert(self, y):
        # Undo the target pipeline, then the log transform applied upstream.
        return np.exp(self.pipeline_y.inverse_transform(y))

    def getTrainScore(self):
        """Return R^2 of the fitted model on the training data."""
        return self.lm.score(self.X, self.y)

    def predict(self, test_X):
        """Impute + transform raw features, predict, and invert the target."""
        piped_X = self.pipeline_X.transform(self.__imputeVals(test_X))
        preds = self.lm.predict(piped_X)
        return self.__invert(preds)
def train(training_pandas_data, test_pandas_data, label_col, feat_cols, alpha,
          l1_ratio, training_data_path, test_data_path):
    """Train an ElasticNet, print/log train and test R^2, and log the model.

    :param training_pandas_data, test_pandas_data: DataFrames with label and
        feature columns
    :param label_col: name of the target column
    :param feat_cols: list of feature column names
    :param alpha, l1_ratio: ElasticNet hyperparameters
    :param training_data_path, test_data_path: paths echoed for the log only
    Side effects: logs metrics and the model to the active MLflow run.
    """
    print("training-data-path: " + training_data_path)
    print("test-data-path: " + test_data_path)
    print("alpha: ", alpha)
    print("l1-ratio: ", l1_ratio)
    print("label-col: " + label_col)
    for col in feat_cols:
        print("feat-cols: " + col)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values
    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values

    # We will use a linear Elastic Net model.
    en = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)

    # Calculating the score of the model.
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    # Fixed: removed the dead `r2_score_test = 0` assignment that was
    # immediately overwritten on the next line.
    r2_score_test = en.score(testFeatures, testLabels)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Logging the r2 score for both sets.
    mlflow.log_metric("R2 score for training set", r2_score_training)
    mlflow.log_metric("R2 score for test set", r2_score_test)

    # Saving the model as an artifact.
    # NOTE(review): `sklearn` here is presumably mlflow's sklearn module —
    # confirm the file's imports.
    sklearn.log_model(en, "model")
    run_id = mlflow.tracking.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
def compare_elastic_high_dimension_coef_(): """利用得分曲线,展示Lasso回归和Ridge回归结合在一起,在固定高维数据集的情况下,不同alpha值的效果""" # 运行时间比较长 X_train, X_test, y_train, y_test = load_train_test_extended_boston() alpha_range = [pow(10, (alpha / 10)) for alpha in range(-50, 0, 3)] lasso_train_score, lasso_test_score = [], [] ridge_train_score, ridge_test_score = [], [] elastic_train_score, elastic_test_score = [], [] fix_elastic_train_score, fix_elastic_test_score = [], [] for alpha in alpha_range: lasso = Lasso(alpha=alpha, max_iter=100000).fit(X_train, y_train) lasso_train_score.append(lasso.score(X_train, y_train)) lasso_test_score.append(lasso.score(X_test, y_test)) ridge = Ridge(alpha=alpha).fit(X_train, y_train) ridge_train_score.append(ridge.score(X_train, y_train)) ridge_test_score.append(ridge.score(X_test, y_test)) elastic = ElasticNet(alpha=alpha, l1_ratio=alpha).fit(X_train, y_train) elastic_train_score.append(elastic.score(X_train, y_train)) elastic_test_score.append(elastic.score(X_test, y_test)) # 将最好的L1正则化系数固定,变动L2系数 elastic = ElasticNet(alpha=alpha, l1_ratio=0.005).fit(X_train, y_train) fix_elastic_train_score.append(elastic.score(X_train, y_train)) fix_elastic_test_score.append(elastic.score(X_test, y_test)) plt.plot(alpha_range, lasso_train_score, label='lasso 训练集得分') plt.plot(alpha_range, ridge_train_score, label='ridge 训练集得分') plt.plot(alpha_range, elastic_train_score, label='elastic 训练集得分') plt.plot(alpha_range, fix_elastic_train_score, label='fix elastic 训练集得分') plt.plot(alpha_range, lasso_test_score, label='lasso 测试集得分') plt.plot(alpha_range, ridge_test_score, label='ridge 测试集得分') plt.plot(alpha_range, elastic_test_score, label='elastic 测试集得分') plt.plot(alpha_range, fix_elastic_test_score, label='fix elastic 测试集得分') plt.legend(ncol=4, loc=(0, 1)) plt.xlabel("alpha") plt.ylabel("score") plt.suptitle("不同alpha值的四种回归的系数曲线图")
def explore_coefficients(dataset, alphas):
    """Collect Ridge/Lasso/ElasticNet coefficients and (train, test) scores
    for every alpha in `alphas` on the named dataset."""
    if dataset in g_datasets:
        data = DataHelper(dataset)
        X_train, X_test, y_train, y_test = do_split(data.X, data.y,
                                                    ratio=0.2, seed=42)
    else:
        raise ValueError(f'{dataset} dataset is not available here.')

    model_names = ('Ridge', 'Lasso', 'ElasticNet')
    coeffs = {name: [] for name in model_names}
    scores = {name: [] for name in model_names}
    for alpha in alphas:
        # Fresh estimators per alpha; insertion order fixes the fit order.
        candidates = {
            'Ridge': Ridge(alpha=alpha, fit_intercept=False),
            'Lasso': Lasso(alpha=alpha, fit_intercept=False),
            'ElasticNet': ElasticNet(alpha=alpha, l1_ratio=0.5),
        }
        for name, model in candidates.items():
            model.fit(X_train, y_train)
            coeffs[name].append(model.coef_)
            scores[name].append((model.score(X_train, y_train),
                                 model.score(X_test, y_test)))
    return coeffs, scores
# print ('final score: ' + str(np.mean(final_score))) # mean_coef = final_coef/10 # mean_intercept = np.mean(final_intercept) # lin_reg.coef_ = mean_coef # lin_reg.intercept_ = mean_intercept # LABEL NEW ERROR TICKET LABELS ON EXISTING DATA y_new = lin_reg.predict(X_test) # print(lin_reg.coef_) print('final score for linear Elastic Net: ' + str(lin_reg.score(X_test, y_test))) # DEFINITION FOR ERROR TICKET rate = 0.7 lower = y_new * (1-rate) result = np.expand_dims((np.squeeze(y_test) - lower) < 0, axis=1) # results = np.array(results) # plt.scatter(X_test, y_test, color='black', s=1) # plt.plot(X_test, y_new, color='blue') # plt.ylabel('price (USD)') # plt.xlabel('week score') # plt.xticks()
def fit_linear_model(X, y, results, keys,
                     alpha=np.logspace(-5, 2, 50),
                     l1_ratio=np.array([.1, .5, .7, .9, .95, .99, 1]),
                     num_cv=5,
                     verbose=False,
                     intercept_scaling=10,
                     plot_results=False,
                     labels=None):
    """Fit an elastic net, optionally selecting alpha/l1_ratio by CV first.

    If `alpha` or `l1_ratio` has more than one value, ElasticNetCV picks the
    best pair. With num_cv > 1, a K-fold (or leave-one-label-out) loop then
    estimates out-of-sample accuracy; otherwise a single model is fit on all
    data.

    Fixes: converted the Python 2 ``print`` statements to ``print()`` calls
    (the rest of the file is Python 3) and replaced ``labels == None`` with
    ``labels is None``, which is ambiguous for array-valued ``labels``.

    :return: (clf, R2, score, coef, prob, kf, group_keys) when num_cv > 1,
        otherwise (clf, R2, score, coef, prob, group_keys)
    """
    X = pp.scale(X)
    clf = []
    R2 = []
    coef = []
    prob = []
    score = []
    group_keys = []
    # CV fold count used for hyperparameter selection.
    if num_cv > 1:
        num_cv2 = num_cv
    else:
        num_cv2 = 10
    # Find best alpha and l1_ratio when a grid was supplied.
    if (np.size(alpha) > 1) or (np.size(l1_ratio) > 1):
        print("Determining best values for L1 ratio and alpha...")
        clf_temp = ENCV(l1_ratio=l1_ratio, alphas=alpha, cv=num_cv2,
                        fit_intercept=False, verbose=verbose)
        clf_temp.fit(X, y)
        best_alpha = clf_temp.alpha_
        best_l1_ratio = clf_temp.l1_ratio_
        print("Best L1 ratio: " + str(best_l1_ratio) +
              ", best alpha: " + str(best_alpha))
    else:
        best_alpha = alpha
        best_l1_ratio = l1_ratio
    # Now do cross-validation to estimate accuracy.
    if num_cv > 1:
        if labels is None:
            # NOTE(review): KFold(n=..., n_folds=...) is the pre-0.18
            # scikit-learn API — confirm the pinned version.
            kf = KFold(n=len(y), n_folds=num_cv)
        else:
            kf = LOLO(labels)
        for train, test in kf:
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            results_test, keys_test = results[test], keys[test]
            clf_temp2 = EN(l1_ratio=best_l1_ratio, alpha=best_alpha,
                           fit_intercept=False)
            clf_temp2.fit(X_train, y_train)
            pred = clf_temp2.predict(X_test)
            clf.append(clf_temp2)
            R2.append(clf_temp2.score(X_test, y_test))
            coef.append(clf_temp2.coef_)
            prob.append(diff_to_prob(pred))
            score.append(lossFx(results_test, pred))
            group_keys.append(keys_test)
    else:
        # Single fit on all data; scalars replace the per-fold lists.
        clf_temp2 = EN(l1_ratio=best_l1_ratio, alpha=best_alpha,
                       fit_intercept=False)
        clf_temp2.fit(X, y)
        pred = clf_temp2.predict(X)
        clf = clf_temp2
        R2 = clf_temp2.score(X, y)
        coef = clf_temp2.coef_
        prob = diff_to_prob(pred)
        score = lossFx(results, pred)
        group_keys = keys
    if num_cv > 1:
        return clf, R2, score, coef, prob, kf, group_keys
    else:
        return clf, R2, score, coef, prob, group_keys
# ElasticNet Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import ElasticNet

# load the diabetes datasets
diabetes = datasets.load_diabetes()

# fit a model to the data
model = ElasticNet(alpha=0.1)
model.fit(diabetes.data, diabetes.target)
print(model)

# make predictions
expected = diabetes.target
predicted = model.predict(diabetes.data)

# summarize the fit of the model
mse = np.mean((predicted - expected) ** 2)
print(mse)
print(model.score(diabetes.data, diabetes.target))
# Candidate regressors; only `dtr`, `net`, and `lr` are fitted below
# (`lasso`, `ridge`, and `bagger` are constructed but unused here).
net = ElasticNet(alpha=1.5)
lasso = Lasso(alpha=5)
ridge = Ridge(alpha=3)
lr = LinearRegression()
dtr = DecisionTreeRegressor(max_depth=17)
bagger = BaggingRegressor(net, verbose = 1)
X_train, X_test, y_train, y_test = train_test_split(X_model, y)
dtr.fit(X_train,y_train)
# NOTE(review): the bare `.score(...)` / `.mean()` / `.head()` expressions in
# this chunk are no-ops in a plain script; they only display in a
# notebook/REPL — confirm this is notebook-derived code.
dtr.score(X_test, y_test)
pred = dtr.predict(X_test)
# Residual-style scatter of (0.8 * prediction - truth).
plt.scatter(y_test, (pred*0.8)-y_test)
net.fit(X_train, y_train)
net.score(X_test, y_test)
preds = net.predict(X_test)
plt.scatter(y_test, (preds) - y_test, alpha = 0.7)
scores = cross_val_score(net, scale(X_model), y, cv=12)
scores.mean()
# Second experiment: predict compilation_3 from the first three compilations.
X2 = pivoted[['compilation_0', 'compilation_1', 'compilation_2']]
y2 = pivoted.compilation_3
X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2)
lr.fit(X_train, y_train)
lr.score(X_test, y_test)
pivoted.head()
mapped_pivot = pd.read_csv('pivot_catcherr.csv')