from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV, LassoLarsCV
from sklearn.metrics import mean_squared_error

print(test_X.shape)
print(test_y.shape)

# Ridge regression
# Plain Ridge (alpha: regularization strength)
model_ridge = Ridge(alpha=0.5)
model_ridge.fit(train_X, train_y)
print('R^2 on the training set: ', model_ridge.score(train_X, train_y))
print('R^2 on the validation set: ', model_ridge.score(test_X, test_y))
pred_1 = model_ridge.predict(test_X)
print('Model MSE: ', mean_squared_error(test_y, pred_1))

# RidgeCV takes a list of candidate alphas and picks the best one by cross-validation
model = RidgeCV(alphas=[0.001, 0.01, 0.1, 1.0])
model.fit(train_X, train_y)
print("Model parameters:", model.get_params())
print("Model details:", model)
print('Best alpha:', model.alpha_)  # only RidgeCV exposes alpha_; plain Ridge() does not
print('R^2 on the training set: ', model.score(train_X, train_y))
print('R^2 on the validation set: ', model.score(test_X, test_y))
pred_2 = model.predict(test_X)
print('Ridge model MSE: ', mean_squared_error(test_y, pred_2))

# Lasso regression
# The original assigned Lasso(alpha=0.01), then LassoCV(), then LassoLarsCV()
# back to back, so only the last estimator was ever fitted; keep one and
# comment out the alternatives.
# model_lasso = Lasso(alpha=0.01)
# model_lasso = LassoCV()
model_lasso = LassoLarsCV()
model_lasso.fit(train_X, train_y)
print("Model parameters:", model_lasso.get_params())
print("Model details:", model_lasso)
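# A minimal, self-contained sketch of the RidgeCV-vs-Lasso comparison above,
# using synthetic data so it runs on its own. The make_regression settings
# (n_samples, n_features, noise) are illustrative assumptions, not values
# from the original snippet.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV, LassoCV

X, y = make_regression(n_samples=200, n_features=10, noise=10.0, random_state=0)
train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=0)

ridge = RidgeCV(alphas=[0.001, 0.01, 0.1, 1.0]).fit(train_X, train_y)
lasso = LassoCV(cv=5).fit(train_X, train_y)
print('RidgeCV chose alpha =', ridge.alpha_)
print('LassoCV chose alpha =', lasso.alpha_)
print('RidgeCV test R^2:', ridge.score(test_X, test_y))
print('LassoCV test R^2:', lasso.score(test_X, test_y))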
import math

import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error

y_pred = classifier.predict(X_train)
print('Training RMSE:', math.sqrt(mean_squared_error(y_train, y_pred)))

####################### END Simple Linear Regression ###############################

####################### Ridge regression w/ cross-validation ###############################
lambdas = np.logspace(-5, -1, 50)
ridge = RidgeCV(alphas=lambdas, fit_intercept=True, cv=10)
ridge.fit(X_train, y_train)

# Notebook-style inspection of the fitted model
ridge.alpha_
# ridge.cv_values_  # only available with cv=None (leave-one-out) and store_cv_values=True;
#                   # with cv=10 this attribute does not exist and raises AttributeError
ridge.score(X_train, y_train)
ridge.get_params()
ridge.coef_
print('Best lambda: %0.5f' % ridge.alpha_)

# Coefficients are aligned with the columns of df minus the two id columns;
# any coefficient past that range belongs to the clube_id/posicao_id dummies.
feature_columns = df.drop(columns=['clube_id', 'posicao_id']).columns
important_variables = []
club_plus_position = 0
for i in range(len(ridge.coef_)):
    if abs(ridge.coef_[i]) >= 0.1:
        if i < len(feature_columns):
            important_variables.append(feature_columns[i])
        else:
            club_plus_position += 1
print('Most important variables for Ridge: ', important_variables)
print(club_plus_position)
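# Hedged sketch: if you do want the per-alpha cross-validation errors that the
# commented-out ridge.cv_values_ line was after, RidgeCV must be built with
# cv=None and store_cv_values=True (renamed store_cv_results/cv_results_ in
# recent scikit-learn releases). Synthetic data below is an assumption.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV

X_demo, y_demo = make_regression(n_samples=100, n_features=5, noise=5.0, random_state=0)
alphas = np.logspace(-5, -1, 50)
ridge_loo = RidgeCV(alphas=alphas, store_cv_values=True).fit(X_demo, y_demo)  # cv=None => leave-one-out
print(ridge_loo.cv_values_.shape)  # (n_samples, n_alphas)
print('Best alpha:', ridge_loo.alpha_)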
plt.show()

df2 = df1[df1['charging_efficiency'] <= 20].copy()  # .copy() avoids SettingWithCopyWarning below
print(len(df2['deviceid'].unique()))

# Charging efficiency at different SOC levels
df2['charging_efficiency_greater_12.5'] = (df2['charging_efficiency'] > 12.5) * 1
df3 = df2[df2['charging_efficiency'] > 12.5]

from sklearn.linear_model import RidgeCV

x = np.array(df3['start_soc']).reshape(-1, 1)
y = np.array(df3['charging_efficiency'])
model = RidgeCV()
model.fit(x, y)
pred = model.predict(np.arange(100).reshape(-1, 1))

# Notebook-style inspection of the fitted line
model.get_params()
model.intercept_
model.coef_[0]

plt.scatter(df2['start_soc'], df2['charging_efficiency'],
            c=df2['charging_efficiency_greater_12.5'])
plt.plot(np.arange(100), pred, color='r')
plt.title('charging efficiency vs soc_range\n(charging efficiency<=20)')
plt.xlabel('start_soc')
plt.ylabel('charging efficiency')
plt.text(75, 16.5,
         'y = {0}x + {1}'.format(round(model.coef_[0], 4), round(model.intercept_, 4)))
plt.show()
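# Hedged aside: with a single feature, RidgeCV's default alpha grid
# (0.1, 1.0, 10.0) usually shrinks the slope only slightly relative to plain
# least squares, which is why the fitted line above behaves like simple linear
# regression. Minimal sketch on synthetic stand-ins (slope, intercept, and
# noise values are assumptions).
import numpy as np
from sklearn.linear_model import RidgeCV, LinearRegression

rng = np.random.default_rng(0)
x_demo = rng.uniform(0, 100, size=(200, 1))                   # stand-in for start_soc
y_demo = 0.05 * x_demo[:, 0] + 10 + rng.normal(0, 1, 200)     # stand-in for charging_efficiency

ridge_demo = RidgeCV().fit(x_demo, y_demo)
ols_demo = LinearRegression().fit(x_demo, y_demo)
print('RidgeCV: y = {:.4f}x + {:.4f} (alpha={})'.format(
    ridge_demo.coef_[0], ridge_demo.intercept_, ridge_demo.alpha_))
print('OLS:     y = {:.4f}x + {:.4f}'.format(ols_demo.coef_[0], ols_demo.intercept_))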
              'min_samples_split': (2, 3, 4), 'min_samples_leaf': (1, 2, 3)}
rfr = RandomForestRegressor(random_state=seed, warm_start=True)
score = make_scorer(mean_squared_error, greater_is_better=False)
grid_obj = GridSearchCV(rfr, param_grid=parameters, scoring=score,
                        verbose=1, n_jobs=4, cv=5)
grid_obj = grid_obj.fit(X_train, y_train)
rfr = grid_obj.best_estimator_
print(rfr.get_params(), '\n')
print("Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(rfr, X_train, y_train)))
print("Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(rfr, X_valid, y_valid)))

# RidgeCV
ridge = RidgeCV(alphas=(1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1.0, 10.0), cv=5)
ridge = ridge.fit(X_train, y_train)
print(ridge.get_params(), '\n')
print("Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(ridge, X_train, y_train)))
print("Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(ridge, X_valid, y_valid)))

# Save regressors
pickle_file = 'regressor.pickle'
try:
    f = open(pickle_file, 'wb')
    save = {
        'random_forest_regressor': rfr,
        'ridge': ridge,
    }
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
    f.close()
except Exception as e:
    # minimal handler so the try/except parses; the original body was truncated
    print('Unable to save regressors to', pickle_file, ':', e)
    raise
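# Hedged companion sketch: reading the regressors back from the pickle written
# above. The file name and dictionary keys match the save block; the 'with'
# form is just a tidier equivalent of open/close.
import pickle

with open('regressor.pickle', 'rb') as f:
    saved = pickle.load(f)
rfr_loaded = saved['random_forest_regressor']
ridge_loaded = saved['ridge']
print(ridge_loaded.alpha_)  # the alpha RidgeCV selected during fitting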
from itertools import compress

import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV, LinearRegression, RidgeCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.svm import SVR


class Model:
    # Params is the project's own configuration reader
    params = Params()
    testsize = float(params.get_data_params()['testsize'])
    random_state = int(params.get_data_params()['randomstate'])

    def __init__(self, X, y, **kwargs):
        self.X, self.x_val, self.y, self.y_val = train_test_split(
            X, y, test_size=self.testsize, random_state=self.random_state)
        self.model = None
        self.FeatureSelectionType = kwargs['featureselection']
        self.features = self.X.columns
        self.model_coefficients = None
        self.EvalMetrics = kwargs['evalmetric']
        self.FeatSelCvFolds = kwargs['featureselectioncvfolds']
        self.CvFolds = kwargs['gridsearchcvfolds']
        self.gridSearch = kwargs['gridsearchcv']
        self.set_model()
        self.feature_selection()
        self.fit_model()

    def __repr__(self):
        return "Model(" + str(self.model) + ")"

    def set_model(self):
        model_name = self.params.get_model()
        if model_name == 'lr':
            self.model = LinearRegression()
            params = self.params.get_linear_reg()
            # 'regularization' is our own config key, not a sklearn parameter
            del params['regularization']
            self.model = self.model.set_params(**params)
        elif model_name == 'svr':
            self.model = SVR()
            self.model = self.model.set_params(**self.params.get_svr())
        else:
            pass

    def lasso(self):
        # L1-based feature selection: keep only columns with non-zero Lasso weight
        estimator = LassoCV(cv=5, max_iter=10000)
        selector = SelectFromModel(estimator)
        selector = selector.fit(self.X, self.y)
        bool_mask = selector.get_support()
        self.features = list(compress(self.X.columns, bool_mask))
        self.X = self.X.loc[:, bool_mask]
        self.x_val = self.x_val.loc[:, bool_mask]

    def feature_selection(self):
        if self.FeatureSelectionType.lower() == "lasso":
            self.lasso()
        else:
            self.features = self.X.columns

    def fit_model(self):
        print("Fitting model..")
        model_type = self.params.get_model()
        if self.gridSearch and model_type == 'svr':
            params_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                           'gamma': [0.0001, 0.001, 0.01, 0.1, 1],
                           'kernel': ['linear', 'rbf'],
                           }
            reg = GridSearchCV(self.model, params_grid, cv=5)
            reg.fit(self.X, self.y)
            self.model = self.model.set_params(**reg.best_params_)
            self.model.fit(self.X, self.y)
        elif model_type == 'lr':
            regularization = self.params.get_linear_reg()['regularization']
            if regularization.lower() == 'ridge':
                # Swap LinearRegression for RidgeCV, carrying over the shared
                # parameters; copy_X and n_jobs do not exist on RidgeCV
                params = self.model.get_params()
                del params['copy_X']
                del params['n_jobs']
                self.model = RidgeCV(cv=5)
                self.model = self.model.set_params(**params)
                self.model.fit(self.X, self.y)
            else:
                self.model.fit(self.X, self.y)
        else:
            self.model.fit(self.X, self.y)

    def score_model(self):
        cv_fold = int(self.params.get_test_params()['testcvfold'])
        train_score_result = []
        val_score_result = []
        score_result = pd.DataFrame()
        y_pred = self.model.predict(self.x_val)
        scoring_list = ['r2', 'neg_mean_absolute_error', 'neg_mean_squared_error']
        for scoring in scoring_list:
            train_score_result.append(
                np.abs(np.mean(cross_val_score(self.model, self.X, self.y,
                                               scoring=scoring, cv=cv_fold))))
        train_score_result.append(np.sqrt(train_score_result[2]))  # RMSE from the MSE entry
        val_score_result.append(r2_score(self.y_val, y_pred))
        val_score_result.append(mean_absolute_error(self.y_val, y_pred))
        val_score_result.append(mean_squared_error(self.y_val, y_pred))
        val_score_result.append(np.sqrt(val_score_result[2]))
        # label the column with the folds actually used above (the original
        # mistakenly formatted in the feature-selection fold count)
        score_result['mean {}-fold cv'.format(cv_fold)] = train_score_result
        score_result['validation score'] = val_score_result
        score_result.index = ['r2', 'MAE', 'MSE', 'RMSE']
        print("""
        #########################################################
        ########       Model and final parameters        #######
        #########################################################\n
        {}
        """.format(self.model))
        if self.params.get_model() == 'lr':
            coef_feature = pd.DataFrame()
            coef_feature['Coefficients'] = self.model.coef_
            coef_feature['Feature'] = self.X.columns
        else:
            print("Predictors used:")
            coef_feature = self.X.columns.values
        print("{}".format(coef_feature))
        print("""
        #########################################################
        #########                Score                  #########
        #########################################################\n
        {}
        """.format(score_result))
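# Hedged usage sketch for the Model class above. The keyword names match the
# kwargs read in __init__; the DataFrame, column names, and kwarg values are
# illustrative assumptions, and Params must resolve to a usable configuration
# (e.g. get_model() returning 'lr') for this to run.
import pandas as pd
from sklearn.datasets import make_regression

X_arr, y_arr = make_regression(n_samples=150, n_features=6, noise=5.0, random_state=0)
X_df = pd.DataFrame(X_arr, columns=['f{}'.format(i) for i in range(6)])

m = Model(X_df, pd.Series(y_arr),
          featureselection='lasso',
          evalmetric='rmse',
          featureselectioncvfolds=5,
          gridsearchcvfolds=5,
          gridsearchcv=False)
m.score_model()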