print(test_X.shape)
print(test_y.shape)

# Ridge regression.
# Ordinary Ridge fit; ``alpha`` is the regularization strength.
model_ridge = Ridge(alpha=0.5)
model_ridge.fit(train_X, train_y)
# Report R^2 on the training and validation splits with matching labels.
for label, X_part, y_part in (
        ('训练集预测的确定系数R ^ 2: ', train_X, train_y),
        ('验证集预测的确定系数R ^ 2: ', test_X, test_y)):
    print(label, model_ridge.score(X_part, y_part))
pred_1 = model_ridge.predict(test_X)
print('模型误差: ', mean_squared_error(test_y, pred_1))

# RidgeCV takes a grid of candidate alphas and picks the best one
# via cross-validation.
candidate_alphas = [0.001, 0.01, 0.1, 1.0]
model = RidgeCV(alphas=candidate_alphas)
model.fit(train_X, train_y)
print("模型参数:", model.get_params())
print("模型详情:", model)
# ``alpha_`` exists only on RidgeCV, not on plain Ridge().
print('最佳alpha', model.alpha_)
for tag, X_part, y_part in (
        ('训练集预测的确定系数R ^ 2: ', train_X, train_y),
        ('验证集预测的确定系数R ^ 2: ', test_X, test_y)):
    print(tag, model.score(X_part, y_part))

pred_2 = model.predict(test_X)
print('Ridge模型误差: ', mean_squared_error(test_y, pred_2))

# Lasso regression.
# NOTE(review): the original assigned Lasso(alpha=0.01) and LassoCV() to
# ``model_lasso`` and immediately overwrote both; only the LassoLarsCV()
# estimator was ever fitted, so the dead assignments are removed.
model_lasso = LassoLarsCV()
model_lasso.fit(train_X, train_y)
print("模型参数:", model_lasso.get_params())
print("模型详情:", model_lasso)
# --- Beispiel #2 (extraction artifact: snippet separator) ---
# Predictions on the training split.
y_pred = classifier.predict(X_train)

# Training RMSE; the value is computed but not bound or printed.
# NOTE(review): presumably meant to be displayed/stored — confirm intent.
math.sqrt(mean_squared_error(y_train, y_pred))

####################### END Simple Linear Regression ###############################

####################### Regressao Ridge w/ cross validation ###############################
# Ridge with 10-fold CV over 50 log-spaced regularization strengths.
lambdas = np.logspace(-5, -1, 50)
ridge = RidgeCV(alphas=lambdas, fit_intercept=True, cv=10)
ridge.fit(X_train, y_train)

ridge.alpha_
# NOTE(review): the original accessed ``ridge.cv_values_`` here, which
# raises AttributeError — RidgeCV only stores cv_values_ when cv=None
# (leave-one-out) with store_cv_values=True, not with cv=10. Removed.
ridge.score(X_train, y_train)
ridge.get_params()
ridge.coef_

print('Melhor lamda: %0.5f' % ridge.alpha_)

# Hoist the invariant column list: the original rebuilt
# ``df.drop(columns=['clube_id', 'posicao_id']).columns`` up to three
# times per loop iteration.
feature_columns = df.drop(columns=['clube_id', 'posicao_id']).columns
important_variables = []
club_plus_position = 0
for i, coef in enumerate(ridge.coef_):
    if abs(coef) >= 0.1:
        if i < len(feature_columns):
            important_variables.append(feature_columns[i])
        else:
            # Coefficient belongs to the dropped club/position block.
            club_plus_position += 1
print('Variáveis mais importantes para o Ridge: ', important_variables)
# print(  # NOTE(review): statement truncated in the source extract
# --- Beispiel #3 (extraction artifact: snippet separator) ---
plt.show()

# Keep only plausible efficiencies; ``.copy()`` avoids pandas'
# SettingWithCopyWarning (chained assignment) when the flag column is
# added below — the original wrote into a view of df1.
df2 = df1[df1['charging_efficiency'] <= 20].copy()
len(df2['deviceid'].unique())

# Charging efficiency across different SOC levels.
df2['charging_efficiency_greater_12.5'] = (df2['charging_efficiency'] >
                                           12.5) * 1
df3 = df2[df2['charging_efficiency'] > 12.5]
from sklearn.linear_model import RidgeCV
# Single-feature design matrix: start SOC -> charging efficiency.
x = np.array(df3['start_soc']).reshape(-1, 1)
y = np.array(df3['charging_efficiency'])
model = RidgeCV()
model.fit(x, y)
# Fitted line evaluated at SOC = 0..99 for the overlay plot.
pred = model.predict(np.arange(100).reshape(100, 1))
model.get_params()
model.intercept_
model.coef_[0]

plt.scatter(df2['start_soc'],
            df2['charging_efficiency'],
            c=df2['charging_efficiency_greater_12.5'])
plt.plot(np.arange(100), pred, color='r')
plt.title('charging efficiency vs soc_range\n(charging efficiency<=20)')
plt.xlabel('start_soc')
plt.ylabel('charging efficiency')
plt.text(
    75, 16.5, 'y = {0}x + {1}'.format(round(model.coef_[0], 4),
                                      round(model.intercept_, 4)))
plt.show()
# --- Beispiel #4 (extraction artifact: snippet separator) ---
              'min_samples_split':(2,3,4),
              'min_samples_leaf':(1,2,3)}

# Random-forest hyper-parameter search over the ``parameters`` grid
# (defined above, outside this excerpt).
# NOTE(review): this snippet uses Python 2 print-statement syntax.
rfr = RandomForestRegressor(random_state=seed, warm_start=True)
# Negated MSE scorer so that GridSearchCV maximizes it.
score = make_scorer(mean_squared_error, greater_is_better=False)
grid_obj = GridSearchCV(rfr, param_grid=parameters, scoring=score, verbose=1, n_jobs=4, cv=5)
grid_obj= grid_obj.fit(X_train, y_train)
# Keep the refit best estimator for reporting and pickling below.
rfr = grid_obj.best_estimator_
print rfr.get_params(), '\n'
print "Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(rfr, X_train, y_train))
print "Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(rfr, X_valid, y_valid))

# RidgeCV over a wide log-spaced alpha grid with 5-fold CV.
ridge = RidgeCV(alphas=(1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1.0, 10.0), cv=5)
ridge = ridge.fit(X_train, y_train)
print ridge.get_params(), '\n'
print "Tuned model has a training RMSE score of {:.4f}.".format(predict_labels(ridge, X_train, y_train))
print "Tuned model has a testing RMSE score of {:.4f}.".format(predict_labels(ridge, X_valid, y_valid))

# Save regressors
pickle_file = 'regressor.pickle'

try:
  f = open(pickle_file, 'wb')
  save = {
    'random_forest_regressor': rfr,
    'ridge': ridge,
    }
  pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
  f.close()
except Exception as e:
# --- Beispiel #5 (extraction artifact: snippet separator) ---
class Model:
    """Train and evaluate a regression model (LinearRegression or SVR)
    chosen by an external ``Params`` configuration object.

    NOTE(review): ``Params`` is project-defined and not visible in this
    excerpt; the comments below describe only what the code here does.
    """

    # Evaluated once at class-definition time: every instance shares the
    # same Params object and the same split settings.
    params = Params()
    testsize = float(params.get_data_params()['testsize'])
    random_state = int(params.get_data_params()['randomstate'])

    def __init__(self, X, y, **kwargs):
        """Split the data, configure the estimator, and fit it immediately.

        Required kwargs: 'featureselection', 'evalmetric',
        'featureselectioncvfolds', 'gridsearchcvfolds', 'gridsearchcv'
        (a missing key raises KeyError).
        """
        # Hold-out validation split; sizes come from the class-level config.
        self.X, self.x_val, self.y, self.y_val = train_test_split(
            X, y, test_size=self.testsize, random_state=self.random_state)
        self.model = None
        self.FeatureSelectionType = kwargs['featureselection']
        self.features = self.X.columns  # assumes X is a pandas DataFrame
        self.model_coefficients = None
        self.EvalMetrics = kwargs['evalmetric']
        self.FeatSelCvFolds = kwargs['featureselectioncvfolds']
        self.CvFolds = kwargs['gridsearchcvfolds']
        self.gridSearch = kwargs['gridsearchcv']

        # Construction does all the work: choose model, select features, fit.
        self.set_model()
        self.feature_selection()
        self.fit_model()

    def __repr__(self):
        return "Model(" + str(self.model) + ")"

    def set_model(self):
        """Instantiate the estimator named by the config ('lr' or 'svr').

        Any other name silently leaves ``self.model`` as None.
        """
        model_name = self.params.get_model()
        if model_name == 'lr':
            self.model = LinearRegression()
            params = self.params.get_linear_reg()
            # 'regularization' is consumed by fit_model(), not by sklearn.
            del params['regularization']
            self.model = self.model.set_params(**params)
        elif model_name == 'svr':
            self.model = SVR()
            self.model = self.model.set_params(**self.params.get_svr())
        else:
            pass

    def lasso(self):
        """Lasso-based feature selection.

        Keep only the columns whose LassoCV coefficient survives
        SelectFromModel's threshold, narrowing both the training and
        validation frames to those columns.
        """
        estimator = LassoCV(cv=5, max_iter=10000)
        selector = SelectFromModel(estimator)
        selector = selector.fit(self.X, self.y)
        bool_mask = selector.get_support()
        self.features = list(compress(self.X.columns, bool_mask))
        self.X = self.X.loc[:, bool_mask]
        self.x_val = self.x_val.loc[:, bool_mask]

    def feature_selection(self):
        """Dispatch on the configured selection type ('lasso' or keep all)."""
        if self.FeatureSelectionType.lower() == "lasso":
            self.lasso()
        else:
            self.features = self.X.columns

    def fit_model(self):
        """Fit ``self.model``, optionally grid-searching SVR parameters or
        swapping LinearRegression for RidgeCV when ridge is configured."""
        print("Fitting model..")
        model_type = self.params.get_model()
        if self.gridSearch and model_type == 'svr':
            params_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                           'gamma': [0.0001, 0.001, 0.01, 0.1, 1],
                           'kernel': ['linear', 'rbf'],
                           }
            reg = GridSearchCV(self.model,
                               params_grid,
                               cv=5)
            reg.fit(self.X, self.y)
            # Refit the SVR with the best found params so self.model keeps
            # its original estimator type.
            self.model = self.model.set_params(**reg.best_params_)
            self.model.fit(self.X, self.y)
        elif model_type =='lr':
            regularization = self.params.get_linear_reg()['regularization']
            if regularization.lower() == 'ridge':
                # Transplant LinearRegression params onto RidgeCV after
                # dropping the two that RidgeCV does not accept.
                # NOTE(review): fragile — any remaining param that RidgeCV
                # does not support would make set_params raise.
                params = self.model.get_params()
                del params['copy_X']
                del params['n_jobs']
                self.model = RidgeCV(cv=5)
                self.model = self.model.set_params(**params)
                self.model.fit(self.X, self.y)
            else:
                self.model.fit(self.X, self.y)
        else:
            self.model.fit(self.X, self.y)


    def score_model(self):
        """Print the fitted model, the predictors (and coefficients for
        'lr'), and a table of cross-validated train vs validation
        r2/MAE/MSE/RMSE."""
        cv_fold = int(self.params.get_test_params()['testcvfold'])
        train_score_result = []
        val_score_result = []
        score_result = pd.DataFrame()
        y_pred = self.model.predict(self.x_val)
        scoring_list = ['r2', 'neg_mean_absolute_error', 'neg_mean_squared_error']
        for scoring in scoring_list:
            # abs() flips sklearn's negated error scores back to positive
            # (note it also affects a negative r2 — inherent to this scheme).
            train_score_result.append(
                np.abs(np.mean(cross_val_score(self.model,
                                               self.X,
                                               self.y,
                                               scoring=scoring,
                                               cv=cv_fold))))
        # RMSE derived from the MSE entry (index 2).
        train_score_result.append(np.sqrt(train_score_result[2]))
        val_score_result.append(r2_score(self.y_val, y_pred))
        val_score_result.append(mean_absolute_error(self.y_val, y_pred))
        val_score_result.append(mean_squared_error(self.y_val, y_pred))
        val_score_result.append(np.sqrt(val_score_result[2]))
        # NOTE(review): the column label uses FeatSelCvFolds, but the CV
        # above uses 'testcvfold' — confirm which fold count to report.
        score_result['mean {}-fold cv'.format(self.FeatSelCvFolds)] = train_score_result
        score_result['validation score'] = val_score_result
        score_result.index = ['r2', 'MAE', 'MSE', 'RMSE']
        print("""
#########################################################
########        Model and final parameters        #######
#########################################################\n
{}
""".format(self.model)
              )
        if self.params.get_model() == 'lr':
            coef_feature = pd.DataFrame()
            coef_feature['Coefficients'] = self.model.coef_
            coef_feature['Feature'] = self.X.columns
        else:
            print("Predictors used:")
            coef_feature = self.X.columns.values
        print("{}".format(coef_feature)
              )
        print("""
#########################################################
#########                Score                   #########
#########################################################\n
{}
""".format(score_result))