# 1.1.1
from sklearn import linear_model

reg = linear_model.LinearRegression()
res = reg.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
print(res.coef_)

# 1.1.2
reg1 = linear_model.Ridge(alpha=0.5)
reg1.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
print(reg1.coef_)
print(reg1.intercept_)

# 1.1.3
reg2 = linear_model.Lasso(alpha=0.1)
reg2.fit([[0, 0], [1, 1]], [0, 1])
print(reg2.coef_)
print(reg2.intercept_)
print(reg2.predict([[1, 1]]))
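# A minimal sketch of letting scikit-learn pick alpha instead of hard-coding
# it as above; RidgeCV and the candidate alphas below are illustrative
# additions, not part of the original snippet.
reg3 = linear_model.RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1])
reg3.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1])
print(reg3.alpha_)  # alpha selected by (efficient leave-one-out) cross-validation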
def Linear_Regression_Ridge_Model():
    """Define a ridge regression model."""
    model = linear_model.Ridge()
    return model
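# Hypothetical usage of the factory above on toy data; the names and values
# below are illustrative only.
import numpy as np

X_toy = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
y_toy = np.array([0.0, 1.0, 2.0])
model = Linear_Regression_Ridge_Model()
model.fit(X_toy, y_toy)
print(model.predict([[1.5, 1.5]]))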
regr = LinReg(fit_intercept=False, copy_X=False)
regr.fit(train_x, train_std_scores)
valid_x = np.asarray(
    (valid_df[valid_df['essay_set'] == i]).drop('std_score', axis=1))
#valid_x = np.asarray((valid_df[valid_df['essay_set'] == i])[['std_sentence_count']])
valid_pred_std_scores = regr.predict(valid_x)
#print "Linear for Essay Set "+str(i)+":", Spearman(a = (valid_df[valid_df['essay_set'] == i])["std_score"], b = valid_pred_std_scores)
#print "\n"

# Sweep alpha from 0.0 to 1.0 in steps of 0.05
alpha = [x * 1.0 / 20 for x in range(21)]
ridge_scores = []
lasso_scores = []
for a in alpha:
    ridge = linear_model.Ridge(alpha=a)
    ridge.fit(train_x, train_std_scores)
    valid_pred_std_scores_ridge = ridge.predict(valid_x)
    new_ridge_score = Spearman(
        a=(valid_df[valid_df['essay_set'] == i])["std_score"],
        b=valid_pred_std_scores_ridge)[0]
    ridge_scores.append(new_ridge_score)

    lasso = linear_model.Lasso(alpha=a)
    lasso.fit(train_x, train_std_scores)
    valid_pred_std_scores_lasso = lasso.predict(valid_x)
    # Bug fix: score the lasso predictions (the original scored the ridge
    # predictions here) and append the lasso score, not the ridge score.
    new_lasso_score = Spearman(
        a=(valid_df[valid_df['essay_set'] == i])["std_score"],
        b=valid_pred_std_scores_lasso)[0]
    lasso_scores.append(new_lasso_score)
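# A hedged sketch of the natural next step: compare the two alpha sweeps
# visually. matplotlib is assumed to be available; ridge_scores and
# lasso_scores come from the loop above.
import matplotlib.pyplot as plt

plt.plot(alpha, ridge_scores, label='Ridge')
plt.plot(alpha, lasso_scores, label='Lasso')
plt.xlabel('alpha')
plt.ylabel('Spearman correlation')
plt.legend()
plt.show()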
regression(linear_model.LarsCV()),
regression(linear_model.Lasso(random_state=RANDOM_SEED)),
regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
regression(linear_model.LassoLars()),
regression(linear_model.LassoLarsCV()),
regression(linear_model.LassoLarsIC()),
regression(linear_model.LinearRegression()),
regression(linear_model.OrthogonalMatchingPursuit()),
regression(linear_model.OrthogonalMatchingPursuitCV()),
regression(
    linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
regression(
    linear_model.RANSACRegressor(
        base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
        random_state=RANDOM_SEED)),
regression(linear_model.Ridge(random_state=RANDOM_SEED)),
regression(linear_model.RidgeCV()),
regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),

# Statsmodels Linear Regression
regression(
    utils.StatsmodelsSklearnLikeWrapper(
        sm.GLS,
        dict(init=dict(sigma=np.eye(
            len(utils.get_regression_model_trainer().y_train)) + 1)))),
regression(
    utils.StatsmodelsSklearnLikeWrapper(
        sm.GLS,
        dict(init=dict(sigma=np.eye(
            len(utils.get_regression_model_trainer().y_train)) + 1),
from sklearn import svm
from sklearn.metrics import mean_squared_error, r2_score

# linear regression model
linear_reg = linear_model.LinearRegression()
linear_reg.fit(x_train, y_train)
print(linear_reg.coef_)
linear_reg_predict = linear_reg.predict(x_test)
print('The mean squared error and r^2 values for the linear regression prediction are')
print(mean_squared_error(y_test, linear_reg_predict))
print(r2_score(y_test, linear_reg_predict))

# ridge regression model
ridge_reg = linear_model.Ridge(alpha=1)
ridge_reg.fit(x_train, y_train)
print(ridge_reg.coef_)
ridge_reg_predict = ridge_reg.predict(x_test)
print('The mean squared error and r^2 values for the ridge regression prediction are')
print(mean_squared_error(y_test, ridge_reg_predict))
print(r2_score(y_test, ridge_reg_predict))

# support vector regression
svr = svm.SVR()
svr.fit(x_train, y_train)
svr_predict = svr.predict(x_test)
print('The mean squared error and r^2 values for the support vector regression prediction are')
print(mean_squared_error(y_test, svr_predict))
print(r2_score(y_test, svr_predict))
import matplotlib.pyplot as plt
import numpy as np

from sklearn import linear_model

# X is a 10x10 Hilbert matrix
X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis])
y = np.ones(10)

# #############################################################################
# Compute the paths

n_alphas = 200
alphas = np.logspace(-10, -2, n_alphas)

coefs = []
for a in alphas:
    ridge = linear_model.Ridge(alpha=a, fit_intercept=False)
    ridge.fit(X, y)
    coefs.append(ridge.coef_)

# #############################################################################
# Display the results

ax = plt.gca()
ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.set_xlim(ax.get_xlim()[::-1])  # reverse axis
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('Ridge coefficients as a function of the regularization')
plt.axis('tight')
plt.show()
def __init__(self, **kwargs) -> None:
    model = linear_model.Ridge(**kwargs)
    super().__init__(model)
X_pred = f.get('X_pred')[()]
y_pred = f.get('y_pred')[()]

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Linear Regression Model
lr = linear_model.LinearRegression()
lr.fit(X_train, y_train)

# Lasso Regression
lasso = linear_model.Lasso(alpha=1., max_iter=2000)
lasso.fit(X_train, y_train)

# Ridge Regression
ridge = linear_model.Ridge(alpha=13.95, max_iter=2000)
ridge.fit(X_train, y_train)

# Scores
print('Ridge score is %f' % ridge.score(X_test, y_test))
print('Lasso score is %f' % lasso.score(X_test, y_test))
print('Linear Regression score is %f' % lr.score(X_test, y_test))
# All three regression models consistently reach a score of about 96%

# Visualize the prediction
import matplotlib.pyplot as plt2

pred = lr.predict(X_pred)
plt2.plot(pred, color='red', label='Prediction')
plt2.plot(y_pred, color='blue', label='Ground Truth')
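# The labels set above are never shown without a legend call; an assumed,
# minimal continuation to finish the figure:
plt2.legend()
plt2.show()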
def __init__(self):
    cAbstractTrend.__init__(self)
    self.mTrendRidge = linear_model.Ridge()
    self.mOutName = "PolyTrend"
    self.mFormula = self.mOutName
    self.mComplexity = 1
    parser.add_argument('--dim', type=int, default=16,
                        help='height and width of mnist dataset to resize to')
    parser.add_argument('--debug', action='store_true', help='debug mode')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    # Extract the training dataset
    train_data, train_labels = getDataSet(args, 'train')
    # Extract the test dataset
    test_data, test_labels = getDataSet(args, 'test')

    # Ridge-regularized linear regression
    reg = linear_model.Ridge()
    reg.fit(train_data, train_labels)

    # Perform prediction with the model
    float_labels = reg.predict(test_data)

    # Fixed-point computation
    # CSE 548: Todo: tweak the SCALE to get less than 20% classification error
    SCALE = 50000  # CSE 548 - Change me
    offset = reg.intercept_
    weight = reg.coef_
    offset = np.clip(offset * SCALE, -128, 127)
    offset = offset.astype(np.int32)
    weight = np.clip(weight * SCALE, -128, 127)
    weight = weight.astype(np.int8)
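    # A hedged sketch of evaluating the fixed-point model. It assumes the
    # labels are one-hot encoded (so argmax recovers the class); that is not
    # confirmed by the snippet above, so treat this as illustrative only.
    int_scores = test_data.astype(np.int32) @ weight.T.astype(np.int32) + offset
    pred_classes = np.argmax(int_scores, axis=1)
    true_classes = np.argmax(test_labels, axis=1)
    error_rate = np.mean(pred_classes != true_classes)
    print('fixed-point classification error: %.2f%%' % (100 * error_rate))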
def fit_model(X_train, y_train, X_test, y_test, reg_type='enet'):
    if reg_type == 'lasso':
        tol = 1e-2
        alpha = 1.0
        n_threads = None
        n_alphas = 1
        n_lambdas = 1
        n_folds = 1
        lambda_max = alpha
        lambda_min_ratio = 1.0
        lambda_stop_early = False
        store_full_path = 1
        alphas = None
        lambdas = None
        alpha_min = 1.0
        alpha_max = 1.0
        n_gpus = -1
        fit_intercept = True
        max_iter = 5000
        glm_stop_early = True
        glm_stop_early_error_fraction = 1.0
        verbose = False
        reg_h2o = elastic_net.ElasticNetH2O(
            n_threads=n_threads,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=lambda_min_ratio,
            n_lambdas=n_lambdas,
            n_folds=n_folds,
            n_alphas=n_alphas,
            tol=tol,
            lambda_stop_early=lambda_stop_early,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            store_full_path=store_full_path,
            lambda_max=lambda_max,
            alpha_max=alpha_max,
            alpha_min=alpha_min,
            alphas=alphas,
            lambdas=lambdas,
            order=None)
        reg_sklearn = linear_model.Lasso()
    elif reg_type == 'ridge':
        reg_h2o = h2o4gpu.Ridge()
        reg_sklearn = linear_model.Ridge()
    elif reg_type == 'enet':
        reg_h2o = h2o4gpu.ElasticNet()  # update when the wrapper is done
        reg_sklearn = linear_model.ElasticNet()

    start_h2o = time.time()
    reg_h2o.fit(X_train, y_train, free_input_data=1)
    time_h2o = time.time() - start_h2o

    start_sklearn = time.time()
    reg_sklearn.fit(X_train, y_train)
    time_sklearn = time.time() - start_sklearn

    # Predicting test values
    y_pred_h2o = reg_h2o.predict(X_test, free_input_data=1)
    y_pred_h2o = y_pred_h2o.squeeze()
    y_pred_sklearn = reg_sklearn.predict(X_test)

    # Calculating R^2 scores
    r2_h2o = r2_score(y_test, y_pred_h2o)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Clearing the memory
    reg_h2o.free_sols()
    reg_h2o.free_preds()
    reg_h2o.finish()
    del reg_h2o
    del reg_sklearn
    gc.collect()

    return time_h2o, time_sklearn, r2_h2o, r2_sklearn
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge
from sklearn.isotonic import IsotonicRegression
from sklearn import metrics
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_recall_fscore_support as score

# ===============================================
# common function
# ===============================================
SGDClf = linear_model.SGDClassifier(loss='modified_huber', penalty='l1')
LogicReg = linear_model.LogisticRegression(penalty='l1', C=1.0)
RidgeReg = linear_model.Ridge(alpha=1.0)
KernelRidge = KernelRidge(alpha=1.0, kernel="linear", gamma=None)
RANSACReg = linear_model.RANSACRegressor(linear_model.LinearRegression())
BayesReg = linear_model.BayesianRidge(n_iter=300, alpha_1=1.e-6, alpha_2=1.e-6,
                                      lambda_1=1.e-6, lambda_2=1.e-6)
IsotonicReg = IsotonicRegression(y_min=None, y_max=None, increasing=True,
                                 out_of_bounds='nan')
else:
    from sklearn.model_selection import GroupShuffleSplit
    cv = GroupShuffleSplit(n_splits=n_splits, test_size=0.2, random_state=12345)
    idxs_train, idxs_test = [], []
    for idx_train, idx_test in cv.split(BOLD, targets, groups=groups):
        idxs_train.append(idx_train)
        idxs_test.append(idx_test)

embedding_features = np.array([word2vec_vec[word.lower()]
                               for word in df_data['words']])

# define the encoding model
encoding_model = linear_model.Ridge(
    alpha=alpha,          # L2 penalty; larger values shrink the weights more strongly toward zero
    normalize=True,       # normalize the batch features
    random_state=12345,   # random seeding
)

# black box cross validation
res = cross_validate(
    encoding_model,
    embedding_features,
    BOLD,
    groups=groups,
    cv=zip(idxs_train, idxs_test),
    n_jobs=n_jobs,
    return_estimator=True,)

# white box cross validation
n_coef = embedding_features.shape[1]
n_obs = int(embedding_features.shape[0] * 0.8)
preds = np.array([model.predict(embedding_features[idx_test])
                  for model, idx_test in zip(res['estimator'], idxs_test)])
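# A hedged sketch of scoring the white-box predictions: correlate each fold's
# predicted BOLD time course with the held-out data, voxel by voxel. The
# shapes (BOLD as time x voxels) are assumed, not confirmed by the snippet.
fold_scores = []
for pred, idx_test in zip(preds, idxs_test):
    true = BOLD[idx_test]
    # Pearson correlation per voxel (column), then averaged over voxels
    corr = [np.corrcoef(pred[:, v], true[:, v])[0, 1]
            for v in range(true.shape[1])]
    fold_scores.append(np.nanmean(corr))
print(np.mean(fold_scores))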
# print(confusion_matrix)

# 2. fit energy model under engine off
## 2.1 Fuel
avg_fuel_rate_eng_off = y_train.loc[y_train['eng_on'] == 0, 'fuel_rate(J)'].mean()
y_train.loc[y_train['eng_on'] == 0, 'fuel_rate_pred'] = avg_fuel_rate_eng_off
y_test.loc[:, 'fuel_rate_eng_off_pred'] = avg_fuel_rate_eng_off
eng_off_fuel_r2 = metrics.r2_score(
    y_train.loc[y_train['eng_on'] == 0, 'fuel_rate(J)'],
    y_train.loc[y_train['eng_on'] == 0, 'fuel_rate_pred'])
print(eng_off_fuel_r2)

## 2.2 Electric
lm = linear_model.Ridge(alpha=0.1)
elec_pos = lm.fit(
    X_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'VSP'].to_frame(),
    y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'elec_energy(J)'])
y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
            'elec_rate_pred'] = lm.predict(
    X_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'VSP'].to_frame())
print(lm.coef_, lm.intercept_)
eng_off_elec_pos_vsp_r2 = metrics.r2_score(
    y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'elec_energy(J)'],
    y_train.loc[(X_train['VSP'] >= 0) & (y_train['eng_on'] == 0),
                'elec_rate_pred'])
sw = load[::, 1:2:]  # second column is sample weight
x = load[::, 2::]  # remaining 10 columns are input features (histogram values)

# train vs test
y_train = y[0::2]
sw_train = sw[0::2]
x_train = x[0::2]
y_test = y[1::2]
sw_test = sw[1::2]
x_test = x[1::2]
print(sw_train)

# ridge regression model, fit with per-sample weights
regr = linear_model.Ridge(alpha=0.01)
regr.fit(x_train, y_train, np.reshape(sw_train, [-1]))
#regr.fit(x_train, y_train)

a_train = regr.predict(x_train)
print(np.hstack((y_train, a_train)))
a_test = regr.predict(x_test)
print(np.hstack((y_test, a_test)))

# baseline guess: predict the (unweighted) mean of the test targets
y_avg_test = np.repeat(np.average(y_test), len(y_test))
#y_avg_test = np.repeat(np.sum(np.multiply(y_test, sw_test)) / np.sum(sw_test), len(y_test))
print(np.average(y_test),
      np.sum(np.multiply(y_test, sw_test)) / np.sum(sw_test))
print(np.sum(sw_test))
print("Meaningful attribute column index:", col_list) new_d = pd.concat([d[col_list],y], axis=1) sns.pairplot(new_d, plot_kws={"s": 3}) plt.show() sns.heatmap(new_d.corr(), annot=True) plt.show() # Part2 Linear Regression print("Linear Regression Summury") reg = linear_model.LinearRegression() summary4reg(reg) print("-"*60) # Part3.1 Ridge Regression print("Ridge Regression Summury") reg_r = linear_model.Ridge() test4alpha(reg_r) print("-"*60) # Part3.2 Lasso Regression print("Lasso Regression Summury") reg_l = linear_model.Lasso() test4alpha(reg_l) print("-"*60) # Part3.3 ElasticNet Regression print("ElasticNet Regression Summury") reg_e = linear_model.ElasticNet() test4alpha(reg_e, isElasticNet=True) print("-"*60)
print(X_normed)

# 2. load all names and sentiment & prices
loader = Loader(os.path.join(os.path.dirname(__file__),
                             '../data/Price_Sentiment_url.csv'),
                type='csv')
data = np.array(loader.start())
names = data[:, (0)]
labels = data[:, (2, 3)]
labels[labels == ''] = '-100'

# 3. prepare and match all data
# TODO: fix here
y = []
for index, name in enumerate(META):
    i, = np.where(names == name[1])
    print(name)
    if len(i) != 0:
        y.append(labels[i])
y = np.array(y, dtype=float)
print(X_normed.shape)

X_train, X_test, y_train, y_test = train_test_split(X_normed[:-1],
                                                    X_normed[-1],
                                                    test_size=0.33,
                                                    random_state=42)
reg = linear_model.Ridge(alpha=.5)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
# accuracy_score is a classification metric and fails on the continuous
# predictions a Ridge regressor produces; use a regression metric instead.
from sklearn.metrics import r2_score
print('r2 score: ', r2_score(y_test, y_pred))
X_train, X_test, y_train, y_test = train_test_split(data, target,
                                                    test_size=0.33,
                                                    random_state=53)
plt.figure(1)
total_scores = []
for degree in range(2, 7):
    # print(X_train.shape)
    test_error = []
    train_error = []
    alphas = []
    for alpha in np.arange(0.01, 2., 0.1):
        # 3. creating regression model instance (still not trained!)
        reg = make_pipeline(PolynomialFeatures(degree),
                            linear_model.Ridge(alpha=alpha))
        # 4. training the model
        reg.fit(X_train, y_train)
        # print(reg._final_estimator.coef_)
        # 5. predict (reg is now the predictive model)
        y_predictions_train = reg.predict(X_train)
        # y_predictions = reg.predict(X_test)
        # 6. evaluate the model
        alphas.append(alpha)
        train_error.append(mean_squared_error(y_train, y_predictions_train))
        test_error.append(-cross_val_score(
            reg, X_train, y_train, cv=10,
            scoring='neg_mean_squared_error').mean())
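    # A hedged sketch of what the loop above sets up: plot the training error
    # against the cross-validated error across alpha for each polynomial
    # degree, reusing the plt.figure(1) call at the top.
    plt.plot(alphas, train_error, label='degree %d train MSE' % degree)
    plt.plot(alphas, test_error, '--', label='degree %d CV MSE' % degree)
plt.xlabel('alpha')
plt.ylabel('mean squared error')
plt.legend()
plt.show()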
print("Default is:", default_max_baseline) if not os.path.exists("analysis/plots"): os.makedirs("analysis/plots") if not os.path.exists("analysis/plots/base_analysis"): os.makedirs("analysis/plots/base_analysis") mean_scores = [] std_scores = [] for score in constants.CLASSIFIERS_SCORES: mean_scores.append(score + "_mean") std_scores.append(score + "_std") reg_models = {} reg_models["neural_network"] = lambda: neural_network.MLPRegressor() reg_models["ridge"] = lambda: linear_model.Ridge() reg_models["gradient_descent"] = lambda: linear_model.SGDRegressor() reg_models["svm"] = lambda: svm.SVR(gamma="auto") reg_models["knn"] = lambda: neighbors.KNeighborsRegressor(weights="distance") reg_models["random_forest"] = lambda: ensemble.RandomForestRegressor( random_state=constants.RANDOM_STATE) reg_models[ "gaussian_process"] = lambda: gaussian_process.GaussianProcessRegressor() reg_models["decision_tree"] = lambda: tree.DecisionTreeRegressor( random_state=constants.RANDOM_STATE) reg_models["random"] = lambda: Random() reg_models["default"] = lambda: Default() divideFold = KFold(10, random_state=constants.RANDOM_STATE, shuffle=True)
def Ridge_KFold_Sort(Subjects_Data, Subjects_Score, Covariates, Fold_Quantity,
                     Alpha_Range, ResultantFolder, Parallel_Quantity,
                     Permutation_Flag):
    if not os.path.exists(ResultantFolder):
        os.makedirs(ResultantFolder)

    Subjects_Quantity = len(Subjects_Score)
    # Sort the subjects by score
    Sorted_Index = np.argsort(Subjects_Score)
    Subjects_Data = Subjects_Data[Sorted_Index, :]
    Subjects_Score = Subjects_Score[Sorted_Index]
    Covariates = Covariates[Sorted_Index, :]

    # np.int is deprecated; use the builtin int instead
    EachFold_Size = int(np.fix(np.divide(Subjects_Quantity, Fold_Quantity)))
    MaxSize = EachFold_Size * Fold_Quantity
    EachFold_Max = np.ones(Fold_Quantity, int) * MaxSize
    tmp = np.arange(Fold_Quantity - 1, -1, -1)
    EachFold_Max = EachFold_Max - tmp
    Remain = np.mod(Subjects_Quantity, Fold_Quantity)
    for j in np.arange(Remain):
        EachFold_Max[j] = EachFold_Max[j] + Fold_Quantity

    Fold_Corr = []
    Fold_MAE = []
    Fold_Weight = []
    Features_Quantity = np.shape(Subjects_Data)[1]
    Covariates_Quantity = np.shape(Covariates)[1]

    for j in np.arange(Fold_Quantity):
        Fold_J_Index = np.arange(j, EachFold_Max[j], Fold_Quantity)
        Subjects_Data_test = Subjects_Data[Fold_J_Index, :]
        Subjects_Score_test = Subjects_Score[Fold_J_Index]
        Covariates_test = Covariates[Fold_J_Index, :]
        Subjects_Data_train = np.delete(Subjects_Data, Fold_J_Index, axis=0)
        Subjects_Score_train = np.delete(Subjects_Score, Fold_J_Index)
        Covariates_train = np.delete(Covariates, Fold_J_Index, axis=0)

        # Controlling covariates in the brain data
        df = {}
        for k in np.arange(Covariates_Quantity):
            df['Covariate_' + str(k)] = Covariates_train[:, k]
        # Construct the formula
        Formula = 'Data ~ Covariate_0'
        for k in np.arange(Covariates_Quantity - 1) + 1:
            Formula = Formula + ' + Covariate_' + str(k)
        # Regress the covariates out of each brain feature
        for k in np.arange(Features_Quantity):
            df['Data'] = Subjects_Data_train[:, k]
            # Regressing out covariates using the training data
            LinModel_Res = sm.ols(formula=Formula, data=df).fit()
            # Replace the training data with the residuals
            Subjects_Data_train[:, k] = LinModel_Res.resid
            # Compute the residuals of the testing data by applying the
            # coefficients estimated on the training data
            Coefficients = LinModel_Res.params
            Subjects_Data_test[:, k] = Subjects_Data_test[:, k] - Coefficients[0]
            for m in np.arange(Covariates_Quantity):
                Subjects_Data_test[:, k] = (Subjects_Data_test[:, k] -
                                            Coefficients[m + 1] * Covariates_test[:, m])

        if Permutation_Flag:
            # For a permutation run, the training scores are shuffled
            # while the testing scores remain intact
            Subjects_Index_Random = np.arange(len(Subjects_Score_train))
            np.random.shuffle(Subjects_Index_Random)
            Subjects_Score_train = Subjects_Score_train[Subjects_Index_Random]
            if j == 0:
                RandIndex = {'Fold_0': Subjects_Index_Random}
            else:
                RandIndex['Fold_' + str(j)] = Subjects_Index_Random

        normalize = preprocessing.MinMaxScaler()
        Subjects_Data_train = normalize.fit_transform(Subjects_Data_train)
        Subjects_Data_test = normalize.transform(Subjects_Data_test)

        Optimal_Alpha, Inner_Corr, Inner_MAE_inv = Ridge_OptimalAlpha_KFold(
            Subjects_Data_train, Subjects_Score_train, Fold_Quantity,
            Alpha_Range, ResultantFolder, Parallel_Quantity)

        clf = linear_model.Ridge(alpha=Optimal_Alpha)
        clf.fit(Subjects_Data_train, Subjects_Score_train)
        Fold_J_Score = clf.predict(Subjects_Data_test)
        Fold_J_Corr = np.corrcoef(Fold_J_Score, Subjects_Score_test)
        Fold_J_Corr = Fold_J_Corr[0, 1]
        Fold_Corr.append(Fold_J_Corr)
        Fold_J_MAE = np.mean(
            np.abs(np.subtract(Fold_J_Score, Subjects_Score_test)))
        Fold_MAE.append(Fold_J_MAE)

        Fold_J_result = {
            'Index': Sorted_Index[Fold_J_Index],
            'Test_Score': Subjects_Score_test,
            'Predict_Score': Fold_J_Score,
            'Corr': Fold_J_Corr,
            'MAE': Fold_J_MAE,
            'alpha': Optimal_Alpha,
            'Inner_Corr': Inner_Corr,
            'Inner_MAE_inv': Inner_MAE_inv
        }
        Fold_J_FileName = 'Fold_' + str(j) + '_Score.mat'
        ResultantFile = os.path.join(ResultantFolder, Fold_J_FileName)
        sio.savemat(ResultantFile, Fold_J_result)

    Fold_Corr = [0 if np.isnan(x) else x for x in Fold_Corr]
    Mean_Corr = np.mean(Fold_Corr)
    Mean_MAE = np.mean(Fold_MAE)
    Res_NFold = {
        'Mean_Corr': Mean_Corr,
        'Mean_MAE': Mean_MAE
    }
    ResultantFile = os.path.join(ResultantFolder, 'Res_NFold.mat')
    sio.savemat(ResultantFile, Res_NFold)
    if Permutation_Flag:
        sio.savemat(ResultantFolder + '/RandIndex.mat', RandIndex)
    return (Mean_Corr, Mean_MAE)
def __init__(self, **kwargs):
    super().__init__(**kwargs)
    self.m = linear_model.Ridge(alpha=self.params.get("alpha", 1.0))
def __init__(
        self,
        model_config: RegressionEnhancedRandomForestRegressionModelConfig,
        input_space: Hypergrid,
        output_space: Hypergrid,
        logger=None
):
    if logger is None:
        logger = create_logger("RegressionEnhancedRandomForestRegressionModel")
    self.logger = logger

    assert RegressionEnhancedRandomForestRegressionModelConfig.contains(model_config)
    RegressionModel.__init__(
        self,
        model_type=type(self),
        model_config=model_config,
        input_space=input_space,
        output_space=output_space
    )
    self.input_dimension_names = [dimension.name for dimension in self.input_space.dimensions]
    self.output_dimension_names = [dimension.name for dimension in self.output_space.dimensions]

    self._input_space_dimension_name_mappings = {
        dimension.name: Dimension.flatten_dimension_name(dimension.name)
        for dimension in self.input_space.dimensions
    }
    self._output_space_dimension_name_mappings = {
        dimension.name: Dimension.flatten_dimension_name(dimension.name)
        for dimension in self.output_space.dimensions
    }

    self.base_regressor_ = None
    self.base_regressor_config = dict()
    self.base_regressor_config = self.model_config.boosting_root_model_config
    if self.model_config.boosting_root_model_name == SklearnLassoRegressionModelConfig.__name__:
        self.base_regressor_ = linear_model.Lasso(
            alpha=self.base_regressor_config.alpha,
            fit_intercept=self.base_regressor_config.fit_intercept,
            normalize=self.base_regressor_config.normalize,
            precompute=self.base_regressor_config.precompute,
            copy_X=self.base_regressor_config.copy_x,
            max_iter=self.base_regressor_config.max_iter,
            tol=self.base_regressor_config.tol,
            warm_start=self.base_regressor_config.warm_start,
            positive=self.base_regressor_config.positive,
            random_state=self.base_regressor_config.random_state,
            selection=self.base_regressor_config.selection
        )
    elif self.model_config.boosting_root_model_name == SklearnRidgeRegressionModelConfig.__name__:
        self.base_regressor_ = linear_model.Ridge(
            alpha=self.base_regressor_config.alpha,
            fit_intercept=self.base_regressor_config.fit_intercept,
            normalize=self.base_regressor_config.normalize,
            copy_X=self.base_regressor_config.copy_x,
            max_iter=self.base_regressor_config.max_iter,
            tol=self.base_regressor_config.tol,
            random_state=self.base_regressor_config.random_state,
            solver=self.base_regressor_config.solver
        )
    else:
        # self.logger is a logger object, not a callable; log through .error()
        self.logger.error('Boosting base model name "{0}" not supported currently.'
                          .format(self.model_config.boosting_root_model_name))

    rf_config = self.model_config.random_forest_model_config
    self.random_forest_regressor_ = RandomForestRegressor(
        n_estimators=rf_config.n_estimators,
        criterion=rf_config.criterion,
        max_depth=rf_config.max_depth_value,
        min_samples_split=rf_config.min_samples_split,
        min_samples_leaf=rf_config.min_samples_leaf,
        min_weight_fraction_leaf=rf_config.min_weight_fraction_leaf,
        max_features=rf_config.max_features,
        max_leaf_nodes=rf_config.max_leaf_nodes_value,
        min_impurity_decrease=rf_config.min_impurity_decrease,
        bootstrap=rf_config.bootstrap,
        oob_score=rf_config.oob_score,
        n_jobs=rf_config.n_jobs,
        warm_start=rf_config.warm_start,
        ccp_alpha=rf_config.ccp_alpha,
        max_samples=rf_config.max_sample_value
    )

    # set up basis feature transform
    self.polynomial_features_transform_ = None
    if self.model_config.max_basis_function_degree > 1:
        self.polynomial_features_transform_ = \
            PolynomialFeatures(degree=self.model_config.max_basis_function_degree)

    self.random_forest_kwargs = None
    self.root_model_kwargs = None
    self.detected_feature_indices_ = None
    self.screening_root_model_coef_ = None
    self.fit_X_ = None
    self.partial_hat_matrix_ = None
    self.base_regressor_standard_error_ = None
    self.dof_ = None
    self.variance_estimate_ = None
    self.root_model_gradient_coef_ = None
def _sklRidgeFit(self, X_train, y_train, lambda_):
    # Use the lambda_ argument passed in and fit on the training data
    # (the original referenced self.lambda_ and undefined names X, y).
    self.regression = linear_model.Ridge(fit_intercept=True, alpha=lambda_)
    self.regression.fit(X_train, y_train)
    self.beta = self.regression.coef_
    # Store the intercept in the first slot of the coefficient vector
    # (assumes the first column of the design matrix is a constant term)
    self.beta[0] = self.regression.intercept_
number_of_samples = len(y)
np.random.seed(0)
random_indices = np.random.permutation(number_of_samples)
num_training_samples = int(number_of_samples * 0.75)
x_train = X_Train[random_indices[:num_training_samples]]
y_train = y[random_indices[:num_training_samples]]
x_test = X_Train[random_indices[num_training_samples:]]
y_test = y[random_indices[num_training_samples:]]
y_Train = list(y_train)

# **Ridge Regression**

# In[ ]:

model = linear_model.Ridge()
model.fit(x_train, y_train)
y_predict = model.predict(x_train)
error = 0
for i in range(len(y_Train)):
    error += (abs(y_Train[i] - y_predict[i]) / y_Train[i])
train_error_ridge = error / len(y_Train) * 100
print("Train error = " '{}'.format(train_error_ridge) +
      " percent in Ridge Regression")

# note: Y_test holds the predictions and y_Predict holds the true test targets
Y_test = model.predict(x_test)
y_Predict = list(y_test)
error = 0
for i in range(len(y_test)):
    error += (abs(y_Predict[i] - Y_test[i]) / y_Predict[i])
test_error_ridge = error / len(y_test) * 100
print("Test error = " '{}'.format(test_error_ridge) +
      " percent in Ridge Regression")
def fit(self, X, y):
    self.regr = linear_model.Ridge(alpha=self.alpha)
    self.regr.fit(X, y)
    return self
# X is a 10x10 Hilbert matrix
X = 1. / (np.arange(1, 11) + np.arange(0, 10)[:, np.newaxis])
# y is a 10 x 1 vector
y = np.ones(10)

# In[13]:

n_alphas = 200
# 200 candidate alphas, spaced between 10^-10 and 10^-2
alphas = np.logspace(-10, -2, n_alphas)
print(alphas)

# In[14]:

clf = linear_model.Ridge(fit_intercept=False)
coefs = []
# loop 200 times
for a in alphas:
    # set this iteration's hyperparameter
    clf.set_params(alpha=a)
    # run ridge regression for this alpha
    clf.fit(X, y)
    # store the theta corresponding to each alpha
    coefs.append(clf.coef_)

# In[18]:

ax = plt.gca()
ax.plot(alphas, coefs)
def Polynomial_Model():
    """Define a degree-3 polynomial ridge regression pipeline."""
    model = make_pipeline(PolynomialFeatures(degree=3), linear_model.Ridge())
    return model
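# Hypothetical usage of the pipeline factory above on toy 1-D data; the data
# and names below are illustrative, not from the original.
import numpy as np

X_demo = np.linspace(0, 1, 20).reshape(-1, 1)
y_demo = X_demo.ravel() ** 3 - X_demo.ravel()
poly_model = Polynomial_Model()
poly_model.fit(X_demo, y_demo)
print(poly_model.predict([[0.5]]))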
def fit(self, X, Y, W):
    sk = skl_linear_model.Ridge(alpha=self.alpha, fit_intercept=True)
    # Pass the per-sample weights through to Ridge.fit; the original accepted
    # W but never used it.
    sk.fit(X, Y, sample_weight=W)
    return LinearModel(sk)
def run(data, split, feature_args, exp_label):
    published_time = pd.to_datetime(data['published_time'])
    y = generate_regression_label(data)
    y_class = generate_classification_label(data)
    X_price = data['price'].values

    record = {
        'classification': {
            'train': pd.DataFrame(),
            'test': pd.DataFrame()
        },
        'regression': {
            'train': pd.DataFrame(),
            'test': pd.DataFrame()
        },
        'pnl': {
            'train': pd.DataFrame(),
            'test': pd.DataFrame()
        },
        'buy_actions': {},
        'feature_size': {}
    }

    feature_list = [BOW, TFIDF, WORD2VEC, SKIPTHOUGHT]
    feature_functions = {
        BOW: generate_bag_of_words,
        TFIDF: generate_tfidf,
        WORD2VEC: generate_word2vec,
        SKIPTHOUGHT: generate_skip_thoughts
    }

    fold_index = 0
    tscv = TimeSeriesSplit(n_splits=split)
    for train_index, test_index in tscv.split(data.values):
        fold_index += 1
        start_index = data.index[train_index[0]]
        split_index = data.index[test_index[0]]
        end_index = data.index[test_index[-1]] + 1
        train = data[start_index:split_index]
        test = data[split_index:end_index]

        X_list = []
        for feature_name in feature_list:
            if feature_name in feature_args:
                features, vectorizer = feature_functions[feature_name](
                    train, test, feature_args[feature_name])
                X_list.append(features)
        if len(X_list) > 1:
            array_list = [features.values for features in X_list]
            X = np.concatenate(array_list, axis=1)
        else:
            X = X_list[0].values
        feature_size = X.shape[1]
        print("feature size:", feature_size)

        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        y_class_train, y_class_test = y_class[train_index], y_class[test_index]
        X_train_price = X_price[train_index]
        X_test_price = X_price[test_index]

        # Normalization and Scaling
        scaler = RobustScaler()
        scaler.fit(y_train.reshape(-1, 1))
        y_train_t = scaler.transform(y_train.reshape(-1, 1)).reshape(-1, )
        x_train_t = X_train
        x_test_t = X_test

        # Modeling
        classifiers_dict = {
            'Logistic Regression': LogisticRegression(penalty='l2', C=0.05,
                                                      verbose=0, max_iter=10000)
        }
        regressors_dict = {
            'SVR': SVR(kernel='linear', C=1.0, verbose=0),
            'Ridge Regression': linear_model.Ridge(alpha=5.0)
        }
        train_class_err = {}
        test_class_err = {}
        train_regre_err = {}
        test_regre_err = {}
        train_pnl_err = {}
        test_pnl_err = {}
        test_buy_times = []

        for label, clf in classifiers_dict.items():
            clf.fit(x_train_t, y_class_train)
            y_class_train_pred = clf.predict(x_train_t)
            y_class_test_pred = clf.predict(x_test_t)

            # classification error
            train_acc = accuracy_score(y_class_train, y_class_train_pred)
            test_acc = accuracy_score(y_class_test, y_class_test_pred)
            train_class_err[label] = train_acc
            test_class_err[label] = test_acc

            # PNL error
            train_return, train_buy_action = evaluate_return(
                X_train_price, y_class_train_pred, y_train)
            test_return, test_buy_action = evaluate_return(
                X_test_price, y_class_test_pred, y_test)
            train_pnl_err[label] = train_return
            test_pnl_err[label] = test_return
            if label not in record['buy_actions']:
                record['buy_actions'][label] = []
            for action_time in test_buy_action:
                record['buy_actions'][label].append(action_time + len(X_train))

        for label, clf in regressors_dict.items():
            clf.fit(x_train_t, y_train_t)
            y_train_pred = clf.predict(x_train_t)
            y_test_pred = clf.predict(x_test_t)

            # classification error (np.float is deprecated; use float)
            y_class_train_pred = np.zeros(y_train_pred.shape[0], float)
            y_class_train_pred[y_train_pred >= 0.0] = 1.0
            y_class_test_pred = np.zeros(y_test_pred.shape[0], float)
            y_class_test_pred[y_test_pred >= 0.0] = 1.0
            train_acc = accuracy_score(y_class_train, y_class_train_pred)
            test_acc = accuracy_score(y_class_test, y_class_test_pred)
            train_class_err[label] = train_acc
            test_class_err[label] = test_acc

            # regression error
            y_train_pred = scaler.inverse_transform(
                y_train_pred.reshape(-1, 1)).reshape(-1, )
            y_test_pred = scaler.inverse_transform(
                y_test_pred.reshape(-1, 1)).reshape(-1, )
            train_mse = mean_squared_error(y_train, y_train_pred)
            test_mse = mean_squared_error(y_test, y_test_pred)
            train_regre_err[label] = train_mse
            test_regre_err[label] = test_mse

            # PNL error
            train_return, train_buy_action = evaluate_return(
                X_train_price, y_train_pred, y_train)
            test_return, test_buy_action = evaluate_return(
                X_test_price, y_test_pred, y_test)
            train_pnl_err[label] = train_return
            test_pnl_err[label] = test_return
            if label not in record['buy_actions']:
                record['buy_actions'][label] = []
            for action_time in test_buy_action:
                record['buy_actions'][label].append(action_time + len(X_train))

        record['classification']['train'] = record['classification']['train'].append(
            pd.Series(data=train_class_err), ignore_index=True)
        record['classification']['test'] = record['classification']['test'].append(
            pd.Series(data=test_class_err), ignore_index=True)
        record['regression']['train'] = record['regression']['train'].append(
            pd.Series(data=train_regre_err), ignore_index=True)
        record['regression']['test'] = record['regression']['test'].append(
            pd.Series(data=test_regre_err), ignore_index=True)
        record['pnl']['train'] = record['pnl']['train'].append(
            pd.Series(data=train_pnl_err), ignore_index=True)
        record['pnl']['test'] = record['pnl']['test'].append(
            pd.Series(data=test_pnl_err), ignore_index=True)
        record['feature_size'][str(fold_index)] = feature_size

        # Words analysis
        if vectorizer is not None and fold_index == split:
            plot_word_coef_in_model_dict(classifiers_dict, vectorizer, exp_label)
            plot_word_coef_in_model_dict(regressors_dict, vectorizer, exp_label)
            bayes_result = analysis_bay(X_train, y_class_train,
                                        ['negative', 'positive'], vectorizer)
            plot_word_analysis_result(bayes_result, 'bayes', exp_label)

    return record
mean_values[np.isnan(mean_values)] = 0
std_values = np.nanstd(X_train, axis=0)
std_values[np.isnan(std_values)] = 1
X_train = (X_train - mean_values) / std_values
X_train = np.nan_to_num(X_train)
poly = PolynomialFeatures(degree=poly_dim, interaction_only=False)
X_train = poly.fit_transform(X_train)
y_train = train.loc[y_values_within, 'y'].values

model1 = lm.LinearRegression(n_jobs=4, fit_intercept=False)
model2 = lm.Ridge(alpha=1e5, tol=1e-4, fit_intercept=False)
model3 = LinearSVR(C=1e-5, loss='squared_epsilon_insensitive',
                   tol=1e-4, fit_intercept=False, dual=False)
#model4 = xgb.XGBRegressor(reg_alpha=0.001, reg_lambda=1e4,
#                          learning_rate=0.1, n_estimators=500, nthread=5)

print('Training linear regressors...')
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)
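# A hedged follow-up sketch: compare the in-sample R^2 of the three fitted
# models (all sklearn regressors expose .score(), which returns R^2). Purely
# illustrative; the original does not report these numbers.
for name, m in [('OLS', model1), ('Ridge', model2), ('LinearSVR', model3)]:
    print('%s train R^2: %.4f' % (name, m.score(X_train, y_train)))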