def load_default(self, machine_list=['lasso', 'tree', 'ridge', 'random_forest']):
    """
    Loads 4 different scikit-learn regressors by default.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.
    """
    for machine in machine_list:
        if machine == 'lasso':
            self.machines_['lasso'] = linear_model.LassoCV(
                random_state=self.random_state).fit(self.X_k_, self.y_k_)
        if machine == 'tree':
            self.machines_['tree'] = DecisionTreeRegressor(
                random_state=self.random_state).fit(self.X_k_, self.y_k_)
        if machine == 'ridge':
            self.machines_['ridge'] = linear_model.RidgeCV().fit(
                self.X_k_, self.y_k_)
        if machine == 'random_forest':
            self.machines_['random_forest'] = RandomForestRegressor(
                random_state=self.random_state).fit(self.X_k_, self.y_k_)
def main():
    train_data = sio.loadmat(TRAIN_DIR)
    test_data = sio.loadmat(TEST_DIR)
    train_x, train_y = train_data['trainx'], train_data['trainy']
    test_x, test_y = test_data['testx'], test_data['testy']
    clf = linear_model.RidgeCV(alphas=[0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
                               normalize=True, cv=10, store_cv_values=False)
    print("Training......")
    clf.fit(train_x, train_y)
    print("Predicting.......")
    years = clf.predict(test_x)
    # Accumulating absolute deviations gives the mean absolute error,
    # not the MSE as the original message claimed.
    diff = 0.0
    for i, j in zip(test_y, years):
        diff += abs(i - j)
    diff /= TEST_SIZE
    print("MAE is: " + str(diff))
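# A minimal sketch of the same evaluation via sklearn.metrics instead of a
# manual loop; test_y and years are the arrays from main() above, and the
# ravel() calls assume loadmat returned 2-D arrays (a common case).
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(test_y.ravel(), years.ravel())
print("MAE is: " + str(mae))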
def fit(self, train, y):
    internal_model = linear_model.RidgeCV(
        alphas=(0.1, 0.5, 1.0, 5.0, 10.0), fit_intercept=False)
    bestscore = 1e15
    better = True
    indextrain = train.dropna().index
    limitlen = len(train) * self.limit_size_train
    while better:
        # .loc replaces the long-removed DataFrame.ix indexer
        internal_model.fit(train.loc[indextrain], y.loc[indextrain])
        score = metrics.mean_squared_error(
            internal_model.predict(train.loc[indextrain]), y.loc[indextrain])
        if score < bestscore:
            bestscore = score
            self.bestmodel = internal_model
            residual = y.loc[indextrain] - internal_model.predict(
                train.loc[indextrain])
            # keep only the rows whose residual falls within the quantile
            indextrain = residual[
                abs(residual) <= abs(residual).quantile(self.quant)].index
            if len(indextrain) < limitlen:
                better = False
        else:
            better = False
            self.bestmodel = internal_model
def Models(self, model_names):
    self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
        self.poly_features, self.y, test_size=0.3)
    # model_lasso = linear_model.LassoCV(
    #     alphas=[0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5, 1, 10], cv=10)
    model_linear = linear_model.LinearRegression()
    model_ridge = linear_model.RidgeCV(
        alphas=[0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.5, 1, 10], cv=10)
    model_en = linear_model.ElasticNetCV(
        alphas=[0.0001, 0.0005, 0.001, 0.01, 0.1, 1, 10],
        l1_ratio=[.01, .1, .5, .9, .99],
        max_iter=5000)
    model_br = linear_model.BayesianRidge(alpha_1=1e-06, alpha_2=1e-06,
                                          compute_score=False, copy_X=True,
                                          fit_intercept=True, lambda_1=1e-06,
                                          lambda_2=1e-06, n_iter=300,
                                          normalize=False, tol=0.001,
                                          verbose=False)
    # model_svr = model_selection.GridSearchCV(
    #     LinearSVR(random_state=0, tol=1e-5),
    #     param_grid={"epsilon": [0, 0.2], "C": [0, 1]}, cv=5)
    model_svr = make_pipeline(StandardScaler(),
                              svm.LinearSVR(random_state=0, tol=1e-5))
    model_sgdr = make_pipeline(StandardScaler(),
                               SGDRegressor(max_iter=1000, tol=1e-3))
    model_list = [
        model_linear, model_ridge, model_en, model_br, model_svr, model_sgdr
    ]
    # print('Best alpha for ridge regression:', model_ridge.alpha_, '\n', '-' * 50)
    for i in range(6):
        model_predict = model_list[i].fit(self.X_train, self.y_train)
        self.pre_y_list = model_predict.predict(self.X_val)
        joblib.dump(
            model_predict,
            f'C:\\Users\\Administrator\\Desktop\\7.21\\models-saved\\1期 device6 模型{model_names[i]}.pkl'
        )
def single_ridge_regression(X_in, y_in):
    '''
    Performs a single variable ridge regression given X feature and y
    outcome variable
    Input: Series of X feature and Series of y outcome variables
    Output: the ridge regressor, and a dataframe of actual vs. predicted values
    '''
    # Ridge regression using only cumulative number of COVID cases
    # standardizes X values for the ridge regression
    scaler = StandardScaler()
    X = X_in.values.reshape(-1, 1)
    y = y_in.values.reshape(-1, 1)
    X_std = scaler.fit_transform(X)

    # formulates the training and tests set for each variable
    X_train, X_test, y_train, y_test = train_test_split(X_std, y,
                                                        test_size=.2,
                                                        random_state=0)

    # ridge regression algorithm; fit on the training split only so the
    # held-out test split is not leaked into the fit
    regressor = linear_model.RidgeCV(alphas=[.1, 1, 10])
    regressor = regressor.fit(X_train, y_train)

    # coefficient dataframe and y prediction results of the ridge regression
    y_pred = regressor.predict(X_test)
    df = pd.DataFrame({
        "Actual": y_test.flatten(),
        "Predicted": y_pred.flatten()
    })

    # shows model performance on the held-out test split
    print("Ridge regression score: " + str(regressor.score(X_test, y_test)))
    print("Ridge regression explained_variance: " +
          str(metrics.explained_variance_score(y_test, y_pred)))
    print("Ridge regression MSE: " +
          str(metrics.mean_squared_error(y_test, y_pred)))
    return regressor, df
def multi_ridge_regression(X_in, y_in):
    '''
    Performs a multivariate ridge regression given X features and y outcome
    variable
    Input: dataframe of X features and Series of y outcome variables
    Output: dataframe of beta coefficients, dataframe of actual vs. predicted
    values, and the ridge regressor
    '''
    # Ridge regression
    # standardizes X values for the ridge regression
    scaler = StandardScaler()
    X = X_in.values
    y = y_in.values
    X_std = scaler.fit_transform(X)

    # formulates the training and tests set for each variable
    X_train, X_test, y_train, y_test = train_test_split(X_std, y,
                                                        test_size=.2,
                                                        random_state=0)

    # ridge regression algorithm; fit on the training split only so the
    # held-out test split stays untouched
    regressor = linear_model.RidgeCV(alphas=[.1, 1, 10])
    regressor = regressor.fit(X_train, y_train)

    # coefficient dataframe and y prediction results of the ridge regression
    coeff_df = pd.DataFrame(regressor.coef_, X_in.columns,
                            columns=["Coefficients"])
    y_pred = regressor.predict(X_test)
    df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

    # shows model performance on the held-out test split
    print("Ridge regression score: " + str(regressor.score(X_test, y_test)))
    print("Ridge regression explained_variance: " +
          str(metrics.explained_variance_score(y_test, y_pred)))
    print("Ridge regression MSE: " +
          str(metrics.mean_squared_error(y_test, y_pred)))
    return coeff_df, df, regressor
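# A minimal sketch of the same pattern with scaling folded into a Pipeline,
# so that neither the scaler nor the ridge fit ever sees the test split
# (above, the scaler is still fit on all rows before splitting). X_in and
# y_in are hypothetical stand-ins for the inputs used by the two functions.
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X_train, X_test, y_train, y_test = train_test_split(
    X_in.values, y_in.values, test_size=.2, random_state=0)
model = make_pipeline(StandardScaler(), RidgeCV(alphas=[.1, 1, 10]))
model.fit(X_train, y_train)  # scaler statistics come from the train rows only
print("held-out R^2:", model.score(X_test, y_test))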
def ridgeCV_model(X_train, X_valid, y_train, y_test, y_name,
                  y_train_mean, y_train_std):
    print('head items to fit are: ', y_name)
    for head_item in range(len(y_name)):
        y_train_item = y_train[:, head_item]
        y_train_item = np.reshape(y_train_item, [y_train.shape[0], 1])
        y_test_item = y_test[:, head_item]
        y_test_item = np.reshape(y_test_item, [y_test_item.shape[0], 1])
        print('********************************** Fitting RidgeCV on %s Data **********************************' % y_name[head_item])

        # Declare model
        model = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0],
                                     normalize=True, fit_intercept=True)
        # Fit model
        model.fit(X_train, y_train_item)

        # Get predictions
        y_valid_predicted = model.predict(X_valid)
        training_prediction = model.predict(X_train)
        R2s_training = get_R2(y_train_item, training_prediction)
        print('R2 on training set = ', R2s_training)

        # Get metric of fit
        R2s = get_R2(y_test_item, y_valid_predicted)
        print('R2s:', R2s)

        print('saving prediction ...')
        np.savez(y_name[head_item] + '_RidgeCV_ypredicted.npz',
                 y_test=y_test_item, y_prediction=y_valid_predicted,
                 y_train_=y_train_item,
                 training_prediction=training_prediction,
                 y_train_mean=y_train_mean[head_item],
                 y_train_std=y_train_std[head_item])

        # print('saving model ...')
        joblib.dump(model, y_name[head_item] + '_Ridge.pkl')

        print('plotting results...')
        plot_results(y_test_item, y_valid_predicted, y_name[head_item], R2s,
                     model_name='RidgeCV')
    return model
def __init__(self, X, y, kind):
    self.X = X
    self.y = y
    kind = kind.upper()
    if kind == 'SVM':
        from sklearn import svm
        self.regressor = svm.SVR()
    elif kind == 'RIDGECV':
        from sklearn import linear_model
        # base * multiplier grid; local names chosen so the comprehension
        # does not shadow the y argument
        self.regressor = linear_model.RidgeCV(
            alphas=[base * mult
                    for base in [0.01, 0.1, 1, 10]
                    for mult in [1, 5]]
        )
    elif kind == 'SVM_GRID':
        from sklearn import svm
        from sklearn.model_selection import GridSearchCV
        self.regressor = GridSearchCV(
            svm.SVR(),
            {
                'C': [1e0, 5e0, 1e1, 5e1, 1e2, 5e2, 1e3],
                'epsilon': [1e-3, 1e-2],
                'kernel': ['linear', 'rbf', 'poly'],
                'degree': [2, 3, 4]
            },
            scoring='neg_mean_squared_log_error'
        )
    elif kind == 'SVM_GRID_SIMPLE':
        from sklearn import svm
        from sklearn.model_selection import GridSearchCV
        self.regressor = GridSearchCV(
            svm.SVR(),
            {
                'C': [1e0, 5e0, 1e1, 5e1, 1e2],
                'gamma': [1e-3, 1e-2, 1e-1],
                'kernel': ['linear'],
                'degree': [2]
            },
            scoring='neg_mean_squared_log_error',
            n_jobs=-1,
            cv=7
        )
def evaluate_lcc_model(en_train_matrix, en_test_matrix, fr_train_matrix,
                       fr_test_matrix, dimensions, evaluation_function):
    scores = []
    for dimension in tqdm(dimensions):
        # center the training embeddings
        en = en_train_matrix[:, :dimension] - np.mean(
            en_train_matrix[:, :dimension], axis=0)
        fr = fr_train_matrix[:, :dimension] - np.mean(
            fr_train_matrix[:, :dimension], axis=0)
        sample_size = en.shape[0]
        zero_matrix = np.zeros((sample_size, dimension))
        X1 = np.concatenate((en, zero_matrix), axis=1)
        X2 = np.concatenate((zero_matrix, fr), axis=1)
        X = np.concatenate((X1, X2), axis=0)
        Y1 = np.concatenate((en, fr), axis=1)
        Y2 = np.concatenate((en, fr), axis=1)
        Y = np.concatenate((Y1, Y2), axis=0)

        # reduced-rank regression: ridge fit followed by a PCA projection
        # of the fitted predictions
        reg = linear_model.RidgeCV(alphas=[1e-10, 1e-3, 1e-2, 1e-1, 1, 10])
        reg.fit(X, Y)
        pca = PCA(n_components=int(dimension))
        pca.fit(reg.predict(X))

        def rrr(M):
            return np.matmul(pca.transform(reg.predict(M)), pca.components_)

        # sample_size = len(en_docs_test)
        # center the test embeddings with the training means
        en = en_test_matrix[:, :dimension] - np.mean(
            en_train_matrix[:, :dimension], axis=0)
        fr = fr_test_matrix[:, :dimension] - np.mean(
            fr_train_matrix[:, :dimension], axis=0)
        zero_matrix = np.zeros((en_test_matrix.shape[0], dimension))
        X1 = np.concatenate((en, zero_matrix), axis=1)
        X2 = np.concatenate((zero_matrix, fr), axis=1)
        X = np.concatenate((X1, X2), axis=0)
        english_encodings_lcc = rrr(X1)
        french_encodings_lcc = rrr(X2)
        score = evaluation_function(english_encodings_lcc,
                                    french_encodings_lcc)
        scores.append(score)
    return scores
def fit_and_predict(self, TEST_YEAR, regularization=True):
    X, Y, xTrain, yTrain, xTest, yTest, names = self.build_data_arrays(TEST_YEAR)
    if regularization:
        predictor = linear_model.RidgeCV(alphas=[0.1, 1.0, 10],
                                         fit_intercept=True,
                                         normalize=self.shouldNormalize)
    else:
        predictor = linear_model.LinearRegression(
            fit_intercept=True, normalize=self.shouldNormalize,
            copy_X=True, n_jobs=1)
    scores = {}
    relativeError = {}
    coefficients = {}
    output = {}
    for p in self.positions:
        if len(xTrain[p]) > 1 and len(xTest[p]) > 1:
            predictor.fit(np.array(xTrain[p]), np.array(yTrain[p]))
            coefficients[p] = pd.DataFrame(
                list(zip(self.features, predictor.coef_)),
                columns=['feature', 'coefficient']
            ).sort_values(by=['coefficient'], ascending=False)
            prediction = predictor.predict(np.array(xTest[p]))
            output[p] = pd.DataFrame(
                list(zip(names[p], prediction)),
                columns=['name', 'value']
            ).sort_values(by=['value'], ascending=False)
            scores[p] = (mean_squared_error(np.array(yTest[p]),
                                            np.array(prediction)),
                         r2_score(np.array(yTest[p]), np.array(prediction)))
            relativeError[p] = self.get_relative_error(output[p], TEST_YEAR)
    return output
def ridge_regress(self, cv=20, alphas=None):
    """perform k-folds cross-validated ridge regression on the design_matrix.
    To be used when the design matrix contains very collinear regressors.
    For cross-validation and ridge fitting, we use sklearn's RidgeCV
    functionality. Note: intercept is not fit, and data are not prenormalized.

    :param cv: cross-validated folds, inherits RidgeCV cv argument's
        functionality.
    :type cv: int, standard = 20
    :param alphas: values of penalization parameter to be traversed by the
        procedure, inherits RidgeCV alphas argument's functionality.
        Standard value, when parameter is None, is np.logspace(7, 0, 20)
    :type alphas: numpy array, from >0 to 1.
    :returns: instance variables 'betas' (nr_betas x nr_signals) and
        'residuals' (nr_signals x nr_samples) are created.
    """
    # identity check: `alphas == None` would broadcast elementwise on an array
    if alphas is None:
        alphas = np.logspace(7, 0, 20)
    self.rcv = linear_model.RidgeCV(alphas=alphas, fit_intercept=False, cv=cv)
    self.rcv.fit(self.design_matrix.T, self.resampled_signal.T)

    self.betas = self.rcv.coef_.T
    # predictions come back as (samples x signals); transpose to match
    # the (signals x samples) layout of resampled_signal
    self.residuals = self.resampled_signal - self.rcv.predict(
        self.design_matrix.T).T

    self.logger.debug(
        'performed ridge regression on %s design_matrix and %s signal, '
        'resulting alpha value is %f' % (str(self.design_matrix.shape),
                                         str(self.resampled_signal.shape),
                                         self.rcv.alpha_))
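# A minimal standalone sketch of the same pattern, assuming a hypothetical
# design matrix of shape (n_regressors, n_samples) and a signal array of
# shape (n_signals, n_samples), mirroring the transposes used above.
import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
design_matrix = rng.standard_normal((5, 200))   # 5 regressors, 200 samples
signal = rng.standard_normal((3, 200))          # 3 signals
rcv = linear_model.RidgeCV(alphas=np.logspace(7, 0, 20),
                           fit_intercept=False, cv=20)
rcv.fit(design_matrix.T, signal.T)
betas = rcv.coef_.T                             # (nr_betas x nr_signals)
residuals = signal - rcv.predict(design_matrix.T).T
print("selected alpha:", rcv.alpha_)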
def fit_ridge(oe, mw):
    #### oe = x; mw = y variable.
    # fit line:
    oe = np.array(oe)
    mw = np.array(mw)
    clf = linear_model.RidgeCV(
        alphas=[0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0])
    num_codes = min(len(oe), len(mw))
    print('oe.shape = ', oe.shape)
    oe = oe.reshape(-1, 1)
    print('oe.shape = ', oe.shape)
    mw = mw.reshape(-1, 1)
    clf.fit(oe[0:num_codes], mw[0:num_codes])
    print('****************************************************************************')
    print('RIDGE REGRESSION SCORE = ', clf.score(oe[0:num_codes], mw[0:num_codes]))
    # slope and intercept of the fitted line
    m = clf.coef_[0][0]
    c = clf.intercept_[0]
    print('m,c = ', m, c)
    return m, c
def poly_ridge(self, deg=2):
    '''
    Polynomial Ridge Regression
    '''
    from sklearn import linear_model

    # Training
    t0 = time.time()
    phi = cm.naivePolyFeature(self.X, deg=deg, norm=True)
    lm = linear_model.RidgeCV(alphas=np.logspace(-10, -1, 10))
    lm.fit(phi, self.y)
    print(lm.alpha_)
    t_tr = time.time() - t0

    # Predicting
    t0 = time.time()
    phi_pred = cm.naivePolyFeature(self.X_pred, deg=deg, norm=True)
    y_lr = lm.predict(phi_pred)
    t_pr = time.time() - t0

    eel = np.mean(np.maximum(self.Value0 - self.c - np.sum(y_lr, axis=1), 0))
    return (eel, t_tr, t_pr)
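# A minimal sketch of polynomial ridge regression with stock scikit-learn
# pieces, for comparison with the cm.naivePolyFeature helper above (which is
# specific to that codebase). X and y here are made-up arrays.
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

rng = np.random.default_rng(0)
X = rng.uniform(-1, 1, size=(200, 2))
y = X[:, 0] ** 2 - X[:, 1] + rng.normal(scale=0.1, size=200)

model = make_pipeline(PolynomialFeatures(degree=2),
                      StandardScaler(),
                      RidgeCV(alphas=np.logspace(-10, -1, 10)))
model.fit(X, y)
print("chosen alpha:", model.named_steps['ridgecv'].alpha_)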
def RidgeRegression(x, y, z, degree=5, alpha=10**(-6), verbose=False):
    # Generate random training points and evaluate the Franke function on
    # them (note: this overwrites the z argument)
    x_train = np.random.rand(100, 1)
    y_train = np.random.rand(100, 1)
    z = FrankeFunction(x_train, y_train)

    # training and finding design matrix X_
    X = np.c_[x_train, y_train]
    poly = PolynomialFeatures(degree)
    X_ = poly.fit_transform(X)
    ridge = linear_model.RidgeCV(alphas=np.array([alpha]))
    ridge.fit(X_, z)
    beta = ridge.coef_
    # intercept = ridge.intercept_

    # predict data and prepare for plotting
    x_, y_ = np.meshgrid(x, y)
    x = x_.reshape(-1, 1)
    y = y_.reshape(-1, 1)
    M = np.c_[x, y]
    M_ = poly.fit_transform(M)
    predict = M_.dot(beta.T)

    if verbose:
        print("X_: ", np.shape(X_))
        print("M: ", np.shape(M))
        print("M_: ", np.shape(M_))
        print("predict: ", np.shape(predict))

    # show figure
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.plot_surface(x_, y_, predict.reshape(20, 20), cmap=cm.coolwarm,
                    linewidth=0, antialiased=False)
    plt.show()
    return beta
def __initialize_model(model_name, lamda=0, hyper_parameters={}):
    """
    initialize machine learning model.

    Args:
        model_name: learning algorithm name
        lamda: coefficient of the regularization term
        hyper_parameters: other parameters for algorithms
            See parameters for RandomForest Regression in scikit-learn
    Returns:
        an initialized regressor
    """
    if model_name == constants.MODEL_NAME_LASSO:
        # note: alpha in scikit-learn represents lamda, the constant that
        # multiplies the regularization term
        clf_lasso = linear_model.Lasso(alpha=lamda)
        return clf_lasso
    elif model_name == constants.MODEL_NAME_ELASTICNET:
        clf_elasticnet = ElasticNet(alpha=lamda)
        return clf_elasticnet
    elif model_name == constants.MODEL_NAME_RIDGE:
        clf_ridge = linear_model.Ridge(alpha=lamda)
        return clf_ridge
    elif model_name == constants.MODEL_NAME_RIDGECV:
        clf_ridgecv = linear_model.RidgeCV(alphas=constants.lamdaArray)
        return clf_ridgecv
    elif model_name == constants.MODEL_NAME_LARS:
        clf_lars = linear_model.Lars(n_nonzero_coefs=1)
        return clf_lars
    elif model_name == constants.MODEL_NAME_BAYESIAN:
        clf_bayesian = linear_model.BayesianRidge()
        return clf_bayesian
    elif model_name == constants.MODEL_NAME_SGD:
        clf_sgd = linear_model.SGDRegressor(alpha=lamda)
        return clf_sgd
    elif model_name == constants.MODEL_NAME_RANDOM_FOREST:
        clf_random_forest = RandomForestRegressor(**hyper_parameters,
                                                  random_state=0, n_jobs=-1)
        return clf_random_forest
def cross_validate_model(X_train, Y_train):
    """
    Here we compare candidate models on a single hold-out split
    to choose the best one.
    """
    # Divide the data into training and validation halves
    train, test, y_train, y_test = train_test_split(X_train, Y_train,
                                                    test_size=0.5,
                                                    random_state=42)

    # List the regression methods to use.
    clf_random_forest = ensemble.RandomForestRegressor(n_estimators=50)
    clf_adaboost_reg = ensemble.AdaBoostRegressor(n_estimators=50)
    clf_lasso_larscv = sklinear.LassoLarsCV(cv=9)
    clf_ridge = sklinear.RidgeCV()
    clf_elastic_net = sklinear.ElasticNet()
    clf_extra_tree = ensemble.ExtraTreesRegressor(n_estimators=50)
    clf_mlpr = neural_network.MLPRegressor(solver='adam')

    # Add the above methods in an array
    # More amenable for looping
    methods = [
        clf_random_forest, clf_adaboost_reg, clf_lasso_larscv, clf_ridge,
        clf_elastic_net, clf_extra_tree, clf_mlpr
    ]
    methods_label = [
        'clf_random_forest', 'clf_adaboost_reg', 'clf_lasso_larscv',
        'clf_ridge', 'clf_elastic_net', 'clf_extra_tree', 'clf_mlpr'
    ]
    method_mse = np.zeros((len(methods), 1))

    # Fit and predict for each method
    for i in range(len(methods)):
        methods[i].fit(train, y_train)
        method_predict = methods[i].predict(test)
        method_mse[i] = metrics.mean_squared_error(y_test, method_predict)
        print('MSE for %s while cross validation : %f' %
              (methods_label[i], method_mse[i]))

    # We return the method which has the minimum mse
    return np.argmin(method_mse)
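# A minimal sketch of the same comparison done with genuine k-fold
# cross-validation rather than one 50/50 hold-out split; `methods` and
# `methods_label` are reused from the function above purely for illustration.
from sklearn.model_selection import cross_val_score

mse_per_model = []
for label, model in zip(methods_label, methods):
    # neg_mean_squared_error is negated so larger is better; flip the sign back
    cv_scores = cross_val_score(model, X_train, Y_train,
                                scoring='neg_mean_squared_error', cv=5)
    mse_per_model.append(-cv_scores.mean())
    print('CV MSE for %s : %f' % (label, -cv_scores.mean()))
best = int(np.argmin(mse_per_model))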
def __init__(self, train=True, train_set=[], clf='ridge'):
    self.identity = None
    self.nb_victories = 0
    self.nb_games = 0
    self.next_action = {}
    self.gamma = 1
    self.initial_alpha = 0.05
    self.min_alpha = 0.005
    self.alpha_update_rate = 1  # pow(self.initial_alpha/self.min_alpha, 1/500)
    self.alpha = self.initial_alpha
    self.epsilon = 0.1
    self.state_space_x = [
        'low_prestige', 'high_prestige', 'can_buy_prestige', 'can_buy_card',
        'can_reserve', 'can_take_2', 'can_take_3'
    ]
    self.training_phase = train
    self.is_trained = not train
    self.classifier = {
        'ridge': linear_model.RidgeCV(),
        'mlp': neural_network.MLPRegressor()
    }[clf]
    self.state_space_y = [0, 1]  # 0: few, 1: some, 2: a lot
    self.state_space, self.state_space_inverted_index = self.build_state_space()
    # self.state_space_inverted_index = self.build_inverted_index(self.state_space)
    self.action_space = [
        'buy_prestige', 'buy_card', 'reserve', 'take_3', 'take_2',
        'do_nothing'
    ]
    self.action_space_inverted_index = self.build_inverted_index(
        self.action_space)
    self.nb_actions = len(self.action_space)
    self.nb_states = len(self.state_space)
    self.state = {}
    self.actions = []
    self.init_features()
    if len(train_set) > 0:
        self.load_training(train_set)
def poly_regression(x_tr, x_ts, y_tr, y_ts, degree, filename='Poly_regression'):
    print("Polynomial Regression")
    x_train = x_tr
    x_test = x_ts
    y_train = y_tr
    y_test = y_ts

    poly_features = PolynomialFeatures(degree=degree)
    x_train_poly = poly_features.fit_transform(x_train)
    poly_model = linear_model.RidgeCV(alphas=np.logspace(-9, 9, 19),
                                      normalize=True)
    # poly_model = linear_model.LinearRegression(normalize=True)
    print("Fitting...")
    poly_model.fit(x_train_poly, y_train)

    # Save The Model into File
    filename_model = f'Trained Models\\{filename}.sav'
    pickle.dump(poly_model, open(filename_model, 'wb'))
    # print("Best Alpha : ", poly_model.alpha_)

    print("Predicting...")
    prediction = poly_model.predict(poly_features.fit_transform(x_test))
    print("Poly Accuracy: ",
          round(poly_model.score(poly_features.fit_transform(x_test), y_test)
                * 100), "%")
    print('Poly Mean Square Error',
          metrics.mean_squared_error(y_test, prediction), "\n\n")
    print("First Value of Test Samples' Actual Output: ",
          np.asarray(y_test)[0])
    print("First Value of Test Samples' Predicted Output: ", prediction[0])
    # fig, ax = plt.subplots()
def run_ridge_cv(x_df, y_df, analyspar, alphas=None):
    """
    run_ridge_cv(x_df, y_df, analyspar)
    """
    if alphas is None:
        alphas = (0.1, 0.1, 2.0)
    steps = [("scaler", preprocessing.StandardScaler()),
             ("model", linear_model.RidgeCV(normalize=True, alphas=alphas,
                                            store_cv_values=True))]
    pipl = pipeline.Pipeline(steps)
    pipl.fit(x_df, y_df)

    log_sklearn_results(pipl, analyspar, name="ridge_cv",
                        var_names=x_df.columns)
def build_models(predictors, responses, modelNo):
    if modelNo == 0:
        # Linear Regression
        model = linear_model.LinearRegression()
        modelName = "Linear Regression"
    if modelNo == 1:
        # Ridge Regression
        model = linear_model.RidgeCV(alphas=(0.1, 0.1, 10))
        modelName = "Ridge Regression"
    if modelNo == 2:
        # lasso Regression
        model = linear_model.MultiTaskLassoCV(eps=0.001, n_alphas=100,
                                              alphas=(0.1, 0.1, 10))
        modelName = "Lasso Regression"

    model.fit(predictors, responses)
    predictions = model.predict(predictors)

    Result = {}
    Result['modelName'] = modelName
    Result['predictions'] = predictions
    Result['model'] = model
    Result['Corr'] = pearsonr(predictions, responses)[0][0]
    return Result
def _regression_DKL(n_, fo_, fx_, stims_all_LM_, stims_all_S_, stims_all_LUM_):
    """ regress all coordinates but only hue matters """
    reg_fo_LM = linear_model.RidgeCV()
    reg_fo_S = linear_model.RidgeCV()
    reg_fo_LUM = linear_model.RidgeCV()
    reg_fx_LM = linear_model.RidgeCV()
    reg_fx_S = linear_model.RidgeCV()
    reg_fx_LUM = linear_model.RidgeCV()

    reg_fo_LM.fit(fo_[:n_], stims_all_LM_[:n_])
    reg_fo_S.fit(fo_[:n_], stims_all_S_[:n_])
    reg_fo_LUM.fit(fo_[:n_], stims_all_LUM_[:n_])
    reg_fx_LM.fit(fx_[:n_], stims_all_LM_[:n_])
    reg_fx_S.fit(fx_[:n_], stims_all_S_[:n_])
    reg_fx_LUM.fit(fx_[:n_], stims_all_LUM_[:n_])

    return reg_fo_LM, reg_fo_S, reg_fo_LUM, reg_fx_LM, reg_fx_S, reg_fx_LUM
def load_default(self, machine_list=[
        'lasso', 'tree', 'ridge', 'random_forest', 'svm'
]):
    """
    Loads 5 different scikit-learn regressors by default.

    Parameters
    ----------
    machine_list: optional, list of strings
        List of default machine names to be loaded.

    Returns
    -------
    self : returns an instance of self.
    """
    self.estimators_ = {}
    for machine in machine_list:
        try:
            if machine == 'lasso':
                self.estimators_['lasso'] = linear_model.LassoCV(
                    random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'tree':
                self.estimators_['tree'] = DecisionTreeRegressor(
                    random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'ridge':
                self.estimators_['ridge'] = linear_model.RidgeCV().fit(
                    self.X_k_, self.y_k_)
            if machine == 'random_forest':
                self.estimators_['random_forest'] = RandomForestRegressor(
                    random_state=self.random_state).fit(self.X_k_, self.y_k_)
            if machine == 'svm':
                self.estimators_['svm'] = LinearSVR(
                    random_state=self.random_state).fit(self.X_k_, self.y_k_)
        except ValueError:
            continue
    return self
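# A minimal driving sketch for load_default, assuming a hypothetical host
# object that supplies the attributes the method reads (X_k_, y_k_,
# random_state); everything named _Host below is illustrative only.
import numpy as np
from sklearn import linear_model

class _Host:
    def __init__(self, X, y, random_state=0):
        self.X_k_, self.y_k_ = X, y
        self.random_state = random_state
    load_default = load_default  # bind the function above as a method

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=100)
host = _Host(X, y).load_default(machine_list=['ridge', 'lasso'])
print(sorted(host.estimators_))  # -> ['lasso', 'ridge']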
def save_standard_results(model_type, year):
    p_train, r_train, p_test, r_test = pipeline(year, "pixel_level",
                                                "daily_total")
    scale = preprocessing.StandardScaler()
    p_train = scale.fit_transform(p_train)
    p_test = scale.transform(p_test)
    if model_type == "Lasso":
        lr = linear_model.LassoCV()
    elif model_type == "Ridge":
        lr = linear_model.RidgeCV()
    elif model_type == "OLS":
        lr = linear_model.LinearRegression()
    elif model_type == "Elastic_Net":
        lr = linear_model.ElasticNetCV()
    else:
        raise TypeError("Invalid model_type")
    model = lr.fit(p_train, r_train)
    predicted_train = model.predict(p_train)
    predicted_test = model.predict(p_test)
    train_correlation = fast_evaluate.anti_correlation(predicted_train, r_train)
    test_correlation = fast_evaluate.anti_correlation(predicted_test, r_test)
    train_rmse = fast_evaluate.root_mean_square_error(predicted_train, r_train)
    test_rmse = fast_evaluate.root_mean_square_error(predicted_test, r_test)
    train_mae = fast_evaluate.mean_absolute_error(predicted_train, r_train)
    test_mae = fast_evaluate.mean_absolute_error(predicted_test, r_test)
    coefficients = model.coef_.tolist()
    filename = "Standard_{0}_year_{1}.csv".format(model_type, year)
    # csv.writer expects a text-mode file in Python 3, not 'wb'
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            "train_correlation", "test_correlation", "train_rmse",
            "test_rmse", "train_mae", "test_mae", "coefficients"
        ])
        writer.writerow((train_correlation, test_correlation, train_rmse,
                         test_rmse, train_mae, test_mae, coefficients))
def calculateRAPM(units, points, weights):
    # encode each lineup "unit" dict into a dense design matrix with one
    # column per player
    u = DictVectorizer(sparse=False)
    u_mat = u.fit_transform(units)
    # config.debug
    # print(u_mat)
    # print(points[:25])
    # print(weights[:100])
    playerIDs = u.get_feature_names()
    # print(json.dumps(u.get_feature_names()[:25], indent=4 * ' '))
    # print(json.dumps(u.inverse_transform(u_mat)[:1], indent=4 * ' '))

    clf = linear_model.RidgeCV(
        alphas=np.array([0.01, 0.1, 1.0, 10, 100, 500, 1000, 2000, 5000]),
        cv=5)
    clf.fit(u_mat, points, sample_weight=weights)
    # print(clf.alpha_)

    # each player's ridge coefficient is their RAPM rating
    ratings = []
    for playerID, coef in zip(playerIDs, clf.coef_):
        ratings.append((playerID, coef))
    ratings.sort(key=lambda tup: tup[1], reverse=True)
    return ratings
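# A toy driving example for calculateRAPM: each stint dict maps player id to
# +1/-1, points is the stint outcome, and weights its possession count. All
# data below is made up, and the call assumes the older scikit-learn API
# (DictVectorizer.get_feature_names) that the function above targets.
import numpy as np

rng = np.random.default_rng(0)
players = ['A', 'B', 'C', 'D', 'E']
units = [{p: int(rng.choice([-1, 1]))
          for p in rng.choice(players, size=3, replace=False)}
         for _ in range(40)]
points = rng.normal(size=40)
weights = rng.uniform(1, 10, size=40)
print(calculateRAPM(units, points, weights))  # players sorted by rating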
def train_models(mod, save=True, cutoff=0.999, percent=50, plot=True,
                 scale=False):
    if mod == 'linear':
        clf = linear_model.LinearRegression(n_jobs=-1)
    elif mod == 'lasso':
        clf = linear_model.Lasso(alpha=1000, max_iter=10000, tol=0.001,
                                 normalize=True, positive=True)
    elif mod == 'lassolars':
        clf = linear_model.LassoLars(alpha=0.001)
    elif mod == 'multilasso':
        clf = linear_model.MultiTaskLasso(alpha=0.1)
    elif mod == 'ridgeCV':
        clf = linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
    elif mod == 'ridge':
        clf = linear_model.Ridge(alpha=1000)
    elif mod == 'bayes':
        clf = linear_model.BayesianRidge()
    elif mod == 'huber':
        clf = linear_model.HuberRegressor()
    elif mod == 'poly':
        # clf = poly_clf()
        clf = PolynomialFeatures(degree=2)
    clf, continuum = train(clf, mod, save=save, cutoff=cutoff,
                           percent=percent, plot=plot, scale=scale)
    return clf, continuum
def _estimate_model(self):
    """Estimates ridge regression model.

    Returns
    -------
    model : sklearn ridge regression or ridge cv object
        Fitted ridge model.
    """
    self.underlying = linear_model.Ridge(fit_intercept=self.intercept)
    if (self.cv_folds is not None) or (self.solver in ['svd', 'eigen']):
        # Ridge CV by default tests a very limited set of alphas;
        # we expand on this
        alphas = np.logspace(-10, 5, 100)
        model = linear_model.RidgeCV(alphas=alphas, cv=self.cv_folds,
                                     fit_intercept=self.intercept,
                                     gcv_mode=self.solver, **self.kwargs)
    else:
        model = linear_model.Ridge(fit_intercept=self.intercept,
                                   **self.kwargs)
    model.fit(self.x_train, self.y_train)
    return model
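# For reference: RidgeCV's default grid is only alphas=(0.1, 1.0, 10.0),
# which is the "very limited set" the method above widens to
# np.logspace(-10, 5, 100).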
def _get_generalizer(cls, gnrl):
    generalizers = dict(
        MEAN=MeanClassifier(),
        RFC=RandomForestClassifier(n_estimators=500, max_depth=32, n_jobs=-1,
                                   random_state=random_state),
        RCV=lm.RidgeCV(alphas=np.linspace(0, 200), cv=100),
        RCVp=RidgeCV_proba(alphas=np.linspace(0, 200), cv=100),
        LCV=lm.LassoCV(),
        LCVp=LassoCV_proba(),
        LSVC=svm.LinearSVC(penalty='l2', loss='l2', dual=True, tol=0.0001,
                           C=1.0, multi_class='ovr', fit_intercept=True,
                           intercept_scaling=1, class_weight=None, verbose=0,
                           random_state=random_state),
        SVC=svm.SVC(C=1.0, kernel='linear', degree=3, gamma=0.0, coef0=0.0,
                    shrinking=True, probability=True, tol=0.001,
                    cache_size=200, class_weight=None, verbose=False,
                    max_iter=-1, random_state=random_state),
        LR=lm.LogisticRegression(penalty='l2', dual=True, tol=0.00001, C=1,
                                 fit_intercept=True, intercept_scaling=1.0,
                                 class_weight=None,
                                 random_state=random_state),
        KNCuniform=KNeighborsClassifier(n_neighbors=1024, weights='uniform'),
        KNC=KNeighborsClassifier(n_neighbors=1024, weights='distance'),
        AUCR=AUCRegressor(),
        ABC_DTC=AdaBoostClassifier(
            algorithm='SAMME.R',
            base_estimator=DecisionTreeClassifier(
                compute_importances=None, criterion='gini', max_depth=1,
                max_features=1.0, min_density=None, min_samples_leaf=1,
                min_samples_split=2, random_state=random_state,
                splitter='best'),
            learning_rate=0.1, n_estimators=200, random_state=random_state),
    )
    return generalizers[gnrl]
def __init__(self, data, classifier='linear', save=True, load=False,
             fname='FASMA_ML.pkl'):
    self.classifier = classifier
    self.data = data
    self.save = save
    self.load = load
    self.fname = fname
    self.X_train, self.y_train = data.X, data.y

    if self.classifier == 'linear':
        self.clf = linear_model.LinearRegression(n_jobs=-1)
    elif self.classifier == 'lasso':
        self.clf = linear_model.Lasso(alpha=0.00001)
    elif self.classifier == 'lassolars':
        self.clf = linear_model.LassoLars(alpha=1000)
    elif self.classifier == 'multilasso':
        self.clf = linear_model.MultiTaskLasso(alpha=1000)
    elif self.classifier == 'ridgeCV':
        self.clf = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0, 100])
    elif self.classifier == 'ridge':
        self.clf = linear_model.Ridge(alpha=10)
    elif self.classifier == 'bayes':
        self.clf = linear_model.BayesianRidge()
    elif self.classifier == 'huber':
        self.clf = linear_model.HuberRegressor()

    # Train the classifier
    if not self.load:
        t = time()
        self.train_classifier()
        print('Trained classifier in {}s'.format(round(time() - t, 2)))
    else:
        with open(self.fname, 'rb') as f:
            self.clf = cPickle.load(f)
def fit(self, train, y):
    internal_model = linear_model.RidgeCV(
        fit_intercept=True,
        cv=model_selection.TimeSeriesSplit(n_splits=2))
    bestscore = 1e15
    better = True
    indextrain = train.dropna().index
    limitlen = len(train) * self.limit_size_train
    while better:
        # .loc replaces the long-removed DataFrame.ix indexer
        internal_model.fit(train.loc[indextrain], y.loc[indextrain])
        score = metrics.mean_squared_error(
            internal_model.predict(train.loc[indextrain]), y.loc[indextrain])
        if score < bestscore:
            bestscore = score
            self.bestmodel = internal_model
            residual = y.loc[indextrain] - internal_model.predict(
                train.loc[indextrain])
            indextrain = residual[
                abs(residual) <= abs(residual).quantile(self.quant)].index
            if len(indextrain) < limitlen:
                better = False
        else:
            better = False
            self.bestmodel = internal_model
def __remodel__(self, model_type, regr, __X_train, __Y_train):
    """ Function to retrain certain models based on optimal alphas
    and/or ratios """
    if model_type == "ridge":
        alpha = regr.alpha_
        regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
    elif model_type == "lasso":
        alpha = regr.alpha_
        regr = linear_model.LassoCV(alphas=self.__realpha__(alpha),
                                    max_iter=5000, cv=10)
    elif model_type == "elasticnet":
        alpha = regr.alpha_
        ratio = regr.l1_ratio_
        regr = linear_model.ElasticNetCV(
            l1_ratio=self.__reratio__(ratio),
            alphas=self.__elasticnet_init["alpha"],
            max_iter=1000, cv=3)
    regr.fit(__X_train, __Y_train)
    return regr
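# __realpha__ above is defined elsewhere in that class; a plausible sketch of
# such a helper is a finer log-spaced grid re-centered on the previously
# selected alpha (purely illustrative, not the original implementation):
import numpy as np

def _realpha_sketch(alpha, span=10.0, num=21):
    """Return a finer alpha grid centered (in log space) on `alpha`."""
    return np.logspace(np.log10(alpha / span), np.log10(alpha * span), num)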