# Multiple linear regression: columns 5, 6 and 7 of the dataset are the predictors.
X_multiple = boston.data[:, 5:8]
print(X_multiple)

# The regression target.
y_multiple = boston.target

# MULTIPLE LINEAR REGRESSION IMPLEMENTATION
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing (no fixed seed, so the split
# differs from run to run).
X_train, X_test, y_train, y_test = train_test_split(
    X_multiple,
    y_multiple,
    test_size=0.2,
)

# Choose the estimator and train it on the training split.
lr_multiple = linear_model.LinearRegression()
lr_multiple.fit(X_train, y_train)

# Predict the held-out samples.
Y_pred_multiple = lr_multiple.predict(X_test)

# Report the fitted coefficients and intercept.
print('DATOS DEL MODELO DE REGRESIÓN LINEAL MULTIPLE')
for _caption, _value in (
    ('Valor de la pendiente o coeficiente "a": ', lr_multiple.coef_),
    ('Valor de la intersección o coeficiente "b": ', lr_multiple.intercept_),
):
    print()
    print(_caption)
    print(_value)
st.markdown('_Please see left sidebar for more details._')

# Player table shown in the select boxes: current-season stats.
# (An all-time "Dynasty" table, alltimeDynasty.csv in the same repository,
# was the alternative data source.)
currentStats = pd.read_csv(
    'https://raw.githubusercontent.com/neelganta/neel_project/master/2020stats_salary.csv'
)

# Training table for the regression; missing values are replaced by 0.
regModel = pd.read_csv(
    'https://raw.githubusercontent.com/neelganta/neel_project/master/githubRegression.csv'
).fillna(0)

# NET_RATING is the response; every other column is a predictor.
y = regModel['NET_RATING']
X = regModel.drop(columns=['NET_RATING'])

# Ordinary least squares (no regularisation parameter), fitted on the full
# table; model1_y holds the in-sample predictions.
model1 = lm.LinearRegression()
model1.fit(X, y)
model1_y = model1.predict(X)

# Select-box options: a header entry followed by every player name.
players = ['2020 NBA Players'] + list(currentStats['Player'])

player1 = st.selectbox(
    'Select first player: (Example: Type "BOS" to find all Celtics)',
    players,
)
player2 = st.selectbox(
    'Select second player: (Example: Type "PG" to find all Point Guards)',
    players,
)
from sklearn import linear_model as lm from sklearn.tree import DecisionTreeRegressor from lightgbm import LGBMRegressor MAX_ITER = 1e5 MODELS = [ ( 'regression', lm.LinearRegression(), None, ), ( 'ridge', lm.Ridge( random_state=0, solver='saga', max_iter=MAX_ITER ), {'clf__alpha': [1e-3, 1e-2, 1e-1, 1e0]}, ), ( 'lasso', lm.Lasso( random_state=0, max_iter=MAX_ITER ), {'clf__alpha': [1e-3, 1e-2, 1e-1, 1e0]}, ), ( 'elastic_net',
# Load "x,y" integer pairs from prices.txt. The context manager guarantees
# the file is closed even when a line fails to parse.
datasets_X = []
datasets_Y = []
with open('prices.txt', 'r') as fr:
    lines = fr.readlines()
for line in lines:
    items = line.strip().split(',')
    datasets_X.append(int(items[0]))
    datasets_Y.append(int(items[1]))

# scikit-learn expects a 2-D feature matrix, hence the column reshape.
datasets_X = np.array(datasets_X).reshape([-1, 1])
datasets_Y = np.array(datasets_Y)

# Scalar bounds (ndarray.min/max) instead of the builtin min/max, which
# return 1-element arrays when applied to a 2-D array.
minX = datasets_X.min()
maxX = datasets_X.max()
# Evenly spaced x values for drawing the fitted curve (maxX itself excluded).
X = np.arange(minX, maxX).reshape([-1, 1])

# Expand x into [1, x, x^2] and fit a linear model on the expanded features,
# i.e. learn the mapping between X_poly and datasets_Y.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(datasets_X)
lin_reg_2 = linear_model.LinearRegression()
lin_reg_2.fit(X_poly, datasets_Y)

# Show the data and the fitted curve. The already-fitted transformer is
# reused (transform, not fit_transform) when expanding the plotting grid.
plt.scatter(datasets_X, datasets_Y, color='red', label='origin data')
plt.plot(X, lin_reg_2.predict(poly_reg.transform(X)), color='blue',
         label='Polynomial regression prediction')
plt.legend()  # makes the labels above visible
plt.xlabel('Area')
plt.ylabel('Price')
plt.show()
# Every column except the last is a feature; the last column is the target.
features = main_data.columns[0:-1]

# NOTE(review): presumably pp is sklearn.preprocessing, whose normalize()
# rescales each *row* by default -- confirm that is the intended scaling.
X = pp.normalize(main_data[features])
print(X.shape)
Y = np.asarray(main_data[main_data.columns[-1]])
print(Y.shape)

# 67/33 train/test split with a fixed seed.
X_train, X_test, Y_train, Y_test = ms.train_test_split(
    X, Y, test_size=0.33, random_state=40)
# The bare expressions below only display something when run as notebook cells.
X_train.shape
Y_train.shape

import sklearn.linear_model as lm
model = lm.LinearRegression()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)

import sklearn.metrics as m
m.mean_squared_error(Y_test, y_pred)
X_train[1]
main_data

# displaying coefficients of each feature
coeffecients = pd.DataFrame(model.coef_, index=features)
#5 years ago #attributes att_train_fiveyear_df = attributes_df[:fiveyear] att_test_fiveyear_df = attributes_df[fiveyear:fouryear] #target groupings tar_train_fiveyear__df = targets_df[:fiveyear] tar_test_fiveyear_df = targets_df[fiveyear:fouryear] # food 17 alone food17_train_fiveyear_df = food17_target_df[:fiveyear] food17_test_fiveyear_df = food17_target_df[fiveyear:fouryear] #LAST 12 MONTHS! #code below modified version of: #http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html # Create linear regression object for one year regr_oneyear = linear_model.LinearRegression() # Train the model using the training sets regr_oneyear.fit(att_train_oneyear_df, food17_train_oneyear_df) # Make predictions using the testing set food17_pred_oneyear = regr_oneyear.predict(att_test_oneyear_df) # The coefficients #print("2016-08-01 to 2017-07-01") print("2016") #print('Coefficients: \n', regr_oneyear.coef_) # The mean squared error print("Mean squared error: %.2f" % mean_absolute_error(food17_test_oneyear_df, food17_pred_oneyear)) MSE.append(mean_absolute_error(food17_test_oneyear_df, food17_pred_oneyear)) # Explained variance score: 1 is perfect prediction print('Variance score: %.2f' %
plt.tight_layout()
filename = 'mfr_predict/scatter_matrix.png'
plt.savefig(filename)

############### Btot average
# Split into training and test data (80/20, fixed seed).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=8)

## linear model
import sklearn.linear_model as lm
lr = lm.LinearRegression()
# lr.fit / lr.predict need a 2-D feature matrix, so the single feature is
# reshaped into one column.
lr.fit(np.array(X_train).reshape(-1, 1), y_train)
y_pred = lr.predict(np.array(X_test).reshape(-1, 1))
# print('R**2 score linear fit')
# print(lr.score(X,y))

from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
print('Scores for <Btot>: mean absolute error, mean squared, median absolute, R2: Btot in nT')
# scikit-learn metrics take (y_true, y_pred). The three error metrics are
# symmetric, but R2 is not, so the argument order matters for it.
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
print(median_absolute_error(y_test, y_pred))
print(r2_score(y_test, y_pred))
# these are the fit coefficients
# Trim every predictor series to the length of the shortest one so that the
# columns of the design matrices line up sample by sample.
sizeHR = len(HR_norm)
sizepttsis = len(pttsis_norm)
sizepttdia = len(pttdia_norm)
sizevecsis = len(vec_sis)
sizevecdia = len(vec_dia)
ma = np.min([sizeHR, sizepttsis, sizepttdia])
HR_norm = HR_norm[0:ma]
pttsis = pttsis_norm[0:ma]
pttdia = pttdia_norm[0:ma]

# Samples [0, ma) are used as a predictor ("n-1") and samples [1, ma+1) as
# the regression target, i.e. each target is regressed on its predecessor.
# NOTE(review): this needs vec_sis and vec_dia to hold at least ma + 1
# samples; sizevecsis / sizevecdia are computed but not checked here.
vec_dian1 = vec_dia[0:ma]
vec_sisn1 = vec_sis[0:ma]
vec_dia0 = vec_dia[1:ma + 1]
vec_sis0 = vec_sis[1:ma + 1]

# SIS model: HR, pttsis and the previous vec_sis sample.
regSIS = linear_model.LinearRegression()
xsis = np.transpose(np.array([HR_norm, pttsis, vec_sisn1]))
regSIS.fit(xsis, vec_sis0)
coefsis.append(regSIS.coef_)

# DIA model: HR, pttdia and the previous vec_dia sample. It is fitted on its
# own design matrix and its own coefficients are stored (the original fitted
# on xsis and appended regSIS.coef_).
regDIA = linear_model.LinearRegression()
xdia = np.transpose(np.array([HR_norm, pttdia, vec_dian1]))
regDIA.fit(xdia, vec_dia0)
coefdia.append(regDIA.coef_)

# Reduced SIS model without the previous-sample term; stores its own
# coefficients (the original appended regSIS.coef_ again).
regSIS2 = linear_model.LinearRegression()
xsis = np.transpose(np.array([HR_norm, pttsis]))
regSIS2.fit(xsis, vec_sis0)
coefsis2.append(regSIS2.coef_)
# Method 1 - formula-based OLS: mpg explained by weight and horsepower.
MTmodel1 = ols("mpg ~ wt + hp", data=df1).fit()
print(MTmodel1.summary())
predictionM1 = MTmodel1.predict()
predictionM1

# Method 2 - sklearn
# Both imports sit ahead of their first use: r2_score was previously called
# before the line that imported it.
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

# The bare expressions in this section only display a value when the script
# is run cell by cell in an interactive session.
IV = df1[['wt', 'hp']].values
IV
DV = df1['mpg'].values
DV
IV_train, IV_test, DV_train, DV_test = train_test_split(IV, DV, test_size=0.2, random_state=123)
IV_train, IV_test, DV_train, DV_test

MTmodel2a = linear_model.LinearRegression()
MTmodel2a.fit(IV_train, DV_train)  # putting data to model
# MTmodel2a.summary()  # no summary in sklearn
MTmodel2a.intercept_
MTmodel2a.coef_
predicted2a = MTmodel2a.predict(IV_test)
predicted2a
DV_test
# In-sample R2 on the training split.
r2_score(DV_train, MTmodel2a.predict(IV_train))
# The mean squared error and R2 on the held-out split.
mean_squared_error(DV_test, predicted2a)
r2_score(DV_test, predicted2a)

#%%% Logistic Regression
def generateLinearRegression(target, features):
    """Fit an ordinary least-squares model and return the fitted estimator.

    Note the argument order: the target comes first, the feature matrix second.
    """
    # LinearRegression.fit returns the estimator itself.
    return linear_model.LinearRegression().fit(features, target)
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np

# read data
# NOTE(review): read_csv treats the first row as a header by default --
# confirm challenge_dataset.txt really has one.
dataframe = pd.read_csv('challenge_dataset.txt')
print(dataframe.head())
# Select the first and second columns *by position*. dataframe[[0]] only
# works when a column is literally labelled 0 (or on old pandas versions
# that fell back to positional lookup).
x_values = dataframe.iloc[:, [0]]
y_values = dataframe.iloc[:, [1]]

# train model on data
body_reg = linear_model.LinearRegression()
body_reg.fit(x_values, y_values)

# visualize results
plt.scatter(x_values, y_values)
plt.plot(x_values, body_reg.predict(x_values))
plt.show()

# The coefficients
print('Coefficients: ', body_reg.coef_)
# The mean squared error, computed on plain arrays so the result is a scalar
# that '%.2f' can format directly.
print('Mean squared error: %.2f ' % np.mean(
    (body_reg.predict(x_values) - y_values.values) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % body_reg.score(x_values, y_values))
# Single-argument print(...) calls are valid on both Python 2 and Python 3;
# the former print statements were a syntax error on Python 3.
print("Features name: {}".format(list(df.columns.values)))
print("Selected features: {}".format(features))

y = df["price"]
X = df[features]

# split data-set into training (70%) and testing set (30%)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# plotting features, target relationships
plotting_features_vs_target(features, x_train, y_train)

# ---- DEFAULT MODEL ----
# training model
linear = linear_model.LinearRegression()
linear.fit(x_train, y_train)

# evaluating model (score() is R2 on the held-out split)
score_trained = linear.score(x_test, y_test)
print("Model scored: {}".format(score_trained))

# ---- LASSO MODEL ----
# L1 regularization
lasso_linear = linear_model.Lasso(alpha=1.0)
lasso_linear.fit(x_train, y_train)

# evaluating L1 regularized model
score_lasso_trained = lasso_linear.score(x_test, y_test)
print("... start linear regression\n")

all_errMSE = []
# Upper bound of the batch start offsets; also the denominator that the
# progress message prints.
n_batch_limit = len(coexpr_mats_train) - batch_size
for idx in range(0, n_batch_limit, batch_size):
    print("... - batch: {}/{}".format(idx + 1, n_batch_limit))

    batch_end = idx + batch_size
    # input: low-resol -> hic-data
    b_imgs_input_X_train = hic_mats_train[idx:batch_end]
    # output: high-resol -> coexpr
    b_imgs_target_Y_train = coexpr_mats_train[idx:batch_end]

    # A fresh sklearn.linear_model estimator is created for every batch.
    lreg = linear_model.LinearRegression()

    # Train on the batch: every matrix entry becomes one sample with a single
    # feature, so both arrays are flattened into (n, 1) columns.
    lreg.fit(b_imgs_input_X_train.flatten().reshape(-1, 1),
             b_imgs_target_Y_train.flatten().reshape(-1, 1))

    ## ITERATE OVER THE TEST DATA
    assert len(hic_mats_test) == len(coexpr_mats_test)
    # ??? TEST ALL TEST DATA IN ONE RUN ???
    # NOTE(review): the test arrays are sliced with the *training* batch
    # offsets -- confirm this is intended.
    b_imgs_input_X_test = hic_mats_test[idx:batch_end]  # input: low-resol -> hic-data
    b_imgs_target_Y_test = coexpr_mats_test[idx:batch_end]  # output: high-resol -> coexpr
# Rows whose CARRIER_DELAY equals 999 become the set to predict; rows below
# 999 are the training targets.
_carrier_delay = Y_Carr['CARRIER_DELAY']
Y_Carr_test = Y_Carr[_carrier_delay == 999]
Y_Carr_train = Y_Carr[_carrier_delay < 999]


# In[35]:

# Bare names only display the frame when run as a notebook cell.
Y_Carr_test
Y_Carr_train


# In[37]:

# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# this import fails on current releases.
from sklearn import linear_model, svm
from sklearn import cross_validation

X_train = X_Carr_train
X_test = X_Carr_test
y_train = Y_Carr_train
y_test = Y_Carr_test

# Fit on the rows below 999 and predict the rows equal to 999.
clf = linear_model.LinearRegression()
clf.fit(X_train, y_train)
y_Carr_res = clf.predict(X_test)


# In[38]:

# Same split for the weather table, dropping the WEATHER_DELAY column itself.
X_weather_train = X_weather[X_weather['WEATHER_DELAY'] < 999].drop(
    ['WEATHER_DELAY'], axis=1)
X_weather_train


# In[39]:

X_weather_test = X_weather[X_weather['WEATHER_DELAY'] == 999].drop(
    ['WEATHER_DELAY'], axis=1)
X_weather_test
def _tune_and_fit_matcholearn(kf, X, Q, A, n_match):
    """Pick (C, gamma, kernel) by cross-validation, then refit on all rows.

    The last ``n_match`` columns of ``X`` are passed to ``MatchOLearn_KW`` as
    matching columns and the remaining leading columns as learning columns.
    Every combination from the module-level ``Cs``, ``gammas`` and ``kernels``
    is scored by the mean of ``estimate`` over the folds of ``kf``; a
    combination replaces the incumbent only when it wins by more than 1e-5.

    Returns a tuple ``(best_score, best_params, fitted_model)``.
    """
    n_cols = X.shape[1]
    best_score = -9999
    best_params = None
    for C in Cs:
        for gamma in gammas:
            for kernel in kernels:
                fold_scores = []
                for train_index, test_index in kf.split(X):
                    model = MatchOLearn_KW(C=C, gamma=gamma, kernel=kernel,
                                           metric='mahalanobis',
                                           propensity=0.5)
                    model.fit(X[train_index], Q[train_index], A[train_index],
                              match=np.array(range(n_cols - n_match, n_cols)),
                              learn=np.array(range(n_cols - n_match)),
                              bandC=1, size=my_size)
                    fold_scores.append(
                        model.estimate(X[test_index], Q[test_index],
                                       A[test_index],
                                       learn=np.array(range(n_cols - n_match)),
                                       normalize=True))
                mean_score = np.mean(fold_scores)
                if mean_score > (best_score + 1e-5):
                    best_score = mean_score
                    best_params = (C, gamma, kernel)
    if best_params is None:
        # Previously this surfaced as an unbound-variable error.
        raise ValueError(
            'no (C, gamma, kernel) combination produced a usable CV score')

    best_model = MatchOLearn_KW(C=best_params[0], gamma=best_params[1],
                                kernel=best_params[2], metric='mahalanobis',
                                propensity=0.5)
    best_model.fit(X, Q, A,
                   match=np.array(range(n_cols - n_match, n_cols)),
                   learn=np.array(range(n_cols - n_match)),
                   bandC=1, size=my_size)
    return best_score, best_params, best_model


def Learning(t):
    """Run one simulation replicate (seeded by ``t``) of three strategies.

    Observational and randomised data sets are generated, a prognostic score
    is fitted on the randomised arm with ``A == -1``, and three
    ``MatchOLearn_KW`` strategies are tuned by 2-fold cross-validation:

    * strategy 1 matches on the prognostic score only;
    * strategy 2 matches on the prognostic, benefit and probability scores;
    * strategy 3 splits the sample at ``cutoff`` on the score selected by the
      module-level ``strat`` and fits the "hi" and "lo" groups separately.

    Reads module-level settings and data (``sample_size``, ``obs_p``,
    ``ran_p``, ``ran_size``, ``crit_point1``, ``crit_point2``,
    ``unobs_latent``, ``strat``, ``sim_X_TEST``, ``sim_N_TEST``, ...).

    Returns ``(TEST_Potential_ML_s1, TEST_Potential_ML_s2,
    TEST_Potential_ML_s3, Ben1, Ben2, Ben3)``.
    """
    from sklearn.model_selection import KFold
    K = 2
    kf = KFold(n_splits=K, random_state=t, shuffle=True)

    sim_Q_obs, _, sim_X_obs, _, sim_A_obs, _ = generate_data(
        sample_size, obs_p, seed=t)
    sim_Q_ran, sim_Potential_ran, sim_X_ran, _, sim_A_ran, _ = generate_data(
        10000, ran_p, seed=t, obs=False)

    # inclusion criterion: keep rows whose first covariate lies inside
    # [crit_point1, crit_point2], then draw ran_size of them with replacement.
    np.random.seed(t)
    subgroup = np.where((sim_X_ran[:, 0] >= crit_point1)
                        & (sim_X_ran[:, 0] <= crit_point2))
    ran_index = np.random.choice(len(subgroup[0]), ran_size)
    sim_Q_ran = sim_Q_ran[subgroup][ran_index]
    sim_Potential_ran = sim_Potential_ran[subgroup][ran_index]
    sim_A_ran = sim_A_ran[subgroup][ran_index]
    sim_X_ran = sim_X_ran[subgroup][ran_index]

    # Prognostic score: linear fit of Q on X within the A == -1 arm,
    # predicted for every randomised row and standardised.
    from sklearn import linear_model
    arm = sim_A_ran == -1
    lin = linear_model.LinearRegression()
    lin.fit(sim_X_ran[arm], sim_Q_ran[arm])
    prog = scale(lin.predict(sim_X_ran))

    ####### strategy 3:
    My_model_out1, My_model_out2, My_model_trt1 = learn_obs(
        sim_X_obs[:, 0:(unobs_latent)], sim_Q_obs, sim_A_obs, seed=t * t)
    My_RCT_benefit, My_RCT_prob1 = RCT_scores(sim_X_ran[:, 0:(unobs_latent)],
                                              My_model_out1, My_model_out2,
                                              My_model_trt1)
    # Tiny jitter added to the probability score.
    np.random.seed(t)
    My_RCT_prob1 = My_RCT_prob1 + np.random.uniform(
        -1e-10, 1e-10, size=ran_size)
    # The score selected by `strat` goes last: it defines the hi/lo split
    # and is dropped again before fitting.
    if strat == 'prob':
        My_RCT_scores1 = np.column_stack(
            (scale(My_RCT_benefit), prog, scale(My_RCT_prob1)))
    elif strat == 'benefit':
        My_RCT_scores1 = np.column_stack(
            (scale(My_RCT_prob1), prog, scale(My_RCT_benefit)))
    else:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError("strat must be 'prob' or 'benefit'")
    cutoff = 0
    My_RCT_X3 = np.append(sim_X_ran[:, 0:(unobs_latent)], My_RCT_scores1,
                          axis=1)
    train_hi = My_RCT_scores1[:, 2] >= cutoff
    train_lo = My_RCT_scores1[:, 2] < cutoff
    My_RCT_X3_hi = My_RCT_X3[train_hi][:, :-1]
    sim_Q_ran_hi = sim_Q_ran[train_hi]
    sim_A_ran_hi = sim_A_ran[train_hi]
    My_RCT_X3_lo = My_RCT_X3[train_lo][:, :-1]
    sim_Q_ran_lo = sim_Q_ran[train_lo]
    sim_A_ran_lo = sim_A_ran[train_lo]

    TEST_RCT_benefit, TEST_RCT_prob1 = RCT_scores(
        sim_X_TEST[:, 0:(unobs_latent)], My_model_out1, My_model_out2,
        My_model_trt1)
    np.random.seed(t * t)
    TEST_RCT_prob1 = TEST_RCT_prob1 + np.random.uniform(
        -1e-10, 1e-10, size=10000)
    if strat == 'prob':
        TEST_RCT_scores1 = np.column_stack(
            (scale(TEST_RCT_benefit), scale(TEST_RCT_prob1)))
    else:
        TEST_RCT_scores1 = np.column_stack(
            (scale(TEST_RCT_prob1), scale(TEST_RCT_benefit)))
    TEST_RCT_X3 = np.append(sim_X_TEST[:, 0:(unobs_latent)], TEST_RCT_scores1,
                            axis=1)
    test_hi = TEST_RCT_scores1[:, 1] >= cutoff
    test_lo = TEST_RCT_scores1[:, 1] < cutoff
    # Both score columns are removed from the test design matrices.
    TEST_RCT_X3_hi = TEST_RCT_X3[test_hi][:, :-2]
    sim_N_TEST_hi = sim_N_TEST[test_hi]
    sim_X_TEST_hi = sim_X_TEST[test_hi]
    TEST_RCT_X3_lo = TEST_RCT_X3[test_lo][:, :-2]
    sim_N_TEST_lo = sim_N_TEST[test_lo]
    sim_X_TEST_lo = sim_X_TEST[test_lo]

    # HIGH GROUP (matching on the last two columns)
    _, _, best_model_s3_hi = _tune_and_fit_matcholearn(
        kf, My_RCT_X3_hi, sim_Q_ran_hi, sim_A_ran_hi, n_match=2)
    TEST_Pred_ML_s3_hi = best_model_s3_hi.predict(TEST_RCT_X3_hi)
    phi_TEST3_hi, eta_TEST3_hi = phi_eta(sim_X_TEST_hi)
    TEST_Potential_ML_s3_hi = np.mean(eta_TEST3_hi +
                                      phi_TEST3_hi * TEST_Pred_ML_s3_hi +
                                      sim_N_TEST_hi)
    # Group means are weighted by the group's share of the 10000 test rows.
    # NOTE(review): Ben1/Ben2 below carry a factor 2 that Ben3 does not --
    # confirm this is intended.
    Ben3_hi = np.mean(
        phi_TEST3_hi * TEST_Pred_ML_s3_hi) * len(TEST_RCT_X3_hi) / 10000
    TEST_Potential_ML_s3_hi = TEST_Potential_ML_s3_hi * len(
        TEST_RCT_X3_hi) / 10000

    # LOW GROUP
    _, _, best_model_s3_lo = _tune_and_fit_matcholearn(
        kf, My_RCT_X3_lo, sim_Q_ran_lo, sim_A_ran_lo, n_match=2)
    TEST_Pred_ML_s3_lo = best_model_s3_lo.predict(TEST_RCT_X3_lo)
    phi_TEST3_lo, eta_TEST3_lo = phi_eta(sim_X_TEST_lo)
    TEST_Potential_ML_s3_lo = np.mean(eta_TEST3_lo +
                                      phi_TEST3_lo * TEST_Pred_ML_s3_lo +
                                      sim_N_TEST_lo)
    TEST_Potential_ML_s3_lo = TEST_Potential_ML_s3_lo * len(
        TEST_RCT_X3_lo) / 10000
    Ben3_lo = np.mean(
        phi_TEST3_lo * TEST_Pred_ML_s3_lo) * len(TEST_RCT_X3_lo) / 10000

    TEST_Potential_ML_s3 = TEST_Potential_ML_s3_hi + TEST_Potential_ML_s3_lo
    Ben3 = Ben3_hi + Ben3_lo

    ####### strategy 1:
    # The observational models are re-learned with the same seed, and the
    # scores are recomputed without the jitter used for strategy 3.
    My_model_out1, My_model_out2, My_model_trt1 = learn_obs(
        sim_X_obs[:, 0:(unobs_latent)], sim_Q_obs, sim_A_obs, seed=t * t)
    My_RCT_benefit, My_RCT_prob1 = RCT_scores(sim_X_ran[:, 0:(unobs_latent)],
                                              My_model_out1, My_model_out2,
                                              My_model_trt1)
    My_RCT_scores0 = np.column_stack(
        (prog, scale(My_RCT_benefit), scale(My_RCT_prob1)))
    # Keep only the prognostic score as the single matching column.
    My_RCT_X1 = np.append(sim_X_ran[:, 0:(unobs_latent)], My_RCT_scores0,
                          axis=1)[:, :-2]
    TEST_RCT_X1 = sim_X_TEST[:, 0:(unobs_latent)]
    MAX_EST1, PARAM_EST1, best_model_s1 = _tune_and_fit_matcholearn(
        kf, My_RCT_X1, sim_Q_ran, sim_A_ran, n_match=1)
    TEST_Pred_ML_s1 = best_model_s1.predict(TEST_RCT_X1)
    phi_TEST1, eta_TEST1 = phi_eta(sim_X_TEST)
    TEST_Potential_ML_s1 = np.mean(eta_TEST1 + phi_TEST1 * TEST_Pred_ML_s1 +
                                   sim_N_TEST)
    Ben1 = 2 * np.mean(phi_TEST1 * TEST_Pred_ML_s1)

    ####### Strategy 2 (matching on all three scores)
    My_RCT_X2 = np.append(sim_X_ran[:, 0:(unobs_latent)], My_RCT_scores0,
                          axis=1)
    TEST_RCT_X1 = sim_X_TEST[:, 0:(unobs_latent)]
    MAX_EST2, PARAM_EST2, best_model_s2 = _tune_and_fit_matcholearn(
        kf, My_RCT_X2, sim_Q_ran, sim_A_ran, n_match=3)
    TEST_Pred_ML_s2 = best_model_s2.predict(TEST_RCT_X1)
    TEST_Potential_ML_s2 = np.mean(eta_TEST1 + phi_TEST1 * TEST_Pred_ML_s2 +
                                   sim_N_TEST)
    Ben2 = 2 * np.mean(phi_TEST1 * TEST_Pred_ML_s2)

    print('iteration_time: ', t, ' results: ', np.mean(sim_Potential_ran))
    print("strategy 1: ", MAX_EST1, TEST_Potential_ML_s1, PARAM_EST1,
          Counter(TEST_Pred_ML_s1))
    print("strategy 2: ", MAX_EST2, TEST_Potential_ML_s2, PARAM_EST2,
          Counter(TEST_Pred_ML_s2))
    print("strategy 3: ", TEST_Potential_ML_s3_hi, TEST_Potential_ML_s3_lo,
          Counter(TEST_Pred_ML_s3_hi), Counter(TEST_Pred_ML_s3_lo),
          len(TEST_RCT_X3_hi), len(TEST_RCT_X3_lo))
    print(
        "----------------------------------------------------------------------------------"
    )
    return TEST_Potential_ML_s1, TEST_Potential_ML_s2, TEST_Potential_ML_s3, Ben1, Ben2, Ben3
# Opening-price series used by the index model. Further series
# (MGNT, ROSN, SNGSP, NVTK, SNGS, TRNFP, RTSI) were tried and are disabled.
si_series = np.array(merge_df["open_si"])
gazp_series = np.array(merge_df["open_gazp"])
gmkn_series = np.array(merge_df["open_gmkn"])
lkoh_series = np.array(merge_df["open_lkoh"])
sber_series = np.array(merge_df["open_sber"])
vtbr_series = np.array(merge_df["open_vtbr"])

linreg = linear_model.LinearRegression(fit_intercept=True)

# Each predictor is a price divided by the "si" series.
series_list = [
    gazp_series / si_series,
    gmkn_series / si_series,
    lkoh_series / si_series,
    sber_series / si_series,
    vtbr_series / si_series,
]
# One row per observation, one column per predictor.
series_array = np.array(series_list).transpose()

# The model is fitted on log(rts_series), so predictions are mapped back
# with exp() before the residual is taken.
linreg.fit(series_array, np.log(rts_series))
# print(...) with one argument works on both Python 2 and Python 3.
print(linreg.coef_)
print(linreg.intercept_)
rts_index = np.exp(linreg.predict(series_array))
residual = rts_series - rts_index

rts_index_data = {
    "date": merge_df["date"],
    "time": merge_df["time"],
    "index_residual": residual,
    "index_residual_ma": moving_average(residual, 2000),
}
# also compute lagged correlation between THF and AMO
# todo: compute lagged correlation between SLP and AMO
# NOTE: the values stored below are regression coefficients of each field on
# the AMO series, despite the "corrs" naming.
print('calculating correlations between AMO and THF, SLP...')
for i in range(nlat):
    print('latitude {}'.format(lats[i]))

    # Slices of every field at latitude index i.
    sstprime_g = sstprime[:, i, :]
    thfprime_g = thfprime[:, i, :]
    psprime_g = psprime[:, i, :]
    thf_lt_g = thf_lt[:, i, :]
    thf_st_g = thf_st[:, i, :]
    ps_lt_g = ps_lt[:, i, :]
    ps_st_g = ps_st[:, i, :]

    # (predictor series, field slice, output array) for each regression.
    # The predictor is reshaped into a single-column feature matrix.
    for _amo, _field, _coefs in (
        (AMOstd, sstprime_g, sstcorrs),
        (AMOstd, thfprime_g, thfcorrs),
        (AMOstd, psprime_g, pscorrs),
        (AMOstd_lt, thf_lt_g, thfcorrs_lt),
    ):
        clf = linear_model.LinearRegression()
        clf.fit(_amo.reshape(-1, 1), _field)
        _coefs[i, :] = np.squeeze(clf.coef_)
def __init__(self, maker, MAX_DF=0.1, MAX_FEATURES=300, LSA_DIM=10):
    """Initialise the base ``estimator`` and attach a linear-regression model.

    All four arguments are forwarded unchanged to ``estimator.__init__``.
    """
    estimator.__init__(self, maker, MAX_DF, MAX_FEATURES, LSA_DIM)
    # Least-squares model without an intercept term.
    self.model = linear_model.LinearRegression(fit_intercept=False)
def _draw_major_minor_grid():
    """Draw the major and minor grid lines shared by every regression plot."""
    # The flag is passed positionally: its keyword was ``b`` before
    # Matplotlib 3.5 and is ``visible`` afterwards (``b`` was later removed).
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.minorticks_on()
    plt.grid(True, which="minor", color="#999999", linestyle="-", alpha=0.2)


def regression(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame,
               polynomial: int):
    """
    Train a regression model, predict the next days and plot/print the result

    When an end date is given the model is trained on data up to that date
    and the prediction is compared with the real prices that followed
    (backtesting). Errors are printed rather than raised.

    Parameters
    ----------
    other_args: List[str]
        Argparse arguments
    s_ticker: str
        Stock ticker
    df_stock: pd.DataFrame
        Dataframe of stock prices; the "Adj Close" column is used
    polynomial: int
        Order of polynomial. LINEAR selects plain linear regression,
        USER_INPUT reads the order from the -p/--polynomial argument, any
        other value is used as the degree of a polynomial Ridge pipeline
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="regression",
        description="""
            Regression attempts to model the relationship between
            two variables by fitting a linear/quadratic/cubic/other equation to
            observed data. One variable is considered to be an explanatory variable,
            and the other is considered to be a dependent variable.
        """,
    )

    parser.add_argument(
        "-i",
        "--input",
        action="store",
        dest="n_inputs",
        type=check_positive,
        default=40,
        help="number of days to use for prediction.",
    )
    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days.",
    )
    parser.add_argument(
        "-j",
        "--jumps",
        action="store",
        dest="n_jumps",
        type=check_positive,
        default=1,
        help="number of jumps in training data.",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    if polynomial == USER_INPUT:
        parser.add_argument(
            "-p",
            "--polynomial",
            action="store",
            dest="n_polynomial",
            type=check_positive,
            required=True,
            help="polynomial associated with regression.",
        )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date, then keep the real future
        # prices aside and truncate the training data at the end date.
        if ns_parser.s_end_date:
            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=ns_parser.n_inputs + ns_parser.n_days,
            )[-1]:
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Split training data
        stock_x, stock_y = splitTrain.split_train(
            df_stock["Adj Close"].values,
            ns_parser.n_inputs,
            ns_parser.n_days,
            ns_parser.n_jumps,
        )

        # NOTE(review): this truthiness test assumes split_train returns
        # plain sequences (an ndarray would raise here) -- confirm.
        if not stock_x:
            print("Given the model parameters more training data is needed.\n")
            return

        # Machine Learning model
        if polynomial == LINEAR:
            model = linear_model.LinearRegression(n_jobs=-1)
        else:
            if polynomial == USER_INPUT:
                polynomial = ns_parser.n_polynomial
            model = pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(polynomial),
                linear_model.Ridge())

        model.fit(stock_x, stock_y)
        # Predict from the last n_inputs prices; negative prices are
        # clamped to zero.
        l_predictions = [
            i if i > 0 else 0 for i in model.predict(
                df_stock["Adj Close"].values[-ns_parser.n_inputs:].reshape(
                    1, -1))[0]
        ]

        # Prediction data
        l_pred_days = get_next_stock_market_days(
            last_stock_day=df_stock["Adj Close"].index[-1],
            n_next_days=ns_parser.n_days,
        )
        df_pred = pd.Series(l_predictions, index=l_pred_days, name="Price")

        # Plotting
        plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
        plt.plot(df_stock.index, df_stock["Adj Close"], lw=2)
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Regression (polynomial {polynomial}) on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        plt.xlim(df_stock.index[0],
                 get_next_stock_market_days(df_pred.index[-1], 1)[-1])
        plt.xlabel("Time")
        plt.ylabel("Share Price ($)")
        _draw_major_minor_grid()
        # Dashed connector from the last known price to the first prediction.
        plt.plot(
            [df_stock.index[-1], df_pred.index[0]],
            [df_stock["Adj Close"].values[-1], df_pred.values[0]],
            lw=1,
            c="tab:green",
            linestyle="--",
        )
        plt.plot(df_pred.index, df_pred, lw=2, c="tab:green")
        plt.axvspan(df_stock.index[-1],
                    df_pred.index[-1],
                    facecolor="tab:orange",
                    alpha=0.2)
        _, _, ymin, ymax = plt.axis()
        plt.vlines(df_stock.index[-1],
                   ymin,
                   ymax,
                   linewidth=1,
                   linestyle="--",
                   color="k")

        # BACKTESTING: overlay the real prices that followed the end date.
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Top panel: real prices against predicted prices.
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["Adj Close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(df_future.index,
                        df_future["Adj Close"],
                        c="tab:blue",
                        lw=3)
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [
                    df_stock["Adj Close"].values[-1],
                    df_future["Adj Close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock.index[-1], df_pred.index[0]],
                [df_stock["Adj Close"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.ylabel("Share Price ($)")
            _draw_major_minor_grid()
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Bottom panel: relative prediction error in percent.
            real_prices = df_future["Adj Close"].values
            pct_error = 100 * (df_pred.values - real_prices) / real_prices

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                pct_error,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                pct_error,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock.index[-1], df_future.index[0]],
                [0, pct_error[0]],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(df_stock.index[-1],
                     df_pred.index[-1] + datetime.timedelta(days=1))
            plt.xticks(
                [
                    df_stock.index[-1],
                    df_pred.index[-1] + datetime.timedelta(days=1)
                ],
                visible=True,
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            _draw_major_minor_grid()
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["Adj Close"]

            if gtff.USE_COLOR:
                patch_pandas_text_adjustment()

                print("Time Real [$] x Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            # Print prediction data
            print_pretty_prediction(df_pred, df_stock["Adj Close"].values[-1])
        print("")

    except SystemExit:
        # argparse exits on bad arguments; stay inside the calling session.
        print("")
    except Exception as e:
        # Top-level boundary of the command: report the error and return.
        print(e)
        print("")
def update(self):
    """Refit both linear price models and refresh their plots and labels.

    "V1" is trained on ``coormatrix_features`` and "V2" on
    ``FeaturesFINAL``; in both frames the ``price`` column is the target
    and every other column is a feature.  For each frame the method

    * makes an 80/20 train/test split with ``random_state=10``,
    * standardizes features and target using statistics fitted on the
      training part only,
    * fits ``linear_model.LinearRegression`` and predicts the test part,
    * plots true against predicted prices (both in standardized units)
      and writes the test MSE and R2 score into the result labels.

    V1 draws on ``ax1``/``fig1`` and writes ``label2``/``label3``;
    V2 draws on ``ax2``/``fig2`` and writes ``label6``/``label7``.
    """

    def _fit_and_predict(frame):
        """Return ``(actual, predicted)`` standardized test prices."""
        features = frame.drop('price', axis=1)
        target = frame['price']
        f_train, f_test, t_train, t_test = train_test_split(
            features, target, test_size=0.2, random_state=10)

        # Features and target get their own scaler so that neither one
        # silently overwrites the statistics fitted for the other.
        feature_scaler = StandardScaler()
        f_train = feature_scaler.fit_transform(f_train)
        f_test = feature_scaler.transform(f_test)  # reuse train statistics

        target_scaler = StandardScaler()
        t_train = target_scaler.fit_transform(t_train.values.reshape(-1, 1))
        t_test = target_scaler.transform(t_test.values.reshape(-1, 1))

        regr = linear_model.LinearRegression()
        regr.fit(f_train, t_train)
        return t_test, regr.predict(f_test)

    def _render(ax, fig, tag, mse_label, r2_label, actual, predicted):
        """Draw the scatter for one model and fill in its two score labels."""
        ax.plot(actual, predicted, 'bo')
        # Raw strings: "\h" is not a valid escape sequence in a normal
        # string literal, but mathtext needs the backslash verbatim.
        ax.set_title(
            tag + r" Prices vs Predicted Prices: $Y_i$ vs $\hat{Y}_i$")
        ax.set_xlabel("Prices: $Y_i$")
        ax.set_ylabel(r"Predicted prices: $\hat{Y}_i$")
        ax.grid(True)
        fig.tight_layout()
        fig.canvas.draw_idle()
        mse_label.setText(
            "%s Mean squared error: %.2f"
            % (tag, mean_squared_error(actual, predicted)))
        r2_label.setText(
            '%s R2 score: %.2f' % (tag, r2_score(actual, predicted)))

    # Linear regression using the features picked from the correlation matrix.
    v_test, airbnb_V_pred = _fit_and_predict(coormatrix_features)
    _render(self.ax1, self.fig1, "V1", self.label2, self.label3,
            v_test, airbnb_V_pred)

    # Linear regression using the final feature set.
    y_test, airbnb_y_pred = _fit_and_predict(FeaturesFINAL)
    _render(self.ax2, self.fig2, "V2", self.label6, self.label7,
            y_test, airbnb_y_pred)
ymon = np.array(ymon, dtype='float64') ''' dataw = np.array(dataw, dtype='int64') datas = np.array(datas, dtype='int64') datam = np.array(datam, dtype='int64') ywin = np.array(ywin, dtype='int64') ysum = np.array(ysum, dtype='int64') ymon = np.array(ymon, dtype='int64') clf = SVC(kernel='rbf') clf.fit(dataw, ywin) ysvc = clf.predict(pre) print(ysvc) clfw = linear_model.LinearRegression() clfw.fit(dataw, ywin) wpre = clfw.predict(pre) print(wpre) clf = SVC(kernel='rbf') clf.fit(datas, ysum) ysvc = clf.predict(pre) print(ysvc) clfs = linear_model.LinearRegression() clfs.fit(datas, ysum) spre = clfs.predict(pre) print(spre) clf = SVC(kernel='rbf')
def lr(x: numpy.ndarray, y: numpy.ndarray) -> LRRes:
    """Fit a linear regression to ``(x, y)`` and summarise the fit.

    The returned ``LRRes`` is built from, in this order: the fitted
    coefficients, the intercept, the estimator's ``_residues`` attribute,
    the value of ``score(x, y)`` on the training data itself, and the sum
    of absolute prediction errors divided by ``y.sum()``.
    """
    model = linear_model.LinearRegression().fit(x, y)
    training_score = model.score(x, y)

    absolute_errors = numpy.abs(model.predict(x) - y)
    relative_error = absolute_errors.sum() / y.sum()

    # NOTE(review): ``_residues`` is a private scikit-learn attribute --
    # confirm it is still set by the installed version.
    return LRRes(
        model.coef_,
        model.intercept_,
        model._residues,
        training_score,
        relative_error,
    )
def plot_ols():
    """
    =========================================================
    Linear Regression Example
    =========================================================
    This example uses only one feature of the `diabetes` dataset (the
    column at index 2), in order to illustrate a two-dimensional plot of
    this regression technique. The straight line can be seen in the plot,
    showing how linear regression attempts to draw a straight line that
    will best minimize the residual sum of squares between the observed
    responses in the dataset, and the responses predicted by the linear
    approximation.

    The coefficients, the mean squared error and the R2 score on the
    held-out samples are also printed.
    """
    import inspect

    # A bare ``__doc__`` inside a function refers to the *module*
    # docstring, so print this function's own banner explicitly.
    print(inspect.getdoc(plot_ols))

    # Code source: Jaques Grobler
    # License: BSD 3 clause

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn import datasets, linear_model
    from sklearn.metrics import mean_squared_error, r2_score

    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature (column 2), kept two-dimensional for the estimator
    diabetes_X = diabetes.data[:, np.newaxis, 2]

    # Hold out the last 20 samples for testing, train on the rest
    diabetes_X_train = diabetes_X[:-20]
    diabetes_X_test = diabetes_X[-20:]
    diabetes_y_train = diabetes.target[:-20]
    diabetes_y_test = diabetes.target[-20:]

    # Create the linear regression object and train it
    regr = linear_model.LinearRegression()
    regr.fit(diabetes_X_train, diabetes_y_train)

    # Make predictions using the testing set
    diabetes_y_pred = regr.predict(diabetes_X_test)

    # The coefficients
    print('Coefficients: \n', regr.coef_)
    # The mean squared error
    print("Mean squared error: %.2f"
          % mean_squared_error(diabetes_y_test, diabetes_y_pred))
    # The value printed here is computed with r2_score
    print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))

    # Plot outputs: test samples as points, the fitted line on top
    plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
    plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

    plt.xticks(())
    plt.yticks(())

    plt.show()
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Do not truncate columns when a frame is printed.
pd.set_option('display.max_columns', None)

# Load the data and discard every row that has a missing value.
data = pd.read_csv('./house_price.csv')
data1 = data.dropna()

# One-hot encode the two categorical columns, then drop one level of each.
data2 = pd.get_dummies(data1[['dist', 'floor']])
data3 = data2.drop(['dist_shijingshan', 'floor_high'], axis=1)

# Append the remaining columns; 'price' goes last so it can be sliced off
# as the target below.
passthrough_columns = ['roomnum', 'halls', 'AREA', 'subway', 'school', 'price']
data4 = pd.concat([data3, data1[passthrough_columns]], axis=1)

x = data4.iloc[:, :-1]   # every column except the last one
y = data4.iloc[:, -1:]   # last column ('price'), kept as a one-column frame

# 70/30 train/test split with a fixed seed.
x_train, x_text, y_train, y_text = train_test_split(
    x, y, test_size=0.3, random_state=42)

model = linear_model.LinearRegression()
model.fit(x_train, y_train)

# Predict for a single hand-written sample of 12 feature values.
sample = np.array([[0, 0, 0, 0, 0, 0, 0, 2, 1, 60, 1, 1]])
result = model.predict(sample)

# print(result)
# print(model.coef_)  # model coefficients
# print(model.intercept_)  # model intercept
# print(model.score(x_text,y_text))
# Outlier removal: keep only the rows in which no column falls outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
Q1 = df.quantile(0.25)
Q3 = df.quantile(0.75)
IQR = Q3 - Q1
df = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]
df.describe()  # result is discarded here; only shown in an interactive session

# Split the data into a training and a test set.  All three frames go
# through ONE call so that they are guaranteed to share the same row
# partition (previously two calls relied on repeating random_state).
(X1_train, X1_test,
 X1_X2_train, X1_X2_test,
 Y_train, Y_test) = train_test_split(
    df[['Age']],
    df[['Age', 'Age1stCode']],
    df[['YearsCode']],
    test_size=0.33,
    random_state=42)

# Model 1: YearsCode predicted from Age only.
regr1 = linear_model.LinearRegression()
regr1.fit(X1_train, Y_train)
print('Coefficients: \n', regr1.coef_)
# The value is the mean squared error on the test set, so label it as such.
print("Mean squared error: %.2f"
      % mean_squared_error(Y_test, regr1.predict(X1_test)))

# Model 2: YearsCode predicted from Age and Age1stCode.
regr2 = linear_model.LinearRegression()
regr2.fit(X1_X2_train, Y_train)
print('Coefficients: \n', regr2.coef_)
print("Mean squared error: %.2f"
      % mean_squared_error(Y_test, regr2.predict(X1_X2_test)))
from sklearn.ensemble import ExtraTreesRegressor from sklearn.ensemble import GradientBoostingRegressor from xgboost import XGBRegressor from sklearn.preprocessing import PolynomialFeatures as pf from sklearn import linear_model as lm train = pd.read_csv('C:\\Users\\Preetham G\\Downloads\\train.csv') test = pd.read_csv('C:\\Users\\Preetham G\\Downloads\\test.csv') train = train.drop(columns=['Index', 'District']) test = test.drop(columns=['Index', 'District']) base = [ RandomForestRegressor(n_estimators=100, max_depth=10), ExtraTreesRegressor(n_estimators=90, max_depth=15), GradientBoostingRegressor(n_estimators=60, max_depth=5), XGBRegressor(n_estimators=50, max_depth=5), BaggingRegressor(n_estimators=50, base_estimator=lm.LinearRegression()) ] name = ['RFR', 'ETR', 'GBR', 'XGBR', 'BAR'] df1 = pd.DataFrame() c = 0 train_x = train.drop(columns=['Rainfall']) train_y = train['Rainfall'] test_x = test.drop(columns=['Rainfall']) test_y = test['Rainfall'] d1 = {} for i, j in zip(base, name): print(j, c) if j == 'BAR': poly = pf(degree=4) train_x = poly.fit_transform(train_x) test_x = poly.fit_transform(test_x)
# Report metrics for the predictions already held in y_test / y_test_pred.
for _label, _metric in (
        ("Mean squared error =", sm.mean_squared_error),
        ("Median abs error =", sm.median_absolute_error),
        ("Explain var scr =", sm.explained_variance_score),
        ("R2 score =", sm.r2_score),
):
    print(_label, round(_metric(y_test, y_test_pred), 2))

# Load the comma-separated data set; the last column is the target.
# NOTE(review): the name `input` shadows the builtin; kept unchanged because
# code outside this section may still refer to it.
input = 'D:/ProgramData/praktika2/Mul_linear.txt'
input_data = np.loadtxt(input, delimiter=',')
X = input_data[:, :-1]
y = input_data[:, -1]

# First 60% of the rows are used for training, the remainder for testing.
training_samples = int(0.6 * len(X))
# NOTE(review): not used by the split below -- confirm whether it is needed.
testing_samples = len(X) - 10

X_train = X[:training_samples]
y_train = y[:training_samples]
X_test = X[training_samples:]
y_test = y[training_samples:]

# Fit the linear regressor and predict the held-out rows.
reg_linear_mul = linear_model.LinearRegression()
reg_linear_mul.fit(X_train, y_train)
y_test_pred = reg_linear_mul.predict(X_test)

print("Performance of Linear regressor:")
for _label, _metric in (
        ("Mean absolute error =", sm.mean_absolute_error),
        ("Mean squared error =", sm.mean_squared_error),
        ("Median abs error =", sm.median_absolute_error),
        ("Explain var scr=", sm.explained_variance_score),
        ("R2 score =", sm.r2_score),
):
    print(_label, round(_metric(y_test, y_test_pred), 2))

# Degree-10 polynomial expansion of the training features.
polynomial = PolynomialFeatures(degree=10)
X_train_transformed = polynomial.fit_transform(X_train)
datapoint = [[2.23, 1.35, 1.12]]
test_size=0.2, shuffle=True) y_df_train, y_df_test = train_test_split(y_dataFrame, test_size=0.2, shuffle=True) # # print(y_df_train.loc[:, 0:6]) predict_x = None predict_y = None train_for_x = True train_for_y = True if train_for_x: poly_reg_x = PolynomialFeatures(degree=1) X_ploy = poly_reg_x.fit_transform(x_df_train.loc[:, 1:]) X_ploy_predict = poly_reg_x.fit_transform(x_df_test.loc[:, 1:]) lin_reg_2_x = linear_model.LinearRegression() lin_reg_2_x.fit(X_ploy, x_df_train.loc[:, 0] / 1920) predict_x = lin_reg_2_x.predict(X_ploy_predict) output = open('linear_x.pkl', 'wb') pickle.dump(lin_reg_2_x, output, 0) # 将训练后的线性模型保存 output.close() if train_for_y: poly_reg_y = PolynomialFeatures(degree=1) Y_ploy = poly_reg_y.fit_transform(y_df_train.loc[:, 1:]) Y_ploy_predict = poly_reg_y.fit_transform(y_df_test.loc[:, 1:]) lin_reg_2_y = linear_model.LinearRegression() # lin_reg_2 = svm.SVR(kernel='linear', C=1e3) lin_reg_2_y.fit(Y_ploy, y_df_train.loc[:, 0] / 1080) predict_y = lin_reg_2_y.predict(Y_ploy_predict) # output = open('svr_y.pkl', 'wb')
# 读取自带的diabete数据集 diabetes = datasets.load_diabetes() # 使用其中的一个feature diabetes_X = diabetes.data[:, np.newaxis, 2] # 将数据集分割成training set和test set diabetes_X_train = diabetes_X[:-20] diabetes_X_test = diabetes_X[-20:] # 将目标(y值)分割成training set和test set diabetes_y_train = diabetes.target[:-20] diabetes_y_test = diabetes.target[-20:] # 使用线性回归 regr = linear_model.LinearRegression() # 进行training set和test set的fit,即是训练的过程 regr.fit(diabetes_X_train, diabetes_y_train) # 打印出相关系数和截距等信息 print('Coefficients: \n', regr.coef_) # The mean square error print("Residual sum of squares: %.2f" % np.mean( (regr.predict(diabetes_X_test) - diabetes_y_test)**2)) # Explained variance score: 1 is perfect prediction print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_y_test)) # 使用pyplot画图 plt.scatter(diabetes_X_test, diabetes_y_test, color='black') plt.plot(diabetes_X_test,
def Linear_Regression():
    """Fit a linear regression on a three-sample toy data set.

    Prints the predictions for two query points and returns the tuple
    ``(coef_, intercept_)`` of the fitted estimator.
    """
    samples = [[0, 0], [1, 1], [2, 2]]
    targets = [1, 2, 4]
    queries = [[0.2, 0.3], [0.4, 0.5]]

    estimator = linear_model.LinearRegression()
    estimator.fit(samples, targets)
    print(estimator.predict(queries))

    return estimator.coef_, estimator.intercept_