def stockLinearRegression(stock_index, start_date, end_date, columns_in):
    '''
    Fit a linear regression to one stock's min-max normalised series and
    return the slope.

    Example arguments:
        columns_in = "adj_close"
        stock_index = "000545"
        start_date = "2018-09-02"
        end_date = "2018-09-12"

    @param stock_index: value matched against the stock_index column
    @param start_date: inclusive lower bound on stock_date
    @param end_date: inclusive upper bound on stock_date
    @param columns_in: column expression placed in the select list; when it
        names a single returned column that column is regressed, otherwise
        'adj_close' is used
    @return: -999 when the query returns no rows, otherwise element [0] of
        LinearReg.single_linear_reg's result (the slope); the slope is also
        written to "slope_out.npy" in the current working directory
    '''
    spark = loadSpark()
    table = "stock_dev.day_history_insert"
    # NOTE(review): the statement is assembled by string interpolation, so the
    # arguments must come from trusted callers.
    sql1 = """ select %s,stock_date from %s where stock_date >= '%s' and stock_date <= '%s' and stock_index = '%s' order by stock_date""" % (columns_in, table, start_date, end_date, stock_index)

    # Collect once and count locally instead of running count() and then
    # toPandas() as two separate actions over the same query.
    df1 = spark.sql(sql1).toPandas()
    df_cnt = len(df1)
    if df_cnt == 0:
        logging.info("no data find out")
        return -999
    logging.info("DATA FRAME rows {}".format(str(df_cnt)))

    # Regress the requested column when it is present in the result; keep the
    # historical 'adj_close' behaviour for anything else (e.g. a column list).
    value_col = columns_in if columns_in in df1.columns else 'adj_close'
    series = df1[value_col]
    low = series.min()
    # The 0.001 keeps the denominator non-zero when every value is identical.
    df1['norm_col'] = (series - low) / (series.max() - low + 0.001)

    logging.info(df1.head(10))
    logging.info(df1.tail(10))

    slope_out = LinearReg.single_linear_reg(df1, 'norm_col')[0]
    np.save("slope_out.npy", slope_out)
    return slope_out
def apply_linear_reg(x):
    '''
    Return the regression slope of the normalised 'adj_close' column of x.

    Adds (or overwrites) a 'norm_col' column on x as a side effect.

    @param x: a dataframe with an 'adj_close' column
    @return: element [0] of LinearReg.single_linear_reg's result, or -9999
        when normalisation or the regression raises
    '''
    try:
        x.loc[:, 'norm_col'] = norm_col(x, 'adj_close')
        slope_out = LinearReg.single_linear_reg(x, 'norm_col')[0]
    except Exception:
        # Best-effort: any ordinary failure maps to the sentinel, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        slope_out = -9999
    return slope_out
def SH_slope(SH_index_table, start_date, end_date):
    '''
    calculate the SH index slope give start and end date

    Rows are fetched in stock_date order so the regression input has a
    deterministic row order (the frame and a column name are all that is
    passed to single_linear_reg, so the fit presumably depends on row
    position -- TODO confirm).

    @param SH_index_table: name of the table holding the index history
    @param start_date: inclusive lower bound on stock_date
    @param end_date: inclusive upper bound on stock_date
    @return SH_slope a float
    '''
    # NOTE(review): the statement is assembled by string interpolation, so the
    # arguments must come from trusted callers.
    sql_SH_index = """ select * from %s where stock_date >= '%s' and stock_date <= '%s' order by stock_date """ % (SH_index_table, start_date, end_date)
    # `spark` is a module-level session; it is not created in this function.
    index_df = spark.sql(sql_SH_index).toPandas()
    index_df.loc[:, 'norm_col'] = norm_col(index_df, 'adj_close')
    slope = LinearReg.single_linear_reg(index_df, 'norm_col')[0]
    return slope
def rolling_regression(x, window, sort_col, reg_col):
    '''
    Fit one regression per row over the window of rows starting at that row.

    @param: x is a dataframe
    @param: window: regression window (maximum number of rows per fit);
            windows near the end of the frame contain fewer rows
    @param: sort_col: which column sort the DF: e.g. stock_date
    @param: reg_col: which column need to do regression: e.g. adj_close
    @return: a sorted copy of x with two added columns: 'slopes' (the slope
             of each window, or -999 when the fit raised) and 'slope_num_in'
             (the number of rows that window actually contained)
    '''
    x = x.sort_values(sort_col)
    slope = []
    num_in = []
    for start in range(x.shape[0]):
        # Slice and record the size outside the try so both lists always
        # receive exactly one entry per row.
        window_df = x.iloc[start:start + window, :]
        num_in.append(window_df.shape[0])
        try:
            fitted_slope, _intercept = LinearReg.single_linear_reg(window_df, reg_col)
        except Exception:
            # Best-effort sentinel; KeyboardInterrupt / SystemExit propagate.
            fitted_slope = -999
        slope.append(fitted_slope)
    x['slopes'] = slope
    x['slope_num_in'] = num_in
    return x
test_y1 = test_y[0:40].tolist() p1 = yhat[0:40].tolist() predict_y1 = list(itertools.chain(*p1)) data_dict = {"test_Y": test_y1, "predict_Y": predict_y1} df_out = pd.DataFrame(data_dict) df_out.to_csv("test.csv") from sklearn.metrics import mean_squared_error import numpy as np predict_num = 60 print(mean_squared_error(test_y[0:40], yhat[0:40])) print(mean_squared_error(test_y[0:40], yhat[0:40]) / np.mean(test_y[0:40])) from davidyu_cfg import * from functions.LinearReg import LinearReg linear_reg = LinearReg() test_yy = pd.DataFrame(test_y[0:40]) test_yy.columns = ["col_in"] yhat_yy = pd.DataFrame(yhat[0:40]) yhat_yy.columns = ["col_in"] print(linear_reg.single_linear_reg(test_yy, "col_in")[0]) print(linear_reg.single_linear_reg(yhat_yy, "col_in")[0]) ''' test_X = test_X.reshape((test_X.shape[0], test_X.shape[2])) inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1) inv_yhat = scaler.inverse_transform(inv_yhat) inv_yhat = inv_yhat[:,0] # invert scaling for actual test_y = test_y.reshape((len(test_y), 1)) inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
def linearRegPred(self):
    """Return the regression slope of 'adj_close' over the prediction frame.

    The frame is the ``df_pred`` attribute of whatever
    ``self.data_select_pred()`` returns; the slope is element [0] of
    ``LinearReg.single_linear_reg``'s result.
    """
    pred_frame = self.data_select_pred().df_pred
    fit_result = LinearReg.single_linear_reg(pred_frame, 'adj_close')
    return fit_result[0]
from davidyu_cfg import *
from functions.LinearReg import LinearReg


def rolling_regression(x, window):
    '''
    Fit one regression on 'adj_close' per row, over the window of rows
    starting at that row.

    Rows are used in the order given; the frame is not sorted here. The
    result columns are added to x itself, which is also returned.

    @param: x is a dataframe
    @param: window: maximum number of rows per fit; windows near the end of
            the frame contain fewer rows
    @return: x with two added columns: 'slopes' (the slope of each window, or
             -999 when the fit raised) and 'slope_num_in' (the number of rows
             that window actually contained)
    '''
    slope = []
    num_in = []
    for start in range(x.shape[0]):
        # Slice and record the size outside the try so both lists always
        # receive exactly one entry per row.
        window_df = x.iloc[start:start + window, :]
        num_in.append(window_df.shape[0])
        try:
            fitted_slope, _intercept = LinearReg.single_linear_reg(window_df, 'adj_close')
        except Exception:
            # Best-effort sentinel; KeyboardInterrupt / SystemExit propagate.
            fitted_slope = -999
        slope.append(fitted_slope)
    x['slopes'] = slope
    x['slope_num_in'] = num_in
    return x