def getRollReg(file1, window=None, save_name=None):
    """Run a rolling regression per stock and save the combined result as one CSV.

    Rows whose window could not be completely filled (``slope_num_in`` differs
    from ``window``) are filtered out before saving.

    Args:
        file1: Path of the input CSV. It must contain the columns
            ``stock_index``, ``stock_date`` and ``adj_close``.
        window: Rolling-regression window length. Defaults to 5 when None.
        save_name: File name of the output CSV. Defaults to ``'test.csv'``
            when None.

    Returns:
        Full path of the written CSV file, located in the ``rollRegression``
        sub-directory of ``tmp_data_dict['stock_feature']``.
    """
    logging.info(
        "----------------- start to calculate rolling regression-------------"
    )
    if save_name is None:
        save_name = 'test.csv'
    if window is None:
        window = 5

    df1 = pd.read_csv(file1)
    stk_list = df1['stock_index'].unique().tolist()
    data_dir = tmp_data_dict.get('stock_feature')
    save_dir = os.path.join(data_dir, "rollRegression")
    # The output path does not depend on the stock, so build it once up front.
    # This also keeps it defined when the input contains no stocks at all.
    save_file = os.path.join(save_dir, save_name)

    # Collect per-stock frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copies the accumulated frame on every call.
    frames = []
    for i in stk_list:
        print(i)
        df2 = df1[df1['stock_index'] == i]
        df2 = df2[['stock_date', "adj_close"]]
        df3 = rolling_regression(df2, window, "stock_date", "adj_close")
        # Keep only rows whose regression used a complete window; copy so the
        # column assignment below does not write into a slice.
        df3 = df3[df3['slope_num_in'] == window].copy()
        df3['stock_index'] = str(i).zfill(6)
        frames.append(df3)

    df_reg = pd.concat(frames) if frames else pd.DataFrame()
    df_reg = changeStockIndex(df_reg, 'stock_index')
    df_reg.to_csv(save_file, index=0)
    logging.info("-------------------save data {}".format(save_dir))
    return save_file
def makeModelData(data_dir,columns_list):
    """Build the modelling table: cleaned features plus rolling-regression slopes.

    Args:
        data_dir: Location passed to ``loadCombineData``.
        columns_list: List of feature column names; passed to ``cleanData``
            and used, together with ``"slopes"``, to select the output columns.

    Returns:
        DataFrame holding ``columns_list + ["slopes"]`` for the rows whose
        ``slope_num_in`` equals 5.
    """
    cleaned = cleanData(loadCombineData(data_dir), columns_list).dropna()

    def _regress_one_stock(group):
        # Window length comes from the module-level ``regression_window``.
        return rolling_regression(group, regression_window, "stock_date", "close")

    regressed = cleaned.groupby("stock_index").apply(_regress_one_stock)
    regressed = regressed.reset_index(drop=True)

    # NOTE(review): the filter is hard-coded to 5 while the regression uses
    # ``regression_window`` -- confirm the two are meant to stay in sync.
    complete_rows = regressed[regressed["slope_num_in"] == 5]
    return complete_rows[columns_list + ["slopes"]]
def process(df1,start_date,end_date,max_min_stat_window,regre_window,regre_col):
    """Merge KDJ/MACD indicators with future max/min and rolling-regression features.

    Args:
        df1: Raw frame handed to ``DF_to_StockDataFrame``.
        start_date: Lower bound passed to ``select_data``.
        end_date: Upper bound passed to ``select_data``.
        max_min_stat_window: Window passed to ``rollingFutureMaxMin``.
        regre_window: Window length passed to ``rolling_regression``.
        regre_col: Name of the column the rolling regression is run on.

    Returns:
        DataFrame produced by merging the ``stock_kdj`` output with the
        additional max/min and regression columns on ``stock_date``.
    """
    stock = DF_to_StockDataFrame(df1)
    stock = select_data(stock,start_date,end_date)
    stock_kdj_macd = stock_kdj(stock)

    df_max_min = rollingFutureMaxMin(stock,max_min_stat_window)
    df_max_min = df_max_min.reset_index()
    df_max_min = rolling_regression(df_max_min,regre_window,'date',regre_col)
    df_max_min = df_max_min.reset_index().dropna()

    # Read the date from the frame itself. The previous code took it from
    # ``df_max_min.reset_index()``, whose renumbered 0..n-1 index no longer
    # matches the gapped index left behind by dropna(); pandas aligns on index
    # labels when assigning, so dates ended up on the wrong rows or as NaN.
    df_max_min['stock_date'] = df_max_min['date'].astype(str)

    ## combine data
    # Only bring over columns the indicator frame does not already have,
    # plus the join key.
    cols_to_use = df_max_min.columns.difference(stock_kdj_macd.columns).tolist()+['stock_date']
    df_f2 = pd.merge(stock_kdj_macd,df_max_min[cols_to_use],on="stock_date")
    return df_f2
def getRollReg(file1,window=None):
    """Run a rolling regression per stock and write one CSV file per stock.

    Each file is named ``<stock_index>_rollReg_<window>.csv`` and is written
    to the ``rollRegression`` sub-directory of
    ``tmp_data_dict['stock_feature']``. Rows whose ``slope_num_in`` differs
    from ``window`` are dropped before saving.

    Args:
        file1: Path of the input CSV. It must contain the columns
            ``stock_index``, ``stock_date`` and ``adj_close``.
        window: Rolling-regression window length. Defaults to 5 when None.

    Returns:
        None. The results are only written to disk.
    """
    # Honour the caller's window; previously it was always overwritten with 5.
    if window is None:
        window = 5

    df1 = pd.read_csv(file1)
    stk_list = df1['stock_index'].unique().tolist()
    data_dir = tmp_data_dict.get('stock_feature')
    save_dir = os.path.join(data_dir,"rollRegression")

    for i in stk_list:
        print(i)
        df2 = df1[df1['stock_index']==i]
        df2 = df2[['stock_date',"adj_close"]]
        df3 = rolling_regression(df2,window,"stock_date","adj_close")
        # Copy so the column assignment below does not write into a slice.
        df3 = df3[df3['slope_num_in']==window].copy()
        save_name = '_'.join([str(i),'rollReg',str(window)])+'.csv'
        save_file = os.path.join(save_dir,save_name)
        df3['stock_index'] = str(i).zfill(6)
        df3.to_csv(save_file,index=0)
def dfRollReg(df_feature, window):
    """Apply ``rolling_regression`` on ``close`` to every ``stock_index`` group.

    Args:
        df_feature: Frame with at least the columns ``stock_index``,
            ``stock_date`` and ``close``.
        window: Window length forwarded to ``rolling_regression``.

    Returns:
        The combined per-stock results with a fresh 0..n-1 index.
    """
    def _regress_group(stock_rows):
        return rolling_regression(stock_rows, window, "stock_date", "close")

    grouped = df_feature.groupby("stock_index")
    combined = grouped.apply(_regress_group)
    # Discard the group keys that groupby/apply put into the index.
    combined.reset_index(drop=True, inplace=True)
    return combined
import pandas as pd
from davidyu_cfg import *
from functions.rolling_regression import *
from functions.day_history.rollReg import rollRegDayHis

# Script: rolling regression over the SH index closing price, saved as CSV.

data_file = "/home/davidyu/stock/data/SH_SZ_index/SH_index.csv"
df1 = pd.read_csv(data_file)

# Headers are dot-separated; keep the second component of each header.
df1.columns = [header.split(".")[1] for header in df1.columns]

x = df1
window, sort_col, reg_col = 5, "stock_date", "close"
df_rollreg = rolling_regression(x, window, sort_col, reg_col)

# Round to three decimals and write into the directory configured for "SH_index".
save_dir = tmp_data_dict.get("SH_index")
df_rollreg.round(3).to_csv(
    os.path.join(save_dir, "sh_index_rollReg.csv"),
    index=0,
)
# Combine the frames collected earlier (``frames`` is defined outside this snippet).
df_all = pd.concat(frames)


def data_process(df2, columns_list):
    """Strip spaces from the given columns and turn "----" cells into NaN.

    Args:
        df2: Frame to clean. The listed columns are overwritten on this
            object, so the caller's frame is modified as well.
        columns_list: Names of the columns whose values are cleaned. Every
            value in these columns must support ``.replace(" ", "")``.

    Returns:
        A frame in which every cell equal to ``"----"`` is replaced by
        ``np.nan``.
    """
    for col in columns_list:
        df2[col] = [x.replace(" ", "") for x in df2[col].tolist()]
    df2 = df2.replace("----", np.nan)
    return df2


columns_list = ["kdj_j", "kdj_k", "kdj_d", "macd_dif", "macd", "macd_dif_macd"]
# Clean the indicator columns and drop every row that still has a NaN.
df3 = data_process(df_all, columns_list).dropna()
window = 5
# Rolling regression on "close", computed separately per stock.
df_roll_reg = df3.groupby("stock_index").apply(
    lambda x: rolling_regression(x, window, "stock_date", "close"))
df2 = df_roll_reg.reset_index(drop=True)
# NOTE(review): literal 5 instead of ``window`` -- the two happen to be equal here.
df2 = df2[df2["slope_num_in"] == 5]
# NOTE(review): assigns into a filtered slice; pandas may emit SettingWithCopyWarning.
df2["slopes"] = mergeData.regPN(df2, 'slopes')["slopes"]
df3 = df2[[
    "kdj_k", "kdj_d", "kdj_j", "macd_dif", "macd", "macd_dif_macd", "slopes"
]]
# NOTE(review): ``tmp_path`` is assigned but the join below uses
# ``tmp_data_path``, which is not defined in this snippet -- confirm which
# directory is intended.
tmp_path = raw_data_dir
save_file = "test.csv"
save_file_name = os.path.join(tmp_data_path, save_file)
df3.to_csv(save_file_name, index=0)
# NOTE(review): ``df1`` is not defined in this snippet, and the second
# assignment overwrites the result of the first.
df2 = df1.replace(" ----", -999)
df2 = df1.replace(" ", "")
# Headers are dot-separated; keep the second component of each header.
df1.columns = [x.split(".")[1] for x in df1.columns.tolist()]
df1 = adjustStockPrice.adj_stock_price(df1)
stock = DF_to_StockDataFrame(df1)
feature_list = ['kdjk','kdjd','kdjj','macdh',"rsi_6","close"]
# Bare column access whose result is discarded.
# NOTE(review): presumably forces the "rsi_6" indicator to be computed -- confirm.
stock["rsi_6"]
window = 3
df_stock = stock_feature(stock,feature_list)
df3 = rolling_regression(df_stock,window,"stock_date","close")
# Columns that exist only in the regression output.
cols_to_use = df3.columns.difference(df_stock.columns)
# Attach those extra columns to the feature frame, joined on "stock_date".
df_merge = pd.merge(df_stock,df3[["stock_date"]+cols_to_use.tolist()],on=("stock_date"))
# Result discarded: rows with rsi_6 above 95 (only useful in an interactive session).
df_merge[df_merge["rsi_6"]>95]
df_stock = stock_kdj(stock)
# NOTE(review): this definition is cut off in the source right after ``try:``;
# the remainder of its body is not visible here.
def cut_list_pos_neg(seq):
    cut = 0
    seq_list = []
    try:
def process():
    """Exploratory KDJ-threshold buy/sell checks followed by a feature merge.

    Takes no arguments: ``df1``, ``max_min_stat_window``, ``regre_window`` and
    ``regre_col`` are not assigned in this function and are therefore read
    from the enclosing (module) scope. Several statements are bare
    expressions whose results are discarded.

    Returns:
        The frame produced by the final merge of the ``stock_kdj`` output with
        the future max/min and rolling-regression columns on ``stock_date``.
    """
    stock = DF_to_StockDataFrame(df1)
    stock_kdj_macd = stock_kdj(stock)
    kdj_thre = 0
    buy_num = 1000
    sale_days_threshold = 5
    # By what percentage above the previous day's low to buy on the next day.
    buy_increase_ratio = 0.01
    # Low of the following row.
    stock['next_low_1'] = stock['low'].shift(-1).tolist()
    stock,sale_columns = nextPrice(stock,sale_days_threshold)
    stock['stock_date'] = df1['stock_date'].astype(str).tolist()
    # Rows whose KDJ J value is below the threshold.
    # NOTE(review): the assignments below write into this filtered slice;
    # pandas may emit SettingWithCopyWarning.
    stock1 = stock[stock['kdjj']<kdj_thre]
    # The ratio is divided by 100 here, i.e. it is treated as a percentage.
    stock1['buy_price_now'] = stock1['low']*(1+ buy_increase_ratio/100)
    stock1['if_can_buy'] = stock1['buy_price_now'] - stock1['next_low_1']
    # Result discarded: share of rows where the buy price exceeds the next low.
    stock1[stock1['if_can_buy']>0].shape[0]/stock1.shape[0]
    #stock1 = stock[(stock['macd']<kdj_thre)&(stock['kdjj']<kdj_thre)]
    #stock1['buy_price'] = stock1['next_low_1'] * 1.005
    stock1['buy_price'] = stock1['buy_price_now']
    stock1['positive_price'] = stock1['buy_price'] * 1.005
    # Highest value across the columns returned by nextPrice.
    stock1['future_max'] = stock1[sale_columns].max(axis=1)
    stock1['max_can_sale'] = stock1['future_max']-stock1['positive_price']
    # Result discarded: share of rows where the future max exceeds positive_price.
    stock1[stock1['max_can_sale']>0].shape[0]/stock1.shape[0]
    stock['buy'] = stock['kdjj']
    # The first selection is immediately replaced by the second one.
    stock1 = stock[stock['kdjj']<kdj_thre]
    stock1 = stock[stock['kdjj']<20]
    # NOTE(review): 'next_low' is not created in this function (only
    # 'next_low_1' is); presumably added by nextPrice -- confirm.
    a1=stock1['next_low']/stock1['close']
    a1.mean()
    a1=stock1['next_low']/stock1['low']
    a1.mean()
    from sklearn import linear_model
    from sklearn.metrics import explained_variance_score,\
        mean_absolute_error,\
        mean_squared_error,\
        median_absolute_error,r2_score
    # NOTE(review): the ``normalize`` argument appears to have been removed
    # from LinearRegression in newer scikit-learn releases -- confirm against
    # the installed version.
    reg = linear_model.LinearRegression(fit_intercept=True,normalize=False)
    # Fit next_low on the four price columns; scores are computed on the
    # same rows the model was fitted on.
    stock1 = stock[['high','low','open','close','next_low']].dropna()
    x = stock1[['high','low','open','close']].values
    y = stock1.next_low.values
    reg.fit(x,y)
    # Results discarded.
    mean_squared_error(y,reg.predict(x))
    r2_score(y,reg.predict(x))
    cols_to_use = stock_kdj_macd.columns.difference(stock.columns).tolist()+['stock_date']
    # This df_f2 is overwritten by the final merge before the return.
    df_f2 = pd.merge(stock,stock_kdj_macd[cols_to_use],on="stock_date")
    #stock = select_data(stock,start_date,end_date)
    stock_kdj_macd = stock_kdj(stock)
    # Result discarded.
    pd.merge(stock_kdj_macd,stock,on="stock_date")
    #kdj_feature = setFeature(df_kdj,'tes',14,['kdjk','kdjj','kdjd'])
    df_max_min = rollingFutureMaxMin(stock,max_min_stat_window)
    #df_f2 = df_f1.merge(df_kdj, left_index=True, right_index=True)
    df_max_min = df_max_min.reset_index()
    df_max_min = rolling_regression(df_max_min,regre_window,'date',regre_col)
    #df_max_min['slope_5'] = linear_slope['slopes']
    df_max_min = df_max_min.reset_index().dropna()
    # NOTE(review): the right-hand side has a renumbered index while
    # df_max_min keeps its post-dropna index; if dropna removed rows, the
    # index-aligned assignment may misplace dates -- confirm.
    df_max_min['stock_date'] = df_max_min.reset_index()['date'].astype(str)
    ## combine data
    cols_to_use = df_max_min.columns.difference(stock_kdj_macd.columns).tolist()+['stock_date']
    df_f2 = pd.merge(stock_kdj_macd,df_max_min[cols_to_use],on="stock_date")
    return df_f2