def baoStockSlope(stock_index, start_date, stat_days, pred_days, if_print=1):
    """Fit a linear slope to one stock's ``close`` column over the prediction window.

    The prediction window is obtained from
    ``klineDate(start_date, stat_days, pred_days).make_date()``. The stock's
    rows are read from ``<data_path>/history_data/baostock/2020-12-17/<stock_index>.csv``,
    de-duplicated, and filtered to ``pred_start_date <= date <= pred_end_date``.

    Args:
        stock_index: File stem of the stock's CSV (concatenated with ".csv").
        start_date: Start date handed to ``klineDate``.
        stat_days: Length of the statistics window; coerced with ``int()``.
        pred_days: Length of the prediction window; coerced with ``int()``.
        if_print: When equal to 1, print one comma-separated summary line.

    Returns:
        dict with keys ``stock_index``, ``start_date``, ``pred_start_date``,
        ``pred_end_date``, ``rows`` and ``slope``. When loading or fitting
        fails, ``rows`` is 0 and ``slope`` is the sentinel -999.

    Raises:
        ValueError/TypeError: if ``stat_days`` or ``pred_days`` cannot be
            converted by ``int()``. Errors from ``klineDate``/``make_date``
            also propagate, because the date computation is outside the
            ``try`` block (as in the original).
    """
    stat_days = int(stat_days)
    pred_days = int(pred_days)

    # NOTE(review): the original first assigned tmp_data_dict.get("baostock")
    # to df_dir and immediately overwrote it; that dead store is dropped.
    # The snapshot directory below is hard-coded - confirm this is intended.
    df_dir = os.path.join(data_path, "history_data", "baostock", "2020-12-17")

    k1 = klineDate(start_date, stat_days, pred_days)
    _stat_end_date, pred_start_date, pred_end_date = k1.make_date()

    try:
        df1 = pd.read_csv(os.path.join(df_dir, stock_index + ".csv"))
        df1 = df1.drop_duplicates()
        df2 = df1[(df1["date"] >= pred_start_date) & (df1["date"] <= pred_end_date)]
        rows = df2.shape[0]
        slope = LinearReg.single_linear_reg(df2, "close")[0]
    except Exception:
        # Best-effort: any load/fit failure is reported through the sentinel
        # values. ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a batch run can still be interrupted.
        slope = -999
        rows = 0

    # Single reporting point; on failure rows/slope are 0/-999, which is
    # exactly what the original failure branch printed.
    if if_print == 1:
        print("{},{},{},{},{},{},{}".format(
            stock_index, start_date, pred_start_date, pred_end_date,
            pred_days, rows, slope))

    return dict(
        stock_index=stock_index,
        start_date=start_date,
        pred_start_date=pred_start_date,
        pred_end_date=pred_end_date,
        rows=rows,
        slope=slope,
    )
def stockSlope(stock_index, start_date, stat_days, pred_days):
    """Print the linear slope of a stock's ``close`` column over the prediction window.

    Reads ``<df_dir>/<stock_index>.csv`` (``df_dir`` is a module-level name),
    derives the prediction window from
    ``klineDate(start_date, stat_days, pred_days).make_date()``, filters rows
    with ``pred_start_date <= date <= pred_end_date`` and fits the slope with
    ``LinearReg.single_linear_reg``.

    Args:
        stock_index: File stem of the stock's CSV (concatenated with ".csv").
        start_date: Start date handed to ``klineDate``.
        stat_days: Statistics-window length handed to ``klineDate``.
        pred_days: Prediction-window length; also echoed in the output line.

    Returns:
        None. One comma-separated line is printed:
        ``stock_index,pred_start_date,pred_end_date,pred_days,slope`` on
        success, or ``stock_index,aa,bb,pred_days,-999`` on any failure.
    """
    try:
        df1 = pd.read_csv(os.path.join(df_dir, stock_index + ".csv"))
        k1 = klineDate(start_date, stat_days, pred_days)
        _stat_end_date, pred_start_date, pred_end_date = k1.make_date()
        in_window = (df1["date"] >= pred_start_date) & (df1["date"] <= pred_end_date)
        df2 = df1[in_window]
        slope = LinearReg.single_linear_reg(df2, "close")[0]
        print("{},{},{},{},{}".format(
            stock_index, pred_start_date, pred_end_date, pred_days, slope))
    except Exception:
        # Best-effort reporting: "aa"/"bb" are placeholder dates and -999 is
        # the failure sentinel. ``Exception`` instead of a bare except keeps
        # Ctrl-C and sys.exit() working while this runs in a loop.
        print("{},{},{},{},{}".format(stock_index, "aa", "bb", pred_days, -999))
def stockSlope(stock_index, start_date, stat_days, pred_days):
    """Return the linear slope of a stock's ``close`` column over the prediction window.

    The CSV is read from ``<data_dict["baostock"]>/history_data/<stock_index>.csv``.
    The prediction window comes from
    ``klineDate(start_date, stat_days, pred_days).make_date()`` and rows are
    kept when ``pred_start_date <= date <= pred_end_date``.

    Args:
        stock_index: File stem of the stock's CSV.
        start_date: Start date handed to ``klineDate``.
        stat_days: Statistics-window length handed to ``klineDate``.
        pred_days: Prediction-window length handed to ``klineDate``.

    Returns:
        Tuple ``(slope, pred_start_date, pred_end_date)``. On any failure
        (missing config entry, unreadable file, date or regression error) all
        three elements are the sentinel -999.
    """
    try:
        data_dir = data_dict.get("baostock")
        csv_path = os.path.join(data_dir, "history_data", stock_index) + '.csv'
        df1 = pd.read_csv(csv_path)
        k1 = klineDate(start_date, stat_days, pred_days)
        _stat_end_date, pred_start_date, pred_end_date = k1.make_date()
        in_window = (df1["date"] >= pred_start_date) & (df1["date"] <= pred_end_date)
        df2 = df1[in_window]
        slope = LinearReg.single_linear_reg(df2, "close")[0]
    except Exception:
        # Failure is signalled through sentinels rather than an exception.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of being turned into a -999 result.
        slope = -999
        pred_start_date = -999
        pred_end_date = -999
    return slope, pred_start_date, pred_end_date
def getSlope(DF, start_date, stat_days, pred_days, col):
    """Return the linear slope of column ``col`` over the prediction window.

    ``DF`` is first passed through ``cleanData.cleanColName`` and
    ``cleanData.setDt``; the result is then filtered on its ``dt`` column
    (presumably created by ``setDt`` - confirm against ``cleanData``) to
    ``pred_start_date <= dt <= pred_end_date``, where the window comes from
    ``klineDate(start_date, stat_days, pred_days).make_date()``.

    Args:
        DF: Input frame, handed to the ``cleanData`` helpers.
        start_date: Start date handed to ``klineDate``.
        stat_days: Statistics-window length handed to ``klineDate``.
        pred_days: Prediction-window length handed to ``klineDate``.
        col: Name of the column to regress; must exist after cleaning.

    Returns:
        Tuple ``(slope, pred_start_date, pred_end_date)``. If the date
        computation, filtering or regression fails, all three are -999.

    Raises:
        SystemExit: with status 1 (after logging an error) when ``col`` is not
            a column of the cleaned frame.
    """
    df1 = cleanData.cleanColName(DF)
    df1 = cleanData.setDt(df1)

    if col not in df1.columns:
        # Lazy %-formatting yields the same message for string names and does
        # not raise TypeError when ``col`` is not a str (e.g. an int label).
        logging.error("no input slope columns: %s", col)
        sys.exit(1)

    try:
        k1 = klineDate(start_date, stat_days, pred_days)
        _stat_end_date, pred_start_date, pred_end_date = k1.make_date()
        in_window = (df1["dt"] >= pred_start_date) & (df1["dt"] <= pred_end_date)
        df2 = df1[in_window]
        slope = LinearReg.single_linear_reg(df2, col)[0]
    except Exception:
        # Sentinel result on failure. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit from being swallowed here.
        slope = -999
        pred_start_date = -999
        pred_end_date = -999
    return slope, pred_start_date, pred_end_date
df_diaoyan = getDiaoYanDF(df,start_date,end_date) df_diaoyan_head_n = df_diaoyan.head(head_n) stk_diaoyan_list = df_diaoyan_head_n['stock_index'].tolist() stk_diaoyan_tup = tuple(stk_diaoyan_list) return stk_diaoyan_tup,df_diaoyan_head_n if __name__ == "__main__": from functions.LinearReg import LinearReg from functions.day_history import kLines from functions.day_history.getDataFromSpark import * head_n = 100 dy_start_date = '2020-08-03' stat_days = 20 pred_days = 1 stat_end_date,pred_start_date,pred_end_date = kLines.klineDate(dy_start_date,stat_days,pred_days).make_date() df = loadData() df1 = getDiaoYanDF(df,dy_start_date,stat_end_date) #stat_end_date,pred_start_date,pred_end_date = kLines.klineDate(dy_start_date,stat_days,pred_days).make_date() #stat_dates_list = '_'.join([dy_start_date,stat_end_date,pred_start_date,pred_end_date]) stk_diaoyan_tup,df_diaoyan_head_n = headDiaoYanStockList(df,head_n,dy_start_date,stat_end_date) print(df_diaoyan_head_n.head(10)) ''' para = { 'stock_tuple': stk_diaoyan_tup, 'start_date': pred_start_date, 'end_date': pred_end_date } getSparkData = getDataFromSpark(para) getSparkData.getDataFromSpark()
# Exploratory scratch script over two frames, df1 and df2, that are defined
# outside this view. Statement order matters: several names are rebound.

# NOTE: this rebinds the name ``jijin_count`` from the callable to its result,
# so the function can no longer be called by that name afterwards.
jijin_count = jijin_count(df2)
# Cast to float so the column can be summed numerically.
df2['chigu_money'] = df2['chigu_money'].astype(float)
# Total chigu_money per stock_index, largest first.
df3 = df2.groupby('stock_index')['chigu_money'].sum().reset_index().sort_values('chigu_money',ascending=False)
from functions.day_history.kLines import klineDate
start_date = "2020-06-01"
stat_days = 20
pred_days = 20
stat_end_date,pred_start_date,pred_end_date = klineDate(start_date,stat_days,pred_days).make_date()
# Rows in the statistics window; the lower bound repeats the start_date literal.
df_p1 = df2[(df2["stock_date"]>="2020-06-01")&(df2["stock_date"]<=stat_end_date)]
# Rows in the prediction window.
df_p2 = df2[(df2["stock_date"]>=pred_start_date)&(df2["stock_date"]<=pred_end_date)]
# NOTE: the merged frame is not assigned, so outside an interactive session
# this statement has no lasting effect.
pd.merge(df_p1[["stock_index","chigu_money"]],df_p2[["stock_index","chigu_money"]],how="inner",on=["stock_index"])
# Ad-hoc lookups: each assignment overwrites df2, so only the last one survives.
df2 = df1[df1['stock_date']=='2019-06-30']
df2 = df1[(df1['stock_date']=='2019-06-30')&(df1['stock_index']==428)]
df2 = df1[(df1['stock_date']=='2019-12-30')&(df1['stock_index']==917)]
# Bare expression; its value is discarded unless run in a REPL/notebook.
df2['jijin_name']