def main_get(timStr='', nday=2):
    """Run the data-update step and print how long it took.

    Args:
        timStr: Time string forwarded to ``tft.fb_gid_get_nday`` and to
            ``zt.timNSec``. Defaults to the empty string.
        nday: Stored in ``tfsys.xnday_down``. ``-1`` replaces that value with
            ``xtfb.gid_nday + 10`` once the context is initialised; ``0``
            skips the ``tft.fb_gid_get_nday`` call.
    """
    # 1 --- system flags; set before fb_init, same order as the original
    print('\nmain_get,nday:', nday)
    tfsys.xnday_down = nday
    zsys.web_get001txtFg = True

    # 2 --- football data context
    data_root = '/tfbDat/'
    gid_file = data_root + 'gid2017.dat'
    xtfb = tft.fb_init(data_root, gid_file)
    if nday == -1:
        tfsys.xnday_down = xtfb.gid_nday + 10
        print('nday,', tfsys.xnday_down)

    # 3 --- update data
    print('\n#3,update.data')
    if nday != 0:
        tft.fb_gid_get_nday(xtfb, timStr, fgExt=True)

    # 4 --- timing report
    # NOTE(review): the sibling main_* functions pass '' as the first argument
    # of zt.timNSec; this one passes timStr -- confirm that is intended.
    elapsed = zt.timNSec(timStr, xtfb.tim0, '')
    print('\n#4,update.data,tim:{0:.2f} s'.format(elapsed))
def main_ai_bt(timStr='', nday=2):
    """Configure the model-based strategy on the context and run a backtest.

    Args:
        timStr: Time string forwarded to ``tfbt.bt_main``.
        nday: Stored in ``tfsys.xnday_down``. ``-1`` replaces that value with
            ``xtfb.gid_nday + 10`` once the context is initialised; ``0``
            skips the backtest and the result analysis.
    """
    # 1 --- system flags; set before fb_init, same order as the original
    print('\nmain_bt,nday:', nday)
    tfsys.xnday_down = nday
    zsys.web_get001txtFg = True

    # 2 --- football data context
    data_root = '/tfbDat/'
    gid_file = data_root + 'gid2018.dat'
    xtfb = tft.fb_init(data_root, gid_file)
    if nday == -1:
        tfsys.xnday_down = xtfb.gid_nday + 10
        print('nday,', tfsys.xnday_down)

    # 3 --- backtest
    print('\n#3,backtest')
    if nday != 0:
        # strategy hooks and their parameters
        xtfb.funPre = tfsty.sta00_pre  # bt_1dayMain
        xtfb.funSta = tfsty.sta_ai_log01
        xtfb.preVars = []
        xtfb.staVars = [99, 99, 99]

        # 3.a --- model setup
        zai.xmodel = {}
        xtfb.ai_mxfFN0 = data_root + 'mlib/p7y2016_'
        xtfb.ai_mx_sgn_lst = ['log']
        xtfb.ai_ysgn = 'kwin'
        xtfb.ai_xlst = [
            'cid',
            'pwin0', 'pdraw0', 'plost0',
            'pwin9', 'pdraw9', 'plost9',
        ]
        zai.ai_f_mxRdlst(xtfb.ai_mxfFN0, xtfb.ai_mx_sgn_lst)

        # 3.b --- prediction
        xtfb.kcid = '1'  # cn,3=bet365
        tfbt.bt_main(xtfb, timStr)

        # 4 --- result analysis
        # NOTE(review): nesting reconstructed -- this step is placed under
        # ``nday != 0`` because it reports on the backtest run above; confirm.
        print('\n#4,result.anz')
        tfbt.bt_main_ret(xtfb, True)
        print('kcid,', xtfb.kcid, ',nday,', nday)
        print('preVar,', xtfb.preVars)
        print('staVar,', xtfb.staVars)

    # 5 --- timing report
    elapsed = zt.timNSec('', xtfb.tim0, '')
    print('\n#5,backtest,tim:{0:.2f} s'.format(elapsed))

    # 6 --- end of main
    print('\n#6,end.main')
def main_download_samples():
    """Initialise the data context and request a sample league download.

    Calls ``tft.fb_download_league_data`` for league '英冠', season
    '2018-2019', with ``fgSample=True``.
    """
    # 1 --- banner
    print('\ndownload samples\n')

    # 2 --- football data context (return value is not used afterwards)
    data_root = '/tfbDat/'
    gid_file = data_root + 'gid2019_js.dat'
    fb_ctx = tft.fb_init(data_root, gid_file)

    # 3 --- download samples
    tft.fb_download_league_data('英冠', '2018-2019', fgSample=True)
def save_odds_to_file_for_xgboost(file_name, num):
    """Build the xgboost sample table from per-match files and write it as CSV.

    For every row of ``tfsys.gids`` whose index label ``i`` satisfies
    ``num[0] <= i < num[1]`` (assumes the default integer index -- confirm),
    the ``<gid>_tz.dat`` and ``<gid>_oz_1.dat`` files under ``tfsys.rxdat``
    are read. The six values returned by ``calculate_odds_cv`` are added as
    extra columns to the ``_tz`` frame, and all such frames are concatenated.

    Rows are skipped when ``kend == '0'`` or ``kwin == -1``, when either file
    is missing, or when the ``_tz`` frame contains any NaN.

    Args:
        file_name: Destination CSV path (written with gb18030 encoding).
        num: Two-element sequence ``(start, stop)`` bounding the processed
            index labels; ``stop`` is exclusive.
    """
    # 2 --- init.tfb; fb_init is kept for its side effects, its return value
    # was never used here.
    data_root = '/tfbDat/'
    tft.fb_init(data_root, data_root + 'gid2018-2019(xgboost).dat')
    games = tfsys.gids
    total = len(games)

    # Frames are collected and concatenated once at the end: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole table per iteration.
    frames = []
    for i, row in games.iterrows():
        if (i + 1) % 2000 == 0:
            print((i + 1) / total * 100, "%")
            print('now:', zt.tim_now_str())
        if not (num[0] <= i < num[1]):
            continue
        if row['kend'] == '0' or row['kwin'] == -1:
            continue

        gid = row['gid']
        fxdat_tz = tfsys.rxdat + gid + '_tz.dat'
        fxdat_oz = tfsys.rxdat + gid + '_oz_1.dat'
        if not (os.path.exists(fxdat_tz) and os.path.exists(fxdat_oz)):
            continue

        odds_tz = pd.read_csv(fxdat_tz, index_col=False, dtype=float,
                              encoding='gb18030')
        odds_oz = pd.read_csv(fxdat_oz, index_col=False, dtype=float,
                              encoding='gb18030')
        if odds_tz.isnull().values.any():
            continue  # skip the sample if it contains NaN values

        (win_cv_0, draw_cv_0, lost_cv_0,
         win_cv_9, draw_cv_9, lost_cv_9) = calculate_odds_cv(odds_oz)
        frames.append(odds_tz.assign(
            win_cv_0=win_cv_0,
            draw_cv_0=draw_cv_0,
            lost_cv_0=lost_cv_0,
            win_cv_9=win_cv_9,
            draw_cv_9=draw_cv_9,
            lost_cv_9=lost_cv_9,
        ))

    # pd.concat rejects an empty list, so fall back to an empty frame.
    p_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    p_data.to_csv(file_name, index=False, encoding='gb18030')
def main_bt(timStr='', nday=2):
    """Configure the LSTM prediction hook on the context and run a backtest.

    Args:
        timStr: Time string forwarded to ``tfbt.bt_main``.
        nday: Stored in ``tfsys.xnday_down``. ``-1`` replaces that value with
            ``xtfb.gid_nday + 2`` once the context is initialised; ``0`` skips
            the backtest.
    """
    # 1 --- system flags
    print('\nmain_bt,nday:', nday)
    tfsys.xnday_down = nday
    # zsys.web_get001txtFg = True   (disabled in the original)

    # 2 --- football data context
    data_root = '/tfbDat/'
    gid_file = data_root + 'gid2019_js.dat'
    xtfb = tft.fb_init(data_root, gid_file)
    if nday == -1:
        tfsys.xnday_down = xtfb.gid_nday + 2
        print('nday,', tfsys.xnday_down)

    # 3 --- backtest
    print('\n#3,backtest')
    if nday != 0:
        xtfb.funPre = tfsty.sta_lstm_pre  # prediction algorithm
        xtfb.funSta = tfsty.sta00_sta
        xtfb.preVars = []
        xtfb.staVars = [1.5]
        xtfb.kcid = '1'  # cn,3=bet365

        tfbt.bt_main(xtfb, timStr)
        # NOTE(review): nesting reconstructed -- this message is placed under
        # ``nday != 0`` because it reports the bt_main run above; confirm.
        print('\n#4, 完成预测')

        # 4 --- main_ret: compute the expected return from the predictions.
        # Expected-return analysis is skipped for now; to be developed later.
        #     print('\n#4,result.anz')
        #     tfbt.bt_main_ret(xtfb, True)
        #     print('kcid,',xtfb.kcid,',nday,',nday)
        #     print('preVar,',xtfb.preVars)
        #     print('staVar,',xtfb.staVars)

    # 5 --- timing report
    elapsed = zt.timNSec('', xtfb.tim0, '')
    print('\n#5,backtest,tim:{0:.2f} s'.format(elapsed))

    # 6 --- end of main
    print('\n#6,end.main')
def read_lstm_sample_files(file_name, num):
    """Load per-match ``_ftr.dat`` feature files into one DataFrame.

    For every row of ``tfsys.gids`` whose index label ``i`` satisfies
    ``num[0] <= i < num[1]`` (assumes the default integer index -- confirm),
    the ``<gid>_ftr.dat`` file under ``tfsys.rxdat`` is read and appended.
    Rows are skipped when ``kend == '0'`` or ``kwin == -1``, when the file is
    missing, or when the file contains any NaN.

    After concatenation the ``FTR`` column is remapped (1 -> 0, then 3 -> 1)
    and eleven volume/average columns are dropped.

    Args:
        file_name: Unused; kept for interface compatibility.
        num: Two-element sequence ``(start, stop)`` bounding the processed
            index labels; ``stop`` is exclusive.

    Returns:
        The combined feature DataFrame. An empty DataFrame is returned when
        no sample qualified (the original raised ``KeyError`` on the missing
        ``FTR`` column in that case).
    """
    # 2 --- init.tfb; fb_init is kept for its side effects, its return value
    # was never used here.
    data_root = '/tfbDat/'
    tft.fb_init(data_root, data_root + 'gid2019_js.dat')
    games = tfsys.gids
    total = len(games)

    # Frames are collected and concatenated once at the end: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole table per iteration.
    frames = []
    for i, row in games.iterrows():
        if (i + 1) % 2000 == 0:
            print((i + 1) / total * 100, "%")
            print('now:', zt.tim_now_str())
        if not (num[0] <= i < num[1]):
            continue
        if row['kend'] == '0' or row['kwin'] == -1:
            continue

        fxdat_ftr = tfsys.rxdat + row['gid'] + '_ftr.dat'
        if not os.path.exists(fxdat_ftr):
            continue
        features = pd.read_csv(fxdat_ftr, index_col=False, encoding='gb18030')
        if features.isnull().values.any():
            continue  # skip the sample if it contains NaN values
        frames.append(features)

    if not frames:
        return pd.DataFrame()
    data = pd.concat(frames, ignore_index=True)

    # Single-step .loc assignment: the chained form
    # ``data.loc[:, "FTR"][mask] = v`` writes to a temporary and is a no-op
    # under pandas Copy-on-Write. Order matters: 1 -> 0 first, then 3 -> 1.
    data.loc[data["FTR"] == 1, "FTR"] = 0
    data.loc[data["FTR"] == 3, "FTR"] = 1

    dropped_columns = [
        'vol_prob_h', 'vol_prob_d', 'vol_prob_g', 'loss_idx', 'loss_vol_ratio',
        'avg_win', 'avg_draw', 'avg_lost',
        'avg_win_diff', 'avg_draw_diff', 'avg_lost_diff',
    ]
    return data.drop(dropped_columns, axis=1)
def save_odds_to_file(file_name, num):
    """Merge per-match ``_az``/``_oz``/``_tz`` files into one CSV of samples.

    For every row of ``tfsys.gids`` whose index label ``i`` satisfies
    ``num[0] <= i < num[1]`` (assumes the default integer index -- confirm),
    the three per-match files under ``tfsys.rxdat`` are read as strings. A
    match is used only when both the ``_oz`` and ``_az`` frames have at least
    ``tfsys.cidrows`` rows and a first-row ``kwin`` other than ``'-1'``, the
    ``_tz`` frame has exactly one row, rows with ``cid == '3'`` exist in both
    odds frames, and ``is_filter_sample`` does not reject the ``_az`` rows.

    Args:
        file_name: Destination CSV path (written with gb18030 encoding).
        num: Two-element sequence ``(start, stop)`` bounding the processed
            index labels; ``stop`` is exclusive.
    """
    # 2 --- init.tfb
    # NOTE(review): ``rs0`` and ``fgid`` are not defined inside this function;
    # they are assumed to be module-level globals -- confirm.
    tft.fb_init(rs0, fgid)
    games = tfsys.gids
    total = len(games)

    # Frames are collected and concatenated once at the end: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole table per iteration.
    samples = []
    for i, row in games.iterrows():
        if (i + 1) % 2000 == 0:
            print((i + 1) / total * 100, "%")
            print('now:', zt.tim_now_str())
        if not (num[0] <= i < num[1]):
            continue

        gid = row['gid']
        paths = [
            tfsys.rxdat + gid + '_oz.dat',
            tfsys.rxdat + gid + '_az.dat',
            tfsys.rxdat + gid + '_tz.dat',
        ]
        if not all(os.path.exists(path) for path in paths):
            continue
        odds_oz, odds_az, odds_tz = (
            pd.read_csv(path, index_col=False, dtype=str, encoding='gb18030')
            for path in paths
        )

        # Only process data that has at least CID_ROWS rows and a match
        # result. The checks keep the original short-circuit order.
        usable = (len(odds_oz) >= tfsys.cidrows
                  and odds_oz.loc[0, 'kwin'] != '-1'
                  and len(odds_az) >= tfsys.cidrows
                  and odds_az.loc[0, 'kwin'] != '-1'
                  and len(odds_tz) == 1)
        if not usable:
            continue

        odds_oz = odds_oz[odds_oz['cid'] == '3']
        odds_az = odds_az[odds_az['cid'] == '3']
        if odds_oz.empty or odds_az.empty:
            continue

        label = odds_oz['kwin'].reset_index(drop=True)
        odds_oz = odds_oz[tfsys.usedSgn_oz].reset_index(drop=True)
        odds_az = odds_az[tfsys.usedSgn_az].reset_index(drop=True)
        # NOTE(review): ``usedSng_tz`` is spelled differently from
        # ``usedSgn_oz``/``usedSgn_az``; kept as-is -- confirm the attribute
        # name in tfsys.
        odds_tz = odds_tz[tfsys.usedSng_tz].reset_index(drop=True)

        if is_filter_sample(odds_az):
            continue

        samples.append(
            pd.concat([odds_az, odds_oz, odds_tz, label], axis=1))

    # pd.concat rejects an empty list, so fall back to an empty frame.
    p_data = pd.concat(samples, ignore_index=True) if samples else pd.DataFrame()
    p_data.to_csv(file_name, index=False, encoding='gb18030')
def save_odds_to_file_for_svm(file_name, num):
    """Build the SVM sample table from per-match odds files and write it as CSV.

    For every row of ``tfsys.gids`` whose index label ``i`` satisfies
    ``num[0] <= i < num[1]`` (assumes the default integer index -- confirm),
    the ``<gid>_oz.dat`` and ``<gid>_az.dat`` files under ``tfsys.rxdat`` are
    read. A match is used only when both frames have at least
    ``tfsys.cidrows`` rows and a first-row ``kwin`` other than ``'-1'``, none
    of the first-row ``pwin0``/``plost0``/``pwin9``/``plost9`` values is
    ``<= 1.5``, and exactly three rows have ``cid`` in ``{'2', '3', '293'}``.

    Each output sample is ``[kwin (3 mapped to 2), the six p* odds columns,
    ky0_var, ky9_var]``, where the last two come from ``kelly_variance``.

    Fixes relative to the original:
      * ``vback9`` is now read from column ``'vback9'`` (it was computed from
        ``'vback0'``, so ``kelly_variance`` received the same value twice).
      * Samples are concatenated once instead of using ``DataFrame.append``,
        which was removed in pandas 2.0.
      * Intermediate frames that never reached the output were dropped.

    Args:
        file_name: Destination CSV path (written with gb18030 encoding).
        num: Two-element sequence ``(start, stop)`` bounding the processed
            index labels; ``stop`` is exclusive.
    """
    # 2 --- init.tfb
    # NOTE(review): ``rs0`` and ``fgid`` are not defined inside this function;
    # they are assumed to be module-level globals -- confirm.
    tft.fb_init(rs0, fgid)
    games = tfsys.gids
    total = len(games)

    oz_cols = [
        'pwin0', 'pdraw0', 'plost0', 'pwin9', 'pdraw9', 'plost9',
        'vwin0', 'vdraw0', 'vlost0', 'vback0',
        'vwin0kali', 'vdraw0kali', 'vlost0kali',
        'vwin9', 'vdraw9', 'vlost9', 'vback9',
        'vwin9kali', 'vdraw9kali', 'vlost9kali',
    ]
    oz0 = ['pwin0', 'pdraw0', 'plost0']
    oz9 = ['pwin9', 'pdraw9', 'plost9']
    pb0 = ['vwin0', 'vdraw0', 'vlost0']
    pb9 = ['vwin9', 'vdraw9', 'vlost9']
    oz = ['pwin0', 'pwin9', 'pdraw0', 'pdraw9', 'plost0', 'plost9']
    oz_columns = ['pwin', 'pdraw', 'plost']
    low_odds_cols = ('pwin0', 'plost0', 'pwin9', 'plost9')
    cids = ['2', '3', '293']

    samples = []
    for i, row in games.iterrows():
        if (i + 1) % 2000 == 0:
            print((i + 1) / total * 100, "%")
            print('now:', zt.tim_now_str())
        if not (num[0] <= i < num[1]):
            continue

        gid = row['gid']
        fxdat_oz = tfsys.rxdat + gid + '_oz.dat'
        fxdat_az = tfsys.rxdat + gid + '_az.dat'
        if not (os.path.exists(fxdat_oz) and os.path.exists(fxdat_az)):
            continue
        odds_oz = pd.read_csv(fxdat_oz, index_col=False, dtype=str,
                              encoding='gb18030')
        odds_az = pd.read_csv(fxdat_az, index_col=False, dtype=str,
                              encoding='gb18030')

        # Only process data that has at least CID_ROWS rows and a match
        # result. The checks keep the original short-circuit order.
        usable = (len(odds_oz) >= tfsys.cidrows
                  and odds_oz.loc[0, 'kwin'] != '-1'
                  and len(odds_az) >= tfsys.cidrows
                  and odds_az.loc[0, 'kwin'] != '-1')
        if not usable:
            continue

        # Skip matches where any first-row win/lost odds value is <= 1.5.
        if any(float(odds_oz.loc[0, col]) <= 1.5 for col in low_odds_cols):
            continue

        odds_oz = odds_oz[odds_oz['cid'].isin(cids)]
        if len(odds_oz) != 3:
            continue

        target = (odds_oz['kwin'].astype(int)
                  .replace(3, 2)
                  .reset_index(drop=True))
        # Resetting the index once here keeps every derived frame aligned
        # with ``target`` for the column-wise concat below.
        odds_oz = odds_oz[oz_cols].astype(float).reset_index(drop=True)

        odds_oz0_9 = odds_oz[oz]
        odds_oz0 = odds_oz[oz0]
        odds_oz9 = odds_oz[oz9]
        odds_oz0.columns = oz_columns
        odds_oz9.columns = oz_columns

        vback0 = odds_oz['vback0'] / 100
        vback9 = odds_oz['vback9'] / 100  # original read 'vback0' here

        prob0 = odds_oz[pb0]
        prob9 = odds_oz[pb9]

        # kelly_variance returns eight values; only the last two are used.
        _, _, _, _, _, _, ky0_var, ky9_var = kelly_variance(
            prob0, prob9, odds_oz0, odds_oz9, vback0, vback9)

        samples.append(
            pd.concat([target, odds_oz0_9, ky0_var, ky9_var], axis=1))

    # pd.concat rejects an empty list, so fall back to an empty frame.
    p_data = pd.concat(samples, ignore_index=True) if samples else pd.DataFrame()
    p_data.to_csv(file_name, index=False, encoding='gb18030')