def load_data(self, assets):
    """Load historical bars for every contract of each product in *assets*.

    For each product this fills three instance dictionaries:

    * ``self.contlist[prod]``  - contracts returned by ``misc.contract_range``
      for ``self.start_date`` .. ``self.end_date``;
    * ``self.exp_dates[prod]`` - ``misc.contract_expiry`` of each contract;
    * ``self.data_store[prod][cont]`` - one DataFrame per contract, with a
      ``'contract'`` column added.

    Each contract is loaded from its expiry shifted by ``self.sim_period``
    up to its expiry shifted by ``self.config['rollrule']``.

    Args:
        assets: iterable of product codes.
    """
    dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
    cnx = dbaccess.connect(**dbconfig)
    try:
        for prod in assets:
            cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
            self.contlist[prod], _ = misc.contract_range(
                prod, exch, cont_mth, self.start_date, self.end_date)
            # Read back from the instance dictionaries that were just
            # written; separate empty local dicts would raise KeyError.
            conts = self.contlist[prod]
            self.exp_dates[prod] = [misc.contract_expiry(cont) for cont in conts]
            expiries = self.exp_dates[prod]
            edates = [misc.day_shift(d, self.config['rollrule']) for d in expiries]
            sdates = [misc.day_shift(d, self.sim_period) for d in expiries]
            self.data_store[prod] = {}
            for cont, sd, ed in zip(conts, sdates, edates):
                if self.sim_freq == 'd':
                    # NOTE(review): daily bars are requested from the
                    # 'fut_min' table, as in the original code - confirm
                    # this is the intended table.
                    tmp_df = dbaccess.load_daily_data_to_df(
                        cnx, 'fut_min', cont, sd, ed)
                else:
                    minid_start = 1500
                    minid_end = 2114
                    if prod in misc.night_session_markets:
                        minid_start = 300
                    tmp_df = dbaccess.load_min_data_to_df(
                        cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                    # NOTE(review): the return value is ignored, so this
                    # assumes cleanup_mindata works in place - confirm.
                    misc.cleanup_mindata(tmp_df, prod)
                tmp_df['contract'] = cont
                self.data_store[prod][cont] = tmp_df
    finally:
        # Release the connection even if a query above fails.
        cnx.close()
def load_data(self, assets):
    """Load historical bars for every contract of each product in *assets*.

    For each product this fills three instance dictionaries:

    * ``self.contlist[prod]``  - contracts returned by ``misc.contract_range``
      for ``self.start_date`` .. ``self.end_date``;
    * ``self.exp_dates[prod]`` - ``misc.contract_expiry`` of each contract;
    * ``self.data_store[prod][cont]`` - one DataFrame per contract, with a
      ``'contract'`` column added.

    Each contract is loaded from its expiry shifted by ``self.sim_period``
    up to its expiry shifted by ``self.config['rollrule']``.

    Args:
        assets: iterable of product codes.
    """
    dbconfig = self.config.get('dbconfig', dbaccess.hist_dbconfig)
    cnx = dbaccess.connect(**dbconfig)
    try:
        for prod in assets:
            cont_mth, exch = dbaccess.prod_main_cont_exch(prod)
            self.contlist[prod], _ = misc.contract_range(
                prod, exch, cont_mth, self.start_date, self.end_date)
            # Read back from the instance dictionaries that were just
            # written; separate empty local dicts would raise KeyError.
            conts = self.contlist[prod]
            self.exp_dates[prod] = [
                misc.contract_expiry(cont) for cont in conts
            ]
            expiries = self.exp_dates[prod]
            edates = [
                misc.day_shift(d, self.config['rollrule']) for d in expiries
            ]
            sdates = [
                misc.day_shift(d, self.sim_period) for d in expiries
            ]
            self.data_store[prod] = {}
            for cont, sd, ed in zip(conts, sdates, edates):
                if self.sim_freq == 'd':
                    # NOTE(review): daily bars are requested from the
                    # 'fut_min' table, as in the original code - confirm
                    # this is the intended table.
                    tmp_df = dbaccess.load_daily_data_to_df(
                        cnx, 'fut_min', cont, sd, ed)
                else:
                    minid_start = 1500
                    minid_end = 2114
                    if prod in misc.night_session_markets:
                        minid_start = 300
                    tmp_df = dbaccess.load_min_data_to_df(
                        cnx, 'fut_min', cont, sd, ed, minid_start, minid_end)
                    # NOTE(review): the return value is ignored, so this
                    # assumes cleanup_mindata works in place - confirm.
                    misc.cleanup_mindata(tmp_df, prod)
                tmp_df['contract'] = cont
                self.data_store[prod][cont] = tmp_df
    finally:
        # Release the connection even if a query above fails.
        cnx.close()
def snap_curr_market():
    """Fetch current quotes for every unexpired contract of the live assets.

    For each ``(asset, exch, cont_mth)`` entry of ``live_asset_list`` the
    contracts returned by ``misc.contract_range`` for today are kept when
    their ``misc.contract_expiry`` is today or later.

    Returns:
        A ``(cont_list, res)`` tuple: ``cont_list`` maps each asset to its
        kept contracts and ``res`` is whatever
        ``web_sina_api.get_fut_quotes`` returns for all kept contracts.
    """
    today = datetime.date.today()
    cont_list = {}
    all_conts = []
    for asset, exch, cont_mth in live_asset_list:
        candidates, _ = misc.contract_range(asset, exch, cont_mth, today, today)
        unexpired = []
        for contract in candidates:
            if misc.contract_expiry(contract) >= today:
                unexpired.append(contract)
        cont_list[asset] = unexpired
        all_conts.extend(unexpired)
    res = web_sina_api.get_fut_quotes(all_conts)
    return cont_list, res
def comfwd_db_loader(market_data, fwd_index, dep_tenors=[]):
    """Load the commodity forward curve for *fwd_index* from the database.

    Args:
        market_data: mapping; ``market_data['market_date']`` selects the
            curve that is loaded.
        fwd_index: key into ``cmq_crv_defn.COM_Curve_Map``; the entry's
            ``'instID'`` is the product code that is queried.
        dep_tenors: not used by this loader.

    Returns:
        A list of ``[date, expiry, close]`` rows, where ``date`` is
        ``misc.inst2cont(instID)`` and ``expiry`` is
        ``misc.contract_expiry(instID, [])``.
    """
    curve_info = cmq_crv_defn.COM_Curve_Map[fwd_index]
    prod_code = curve_info['instID']
    mdate = market_data['market_date']
    cnx = dbaccess.connect(**dbaccess.dbconfig)
    try:
        df = dbaccess.load_fut_curve(cnx, prod_code, mdate)
    finally:
        # The connection is only needed for this one query; close it even
        # when the query raises so it is not leaked.
        cnx.close()
    if len(df) == 0:
        # Only a warning: processing continues on the empty frame.
        print("COMFwd data is not available for %s on %s" % (fwd_index, mdate))
    df['date'] = df['instID'].apply(misc.inst2cont)
    df['expiry'] = df['instID'].apply(lambda x: misc.contract_expiry(x, []))
    return df[['date', 'expiry', 'close']].values.tolist()
def comfwd_db_loader(market_data, fwd_index, dep_tenors = []):
    """Load the commodity forward curve for *fwd_index* from the database.

    Args:
        market_data: mapping; ``'market_db'`` holds the keyword arguments
            for ``dbaccess.connect`` and ``'market_key'`` selects the curve
            that is loaded. ``'market_date'`` is read but not otherwise
            used here.
        fwd_index: key into ``cmq_crv_defn.COM_Curve_Map``; the entry's
            ``'instID'`` is the product code that is queried and its
            ``'active_mths'`` lists the contract months that are kept.
        dep_tenors: not used by this loader.

    Returns:
        A list of ``[date, expiry, close]`` rows, where ``date`` is
        ``misc.inst2cont(instID)`` and ``expiry`` is
        ``misc.contract_expiry(instID, [])``.
    """
    curve_info = cmq_crv_defn.COM_Curve_Map[fwd_index]
    prod_code = curve_info['instID']
    mkt_db = market_data['market_db']
    mdate = market_data['market_date']
    mkey = market_data['market_key']
    cnx = dbaccess.connect(**mkt_db)
    try:
        df = dbaccess.load_fut_curve(cnx, prod_code, mkey)
    finally:
        # The connection is only needed for this one query; close it even
        # when the query raises so it is not leaked.
        cnx.close()
    if len(df) == 0:
        # Only a warning: processing continues on the empty frame.
        print("COMFwd data is not available for %s on %s" % (fwd_index, mkey))
    df['date'] = df['instID'].apply(misc.inst2cont)
    df['expiry'] = df['instID'].apply(lambda x: misc.contract_expiry(x, []))
    # Keep only contracts whose month is listed as active for this curve.
    df = df[pd.to_datetime(df.date).dt.month.isin(curve_info['active_mths'])]
    return df[['date', 'expiry', 'close']].values.tolist()
def save_hist_data(start_date, end_date, index_list = [], product_codes=[], spot_list = [], min_bar = False):
    """Download historical data with ``load_symbol`` and append it to the DB.

    Three groups of instruments are handled in turn:

    * ``index_list``    - ``(symbol, cmd_idx, desc)`` tuples; daily OHLCV is
      appended to ``fut_daily`` with ``instID = cmd_idx`` and ``exch`` taken
      from the part of ``symbol`` after the last ``'.'``.
    * ``product_codes`` - for every contract of each product whose expiry is
      on or after ``start_date``, daily bars are appended to ``fut_daily``
      and, when ``min_bar`` is true, minute bars (after ``process_min_id``)
      to ``fut_min``. The download window runs from
      ``max(expiry - 400 days, start_date)`` to ``min(expiry, today)``.
    * ``spot_list``     - ``(symbol, spotID, desc)`` tuples; daily closes are
      appended to ``spot_daily``.

    Args:
        start_date: first date requested.
        end_date: last date requested.
        index_list: index instruments to store.
        product_codes: futures product codes to store.
        spot_list: spot instruments to store.
        min_bar: also store minute bars for futures contracts.
    """
    conn = dbaccess.connect(**dbaccess.dbconfig)
    try:
        tday = datetime.date.today()
        for symbol, cmd_idx, desc in index_list:
            df = load_symbol(symbol, ['open', 'high', 'low', 'close', 'volume'], start_date, end_date)
            if len(df) > 0:
                df['instID'] = cmd_idx
                df['exch'] = symbol.split('.')[-1]
                print("saving daily data for instID = %s with number of data pts = %s" % (cmd_idx, len(df)))
                df.to_sql('fut_daily', conn, 'sqlite', if_exists='append', index=False)

        # Reverse of wind_exch_map; it does not depend on the product, so
        # it is built once instead of once per product.
        exch2wind_dict = dict((v, k) for k, v in wind_exch_map.items())
        fut_fields = ['open', 'high', 'low', 'close', 'volume', 'oi']
        for prodcode in product_codes:
            cont_mth, exch = dbaccess.prod_main_cont_exch(prodcode)
            cont_list, _ = misc.contract_range(prodcode, exch, cont_mth, start_date, end_date)
            exp_dates = [misc.contract_expiry(cont) for cont in cont_list]
            for cont, exp in zip(cont_list, exp_dates):
                if exp < start_date:
                    # Contract expired before the requested window.
                    continue
                symbol = cont + '.' + exch2wind_dict[exch]
                # The same window is used for the daily and minute requests.
                win_start = max(exp - datetime.timedelta(days = 400), start_date)
                win_end = min(exp, tday)
                ddf = load_symbol(symbol, fut_fields, win_start, win_end, freq = 'd')
                if len(ddf) > 0:
                    print("saving daily data for instID = %s with number of data pts = %s" % (cont, len(ddf)))
                    ddf['instID'] = cont
                    ddf['exch'] = exch
                    ddf.to_sql('fut_daily', conn, 'sqlite', if_exists='append', index=False)
                if min_bar:
                    mdf = load_symbol(symbol, fut_fields, win_start, win_end, freq='m')
                    if len(mdf) > 0:
                        print("saving min data for instID = %s with number of data pts = %s" % (cont, len(mdf)))
                        mdf['instID'] = cont
                        mdf['exch'] = exch
                        mdf = process_min_id(mdf)
                        mdf.to_sql('fut_min', conn, 'sqlite', if_exists='append', index=False)

        for symbol, spotID, desc in spot_list:
            df = load_symbol(symbol, ['close'], start_date, end_date)
            if len(df) > 0:
                df['spotID'] = spotID
                print("saving daily data for spotID = %s with number of data pts = %s" % (spotID, len(df)))
                df.to_sql('spot_daily', conn, 'sqlite', if_exists='append', index=False)
    finally:
        # Release the connection even if a download or insert fails.
        conn.close()
def simcontract_min(config_file):
    """Run a contract-by-contract minute-bar backtest described by a JSON file.

    The JSON config supplies, among others: ``sim_func`` (dotted path of the
    simulation function), ``products``, ``start_date`` / ``end_date``
    (``%Y%m%d``), ``scen_keys`` (names of config entries holding the value
    lists that span the scenario grid), ``config`` (base settings passed to
    the simulation function) and ``sim_name``. Optional keys are
    ``sim_mode`` (default ``'OR'``), ``calc_coeffs``, ``cont_maplist``,
    ``sim_period``, ``need_daily``, ``pos_class``, ``proc_func`` and
    ``offset``.

    For every asset group and every scenario the simulation function is run
    on each contract, and the following files are written under
    ``get_bktest_folder()``: ``<prefix><ix>_trades.csv``,
    ``<prefix><ix>_dailydata.csv``, ``<prefix>stats.json``,
    ``<prefix>cont_stat.csv`` and a shared ``summary.csv``. Scenarios whose
    trades and daily-data files both already exist are skipped.

    Args:
        config_file: path of the JSON configuration file.

    Returns:
        None.
    """
    with open(config_file, 'r') as fp:
        sim_config = json.load(fp)

    # Resolve the dotted 'sim_func' path: __import__ returns the top-level
    # package, so walk the remaining components with getattr.
    bktest_split = sim_config['sim_func'].split('.')
    run_sim = __import__('.'.join(bktest_split[:-1]))
    for attr_name in bktest_split[1:]:
        run_sim = getattr(run_sim, attr_name)

    # Output folder is named after the config file (without directory/extension).
    dir_name = config_file.split('.')[0]
    dir_name = dir_name.split(os.path.sep)[-1]
    test_folder = get_bktest_folder()
    file_prefix = test_folder + dir_name + os.path.sep
    if not os.path.exists(file_prefix):
        os.makedirs(file_prefix)

    sim_list = sim_config['products']
    if not isinstance(sim_list[0], list):
        # A flat list of products means one single-asset group per product.
        sim_list = [[str(asset)] for asset in sim_list]
    sim_mode = sim_config.get('sim_mode', 'OR')
    calc_coeffs = sim_config.get('calc_coeffs', [1, -1])
    cont_maplist = sim_config.get('cont_maplist', [])
    sim_period = sim_config.get('sim_period', '-12m')
    need_daily = sim_config.get('need_daily', False)
    if len(cont_maplist) == 0:
        cont_maplist = [[0]] * len(sim_list)

    config = {}
    start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date()
    config['start_date'] = start_date
    end_date = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date()
    config['end_date'] = end_date

    scen_dim = [len(sim_config[s]) for s in sim_config['scen_keys']]
    outcol_list = (['asset', 'scenario'] + sim_config['scen_keys']
                   + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days',
                      'max_drawdown', 'max_dd_period', 'profit_dd_ratio',
                      'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss',
                      'profit_per_win', 'profit_per_loss'])
    # Every scenario is one index tuple into the per-key value lists.
    scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))]

    config.update(sim_config['config'])
    # NOTE(security): eval() executes arbitrary expressions taken from the
    # config file; only run configuration files from a trusted source.
    if 'pos_class' in sim_config:
        config['pos_class'] = eval(sim_config['pos_class'])
    if 'proc_func' in sim_config:
        config['proc_func'] = eval(sim_config['proc_func'])

    file_prefix = file_prefix + sim_config['sim_name']
    if 'close_daily' in config and config['close_daily']:
        file_prefix = file_prefix + 'daily_'
    config['file_prefix'] = file_prefix

    summary_df = pd.DataFrame()
    fname = config['file_prefix'] + 'summary.csv'
    if os.path.isfile(fname):
        summary_df = pd.DataFrame.from_csv(fname)

    for assets, cont_map in zip(sim_list, cont_maplist):
        file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_'
        fname = file_prefix + 'stats.json'
        output = {'total': {}, 'cont': {}}
        if os.path.isfile(fname):
            # Resume from statistics saved by an earlier run.
            with open(fname, 'r') as fp:
                output = json.load(fp)

        min_data = {}
        day_data = {}
        config['tick_base'] = 0
        config['marginrate'] = (0, 0)
        config['exit_min'] = config.get('exit_min', 2057)
        config['no_trade_set'] = config.get('no_trade_set', [])

        # Default roll rule depends on the first asset; an explicit
        # 'rollrule' in the config overrides it.
        rollrule = '-50b'
        if assets[0] in ['cu', 'al', 'zn']:
            rollrule = '-1b'
        elif assets[0] in ['IF', 'IH', 'IC']:
            rollrule = '-2b'
        elif assets[0] in ['au', 'ag']:
            rollrule = '-25b'
        elif assets[0] in ['TF', 'T']:
            rollrule = '-20b'
        rollrule = config.get('rollrule', rollrule)

        contlist = {}
        exp_dates = {}
        for prod in assets:
            cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod)
            # NOTE(review): the result is stored as-is and iterated as the
            # contract list; confirm that misc.contract_range returns a
            # plain sequence of contracts here rather than a
            # (contracts, extra) tuple.
            contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date)
            exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]]
            edates = [misc.day_shift(d, rollrule) for d in exp_dates[prod]]
            sdates = [misc.day_shift(d, sim_period) for d in exp_dates[prod]]
            # Tick offsets add up across the assets; the margin rate is the
            # largest one among them.
            config['tick_base'] += trade_offset_dict[prod]
            config['marginrate'] = (max(config['marginrate'][0], sim_margin_dict[prod]),
                                    max(config['marginrate'][1], sim_margin_dict[prod]))
            min_data[prod] = {}
            day_data[prod] = {}
            for cont, sd, ed in zip(contlist[prod], sdates, edates):
                minid_start = 1500
                minid_end = 2114
                if prod in misc.night_session_markets:
                    minid_start = 300
                tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database = 'hist_data')
                tmp_df['contract'] = cont
                min_data[prod][cont] = cleanup_mindata(tmp_df, prod)
                if need_daily:
                    tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database = 'hist_data')
                    day_data[prod][cont] = tmp_df

        if 'offset' in sim_config:
            config['offset'] = sim_config['offset'] * config['tick_base']
        else:
            config['offset'] = config['tick_base']

        for ix, s in enumerate(scenarios):
            fname1 = file_prefix + str(ix) + '_trades.csv'
            fname2 = file_prefix + str(ix) + '_dailydata.csv'
            if os.path.isfile(fname1) and os.path.isfile(fname2):
                # Scenario already simulated in an earlier run.
                continue
            for key, seq in zip(sim_config['scen_keys'], s):
                config[key] = sim_config[key][seq]
            df_list = []
            trade_list = []
            for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
                cont = contlist[assets[0]][idx]
                edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule)
                if sim_mode == 'OR':
                    # Outright mode: simulate the first asset's contract directly.
                    mdf = min_data[assets[0]][cont]
                    mdf = mdf[mdf.date <= edate]
                    if need_daily:
                        ddf = day_data[assets[0]][cont]
                        config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                else:
                    # sim_mode is '<smode>-<cmode>': smode 'TS' combines
                    # contracts of the first asset, anything else combines
                    # one contract per asset; cmode 'Full' keeps all columns
                    # side by side, anything else builds a weighted series.
                    mode_keylist = sim_mode.split('-')
                    smode = mode_keylist[0]
                    cmode = mode_keylist[1]
                    if smode == 'TS':
                        all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                    else:
                        all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                    if cmode == 'Full':
                        mdf = pd.concat(all_data, axis = 1, join = 'inner')
                        # Suffix every column with the index of its source frame.
                        mdf.columns = [col + str(i) for i, x in enumerate(all_data) for col in x.columns]
                        mdf = mdf[mdf.date0 < edate]
                    else:
                        for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                            if i == 0:
                                xopen = tmpdf['open'] * coeff
                                xclose = tmpdf['close'] * coeff
                            else:
                                xopen = xopen + tmpdf['open'] * coeff
                                xclose = xclose + tmpdf['close'] * coeff
                        xopen = xopen.dropna()
                        xclose = xclose.dropna()
                        # High/low of the combination are taken from its own
                        # open and close only.
                        xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                        xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                        col_list = ['date', 'min_id', 'volume', 'openInterest']
                        mdf = pd.concat([xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                        mdf.columns = ['open', 'high', 'low', 'close'] + col_list
                        mdf['contract'] = cont
                    if need_daily:
                        if smode == 'TS':
                            all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                        else:
                            # Index the contract list first, then look the
                            # contract up; using the whole list as the dict
                            # key raises TypeError (unhashable list).
                            all_data = [day_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                        if cmode == 'Full':
                            ddf = pd.concat(all_data, axis = 1, join = 'inner')
                            ddf.columns = [col + str(i) for i, x in enumerate(all_data) for col in x.columns]
                            config['ddf'] = ddf[ddf.index <= edate]
                        else:
                            for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                                if i == 0:
                                    xopen = tmpdf['open'] * coeff
                                    xclose = tmpdf['close'] * coeff
                                else:
                                    xopen = xopen + tmpdf['open'] * coeff
                                    xclose = xclose + tmpdf['close'] * coeff
                            xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                            xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                            col_list = ['volume', 'openInterest']
                            ddf = pd.concat([xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                            ddf.columns = ['open', 'high', 'low', 'close'] + col_list
                            ddf['contract'] = cont
                            config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                df = mdf.copy(deep = True)
                df, closed_trades = run_sim(df, config)
                df_list.append(df)
                trade_list = trade_list + closed_trades
                # Per-contract statistics. The trade statistics are computed
                # on the trades accumulated so far in this scenario.
                (res_pnl, ts) = get_pnl_stats([df], config['capital'], config['marginrate'], 'm')
                res_trade = get_trade_stats(trade_list)
                res = dict(res_pnl)
                res.update(res_trade)
                res.update(dict(zip(sim_config['scen_keys'], s)))
                res['asset'] = cont
                if cont not in output['cont']:
                    output['cont'][cont] = {}
                output['cont'][cont][ix] = res

            # Scenario totals over all simulated contracts.
            (res_pnl, ts) = get_pnl_stats(df_list, config['capital'], config['marginrate'], 'm')
            res_trade = get_trade_stats(trade_list)
            res = dict(res_pnl)
            res.update(res_trade)
            res.update(dict(zip(sim_config['scen_keys'], s)))
            res['asset'] = '_'.join(assets)
            output['total'][ix] = res
            print('saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix)))
            all_trades = {}
            for i, tradepos in enumerate(trade_list):
                all_trades[i] = strat.tradepos2dict(tradepos)
            trades = pd.DataFrame.from_dict(all_trades).T
            trades.to_csv(fname1)
            ts.to_csv(fname2)
            fname = file_prefix + 'stats.json'
            try:
                with open(fname, 'w') as ofile:
                    json.dump(output, ofile)
            except Exception as exc:
                # Saving the statistics is best effort: report the failure
                # and carry on with the next scenario instead of hiding it.
                print('failed to save stats to %s: %s' % (fname, exc))
                continue

        # Per-contract table (first 20 scenario rows of each contract).
        cont_df = pd.DataFrame()
        for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
            cont = contlist[assets[0]][idx]
            if cont not in output['cont']:
                continue
            res = scen_dict_to_df(output['cont'][cont])
            out_res = res[outcol_list]
            if len(cont_df) == 0:
                cont_df = out_res[:20].copy(deep = True)
            else:
                cont_df = cont_df.append(out_res[:20])
        fname = file_prefix + 'cont_stat.csv'
        cont_df.to_csv(fname)

        # Summary across asset groups, rewritten after every group.
        res = scen_dict_to_df(output['total'])
        out_res = res[outcol_list]
        if len(summary_df) == 0:
            summary_df = out_res[:20].copy(deep = True)
        else:
            summary_df = summary_df.append(out_res[:20])
        fname = config['file_prefix'] + 'summary.csv'
        summary_df.to_csv(fname)
    return
def load_db_to_xlmkt(tag, xlfile = MKT_LATEST_XLFILE):
    """Export the market snapshot *tag* from the database to an Excel file.

    For every product in ``all_asset_list`` the curve is loaded and written
    side by side (one column block per product, starting on row 1) to the
    sheet given by the table key: ``fut_daily`` -> ``COMM``,
    ``fx_daily`` -> ``FX``, ``ir_daily`` -> ``IR``. Afterwards a spread is
    computed from the ``'i'``, ``'fef'`` and ``'USD/CNY'`` curves and
    written to cell ``F1`` of the ``COMM`` sheet, and the workbook is saved.

    Args:
        tag: snapshot key passed to ``dbaccess.load_fut_curve`` and
            ``dbaccess.load_cmvol_curve``.
        xlfile: path of the workbook; an existing file is opened with
            openpyxl and its sheets are reused, otherwise a new one is
            created.
    """
    if os.path.isfile(xlfile):
        book = openpyxl.load_workbook(xlfile)
        writer = pd.ExcelWriter(xlfile, engine='openpyxl')
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
    else:
        writer = pd.ExcelWriter(xlfile)
    # Curves needed later for the spread; filled while the tabs are written.
    req_data = {'i':None, 'fef':None, 'USD/CNY':None}
    prod_map = dict([(prod, cont) for (prod, exch, cont) in live_asset_list])
    xl_structure = {"fut_daily": "COMM", "fx_daily": "FX", "ir_daily": "IR"}
    vol_keys = ['COMVolATM', 'COMVolV90', 'COMVolV75', 'COMVolV25', 'COMVolV10']
    cnx = dbaccess.connect(**dbaccess.mktsnap_dbconfig)
    try:
        for tab_key in ['fut_daily', 'fx_daily', 'ir_daily']:
            for idx, prod_code in enumerate(all_asset_list[tab_key]):
                if tab_key == 'fut_daily':
                    df = dbaccess.load_fut_curve(cnx, prod_code, tag)
                    df['product_code'] = prod_code
                    df['tenor_label'] = df['instID'].apply(misc.inst2cont)
                    df['expiry_date'] = df['instID'].apply(lambda x: misc.contract_expiry(x, []))
                    df['exch'] = misc.prod2exch(prod_code)
                    df.rename(columns = {'close': 'COMFwd'}, inplace = True)
                    if prod_code in prod_map:
                        # Keep only the contract months listed for this product.
                        df = df[pd.to_datetime(df['tenor_label']).dt.month.isin(prod_map[prod_code])]
                    if prod_code in option_markets:
                        # Join the stored vol curve onto the forwards by tenor.
                        vol_tbl = dbaccess.load_cmvol_curve(cnx, prod_code, tag)
                        vol_tbl = vol_tbl.set_index('tenor_label')
                        vol_tbl.drop(['expiry_date'], axis = 1, inplace = True)
                        df = df.set_index('tenor_label')
                        df = pd.concat([df, vol_tbl], axis = 1)
                        df = df.reset_index()
                        df.rename(columns = {'index': 'tenor_label'}, inplace = True)
                    else:
                        # No stored vols: flat 0.2 ATM vol and zero for the
                        # other vol columns.
                        for key in vol_keys:
                            if key == 'COMVolATM':
                                df[key] = 0.2
                            else:
                                df[key] = 0.0
                    df = df[['product_code', 'instID', 'exch', 'tenor_label', 'expiry_date', 'COMFwd']
                            + vol_keys].fillna(method = 'ffill')
                    df['CalSpread'] = (df['COMFwd'] - df['COMFwd'].shift(-1)).fillna(method = 'ffill')
                    multi = 13
                elif tab_key == 'fx_daily':
                    df = dbaccess.load_fut_curve(cnx, prod_code, tag, dbtable='fx_daily', field='ccy')
                    df = df[df['rate']>0]
                    # NOTE(review): [0] is a label lookup; it fails if the
                    # row labelled 0 was filtered out above - confirm.
                    df['fwd_points'] = df['rate'] - df['rate'][0]
                    df = df[['ccy', 'tenor', 'rate', 'fwd_points']]
                    multi = 5
                elif tab_key == 'ir_daily':
                    df = dbaccess.load_fut_curve(cnx, prod_code, tag, dbtable='ir_daily', field='ir_index')
                    df = df[['ir_index', 'tenor', 'rate']]
                    multi = 4
                # 'multi' is the column width reserved per product on the sheet.
                df.to_excel(writer, xl_structure[tab_key], index = False, startcol = idx * multi, startrow = 1)
                if prod_code in req_data:
                    req_data[prod_code] = df
    finally:
        # All database reads are done (or failed); release the connection.
        cnx.close()
    # do the SGX-DCE spread calc
    tday = datetime.date.today()
    # Use the second 'i' contract once today has reached the first tenor.
    if tday >= req_data['i']['tenor_label'][0]:
        dce_prompt = req_data['i']['instID'][1]
    else:
        dce_prompt = req_data['i']['instID'][0]
    # The 'fef' contract shares the last four characters of the 'i' contract.
    sgx_prompt = 'fef' + dce_prompt[-4:]
    sgx_price = float(req_data['fef'].loc[req_data['fef']['instID'] == sgx_prompt, 'COMFwd'])
    dce_price = float(req_data['i'].loc[req_data['i']['instID'] == dce_prompt, 'COMFwd'])
    fx = float(req_data['USD/CNY']['rate'][0])
    # NOTE(review): 30.0, 0.92 and 1.16 are hard-coded conversion constants
    # whose meaning is not documented here - confirm before changing.
    sgx_dce_spd = sgx_price - (dce_price - 30.0/0.92)/1.16/fx
    wb = writer.book
    wb['COMM']['F1'] = sgx_dce_spd
    writer.save()