def rolling_hist_data(product, n, start_date, end_date, cont_roll, freq, win_roll='-20b', database='hist_data'):
    if start_date > end_date:
        return None
    cnx = mysql.connector.connect(**mysqlaccess.dbconfig)
    cursor = cnx.cursor()
    stmt = "select exchange, contract from trade_products where product_code='{prod}' ".format(prod=product)
    cursor.execute(stmt)
    out = [(exchange, contract) for (exchange, contract) in cursor]
    exch = str(out[0][0])
    cont = str(out[0][1])
    cont_mth = [month_code_map[c] for c in cont]
    cnx.close()
    contlist = contract_range(product, exch, cont_mth, start_date, end_date)
    exp_dates = [day_shift(contract_expiry(cont), cont_roll) for cont in contlist]
    #print contlist, exp_dates
    sdate = start_date
    all_data = {}
    i = 0
    for idx, exp in enumerate(exp_dates):
        if exp < start_date:
            continue
        elif sdate > end_date:
            break
        nb_cont = contlist[idx+n-1]
        if freq == 'd':
            df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, day_shift(sdate, win_roll), min(exp, end_date), database=database)
        else:
            df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, day_shift(sdate, win_roll), min(exp, end_date), database=database)
        all_data[i] = {'contract': nb_cont, 'data': df}
        i += 1
        sdate = min(exp, end_date) + datetime.timedelta(days=1)
    return all_data

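# Usage sketch (assumption, not from the original source): load one year of
# daily bars for the front contract of an illustrative product code 'rb',
# rolling 20 business days before each expiry; win_roll pads every window with
# 20 business days of extra history. Requires a populated 'hist_data' schema.
#
#   import datetime
#   data = rolling_hist_data('rb', 1, datetime.date(2015, 1, 1),
#                            datetime.date(2015, 12, 31),
#                            cont_roll='-20b', freq='d')
#   for i in sorted(data):
#       print data[i]['contract'], len(data[i]['data'])
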
def fix_daily_data(contlist, sdate, edate):
    for inst in contlist:
        ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate, edate)
        mdf = mysqlaccess.load_min_data_to_df('fut_min', inst, sdate, edate, minid_start=300, minid_end=2115)
        dailydata = data_handler.conv_ohlc_freq(mdf, 'D')
        for dd in dailydata.index:
            d = dd.date()
            dslice = dailydata.ix[dd]
            if d not in ddf.index:
                ddata = {}
                ddata['date'] = d
                ddata['open'] = float(dslice.open)
                ddata['close'] = float(dslice.close)
                ddata['high'] = float(dslice.high)
                ddata['low'] = float(dslice.low)
                ddata['volume'] = int(dslice.volume)
                ddata['openInterest'] = int(dslice.openInterest)
                print inst, ddata
                mysqlaccess.insert_daily_data(inst, ddata)

def validate_db_data(tday, filter=False):
    all_insts = filter_main_cont(tday, filter)
    data_count = {}
    inst_list = {'min': [], 'daily': []}
    for instID in all_insts:
        df = mysqlaccess.load_daily_data_to_df('fut_daily', instID, tday, tday)
        if len(df) <= 0:
            inst_list['daily'].append(instID)
        elif (df.close[-1] == 0) or (df.high[-1] == 0) or (df.low[-1] == 0) or (df.open[-1] == 0):
            inst_list['daily'].append(instID)
        df = mysqlaccess.load_min_data_to_df('fut_min', instID, tday, tday, minid_start=300, minid_end=2115, database='blueshale')
        if len(df) <= 100:
            output = instID + ':' + str(len(df))
            inst_list['min'].append(output)
        elif df.min_id[-1] < 2055:
            # flag sessions whose last minute bar ends before min_id 2055
            output = instID + ': end earlier'
            inst_list['min'].append(output)
    print inst_list

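# Usage sketch (assumption): run the check for a single trading day after the
# data load; instruments with missing or zero daily bars, too few minute bars,
# or a session that ends early are printed for manual follow-up.
#
#   import datetime
#   validate_db_data(datetime.date(2016, 3, 1), filter=True)
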
def get_cont_data(asset, start_date, end_date, freq='1m', nearby=1, rollrule='-10b'):
    if nearby == 0:
        mdf = mysqlaccess.load_min_data_to_df('fut_min', asset, start_date, end_date, minid_start=300, minid_end=2114, database='hist_data')
        mdf['contract'] = asset
    else:
        mdf = misc.nearby(asset, nearby, start_date, end_date, rollrule, 'm', need_shift=True, database='hist_data')
    mdf = backtest.cleanup_mindata(mdf, asset)
    xdf = dh.conv_ohlc_freq(mdf, freq, extra_cols=['contract'], bar_func=dh.bar_conv_func2)
    return xdf

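# Usage sketch (assumption): build a continuous minute-bar series for an
# illustrative product 'rb' from the nearby (front) contract, rolling 10
# business days before expiry with back-adjusted prices; freq can be changed
# to resample the result.
#
#   import datetime
#   xdf = get_cont_data('rb', datetime.date(2015, 1, 1),
#                       datetime.date(2015, 12, 31),
#                       nearby=1, rollrule='-10b')
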
def prepare_data_env(self, inst, mid_day=True):
    if self.instruments[inst].ptype == instrument.ProductType.Option:
        return
    if self.daily_data_days > 0 or mid_day:
        self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d'))
        daily_start = workdays.workday(self.scur_day, -self.daily_data_days, CHN_Holidays)
        daily_end = self.scur_day
        self.day_data[inst] = mysqlaccess.load_daily_data_to_df('fut_daily', inst, daily_start, daily_end)
        df = self.day_data[inst]
        if len(df) > 0:
            self.instruments[inst].price = df['close'][-1]
            self.instruments[inst].last_update = 0
            self.instruments[inst].prev_close = df['close'][-1]
        for fobj in self.day_data_func[inst]:
            ts = fobj.sfunc(df)
            df[ts.name] = pd.Series(ts, index=df.index)
    if self.min_data_days > 0 or mid_day:
        self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d'))
        d_start = workdays.workday(self.scur_day, -self.min_data_days, CHN_Holidays)
        d_end = self.scur_day
        min_start = int(self.instruments[inst].start_tick_id/1000)
        min_end = int(self.instruments[inst].last_tick_id/1000) + 1
        mindata = mysqlaccess.load_min_data_to_df('fut_min', inst, d_start, d_end, minid_start=min_start, minid_end=min_end, database='blueshale')
        mindata = backtest.cleanup_mindata(mindata, self.instruments[inst].product)
        self.min_data[inst][1] = mindata
        if len(mindata) > 0:
            min_date = mindata.index[-1].date()
            if (len(self.day_data[inst].index) == 0) or (min_date > self.day_data[inst].index[-1]):
                ddf = data_handler.conv_ohlc_freq(mindata, 'd')
                self.cur_day[inst]['open'] = float(ddf.open[-1])
                self.cur_day[inst]['close'] = float(ddf.close[-1])
                self.cur_day[inst]['high'] = float(ddf.high[-1])
                self.cur_day[inst]['low'] = float(ddf.low[-1])
                self.cur_day[inst]['volume'] = int(ddf.volume[-1])
                self.cur_day[inst]['openInterest'] = int(ddf.openInterest[-1])
                self.cur_min[inst]['datetime'] = pd.datetime(*mindata.index[-1].timetuple()[0:-3])
                self.cur_min[inst]['open'] = float(mindata.ix[-1, 'open'])
                self.cur_min[inst]['close'] = float(mindata.ix[-1, 'close'])
                self.cur_min[inst]['high'] = float(mindata.ix[-1, 'high'])
                self.cur_min[inst]['low'] = float(mindata.ix[-1, 'low'])
                self.cur_min[inst]['volume'] = self.cur_day[inst]['volume']
                self.cur_min[inst]['openInterest'] = self.cur_day[inst]['openInterest']
                self.cur_min[inst]['min_id'] = int(mindata.ix[-1, 'min_id'])
                self.instruments[inst].price = float(mindata.ix[-1, 'close'])
                self.instruments[inst].last_update = 0
            self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date))
        for m in self.min_data_func[inst]:
            if m != 1:
                self.min_data[inst][m] = data_handler.conv_ohlc_freq(self.min_data[inst][1], str(m)+'min')
            df = self.min_data[inst][m]
            for fobj in self.min_data_func[inst][m]:
                ts = fobj.sfunc(df)
                df[ts.name] = pd.Series(ts, index=df.index)

def nearby(prodcode, n, start_date, end_date, roll_rule, freq, need_shift=False, database='hist_data'):
    if start_date > end_date:
        return None
    cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode)
    contlist = contract_range(prodcode, exch, cont_mth, start_date, day_shift(end_date, roll_rule[1:]))
    exp_dates = [day_shift(contract_expiry(cont), roll_rule) for cont in contlist]
    #print contlist, exp_dates
    sdate = start_date
    is_new = True
    for idx, exp in enumerate(exp_dates):
        if exp < start_date:
            continue
        elif sdate > end_date:
            break
        nb_cont = contlist[idx+n-1]
        if freq == 'd':
            new_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, sdate, min(exp, end_date), database=database)
        else:
            minid_start = 1500
            minid_end = 2114
            if prodcode in night_session_markets:
                minid_start = 300
            new_df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, sdate, min(exp, end_date), minid_start, minid_end, database=database)
        if len(new_df.shape) == 0:
            continue
        nn = new_df.shape[0]
        if nn > 0:
            new_df['contract'] = pd.Series([nb_cont]*nn, index=new_df.index)
        else:
            continue
        if is_new:
            df = new_df
            is_new = False
        else:
            if need_shift:
                if isinstance(df.index[-1], datetime.datetime):
                    last_date = df.index[-1].date()
                else:
                    last_date = df.index[-1]
                tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, last_date, last_date, database=database)
                shift = tmp_df['close'][-1] - df['close'][-1]
                for ticker in ['open', 'high', 'low', 'close']:
                    df[ticker] = df[ticker] + shift
            df = df.append(new_df)
        sdate = min(exp, end_date) + datetime.timedelta(days=1)
    return df

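# Usage sketch (assumption): stitch a back-adjusted nearby daily series for an
# illustrative product 'm'; need_shift=True adds the roll gap to the earlier
# contract's OHLC so the joined series has no price jump at each roll.
#
#   import datetime
#   df = nearby('m', 1, datetime.date(2014, 1, 1), datetime.date(2015, 12, 31),
#               roll_rule='-10b', freq='d', need_shift=True)
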
def prepare_data_env(self, inst, mid_day=True):
    if self.instruments[inst].ptype == instrument.ProductType.Option:
        return
    if self.daily_data_days > 0 or mid_day:
        #self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d'))
        daily_start = workdays.workday(self.scur_day, -self.daily_data_days, CHN_Holidays)
        daily_end = self.scur_day
        ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, daily_start, daily_end, index_col=None)
        if len(ddf) > 0:
            self.instruments[inst].price = ddf['close'].iloc[-1]
            self.instruments[inst].last_update = 0
            self.instruments[inst].prev_close = ddf['close'].iloc[-1]
        for fobj in self.day_data_func[inst]:
            ts = fobj.sfunc(ddf)
            if type(ts).__name__ == 'Series':
                if ts.name in ddf.columns:
                    self.logger.warning('TimeSeries name %s is already in the columns for inst = %s' % (ts.name, inst))
                ddf[ts.name] = ts
            elif type(ts).__name__ == 'DataFrame':
                for col_name in ts.columns:
                    if col_name in ddf.columns:
                        self.logger.warning('TimeSeries name %s is already in the columns for inst = %s' % (col_name, inst))
                    ddf[col_name] = ts[col_name]
        self.day_data[inst] = data_handler.DynamicRecArray(dataframe=ddf)
    if self.min_data_days > 0 or mid_day:
        #self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d'))
        d_start = workdays.workday(self.scur_day, -self.min_data_days, CHN_Holidays)
        d_end = self.scur_day
        min_start = int(self.instruments[inst].start_tick_id / 1000)
        min_end = int(self.instruments[inst].last_tick_id / 1000) + 1
        mdf = mysqlaccess.load_min_data_to_df('fut_min', inst, d_start, d_end, minid_start=min_start, minid_end=min_end, database='blueshale', index_col=None)
        mdf = backtest.cleanup_mindata(mdf, self.instruments[inst].product, index_col=None)
        mdf['bar_id'] = self.conv_bar_id(mdf['min_id'], inst)
        if len(mdf) > 0:
            min_date = mdf['date'].iloc[-1]
            if (len(self.day_data[inst]) == 0) or (min_date > self.day_data[inst].data['date'][-1]):
                ddf = data_handler.conv_ohlc_freq(mdf, 'd', index_col=None)
                self.cur_day[inst]['open'] = float(ddf.open[-1])
                self.cur_day[inst]['close'] = float(ddf.close[-1])
                self.cur_day[inst]['high'] = float(ddf.high[-1])
                self.cur_day[inst]['low'] = float(ddf.low[-1])
                self.cur_day[inst]['volume'] = int(ddf.volume[-1])
                self.cur_day[inst]['openInterest'] = int(ddf.openInterest[-1])
                self.cur_min[inst]['datetime'] = pd.datetime(*mdf['datetime'].iloc[-1].timetuple()[0:-3])
                self.cur_min[inst]['date'] = mdf['date'].iloc[-1]
                self.cur_min[inst]['open'] = float(mdf['open'].iloc[-1])
                self.cur_min[inst]['close'] = float(mdf['close'].iloc[-1])
                self.cur_min[inst]['high'] = float(mdf['high'].iloc[-1])
                self.cur_min[inst]['low'] = float(mdf['low'].iloc[-1])
                self.cur_min[inst]['volume'] = self.cur_day[inst]['volume']
                self.cur_min[inst]['openInterest'] = self.cur_day[inst]['openInterest']
                self.cur_min[inst]['min_id'] = int(mdf['min_id'].iloc[-1])
                self.cur_min[inst]['bar_id'] = self.conv_bar_id(self.cur_min[inst]['min_id'], inst)
                self.instruments[inst].price = float(mdf['close'].iloc[-1])
                self.instruments[inst].last_update = 0
            #self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date))
        if 1 not in self.min_data_func[inst]:
            self.min_data[inst][1] = data_handler.DynamicRecArray(dataframe=mdf)
        for m in sorted(self.min_data_func[inst]):
            if m != 1:
                bar_func = lambda ts: self.conv_bar_id(ts, inst)
                mdf_m = data_handler.conv_ohlc_freq(mdf, str(m) + 'min', index_col=None, bar_func=bar_func, extra_cols=['bar_id'])
            else:
                mdf_m = mdf
            for fobj in self.min_data_func[inst][m]:
                ts = fobj.sfunc(mdf_m)
                if type(ts).__name__ == 'Series':
                    if ts.name in mdf_m.columns:
                        self.logger.warning('TimeSeries name %s is already in the columns for inst = %s' % (ts.name, inst))
                    mdf_m[ts.name] = ts
                elif type(ts).__name__ == 'DataFrame':
                    for col_name in ts.columns:
                        if col_name in mdf_m.columns:
                            self.logger.warning('TimeSeries name %s is already in the columns for inst = %s' % (col_name, inst))
                        mdf_m[col_name] = ts[col_name]
            self.min_data[inst][m] = data_handler.DynamicRecArray(dataframe=mdf_m)

def simcontract_min(config_file):
    sim_config = {}
    with open(config_file, 'r') as fp:
        sim_config = json.load(fp)
    bktest_split = sim_config['sim_func'].split('.')
    run_sim = __import__('.'.join(bktest_split[:-1]))
    for i in range(1, len(bktest_split)):
        run_sim = getattr(run_sim, bktest_split[i])
    dir_name = config_file.split('.')[0]
    dir_name = dir_name.split(os.path.sep)[-1]
    test_folder = get_bktest_folder()
    file_prefix = test_folder + dir_name + os.path.sep
    if not os.path.exists(file_prefix):
        os.makedirs(file_prefix)
    sim_list = sim_config['products']
    if type(sim_list[0]).__name__ != 'list':
        sim_list = [[str(asset)] for asset in sim_list]
    sim_mode = sim_config.get('sim_mode', 'OR')
    calc_coeffs = sim_config.get('calc_coeffs', [1, -1])
    cont_maplist = sim_config.get('cont_maplist', [])
    sim_period = sim_config.get('sim_period', '-12m')
    need_daily = sim_config.get('need_daily', False)
    if len(cont_maplist) == 0:
        cont_maplist = [[0]] * len(sim_list)
    config = {}
    start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date()
    config['start_date'] = start_date
    end_date = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date()
    config['end_date'] = end_date
    scen_dim = [len(sim_config[s]) for s in sim_config['scen_keys']]
    outcol_list = ['asset', 'scenario'] + sim_config['scen_keys'] \
                + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days',
                   'max_drawdown', 'max_dd_period', 'profit_dd_ratio',
                   'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss',
                   'profit_per_win', 'profit_per_loss']
    scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))]
    config.update(sim_config['config'])
    if 'pos_class' in sim_config:
        config['pos_class'] = eval(sim_config['pos_class'])
    if 'proc_func' in sim_config:
        config['proc_func'] = eval(sim_config['proc_func'])
    file_prefix = file_prefix + sim_config['sim_name']
    if 'close_daily' in config and config['close_daily']:
        file_prefix = file_prefix + 'daily_'
    config['file_prefix'] = file_prefix
    summary_df = pd.DataFrame()
    fname = config['file_prefix'] + 'summary.csv'
    if os.path.isfile(fname):
        summary_df = pd.DataFrame.from_csv(fname)
    for assets, cont_map in zip(sim_list, cont_maplist):
        file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_'
        fname = file_prefix + 'stats.json'
        output = {'total': {}, 'cont': {}}
        if os.path.isfile(fname):
            with open(fname, 'r') as fp:
                output = json.load(fp)
        #if len(output['total'].keys()) == len(scenarios):
        #    continue
        min_data = {}
        day_data = {}
        config['tick_base'] = 0
        config['marginrate'] = (0, 0)
        rollrule = '-50b'
        config['exit_min'] = config.get('exit_min', 2057)
        config['no_trade_set'] = config.get('no_trade_set', [])
        if assets[0] in ['cu', 'al', 'zn']:
            rollrule = '-1b'
        elif assets[0] in ['IF', 'IH', 'IC']:
            rollrule = '-2b'
        elif assets[0] in ['au', 'ag']:
            rollrule = '-25b'
        elif assets[0] in ['TF', 'T']:
            rollrule = '-20b'
        rollrule = config.get('rollrule', rollrule)
        contlist = {}
        exp_dates = {}
        for i, prod in enumerate(assets):
            cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod)
            contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date)
            exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]]
            edates = [misc.day_shift(d, rollrule) for d in exp_dates[prod]]
            sdates = [misc.day_shift(d, sim_period) for d in exp_dates[prod]]
            config['tick_base'] += trade_offset_dict[prod]
            config['marginrate'] = (max(config['marginrate'][0], sim_margin_dict[prod]),
                                    max(config['marginrate'][1], sim_margin_dict[prod]))
            min_data[prod] = {}
            day_data[prod] = {}
            for cont, sd, ed in zip(contlist[prod], sdates, edates):
                minid_start = 1500
                minid_end = 2114
                if prod in misc.night_session_markets:
                    minid_start = 300
                tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database='hist_data')
                tmp_df['contract'] = cont
                min_data[prod][cont] = cleanup_mindata(tmp_df, prod)
                if need_daily:
                    tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database='hist_data')
                    day_data[prod][cont] = tmp_df
        if 'offset' in sim_config:
            config['offset'] = sim_config['offset'] * config['tick_base']
        else:
            config['offset'] = config['tick_base']
        for ix, s in enumerate(scenarios):
            fname1 = file_prefix + str(ix) + '_trades.csv'
            fname2 = file_prefix + str(ix) + '_dailydata.csv'
            if os.path.isfile(fname1) and os.path.isfile(fname2):
                continue
            for key, seq in zip(sim_config['scen_keys'], s):
                config[key] = sim_config[key][seq]
            df_list = []
            trade_list = []
            for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
                cont = contlist[assets[0]][idx]
                edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule)
                if sim_mode == 'OR':
                    mdf = min_data[assets[0]][cont]
                    mdf = mdf[mdf.date <= edate]
                    if need_daily:
                        ddf = day_data[assets[0]][cont]
                        config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                else:
                    mode_keylist = sim_mode.split('-')
                    smode = mode_keylist[0]
                    cmode = mode_keylist[1]
                    all_data = []
                    if smode == 'TS':
                        all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                    else:
                        all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                    if cmode == 'Full':
                        mdf = pd.concat(all_data, axis=1, join='inner')
                        mdf.columns = [col + str(i) for i, x in enumerate(all_data) for col in x.columns]
                        mdf = mdf[mdf.date0 < edate]
                    else:
                        #print all_data[0], all_data[1]
                        for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                            if i == 0:
                                xopen = tmpdf['open'] * coeff
                                xclose = tmpdf['close'] * coeff
                            else:
                                xopen = xopen + tmpdf['open'] * coeff
                                xclose = xclose + tmpdf['close'] * coeff
                        xopen = xopen.dropna()
                        xclose = xclose.dropna()
                        xhigh = pd.concat([xopen, xclose], axis=1).max(axis=1)
                        xlow = pd.concat([xopen, xclose], axis=1).min(axis=1)
                        col_list = ['date', 'min_id', 'volume', 'openInterest']
                        mdf = pd.concat([xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis=1, join='inner')
                        mdf.columns = ['open', 'high', 'low', 'close'] + col_list
                    mdf['contract'] = cont
                    #print mdf
                    if need_daily:
                        if smode == 'TS':
                            all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                        else:
                            all_data = [day_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                        if cmode == 'Full':
                            ddf = pd.concat(all_data, axis=1, join='inner')
                            ddf.columns = [col + str(i) for i, x in enumerate(all_data) for col in x.columns]
                            config['ddf'] = ddf[ddf.index <= edate]
                        else:
                            for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                                if i == 0:
                                    xopen = tmpdf['open'] * coeff
                                    xclose = tmpdf['close'] * coeff
                                else:
                                    xopen = xopen + tmpdf['open'] * coeff
                                    xclose = xclose + tmpdf['close'] * coeff
                            xhigh = pd.concat([xopen, xclose], axis=1).max(axis=1)
                            xlow = pd.concat([xopen, xclose], axis=1).min(axis=1)
                            col_list = ['volume', 'openInterest']
                            ddf = pd.concat([xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis=1, join='inner')
                            ddf.columns = ['open', 'high', 'low', 'close'] + col_list
                            ddf['contract'] = cont
                            config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                df = mdf.copy(deep=True)
                df, closed_trades = run_sim(df, config)
                df_list.append(df)
                trade_list = trade_list + closed_trades
                (res_pnl, ts) = get_pnl_stats([df], config['capital'], config['marginrate'], 'm')
                res_trade = get_trade_stats(trade_list)
                res = dict(res_pnl.items() + res_trade.items())
                res.update(dict(zip(sim_config['scen_keys'], s)))
                res['asset'] = cont
                if cont not in output['cont']:
                    output['cont'][cont] = {}
                output['cont'][cont][ix] = res
            (res_pnl, ts) = get_pnl_stats(df_list, config['capital'], config['marginrate'], 'm')
            res_trade = get_trade_stats(trade_list)
            res = dict(res_pnl.items() + res_trade.items())
            res.update(dict(zip(sim_config['scen_keys'], s)))
            res['asset'] = '_'.join(assets)
            output['total'][ix] = res
            print 'saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix))
            all_trades = {}
            for i, tradepos in enumerate(trade_list):
                all_trades[i] = strat.tradepos2dict(tradepos)
            trades = pd.DataFrame.from_dict(all_trades).T
            trades.to_csv(fname1)
            ts.to_csv(fname2)
            fname = file_prefix + 'stats.json'
            try:
                with open(fname, 'w') as ofile:
                    json.dump(output, ofile)
            except:
                continue
        cont_df = pd.DataFrame()
        for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
            cont = contlist[assets[0]][idx]
            if cont not in output['cont']:
                continue
            res = scen_dict_to_df(output['cont'][cont])
            out_res = res[outcol_list]
            if len(cont_df) == 0:
                cont_df = out_res[:20].copy(deep=True)
            else:
                cont_df = cont_df.append(out_res[:20])
        fname = file_prefix + 'cont_stat.csv'
        cont_df.to_csv(fname)
        res = scen_dict_to_df(output['total'])
        out_res = res[outcol_list]
        if len(summary_df) == 0:
            summary_df = out_res[:20].copy(deep=True)
        else:
            summary_df = summary_df.append(out_res[:20])
        fname = config['file_prefix'] + 'summary.csv'
        summary_df.to_csv(fname)
    return

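# Config sketch (assumption, illustrative field names and values only): the
# JSON file driving simcontract_min looks roughly like the block below.
# 'sim_func' is a dotted path to the backtest entry function, 'scen_keys'
# name the swept parameters (here 'win' and 'ratio' are hypothetical), and
# 'config' carries the fixed backtest settings such as 'capital'.
#
#   {
#       "sim_name": "rb_breakout",
#       "sim_func": "bktest_breakout.run_breakout_sim",
#       "products": ["rb"],
#       "sim_mode": "OR",
#       "start_date": "20140101",
#       "end_date": "20151231",
#       "scen_keys": ["win", "ratio"],
#       "win": [10, 20, 40],
#       "ratio": [0.5, 1.0],
#       "config": {"capital": 10000.0, "close_daily": false}
#   }
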
def hist_realized_vol_by_product(prodcode, start_d, end_d, periods=12, tenor='-1m', writeDB=False):
    cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode)
    contlist = contract_range(prodcode, exch, cont_mth, start_d, end_d)
    exp_dates = [get_opt_expiry(cont, inst2contmth(cont)) for cont in contlist]
    data = {
        'is_dtime': True,
        'data_column': 'close',
        'data_freq': '30min',
        'xs': [0.5, 0.25, 0.75],
        'xs_names': ['atm', 'v25', 'v75'],
        'xs_func': 'bs_delta_to_ratio',
        'rehedge_period': 1,
        'term_tenor': tenor,
        'database': 'hist_data'
    }
    option_input = {
        'otype': True,
        'rd': 0.0,
        'rf': 0.0,
        'end_vol': 0.0,
        'ref_vol': 0.5,
        'pricer_func': 'bsopt.BSOpt',
        'delta_func': 'bsopt.BSDelta',
        'is_dtime': data['is_dtime'],
    }
    freq = data['data_freq']
    for cont, expiry in zip(contlist, exp_dates):
        expiry_d = expiry.date()
        if expiry_d > end_d:
            break
        p_str = '-' + str(int(tenor[1:-1]) * periods) + tenor[-1]
        d_start = day_shift(expiry_d, p_str)
        if freq == 'd':
            df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, d_start, expiry_d, database=data['database'], index_col=None)
        else:
            mdf = mysqlaccess.load_min_data_to_df('fut_min', cont, d_start, expiry_d, minid_start=300, minid_end=2115, database=data['database'], index_col=None)
            mdf = backtest.cleanup_mindata(mdf, prodcode, index_col=None)
            mdf['bar_id'] = dh.bar_conv_func2(mdf['min_id'])
            df = dh.conv_ohlc_freq(mdf, freq, bar_func=dh.bar_conv_func2, extra_cols=['bar_id'], index_col=None)
        option_input['expiry'] = expiry
        data['dataframe'] = df
        vol_df = realized_termstruct(option_input, data)
        print cont, expiry_d, vol_df

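# Usage sketch (assumption): print a realized-vol term structure per contract
# for an illustrative product 'm' over 2015, built from the 30-minute close
# series assembled inside the function (12 monthly tenors per contract).
#
#   import datetime
#   hist_realized_vol_by_product('m', datetime.date(2015, 1, 1),
#                                datetime.date(2015, 12, 31),
#                                periods=12, tenor='-1m')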