Exemple #1
0
def rolling_hist_data(product, n, start_date, end_date, cont_roll, freq, win_roll= '-20b', database = 'hist_data'):
    if start_date > end_date: 
        return None
    cnx = mysql.connector.connect(**mysqlaccess.dbconfig)
    cursor = cnx.cursor()
    stmt = "select exchange, contract from trade_products where product_code='{prod}' ".format(prod=product)
    cursor.execute(stmt)
    out = [(exchange, contract) for (exchange, contract) in cursor]
    exch = str(out[0][0])
    cont = str(out[0][1])
    cont_mth = [month_code_map[c] for c in cont]
    cnx.close()  
    contlist = contract_range(product, exch, cont_mth, start_date, end_date)
    exp_dates = [day_shift(contract_expiry(cont), cont_roll) for cont in contlist]
    #print contlist, exp_dates
    sdate = start_date
    all_data = {}
    i = 0
    for idx, exp in enumerate(exp_dates):
        if exp < start_date:
            continue
        elif sdate > end_date:
            break
        nb_cont = contlist[idx+n-1]
        if freq == 'd':
            df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, day_shift(sdate,win_roll), min(exp,end_date), database = database)
        else:
            df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, day_shift(sdate,win_roll), min(exp,end_date), database = database)
        all_data[i] = {'contract': nb_cont, 'data': df}
        i += 1
        sdate = min(exp,end_date) + datetime.timedelta(days=1)
    return all_data    
Exemple #2
0
def fix_daily_data(contlist, sdate, edate):
    for inst in contlist:
        ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate,
                                                edate)
        mdf = mysqlaccess.load_min_data_to_df('fut_min',
                                              inst,
                                              sdate,
                                              edate,
                                              minid_start=300,
                                              minid_end=2115)
        dailydata = data_handler.conv_ohlc_freq(mdf, 'D')
        for dd in dailydata.index:
            d = dd.date()
            dslice = dailydata.ix[dd]
            if d not in ddf.index:
                ddata = {}
                ddata['date'] = d
                ddata['open'] = float(dslice.open)
                ddata['close'] = float(dslice.close)
                ddata['high'] = float(dslice.high)
                ddata['low'] = float(dslice.low)
                ddata['volume'] = int(dslice.volume)
                ddata['openInterest'] = int(dslice.openInterest)
                print inst, ddata
                mysqlaccess.insert_daily_data(inst, ddata)
Exemple #3
0
def rolling_hist_data(product, n, start_date, end_date, cont_roll, freq, win_roll= '-20b', database = 'hist_data'):
    if start_date > end_date: 
        return None
    cnx = mysql.connector.connect(**mysqlaccess.dbconfig)
    cursor = cnx.cursor()
    stmt = "select exchange, contract from trade_products where product_code='{prod}' ".format(prod=product)
    cursor.execute(stmt)
    out = [(exchange, contract) for (exchange, contract) in cursor]
    exch = str(out[0][0])
    cont = str(out[0][1])
    cont_mth = [month_code_map[c] for c in cont]
    cnx.close()  
    contlist = contract_range(product, exch, cont_mth, start_date, end_date)
    exp_dates = [day_shift(contract_expiry(cont), cont_roll) for cont in contlist]
    #print contlist, exp_dates
    sdate = start_date
    all_data = {}
    i = 0
    for idx, exp in enumerate(exp_dates):
        if exp < start_date:
            continue
        elif sdate > end_date:
            break
        nb_cont = contlist[idx+n-1]
        if freq == 'd':
            df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, day_shift(sdate,win_roll), min(exp,end_date), database = database)
        else:
            df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, day_shift(sdate,win_roll), min(exp,end_date), database = database)
        all_data[i] = {'contract': nb_cont, 'data': df}
        i += 1
        sdate = min(exp,end_date) + datetime.timedelta(days=1)
    return all_data    
def validate_db_data(tday, filter=False):
    all_insts = filter_main_cont(tday, filter)
    data_count = {}
    inst_list = {'min': [], 'daily': []}
    for instID in all_insts:
        df = mysqlaccess.load_daily_data_to_df('fut_daily', instID, tday, tday)
        if len(df) <= 0:
            inst_list['daily'].append(instID)
        elif (df.close[-1] == 0) or (df.high[-1]
                                     == 0) or (df.low[-1]
                                               == 0) or df.open[-1] == 0:
            inst_list['daily'].append(instID)
        df = mysqlaccess.load_min_data_to_df('fut_min',
                                             instID,
                                             tday,
                                             tday,
                                             minid_start=300,
                                             minid_end=2115,
                                             database='blueshale')
        if len(df) <= 100:
            output = instID + ':' + str(len(df))
            inst_list['min'].append(output)
        elif df.min_id < 2055:
            output = instID + ': end earlier'
            inst_list['min'].append(output)
    print inst_list
Exemple #5
0
def get_cont_data(asset,
                  start_date,
                  end_date,
                  freq='1m',
                  nearby=1,
                  rollrule='-10b'):
    if nearby == 0:
        mdf = mysqlaccess.load_min_data_to_df('fut_min',
                                              asset,
                                              start_date,
                                              end_date,
                                              minid_start=300,
                                              minid_end=2114,
                                              database='hist_data')
        mdf['contract'] = asset
    else:
        mdf = misc.nearby(asset,
                          nearby,
                          start_date,
                          end_date,
                          rollrule,
                          'm',
                          need_shift=True,
                          database='hist_data')
    mdf = backtest.cleanup_mindata(mdf, asset)
    xdf = dh.conv_ohlc_freq(mdf,
                            freq,
                            extra_cols=['contract'],
                            bar_func=dh.bar_conv_func2)
    return xdf
Exemple #6
0
 def prepare_data_env(self, inst, mid_day = True):
     if  self.instruments[inst].ptype == instrument.ProductType.Option:
         return
     if self.daily_data_days > 0 or mid_day:
         self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d'))
         daily_start = workdays.workday(self.scur_day, -self.daily_data_days, CHN_Holidays)
         daily_end = self.scur_day
         self.day_data[inst] = mysqlaccess.load_daily_data_to_df('fut_daily', inst, daily_start, daily_end)
         df = self.day_data[inst]
         if len(df) > 0:
             self.instruments[inst].price = df['close'][-1]
             self.instruments[inst].last_update = 0
             self.instruments[inst].prev_close = df['close'][-1]
             for fobj in self.day_data_func[inst]:
                 ts = fobj.sfunc(df)
                 df[ts.name]= pd.Series(ts, index=df.index)
     if self.min_data_days > 0 or mid_day:
         self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d'))
         d_start = workdays.workday(self.scur_day, -self.min_data_days, CHN_Holidays)
         d_end = self.scur_day
         min_start = int(self.instruments[inst].start_tick_id/1000)
         min_end = int(self.instruments[inst].last_tick_id/1000)+1
         mindata = mysqlaccess.load_min_data_to_df('fut_min', inst, d_start, d_end, minid_start=min_start, minid_end=min_end, database = 'blueshale')
         mindata = backtest.cleanup_mindata(mindata, self.instruments[inst].product)
         self.min_data[inst][1] = mindata
         if len(mindata)>0:
             min_date = mindata.index[-1].date()
             if (len(self.day_data[inst].index)==0) or (min_date > self.day_data[inst].index[-1]):
                 ddf = data_handler.conv_ohlc_freq(mindata, 'd')
                 self.cur_day[inst]['open'] = float(ddf.open[-1])
                 self.cur_day[inst]['close'] = float(ddf.close[-1])
                 self.cur_day[inst]['high'] = float(ddf.high[-1])
                 self.cur_day[inst]['low'] = float(ddf.low[-1])
                 self.cur_day[inst]['volume'] = int(ddf.volume[-1])
                 self.cur_day[inst]['openInterest'] = int(ddf.openInterest[-1])
                 self.cur_min[inst]['datetime'] = pd.datetime(*mindata.index[-1].timetuple()[0:-3])
                 self.cur_min[inst]['open'] = float(mindata.ix[-1,'open'])
                 self.cur_min[inst]['close'] = float(mindata.ix[-1,'close'])
                 self.cur_min[inst]['high'] = float(mindata.ix[-1,'high'])
                 self.cur_min[inst]['low'] = float(mindata.ix[-1,'low'])
                 self.cur_min[inst]['volume'] = self.cur_day[inst]['volume']
                 self.cur_min[inst]['openInterest'] = self.cur_day[inst]['openInterest']
                 self.cur_min[inst]['min_id'] = int(mindata.ix[-1,'min_id'])
                 self.instruments[inst].price = float(mindata.ix[-1,'close'])
                 self.instruments[inst].last_update = 0
                 self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date))
             for m in self.min_data_func[inst]:
                 if m != 1:
                     self.min_data[inst][m] = data_handler.conv_ohlc_freq(self.min_data[inst][1], str(m)+'min')
                 df = self.min_data[inst][m]
                 for fobj in self.min_data_func[inst][m]:
                     ts = fobj.sfunc(df)
                     df[ts.name]= pd.Series(ts, index=df.index)
Exemple #7
0
def nearby(prodcode, n, start_date, end_date, roll_rule, freq, need_shift=False, database = 'hist_data'):
    if start_date > end_date: 
        return None
    cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode)
    contlist = contract_range(prodcode, exch, cont_mth, start_date, day_shift(end_date, roll_rule[1:]))
    exp_dates = [day_shift(contract_expiry(cont), roll_rule) for cont in contlist]
    #print contlist, exp_dates
    sdate = start_date
    is_new = True
    for idx, exp in enumerate(exp_dates):
        if exp < start_date:
            continue
        elif sdate > end_date:
            break
        nb_cont = contlist[idx+n-1]
        if freq == 'd':
            new_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, sdate, min(exp,end_date), database = database)
        else:
            minid_start = 1500
            minid_end = 2114
            if prodcode in night_session_markets:
                minid_start = 300
            new_df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, sdate, min(exp,end_date), minid_start, minid_end, database = database)
        if len(new_df.shape) == 0:
            continue
        nn = new_df.shape[0]
        if nn > 0:
            new_df['contract'] = pd.Series([nb_cont]*nn, index=new_df.index)
        else:
            continue
        if is_new:
            df = new_df
            is_new = False
        else:
            if need_shift:
                if isinstance(df.index[-1], datetime.datetime):
                    last_date = df.index[-1].date()
                else:
                    last_date = df.index[-1]
                tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, last_date, last_date, database = database)
                shift = tmp_df['close'][-1] - df['close'][-1]
                for ticker in ['open','high','low','close']:
                    df[ticker] = df[ticker] + shift
            df = df.append(new_df)
        sdate = min(exp,end_date) + datetime.timedelta(days=1)
    return df        
Exemple #8
0
def nearby(prodcode, n, start_date, end_date, roll_rule, freq, need_shift=False, database = 'hist_data'):
    if start_date > end_date: 
        return None
    cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode)
    contlist = contract_range(prodcode, exch, cont_mth, start_date, day_shift(end_date, roll_rule[1:]))
    exp_dates = [day_shift(contract_expiry(cont), roll_rule) for cont in contlist]
    #print contlist, exp_dates
    sdate = start_date
    is_new = True
    for idx, exp in enumerate(exp_dates):
        if exp < start_date:
            continue
        elif sdate > end_date:
            break
        nb_cont = contlist[idx+n-1]
        if freq == 'd':
            new_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, sdate, min(exp,end_date), database = database)
        else:
            minid_start = 1500
            minid_end = 2114
            if prodcode in night_session_markets:
                minid_start = 300
            new_df = mysqlaccess.load_min_data_to_df('fut_min', nb_cont, sdate, min(exp,end_date), minid_start, minid_end, database = database)
        if len(new_df.shape) == 0:
            continue
        nn = new_df.shape[0]
        if nn > 0:
            new_df['contract'] = pd.Series([nb_cont]*nn, index=new_df.index)
        else:
            continue
        if is_new:
            df = new_df
            is_new = False
        else:
            if need_shift:
                if isinstance(df.index[-1], datetime.datetime):
                    last_date = df.index[-1].date()
                else:
                    last_date = df.index[-1]
                tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', nb_cont, last_date, last_date, database = database)
                shift = tmp_df['close'][-1] - df['close'][-1]
                for ticker in ['open','high','low','close']:
                    df[ticker] = df[ticker] + shift
            df = df.append(new_df)
        sdate = min(exp,end_date) + datetime.timedelta(days=1)
    return df        
Exemple #9
0
def validate_db_data(tday, filter = False):
    all_insts = filter_main_cont(tday, filter)
    data_count = {}
    inst_list = {'min': [], 'daily': [] }
    for instID in all_insts:
        df = mysqlaccess.load_daily_data_to_df('fut_daily', instID, tday, tday)
        if len(df) <= 0:
            inst_list['daily'].append(instID)
        elif (df.close[-1] == 0) or (df.high[-1] == 0) or (df.low[-1] == 0) or df.open[-1] == 0:
            inst_list['daily'].append(instID)
        df = mysqlaccess.load_min_data_to_df('fut_min', instID, tday, tday, minid_start=300, minid_end=2115, database='blueshale')
        if len(df) <= 100:
            output = instID + ':' + str(len(df))
            inst_list['min'].append(output)
        elif df.min_id < 2055:
            output = instID + ': end earlier'
            inst_list['min'].append(output)        
    print inst_list
Exemple #10
0
def fix_daily_data(contlist, sdate, edate):
    for inst in contlist:
        ddf = mysqlaccess.load_daily_data_to_df('fut_daily', inst, sdate, edate)
        mdf = mysqlaccess.load_min_data_to_df('fut_min', inst, sdate, edate, minid_start=300, minid_end = 2115)
        dailydata = data_handler.conv_ohlc_freq(mdf, 'D')
        for dd in dailydata.index:
            d = dd.date()
            dslice = dailydata.ix[dd]
            if d not in ddf.index:
                ddata = {}
                ddata['date'] = d
                ddata['open'] = float(dslice.open)
                ddata['close'] = float(dslice.close)
                ddata['high'] = float(dslice.high)
                ddata['low'] = float(dslice.low)
                ddata['volume'] = int(dslice.volume)
                ddata['openInterest'] = int(dslice.openInterest)
                print inst, ddata
                mysqlaccess.insert_daily_data(inst, ddata)
Exemple #11
0
 def prepare_data_env(self, inst, mid_day=True):
     if self.instruments[inst].ptype == instrument.ProductType.Option:
         return
     if self.daily_data_days > 0 or mid_day:
         #self.logger.debug('Updating historical daily data for %s' % self.scur_day.strftime('%Y-%m-%d'))
         daily_start = workdays.workday(self.scur_day,
                                        -self.daily_data_days, CHN_Holidays)
         daily_end = self.scur_day
         ddf = mysqlaccess.load_daily_data_to_df('fut_daily',
                                                 inst,
                                                 daily_start,
                                                 daily_end,
                                                 index_col=None)
         if len(ddf) > 0:
             self.instruments[inst].price = ddf['close'].iloc[-1]
             self.instruments[inst].last_update = 0
             self.instruments[inst].prev_close = ddf['close'].iloc[-1]
             for fobj in self.day_data_func[inst]:
                 ts = fobj.sfunc(ddf)
                 if type(ts).__name__ == 'Series':
                     if ts.name in ddf.columns:
                         self.logger.warning(
                             'TimeSeries name %s is already in the columns for inst = %s'
                             % (ts.name, inst))
                     ddf[ts.name] = ts
                 elif type(ts).__name__ == 'DataFrame':
                     for col_name in ts.columns:
                         if col_name in ddf.columns:
                             self.logger.warning(
                                 'TimeSeries name %s is already in the columns for inst = %s'
                                 % (col_name, inst))
                         ddf[col_name] = ts[col_name]
         self.day_data[inst] = data_handler.DynamicRecArray(dataframe=ddf)
     if self.min_data_days > 0 or mid_day:
         #self.logger.debug('Updating historical min data for %s' % self.scur_day.strftime('%Y-%m-%d'))
         d_start = workdays.workday(self.scur_day, -self.min_data_days,
                                    CHN_Holidays)
         d_end = self.scur_day
         min_start = int(self.instruments[inst].start_tick_id / 1000)
         min_end = int(self.instruments[inst].last_tick_id / 1000) + 1
         mdf = mysqlaccess.load_min_data_to_df('fut_min',
                                               inst,
                                               d_start,
                                               d_end,
                                               minid_start=min_start,
                                               minid_end=min_end,
                                               database='blueshale',
                                               index_col=None)
         mdf = backtest.cleanup_mindata(mdf,
                                        self.instruments[inst].product,
                                        index_col=None)
         mdf['bar_id'] = self.conv_bar_id(mdf['min_id'], inst)
         if len(mdf) > 0:
             min_date = mdf['date'].iloc[-1]
             if (len(self.day_data[inst]) == 0) or (
                     min_date > self.day_data[inst].data['date'][-1]):
                 ddf = data_handler.conv_ohlc_freq(mdf, 'd', index_col=None)
                 self.cur_day[inst]['open'] = float(ddf.open[-1])
                 self.cur_day[inst]['close'] = float(ddf.close[-1])
                 self.cur_day[inst]['high'] = float(ddf.high[-1])
                 self.cur_day[inst]['low'] = float(ddf.low[-1])
                 self.cur_day[inst]['volume'] = int(ddf.volume[-1])
                 self.cur_day[inst]['openInterest'] = int(
                     ddf.openInterest[-1])
                 self.cur_min[inst]['datetime'] = pd.datetime(
                     *mdf['datetime'].iloc[-1].timetuple()[0:-3])
                 self.cur_min[inst]['date'] = mdf['date'].iloc[-1]
                 self.cur_min[inst]['open'] = float(mdf['open'].iloc[-1])
                 self.cur_min[inst]['close'] = float(mdf['close'].iloc[-1])
                 self.cur_min[inst]['high'] = float(mdf['high'].iloc[-1])
                 self.cur_min[inst]['low'] = float(mdf['low'].iloc[-1])
                 self.cur_min[inst]['volume'] = self.cur_day[inst]['volume']
                 self.cur_min[inst]['openInterest'] = self.cur_day[inst][
                     'openInterest']
                 self.cur_min[inst]['min_id'] = int(mdf['min_id'].iloc[-1])
                 self.cur_min[inst]['bar_id'] = self.conv_bar_id(
                     self.cur_min[inst]['min_id'], inst)
                 self.instruments[inst].price = float(mdf['close'].iloc[-1])
                 self.instruments[inst].last_update = 0
                 #self.logger.debug('inst=%s tick data loaded for date=%s' % (inst, min_date))
             if 1 not in self.min_data_func[inst]:
                 self.min_data[inst][1] = data_handler.DynamicRecArray(
                     dataframe=mdf)
             for m in sorted(self.min_data_func[inst]):
                 if m != 1:
                     bar_func = lambda ts: self.conv_bar_id(ts, inst)
                     mdf_m = data_handler.conv_ohlc_freq(
                         mdf,
                         str(m) + 'min',
                         index_col=None,
                         bar_func=bar_func,
                         extra_cols=['bar_id'])
                 else:
                     mdf_m = mdf
                 for fobj in self.min_data_func[inst][m]:
                     ts = fobj.sfunc(mdf_m)
                     if type(ts).__name__ == 'Series':
                         if ts.name in mdf_m.columns:
                             self.logger.warning(
                                 'TimeSeries name %s is already in the columns for inst = %s'
                                 % (ts.name, inst))
                         mdf_m[ts.name] = ts
                     elif type(ts).__name__ == 'DataFrame':
                         for col_name in ts.columns:
                             if col_name in mdf_m.columns:
                                 self.logger.warning(
                                     'TimeSeries name %s is already in the columns for inst = %s'
                                     % (col_name, inst))
                             mdf_m[col_name] = ts[col_name]
                 self.min_data[inst][m] = data_handler.DynamicRecArray(
                     dataframe=mdf_m)
Exemple #12
0
 def prepare_data_env(self, inst, mid_day=True):
     if self.instruments[inst].ptype == instrument.ProductType.Option:
         return
     if self.daily_data_days > 0 or mid_day:
         self.logger.debug('Updating historical daily data for %s' %
                           self.scur_day.strftime('%Y-%m-%d'))
         daily_start = workdays.workday(self.scur_day,
                                        -self.daily_data_days, CHN_Holidays)
         daily_end = self.scur_day
         self.day_data[inst] = mysqlaccess.load_daily_data_to_df(
             'fut_daily', inst, daily_start, daily_end)
         df = self.day_data[inst]
         print inst
         if len(df) > 0:
             self.instruments[inst].price = df['close'][-1]
             self.instruments[inst].last_update = 0
             self.instruments[inst].prev_close = df['close'][-1]
             for fobj in self.day_data_func[inst]:
                 ts = fobj.sfunc(df)
                 df[ts.name] = pd.Series(ts, index=df.index)
     if self.min_data_days > 0 or mid_day:
         self.logger.debug('Updating historical min data for %s' %
                           self.scur_day.strftime('%Y-%m-%d'))
         d_start = workdays.workday(self.scur_day, -self.min_data_days,
                                    CHN_Holidays)
         d_end = self.scur_day
         min_start = int(self.instruments[inst].start_tick_id / 1000)
         min_end = int(self.instruments[inst].last_tick_id / 1000) + 1
         mindata = mysqlaccess.load_min_data_to_df('fut_min',
                                                   inst,
                                                   d_start,
                                                   d_end,
                                                   minid_start=min_start,
                                                   minid_end=min_end,
                                                   database='blueshale')
         mindata = backtest.cleanup_mindata(mindata,
                                            self.instruments[inst].product)
         self.min_data[inst][1] = mindata
         if len(mindata) > 0:
             min_date = mindata.index[-1].date()
             if (len(self.day_data[inst].index)
                     == 0) or (min_date > self.day_data[inst].index[-1]):
                 ddf = data_handler.conv_ohlc_freq(mindata, 'd')
                 self.cur_day[inst]['open'] = float(ddf.open[-1])
                 self.cur_day[inst]['close'] = float(ddf.close[-1])
                 self.cur_day[inst]['high'] = float(ddf.high[-1])
                 self.cur_day[inst]['low'] = float(ddf.low[-1])
                 self.cur_day[inst]['volume'] = int(ddf.volume[-1])
                 self.cur_day[inst]['openInterest'] = int(
                     ddf.openInterest[-1])
                 self.cur_min[inst]['datetime'] = pd.datetime(
                     *mindata.index[-1].timetuple()[0:-3])
                 self.cur_min[inst]['open'] = float(mindata.ix[-1, 'open'])
                 self.cur_min[inst]['close'] = float(mindata.ix[-1,
                                                                'close'])
                 self.cur_min[inst]['high'] = float(mindata.ix[-1, 'high'])
                 self.cur_min[inst]['low'] = float(mindata.ix[-1, 'low'])
                 self.cur_min[inst]['volume'] = self.cur_day[inst]['volume']
                 self.cur_min[inst]['openInterest'] = self.cur_day[inst][
                     'openInterest']
                 self.cur_min[inst]['min_id'] = int(mindata.ix[-1,
                                                               'min_id'])
                 self.instruments[inst].price = float(mindata.ix[-1,
                                                                 'close'])
                 self.instruments[inst].last_update = 0
                 self.logger.debug('inst=%s tick data loaded for date=%s' %
                                   (inst, min_date))
             for m in self.min_data_func[inst]:
                 if m != 1:
                     self.min_data[inst][m] = data_handler.conv_ohlc_freq(
                         self.min_data[inst][1],
                         str(m) + 'min')
                 df = self.min_data[inst][m]
                 for fobj in self.min_data_func[inst][m]:
                     ts = fobj.sfunc(df)
                     df[ts.name] = pd.Series(ts, index=df.index)
Exemple #13
0
def simcontract_min(config_file):
    sim_config = {}
    with open(config_file, 'r') as fp:
        sim_config = json.load(fp)
    bktest_split = sim_config['sim_func'].split('.')
    run_sim = __import__('.'.join(bktest_split[:-1]))
    for i in range(1, len(bktest_split)):
        run_sim = getattr(run_sim, bktest_split[i])
    dir_name = config_file.split('.')[0]
    dir_name = dir_name.split(os.path.sep)[-1]
    test_folder = get_bktest_folder()
    file_prefix = test_folder + dir_name + os.path.sep
    if not os.path.exists(file_prefix):
        os.makedirs(file_prefix)
    sim_list = sim_config['products']
    if type(sim_list[0]).__name__ != 'list':
        sim_list = [[str(asset)] for asset in sim_list]
    sim_mode = sim_config.get('sim_mode', 'OR')
    calc_coeffs = sim_config.get('calc_coeffs', [1, -1])
    cont_maplist = sim_config.get('cont_maplist', [])
    sim_period = sim_config.get('sim_period', '-12m')
    need_daily = sim_config.get('need_daily', False)
    if len(cont_maplist) == 0:
        cont_maplist = [[0]] * len(sim_list)
    config = {}
    start_date = datetime.datetime.strptime(sim_config['start_date'], '%Y%m%d').date()
    config['start_date'] = start_date
    end_date   = datetime.datetime.strptime(sim_config['end_date'], '%Y%m%d').date()
    config['end_date'] = end_date
    scen_dim = [ len(sim_config[s]) for s in sim_config['scen_keys']]
    outcol_list = ['asset', 'scenario'] + sim_config['scen_keys'] \
                + ['sharp_ratio', 'tot_pnl', 'std_pnl', 'num_days', \
                    'max_drawdown', 'max_dd_period', 'profit_dd_ratio', \
                    'all_profit', 'tot_cost', 'win_ratio', 'num_win', 'num_loss', \
                    'profit_per_win', 'profit_per_loss']
    scenarios = [list(s) for s in np.ndindex(tuple(scen_dim))]
    config.update(sim_config['config'])
    if 'pos_class' in sim_config:
        config['pos_class'] = eval(sim_config['pos_class'])
    if 'proc_func' in sim_config:
        config['proc_func'] = eval(sim_config['proc_func'])
    file_prefix = file_prefix + sim_config['sim_name']
    if 'close_daily' in config and config['close_daily']:
        file_prefix = file_prefix + 'daily_'
    config['file_prefix'] = file_prefix
    summary_df = pd.DataFrame()
    fname = config['file_prefix'] + 'summary.csv'
    if os.path.isfile(fname):
        summary_df = pd.DataFrame.from_csv(fname)
    for assets, cont_map in zip(sim_list, cont_maplist):
        file_prefix = config['file_prefix'] + '_' + sim_mode + '_' + '_'.join(assets) + '_'
        fname = file_prefix + 'stats.json'
        output = {'total': {}, 'cont': {}}
        if os.path.isfile(fname):
            with open(fname, 'r') as fp:
                output = json.load(fp)
        #if len(output['total'].keys()) == len(scenarios):
        #    continue
        min_data = {}
        day_data = {}
        config['tick_base'] = 0
        config['marginrate'] = (0, 0)
        rollrule = '-50b'
        config['exit_min'] = config.get('exit_min', 2057)
        config['no_trade_set'] = config.get('no_trade_set', [])
        if assets[0] in ['cu', 'al', 'zn']:
            rollrule = '-1b'
        elif assets[0] in ['IF', 'IH', 'IC']:
            rollrule = '-2b'
        elif assets[0] in ['au', 'ag']:
            rollrule = '-25b'
        elif assets[0] in ['TF', 'T']:
            rollrule = '-20b'
        rollrule = config.get('rollrule', rollrule)
        contlist = {}
        exp_dates = {}
        for i, prod in enumerate(assets):
            cont_mth, exch = mysqlaccess.prod_main_cont_exch(prod)
            contlist[prod] = misc.contract_range(prod, exch, cont_mth, start_date, end_date)
            exp_dates[prod] = [misc.contract_expiry(cont) for cont in contlist[prod]]
            edates = [ misc.day_shift(d, rollrule) for d in exp_dates[prod] ]
            sdates = [ misc.day_shift(d, sim_period) for d in exp_dates[prod] ]
            config['tick_base'] += trade_offset_dict[prod]
            config['marginrate'] = ( max(config['marginrate'][0], sim_margin_dict[prod]), max(config['marginrate'][1], sim_margin_dict[prod]))
            min_data[prod] = {}
            day_data[prod] = {}
            for cont, sd, ed in zip(contlist[prod], sdates, edates):
                minid_start = 1500
                minid_end = 2114
                if prod in misc.night_session_markets:
                    minid_start = 300
                tmp_df = mysqlaccess.load_min_data_to_df('fut_min', cont, sd, ed, minid_start, minid_end, database = 'hist_data')
                tmp_df['contract'] = cont
                min_data[prod][cont] = cleanup_mindata( tmp_df, prod)
                if need_daily:
                    tmp_df = mysqlaccess.load_daily_data_to_df('fut_daily', cont, sd, ed, database = 'hist_data')
                    day_data[prod][cont] = tmp_df
        if 'offset' in sim_config:
            config['offset'] = sim_config['offset'] * config['tick_base']
        else:
            config['offset'] = config['tick_base']
        for ix, s in enumerate(scenarios):
            fname1 = file_prefix + str(ix) + '_trades.csv'
            fname2 = file_prefix + str(ix) + '_dailydata.csv'
            if os.path.isfile(fname1) and os.path.isfile(fname2):
                continue
            for key, seq in zip(sim_config['scen_keys'], s):
                config[key] = sim_config[key][seq]
            df_list = []
            trade_list = []
            for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
                cont = contlist[assets[0]][idx]
                edate = misc.day_shift(exp_dates[assets[0]][idx], rollrule)
                if sim_mode == 'OR':
                    mdf = min_data[assets[0]][cont]
                    mdf = mdf[mdf.date <= edate]
                    if need_daily:
                        ddf = day_data[assets[0]][cont]
                        config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                else:
                    mode_keylist = sim_mode.split('-')
                    smode = mode_keylist[0]
                    cmode = mode_keylist[1]
                    all_data = []
                    if smode == 'TS':
                        all_data = [min_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                    else:
                        all_data = [min_data[asset][contlist[asset][idx+i]] for asset, i in zip(assets, cont_map)]
                    if cmode == 'Full':
                        mdf = pd.concat(all_data, axis = 1, join = 'inner')
                        mdf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
                        mdf = mdf[ mdf.date0 < edate]
                    else:
                        #print all_data[0], all_data[1]
                        for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                            if i == 0:
                                xopen = tmpdf['open'] * coeff
                                xclose = tmpdf['close'] * coeff
                            else:
                                xopen = xopen + tmpdf['open'] * coeff
                                xclose = xclose + tmpdf['close'] * coeff
                        xopen = xopen.dropna()
                        xclose = xclose.dropna()
                        xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                        xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                        col_list = ['date', 'min_id', 'volume', 'openInterest']                        
                        mdf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                        mdf.columns = ['open', 'high', 'low', 'close'] + col_list
                        mdf['contract'] = cont
                        #print mdf
                    if need_daily:
                        if smode == 'TS':
                            all_data = [day_data[assets[0]][contlist[assets[0]][idx+i]] for i in cont_map]
                        else:
                            all_data = [day_data[asset][contlist[asset]][idx+i] for asset, i in zip(assets, cont_map)]
                        if cmode == 'Full':
                            ddf = pd.concat(all_data, axis = 1, join = 'inner')
                            ddf.columns = [iter + str(i) for i, x in enumerate(all_data) for iter in x.columns]
                            config['ddf'] = ddf[ddf.index <= edate]
                        else:
                            for i, (coeff, tmpdf) in enumerate(zip(calc_coeffs, all_data)):
                                if i == 0:
                                    xopen = tmpdf['open'] * coeff
                                    xclose = tmpdf['close'] * coeff
                                else:
                                    xopen = xopen + tmpdf['open'] * coeff
                                    xclose = xclose + tmpdf['close'] * coeff
                            xhigh = pd.concat([xopen, xclose], axis = 1).max(axis = 1)
                            xlow = pd.concat([xopen, xclose], axis = 1).min(axis = 1)
                            col_list = ['volume', 'openInterest']
                            ddf = pd.concat([ xopen, xhigh, xlow, xclose] + [all_data[0][col] for col in col_list], axis = 1, join = 'inner')
                            ddf.columns = ['open', 'high', 'low', 'close'] + col_list
                            ddf['contract'] = cont
                            config['ddf'] = ddf[ddf.index <= edate]
                        if len(config['ddf']) < 10:
                            continue
                df = mdf.copy(deep = True)
                df, closed_trades = run_sim( df, config)
                df_list.append(df)
                trade_list = trade_list + closed_trades
                (res_pnl, ts) = get_pnl_stats( [df], config['capital'], config['marginrate'], 'm')
                res_trade = get_trade_stats( trade_list )
                res = dict( res_pnl.items() + res_trade.items())
                res.update(dict(zip(sim_config['scen_keys'], s)))
                res['asset'] = cont
                if cont not in output['cont']:
                    output['cont'][cont] = {}
                output['cont'][cont][ix] = res
            (res_pnl, ts) = get_pnl_stats( df_list, config['capital'], config['marginrate'], 'm')
            res_trade = get_trade_stats( trade_list )
            res = dict( res_pnl.items() + res_trade.items())
            res.update(dict(zip(sim_config['scen_keys'], s)))
            res['asset'] = '_'.join(assets)
            output['total'][ix] = res
            print 'saving results for asset = %s, scen = %s' % ('_'.join(assets), str(ix))
            all_trades = {}
            for i, tradepos in enumerate(trade_list):
                all_trades[i] = strat.tradepos2dict(tradepos)
            trades = pd.DataFrame.from_dict(all_trades).T
            trades.to_csv(fname1)
            ts.to_csv(fname2)
            fname = file_prefix + 'stats.json'
            try:
                with open(fname, 'w') as ofile:
                    json.dump(output, ofile)
            except:
                continue
        cont_df = pd.DataFrame()
        for idx in range(abs(min(cont_map)), len(contlist[assets[0]]) - max(cont_map)):
            cont = contlist[assets[0]][idx]
            if cont not in output['cont']:
                continue
            res = scen_dict_to_df(output['cont'][cont])
            out_res = res[outcol_list]
            if len(cont_df) == 0:
                cont_df = out_res[:20].copy(deep = True)
            else:
                cont_df = cont_df.append(out_res[:20])
        fname = file_prefix + 'cont_stat.csv'
        cont_df.to_csv(fname)
        res = scen_dict_to_df(output['total'])
        out_res = res[outcol_list]
        if len(summary_df) == 0:
            summary_df = out_res[:20].copy(deep = True)
        else:
            summary_df = summary_df.append(out_res[:20])
        fname = config['file_prefix'] + 'summary.csv'
        summary_df.to_csv(fname)
    return
def hist_realized_vol_by_product(prodcode,
                                 start_d,
                                 end_d,
                                 periods=12,
                                 tenor='-1m',
                                 writeDB=False):
    cont_mth, exch = mysqlaccess.prod_main_cont_exch(prodcode)
    contlist = contract_range(prodcode, exch, cont_mth, start_d, end_d)
    exp_dates = [get_opt_expiry(cont, inst2contmth(cont)) for cont in contlist]
    data = {
        'is_dtime': True,
        'data_column': 'close',
        'data_freq': '30min',
        'xs': [0.5, 0.25, 0.75],
        'xs_names': ['atm', 'v25', 'v75'],
        'xs_func': 'bs_delta_to_ratio',
        'rehedge_period': 1,
        'term_tenor': tenor,
        'database': 'hist_data'
    }
    option_input = {
        'otype': True,
        'rd': 0.0,
        'rf': 0.0,
        'end_vol': 0.0,
        'ref_vol': 0.5,
        'pricer_func': 'bsopt.BSOpt',
        'delta_func': 'bsopt.BSDelta',
        'is_dtime': data['is_dtime'],
    }
    freq = data['data_freq']
    for cont, expiry in zip(contlist, exp_dates):
        expiry_d = expiry.date()
        if expiry_d > end_d:
            break
        p_str = '-' + str(int(tenor[1:-1]) * periods) + tenor[-1]
        d_start = day_shift(expiry_d, p_str)
        if freq == 'd':
            df = mysqlaccess.load_daily_data_to_df('fut_daily',
                                                   cont,
                                                   d_start,
                                                   expiry_d,
                                                   database=data['database'],
                                                   index_col=None)
        else:
            mdf = mysqlaccess.load_min_data_to_df('fut_min',
                                                  cont,
                                                  d_start,
                                                  expiry_d,
                                                  minid_start=300,
                                                  minid_end=2115,
                                                  database=data['database'],
                                                  index_col=None)
            mdf = backtest.cleanup_mindata(mdf, prodcode, index_col=None)
            mdf['bar_id'] = dh.bar_conv_func2(mdf['min_id'])
            df = dh.conv_ohlc_freq(mdf,
                                   freq,
                                   bar_func=dh.bar_conv_func2,
                                   extra_cols=['bar_id'],
                                   index_col=None)
        option_input['expiry'] = expiry
        data['dataframe'] = df
        vol_df = realized_termstruct(option_input, data)
        print cont, expiry_d, vol_df