def make_bootstrap_idx(symbol):
    """
    Build the initial index dictionary for a symbol.

    The returned dict has two sections:
      'global': per-symbol constants looked up through l1
                (symbol, venue, [start_hour, end_hour], tick size)
      'daily' : starts empty.  Per the example left in the original
                code it is meant to hold one entry per day, e.g.
                    '19700101': {'bar_sec': 1,
                                 'cols': []}   # columns available
    """
    sym_venue = l1.venue_by_symbol(symbol)
    # asset_info also returns the contract size; it is not stored in the index
    tick_sz, _contract_sz = l1.asset_info(symbol)
    hour_open, hour_close = l1.get_start_end_hour(symbol)

    global_section = {
        'symbol': symbol,
        'venue': sym_venue,
        'sehour': [hour_open, hour_close],
        'ticksz': tick_sz,
    }
    daily_section = {}
    return {'global': global_section, 'daily': daily_section}
def fn_from_dates(symbol, sday, eday, is_front_future): try: is_fx = l1.venue_by_symbol(symbol) == 'FX' is_etf = l1.venue_by_symbol(symbol) == 'ETF' is_idx = l1.venue_by_symbol(symbol) == 'IDX' except: print 'Unknow symbol %s' % (symbol) raise ValueError('Unknown symbol ' + symbol) from ibbar import read_cfg hist_path = read_cfg('HistPath') sym0 = symbol if symbol in l1.RicMap.keys(): sym0 = l1.RicMap[symbol] if is_etf: fqt = glob.glob(hist_path + '/ETF/' + sym0 + '_[12]*_qt.csv*') elif is_fx: fqt = glob.glob(hist_path + '/FX/' + sym0 + '_[12]*_qt.csv*') elif is_idx: fqt = glob.glob(hist_path + '/IDX/' + sym0 + '_[12]*_trd.csv*') else: if is_front_future: fqt = glob.glob(hist_path + '/' + symbol + '/' + sym0 + '*_[12]*_qt.csv*') else: fqt = glob.glob(hist_path + '/' + symbol + '/nc/' + sym0 + '??_[12]*_qt.csv*') ds = [] de = [] fn = [] for f in fqt: if os.stat(f).st_size < 500: print '\t\t\t ***** ', f, ' is too small, ignored' continue ds0 = f.split('/')[-1].split('_')[1] de0 = f.split('/')[-1].split('_')[2].split('.')[0] # check for inclusion if ds0 > eday or de0 < sday: continue ds.append(ds0) de.append(de0) fn.append(f) # sort the list in the increasing order of starting dates # this will make the merging easier by using append # in case of total inclusion, then the rule will be # "overwrite", instead of "append" # append means add only the new content to the existing daily bar # overwrite means add all the content to the existing daily bar, overwirte if overlap # merge means to only apply to daily bars of any days that doesn't exists. ix = np.argsort(ds) dss = np.array(ds)[ix] des = np.array(de)[ix] fns = np.array(fn)[ix] while True: if len(fns) == 0: print 'ERROR! 
Nothing found for %s from %s to %s (front %s), search path %s' % ( symbol, sday, eday, str(is_front_future), hist_path) break # remove the files that are contained desi = des.astype(int) ix = np.nonzero(desi[1:] - desi[:-1] <= 0)[0] if len(ix) > 0: print fns[ix + 1], ' contained by ', fns[ ix], ', removed, if needed, consider load and overwrite repo' fns = np.delete(fns, ix + 1) des = np.delete(des, ix + 1) dss = np.delete(dss, ix + 1) else: break return fns, is_fx, is_etf, is_idx
def bar_by_file_ib(fn, symbol, start_day='19980101', end_day='20990101', bar_qt=None, bar_trd=None): """ _qt.csv and _trd.csv are expected to exist for the given fn return : bar_qt[:,0], utc_ltt, bar_qt[:,1:5].T, vwap, vol, vb, vs return bar_qt, bar_trd, bar where bar {utc, utcltt, open, high, low, close,vwap,vol,vb,vs} """ bid_ask_spd = get_future_spread(symbol) is_fx = l1.venue_by_symbol(symbol) == 'FX' is_idx = l1.venue_by_symbol(symbol) == 'IDX' is_etf = l1.venue_by_symbol(symbol) == 'ETF' if is_idx: print 'Getting IDX quotes!' b0 = bar_by_file_ib_idx(fn) if len(b0) > 0: ix0, ix1 = clip_idx(b0[:, 0], symbol, start_day, end_day) return [], [], b0[ix0:ix1, :] return [], [], b0 else: if fn[-3:] == '.gz': fn = fn[:-3] if fn[-4:] == '.csv': fn = fn[:-7] fnqt = fn + '_qt.csv' fntd = fn + '_trd.csv' if bar_trd is None or len(bar_trd) == 0: has_trd = l1.get_file_size(fntd) > 100 or l1.get_file_size(fntd + '.gz') > 100 if has_trd: bar_trd = get_trd(fntd) if is_fx or not has_trd or len(bar_trd) < 1: print 'Getting Quote Only!' b0 = bar_by_file_ib_qtonly(fn) if len(b0) > 0: ix0, ix1 = clip_idx(b0[:, 0], symbol, start_day, end_day) return [], [], b0[ix0:ix1, :] return [], [], b0 if bar_qt is None or len(bar_qt) == 0: bar_qt = get_qt(fnqt) # use quote as ref nqt = bar_qt.shape[0] assert nqt > 3, 'too few bars found at ' + fn # make sure the time stamps strictly increasing qix = l1.get_inc_idx(bar_qt[:, 0]) tix = l1.get_inc_idx(bar_trd[:, 0]) bar_qt = bar_qt[qix, :] bar_trd = bar_trd[tix, :] qts = bar_qt[:, 0] tts = bar_trd[:, 0] assert len(np.nonzero( qts[1:] - qts[:-1] < 0)[0]) == 0, 'quote time stamp goes back' assert len(np.nonzero( tts[1:] - tts[:-1] < 0)[0]) == 0, 'trade time stamp goes back' # deal with length difference # some times the file content has more days than the file name suggests. # such as ZNH8_20180201_20180302_1S_qt.csv has days from 2/1 to 3/19. # but the _trd.csv only has to 3/2 as file name suggests. 
# In this case, take the shorter one and ensure the days # checked for gaps in between for missing days # Only exception is when there is only one day, then while True: if len(qts) < 10: return [], [], [] #dtq0 = datetime.datetime.fromtimestamp(qts[0]) #dtt0 = datetime.datetime.fromtimestamp(tts[0]) #dtq1 = datetime.datetime.fromtimestamp(qts[-1]) #dtt1 = datetime.datetime.fromtimestamp(tts[-1]) dtq0 = l1.trd_day(qts[0]) dtt0 = l1.trd_day(tts[0]) dtq1 = l1.trd_day(qts[-1]) dtt1 = l1.trd_day(tts[-1]) print 'Got Quote: ', dtq0, ' to ', dtq1, ' Trade: ', dtt0, ' to ', dtt1 #if (qts[-1] != tts[-1]) : if dtq1 != dtt1: # only handles where ending date is different print '!!! Quote/Trade ending date mismatch!!!' ts = min(qts[-1], tts[-1]) if qts[-1] > ts: ix = np.nonzero(qts > ts)[0] qts = qts[:ix[0]] bar_qt = bar_qt[:ix[0], :] else: ix = np.nonzero(tts > ts)[0] tts = tts[:ix[0]] bar_trd = bar_trd[:ix[0], :] #elif (qts[0] != tts[0]) : elif dtq0 != dtt0: print '!!! Quote/Trade date starting mismatch!!!' ts = max(qts[0], tts[0]) if qts[0] < ts: ix = np.nonzero(qts < ts)[0] six = ix[-1] + 1 qts = qts[six:] bar_qt = bar_qt[six:, :] else: ix = np.nonzero(tts < ts)[0] six = ix[-1] + 1 tts = tts[six:] bar_trd = bar_trd[six:, :] else: break tix = np.clip(np.searchsorted(tts, qts), 0, len(tts) - 1) # they should be the same, otherwise, patch the different ones ix0 = np.nonzero(tts[tix] - qts != 0)[0] if len(ix0) != 0: print len(ix0), ' bars mismatch!' ts = bar_trd[tix, :] # This should be tts #ts[tix[ix0],5]=0 #ts[tix[ix0],6]=0 #ts[tix[ix0],7]=bar_qt[ix0,4].copy() ts[ix0, 5] = 0 ts[ix0, 6] = 0 ts[ix0, 7] = bar_qt[ix0, 4].copy() import pandas as pd vwap = ts[:, 7].copy() vol = ts[:, 5].copy() vb = vol.copy() vs = vol.copy() if is_etf: print 'adjust ETF size ' # IB's ETF volume in LOTS, i.e. 
250 = 2 LOTS vol = vol * 100 + 50 vb = vb * 100 + 50 vs = vs * 100 + 50 utc_ltt = ts[:, 0] if len(ix0) > 0: utc_ltt[ix0] = np.nan df = pd.DataFrame(utc_ltt) df.fillna(method='ffill', inplace=True) """ # for those bar without price movements, calculate the volume by avg trade price ixe=np.nonzero(bar_qt[:,1]-bar_qt[:,4]==0)[0] #pdb.set_trace() vb[ixe]=np.clip((ts[ixe,7]-(bar_qt[ixe,4]-bid_ask_spd/2))/bid_ask_spd*ts[ixe,5],0,1e+10) vs[ixe]=ts[ixe,5]-vb[ixe] ixg=np.nonzero(bar_qt[:,1]-bar_qt[:,4]<0)[0] vs[ixg]=0 ixl=np.nonzero(bar_qt[:,1]-bar_qt[:,4]>0)[0] vb[ixl]=0 """ spd = bid_ask_spd * np.clip( np.sqrt((bar_qt[:, 2] - bar_qt[:, 3]) / bid_ask_spd), 1, 2) mid = (bar_qt[:, 2] + bar_qt[:, 3]) / 2 #mid=np.mean(bar_qt[:,1:5], axis=1) vb = np.clip((vwap - (mid - spd / 2)) / spd, 0, 1) * vol vs = vol - vb bar = np.vstack( (bar_qt[:, 0], utc_ltt, bar_qt[:, 1:5].T, vwap, vol, vb, vs)).T ix0, ix1 = clip_idx(bar[:, 0], symbol, start_day, end_day) return bar_qt, bar_trd, bar[ix0:ix1, :]
def write_daily_bar(symbol, bar, bar_sec=5, is_front=True, last_close_px=None, get_missing=True): """ bar: all bars from a hist file having the format of [utc, utc_ltt, open_px, hi_px, lo_px, close_px, vwap, vol, vb, vs] These bars have the same contract. The bar is in increasing utc, but may have gaps, or other invalid values The first day of that contract bar, due to prev_close_px unknown, it is usually covered by having the previous contract day. Note there is a limitation that the start end time has to be on a whole hour i.e. cannot stop on 4:30, just make it 5, which will write some zero bars. However, it can handle 24 hour trading, i.e. start/end at 18:00, for fx venues. Note 2, the first bar of a day should be 1 bar_sec after the starting utc and the last bar of a day should be at the ending utc. if get_missing is set to true, then try to get the bar on a bad day Output: array of daily_bar for each day covered in the bar (hist file) Each daily_bar have the following format: [obs_utc, lr, trd_vol, vbs, lrhl, lrvwap, ltt, lpx] where: obs_utc is the checking time stamp lr is the log return between this checking price and last checking price i.e. the lr of the previous bar that ended at this checking time (obs_utc) (May extend in the future) Note that the Trading Hours set to 24 for ICE hours In addition, it does the following: 1. loop the close px to the first open px, 2. convert the price to lr, removing bars with maxlr more than 0.2 (CME circuit breaker) 3. replace all inf/nan values with zero 4. 
cacluate the ltt and lpx """ import pandas as pd dt = datetime.datetime.fromtimestamp(bar[ 0, 0]) # fromtimestamp is safe for getting local representation of utc start_hour, end_hour = l1.get_start_end_hour(symbol) TRADING_HOURS = end_hour - start_hour start_hour = start_hour % 24 # get the initial day, last price day_start = dt.strftime('%Y%m%d') utc_s = int( l1.TradingDayIterator.local_ymd_to_utc(day_start, start_hour, 0, 0)) if last_close_px is None: x = np.searchsorted(bar[1:, 0], float(utc_s) - 1e-6) # only take the last price within 5 minutes of utc_s if x + 1 >= bar.shape[0] or bar[x + 1, 0] - utc_s > 300: if x + 1 >= bar.shape[0]: print 'no bars found after the start utc of ', day_start else: print 'start up utc (%d) more than 5 minutes later than start utc (%d) on %s' % ( bar[x + 1, 0], utc_s, day_start) print 'initializing start up last_close_px deferred' else: if x == 0: #last_close_px = bar[0, 2] #print 'last close price set as the first bar open px, this should use previous contract', datetime.datetime.fromtimestamp(bar[0,0]), datetime.datetime.fromtimestamp(bar[1,0]) last_close_px = bar[0, 5] print 'lost last close price, set as the first bar close px' else: last_close_px = bar[x, 5] print 'last close price set to close px of bar ', datetime.datetime.fromtimestamp( bar[x, 0]), ' px: ', last_close_px print 'GOT last close px ', last_close_px else: print 'GIVEN last close price ', last_close_px day_end = datetime.datetime.fromtimestamp(bar[-1, 0]).strftime('%Y%m%d') # deciding on the trading days if dt.hour > end_hour or (start_hour == end_hour and dt.hour >= end_hour): # CME 17, ICE 18, # the second rule is for 24 hour trading, note start/end has to be on a whole hour ti = l1.TradingDayIterator(day_start, adj_start=False) ti.next() trd_day_start = ti.yyyymmdd() else: trd_day_start = day_start trd_day_end = day_end print 'preparing bar from ', day_start, ' to ', day_end, ' , trading days: ', trd_day_start, trd_day_end ti = 
l1.TradingDayIterator(trd_day_start, adj_start=False) # day maybe a sunday day1 = ti.yyyymmdd() # first trading day barr = [] trade_days = [] col_arr = [] bad_trade_days = [] while day1 <= trd_day_end: utc_e = int( l1.TradingDayIterator.local_ymd_to_utc(day1, end_hour, 0, 0)) # get start backwards for starting on a Sunday utc_s = utc_e - TRADING_HOURS * 3600 # LIMITATION: start/stop has to be on a whole hour day = datetime.datetime.fromtimestamp(utc_s).strftime('%Y%m%d') i = np.searchsorted(bar[:, 0], float(utc_s) - 1e-6) j = np.searchsorted(bar[:, 0], float(utc_e) - 1e-6) bar0 = bar[ i: j, :] # take the bars in between the first occurance of start_hour (or after) and the last occurance of end_hour or before print 'getting bar ', day + '-' + str( start_hour) + ':00', day1 + '-' + str( end_hour) + ':00', ' , got ', j - i, 'bars' N = ( utc_e - utc_s ) / bar_sec # but we still fill in each bar, so N should be fixed for a given symbol/venue pair # here N*0.90, is to account for some closing hours during half hour ib retrieval time # The problem with using histclient.exe to retrieve IB history data for ES is # set end time is 4:30pm, will retreve 3:45 to 4:15. Because 4:15-4:30pm doesn't # have data. 
This is only true for ES so far # another consideration is that IB Hist client usually won't be off too much, so 90% is # a good threshold for missing/bad day bar_good = True if j - i < N * 0.90: if symbol in ['LE', 'HE'] or l1.venue_by_symbol(symbol) == 'IDX': bar_good = (j - i) > N * 0.75 elif not is_front: bar_good = (j - i) > N * 0.5 else: bar_good = False if not bar_good: print 'fewer bars for trading day %s: %d < %d * 0.9' % (day1, j - i, N) if day1 not in l1.bad_days and get_missing: # recurse with the current last price and get the updated last price print 'getting missing day %s' % (day1) from ibbar import get_missing_day fn = get_missing_day(symbol, [day1], bar_sec=bar_sec, is_front=is_front, reuse_exist_file=True) try: _, _, b0 = bar_by_file_ib(fn[0], symbol, start_day=day1, end_day=day1) except Exception as e: print e b0 = [] if len(b0) > j - i: print 'Getting more bars %d > %d on %s for %s, take it!' % ( len(b0), j - i, day1, symbol) barr0, trade_days0, col_arr0, bad_trade_days0, last_close_px0 = write_daily_bar( symbol, b0, bar_sec=bar_sec, is_front=is_front, last_close_px=last_close_px, get_missing=False) # taken as done barr += barr0 trade_days += trade_days0 col_arr += col_arr0 bad_trade_days += bad_trade_days0 last_close_px = last_close_px0 ti.next() day1 = ti.yyyymmdd() continue print 'Got %d bars on %s, had %d bars (%s), use previous!' % ( len(b0), day1, j - i, symbol) if len(bar0) < 1: print 'Bad Day! 
Too fewer bars in trading day %s: %d, should have %d ' % ( day1, j - i, N) bad_trade_days.append(day1) else: ix_utc = ((bar0[:, 0] - float(utc_s)) / bar_sec + 1e-9).astype( int) # lr(close_px-open_px) of a bar0 has bar_utc bar_utc = np.arange( utc_s + bar_sec, utc_e + bar_sec, bar_sec) # bar time will be time of close price, as if in prod if N != j - i: print 'fill missing for only ', j - i, ' bars (should be ', N, ')' bar1 = np.empty((N, bar0.shape[1])) bar1[:, 0] = np.arange(utc_s, utc_e, bar_sec) # filling all missing for [utc, utc_ltt, open_px, hi_px, lo_px, close_px, vwap, vol, vb, vs] # fillforward for utc_ltt, close_px, vwap for col in [1, 5, 6]: bar1[:, col] = np.nan bar1[ix_utc, col] = bar0[:, col] df = pd.DataFrame(bar1[:, col]) df.fillna(method='ffill', inplace=True) df.fillna(method='bfill', inplace=True) # fill zero for vol, vb, bs for col in [7, 8, 9]: bar1[:, col] = 0 bar1[ix_utc, col] = bar0[:, col] # copy value of close_px for open_px, hi_px, lo_px for col in [2, 3, 4]: bar1[:, col] = bar1[:, 5] bar1[ix_utc, col] = bar0[:, col] bar_arr = [] bar_arr.append(bar_utc.astype(float)) # construct the log returns for each bar, fill in zeros for gap #lpx_open=np.log(bar0[:,2]) if last_close_px is None: print 'setting last_close_px to ', bar0[0, 2] last_close_px = bar0[0, 2] lpx_open = np.log(np.r_[last_close_px, bar0[:-1, 5]]) lpx_hi = np.log(bar0[:, 3]) lpx_lo = np.log(bar0[:, 4]) lpx_close = np.log(bar0[:, 5]) lpx_vwap = np.log(bar0[:, 6]) lr = lpx_close - lpx_open lr_hi = lpx_hi - lpx_open lr_lo = lpx_lo - lpx_open lr_vw = lpx_vwap - lpx_open # remove bars having abnormal return, i.e. 
circuit break for ES # with 9999 prices MaxLR = 0.5 if l1.is_holiday(day) or l1.is_fx_future( symbol) or l1.venue_by_symbol(symbol) == 'FX': MaxLR = 5 ix1 = np.nonzero(np.abs(lr) >= MaxLR)[0] ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_hi) >= MaxLR)[0]) ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_lo) >= MaxLR)[0]) ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_vw) >= MaxLR)[0]) if len(ix1) > 0: print 'MaxLR (', MaxLR, ') exceeded: ', len(ix1), ' ticks!' # removing one-by-one for ix1_ in ix1: dt = datetime.datetime.fromtimestamp(bar_utc[ix1_]) if not l1.is_pre_market_hour(symbol, dt): print 'warning: removing 1 tick lr/lo/hi/vw: ', lr[ ix1_], lr_hi[ix1_], lr_lo[ix1_], lr_vw[ix1_] lr[ix1_] = 0 lr_hi[ix1_] = 0 lr_lo[ix1_] = 0 lr_vw[ix1_] = 0 else: print 'NOT removing 1 tick (pre_market=True: ', symbol, ', ', dt, ') lr/lo/hi/vw: ', lr[ ix1_], lr_hi[ix1_], lr_lo[ix1_], lr_vw[ix1_] # the trade volumes for each bar, fill in zeros for gap vlm = bar0[:, 7] vb = bar0[:, 8] vs = np.abs(bar0[:, 9]) vbs = vb - vs for v0, vn in zip([lr, lr_hi, lr_lo, lr_vw, vlm, vbs], ['lr', 'lr_hi', 'lr_lo', 'lr_vw', 'vlm', 'vbs']): nix = np.nonzero(np.isnan(v0))[0] nix = np.union1d(nix, np.nonzero(np.isinf(np.abs(v0)))[0]) if len(nix) > 0: print 'warning: removing ', len( nix), ' nan/inf ticks for ', vn v0[nix] = 0 b0 = np.zeros(N).astype(float) b0[ix_utc] = v0 bar_arr.append(b0.copy()) # get the last trade time, this is needs to be ltt = np.empty(N) * np.nan ltt[ix_utc] = bar0[:, 1] df = pd.DataFrame(ltt) df.fillna(method='ffill', inplace=True) if not np.isfinite(ltt[0]): ptt = 0 #no previous trading detectable if i > 0: #make some effort here ptt = bar[i - 1, 1] if not np.isfinite(ptt): ptt = 0 df.fillna(ptt, inplace=True) bar_arr.append(ltt) # get the last price, as a debugging tool # close price lpx = np.empty(N) * np.nan lpx[ix_utc] = bar0[:, 5] df = pd.DataFrame(lpx) df.fillna(method='ffill', inplace=True) if not np.isfinite(lpx[0]): df.fillna(last_close_px, inplace=True) bar_arr.append(lpx) 
ba = np.array(bar_arr).T bt0 = ba[:, 0] lr0 = ba[:, 1] vl0 = ba[:, 5] vbs0 = ba[:, 6] # add a volatility measure here lrhl0 = ba[:, 2] - ba[:, 3] vwap0 = ba[:, 4] ltt0 = ba[:, 7] lpx0 = ba[:, 8] barr.append( np.vstack((bt0, lr0, vl0, vbs0, lrhl0, vwap0, ltt0, lpx0)).T) last_close_px = lpx[-1] trade_days.append(day1) col_arr.append(repo.kdb_ib_col) ti.next() day1 = ti.yyyymmdd() # filling in missing days if not included in the bad_trade_days bad_trade_days = [] good_trade_days = [] it = l1.TradingDayIterator(trd_day_start) while True: day = it.yyyymmdd() if day > trd_day_end: break if day not in trade_days: bad_trade_days.append(day) else: good_trade_days.append(day) it.next() print 'got bad trade days ', bad_trade_days return barr, good_trade_days, col_arr, bad_trade_days, last_close_px
def get_missing_day(symbol, trd_day_arr, bar_sec, is_front, cid=None, reuse_exist_file=True, reuse_exist_only=False): """ Couple of options: reuse_exist_file: will take the previous daily file and try to reuse it reuse_exist_only: will only try to reuse the existing daily file. If not found, then don't run the ibclient. This is usually the case for unnecessary days (such as outside of sday/eday of file name). Note: if IB_CLIENT is not found, i.e. on the hp notebook, reuse_exist_only is set to true """ import copy ibclient = copy.deepcopy(IB_CLIENT) try: os.stat(ibclient) except: reuse_exist_only = True if reuse_exist_only: ibclient = None if cid is None: dt = datetime.datetime.now() cid = dt.month * 31 + dt.day + 300 + dt.second fnarr = [] for day in trd_day_arr: if day in l1.bad_days: print 'not getting holiday ', day continue if l1.venue_by_symbol(symbol) == 'FX': fnarr += get_ib(day, day, cid=cid + 3, sym_list=[symbol], reuse_exist_file=reuse_exist_file, verbose=False, ibclient=ibclient) else: # future or etf next_contract = not is_front fnarr += get_ib_future([symbol], day, day, bar_sec, mock_run=False, cid=cid + 1, getqt=True, gettrd=True, next_contract=next_contract, reuse_exist_file=reuse_exist_file, verbose=False, ibclient=ibclient) return fnarr
def ibvenue(symbol):
    """
    Return the venue of the symbol, as given by l1.venue_by_symbol.
    """
    venue = l1.venue_by_symbol(symbol)
    return venue
def __init__(self, symbol, bar_file, dbar_repo):
    """
    A class for reading IB's L1 bars with the columns as
    UTC         bs    bp         ap            as  bv  sv  utc_at_collect   qbc qac bc sc ism_avg
    --------------------------------------------------------------------------------------------------
    1535425169, 5, 2901.5000000, 2901.7500000, 135, 5, 17, 1535425169000056, 1, 2, 1, 2, 2901.5062609
    ...
    Where
    UTC is the bar ending time
    qbc is best bid change count
    qac is best ask change count
    bc  is buy trade counts
    sc  is sell trade counts

    symbol:    symbol known to l1
    bar_file:  the bar file to read, i.e. bar/NYM_CL_B1S.csv.  A file name
               ending with '.gz' is gunzipped in place first (the .gz file
               is replaced by the plain file) and self.gzip is set.
    dbar_repo: the daily bar repo to update, or None

    The bar file is opened for reading and kept open as self.f.

    Based on a line in the bar file, the parsing returns the following two
    arrays
    bcol_arr: array of basic columns for each day.
              ['vol', 'vbs', 'spd', 'bs', 'as', 'mid']
    ecol_arr: array of extended columns for each day
              ['qbc', 'qac', 'tbc', 'tsc', 'ism1']

    if dbar_repo is not None, it will update repo by the following rule:
    1. overwrite the [lrc,volc,vbsc,lpxc], whenever exist (indexing using
       the utcc)
    2. add columns of bs, as, spd qbc qac tbc tsc ism1, fill-in on missing
       (see NOTE 5)

    NOTE 1: utc offset:
    From 201805301800 to 201806261700, utc + 1 matches with history
    From 201806261800 to 201808171700, utc + 2 matches with history
    Good afterwards

    NOTE 2: Extended columns starts from 20180715-20:39:55, but may have
    problem for first few days

    NOTE 3: Next contract bar starts from 20180802-18:12:30
    Same as the IB_Hist, separate dbar_repo for the same symbol's next
    contract, i.e. dbar_repo_next_contract for bars of next contract

    NOTE 4: Be prepared for any data losses and errors!
    zero prices, zero sizes

    NOTE 5: There are 1~2 second drift on the hist's mid and L1's mid
    before 8/18/2018.  Since the L1 is the live trading one, it is given
    more emphasis.  To be consistent, the lr also is overwritten together
    with vol and vbs.  But when constructing lr to override, due to the
    first lr being calculated with previous trading day on the same
    contract, BE SURE to use the hist data on the first index

    Weekend ingestion process for front/back future contract:
    1. collect and ingest hist file, handling missings
    2. read and ingest bar files
    """
    import subprocess

    self.symbol = symbol
    self.venue = l1.venue_by_symbol(symbol)
    self.hours = l1.get_start_end_hour(symbol)
    self.bar_file = bar_file
    if bar_file[-3:] == '.gz':
        # pass the file name as one argument, no shell involved, so that
        # names with spaces or shell characters are handled
        subprocess.call(['gunzip', '-f', bar_file])
        self.bar_file = bar_file[:-3]
        self.gzip = True
    else:
        self.gzip = False
    self.f = open(self.bar_file, 'r')
    self.dbar = dbar_repo

    # the time shifting start/stops, see Note 1
    self.utc10 = l1.TradingDayIterator.local_ymd_to_utc('20180530', 18, 0, 0)
    self.utc11 = l1.TradingDayIterator.local_ymd_to_utc('20180626', 17, 0, 0)
    self.utc20 = l1.TradingDayIterator.local_ymd_to_utc('20180626', 18, 0, 0)
    self.utc21 = l1.TradingDayIterator.local_ymd_to_utc('20180817', 17, 0, 0)
    self.bar_sec = 1  # always fixed as 1 second bar for C++ l1 bar writer