def fix_eux_ice_first_bar_volume(repo_l1='./repo_l1', repo_hist='./repo', sday=None, eday=None) : """ EUX and ICE had the first bar's buy volume wrong, try to get it from repo_hist, or set to 0 This is included in the L1Bar.read(), so it's not needed. """ if sday is None : sday = '00000000' if eday is None : eday = '99999999' eur_sym = l1.ven_sym_map['EUX'] ice_sym = l1.ven_sym_map['ICE'] #for sym in eur_sym + ice_sym : for sym in ['LCO'] : print 'symbol: ', sym dbarl1=repo.RepoDailyBar(sym, repo_path=repo_l1) dbar=repo.RepoDailyBar(sym, repo_path=repo_hist) days=dbarl1.all_days() for d in days : if d < sday or d > eday: continue print 'day ', d, b1,c1,bs1=dbarl1.load_day(d) b,c,bs=dbar.load_day(d) changed=False ix0 = np.nonzero(b1[:, repo.ci(c1,repo.volc)]>1e-10)[0] if len(ix0)==0 : print 'all zero volume! ' continue ix0=ix0[0] vol0=b1[ix0, repo.ci(c1,repo.volc)] vbs0=b1[ix0, repo.ci(c1,repo.vbsc)] print vol0, vbs0, if bs == bs1 and len(b1)>0 and len(b)>0 : # use b's first bar volume vol0=b[ix0,repo.ci(c,repo.volc)] vbs0=b[ix0,repo.ci(c,repo.vbsc)] changed=True print 'using repo! ', vol0, vbs0 else : vbs1=b1[:,repo.ci(c1,repo.vbsc)] if np.abs(vbs1[ix0]) > 100 * np.median(np.abs(vbs1)) : # set to 0 vol0=0 vbs0=0 changed=True print 'setting to 0!' if changed : b1[ix0, repo.ci(c1,repo.volc)]=vol0 b1[ix0, repo.ci(c1,repo.vbsc)]=vbs0 dbarl1._dump_day(d, b1,c1,bs1) else : print 'all good!'
def remove_outlier(sym_arr,repo_path, sday, eday) : """ sym_arr=['6Z','6M','6R'], front and back """ for sym in sym_arr : print sym dbar = repo.RepoDailyBar(sym, repo_path=repo_path) repo.remove_outlier_lr(dbar, sday, eday)
def test_l1(bar_file='bar/20180727/NYM_CL_B1S.csv', hist_load_date = None, symbol = 'CL', repo_path='repo_test', bs=1) : """ need to run at the kisco root path if hist_load_date is not None, it should be a [start_day, end_day] for the repo to be loaded with IB Histroy This is only needed for initialization. Typically you can save a directory of repo and use rm -fR and cp -fR to achieve this """ if hist_load_date is not None : print 'create repo ', repo_path, ' and load history dates: ', hist_load_date import os import IB_hist as ibhist os.system('mkdir -p ' + repo_path + ' > /dev/null 2>&1') dbar = repo.RepoDailyBar(symbol, repo_path=repo_path, create=True) try : ibhist.gen_daily_bar_ib(symbol, hist_load_date[0], hist_load_date[1],bs,dbar_repo=dbar, get_missing=False) except : pass else : print 'using existing repo at ', repo_path dbar = repo.RepoDailyBar(symbol, repo_path=repo_path) # read l1 updates from L1 Bar file l1bar = L1Bar(symbol,bar_file, None) darr, uarr, barr, earr = l1bar.read() # save history bar without l1 updates bars = [] for d in darr : bar, col, bs = dbar.load_day(d) bars.append(copy.deepcopy(bar)) # update repo with l1 and save to bar5 bar5s=[] l1bar2 = L1Bar(symbol,bar_file, dbar) darr2, uarr2, barr2, earr2 = l1bar2.read() for d in darr : bar5, col, bs = dbar.load_day(d) bar5s.append(copy.deepcopy(bar5)) # ready to go for bar, bar5, d, ua, ba, ea in zip(bars, bar5s, darr, uarr, barr, earr) : verify_lpx_lr_vol_vbs_ism(bar, bar5, ua, ba, ea, d)
def ingest_all_l1(bar_date_dir_list=None, repo_path='./repo', sym_list=None, bar_path='./bar', future_inclusion=['front', 'back']) : """ ingest all the symbols in bar_date_dir, including the future, fx, etf and future_nc for each *_B1S.csv* file: read l1bar for symbol from bar_date_dir, i.e. NYM_CL_B1S.csv.gz if bar_date_dir_list is not none, it should be a list of bar_date_dir, i.e. [20180629,20180706] otherwise, all dates in bar_path if repo_path is not None, update the repo for that symbol. if sym_list is not None, then only these symbols are updated, otherwise, all symbols found in the bar directory will be updated Note 1: future_nc has *_B1S_bc.csv*, i.e. NYM_CL_B1S_bc.csv.gz and have different repo_path than front contract, obtained by repo.nc_repo_path(repo_path), i.e. repo_nc """ repo_path_nc = repo.nc_repo_path(repo_path) if repo_path is not None else None #gzip_everything(bar_path) if bar_date_dir_list is None : b = glob.glob(bar_path+'/*') bar_date_dir_list=[] for b0 in b : bar_date_dir_list.append(b0.split('/')[-1]) print 'got ', len(bar_date_dir_list), ' directories to update' bar_date_dir_list.sort() for bar_date_dir in bar_date_dir_list : fs_front = bar_path+'/'+str(bar_date_dir)+'/*_B1S.csv*' fs_back = bar_path+'/'+str(bar_date_dir)+'/*_B1S_bc.csv*' for fs, rp, contype in zip([fs_front, fs_back], [repo_path, repo_path_nc], ['front','back']) : if contype not in future_inclusion : continue fn = glob.glob(fs) print 'found ', len(fn), ' files for ', rp for f in fn : sym = f.split('/')[-1].split('_')[1] if sym_list is not None and sym not in sym_list : print sym, ' not in ', sym_list, ' ignored. ' continue print 'getting ', sym, ' from ', f, ' repo_path ', rp dbar = None if rp is not None : dbar = repo.RepoDailyBar(sym, repo_path=rp, create=True) l1b = L1Bar(sym, f, dbar) try : l1b.read(noret=True) except : import traceback traceback.print_exc()
def gen_bar(sym_array, sday, eday, repo_cme_path='./repo_cme', cme_path='./cme', bar_sec=1, nc=False):
    """
    Build fixed-interval trade bars from per-day trade files and store
    them in the repo at repo_cme_path.

    getting from the ts [utc, px, signed_vol]
    output format bt, lr, vl, vbs, lrhl, vwap, ltt, lpx

    sym_array     : symbols to process
    sday, eday    : inclusive day range; sday has to be a trading day,
                    otherwise ValueError is raised
    repo_cme_path : repo to store the 1S trd bars; has to end with 'nc'
                    when nc is True (checked with an assert)
    cme_path      : passed to get_fn() to locate the trade file of a day
    bar_sec       : bar size in seconds
    nc            : if True the contract comes from l1.FC_next(), otherwise
                    from l1.FC()
    return : None
        update (remove first) dbar with bar_arr, days, col_arr

    A KeyboardInterrupt while reading a trade file prints a message and
    returns from the whole function.
    """
    if nc:
        assert repo_cme_path[-2:] == 'nc', 'repo_cme_path=' + repo_cme_path + ' not ending with nc'
    for symbol in sym_array:
        # open the repo of this symbol, on any failure retry with create=True
        try:
            dbar = repo.RepoDailyBar(symbol, repo_path=repo_cme_path)
        except:
            print 'repo_trd_path failed, trying to create'
            dbar = repo.RepoDailyBar(symbol, repo_path=repo_cme_path, create=True)

        start_hour, end_hour = l1.get_start_end_hour(symbol)
        TRADING_HOURS = end_hour - start_hour
        # sday has to be a trading day
        it = l1.TradingDayIterator(sday)
        tday = it.yyyymmdd()
        if tday != sday:
            raise ValueError('sday has to be a trading day! sday: ' + sday + ' trd_day: ' + tday)

        # lastpx == 0 / prev_con == '' mark "no usable previous day"; the first
        # return of the next good day is then taken against its own first price
        lastpx = 0
        prev_con = ''
        while tday <= eday:
            # the bars of tday cover (sutc, eutc], TRADING_HOURS long
            eutc = it.local_ymd_to_utc(tday, h_ofst=end_hour)
            sutc = eutc - (TRADING_HOURS) * 3600
            if nc:
                con = l1.FC_next(symbol, tday)[0]
            else:
                con = l1.FC(symbol, tday)
            # symbol plus the last two characters of the contract
            # NOTE(review): presumably the month/year code - confirm in l1
            con = symbol + con[-2:]

            try:
                bar = bar_by_file(get_fn(cme_path, symbol, tday, con))
            except (KeyboardInterrupt):
                print 'interrupt!'
                return
            except:
                # any other failure: treat the day as having no trades
                print 'problem getting ', symbol, tday
                bar = []

            if len(bar) == 0:
                # nothing for this day, break the price continuity
                lastpx = 0
                prev_con = ''
            else:
                # this is the good case, prepare for the bar
                # 1) get bar with start/stop, 2) contract updated 3) lastpx
                # need to allow for entire content being in one ta, i.e. some
                # days having tds==2 but all contents in one ta, due to gmt_offset

                # have everything, need to get to
                # output format bt, lr, vl, vbs, lrhl, vwap, ltt, lp
                if lastpx == 0 or prev_con != con:
                    # no previous price or contract changed: start from the
                    # first trade price of this day
                    lastpx = bar[0, 1]

                # bar close times sutc+bar_sec, ..., eutc
                bt = np.arange(sutc + bar_sec, eutc + bar_sec, bar_sec)

                # trade columns, each with one extra leading element standing
                # for "before the first trade": time sutc, the first trade
                # price and zero volume
                tts = np.r_[sutc, bar[:, 0]]
                pts = np.r_[bar[0, 1], bar[:, 1]]
                vts = np.r_[0, bar[:, 2]]
                pvts = np.abs(vts) * pts

                # for each bar time, the number of trades with a time stamp
                # below bt + 1e-6; due to the leading element this number is
                # the index of the last such trade in tts/pts/vts, 0 being
                # the leading element itself
                # NOTE(review): assumes bar[:, 0] is sorted ascending - confirm
                pxix = np.clip(np.searchsorted(tts[1:], bt + 1e-6), 0, len(tts) - 1)
                lpx = pts[pxix]

                # log return per bar, the first one against lastpx
                lr = np.log(np.r_[lastpx, lpx])
                lr = lr[1:] - lr[:-1]

                # tricky way to get index right on volumes
                # cumulative sum sampled at each bar time, then differenced,
                # gives the sum of the trades falling into each bar
                btdc = np.r_[0, np.cumsum(vts)[pxix]]
                vbs = btdc[1:] - btdc[:-1]
                btdc = np.r_[0, np.cumsum(np.abs(vts))[pxix]]
                vol = btdc[1:] - btdc[:-1]

                # even tickier way to get vwap/ltt right
                # ixg: bars with non-zero volume
                ixg = np.nonzero(vol)[0]
                btdc = np.r_[0, np.cumsum(pvts)[pxix]]
                vwap = lpx.copy()  #when there is no vol
                vwap[ixg] = (btdc[1:] - btdc[:-1])[ixg] / vol[ixg]
                # last trade time, set only on bars with volume and 0 elsewhere
                ltt = np.zeros(len(bt))
                ltt[ixg] = tts[pxix][ixg]
                # NOTE(review): presumably fills the 0 entries of ltt in place
                # (the return value is not used) - confirm in repo
                repo.fwd_bck_fill(ltt, v=0)

                # give up, ignore the lrhl for trd bars
                lrhl = np.zeros(len(bt))

                # one row per bar, columns in the order given in the docstring
                b = np.vstack((bt, lr, vol, vbs, lrhl, vwap, ltt, lpx)).T
                d = tday
                c = repo.kdb_ib_col
                # replace whatever the repo has for this day
                dbar.remove_day(d)
                dbar.update([b], [d], [c], bar_sec)
                # carry the close price and contract over to the next day
                lastpx = lpx[-1]
                prev_con = con

            it.next()
            tday = it.yyyymmdd()
def ingest_all_symb(sday, eday, repo_path=None, get_missing=True, sym_list=None, future_inclusion=['front', 'back'], sym_list_exclude=[], overwrite_dbar=True, EarliestMissingDay='20180201'): """ This will go to IB historical data, usually in /cygdrive/e/ib/kisco, read all the symbols defined by sym_list and update the repo at repo_path, which is usually at /cygdrive/e/research/kdb if sym_list is None, then it will include all the symbol collected by ibbar. future_inclusion defaults to include both front and back. It is included for ibbar's ingestion, when only front contract is in hist, but the back contracts haven't been retrieved yet. NOTE: ETF and FX symbols are not affected by future_inclusion """ import ibbar if repo_path is None: repo_path = ibbar.read_cfg('RepoPath') fut_sym = ibbar.sym_priority_list fx_sym = l1.ven_sym_map['FX'] etf_sym = ibbar.ib_sym_etf fut_sym2 = ibbar.sym_priority_list_l1_next idx_sym = ibbar.ib_sym_idx if sym_list is None: sym_list = fut_sym + fx_sym + etf_sym + idx_sym for sym in sym_list: if sym in sym_list_exclude: continue print 'ingesting ', sym if sym in fut_sym and 'front' in future_inclusion: barsec = 1 dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, is_front_future=True, get_missing=get_missing, overwrite_dbar=overwrite_dbar, EarliestMissingDay=EarliestMissingDay) elif sym in fx_sym: barsec = 5 dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, get_missing=get_missing, overwrite_dbar=overwrite_dbar, EarliestMissingDay=EarliestMissingDay) elif sym in etf_sym or sym in idx_sym: barsec = 1 dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, get_missing=get_missing, overwrite_dbar=overwrite_dbar) if sym in fut_sym2 and 'back' in future_inclusion: barsec = 1 repo_path_nc = repo.nc_repo_path( repo_path) # repo path of 
next contract dbar = repo.RepoDailyBar(sym, repo_path=repo_path_nc, create=True) gen_daily_bar_ib(sym, sday, eday, barsec, dbar_repo=dbar, is_front_future=False, get_missing=get_missing, overwrite_dbar=overwrite_dbar, EarliestMissingDay=EarliestMissingDay)