Exemple #1
0
def fix_eux_ice_first_bar_volume(repo_l1='./repo_l1', repo_hist='./repo', sday=None, eday=None) :
    """
    EUX and ICE had the first bar's buy volume wrong, try to get it from 
    repo_hist, or set to 0
    This is included in the L1Bar.read(), so it's not needed.
    """
    if sday is None :
        sday = '00000000'
    if eday is None :
        eday = '99999999'

    eur_sym = l1.ven_sym_map['EUX']
    ice_sym = l1.ven_sym_map['ICE']

    #for sym in eur_sym + ice_sym :
    for sym in ['LCO'] :
        print 'symbol: ', sym
        dbarl1=repo.RepoDailyBar(sym, repo_path=repo_l1)
        dbar=repo.RepoDailyBar(sym, repo_path=repo_hist)

        days=dbarl1.all_days()
        for d in days :
            if d < sday or d > eday:
                continue

            print 'day ', d, 
            b1,c1,bs1=dbarl1.load_day(d)
            b,c,bs=dbar.load_day(d)
            changed=False
            ix0 = np.nonzero(b1[:, repo.ci(c1,repo.volc)]>1e-10)[0]
            if len(ix0)==0 :
                print 'all zero volume! '
                continue
            ix0=ix0[0]
            vol0=b1[ix0, repo.ci(c1,repo.volc)]
            vbs0=b1[ix0, repo.ci(c1,repo.vbsc)]
            print vol0, vbs0,
            if bs == bs1 and len(b1)>0 and len(b)>0 :
                # use b's first bar volume
                vol0=b[ix0,repo.ci(c,repo.volc)]
                vbs0=b[ix0,repo.ci(c,repo.vbsc)]
                changed=True
                print 'using repo! ', vol0, vbs0
            else :
                vbs1=b1[:,repo.ci(c1,repo.vbsc)]
                if np.abs(vbs1[ix0]) > 100 * np.median(np.abs(vbs1)) :
                    # set to 0
                    vol0=0
                    vbs0=0
                    changed=True
                    print 'setting to 0!'
            if changed :
                b1[ix0, repo.ci(c1,repo.volc)]=vol0
                b1[ix0, repo.ci(c1,repo.vbsc)]=vbs0
                dbarl1._dump_day(d, b1,c1,bs1)
            else :
                print 'all good!'
Exemple #2
0
def remove_outlier(sym_arr,repo_path, sday, eday) :
    """
    sym_arr=['6Z','6M','6R'], front and back
    """
    for sym in sym_arr :
        print sym
        dbar = repo.RepoDailyBar(sym, repo_path=repo_path)
        repo.remove_outlier_lr(dbar, sday, eday)
Exemple #3
0
def test_l1(bar_file='bar/20180727/NYM_CL_B1S.csv', hist_load_date = None, symbol = 'CL', repo_path='repo_test', bs=1) :
    """
    need to run at the kisco root path 
    if hist_load_date is not None, it should be a [start_day, end_day] for the repo to be loaded with IB Histroy
       This is only needed for initialization.  Typically you can save a directory of repo and use
       rm -fR and cp -fR to achieve this
    """
    if hist_load_date is not None :
        print 'create repo ', repo_path, ' and load history dates: ', hist_load_date
        import os
        import IB_hist as ibhist
        os.system('mkdir -p ' + repo_path + ' > /dev/null 2>&1')
        dbar = repo.RepoDailyBar(symbol, repo_path=repo_path, create=True)
        try :
            ibhist.gen_daily_bar_ib(symbol, hist_load_date[0], hist_load_date[1],bs,dbar_repo=dbar, get_missing=False)
        except :
            pass
    else :
        print 'using existing repo at ', repo_path
        dbar = repo.RepoDailyBar(symbol, repo_path=repo_path)

    # read l1 updates from L1 Bar file
    l1bar = L1Bar(symbol,bar_file, None)
    darr, uarr, barr, earr = l1bar.read()

    # save history bar without l1 updates
    bars = []
    for d in darr :
        bar, col, bs = dbar.load_day(d)
        bars.append(copy.deepcopy(bar))

    # update repo with l1 and save to bar5
    bar5s=[]
    l1bar2 = L1Bar(symbol,bar_file, dbar)
    darr2, uarr2, barr2, earr2 = l1bar2.read()
    for d in darr :
        bar5, col, bs = dbar.load_day(d)
        bar5s.append(copy.deepcopy(bar5))

    # ready to go
    for bar, bar5, d, ua, ba, ea in zip(bars, bar5s, darr, uarr, barr, earr) :
        verify_lpx_lr_vol_vbs_ism(bar, bar5, ua, ba, ea, d)
Exemple #4
0
def ingest_all_l1(bar_date_dir_list=None, repo_path='./repo', sym_list=None, bar_path='./bar', future_inclusion=['front', 'back']) :
    """
    ingest all the symbols in bar_date_dir, including the future, fx, etf and future_nc
    for each *_B1S.csv* file: 
    read l1bar for symbol from bar_date_dir, i.e. NYM_CL_B1S.csv.gz
    if bar_date_dir_list is not none, it should be a list of bar_date_dir, i.e. [20180629,20180706]
       otherwise, all dates in bar_path
    if repo_path is not None, update the repo for that symbol. 
    if sym_list is not None, then only these symbols are updated,
       otherwise, all symbols found in the bar directory will be updated
    Note 1: future_nc has *_B1S_bc.csv*,  i.e. NYM_CL_B1S_bc.csv.gz
            and have different repo_path than front contract, 
            obtained by repo.nc_repo_path(repo_path), i.e. repo_nc
    """
    repo_path_nc = repo.nc_repo_path(repo_path) if repo_path is not None else None
    #gzip_everything(bar_path)
    if bar_date_dir_list is None :
        b = glob.glob(bar_path+'/*')
        bar_date_dir_list=[]
        for b0 in b :
            bar_date_dir_list.append(b0.split('/')[-1])
        print 'got ', len(bar_date_dir_list), ' directories to update'

    bar_date_dir_list.sort()
    for bar_date_dir in bar_date_dir_list :
        fs_front = bar_path+'/'+str(bar_date_dir)+'/*_B1S.csv*'
        fs_back  = bar_path+'/'+str(bar_date_dir)+'/*_B1S_bc.csv*'
        for fs, rp, contype in zip([fs_front, fs_back], [repo_path, repo_path_nc], ['front','back']) :
            if contype not in future_inclusion :
                continue
            fn = glob.glob(fs)
            print 'found ', len(fn), ' files for ', rp
            for f in fn :
                sym = f.split('/')[-1].split('_')[1]
                if sym_list is not None and sym not in sym_list :
                    print sym, ' not in ', sym_list, ' ignored. '
                    continue
                print 'getting ', sym, ' from ', f, ' repo_path ', rp
                dbar = None
                if rp is not None :
                    dbar = repo.RepoDailyBar(sym, repo_path=rp, create=True)
                l1b = L1Bar(sym, f, dbar)
                try :
                    l1b.read(noret=True)
                except :
                    import traceback
                    traceback.print_exc()
Exemple #5
0
def gen_bar(sym_array,
            sday,
            eday,
            repo_cme_path='./repo_cme',
            cme_path='./cme',
            bar_sec=1,
            nc=False):
    """
    Build fixed-interval trade bars from per-day tick files and store them
    in a daily bar repo.

    getting from the ts [utc, px, signed_vol]
    output format bt, lr, vl, vbs, lrhl, vwap, ltt, lpx

    sym_array:     symbols to process
    sday:          first day (yyyymmdd), has to be a trading day
    eday:          last day (yyyymmdd), inclusive
    repo_cme_path: repo to store the trd bars; has to end with 'nc'
                   when nc is True
    cme_path:      passed to get_fn() to locate the tick file of a day
    bar_sec:       bar length in seconds
    nc:            if True use the contract from l1.FC_next(), otherwise
                   the one from l1.FC()

    return : None
        for each day with data, the day is removed from the repo and then
        written again with the newly built bars
        returns early (silently, after printing) on KeyboardInterrupt
    raises : ValueError if sday is not a trading day
    """

    if nc:
        assert repo_cme_path[
            -2:] == 'nc', 'repo_cme_path=' + repo_cme_path + ' not ending with nc'
    for symbol in sym_array:
        # open the repo, any failure is taken as "does not exist yet"
        try:
            dbar = repo.RepoDailyBar(symbol, repo_path=repo_cme_path)
        except:
            print 'repo_trd_path failed, trying to create'
            dbar = repo.RepoDailyBar(symbol,
                                     repo_path=repo_cme_path,
                                     create=True)

        start_hour, end_hour = l1.get_start_end_hour(symbol)
        TRADING_HOURS = end_hour - start_hour
        # sday has to be a trading day
        it = l1.TradingDayIterator(sday)
        tday = it.yyyymmdd()
        if tday != sday:
            raise ValueError('sday has to be a trading day! sday: ' + sday +
                             ' trd_day: ' + tday)

        # lastpx: last bar price of the previous day, 0 means "none"
        # prev_con: contract of the previous day, '' means "none"
        lastpx = 0
        prev_con = ''
        while tday <= eday:
            # the day's bars span the TRADING_HOURS ending at end_hour
            eutc = it.local_ymd_to_utc(tday, h_ofst=end_hour)
            sutc = eutc - (TRADING_HOURS) * 3600
            if nc:
                con = l1.FC_next(symbol, tday)[0]
            else:
                con = l1.FC(symbol, tday)

            # keep only the last two characters of the contract name
            con = symbol + con[-2:]
            try:
                bar = bar_by_file(get_fn(cme_path, symbol, tday, con))
            except (KeyboardInterrupt):
                print 'interrupt!'
                return
            except:
                # any other problem: treat the day as having no data
                print 'problem getting ', symbol, tday
                bar = []

            if len(bar) == 0:
                # no data, the next day with data starts from its own
                # first price
                lastpx = 0
                prev_con = ''
            else:
                # this is the good case, prepare for the bar
                # 1) get bar with start/stop, 2) contract updated 3) lastpx
                # need to allow for entire content being in one ta, i.e. some
                # days having tds==2 but all contents in one ta, due to gmt_offset

                # have everything, need to get to
                # output format bt, lr, vl, vbs, lrhl, vwap, ltt, lp
                # bar columns used below: 0 = time, 1 = price, 2 = signed volume

                # no previous price or contract changed: the first return
                # of the day is taken against the day's first tick price
                if lastpx == 0 or prev_con != con:
                    lastpx = bar[0, 1]
                # bar end times: sutc + bar_sec, ..., below eutc + bar_sec
                bt = np.arange(sutc + bar_sec, eutc + bar_sec, bar_sec)
                # tick arrays with one synthetic leading element (time sutc,
                # first price, zero volume) so that index 0 means
                # "no tick yet"
                tts = np.r_[sutc, bar[:, 0]]
                pts = np.r_[bar[0, 1], bar[:, 1]]
                vts = np.r_[0, bar[:, 2]]
                pvts = np.abs(vts) * pts

                # for each bar end time the number of ticks before
                # bt + 1e-6, which is the index of the latest such tick in
                # the arrays above
                pxix = np.clip(np.searchsorted(tts[1:], bt + 1e-6), 0,
                               len(tts) - 1)
                # price at each bar end and the log return from the
                # previous bar end (from lastpx for the first bar)
                lpx = pts[pxix]
                lr = np.log(np.r_[lastpx, lpx])
                lr = lr[1:] - lr[:-1]

                # tricky way to get index right on volumes
                # per-bar sums are differences of the cumulative sums taken
                # at consecutive bar ends: signed (vbs) and absolute (vol)
                btdc = np.r_[0, np.cumsum(vts)[pxix]]
                vbs = btdc[1:] - btdc[:-1]
                btdc = np.r_[0, np.cumsum(np.abs(vts))[pxix]]
                vol = btdc[1:] - btdc[:-1]

                # even trickier way to get vwap/ltt right
                # ixg: bars with non-zero volume
                ixg = np.nonzero(vol)[0]
                btdc = np.r_[0, np.cumsum(pvts)[pxix]]
                vwap = lpx.copy()  #when there is no vol
                vwap[ixg] = (btdc[1:] - btdc[:-1])[ixg] / vol[ixg]
                # time of the latest tick for bars with volume, 0 elsewhere
                ltt = np.zeros(len(bt))
                ltt[ixg] = tts[pxix][ixg]
                # presumably fills the 0 entries of ltt in place from
                # neighboring values -- confirm in repo.fwd_bck_fill
                repo.fwd_bck_fill(ltt, v=0)

                # give up, ignore the lrhl for trd bars
                lrhl = np.zeros(len(bt))

                # one row per bar, columns in the order of the docstring
                b = np.vstack((bt, lr, vol, vbs, lrhl, vwap, ltt, lpx)).T
                d = tday
                c = repo.kdb_ib_col
                # replace whatever the repo has for this day
                dbar.remove_day(d)
                dbar.update([b], [d], [c], bar_sec)
                lastpx = lpx[-1]
                prev_con = con

            it.next()
            tday = it.yyyymmdd()
Exemple #6
0
def ingest_all_symb(sday,
                    eday,
                    repo_path=None,
                    get_missing=True,
                    sym_list=None,
                    future_inclusion=['front', 'back'],
                    sym_list_exclude=[],
                    overwrite_dbar=True,
                    EarliestMissingDay='20180201'):
    """
    This will go to IB historical data, usually in /cygdrive/e/ib/kisco,
    read all the symbols defined by sym_list and update the repo at repo_path,
    which is usually at /cygdrive/e/research/kdb
    if sym_list is None, then it will include all the symbol collected by ibbar.
    future_inclusion defaults to include both front and back.  It is included
        for ibbar's ingestion, when only front contract is in hist, but
        the back contracts haven't been retrieved yet.
    NOTE: ETF and FX symbols are not affected by future_inclusion
    """
    import ibbar
    if repo_path is None:
        repo_path = ibbar.read_cfg('RepoPath')
    fut_sym = ibbar.sym_priority_list
    fx_sym = l1.ven_sym_map['FX']
    etf_sym = ibbar.ib_sym_etf
    fut_sym2 = ibbar.sym_priority_list_l1_next
    idx_sym = ibbar.ib_sym_idx
    if sym_list is None:
        sym_list = fut_sym + fx_sym + etf_sym + idx_sym

    for sym in sym_list:
        if sym in sym_list_exclude:
            continue
        print 'ingesting ', sym
        if sym in fut_sym and 'front' in future_inclusion:
            barsec = 1
            dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True)
            gen_daily_bar_ib(sym,
                             sday,
                             eday,
                             barsec,
                             dbar_repo=dbar,
                             is_front_future=True,
                             get_missing=get_missing,
                             overwrite_dbar=overwrite_dbar,
                             EarliestMissingDay=EarliestMissingDay)
        elif sym in fx_sym:
            barsec = 5
            dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True)
            gen_daily_bar_ib(sym,
                             sday,
                             eday,
                             barsec,
                             dbar_repo=dbar,
                             get_missing=get_missing,
                             overwrite_dbar=overwrite_dbar,
                             EarliestMissingDay=EarliestMissingDay)
        elif sym in etf_sym or sym in idx_sym:
            barsec = 1
            dbar = repo.RepoDailyBar(sym, repo_path=repo_path, create=True)
            gen_daily_bar_ib(sym,
                             sday,
                             eday,
                             barsec,
                             dbar_repo=dbar,
                             get_missing=get_missing,
                             overwrite_dbar=overwrite_dbar)
        if sym in fut_sym2 and 'back' in future_inclusion:
            barsec = 1
            repo_path_nc = repo.nc_repo_path(
                repo_path)  # repo path of next contract
            dbar = repo.RepoDailyBar(sym, repo_path=repo_path_nc, create=True)
            gen_daily_bar_ib(sym,
                             sday,
                             eday,
                             barsec,
                             dbar_repo=dbar,
                             is_front_future=False,
                             get_missing=get_missing,
                             overwrite_dbar=overwrite_dbar,
                             EarliestMissingDay=EarliestMissingDay)