Esempio n. 1
0
def get_cols(bar, start_day, day_cnt, bar_sec, end_day='', cols=None) :
    """
    Get the requested columns for every trading day of a period.

    It is similar to get_weekly() but the period does not have to be made
    of whole weeks, so the result is a flat two dimensional array (one row
    per bar) instead of a per-week array.

    bar:       a DailyBar object loaded with repo data, i.e. bar.set_repo(b)
               has been called
    start_day: first day of the period, yyyymmdd string
    day_cnt:   integer number of trading days, including the start_day.
               Ignored if end_day is specified
    bar_sec:   bar period, passed on to bar.get_bar()
    end_day:   last day of the period (inclusive), yyyymmdd string.
               An empty string means "derive it from day_cnt"
    cols:      columns to get; the first one, cols[0], has to be utcc.
               None means [utcc, lrc, volc, vbsc, vol1c]

    return: ca, dt0
            ca:  two dim array indexed by bar number, each row holding the
                 cols specified
            dt0: list of datetime objects, one per row of ca, built from
                 column 0 of ca with datetime.fromtimestamp()
    raise:  ValueError if the period contains no trading day

    Internally, it calls the bar's get_bar function, one day at a time.
    """
    if cols is None :
        # built per call so that the default is never shared between calls
        cols = [utcc, lrc, volc, vbsc, vol1c]
    assert cols[0] == utcc, 'first column has to be utcc'
    if len(end_day) == 0 :
        # day_cnt includes start_day and the loop below is inclusive of
        # end_day, so the last day is day_cnt-1 trading days after the start
        ti=l1.TradingDayIterator(start_day)
        ti.next_n_trade_day(day_cnt-1)
        end_day=ti.yyyymmdd()
    ca_arr=[]
    ti=l1.TradingDayIterator(start_day)
    day=ti.yyyymmdd()
    while day <= end_day :
        ca_arr.append(bar.get_bar(day, 1, bar_sec, cols=cols))
        ti.next()
        day=ti.yyyymmdd()
    if len(ca_arr) == 0 :
        # np.vstack would raise an unhelpful ValueError on an empty list
        raise ValueError('no trading day from ' + str(start_day) + ' to ' + str(end_day))
    ca=np.vstack(ca_arr)
    dt0=[datetime.datetime.fromtimestamp(utc0) for utc0 in ca[:, 0]]
    return ca, dt0
Esempio n. 2
0
 def get_bar(self, start_day, day_cnt, bar_sec, end_day='', cols=[utcc,lrc]) :
     """
     return bars for specified period, with bar period
     return index of multiday bar starting from
     start_day, running for day_cnt
     """
     if end_day!='' :
         print 'end_day not null, ignoring day_cnt: ', day_cnt
         ti=l1.TradingDayIterator(start_day)
         day_cnt=0
         day=ti.yyyymmdd()
         while day <= end_day :
             day_cnt+=1
             ti.next()
             day=ti.yyyymmdd()
     print 'got ', day_cnt, ' days from ', start_day, ' to ', end_day
     ti=l1.TradingDayIterator(start_day)
     day=ti.yyyymmdd()
     dc=0
     ixa=[]
     while dc < day_cnt :
         try :
             ixa.append(self.daily_idx(day))
         except  Exception as e :
             print str(e)
             print 'problem getting for the day ', day, ', continue'
             ixa.append((-1,0,0,day))
         ti.next()
         day=ti.yyyymmdd()
         dc+=1
     return self.bar_by_idx(ixa,bar_sec,cols)
Esempio n. 3
0
def get_weekly(dbar, symbol, start_day, end_day, bar_sec, cols=[utcc,lrc,volc,vbsc,vol1c]) :
    """
    dbar is the DailyBar object
    wbdict is a basic object holding weekly data as 3 dimensional array
    wbdict['wbar'] the 3D array of [week, bar_of_week, cols]
    where first 5 columns in cols has to be utcc, lrc, volc, vbsc, vol1c
    """
    ti=l1.TradingDayIterator(start_day)
    while ti.dt.weekday() != 0 :
        ti.next()
    day=ti.yyyymmdd()
    ti2=l1.TradingDayIterator(end_day)
    while ti2.dt.weekday() != 4 :
        ti2.prev()
    end_day=ti2.yyyymmdd()
    print 'getting from ', day , ' to ', end_day
    wbar=[]
    while day <= end_day :
        while ti.dt.weekday() != 4 :
            ti.next()
        eday=ti.yyyymmdd()
        wbar.append(dbar.get_bar(day,0,bar_sec,end_day=eday,cols=cols))
        while ti.dt.weekday() != 0 :
            ti.next()
        day=ti.yyyymmdd()
    wbar=np.array(wbar)
    wbdict={'wbar':wbar,'cols':cols}
    return wbdict
Esempio n. 4
0
File: ibbar.py Progetto: tt9024/kr
def ingest_ib(sday, eday, get_missing=True, ingest_hist=True, ingest_l1=True):
    """
    sday is the first day to write to repo, usually a monday
    eday is the last day to write to repo, usually a friday
    get_missing if true, will try to get a missing day from IB,
    so it assumes the IB connectivity.
    ingest_hist if true, will read qt/trd files from hist/ and overwrite repo
    ingest_l1 if true, will read live *B1S.csv files from bar/ and update repo

    In normal case, this is supposed to be run on a ib machine, on a weekly basis.
    Note, it is not recommended to run while the normal history download
    is running, since there is some problem with duplicate session id
    that needs to be fixed.
    
    This first gets hist/ to populate the repo, then reads IB_L1 *B1S*.csv
    to update the trades and other bid/ask columns.
    """
    if ingest_hist:
        print "ingesting history!"
        import IB_hist as ibh
        ibh.ingest_all_symb(sday, eday, get_missing=get_missing)

    if ingest_l1:
        print "ingesting l1!"
        import IB_L1_Bar as ibl1
        # figure out the bar_dir_list as fridays during sday to eday
        # since the bar_dir is the friday containing previous 5 trading days

        tdi = l1.TradingDayIterator(sday)
        bar_list = []
        while tdi.yyyymmdd() <= eday:
            if tdi.weekday() == 4:
                bar_list.append(tdi.yyyymmdd())
            tdi.next()
        ibl1.ingest_all_l1(bar_list)
Esempio n. 5
0
File: fclient.py Progetto: tt9024/kr
def getFday(cl_bar_file, fday = None) :
    if fday is None :
        fday = datetime.datetime.now().strftime('%Y%m%d')

    ti = l1.TradingDayIterator(fday)
    assert ti.weekday() == 4, 'not a friday '+ fday

    ti.prev()
    thuday = ti.yyyymmdd()
    cl=np.genfromtxt(cl_bar_file,delimiter=',',usecols=[0,1,2,3,4])
    t0=l1.TradingDayIterator.local_ymd_to_utc(thuday,4,45)
    t1=l1.TradingDayIterator.local_ymd_to_utc(thuday,6,15)
    t2=l1.TradingDayIterator.local_ymd_to_utc(thuday,16,55)
    t3=l1.TradingDayIterator.local_ymd_to_utc(thuday,18,35)
    i0 = np.nonzero(cl[:,0]==t0)[0]
    i1 = np.nonzero(cl[:,0]==t1)[0]  
    i2 = np.nonzero(cl[:,0]==t2)[0] 
    i3 = np.nonzero(cl[:,0]==t3)[0] 
    mid0=cl[i0,2]
    mid1=cl[i1,2] 
    mid2=cl[i2,2] 
    mid3=cl[i3,2]
    lr1 = np.log(mid1)-np.log(mid0) 
    lr2 = np.log(mid3)-np.log(mid2)
    print lr1, lr2
    xmu = [0.000498121848, 0.000272078733]
    xstd = [0.00485678068, 0.00245295458]
    fcst = (lr1-xmu[0])/xstd[0] * 0.00064095 + (lr2-xmu[1])/xstd[1] *0.00117629 + 0.00107083
    print fcst
Esempio n. 6
0
def remove_outlier_lr(dbar, sday, eday, outlier_mul=500):
    """
    Remove outlier log-return ticks from the days sday..eday of a repo.

    dbar:        repo object; load_day(), _delete_rows(), overwrite() and
                 the symbol attribute are used
    sday, eday:  first and last day (inclusive), yyyymmdd strings
    outlier_mul: a tick is a candidate when abs(lr) exceeds
                 max(std(lr) * outlier_mul, 0.0001) for that day

    Only days that have data and a bar period of 1 are looked at.
    A candidate is only removed when its volume is below the day's mean
    volume and it is not in the pre-market hours of the symbol.
    Whenever a day has such low-volume candidates, the lpx column is
    dropped (to be reconstructed from lr) and the day is overwritten.
    """
    def _trading_days():
        # yields each trading day string from sday up to and including eday
        day_it = l1.TradingDayIterator(sday)
        day = day_it.yyyymmdd()
        while day <= eday:
            yield day
            day_it.next()
            day = day_it.yyyymmdd()

    for d in _trading_days():
        b, c, bs = dbar.load_day(d)
        if len(b) == 0 or bs != 1:
            continue

        lr = b[:, ci(c, lrc)]
        vol = b[:, ci(c, volc)]
        lr_limit = max(np.std(lr) * outlier_mul, 0.0001)
        vol_mean = np.mean(vol)

        big_lr = np.nonzero(np.abs(lr) > lr_limit)[0]
        if len(big_lr) == 0:
            continue
        thin = np.nonzero(vol[big_lr] < vol_mean)[0]
        if len(thin) == 0:
            continue

        print('outlier ', len(thin), ' ticks!')
        suspects = big_lr[thin]
        utc = b[:, ci(c, utcc)]
        to_delete = []
        for row in suspects:
            dt = datetime.datetime.fromtimestamp(utc[row])
            if l1.is_pre_market_hour(dbar.symbol, dt):
                print('NOT removing 1 tick (pre_market=True: ',
                      dbar.symbol, ', ', dt)
            else:
                to_delete.append(row)

        # NOTE(review): the return value of _delete_rows is ignored, so this
        # relies on it changing b in place -- confirm against its definition
        dbar._delete_rows(b, c, to_delete)
        # remove lpx and overwrite the day
        # to be reconstructed from lr
        if lpxc in c:
            b = np.delete(b, ci(c, lpxc), axis=1)
            c.remove(lpxc)
        dbar.overwrite([b], [d], [c], 1)
Esempio n. 7
0
def get_future_trade_front(symbol_list, start_date, end_date, kdb_util='bin/get_trade', mock_run=False) :
    for symbol in symbol_list :
        bar_dir = symbol
        os.system(' mkdir -p ' + bar_dir)
        ti = l1.TradingDayIterator(start_date)
        day=ti.yyyymmdd()
        while day <= end_date :
            fc=l1.FC(symbol, day)
            # for each day, get trades for FC, FC+, FC/FC+, FC+/FC++
            #fc_next, roll_day=FC_next(symbol, day)
            #fc_next_next, roll_day=FC_next(symbol, roll_day)

            for c in [fc ] :
                fn=bar_dir+'/'+c+'_trd_'+day+'.csv'
                print 'checking ', c, fn
                # check if the file exists and the size is small
                if l1.get_file_size(fn) < 10000 and l1.get_file_size(fn+'.gz') < 10000 :
                    os.system('rm -f ' + fn + ' > /dev/null 2>&1')
                    os.system('rm -f ' + fn + '.gz' + ' > /dev/null 2>&1')
                    cmdline=kdb_util + ' ' + c + ' ' + day + ' > ' + fn
                    print 'running ', cmdline
                    if not mock_run :
                        os.system( cmdline )
                        os.system( 'gzip ' + fn )
                        os.system( 'sleep 5' )
            ti.next()
            day=ti.yyyymmdd()
Esempio n. 8
0
def weekly_get_ingest(start_end_days=None,
                      repo_path='repo_hist',
                      rsync_dir_list=None):
    """
    This is supposed to be run on IB machine at EoD Friday.
    It first gets all the history of this week, and then ingest
    into a hist_repo.  The need for ingestion, is to correct
    on any missing data.  After this run, the files in the hist dir
    is copied to data machine
    """
    import ibbar
    if start_end_days is None:
        cdt = datetime.datetime.now()
        if cdt.weekday() != 4:
            raise ValueError('sday not set while running on non-friday!')
        eday = cdt.strftime('%Y%m%d')
        tdi = l1.TradingDayIterator(eday)
        sday = tdi.prev_n_trade_day(5).yyyymmdd()
    else:
        sday, eday = start_end_days

    print 'Got start/end day: ', sday, eday
    ibbar.weekly_get_hist(sday, eday)

    #No need to do this, unless the previous get failed. But
    #then it should be tried again.
    #ingest_all_symb(sday, eday, repo_path=repo_path)
    hist_path = ibbar.read_cfg('HistPath')
    if rsync_dir_list is not None:
        for rsync_dir in rsync_dir_list:
            if len(rsync_dir) > 0:
                os.system('rsync -avz ' + hist_path + '/ ' + rsync_dir)
Esempio n. 9
0
    def _get_overnight_lr(self, day, firstpx) :
        """
        This goes to self.dbar to try to get the lastpx of
        day-1 and calculate lr. 
        
        Note 1: 
        Only do it if the day's contract is consistent. 

        Note 2: 
        The overnight lr also calculated by
        IB_hist.  This calculation is meant to be used as a
        back up in case IB_hist is not available on that day. 
        Otherwise, always use IB_hist's overnight lr.  The
        reason is because 'firstpx' here could be delayed
        but is more reliable in IB_hist. 

        Note 3:
        The IB_L1 ingestions is meant to be run after 
        IB_hist ingestion.  And is supposedly to run
        in order of days.  So dbar should be populated
        on previous day.
        """
        # check the contract
        tdi=l1.TradingDayIterator(day)
        tdi.prev()
        day_prev=tdi.yyyymmdd()
        if l1.is_future(self.symbol) :
            ct_today=l1.FC(self.symbol, day)
            ct_prev = l1.FC(self.symbol,day_prev)
            if ct_prev != ct_today :
                print 'IB_L1 Failed Overnight ', self.symbol, day, ' due to contract roll from ', ct_prev, ' to ', ct_today
                return 0

        # get the lastpx
        b,c,bs=self.dbar.load_day(day_prev)
        if len(b) > 0 :
            lastpx=b[-1,repo.ci(c,repo.lpxc)]
        else :
            print 'IB_L1 Failed Overnight ', self.symbol, day, ' no lastpx on previous day ', day_prev, self.dbar.path
            return 0

        # return lr
        return np.log(firstpx)-np.log(lastpx)
Esempio n. 10
0
def copy_from_repo(symarr,
                   repo_path_write='./repo',
                   repo_path_read_arr=['./repo_cme'],
                   bar_sec=1,
                   sday='20170601',
                   eday='20171231',
                   keep_overnight='onzero'):
    """
    Copy the days sday..eday (inclusive) from one repo to another.

    UpdateFromRepo() is called once per trading day, with a one-element
    day list; all other arguments, including the overnight lr option
    keep_overnight, are handed to it unchanged.
    """
    def _each_trading_day():
        # yields each trading day string from sday up to and including eday
        day_it = l1.TradingDayIterator(sday)
        day = day_it.yyyymmdd()
        while day <= eday:
            yield day
            day_it.next()
            day = day_it.yyyymmdd()

    for one_day in _each_trading_day():
        UpdateFromRepo(symarr, [one_day],
                       repo_path_write,
                       repo_path_read_arr,
                       bar_sec,
                       keep_overnight=keep_overnight)
Esempio n. 11
0
def plot_dist_weekly_by_utc(wbdict, weekday, hhmmss, param_str='', if_plot_dist=False) :
    """
    Plot the weekly distribution of the bar at a given weekday and time.

    The bar index is looked up in the first week of wbdict, then
    plot_dist_weekly_by_ix() is called with it.

    wbdict:  returned by bar.get_weekly()
    weekday: integer as in datetime.weekday(), mon is 0, sun is 6
    hhmmss:  time string such as 103000, in '%H%M%S' format
    param_str, if_plot_dist: handed to plot_dist_weekly_by_ix()

    return: the bar index found
    Raises AssertionError if the weekday or the time cannot be found.
    """
    assert weekday < 7, 'weekday has to be less than 7'
    wbar = wbdict['wbar']
    first_dt = datetime.datetime.fromtimestamp(wbar[0,0,0])
    ti = l1.TradingDayIterator(first_dt.strftime('%Y%m%d'), adj_start=False)

    # step forward at most 7 times looking for the weekday
    for _ in range(7) :
        if ti.dt.weekday() == weekday :
            break
        ti.next()
    assert ti.dt.weekday() == weekday, 'weekday ' + str(weekday) + ' not found!'

    target_dt = datetime.datetime.strptime(ti.yyyymmdd()+hhmmss, '%Y%m%d%H%M%S')
    target_utc = int(l1.TradingDayIterator.local_dt_to_utc(target_dt))
    ix = np.searchsorted(wbar[0,:,0].astype(int), target_utc)

    # make sure the bar found is really the one asked for
    found_dt = datetime.datetime.fromtimestamp(wbar[0,ix,0])
    assert found_dt.strftime('%H%M%S')==hhmmss and found_dt.weekday()==weekday, hhmmss+' not found on weekday ' + str(weekday)
    plot_dist_weekly_by_ix(wbdict, ix, if_plot_dist=if_plot_dist, param_str=param_str)
    return ix
Esempio n. 12
0
 def daily_idx(self,day) :
     """
     Return the index of the bars of one trading day.

     The day starts at 18:00:00+self.bs of the previous day and ends at
     17:00:00 of the current day (both taken relative to the utc of the
     iterator's dt for that day).

     return: (i, six, eix, day)
             i   selects the array self.b[i] holding the day
             six row of the first bar of the day in self.b[i]
             eix row of the last bar of the day in self.b[i], included
     Raises ValueError if day is not a trading day, AssertionError if the
     day spans two arrays or its first/last bar is not in the repo.
     """
     ti = l1.TradingDayIterator(day)
     if ti.yyyymmdd() != day :
         raise ValueError(day + ' not a trading day')

     day_utc0 = float(l1.TradingDayIterator.local_dt_to_utc(ti.dt))
     first_utc = day_utc0-6*3600+self.bs   # i.e. 18:00:00 + bs
     last_utc = day_utc0+17*3600

     # both ends have to fall into the same array of self.b
     arr_starts = self.utc0[1:]
     i, i_end = [np.searchsorted(arr_starts, u+1) for u in (first_utc, last_utc)]
     assert i==i_end, 'repo error, daily bar accross array ' + str(day) + ': '+str( [i,i_end] )

     bar_utc = self.b[i][:,utcc]
     six = np.searchsorted(bar_utc, first_utc)
     eix = np.searchsorted(bar_utc, last_utc)  # eix is included
     assert eix > six, 'nothing found from repo on ' + day
     assert bar_utc[six]==first_utc, 'repo bar of start not found ' + day + ': ' + str(first_utc)
     assert bar_utc[eix]==last_utc, 'repo bar of end not found ' + day + ': ' + str(last_utc)
     return i, six, eix, day
Esempio n. 13
0
def gen_bar(sym_array,
            sday,
            eday,
            repo_cme_path='./repo_cme',
            cme_path='./cme',
            bar_sec=1,
            nc=False):
    """
    Build fixed-period trade bars from per-day tick files and store them,
    one trading day at a time, in a RepoDailyBar repo.

    getting from the ts [utc, px, signed_vol]
    output format bt, lr, vl, vbs, lrhl, vwap, ltt, lpx

    sym_array:     symbols to process
    sday, eday:    first and last day (inclusive), yyyymmdd strings;
                   sday has to be a trading day
    repo_cme_path: repo to store the 1S trd bars; has to end with 'nc'
                   when nc is True
    cme_path:      handed to get_fn() to locate the tick file of a day
    bar_sec:       bar period of the generated bars
    nc:            if True the contract is l1.FC_next(symbol, day)[0],
                   otherwise l1.FC(symbol, day)

    return : None
        update (remove first) dbar with bar_arr, days, col_arr

    raise : ValueError if sday is not a trading day,
            AssertionError if nc is set and repo_cme_path does not end
            with 'nc'
    """

    if nc:
        assert repo_cme_path[
            -2:] == 'nc', 'repo_cme_path=' + repo_cme_path + ' not ending with nc'
    for symbol in sym_array:
        # NOTE(review): the bare except treats any failure of the first
        # constructor call as "repo does not exist yet"
        try:
            dbar = repo.RepoDailyBar(symbol, repo_path=repo_cme_path)
        except:
            print 'repo_trd_path failed, trying to create'
            dbar = repo.RepoDailyBar(symbol,
                                     repo_path=repo_cme_path,
                                     create=True)

        start_hour, end_hour = l1.get_start_end_hour(symbol)
        TRADING_HOURS = end_hour - start_hour
        # sday has to be a trading day
        it = l1.TradingDayIterator(sday)
        tday = it.yyyymmdd()
        if tday != sday:
            raise ValueError('sday has to be a trading day! sday: ' + sday +
                             ' trd_day: ' + tday)

        # lastpx == 0 / prev_con == '' mean "no usable previous day"
        lastpx = 0
        prev_con = ''
        while tday <= eday:
            # the day's bars cover (sutc, eutc]
            eutc = it.local_ymd_to_utc(tday, h_ofst=end_hour)
            sutc = eutc - (TRADING_HOURS) * 3600
            if nc:
                con = l1.FC_next(symbol, tday)[0]
            else:
                con = l1.FC(symbol, tday)

            # contract name used below: symbol + last two chars of con
            con = symbol + con[-2:]
            try:
                bar = bar_by_file(get_fn(cme_path, symbol, tday, con))
            except (KeyboardInterrupt):
                print 'interrupt!'
                return
            except:
                # any other problem is treated as a day without ticks
                print 'problem getting ', symbol, tday
                bar = []

            if len(bar) == 0:
                # no ticks: nothing is written and the next day starts
                # without a previous price
                lastpx = 0
                prev_con = ''
            else:
                # this is the good case, prepare for the bar
                # 1) get bar with start/stop, 2) contract updated 3) lastpx
                # need to allow for entire content being in one ta, i.e. some
                # days having tds==2 but all contents in one ta, due to gmt_offset

                # have everything, need to get to
                # output format bt, lr, vl, vbs, lrhl, vwap, ltt, lp
                if lastpx == 0 or prev_con != con:
                    # no previous price on the same contract: start from
                    # the day's first tick price
                    lastpx = bar[0, 1]
                # bar times: the end time of each bar
                bt = np.arange(sutc + bar_sec, eutc + bar_sec, bar_sec)
                # tick series with one synthetic element in front (time
                # sutc, first price, zero volume)
                tts = np.r_[sutc, bar[:, 0]]
                pts = np.r_[bar[0, 1], bar[:, 1]]
                vts = np.r_[0, bar[:, 2]]
                pvts = np.abs(vts) * pts

                # for each bar time, the index into tts/pts/vts of the last
                # tick not later than it (the synthetic element if none)
                pxix = np.clip(np.searchsorted(tts[1:], bt + 1e-6), 0,
                               len(tts) - 1)
                lpx = pts[pxix]
                # log return of each bar, the first one against lastpx
                lr = np.log(np.r_[lastpx, lpx])
                lr = lr[1:] - lr[:-1]

                # tricky way to get index right on volumes
                # (per-bar value = difference of the cumulative sum taken
                # at consecutive bar times)
                btdc = np.r_[0, np.cumsum(vts)[pxix]]
                vbs = btdc[1:] - btdc[:-1]
                btdc = np.r_[0, np.cumsum(np.abs(vts))[pxix]]
                vol = btdc[1:] - btdc[:-1]

                # even tickier way to get vwap/ltt right
                # ixg: bars with non-zero volume
                ixg = np.nonzero(vol)[0]
                btdc = np.r_[0, np.cumsum(pvts)[pxix]]
                vwap = lpx.copy()  #when there is no vol
                vwap[ixg] = (btdc[1:] - btdc[:-1])[ixg] / vol[ixg]
                ltt = np.zeros(len(bt))
                ltt[ixg] = tts[pxix][ixg]
                repo.fwd_bck_fill(ltt, v=0)

                # give up, ignore the lrhl for trd bars
                lrhl = np.zeros(len(bt))

                b = np.vstack((bt, lr, vol, vbs, lrhl, vwap, ltt, lpx)).T
                d = tday
                c = repo.kdb_ib_col
                # replace whatever the repo had for this day
                dbar.remove_day(d)
                dbar.update([b], [d], [c], bar_sec)
                # carry the close and contract over to the next day
                lastpx = lpx[-1]
                prev_con = con

            it.next()
            tday = it.yyyymmdd()
Esempio n. 14
0
def write_daily_bar(bar,bar_sec=5,last_close_px=None) :
    """
    Convert a multi-day array of raw bars into daily bars on a fixed time
    grid of bar_sec, one 23 hour block (18:00 to 17:00 local) per day.

    bar: two dim array in increasing time.  Columns used here:
         0 bar utc, 1 last trade time, 2 price used as the initial
         last_close_px, 3 high, 4 low, 5 close, 6 vwap, 7 volume,
         8 and 9 the two sides of the volume
         (price/volume meanings are taken from the local variable
         names -- TODO confirm against the producer of bar)
    bar_sec: bar period
    last_close_px: price the first log return is measured against; if
         None it is looked up from column 2 of bar

    return: (bars, trd_day_start, trd_day_end)
         bars: the days that had data stacked vertically, one row per
               grid point with columns
               [utc, lr, lr_hi, lr_lo, lr_vw, vlm, vbs, ltt, lpx]
         trd_day_start, trd_day_end: yyyymmdd strings

    NOTE(review): the '/' in N relies on integer division (Python 2);
    under true division N becomes a float and np.zeros(N) fails.
    """
    import pandas as pd
    dt=datetime.datetime.fromtimestamp(bar[0,0])

    # get the initial day, last price
    day_start=dt.strftime('%Y%m%d')
    utc_s = int(l1.TradingDayIterator.local_ymd_to_utc(day_start, 18, 0, 0))
    if last_close_px is None :
        # first bar at or after 17:00:00+bar_sec of day_start; its column 2
        # becomes the previous close
        x=np.searchsorted(bar[1:,0], float(utc_s-3600+bar_sec))
        last_close_px=bar[x,2]
        print 'last close price set to previous close at ', datetime.datetime.fromtimestamp(bar[x,0]), ' px: ', last_close_px
    else :
        print 'last close price set to ', last_close_px

    day_end=datetime.datetime.fromtimestamp(bar[-1,0]).strftime('%Y%m%d')
    # deciding on the trading days
    if dt.hour > 17 :
        # the first bar is after 17:59, it is counted to the next day
        ti=l1.TradingDayIterator(day_start,adj_start=False)
        ti.next()
        trd_day_start=ti.yyyymmdd()
    else :
        trd_day_start=day_start
    trd_day_end=day_end
    print 'preparing bar from ', day_start, ' to ', day_end, ' , trading days: ', trd_day_start, trd_day_end

    ti=l1.TradingDayIterator(day_start, adj_start=False)
    day=ti.yyyymmdd()  # day is the start_day
    barr=[]
    TRADING_HOURS=23
    while day < day_end:
        ti.next()
        day1=ti.yyyymmdd()
        utc_e = int(l1.TradingDayIterator.local_ymd_to_utc(day1, 17,0,0))

        # get start backwards for starting on a Sunday
        utc_s = utc_e - TRADING_HOURS*3600
        day=datetime.datetime.fromtimestamp(utc_s).strftime('%Y%m%d')

        i=np.searchsorted(bar[:, 0], float(utc_s)-1e-6)
        j=np.searchsorted(bar[:, 0], float(utc_e)-1e-6)
        bar0=bar[i:j,:]  # take the bars in between the first occurance of 18:00:00 (or after) and the last occurance of 17:00:00 or before

        N = (utc_e-utc_s)/bar_sec  # but we still fill in each bar
        # position of each raw bar on the output grid
        ix_utc=((bar0[:,0]-float(utc_s))/bar_sec+1e-9).astype(int)
        bar_utc=np.arange(utc_s+bar_sec, utc_e+bar_sec, bar_sec) # bar time will be time of close price, as if in prod

        print 'getting bar ', day+'-18:00', day1+'-17:00', ' , got ', j-i, 'bars'
        # start to construct bar
        if j<=i :
            print ' NO bars found, skipping'
        else :
            bar_arr=[]
            bar_arr.append(bar_utc.astype(float))

            # construct the log returns for each bar, fill in zeros for gap
            #lpx_open=np.log(bar0[:,2])
            # the "open" of a bar is the close of the previous one, the
            # first bar of the day opens at last_close_px
            lpx_open=np.log(np.r_[last_close_px,bar0[:-1,5]])
            lpx_hi=np.log(bar0[:,3])
            lpx_lo=np.log(bar0[:,4])
            lpx_close=np.log(bar0[:,5])
            lpx_vwap=np.log(bar0[:,6])
            lr=lpx_close-lpx_open
            lr_hi=lpx_hi-lpx_open
            lr_lo=lpx_lo-lpx_open
            lr_vw=lpx_vwap-lpx_open

            # remove bars having abnormal return, i.e. circuit break for ES
            # with 9999 prices
            MaxLR=0.2
            ix1=np.nonzero(np.abs(lr)>=MaxLR)[0]
            ix1=np.union1d(ix1,np.nonzero(np.abs(lr_hi)>=MaxLR)[0])
            ix1=np.union1d(ix1,np.nonzero(np.abs(lr_lo)>=MaxLR)[0])
            ix1=np.union1d(ix1,np.nonzero(np.abs(lr_vw)>=MaxLR)[0])
            if len(ix1) > 0 :
                # the returns are zeroed, the rows themselves are kept
                print 'warning: removing ', len(ix1), 'ticks exceed MaxLR (lr/lo/hi/vw) ', zip(lr[ix1],lr_hi[ix1],lr_lo[ix1],lr_vw[ix1])
                lr[ix1]=0
                lr_hi[ix1]=0
                lr_lo[ix1]=0
                lr_vw[ix1]=0

            # the trade volumes for each bar, fill in zeros for gap
            vlm=bar0[:,7]
            vb=bar0[:,8]
            vs=np.abs(bar0[:,9])
            vbs=vb-vs

            # zero out nan/inf, then spread each series onto the N point grid
            # (vlm is a slice of bar, so zeroing it writes through to the input)
            for v0, vn in zip([lr,lr_hi,lr_lo,lr_vw,vlm,vbs], ['lr','lr_hi','lr_lo','lr_vw','vlm','vbs']) :
                nix=np.nonzero(np.isnan(v0))[0]
                nix=np.union1d(nix, np.nonzero(np.isinf(np.abs(v0)))[0])
                if len(nix) > 0 :
                    print 'warning: removing ', len(nix), ' nan/inf ticks for ', vn
                    v0[nix]=0
                b0=np.zeros(N).astype(float)
                b0[ix_utc]=v0
                bar_arr.append(b0.copy())
         
            # get the last trade time, this is needs to be
            # forward filled over the gaps of the grid
            # NOTE(review): the fills below are done on df, but ltt is what
            # gets appended -- this only works if the DataFrame shares its
            # memory with ltt; confirm for the pandas version in use
            ltt=np.empty(N)*np.nan
            ltt[ix_utc]=bar0[:,1]
            df=pd.DataFrame(ltt)
            df.fillna(method='ffill',inplace=True)
            if not np.isfinite(ltt[0]) :
                ptt=0 #no previous trading detectable
                if i > 0 : #make some effort here
                    ptt=bar[i-1,1]
                    if not np.isfinite(ptt) :
                        ptt=0
                df.fillna(ptt,inplace=True)
            bar_arr.append(ltt)

            # get the last price, as a debugging tool
            # (same fill pattern and same NOTE(review) as for ltt above)
            lpx=np.empty(N)*np.nan
            lpx[ix_utc]=bar0[:,5]
            df=pd.DataFrame(lpx)
            df.fillna(method='ffill',inplace=True)
            if not np.isfinite(lpx[0]) :
                df.fillna(last_close_px,inplace=True)
            bar_arr.append(lpx)
            barr.append(np.array(bar_arr).T.copy())
            # the next day opens at this day's last price
            last_close_px=lpx[-1]

        day=day1

    # NOTE(review): barr is empty if no day had any bar, in which case
    # np.vstack raises
    return np.vstack(barr), trd_day_start, trd_day_end
Esempio n. 15
0
    def daily_bar(self,
                  start_day,
                  day_cnt,
                  bar_sec,
                  end_day=None,
                  cols=None,
                  group_days=5,
                  verbose=False):
        """
        return 3-d array of bars for specified period, with bar period,
        column, grouped by days (i.e. daily, weekly, etc)

        start_day: the first trading day to be returned
        day_cnt :  number of trading days to be returned, can be None to
                   use end_day; ignored when end_day is given
        bar_sec :  bar period to be returned
        end_day :  last trading day to be returned, can be None to use
                   day_cnt
        cols    :  columns to be returned, None means
                   [utcc, lrc, volc, vbsc, lpxc]
        group_days: number of days per group along the first dimension
                   of the 3-d array, daily: 1, weekly=5, etc
        verbose :  if True, report the days filled because they are
                   missing in the repo

        The period is first narrowed to the range between the first and
        the last day found in the repo.  If the number of days in that
        range is not a multiple of group_days, the earliest days are
        dropped until it is.  Days missing inside the range are filled by
        self._fill_daily_bar_col().

        return: array of shape [groups, bars_per_group, len(cols)]
        raise : ValueError if neither day_cnt nor end_day is given, if
                group_days is less than 1, if no day of the period is in
                the repo, or if fewer than group_days days are found
        """
        if cols is None:
            # built per call so that the default is never shared
            cols = [utcc, lrc, volc, vbsc, lpxc]
        if group_days < 1:
            raise ValueError('group_days has to be at least 1, got %s' %
                             str(group_days))
        if end_day is None:
            if day_cnt is None:
                raise ValueError('either day_cnt or end_day has to be given')
            ti = l1.TradingDayIterator(start_day)
            ti.next_n_trade_day(day_cnt - 1)
            end_day = ti.yyyymmdd()

        # getting the day count, removing initial and final missing days
        ti = l1.TradingDayIterator(start_day)
        day = ti.yyyymmdd()
        darr = []
        inarr = []
        while day <= end_day:
            darr.append(day)
            inarr.append(bool(self.has_day(day)))
            ti.next()
            day = ti.yyyymmdd()

        ix = np.nonzero(inarr)[0]
        if len(ix) == 0:
            raise ValueError('no bars found in repo! %s to %s!' %
                             (start_day, end_day))
        start_day = darr[ix[0]]
        end_day = darr[ix[-1]]
        day_cnt = int(ix[-1] - ix[0] + 1)

        # '//' keeps the floor division on both python 2 and 3
        groups = day_cnt // group_days
        if groups == 0:
            # would otherwise end in a zero division at the reshape
            raise ValueError(
                'only %d days found in repo from %s to %s, less than group_days %d'
                % (day_cnt, start_day, end_day, group_days))
        if groups * group_days != day_cnt:
            print('( Warning! day_cnt ' + str(day_cnt) +
                  ' not multiple of group_days ' + str(group_days) +
                  ' adjusting...)')
            # drop the earliest days so that whole groups remain
            day_cnt = groups * group_days
            start_day = darr[ix[-1] - day_cnt + 1]
        print("got", day_cnt, 'days from', start_day, 'to', end_day)

        ti = l1.TradingDayIterator(start_day)
        day = ti.yyyymmdd()
        bar = []
        while day <= end_day:
            b, c, bs = self.load_day(day)
            if len(b) == 0:
                if verbose:
                    print(" missing, filling zeros on ", day)
                bar.append(self._fill_daily_bar_col(day, bar_sec, cols))
            else:
                bar.append(self._scale(day, b, c, bs, cols, bar_sec))
            ti.next()
            day = ti.yyyymmdd()

        bar = np.vstack(bar)

        # process missing days if any
        for c in [lpxc, lttc] + col_idx(['ism1']):
            if c in cols:
                self._fill_last(bar[:, ci(cols, c)])

        bar = bar.reshape((groups, bar.shape[0] // groups, bar.shape[1]))
        return bar
Esempio n. 16
0
File: ibbar.py Progetto: ssh352/kr
def get_ib_future(symbol_list,
                  start_date,
                  end_date,
                  barsec,
                  ibclient=IB_CLIENT,
                  clp='IB',
                  mock_run=False,
                  getqt=True,
                  gettrd=False,
                  cid=100,
                  start_end_hour=[],
                  next_contract=False,
                  reuse_exist_file=False,
                  verbose=False,
                  num_threads=None,
                  wait_thread=True):
    """
    Download IB history bars for each symbol by running the external
    ibclient command, one output file per run of consecutive days that
    share the same contract.

    symbol_list: symbols to get
    start_date, end_date: first and last day, yyyymmdd strings
    barsec:      bar period, a key of barsec_dur, also part of the command
                 line and of the file name
    ibclient:    command to run; if None nothing is downloaded and the
                 file is dropped from the returned list
    clp:         last argument of the command line
    mock_run:    if True the command lines are printed but nothing is
                 run, deleted or gzipped
    getqt, gettrd: get the quote ('_qt.csv') and/or trade ('_trd.csv') file
    cid:         client id given to the command; incremented per worker
                 when num_threads is set
    start_end_hour: [start_hour, end_hour]; if not of length 2,
                 l1.get_start_end_hour(symbol) is used
    next_contract: if True get the next contract, saved under '<dir>/nc'
    reuse_exist_file: if True, a file that exists exactly once (plain or
                 .gz) with a size above 1024 is not downloaded again
    verbose:     not used in the body, only passed on to the workers
    num_threads: if not None, symbol_list is split into that many chunks,
                 each run by this function in a process of a
                 multiprocessing pool
    wait_thread: with num_threads, whether to wait for and collect the
                 results of the workers

    return: list of file names (without the '_qt.csv'/'_trd.csv'
            extension); an empty list on KeyboardInterrupt/SystemExit, or
            when num_threads is set and wait_thread is False
    """
    bar_path = read_cfg('HistPath')
    if num_threads is not None:
        import _strptime
        n = len(symbol_list)
        # chunk boundaries splitting symbol_list into num_threads parts
        k = np.linspace(0, n, num=num_threads + 1).astype(int)
        # NOTE(review): the pool is never closed or joined
        pool = mp.Pool(processes=num_threads)
        res = []
        for i0, i1 in zip(k[:-1], k[1:]):
            if i1 == i0:
                continue
            res.append(
                pool.apply_async(get_ib_future,
                                 args=(symbol_list[i0:i1], start_date,
                                       end_date, barsec, ibclient, clp,
                                       mock_run, getqt, gettrd, cid,
                                       start_end_hour, next_contract,
                                       reuse_exist_file, verbose, None, True)))
            # each worker gets its own client id
            cid += 1

        fnarr = []
        if wait_thread:
            for r in res:
                fnarr += r.get()
        return fnarr

    step_sec = barsec_dur[barsec]
    fnarr = []
    for symbol in symbol_list:
        # output directory: one per venue for FX/ETF/IDX, else per symbol
        venue = ibvenue(symbol)
        if venue == 'FX':
            bar_dir = bar_path + '/FX'
        elif venue == 'ETF':
            bar_dir = bar_path + '/ETF'
        elif venue == 'IDX':
            bar_dir = bar_path + '/IDX'
        else:
            bar_dir = bar_path + '/' + symbol
        if next_contract:
            bar_dir += '/nc'
        os.system(' mkdir -p ' + bar_dir)

        if len(start_end_hour) != 2:
            start_hour, end_hour = l1.get_start_end_hour(symbol)
        else:
            start_hour, end_hour = start_end_hour

        ti = l1.TradingDayIterator(start_date)
        day = ti.yyyymmdd()
        eday = day
        while day <= end_date:
            # one pass per contract: [sday, eday] is the run of days on
            # contract fc; sday is the eday of the previous pass
            sday = eday
            fc = l1fc(symbol, day)
            fcn = l1fc(symbol, day, next_contract=True)
            while day <= end_date:
                ti.next()
                day = ti.yyyymmdd()
                fc0 = l1fc(symbol, day)
                if fc != fc0:
                    break
                eday = day
            # make sure eday is not more than end_date
            # if end_date was given as a weekend dates
            if (eday > end_date):
                print 'ending to ', end_date, ' adjust to ',
                ti0 = l1.TradingDayIterator(eday)
                eday = ti0.prev().yyyymmdd()
                print eday

            if next_contract:
                fc = fcn
            fn = bar_dir + '/' + ibfn(fc, barsec, sday, eday)
            fnarr.append(fn)

            # fext/cext: file extensions and matching command line flags
            # ('0' quote, '1' trade) that still have to be downloaded
            fext = []
            cext = []
            for gt, ext, ext_str, etp in zip([getqt, gettrd],
                                             ['_qt.csv', '_trd.csv'],
                                             ['quote', 'trade'], ['0', '1']):
                if not gt:
                    continue
                fn0 = fn + ext
                # reuse_exist_file
                # the asserts are used as control flow: any failure lands
                # in the except below and schedules the download
                try:
                    found = 0
                    assert reuse_exist_file
                    for ext0 in ['', '.gz']:
                        try:
                            if os.stat(fn0 + ext0).st_size > 1024:
                                found += 1
                                print 'found existing file: ', fn0 + ext0, ' count = ', found
                        except:
                            continue
                    assert found == 1
                    print 'reusing ', fn0, ' for ', ext_str
                except:
                    print 'getting ', ext_str, ' FILE: ', fn0, ' (found = %d)' % (
                        found)
                    fext.append(ext)
                    cext.append(etp)

            if len(fext) == 0:
                print 'Nothing to get from %s to %s!' % (sday, eday)
                continue

            if len(fext) == 1 and fext[
                    0] == '_trd.csv' and next_contract and getqt:
                print '!! Next Contract using existing quote only'
                continue

            if ibclient is None:
                # here if ibclient is None then
                # don't run it (save time)
                # the caller should except file
                # not found and handle it with zero bar
                print 'Not running ibclient (None)!'
                fnarr.remove(fn)
                continue

            # clean up the existing files
            for ext in fext:
                fn0 = fn + ext
                if not mock_run:
                    os.system('rm -f ' + fn0 + ' > /dev/null 2>&1')
                    os.system('rm -f ' + fn0 + '.gz' + ' > /dev/null 2>&1')

            if symbol in ib_sym_special:
                fc = symbol + fc[-2:]
            sym = venue + '/' + fc
            # get all days with the same contract, saving to the same file
            tic = l1.TradingDayIterator(sday)
            d0 = tic.yyyymmdd()
            try:
                while d0 <= eday and d0 <= end_date:
                    # get for day d0
                    utc1 = tic.to_local_utc(end_hour, 0, 0)
                    utc0 = utc1 - (end_hour - start_hour) * 3600
                    # one request per step_sec, each for the end time utc0
                    while utc0 < utc1:
                        # get for quote and trade for end_time as utc
                        utc0 += step_sec
                        eday_str = datetime.datetime.fromtimestamp(
                            utc0).strftime('%Y%m%d %H:%M:%S')
                        #for ist, ext in zip (['0', '1'], ['_qt.csv','_trd.csv']):
                        for ist, ext in zip(cext, fext):
                            fn0 = fn + ext
                            cmdline = ibclient + ' ' + str(
                                cid
                            ) + ' ' + sym + ' ' + '\"' + eday_str + '\"' + ' ' + str(
                                barsec) + ' ' + fn0 + ' ' + ist + ' ' + clp
                            print 'running ', cmdline
                            if not mock_run:
                                os.system(cmdline)
                                time.sleep(2)
                                #os.system( 'sleep 2'
                    tic.next()
                    d0 = tic.yyyymmdd()
            except (KeyboardInterrupt, SystemExit):
                print 'stop ...'
                return []
            except:
                # any other error is printed and the next contract is tried
                traceback.print_exc()

    # NOTE(review): fext here is whatever the last pass of the loops above
    # left behind, so it decides which extensions are gzipped for every
    # file in fnarr -- e.g. nothing is gzipped if the last pass had
    # nothing to get.  Confirm whether this is intended.
    for fn in fnarr:
        for ext in fext:
            fn0 = fn + ext
            if not mock_run:
                print 'gzip ', fn0
                os.system('gzip ' + fn0)
    """
    if upd_repo :
        repo_path = read_cfg('RepoPath')
        future_inclusion = ['back' if next_contract else 'front']
        from IB_hist import ingest_all_symbol
        ingest_all_symbol(start_date, end_date, repo_path=repo_path, get_missing=True, sym_list=sym_list, future_inclusion=future_inclusion)
    """

    return fnarr
Esempio n. 17
0
def write_daily_bar(symbol,
                    bar,
                    bar_sec=5,
                    is_front=True,
                    last_close_px=None,
                    get_missing=True):
    """
    bar: all bars from a hist file having the format of 
    [utc, utc_ltt, open_px, hi_px, lo_px, close_px, vwap, vol, vb, vs]
    These bars have the same contract. 
    The bar is in increasing utc, but may have gaps, or other invalid values
    The first day of that contract bar, due to prev_close_px unknown, it is
    usually covered by having the previous contract day. 
    Note there is a limitation that the start end time has to be on a whole hour
    i.e. cannot stop on 4:30, just make it 5, which will write some zero bars.
    However, it can handle 24 hour trading, i.e. start/end at 18:00, for fx venues.
    Note 2, the first bar of a day should be 1 bar_sec after the starting utc and
    the last bar of a day should be at the ending utc.

    if get_missing is set to true, then try to get the bar on a bad day

    Output: 
    array of daily_bar for each day covered in the bar (hist file)
    Each daily_bar have the following format: 
    [obs_utc, lr, trd_vol, vbs, lrhl, lrvwap, ltt, lpx]
    where: 
        obs_utc is the checking time stamp
        lr is the log return between this checking price and last checking price
           i.e. the lr of the previous bar that ended at this checking time (obs_utc)

      (May extend in the future)
    Note that the Trading Hours set to 24 for ICE hours
    In addition, it does the following:
    1. loop the close px to the first open px, 
    2. convert the price to lr, removing bars with maxlr more than 0.2 (CME circuit breaker)
    3. replace all inf/nan values with zero
    4. cacluate the ltt and lpx
    """
    import pandas as pd
    dt = datetime.datetime.fromtimestamp(bar[
        0, 0])  # fromtimestamp is safe for getting local representation of utc

    start_hour, end_hour = l1.get_start_end_hour(symbol)
    TRADING_HOURS = end_hour - start_hour
    start_hour = start_hour % 24

    # get the initial day, last price
    day_start = dt.strftime('%Y%m%d')
    utc_s = int(
        l1.TradingDayIterator.local_ymd_to_utc(day_start, start_hour, 0, 0))
    if last_close_px is None:
        x = np.searchsorted(bar[1:, 0], float(utc_s) - 1e-6)

        # only take the last price within 5 minutes of utc_s
        if x + 1 >= bar.shape[0] or bar[x + 1, 0] - utc_s > 300:
            if x + 1 >= bar.shape[0]:
                print 'no bars found after the start utc of ', day_start
            else:
                print 'start up utc (%d) more than 5 minutes later than start utc (%d) on %s' % (
                    bar[x + 1, 0], utc_s, day_start)
                print 'initializing start up last_close_px deferred'
        else:
            if x == 0:
                #last_close_px = bar[0, 2]
                #print 'last close price set as the first bar open px, this should use previous contract', datetime.datetime.fromtimestamp(bar[0,0]), datetime.datetime.fromtimestamp(bar[1,0])
                last_close_px = bar[0, 5]
                print 'lost last close price, set as the first bar close px'
            else:
                last_close_px = bar[x, 5]
                print 'last close price set to close px of bar ', datetime.datetime.fromtimestamp(
                    bar[x, 0]), ' px: ', last_close_px

        print 'GOT last close px ', last_close_px
    else:
        print 'GIVEN last close price ', last_close_px

    day_end = datetime.datetime.fromtimestamp(bar[-1, 0]).strftime('%Y%m%d')
    # deciding on the trading days
    if dt.hour > end_hour or (start_hour == end_hour and dt.hour >= end_hour):
        # CME 17, ICE 18,
        # the second rule is for 24 hour trading, note start/end has to be on a whole hour
        ti = l1.TradingDayIterator(day_start, adj_start=False)
        ti.next()
        trd_day_start = ti.yyyymmdd()
    else:
        trd_day_start = day_start
    trd_day_end = day_end
    print 'preparing bar from ', day_start, ' to ', day_end, ' , trading days: ', trd_day_start, trd_day_end

    ti = l1.TradingDayIterator(trd_day_start,
                               adj_start=False)  # day maybe a sunday
    day1 = ti.yyyymmdd()  # first trading day
    barr = []
    trade_days = []
    col_arr = []
    bad_trade_days = []
    while day1 <= trd_day_end:
        utc_e = int(
            l1.TradingDayIterator.local_ymd_to_utc(day1, end_hour, 0, 0))
        # get start backwards for starting on a Sunday
        utc_s = utc_e - TRADING_HOURS * 3600  # LIMITATION:  start/stop has to be on a whole hour
        day = datetime.datetime.fromtimestamp(utc_s).strftime('%Y%m%d')

        i = np.searchsorted(bar[:, 0], float(utc_s) - 1e-6)
        j = np.searchsorted(bar[:, 0], float(utc_e) - 1e-6)
        bar0 = bar[
            i:
            j, :]  # take the bars in between the first occurance of start_hour (or after) and the last occurance of end_hour or before

        print 'getting bar ', day + '-' + str(
            start_hour) + ':00', day1 + '-' + str(
                end_hour) + ':00', ' , got ', j - i, 'bars'
        N = (
            utc_e - utc_s
        ) / bar_sec  # but we still fill in each bar, so N should be fixed for a given symbol/venue pair

        # here N*0.90, is to account for some closing hours during half hour ib retrieval time
        # The problem with using histclient.exe to retrieve IB history data for ES is
        # set end time is 4:30pm, will retreve 3:45 to 4:15.  Because 4:15-4:30pm doesn't
        # have data.  This is only true for ES so far
        # another consideration is that IB Hist client usually won't be off too much, so 90% is
        # a good threshold for missing/bad day
        bar_good = True
        if j - i < N * 0.90:
            if symbol in ['LE', 'HE'] or l1.venue_by_symbol(symbol) == 'IDX':
                bar_good = (j - i) > N * 0.75
            elif not is_front:
                bar_good = (j - i) > N * 0.5
            else:
                bar_good = False

        if not bar_good:
            print 'fewer bars for trading day %s: %d < %d * 0.9' % (day1,
                                                                    j - i, N)
            if day1 not in l1.bad_days and get_missing:
                # recurse with the current last price and get the updated last price
                print 'getting missing day %s' % (day1)
                from ibbar import get_missing_day
                fn = get_missing_day(symbol, [day1],
                                     bar_sec=bar_sec,
                                     is_front=is_front,
                                     reuse_exist_file=True)
                try:
                    _, _, b0 = bar_by_file_ib(fn[0],
                                              symbol,
                                              start_day=day1,
                                              end_day=day1)
                except Exception as e:
                    print e
                    b0 = []

                if len(b0) > j - i:
                    print 'Getting more bars %d > %d on %s for %s, take it!' % (
                        len(b0), j - i, day1, symbol)
                    barr0, trade_days0, col_arr0, bad_trade_days0, last_close_px0 = write_daily_bar(
                        symbol,
                        b0,
                        bar_sec=bar_sec,
                        is_front=is_front,
                        last_close_px=last_close_px,
                        get_missing=False)
                    # taken as done
                    barr += barr0
                    trade_days += trade_days0
                    col_arr += col_arr0
                    bad_trade_days += bad_trade_days0
                    last_close_px = last_close_px0
                    ti.next()
                    day1 = ti.yyyymmdd()
                    continue
                print 'Got %d bars on %s, had %d bars (%s), use previous!' % (
                    len(b0), day1, j - i, symbol)

        if len(bar0) < 1:
            print 'Bad Day! Too fewer bars in trading day %s: %d, should have %d ' % (
                day1, j - i, N)
            bad_trade_days.append(day1)
        else:
            ix_utc = ((bar0[:, 0] - float(utc_s)) / bar_sec + 1e-9).astype(
                int)  # lr(close_px-open_px) of a bar0 has bar_utc
            bar_utc = np.arange(
                utc_s + bar_sec, utc_e + bar_sec,
                bar_sec)  # bar time will be time of close price, as if in prod

            if N != j - i:
                print 'fill missing for only ', j - i, ' bars (should be ', N, ')'
                bar1 = np.empty((N, bar0.shape[1]))
                bar1[:, 0] = np.arange(utc_s, utc_e, bar_sec)
                # filling all missing for [utc, utc_ltt, open_px, hi_px, lo_px, close_px, vwap, vol, vb, vs]
                # fillforward for utc_ltt, close_px, vwap
                for col in [1, 5, 6]:
                    bar1[:, col] = np.nan
                    bar1[ix_utc, col] = bar0[:, col]
                    df = pd.DataFrame(bar1[:, col])
                    df.fillna(method='ffill', inplace=True)
                    df.fillna(method='bfill', inplace=True)
                # fill zero for vol, vb, bs
                for col in [7, 8, 9]:
                    bar1[:, col] = 0
                    bar1[ix_utc, col] = bar0[:, col]
                # copy value of close_px for open_px, hi_px, lo_px
                for col in [2, 3, 4]:
                    bar1[:, col] = bar1[:, 5]
                    bar1[ix_utc, col] = bar0[:, col]

            bar_arr = []
            bar_arr.append(bar_utc.astype(float))

            # construct the log returns for each bar, fill in zeros for gap
            #lpx_open=np.log(bar0[:,2])
            if last_close_px is None:
                print 'setting last_close_px to ', bar0[0, 2]
                last_close_px = bar0[0, 2]

            lpx_open = np.log(np.r_[last_close_px, bar0[:-1, 5]])
            lpx_hi = np.log(bar0[:, 3])
            lpx_lo = np.log(bar0[:, 4])
            lpx_close = np.log(bar0[:, 5])
            lpx_vwap = np.log(bar0[:, 6])
            lr = lpx_close - lpx_open
            lr_hi = lpx_hi - lpx_open
            lr_lo = lpx_lo - lpx_open
            lr_vw = lpx_vwap - lpx_open

            # remove bars having abnormal return, i.e. circuit break for ES
            # with 9999 prices
            MaxLR = 0.5
            if l1.is_holiday(day) or l1.is_fx_future(
                    symbol) or l1.venue_by_symbol(symbol) == 'FX':
                MaxLR = 5
            ix1 = np.nonzero(np.abs(lr) >= MaxLR)[0]
            ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_hi) >= MaxLR)[0])
            ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_lo) >= MaxLR)[0])
            ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_vw) >= MaxLR)[0])
            if len(ix1) > 0:
                print 'MaxLR (', MaxLR, ') exceeded: ', len(ix1), ' ticks!'
                # removing one-by-one
                for ix1_ in ix1:
                    dt = datetime.datetime.fromtimestamp(bar_utc[ix1_])
                    if not l1.is_pre_market_hour(symbol, dt):
                        print 'warning: removing 1 tick lr/lo/hi/vw: ', lr[
                            ix1_], lr_hi[ix1_], lr_lo[ix1_], lr_vw[ix1_]
                        lr[ix1_] = 0
                        lr_hi[ix1_] = 0
                        lr_lo[ix1_] = 0
                        lr_vw[ix1_] = 0
                    else:
                        print 'NOT removing 1 tick (pre_market=True: ', symbol, ', ', dt, ') lr/lo/hi/vw: ', lr[
                            ix1_], lr_hi[ix1_], lr_lo[ix1_], lr_vw[ix1_]

            # the trade volumes for each bar, fill in zeros for gap
            vlm = bar0[:, 7]
            vb = bar0[:, 8]
            vs = np.abs(bar0[:, 9])
            vbs = vb - vs

            for v0, vn in zip([lr, lr_hi, lr_lo, lr_vw, vlm, vbs],
                              ['lr', 'lr_hi', 'lr_lo', 'lr_vw', 'vlm', 'vbs']):
                nix = np.nonzero(np.isnan(v0))[0]
                nix = np.union1d(nix, np.nonzero(np.isinf(np.abs(v0)))[0])
                if len(nix) > 0:
                    print 'warning: removing ', len(
                        nix), ' nan/inf ticks for ', vn
                    v0[nix] = 0
                b0 = np.zeros(N).astype(float)
                b0[ix_utc] = v0
                bar_arr.append(b0.copy())

            # get the last trade time, this is needs to be
            ltt = np.empty(N) * np.nan
            ltt[ix_utc] = bar0[:, 1]
            df = pd.DataFrame(ltt)
            df.fillna(method='ffill', inplace=True)
            if not np.isfinite(ltt[0]):
                ptt = 0  #no previous trading detectable
                if i > 0:  #make some effort here
                    ptt = bar[i - 1, 1]
                    if not np.isfinite(ptt):
                        ptt = 0
                df.fillna(ptt, inplace=True)
            bar_arr.append(ltt)

            # get the last price, as a debugging tool
            # close price
            lpx = np.empty(N) * np.nan
            lpx[ix_utc] = bar0[:, 5]
            df = pd.DataFrame(lpx)
            df.fillna(method='ffill', inplace=True)
            if not np.isfinite(lpx[0]):
                df.fillna(last_close_px, inplace=True)
            bar_arr.append(lpx)

            ba = np.array(bar_arr).T
            bt0 = ba[:, 0]
            lr0 = ba[:, 1]
            vl0 = ba[:, 5]
            vbs0 = ba[:, 6]
            # add a volatility measure here
            lrhl0 = ba[:, 2] - ba[:, 3]
            vwap0 = ba[:, 4]
            ltt0 = ba[:, 7]
            lpx0 = ba[:, 8]
            barr.append(
                np.vstack((bt0, lr0, vl0, vbs0, lrhl0, vwap0, ltt0, lpx0)).T)
            last_close_px = lpx[-1]
            trade_days.append(day1)
            col_arr.append(repo.kdb_ib_col)

        ti.next()
        day1 = ti.yyyymmdd()

    # filling in missing days if not included in the bad_trade_days
    bad_trade_days = []
    good_trade_days = []
    it = l1.TradingDayIterator(trd_day_start)
    while True:
        day = it.yyyymmdd()
        if day > trd_day_end:
            break
        if day not in trade_days:
            bad_trade_days.append(day)
        else:
            good_trade_days.append(day)
        it.next()

    print 'got bad trade days ', bad_trade_days
    return barr, good_trade_days, col_arr, bad_trade_days, last_close_px
Esempio n. 18
0
def gen_daily_bar_ib(symbol,
                     sday,
                     eday,
                     default_barsec,
                     dbar_repo,
                     is_front_future=True,
                     get_missing=True,
                     barsec_from_file=True,
                     overwrite_dbar=False,
                     EarliestMissingDay='19980101'):
    """
    generate IB dily bars from sday to eday.
    It is intended to be used to add too the daily bar repo manually
    NOTE 1: bar_sec from file name is used to read/write the day. 
            default_barsec given is taken as a default when getting missing days. 
            When barsec_from_file is not True, the bar_sec from file name
            is checked against the default bar_sec given and raises on mismatch.
    NOTE 2: barsec_from_file being False enforces all day's barsec has to agree with default_barsec
    NOTE 3: The flexibility on barsec from file name is to entertain IB's rule for
            half year history on 1S, 1 year history on 30S bar, etc, enforced 
            differently on asset classes. Inconsistencies on weekly operations
    NOTE 4: if overwrite_dbar is True, then the existing repo content on the day will be deleted before
            ingestion
    """

    fn, is_fx, is_etf, is_idx = fn_from_dates(symbol, sday, eday,
                                              is_front_future)
    spread = get_future_spread(symbol)
    print 'Got ', len(fn), ' files: ', fn, ' spread: ', spread

    num_col = 8  # adding spd vol, last_trd_time, last_close_pxa
    tda = []
    tda_bad = []
    for f in fn:
        bar_sec = get_barsec_from_file(f)
        if bar_sec != default_barsec:
            if not barsec_from_file:
                raise ValueError(
                    'Bar second mismatch for file %s with barsec %d' %
                    (f, default_barsec))
            else:
                print 'Set barsec to ', bar_sec, ' from ', default_barsec

        try:
            d0, d1 = get_days_from_file(f)
            _, _, b = bar_by_file_ib(f,
                                     symbol,
                                     start_day=max(sday, d0),
                                     end_day=min(eday, d1))
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            print e
            b = []
        if len(b) > 0:
            ba, td, col, bad_days, last_px = write_daily_bar(
                symbol,
                b,
                bar_sec=bar_sec,
                is_front=is_front_future,
                get_missing=get_missing)
            if overwrite_dbar:
                for td0 in td:
                    # assuming days in increasing order: don't delete days
                    # just written

                    # don't delete if the barsec does not match
                    # Because I don't want the IB_hist (barsec=1) to
                    # overwrite the KDB days, which has barsec=5
                    if td0 not in tda:
                        dbar_repo.remove_day(td0, match_barsec=bar_sec)
            dbar_repo.update(ba, td, col, bar_sec)
            tda += td
            tda_bad += bad_days
        else:
            print '!!! No bars was read from ', f

    tda = list(set(tda))
    tda.sort()
    tda_bad = list(set(tda_bad))
    tda_bad.sort()

    # The following gets the days that are either in tda nor in
    # tda_bad, i.e. some missing days not found in any history files
    # todo - this shouldn't happen and most probably due to the
    # half day/holidays, should remove
    if len(tda) == 0:
        print 'NOTHING found! Not getting any missing days!'
    # in case there are some entirely missed days
    elif get_missing:
        # there could be some duplication in files, so
        # so some files has bad days but otherwise already in other files.
        missday = []
        d0 = max(sday, EarliestMissingDay)
        print ' checking on the missing days from %s to %s' % (d0, eday)

        diter = l1.TradingDayIterator(d0)
        while d0 <= eday:
            if d0 not in tda and d0 not in tda_bad and d0 not in l1.bad_days:
                missday.append(d0)
            diter.next()
            d0 = diter.yyyymmdd()

        if len(missday) > 0:
            print 'getting the missing days ', missday
            from ibbar import get_missing_day
            fn = []
            mdays = []
            for md in missday:
                fn0 = get_missing_day(symbol, [md],
                                      bar_sec,
                                      is_front_future,
                                      reuse_exist_file=True)
                if len(fn0) > 0:
                    fn += fn0
                    mdays.append(md)
                else:
                    print 'nothing on missing day: ', md

            for f, d in zip(fn, mdays):
                try:
                    _, _, b = bar_by_file_ib(f, symbol, start_day=d, end_day=d)
                    if len(b) > 0:
                        print 'got ', len(
                            b), ' bars from ', f, ' on missing day', d
                        ba, td, col, bad_days, lastpx0 = write_daily_bar(
                            symbol,
                            b,
                            bar_sec=bar_sec,
                            is_front=is_front_future,
                            get_missing=False)
                        tda += td
                        tda_bad += bad_days
                        if len(td) > 0:
                            if overwrite_dbar:
                                for td0 in td:
                                    dbar_repo.remove_day(td0,
                                                         match_barsec=bar_sec)
                            dbar_repo.update(ba, td, col, bar_sec)
                        else:
                            print 'no trading day is found from ', f, ' on missing day ', d
                    else:
                        print 'nothing got for missing day: ', d
                except KeyboardInterrupt as e:
                    raise e
                except:
                    traceback.print_exc()
                    print 'problem processing file ', f

    tda.sort()
    tda_bad.sort()
    print 'Done! Bad Days: ', tda_bad
    return tda, tda_bad
Esempio n. 19
0
File: launch.py Progetto: ssh352/kr
def launch_sustain():
    """
    Supervise the processes listed in procs for as long as should_run()
    holds, then clean up.

    Steps, in order:
    1. While should_run() is false and it is not Sunday: bounce IBG and
       sleep RESET_WAIT_SECOND.
    2. While it is Sunday and should_run() is false: keep bouncing IBG
       until about RESET_WAIT_SECOND before get_utcstart(), update the IB
       config, sleep up to the start and spin until is_in_daily_trading().
    3. While should_run(): inside daily trading, (re)launch every process
       that is not alive and exit the interpreter with status 1 when
       tpm.check() fails; outside daily trading, kill all processes and
       wait for the next start in the same way as step 2.
    4. After the loop: kill all processes and, when is_weekend() holds on
       a Friday, remove logs, move the bar files and start
       python/ibbar.py in the background for the last 5 trading days.
    """
    alive = False
    dtnow = datetime.datetime.now()
    # weekday() == 6 is Sunday
    while not should_run() and dtnow.weekday() != 6:
        print 'wait for Sunday open...'
        #reset_network()
        bounce_ibg()
        time.sleep(RESET_WAIT_SECOND)
        dtnow = datetime.datetime.now()
    # NOTE(review): dtnow is not refreshed inside this loop, so once
    # entered it only ends when should_run() turns true -- confirm intended
    while dtnow.weekday() == 6 and not should_run():
        utcnow = l1.TradingDayIterator.local_dt_to_utc(dtnow)
        utcstart = get_utcstart()
        # keep bouncing until one more bounce would not fit before the start
        while utcnow < utcstart - RESET_WAIT_SECOND - 10:
            print 'wait for Sunday open...', utcnow, utcstart, utcstart - utcnow
            #reset_network()
            bounce_ibg()
            time.sleep(RESET_WAIT_SECOND)
            utcnow = l1.TradingDayIterator.cur_utc()

        print 'getting on-line, updating roll ', datetime.datetime.now()
        ibbar.update_ib_config(cfg_file=cfg)
        utcnow = l1.TradingDayIterator.cur_utc()
        if utcstart > utcnow and not is_in_daily_trading():
            time.sleep(utcstart - utcnow)

        utcnow = l1.TradingDayIterator.cur_utc()
        print 'spining for start', utcnow
        # busy wait (no sleep) until the trading session is on
        while not is_in_daily_trading():
            utcnow = l1.TradingDayIterator.cur_utc()
            #time.sleep( float((1000000-utcnow.microsecond)/1000)/1000.0 )
        print 'starting on', utcnow
        alive = True

    tpm = TPMon()
    while should_run():
        if is_in_daily_trading():
            if not alive:
                print 'getting on-line, updating roll ', datetime.datetime.now(
                )
                ibbar.update_ib_config(cfg_file=cfg)
                alive = True
            # poll and sustain
            for p in procs:
                if (p not in proc_map.keys()) or (not is_proc_alive(
                        proc_map[p])):
                    launch(p)
            time.sleep(1)
            if not tpm.check():
                # All L2 repo hasn't been updated for 1 min
                # exit the process and retry in outer (while [ 1 ]) loop
                print 'stale detected, exit!'
                # NOTE(review): there is no 'global' statement in this
                # function, so this binds a local name only; the process
                # exits a few lines below in any case
                _should_run = False
                kill_all()
                alive = False
                sys.exit(1)
            continue
        else:
            if alive:
                print 'getting off-line, killing all ', datetime.datetime.now()
                kill_all()
                alive = False
            # do one hour of reset
            dtnow = datetime.datetime.now()
            utcstart = get_utcstart()
            cur_utc = l1.TradingDayIterator.cur_utc()
            while cur_utc <= utcstart - RESET_WAIT_SECOND - 10:
                print 'reset network', cur_utc, utcstart
                #reset_network()
                bounce_ibg()
                time.sleep(RESET_WAIT_SECOND)
                cur_utc = l1.TradingDayIterator.cur_utc()
            print 'getting on-line, updating roll ', datetime.datetime.now()
            ibbar.update_ib_config(cfg_file=cfg)
            cur_utc = l1.TradingDayIterator.cur_utc()
            if utcstart > cur_utc:
                time.sleep(utcstart - cur_utc)
            cur_utc = l1.TradingDayIterator.cur_utc()
            print 'spinning for start', cur_utc
            # busy wait (no sleep) until the start utc has passed
            while cur_utc <= utcstart:
                cur_utc = l1.TradingDayIterator.cur_utc()
            alive = True
            # start over with a new monitor for the new session
            tpm = TPMon()

    print 'stopped ', datetime.datetime.now()
    kill_all()
    if is_weekend():
        # only do it on friday close
        dt = datetime.datetime.now()
        wd = dt.weekday()
        # weekday() == 4 is Friday
        if wd == 4:
            remove_logs()
            # edrive
            prev_wk, this_wk = ibbar.move_bar(
                rsync_dir_list=['/cygdrive/e/ib/kisco/bar'])
            # bar_path is only referenced by the commented-out scp below
            bar_path = ibbar.read_cfg('BarPath')
            #os.system('scp -r ' + bar_path + '/'+this_wk + ' ' + USER+'@'+DATA_MACHINE+':'+BAR_PATH)

            print 'moving bar files to ', this_wk
            print 'previous week was ', prev_wk

            #import IB_hist
            #IB_hist.weekly_get_ingest(rsync_dir_list=['/cygdrive/e/ib/kisco/hist'])
            # history is requested from 5 trading days back up to today
            eday = dt.strftime('%Y%m%d')
            tdi = l1.TradingDayIterator(eday)
            sday = tdi.prev_n_trade_day(5).yyyymmdd()
            #ibbar.weekly_get_hist(sday, eday)
            # NOTE(review): '2>&1' comes before '>>', so stderr follows the
            # stdout in effect before the redirect to gethist.log --
            # confirm this is the intended destination
            os.system("nohup python/ibbar.py " + sday + " " + eday +
                      " 2>&1 >> ./gethist.log &")
            print "started nohup python/ibbar.py " + sday + " " + eday + " 2>&1 >> ./gethist.log &", datetime.datetime.now(
            )
            # presumably leaves the background job time to start before
            # this process returns -- TODO confirm
            time.sleep(30)
Esempio n. 20
0
File: ibbar.py Progetto: tt9024/kr
def get_missing_day(symbol,
                    trd_day_arr,
                    bar_sec,
                    is_front,
                    cid=None,
                    reuse_exist_file=True,
                    reuse_exist_only=False):
    """
    Couple of options:
    reuse_exist_file: will take the previous daily file
                      and try to reuse it
    reuse_exist_only: will only try to reuse the existing
                      daily file.  If not found, then
                      don't run the ibclient.  This is
                      usually the case for unnecessary
                      days (such as outside of sday/eday
                      of file name).

    Note: if IB_CLIENT is not found, i.e. on the hp notebook, 
          reuse_exist_only is set to true
    """
    import copy
    ibclient = copy.deepcopy(IB_CLIENT)
    try:
        os.stat(ibclient)
    except:
        reuse_exist_only = True

    if reuse_exist_only:
        ibclient = None

    if cid is None:
        dt = datetime.datetime.now()
        cid = dt.month * 31 + dt.day + 300 + dt.second

    fnarr = []
    for day in trd_day_arr:
        if day in l1.bad_days or l1.is_holiday(day):
            print 'not getting holiday ', day
            continue
        cur_day = datetime.datetime.now().strftime('%Y%m%d')
        tdi = l1.TradingDayIterator(cur_day)
        tdi.prev_n_trade_day(260)  # IB allow 1 year 1S bar
        if day <= tdi.yyyymmdd():
            print 'older than a year, IB not allowed to get ', day
            continue
        if l1.venue_by_symbol(symbol) == 'FX':
            fnarr += get_ib(day,
                            day,
                            cid=cid + 3,
                            sym_list=[symbol],
                            reuse_exist_file=reuse_exist_file,
                            verbose=False,
                            ibclient=ibclient)
        else:
            # future or etf
            next_contract = not is_front
            fnarr += get_ib_future([symbol],
                                   day,
                                   day,
                                   bar_sec,
                                   mock_run=False,
                                   cid=cid + 1,
                                   getqt=True,
                                   gettrd=True,
                                   next_contract=next_contract,
                                   reuse_exist_file=reuse_exist_file,
                                   verbose=False,
                                   ibclient=ibclient)

    return fnarr