Example #1
0
def clip_idx(utc, symbol, start_day, end_day):
    """
    Locate the slice of utc covering trading days start_day to end_day
    (both inclusive).

    The window opens at the start of start_day's session, taken as its
    end hour minus the session length, and closes at end_day's end hour.

    Return:
    ix0, ix1, so that utc[ix0:ix1] are all the included time instances
    """
    start_hour, end_hour = l1.get_start_end_hour(symbol)
    session_sec = (end_hour - start_hour) * 3600
    to_utc = l1.TradingDayIterator.local_ymd_to_utc

    first_utc = to_utc(start_day, end_hour) - session_sec
    last_utc = to_utc(end_day, end_hour)

    lo = np.searchsorted(utc, first_utc)
    # the 0.1 nudge keeps a stamp equal to last_utc inside the slice
    hi = np.searchsorted(utc, last_utc + 0.1)
    return lo, hi
Example #2
0
    def __init__(self,
                 symbol,
                 repo_path='/cygdrive/e/research/kdb/repo',
                 bootstrap_idx=None,
                 venue=None,
                 create=False):
        """
        Open the daily bar repo of a symbol, optionally creating its idx.

        symbol:        the symbol, also used as the repo directory name
        repo_path:     root directory of the repo
        bootstrap_idx: an optional idx to use if the idx file doesn't exist (idx.npz)
        venue:   an optional venue for the symbol, i.e. EBS.  path will append the venue name
        create:  if the idx does not exist, create a new (empty) idx

        Raises ValueError if
           - bootstrap_idx is given but a non-empty idx file already exists
           - the idx file is missing and create is False
           - the idx file exists (non-empty) but cannot be loaded; such a
             file is never replaced, even with create=True, so that a
             damaged index is not silently reset to an empty one

        idx.npz stores global as well as daily configurations
        global:
           start hour
           end hour
           symbol
        daily: key by 'YYYYMMDD'
           bar_sec
           cols
        """
        self.symbol = symbol
        self.path = repo_path + '/' + symbol
        if venue is not None:
            # this is to entertain multiple venue for FX
            # i.e. EUR.USD/Hotspot, etc
            self.path += '/' + venue
        self.idxfn = self.path + '/idx.npz'
        if bootstrap_idx is not None:
            if l1.get_file_size(self.idxfn) > 0:
                raise ValueError('idx file exists: ' + self.idxfn)
            print('saving the given bootstrap idx')
            np.savez_compressed(self.idxfn, idx=bootstrap_idx)

        try:
            self.idx = np.load(self.idxfn, allow_pickle=True)['idx'].item()
        except Exception:
            import traceback
            # "exists" means a non-empty file, same notion as the
            # bootstrap check above
            has_idx = (os.path.isfile(self.idxfn)
                       and os.path.getsize(self.idxfn) > 0)
            if has_idx:
                traceback.print_exc()
                raise ValueError('idx.npz exists but cannot be loaded from ' +
                                 self.path)
            if not create:
                traceback.print_exc()
                raise ValueError('idx.npz not found from ' + self.path)
            if not os.path.isdir(self.path):
                os.makedirs(self.path)
            self.idx = RepoDailyBar.make_bootstrap_idx(symbol)
            np.savez_compressed(self.idxfn, idx=self.idx)

        # the venue recorded in the idx is used, not the venue argument
        self.venue = self.idx['global']['venue']
        #self.sh,self.eh = self.idx['global']['sehour']
        self.sh, self.eh = l1.get_start_end_hour(symbol)
Example #3
0
    def make_bootstrap_idx(symbol):
        """
        Build an empty idx for symbol: the global section is filled in
        from l1 and the daily section has no entries yet.

        A daily entry is keyed by 'YYYYMMDD' and looks like
            '19700101' : { 'bar_sec': 1, 'cols': [] }   # cols: columns available
        """
        venue = l1.venue_by_symbol(symbol)
        # the contract size is returned as well but not stored in the idx
        tick_size, _ = l1.asset_info(symbol)
        sh, eh = l1.get_start_end_hour(symbol)

        global_cfg = {
            'symbol': symbol,
            'venue': venue,
            'sehour': [sh, eh],
            'ticksz': tick_size,
        }
        return {'global': global_cfg, 'daily': {}}
Example #4
0
def gen_bar(sym_array,
            sday,
            eday,
            repo_cme_path='./repo_cme',
            cme_path='./cme',
            bar_sec=1,
            nc=False):
    """
    Build trade bars from the daily cme trade files and store them in
    the repo, one trading day at a time.

    getting from the ts [utc, px, signed_vol]
    output format bt, lr, vl, vbs, lrhl, vwap, ltt, lpx

    sym_array:     symbols to process
    sday, eday:    first and last trading day (yyyymmdd strings, inclusive);
                   sday has to be a trading day
    repo_cme_path: repo to store the 1S trd bars; has to end with 'nc'
                   when nc is set
    cme_path:      where the trade files are read from
    bar_sec:       bar period in seconds
    nc:            use the next contract instead of the front contract

    return : None
        update (remove first) dbar with bar_arr, days, col_arr

    Raises ValueError if sday is not a trading day.
    A day whose file cannot be read is skipped; a keyboard interrupt
    while reading stops the whole run.
    """

    if nc:
        assert repo_cme_path[
            -2:] == 'nc', 'repo_cme_path=' + repo_cme_path + ' not ending with nc'
    for symbol in sym_array:
        try:
            dbar = repo.RepoDailyBar(symbol, repo_path=repo_cme_path)
        except Exception:
            # only real errors fall back to creating the repo; an
            # interrupt or exit request is left to propagate
            print('repo_trd_path failed, trying to create')
            dbar = repo.RepoDailyBar(symbol,
                                     repo_path=repo_cme_path,
                                     create=True)

        start_hour, end_hour = l1.get_start_end_hour(symbol)
        TRADING_HOURS = end_hour - start_hour
        # sday has to be a trading day
        it = l1.TradingDayIterator(sday)
        tday = it.yyyymmdd()
        if tday != sday:
            raise ValueError('sday has to be a trading day! sday: ' + sday +
                             ' trd_day: ' + tday)

        lastpx = 0
        prev_con = ''
        while tday <= eday:
            eutc = it.local_ymd_to_utc(tday, h_ofst=end_hour)
            sutc = eutc - (TRADING_HOURS) * 3600
            if nc:
                con = l1.FC_next(symbol, tday)[0]
            else:
                con = l1.FC(symbol, tday)

            con = symbol + con[-2:]
            try:
                bar = bar_by_file(get_fn(cme_path, symbol, tday, con))
            except (KeyboardInterrupt):
                print('interrupt!')
                return
            except Exception:
                print('problem getting  %s %s' % (symbol, tday))
                bar = []

            if len(bar) == 0:
                # no data: the next good day restarts from its own first price
                lastpx = 0
                prev_con = ''
            else:
                # this is the good case, prepare for the bar
                # 1) get bar with start/stop, 2) contract updated 3) lastpx
                # need to allow for entire content being in one ta, i.e. some
                # days having tds==2 but all contents in one ta, due to gmt_offset

                # have everything, need to get to
                # output format bt, lr, vl, vbs, lrhl, vwap, ltt, lp
                if lastpx == 0 or prev_con != con:
                    # no previous close on this contract, so the first
                    # return is measured from the first trade price
                    lastpx = bar[0, 1]
                # bar times are the closing times of each bar
                bt = np.arange(sutc + bar_sec, eutc + bar_sec, bar_sec)
                # a seed row (start utc, first price, zero volume) is put in
                # front of the trades so that index 0 means "no trade yet"
                tts = np.r_[sutc, bar[:, 0]]
                pts = np.r_[bar[0, 1], bar[:, 1]]
                vts = np.r_[0, bar[:, 2]]
                pvts = np.abs(vts) * pts

                # for each bar time, the row (in the seeded arrays) of the
                # last trade at or before it
                pxix = np.clip(np.searchsorted(tts[1:], bt + 1e-6), 0,
                               len(tts) - 1)
                lpx = pts[pxix]
                lr = np.log(np.r_[lastpx, lpx])
                lr = lr[1:] - lr[:-1]

                # tricky way to get index right on volumes:
                # per-bar sums are differences of the running sums
                btdc = np.r_[0, np.cumsum(vts)[pxix]]
                vbs = btdc[1:] - btdc[:-1]
                btdc = np.r_[0, np.cumsum(np.abs(vts))[pxix]]
                vol = btdc[1:] - btdc[:-1]

                # even tickier way to get vwap/ltt right
                ixg = np.nonzero(vol)[0]
                btdc = np.r_[0, np.cumsum(pvts)[pxix]]
                vwap = lpx.copy()  #when there is no vol
                vwap[ixg] = (btdc[1:] - btdc[:-1])[ixg] / vol[ixg]
                ltt = np.zeros(len(bt))
                ltt[ixg] = tts[pxix][ixg]
                # bars without a trade are left at 0 above and handed to the
                # repo helper to fill (presumably forward, then backward)
                repo.fwd_bck_fill(ltt, v=0)

                # give up, ignore the lrhl for trd bars
                lrhl = np.zeros(len(bt))

                b = np.vstack((bt, lr, vol, vbs, lrhl, vwap, ltt, lpx)).T
                d = tday
                c = repo.kdb_ib_col
                dbar.remove_day(d)
                dbar.update([b], [d], [c], bar_sec)
                lastpx = lpx[-1]
                prev_con = con

            it.next()
            tday = it.yyyymmdd()
Example #5
0
def write_daily_bar(symbol,
                    bar,
                    bar_sec=5,
                    is_front=True,
                    last_close_px=None,
                    get_missing=True):
    """
    bar: all bars from a hist file having the format of 
    [utc, utc_ltt, open_px, hi_px, lo_px, close_px, vwap, vol, vb, vs]
    These bars have the same contract. 
    The bar is in increasing utc, but may have gaps, or other invalid values
    The first day of that contract bar, due to prev_close_px unknown, it is
    usually covered by having the previous contract day. 
    Note there is a limitation that the start end time has to be on a whole hour
    i.e. cannot stop on 4:30, just make it 5, which will write some zero bars.
    However, it can handle 24 hour trading, i.e. start/end at 18:00, for fx venues.
    Note 2, the first bar of a day should be 1 bar_sec after the starting utc and
    the last bar of a day should be at the ending utc.

    if get_missing is set to true, then try to get the bar on a bad day

    Output: 
    array of daily_bar for each day covered in the bar (hist file)
    Each daily_bar have the following format: 
    [obs_utc, lr, trd_vol, vbs, lrhl, lrvwap, ltt, lpx]
    where: 
        obs_utc is the checking time stamp
        lr is the log return between this checking price and last checking price
           i.e. the lr of the previous bar that ended at this checking time (obs_utc)

      (May extend in the future)
    Note that the Trading Hours set to 24 for ICE hours
    In addition, it does the following:
    1. loop the close px to the first open px, 
    2. convert the price to lr, removing bars with maxlr more than 0.2 (CME circuit breaker)
    3. replace all inf/nan values with zero
    4. cacluate the ltt and lpx
    """
    import pandas as pd
    dt = datetime.datetime.fromtimestamp(bar[
        0, 0])  # fromtimestamp is safe for getting local representation of utc

    start_hour, end_hour = l1.get_start_end_hour(symbol)
    TRADING_HOURS = end_hour - start_hour
    start_hour = start_hour % 24

    # get the initial day, last price
    day_start = dt.strftime('%Y%m%d')
    utc_s = int(
        l1.TradingDayIterator.local_ymd_to_utc(day_start, start_hour, 0, 0))
    if last_close_px is None:
        x = np.searchsorted(bar[1:, 0], float(utc_s) - 1e-6)

        # only take the last price within 5 minutes of utc_s
        if x + 1 >= bar.shape[0] or bar[x + 1, 0] - utc_s > 300:
            if x + 1 >= bar.shape[0]:
                print 'no bars found after the start utc of ', day_start
            else:
                print 'start up utc (%d) more than 5 minutes later than start utc (%d) on %s' % (
                    bar[x + 1, 0], utc_s, day_start)
                print 'initializing start up last_close_px deferred'
        else:
            if x == 0:
                #last_close_px = bar[0, 2]
                #print 'last close price set as the first bar open px, this should use previous contract', datetime.datetime.fromtimestamp(bar[0,0]), datetime.datetime.fromtimestamp(bar[1,0])
                last_close_px = bar[0, 5]
                print 'lost last close price, set as the first bar close px'
            else:
                last_close_px = bar[x, 5]
                print 'last close price set to close px of bar ', datetime.datetime.fromtimestamp(
                    bar[x, 0]), ' px: ', last_close_px

        print 'GOT last close px ', last_close_px
    else:
        print 'GIVEN last close price ', last_close_px

    day_end = datetime.datetime.fromtimestamp(bar[-1, 0]).strftime('%Y%m%d')
    # deciding on the trading days
    if dt.hour > end_hour or (start_hour == end_hour and dt.hour >= end_hour):
        # CME 17, ICE 18,
        # the second rule is for 24 hour trading, note start/end has to be on a whole hour
        ti = l1.TradingDayIterator(day_start, adj_start=False)
        ti.next()
        trd_day_start = ti.yyyymmdd()
    else:
        trd_day_start = day_start
    trd_day_end = day_end
    print 'preparing bar from ', day_start, ' to ', day_end, ' , trading days: ', trd_day_start, trd_day_end

    ti = l1.TradingDayIterator(trd_day_start,
                               adj_start=False)  # day maybe a sunday
    day1 = ti.yyyymmdd()  # first trading day
    barr = []
    trade_days = []
    col_arr = []
    bad_trade_days = []
    while day1 <= trd_day_end:
        utc_e = int(
            l1.TradingDayIterator.local_ymd_to_utc(day1, end_hour, 0, 0))
        # get start backwards for starting on a Sunday
        utc_s = utc_e - TRADING_HOURS * 3600  # LIMITATION:  start/stop has to be on a whole hour
        day = datetime.datetime.fromtimestamp(utc_s).strftime('%Y%m%d')

        i = np.searchsorted(bar[:, 0], float(utc_s) - 1e-6)
        j = np.searchsorted(bar[:, 0], float(utc_e) - 1e-6)
        bar0 = bar[
            i:
            j, :]  # take the bars in between the first occurance of start_hour (or after) and the last occurance of end_hour or before

        print 'getting bar ', day + '-' + str(
            start_hour) + ':00', day1 + '-' + str(
                end_hour) + ':00', ' , got ', j - i, 'bars'
        N = (
            utc_e - utc_s
        ) / bar_sec  # but we still fill in each bar, so N should be fixed for a given symbol/venue pair

        # here N*0.90, is to account for some closing hours during half hour ib retrieval time
        # The problem with using histclient.exe to retrieve IB history data for ES is
        # set end time is 4:30pm, will retreve 3:45 to 4:15.  Because 4:15-4:30pm doesn't
        # have data.  This is only true for ES so far
        # another consideration is that IB Hist client usually won't be off too much, so 90% is
        # a good threshold for missing/bad day
        bar_good = True
        if j - i < N * 0.90:
            if symbol in ['LE', 'HE'] or l1.venue_by_symbol(symbol) == 'IDX':
                bar_good = (j - i) > N * 0.75
            elif not is_front:
                bar_good = (j - i) > N * 0.5
            else:
                bar_good = False

        if not bar_good:
            print 'fewer bars for trading day %s: %d < %d * 0.9' % (day1,
                                                                    j - i, N)
            if day1 not in l1.bad_days and get_missing:
                # recurse with the current last price and get the updated last price
                print 'getting missing day %s' % (day1)
                from ibbar import get_missing_day
                fn = get_missing_day(symbol, [day1],
                                     bar_sec=bar_sec,
                                     is_front=is_front,
                                     reuse_exist_file=True)
                try:
                    _, _, b0 = bar_by_file_ib(fn[0],
                                              symbol,
                                              start_day=day1,
                                              end_day=day1)
                except Exception as e:
                    print e
                    b0 = []

                if len(b0) > j - i:
                    print 'Getting more bars %d > %d on %s for %s, take it!' % (
                        len(b0), j - i, day1, symbol)
                    barr0, trade_days0, col_arr0, bad_trade_days0, last_close_px0 = write_daily_bar(
                        symbol,
                        b0,
                        bar_sec=bar_sec,
                        is_front=is_front,
                        last_close_px=last_close_px,
                        get_missing=False)
                    # taken as done
                    barr += barr0
                    trade_days += trade_days0
                    col_arr += col_arr0
                    bad_trade_days += bad_trade_days0
                    last_close_px = last_close_px0
                    ti.next()
                    day1 = ti.yyyymmdd()
                    continue
                print 'Got %d bars on %s, had %d bars (%s), use previous!' % (
                    len(b0), day1, j - i, symbol)

        if len(bar0) < 1:
            print 'Bad Day! Too fewer bars in trading day %s: %d, should have %d ' % (
                day1, j - i, N)
            bad_trade_days.append(day1)
        else:
            ix_utc = ((bar0[:, 0] - float(utc_s)) / bar_sec + 1e-9).astype(
                int)  # lr(close_px-open_px) of a bar0 has bar_utc
            bar_utc = np.arange(
                utc_s + bar_sec, utc_e + bar_sec,
                bar_sec)  # bar time will be time of close price, as if in prod

            if N != j - i:
                print 'fill missing for only ', j - i, ' bars (should be ', N, ')'
                bar1 = np.empty((N, bar0.shape[1]))
                bar1[:, 0] = np.arange(utc_s, utc_e, bar_sec)
                # filling all missing for [utc, utc_ltt, open_px, hi_px, lo_px, close_px, vwap, vol, vb, vs]
                # fillforward for utc_ltt, close_px, vwap
                for col in [1, 5, 6]:
                    bar1[:, col] = np.nan
                    bar1[ix_utc, col] = bar0[:, col]
                    df = pd.DataFrame(bar1[:, col])
                    df.fillna(method='ffill', inplace=True)
                    df.fillna(method='bfill', inplace=True)
                # fill zero for vol, vb, bs
                for col in [7, 8, 9]:
                    bar1[:, col] = 0
                    bar1[ix_utc, col] = bar0[:, col]
                # copy value of close_px for open_px, hi_px, lo_px
                for col in [2, 3, 4]:
                    bar1[:, col] = bar1[:, 5]
                    bar1[ix_utc, col] = bar0[:, col]

            bar_arr = []
            bar_arr.append(bar_utc.astype(float))

            # construct the log returns for each bar, fill in zeros for gap
            #lpx_open=np.log(bar0[:,2])
            if last_close_px is None:
                print 'setting last_close_px to ', bar0[0, 2]
                last_close_px = bar0[0, 2]

            lpx_open = np.log(np.r_[last_close_px, bar0[:-1, 5]])
            lpx_hi = np.log(bar0[:, 3])
            lpx_lo = np.log(bar0[:, 4])
            lpx_close = np.log(bar0[:, 5])
            lpx_vwap = np.log(bar0[:, 6])
            lr = lpx_close - lpx_open
            lr_hi = lpx_hi - lpx_open
            lr_lo = lpx_lo - lpx_open
            lr_vw = lpx_vwap - lpx_open

            # remove bars having abnormal return, i.e. circuit break for ES
            # with 9999 prices
            MaxLR = 0.5
            if l1.is_holiday(day) or l1.is_fx_future(
                    symbol) or l1.venue_by_symbol(symbol) == 'FX':
                MaxLR = 5
            ix1 = np.nonzero(np.abs(lr) >= MaxLR)[0]
            ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_hi) >= MaxLR)[0])
            ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_lo) >= MaxLR)[0])
            ix1 = np.union1d(ix1, np.nonzero(np.abs(lr_vw) >= MaxLR)[0])
            if len(ix1) > 0:
                print 'MaxLR (', MaxLR, ') exceeded: ', len(ix1), ' ticks!'
                # removing one-by-one
                for ix1_ in ix1:
                    dt = datetime.datetime.fromtimestamp(bar_utc[ix1_])
                    if not l1.is_pre_market_hour(symbol, dt):
                        print 'warning: removing 1 tick lr/lo/hi/vw: ', lr[
                            ix1_], lr_hi[ix1_], lr_lo[ix1_], lr_vw[ix1_]
                        lr[ix1_] = 0
                        lr_hi[ix1_] = 0
                        lr_lo[ix1_] = 0
                        lr_vw[ix1_] = 0
                    else:
                        print 'NOT removing 1 tick (pre_market=True: ', symbol, ', ', dt, ') lr/lo/hi/vw: ', lr[
                            ix1_], lr_hi[ix1_], lr_lo[ix1_], lr_vw[ix1_]

            # the trade volumes for each bar, fill in zeros for gap
            vlm = bar0[:, 7]
            vb = bar0[:, 8]
            vs = np.abs(bar0[:, 9])
            vbs = vb - vs

            for v0, vn in zip([lr, lr_hi, lr_lo, lr_vw, vlm, vbs],
                              ['lr', 'lr_hi', 'lr_lo', 'lr_vw', 'vlm', 'vbs']):
                nix = np.nonzero(np.isnan(v0))[0]
                nix = np.union1d(nix, np.nonzero(np.isinf(np.abs(v0)))[0])
                if len(nix) > 0:
                    print 'warning: removing ', len(
                        nix), ' nan/inf ticks for ', vn
                    v0[nix] = 0
                b0 = np.zeros(N).astype(float)
                b0[ix_utc] = v0
                bar_arr.append(b0.copy())

            # get the last trade time, this is needs to be
            ltt = np.empty(N) * np.nan
            ltt[ix_utc] = bar0[:, 1]
            df = pd.DataFrame(ltt)
            df.fillna(method='ffill', inplace=True)
            if not np.isfinite(ltt[0]):
                ptt = 0  #no previous trading detectable
                if i > 0:  #make some effort here
                    ptt = bar[i - 1, 1]
                    if not np.isfinite(ptt):
                        ptt = 0
                df.fillna(ptt, inplace=True)
            bar_arr.append(ltt)

            # get the last price, as a debugging tool
            # close price
            lpx = np.empty(N) * np.nan
            lpx[ix_utc] = bar0[:, 5]
            df = pd.DataFrame(lpx)
            df.fillna(method='ffill', inplace=True)
            if not np.isfinite(lpx[0]):
                df.fillna(last_close_px, inplace=True)
            bar_arr.append(lpx)

            ba = np.array(bar_arr).T
            bt0 = ba[:, 0]
            lr0 = ba[:, 1]
            vl0 = ba[:, 5]
            vbs0 = ba[:, 6]
            # add a volatility measure here
            lrhl0 = ba[:, 2] - ba[:, 3]
            vwap0 = ba[:, 4]
            ltt0 = ba[:, 7]
            lpx0 = ba[:, 8]
            barr.append(
                np.vstack((bt0, lr0, vl0, vbs0, lrhl0, vwap0, ltt0, lpx0)).T)
            last_close_px = lpx[-1]
            trade_days.append(day1)
            col_arr.append(repo.kdb_ib_col)

        ti.next()
        day1 = ti.yyyymmdd()

    # filling in missing days if not included in the bad_trade_days
    bad_trade_days = []
    good_trade_days = []
    it = l1.TradingDayIterator(trd_day_start)
    while True:
        day = it.yyyymmdd()
        if day > trd_day_end:
            break
        if day not in trade_days:
            bad_trade_days.append(day)
        else:
            good_trade_days.append(day)
        it.next()

    print 'got bad trade days ', bad_trade_days
    return barr, good_trade_days, col_arr, bad_trade_days, last_close_px
Example #6
0
File: ibbar.py Project: ssh352/kr
def get_ib_future(symbol_list,
                  start_date,
                  end_date,
                  barsec,
                  ibclient=IB_CLIENT,
                  clp='IB',
                  mock_run=False,
                  getqt=True,
                  gettrd=False,
                  cid=100,
                  start_end_hour=[],
                  next_contract=False,
                  reuse_exist_file=False,
                  verbose=False,
                  num_threads=None,
                  wait_thread=True):
    bar_path = read_cfg('HistPath')
    if num_threads is not None:
        import _strptime
        n = len(symbol_list)
        k = np.linspace(0, n, num=num_threads + 1).astype(int)
        pool = mp.Pool(processes=num_threads)
        res = []
        for i0, i1 in zip(k[:-1], k[1:]):
            if i1 == i0:
                continue
            res.append(
                pool.apply_async(get_ib_future,
                                 args=(symbol_list[i0:i1], start_date,
                                       end_date, barsec, ibclient, clp,
                                       mock_run, getqt, gettrd, cid,
                                       start_end_hour, next_contract,
                                       reuse_exist_file, verbose, None, True)))
            cid += 1

        fnarr = []
        if wait_thread:
            for r in res:
                fnarr += r.get()
        return fnarr

    step_sec = barsec_dur[barsec]
    fnarr = []
    for symbol in symbol_list:
        venue = ibvenue(symbol)
        if venue == 'FX':
            bar_dir = bar_path + '/FX'
        elif venue == 'ETF':
            bar_dir = bar_path + '/ETF'
        elif venue == 'IDX':
            bar_dir = bar_path + '/IDX'
        else:
            bar_dir = bar_path + '/' + symbol
        if next_contract:
            bar_dir += '/nc'
        os.system(' mkdir -p ' + bar_dir)

        if len(start_end_hour) != 2:
            start_hour, end_hour = l1.get_start_end_hour(symbol)
        else:
            start_hour, end_hour = start_end_hour

        ti = l1.TradingDayIterator(start_date)
        day = ti.yyyymmdd()
        eday = day
        while day <= end_date:
            sday = eday
            fc = l1fc(symbol, day)
            fcn = l1fc(symbol, day, next_contract=True)
            while day <= end_date:
                ti.next()
                day = ti.yyyymmdd()
                fc0 = l1fc(symbol, day)
                if fc != fc0:
                    break
                eday = day
            # make sure eday is not more than end_date
            # if end_date was given as a weekend dates
            if (eday > end_date):
                print 'ending to ', end_date, ' adjust to ',
                ti0 = l1.TradingDayIterator(eday)
                eday = ti0.prev().yyyymmdd()
                print eday

            if next_contract:
                fc = fcn
            fn = bar_dir + '/' + ibfn(fc, barsec, sday, eday)
            fnarr.append(fn)

            fext = []
            cext = []
            for gt, ext, ext_str, etp in zip([getqt, gettrd],
                                             ['_qt.csv', '_trd.csv'],
                                             ['quote', 'trade'], ['0', '1']):
                if not gt:
                    continue
                fn0 = fn + ext
                # reuse_exist_file
                try:
                    found = 0
                    assert reuse_exist_file
                    for ext0 in ['', '.gz']:
                        try:
                            if os.stat(fn0 + ext0).st_size > 1024:
                                found += 1
                                print 'found existing file: ', fn0 + ext0, ' count = ', found
                        except:
                            continue
                    assert found == 1
                    print 'reusing ', fn0, ' for ', ext_str
                except:
                    print 'getting ', ext_str, ' FILE: ', fn0, ' (found = %d)' % (
                        found)
                    fext.append(ext)
                    cext.append(etp)

            if len(fext) == 0:
                print 'Nothing to get from %s to %s!' % (sday, eday)
                continue

            if len(fext) == 1 and fext[
                    0] == '_trd.csv' and next_contract and getqt:
                print '!! Next Contract using existing quote only'
                continue

            if ibclient is None:
                # here if ibclient is None then
                # don't run it (save time)
                # the caller should except file
                # not found and handle it with zero bar
                print 'Not running ibclient (None)!'
                fnarr.remove(fn)
                continue

            # clean up the existing files
            for ext in fext:
                fn0 = fn + ext
                if not mock_run:
                    os.system('rm -f ' + fn0 + ' > /dev/null 2>&1')
                    os.system('rm -f ' + fn0 + '.gz' + ' > /dev/null 2>&1')

            if symbol in ib_sym_special:
                fc = symbol + fc[-2:]
            sym = venue + '/' + fc
            # get all days with the same contract, saving to the same file
            tic = l1.TradingDayIterator(sday)
            d0 = tic.yyyymmdd()
            try:
                while d0 <= eday and d0 <= end_date:
                    # get for day d0
                    utc1 = tic.to_local_utc(end_hour, 0, 0)
                    utc0 = utc1 - (end_hour - start_hour) * 3600
                    while utc0 < utc1:
                        # get for quote and trade for end_time as utc
                        utc0 += step_sec
                        eday_str = datetime.datetime.fromtimestamp(
                            utc0).strftime('%Y%m%d %H:%M:%S')
                        #for ist, ext in zip (['0', '1'], ['_qt.csv','_trd.csv']):
                        for ist, ext in zip(cext, fext):
                            fn0 = fn + ext
                            cmdline = ibclient + ' ' + str(
                                cid
                            ) + ' ' + sym + ' ' + '\"' + eday_str + '\"' + ' ' + str(
                                barsec) + ' ' + fn0 + ' ' + ist + ' ' + clp
                            print 'running ', cmdline
                            if not mock_run:
                                os.system(cmdline)
                                time.sleep(2)
                                #os.system( 'sleep 2'
                    tic.next()
                    d0 = tic.yyyymmdd()
            except (KeyboardInterrupt, SystemExit):
                print 'stop ...'
                return []
            except:
                traceback.print_exc()

    for fn in fnarr:
        for ext in fext:
            fn0 = fn + ext
            if not mock_run:
                print 'gzip ', fn0
                os.system('gzip ' + fn0)
    """
    if upd_repo :
        repo_path = read_cfg('RepoPath')
        future_inclusion = ['back' if next_contract else 'front']
        from IB_hist import ingest_all_symbol
        ingest_all_symbol(start_date, end_date, repo_path=repo_path, get_missing=True, sym_list=sym_list, future_inclusion=future_inclusion)
    """

    return fnarr
Example #7
0
    def __init__(self, symbol, bar_file, dbar_repo) :
        """
        Reader of IB's L1 bar file, whose columns are
        UTC         bs    bp         ap            as  bv  sv  utc_at_collect   qbc qac bc sc ism_avg
        --------------------------------------------------------------------------------------------------
        1535425169, 5, 2901.5000000, 2901.7500000, 135, 5, 17, 1535425169000056, 1, 2, 1, 2, 2901.5062609
        ...
        Where
        UTC is the bar ending time
        qbc is best bid change count
        qac is best ask change count
        bc  is buy trade counts
        sc  is sell trade counts

        The bar file looks like
        bar/NYM_CL_B1S.csv
        A file name ending with .gz is gunzipped in place before it is opened.

        Parsing a bar file gives the following two arrays
        bcol_arr: array of basic columns for each day.
                 ['vol', 'vbs', 'spd', 'bs', 'as', 'mid']
        ecol_arr: array of extended columns for each day
                 ['qbc', 'qac', 'tbc', 'tsc', 'ism1']

        If dbar_repo is not None, the repo is updated by the following rule:
        1. overwrite the [lrc,volc,vbsc,lpxc], whenever exist (indexing using the utcc)
        2. add columns of bs, as, spd qbc qac tbc tsc ism1, fill-in on missing
           (see NOTE 5)


        NOTE 1: utc offset:
        From 201805301800 to 201806261700, utc + 1 matches with history
        From 201806261800 to 201808171700, utc + 2 matches with history
        Good afterwards

        NOTE 2:
        Extended columns start from 20180715-20:39:55, but may have problems
        for the first few days

        NOTE 3:
        Next contract bar starts from 20180802-18:12:30
        Same as the IB_Hist, there is a separate dbar_repo for the same
        symbol's next contract,
        i.e. dbar_repo_next_contract for bars of next contract

        NOTE 4:
        Be prepared for any data losses and errors!
        zero prices, zero sizes


        NOTE 5:
        There is a 1~2 second drift between the hist's mid and L1's mid
        before 8/18/2018.
        Since the L1 is the live trading one, it is given more emphasis.
        To be consistent, the lr is overwritten together with vol and vbs.

        But when constructing the lr to override, since the first lr is
        calculated with the previous trading day on the same contract,
        BE SURE to use the hist data on the first index

        Weekend ingestion process for front/back future contract:
        1. collect and ingest hist file, handling missings
        2. read and ingest bar files
        """
        self.symbol = symbol
        self.venue = l1.venue_by_symbol(symbol)
        self.hours = l1.get_start_end_hour(symbol)

        # a gzipped file is unpacked first and then read as a plain file
        is_gz = bar_file[-3:] == '.gz'
        if is_gz :
            os.system('gunzip -f ' + bar_file)
            plain_file = bar_file[:-3]
        else :
            plain_file = bar_file
        self.bar_file = plain_file
        self.gzip = is_gz
        self.f = open(self.bar_file, 'r')
        self.dbar = dbar_repo

        # the time shifting start/stops, see Note 1
        to_utc = l1.TradingDayIterator.local_ymd_to_utc
        self.utc10 = to_utc('20180530', 18, 0, 0)
        self.utc11 = to_utc('20180626', 17, 0, 0)
        self.utc20 = to_utc('20180626', 18, 0, 0)
        self.utc21 = to_utc('20180817', 17, 0, 0)
        # always fixed as 1 second bar for C++ l1 bar writer
        self.bar_sec = 1