# Accumulators filled while looping over dates below.
# shareclass collects TAQ master records of securities whose issuer (permco)
# has more than one share class in the CRSP universe on that date.
shareclass = []
daily = []  # NOTE(review): not populated in the visible part of this script

# One (initially empty) dict per liquidity/return measure name.
bins = {k:{} for k in ['effective', 'realized', 'impact', 'quoted', 'volume',
                       'offersize', 'bidsize', 'ret', 'retq', 'counts']}
tic = time.time()  # wall-clock start time
# (value, unit) pairs; units are presumably seconds ('s') and minutes ('m')
# -- TODO confirm against the code that consumes `intervals`.
intervals = [(v,'s') for v in [1,2,5,15,30]] + [(v,'m') for v in [1,2,5]]
# Dates to process; these appear to be YYYYMMDD integers.
dates = [20191007, 20191008, 20180305, 20180306]
for d, date in enumerate(dates):
    # Open the master, trades and quotes inputs for this date.
    master, trades, quotes = opentaq(date, taqdir)

    # screen on CRSP universe, and drop duplicate share classes (same permco)
    # Inner-join the universe with the 'names' fields, ordered by permco/ncusip.
    univ = crsp.get_universe(date)\
               .join(crsp.get_section(dataset='names',
                                      fields=['ncusip', 'permco', 'exchcd'],
                                      date_field='date',
                                      date=date,
                                      start=0), how='inner')\
               .sort_values(['permco', 'ncusip'])
    # Flag master rows whose first 8 CUSIP characters match the ncusip of any
    # universe row sharing its permco with another row (keep=False marks all).
    dups = master['CUSIP'].str.slice(0, 8).isin(
        univ.loc[univ.duplicated(['permco'], keep=False), 'ncusip'])
    # Record the flagged master rows as plain dicts.
    shareclass.extend(master[dups].to_dict(orient='index').values())
    # Keep one row per permco: after sorting by 'cap' ascending with NaN first,
    # keep='last' retains the row with the largest 'cap'.  The result is then
    # indexed by ncusip (also kept as a column) for the lookup below.
    univ = univ.sort_values(['permco','cap'], na_position='first')\
               .drop_duplicates(['permco'], keep='last')\
               .reset_index().set_index('ncusip', drop=False)

    # Iterate by symbol over Daily Taq trades, nbbo and master files
    # NOTE(review): the remainder of this loop body is not visible here.
    for ct, cq, m in itertaq(trades, quotes, master, cusips=univ['ncusip'],
                             open_t=_open, close_t=None):
        # Start the per-symbol record with the date and the universe fields
        # looked up by the 8-character CUSIP.
        h = {'date':date}
        h.update(univ.loc[m['CUSIP'][:8], ['permno','decile','exchcd','siccd']])
                            months=[6],
                            rebals=rebals)['holdings']

# Compute MOM momentum factor
# For every month-end rebalance date, build each stock's past-return signal,
# then pass the stacked signals to the Fama-French sort.
label, past = 'mom', (2, 12)
df = []  # accumulates one frame of signal values per rebalance month
rebalend = bd.endmo(LAST_DATE, -1)
for rebaldate in bd.date_range(rebalbeg, rebalend, 'endmo'):
    beg = bd.endmo(rebaldate, -past[1])     # a price is required at this date
    start = bd.offset(beg, 1)               # first date (inclusive) of signal
    end = bd.endmo(rebaldate, 1 - past[0])  # last date of signal

    # Pieces to align: universe, return over [start, end], and the prices
    # at both ends of the window (fetched in the order beg, then end).
    p = [crsp.get_universe(rebaldate),
         crsp.get_ret(start, end)['ret'].rename(label)]
    p += [crsp.get_section('monthly', ['prc'], 'date', asof)['prc'].rename(tag)
          for asof, tag in ((beg, 'beg'), (end, 'end'))]

    # Inner join keeps only stocks present in every piece; rows with any
    # missing value are then dropped.
    q = pd.concat(p, axis=1, join='inner')
    q = q.reset_index().dropna()
    q['rebaldate'] = rebaldate
    df.append(q[['permno', 'rebaldate', label]])
    print(rebaldate, len(df), len(q))

# Stack all months, store the signal, and keep the resulting holdings
df = pd.concat(df)
signals = chunk_signal(df)
holdings[label] = famafrench_sorts(
    crsp, label, signals, rebalbeg, rebalend, window=0, months=[],
)['holdings'][label]
# Momentum and divyld from CRSP monthly
if 'monthly' in testable:
    if regenerate:
        # Past-return (momentum) signals from CRSP monthly.  Each label maps
        # to a (nearest, farthest) pair of months defining the return window.
        beg, end = 19251231, LAST_DATE
        intervals = {'mom12m': (2,12), 'mom36m': (13,36),
                     'mom6m': (2,6), 'mom1m': (1,1)}
        for label, past in intervals.items():
            # Collect per-date frames and concatenate once after the loop:
            # DataFrame.append was removed in pandas 2.0 and recopied the
            # accumulated frame on every iteration.
            frames = []
            for rebaldate in bd.date_range(bd.endmo(beg, past[1]), end, 'endmo'):
                start = bd.endmo(rebaldate, -past[1])   # anchor month-end
                beg1 = bd.offset(start, 1)              # first date of window
                end1 = bd.endmo(rebaldate, 1-past[0])   # last date of window
                df = crsp.get_universe(end1)
                # monthly return at the anchor date, aligned to the universe;
                # stocks with no value there are dropped below
                df['start'] = crsp.get_section(dataset='monthly', fields=['ret'],
                                               date_field='date', date=start)\
                                  .reindex(df.index)
                df[label] = crsp.get_ret(beg1, end1).reindex(df.index)
                df['permno'] = df.index
                df['rebaldate'] = rebaldate
                df = df.dropna(subset=['start'])
                frames.append(df[['rebaldate', 'permno', label]])
            # pd.concat raises on an empty list, so fall back to the empty
            # frame the original code would have written in that case.
            out = (pd.concat(frames, ignore_index=True) if frames
                   else DataFrame())
            n = signals.write(out, label, overwrite=True)

        # Set up the next group of monthly signals, named in `columns`.
        # NOTE(review): the body of the loop below appears to be cut off in
        # this view; `columns` and `out` are not used in the visible lines.
        beg, end = 19270101, LAST_DATE
        columns = ['chmom', 'divyld', 'indmom']
        out = DataFrame()
        for rebaldate in bd.date_range(beg, end, 'endmo'):
            # bd.endmo with an offset of -12 (presumably the month-end twelve
            # months before rebaldate -- confirm against BusDay.endmo)
            start = bd.endmo(rebaldate, -12)
            beg1 = bd.offset(start, 1)
        # Read the pipe-delimited dividends file found under `pathname` and
        # add only the rows for tickers not already present in `dividends`.
        df = pd.read_csv(os.path.join(pathname, 'dividends.csv.gz'), sep='|')
        new = set(np.unique(df['ticker'])).difference(
            set(np.unique(dividends['ticker'])))
        df = df[df['ticker'].isin(new)]
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
        # row labels are preserved and columns are not sorted, as before.
        dividends = pd.concat([dividends, df], sort=False)
        print(pathname, 'added dividends', new)

    # Connect to the database and build the business-day and CRSP helpers
    sql = SQL(**config.credentials['sql'], echo=config.ECHO)
    bd = BusDay(sql)
    crsp = CRSP(sql, bd, rdb=None)
    date = bd.offset(crsp_date)

    # get price and shrout as of last date
    price = crsp.get_section(dataset='daily', fields=['prc', 'shrout'],
                             date_field='date', date=date, start=None)

    # get tickers to lookup permno: names records aligned to the priced
    # stocks, ordered by symbol then date
    tickers = crsp.get_section(dataset='names', fields=['tsymbol', 'date'],
                               date_field='date', date=date, start=0)
    tickers = tickers.reindex(price.index)
    tickers = tickers.sort_values(['tsymbol', 'date'])
    # NOTE(review): with no subset given, only rows identical in both
    # tsymbol and date are dropped -- confirm this is intended rather than
    # de-duplicating on tsymbol alone.
    tickers = tickers.drop_duplicates(keep='last')
    # Series mapping tsymbol -> permno
    tickers = tickers.reset_index().set_index('tsymbol')['permno']

    # Yahoo has '-' but CRSP has '' between symbol and share class
    prices['ticker'] = prices['ticker'].str.replace('-', '')  # dividends

    # merge permnos into big prices table