# Accumulators filled while scanning the daily TAQ files
shareclass = []   # master-file records for permcos with several share classes
daily = []
bins = {
    measure: {}
    for measure in ['effective', 'realized', 'impact', 'quoted', 'volume',
                    'offersize', 'bidsize', 'ret', 'retq', 'counts']
}
tic = time.time()

# (length, unit) pairs: 1, 2, 5, 15, 30 with 's', followed by 1, 2, 5 with 'm'
intervals = [(length, unit)
             for unit, lengths in (('s', [1, 2, 5, 15, 30]), ('m', [1, 2, 5]))
             for length in lengths]

dates = [20191007, 20191008, 20180305, 20180306]
for d, date in enumerate(dates):
    master, trades, quotes = opentaq(date, taqdir)

    # screen on CRSP universe, and drop duplicate share classes (same permco)
    univ = (
        crsp.get_universe(date)
        .join(crsp.get_section(dataset='names',
                               fields=['ncusip', 'permco', 'exchcd'],
                               date_field='date',
                               date=date,
                               start=0),
              how='inner')
        .sort_values(['permco', 'ncusip'])
    )

    # Flag master rows whose 8-character CUSIP belongs to a permco that
    # occurs more than once in the universe, and record them
    cusip8 = master['CUSIP'].str.slice(0, 8)
    dups = cusip8.isin(
        univ.loc[univ.duplicated(['permco'], keep=False), 'ncusip'])
    shareclass.extend(master[dups].to_dict(orient='index').values())

    # Keep a single row per permco: rows are ordered by cap (missing caps
    # first) within permco and the last one is retained; then index by ncusip
    univ = (
        univ.sort_values(['permco', 'cap'], na_position='first')
        .drop_duplicates(['permco'], keep='last')
        .reset_index()
        .set_index('ncusip', drop=False)
    )

    # Iterate by symbol over Daily Taq trades, nbbo and master files
    for ct, cq, m in itertaq(trades,
                             quotes,
                             master,
                             cusips=univ['ncusip'],
                             open_t=_open,
                             close_t=None):
        # Header record for this symbol: the date plus CRSP identifiers
        # looked up by the first 8 characters of the master CUSIP
        h = {'date': date}
        h.update(univ.loc[m['CUSIP'][:8],
                          ['permno', 'decile', 'exchcd', 'siccd']])
months=[6], rebals=rebals)['holdings'] # Compute MOM momentum factor label = 'mom' past = (2, 12) df = [] # collect each month's momentum signal values rebalend = bd.endmo(LAST_DATE, -1) for rebaldate in bd.date_range(rebalbeg, rebalend, 'endmo'): beg = bd.endmo(rebaldate, -past[1]) # require price at this date start = bd.offset(beg, 1) # start date, inclusive, of signal end = bd.endmo(rebaldate, 1 - past[0]) # end date of signal p = [ crsp.get_universe(rebaldate), # retrieve prices and construct signal crsp.get_ret(start, end)['ret'].rename(label), crsp.get_section('monthly', ['prc'], 'date', beg)['prc'].rename('beg'), crsp.get_section('monthly', ['prc'], 'date', end)['prc'].rename('end') ] q = pd.concat(p, axis=1, join='inner').reset_index().dropna() q['rebaldate'] = rebaldate df.append(q[['permno', 'rebaldate', label]]) print(rebaldate, len(df), len(q)) df = pd.concat(df) signals = chunk_signal(df) holdings[label] = famafrench_sorts(crsp, label, signals, rebalbeg, rebalend, window=0, months=[])['holdings'][label]
# Momentum and divyld from CRSP monthly
if 'monthly' in testable:
    if regenerate:
        # Local import so this section does not depend on how the rest of
        # the file imports pandas; pd.concat replaces DataFrame.append,
        # which was removed in pandas 2.0.
        import pandas as pd

        beg, end = 19251231, LAST_DATE
        # label -> (first, last) lag, in months, of the past-return window
        intervals = {'mom12m': (2, 12),
                     'mom36m': (13, 36),
                     'mom6m': (2, 6),
                     'mom1m': (1, 1)}
        for label, past in intervals.items():
            # One frame per rebalance date, concatenated once after the loop
            # instead of re-copying a growing frame on every iteration
            frames = []
            for rebaldate in bd.date_range(bd.endmo(beg, past[1]), end, 'endmo'):
                start = bd.endmo(rebaldate, -past[1])
                beg1 = bd.offset(start, 1)
                end1 = bd.endmo(rebaldate, 1-past[0])
                df = crsp.get_universe(end1)
                # Monthly 'ret' at `start`, aligned to the universe; rows
                # without it are dropped below
                df['start'] = crsp.get_section(dataset='monthly',
                                               fields=['ret'],
                                               date_field='date',
                                               date=start)\
                                  .reindex(df.index)
                df[label] = crsp.get_ret(beg1, end1).reindex(df.index)
                df['permno'] = df.index
                df['rebaldate'] = rebaldate
                df = df.dropna(subset=['start'])
                frames.append(df[['rebaldate', 'permno', label]])
            # pd.concat raises on an empty list, so fall back to the empty
            # frame that would otherwise have been written
            if frames:
                out = pd.concat(frames, ignore_index=True)
            else:
                out = DataFrame()
            n = signals.write(out, label, overwrite=True)

        # NOTE(review): nesting of this section under `if regenerate:` is
        # inferred from the collapsed source -- confirm against the file.
        beg, end = 19270101, LAST_DATE
        columns = ['chmom', 'divyld', 'indmom']
        out = DataFrame()
        for rebaldate in bd.date_range(beg, end, 'endmo'):
            start = bd.endmo(rebaldate, -12)
            beg1 = bd.offset(start, 1)
# NOTE(review): `pathname`, `dividends`, `prices` and `crsp_date` are bound
# outside this fragment; the enclosing structure is not visible here.

# Load this directory's dividends and keep only tickers not already present
df = pd.read_csv(os.path.join(pathname, 'dividends.csv.gz'), sep='|')
new = set(np.unique(df['ticker'])) - set(np.unique(dividends['ticker']))
df = df[df['ticker'].isin(new)]
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
dividends = pd.concat([dividends, df], sort=False)
print(pathname, 'added dividends', new)

sql = SQL(**config.credentials['sql'], echo=config.ECHO)
bd = BusDay(sql)
crsp = CRSP(sql, bd, rdb=None)
date = bd.offset(crsp_date)

# get price and shrout as of last date
price = crsp.get_section('daily', ['prc', 'shrout'], 'date', date, start=None)

# get tickers to lookup permno
tickers = crsp.get_section('names', ['tsymbol', 'date'], 'date', date,
                           start=0).reindex(price.index)
# NOTE(review): drop_duplicates() without a subset only removes rows where
# every column matches, so a tsymbol can still appear more than once in
# the lookup below -- confirm whether subset=['tsymbol'] was intended.
tickers = tickers.sort_values(['tsymbol', 'date'])\
                 .drop_duplicates(keep='last')
tickers = tickers.reset_index().set_index('tsymbol')['permno']

# Yahoo has '-' but CRSP has '' between symbol and share class
# regex=False: literal replacement, independent of the pandas default
prices['ticker'] = prices['ticker'].str.replace('-', '', regex=False)

# dividends

# merge permnos into big prices table