Example #1
0
# Build monthly cap-weighted returns for each BEA industry, year by year,
# along with each year's industry market cap and stock count.
codes = Sectoring(sql, f"bea{vintage}", fillna='')
naics = pstat.build_lookup('lpermno', 'naics', fillna=0)
caps = []      # one Series per year: total cap by industry
counts = []    # one Series per year: number of stocks by industry
rets = []      # one Series per month: industry returns
for year in years:
    # universe as of bd.endyr(year - 1), tagged with its BEA code
    date = bd.endyr(year - 1)
    univ = crsp.get_universe(date)
    univ['bea'] = codes[naics(univ.index, date)]
    univ = univ[univ['bea'] != '']      # drop stocks without a BEA code

    grouped = univ.groupby('bea')
    caps.append(grouped['cap'].sum().rename(year))
    counts.append(grouped['cap'].count().rename(year))

    # consecutive pairs of dates from `months`: (rebalance date, period end)
    months = bd.date_range(date, bd.endyr(year), 'endmo')
    for rebaldate, end in zip(months[:-1], months[1:]):
        r = pd.concat(
            [
                crsp.get_ret(bd.begmo(end), end),
                crsp.get_cap(rebaldate, use_permco=False),
                univ['bea'],
            ],
            join='inner',
            axis=1,
        ).dropna()

        # industry return = sum over members of (cap share * stock return);
        # grp is created first and reused after 'wtdret' is added to r
        grp = r.groupby('bea')
        r['wtdret'] = r['ret'] * (r['cap'] / grp['cap'].transform('sum'))
        rets.append(grp['wtdret'].sum(min_count=1).rename(end))
        print(end, len(r), r['wtdret'].sum() / len(grp))

# Collapse the yearly snapshots to one average per industry, and place the
# monthly industry return Series side by side
caps = pd.concat(caps, axis='columns').mean(axis='columns')      # mean cap across years
counts = pd.concat(counts, axis='columns').mean(axis='columns')  # mean count across years
rets = pd.concat(rets, axis='columns')

# node variables per industry: cap and count (will take logs of)
nodevars = pd.concat([caps.rename('cap'), counts.rename('count')],
                     axis='columns')
# dates in rows, industries in columns, ordered exactly as in nodevars
rets = rets.transpose()[nodevars.index]
    ls.get_robustcov_results('hac-panel', groups=rets['port'],
                             maxlags=3).summary())
# Print the summary using the 'cluster' robust covariance, with clusters
# defined by rets['port'].
# NOTE(review): `ls` is presumably a fitted statsmodels regression result — confirm.
print(ls.get_robustcov_results('cluster', groups=rets['port']).summary())

## Fama-MacBeth on individual stocks: per-date characteristic scores as loadings
rebalbeg = 19640601
rebalend = LAST_DATE
rebaldates = crsp.bd.date_range(rebalbeg, rebalend, 'endmo')
loadings = {}
for pordate in rebaldates:  # signal values are retrieved for every date
    date = bd.june_universe(pordate)
    univ = crsp.get_universe(date)

    # size score: negative log of the square root of cap
    cap = np.sqrt(crsp.get_cap(date)['cap'])
    smb = -np.log(cap).rename('size')

    hml = signals('hml', date, bd.endmo(date, -12))['hml'].rename('value')

    # shrink the raw beta toward one: 2/3 * beta + 1/3
    beta = signals('beta', pordate, bd.begmo(pordate))['beta']
    beta = beta * 2 / 3 + 1 / 3

    mom = signals('mom', pordate)['mom'].rename('momentum')

    # keep stocks that have all four signals, then restrict to the universe
    df = pd.concat([beta, hml, smb, mom], axis=1, join='inner')
    df = df.reindex(univ.index).dropna()
    loadings[pordate] = winsorized(df, quantiles=[0.05, 0.95])

## Compute coefficients from FM cross-sectional regressions
# NOTE(review): RiskPremium is project code not visible here; presumably the
# call below runs the cross-sectional regressions and keeps the results on
# the object, since its return value is discarded — confirm.
riskpremium = RiskPremium(user, bench, 'RF', LAST_DATE)
# Only 'value', 'size' and 'momentum' are listed for standardization;
# weights=None is passed explicitly.
riskpremium(
    crsp,
    loadings,  # FM regressions on standardized scores
    weights=None,
    standardize=['value', 'size', 'momentum'])
                                   rebalbeg, rebalend, window=1, months=[],
                                   leverage=flips.get(label, 1))
        # Run the backtest pipeline for `label` on `holdings` with overlap=0,
        # passing the benchmark names and output directory; result kept in `excess`.
        # NOTE(review): presumably returns excess returns vs. benchmarks — confirm.
        excess = backtest_pipeline(backtest, crsp, holdings, label,
                                   benchnames, overlap=0, outdir=outdir)

## Liquidity signals from daily stock returns
# Runs only when 'daily' is listed in `testable`; when `regenerate` is set,
# the daily table is read in batches of 12 month-end dates and per-row
# liquidity measures are derived from each batch.
if 'daily' in testable:
    beg, end = 19830601, LAST_DATE  # nasdaq/volume from after 1982
    # names of the liquidity signals for this section
    columns = ['ill', 'maxret', 'retvol', 'baspread', 'std_dolvol',
               'zerotrade', 'std_turn', 'turn']
    if regenerate:
        tic = time.time()     # start time for the elapsed-seconds progress print
        out = DataFrame()
        dolvol = DataFrame()
        turn = DataFrame()    # to average turn signal over rolling 3-months
        # date range starts from bd.begmo(beg, -3), i.e. ahead of `beg`
        # NOTE(review): presumably 3 months of lead-in for the rolling windows — confirm
        dt = bd.date_range(bd.begmo(beg,-3), end, 'endmo')
        chunksize = 12        # chunk monthly date ranges by yearly batches
        batchdates = [dt[i:(i+chunksize)] for i in range(0, len(dt), chunksize)]
        for rebaldates in batchdates:
            # daily rows from bd.begmo of the batch's first date through its last date
            q = (f"SELECT permno, date, ret, askhi, bidlo, prc, vol, shrout "
                 f" FROM {crsp['daily'].key} WHERE "
                 f" date>={bd.begmo(rebaldates[0])} AND "
                 f" date<={rebaldates[-1]}")  # retrieve a chunk
            f = crsp.sql.read_dataframe(q).sort_values(['permno', 'date'])
            # high-low range divided by the high/low midpoint
            f['baspread'] = ((f['askhi'] - f['bidlo']) /
                             ((f['askhi'] + f['bidlo']) / 2))
            # dollar volume uses the absolute price
            # NOTE(review): presumably because prc can be stored negative — confirm
            f['dolvol'] = f['prc'].abs() * f['vol']
            # volume divided by shares outstanding
            f['turn1'] = f['vol'] / f['shrout']
            # log dollar volume, assigned only on rows where dolvol is positive
            f.loc[f['dolvol']>0, 'ldv'] = np.log(f.loc[f['dolvol']>0, 'dolvol'])
            # absolute return per unit of dollar volume, scaled by 1e6;
            # not guarded against dolvol == 0
            f['ill'] = 1000000 * f['ret'].abs() / f['dolvol']
            # progress: query text, rows read, seconds elapsed since `tic`
            print(q, len(f), int(time.time() - tic))
Example #4
0
        if len(univ):
            # bucket stocks on `key` using the [20, 80] breakpoints; bucket 1
            # is the long ("pos") side and bucket 3 the short ("neg") side
            sub = fractiles(univ[key], [20, 80])
            in_pos, in_neg = sub == 1, sub == 3
            capret = ['cap', 'ret']
            pos = weighted_average(univ.loc[in_pos, capret], 'cap')['ret']
            neg = weighted_average(univ.loc[in_neg, capret], 'cap')['ret']
            # spread return plus the number of stocks on each side
            ret0[end] = dict(ret=pos - neg, npos=sum(in_pos), nneg=sum(in_neg))
            if ECHO:
                print(end, len(univ), pos, neg)

        # year-ahead spread returns: shift the window forward relative to the
        # current `end` (begmo offset 4 through endmo offset 15)
        beg = bd.begmo(end, 4)
        end = bd.endmo(end, 15)
        # this year's rows with a non-missing `key`, indexed by permno; keep
        # only stocks with a cap at bd.offset(beg, -1), then attach returns
        # over [beg, end] (left join, so stocks without a return are kept)
        univ = (
            data[data['year'] == year]
            .dropna(subset=[key])
            .set_index('permno')
            .join(crsp.get_cap(bd.offset(beg, -1)), how='inner')
            .join(crsp.get_ret(beg, end, delist=True), how='left')
        )
        if len(univ):
            sub = fractiles(univ[key], [20, 80])
            pos = weighted_average(univ.loc[sub == 1, ['cap', 'ret']],
                                   'cap')['ret']
            neg = weighted_average(univ.loc[sub == 3, ['cap', 'ret']],
                                   'cap')['ret']
            ret1[end] = {
                'ret': pos - neg,
                'npos': sum(sub == 1),