'realestate', 'salecash', 'salerec', 'saleinv', 'secured', 'sgr', 'sp', 'tang', 'bm_ia', 'cfp_ia', 'chatoia' , 'chpmia', 'pchcapx_ia', 'chempia', 'mve_ia'] numlag = 6 # number of months to lag data for rebalance end = LAST_DATE # last data date if regenerate: # retrieve annual, keep [permno, datadate] with non null prccq if any fields = ['sic', 'fyear', 'ib', 'oancf', 'at', 'act', 'che', 'lct', 'dlc', 'dltt', 'prcc_f', 'csho', 'invt', 'dp', 'ppent', 'dvt', 'ceq', 'txp', 'revt', 'cogs', 'rect', 'aco', 'intan', 'ao', 'ap', 'lco', 'lo', 'capx', 'emp', 'ppegt', 'lt', 'sale', 'xsga', 'xrd', 'fatb', 'fatl', 'dm'] df = pstat.get_linked( dataset='annual', date_field='datadate', fields=fields, where=(f"indfmt = 'INDL' AND datafmt = 'STD' AND " f"curcd = 'USD' AND popsrc = 'D' AND consol = 'C' AND " f"datadate <= {end//100}31")) fund = df.sort_values(['permno', 'datadate', 'ib'])\ .drop_duplicates(['permno', 'datadate'])\ .dropna(subset=['ib']) fund.index = list(zip(fund['permno'], fund['datadate'])) # multi-index fund['rebaldate'] = bd.endmo(fund.datadate, numlag) # precompute, and lag common metrics: mve_f avg_at sic2 fund['sic2'] = np.where(fund['sic'].notna(), fund['sic'] // 100, 0) fund['fyear'] = fund['datadate'] // 10000 # can delete this fund['mve_f'] = fund['prcc_f'] * fund['csho'] lag = fund.shift(1, fill_value=0) lag.loc[lag['permno'] != fund['permno'], fields] = np.nan
# Data sources: CRSP and Compustat wrappers
crsp = CRSP(sql, bd, rdb)
pstat = PSTAT(sql, bd)

## Rebalance and return dates, and initialize classes for calculations
rebalbeg = bd.offset(20190630)
rebals = list(map(bd.offset, [20200630]))
stocks = chunk_stocks(crsp, rebalbeg, LAST_DATE)
perf = DailyPerformance(stocks)

# Compute HML factor
label = 'hml'
lag = 6   # number of months to lag fundamental data

# Retrieve the book-equity components from annual Compustat; the filter
# keeps only rows with positive `seq` and datadate on or after 20141201.
df = pstat.get_linked(
    dataset='annual',
    date_field='datadate',
    fields=['seq', 'pstk', 'pstkrv', 'pstkl', 'txditc'],
    where=("indfmt = 'INDL' AND datafmt = 'STD' AND curcd = 'USD' "
           " AND popsrc = 'D' AND consol = 'C' "
           " AND seq > 0 AND datadate >= 20141201"),
)

## subtract preferred stock, add back deferred investment tax credit
# Preferred stock: first non-missing of pstkrv, pstkl, pstk, else zero.
df[label] = df['pstkrv'].fillna(df['pstkl']).fillna(df['pstk']).fillna(0)
df[label] = df['seq'].add(df['txditc'].fillna(0)).sub(df[label])

# Keep only rows with a defined, strictly positive value, and only the
# identifier/date columns plus the computed label.
df = df.dropna(subset=[label])
df = df.loc[df[label] > 0, ['permno', 'gvkey', 'datadate', label]]

## years in Compustat
# Zero-based running count of rows per gvkey, in datadate order.
df = df.sort_values(by=['gvkey', 'datadate'])
df['count'] = df.groupby('gvkey').cumcount()