# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Construct CAPM idiosyncratic variances

# %% [markdown]
# ### Backward part

# %%
# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # load total variances and CAPM betas
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_betas = data.load_asset_estimates(
        sampling_date=sampling_date, columns=["spy_capm_spy"]
    )
    spy_data = df_spy.loc[df_var.index]

    # decompose into systematic and idiosyncratic parts
    df_decomposition = decompose_variance(df_var, df_betas, spy_data)
    df_decomposition = df_decomposition.loc[:, ["sys", "idio"]].add_prefix("var_")

    # store
    data.store(
        data=df_decomposition,
        path="samples/{:%Y-%m-%d}/historic_daily.csv".format(sampling_date),
    )

    # advance to the next month-end sample (assumes monthly sampling; without this the loop never terminates)
    sampling_date += relativedelta(months=1, day=31)
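# %% [markdown]
# `decompose_variance` is a project helper that is not defined in this excerpt. Under the
# CAPM, an asset's total variance splits into a systematic component $\beta^2 \sigma_m^2$
# and an idiosyncratic remainder, which matches the `sys`/`idio` columns selected above.
# The sketch below illustrates that decomposition for a single asset; the function name and
# argument layout are hypothetical, not the project's implementation.

# %%
def decompose_variance_sketch(
    total_var: pd.Series, beta: float, market_var: pd.Series
) -> pd.DataFrame:
    """Illustrative CAPM variance decomposition for one asset (not the project helper)."""
    var_sys = beta**2 * market_var  # variance explained by exposure to the market factor
    var_idio = total_var - var_sys  # leftover, asset-specific variance
    return pd.DataFrame({"sys": var_sys, "idio": var_idio})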
# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
option = "logvar_capm_resid"  # "spy_capm_decomp"

# %%
data = DataMap("../data")
df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
df_logvar_resid = data.load_historic(
    sampling_date=sampling_date, column="logvar_capm_resid"
)
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_spy_var = data.load_spy_data(series="var").loc[df_idio_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date,
    columns=["ticker", "comnam", "last_size", "mean_size"],
)

# %% [markdown]
# ### Tickers

# %%
ticker_list = (
    data.load_historic(sampling_date=sampling_date, column="ticker")
    .tail(1)
    .values.ravel()
    .tolist()
)
def load_estimation_data(data: DataMap, sampling_date: dt.datetime) -> tuple:
    """Load the data necessary for estimation from disk.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        df_info: Summarizing information.
        df_log_mcap_vola: Logarithm of market capitalization times volatility.
        df_factors: Factor data.
    """
    # asset data
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
    df_ret = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_mcap = data.load_historic(sampling_date=sampling_date, column="mcap")
    df_info = data.load_asset_estimates(
        sampling_date=sampling_date,
        columns=["ticker", "comnam", "last_size", "mean_size"],
    )
    df_info["ticker"] = make_tickers_unique(df_info["ticker"])

    # prepare asset data
    df_vola = np.sqrt(df_var)
    df_noisevola = np.sqrt(df_noisevar)
    df_lagged_mcap = df_mcap / (df_ret + 1)
    df_log_vola = prepare_log_data(df_data=df_vola, df_fill=df_noisevola)
    df_log_mcap = log_replace(df=df_lagged_mcap, method="ffill")
    df_log_mcap_vola = df_log_vola + df_log_mcap
    df_log_mcap_vola = map_columns(
        df_log_mcap_vola, mapping=df_info["ticker"], mapping_name="ticker"
    )

    # factor data
    df_factors = pd.DataFrame(index=df_var.index)

    def prepare_spy_factor(df_spy):
        open_prc = df_spy["prc"] / (1 + df_spy["ret"])
        std = df_spy["var"] ** 0.5
        factor = log_replace(open_prc * std, method="min").rename("spy")
        return factor

    def prepare_yahoo_factor(df_yahoo):
        open_prc = df_yahoo["Open"]
        std = np.sqrt(0.3607) * (np.log(df_yahoo["High"]) - np.log(df_yahoo["Low"]))
        factor = log_replace(open_prc * std, method="min").rename("yahoo")
        return factor

    def prepare_ew_factor(df_obs):
        factor = df_obs.sub(df_obs.mean()).div(df_obs.std()).mean(axis=1).rename("ew")
        return factor

    df_spy = data.load_spy_data().reindex(df_var.index)
    spy_factor = prepare_spy_factor(df_spy)
    df_factors = df_factors.join(spy_factor)

    ew_factor = prepare_ew_factor(df_log_mcap_vola)
    df_factors = df_factors.join(ew_factor)

    crsp_factor = construct_crsp_index(sampling_date=sampling_date, data=data)
    df_factors = df_factors.join(crsp_factor)

    for ticker in ["^VIX", "DX-Y.NYB", "^TNX"]:
        df_yahoo = data.load_yahoo(ticker).reindex(df_var.index)
        factor = prepare_yahoo_factor(df_yahoo).rename(ticker)
        df_factors = df_factors.join(factor)

    return (df_info, df_log_mcap_vola, df_factors)
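# %% [markdown]
# The hard-coded `0.3607` in `prepare_yahoo_factor` appears to be the Parkinson (1980)
# range-based variance constant $1/(4 \ln 2) \approx 0.3607$: daily volatility is proxied by
# $\hat{\sigma}_t = \sqrt{1/(4 \ln 2)}\,(\ln \mathrm{High}_t - \ln \mathrm{Low}_t)$ and scaled
# by the opening price, mirroring the open-price-times-volatility construction of the SPY
# factor. The quick check below is illustrative only.

# %%
# verify that the constant matches the Parkinson range-estimator scaling
print(1 / (4 * np.log(2)))  # ≈ 0.3607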
# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Assets summary stats

# %%
# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # get samples
    df_historic = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_historic -= df_rf.loc[df_historic.index].values

    # calculate stats
    df_stats = pd.DataFrame(index=df_historic.columns)
    df_stats["ret_excess"] = (1 + df_historic).prod() - 1
    df_stats["var_annual"] = df_historic.var() * 252

    if sampling_date < last_sampling_date:
        # get excess return samples
        df_future = data.load_future(sampling_date=sampling_date, column="retadj")
        df_future -= df_rf.loc[df_future.index].values

        # slice expanding window
        df_expanding_estimates = pd.DataFrame(index=df_future.columns)
# %%
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Standard Factor Models

# %% [markdown]
# ### Backward part

# %%
# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # get excess return samples
    df_historic = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_historic -= df_rf.loc[df_historic.index].values

    # estimate models backwards
    df_estimates, df_residuals = estimate_models(ret_models, df_historic)

    # store
    data.store(
        data=df_residuals,
        path="samples/{:%Y-%m-%d}/historic_daily.csv".format(sampling_date),
    )
    data.store(
        data=df_estimates,
        path="samples/{:%Y-%m-%d}/asset_estimates.csv".format(sampling_date),
    )

    # advance to the next month-end sample (assumes monthly sampling; without this the loop never terminates)
    sampling_date += relativedelta(months=1, day=31)
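# %% [markdown]
# `estimate_models` and `ret_models` are project objects that are not shown in this excerpt;
# judging by the loop above, each model regresses asset excess returns on factor returns and
# the results are stored as per-asset estimates plus daily residuals. Below is a minimal
# single-factor OLS sketch under that assumption (hypothetical helper, not the project's
# implementation).

# %%
def estimate_capm_sketch(df_excess: pd.DataFrame, market_excess: pd.Series):
    """Illustrative per-asset single-factor OLS, returning estimates and residuals."""
    estimates = {}
    residuals = {}
    x = market_excess.loc[df_excess.index]
    for asset in df_excess.columns:
        y = df_excess[asset]
        beta = y.cov(x) / x.var()  # OLS slope on the market factor
        alpha = y.mean() - beta * x.mean()  # OLS intercept
        estimates[asset] = {"alpha": alpha, "beta": beta}
        residuals[asset] = y - (alpha + beta * x)
    return pd.DataFrame(estimates).T, pd.DataFrame(residuals)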
# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")
# df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
# df_logvar_resid = data.load_historic(
#     sampling_date=sampling_date, column="logvar_capm_resid"
# )
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
df_spy_var = data.load_spy_data(series="var").loc[df_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date, columns=["ticker", "comnam", "last_size", "mean_size"]
)

# %% [markdown]
# ### Tickers

# %%
ticker_list = (
    data.load_historic(sampling_date=sampling_date, column="ticker")
    .tail(1)
    .values.ravel()
    .tolist()
)
# cov_grid = {'alpha': [1e-1]}
# horizon = 21

# %% [markdown]
# ## Test single period

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %%
# %%time
# load and transform idiosyncratic volatility data, load size data
if option == "spy_capm_decomp":
    df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
    df_log_idio_var = data.log_replace(df_idio_var, method="min")
elif option == "logvar_capm_resid":
    df_log_idio_var = data.load_historic(
        sampling_date=sampling_date, column="logvar_capm_resid"
    )
mean_size = data.load_asset_estimates(
    sampling_date=sampling_date, columns="mean_size"
).values.squeeze()

# estimate var
var = VAR(has_intercepts=True, p_lags=1)
var_cv = var.fit_adaptive_elastic_net_cv(
    var_data=df_log_idio_var, grid=var_grid, return_cv=True
)
residuals = var.residuals(df_log_idio_var)
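# %% [markdown]
# For reference, `VAR(has_intercepts=True, p_lags=1)` fits a first-order vector autoregression
# on the panel of log idiosyncratic variances,
#
# $$y_t = c + A \, y_{t-1} + u_t,$$
#
# where $y_t$ stacks the assets' log idiosyncratic variances on day $t$.
# `fit_adaptive_elastic_net_cv` selects the penalty hyperparameters in `var_grid` by
# cross-validation; in one common adaptive elastic-net parameterization the coefficients
# minimize the sum of squared residuals plus
# $\lambda \bigl( \kappa \sum_j w_j |a_j| + \tfrac{1-\kappa}{2} \sum_j a_j^2 \bigr)$
# with data-driven weights $w_j$. The exact objective of the project's `VAR` class is not
# shown in this excerpt, so this is only the standard formulation.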