# %load_ext autoreload
# %autoreload 2

# %%
import datetime as dt

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap
from euraculus.factor import decompose_variance

# %% [markdown]
# ## Set up
# ### Data

# %%
data = DataMap("../data")
df_spy = data.load_spy_data(series="var")

# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Construct CAPM idiosyncratic variances
# ### Backward part

# %%
# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # load betas
    ...  # elided in this excerpt: presumably the CAPM variance decomposition
    # via decompose_variance, followed by storing the per-date results

    # advance to the next month-end (monthly sampling assumed from the
    # month-end endpoints; day=31 clamps to the last valid day of each month)
    sampling_date += relativedelta(months=1, day=31)
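# %% [markdown]
# The stepping logic above relies on `relativedelta`'s day clamping: adding
# `months=1, day=31` always lands on a month-end, even across short months.
# The cell below is a small illustrative sanity check of that behavior (not
# part of the original pipeline).

# %%
assert dt.datetime(1994, 1, 31) + relativedelta(months=1, day=31) == dt.datetime(1994, 2, 28)
assert dt.datetime(1994, 2, 28) + relativedelta(months=1, day=31) == dt.datetime(1994, 3, 31)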
# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
option = "logvar_capm_resid"  # alternative: "spy_capm_decomp"

# %%
data = DataMap("../data")
df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
df_logvar_resid = data.load_historic(
    sampling_date=sampling_date, column="logvar_capm_resid"
)
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_spy_var = data.load_spy_data(series="var").loc[df_idio_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date,
    columns=["ticker", "comnam", "last_size", "mean_size"],
)

# %% [markdown]
# ### Tickers

# %%
ticker_list = (
    data.load_historic(sampling_date=sampling_date, column="ticker")
    .tail(1)
    .values.ravel()
    .tolist()
)
# NOTE: make_ticker_dict is a euraculus helper; its import is not shown in this excerpt.
column_to_ticker = make_ticker_dict(ticker_list)

# %% [markdown]
# Make and export table:
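# %% [markdown]
# A minimal sketch of the table step (assumptions: `make_ticker_dict` maps the
# variance panel's column labels to tickers, and the summary statistics and
# output path below are illustrative, not the original export code):

# %%
df_table = df_idio_var.rename(columns=column_to_ticker).describe().T
df_table.to_csv("../reports/idio_var_summary.csv")  # illustrative output path
df_table.head()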
# %%
# NOTE: make_tickers_unique, prepare_log_data, log_replace, map_columns, and
# construct_crsp_index are euraculus helpers whose imports are not shown in
# this excerpt.
def load_estimation_data(data: DataMap, sampling_date: dt.datetime) -> tuple:
    """Load the data necessary for estimation from disk.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        df_info: Summarizing information.
        df_log_mcap_vola: Logarithm of market capitalization times volatility.
        df_factors: Factor data.
    """
    # asset data
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
    df_ret = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_mcap = data.load_historic(sampling_date=sampling_date, column="mcap")
    df_info = data.load_asset_estimates(
        sampling_date=sampling_date,
        columns=["ticker", "comnam", "last_size", "mean_size"],
    )
    df_info["ticker"] = make_tickers_unique(df_info["ticker"])

    # prepare asset data
    df_vola = np.sqrt(df_var)
    df_noisevola = np.sqrt(df_noisevar)
    df_lagged_mcap = df_mcap / (df_ret + 1)  # undo the day's return to recover the previous market cap
    df_log_vola = prepare_log_data(df_data=df_vola, df_fill=df_noisevola)
    df_log_mcap = log_replace(df=df_lagged_mcap, method="ffill")
    df_log_mcap_vola = df_log_vola + df_log_mcap  # log(mcap * vola)
    df_log_mcap_vola = map_columns(
        df_log_mcap_vola, mapping=df_info["ticker"], mapping_name="ticker"
    )

    # factor data
    df_factors = pd.DataFrame(index=df_var.index)

    def prepare_spy_factor(df_spy):
        open_prc = df_spy["prc"] / (1 + df_spy["ret"])
        std = df_spy["var"] ** 0.5
        factor = log_replace(open_prc * std, method="min").rename("spy")
        return factor

    def prepare_yahoo_factor(df_yahoo):
        open_prc = df_yahoo["Open"]
        # Parkinson (1980) range-based volatility: sqrt(1 / (4 ln 2)) ≈ sqrt(0.3607)
        std = np.sqrt(0.3607) * (np.log(df_yahoo["High"]) - np.log(df_yahoo["Low"]))
        factor = log_replace(open_prc * std, method="min").rename("yahoo")
        return factor

    def prepare_ew_factor(df_obs):
        # equal-weighted cross-sectional average of standardized observations
        factor = df_obs.sub(df_obs.mean()).div(df_obs.std()).mean(axis=1).rename("ew")
        return factor

    df_spy = data.load_spy_data().reindex(df_var.index)
    spy_factor = prepare_spy_factor(df_spy)
    df_factors = df_factors.join(spy_factor)

    ew_factor = prepare_ew_factor(df_log_mcap_vola)
    df_factors = df_factors.join(ew_factor)

    crsp_factor = construct_crsp_index(sampling_date=sampling_date, data=data)
    df_factors = df_factors.join(crsp_factor)

    for ticker in ["^VIX", "DX-Y.NYB", "^TNX"]:
        df_yahoo = data.load_yahoo(ticker).reindex(df_var.index)
        factor = prepare_yahoo_factor(df_yahoo).rename(ticker)
        df_factors = df_factors.join(factor)

    return (df_info, df_log_mcap_vola, df_factors)
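# %% [markdown]
# Minimal usage sketch for `load_estimation_data` (illustrative: the `DataMap`
# path and sampling date mirror the setup above; the unpacking follows the
# function's return tuple):

# %%
data = DataMap("../data")
df_info, df_log_mcap_vola, df_factors = load_estimation_data(
    data=data, sampling_date=dt.datetime(year=2021, month=12, day=31)
)
df_factors.describe()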