# %load_ext autoreload
# %autoreload 2

import datetime as dt

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap
from euraculus.factor import decompose_variance

# NOTE: further euraculus helpers used below (make_ticker_dict, make_tickers_unique,
# prepare_log_data, log_replace, map_columns, construct_crsp_index) are imported in
# the full source; their module paths are not shown in this excerpt.

# ## Set up
# ### Data

data = DataMap("../data")
df_spy = data.load_spy_data(series="var")

# ### Dates

# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)
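
# as a quick sanity check, the monthly sampling grid can be enumerated with pandas
sampling_dates = pd.date_range(first_sampling_date, last_sampling_date, freq="M")
assert len(sampling_dates) == 336  # month-ends from 1994-01 to 2021-12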

# ## Construct CAPM idiosyncratic variances
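
# Under the CAPM, each asset's total variance splits into a systematic and an
# idiosyncratic component, $\sigma_i^2 = \beta_i^2 \sigma_m^2 + \sigma_{\varepsilon,i}^2$,
# so the idiosyncratic variance is what remains after the market term is removed.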

# ### Backward part

# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # load betas and back out idiosyncratic variances (body elided in this excerpt)
    ...
    sampling_date += relativedelta(months=1, day=31)  # advance to the next month-end

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
option = "logvar_capm_resid"  # alternative: "spy_capm_decomp"

# %%
data = DataMap("../data")
df_idio_var = data.load_historic(sampling_date=sampling_date,
                                 column="var_idio")
df_logvar_resid = data.load_historic(sampling_date=sampling_date,
                                     column="logvar_capm_resid")
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_spy_var = data.load_spy_data(series="var").loc[df_idio_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date,
    columns=["ticker", "comnam", "last_size", "mean_size"])

# %% [markdown]
# ### Tickers

# %%
ticker_list = (data.load_historic(
    sampling_date=sampling_date,
    column="ticker").tail(1).values.ravel().tolist())
column_to_ticker = make_ticker_dict(ticker_list)
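
# %% [markdown]
# `make_ticker_dict` is a euraculus helper that is not shown in this excerpt; a
# hypothetical sketch of its role, assuming it maps column positions to
# de-duplicated ticker symbols:

# %%
def make_ticker_dict_sketch(tickers: list) -> dict:
    """Map column positions to tickers, numbering duplicate symbols."""
    mapping, seen = {}, {}
    for position, ticker in enumerate(tickers):
        count = seen.get(ticker, 0)
        seen[ticker] = count + 1
        mapping[position] = ticker if count == 0 else f"{ticker}{count + 1}"
    return mapping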

# %% [markdown]
# Make and export table:

# %%
def load_estimation_data(data: DataMap, sampling_date: dt.datetime) -> tuple:
    """Load the data necessary for estimation from disk.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        df_info: Summarizing information.
        df_log_mcap_vola: Logarithm of value volatility (market cap times volatility).
        df_factors: Factor data.

    """
    # asset data
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_noisevar = data.load_historic(sampling_date=sampling_date,
                                     column="noisevar")
    df_ret = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_mcap = data.load_historic(sampling_date=sampling_date, column="mcap")
    df_info = data.load_asset_estimates(
        sampling_date=sampling_date,
        columns=["ticker", "comnam", "last_size", "mean_size"],
    )
    df_info["ticker"] = make_tickers_unique(df_info["ticker"])

    # prepare asset data
    df_vola = np.sqrt(df_var)
    df_noisevola = np.sqrt(df_noisevar)
    df_lagged_mcap = df_mcap / (df_ret + 1)  # market cap before the day's return
    df_log_vola = prepare_log_data(df_data=df_vola, df_fill=df_noisevola)
    df_log_mcap = log_replace(df=df_lagged_mcap, method="ffill")
    df_log_mcap_vola = df_log_vola + df_log_mcap  # log(mcap * vola)
    df_log_mcap_vola = map_columns(df_log_mcap_vola,
                                   mapping=df_info["ticker"],
                                   mapping_name="ticker")

    # factor data
    df_factors = pd.DataFrame(index=df_var.index)

    def prepare_spy_factor(df_spy):
        # recover the price before the day's return (previous close) as the open price
        open_prc = df_spy["prc"] / (1 + df_spy["ret"])
        std = df_spy["var"]**0.5
        factor = log_replace(open_prc * std, method="min").rename("spy")
        return factor

    def prepare_yahoo_factor(df_yahoo):
        open_prc = df_yahoo["Open"]
        # Parkinson (1980) range estimator: sigma = sqrt(1 / (4 ln 2)) * ln(High / Low)
        std = np.sqrt(0.3607) * (np.log(df_yahoo["High"]) -
                                 np.log(df_yahoo["Low"]))
        factor = log_replace(open_prc * std, method="min").rename("yahoo")
        return factor

    def prepare_ew_factor(df_obs):
        # standardize each asset's series, then average across assets with equal weights
        factor = df_obs.sub(df_obs.mean()).div(
            df_obs.std()).mean(axis=1).rename("ew")
        return factor

    df_spy = data.load_spy_data().reindex(df_var.index)
    spy_factor = prepare_spy_factor(df_spy)
    df_factors = df_factors.join(spy_factor)

    ew_factor = prepare_ew_factor(df_log_mcap_vola)
    df_factors = df_factors.join(ew_factor)

    crsp_factor = construct_crsp_index(sampling_date=sampling_date, data=data)
    df_factors = df_factors.join(crsp_factor)

    for ticker in ["^VIX", "DX-Y.NYB", "^TNX"]:
        df_yahoo = data.load_yahoo(ticker).reindex(df_var.index)
        factor = prepare_yahoo_factor(df_yahoo).rename(ticker)
        df_factors = df_factors.join(factor)

    return (df_info, df_log_mcap_vola, df_factors)
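
# %% [markdown]
# Usage sketch for the loader above (assumes `data` and `sampling_date` are defined as
# in the earlier cells):

# %%
df_info, df_log_mcap_vola, df_factors = load_estimation_data(
    data=data, sampling_date=sampling_date)
df_factors.tail()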