# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Construct CAPM idiosyncratic variances

# %% [markdown]
# ### Backward part

# %%
# %%time
# load the SPY series once for use inside the loop
# (`data` is a DataMap instance, as constructed further below)
df_spy = data.load_spy_data()

sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # load variances and betas
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_betas = data.load_asset_estimates(
        sampling_date=sampling_date, columns=["spy_capm_spy"]
    )
    spy_data = df_spy.loc[df_var.index]

    # decompose total variance into systematic (CAPM) and idiosyncratic parts
    df_decomposition = decompose_variance(df_var, df_betas, spy_data)
    df_decomposition = df_decomposition.loc[:, ["sys", "idio"]].add_prefix("var_")

    # store
    data.store(
        data=df_decomposition,
        path="samples/{:%Y-%m-%d}/historic_daily.csv".format(sampling_date),
    )

    # advance to the next month-end
    sampling_date += pd.offsets.MonthEnd(1)
# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
option = "logvar_capm_resid"  # "spy_capm_decomp"

# %%
data = DataMap("../data")
df_idio_var = data.load_historic(sampling_date=sampling_date,
                                 column="var_idio")
df_logvar_resid = data.load_historic(sampling_date=sampling_date,
                                     column="logvar_capm_resid")
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_spy_var = data.load_spy_data(series="var").loc[df_idio_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date,
    columns=["ticker", "comnam", "last_size", "mean_size"])

# %% [markdown]
# ### Tickers

# %%
ticker_list = (data.load_historic(
    sampling_date=sampling_date,
    column="ticker").tail(1).values.ravel().tolist())

# %%
def load_estimation_data(data: DataMap, sampling_date: dt.datetime) -> dict:
    """Load the data necessary for estimation from disk.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        df_info: Summarizing information.
        df_log_mcap_vola: Logarithm of market value volatility.
        df_factors: Factor data.

    """
    # asset data
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_noisevar = data.load_historic(sampling_date=sampling_date,
                                     column="noisevar")
    df_ret = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_mcap = data.load_historic(sampling_date=sampling_date, column="mcap")
    df_info = data.load_asset_estimates(
        sampling_date=sampling_date,
        columns=["ticker", "comnam", "last_size", "mean_size"],
    )
    df_info["ticker"] = make_tickers_unique(df_info["ticker"])

    # prepare asset data
    df_vola = np.sqrt(df_var)
    df_noisevola = np.sqrt(df_noisevar)
    df_lagged_mcap = df_mcap / (df_ret + 1)
    df_log_vola = prepare_log_data(df_data=df_vola, df_fill=df_noisevola)
    df_log_mcap = log_replace(df=df_lagged_mcap, method="ffill")
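    # summing the logs yields log(market cap x volatility)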
    df_log_mcap_vola = df_log_vola + df_log_mcap
    df_log_mcap_vola = map_columns(df_log_mcap_vola,
                                   mapping=df_info["ticker"],
                                   mapping_name="ticker")

    # factor data
    df_factors = pd.DataFrame(index=df_var.index)

    def prepare_spy_factor(df_spy):
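        # back out the pre-return price from the close price and daily return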
        open_prc = df_spy["prc"] / (1 + df_spy["ret"])
        std = df_spy["var"]**0.5
        factor = log_replace(open_prc * std, method="min").rename("spy")
        return factor

    def prepare_yahoo_factor(df_yahoo):
        open_prc = df_yahoo["Open"]
        std = np.sqrt(0.3607) * (np.log(df_yahoo["High"]) -
                                 np.log(df_yahoo["Low"]))
        factor = log_replace(open_prc * std, method="min").rename("yahoo")
        return factor

    def prepare_ew_factor(df_obs):
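        # z-score each asset's series, then average across assets per date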
        factor = df_obs.sub(df_obs.mean()).div(
            df_obs.std()).mean(axis=1).rename("ew")
        return factor

    df_spy = data.load_spy_data().reindex(df_var.index)
    spy_factor = prepare_spy_factor(df_spy)
    df_factors = df_factors.join(spy_factor)

    ew_factor = prepare_ew_factor(df_log_mcap_vola)
    df_factors = df_factors.join(ew_factor)

    crsp_factor = construct_crsp_index(sampling_date=sampling_date, data=data)
    df_factors = df_factors.join(crsp_factor)

    for ticker in ["^VIX", "DX-Y.NYB", "^TNX"]:
        df_yahoo = data.load_yahoo(ticker).reindex(df_var.index)
        factor = prepare_yahoo_factor(df_yahoo).rename(ticker)
        df_factors = df_factors.join(factor)

    return (df_info, df_log_mcap_vola, df_factors)
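
# %% [markdown]
# A minimal usage sketch for `load_estimation_data` (assuming a `DataMap`
# rooted at `../data`, as in the surrounding cells):

# %%
data = DataMap("../data")
df_info, df_log_mcap_vola, df_factors = load_estimation_data(
    data=data, sampling_date=dt.datetime(year=2021, month=12, day=31)
)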
# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Asset summary stats

# %%
# %%time
# df_rf (the daily risk-free rate series) is assumed to have been loaded earlier
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # get samples
    df_historic = data.load_historic(sampling_date=sampling_date,
                                     column="retadj")
    df_historic -= df_rf.loc[df_historic.index].values

    # calculate stats
    df_stats = pd.DataFrame(index=df_historic.columns)
    df_stats["ret_excess"] = (1 + df_historic).prod() - 1
    df_stats["var_annual"] = df_historic.var() * 252

    if sampling_date < last_sampling_date:
        # get excess return samples
        df_future = data.load_future(sampling_date=sampling_date,
                                     column="retadj")
        df_future -= df_rf.loc[df_future.index].values

        # slice expanding window
        df_expanding_estimates = pd.DataFrame(index=df_future.columns)
        # (the expanding-window estimation is truncated in this excerpt)

    # advance to the next month-end
    sampling_date += pd.offsets.MonthEnd(1)

# %% [markdown]
# ## Standard Factor Models

# %% [markdown]
# ### Backward part

# %%
# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
    # get excess return samples
    df_historic = data.load_historic(sampling_date=sampling_date,
                                     column="retadj")
    df_historic -= df_rf.loc[df_historic.index].values

    # estimate factor models on the historic sample
    # (ret_models, the model specifications, is assumed to be defined earlier)
    df_estimates, df_residuals = estimate_models(ret_models, df_historic)

    # store
    data.store(
        data=df_residuals,
        path="samples/{:%Y-%m-%d}/historic_daily.csv".format(sampling_date),
    )
    data.store(
        data=df_estimates,
        path="samples/{:%Y-%m-%d}/asset_estimates.csv".format(sampling_date),
    )

    # advance to the next month-end
    sampling_date += pd.offsets.MonthEnd(1)
# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")
# df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
# df_logvar_resid = data.load_historic(
#     sampling_date=sampling_date, column="logvar_capm_resid"
# )
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
df_spy_var = data.load_spy_data(series="var").loc[df_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date, columns=["ticker", "comnam", "last_size", "mean_size"]
)

# %% [markdown]
# ### Tickers

# %%
ticker_list = (
    data.load_historic(sampling_date=sampling_date, column="ticker")
    .tail(1)
    .values.ravel()
    .tolist()
)

# %%
# cov_grid = {'alpha': [1e-1]}

# horizon = 21

# %% [markdown]
# ## Test single period

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %%
# %%time
# load and transform idiosyncratic volatility data, load size data
if option == "spy_capm_decomp":
    df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
    df_log_idio_var = data.log_replace(df_idio_var, method="min")
elif option == "logvar_capm_resid":
    df_log_idio_var = data.load_historic(
        sampling_date=sampling_date, column="logvar_capm_resid"
    )
mean_size = data.load_asset_estimates(
    sampling_date=sampling_date, columns="mean_size"
).values.squeeze()

# estimate the VAR
# (var_grid, the hyperparameter grid for the adaptive elastic net,
# is assumed to be defined earlier)
var = VAR(has_intercepts=True, p_lags=1)
var_cv = var.fit_adaptive_elastic_net_cv(
    var_data=df_log_idio_var, grid=var_grid, return_cv=True
)
residuals = var.residuals(df_log_idio_var)
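
# %% [markdown]
# As a quick diagnostic, the per-asset in-sample fit can be summarized (a
# sketch, assuming `var.residuals` returns a DataFrame aligned with
# `df_log_idio_var`):

# %%
r_squared = 1 - residuals.var() / df_log_idio_var.var()
print(r_squared.describe())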