Example #1
import datetime as dt

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap


def construct_crsp_index(sampling_date: dt.datetime,
                         data: DataMap) -> pd.Series:
    """Constructs an equally weighted log wealth volatility index across the CRSP universe.

    Args:
        sampling_date: Last day in the sample.
        data: DataMap to load data from.

    Returns:
        index: Constructed index series.

    """
    # set parameters: one-year lookback window ending on sampling_date (inclusive)
    start_date = sampling_date - relativedelta(years=1) + relativedelta(days=1)

    # load data
    df_var = data.load_crsp_data(start_date=start_date,
                                 end_date=sampling_date,
                                 column="var")
    df_noisevar = data.load_crsp_data(start_date=start_date,
                                      end_date=sampling_date,
                                      column="noisevar")
    df_ret = data.load_crsp_data(start_date=start_date,
                                 end_date=sampling_date,
                                 column="retadj")
    df_mcap = data.load_crsp_data(start_date=start_date,
                                  end_date=sampling_date,
                                  column="mcap")

    # process data
    df_var[df_var == 0] = df_noisevar  # fall back to noise variance where var is zero
    df_vola = np.sqrt(df_var.replace(0, np.nan))  # drop remaining zeros before logs
    df_lagged_mcap = df_mcap / (df_ret + 1)  # back out previous-day market cap
    df_lagged_mcap[df_lagged_mcap <= 0] = np.nan
    df_log_mcap_vola = np.log(df_vola) + np.log(df_lagged_mcap)

    # build index: cross-sectional mean of standardized log wealth volatility
    index = (
        df_log_mcap_vola.sub(df_log_mcap_vola.mean())
        .div(df_log_mcap_vola.std())
        .mean(axis=1)
        .rename("crsp")
    )
    return index
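
# %% [markdown]
# A minimal usage sketch (not from the source; assumes a populated `../data`
# directory):

# %%
data = DataMap("../data")
crsp_factor = construct_crsp_index(
    sampling_date=dt.datetime(year=2021, month=12, day=31), data=data
)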
# %% [markdown]
# ### Sampling date

# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
option = "logvar_capm_resid"  # "spy_capm_decomp"

# %%
data = DataMap("../data")
df_idio_var = data.load_historic(sampling_date=sampling_date,
                                 column="var_idio")
df_logvar_resid = data.load_historic(sampling_date=sampling_date,
                                     column="logvar_capm_resid")
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_spy_var = data.load_spy_data(series="var").loc[df_idio_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date,
    columns=["ticker", "comnam", "last_size", "mean_size"])

# %% [markdown]
# ### Tickers

# %%
ticker_list = (data.load_historic(
Example #3
from euraculus.data import DataMap
from euraculus.download import WRDSDownloader, download_yahoo_data

# %% [markdown]
# ## Set up

# %% [markdown]
# ### WRDS Connection & DataMap

# %%
db = WRDSDownloader()
db._create_pgpass_file()

# %%
data = DataMap("../data")

# %% [markdown]
# ### Timeframe

# %%
first_year = 1993
last_year = 2021

# %% [markdown]
# ### Explore database

# %%
libraries = db.list_libraries()
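
# %%
# e.g. narrow the listing down to CRSP-related tables (a sketch; assumes
# list_libraries returns a list of library name strings)
crsp_libraries = [lib for lib in libraries if "crsp" in lib.lower()]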

# %%
# %load_ext autoreload
# %autoreload 2

import datetime as dt

import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap
from euraculus.factor import decompose_variance

# ## Set up
# ### Data

data = DataMap("../data")
df_spy = data.load_spy_data(series="var")

# ### Dates

# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# ## Construct CAPM idiosyncratic variances

# ### Backward part

# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
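    # hedged sketch of the loop body, which is truncated in this excerpt:
    # decompose variance for the current window, then step forward one month
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_decomposed = decompose_variance(df_var, df_spy)  # assumed signature
    sampling_date += relativedelta(months=1)
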
Example #5
# import pytest

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal

from euraculus.data import DataMap

datamap = DataMap(datapath="/home/rubelrennfix/projects/euraculus/data")


class TestPrepareLogVariances:
    """This class serves to test various cases of preparing log variance data."""

    def test_full_column(self):
        df_var = pd.DataFrame(data=[[1], [1], [1]])
        df_noisevar = pd.DataFrame(data=[[2], [2], [2]])
        output = datamap.prepare_log_variances(df_var=df_var, df_noisevar=df_noisevar)
        expected = np.log(pd.DataFrame(data=[[1], [1], [1]]))
        assert_frame_equal(output, expected)

    def test_empty_column(self):
        _ = np.nan
        df_var = pd.DataFrame(data=[[_], [_], [_]])
        df_noisevar = pd.DataFrame(data=[[_], [_], [_]])
        output = datamap.prepare_log_variances(df_var=df_var, df_noisevar=df_noisevar)
        expected = np.log(pd.DataFrame(data=[[_], [_], [_]]))
        assert_frame_equal(output, expected)

    def test_zero_column(self):
        df_var = pd.DataFrame(data=[[0], [0], [0]])
        # assumed expected behavior (cf. construct_crsp_index above): zero
        # variances fall back to the noise variance before taking logs
        df_noisevar = pd.DataFrame(data=[[2], [2], [2]])
        output = datamap.prepare_log_variances(df_var=df_var, df_noisevar=df_noisevar)
        expected = np.log(pd.DataFrame(data=[[2], [2], [2]]))
        assert_frame_equal(output, expected)
# %%
factors = ["pca_1"]
var_grid = {
    "alpha": np.geomspace(1e-10, 1e0, 11),
    "lambdau": np.geomspace(1e-1, 1e1, 11),
    #'gamma': np.geomspace(1e-2, 1e2, 15),
}
cov_grid = {"alpha": np.geomspace(1e-3, 1e0, 25)}
horizon = 21
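
# %% [markdown]
# For orientation, the grids above can be enumerated with
# `sklearn.model_selection.ParameterGrid` (a sketch; the estimation routine
# itself is not shown in this excerpt):

# %%
from sklearn.model_selection import ParameterGrid

var_candidates = list(ParameterGrid(var_grid))  # 11 x 11 = 121 combinations
len(var_candidates), var_candidates[0]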

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")

# %% [markdown]
# ## Test single period

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %%
# %%time
# load data
df_info, df_log_mcap_vola, df_factors = load_estimation_data(data=data, sampling_date=sampling_date)
df_pca = construct_pca_factors(df=df_log_mcap_vola, n_factors=1)
df_factors = df_factors.join(df_pca)

# estimate
Example #7
import datetime as dt

import numpy as np
import pandas as pd

from euraculus.data import DataMap
from euraculus.estimate import prepare_log_data


def load_estimation_data(data: DataMap, sampling_date: dt.datetime) -> tuple:
    """Load the data necessary for estimation from disk.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        df_info: Summarizing information.
        df_log_mcap_vola: Log of market capitalization times volatility.
        df_factors: Factor data.

    """
    # asset data
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_noisevar = data.load_historic(sampling_date=sampling_date,
                                     column="noisevar")
    df_ret = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_mcap = data.load_historic(sampling_date=sampling_date, column="mcap")
    df_info = data.load_asset_estimates(
        sampling_date=sampling_date,
        columns=["ticker", "comnam", "last_size", "mean_size"],
    )
    df_info["ticker"] = make_tickers_unique(df_info["ticker"])

    # prepare asset data
    df_vola = np.sqrt(df_var)
    df_noisevola = np.sqrt(df_noisevar)
    df_lagged_mcap = df_mcap / (df_ret + 1)
    df_log_vola = prepare_log_data(df_data=df_vola, df_fill=df_noisevola)
    df_log_mcap = log_replace(df=df_lagged_mcap, method="ffill")
    df_log_mcap_vola = df_log_vola + df_log_mcap
    df_log_mcap_vola = map_columns(df_log_mcap_vola,
                                   mapping=df_info["ticker"],
                                   mapping_name="ticker")

    # factor data
    df_factors = pd.DataFrame(index=df_var.index)

    def prepare_spy_factor(df_spy):
        open_prc = df_spy["prc"] / (1 + df_spy["ret"])  # back out opening price
        std = df_spy["var"] ** 0.5
        factor = log_replace(open_prc * std, method="min").rename("spy")
        return factor

    def prepare_yahoo_factor(df_yahoo):
        open_prc = df_yahoo["Open"]
        # Parkinson (1980) range estimator: sqrt(1 / (4 ln 2)) ≈ sqrt(0.3607)
        std = np.sqrt(0.3607) * (np.log(df_yahoo["High"]) -
                                 np.log(df_yahoo["Low"]))
        factor = log_replace(open_prc * std, method="min").rename("yahoo")
        return factor

    def prepare_ew_factor(df_obs):
        # cross-sectional mean of standardized observations
        factor = df_obs.sub(df_obs.mean()).div(df_obs.std()).mean(axis=1).rename("ew")
        return factor

    df_spy = data.load_spy_data().reindex(df_var.index)
    spy_factor = prepare_spy_factor(df_spy)
    df_factors = df_factors.join(spy_factor)

    ew_factor = prepare_ew_factor(df_log_mcap_vola)
    df_factors = df_factors.join(ew_factor)

    crsp_factor = construct_crsp_index(sampling_date=sampling_date, data=data)
    df_factors = df_factors.join(crsp_factor)

    for ticker in ["^VIX", "DX-Y.NYB", "^TNX"]:
        df_yahoo = data.load_yahoo(ticker).reindex(df_var.index)
        factor = prepare_yahoo_factor(df_yahoo).rename(ticker)
        df_factors = df_factors.join(factor)

    return (df_info, df_log_mcap_vola, df_factors)
# %autoreload 2

import datetime as dt

# %%
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap

# %% [markdown]
# ## Set up
# ### Data

# %%
data = DataMap("../data")
df_rf = data.load_rf()

# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Assets summary stats

# %%
# %%time
Example #9
import datetime as dt

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap
from euraculus.sampling import LargeCapSampler

# %% [markdown]
# ## Set up

# %% [markdown]
# ### Sampler

# %%
data = DataMap("../data")
sampler = LargeCapSampler(datamap=data,
                          n_assets=100,
                          back_offset=12,
                          forward_offset=12)

# %% [markdown]
# ### Timeframe

# %%
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Conduct monthly sampling
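
# %%
# hedged sketch of the monthly sampling loop implied by the heading above
# (the original cell is not included in this excerpt; `sampler.sample` is an
# assumed method name)
# sampling_date = first_sampling_date
# while sampling_date <= last_sampling_date:
#     df_back, df_forward = sampler.sample(sampling_date)  # hypothetical API
#     sampling_date += relativedelta(months=1)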
# %load_ext autoreload
# %autoreload 2

# %%
from euraculus.data import DataMap
from euraculus.plot import (
    plot_estimation_summary,
    plot_regularisation_summary,
    plot_network_summary,
)

# %% [markdown]
# ## Load data

# %%
data = DataMap("../data")
df_stats = data.load_estimation_summary()

# %% [markdown]
# ## Calculations

# %%
df_stats["mean_shrinkage"] = (
    df_stats["var_nonzero_shrinkage"] + df_stats["covar_full_shrinkage"]
) / 2
df_stats["cov_used_df"] = (
    df_stats["precision_density"] * df_stats["N"] ** 2 - df_stats["N"]
) / 2 + df_stats["N"]
df_stats["var_regular_lost_df"] = df_stats["N"] ** 2 + df_stats["N"]
df_stats["covar_regular_lost_df"] = (df_stats["N"] * (df_stats["N"] - 1)) / 2
df_stats["var_estimate_share"] = df_stats["var_regular_lost_df"] / (
    Carhart4FactorModel,
    FactorModel,
    FamaFrench3FactorModel,
    SPY1FactorModel,
    SPYVariance1FactorModel,
)
from euraculus.estimate import prepare_log_data
from euraculus.factor import estimate_models
from euraculus.utils import months_difference

# %% [markdown]
# ## Set up
# ### Data

# %%
data = DataMap("../data")
df_rf = data.load_rf()

# %% [markdown]
# ### Models

# %%
ret_models = {
    "spy_capm": SPY1FactorModel(data),
    "capm": CAPM(data),
    "ff3": FamaFrench3FactorModel(data),
    "c4": Carhart4FactorModel(data),
}
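
# %%
# hedged sketch: a model dictionary like this can be passed to
# `estimate_models` (imported above); commented out since the exact
# signature is not shown in this excerpt
# df_estimates, df_residuals = estimate_models(ret_models, df_ret)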

# %%
var_models = {
# %% [markdown]
# ## Set up
# ### Sampling date

# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")
# df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
# df_logvar_resid = data.load_historic(
#     sampling_date=sampling_date, column="logvar_capm_resid"
# )
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
df_spy_var = data.load_spy_data(series="var").loc[df_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date, columns=["ticker", "comnam", "last_size", "mean_size"]
)

# %% [markdown]
# ### Tickers

# %%
Example #13
    describe_var,
    describe_cov,
    describe_fevd,
    collect_var_estimates,
    collect_cov_estimates,
    collect_fevd_estimates,
)

# %% [markdown]
# ## Set up

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")

# %%
option = "logvar_capm_resid"  # "spy_capm_decomp"

# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Hyperparameters