def construct_crsp_index(sampling_date: dt.datetime, data: DataMap) -> pd.Series:
    """Constructs an equally weighted log wealth volatility index across the CRSP universe.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        index: Constructed index series.
    """
    # set parameters
    start_date = sampling_date - relativedelta(years=1) + relativedelta(days=1)

    # load data
    df_var = data.load_crsp_data(
        start_date=start_date, end_date=sampling_date, column="var"
    )
    df_noisevar = data.load_crsp_data(
        start_date=start_date, end_date=sampling_date, column="noisevar"
    )
    df_ret = data.load_crsp_data(
        start_date=start_date, end_date=sampling_date, column="retadj"
    )
    df_mcap = data.load_crsp_data(
        start_date=start_date, end_date=sampling_date, column="mcap"
    )

    # process data
    df_var[df_var == 0] = df_noisevar
    df_vola = np.sqrt(df_var.replace(0, np.nan))
    df_lagged_mcap = df_mcap / (df_ret + 1)
    df_lagged_mcap[df_lagged_mcap <= 0] = np.nan
    df_log_mcap_vola = np.log(df_vola) + np.log(df_lagged_mcap)

    # build index
    index = (
        df_log_mcap_vola.sub(df_log_mcap_vola.mean())
        .div(df_log_mcap_vola.std())
        .mean(axis=1)
        .rename("crsp")
    )
    return index
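# A minimal usage sketch of `construct_crsp_index` (hedged: assumes a DataMap
# rooted at "../data" holding one year of daily CRSP data before the sampling
# date; the plotting call is illustrative only):
#
#     data = DataMap("../data")
#     crsp_factor = construct_crsp_index(dt.datetime(2021, 12, 31), data=data)
#     crsp_factor.plot()  # daily cross-sectional mean of standardized log value volatility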
# ### Sampling date

# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
option = "logvar_capm_resid"  # "spy_capm_decomp"

# %%
data = DataMap("../data")
df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
df_logvar_resid = data.load_historic(
    sampling_date=sampling_date, column="logvar_capm_resid"
)
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_spy_var = data.load_spy_data(series="var").loc[df_idio_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date,
    columns=["ticker", "comnam", "last_size", "mean_size"],
)

# %% [markdown]
# ### Tickers

# %%
ticker_list = (data.load_historic(
from euraculus.data import DataMap
from euraculus.download import WRDSDownloader, download_yahoo_data

# %% [markdown]
# ## Set up

# %% [markdown]
# ### WRDS Connection & DataMap

# %%
db = WRDSDownloader()
db._create_pgpass_file()

# %%
data = DataMap("../data")

# %% [markdown]
# ### Timeframe

# %%
first_year = 1993
last_year = 2021

# %% [markdown]
# #### Explore database

# %%
libraries = db.list_libraries()

# %%
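# A hedged exploration sketch: printing the library list is grounded in the
# `list_libraries` call above; `list_tables` is a hypothetical companion
# helper shown commented out, not a confirmed downloader method.
print(sorted(libraries)[:10])  # peek at the first few available WRDS libraries
# tables = db.list_tables(library="crsp")  # hypothetical helper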
# %load_ext autoreload
# %autoreload 2
import datetime as dt

import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap
from euraculus.factor import decompose_variance

# ## Set up

# ### Data

data = DataMap("../data")
df_spy = data.load_spy_data(series="var")

# ### Dates

# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# ## Construct CAPM idiosyncratic variances

# ### Backward part

# %%time
sampling_date = first_sampling_date
while sampling_date <= last_sampling_date:
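    # (a hedged sketch of the monthly stepping pattern; the loop body here is
    # illustrative and the decompose_variance call is an assumed signature,
    # not the repository's confirmed interface)
    # df_var = data.load_historic(sampling_date=sampling_date, column="var")
    # df_idio_var = decompose_variance(df_var, df_spy)  # hypothetical signature
    # sampling_date += relativedelta(months=1, day=31)  # day=31 pins each step to month-end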
# import pytest
import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal

from euraculus.data import DataMap

datamap = DataMap(datapath="/home/rubelrennfix/projects/euraculus/data")


class TestPrepareLogVariances:
    """This class serves to test various cases of preparing log variance data."""

    def test_full_column(self):
        df_var = pd.DataFrame(data=[[1], [1], [1]])
        df_noisevar = pd.DataFrame(data=[[2], [2], [2]])
        output = datamap.prepare_log_variances(df_var=df_var, df_noisevar=df_noisevar)
        expected = np.log(pd.DataFrame(data=[[1], [1], [1]]))
        assert_frame_equal(output, expected)

    def test_empty_column(self):
        _ = np.nan
        df_var = pd.DataFrame(data=[[_], [_], [_]])
        df_noisevar = pd.DataFrame(data=[[_], [_], [_]])
        output = datamap.prepare_log_variances(df_var=df_var, df_noisevar=df_noisevar)
        expected = np.log(pd.DataFrame(data=[[_], [_], [_]]))
        assert_frame_equal(output, expected)

    def test_zero_column(self):
        df_var = pd.DataFrame(data=[[0], [0], [0]])
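        # A minimal reference sketch of the behavior these tests pin down (an
        # inference from the cases above, not the repository's implementation):
        # zero variances fall back to the noise variance before taking logs,
        # e.g. expected = np.log(df_var.where(df_var != 0, df_noisevar)).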
# %%
factors = ["pca_1"]
var_grid = {
    "alpha": np.geomspace(1e-10, 1e0, 11),
    "lambdau": np.geomspace(1e-1, 1e1, 11),
    # 'gamma': np.geomspace(1e-2, 1e2, 15),
}
cov_grid = {"alpha": np.geomspace(1e-3, 1e0, 25)}
horizon = 21

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")

# %% [markdown]
# ## Test single period

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %%
# %%time
# load data
df_info, df_log_mcap_vola, df_factors = load_estimation_data(
    data=data, sampling_date=sampling_date
)
df_pca = construct_pca_factors(df=df_log_mcap_vola, n_factors=1)
df_factors = df_factors.join(df_pca)

# estimate
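# A hedged sketch of the elided estimation call: `estimate_fevd` and its
# keyword names are hypothetical stand-ins inferred from the grids defined
# above, not the repository's confirmed interface.
# var_cv, cov_cv, fevd = estimate_fevd(
#     df_log_mcap_vola, df_factors,
#     var_grid=var_grid, cov_grid=cov_grid, horizon=horizon,
# )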
def load_estimation_data(data: DataMap, sampling_date: dt.datetime) -> tuple:
    """Load the data necessary for estimation from disk.

    Args:
        data: DataMap to load data from.
        sampling_date: Last day in the sample.

    Returns:
        df_info: Summarizing information.
        df_log_mcap_vola: Logarithm of value volatility variable.
        df_factors: Factor data.
    """
    # asset data
    df_var = data.load_historic(sampling_date=sampling_date, column="var")
    df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
    df_ret = data.load_historic(sampling_date=sampling_date, column="retadj")
    df_mcap = data.load_historic(sampling_date=sampling_date, column="mcap")
    df_info = data.load_asset_estimates(
        sampling_date=sampling_date,
        columns=["ticker", "comnam", "last_size", "mean_size"],
    )
    df_info["ticker"] = make_tickers_unique(df_info["ticker"])

    # prepare asset data
    df_vola = np.sqrt(df_var)
    df_noisevola = np.sqrt(df_noisevar)
    df_lagged_mcap = df_mcap / (df_ret + 1)  # back out lagged market cap from returns
    df_log_vola = prepare_log_data(df_data=df_vola, df_fill=df_noisevola)
    df_log_mcap = log_replace(df=df_lagged_mcap, method="ffill")
    df_log_mcap_vola = df_log_vola + df_log_mcap
    df_log_mcap_vola = map_columns(
        df_log_mcap_vola, mapping=df_info["ticker"], mapping_name="ticker"
    )

    # factor data
    df_factors = pd.DataFrame(index=df_var.index)

    def prepare_spy_factor(df_spy):
        open_prc = df_spy["prc"] / (1 + df_spy["ret"])
        std = df_spy["var"] ** 0.5
        factor = log_replace(open_prc * std, method="min").rename("spy")
        return factor

    def prepare_yahoo_factor(df_yahoo):
        open_prc = df_yahoo["Open"]
        # Parkinson (1980) range-based volatility: sqrt(1 / (4 ln 2)) ≈ sqrt(0.3607)
        std = np.sqrt(0.3607) * (np.log(df_yahoo["High"]) - np.log(df_yahoo["Low"]))
        factor = log_replace(open_prc * std, method="min").rename("yahoo")
        return factor

    def prepare_ew_factor(df_obs):
        # standardize each asset's series, then average across assets
        factor = df_obs.sub(df_obs.mean()).div(df_obs.std()).mean(axis=1).rename("ew")
        return factor

    df_spy = data.load_spy_data().reindex(df_var.index)
    spy_factor = prepare_spy_factor(df_spy)
    df_factors = df_factors.join(spy_factor)

    ew_factor = prepare_ew_factor(df_log_mcap_vola)
    df_factors = df_factors.join(ew_factor)

    crsp_factor = construct_crsp_index(sampling_date=sampling_date, data=data)
    df_factors = df_factors.join(crsp_factor)

    for ticker in ["^VIX", "DX-Y.NYB", "^TNX"]:
        df_yahoo = data.load_yahoo(ticker).reindex(df_var.index)
        factor = prepare_yahoo_factor(df_yahoo).rename(ticker)
        df_factors = df_factors.join(factor)

    return (df_info, df_log_mcap_vola, df_factors)
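# A minimal usage sketch of `load_estimation_data` (hedged: assumes a DataMap
# populated under "../data" with the historic, SPY, and Yahoo series used above):
#
#     data = DataMap("../data")
#     df_info, df_log_mcap_vola, df_factors = load_estimation_data(
#         data=data, sampling_date=dt.datetime(2021, 12, 31)
#     )
#     print(df_factors.columns.tolist())  # expect: spy, ew, crsp, ^VIX, DX-Y.NYB, ^TNX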
# %autoreload 2
import datetime as dt

# %%
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap

# %% [markdown]
# ## Set up

# ### Data

# %%
data = DataMap("../data")
df_rf = data.load_rf()

# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Assets summary stats

# %%
# %%time
import datetime as dt

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

from euraculus.data import DataMap
from euraculus.sampling import LargeCapSampler

# %% [markdown]
# ## Set up

# %% [markdown]
# ### Sampler

# %%
data = DataMap("../data")
sampler = LargeCapSampler(
    datamap=data, n_assets=100, back_offset=12, forward_offset=12
)

# %% [markdown]
# ### Timeframe

# %%
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ## Conduct monthly sampling
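# %% [markdown]
# A hedged sketch of the monthly sampling loop that follows: `sampler.sample`
# is a hypothetical method name used for illustration, not the sampler's
# confirmed interface.

# %%
# sampling_date = first_sampling_date
# while sampling_date <= last_sampling_date:
#     df_back, df_forward = sampler.sample(sampling_date)  # hypothetical method
#     sampling_date += relativedelta(months=1, day=31)  # stay on month-ends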
# %load_ext autoreload
# %autoreload 2

# %%
from euraculus.data import DataMap
from euraculus.plot import (
    plot_estimation_summary,
    plot_regularisation_summary,
    plot_network_summary,
)

# %% [markdown]
# ## Load data

# %%
data = DataMap("../data")
df_stats = data.load_estimation_summary()

# %% [markdown]
# ## Calculations

# %%
df_stats["mean_shrinkage"] = (
    df_stats["var_nonzero_shrinkage"] + df_stats["covar_full_shrinkage"]
) / 2
df_stats["cov_used_df"] = (
    df_stats["precision_density"] * df_stats["N"] ** 2 - df_stats["N"]
) / 2 + df_stats["N"]
df_stats["var_regular_lost_df"] = df_stats["N"] ** 2 + df_stats["N"]
df_stats["covar_regular_lost_df"] = (df_stats["N"] * (df_stats["N"] - 1)) / 2
df_stats["var_estimate_share"] = df_stats["var_regular_lost_df"] / (
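# A quick numeric check of the degrees-of-freedom formulas above, for a
# representative panel of N = 100 assets (illustrative arithmetic only):
# var_regular_lost_df = N**2 + N = 10100, and
# covar_regular_lost_df = N * (N - 1) / 2 = 4950.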
    Carhart4FactorModel,
    FactorModel,
    FamaFrench3FactorModel,
    SPY1FactorModel,
    SPYVariance1FactorModel,
)
from euraculus.estimate import prepare_log_data
from euraculus.factor import estimate_models
from euraculus.utils import months_difference

# %% [markdown]
# ## Set up

# ### Data

# %%
data = DataMap("../data")
df_rf = data.load_rf()

# %% [markdown]
# ### Models

# %%
ret_models = {
    "spy_capm": SPY1FactorModel(data),
    "capm": CAPM(data),
    "ff3": FamaFrench3FactorModel(data),
    "c4": Carhart4FactorModel(data),
}

# %%
var_models = {
# %% [markdown]
# ## Set up

# ### Sampling date

# %%
sampling_date = dt.datetime(year=2019, month=12, day=31)

# %%
sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")
# df_idio_var = data.load_historic(sampling_date=sampling_date, column="var_idio")
# df_logvar_resid = data.load_historic(
#     sampling_date=sampling_date, column="logvar_capm_resid"
# )
df_var = data.load_historic(sampling_date=sampling_date, column="var")
df_noisevar = data.load_historic(sampling_date=sampling_date, column="noisevar")
df_spy_var = data.load_spy_data(series="var").loc[df_var.index]
df_info = data.load_asset_estimates(
    sampling_date=sampling_date, columns=["ticker", "comnam", "last_size", "mean_size"]
)

# %% [markdown]
# ### Tickers

# %%
    describe_var,
    describe_cov,
    describe_fevd,
    collect_var_estimates,
    collect_cov_estimates,
    collect_fevd_estimates,
)

# %% [markdown]
# ## Setup

# %% [markdown]
# ### Data

# %%
data = DataMap("../data")

# %%
option = "logvar_capm_resid"  # "spy_capm_decomp"

# %% [markdown]
# ### Dates

# %%
# define timeframe
first_sampling_date = dt.datetime(year=1994, month=1, day=31)
last_sampling_date = dt.datetime(year=2021, month=12, day=31)

# %% [markdown]
# ### Hyperparameters
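# %% [markdown]
# A hedged sketch of the hyperparameter grids, mirroring the single-period
# estimation setup used elsewhere in this project (the values shown are
# illustrative and assume `numpy` is imported as `np`):

# %%
var_grid = {
    "alpha": np.geomspace(1e-10, 1e0, 11),  # log-spaced, one point per decade
    "lambdau": np.geomspace(1e-1, 1e1, 11),
}
cov_grid = {"alpha": np.geomspace(1e-3, 1e0, 25)}
horizon = 21  # 21 trading days, roughly one calendar month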