# In sample fit
# Stack every factor panel into long format and place the results side by
# side, so each row of X_in carries all regressors for one stacked index entry.
X_in = pd.concat(
    [
        market_cap.stack(),
        pe.stack(),
        pe_lyr.stack(),
        pb.stack(),
        ps.stack(),
        pcf.stack(),
        turnover.stack(),
    ],
    axis=1,
)
X_in.columns = ["market_cap", "pe", "pe_lyr", "pb", "ps", "pcf", "turnover"]

# Response for the in-sample fit, restricted to the rows present in X_in.
y_in = rets.stack()
y_in = y_in.loc[X_in.index]

# Out-of-sample response: returns shifted back by one row, i.e. the value of
# the following row is paired with the current row's regressors.
y_out = rets.shift(-1).stack()
X_out = X_in.loc[y_out.index]

# Regression weights: square root of the raw market capitalisation, aligned
# to the in-sample and out-of-sample design matrices respectively.
weights_in = np.sqrt(market_cap_raw_value.stack().loc[X_in.index])
weights_out = np.sqrt(market_cap_raw_value.stack().loc[X_out.index])

# Sanity check: regressors and response must line up row for row.
assert len(X_out) == len(y_out)

rets_out = rets.shift(-1)

# Benchmark series for "000906.XSHG" and its one-row-ahead return.
# NOTE(review): get_price is not defined in this chunk - presumably provided
# by the data vendor API or a star import; confirm.
market_price = get_price(
    "000906.XSHG",
    fields=["close"],
    start_date='2014-01-01',
    end_date='2020-01-01',
)
market_ret = market_price.pct_change()
market_ret_out = market_ret.shift(-1)
"""
We test cross sectional regression with 2 different designs
    1. use raw sector return as a factor
    2. introduce all industries as factors, i.e. dummy variables
"""

# Keep only the columns whose largest absolute sector return is below 0.1,
# and apply the same column selection to every panel so they stay aligned.
_tmp = abs(sector_rets).max()
mask = _tmp[_tmp < 0.1].index
rets, sector_rets, market_cap, pe, pe_lyr, pb, ps, pcf, turnover = (
    rets[mask],
    sector_rets[mask],
    market_cap[mask],
    pe[mask],
    pe_lyr[mask],
    pb[mask],
    ps[mask],
    pcf[mask],
    turnover[mask],
)

TICKER = list(rets.columns)

# One-row-ahead returns; rows that are entirely NaN after the shift are dropped.
rets_out = rets.shift(-1).dropna(how="all")
# NOTE(review): market_cap_raw_value is row-aligned to rets_out here but is
# not restricted to `mask` like the other panels - confirm this is intended.
weights_out = np.sqrt(market_cap_raw_value.loc[rets_out.index])


def regular_fit(ret, **kwargs):
    """
    Cross sectional fit with the sector return as a single factor.

    Parameters
    ----------
    ret :
        Object exposing ``.index``; the index labels the rows of every
        result container (presumably a returns DataFrame - TODO confirm).
    **kwargs :
        Named factors. The keyword names become the factor columns of the
        result containers.

    NOTE(review): only the allocation of the result containers is visible in
    this chunk; the fitting logic and return value are not shown here.
    """
    factor_names = list(kwargs.keys())

    # One row per entry of ret.index; "const" holds the intercept.
    factor_returns = pd.DataFrame(0, index=ret.index, columns=["const"] + factor_names)
    R2 = pd.DataFrame(0, index=ret.index, columns=['R2'])
    adj_R2 = pd.DataFrame(0, index=ret.index, columns=["Adj R2"])
    # t-statistics are tracked for the named factors only (no intercept column).
    t_stats = pd.DataFrame(0, index=ret.index, columns=factor_names)
# outside module
import numpy as np
import pandas as pd
import statsmodels.api as sm
import linearmodels
from loguru import logger
import matplotlib.pyplot as plt
import datetime

# inside module
from Utils import *
from Pipeline import rets, sector_rets, market_cap_raw_value, market_cap, pe, pe_lyr, pb, ps, pcf, turnover

# Column labels of the return panel, used as the sampling universe below.
TICKER = list(rets.columns)
# Returns shifted back by one row (next row's value on the current row).
rets_out = rets.shift(-1)
rets_in = rets


def sample_analysis(rets, num):
    """
    Random sample some equity, perform some linear regression test
        1. residual correlation
        2. constant variance, wls
        3. covariance matrix
        4. residual normality
        5. R square, other evaluations

    Parameters
    ----------
    rets :
        Return data to analyse (not used by the statements visible in this
        chunk - NOTE(review): confirm against the rest of the body).
    num :
        Number of tickers to draw from ``TICKER``. Draws are independent, so
        the same ticker can appear more than once.
    """
    # np.random.randint excludes the upper bound, so pass len(TICKER) itself;
    # using len(TICKER) - 1 would make the last ticker impossible to draw.
    ticker = [TICKER[e] for e in np.random.randint(0, len(TICKER), num)]