예제 #1
0
# ---- In-sample data ---------------------------------------------------
# Every factor panel is stacked and the stacked results are placed side
# by side, giving one column per factor.
X_in = pd.concat(
    [
        panel.stack()
        for panel in (market_cap, pe, pe_lyr, pb, ps, pcf, turnover)
    ],
    axis=1,
)
X_in.columns = ["market_cap", "pe", "pe_lyr", "pb", "ps", "pcf", "turnover"]

# Target for the in-sample fit: stacked returns restricted to (and ordered
# like) the rows of the design matrix.
y_in = rets.stack().loc[X_in.index]

# ---- Out-of-sample data -----------------------------------------------
# shift(-1) places the following row's return on the current row, so each
# observation is paired with the return of the next period.
rets_out = rets.shift(-1)
y_out = rets_out.stack()
X_out = X_in.loc[y_out.index]

# ---- Regression weights -----------------------------------------------
# Square root of the raw market capitalisation, aligned to each design
# matrix. The stacked series is built once and reused for both lookups.
_stacked_cap = market_cap_raw_value.stack()
weights_in = np.sqrt(_stacked_cap.loc[X_in.index])
weights_out = np.sqrt(_stacked_cap.loc[X_out.index])

# Sanity check: the out-of-sample features and targets must pair up.
assert len(X_out) == len(y_out)

# ---- Market series ----------------------------------------------------
# NOTE(review): "000906.XSHG" is presumably the benchmark index used as the
# market proxy -- confirm against the data provider.
market_price = get_price(
    "000906.XSHG",
    fields=["close"],
    start_date='2014-01-01',
    end_date='2020-01-01',
)
market_ret = market_price.pct_change()
# Same one-row lead as rets_out, applied to the market return.
market_ret_out = market_ret.shift(-1)
예제 #2
0
"""
Cross-sectional regression is tested with two different designs:

1. the raw sector return is used as a single factor;
2. every industry is introduced as its own factor, i.e. as dummy variables.
"""

# Largest absolute sector return per column; only columns whose maximum
# stays below 0.1 are kept.
_tmp = sector_rets.abs().max()
mask = _tmp.loc[_tmp < 0.1].index

# Restrict every panel to the surviving columns. The tuple of inputs is
# evaluated before any name is rebound, so all panels are filtered with the
# same mask.
rets, sector_rets, market_cap, pe, pe_lyr, pb, ps, pcf, turnover = [
    panel[mask]
    for panel in (rets, sector_rets, market_cap, pe, pe_lyr, pb, ps, pcf,
                  turnover)
]

# Column labels of the filtered return panel.
TICKER = list(rets.columns)

# Next-row returns; rows in which every column is NaN are dropped.
rets_out = rets.shift(-1).dropna(how="all")

# Square-root market-cap weights on the rows kept in rets_out.
# NOTE(review): market_cap_raw_value is not column-filtered with `mask`
# here, unlike the other panels -- confirm this is intended.
weights_out = np.sqrt(market_cap_raw_value.loc[rets_out.index])


def regular_fit(ret, **kwargs):
    """
    Fit with the sector return as a single factor.

    ret: object whose ``.index`` labels the rows of every result table
        (presumably the return DataFrame -- confirm against the caller).
    kwargs: named factor inputs; in the visible code only the keyword
        names are used, as column labels of the result tables.

    NOTE(review): only the allocation of the result containers is visible
    in this excerpt; the fitting logic and the return value are not shown.
    """
    # One row per entry of ret.index; columns are "const" followed by one
    # column per keyword argument, all initialised to 0.
    factor_returns = pd.DataFrame(0,
                                  index=ret.index,
                                  columns=["const"] +
                                  list(kwargs.keys()))  # "const" is the intercept
    # Single-column tables for R2 and adjusted R2, one row per index entry.
    R2 = pd.DataFrame(0, index=ret.index, columns=['R2'])
    adj_R2 = pd.DataFrame(0, index=ret.index, columns=["Adj R2"])
    # One t-statistic column per keyword argument (no column for "const").
    # NOTE(review): filling with the integer 0 creates integer columns;
    # presumably floats are written into them later -- confirm.
    t_stats = pd.DataFrame(0, index=ret.index, columns=list(kwargs.keys()))
예제 #3
0
# outside module
import numpy as np
import pandas as pd
import statsmodels.api as sm
import linearmodels
from loguru import logger
import matplotlib.pyplot as plt
import datetime

# inside module
from Utils import *
from Pipeline import rets, sector_rets, market_cap_raw_value, market_cap, pe, pe_lyr, pb, ps, pcf, turnover

# In-sample returns: the return panel imported from Pipeline, bound under a
# second name (same object, not a copy).
rets_in = rets

# Out-of-sample returns: each row holds the return of the following row.
rets_out = rets_in.shift(-1)

# Column labels of the return panel.
TICKER = list(rets_in.columns)


def sample_analysis(rets, num):
    """
    Random sample some equity, perform some linear regression test

    1. residual correlation
    2. constant variance, wls
    3. covariance matrix
    3. residual normality
    4. R square, other evaluations
    """

    ticker = [TICKER[e] for e in np.random.randint(0, len(TICKER) - 1, num)]