def compute_rolling_regression(
    window_size: int, endog: pd.DataFrame, exog: pd.DataFrame
):
    """
    Fit a rolling OLS of ``endog`` on ``exog`` (plus a trailing constant)
    for a pre-processed limit order book, using statsmodels.

    Following Amaya, Rochen et al (2015), the slope coefficient is read as
    the liquidity cost and the constant as the intercept (alpha).

    Ref: https://www.statsmodels.org/dev/examples/notebooks/generated/rolling_ls.html
    Calculation described in "Distilling Liquidity Costs from Limit Order
    Books" by Amaya, Rochen et al (2015).
    Paper source:
    https://www.sciencedirect.com/science/article/abs/pii/S0378426618301353

    :window_size: Size of the window
    :endog: Dependent variable - y
    :exog: Independent variable - x
    :return: rols_results (instance of statsmodels results object),
        rols_params (pd.DataFrame)
    """
    # prepend=False puts the constant in the LAST column, so the slope is
    # the first parameter column and the intercept the second.
    design = sm.add_constant(exog, prepend=False)
    rols_results = RollingOLS(endog, design, window=window_size).fit()

    # Exactly two labels are assigned below, so this presumably expects a
    # single regressor in ``exog`` -- TODO confirm with callers.
    rols_params = rols_results.params
    rols_params.columns = ["liquidity_cost", "intercept"]
    return rols_results, rols_params
def playing_with_rolling(self, pair, fromDate="2015-01-01", toDate="2018-01-01"):
    """
    Plot the rolling z-score of a pair's spread and the rolling hedge ratio.

    :param pair: indexable holding two symbols; ``pair[0]`` is regressed on
        ``pair[1]``
    :param fromDate: start of the slice taken from each price series
    :param toDate: end of the slice taken from each price series
    :return: None, the figure is displayed with ``plt.show()``
    """
    window = 180
    first_symbol = pair[0]
    second_symbol = pair[1]
    lhs = self.portfolio[first_symbol][self.analysisOn][fromDate:toDate]
    rhs = self.portfolio[second_symbol][self.analysisOn][fromDate:toDate]

    static_model = sm.OLS(lhs, sm.add_constant(rhs))
    rolling_model = RollingOLS(lhs, sm.add_constant(rhs), window=window)
    static_fit = static_model.fit()
    rolling_fit = rolling_model.fit()

    # NOTE(review): the slope is read from a column literally named
    # ``adjusted_close``; this presumably requires
    # self.analysisOn == "adjusted_close" -- confirm.
    rolling_slope = rolling_fit.params.adjusted_close

    # Full-sample spread; not plotted at the moment.
    spread = lhs - static_fit.params[1] * rhs
    spread_rolling = lhs - rolling_slope * rhs

    rolled = pd.Series(spread_rolling).rolling(window=window)
    spread_mean = rolled.mean()
    spread_std = rolled.std()

    fig, (zscore_ax, slope_ax) = plt.subplots(2)
    zscore_ax.plot((spread_rolling - spread_mean) / spread_std)
    zscore_ax.xaxis.set_major_locator(plt.MaxNLocator(15))
    slope_ax.plot(rolling_slope['2013-03-15':])
    slope_ax.xaxis.set_major_locator(plt.MaxNLocator(15))
    plt.show()
def test_params_only(basic_data, method):
    """Check that ``params_only=True`` yields the same coefficients as a full fit."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    full_fit = model.fit(method=method, params_only=False)
    slim_fit = model.fit(method=method, params_only=True)
    # A tolerance-based comparison absorbs small numerical differences seen
    # on x86 platforms.
    assert_allclose(slim_fit.params, full_fit.params)
def test_methods(basic_data, params_only):
    """Check that the ``inv``, ``lstsq`` and ``pinv`` solvers agree on the coefficients."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    fits = {
        solver: model.fit(method=solver, params_only=params_only)
        for solver in ("inv", "lstsq", "pinv")
    }
    reference = fits["inv"].params
    assert_allclose(reference, fits["lstsq"].params)
    assert_allclose(reference, fits["pinv"].params)
def test_methods(basic_data):
    """Check that every solver produces the same rolling coefficients as ``inv``."""
    # NOTE(review): another test in this file is also named ``test_methods``;
    # if both live in the same module the later definition replaces the
    # earlier one.
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    baseline = model.fit(method='inv')
    alternatives = [model.fit(method='lstsq'), model.fit(method='pinv')]
    for candidate in alternatives:
        assert_allclose(baseline.params, candidate.params)
def test_expanding(basic_data):
    """Check NaN/finite coverage of parameters with ``expanding=True`` and ``min_nobs=50``."""
    y, x, _ = basic_data
    exog_values = np.asarray(x)
    fitted = RollingOLS(y, x, 150, min_nobs=50, expanding=True).fit()
    params = np.asarray(fitted.params)

    # Fewer than min_nobs observations exist for the first 49 rows.
    assert np.isnan(params[:49]).all()

    # First row at which 50 fully-finite exog rows have accumulated.
    complete_rows = np.isfinite(exog_values).all(axis=1)
    first = np.where(np.cumsum(complete_rows) >= 50)[0][0]
    assert np.isfinite(params[first:]).all()
def rolling_ols_model():
    """
    Build a Rolling Ordinary Least Squares (Rolling OLS) fixture.

    Fits ``RollingOLS`` on the "longley" dataset, with a constant appended
    as the last exog column, and wraps the estimator, the fitted result and
    the design matrix in a ``ModelWithResults``.
    """
    from statsmodels.regression.rolling import RollingOLS

    longley = get_dataset("longley")
    design = sm.add_constant(longley.exog, prepend=False)
    estimator = RollingOLS(longley.endog, design)
    fitted = estimator.fit(reset=50)
    return ModelWithResults(
        model=fitted, alg=estimator, inference_dataframe=design
    )
def processcsv(self, datafile):
    """
    Load a spectrophotometer CSV and attach rolling reaction velocities.

    The file is read without a header and is assumed to hold the columns
    ``Cuvette``, ``Time`` (minutes), ``Temperature`` (degrees Celsius) and
    ``Absorbance``. Time is converted to seconds, temperature to Kelvin and
    absorbance to a concentration using ``self.ExtCoeff``.

    A 4-point rolling regression of concentration on time is run per
    cuvette in BOTH index directions. A rolling fit leaves NaN for the first
    window, so the reversed pass supplies the early (low temperature) points
    and the forward pass the late ones. The two passes are concatenated.

    NOTE: previously the reverse pass was computed and then overwritten by
    the forward pass, so only forward results were returned. The result now
    holds the rows of both passes; interior points therefore appear twice
    (once per direction) and the index contains duplicates.

    :param datafile: path or buffer accepted by ``pandas.read_csv``
    :return: DataFrame with the input columns plus ``const`` and
        ``Time_regression`` (absolute slope); rows containing NaN removed
    """
    df = pd.read_csv(
        datafile,
        sep=",",
        header=None,
        names=("Cuvette", "Time", "Temperature", "Absorbance"),
    )
    # The software supplies minutes; work in seconds.
    df["Time"] = df["Time"] * 60
    # Degrees Celsius -> Kelvin.
    df["Temperature"] = df["Temperature"] + 273.15

    if self.ProductAbsorbing:
        # Cuvette numbers are 1-based indices into startingconcentrations.
        df["StartingConcentration"] = [
            self.startingconcentrations[x - 1] for x in df["Cuvette"]
        ]
        # Concentration from start concentration and substrate depletion.
        df["Concentration"] = (
            df["StartingConcentration"] - df["Absorbance"] / self.ExtCoeff
        )
    else:
        # Concentration directly from absorbance.
        df["Concentration"] = df["Absorbance"] / self.ExtCoeff

    def _with_rolling_fit(frame):
        """Join per-cuvette rolling OLS parameters onto ``frame`` by index."""
        pieces = []
        for _, cuvettedf in frame.groupby("Cuvette"):
            design = sm.add_constant(cuvettedf["Time"])
            fit = RollingOLS(
                cuvettedf["Concentration"], design, window=4
            ).fit(params_only=True)
            pieces.append(fit.params)
        regression = pd.concat(pieces) if pieces else pd.DataFrame()
        # "Time" clashes with the input column and becomes "Time_regression".
        return frame.join(regression, rsuffix="_regression")

    backward = _with_rolling_fit(df.sort_index(ascending=False))
    forward = _with_rolling_fit(df.sort_index(ascending=True))

    dfwregression = pd.concat([backward, forward])
    dfwregression.dropna(inplace=True)  # Remove the NaN rows
    # Whether absorbance is increasing or decreasing, velocities should
    # always be positive.
    dfwregression["Time_regression"] = np.abs(dfwregression["Time_regression"])
    return dfwregression
def _calculateTi(self):
    """
    Calculates the technical indicator for the given input data. The input
    data are taken from an attribute of the parent class.

    For every row from ``period - 1`` onwards, the upper band is the maximum
    of the current high and the previous ``period - 1`` highs, each projected
    forward along the rolling OLS slope of the highs; the lower band is the
    analogous minimum over the lows.

    Returns:
        pandas.DataFrame: The calculated indicator. Index is of type
        ``pandas.DatetimeIndex``. It contains two columns, the
        ``upper_band``, ``lower_band``.

    Raises:
        NotEnoughInputData: Not enough data for calculating the indicator.
    """
    n_rows = len(self._input_data.index)

    # Not enough data for the requested period
    if n_rows < self._period:
        raise NotEnoughInputData('Projection Bands', self._period, n_rows)

    pbs = pd.DataFrame(index=self._input_data.index,
                       columns=['upper_band', 'lower_band'],
                       data=None, dtype='float64')

    # The design matrix (constant + 0..n-1 trend) is identical for both
    # regressions, so build it once.
    exog = sm.add_constant(list(range(n_rows)))

    def _rolling_slope(column):
        """Return the n-period rolling OLS slope of ``column`` (NaN as 0)."""
        return RollingOLS(
            endog=self._input_data[column].fillna(
                value=0, inplace=False).to_list(),
            exog=exog,
            window=self._period).fit(params_only=True).params[:, 1]

    high_slope = _rolling_slope('high')
    low_slope = _rolling_slope('low')

    high = self._input_data['high'].values
    low = self._input_data['low'].values

    # Write through ``.iat`` on the frame itself: assigning into
    # ``pbs[col].values[i]`` relies on ``.values`` being a writable view,
    # which no longer holds under pandas Copy-on-Write.
    for i in range(self._period - 1, n_rows):
        pbs.iat[i, 0] = max(
            [high[i]] +
            [(j * high_slope[i]) + high[i - j]
             for j in range(1, self._period)])

        pbs.iat[i, 1] = min(
            [low[i]] +
            [(j * low_slope[i]) + low[i - j]
             for j in range(1, self._period)])

    return pbs.round(4)
def get_rolling_beta(df: pd.DataFrame, hist: pd.DataFrame, mark: pd.DataFrame, n: int) -> pd.DataFrame:
    """Turns a holdings portfolio into a rolling beta dataframe

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe of daily holdings; its ``"Holding"`` sub-frame is used
    hist : pd.DataFrame
        A dataframe of historical returns; its ``"Close"`` sub-frame is used
    mark : pd.DataFrame
        The dataframe of market performance; its ``"Market"`` sub-frame is
        used
    n : int
        The period (in days, counted back from now) to get returns for

    Returns
    ----------
    comb : pd.DataFrame
        Per-holding ``prod_*`` columns (weight times beta), their row sum
        ``total``, and the ``beta_*`` columns, restricted to the last
        ``n + 1`` days
    """
    df = df["Holding"]
    uniques = df.columns.tolist()
    # Portfolio weights: each holding as a fraction of the row total.
    res = df.div(df.sum(axis=1), axis=0).fillna(0)

    comb = pd.merge(
        hist["Close"],
        mark["Market"],
        how="outer",
        left_index=True,
        right_index=True,
    )
    # ``fillna(method="ffill")`` is deprecated; ``ffill()`` is the equivalent.
    comb = comb.ffill()

    # The regressor is the same for every asset, so build it once.
    # NOTE(review): presumably ``comb["Close"]`` is the market close brought
    # in from ``mark["Market"]`` -- confirm against the callers' column layout.
    exog = sm.add_constant(comb["Close"])
    for col in hist["Close"].columns:
        rres = RollingOLS(comb[col], exog, window=252).fit()
        res[f"beta_{col}"] = rres.params["Close"]

    final = res.ffill()
    beta_cols = [f"beta_{x}" for x in uniques]
    for uni in uniques:
        final[f"prod_{uni}"] = final[uni] * final[f"beta_{uni}"]

    dropped = final[beta_cols].copy()
    final = final.drop(columns=beta_cols + uniques)
    final["total"] = final.sum(axis=1)
    final = final[final.index >= datetime.now() - timedelta(days=n + 1)]
    return pd.merge(final, dropped, how="left", left_index=True, right_index=True)
def calibrate(self, windowOLS, **kwargs):
    """
    Estimate the rolling hedge ratio of ``self.y`` on ``self.x``.

    Runs a rolling OLS of ``y`` on ``x`` plus a constant column and stores
    the slope series in ``self.beta`` as a flat array.

    :param windowOLS: requested rolling window length; capped at
        ``len(self.y) - 1`` and stored in ``self.windowOLS``
    :param kwargs: accepted and ignored
    """
    # The cap was previously written ``len(self.y - 1)``, which subtracts 1
    # from every element and then takes the unchanged length.
    self.windowOLS = min(windowOLS, len(self.y) - 1)

    df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})
    model = RollingOLS(endog=df['y'], exog=df[['x', 'c']], window=self.windowOLS)
    rres = model.fit()
    self.beta = rres.params['x'].values.reshape(-1, )
def calc_aggregates(data, days):
    """
    Summarise the BTC/ETH spread under a rolling hedge ratio.

    Regresses the BTC-GBP close on the ETH-GBP close (no constant is added)
    over a rolling window of ``days`` rows.

    :param data: mapping with ``"BTC-GBP"`` and ``"ETH-GBP"`` entries, each
        exposing a ``Close`` series
    :param days: rolling window length
    :return: dict with the spread ``mean`` and ``std`` and the latest ``beta``
    """
    btc_close = data["BTC-GBP"].Close
    eth_close = data["ETH-GBP"].Close

    fitted = RollingOLS(btc_close, eth_close, window=days).fit()
    rolling_beta = fitted.params.Close
    rolling_beta.name = "beta"

    spread = btc_close - rolling_beta * eth_close
    summary = {}
    summary["mean"] = spread.mean()
    summary["std"] = spread.std()
    summary["beta"] = rolling_beta.iloc[-1]
    return summary
def run_regressions(totals: pd.DataFrame, window: int = 3, infectious_period: float = 4.5) -> pd.DataFrame:
    """
    Estimate growth rates and reproduction numbers by rolling regression.

    Fits ``logdelta ~ time`` over a rolling window and derives, per row:

    * ``gradient`` / ``gradient_stderr``: slope on ``time`` and its standard
      error (parameter labels containing "time" are renamed to "gradient")
    * ``egrowthrateM`` / ``egrowthratem``: gradient plus/minus two standard
      errors
    * ``R``, ``RM``, ``Rm``: ``rate * infectious_period + 1`` for the central,
      upper and lower growth rate respectively

    :param totals: frame with at least ``logdelta`` and ``time`` columns
    :param window: rolling window length
    :param infectious_period: multiplier converting a growth rate into R
    :return: frame of parameters, standard errors, ``rsq``, the derived
        columns, plus ``date`` (the index) and ``days`` (``totals.time``)
    """
    # run rolling regressions and get parameters
    model = RollingOLS.from_formula(formula="logdelta ~ time", window=window, data=totals)
    rolling = model.fit(method="lstsq")

    growthrates = rolling.params.join(rolling.bse, rsuffix="_stderr")
    growthrates["rsq"] = rolling.rsquared
    growthrates.rename(
        lambda s: s.replace("time", "gradient").replace("const", "intercept"),
        axis=1,
        inplace=True)

    # calculate growth rates
    growthrates["egrowthrateM"] = growthrates.gradient + 2 * growthrates.gradient_stderr
    growthrates["egrowthratem"] = growthrates.gradient - 2 * growthrates.gradient_stderr
    growthrates["R"] = growthrates.gradient * infectious_period + 1
    # The bounds must scale the WHOLE bounded growth rate by the infectious
    # period, exactly like "R" above. Without parentheses only the
    # standard-error term was scaled.
    growthrates["RM"] = growthrates["egrowthrateM"] * infectious_period + 1
    growthrates["Rm"] = growthrates["egrowthratem"] * infectious_period + 1

    growthrates["date"] = growthrates.index
    growthrates["days"] = totals.time
    return growthrates
def _calculateTi(self):
    """
    Calculates the technical indicator for the given input data. The input
    data are taken from an attribute of the parent class.

    Each value is the rolling OLS fit of ``close`` against the row number,
    evaluated at the current row (intercept + row * slope) and rounded to 4
    decimals. Rows before a full window is available are NaN.

    Returns:
        pandas.DataFrame: The calculated indicator. Index is of type
        ``pandas.DatetimeIndex``. It contains one column, the ``lri``.

    Raises:
        NotEnoughInputData: Not enough data for calculating the indicator.
    """
    n_rows = len(self._input_data.index)

    # Not enough data for the requested period
    if n_rows < self._period:
        raise NotEnoughInputData('Linear Regression Indicator', self._period,
                                 n_rows)

    # n-period Rolling OLS of close (NaN treated as 0) on a constant and the
    # row number.
    rolling_ols = RollingOLS(
        endog=self._input_data['close'].fillna(
            value=0, inplace=False).to_list(),
        exog=sm.add_constant(list(range(n_rows))),
        window=self._period).fit(params_only=True)

    # Build the column in one go instead of assigning into
    # ``lri['lri'].values[i]``, which depends on ``.values`` being a writable
    # view and breaks under pandas Copy-on-Write.
    fitted = [
        round(coef[0] + i * coef[1], 4)
        for i, coef in enumerate(rolling_ols.params)
    ]

    return pd.DataFrame({'lri': fitted}, index=self._input_data.index,
                        dtype='float64')
def capm(self, close, market, window_length_return, window_length_beta):
    """
    Compute rolling CAPM betas for every column of ``close``.

    :param close: price frame, one column per ticker
    :param market: market price series/frame passed to ``self.log_Returns``
    :param window_length_return: horizon forwarded to ``self.log_Returns``
    :param window_length_beta: rolling window of the regression
    :return: DataFrame of betas with one column per ticker
    """
    market_returns = self.log_Returns(market, window_length_return)
    # Restrict the market returns to the date span covered by ``close``.
    span = slice(close.index[0], close.index[-1])
    exog = sm.add_constant(market_returns.loc[span])

    cap_beta = pd.DataFrame(columns=close.columns)
    for tick in close.columns:
        asset_returns = self.log_Returns(close[[tick]], window_length_return)
        fitted = RollingOLS(asset_returns, exog, window=window_length_beta).fit()
        coefficients = fitted.params.dropna()
        # Two labels are assigned, so exog is expected to hold the constant
        # plus a single market column.
        coefficients.columns = ['intercept', 'beta']
        cap_beta.loc[:, tick] = coefficients['beta']
    return cap_beta
def computeForDay(self, strategy, timeSeriesTick, timeSeriesTrade):
    """
    Compute rolling price-vs-time regression coefficients for one day.

    The tick series is resampled to ``self.resamplePeriod`` seconds (first
    value per bin, gaps forward-filled) and regressed on elapsed seconds
    with a rolling window of ``self.period`` rows. The constant is appended
    last, so the slope is the first parameter column.

    :param strategy: unused here
    :param timeSeriesTick: tick price series; ``timeTable["price"]`` is read
        below, so it presumably must be named ``price`` -- TODO confirm
    :param timeSeriesTrade: unused here
    :return: ``{"betaSeries": <rolling parameter frame>}``; the frame is also
        stored on ``self.betaSeries``
    """
    timeSeriesReg = timeSeriesTick.resample(
        str(int(self.resamplePeriod)) + "S"
    ).first()
    # ``fillna(method="pad")`` is deprecated; ``ffill()`` is the equivalent.
    timeSeriesReg = timeSeriesReg.ffill()

    timeTable = timeSeriesReg.to_frame()
    # Integer timestamps; assumes nanosecond resolution (hence the 1e9).
    timeTable["second"] = timeSeriesReg.index.astype(np.int64)
    # ``.iloc[0]`` instead of ``[0]``: the frame is indexed by timestamps,
    # so ``[0]`` relied on the deprecated positional fallback.
    origin = timeTable["second"].iloc[0]
    timeTable["second"] = (timeTable["second"] - origin) / math.pow(10, 9)

    mod = RollingOLS(
        timeTable["price"],
        add_constant(timeTable["second"], prepend=False),
        window=self.period,
    )
    self.betaSeries = mod.fit().params
    return {"betaSeries": self.betaSeries}
def calc_beta_ret(df, market_port_ret, window=52):
    """
    Find country betas through rolling regression.

    For every column ``c`` of ``df`` a rolling OLS of ``market_port_ret`` on
    ``df[c]`` plus a constant is fitted; the slope on ``df[c]`` (rows with
    NaN parameters dropped) becomes column ``c`` of the result.

    NOTE(review): the market return is the dependent variable here; confirm
    that this orientation is the intended definition of "beta".

    :param df: frame with one return column per country
    :param market_port_ret: market portfolio return series (endog)
    :param window: rolling window length
    :return: DataFrame of betas, one column per country, aligned on the
        union of the dates; empty DataFrame if ``df`` has no columns
    """
    y = market_port_ret
    beta_columns = []
    for c in df.columns:
        X = sm.add_constant(df[c])
        rolling_res = RollingOLS(y, X, window).fit(params_only=True)
        beta_columns.append(rolling_res.params.dropna()[c])

    if not beta_columns:
        return pd.DataFrame()

    # ``DataFrame.append`` was removed in pandas 2.0; a single column-wise
    # concat assembles the same frame.
    return pd.concat(beta_columns, axis=1)
def test_formula():
    """Check the formula interface of RollingWLS/RollingOLS against the array interface."""
    y, x, w = gen_data(250, 3, True, pandas=True)
    fmla = "y ~ 1 + x0 + x1 + x2"
    frame = pd.concat([y, x], axis=1)

    formula_res = RollingWLS.from_formula(
        fmla, window=100, data=frame, weights=w
    ).fit()
    direct_res = RollingWLS(y, x, window=100).fit()
    assert_allclose(formula_res.params, direct_res.params)

    # Smoke test only: the OLS formula path must construct and fit.
    RollingOLS.from_formula(fmla, window=100, data=frame).fit()
def _estimate_trailing_capm(
    returns: pd.Series,
    benchmark: pd.Series,
    rf: float,
    window: int,
) -> RollingRegressionResults:
    """
    Fit a trailing (rolling) CAPM regression.

    Both series are passed through ``adjust`` with ``rf`` and then through
    ``align``; the adjusted returns are regressed on a constant and the
    adjusted benchmark as plain NumPy arrays.

    :param returns: asset return series
    :param benchmark: benchmark return series
    :param rf: rate handed to ``adjust`` for both series
    :param window: rolling window length
    :return: fitted rolling regression results
    """
    adjusted_returns = adjust(returns, rf)
    adjusted_benchmark = adjust(benchmark, rf)
    returns, benchmark = align(adjusted_returns, adjusted_benchmark)

    endog = returns.to_numpy()
    exog = sm.add_constant(benchmark.to_numpy())
    model = RollingOLS(endog, exog, window=window)
    return model.fit()
def rolling_OLS_Kt(curves, window=14) -> pd.DataFrame:
    """
    A Rolling window Ordinary Least Squares inference of the derivative of the
    logarithm of the number of cases.

    {args}
    """
    # ``window`` may be a pair (smoothing window, regression window) or a
    # single number used for both.
    if isinstance(window, Sequence):
        a, b = window
    else:
        a, b = window, window

    daily = diff(cases(curves), smooth=a)
    log_daily = np.log(daily).values
    steps = np.arange(len(log_daily))

    # Plain OLS on the first b points; its slope is used to extrapolate the
    # series b steps into the past so the rolling fit has a full window from
    # the first real observation onwards.
    head_fit = sm.OLS(log_daily[:b], sm.add_constant(steps[:b]), missing="drop").fit()
    slope = head_fit.params[1]
    past_steps = np.arange(steps[0] - b, steps[0])
    past_values = slope * (past_steps - steps[0]) + log_daily[0]

    steps = np.concatenate([past_steps, steps])
    log_daily = np.concatenate([past_values, log_daily])

    # Rolling OLS gives the growth-rate estimate; drop the b padded rows.
    rolling_fit = RollingOLS(
        log_daily, sm.add_constant(steps), window=b, missing="drop"
    ).fit()
    Kt = rolling_fit.params[b:, 1]
    low, high = rolling_fit.conf_int()[b:, :, 1].T

    columns = {"Kt": Kt, "Kt_low": low, "Kt_high": high}
    return pd.DataFrame(columns, index=curves.index)
def rollingRegressionWrap(X_colName = ["VMG","MKT"],Y_colName = ["monthlyReturn"],data_rollingReg= data_rollingReg, refData = stockReturnData, refCol = ["SID"]):
    """
    Run a 36-month rolling regression per asset and stack the results.

    For every unique value in ``refData[refCol]`` the rows of
    ``data_rollingReg`` with that ``SID`` are regressed (``Y_colName`` on a
    constant plus ``X_colName``). Collected per row: trading month, SID,
    adjusted R-squared, rolling Jarque-Bera p-value of ``monthlyReturn``, the
    t-statistics and the coefficients of the regressors (constant excluded).

    :param X_colName: regressor column names (not modified)
    :param Y_colName: dependent column name(s) (not modified)
    :param data_rollingReg: frame with ``SID``, ``Trading_Month``,
        ``monthlyReturn`` and the regression columns
    :param refData: frame supplying the list of assets
    :param refCol: column(s) of ``refData`` holding the asset ids
    :return: ``(rollingResult_df, invalid_SID)``; ``invalid_SID`` lists
        ``[asset, number_of_rows]`` for every asset whose processing raised
    """
    # assets whose regression could not be run
    invalid_SID = []
    SID_list = np.unique(refData[refCol])

    # output layout: identifiers, fit statistics, t-stats, coefficients
    newColNames = ["Trading_Month", "SID", "adjusted_rSquared", "JB_pValue"]
    newColNames.extend(val + "_t_Stat" for val in X_colName)
    newColNames.extend(X_colName)
    rollingResult_df = pd.DataFrame(columns = newColNames)

    progress_bar = tqdm.tqdm(SID_list)
    for asset in progress_bar:
        # Tracked separately so the handler never reads an unbound or stale
        # ``Y`` left over from a previous asset.
        n_months = 0
        try:
            subDataSet = pd.DataFrame(data_rollingReg[data_rollingReg["SID"]==asset])
            n_months = subDataSet.shape[0]
            Y = subDataSet[Y_colName]
            X = sm.add_constant(subDataSet[X_colName])
            Trading_Month = subDataSet["Trading_Month"].values
            SIDs = subDataSet["SID"].values
            JB_pval = subDataSet.rolling(36)["monthlyReturn"].apply(lambda var: sp.stats.jarque_bera(var)[1]).values

            subReg = RollingOLS(Y,X, window = 36, missing = "drop").fit()
            rSquared_adj = subReg.rsquared_adj.values
            t_Stat = subReg.tvalues.values
            params = subReg.params.values

            # Stack in EXACTLY the order of newColNames. The previous order
            # (SID, Trading_Month, ..., params, t-stats) mislabelled the
            # identifier columns and swapped coefficients with t-statistics.
            dataDf = np.hstack([Trading_Month[...,np.newaxis],
                                SIDs[...,np.newaxis],
                                rSquared_adj[...,np.newaxis],
                                JB_pval[...,np.newaxis],
                                t_Stat[:,1:],
                                params[:,1:]])
            assetDf = pd.DataFrame(data = dataDf, columns = newColNames)
            rollingResult_df = pd.concat([rollingResult_df, assetDf], ignore_index = True)
        except Exception:
            # Best effort: record the asset and carry on. ``Exception`` rather
            # than a bare ``except`` so Ctrl-C still interrupts the loop.
            invalid_SID.append([asset, n_months])
        progress_bar.set_description(f'Processing {asset}')
    return(rollingResult_df,invalid_SID)
def __init__(self, X: Union[pd.Series, List[pd.Series]], y: pd.Series, w: int, fit_intercept: bool = True):
    """
    Fit a rolling linear regression of ``y`` on ``X``.

    :param X: one explanatory series or a list of them
    :param y: dependent series
    :param w: rolling window length; must exceed the number of design
        columns (explanatory variables plus the intercept, if fitted)
    :param fit_intercept: whether to add a constant column
    :raises MqValueError: if ``w`` is not larger than the number of columns
    """
    df = pd.concat(X, axis=1) if isinstance(X, list) else X.to_frame()
    df = sm.add_constant(df) if fit_intercept else df
    # Columns are numbered so that 0 is always the intercept; without an
    # intercept numbering starts at 1.
    df.columns = range(len(df.columns)) if fit_intercept else range(1, len(df.columns) + 1)

    if w <= len(df.columns):
        raise MqValueError('Window length must be larger than the number of explanatory variables')

    # filter out nan and inf; ``axis`` must be passed by keyword because
    # pandas 2.x made the arguments of ``any`` keyword-only.
    df = df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
    y = y[~y.isin([np.nan, np.inf, -np.inf])]

    df_aligned, y_aligned = df.align(y, 'inner', axis=0)  # align series
    self._X = df_aligned.copy()
    self._res = RollingOLS(y_aligned, df_aligned, w).fit()
def exponential_momentum(ts, min_nobs, window):
    '''
    Clenow-style exponential regression momentum

    1. rolling regression ln(ts) = m*t + c, with t = 0, 1, ..., len(ts) - 1
    2. annualised momentum = ((e^(m))^(252) - 1) * 100
    3. the score is the annualised momentum multiplied by the rolling R^2

    :param ts: series of prices; the ``'x1'`` parameter is looked up by
        label, so this presumably must be a pandas object - TODO confirm
    :param min_nobs: minimum observations per window, passed to RollingOLS
    :param window: rolling window length
    :return: annualised momentum score
    '''
    time_axis = np.arange(0, len(ts))
    design = sm.add_constant(time_axis)
    fitted = RollingOLS(np.log(ts), design, min_nobs=min_nobs, window=window).fit()

    daily_growth = np.exp(fitted.params['x1'])
    annualised = (np.power(daily_growth, 252) - 1) * 100
    return annualised * fitted.rsquared
def get_rolling_beta(self, window: int):
    """
    Add a ``RollingBeta`` column to the price data of every ticker.

    The beta is the absolute value of the rolling OLS slope of
    ``RiskPremium`` on ``IndexRiskPremium`` (with a constant). Tickers with
    no more than ``window`` rows, or whose regression raises ``ValueError``,
    get ``nan`` instead.

    :param window: rolling window length
    """
    for ticker in self.tickers:
        try:
            if not len(self.priceData[ticker]) > window:
                self.priceData[ticker]["RollingBeta"] = nan
                continue

            regressors = add_constant(self.priceData[ticker]["IndexRiskPremium"])
            response = self.priceData[ticker]["RiskPremium"]
            fitted = RollingOLS(response, regressors, window).fit()
            slope = fitted.params["IndexRiskPremium"]
            self.priceData[ticker]["RollingBeta"] = slope.abs()
        except ValueError:
            self.priceData[ticker]["RollingBeta"] = nan
def test_min_nobs(basic_data):
    """Check that ``min_nobs`` is honoured when the regressors contain missing values."""
    y, x, _ = basic_data
    has_missing = np.any(np.isnan(np.asarray(x)))
    if not has_missing:
        # Without gaps every window is full and the constraint cannot bind.
        return

    unconstrained = RollingOLS(y, x, 150).fit()
    observed = unconstrained.nobs[unconstrained.nobs != 0]
    # Ensures that the constraint binds
    min_nobs = observed.min() + 1

    constrained = RollingOLS(y, x, 150, min_nobs=min_nobs).fit()
    used = constrained.nobs[constrained.nobs != 0]
    assert np.all(used >= min_nobs)
def get_rolling_factor_loadings(ticker, rolling_window):
    """
    Return the rolling factor loadings of ``ticker`` in long format.

    :param ticker: identifier passed to ``get_stock_return`` and
        ``prep_data_for_regression``
    :param rolling_window: rolling window length
    :return: DataFrame with ``index``, ``variable``, ``value``, ``ticker`` and
        ``window_size`` columns, one row per (date, factor) with non-NaN fits
    """
    returns = get_stock_return(ticker)
    Y, X = prep_data_for_regression(ticker, returns)

    model = RollingOLS(Y, X, window=rolling_window)
    fitted = model.fit(cov_type='HAC', cov_kwds={'maxlags': 1})

    # reset_index() exposes the dates as a column named "index", which
    # presumes the regression index is unnamed.
    wide = fitted.params.reset_index().dropna()
    loadings = pd.melt(wide, id_vars=['index'])
    loadings['ticker'] = ticker
    loadings['window_size'] = rolling_window
    return loadings
def rollingOLS(totals: pd.DataFrame, window: int = 3, infectious_period: float = 4.5) -> pd.DataFrame:
    """
    legacy rolling regression-based implementation of Bettencourt/Ribeiro method

    Fits ``logdelta ~ time`` over a rolling window and derives the growth
    rate (``gradient``), its +/- two standard error bounds
    (``egrowthrateM`` / ``egrowthratem``) and the corresponding reproduction
    numbers ``R``, ``RM``, ``Rm`` as ``rate * infectious_period + 1``.

    :param totals: frame with ``logdelta`` and ``time`` columns whose index
        has a level named ``status_change_date``
    :param window: rolling window length
    :param infectious_period: multiplier converting a growth rate into R
    :return: frame of parameters, standard errors, ``rsq``, derived columns,
        ``date`` and ``days``
    """
    # run rolling regressions and get parameters
    model   = RollingOLS.from_formula(formula = "logdelta ~ time", window = window, data = totals)
    rolling = model.fit(method = "lstsq")

    growthrates = rolling.params.join(rolling.bse, rsuffix="_stderr")
    growthrates["rsq"] = rolling.rsquared
    growthrates.rename(lambda s: s.replace("time", "gradient").replace("const", "intercept"), axis = 1, inplace = True)

    # calculate growth rates
    growthrates["egrowthrateM"] = growthrates.gradient + 2 * growthrates.gradient_stderr
    growthrates["egrowthratem"] = growthrates.gradient - 2 * growthrates.gradient_stderr
    growthrates["R"]            = growthrates.gradient * infectious_period + 1
    # Scale the whole bounded growth rate, as "R" does. Without parentheses
    # only the standard-error term was multiplied by the infectious period.
    growthrates["RM"]           = growthrates["egrowthrateM"] * infectious_period + 1
    growthrates["Rm"]           = growthrates["egrowthratem"] * infectious_period + 1

    growthrates["date"] = growthrates.index.get_level_values('status_change_date')
    growthrates["days"] = totals.time
    return growthrates
def test_save_load(data):
    """Check that rolling results survive a pickle round trip, with and without data."""
    y, x, _ = data
    res = RollingOLS(y, x, window=60).fit()

    # Plain save first; the ``remove_data=True`` variant goes last because
    # it strips data from the results object.
    for save_options in ({}, {"remove_data": True}):
        buffer = BytesIO()
        # test wrapped results load save pickle
        res.save(buffer, **save_options)
        buffer.seek(0, 0)
        restored = res.__class__.load(buffer)
        assert type(restored) is type(res)  # noqa: E721
def regress_factor_loadings(self, portfolio, benchmark_returns: pd.Series = None, date: datetime = None,
                            regression_window: int = 36, rolling=False, show=True):
    '''
    Regress the excess returns of a single-asset portfolio on the model's
    factors, either once over the last ``regression_window`` observations
    or as a rolling regression.

    :param portfolio: str, pd.Series, TimeDataFrame, Portfolio... Anything
        that is not a TimeDataFrame/Portfolio is wrapped in a TimeDataFrame.
        Only a single return column is supported for now.
    :param benchmark_returns: optional benchmark replacing the model's
        market factor; its excess return over ``RF`` becomes ``MKT-RF``
    :param date: last date kept when the portfolio has to be resampled to
        the factors' frequency
    :param regression_window: number of observations in the (rolling) window
    :param rolling: if True run a RollingOLS, else a single OLS with HAC
        (maxlags=1) covariance
    :param show: whether to draw the plots
    :return: the RollingOLS results if ``rolling``, else the OLS results
    :raises TypeError: if the portfolio holds more than one return column
    '''
    if not isinstance(portfolio, (TimeDataFrame, Portfolio)):
        portfolio = TimeDataFrame(portfolio)

    if len(portfolio.df_returns.columns) > 1:
        # TODO actually, do an equal weighting
        raise TypeError('Inappropriate argument type for portfolio')

    if portfolio.frequency != self.factors_timedf.frequency:
        portfolio_copy = portfolio.set_frequency(self.factors_timedf.frequency, inplace=False) \
            .slice_dataframe(to_date=date, inplace=False)
    else:
        portfolio_copy = portfolio

    if benchmark_returns is None:
        # if no benchmark specified, just use the one in the model
        timedf_merged = portfolio_copy.merge([self.factors_timedf], inplace=False)
    else:
        timedf_merged = portfolio_copy.merge([self.factors_timedf, benchmark_returns], inplace=False)
        timedf_merged.df_returns.drop(['MKT-RF'], axis=1, inplace=True)
        # Rename by the benchmark's column NAME. A Series is unhashable, so
        # using the Series itself as a dict key raised TypeError. Plain
        # labels (objects without ``.name``) are still accepted as-is.
        benchmark_label = getattr(benchmark_returns, 'name', benchmark_returns)
        timedf_merged.df_returns.rename(columns={benchmark_label: 'MKT-RF'}, inplace=True)
        timedf_merged.df_returns['MKT-RF'] = timedf_merged.df_returns['MKT-RF'] - timedf_merged.df_returns['RF']

    merged_returns = timedf_merged.df_returns
    # first column is the portfolio; everything after it are the factors
    portfolio_returns = (merged_returns.iloc[:, 0] - merged_returns['RF']).rename('XsRet')
    factors_df = merged_returns.iloc[:, 1:].drop(columns=['RF'])  # RF not needed anymore

    if rolling:
        # endogenous is the portfolio returns (y, dependent), exogenous is
        # the factors (x, explanatory, independent)
        rols = RollingOLS(endog=portfolio_returns, exog=factors_df, window=regression_window)
        rres = rols.fit()
        params = rres.params.dropna()
        print(params.tail())
        if show:
            rres.plot_recursive_coefficient(variables=factors_df.columns, figsize=(10, 6))
            plt.show()
        return rres

    # need to merge again to run regression on dataframe (with y being XsRet)
    df_stock_factor = pd.merge(portfolio_returns, factors_df, left_index=True, right_index=True)
    df_stock_factor = df_stock_factor.iloc[-regression_window:, :]
    # rename because '-' is an operator in the formula language
    df_stock_factor = df_stock_factor.rename(columns={'MKT-RF': 'MKT'})
    # Build the formula from the renamed columns so it matches the data.
    factor_names = list(df_stock_factor.columns[1:])
    reg = sm.ols(formula='XsRet ~ {}'.format(' + '.join(factor_names)),
                 data=df_stock_factor).fit(cov_type='HAC', cov_kwds={'maxlags': 1})
    print(reg.summary())

    if show:
        nrows, ncols = ceil(len(factor_names) / 3), min(len(factor_names), 3)
        # squeeze=False always yields a 2-D axes array, so a single row or a
        # single panel needs no special-casing.
        _, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(12, 5), squeeze=False)
        plt.tight_layout()
        for i, factor in enumerate(factor_names):
            ax = axs[i // 3][i % 3]
            X = np.linspace(df_stock_factor[factor].min(), df_stock_factor[factor].max())
            # beta * x + alpha; positional access made explicit with .iloc
            Y = reg.params.iloc[i + 1] * X + reg.params.iloc[0]
            ax.plot(X, Y)
            ax.scatter(df_stock_factor[factor], df_stock_factor.iloc[:, 0], alpha=0.3)
            ax.grid(True)
            ax.axis('tight')
            ax.set_xlabel(factor if factor != 'MKT' else 'MKT-RF')
            ax.set_ylabel('Portfolio Excess Returns')
        plt.show()
    return reg
def calibrate(self, windowOLS, copula_lookback, recalibrate_n, **kwargs):
    """
    Calibrate the rolling hedge ratio and the copula mispricing indices.

    Steps:
    1. rolling OLS of ``y`` on ``x`` (no constant) -> ``self.beta``
    2. choose the copula family (clayton / frank / gumbel) with the lowest
       AIC on the full sample of log returns
    3. walk forward in steps of ``recalibrate_n``: re-estimate the copula
       parameter on the previous ``copula_lookback`` returns and compute the
       mispricing indices for the next ``recalibrate_n`` rows

    Results are stored on ``self.MI_u_v`` and ``self.MI_v_u``; rows never
    covered by a prediction window keep the neutral value 0.5.

    :param windowOLS: rolling OLS window (cast to int)
    :param copula_lookback: number of past returns used per re-estimation
        (cast to int)
    :param recalibrate_n: number of rows predicted per re-estimation
        (cast to int)
    :param kwargs: accepted and ignored
    """
    self.windowOLS = int(windowOLS)
    self.copula_lookback = int(copula_lookback)
    self.recalibrate_n = int(recalibrate_n)
    # Use the int-cast values everywhere below; the raw arguments may be
    # floats, which cannot be used as slice bounds.
    lookback = self.copula_lookback
    step = self.recalibrate_n

    df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})
    model = RollingOLS(endog=df['y'], exog=df['x'], window=self.windowOLS)
    rres = model.fit()
    self.beta = rres.params['x'].values.reshape(-1, )

    # Copula decision:
    df['x_log_ret'] = np.log(df.x) - np.log(df.x.shift(1))
    df['y_log_ret'] = np.log(df.y) - np.log(df.y.shift(1))

    # The first log return is always NaN (shift), which turned Kendall's tau
    # into NaN and made every AIC identical. Select the copula on complete
    # rows only.
    valid = df[['x_log_ret', 'y_log_ret']].dropna()

    # Convert the two return series to uniform values u and v using the
    # empirical distribution functions
    ecdf_x, ecdf_y = ECDF(valid.x_log_ret), ECDF(valid.y_log_ret)
    u, v = ecdf_x(valid.x_log_ret), ecdf_y(valid.y_log_ret)

    # Compute the Akaike Information Criterion (AIC) for different copulas
    # and choose the copula with minimum AIC
    tau = stats.kendalltau(valid.x_log_ret, valid.y_log_ret)[0]  # Kendall's rank correlation
    AIC = {}  # copula family -> [theta, AIC]
    for family in ['clayton', 'frank', 'gumbel']:
        param = self._parameter(family, tau)
        lpdf = [self._lpdf_copula(family, param, x, y) for (x, y) in zip(u, v)]
        # Replace nan with zero and inf with finite numbers in lpdf list
        lpdf = np.nan_to_num(lpdf)
        loglikelihood = sum(lpdf)
        AIC[family] = [param, -2 * loglikelihood + 2]

    # Choose the copula with the minimum AIC
    copula = min(AIC.items(), key=lambda item: item[1][1])[0]

    self.startIdx = lookback + 1  # Because first is NAN
    df['MI_u_v'] = 0.5
    df['MI_v_u'] = 0.5
    col_u_v = df.columns.get_loc('MI_u_v')
    col_v_u = df.columns.get_loc('MI_v_u')

    for i in range(self.startIdx, len(df) - step, step):
        x_hist = df.x_log_ret.iloc[i - lookback:i]
        y_hist = df.y_log_ret.iloc[i - lookback:i]
        x_forw = df.x_log_ret.iloc[i:i + step]
        y_forw = df.y_log_ret.iloc[i:i + step]

        # Estimate Kendall's rank correlation
        tau = stats.kendalltau(x_hist, y_hist)[0]
        # Estimate the copula parameter: theta
        theta = self._parameter(copula, tau)
        # Empirical distribution functions of the historical returns
        ecdf_x, ecdf_y = ECDF(x_hist), ECDF(y_hist)

        # Now get future values
        a, b = self._misprice_index(copula, theta, ecdf_x(x_forw), ecdf_y(y_forw))
        # Assign on the frame itself; ``df.MI_u_v.iloc[...] = a`` is chained
        # assignment and is not written back under pandas Copy-on-Write.
        df.iloc[i:i + step, col_u_v] = a
        df.iloc[i:i + step, col_v_u] = b

    self.MI_u_v = df.MI_u_v
    self.MI_v_u = df.MI_v_u