Esempio n. 1
0
def test_params_only(basic_data, method):
    """Check that ``params_only=True`` yields the same coefficients as a full fit.

    The same rolling model (window of 150) is fitted twice with the given
    ``method``: once computing the full results and once computing the
    parameters only. Both fits must report matching ``params``.
    """
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    fits = {
        flag: model.fit(method=method, params_only=flag)
        for flag in (False, True)
    }
    # Compare with a tolerance instead of exact equality to allow for small
    # numerical differences on x86 platforms.
    assert_allclose(fits[True].params, fits[False].params)
Esempio n. 2
0
def test_methods(basic_data):
    """Check that the 'inv', 'lstsq' and 'pinv' solvers give matching parameters."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    # Fit with every solver first, then compare each against the 'inv' result.
    fitted = {
        solver: model.fit(method=solver)
        for solver in ('inv', 'lstsq', 'pinv')
    }
    baseline = fitted['inv'].params
    for solver in ('lstsq', 'pinv'):
        assert_allclose(baseline, fitted[solver].params)
Esempio n. 3
0
def test_methods(basic_data, params_only):
    """Check that all three solvers agree, with or without ``params_only``."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    # Fit with every solver first, then compare each against the "inv" result.
    fitted = {
        solver: model.fit(method=solver, params_only=params_only)
        for solver in ("inv", "lstsq", "pinv")
    }
    baseline = fitted["inv"].params
    for solver in ("lstsq", "pinv"):
        assert_allclose(baseline, fitted[solver].params)
Esempio n. 4
0
def test_min_nobs(basic_data):
    """Check that ``min_nobs`` is honoured by every window that reports data.

    The test only applies to fixtures whose regressors contain NaNs; for
    complete data it returns immediately.
    """
    y, x, _ = basic_data
    has_missing = np.isnan(np.asarray(x)).any()
    if not has_missing:
        return

    # Baseline fit, used to find the smallest non-zero observation count.
    counts = RollingOLS(y, x, 150).fit().nobs
    # One more than the observed minimum, so that the constraint binds.
    min_nobs = counts[counts != 0].min() + 1

    constrained_counts = RollingOLS(y, x, 150, min_nobs=min_nobs).fit().nobs
    assert np.all(constrained_counts[constrained_counts != 0] >= min_nobs)
Esempio n. 5
0
 def playing_with_rolling(self,
                          pair,
                          fromDate="2015-01-01",
                          toDate="2018-01-01",
                          window=180):
     """Plot the rolling z-score of a pair spread and its rolling hedge ratio.

     The first symbol of ``pair`` is regressed on the second one (plus a
     constant) with a rolling OLS. The spread is the first series minus the
     rolling slope times the second series; the intercept is not subtracted.
     Two stacked plots are shown: the spread standardised by its own rolling
     mean and standard deviation, and the rolling slope itself.

     :param pair: sequence whose first two items are the symbols to compare
     :param fromDate: start label used to slice both price series
     :param toDate: end label used to slice both price series
     :param window: length of the rolling window, used both for the
         regression and for the rolling mean / standard deviation
     :return: None; the figure is displayed with ``plt.show()``
     """
     symbol1, symbol2 = pair[0], pair[1]
     data1 = self.portfolio[symbol1][self.analysisOn][fromDate:toDate]
     data2 = self.portfolio[symbol2][self.analysisOn][fromDate:toDate]

     rolling_fit = RollingOLS(data1, sm.add_constant(data2),
                              window=window).fit()
     # Look the slope up by the regressor's own name rather than a hard-coded
     # ``adjusted_close`` so that any ``self.analysisOn`` column works.
     hedge_ratio = rolling_fit.params[data2.name]

     spread_rolling = data1 - hedge_ratio * data2
     spread_mean = spread_rolling.rolling(window=window).mean()
     spread_std = spread_rolling.rolling(window=window).std()

     fig, axs = plt.subplots(2)
     axs[0].plot((spread_rolling - spread_mean) / spread_std)
     axs[0].xaxis.set_major_locator(plt.MaxNLocator(15))
     # NOTE(review): the start label of this slice is hard-coded and is
     # earlier than the default ``fromDate`` — confirm it is still wanted.
     axs[1].plot(hedge_ratio['2013-03-15':])
     axs[1].xaxis.set_major_locator(plt.MaxNLocator(15))
     plt.show()
def compute_rolling_regression(
    window_size: int, endog: pd.DataFrame, exog: pd.DataFrame
):
    """Fit a rolling OLS on a pre-processed LOB and return its coefficients.

    A constant is appended as the last regressor, so the fitted parameters
    are ordered (slope, constant). Following Amaya, Rochen et al (2015) the
    slope is labelled as the liquidity cost and the constant as the
    intercept.

    Ref:
        https://www.statsmodels.org/dev/examples/notebooks/generated/rolling_ls.html

        Calculation described in "Distilling Liquidity Costs from Limit Order Books"
        by Amaya, Rochen et al (2015).
        Paper source: https://www.sciencedirect.com/science/article/abs/pii/S0378426618301353

    :window_size: Size of the window
    :endog: Dependent variable - y
    :exog: Independent variable - x
    :return: rols_results (instance of statsmodels results object), rols_params (pd.DataFrame)
    """
    design = sm.add_constant(exog, prepend=False)
    rols_results = RollingOLS(endog, design, window=window_size).fit()

    rols_params = rols_results.params
    # The two labels assume a single regressor plus the appended constant.
    # NOTE(review): the columns are renamed on the object returned by
    # ``rols_results.params``; if statsmodels caches that attribute the new
    # labels are also visible through the results object — confirm.
    rols_params.columns = ["liquidity_cost", "intercept"]
    return rols_results, rols_params
Esempio n. 7
0
def test_expanding(basic_data):
    """Check which rows are NaN / finite for an expanding fit with ``min_nobs=50``."""
    y, x, _ = basic_data
    fitted = RollingOLS(y, x, 150, min_nobs=50, expanding=True).fit()
    coefs = np.asarray(fitted.params)

    # Fewer than 50 observations are available for the first 49 rows.
    assert np.isnan(coefs[:49]).all()

    # Row at which the 50th fully finite regressor row has been seen.
    complete_rows = np.isfinite(np.asarray(x)).all(axis=1)
    first = np.flatnonzero(np.cumsum(complete_rows) >= 50)[0]
    assert np.isfinite(coefs[first:]).all()
Esempio n. 8
0
def rolling_ols_model():
    """Build a rolling OLS fit on the "longley" dataset.

    Returns a ``ModelWithResults`` holding the fitted results, the
    ``RollingOLS`` instance that produced them, and the design matrix
    (regressors with a constant appended as the last column).
    """
    # Rolling Ordinary Least Squares (Rolling OLS)
    from statsmodels.regression.rolling import RollingOLS

    longley = get_dataset("longley")
    design = sm.add_constant(longley.exog, prepend=False)
    estimator = RollingOLS(longley.endog, design)
    fitted = estimator.fit(reset=50)

    return ModelWithResults(
        model=fitted, alg=estimator, inference_dataframe=design
    )
Esempio n. 9
0
def get_rolling_beta(df: pd.DataFrame, hist: pd.DataFrame, mark: pd.DataFrame,
                     n: int) -> pd.DataFrame:
    """Turns a holdings portfolio into a rolling beta dataframe

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe of daily holdings; its "Holding" block is used
    hist : pd.DataFrame
        A dataframe of historical returns; its "Close" block is used
    mark : pd.DataFrame
        The dataframe of market performance; its "Market" block is used
    n : int
        The period to get returns for, in days counted back from now

    Returns
    ----------
    final : pd.DataFrame
        Dataframe with one weighted-beta column (``prod_<name>``) per holding,
        their row sum (``total``) and the raw rolling betas (``beta_<name>``)
    """
    holdings = df["Holding"]
    uniques = holdings.columns.tolist()
    # Portfolio weights: each holding divided by the row total.
    res = holdings.div(holdings.sum(axis=1), axis=0).fillna(0)

    comb = pd.merge(hist["Close"],
                    mark["Market"],
                    how="outer",
                    left_index=True,
                    right_index=True)
    # ``fillna(method="ffill")`` is deprecated in recent pandas releases.
    comb = comb.ffill()

    # The market regressor is the same for every asset, so build it once.
    # presumably "Close" is the market column contributed by ``mark`` — verify
    exog = sm.add_constant(comb["Close"])
    for col in hist["Close"].columns:
        rres = RollingOLS(comb[col], exog, window=252).fit()
        res[f"beta_{col}"] = rres.params["Close"]

    final = res.ffill()
    beta_cols = [f"beta_{uni}" for uni in uniques]
    for uni in uniques:
        final[f"prod_{uni}"] = final[uni] * final[f"beta_{uni}"]

    # Keep the raw betas aside, then reduce ``final`` to the weighted products.
    dropped = final[beta_cols].copy()
    final = final.drop(columns=beta_cols + uniques)
    final["total"] = final.sum(axis=1)
    # NOTE(review): ``datetime.now()`` is timezone-naive; this comparison
    # assumes the index is naive as well — confirm.
    final = final[final.index >= datetime.now() - timedelta(days=n + 1)]
    return pd.merge(final,
                    dropped,
                    how="left",
                    left_index=True,
                    right_index=True)
Esempio n. 10
0
def calc_aggregates(data, days):
    """Summarise the BTC/ETH spread implied by a rolling regression.

    BTC-GBP closes are regressed on ETH-GBP closes (no constant is added)
    over a rolling window of ``days`` rows. The spread is the BTC close minus
    the rolling slope times the ETH close.

    Returns a dict with the spread's mean and standard deviation and the
    most recent rolling slope.
    """
    btc_close = data["BTC-GBP"].Close
    eth_close = data["ETH-GBP"].Close

    fitted = RollingOLS(btc_close, eth_close, window=days).fit()
    rolling_beta = fitted.params.Close
    rolling_beta.name = "beta"

    spread = btc_close - rolling_beta * eth_close

    summary = {}
    summary["mean"] = spread.mean()
    summary["std"] = spread.std()
    summary["beta"] = rolling_beta.iloc[-1]
    return summary
Esempio n. 11
0
    def calibrate(self, windowOLS, **kwargs):
        """Estimate the rolling hedge ratio of ``self.y`` on ``self.x``.

        A rolling OLS of ``y`` on ``x`` and a constant column is fitted and
        the slope on ``x`` is stored, one value per observation, in
        ``self.beta``. The effective window is stored in ``self.windowOLS``.

        :param windowOLS: requested rolling window length
        :param kwargs: accepted for interface compatibility; not used here
        """
        # Cap the window at one less than the sample size. The original
        # expression ``len(self.y - 1)`` had the ``- 1`` inside the call, so
        # it subtracted from the data and never reduced the length.
        self.windowOLS = min(windowOLS, len(self.y) - 1)

        df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})

        rres = RollingOLS(endog=df['y'],
                          exog=df[['x', 'c']],
                          window=self.windowOLS).fit()

        self.beta = rres.params['x'].values.reshape(-1, )
Esempio n. 12
0
    def capm(self, close, market, window_length_return, window_length_beta):
        """Estimate a rolling CAPM beta for every column of ``close``.

        Market log returns, restricted to the date span of ``close``, are
        used with a constant as regressors. Each asset's log returns are
        regressed on them with a rolling window of ``window_length_beta``,
        and the slope is stored under the asset's column name.

        :param close: price frame, one column per asset
        :param market: market price data passed to ``self.log_Returns``
        :param window_length_return: window passed to ``self.log_Returns``
        :param window_length_beta: rolling window of the regression
        :return: frame of rolling betas with the columns of ``close``
        """
        market_returns = self.log_Returns(market, window_length_return)
        market_returns = market_returns.loc[close.index[0]:close.index[-1]]
        design = sm.add_constant(market_returns)

        cap_beta = pd.DataFrame(columns=close.columns)
        for tick in close.columns:
            asset_returns = self.log_Returns(close[[tick]],
                                             window_length_return)
            fitted = RollingOLS(asset_returns, design,
                                window=window_length_beta).fit()
            # Drop the warm-up rows, then label the (constant, slope) columns.
            coefs = fitted.params.dropna()
            coefs.columns = ['intercept', 'beta']
            cap_beta.loc[:, tick] = coefs['beta']

        return cap_beta
Esempio n. 13
0
    def computeForDay(self, strategy, timeSeriesTick, timeSeriesTrade):
        """Compute rolling regression coefficients of price against elapsed time.

        The tick series is resampled to a regular grid of
        ``self.resamplePeriod`` seconds (first value per bin, gaps
        forward-filled). The price is then regressed on the elapsed seconds
        plus a constant over a rolling window of ``self.period`` rows.

        :param strategy: not used by this method
        :param timeSeriesTick: time-indexed tick series
            (assumes it is named "price" — TODO confirm)
        :param timeSeriesTrade: not used by this method
        :return: dict with key "betaSeries" holding the rolling parameters,
            which are also stored in ``self.betaSeries``
        """
        # NOTE(review): recent pandas releases deprecate the upper-case "S"
        # alias in favour of "s"; switch once the minimum supported pandas
        # version allows it.
        rule = str(int(self.resamplePeriod)) + "S"
        # ``fillna(method="pad")`` is deprecated; ``ffill`` is the equivalent.
        timeSeriesReg = timeSeriesTick.resample(rule).first().ffill()

        timeTable = timeSeriesReg.to_frame()
        # Index as integer nanoseconds, converted below to seconds elapsed
        # since the first row.
        timeTable["second"] = timeSeriesReg.index.astype(np.int64)
        # ``.iloc[0]`` selects by position explicitly; a plain ``[0]`` on a
        # datetime-indexed Series relies on a deprecated positional fallback.
        first_stamp = timeTable["second"].iloc[0]
        timeTable["second"] = (timeTable["second"] - first_stamp) / 1e9

        mod = RollingOLS(
            timeTable["price"],
            add_constant(timeTable["second"], prepend=False),
            window=self.period,
        )
        self.betaSeries = mod.fit().params
        return {"betaSeries": self.betaSeries}
Esempio n. 14
0
def calc_beta_ret(df, market_port_ret, window=52):
    """Compute rolling regression slopes for every column of ``df``.

    For each column, ``market_port_ret`` is regressed on that column plus a
    constant over a rolling window, and the slope on the column is kept.

    :param df: frame with one column per country
    :param market_port_ret: dependent variable of every regression
    :param window: rolling window length
    :return: frame with one slope column per column of ``df``, aligned on
        the index; an empty frame when ``df`` has no columns
    """
    # Find country beta's through rolling regression
    y = market_port_ret
    betas = []
    for c in df.columns:
        X = sm.add_constant(df[c])
        rolling_res = RollingOLS(y, X, window).fit(params_only=True)
        # Keep only the slope on the country column, without warm-up NaNs.
        betas.append(rolling_res.params.dropna()[c])

    if not betas:
        return pd.DataFrame()

    # Put all beta's for every country and every date in a dataframe.
    # A single concat replaces ``DataFrame.append``, which pandas 2.0 removed.
    return pd.concat(betas, axis=1)
Esempio n. 15
0
File: K.py Progetto: pydemic/pydemic
def rolling_OLS_Kt(curves, window=14) -> pd.DataFrame:
    """
    Rolling window Ordinary Least Squares inference of the derivative of the
    logarithm of the number of cases.

    {args}
    """

    if isinstance(window, Sequence):
        a, b = window
    else:
        a = b = window
    daily = diff(cases(curves), smooth=a)

    # Fit a plain OLS on the first b points of the log series; its slope is
    # used to extrapolate the series into the past.
    log_daily = np.log(daily).values
    steps = np.arange(len(log_daily))
    head_fit = sm.OLS(
        log_daily[:b], sm.add_constant(steps[:b]), missing="drop"
    ).fit()
    slope = head_fit.params[1]

    # Prepend b extrapolated points so that the rolling window is already
    # full at the first real observation.
    origin = steps[0]
    past_steps = np.arange(origin - b, origin)
    past_values = slope * (past_steps - origin) + log_daily[0]

    all_steps = np.concatenate([past_steps, steps])
    all_values = np.concatenate([past_values, log_daily])

    # Rolling OLS over the extended series gives the growth rate estimate.
    rolling_fit = RollingOLS(
        all_values, sm.add_constant(all_steps), window=b, missing="drop"
    ).fit()

    # Discard the b rows that belong to the extrapolated prefix.
    Kt = rolling_fit.params[b:, 1]
    low, high = rolling_fit.conf_int()[b:, :, 1].T

    columns = {"Kt": Kt, "Kt_low": low, "Kt_high": high}
    return pd.DataFrame(columns, index=curves.index)
Esempio n. 16
0
    def regress_factor_loadings(self,
                                portfolio,
                                benchmark_returns: pd.Series = None,
                                date: datetime = None,
                                regression_window: int = 36,
                                rolling=False,
                                show=True):
        '''
        Regress a portfolio's excess returns on the model's factor returns.

        With ``rolling=True`` a rolling OLS is fitted over windows of
        ``regression_window`` rows. Otherwise a single OLS is fitted on the
        last ``regression_window`` rows, with HAC standard errors (one lag).

        :param portfolio: str, pd.Series, TimeDataFrame, Portfolio... Anything that is not a
            TimeDataFrame or Portfolio is wrapped in a TimeDataFrame. Only a single returns
            column is supported at the moment.
        :param benchmark_returns: optional benchmark that replaces the model's own 'MKT-RF'
            factor; when None, the factor stored in the model is used
        :param date: upper bound passed to ``slice_dataframe``; only applied when the
            portfolio frequency differs from the factors' frequency
        :param regression_window: number of rows per rolling window, or number of trailing
            rows used by the single regression
        :param rolling: if True, run a rolling regression instead of a single one
        :param show: if True, display a plot of the results
        :return: the rolling regression results when ``rolling`` is True, otherwise the
            fitted single-regression results
        :raises TypeError: if the portfolio has more than one returns column
        '''
        if not (isinstance(portfolio, TimeDataFrame)
                or isinstance(portfolio, Portfolio)):
            portfolio = TimeDataFrame(portfolio)

        if len(portfolio.df_returns.columns) > 1:
            # TODO actually, do an equal weighting
            raise TypeError('Inappropriate argument type for portfolio')

        # Align the portfolio to the factors' frequency when they differ.
        if portfolio.frequency != self.factors_timedf.frequency:
            portfolio_copy = portfolio.set_frequency(self.factors_timedf.frequency, inplace=False) \
                .slice_dataframe(to_date=date, inplace=False)
        else:
            portfolio_copy = portfolio

        if benchmark_returns is None:  # if no benchmark specified, just use the one in the model
            timedf_merged = portfolio_copy.merge([self.factors_timedf],
                                                 inplace=False)
        else:
            # Replace the model's market factor by the benchmark's excess return.
            timedf_merged = portfolio_copy.merge(
                [self.factors_timedf, benchmark_returns], inplace=False)
            timedf_merged.df_returns.drop(['MKT-RF'], axis=1, inplace=True)
            # NOTE(review): the benchmark object itself is used as the rename key; if it is
            # a pd.Series (as annotated) it is not hashable — presumably its column name was
            # intended. Confirm against callers.
            timedf_merged.df_returns.rename(
                columns={benchmark_returns: 'MKT-RF'}, inplace=True)
            timedf_merged.df_returns['MKT-RF'] = timedf_merged.df_returns[
                'MKT-RF'] - timedf_merged.df_returns['RF']

        # First column is the portfolio (turned into an excess return by subtracting RF);
        # every remaining column is treated as a factor.
        portfolio_returns, factors_df = timedf_merged.df_returns.iloc[:, 0] - timedf_merged.df_returns['RF'], \
                                        timedf_merged.df_returns.iloc[:, 1:]

        portfolio_returns.rename('XsRet', inplace=True)
        factors_df.drop(['RF'], axis=1, inplace=True)  # don't need it anymore

        if rolling:
            # endogenous is the portfolio returns (y, dependent), exogenous is the factors (x, explanatory, independent)
            rols = RollingOLS(endog=portfolio_returns,
                              exog=factors_df,
                              window=regression_window)
            rres = rols.fit()
            # Rows before the first full window are NaN; drop them for the printout only.
            params = rres.params.dropna()
            print(params.tail())
            if show:
                rres.plot_recursive_coefficient(variables=factors_df.columns,
                                                figsize=(10, 6))
                plt.show()
            return rres
        else:
            # need to merge again to run regression on dataframe (with y being XsRet)
            df_stock_factor = pd.merge(portfolio_returns,
                                       factors_df,
                                       left_index=True,
                                       right_index=True)
            # Keep only the most recent `regression_window` rows.
            df_stock_factor = df_stock_factor.iloc[-regression_window:, :]
            # rename because will give syntax error with '-' when running regression
            df_stock_factor.rename(columns={'MKT-RF': 'MKT'}, inplace=True)
            # NOTE(review): the formula is built from factors_df.columns, which still
            # contains 'MKT-RF' (only df_stock_factor was renamed) — confirm how the formula
            # parser handles the '-'.
            reg = sm.ols(formula='XsRet ~ {}'.format(' + '.join(
                factors_df.columns)),
                         data=df_stock_factor).fit(cov_type='HAC',
                                                   cov_kwds={'maxlags': 1})
            print(reg.summary())
            if show:
                # One scatter plot per factor, laid out on a grid at most 3 columns wide.
                nrows, ncols = ceil(len(factors_df.columns) / 3), min(
                    len(factors_df.columns), 3)
                fig, axs = plt.subplots(nrows=nrows,
                                        ncols=ncols,
                                        figsize=(12, 5))
                plt.tight_layout()

                for i, factor in enumerate(df_stock_factor.iloc[:, 1:]):

                    # Pick the subplot: `axs` is a single Axes, a 1-D array or a 2-D array
                    # depending on the grid shape.
                    idx_x, idx_y = floor(i / 3), floor(i % 3)
                    ax = axs
                    if nrows > 1:
                        ax = axs[idx_x, ]
                    if ncols > 1:
                        ax = ax[idx_y]

                    # Fitted line for this factor over its observed range.
                    # assumes params are ordered intercept first, then the factors in
                    # column order — TODO confirm
                    X = np.linspace(df_stock_factor[factor].min(),
                                    df_stock_factor[factor].max())
                    Y = reg.params[i +
                                   1] * X + reg.params[0]  # beta * x + alpha
                    ax.plot(X, Y)
                    # plt.draw()
                    # plt.pause(0.001)

                    ax.scatter(df_stock_factor[factor],
                               df_stock_factor.iloc[:, 0],
                               alpha=0.3)
                    ax.grid(True)
                    ax.axis('tight')
                    # Show the original factor name on the axis.
                    ax.set_xlabel(factor if factor != 'MKT' else 'MKT-RF')
                    ax.set_ylabel('Portfolio Excess Returns')
                # plt.ion()
                plt.show()

            return reg
Esempio n. 17
0
 def get_rolling_linear_regression(self,
                                   df,
                                   window_size,
                                   target_name,
                                   hedge_name,
                                   autocorr_periods=0):
     """
     Add rolling regression and correlation statistics of target vs hedge to a copy of df.

     The contemporaneous block regresses the target on a constant and the hedge over a
     rolling window and stores r-squared, intercept, slope, the F-test p-value, the
     per-coefficient p-values and the rolling Pearson / Spearman correlations.

     When autocorr_periods is greater than 2, the same statistics are also computed for
     every shift 1 .. autocorr_periods - 1, pairing the current value of one series with
     the other series shifted by -shift rows, in both directions.

     :param df: frame containing the target and hedge columns
     :param window_size: rolling window length used everywhere
     :param target_name: name of the target column
     :param hedge_name: name of the hedge column
     :param autocorr_periods: exclusive upper bound of the shifts; ignored unless > 2
     :return: the augmented frame without the helper columns and without rows holding NaN
     """
     from statsmodels.regression.rolling import RollingOLS
     # Work on a copy that carries the 'const' column needed by the regressions.
     df_lr = sm.add_constant(df)
     # Ranks over the whole sample, used for the Spearman-style correlation below.
     df_lr[target_name + 'Rank'] = df_lr[target_name].rank()
     df_lr[hedge_name + 'Rank'] = df_lr[hedge_name].rank()
     if autocorr_periods > 2:
         for lag_p in range(1, autocorr_periods):
             # shift(-lag_p) aligns each row with the value lag_p rows later.
             df_lr['SpearmanCorr_hedge_lag' + str(lag_p)] = df_lr[target_name].rank().rolling(window=window_size). \
                 corr(df_lr[hedge_name].rank().shift(-lag_p))
             df_lr['SpearmanCorr_tgt_lag' + str(lag_p)] = df_lr[hedge_name].rank().rolling(window=window_size). \
                 corr(df_lr[target_name].rank().shift(-lag_p))
             df_lr['PearsonCorr_hedge_lag' + str(lag_p)] = df_lr[target_name]. \
                 rolling(window=window_size).corr(other=df_lr[hedge_name].shift(-lag_p))
             df_lr['PearsonCorr_tgt_lag' + str(lag_p)] = df_lr[hedge_name]. \
                 rolling(window=window_size).corr(other=df_lr[target_name].shift(-lag_p))
             # Target regressed on the shifted hedge, and hedge on the shifted target.
             model_hedge_lagp = RollingOLS(endog=df_lr[target_name].values,
                                           exog=df_lr[['const', hedge_name
                                                       ]].shift(-lag_p),
                                           window=window_size)
             model_tgt_lagp = RollingOLS(endog=df_lr[hedge_name].values,
                                         exog=df_lr[['const', target_name
                                                     ]].shift(-lag_p),
                                         window=window_size)
             rres_hedge_lagp = model_hedge_lagp.fit()
             rres_tgt_lagp = model_tgt_lagp.fit()
             intercept_lagp = rres_hedge_lagp.params['const']
             slope_lagp = rres_hedge_lagp.params[hedge_name]
             r_squared_lagp = rres_hedge_lagp.rsquared
             df_lr['intercept_hedge_lag' + str(lag_p)] = intercept_lagp
             # NOTE(review): this column name is spelled differently from its siblings
             # ('interecept', 'lap'); it is a runtime key, so confirm consumers before
             # renaming it.
             df_lr['interecept_tgt_lap' +
                   str(lag_p)] = rres_tgt_lagp.params['const']
             df_lr['slope_hedge_lag' + str(lag_p)] = slope_lagp
             df_lr['slope_tgt_lag' +
                   str(lag_p)] = rres_tgt_lagp.params[target_name]
             df_lr['r_squared_hedge_lag' + str(lag_p)] = r_squared_lagp
             df_lr['r_squared_tgt_lag' +
                   str(lag_p)] = rres_tgt_lagp.rsquared
     # Contemporaneous regression of the target on a constant and the hedge.
     model = RollingOLS(endog=df_lr[target_name].values,
                        exog=df_lr[['const', hedge_name]],
                        window=window_size)
     rres = model.fit()
     intercept = rres.params['const']
     slope = rres.params[hedge_name]
     r_squared = rres.rsquared
     df_lr['SpearmanCorr'] = df_lr[target_name + 'Rank'].rolling(
         window=window_size).corr(df_lr[hedge_name + 'Rank'])
     df_lr['PearsonCorr'] = df_lr[target_name]. \
         rolling(window=window_size).corr(other=df_lr[hedge_name])
     df_lr['r_squared'] = r_squared
     df_lr['intercept'] = intercept
     df_lr['slope'] = slope
     df_lr['linreg_f_stat_p_val'] = rres.f_pvalue
     # Split the p-values into one block per regressor; the names follow the exog
     # column order ('const' first, then the hedge).
     p_val_colnames = ['intercept_p_val', 'slope_p_val']
     arrOfArr = np.split(rres.pvalues, 2, axis=1)
     for i in range(len(p_val_colnames)):
         b = np.array(arrOfArr[i]).flatten()
         c = pd.Series(b, index=df_lr.index)
         # Rows dropped here come back as NaN when the column is assigned, because the
         # assignment aligns on the index.
         c.dropna(inplace=True)
         df_lr[p_val_colnames[i]] = c
     # Remove the helper columns, then every row that still holds a NaN.
     df_lr = df_lr.drop(
         columns=[target_name + 'Rank', hedge_name + 'Rank', 'const'],
         axis=1).dropna()
     return df_lr
Esempio n. 18
0
                                     start="1-1-1926")[0]
industries.head()

# The first model estimated is a rolling version of the CAPM that regresses
# the excess return of Technology sector firms on the excess return of the
# market.
#
# The window is 60 months, so results are available after the first 60
# (`window`) months. The first 59 (`window - 1`) estimates are all `nan`
# filled.

endog = industries.HiTec - factors.RF.values
exog = sm.add_constant(factors["Mkt-RF"])
rols = RollingOLS(endog, exog, window=60)
rres = rols.fit()
# Work on a copy re-indexed 1, 2, ... so that row labels count observations.
params = rres.params.copy()
params.index = np.arange(1, params.shape[0] + 1)
params.head()

# Rows around the point where the first full window becomes available.
params.iloc[57:62]

params.tail()

# We next plot the market loading along with a 95% point-wise confidence
# interval.
# Passing `variables=["Mkt-RF"]` restricts the plot to the market loading, so
# the constant column is not drawn.

fig = rres.plot_recursive_coefficient(variables=["Mkt-RF"], figsize=(14, 6))

# Next, the model is expanded to include all three factors, the excess
Esempio n. 19
0
    def calibrate(self, windowOLS, copula_lookback, recalibrate_n, **kwargs):
        """Calibrate the rolling hedge ratio and the copula mispricing indices.

        Steps:

        1. Fit a rolling OLS of ``y`` on ``x`` (no constant) and store the
           slope in ``self.beta``.
        2. Choose the copula family ('clayton', 'frank' or 'gumbel') with the
           lowest AIC on the log returns of the whole sample.
        3. Walk forward in steps of ``recalibrate_n`` rows. At each step the
           copula parameter and the empirical distributions are re-estimated
           on the previous ``copula_lookback`` rows, and the mispricing
           indices are computed for the next ``recalibrate_n`` rows.

        Rows never covered by step 3 keep the neutral value 0.5.

        :param windowOLS: rolling window of the OLS (cast to int)
        :param copula_lookback: rows used to re-estimate the copula (cast to int)
        :param recalibrate_n: rows between two re-estimations (cast to int)
        :param kwargs: accepted for interface compatibility; not used here

        Sets ``self.windowOLS``, ``self.copula_lookback``,
        ``self.recalibrate_n``, ``self.beta``, ``self.startIdx``,
        ``self.MI_u_v`` and ``self.MI_v_u``.
        """
        self.windowOLS = int(windowOLS)
        self.copula_lookback = int(copula_lookback)
        self.recalibrate_n = int(recalibrate_n)
        # Use the integer-cast values below so that float arguments cannot
        # end up as slice or range bounds.
        lookback = self.copula_lookback
        step = self.recalibrate_n

        df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})

        model = RollingOLS(endog=df['y'], exog=df['x'], window=self.windowOLS)
        rres = model.fit()

        self.beta = rres.params['x'].values.reshape(-1, )

        # Copula decision:
        df['x_log_ret'] = np.log(df.x) - np.log(df.x.shift(1))
        df['y_log_ret'] = np.log(df.y) - np.log(df.y.shift(1))

        # The first log return is NaN. Left in place, it makes kendalltau
        # return NaN under its default NaN policy, which makes the AIC
        # comparison below meaningless. Select the copula on complete rows only.
        valid = df[['x_log_ret', 'y_log_ret']].dropna()

        # Convert the two returns series to two uniform values u and v using the empirical distribution functions
        ecdf_x, ecdf_y = ECDF(valid.x_log_ret), ECDF(valid.y_log_ret)
        u, v = ecdf_x(valid.x_log_ret), ecdf_y(valid.y_log_ret)

        # Compute the Akaike Information Criterion (AIC) for different copulas and choose copula with minimum AIC
        tau = stats.kendalltau(valid.x_log_ret, valid.y_log_ret)[0]  # estimate Kendall's rank correlation
        AIC = {}  # key: copula family, value: [theta, AIC]

        for family in ['clayton', 'frank', 'gumbel']:
            param = self._parameter(family, tau)
            lpdf = [self._lpdf_copula(family, param, x, y) for (x, y) in zip(u, v)]
            # Replace nan with zero and inf with finite numbers in lpdf list
            lpdf = np.nan_to_num(lpdf)
            loglikelihood = sum(lpdf)
            AIC[family] = [param, -2 * loglikelihood + 2]
        # Choose the copula with the minimum AIC
        copula = min(AIC.items(), key=lambda item: item[1][1])[0]

        self.startIdx = lookback + 1  # Because first is NAN

        df['MI_u_v'] = 0.5
        df['MI_v_u'] = 0.5
        col_u_v = df.columns.get_loc('MI_u_v')
        col_v_u = df.columns.get_loc('MI_v_u')

        for i in range(self.startIdx, len(df) - step, step):

            hist = slice(i - lookback, i)
            forw = slice(i, i + step)

            x_hist = df.x_log_ret.iloc[hist]
            y_hist = df.y_log_ret.iloc[hist]
            x_forw = df.x_log_ret.iloc[forw]
            y_forw = df.y_log_ret.iloc[forw]

            # Estimate Kendall's rank correlation
            tau = stats.kendalltau(x_hist, y_hist)[0]

            # Estimate the copula parameter: theta
            theta = self._parameter(copula, tau)

            # Simulate the empirical distribution function for returns of selected trading pair
            ecdf_x, ecdf_y = ECDF(x_hist), ECDF(y_hist)

            # Now get future values
            a, b = self._misprice_index(copula, theta, ecdf_x(x_forw), ecdf_y(y_forw))

            # Write through a single ``iloc`` call on the frame. The chained
            # form ``df.MI_u_v.iloc[...] = a`` may assign to a temporary and
            # is not applied under pandas copy-on-write.
            df.iloc[forw, col_u_v] = a
            df.iloc[forw, col_v_u] = b

        self.MI_u_v = df.MI_u_v
        self.MI_v_u = df.MI_v_u