Python RollingOLS Examples, statsmodels.regression.rolling.RollingOLS Python Examples

Example #1

0

Show file

File: liquidity_costs.py Project: PhilipMachineLearning/arch_config

def compute_rolling_regression(
    window_size: int, endog: pd.DataFrame, exog: pd.DataFrame
):
    """Fit a rolling OLS of ``endog`` on ``exog`` for a pre-processed LOB.

    Following Amaya, Rochen et al (2015), the slope coefficient is read as
    the liquidity cost and the constant term (alpha) as the intercept.

    Ref:
        https://www.statsmodels.org/dev/examples/notebooks/generated/rolling_ls.html

        Calculation described in "Distilling Liquidity Costs from Limit Order Books"
        by Amaya, Rochen et al (2015).
        Paper source: https://www.sciencedirect.com/science/article/abs/pii/S0378426618301353

    :window_size: Size of the window
    :endog: Dependent variable - y
    :exog: Independent variable - x
    :return: rols_results (instance of statsmodels results object), rols_params (pd.DataFrame)
    """
    # The constant is appended last, so the parameter columns come out in
    # (slope, constant) order, matching the names assigned below.
    design = sm.add_constant(exog, prepend=False)
    rols_results = RollingOLS(endog, design, window=window_size).fit()
    rols_params = rols_results.params
    rols_params.columns = ["liquidity_cost", "intercept"]
    return rols_results, rols_params

Example #2

0

Show file

File: cointAnalysis.py Project: petemik/tradingPlatform

 def playing_with_rolling(self,
                          pair,
                          fromDate="2015-01-01",
                          toDate="2018-01-01"):
     """Plot the rolling z-score of a pair's spread and its rolling hedge ratio.

     The first symbol of ``pair`` is regressed on the second (plus a
     constant), once over the whole date range and once over a rolling
     180-observation window. The top axis shows the rolling-hedged spread
     standardised by its own rolling mean and standard deviation; the bottom
     axis shows the rolling slope coefficient.

     :param pair: two symbols, both keys of ``self.portfolio``
     :param fromDate: start of the slice taken from each price series
     :param toDate: end of the slice taken from each price series
     """
     symbol1 = pair[0]
     symbol2 = pair[1]
     data1 = self.portfolio[symbol1][self.analysisOn][fromDate:toDate]
     data2 = self.portfolio[symbol2][self.analysisOn][fromDate:toDate]
     window = 180
     model = sm.OLS(data1, sm.add_constant(data2))
     model2 = RollingOLS(data1, sm.add_constant(data2), window=window)
     results = model.fit()
     results2 = model2.fit()
     # The fitted params are label-indexed (constant first, slope second);
     # use .iloc for positional access instead of the deprecated integer
     # fallback of Series.__getitem__.
     # Full-sample spread is kept for comparison only; it is not plotted.
     spread = data1 - results.params.iloc[1] * data2
     # NOTE(review): assumes the slope column is called "adjusted_close",
     # i.e. that self.analysisOn == "adjusted_close" — confirm.
     spread_rolling = data1 - results2.params.adjusted_close * data2
     spread_mean = spread_rolling.rolling(window=window).mean()
     spread_std = spread_rolling.rolling(window=window).std()
     fig, axs = plt.subplots(2)
     axs[0].plot((spread_rolling - spread_mean) / spread_std)
     axs[0].xaxis.set_major_locator(plt.MaxNLocator(15))
     axs[1].plot(results2.params.adjusted_close['2013-03-15':])
     axs[1].xaxis.set_major_locator(plt.MaxNLocator(15))
     plt.show()

Example #3

0

Show file

def test_params_only(basic_data, method):
    """Check that ``params_only=True`` yields the same parameters as a full fit."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    full_fit = model.fit(method=method, params_only=False)
    slim_fit = model.fit(method=method, params_only=True)
    # assert_allclose tolerates the small numerical differences seen on x86 platforms
    assert_allclose(slim_fit.params, full_fit.params)

Example #4

0

Show file

def test_methods(basic_data, params_only):
    """Check that the "inv", "lstsq" and "pinv" solvers agree on the parameters."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    fitted = {
        solver: model.fit(method=solver, params_only=params_only)
        for solver in ("inv", "lstsq", "pinv")
    }
    reference = fitted["inv"].params
    for solver in ("lstsq", "pinv"):
        assert_allclose(reference, fitted[solver].params)

Example #5

0

Show file

def test_methods(basic_data):
    """Check that all three estimation methods produce the same parameters."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    inv_fit, lstsq_fit, pinv_fit = (
        model.fit(method=solver) for solver in ("inv", "lstsq", "pinv")
    )
    assert_allclose(inv_fit.params, lstsq_fit.params)
    assert_allclose(inv_fit.params, pinv_fit.params)

Example #6

0

Show file

def test_expanding(basic_data):
    """Check where an expanding fit with ``min_nobs=50`` starts producing estimates."""
    endog, exog, _ = basic_data
    fit = RollingOLS(endog, exog, 150, min_nobs=50, expanding=True).fit()
    estimates = np.asarray(fit.params)
    # Nothing can be estimated before 50 rows have been seen.
    assert np.all(np.isnan(estimates[:49]))
    # First row at which 50 fully finite regressor rows have accumulated.
    complete_rows = np.all(np.isfinite(np.asarray(exog)), axis=1)
    first = np.where(np.cumsum(complete_rows) >= 50)[0][0]
    assert np.all(np.isfinite(estimates[first:]))

Example #7

0

Show file

def rolling_ols_model():
    """Fit a RollingOLS model on the "longley" dataset and bundle the results.

    Returns a ``ModelWithResults`` holding the fitted results, the estimator
    and the design matrix (regressors plus an appended constant).
    """
    # Rolling Ordinary Least Squares (Rolling OLS)
    from statsmodels.regression.rolling import RollingOLS

    longley = get_dataset("longley")
    design = sm.add_constant(longley.exog, prepend=False)
    estimator = RollingOLS(longley.endog, design)
    fitted = estimator.fit(reset=50)

    return ModelWithResults(model=fitted, alg=estimator, inference_dataframe=design)

Example #8

0

Show file

    def processcsv(self, datafile):
        """Load a cuvette measurement CSV and attach rolling-regression slopes.

        The file is read without a header row as four columns (Cuvette, Time,
        Temperature, Absorbance). Time is multiplied by 60, Temperature is
        shifted by 273.15, and a Concentration column is derived from the
        absorbance and ``self.ExtCoeff``. For every cuvette a 4-point rolling
        OLS of Concentration on Time (plus constant) is fitted and its
        parameters are joined back onto the data.

        :param datafile: path or buffer accepted by ``pandas.read_csv``
        :return: DataFrame of the measurements joined with the regression
            parameters (the slope lands in ``Time_regression``, stored as an
            absolute value); rows containing NaN are dropped
        """
        df = pd.read_csv(
            datafile,
            sep=",",
            header=None,
            names=("Cuvette", "Time", "Temperature", "Absorbance"
                   ))  # Assumes a csv-file following the named columns
        # calculate time in seconds instead of minutes (as the software supplies)
        df["Time"] = df["Time"] * 60
        # calculate temperature in Kelvin instead of degrees Celsius
        df["Temperature"] = df["Temperature"] + 273.15
        if self.ProductAbsorbing:
            # Cuvette numbers are 1-based indices into self.startingconcentrations
            df["StartingConcentration"] = [
                self.startingconcentrations[x - 1] for x in df["Cuvette"]
            ]
            # calculate concentration depending on start concentration and depletion of substrate
            df["Concentration"] = df["StartingConcentration"] - \
                df["Absorbance"]/self.ExtCoeff
        else:
            # calculate concentration directly from absorbance
            df["Concentration"] = df["Absorbance"] / self.ExtCoeff
        # The rolling regression leaves NaN for the first window.
        # To keep the low-temperature points available, the dataframe is
        # reversed before this first pass.
        df.sort_index(ascending=False, inplace=True)
        cuvettes = df.groupby("Cuvette")
        regression = pd.DataFrame()  # Build up a dataframe cuvette by cuvette
        for cuvette in cuvettes:
            # groupby yields (key, sub-frame) pairs; only the sub-frame is used
            cuvettedf = cuvette[1]
            Velocity = sm.add_constant(cuvettedf["Time"])
            Concentration = cuvettedf["Concentration"]
            movingregression = RollingOLS(Concentration, Velocity,
                                          window=4).fit(params_only=True)
            regression = pd.concat([regression, movingregression.params])
        # NOTE(review): this result is overwritten by the second join below
        # before it is ever read, so the reversed pass currently has no effect
        # on the returned frame — confirm whether the two passes were meant to
        # be combined.
        dfwregression = df.join(regression, rsuffix="_regression")
        # Repeat rolling regression other direction, double the number of points
        df.sort_index(ascending=True, inplace=True)
        cuvettes = df.groupby("Cuvette")
        regression = pd.DataFrame()  # Build up a dataframe cuvette by cuvette
        for cuvette in cuvettes:
            cuvettedf = cuvette[1]
            Velocity = sm.add_constant(cuvettedf["Time"])
            Concentration = cuvettedf["Concentration"]
            movingregression = RollingOLS(Concentration, Velocity,
                                          window=4).fit(params_only=True)
            regression = pd.concat([regression, movingregression.params])

        # The regression's "Time" column clashes with df's, so it is suffixed
        # and becomes "Time_regression".
        dfwregression = df.join(regression, rsuffix="_regression")
        dfwregression.dropna(inplace=True)  # Remove the NaN rows
        # Whether absorbance is increasing or decreasing, velocities should always be positive.
        dfwregression["Time_regression"] = np.abs(
            dfwregression["Time_regression"])
        return dfwregression

Example #9

0

Show file

    def _calculateTi(self):
        """
        Calculates the technical indicator for the given input data. The input
        data are taken from an attribute of the parent class.

        Returns:
            pandas.DataFrame: The calculated indicator. Index is of type
            ``pandas.DatetimeIndex``. It contains two columns, the
            ``upper_band``, ``lower_band``.

        Raises:
            NotEnoughInputData: Not enough data for calculating the indicator.
        """

        n_rows = len(self._input_data.index)

        # Not enough data for the requested period
        if n_rows < self._period:
            raise NotEnoughInputData('Projection Bands', self._period, n_rows)

        high = self._input_data['high']
        low = self._input_data['low']

        # Both regressions use the same regressor: the row number plus a
        # constant, so build it once.
        exog = sm.add_constant(list(range(n_rows)))

        # Calculate n-periods slope of high values
        high_slope = RollingOLS(
            endog=high.fillna(value=0, inplace=False).to_list(),
            exog=exog,
            window=self._period).fit(params_only=True).params[:, 1]

        # Calculate n-periods slope of low values
        low_slope = RollingOLS(
            endog=low.fillna(value=0, inplace=False).to_list(),
            exog=exog,
            window=self._period).fit(params_only=True).params[:, 1]

        high_values = high.values
        low_values = low.values

        # Collect the bands in plain lists and build the frame once at the
        # end. Writing through ``Series.values[i]`` relies on getting a
        # writable view of the frame, which pandas Copy-on-Write does not
        # provide.
        upper_band = [float('nan')] * n_rows
        lower_band = [float('nan')] * n_rows

        # Calculate the projection bands
        for i in range(self._period - 1, n_rows):

            upper_band[i] = max(
                [high_values[i]] +
                [(j * high_slope[i]) + high_values[i - j]
                 for j in range(1, self._period)])

            lower_band[i] = min(
                [low_values[i]] +
                [(j * low_slope[i]) + low_values[i - j]
                 for j in range(1, self._period)])

        pbs = pd.DataFrame({'upper_band': upper_band,
                            'lower_band': lower_band},
                           index=self._input_data.index,
                           dtype='float64')

        return pbs.round(4)

Example #10

0

Show file

def get_rolling_beta(df: pd.DataFrame, hist: pd.DataFrame, mark: pd.DataFrame,
                     n: int) -> pd.DataFrame:
    """Turns a holdings portfolio into a rolling beta dataframe

    Each holding's weight (its share of the row total) is multiplied by that
    asset's 252-observation rolling beta against the market; the products
    are summed into a ``total`` column.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe of daily holdings
    hist : pd.DataFrame
        A dataframe of historical returns
    mark : pd.DataFrame
        The dataframe of market performance
    n : int
        The period to get returns for

    Returns
    ----------
    final : pd.DataFrame
        Dataframe with rolling beta
    """
    df = df["Holding"]
    uniques = df.columns.tolist()
    # Normalise every row to portfolio weights.
    res = df.div(df.sum(axis=1), axis=0)
    res = res.fillna(0)
    comb = pd.merge(hist["Close"],
                    mark["Market"],
                    how="outer",
                    left_index=True,
                    right_index=True)
    comb = comb.ffill()
    # The market regressor does not depend on the asset, so build it once.
    exog = sm.add_constant(comb["Close"])
    for col in hist["Close"].columns:
        rres = RollingOLS(comb[col], exog, window=252).fit()
        res[f"beta_{col}"] = rres.params["Close"]
    final = res.ffill()
    for uni in uniques:
        final[f"prod_{uni}"] = final[uni] * final[f"beta_{uni}"]
    beta_cols = [f"beta_{x}" for x in uniques]
    dropped = final[beta_cols].copy()
    # Only the weight*beta products remain and are summed into "total".
    final = final.drop(columns=beta_cols + uniques)
    final["total"] = final.sum(axis=1)
    final = final[final.index >= datetime.now() - timedelta(days=n + 1)]
    # Re-attach the individual betas next to the weighted products.
    comb = pd.merge(final,
                    dropped,
                    how="left",
                    left_index=True,
                    right_index=True)
    return comb

Example #11

0

Show file

    def calibrate(self, windowOLS, **kwargs):
        """Estimate the rolling hedge ratio of ``self.y`` on ``self.x``.

        Fits a rolling OLS of y on x plus a constant column and stores the
        slope series in ``self.beta`` as a flat array. The window actually
        used is stored in ``self.windowOLS``.

        :param windowOLS: requested rolling window length; clamped to the
            number of observations
        :param kwargs: accepted for interface compatibility and ignored
        """
        # The original wrote ``len(self.y - 1)``: that subtracts 1 from every
        # element and then takes the length, so the clamp was len(self.y).
        # The same clamp is kept here without the stray arithmetic (which
        # also raised on plain lists).
        # NOTE(review): ``len(self.y) - 1`` may have been intended — confirm.
        self.windowOLS = min(windowOLS, len(self.y))

        df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})

        model = RollingOLS(endog=df['y'],
                           exog=df[['x', 'c']],
                           window=self.windowOLS)
        rres = model.fit()

        self.beta = rres.params['x'].values.reshape(-1, )

Example #12

0

Show file

def calc_aggregates(data, days):
    """Summarise the BTC/ETH spread under a rolling hedge ratio.

    BTC-GBP closes are regressed on ETH-GBP closes (no constant) over a
    rolling window of ``days`` observations; the spread is BTC minus
    beta times ETH.

    :return: dict with the spread's "mean" and "std" and the latest "beta"
    """
    btc_close = data["BTC-GBP"].Close
    eth_close = data["ETH-GBP"].Close

    fitted = RollingOLS(btc_close, eth_close, window=days).fit()
    rolling_beta = fitted.params.Close
    rolling_beta.name = "beta"

    spread = btc_close - rolling_beta * eth_close

    summary = {}
    summary["mean"] = spread.mean()
    summary["std"] = spread.std()
    summary["beta"] = rolling_beta.iloc[-1]
    return summary

Example #13

0

Show file

File: growthratefit_india.py Project: mansueto-institute/covid19

def run_regressions(totals: pd.DataFrame,
                    window: int = 3,
                    infectious_period: float = 4.5) -> pd.DataFrame:
    """Estimate rolling growth rates and reproductive numbers.

    Fits ``logdelta ~ time`` over a rolling window and derives, per row, the
    growth rate (``gradient``), its +/- 2 standard-error bounds
    (``egrowthrateM`` / ``egrowthratem``) and the corresponding reproductive
    numbers ``R``, ``RM``, ``Rm`` computed as
    ``rate * infectious_period + 1``.

    :param totals: frame with ``logdelta`` and ``time`` columns
    :param window: rolling window length
    :param infectious_period: multiplier converting a growth rate into R
    :return: frame of parameters, standard errors, R-squared and derived
        quantities, plus ``date`` (the index) and ``days`` (``totals.time``)
    """
    # run rolling regressions and get parameters
    model = RollingOLS.from_formula(formula="logdelta ~ time",
                                    window=window,
                                    data=totals)
    rolling = model.fit(method="lstsq")

    growthrates = rolling.params.join(rolling.bse, rsuffix="_stderr")
    growthrates["rsq"] = rolling.rsquared
    growthrates.rename(
        lambda s: s.replace("time", "gradient").replace("const", "intercept"),
        axis=1,
        inplace=True)

    # calculate growth rates
    upper_rate = growthrates.gradient + 2 * growthrates.gradient_stderr
    lower_rate = growthrates.gradient - 2 * growthrates.gradient_stderr
    growthrates["egrowthrateM"] = upper_rate
    growthrates["egrowthratem"] = lower_rate
    growthrates["R"] = growthrates.gradient * infectious_period + 1
    # The bounds on R apply the same transform as R itself to the bounded
    # growth rate. The previous expression multiplied only the standard-error
    # term by the infectious period (missing parentheses).
    growthrates["RM"] = upper_rate * infectious_period + 1
    growthrates["Rm"] = lower_rate * infectious_period + 1
    growthrates["date"] = growthrates.index
    growthrates["days"] = totals.time

    return growthrates

Example #14

0

Show file

    def _calculateTi(self):
        """
        Calculates the technical indicator for the given input data. The input
        data are taken from an attribute of the parent class.

        Returns:
            pandas.DataFrame: The calculated indicator. Index is of type
            ``pandas.DatetimeIndex``. It contains one column, the ``lri``.

        Raises:
            NotEnoughInputData: Not enough data for calculating the indicator.
        """

        n_rows = len(self._input_data.index)

        # Not enough data for the requested period
        if n_rows < self._period:
            raise NotEnoughInputData('Linear Regression Indicator',
                                     self._period, n_rows)

        # n-period Rolling OLS of close on the row number (plus constant)
        rolling_ols = RollingOLS(
            endog=self._input_data['close'].fillna(value=0,
                                                   inplace=False).to_list(),
            exog=sm.add_constant(list(range(n_rows))),
            window=self._period).fit(params_only=True)

        params = rolling_ols.params

        # Fitted value of each window's regression line at its last point:
        # intercept + slope * row number. Built as a list and placed into
        # the frame in one step; writing through ``Series.values[i]`` relies
        # on a writable view, which pandas Copy-on-Write does not provide.
        fitted = [round(params[i][0] + i * params[i][1], 4)
                  for i in range(n_rows)]

        lri = pd.DataFrame({'lri': fitted},
                           index=self._input_data.index,
                           dtype='float64')

        return lri

Example #15

0

Show file

File: factorize.py Project: rebootshen/Quantitative

    def capm(self, close, market, window_length_return, window_length_beta):
        """Compute a rolling CAPM beta for every column of ``close``.

        Market log returns, restricted to the date span of ``close``, form
        the regressor (with a constant). Each asset's log returns are
        regressed on it over a rolling window of ``window_length_beta``.

        :return: DataFrame with one beta column per ticker in ``close``
        """
        market_returns = self.log_Returns(market, window_length_return)
        r_market = market_returns.loc[close.index[0]:close.index[-1]]

        design = sm.add_constant(r_market)

        cap_beta = pd.DataFrame(columns=close.columns)

        for tick in close.columns:
            asset_returns = self.log_Returns(close[[tick]], window_length_return)

            fitted = RollingOLS(asset_returns, design,
                                window=window_length_beta).fit()
            coefficients = fitted.params.dropna()
            # add_constant prepends the constant, so the slope is second.
            coefficients.columns = ['intercept', 'beta']
            cap_beta.loc[:, tick] = coefficients['beta']

        return cap_beta

Example #16

0

Show file

    def computeForDay(self, strategy, timeSeriesTick, timeSeriesTrade):
        """Compute the rolling price-versus-time regression for one day.

        The tick series is resampled to ``self.resamplePeriod`` seconds
        (first value per bin, gaps forward-filled). The "price" column is
        then regressed on elapsed seconds plus a constant over a rolling
        window of ``self.period`` rows.

        :param strategy: unused here
        :param timeSeriesTick: datetime-indexed tick series
            (presumably named "price" — confirm against caller)
        :param timeSeriesTrade: unused here
        :return: ``{"betaSeries": <rolling parameters>}``; the parameters
            are also stored on ``self.betaSeries``
        """
        timeSeriesReg = timeSeriesTick.resample(
            str(int(self.resamplePeriod)) + "S"
        ).first()
        # ``fillna(method="pad")`` is deprecated; ``ffill`` is the same operation.
        timeSeriesReg = timeSeriesReg.ffill()
        timeTable = timeSeriesReg.to_frame()
        timeTable["second"] = timeSeriesReg.index.astype(np.int64)
        # Integer timestamps -> seconds elapsed since the first sample.
        # ``.iloc[0]`` is used because the Series is datetime-indexed and a
        # bare ``[0]`` relies on the deprecated positional fallback.
        timeTable["second"] = (
            timeTable["second"] - timeTable["second"].iloc[0]
        ) / math.pow(10, 9)

        mod = RollingOLS(
            timeTable["price"],
            add_constant(timeTable["second"], prepend=False),
            window=self.period,
        )
        self.betaSeries = mod.fit().params
        return {"betaSeries": self.betaSeries}

Example #17

0

Show file

File: factor.py Project: wingwingz/AFP

def calc_beta_ret(df, market_port_ret, window=52):
    """Estimate rolling betas between the market portfolio and each column.

    For every column of ``df`` the market portfolio return is regressed on
    that column (plus a constant) over a rolling window, and the slope
    series is kept.

    :param df: one return column per country
    :param market_port_ret: market portfolio return (the dependent variable)
    :param window: rolling window length
    :return: DataFrame with one beta column per column of ``df``; an empty
        DataFrame when ``df`` has no columns
    """
    # Find country beta's through rolling regression
    y = market_port_ret
    rolling_betas = {}
    for c in df.columns:
        X = sm.add_constant(df[c])
        model = RollingOLS(y, X, window)
        rolling_res = model.fit(params_only=True)
        rolling_betas[c] = rolling_res.params.dropna()

    # Put all beta's for every country and every date in a dataframe.
    # ``DataFrame.append`` no longer exists in pandas >= 2.0, so the slope
    # columns are combined with a single concat instead.
    beta_columns = [params[name] for name, params in rolling_betas.items()]
    if not beta_columns:
        return pd.DataFrame()

    return pd.concat(beta_columns, axis=1)

Example #18

0

Show file

def test_formula():
    """Check the formula interface of RollingWLS against the array interface."""
    endog, exog, weights = gen_data(250, 3, True, pandas=True)
    formula = "y ~ 1 + x0 + x1 + x2"
    frame = pd.concat([endog, exog], axis=1)

    formula_model = RollingWLS.from_formula(
        formula, window=100, data=frame, weights=weights
    )
    formula_res = formula_model.fit()
    direct_res = RollingWLS(endog, exog, window=100).fit()
    assert_allclose(formula_res.params, direct_res.params)

    # Smoke test: the OLS formula path only has to fit without raising.
    RollingOLS.from_formula(formula, window=100, data=frame).fit()

Example #19

0

Show file

def _estimate_trailing_capm(
    returns: pd.Series,
    benchmark: pd.Series,
    rf: float,
    window: int,
) -> RollingRegressionResults:
    """Fit a rolling regression of adjusted returns on the adjusted benchmark.

    Both series are passed through ``adjust(..., rf)`` and ``align`` first;
    the benchmark gets a constant column before fitting.
    """
    adjusted_returns = adjust(returns, rf)
    adjusted_benchmark = adjust(benchmark, rf)
    returns, benchmark = align(adjusted_returns, adjusted_benchmark)

    endog = returns.to_numpy()
    design = sm.add_constant(benchmark.to_numpy())
    model = RollingOLS(endog, design, window=window)
    return model.fit()

Example #20

0

Show file

File: K.py Project: pydemic/pydemic

def rolling_OLS_Kt(curves, window=14) -> pd.DataFrame:
    """
    A Rolling window Ordinary Least Squares inference of the derivative of the
    logarithm of the number of cases.

    {args}
    """

    # ``window`` is either one length used for both steps or a
    # (smoothing, regression) pair.
    if isinstance(window, Sequence):
        smooth, span = window
    else:
        smooth = span = window
    daily = diff(cases(curves), smooth=smooth)

    log_daily = np.log(daily).values
    steps = np.arange(len(log_daily))

    # Fit a plain OLS on the first ``span`` points; its slope is used to
    # extrapolate the series into the past.
    seed_fit = sm.OLS(log_daily[:span],
                      sm.add_constant(steps[:span]),
                      missing="drop").fit()
    slope = seed_fit.params[1]

    # Prepend ``span`` synthetic observations on a line through the first
    # real point.
    past_steps = np.arange(steps[0] - span, steps[0])
    past_values = slope * (past_steps - steps[0]) + log_daily[0]

    steps = np.concatenate([past_steps, steps])
    log_daily = np.concatenate([past_values, log_daily])

    # Use Rolling OLS to obtain an inference to the growth ratio
    rolling_fit = RollingOLS(log_daily,
                             sm.add_constant(steps),
                             window=span,
                             missing="drop").fit()

    # Drop the synthetic prefix; column 1 is the slope.
    Kt = rolling_fit.params[span:, 1]
    low, high = rolling_fit.conf_int()[span:, :, 1].T

    columns = {"Kt": Kt, "Kt_low": low, "Kt_high": high}
    return pd.DataFrame(columns, index=curves.index)

Example #21

0

Show file

def rollingRegressionWrap(X_colName = ["VMG","MKT"],Y_colName = ["monthlyReturn"],data_rollingReg= data_rollingReg, refData = stockReturnData, refCol = ["SID"]):
    """Run a 36-row rolling regression for every asset and stack the results.

    For each unique id in ``refData[refCol]`` the rows of ``data_rollingReg``
    with that SID are regressed (``Y_colName`` on ``X_colName`` plus a
    constant). Adjusted R-squared, the rolling Jarque-Bera p-value of
    ``monthlyReturn``, the t-statistics and the coefficients are collected.

    :param X_colName: names of the explanatory columns
    :param Y_colName: name(s) of the dependent column
    :param data_rollingReg: frame holding the regression data, with ``SID``
        and ``Trading_Month`` columns
    :param refData: frame the list of asset ids is taken from
    :param refCol: column(s) of ``refData`` holding the asset ids
    :return: tuple ``(rollingResult_df, invalid_SID)``; ``invalid_SID`` lists
        ``[asset, number_of_rows]`` for every asset whose regression failed
    """
    # assets whose regression could not be computed
    invalid_SID = []

    SID_list = np.unique(refData[refCol])

    # Output layout: identifiers, fit diagnostics, t-stats, then coefficients.
    t_StatCol = [val + "_t_Stat" for val in X_colName]
    newColNames = ["Trading_Month", "SID", "adjusted_rSquared", "JB_pValue"]
    newColNames.extend(t_StatCol)
    newColNames.extend(X_colName)

    rollingResult_df = pd.DataFrame(columns = newColNames)

    progress_bar = tqdm.tqdm(SID_list)
    for asset in progress_bar:
        # Row count reported on failure. It is tracked separately so the
        # handler never reads an unbound or stale ``Y`` from an earlier
        # iteration.
        n_months = 0
        try:
            subDataSet = pd.DataFrame(data_rollingReg[data_rollingReg["SID"]==asset])
            n_months = subDataSet.shape[0]
            Y = subDataSet[Y_colName]
            X = sm.add_constant(subDataSet[X_colName])

            Trading_Month = subDataSet["Trading_Month"].values
            SIDs = subDataSet["SID"].values
            JB_pval = subDataSet.rolling(36)["monthlyReturn"].apply(lambda var: sp.stats.jarque_bera(var)[1]).values
            subReg = RollingOLS(Y,X, window = 36, missing = "drop").fit()
            rSquared_adj = subReg.rsquared_adj.values
            t_Stat = subReg.tvalues.values
            params = subReg.params.values

            # Stack in exactly the order of newColNames. The previous order
            # put SIDs under "Trading_Month" and coefficients under the
            # t-stat headers. Column 0 of params/t_Stat is the constant and
            # is dropped.
            dataDf = np.hstack([Trading_Month[...,np.newaxis],
                                SIDs[...,np.newaxis],
                                rSquared_adj[...,np.newaxis],
                                JB_pval[...,np.newaxis],
                                t_Stat[:,1:],
                                params[:,1:]])

            assetDf = pd.DataFrame(data = dataDf, columns = newColNames)
            rollingResult_df = pd.concat([rollingResult_df, assetDf], ignore_index = True)
        except Exception:
            # Best effort: record the asset and move on. ``Exception`` rather
            # than a bare except so KeyboardInterrupt/SystemExit still stop
            # the loop.
            invalid_SID.append([asset, n_months])

        progress_bar.set_description(f'Processing {asset}')

    return(rollingResult_df,invalid_SID)

Example #22

0

Show file

File: statistics.py Project: wechen1/gs-quant

    def __init__(self, X: Union[pd.Series, List[pd.Series]], y: pd.Series, w: int, fit_intercept: bool = True):
        """Fit a rolling OLS of ``y`` on ``X`` with window length ``w``.

        :param X: one explanatory series or a list of them
        :param y: dependent series
        :param w: rolling window length; must exceed the number of regressors
            (including the intercept when ``fit_intercept`` is true)
        :param fit_intercept: whether to add a constant column
        :raises MqValueError: if ``w`` is not larger than the number of
            explanatory variables
        """
        df = pd.concat(X, axis=1) if isinstance(X, list) else X.to_frame()
        df = sm.add_constant(df) if fit_intercept else df
        # Columns are numbered so that 0 is always the intercept slot; without
        # an intercept the numbering starts at 1.
        df.columns = range(len(df.columns)) if fit_intercept else range(1, len(df.columns) + 1)

        if w <= len(df.columns):
            raise MqValueError('Window length must be larger than the number of explanatory variables')

        # filter out nan and inf (``axis`` must be passed by keyword: the
        # positional form was removed in pandas 2.0)
        df = df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)]
        y = y[~y.isin([np.nan, np.inf, -np.inf])]
        df_aligned, y_aligned = df.align(y, 'inner', axis=0)  # align series

        self._X = df_aligned.copy()
        self._res = RollingOLS(y_aligned, df_aligned, w).fit()

Example #23

0

Show file

def exponential_momentum(ts, min_nobs, window):
    '''
    Andreas Clenow's Method
    1. ln(ts) = m*t + c  (rolling regression of the log series on the row number)
    2. annualised momentum = ((e^(m))^(252) -1 ) * 100
    :return:
        annualised momentum score, multiplied by the rolling R-squared
    '''
    time_index = np.arange(0, len(ts))
    design = sm.add_constant(time_index)
    fitted = RollingOLS(np.log(ts),
                        design,
                        min_nobs=min_nobs,
                        window=window).fit()
    # 'x1' is the slope on the time index.
    daily_growth = np.exp(fitted.params['x1'])
    annualised = (np.power(daily_growth, 252) - 1) * 100
    return annualised * fitted.rsquared

Example #24

0

Show file

File: preProcessor.py Project: WYChoi1995/KRXcontest-Stocklight

    def get_rolling_beta(self, window: int):
        """Add a "RollingBeta" column to the price data of every ticker.

        The beta is the absolute rolling-OLS slope of "RiskPremium" on
        "IndexRiskPremium" (with a constant). Tickers with no more than
        ``window`` rows, or whose regression raises ``ValueError``, get NaN.
        """
        for ticker in self.tickers:
            try:
                if len(self.priceData[ticker]) <= window:
                    # Not enough history for even one full window.
                    self.priceData[ticker]["RollingBeta"] = nan
                    continue

                index_premium = add_constant(self.priceData[ticker]["IndexRiskPremium"])
                asset_premium = self.priceData[ticker]["RiskPremium"]
                fitted = RollingOLS(asset_premium, index_premium, window).fit()

                self.priceData[ticker]["RollingBeta"] = fitted.params["IndexRiskPremium"].abs()

            except ValueError:
                self.priceData[ticker]["RollingBeta"] = nan

Example #25

0

Show file

def test_min_nobs(basic_data):
    """Check that ``min_nobs`` is honoured when the regressors contain NaNs."""
    endog, exog, _ = basic_data
    # Without missing values the constraint can never bind; nothing to test.
    if not np.any(np.isnan(np.asarray(exog))):
        return
    baseline_nobs = RollingOLS(endog, exog, 150).fit().nobs
    # Ensures that the constraint binds
    min_nobs = baseline_nobs[baseline_nobs != 0].min() + 1
    constrained_nobs = RollingOLS(endog, exog, 150, min_nobs=min_nobs).fit().nobs
    assert np.all(constrained_nobs[constrained_nobs != 0] >= min_nobs)

Example #26

0

Show file

def get_rolling_factor_loadings(ticker, rolling_window):
    """Return a ticker's rolling factor loadings in long format.

    The rolling-OLS parameters (fitted with HAC covariance, maxlags 1) are
    melted into one row per (index, variable) pair and tagged with the
    ticker and the window size.
    """
    returns = get_stock_return(ticker)
    Y, X = prep_data_for_regression(ticker, returns)

    fitted = RollingOLS(Y, X, window=rolling_window).fit(
        cov_type='HAC', cov_kwds={'maxlags': 1})

    # Rows inside the warm-up window hold NaN parameters and are dropped.
    wide = fitted.params.reset_index().dropna()
    loadings = pd.melt(wide, id_vars=['index'])

    loadings['ticker'] = ticker
    loadings['window_size'] = rolling_window

    return loadings

Example #27

0

Show file

def rollingOLS(totals: pd.DataFrame, window: int = 3, infectious_period: float = 4.5) -> pd.DataFrame:
    """ legacy rolling regression-based implementation of Bettencourt/Ribeiro method

    Fits ``logdelta ~ time`` over a rolling window and derives the growth
    rate (``gradient``), its +/- 2 standard-error bounds and the
    corresponding reproductive numbers ``R``, ``RM``, ``Rm`` computed as
    ``rate * infectious_period + 1``.

    :param totals: frame with ``logdelta`` and ``time`` columns and an index
        level named ``status_change_date``
    :param window: rolling window length
    :param infectious_period: multiplier converting a growth rate into R
    :return: frame of parameters, standard errors, R-squared and derived quantities
    """
    # run rolling regressions and get parameters
    model   = RollingOLS.from_formula(formula = "logdelta ~ time", window = window, data = totals)
    rolling = model.fit(method = "lstsq")

    growthrates = rolling.params.join(rolling.bse, rsuffix="_stderr")
    growthrates["rsq"] = rolling.rsquared
    growthrates.rename(lambda s: s.replace("time", "gradient").replace("const", "intercept"), axis = 1, inplace = True)

    # calculate growth rates
    upper_rate = growthrates.gradient + 2 * growthrates.gradient_stderr
    lower_rate = growthrates.gradient - 2 * growthrates.gradient_stderr
    growthrates["egrowthrateM"] = upper_rate
    growthrates["egrowthratem"] = lower_rate
    growthrates["R"]            = growthrates.gradient * infectious_period + 1
    # The bounds on R apply the same transform as R itself to the bounded
    # growth rate. The previous expression multiplied only the standard-error
    # term by the infectious period (missing parentheses).
    growthrates["RM"]           = upper_rate * infectious_period + 1
    growthrates["Rm"]           = lower_rate * infectious_period + 1
    growthrates["date"]         = growthrates.index.get_level_values('status_change_date')
    growthrates["days"]         = totals.time

    return growthrates

Example #28

0

Show file

def test_save_load(data):
    """Check that rolling results survive a save/load round trip."""
    endog, exog, _ = data
    res = RollingOLS(endog, exog, window=60).fit()
    # test wrapped results load save pickle, first with the default options
    # and then with the data removed
    for save_options in ({}, {"remove_data": True}):
        buffer = BytesIO()
        res.save(buffer, **save_options)
        buffer.seek(0, 0)
        restored = res.__class__.load(buffer)
        assert type(restored) is type(res)  # noqa: E721

Example #29

0

Show file

    def regress_factor_loadings(self,
                                portfolio,
                                benchmark_returns: pd.Series = None,
                                date: datetime = None,
                                regression_window: int = 36,
                                rolling=False,
                                show=True):
        '''
        Regress a portfolio's excess returns on the model's factors.

        :param portfolio: str, pd.Series, TimeDataFrame, Portfolio... Anything that is not already a
            TimeDataFrame or Portfolio is wrapped in a TimeDataFrame. Only a single return column is
            supported.
        :param benchmark_returns: optional benchmark that replaces the model's 'MKT-RF' factor
            (its excess over 'RF' is used)
        :param date: end date used to slice the portfolio; only applied when the portfolio frequency
            differs from the factors' frequency
        :param regression_window: rolling window length when ``rolling`` is true, otherwise the number
            of most recent rows used for the single regression
        :param rolling: if true, fit a RollingOLS; otherwise fit one formula OLS with HAC covariance
        :param show: if true, plot the result (recursive coefficients, or one scatter/fit panel per factor)
        :return: the rolling regression results when ``rolling`` is true, otherwise the fitted OLS results
        :raises TypeError: if the portfolio holds more than one return column
        '''
        if not (isinstance(portfolio, TimeDataFrame)
                or isinstance(portfolio, Portfolio)):
            portfolio = TimeDataFrame(portfolio)

        if len(portfolio.df_returns.columns) > 1:
            # TODO actually, do an equal weighting
            raise TypeError('Inappropriate argument type for portfolio')

        # Bring the portfolio to the factors' frequency before merging.
        if portfolio.frequency != self.factors_timedf.frequency:
            portfolio_copy = portfolio.set_frequency(self.factors_timedf.frequency, inplace=False) \
                .slice_dataframe(to_date=date, inplace=False)
        else:
            portfolio_copy = portfolio

        if benchmark_returns is None:  # if no benchmark specified, just use the one in the model
            timedf_merged = portfolio_copy.merge([self.factors_timedf],
                                                 inplace=False)
        else:
            timedf_merged = portfolio_copy.merge(
                [self.factors_timedf, benchmark_returns], inplace=False)
            timedf_merged.df_returns.drop(['MKT-RF'], axis=1, inplace=True)
            # NOTE(review): benchmark_returns itself is used as the column key
            # here; a pd.Series is not hashable, so presumably a column name
            # is what actually gets passed — confirm against callers.
            timedf_merged.df_returns.rename(
                columns={benchmark_returns: 'MKT-RF'}, inplace=True)
            timedf_merged.df_returns['MKT-RF'] = timedf_merged.df_returns[
                'MKT-RF'] - timedf_merged.df_returns['RF']

        # First column is the portfolio; everything after it is a factor (incl. 'RF').
        portfolio_returns, factors_df = timedf_merged.df_returns.iloc[:, 0] - timedf_merged.df_returns['RF'], \
                                        timedf_merged.df_returns.iloc[:, 1:]

        portfolio_returns.rename('XsRet', inplace=True)
        factors_df.drop(['RF'], axis=1, inplace=True)  # don't need it anymore

        if rolling:
            # endogenous is the portfolio returns (y, dependent), exogenous is the factors (x, explanatory, independent)
            rols = RollingOLS(endog=portfolio_returns,
                              exog=factors_df,
                              window=regression_window)
            rres = rols.fit()
            params = rres.params.dropna()
            print(params.tail())
            if show:
                rres.plot_recursive_coefficient(variables=factors_df.columns,
                                                figsize=(10, 6))
                plt.show()
            return rres
        else:
            # need to merge again to run regression on dataframe (with y being XsRet)
            df_stock_factor = pd.merge(portfolio_returns,
                                       factors_df,
                                       left_index=True,
                                       right_index=True)
            # keep only the most recent `regression_window` rows
            df_stock_factor = df_stock_factor.iloc[-regression_window:, :]
            # rename because will give syntax error with '-' when running regression
            df_stock_factor.rename(columns={'MKT-RF': 'MKT'}, inplace=True)
            # NOTE(review): the formula is built from factors_df.columns, which
            # still carries 'MKT-RF' (the rename above is on df_stock_factor) —
            # confirm this is intended. Also presumably `sm` is the statsmodels
            # formula API in this module — confirm.
            reg = sm.ols(formula='XsRet ~ {}'.format(' + '.join(
                factors_df.columns)),
                         data=df_stock_factor).fit(cov_type='HAC',
                                                   cov_kwds={'maxlags': 1})
            print(reg.summary())
            if show:
                # Lay the factor panels out on a grid at most three columns wide.
                nrows, ncols = ceil(len(factors_df.columns) / 3), min(
                    len(factors_df.columns), 3)
                fig, axs = plt.subplots(nrows=nrows,
                                        ncols=ncols,
                                        figsize=(12, 5))
                plt.tight_layout()

                for i, factor in enumerate(df_stock_factor.iloc[:, 1:]):

                    # axs is indexed per dimension only when that dimension
                    # has more than one panel.
                    idx_x, idx_y = floor(i / 3), floor(i % 3)
                    ax = axs
                    if nrows > 1:
                        ax = axs[idx_x, ]
                    if ncols > 1:
                        ax = ax[idx_y]

                    X = np.linspace(df_stock_factor[factor].min(),
                                    df_stock_factor[factor].max())
                    Y = reg.params[i +
                                   1] * X + reg.params[0]  # beta * x + alpha
                    ax.plot(X, Y)
                    # plt.draw()
                    # plt.pause(0.001)

                    ax.scatter(df_stock_factor[factor],
                               df_stock_factor.iloc[:, 0],
                               alpha=0.3)
                    ax.grid(True)
                    ax.axis('tight')
                    ax.set_xlabel(factor if factor != 'MKT' else 'MKT-RF')
                    ax.set_ylabel('Portfolio Excess Returns')
                # plt.ion()
                plt.show()

            return reg

Example #30

0

Show file

    def calibrate(self, windowOLS, copula_lookback, recalibrate_n, **kwargs):
        """Calibrate the rolling hedge ratio and the copula mispricing indices.

        Stores on the instance:
          * ``beta`` - rolling OLS slope of y on x (flat array)
          * ``startIdx`` - first row for which mispricing indices are produced
          * ``MI_u_v`` / ``MI_v_u`` - mispricing index series; rows that are
            never predicted keep the neutral value 0.5

        The copula family (clayton, frank or gumbel) is chosen once on the
        full sample by minimum AIC. Its parameter is then re-estimated every
        ``recalibrate_n`` rows from the preceding ``copula_lookback`` rows.

        :param windowOLS: rolling OLS window length
        :param copula_lookback: number of past rows used at each recalibration
        :param recalibrate_n: number of rows predicted between recalibrations
        :param kwargs: accepted for interface compatibility and ignored
        """
        self.windowOLS = int(windowOLS)
        self.copula_lookback = int(copula_lookback)
        self.recalibrate_n = int(recalibrate_n)
        # Use the int-cast values throughout so float-valued arguments (e.g.
        # from an optimiser) cannot break range() below.
        lookback = self.copula_lookback
        step = self.recalibrate_n

        df = pd.DataFrame({'y':self.y,'x':self.x,'c':1})

        # NOTE(review): the constant column 'c' is not passed to the
        # regression here, so the fit has no intercept — confirm intended.
        model = RollingOLS(endog =df['y'], exog=df['x'],window=self.windowOLS)
        rres = model.fit()

        self.beta = rres.params['x'].values.reshape(-1, )

        # Copula decision:
        df['x_log_ret']= np.log(df.x) - np.log(df.x.shift(1))
        df['y_log_ret']= np.log(df.y) - np.log(df.y.shift(1))

        # Convert the two returns series to two uniform values u and v using the empirical distribution functions
        ecdf_x, ecdf_y  = ECDF(df.x_log_ret), ECDF(df.y_log_ret)
        u, v = [ecdf_x(a) for a in df.x_log_ret], [ecdf_y(a) for a in df.y_log_ret]

        # Compute the Akaike Information Criterion (AIC) for different copulas and choose copula with minimum AIC
        tau = stats.kendalltau(df.x_log_ret, df.y_log_ret)[0]  # estimate Kendall'rank correlation
        AIC ={}  # generate a dict with key being the copula family, value = [theta, AIC]

        for i in ['clayton', 'frank', 'gumbel']:
            param = self._parameter(i, tau)
            lpdf = [self._lpdf_copula(i, param, x, y) for (x, y) in zip(u, v)]
            # Replace nan with zero and inf with finite numbers in lpdf list
            lpdf = np.nan_to_num(lpdf)
            loglikelihood = sum(lpdf)
            AIC[i] = [param, -2 * loglikelihood + 2]
        # Choose the copula with the minimum AIC
        copula = min(AIC.items(), key = lambda x: x[1][1])[0]

        self.startIdx = lookback + 1 # Because first is NAN

        df['MI_u_v'] = 0.5
        df['MI_v_u'] = 0.5
        mi_u_v_col = df.columns.get_loc('MI_u_v')
        mi_v_u_col = df.columns.get_loc('MI_v_u')

        for i in np.arange(self.startIdx , len(df)-step, step):

            window = range(i - lookback, i)
            predWindow = range(i, i + step)

            x_hist = df.x_log_ret.iloc[window]
            y_hist = df.y_log_ret.iloc[window]
            x_forw = df.x_log_ret.iloc[predWindow]
            y_forw = df.y_log_ret.iloc[predWindow]

            # Estimate Kendall'rank correlation
            tau = stats.kendalltau(x_hist, y_hist)[0]

            # Estimate the copula parameter: theta
            theta = self._parameter(copula, tau)

            # Simulate the empirical distribution function for returns of selected trading pair
            ecdf_x,  ecdf_y  = ECDF(x_hist), ECDF(y_hist)

            # Now get future values
            a, b = self._misprice_index(copula, theta, ecdf_x(x_forw), ecdf_y(y_forw))

            # Write through the frame itself. The chained form
            # ``df.MI_u_v.iloc[...] = a`` assigns into a temporary Series and
            # does not update ``df`` under pandas Copy-on-Write.
            df.iloc[i:i + step, mi_u_v_col] = a
            df.iloc[i:i + step, mi_v_u_col] = b

        self.MI_u_v = df.MI_u_v
        self.MI_v_u = df.MI_v_u