def __init__(self, benchmark, models, size=0.05, block_size=None, reps=1000,
             bootstrap='stationary', studentize=True, nested=False):
    """
    Store the inputs and build the internal SPA instance used by StepM.

    ``benchmark`` and ``models`` are validated with ``ensure2d``. The block
    size and the bootstrap object are read back from the ``SPA`` instance
    constructed from the same arguments.
    """
    super(StepM, self).__init__()
    self.benchmark = ensure2d(benchmark, 'benchmark')
    self.models = ensure2d(models, 'models')

    # All bootstrap-related configuration is delegated to SPA
    spa = SPA(
        benchmark,
        models,
        block_size=block_size,
        reps=reps,
        bootstrap=bootstrap,
        studentize=studentize,
        nested=nested,
    )
    self.spa = spa
    self.block_size = spa.block_size
    self.t, self.k = self.models.shape
    self.reps = reps
    self.size = size
    self._superior_models = None
    self.bootstrap = spa.bootstrap
    self._model = 'StepM'

    # Label describing how (if at all) the statistic is studentized
    if not spa.studentize:
        method = 'none'
    elif spa.nested:
        method = 'bootstrap'
    else:
        method = 'asymptotic'

    info = OrderedDict()
    info['FWER (size)'] = '{:0.2f}'.format(self.size)
    info['studentization'] = method
    info['bootstrap'] = str(spa.bootstrap)
    info['ID'] = hex(id(self))
    self._info = info
def __init__(
    self,
    benchmark: ArrayLike,
    models: ArrayLike,
    block_size: Optional[int] = None,
    reps: int = 1000,
    bootstrap: str = "stationary",
    studentize: bool = True,
    nested: bool = False,
) -> None:
    """
    Validate the inputs, compute loss differentials and set up the bootstrap.

    The loss differential is ``benchmark - models``. When ``block_size`` is
    None, the integer part of the square root of the number of rows of
    ``benchmark`` is used. ``bootstrap`` is matched case-insensitively, with
    spaces treated as underscores.

    Raises
    ------
    ValueError
        If ``bootstrap`` does not name a known bootstrap.
    """
    super().__init__()
    self.benchmark: np.ndarray[Any, np.dtype[np.float64]] = ensure2d(
        benchmark, "benchmark"
    )
    self.models: np.ndarray[Any, np.dtype[np.float64]] = ensure2d(models, "models")
    self.reps: int = reps
    self.block_size = (
        block_size if block_size is not None else int(np.sqrt(benchmark.shape[0]))
    )
    self.studentize: bool = studentize
    self.nested: bool = nested

    loss_diff = np.asarray(self.benchmark) - np.asarray(self.models)
    self._loss_diff = loss_diff
    self._loss_diff_var = np.empty(0)
    self.t: int = loss_diff.shape[0]
    self.k: int = loss_diff.shape[1]

    # Resolve the bootstrap class first, then instantiate it once
    bootstrap = bootstrap.lower().replace(" ", "_")
    if bootstrap in ("circular", "cbb"):
        bootstrap_cls = CircularBlockBootstrap
    elif bootstrap in ("stationary", "sb"):
        bootstrap_cls = StationaryBootstrap
    elif bootstrap in ("moving_block", "mbb"):
        bootstrap_cls = MovingBlockBootstrap
    else:
        raise ValueError("Unknown bootstrap:" + bootstrap)
    self.bootstrap: CircularBlockBootstrap = bootstrap_cls(
        self.block_size, self._loss_diff
    )

    self._pvalues: Dict[str, float] = {}
    self._simulated_vals: Optional[NDArray] = None
    self._selector = np.ones(self.k, dtype=np.bool_)
    self._model = "SPA"

    # Label describing how (if at all) the statistic is studentized
    if not self.studentize:
        method = "none"
    elif self.nested:
        method = "bootstrap"
    else:
        method = "asymptotic"
    self._info = {
        "studentization": method,
        "bootstrap": str(self.bootstrap),
        "ID": hex(id(self)),
    }
def __init__(
    self,
    benchmark: ArrayLike,
    models: ArrayLike,
    size: float = 0.05,
    block_size: Optional[int] = None,
    reps: int = 1000,
    bootstrap: Literal[
        "stationary", "sb", "circular", "cbb", "moving block", "mbb"
    ] = "stationary",
    studentize: bool = True,
    nested: bool = False,
) -> None:
    """
    Store the inputs and build the internal SPA instance used by StepM.

    ``benchmark`` and ``models`` are validated with ``ensure2d``. The block
    size and the bootstrap object are read back from the ``SPA`` instance
    constructed from the same arguments.
    """
    super(StepM, self).__init__()
    self.benchmark: np.ndarray[Any, np.dtype[np.float64]] = ensure2d(
        benchmark, "benchmark"
    )
    self.models: np.ndarray[Any, np.dtype[np.float64]] = ensure2d(models, "models")

    # All bootstrap-related configuration is delegated to SPA
    spa_kwargs = {
        "block_size": block_size,
        "reps": reps,
        "bootstrap": bootstrap,
        "studentize": studentize,
        "nested": nested,
    }
    self.spa: SPA = SPA(benchmark, models, **spa_kwargs)
    self.block_size: int = self.spa.block_size

    n_obs, n_models = self.models.shape[0], self.models.shape[1]
    self.t: int = n_obs
    self.k: int = n_models
    self.reps: int = reps
    self.size: float = size
    self._superior_models: Optional[List[Hashable]] = None
    self.bootstrap: CircularBlockBootstrap = self.spa.bootstrap
    self._model = "StepM"

    # Label describing how (if at all) the statistic is studentized
    if not self.spa.studentize:
        method = "none"
    elif self.spa.nested:
        method = "bootstrap"
    else:
        method = "asymptotic"
    self._info = {
        "FWER (size)": "{:0.2f}".format(self.size),
        "studentization": method,
        "bootstrap": str(self.spa.bootstrap),
        "ID": hex(id(self)),
    }
def _cov(
    cov_type: str,
    kernel: str,
    bandwidth: Optional[int],
    force_int: bool,
    df_adjust: bool,
    rhs: pd.DataFrame,
    resids: pd.Series,
) -> Tuple[pd.DataFrame, lrcov.CovarianceEstimator]:
    """
    Estimate the parameter covariance of a regression.

    Returns a DataFrame labelled by the columns of ``rhs`` together with the
    kernel estimator instance that produced the long-run covariance.

    Raises
    ------
    ValueError
        If ``kernel`` is not a key of ``KERNEL_ESTIMATORS`` or ``cov_type``
        is not one of "unadjusted", "homoskedastic", "robust" or "kernel".
    """
    kernel = kernel.lower().replace("-", "").replace("_", "")
    if kernel not in KERNEL_ESTIMATORS:
        raise ValueError(KERNEL_ERR)
    kernel_est = KERNEL_ESTIMATORS[kernel]

    regressors = np.asarray(rhs)
    eps = ensure2d(np.asarray(resids), "eps")
    nobs, nx = regressors.shape
    sigma_xx_inv = np.linalg.inv(regressors.T @ regressors / nobs)
    # Optional small-sample degrees-of-freedom correction
    scale = nobs / (nobs - nx) if df_adjust else 1.0

    if cov_type not in ("unadjusted", "homoskedastic", "robust", "kernel"):
        raise ValueError("Unknown cov_type")

    if cov_type in ("unadjusted", "homoskedastic"):
        # Long-run variance of the residuals scales the inverse second moment
        est = kernel_est(eps, bandwidth, center=False, force_int=force_int)
        sigma2 = np.squeeze(est.cov.long_run)
        cov = (scale * sigma2) * sigma_xx_inv / nobs
    else:
        # Sandwich form built from the long-run covariance of the scores
        est = kernel_est(regressors * eps, bandwidth, center=False, force_int=force_int)
        s = est.cov.long_run
        cov = scale * sigma_xx_inv @ s @ sigma_xx_inv / nobs

    labels = rhs.columns
    return pd.DataFrame(cov, columns=labels, index=labels), est
def __init__(self, losses, size, reps=1000, block_size=None, method="R", bootstrap="stationary"):
    """
    Validate the losses and configure the bootstrap used by the MCS.

    When ``block_size`` is None, the integer part of the square root of the
    number of rows of ``losses`` is used. ``bootstrap`` is matched
    case-insensitively, with spaces treated as underscores.

    Raises
    ------
    ValueError
        If ``losses`` has fewer than two columns or ``bootstrap`` does not
        name a known bootstrap.
    """
    super(MCS, self).__init__()
    self.losses = ensure2d(losses, "losses")
    self._losses_arr = np.asarray(self.losses)
    n_columns = self._losses_arr.shape[1]
    if n_columns < 2:
        raise ValueError("losses must have at least two columns")

    self.size = size
    self.reps = reps
    self.block_size = int(np.sqrt(losses.shape[0])) if block_size is None else block_size
    self.t, self.k = losses.shape
    self.method = method

    # Bootstrap indices since the same bootstrap should be used in the
    # repeated steps
    indices = np.arange(self.t)
    bootstrap = bootstrap.lower().replace(" ", "_")
    if bootstrap in ("stationary", "sb"):
        sampler_cls = StationaryBootstrap
    elif bootstrap in ("circular", "cbb"):
        sampler_cls = CircularBlockBootstrap
    elif bootstrap in ("moving_block", "mbb"):
        sampler_cls = MovingBlockBootstrap
    else:
        raise ValueError("Unknown bootstrap:" + bootstrap)
    sampler = sampler_cls(self.block_size, indices)

    self.bootstrap = sampler
    self._bootsrap_indices = []  # For testing
    self._model = "MCS"

    info = OrderedDict()
    info["size"] = "{0:0.2f}".format(self.size)
    info["bootstrap"] = str(sampler)
    info["ID"] = hex(id(self))
    self._info = info
def __init__(self, losses, size, reps=1000, block_size=None, method='R',
             bootstrap='stationary'):
    """
    Validate the losses and configure the bootstrap used by the MCS.

    When ``block_size`` is None, the integer part of the square root of the
    number of rows of ``losses`` is used. ``bootstrap`` is matched
    case-insensitively, with spaces treated as underscores.

    Raises
    ------
    ValueError
        If ``losses`` has fewer than two columns or ``bootstrap`` does not
        name a known bootstrap.
    """
    super(MCS, self).__init__()
    self.losses = ensure2d(losses, 'losses')
    self._losses_arr = np.asarray(self.losses)
    if not self._losses_arr.shape[1] >= 2:
        raise ValueError('losses must have at least two columns')

    self.size = size
    self.reps = reps
    if block_size is not None:
        self.block_size = block_size
    else:
        self.block_size = int(np.sqrt(losses.shape[0]))
    self.t, self.k = losses.shape
    self.method = method

    # Bootstrap indices since the same bootstrap should be used in the
    # repeated steps
    indices = np.arange(self.t)
    bootstrap = bootstrap.lower().replace(' ', '_')
    if bootstrap in ('stationary', 'sb'):
        bootstrap_type = StationaryBootstrap
    elif bootstrap in ('circular', 'cbb'):
        bootstrap_type = CircularBlockBootstrap
    elif bootstrap in ('moving_block', 'mbb'):
        bootstrap_type = MovingBlockBootstrap
    else:
        raise ValueError('Unknown bootstrap:' + bootstrap)
    bootstrap = bootstrap_type(self.block_size, indices)

    self.bootstrap = bootstrap
    self._bootsrap_indices = []  # For testing
    self._model = 'MCS'
    summary = [
        ('size', '{0:0.2f}'.format(self.size)),
        ('bootstrap', str(bootstrap)),
        ('ID', hex(id(self))),
    ]
    self._info = OrderedDict(summary)
    self._results_computed = False
def __init__(
    self,
    benchmark,
    models,
    size=0.05,
    block_size=None,
    reps=1000,
    bootstrap="stationary",
    studentize=True,
    nested=False,
):
    """
    Store the inputs and build the internal SPA instance used by StepM.

    ``benchmark`` and ``models`` are validated with ``ensure2d``. The block
    size and the bootstrap object are read back from the ``SPA`` instance
    constructed from the same arguments.
    """
    super(StepM, self).__init__()
    self.benchmark = ensure2d(benchmark, "benchmark")
    self.models = ensure2d(models, "models")

    # All bootstrap-related configuration is delegated to SPA
    spa_options = dict(
        block_size=block_size,
        reps=reps,
        bootstrap=bootstrap,
        studentize=studentize,
        nested=nested,
    )
    self.spa = SPA(benchmark, models, **spa_options)
    self.block_size = self.spa.block_size
    self.t, self.k = self.models.shape
    self.reps = reps
    self.size = size
    self._superior_models = None
    self.bootstrap = self.spa.bootstrap
    self._model = "StepM"

    # Label describing how (if at all) the statistic is studentized
    method = "none"
    if self.spa.studentize:
        method = "bootstrap" if self.spa.nested else "asymptotic"

    entries = (
        ("FWER (size)", "{:0.2f}".format(self.size)),
        ("studentization", method),
        ("bootstrap", str(self.spa.bootstrap)),
        ("ID", hex(id(self))),
    )
    self._info = OrderedDict(entries)
def __init__(self, benchmark, models, block_size=None, reps=1000,
             bootstrap='stationary', studentize=True, nested=False):
    """
    Validate the inputs, compute loss differentials and set up the bootstrap.

    Parameters
    ----------
    benchmark, models
        Inputs passed through ``ensure2d``; the loss differential is
        computed as ``benchmark - models``.
    block_size : int, optional
        Bootstrap block size. When None, the integer part of the square
        root of the number of rows of ``benchmark`` is used.
    reps : int
        Stored as ``self.reps``.
    bootstrap : str
        One of 'stationary'/'sb', 'circular'/'cbb' or 'moving block'/'mbb';
        matched case-insensitively with spaces treated as underscores.
    studentize, nested : bool
        Stored on the instance and used to build the 'studentization'
        entry of the info dictionary.

    Raises
    ------
    ValueError
        If ``bootstrap`` does not name a known bootstrap.
    """
    super(SPA, self).__init__()
    self.benchmark = ensure2d(benchmark, 'benchmark')
    self.models = ensure2d(models, 'models')
    self.reps = reps
    if block_size is None:
        self.block_size = int(np.sqrt(benchmark.shape[0]))
    else:
        self.block_size = block_size
    self.studentize = studentize
    self.nested = nested
    self._loss_diff = np.asarray(self.benchmark) - np.asarray(self.models)
    self._loss_diff_var = None
    self.t, self.k = self._loss_diff.shape

    bootstrap = bootstrap.lower().replace(' ', '_')
    if bootstrap in ('stationary', 'sb'):
        bootstrap = StationaryBootstrap(self.block_size, self._loss_diff)
    elif bootstrap in ('circular', 'cbb'):
        bootstrap = CircularBlockBootstrap(self.block_size, self._loss_diff)
    elif bootstrap in ('moving_block', 'mbb'):
        bootstrap = MovingBlockBootstrap(self.block_size, self._loss_diff)
    else:
        raise ValueError('Unknown bootstrap:' + bootstrap)
    self.bootstrap = bootstrap

    self._pvalues = None
    self._simulated_vals = None
    # np.bool_ rather than the np.bool alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError
    self._selector = np.ones(self.k, dtype=np.bool_)
    self._model = 'SPA'

    if self.studentize:
        method = 'bootstrap' if self.nested else 'asymptotic'
    else:
        method = 'none'
    self._info = OrderedDict([('studentization', method),
                              ('bootstrap', str(self.bootstrap)),
                              ('ID', hex(id(self)))])
def __init__(
    self,
    benchmark: ArrayLike,
    models: ArrayLike,
    size: float = 0.05,
    block_size: Optional[int] = None,
    reps: int = 1000,
    bootstrap: str = "stationary",
    studentize: bool = True,
    nested: bool = False,
) -> None:
    """
    Store the inputs and build the internal SPA instance used by StepM.

    ``benchmark`` and ``models`` are validated with ``ensure2d``. The block
    size and the bootstrap object are read back from the ``SPA`` instance
    constructed from the same arguments.
    """
    super(StepM, self).__init__()
    self.benchmark = ensure2d(benchmark, "benchmark")
    self.models = ensure2d(models, "models")

    # All bootstrap-related configuration is delegated to SPA
    inner = SPA(
        benchmark,
        models,
        block_size=block_size,
        reps=reps,
        bootstrap=bootstrap,
        studentize=studentize,
        nested=nested,
    )
    self.spa = inner
    self.block_size = inner.block_size
    self.t, self.k = self.models.shape
    self.reps = reps
    self.size = size
    self._superior_models: Optional[List[Hashable]] = None
    self.bootstrap = inner.bootstrap
    self._model = "StepM"

    # Label describing how (if at all) the statistic is studentized
    if not inner.studentize:
        method = "none"
    elif inner.nested:
        method = "bootstrap"
    else:
        method = "asymptotic"
    self._info = {
        "FWER (size)": "{:0.2f}".format(self.size),
        "studentization": method,
        "bootstrap": str(inner.bootstrap),
        "ID": hex(id(self)),
    }
def _cross_section(y: ArrayLike1D, x: ArrayLike2D, trend: str) -> RegressionResults:
    """
    Fit an OLS regression of ``y`` on ``x`` augmented with trend terms.

    When ``x`` is not a DataFrame after ``ensure2d``, it is wrapped in one
    with columns named x1, x2, ... and the index of ``y``.

    Raises
    ------
    ValueError
        If ``trend`` is not one of "n", "c", "ct" or "ctt".
    """
    allowed_trends = ("n", "c", "ct", "ctt")
    if trend not in allowed_trends:
        raise ValueError('trend must be one of "n", "c", "ct" or "ctt"')

    y = ensure1d(y, "y", True)
    x = ensure2d(x, "x")
    if not isinstance(x, pd.DataFrame):
        n_cols = x.shape[1]
        names = [f"x{i}" for i in range(1, n_cols + 1)]
        x = pd.DataFrame(x, columns=names, index=y.index)

    design = add_trend(x, trend)
    return OLS(y, design).fit()
def __init__(
    self, benchmark, models, block_size=None, reps=1000, bootstrap="stationary", studentize=True, nested=False
):
    """
    Validate the inputs, compute loss differentials and set up the bootstrap.

    Parameters
    ----------
    benchmark, models
        Inputs passed through ``ensure2d``; the loss differential is
        computed as ``benchmark - models``.
    block_size : int, optional
        Bootstrap block size. When None, the integer part of the square
        root of the number of rows of ``benchmark`` is used.
    reps : int
        Stored as ``self.reps``.
    bootstrap : str
        One of "stationary"/"sb", "circular"/"cbb" or "moving block"/"mbb";
        matched case-insensitively with spaces treated as underscores.
    studentize, nested : bool
        Stored on the instance and used to build the "studentization"
        entry of the info dictionary.

    Raises
    ------
    ValueError
        If ``bootstrap`` does not name a known bootstrap.
    """
    super(SPA, self).__init__()
    self.benchmark = ensure2d(benchmark, "benchmark")
    self.models = ensure2d(models, "models")
    self.reps = reps
    if block_size is None:
        self.block_size = int(np.sqrt(benchmark.shape[0]))
    else:
        self.block_size = block_size
    self.studentize = studentize
    self.nested = nested
    self._loss_diff = np.asarray(self.benchmark) - np.asarray(self.models)
    self._loss_diff_var = None
    self.t, self.k = self._loss_diff.shape

    bootstrap = bootstrap.lower().replace(" ", "_")
    if bootstrap in ("stationary", "sb"):
        bootstrap = StationaryBootstrap(self.block_size, self._loss_diff)
    elif bootstrap in ("circular", "cbb"):
        bootstrap = CircularBlockBootstrap(self.block_size, self._loss_diff)
    elif bootstrap in ("moving_block", "mbb"):
        bootstrap = MovingBlockBootstrap(self.block_size, self._loss_diff)
    else:
        raise ValueError("Unknown bootstrap:" + bootstrap)
    self.bootstrap = bootstrap

    self._pvalues = None
    self._simulated_vals = None
    # np.bool_ rather than the np.bool alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError
    self._selector = np.ones(self.k, dtype=np.bool_)
    self._model = "SPA"

    if self.studentize:
        method = "bootstrap" if self.nested else "asymptotic"
    else:
        method = "none"
    self._info = OrderedDict(
        [("studentization", method), ("bootstrap", str(self.bootstrap)), ("ID", hex(id(self)))]
    )
def __init__(
    self,
    losses: ArrayLike,
    size: float,
    reps: int = 1000,
    block_size: Optional[int] = None,
    method: Literal["R", "max"] = "R",
    bootstrap: Literal[
        "stationary", "sb", "circular", "cbb", "moving block", "mbb"
    ] = "stationary",
) -> None:
    """
    Validate the losses and configure the bootstrap used by the MCS.

    When ``block_size`` is None, the integer part of the square root of the
    number of rows of ``losses`` is used. ``bootstrap`` is matched
    case-insensitively, with spaces treated as underscores.

    Raises
    ------
    ValueError
        If ``losses`` has fewer than two columns or ``bootstrap`` does not
        name a known bootstrap.
    """
    super().__init__()
    self.losses: np.ndarray[Any, np.dtype[np.float64]] = ensure2d(losses, "losses")
    self._losses_arr = np.asarray(self.losses)
    if not self._losses_arr.shape[1] >= 2:
        raise ValueError("losses must have at least two columns")

    self.size: float = size
    self.reps: int = reps
    self.block_size = (
        block_size if block_size is not None else int(np.sqrt(losses.shape[0]))
    )
    self.t: int = losses.shape[0]
    self.k: int = losses.shape[1]
    self.method: Literal["R", "max"] = method

    # Bootstrap indices since the same bootstrap should be used in the
    # repeated steps
    indices = np.arange(self.t)
    bootstrap_meth = bootstrap.lower().replace(" ", "_")
    if bootstrap_meth in ("circular", "cbb"):
        bootstrap_cls = CircularBlockBootstrap
    elif bootstrap_meth in ("stationary", "sb"):
        bootstrap_cls = StationaryBootstrap
    elif bootstrap_meth in ("moving_block", "mbb"):
        bootstrap_cls = MovingBlockBootstrap
    else:
        raise ValueError(f"Unknown bootstrap: {bootstrap_meth}")
    bootstrap_inst = bootstrap_cls(self.block_size, indices)

    self.bootstrap: CircularBlockBootstrap = bootstrap_inst
    self._bootstrap_indices: List[NDArray] = []  # For testing
    self._model = "MCS"
    self._info = {
        "size": "{0:0.2f}".format(self.size),
        "bootstrap": str(bootstrap_inst),
        "ID": hex(id(self)),
    }
    self._results_computed = False
def __init__(
    self,
    x: ArrayLike,
    bandwidth: Optional[float] = None,
    df_adjust: int = 0,
    center: bool = True,
    weights: Optional[ArrayLike] = None,
    force_int: bool = False,
):
    """
    Validate and store the data and options of the covariance estimator.

    ``x`` is passed through ``ensure2d`` and, when ``center`` is true, has
    its column means subtracted. A ``bandwidth`` of None marks the bandwidth
    as automatically selected. ``weights`` defaults to a column of ones with
    one entry per column of ``x``.

    Raises
    ------
    ValueError
        If ``bandwidth`` is not None and is not a non-negative scalar, if
        ``df_adjust`` is not a non-negative scalar, if the adjusted degrees
        of freedom are not positive, or if ``weights`` has the wrong length,
        contains a negative value or is entirely zero.
    """
    self._x_orig = ensure2d(x, "x")
    self._x = np.asarray(self._x_orig)
    self._center = center
    if self._center:
        self._x = self._x - self._x.mean(0)

    if bandwidth is not None:
        if not np.isscalar(bandwidth) or bandwidth < 0:
            raise ValueError("bandwidth must be a non-negative scalar.")
    self._bandwidth = bandwidth
    self._auto_bandwidth = bandwidth is None

    if not (np.isscalar(df_adjust) and df_adjust >= 0):
        raise ValueError("df_adjust must be a non-negative integer.")
    self._df_adjust = int(df_adjust)
    self._df = self._x.shape[0] - self._df_adjust
    if self._df <= 0:
        raise ValueError(
            "Degrees of freedom is <= 0 after adjusting the sample "
            "size of x using df_adjust. df_adjust must be less than"
            f" {self._x.shape[0]}"
        )

    n_series = self._x.shape[1]
    if weights is not None:
        # User-supplied weights become a column vector
        xw = ensure1d(np.asarray(weights), "weights")[:, None]
    else:
        xw = np.ones((n_series, 1))
    self._x_weights = xw

    weights_ok = (
        xw.shape[0] == n_series
        and xw.shape[1] == 1
        and not np.any(xw < 0)
        and not np.all(xw == 0)
    )
    if not weights_ok:
        raise ValueError(
            f"weights must be a 1 by {self._x.shape[1]} (x.shape[1]) "
            f"array with non-negative values where at least one value is "
            "strictly greater than 0."
        )
    self._force_int = force_int
def test_ensure2d(self):
    """Check ensure2d on Series, DataFrame, ndarray and invalid inputs."""
    series = Series([1, 2, 3], name='x')
    frame = ensure2d(series, 'x')
    assert isinstance(frame, DataFrame)

    # A DataFrame input must come back as the very same object
    same_frame = ensure2d(frame, 'x')
    assert frame is same_frame

    # 1-d and 0-d arrays are both promoted to 2-d ndarrays
    for raw in (series.values, np.array(1.0)):
        arr = ensure2d(raw, 'x')
        assert isinstance(arr, np.ndarray)
        assert arr.ndim == 2

    # Arrays with more than two dimensions are rejected
    with pytest.raises(ValueError):
        ensure2d(np.array([[[1]]]), 'x')
    # Plain lists are not accepted
    with pytest.raises(TypeError):
        ensure2d([1], 'x')
def _check_cointegrating_regression(
    y: ArrayLike1D,
    x: ArrayLike2D,
    trend: str,
    supported_trends: Tuple[str, ...] = ("n", "c", "ct", "ctt"),
) -> CointegrationSetup:
    """
    Validate and normalize the inputs of a cointegrating regression.

    Parameters
    ----------
    y : array_like
        Left-hand-side variable; converted with ``ensure1d``.
    x : array_like
        Right-hand-side variables; converted with ``ensure2d``. When the
        result is not a DataFrame it is wrapped in one with columns named
        x1, x2, ... and the index of ``y``.
    trend : str
        Trend specification; lower-cased before it is checked.
    supported_trends : tuple of str
        Trend values accepted by the caller.

    Returns
    -------
    CointegrationSetup
        Built from the converted ``y``, the DataFrame form of ``x`` and the
        lower-cased ``trend``.

    Raises
    ------
    ValueError
        If ``y`` and ``x`` have different numbers of observations or the
        trend is not in ``supported_trends``.
    """
    y = ensure1d(y, "y", True)
    x = ensure2d(x, "x")
    if y.shape[0] != x.shape[0]:
        raise ValueError(
            f"The number of observations in y and x differ. y has "
            f"{y.shape[0]} observations, and x has {x.shape[0]}."
        )
    if isinstance(x, pd.DataFrame):
        x_df = x
    else:
        cols = [f"x{i}" for i in range(1, x.shape[1] + 1)]
        x_df = pd.DataFrame(x, columns=cols, index=y.index)

    # Normalize once; the membership test uses the lower-cased value
    trend = trend.lower()
    if trend not in supported_trends:
        trends = ",".join([f'"{st}"' for st in supported_trends])
        raise ValueError(f"Unknown trend. Must be one of {{{trends}}}")
    return CointegrationSetup(y, x_df, trend)
def phillips_ouliaris(
    y: ArrayLike1D,
    x: ArrayLike2D,
    trend: str = "c",
    *,
    test_type: str = "Zt",
    kernel: str = "bartlett",
    bandwidth: Optional[int] = None,
    force_int: bool = False,
) -> PhillipsOuliarisTestResults:
    r"""
    Test for cointegration within a set of time series.

    Parameters
    ----------
    y : array_like
        The left-hand-side variable in the cointegrating regression.
    x : array_like
        The right-hand-side variables in the cointegrating regression.
    trend : {"n","c","ct","ctt"}, default "c"
        Trend to include in the cointegrating regression. Trends are:

        * "n": No deterministic terms
        * "c": Constant
        * "ct": Constant and linear trend
        * "ctt": Constant, linear and quadratic trends
    test_type : {"Za", "Zt", "Pu", "Pz"}, default "Zt"
        The test statistic to compute. Supported options are:

        * "Za": The Zα test based on the debiased AR(1) coefficient.
        * "Zt": The Zt test based on the t-statistic from an AR(1).
        * "Pu": The Pᵤ variance-ratio test.
        * "Pz": The Pz test of the trace of the product of an estimate of the
          long-run residual variance and the inner-product of the data.

        See the notes for details on the test.
    kernel : str, default "bartlett"
        The string name of any known kernel-based long-run covariance
        estimator. Common choices are "bartlett" for the Bartlett kernel
        (Newey-West), "parzen" for the Parzen kernel and
        "quadratic-spectral" for the Quadratic Spectral kernel.
    bandwidth : int, default None
        The bandwidth to use. If not provided, the optimal bandwidth is
        estimated from the data.
    force_int : bool, default False
        Whether to force the estimated optimal bandwidth to be an integer.

    Returns
    -------
    PhillipsOuliarisTestResults
        Results of the Phillips-Ouliaris test.

    Raises
    ------
    ValueError
        If ``test_type`` is not one of Za, Zt, Pu or Pz (case-insensitive),
        or if ``kernel`` is not a known kernel estimator.

    See Also
    --------
    arch.unitroot.ADF
        Augmented Dickey-Fuller testing.
    arch.unitroot.PhillipsPerron
        Phillips & Perron's unit root test.
    arch.unitroot.cointegration.engle_granger
        Engle & Granger's cointegration test.

    Notes
    -----

    .. warning::

       The critical value simulation is on-going and so the critical values
       may change slightly as more simulations are completed. These are still
       based on far more simulations (minimum 2,000,000) than were possible
       in 1990 (5000) that are reported in [1]_.

    Supports 4 distinct tests.

    Define the cross-sectional regression

    .. math::

       y_t = x_t \beta + d_t \gamma + u_t

    where :math:`d_t` are any included deterministic terms. Let
    :math:`\hat{u}_t = y_t - x_t \hat{\beta} - d_t \hat{\gamma}`.

    The Zα and Zt statistics are defined as

    .. math::

       \hat{Z}_\alpha & = T \times z \\
       \hat{Z}_t & = \frac{\hat{\sigma}_u}{\hat{\omega}^2} \times \sqrt{T} z \\
       z & = (\hat{\alpha} - 1) - \hat{\omega}^2_1 / \hat{\sigma}^2_u

    where :math:`\hat{\sigma}^2_u=T^{-1}\sum_{t=2}^T \hat{u}_t^2`,
    :math:`\hat{\omega}^2_1` is an estimate of the one-sided strict
    autocovariance, and :math:`\hat{\omega}^2` is an estimate of the long-run
    variance of the process.

    The :math:`\hat{P}_u` variance-ratio statistic is defined as

    .. math::

       \hat{P}_u = \frac{\hat{\omega}_{11\cdot2}}{\tilde{\sigma}^2_u}

    where :math:`\tilde{\sigma}^2_u=T^{-1}\sum_{t=1}^T \hat{u}_t^2` and

    .. math::

       \hat{\omega}_{11\cdot 2} = \hat{\omega}_{11}
                                - \hat{\omega}'_{21} \hat{\Omega}_{22}^{-1}
                                  \hat{\omega}_{21}

    and

    .. math::

       \hat{\Omega}=\left[\begin{array}{cc}
                    \hat{\omega}_{11} & \hat{\omega}'_{21}\\
                    \hat{\omega}_{21} & \hat{\Omega}_{22}
                    \end{array}\right]

    is an estimate of the long-run covariance of :math:`\xi_t`, the residuals
    from a VAR(1) on :math:`z_t=[y_t,x_t]` that includes any trends included
    in the test.

    .. math::

       z_t = \Phi z_{t-1} + \xi_t

    The final test statistic is defined

    .. math::

       \hat{P}_z = T \times \mathrm{tr}(\hat{\Omega} M_{zz}^{-1})

    where :math:`M_{zz} = \sum_{t=1}^T \tilde{z}'_t \tilde{z}_t`,
    :math:`\tilde{z}_t` is the vector of data :math:`z_t=[y_t,x_t]` detrended
    using any trend terms included in the test,
    :math:`\tilde{z}_t = z_t - d_t \hat{\kappa}` and :math:`\hat{\Omega}` is
    defined above.

    The specification of the :math:`\hat{P}_z` test statistic when trend is
    "n" differs from the expression in [1]_. We recenter :math:`z_t` by
    subtracting the first observation, so that
    :math:`\tilde{z}_t = z_t - z_1`. This is needed to ensure that the
    initial value does not affect the distribution under the null. When the
    trend is anything other than "n", this step is not needed and the test
    statistic is identical whether the first observation is subtracted or
    not.

    References
    ----------
    .. [1] Phillips, P. C., & Ouliaris, S. (1990). Asymptotic properties of
       residual based tests for cointegration. Econometrica: Journal of the
       Econometric Society, 165-193.
    """
    test_type = test_type.lower()
    if test_type not in ("za", "zt", "pu", "pz"):
        raise ValueError(
            f"Unknown test_type: {test_type}. Only Za, Zt, Pu and Pz are supported."
        )
    # Kernel names are matched without hyphens or underscores
    kernel = kernel.lower().replace("-", "").replace("_", "")
    if kernel not in KERNEL_ESTIMATORS:
        raise ValueError(KERNEL_ERR)
    y = ensure2d(y, "y")
    x = ensure2d(x, "x")
    xsection = _cross_section(y, x, trend)
    data = xsection.model.data
    # Keep only the leading x.shape[1] regressor columns (the columns of x)
    x_df = data.orig_exog.iloc[:, : x.shape[1]]
    z = pd.concat([data.orig_endog, x_df], axis=1)
    if test_type in ("pu", "pz"):
        return _po_ptests(z, xsection, test_type, trend, kernel, bandwidth, force_int)
    return _po_ztests(z, xsection, test_type, trend, kernel, bandwidth, force_int)