def pdf(self, u: Array, log=False):
    assert self.smoothing == "beta", "Empirical Copula only has density (PDF) for smoothing = 'beta'"
    assert isinstance(self.data, np.ndarray), "data is still undefined for EmpiricalCopula"

    u = self.pobs(u, self._ties)
    data_rank = rank_data(self.data, 1, self._ties)
    n = len(self.data)

    if log:
        return np.array([
            log_sum(np.array([
                sum(beta.logpdf(row, a=row_rank, b=n + 1 - row_rank))
                for row_rank in data_rank
            ]))
            for row in u
        ]) - np.log(n + self._offset)
    else:
        return np.array([
            sum([
                np.prod(beta.pdf(row, a=row_rank, b=n + 1 - row_rank))
                for row_rank in data_rank
            ])
            for row in u
        ]) / (n + self._offset)
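# A minimal standalone sketch of the beta-smoothed density computed in `pdf` above, for
# readers who want the formula without the class machinery: for each query point u in
# (0, 1)^d, average over the n observed rank vectors R_i the product of
# Beta(R_ij, n + 1 - R_ij) densities. The helper name `beta_copula_pdf` is hypothetical,
# and the sketch ignores the ties handling and the `_offset` correction used above.
import numpy as np
from scipy.stats import beta, rankdata


def beta_copula_pdf(u: np.ndarray, data: np.ndarray) -> np.ndarray:
    n, d = data.shape
    ranks = np.apply_along_axis(rankdata, 0, data)  # column-wise ranks, shape (n, d)
    out = np.empty(len(u))
    for k, row in enumerate(u):
        kernels = beta.pdf(row, a=ranks, b=n + 1 - ranks)  # beta kernels, shape (n, d)
        out[k] = kernels.prod(axis=1).sum() / n
    return out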
def gof_sn(self): r""" Performs the "Sn" gof test, described in Genest et al. (2009) Compares the empirical copula against a parametric estimate of the copula derived under the null hypothesis. Notes ----- Given the pseudo-observations :math:`U_{ij} \forall i = 1, \dots, n, j = 1, \dots, d` and the empirical copula given by :math:`C_n(\textbf{u}) = \frac{1}{n} \sum^n_{i=1} \textbf{I}(U_{i1} \leq u_1, \dots, U_{id} \leq u_d)` where :math:`\textbf{u} \in [0, 1]^d`, the null hypothesis, :math:`H_0` thus tests if .. math:: C \in C_0 where :math:`C_0` is the true class of the copulae under :math:`H_0`. The test statistic T is defined as .. math:: T = n \int_{[0, 1]^d} [C_n(\textbf{u}) - C_\theta_n(\textbf{u})]^2 dC_n(\textbf{u}) where :math:`C_\theta_n(\textbf{u})` is the estimation of :math:`C` under :math:`H_0`. The approximate p-value is then given by: .. math:: \sum^M_{k=1} \textbf{I}(|T_k| \geq |T|) / M Returns ------- dict A dictionary of fitted results """ t = gof_t_stat(self._copula, self._u, to_pobs=False) # pre-calculate dupe ranks to sort random data with letter dupe_ranks = np.sort(rank_data(self._data, 1), 0).astype(int) - 1 if self._has_ties else None t0 = np.repeat(np.nan, self._reps) for i in range(self._reps): u_r = self._copula.random(len(self._data)) if self._has_ties: u_r = self._sort_data_by_column_inplace(u_r, dupe_ranks) t0[i] = gof_t_stat(self._copula, u_r, self._ties) return { "method": f"Parametric bootstrap-based goodness-of-fit of {self._copula.name} with Sn", "parameter": self._copula.params, "statistic": t, "pvalue": (sum(np.abs(t0) >= abs(t)) + 0.5) / (self._reps + 1) }
def variance(self, typ: str):
    """
    Variance of the Inversion of a Rank Correlation Measure Estimator

    Parameters
    ----------
    typ: {'itau', 'irho'}
        The type of rank correlation measure to use

    Returns
    -------
    float
        Variance of the inversion of rank correlation measure estimator
    """
    u = self.data
    dim = self.copula.dim
    nrow = len(self.data)
    ncol = dim * (dim - 1) // 2
    v = np.zeros((nrow, ncol))

    if typ == 'itau':
        for i in range(dim - 1):
            for j in range(i + 1, dim):
                ec = 2 * np.sum([(u[:, i] <= u[k, i]) & (u[:, j] <= u[k, j]) for k in range(nrow)]) / nrow
                v[:, i * (dim - 1) + j] = ec - u[:, i] - u[:, j]
    else:
        ord = np.argsort(-u, 1) + 1
        ordb = rank_data(u, 1)

        for i in range(dim - 1):
            for j in range(i + 1, dim):
                a = np.array([0, *np.cumsum(u[ord[:, i], j][nrow - ordb[:, i]]) / nrow])
                b = np.array([0, *np.cumsum(u[ord[:, j], i][nrow - ordb[:, j]]) / nrow])
                v[:, i * (dim - 1) + j] = u[:, i] * u[:, j] + a + b

    # TODO: complete rest of function
    raise NotImplementedError
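# For context on the estimator whose variance is computed above: the 'itau' approach
# inverts the known relationship between Kendall's tau and the copula parameter. A
# minimal sketch for a bivariate Clayton copula, where tau = theta / (theta + 2) and
# hence theta = 2 * tau / (1 - tau); `clayton_itau_estimate` is an illustrative name,
# not part of the library.
import numpy as np
from scipy.stats import kendalltau


def clayton_itau_estimate(x: np.ndarray, y: np.ndarray) -> float:
    tau, _ = kendalltau(x, y)
    return 2 * tau / (1 - tau)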
def __init__(self, data: Union[pd.DataFrame, np.ndarray], ties: str, fit_ties):
    self.data = data.to_numpy() if isinstance(data, pd.DataFrame) else np.asarray(data)
    self.ties = ties
    self.fit_ties = fit_ties

    self.has_ties = False
    nrow, ncol = self.data.shape
    for i in range(ncol):
        if len(np.unique(self.data[:, i])) != nrow:
            self.has_ties = True
            break

    # data used for fitting the main copula
    self.pobs = pseudo_obs(self.data, ties=ties)
    # pseudo-observations computed with the fit_ties method; falls back to pobs when the
    # methods coincide or there are no ties
    self.fitted_pobs = pseudo_obs(self.data, ties=fit_ties) if self.has_ties and ties != fit_ties else self.pobs

    self._duplicated_rank_array = np.sort(rank_data(self.data, 1), 0).astype(int) - 1
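# The pseudo-observations computed above are column-wise ranks rescaled to (0, 1) as
# rank / (n + 1); the `ties` argument only changes how tied values are ranked. A small
# illustration using scipy's rankdata directly (the demo function is not part of the
# library):
import numpy as np
from scipy.stats import rankdata


def pseudo_obs_ties_demo():
    sample = np.array([0.3, 0.1, 0.3, 0.7])
    n = len(sample)
    avg = rankdata(sample, method='average') / (n + 1)  # [0.5, 0.2, 0.5, 0.8]
    mx = rankdata(sample, method='max') / (n + 1)       # [0.6, 0.2, 0.6, 0.8]
    return avg, mx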
def rad_sym_test(x, N=1000, ties: Ties = 'average'):
    r"""
    Test of Radial Symmetry for a Multivariate Copula.

    Test for assessing the radial symmetry of the underlying multivariate copula based on the empirical
    copula. The test statistic is a multivariate extension of the definition adopted in the first
    reference. An approximate p-value for the test statistic is obtained by means of an appropriate
    bootstrap which can take the presence of ties in the component series of the data into account; see
    the second reference.

    A random vector :math:`X` is called radially symmetric (for d = 1 simply symmetric) about
    :math:`a \in R^d` if :math:`X - a = a - X`, that is, if :math:`X - a` and :math:`a - X` are equal in
    distribution.

    In a hand-wavy manner, the radial symmetry test can be seen as a check of whether an elliptical
    copula should be used to fit the data, as elliptical copulas are radially symmetric.

    Parameters
    ----------
    x: {array_like, pandas.DataFrame}
        A matrix like data structure

    N: int
        Number of bootstrap iterations to be used to simulate realizations of the test statistic under
        the null hypothesis

    ties: str, optional
        String specifying how ranks should be computed if there are ties in any of the coordinate
        samples. Options include 'average', 'min', 'max', 'dense', 'ordinal'.

    Returns
    -------
    TestStatistic
        Test statistics for the radial symmetry test. The null hypothesis assumes that the vectors are
        radially symmetric. Thus a small p-value will indicate evidence against radial symmetry

    Examples
    --------
    >>> from copulae.datasets import load_danube
    >>> from copulae.gof import rad_sym_test
    >>> danube = load_danube()
    >>> test_stats = rad_sym_test(danube)
    >>> print(test_stats.p_value)

    A small p-value here indicates strong evidence against radial symmetry.

    References
    ----------
    Genest, C. and G. Nešlehová, J. (2014). On tests of radial symmetry for bivariate copulas.
    Statistical Papers 55, 1107–1119.

    Kojadinovic, I. (2017). Some copula inference procedures adapted to the presence of ties.
    Computational Statistics and Data Analysis 112, 24–41, http://arxiv.org/abs/1609.05519.
    """
    x = np.asarray(x)

    assert isinstance(N, int) and N >= 1, \
        "number of replications for the radial symmetry test must be a positive integer"
    assert x.ndim == 2, "input data must be a 2-dimensional matrix"

    n, p = x.shape
    u = pseudo_obs(x, ties)
    s = rad_sym_test_stat(u.ravel('F'), n, p)

    has_ties = False
    for i in range(p):
        if len(np.unique(x[:, i])) != n:
            has_ties = True
            break

    ir = np.floor(rank_data(np.sort(u, 0), axis=1)).astype(int) - 1
    s0 = np.array([rad_sym_replicate(u, ir, n, p, has_ties) for _ in range(N)])

    return TestStatistic(
        s,
        (np.sum(s0 >= s) + 0.5) / (N + 1),
        "Test of radial symmetry based on the empirical copula"
    )
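# The idea behind the statistic above: under radial symmetry the copula of U and the
# copula of 1 - U coincide, so the empirical copula of the pseudo-observations should
# match that of their reflections. A conceptual Cramér–von Mises style comparison
# (`radial_asymmetry_measure` is illustrative only, not a reimplementation of
# rad_sym_test_stat):
import numpy as np


def radial_asymmetry_measure(pobs: np.ndarray) -> float:
    reflected = 1 - pobs
    diffs = []
    for pt in pobs:
        cn = (pobs <= pt).all(axis=1).mean()           # empirical copula C_n(pt)
        cn_bar = (reflected <= pt).all(axis=1).mean()  # empirical copula of 1 - U at pt
        diffs.append(cn - cn_bar)
    return float(np.sum(np.square(diffs)))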
def exch_test(x, y, N=1000, m=0, ties='average'):
    r"""
    Test of Exchangeability for a Bivariate Copula.

    Test for assessing the exchangeability of the underlying bivariate copula based on the empirical
    copula. The test statistics are defined in the first two references. Approximate p-values for the
    test statistics are obtained by means of a multiplier technique if there are no ties in the
    component series of the bivariate data, or by means of an appropriate bootstrap otherwise.

    A random vector :math:`X` is called exchangeable iff
    :math:`(X_1, \dots, X_d) = (X_{\pi(1)}, \dots, X_{\pi(d)})` (equality in distribution) for any
    permutation :math:`(\pi(1), \pi(2), \dots, \pi(d))` of :math:`(1, \dots, d)`.

    A copula :math:`C` is called exchangeable iff :math:`C` is the distribution function of an
    exchangeable random vector (with uniform marginal distributions on [0, 1]). For such a copula

    .. math:: C(u_1, u_2, \dots, u_d) = C(u_{\pi(1)}, u_{\pi(2)}, \dots, u_{\pi(d)})

    holds for any permutation :math:`(\pi(1), \pi(2), \dots, \pi(d))` of :math:`(1, \dots, d)`.

    Examples of exchangeable copulas: Gumbel, Clayton, and also the Gaussian copula :math:`C_P^{Ga}`
    and the t-copula :math:`C_{\nu, P}^t`, if :math:`P` is an equicorrelation matrix, i.e.
    :math:`P = \rho J_d + (1 - \rho) I_d`, where :math:`J_d \in R^{d \times d}` is a matrix consisting
    only of ones and :math:`I_d \in R^{d \times d}` is the d-dimensional identity matrix.

    For bivariate exchangeable copulas we have:

    .. math:: P(U_2 \leq u_2 | U_1 = u_1) = P(U_1 \leq u_2 | U_2 = u_1).

    Parameters
    ----------
    x: array_like
        first vector for the exchangeability test

    y: array_like
        second vector for the exchangeability test

    N: int
        Number of multiplier or bootstrap iterations to be used to simulate realizations of the test
        statistic under the null hypothesis.

    m: int
        If m = 0, integration in the Cramér–von Mises statistic is carried out with respect to the
        empirical copula. If m > 0, integration is carried out with respect to the Lebesgue measure and
        m specifies the size of the integration grid.

    ties: str, optional
        String specifying how ranks should be computed if there are ties in any of the coordinate
        samples. Options include 'average', 'min', 'max', 'dense', 'ordinal'.

    Returns
    -------
    TestStatistic
        Test statistics for the exchangeability test. The null hypothesis assumes that the vectors are
        exchangeable. Thus a small p-value will indicate evidence against exchangeability

    Examples
    --------
    >>> from copulae.datasets import load_danube
    >>> from copulae.gof import exch_test
    >>> danube = load_danube().values
    >>> test_stats = exch_test(danube[:, 0], danube[:, 1])
    >>> print(test_stats.p_value)

    A small p-value here indicates strong evidence against exchangeability

    References
    ----------
    Genest, C., G. Nešlehová, J. and Quessy, J.-F. (2012). Tests of symmetry for bivariate copulas.
    Annals of the Institute of Statistical Mathematics 64, 811–834.

    Kojadinovic, I. and Yan, J. (2012). A nonparametric test of exchangeability for extreme-value and
    left-tail decreasing bivariate copulas. The Scandinavian Journal of Statistics 39:3, 480–496.

    Kojadinovic, I. (2017). Some copula inference procedures adapted to the presence of ties.
    Computational Statistics and Data Analysis 112, 24–41, http://arxiv.org/abs/1609.05519.
    """
    x = pseudo_obs(x, ties)
    y = pseudo_obs(y, ties)
    u = np.vstack([x, y]).T

    assert isinstance(m, int) and m >= 0, "size of the integration grid must be an integer >= 0"
    assert x.ndim == 1 and y.ndim == 1, "x and y must be vectors. The exchangeability test is bivariate"
    assert isinstance(N, int) and N >= 1, \
        "number of replications for exchangeability test must be a positive integer"

    n = len(u)
    if m > 0:
        xis = np.linspace(1 / m, 1 - 1 / m, m)
        g = np.stack([np.tile(xis, m), np.repeat(xis, m)]).T
        ng = m * m
    else:
        g = u
        ng = n

    s = exch_test_stat(u, g, n, ng)

    has_ties = len(np.unique(x)) != n or len(np.unique(y)) != n
    if has_ties:
        ir = np.floor(rank_data(np.sort(u, 0), axis=1)).astype(int) - 1
        s0 = np.asarray([exch_replication(ir, u, g, n, m, ng) for _ in range(N)])
    else:
        s0 = exch_test_cn(u, g, n, ng, N)

    return TestStatistic(
        s,
        (np.sum(s0 >= s) + 0.5) / (N + 1),
        "Test of exchangeability for bivariate copulas"
    )