Python rank_data Beispiele, copulae.core.rank_data Python Beispiele

Beispiel #1

0

Datei anzeigen

    def pdf(self, u: Array, log=False):
        """
        Density of the beta-smoothed empirical copula

        Parameters
        ----------
        u
            Points at which the density is evaluated. They are first passed through ``self.pobs`` together with
            the instance's tie-handling setting.

        log: bool
            If True, the log-density is returned instead of the density

        Returns
        -------
        ndarray
            One (log-)density value for each row iterated from the transformed `u`

        Raises
        ------
        AssertionError
            If the smoothing method is not 'beta' or if ``self.data`` is not a numpy array
        """
        assert self.smoothing == "beta", "Empirical Copula only has density (PDF) for smoothing = 'beta'"
        assert isinstance(self.data, np.ndarray), "data is still undefined for EmpiricalCopula"

        points = self.pobs(u, self._ties)
        ranks = rank_data(self.data, 1, self._ties)
        n_obs = len(self.data)

        if not log:
            densities = []
            for point in points:
                # every rank row contributes the product of its Beta(rank, n + 1 - rank) densities at this point
                contributions = [np.prod(beta.pdf(point, a=rank_row, b=n_obs + 1 - rank_row)) for rank_row in ranks]
                densities.append(sum(contributions))
            return np.array(densities) / (n_obs + self._offset)

        log_densities = []
        for point in points:
            # same mixture as above, but accumulated on the log scale: products become sums of log-densities
            log_terms = np.array([sum(beta.logpdf(point, a=rank_row, b=n_obs + 1 - rank_row)) for rank_row in ranks])
            log_densities.append(log_sum(log_terms))
        return np.array(log_densities) - np.log(n_obs + self._offset)

Beispiel #2

0

Datei anzeigen

    def gof_sn(self):
        r"""
        Runs the "Sn" goodness-of-fit test described in Genest et al. (2009)

        The empirical copula is compared against a parametric estimate of the copula obtained under the null
        hypothesis. The p-value is approximated by simulating from the fitted copula.

        Notes
        -----
        Given the pseudo-observations :math:`U_{ij} \forall i = 1, \dots, n, j = 1, \dots, d` and the empirical copula
        given by :math:`C_n(\textbf{u}) = \frac{1}{n} \sum^n_{i=1} \textbf{I}(U_{i1} \leq u_1, \dots, U_{id} \leq u_d)`
        where :math:`\textbf{u} \in [0, 1]^d`, the null hypothesis, :math:`H_0` thus tests if

        .. math::

            C \in C_0

        where :math:`C_0` is the true class of the copulae under :math:`H_0`. The test statistic T is defined as

        .. math::

            T = n \int_{[0, 1]^d} [C_n(\textbf{u}) - C_{\theta_n}(\textbf{u})]^2 dC_n(\textbf{u})

        where :math:`C_{\theta_n}(\textbf{u})` is the estimation of :math:`C` under :math:`H_0`.

        The approximate p-value is then given by:

        .. math::

            \sum^M_{k=1} \textbf{I}(|T_k| \geq |T|) / M


        Returns
        -------
        dict
            A dictionary with the keys "method", "parameter", "statistic" and "pvalue"
        """
        observed = gof_t_stat(self._copula, self._u, to_pobs=False)

        # sorted, zero-based ranks of the original data; computed once up front so that every simulated
        # sample can be rearranged with them later. Only needed when the data contains ties.
        if self._has_ties:
            tie_ranks = np.sort(rank_data(self._data, 1), 0).astype(int) - 1
        else:
            tie_ranks = None

        simulated = np.full(self._reps, np.nan)
        for rep in range(self._reps):
            sample = self._copula.random(len(self._data))

            if self._has_ties:
                sample = self._sort_data_by_column_inplace(sample, tie_ranks)

            simulated[rep] = gof_t_stat(self._copula, sample, self._ties)

        # the +0.5 / +1 adjustment keeps the estimate strictly inside (0, 1)
        exceedances = sum(np.abs(simulated) >= abs(observed))
        p_value = (exceedances + 0.5) / (self._reps + 1)

        return {
            "method": f"Parametric bootstrap-based goodness-of-fit of {self._copula.name} with Sn",
            "parameter": self._copula.params,
            "statistic": observed,
            "pvalue": p_value,
        }

Beispiel #3

0

Datei anzeigen

    def variance(self, typ: str):
        """
        Variance of the Inversion of a Rank Correlation Measure Estimator

        The implementation is incomplete: the per-pair terms are assembled into a working matrix, after which
        the method unconditionally raises ``NotImplementedError``.

        Parameters
        ----------
        typ: {'itau', 'irho'}
            The type of rank correlation measure to use. Any value other than 'itau' takes the 'irho' branch.

        Returns
        -------
        float:
            Variance of the inversion of rank correlation measure estimator (intended; nothing is returned yet)

        Raises
        ------
        NotImplementedError
            Always, once the per-pair terms have been computed
        """

        u = self.data
        dim = self.copula.dim
        nrow = len(self.data)
        ncol = dim * (dim - 1) // 2
        v = np.zeros((nrow, ncol))

        # `v` has one column per unordered pair (i, j) with i < j. The column is the running position of the
        # pair in loop order; the earlier formula `i * (dim - 1) + j` overshot `ncol` (e.g. column 1 for dim=2).
        pairs = [(i, j) for i in range(dim - 1) for j in range(i + 1, dim)]

        if typ == 'itau':
            for col, (i, j) in enumerate(pairs):
                # NOTE(review): np.sum without an axis collapses the whole comparison matrix to one scalar, so
                # `ec` is identical for every row. A per-observation value (axis=1) may have been intended -
                # confirm against the reference algorithm.
                ec = 2 * np.sum([(u[:, i] <= u[k, i]) &
                                 (u[:, j] <= u[k, j])
                                 for k in range(nrow)]) / nrow
                v[:, col] = ec - u[:, i] - u[:, j]
        else:
            # NOTE(review): this branch looks unfinished. The "+ 1" suggests 1-based indices carried over from
            # another language, and `a` / `b` appear to end up with nrow + 1 entries (a leading 0 is prepended
            # after indexing), which would not broadcast against the length-nrow product below. Confirm before
            # relying on it.
            order = np.argsort(
                -u,
                1,
            ) + 1
            ordb = rank_data(u, 1)

            for col, (i, j) in enumerate(pairs):
                a = np.array([
                    0,
                    *np.cumsum(u[order[:, i], j][nrow - ordb[:, i]]) / nrow
                ])
                b = np.array([
                    0,
                    *np.cumsum(u[order[:, j], i][nrow - ordb[:, j]]) / nrow
                ])
                v[:, col] = u[:, i] * u[:, j] + a + b
        # TODO complete rest of function GETL

        raise NotImplementedError

Beispiel #4

0

Datei anzeigen

    def __init__(self, data: Union[pd.DataFrame, np.ndarray], ties: str, fit_ties):
        """
        Stores the data and derives the pseudo-observations and rank information from it

        Parameters
        ----------
        data
            Two-dimensional data. A DataFrame is converted with ``to_numpy``, anything else with ``np.asarray``

        ties
            Tie-handling method passed to ``pseudo_obs`` for the main pseudo-observations

        fit_ties
            Tie-handling method passed to ``pseudo_obs`` for the fitted pseudo-observations
        """
        if isinstance(data, pd.DataFrame):
            self.data = data.to_numpy()
        else:
            self.data = np.asarray(data)
        self.ties = ties
        self.fit_ties = fit_ties

        # a column holds ties as soon as it has fewer distinct values than there are rows
        nrow, ncol = self.data.shape
        self.has_ties = any(len(np.unique(self.data[:, col])) != nrow for col in range(ncol))

        # data used for fitting the main copula
        self.pobs = pseudo_obs(self.data, ties=ties)

        # pseudo-observations ranked with `fit_ties`; only recomputed when the data has ties and the two
        # tie-handling methods differ, otherwise the main pseudo-observations are reused
        if self.has_ties and ties != fit_ties:
            self.fitted_pobs = pseudo_obs(self.data, ties=fit_ties)
        else:
            self.fitted_pobs = self.pobs

        # column-wise sorted ranks of the data, shifted down by one so that they start at zero
        ranks = rank_data(self.data, 1)
        self._duplicated_rank_array = np.sort(ranks, 0).astype(int) - 1

Beispiel #5

0

Datei anzeigen

Datei: est_cor_inversion.py Projekt: chrisburr/copulae

    def variance(self, typ: str):
        """
        Variance of the Inversion of a Rank Correlation Measure Estimator

        The implementation is incomplete: the per-pair terms are assembled into a working matrix, after which
        the method unconditionally raises ``NotImplementedError``.

        Parameters
        ----------
        typ: {'itau', 'irho'}
            The type of rank correlation measure to use. Any value other than 'itau' takes the 'irho' branch.

        Returns
        -------
        float:
            Variance of the inversion of rank correlation measure estimator (intended; nothing is returned yet)

        Raises
        ------
        NotImplementedError
            Always, once the per-pair terms have been computed
        """

        u = self.data
        dim = self.copula.dim
        nrow = len(self.data)
        ncol = dim * (dim - 1) // 2
        v = np.zeros((nrow, ncol))

        # `v` has one column per unordered pair (i, j) with i < j. The column is the running position of the
        # pair in loop order; the earlier formula `i * (dim - 1) + j` overshot `ncol` (e.g. column 1 for dim=2).
        pairs = [(i, j) for i in range(dim - 1) for j in range(i + 1, dim)]

        if typ == 'itau':
            for col, (i, j) in enumerate(pairs):
                # NOTE(review): np.sum without an axis collapses the whole comparison matrix to one scalar, so
                # `ec` is identical for every row. A per-observation value (axis=1) may have been intended -
                # confirm against the reference algorithm.
                ec = 2 * np.sum([(u[:, i] <= u[k, i]) & (u[:, j] <= u[k, j]) for k in range(nrow)]) / nrow
                v[:, col] = ec - u[:, i] - u[:, j]
        else:
            # NOTE(review): this branch looks unfinished. The "+ 1" suggests 1-based indices carried over from
            # another language, and `a` / `b` appear to end up with nrow + 1 entries (a leading 0 is prepended
            # after indexing), which would not broadcast against the length-nrow product below. Confirm before
            # relying on it.
            order = np.argsort(-u, 1, ) + 1
            ordb = rank_data(u, 1)

            for col, (i, j) in enumerate(pairs):
                a = np.array([0, *np.cumsum(u[order[:, i], j][nrow - ordb[:, i]]) / nrow])
                b = np.array([0, *np.cumsum(u[order[:, j], i][nrow - ordb[:, j]]) / nrow])
                v[:, col] = u[:, i] * u[:, j] + a + b
        # TODO complete rest of function GETL

        raise NotImplementedError

Beispiel #6

0

Datei anzeigen

def rad_sym_test(x, N=1000, ties: Ties = 'average'):
    r"""
    Test of Radial Symmetry for a Multivariate Copula.

    Test for assessing the radial symmetry of the underlying multivariate copula based on the empirical copula. The
    test statistic is a multivariate extension of the definition adopted in the first reference. An approximate
    p-value for the test statistic is obtained by means of an appropriate bootstrap which can take the presence of
    ties in the component series of the data into account; see the second reference.

    A random vector :math:`X` is called radially symmetric (for d = 1 simply symmetric) about :math:`a \in R^d` if
    :math:`X - a = a - X`, that is, if :math:`X - a` and :math:`a - X` are equal in distribution. In a hand-wavy
    manner, perhaps the consequence of the radial symmetry test is to verify if an elliptical copula should be used
    to fit the data as elliptical copulas are radial symmetric.

    Parameters
    ----------
    x: {array_like, pandas.DataFrame}
        A matrix like data structure

    N: int
        Number of bootstrap iterations to be used to simulate realizations of the test statistic under the null
        hypothesis

    ties
        String specifying how ranks should be computed if there are ties in any of the coordinate samples. Options
        include 'average', 'min', 'max', 'dense', 'ordinal'.

    Returns
    -------
    TestStatistic
        Test statistics for the radial symmetry test. The null hypothesis assumes that the vectors are radially
        symmetric. Thus a small p-value will indicate evidence against radial symmetry

    Raises
    ------
    AssertionError
        If `N` is not a positive integer or if `x` is not 2-dimensional

    Examples
    --------
    >>> from copulae.datasets import load_danube
    >>> from copulae.gof import rad_sym_test

    >>> danube = load_danube()
    >>> test_stats = rad_sym_test(danube)
    >>> print(test_stats.p_value)

    A small p-value here indicates strong evidence against radial symmetry.

    References
    ----------
    Genest, C. and G. Nešlehová, J. (2014). On tests of radial symmetry for bivariate copulas. Statistical Papers 55,
    1107–1119.

    Kojadinovic, I. (2017). Some copula inference procedures adapted to the presence of ties. Computational Statistics
    and Data Analysis 112, 24–41, http://arxiv.org/abs/1609.05519.
    """
    x = np.asarray(x)

    # The message previously named the exchangeability test (copy-paste slip); it now names this test.
    assert isinstance(N, int) and N >= 1, \
        "number of replications for radial symmetry test must be a positive integer"
    assert x.ndim == 2, "input data must be a 2-dimensional matrix"

    n, p = x.shape
    u = pseudo_obs(x, ties)

    # statistic of the observed sample; the pseudo-observations are handed over flattened in column-major order
    s = rad_sym_test_stat(u.ravel('F'), n, p)

    # a column holds ties as soon as it has fewer distinct values than there are rows
    has_ties = any(len(np.unique(x[:, i])) != n for i in range(p))

    # zero-based integer ranks of the column-wise sorted pseudo-observations, shared by all replicates
    ir = np.floor(rank_data(np.sort(u, 0), axis=1)).astype(int) - 1
    s0 = np.array([rad_sym_replicate(u, ir, n, p, has_ties) for _ in range(N)])

    # the +0.5 / +1 adjustment keeps the approximate p-value strictly inside (0, 1)
    return TestStatistic(
        s, (np.sum(s0 >= s) + 0.5) / (N + 1),
        "Test of radial symmetry based on the empirical copula")

Beispiel #7

0

Datei anzeigen

Datei: exchangeability.py Projekt: otreewen2020/copulae

def exch_test(x, y, N=1000, m=0, ties='average'):
    r"""
    Test of Exchangeability for a Bivariate Copula.

    Test for assessing the exchangeability of the underlying bivariate copula based on the empirical copula.
    The test statistics are defined in the first two references. Approximate p-values for the test statistics are
    obtained by means of a multiplier technique if there are no ties in the component series of the bivariate
    data, or by means of an appropriate bootstrap otherwise.

    A random vector X is called exchangeable iff :math:`(X_1, \dots, X_d) = (X_{\pi(1)}, \dots, X_{\pi(d)})`
    in distribution for any permutation :math:`(\pi(1), \pi(2), \dots, \pi(d))` of :math:`(1, \dots, d)`.

    A copula C is called exchangeable iff C is the distribution function of an exchangeable random vector
    (with uniform marginal distributions on [0, 1]). For such a copula
    :math:`C(u_1, u_2, \dots, u_d) = C(u_{\pi(1)}, u_{\pi(2)}, \dots, u_{\pi(d)})` holds for any permutation
    :math:`(\pi(1), \pi(2), \dots, \pi(d))` of :math:`(1, \dots, d)`.

    Examples of exchangeable copulas:
        Gumbel, Clayton, and also the Gaussian copula :math:`C_P^{Ga}` and the t-Copula :math:`C_{\nu,P}^t`, if
        P is an equicorrelation matrix, i.e. :math:`P = \rho J_d + (1 - \rho)I_d`. :math:`J_d \in R^{d \times d}`
        is a matrix consisting only of ones, and :math:`I_d \in R^{d \times d}` is the d-dimensional identity
        matrix.

    For bivariate exchangeable copulas we have:

    .. math::

        P(U_2 \leq u_2|U_1 = u_1) = P(U_1 \leq u_2|U_2 = u_1).

    Parameters
    ----------
    x: array_like
        first vector for the exchangeability test

    y: array_like
        second vector for the exchangeability test

    N: int
        Number of multiplier or bootstrap iterations to be used to simulate realizations of the test statistic under
        the null hypothesis.

    m: int
        If m = 0, integration in the Cramér–von Mises statistic is carried out with respect to the empirical copula.
        If m > 0, integration is carried out with respect to the Lebesgue measure and m specifies the size of the
        integration grid.

    ties: str, optional
        String specifying how ranks should be computed if there are ties in any of the coordinate samples. Options
        include 'average', 'min', 'max', 'dense', 'ordinal'.

    Returns
    -------
    TestStatistic
        Test statistics for the exchangeability test. The null hypothesis assumes that the vectors are exchangeable.
        Thus a small p-value will indicate evidence against exchangeability

    Raises
    ------
    AssertionError
        If `m` is not an integer >= 0, if `N` is not a positive integer, or if the pseudo-observations of `x`
        or `y` are not 1-dimensional

    Examples
    --------
    >>> from copulae.datasets import load_danube
    >>> from copulae.gof import exch_test
    >>> danube = load_danube().values
    >>> test_stats = exch_test(danube[:, 0], danube[:, 1])
    >>> print(test_stats.p_value)

    A small p-value here indicates strong evidence against exchangeability

    References
    ----------
    Genest, C., G. Nešlehová, J. and Quessy, J.-F. (2012). Tests of symmetry for bivariate copulas. Annals of the
    Institute of Statistical Mathematics 64, 811–834.

    Kojadinovic, I. and Yan, J. (2012). A nonparametric test of exchangeability for extreme-value and left-tail
    decreasing bivariate copulas. The Scandinavian Journal of Statistics 39:3, 480–496.

    Kojadinovic, I. (2017). Some copula inference procedures adapted to the presence of ties. Computational Statistics
    and Data Analysis 112, 24–41, http://arxiv.org/abs/1609.05519.
    """
    # These two checks do not depend on the data, so they run first: a bad setting is reported before any
    # ranking work is done.
    assert isinstance(m, int) and m >= 0, "size of the integration grid must be an integer >= 0"
    assert isinstance(N, int) and N >= 1, \
        "number of replications for exchangeability test must be a positive integer"

    x = pseudo_obs(x, ties)
    y = pseudo_obs(y, ties)

    # Checked before stacking so that non-vector input yields this message rather than a stacking error.
    assert x.ndim == 1 and y.ndim == 1, "x and y must be vectors. Exchangeability tests is bivariate"
    u = np.vstack([x, y]).T

    n = len(u)
    if m > 0:
        # regular m x m grid over the unit square, built from m equally spaced points per axis
        xis = np.linspace(1 / m, 1 - 1 / m, m)
        g = np.stack([np.tile(xis, m), np.repeat(xis, m)]).T
        ng = m * m
    else:
        # m == 0: evaluate at the pseudo-observations themselves
        g = u
        ng = n

    s = exch_test_stat(u, g, n, ng)

    has_ties = len(np.unique(x)) != n or len(np.unique(y)) != n

    if has_ties:
        # zero-based integer ranks of the column-wise sorted pseudo-observations, shared by all replicates
        ir = np.floor(rank_data(np.sort(u, 0), axis=1)).astype(int) - 1
        s0 = np.asarray(
            [exch_replication(ir, u, g, n, m, ng) for _ in range(N)])

    else:
        s0 = exch_test_cn(u, g, n, ng, N)

    # the +0.5 / +1 adjustment keeps the approximate p-value strictly inside (0, 1)
    return TestStatistic(s, (np.sum(s0 >= s) + 0.5) / (N + 1),
                         "Test of exchangeability for bivariate copulas")