Esempio n. 1
0
def cdf(x, mu, loc=0, scale=1):
    """
    Cumulative distribution function of the inverse Gaussian distribution.

    The parameters are checked by `_validate_params`, and `x` is converted
    to an `mpmath.mpf`.  For ``x <= loc`` the result is zero.  Otherwise,
    with ``z = (x - loc)/scale``, the result is::

        ncdf((z/mu - 1)/sqrt(z)) + exp(2/mu)*ncdf(-(z/mu + 1)/sqrt(z))

    The calculation is carried out with five extra digits of precision.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        x = mpmath.mpf(x)
        if x <= loc:
            return mpmath.mp.zero
        z = (x - loc) / scale
        # Quantities shared by both terms of the CDF.
        ratio = z / mu
        root_z = mpmath.sqrt(z)
        first = mpmath.ncdf((ratio - 1) / root_z)
        second = mpmath.exp(2 / mu) * mpmath.ncdf(-(ratio + 1) / root_z)
        return first + second
Esempio n. 2
0
def logcdf(x, mu, loc=0, scale=1):
    """
    Natural logarithm of the inverse Gaussian CDF.

    For ``x <= loc`` the result is ``-inf``.  Otherwise the logarithms of
    the two terms of the CDF are formed separately and combined as
    ``t1 + log1p(exp(t2 - t1))``, so the CDF itself is never formed
    explicitly.

    The calculation is carried out with five extra digits of precision.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        x = mpmath.mpf(x)
        if x <= loc:
            return -mpmath.mp.inf
        z = (x - loc) / scale
        ratio = z / mu
        root_z = mpmath.sqrt(z)
        # Log of each of the two terms whose sum is the CDF.
        log_first = mpmath.log(mpmath.ncdf((ratio - 1) / root_z))
        log_second = (2 / mu) + mpmath.log(mpmath.ncdf(-(ratio + 1) / root_z))
        # log(exp(log_first) + exp(log_second))
        return log_first + mpmath.log1p(mpmath.exp(log_second - log_first))
Esempio n. 3
0
def logsf(x, mu, loc=0, scale=1):
    """
    Natural logarithm of the inverse Gaussian survival function.

    For ``x <= loc`` the result is zero (the log of a survival probability
    of one).  Otherwise the survival function is treated as the difference
    of two terms; their logarithms are formed separately and combined as
    ``t1 + log1p(-exp(t2 - t1))``.

    The calculation is carried out with five extra digits of precision.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        x = mpmath.mpf(x)
        if x <= loc:
            return mpmath.mp.zero
        z = (x - loc) / scale
        ratio = z / mu
        root_z = mpmath.sqrt(z)
        # Log of the leading term and of the term subtracted from it.
        log_lead = mpmath.log(mpmath.ncdf(-(ratio - 1) / root_z))
        log_sub = 2 / mu + mpmath.log(mpmath.ncdf(-(ratio + 1) / root_z))
        # log(exp(log_lead) - exp(log_sub))
        return log_lead + mpmath.log1p(-mpmath.exp(log_sub - log_lead))
Esempio n. 4
0
def _norm_delta_cdf(a, b):
    """
    Return CDF(b) - CDF(a), where CDF is the standard normal CDF.

    The caller is expected to pass a <= b.  Equal arguments give exactly
    zero.  The calculation is carried out with five extra digits of
    precision.
    """
    with mpmath.extradps(5):
        if a == b:
            return mpmath.mp.zero
        if a > 0:
            # Both points are in the upper tail: evaluate the difference at
            # the reflected points -b <= -a instead (presumably to avoid
            # subtracting two values that are both close to 1).
            return mpmath.ncdf(-a) - mpmath.ncdf(-b)
        return mpmath.ncdf(b) - mpmath.ncdf(a)
Esempio n. 5
0
def sf(x, mu=0, sigma=1):
    """
    Survival function of the log-normal distribution.

    `sigma` is checked by `_validate_sigma`.  For ``x <= 0`` the result
    is one.
    """
    _validate_sigma(sigma)
    if x <= 0:
        return mpmath.mp.one
    # Upper tail of the normal distribution at log(x), obtained by
    # negating both the argument and the mean.
    neg_log_x = -mpmath.log(x)
    return mpmath.ncdf(neg_log_x, -mu, sigma)
Esempio n. 6
0
def cdf(x, mu=0, sigma=1):
    """
    Cumulative distribution function of the log-normal distribution.

    `sigma` is checked by `_validate_sigma`.  For ``x <= 0`` the result
    is zero; otherwise it is the normal CDF (mean `mu`, standard
    deviation `sigma`) evaluated at ``log(x)``.
    """
    _validate_sigma(sigma)
    if x <= 0:
        return mpmath.mp.zero
    log_x = mpmath.log(x)
    return mpmath.ncdf(log_x, mu, sigma)
Esempio n. 7
0
def invsf(p, a, b):
    """
    Inverse of the survival function of the standard normal distribution
    truncated to the interval [a, b].

    `a` and `b` are checked by `_validate_params`.  If `p` is less than 0
    or greater than 1, nan is returned.
    """
    _validate_params(a, b)
    if p < 0 or p > 1:
        return mpmath.nan

    with mpmath.extradps(5):
        p, a, b = mpmath.mpf(p), mpmath.mpf(a), mpmath.mpf(b)

        # Probability mass of the untruncated normal between a and b.
        mass = _norm_delta_cdf(a, b)
        # Untruncated CDF value at the requested point, measured down
        # from the upper truncation point.
        target = -p * mass + mpmath.ncdf(b)
        return normal.invcdf(target)
Esempio n. 8
0
def invcdf(p, a, b):
    """
    Inverse of the CDF of the standard normal distribution truncated to
    the interval [a, b].

    Also called the quantile function or the percent point function.

    `a` and `b` are checked by `_validate_params`.  If `p` is less than 0
    or greater than 1, nan is returned.
    """
    _validate_params(a, b)
    if p < 0 or p > 1:
        return mpmath.nan

    with mpmath.extradps(5):
        p, a, b = mpmath.mpf(p), mpmath.mpf(a), mpmath.mpf(b)

        # Probability mass of the untruncated normal between a and b.
        mass = _norm_delta_cdf(a, b)
        # Untruncated CDF value at the requested point, measured up from
        # the lower truncation point.
        target = p * mass + mpmath.ncdf(a)
        return normal.invcdf(target)
Esempio n. 9
0
def mpmath_normal_cdf2(x, y, r):
    """
    Evaluate normal_cdf2(x, y, r) with mpmath; judging by the reduction
    formulas used, this is the bivariate standard normal CDF with
    correlation r.

    This function produces correct results for inputs currently present in
    /test/Tests/Data/SpecialFunctionsValues.  Other inputs may fall into
    areas where the algorithms used here produce incorrect results and may
    require modifying this function.
    """
    zero = mpmath.mpf('0')

    # Infinite arguments.
    if x == -mpmath.inf or y == -mpmath.inf:
        return zero
    if x == mpmath.inf:
        return mpmath.ncdf(y)
    if y == mpmath.inf:
        return mpmath.ncdf(x)

    # Correlation of exactly +1 or -1.
    if r == mpmath.mpf('1'):
        return mpmath.ncdf(min(x, y))
    if r == mpmath.mpf('-1'):
        if x <= -y:
            return zero
        return mpmath.ncdf(x) - mpmath.ncdf(-y)

    # Make x the argument with the larger magnitude.
    if abs(y) > abs(x):
        x, y = y, x

    if r < 0:
        # phi(x,y,r) = phi(x,inf,r) - phi(x,-y,-r), clamped at zero
        flipped = mpmath_normal_cdf2(x, -y, -r)
        return max(mpmath.ncdf(x) - flipped, zero)

    if x + y > 0:
        # phi(x,y,r) = phi(-x,-y,r) + phi(x,y,-1)
        reflected = mpmath_normal_cdf2(-x, -y, r)
        if x <= -y:
            correction = zero
        else:
            correction = mpmath.ncdf(x) - mpmath.ncdf(-y)
        return reflected + correction

    def integrand(t):
        # The integrand is taken to be zero at the endpoints t = -1, +1.
        if abs(t) == mpmath.mpf('1'):
            return mpmath.mpf('0')
        omt2 = (1 - t) * (1 + t)
        factor = 1 / (2 * mpmath.pi * mpmath.sqrt(omt2))
        exponent = -(x * x + y * y - 2 * t * x * y) / (2 * omt2)
        return factor * mpmath.exp(exponent)

    result, err = mpmath.quad(integrand, [-1, r], error=True)

    # Report the case where the error estimate is not negligible relative
    # to the integral itself.
    if mpmath.mpf('1e50') * abs(err) > abs(result):
        print(f"Suspiciously big error when evaluating an integral for normal_cdf2({x}, {y}, {r}).")
        print(f"Integral: {result}")
        print(f"Integral error estimate: {err}")
    return result
    def calculate_pval(self,
                       gene_set: GeneSet,
                       max_pairs: int = None) -> GeneSetDataCorrelation:
        """
        Calculate the p-value for a single gene set: are its genes closer in
        space than expected?  The center (mean or median, as selected by the
        storage's summary type) of the gene set similarities is compared to
        the center of similarities between random pairs.

        :param gene_set: gene set whose genes are tested
        :param max_pairs: Should number of calculated similarities be limited
        :return: data with gene set pointer and pval, median and mean of
            similarity
        :raises ValueError: if the storage's summary type is neither MEAN
            nor MEDIAN
        :raises EnrichmentError: propagated unchanged from the similarity
            calculation
        """
        set_similarities_data = self.calculator.similarities(
            gene_set.genes, max_n_similarities=max_pairs, as_list=False)

        set_similarities = list(set_similarities_data.values())
        mean_set = mean_list(set_similarities)
        median_set = median(set_similarities)
        n = len(set_similarities)

        summary_type = self.storage._summary_type
        if summary_type == MEAN:
            center_set = mean_set
        elif summary_type == MEDIAN:
            # Use the median here; the mean belongs to the MEAN branch only.
            center_set = median_set
        else:
            raise ValueError('Possible summary types are', MEAN, 'and', MEDIAN)

        se = self.storage.get_se(n)
        center_random = self.storage._center
        # Upper-tail probability computed by symmetry,
        # 1 - ncdf(c, mu, se) == ncdf(-c, -mu, se), so that very small
        # p-values are not lost to cancellation in the subtraction from 1.
        p = float(mpmath.ncdf(-center_set, mu=-center_random, sigma=se))

        gene_set_data = GeneSetDataCorrelation(gene_set)
        gene_set_data.mean = mean_set
        gene_set_data.median = median_set
        gene_set_data.pval = p
        gene_set_data.most_similar = self.retain_most_similar(
            set_similarities_data, 10)
        return gene_set_data
Esempio n. 11
0
def odds_ratio(table, kind='conditional', alternative='two-sided'):
    r"""
    Odds ratio, with an associated p-value, for a 2x2 contingency table.

    Parameters
    ----------
    table : array_like of ints
        The 2x2 contingency table.  All elements must be non-negative
        integers.
    kind : str, optional
        Selects the odds ratio to compute: ``'sample'`` for the sample
        odds ratio or ``'conditional'`` for the conditional odds ratio.
        The default is ``'conditional'``.
    alternative : {'two-sided', 'less', 'greater'}, optional
        The alternative hypothesis (default is 'two-sided'):

        * 'two-sided'
        * 'less': one-sided
        * 'greater': one-sided

    Returns
    -------
    result : `OddsRatioResult` instance
        An object with two computed attributes:

        odds_ratio : mpmath.mpf
            * For ``kind='sample'``, the value
              ``table[0, 0]*table[1, 1]/(table[0, 1]*table[1, 0])``.
              This is the prior odds ratio and not a posterior estimate.
            * For ``kind='conditional'``, the conditional maximum
              likelihood estimate of the odds ratio, i.e. the
              noncentrality parameter of Fisher's noncentral
              hypergeometric distribution that has the same
              hypergeometric parameters as `table` and mean
              ``table[0, 0]``.
        pvalue : fractions.Fraction or mpmath.mpf
            The p-value associated with the computed odds ratio.

            * For ``kind='sample'``, it is based on the normal
              approximation to the distribution of the log of the
              sample odds ratio.
            * For ``kind='conditional'``, it is the p-value computed by
              `mpsci.stats.fisher_exact`.

        The inputs `table`, `kind` and `alternative` are also stored on
        the object as attributes.

        The object's method `odds_ratio_ci` computes the confidence
        interval of the odds ratio.

        If both values in a row or in a column of `table` are zero, the
        odds ratio is NaN and the p-value is 1.

    Raises
    ------
    ValueError
        If `kind` or `alternative` is not one of the accepted values, if
        `table` is not shaped like a 2x2 array, or if any value in
        `table` is negative.

    References
    ----------
    .. [1] J. Cornfield (1956), A statistical problem arising from
           retrospective studies. In Neyman, J. (ed.), Proceedings of
           the Third Berkeley Symposium on Mathematical Statistics and
           Probability 4, pp. 135-148.
    .. [2] H. Sahai and A. Khurshid (1996), Statistics in Epidemiology:
           Methods, Techniques, and Applications, CRC Press LLC, Boca
           Raton, Florida.

    """
    # --- Argument validation -------------------------------------------
    if kind not in ('conditional', 'sample'):
        raise ValueError("kind must be 'conditional' or 'sample'.")
    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError("alternative must be 'two-sided', 'less' or "
                         "'greater'.")

    is_2x2 = (len(table) == 2
              and len(table[0]) == 2
              and len(table[1]) == 2)
    if not is_2x2:
        raise ValueError("The input `table` must be shaped like a 2x2 array.")

    a, b, c, d = _unpack_table_to_mpf(table)
    if any(cell < 0 for cell in (a, b, c, d)):
        raise ValueError("All values in `table` must be nonnegative.")

    # --- Degenerate table ----------------------------------------------
    if _row_or_column_zero(table):
        # A row or a column with both entries zero: the odds ratio is NaN
        # and the p-value is 1.
        return OddsRatioResult(table=table, kind=kind,
                               alternative=alternative,
                               odds_ratio=mpmath.nan, pvalue=1)

    # --- Odds ratio and p-value ----------------------------------------
    if kind == 'sample':
        oddsratio = _sample_odds_ratio(table)
        # Normal approximation for the log of the sample odds ratio.
        log_or = mpmath.log(oddsratio)
        se = mpmath.sqrt(1/a + 1/b + 1/c + 1/d)
        if alternative == 'two-sided':
            pvalue = 2*mpmath.ncdf(-abs(log_or)/se)
        elif alternative == 'less':
            pvalue = mpmath.ncdf(log_or/se)
        else:
            pvalue = mpmath.ncdf(-log_or/se)
    else:
        # kind is 'conditional'; the p-value comes from fisher_exact.
        oddsratio = _conditional_oddsratio(table)
        pvalue = fisher_exact(table, alternative=alternative)[1]

    return OddsRatioResult(table=table, kind=kind, alternative=alternative,
                           odds_ratio=oddsratio, pvalue=pvalue)
Esempio n. 12
0
def sf(x, mu=0, sigma=1):
    """
    Normal distribution survival function.

    Returns ``1 - cdf(x, mu, sigma)``, evaluated by symmetry as the normal
    CDF with mean ``-mu`` at the point ``-x``.  This avoids the loss of
    precision that subtracting a value close to 1 would cause in the upper
    tail.
    """
    # Both x and mu must be negated: ncdf(-x, -mu, sigma) evaluates the
    # standard normal CDF at (mu - x)/sigma.  Negating x alone is only
    # correct when mu == 0.
    return mpmath.ncdf(-x, -mu, sigma)
Esempio n. 13
0
def standardNormalCDF(z):
    """
    Evaluate the standard normal cumulative distribution function at z.
    """
    probability = mpmath.ncdf(z)
    return probability
Esempio n. 14
0
def _psi(chi):
    """
    Return ncdf(chi) - chi*npdf(chi) - 1/2, where ncdf and npdf are the
    standard normal CDF and PDF.
    """
    cumulative = mpmath.ncdf(chi)
    weighted_density = chi * mpmath.npdf(chi)
    half = mpmath.mpf('0.5')
    return cumulative - weighted_density - half
Esempio n. 15
0
def f48(x):
    # erf_Q: one minus the standard normal CDF at x
    lower_tail = mpmath.ncdf(x)
    return 1 - lower_tail
Esempio n. 16
0
def f49(x):
    # hazard: standard normal PDF divided by one minus the CDF
    density = mpmath.npdf(x)
    upper_tail = 1 - mpmath.ncdf(x)
    return density / upper_tail
Esempio n. 17
0
def cdf(x, mu=0, sigma=1):
    """
    Cumulative distribution function of the normal distribution with
    mean `mu` and standard deviation `sigma`.
    """
    # Thin wrapper kept for consistency; mpmath.ncdf does all of the work.
    probability = mpmath.ncdf(x, mu, sigma)
    return probability