def invcdf(p, a, b, c, loc=0, scale=1):
    """
    Inverse of the CDF of the generalized exponential distribution.

    This is also known as the quantile function.

    With ``y = -log(1 - p)``, ``s = a + b`` and ``r = b/c``, the
    standardized quantile ``z`` is the root of

        s*z + r*expm1(-c*z) - y

    which is found numerically with `mpmath.findroot`.  The starting
    interval is ``[y/s, (y + r)/s]``; for positive ``s``, ``r`` and ``c``
    the function is <= 0 at the left end and > 0 at the right end.
    (Positivity of the parameters is presumably enforced by
    `_validate_params`.)

    The result is ``loc + scale*z``.  For ``p == 1`` the result is +inf.

    Raises ValueError if `p` is not in the interval [0, 1].
    """
    if p < 0 or p > 1:
        raise ValueError("'p' must be between 0 and 1.")
    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        a, b, c, loc, scale = _validate_params(a, b, c, loc, scale)
        if p == 1:
            # -log1p(-1) is +inf, which would hand infinite end points to
            # the root finder.  The root z grows without bound as p -> 1,
            # so return the limit directly (assumes scale > 0).
            return mpmath.inf
        s = a + b
        r = b / c
        y = -mpmath.log1p(-p)

        def _genexpon_invcdf_rootfunc(z):
            return s*z + r*mpmath.expm1(-c*z) - y

        # End points of the interval that contains the root.
        z0 = y / s
        z1 = (y + r) / s
        z = mpmath.findroot(_genexpon_invcdf_rootfunc, (z0, z1),
                            solver='anderson')
        return loc + scale*z
def mean(p):
    """
    Mean of the log-series distribution.

    Evaluates ``p / (p - 1) / log(1 - p)`` with five extra digits of
    working precision.  `p` is first passed through `_validate_p`.
    """
    p = _validate_p(p)
    with _mpm.extradps(5):
        ratio = p / (p - 1)
        log_q = _mpm.log1p(-p)
        return ratio / log_q
def var(p):
    """
    Variance of the log-series distribution.

    With ``L = log(1 - p)``, the value returned is
    ``-p*(p + L) / ((1 - p)**2 * L**2)``, computed with five extra
    digits of working precision.
    """
    p = _validate_p(p)
    with _mpm.extradps(5):
        log_q = _mpm.log1p(-p)
        numerator = -(p*(p + log_q))
        return numerator / (1 - p)**2 / log_q**2
def sf(k, p):
    """
    Survival function of the log-series distribution.

    Returns 1 when ``k < 1``.  Otherwise the value is
    ``-B / log(1 - p)``, where ``B`` is the incomplete beta integral
    ``betainc(k + 1, 0, 0, p)``.
    """
    p = _validate_p(p)
    if k < 1:
        return _mpm.mp.one
    with _mpm.extradps(5):
        beta_part = _mpm.betainc(k + 1, 0, 0, p)
        return -beta_part / _mpm.log1p(-p)
def cdf(k, p):
    """
    CDF of the log-series distribution.

    Returns 0 when ``k < 1``.  Otherwise the value is
    ``1 + B / log(1 - p)``, where ``B`` is the incomplete beta integral
    ``betainc(k + 1, 0, 0, p)``.
    """
    p = _validate_p(p)
    if k < 1:
        return _mpm.mp.zero
    with _mpm.extradps(5):
        beta_part = _mpm.betainc(k + 1, 0, 0, p)
        return 1 + beta_part / _mpm.log1p(-p)
def logpmf(k, p):
    """
    Natural log of the PMF of the log-series distribution.

    Returns -inf when ``k < 1``.  Otherwise the value is
    ``k*log(p) - log(k) - log(-log(1 - p))``.
    """
    p = _validate_p(p)
    if k < 1:
        return -_mpm.mp.inf
    with _mpm.extradps(5):
        power_term = k*_mpm.log(p)
        log_k = _mpm.log(k)
        log_norm = _mpm.log(-_mpm.log1p(-p))
        return power_term - log_k - log_norm
def logpmf(k, n, p):
    """
    Natural log of the probability mass function of the binomial
    distribution.

    The result is the sum of three terms: ``logbinomial(n, k)``,
    ``k*log(p)`` and ``(n - k)*log(1 - p)``.
    """
    _validate_np(n, p)
    with mpmath.extradps(5):
        log_coeff = logbinomial(n, k)
        success_term = k*mpmath.log(p)
        failure_term = mpmath.fsum([n, -k])*mpmath.log1p(-p)
        return log_coeff + success_term + failure_term
def invsf(p, loc=0, scale=1):
    """
    Inverse survival function of the logistic distribution.

    Returns ``loc + scale*(log(1 - p) - log(p))``.
    """
    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        loc = mpmath.mpf(loc)
        scale = mpmath.mpf(scale)
        neg_logit = mpmath.log1p(-p) - mpmath.log(p)
        return loc + scale * neg_logit
def inv_yeo_johnson(x, lmbda):
    """
    Inverse Yeo-Johnson transformation.

    Four cases are handled, selected by the sign of `x` and by whether
    `lmbda` hits the special value of that branch (0 for ``x >= 0``,
    2 for ``x < 0``).

    See
        https://en.wikipedia.org/wiki/Power_transform#Yeo%E2%80%93Johnson_transformation
    """
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        lmbda = mpmath.mpf(lmbda)

        if x >= 0:
            if lmbda == 0:
                return mpmath.expm1(x)
            return mpmath.expm1(mpmath.log1p(lmbda * x) / lmbda)

        # Everything below handles x < 0.
        if lmbda == 2:
            return -mpmath.expm1(-x)
        two_minus_lmbda = 2 - lmbda
        inner = mpmath.log1p(-two_minus_lmbda * x) / two_minus_lmbda
        return -mpmath.expm1(inner)
def yeo_johnson(x, lmbda):
    r"""
    Yeo-Johnson transformation of x.

    Four cases are handled, selected by the sign of `x` and by whether
    `lmbda` hits the special value of that branch (0 for ``x >= 0``,
    2 for ``x < 0``).

    See
        https://en.wikipedia.org/wiki/Power_transform#Yeo%E2%80%93Johnson_transformation
    """
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        lmbda = mpmath.mpf(lmbda)

        if x >= 0:
            if lmbda == 0:
                return mpmath.log1p(x)
            return mpmath.expm1(lmbda * mpmath.log1p(x)) / lmbda

        # Everything below handles x < 0.
        if lmbda == 2:
            return -mpmath.log1p(-x)
        two_minus_lmbda = 2 - lmbda
        scaled_log = two_minus_lmbda * mpmath.log1p(-x)
        return -mpmath.expm1(scaled_log) / two_minus_lmbda
def mode(mu, loc=0, scale=1):
    """
    Mode of the inverse Gaussian distribution.

    With ``s = 3*mu/2``, the mode of the standardized distribution is
    ``mu*(sqrt(1 + s**2) - s)``.  It is evaluated in the equivalent form
    ``mu*s*(sqrt(1 + 1/s**2) - 1)`` using `log1p` and `expm1`.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        s = 3 * mu / 2
        # expm1(log1p(u)/2) == sqrt(1 + u) - 1, here with u = 1/s**2.
        half_log = mpmath.log1p(1 / s**2) / 2
        root_minus_one = mpmath.expm1(half_log)
        std_mode = mu * s * root_minus_one
        return scale * std_mode + loc
def logpdf(x, c, d, scale):
    """
    Log of the PDF of the Burr type XII distribution.

    For ``x >= 0`` the value is

        log(c) + log(d) + (c - 1)*log(x) - c*log(scale)
            - (d + 1)*log1p((x/scale)**c)

    For ``x < 0`` the density is zero, so -inf is returned.

    `c`, `d` and `scale` are checked by `_validate_params`.
    """
    _validate_params(c, d, scale)
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        c = mpmath.mpf(c)
        d = mpmath.mpf(d)
        scale = mpmath.mpf(scale)
        if x < 0:
            # Below the support; without this guard log(x) would be
            # evaluated at a negative argument and give a complex value.
            return mpmath.ninf
        return (mpmath.log(c) + mpmath.log(d) + (c - 1)*mpmath.log(x)
                - c*mpmath.log(scale)
                - (d + 1)*mpmath.log1p((x / scale)**c))
def __init__(self, gvcfWritePath, ref_path, p_err, gq_bin_size, ctgName,
             bp_resolution=False, sample_name='None', mode='L'):
    """
    Store the error-model constants and open the output stream.

    gvcfWritePath : output directory, or the literal string "PIPE" to
        write to `sys.stdout`.  The directory is created if it does not
        exist.
    ref_path : stored as `self.reference_file_path`.
    p_err : error probability; stored as `self.p_error` and used to
        precompute `self.logp` and `self.log1p`.
    gq_bin_size : stored as `self.gq_bin_size`.
    ctgName : accepted but not used by this method.
    bp_resolution : stored as `self.bp_resolution`.
    sample_name : used to build the output file name
        (``sample_name + GVCF_SUFFIX``); the part before the first '.'
        is stored as `self.sampleName`.
    mode : when equal to 'L', the per-block state is initialised and
        the VCF header is printed via `self._print_vcf_header()`.
    """
    # default p_error is 0.001, while it could be set by the users' option
    self.p_error = p_err
    self.LOG_10 = LOG_10
    # log(p) and log(1 - p) divided by LOG_10
    # (presumably ln(10), giving base-10 logs -- confirm at its definition).
    self.logp = math.log(self.p_error) / self.LOG_10
    self.log1p = math.log1p(-self.p_error) / self.LOG_10
    self.LOG_2 = LOG_2
    # need to check with the clair3 settings
    #self.max_gq = 255
    self.max_gq = 50
    self.variantMath = mathcalculator()
    self.constant_log10_probs = self.variantMath.normalize_log10_prob(
        [-1.0, -1.0, -1.0])
    self.gq_bin_size = gq_bin_size
    self.CW = None
    # set by the users
    if gvcfWritePath != "PIPE":
        # makedirs with exist_ok avoids the check-then-create race when
        # several processes target the same directory, and also creates
        # missing parent directories.
        os.makedirs(gvcfWritePath, exist_ok=True)
        self.CW = compressReaderWriter(
            output_path=os.path.join(gvcfWritePath,
                                     sample_name + GVCF_SUFFIX),
            compress=COMPRESS_GVCF)
        self.vcf_writer = self.CW.write_output()
    else:
        self.vcf_writer = sys.stdout
    self.writePath = gvcfWritePath
    self.sampleName = sample_name.split('.')[0]
    self.bp_resolution = bp_resolution
    self.reference_file_path = ref_path

    if mode == 'L':
        # dictionary to store constant log values for speeding up
        self.normalized_prob_pool = {}

        self.current_block = []
        self._print_vcf_header()
        self.cur_gq_bin_index = None
        self.cur_gt = None
        self.cur_min_DP = None
        self.cur_max_DP = None
        self.cur_chr = None
        self.cur_raw_gq = None
def logsf(x, c, d, scale):
    """
    Natural log of the survival function of the Burr type XII distribution.

    For ``x >= 0`` the value is ``-d*log1p((x/scale)**c)``.  For
    ``x < 0`` the survival function equals 1, so the log is 0.

    `c`, `d` and `scale` are checked by `_validate_params`.
    """
    _validate_params(c, d, scale)
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        c = mpmath.mpf(c)
        d = mpmath.mpf(d)
        scale = mpmath.mpf(scale)
        if x < 0:
            # All of the probability mass lies at or above 0, so
            # sf(x) = 1 and log(sf(x)) = 0 (not -inf).  This also matches
            # the value of the formula below at x = 0.
            return mpmath.mp.zero
        return -d*mpmath.log1p((x/scale)**c)
def logcdf(x, mu, loc=0, scale=1):
    """
    Logarithm of the CDF for the inverse Gaussian distribution.

    Returns -inf when ``x <= loc``.  Otherwise, with
    ``z = (x - loc)/scale``, the CDF is the sum of two terms whose logs
    are ``t1`` and ``t2``; the result is ``t1 + log1p(exp(t2 - t1))``.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        x = mpmath.mpf(x)
        if x <= loc:
            return -mpmath.mp.inf
        z = (x - loc) / scale
        root_z = mpmath.sqrt(z)
        z_over_mu = z / mu
        log_first = mpmath.log(mpmath.ncdf((z_over_mu - 1) / root_z))
        log_second = ((2 / mu)
                      + mpmath.log(mpmath.ncdf(-(z_over_mu + 1) / root_z)))
        return log_first + mpmath.log1p(mpmath.exp(log_second - log_first))
def logsf(x, mu, loc=0, scale=1):
    """
    Logarithm of the survival function for the inverse Gaussian
    distribution.

    Returns 0 when ``x <= loc``.  Otherwise, with
    ``z = (x - loc)/scale``, the survival function is the difference of
    two terms whose logs are ``t1`` and ``t2``; the result is
    ``t1 + log1p(-exp(t2 - t1))``.
    """
    with mpmath.extradps(5):
        mu, loc, scale = _validate_params(mu, loc, scale)
        x = mpmath.mpf(x)
        if x <= loc:
            return mpmath.mp.zero
        z = (x - loc) / scale
        root_z = mpmath.sqrt(z)
        z_over_mu = z / mu
        log_first = mpmath.log(mpmath.ncdf(-(z_over_mu - 1) / root_z))
        log_second = (2 / mu
                      + mpmath.log(mpmath.ncdf(-(z_over_mu + 1) / root_z)))
        return log_first + mpmath.log1p(-mpmath.exp(log_second - log_first))
def invcdf(p, loc=0, scale=1):
    """
    Inverse CDF of the logistic distribution.

    This function is also known as the quantile function or the percent
    point function.

    Returns ``loc + scale*(log(p) - log(1 - p))``.
    """
    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        loc = mpmath.mpf(loc)
        scale = mpmath.mpf(scale)
        logit = mpmath.log(p) - mpmath.log1p(-p)
        return loc + scale * logit
def invsf(p, k, loc, scale):
    """
    Inverse of the survival function for the Weibull distribution (for
    maxima).

    This is a three-parameter version of the distribution.  The more
    typical two-parameter version has just the parameters k and scale.

    Returns ``loc - scale*(-log(1 - p))**(1/k)``.
    """
    with mpmath.extradps(5):
        p = _validate_p(p)
        k, loc, scale = _validate_params(k, loc, scale)
        neg_log_q = -mpmath.log1p(-p)
        std_quantile = -mpmath.power(neg_log_q, 1 / k)
        return scale * std_quantile + loc
def logpdf(x):
    """
    Natural logarithm of the PDF of the raised cosine distribution.

    The PDF of the raised cosine distribution is

        f(x) = (1 + cos(x))/(2*pi)

    on the interval (-pi, pi) and zero elsewhere.  Outside that open
    interval -inf is returned.
    """
    with mpmath.extradps(5):
        pi = mpmath.pi
        if x <= -pi or x >= pi:
            return -mpmath.inf
        log_numer = mpmath.log1p(mpmath.cos(x))
        log_denom = mpmath.log(2 * pi)
        return log_numer - log_denom
def invsf(p, loc, scale):
    """
    Inverse of the survival function for the Gumbel distribution.

    Returns ``loc - scale*log(-log(1 - p))``.

    Raises ValueError if `scale` is not positive.
    """
    if scale <= 0:
        raise ValueError('scale must be positive.')
    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        loc = mpmath.mpf(loc)
        scale = mpmath.mpf(scale)
        neg_log_q = -mpmath.log1p(-p)
        std_quantile = -mpmath.log(neg_log_q)
        return scale*std_quantile + loc
def invsf(p, c, beta, scale):
    """
    Inverse survival function of the Gamma-Gompertz distribution.

    Returns ``scale*log1p(beta*(p**(-1/c) - 1))``, or nan when `p` is
    outside the interval [0, 1].
    """
    with mpmath.extradps(5):
        if p < 0 or p > 1:
            return mpmath.mp.nan
        p = mpmath.mpf(p)
        beta = mpmath.mpf(beta)
        c = mpmath.mpf(c)
        scale = mpmath.mpf(scale)
        # powm1(p, e) computes p**e - 1.
        pow_minus_one = mpmath.powm1(p, -1 / c)
        return scale * mpmath.log1p(beta * pow_minus_one)
def logpdf(x, df):
    """
    Logarithm of the PDF of Student's t distribution.

    With ``h = (df + 1)/2`` the value is

        loggamma(h) - log(df*pi)/2 - loggamma(df/2) - h*log1p(x**2/df)

    Raises ValueError if `df` is not greater than 0.
    """
    if df <= 0:
        raise ValueError('df must be greater than 0')
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        df = mpmath.mpf(df)
        half_df_plus_one = (df + 1) / 2
        log_norm = (mpmath.loggamma(half_df_plus_one)
                    - mpmath.log(df * mpmath.pi)/2
                    - mpmath.loggamma(df/2))
        return log_norm - half_df_plus_one * mpmath.log1p(x**2/df)
def invcdf(p, c, beta, scale):
    """
    Inverse CDF (i.e. quantile function) of the Gamma-Gompertz
    distribution.

    Returns ``scale*log1p(beta*((1 - p)**(-1/c) - 1))``, or nan when `p`
    is outside the interval [0, 1].
    """
    with mpmath.extradps(5):
        if p < 0 or p > 1:
            return mpmath.mp.nan
        p = mpmath.mpf(p)
        beta = mpmath.mpf(beta)
        c = mpmath.mpf(c)
        scale = mpmath.mpf(scale)
        # XXX It would be nice if the result could be formulated in a
        # way that avoids computing 1 - p.
        pow_minus_one = mpmath.powm1(1 - p, -1 / c)
        return scale * mpmath.log1p(beta * pow_minus_one)
def logpdf(x, a, b):
    """
    Logarithm of the PDF of the Benktander II distribution.

    Variable names follow the convention used on wikipedia.

    Returns -inf when ``x < 1``.  Otherwise the value is

        (a/b)*(1 - x**b) + (b - 2)*log(x) + log1p(a*x**b - b)
    """
    _validate_ab(a, b)
    if x < 1:
        return mpmath.ninf
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        a = mpmath.mpf(a)
        b = mpmath.mpf(b)
        exp_term = (a / b) * (1 - x**b)
        power_term = (b - 2) * mpmath.log(x)
        linear_term = mpmath.log1p(a * x**b - b)
        return exp_term + power_term + linear_term
def invcdf(p, a, b):
    """
    Inverse of the CDF of the Benktander I distribution.

    Variable names follow the convention used on wikipedia.

    With ``w = log(1 - p)``, the log of the quantile, ``z``, is the root
    of

        log(1 + 2*b/a*z) - (a + 1 + b*z)*z - w

    which is found numerically.  `zlow` and `zhigh` are the positive
    roots of two quadratics that bound this function from below and
    above for ``z >= 0``, so they bracket the root; the bracketing
    'anderson' solver of `mpmath.findroot` is used.

    The return value is ``exp(z)``.  ``p == 0`` gives 1 and ``p == 1``
    gives +inf.
    """
    _validate_ab(a, b)
    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        a = mpmath.mpf(a)
        b = mpmath.mpf(b)
        one = mpmath.mp.one
        if p == 0:
            # w = 0 makes z = 0 an exact root, and the bracket can collapse
            # to a single point, which the solver cannot iterate on.
            return one
        if p == 1:
            # w = -inf; the end points of the bracket would be infinite.
            return mpmath.inf
        w = mpmath.log1p(-p)
        zlow = (-(a + one) + mpmath.sqrt((a + one)**2 - 4 * b * w)) / (2 * b)
        q = a + one - 2 * b / a
        zhigh = (-q + mpmath.sqrt(q**2 - 4 * b * w)) / (2 * b)
        # The solver is selected with the `solver` keyword.  A `method`
        # keyword is only read by the 'illinois' solver; passing it alone
        # leaves the default secant solver in use.
        z = mpmath.findroot(lambda z: (mpmath.log(1 + 2 * b / a * z)
                                       - (a + 1 + b * z) * z - w),
                            (zlow, zhigh), solver='anderson')
        return mpmath.exp(z)
def logpdf(x, xi, mu=0, sigma=1):
    """
    Natural logarithm of the PDF of the generalized extreme value
    distribution.

    With ``z = (x - mu)/sigma`` and

        t = (1 + xi*z)**(-1/xi)   if xi != 0
        t = exp(-z)               if xi == 0

    the value is ``(xi + 1)*log(t) - t - log(sigma)``.

    When ``1 + xi*z < 0`` the point is outside the support and -inf is
    returned.

    `sigma` is checked by `_validate_sigma`.
    """
    _validate_sigma(sigma)
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        xi = mpmath.mpf(xi)
        mu = mpmath.mpf(mu)
        sigma = mpmath.mpf(sigma)

        # Formula from wikipedia, which has a sign convention for xi that
        # is the opposite of scipy's shape parameter.
        z = (x - mu) / sigma
        if xi != 0:
            u = z * xi
            if u < -1:
                # Outside the support: the density is zero.  Without this
                # guard log1p and power would return complex values.
                return mpmath.ninf
            t = mpmath.power(1 + u, -1 / xi)
            logt = -mpmath.log1p(u) / xi
        else:
            t = mpmath.exp(-z)
            logt = -z
        return (xi + 1) * logt - t - mpmath.log(sigma)
def invcdf(p, a, b):
    """
    Inverse CDF of the Benktander II distribution.

    Variable names follow the convention used on wikipedia.

    Returns nan when `p` is outside the interval [0, 1].  For ``b == 1``
    the result is ``1 - log(1 - p)/a``; otherwise it is expressed with
    the Lambert W function.
    """
    _validate_ab(a, b)
    if p < 0 or p > 1:
        return mpmath.nan
    with mpmath.extradps(5):
        p = mpmath.mpf(p)
        a = mpmath.mpf(a)
        b = mpmath.mpf(b)
        one = mpmath.mp.one

        if b == 1:
            return one - mpmath.log1p(-p) / a

        one_minus_b = one - b
        ratio = a / one_minus_b
        w_arg = (ratio * mpmath.exp(ratio)
                 * mpmath.power(one - p, -b / one_minus_b))
        return mpmath.power(mpmath.lambertw(w_arg) / ratio, 1 / b)
def logpdf(x, chi, c):
    """
    Logarithm of the PDF of the ARGUS probability distribution.

    Returns -inf when `x` is outside the interval [0, c].  Otherwise the
    result is the sum of four terms computed from ``z = x/c``: a
    normalization term that depends only on `chi`, ``log(z) - log(c)``,
    ``log1p(-z**2)/2`` and ``-chi**2/2*(1 - z**2)``.

    Raises ValueError if `c` or `chi` is not positive.
    """
    if c <= 0:
        raise ValueError('c must be positive')
    if chi <= 0:
        raise ValueError('chi must be positive')
    if x < 0 or x > c:
        return mpmath.mp.ninf
    with mpmath.extradps(5):
        x = mpmath.mpf(x)
        chi = mpmath.mpf(chi)
        c = mpmath.mpf(c)
        z = x / c
        log_norm = (3 * mpmath.log(chi)
                    - mpmath.log(2 * mpmath.pi) / 2
                    - mpmath.log(_psi(chi)))
        log_linear = -mpmath.log(c) + mpmath.log(z)
        log_root = mpmath.log1p(-z**2) / 2
        exponent = -chi**2 / 2 * (1 - z**2)
        return log_norm + log_linear + log_root + exponent
def logpdf(x, nu, loc, scale, scale_inv=None):
    """
    Natural logarithm of the PDF for the multivariate t distribution.

    `loc` must be a sequence.
    `scale` is the scale matrix; it must be an instance of `mpmath.matrix`.
    `scale` must be positive definite.
    If given, `scale_inv` must be the inverse of `scale`.

    When `scale_inv` is not given, it is computed with `mpmath.inverse`.
    The inverse and the determinant of `scale` are computed inside an
    additional, nested ``extradps(5)`` block.
    """
    dim = mpmath.mpf(len(loc))
    with mpmath.extradps(5):
        nu = mpmath.mpf(nu)
        if scale_inv is None:
            with mpmath.extradps(5):
                scale_inv = mpmath.inverse(scale)

        def _as_column(values):
            # Copy `values` into a (scale.cols x 1) mpmath column vector.
            column = mpmath.matrix(scale.cols, 1)
            for idx, value in enumerate(values):
                column[idx] = mpmath.mpf(value)
            return column

        loc = _as_column(loc)
        x = _as_column(x)
        delta = x - loc
        c = (nu + dim) / 2
        quad_form = (delta.T * scale_inv * delta)[0, 0]
        kernel = -c * mpmath.log1p(quad_form / nu)
        lg_c = mpmath.loggamma(c)
        lg_half_nu = mpmath.loggamma(nu / 2)
        log_nu_term = (dim / 2) * mpmath.log(nu)
        log_pi_term = (dim / 2) * mpmath.log(mpmath.pi)
        with mpmath.extradps(5):
            det = mpmath.det(scale)
        half_log_det = mpmath.log(det) / 2
        return (lg_c - lg_half_nu - log_nu_term - log_pi_term
                - half_log_det + kernel)
def boxcox1p(x, lmbda):
    r"""
    Box-Cox transformation of 1 + x.

    The transformation is

                      { log(1+x)             if lmbda == 0,
        f(x; lmbda) = {
                      { (1+x)**lmbda - 1
                      { ----------------     if lmbda != 0
                      {      lmbda

    This function is mathematically equivalent to `boxcox(1+x, lmbda)`.
    It avoids the loss of precision that can occur if x is very small.

    For ``lmbda != 0`` and ``x > -1`` the value is computed as
    ``expm1(lmbda*log1p(x))/lmbda``, so the sum ``1 + x`` is never formed.
    For ``x <= -1`` the direct formula ``powm1(1 + x, lmbda)/lmbda`` is
    used.

    *See also:* `mpsci.fun.boxcox`
    """
    x = mpmath.mpf(x)
    lmbda = mpmath.mpf(lmbda)
    if lmbda == 0:
        return mpmath.log1p(x)
    if x > -1:
        # Forming 1 + x would round away a tiny x before the power is
        # taken; log1p/expm1 keep the small contribution.
        return mpmath.expm1(lmbda * mpmath.log1p(x)) / lmbda
    # log1p(x) is not a finite real number for x <= -1; keep the direct
    # formula there.
    return mpmath.powm1(mpmath.mpf(1) + x, lmbda) / lmbda