Example #1
def ibetam(a, b, x):
    """
    Incomplete beta function defined as the Mathematica Beta[x, a, b]:
    Beta[x, a, b] = Integral[t^(a - 1) * (1 - t)^(b - 1), {t, 0, x}]
    This routine only works for (0 < x < 1) & (b > 0) as required by JAM.

    """
    # V1.0: Michele Cappellari, Oxford, 01/APR/2008
    # V2.0: Use Hypergeometric function for negative a or b.
    #    From equation (6.6.8) of Abramowitz & Stegun (1964)
    #    MC, Oxford, 04/APR/2008
    # V3.0: Use recurrence relation of equation (26.5.16)
    #    from Abramowitz & Stegun (1964) for (a < 0) & (b > 0).
    #    See the online book here http://www.nr.com/aands/
    #    After suggestion by Gary Mamon. MC, Oxford, 16/APR/2009

    a = a + 3e-7  # Perturb to avoid singularities in gamma and betainc
    if np.all(a > 0):
        ib = special.betainc(a, b, x)
    else:
        p = int(np.ceil(np.abs(np.min(a))))
        tot = np.zeros((x.size, a.size))
        for j in range(p):  # Do NOT use gamma recurrence relation to avoid instabilities
            tot += special.gamma(j + b + a)/special.gamma(j + 1 + a)*x**(j + a)
        ib = tot*(1 - x)**b/special.gamma(b) + special.betainc(a + p, b, x)

    return ib*special.beta(a, b)
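A quick consistency check of the positive-parameter branch (a sketch; it assumes it runs in the same module as ibetam, which imports numpy as np and scipy.special as special):

import numpy as np
from scipy import special

a = np.array([0.8])
b = np.array([2.5])
x = np.array([0.3])
# for a > 0, ibetam should reduce to the unregularized incomplete beta
# (up to the tiny 3e-7 perturbation applied to a)
print(ibetam(a, b, x))
print(special.betainc(a, b, x) * special.beta(a, b))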
Example #2
 def integrand(x, N1, X, N2, Y, r, s, lnb):
     # ss.binom.cdf(X, N, p) == betainc(N-X, X+1, 1-p), but betainc accepts non-integer parameters.
     # However, betainc(0, ., .) and betainc(., 0, .) are nan because gamma(0) == 0
     if s <= 0:  # Decreased, accumulate from 0 to Y 
         b = betainc(max(1e-19, N2 - Y),  Y+1,  (1-x)**r)
     else:       # Increased, accumulate from Y to N2, or 1 - cdf(from 0 to Y-1)
         b = 1 - betainc(N2 - (Y-1),  max(1e-19, Y),  (1-x)**r)
     return b * np.exp( X * np.log(x) + (N1 - X) * np.log(1 - x) - lnb)
def vol_cap(r, a):
	unit_volume = pi**(n/2.)/gamma(n/2.+1)
	V = unit_volume * r**n
	if a >= 0:
		return V / 2 * betainc((n+1)/2., 0.5, 1-(a/r)**2)
	else:
		return V - vol_cap(r, -a)
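A small sanity check (a sketch; vol_cap reads the dimension n, pi, gamma and betainc from module scope, so this assumes the lines below share its module):

from math import pi
from scipy.special import gamma, betainc

n = 3  # module-level dimension assumed by vol_cap
# a cap cut at the centre (a = 0) is half of the unit 3-ball
print(vol_cap(1.0, 0.0))      # ~2.0944
print(0.5 * 4.0 / 3.0 * pi)   # same value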
Example #4
def printIncbet():
    successes = 1
    failures = 0
    for d in na.arange(0, 1, 0.01):
        result = betainc(successes + 1, failures + 1, d)
        print("Result: S:", successes, end=" ")
        print("F:", failures, "d:", d, "inc: %.5f" % result)
def betai(a, b, x):
    """
    Returns the incomplete beta function.

    I_x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)

    where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma
    function of a.

    The standard broadcasting rules apply to a, b, and x.

    Parameters
    ----------
    a : array_like or float > 0

    b : array_like or float > 0

    x : array_like or float
        x will be clipped to be no greater than 1.0 .

    Returns
    -------
    betai : ndarray
        Incomplete beta function.

    """
    x = np.asarray(x)
    x = np.where(x < 1.0, x, 1.0)  # if x > 1 then return 1.0
    return special.betainc(a, b, x)
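A short usage sketch (assumes betai and its scipy import are in scope); note how x above 1 is clipped:

from scipy import special

print(betai(2.0, 3.0, 0.4))            # identical to special.betainc(2.0, 3.0, 0.4)
print(special.betainc(2.0, 3.0, 0.4))
print(betai(2.0, 3.0, 1.7))            # x is clipped to 1.0, so this returns exactly 1.0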
    def train(self, bandit, max_budget):
        self.S = na.zeros(bandit.narms) * 0.0
        self.F = na.zeros(bandit.narms) * 0.0
        

        for a_i in bandit.actions:
            for i in range(self.n):
                if len(bandit.log) >= max_budget:
                    return
                r = bandit.sample(a_i)
                if r == 1:
                    self.S[a_i] += 1.0
                else:
                    self.F[a_i] += 1.0

                Pr_mu_less_than_bound = betainc(self.S[a_i] + 1, self.F[a_i] + 1, self.upperbound)
                ntrials = self.S[a_i] + self.F[a_i]
                arm_mean = self.S[a_i] / ntrials
                Pr_mu_greater_than_bound = 1 - Pr_mu_less_than_bound
                if Pr_mu_less_than_bound >= self.confidence:
                    break
            if Pr_mu_greater_than_bound >= self.confidence:
                print()
                print("arm", a_i)
                print("arm_mean", arm_mean)
                print("s", self.S)
                print("f", self.F)
                return #this arm is awesome; leave
            else:
                # continue trying this arm
                pass
Example #7
File: tools.py Project: helloTC/ATT
def pearsonr(A, B):
    """
    A broadcasting method to compute Pearson r and p.
    Code reprinted from Stack Overflow
    -----------------------------------------------
    Parameters:
        A: matrix A, i*k
        B: matrix B, j*k
    Return:
        rcorr: matrix correlation, i*j
        pcorr: matrix correlation p, i*j
    Example:
        >>> rcorr, pcorr = pearsonr(A, B)
    """
    if isinstance(A,list):
        A = np.array(A)
    if isinstance(B,list):
        B = np.array(B)
    if np.ndim(A) == 1:
        A = np.expand_dims(A, axis=1).T
    if np.ndim(B) == 1:
        B = np.expand_dims(B, axis=1).T

    rcorr = 1.0 - distance.cdist(A, B, 'correlation')

    df = A.T.shape[1] - 2
    
    r_forp = rcorr*1.0
    r_forp[r_forp == 1.0] = 0.0  # guard against division by zero when r == 1
    t_squared = r_forp.T**2*(df/((1.0 - r_forp.T)*(1.0 + r_forp.T)))
    pcorr = special.betainc(0.5*df, 0.5, df/(df+t_squared))
    return rcorr, pcorr
Example #8
def Dbeta(a=1.5,b=2.5): # the beta distribution
  return Distr(
    name='beta[a={0},b={1}]'.format(a,b),
    dom=(0.,1.), domv=(1.e-10,1.-1.e-10),
    mean=a/(a+b), std=sqrt(a*b/(a+b+1))/(a+b),
    pdf=lambda x: x**(a-1.)*(1.-x)**(b-1.)/beta(a,b),
    cdf=lambda x: betainc(a,b,x),
  )
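The pdf/cdf pair wired into Dbeta can be checked independently of the Distr wrapper (a sketch, scipy only):

from scipy.special import beta, betainc
from scipy.integrate import quad

a, b, x = 1.5, 2.5, 0.3
pdf = lambda t: t**(a - 1.0) * (1.0 - t)**(b - 1.0) / beta(a, b)
print(quad(pdf, 0.0, x)[0])   # numerical integral of the pdf
print(betainc(a, b, x))       # the closed form used as the cdf above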
Example #9
def p_from_r(r,n):
	r = max(min(r, 1.0), -1.0)
	df = n-2
	if abs(r) == 1.0:
		prob = 0.0
	else:
		t_squared = r*r * (df / ((1.0 - r) * (1.0 + r)))
		prob = special.betainc(0.5*df, 0.5, df / (df + t_squared))
	return prob
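A cross-check against scipy.stats.pearsonr (a sketch; assumes p_from_r and its scipy import are in scope):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
x = rng.normal(size=30)
y = x + rng.normal(size=30)
r = np.corrcoef(x, y)[0, 1]
print(p_from_r(r, len(x)))       # incomplete-beta p-value
print(stats.pearsonr(x, y)[1])   # scipy's p-value; should agree closely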
def _sp_subvector_error_out_of_range(radius, dimensions, subdimensions):
    dist = SubvectorLength(dimensions, subdimensions)
    sq_r = radius * radius

    normalization = 1.0 - dist.cdf(radius)
    b = (dimensions - subdimensions) / 2.0
    aligned_integral = beta(subdimensions / 2.0 + 1.0, b) * (1.0 - betainc(
        subdimensions / 2.0 + 1.0, b, sq_r))
    cross_integral = beta((subdimensions + 1) / 2.0, b) * (1.0 - betainc(
        (subdimensions + 1) / 2.0, b, sq_r))

    numerator = (sq_r * normalization + (
        aligned_integral - 2.0 * radius * cross_integral) / beta(
        subdimensions / 2.0, b))
    with np.errstate(invalid='ignore'):
        return np.where(
            numerator > np.MachAr().eps,
            numerator / normalization, np.zeros_like(normalization))
Example #11
    def cdf(self, y):
        a = self.a
        b = self.b
        p = self.p
        q = self.q

        z = np.exp(a * np.log((y/b)) - a*np.log(1 + (y/b)))
        cdf = betainc(p, q, z)
        return cdf
Example #12
def ttest_pval(len_array, corr):  
    ''' adapted from ~/anaconda3/envs/Py2/lib/python2.7/site-packages/scipy/stats/stats.py 
        >> pearsonr()
        
        calculates the p-value of the correlation indices using a ttest.
    '''
    df = len_array - 2
    t_squared = corr**2 * (df / ((1.0 - corr) * (1.0 + corr)))
    pval = betainc(0.5*df, 0.5, df/(df+t_squared))
    return pval
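The same betainc identity can be verified against the Student-t survival function (a sketch; assumes ttest_pval and its betainc import are in scope):

import numpy as np
from scipy import stats

n, r = 25, 0.42
df = n - 2
t = r * np.sqrt(df / (1.0 - r * r))
print(ttest_pval(n, r))              # incomplete-beta form
print(2.0 * stats.t.sf(abs(t), df))  # equivalent two-sided tail probability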
 def _f(main, aux, tau):
     """Little wrapper for incomplete beta function computation
     because the argument order is different from ROOT and
     there are problems with certain sets of arguments."""
     from scipy.special import betainc
     a = main
     b = aux + 1
     x = 1. / (1. + tau)
     result = betainc(a, b, x)
     return result
Example #14
    def cdf(self, y):
        a = self.a
        b = self.b
        c = self.c
        p = self.p
        q = self.q

        z = (y/b)**a
        cdf = betainc(p, q, z)
        return cdf
Example #15
def _uniform_order_statistic_cdf(i, n, t):
    """
    _uniform_order_statistic_cdf(i, n, t) -> Pr[U_(i+1) < t]
    
    Let U_1, ..., U_n ~ Uniform[0,1] be n independent random variables
    and let U_(1) < ... < U_(n) denote the same variables in sorted order.
    This function returns the Cumulative Distribution function of U_(i+1),
    i.e. Pr[U_(i+1) < t]

    note that this function also works for numpy array inputs"""
    return betainc(i+1, n-i, t)
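Since U_(i+1) ~ Beta(i+1, n-i), the result can be checked directly against scipy.stats.beta (a sketch; assumes the function and its betainc import are in scope):

from scipy import stats

# third-smallest of five uniforms (i = 2, n = 5) evaluated at t = 0.4
print(_uniform_order_statistic_cdf(2, 5, 0.4))
print(stats.beta.cdf(0.4, 3, 3))   # Beta(i+1, n-i) = Beta(3, 3)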
Example #16
 def log_sf(self,x):
     scalar = not isinstance(x,np.ndarray)
     x = np.atleast_1d(x)
     errs = np.seterr(divide='ignore')
     ret = np.log(special.betainc(x+1,self.r,self.p))
     np.seterr(**errs)
     ret[x < 0] = np.log(1.)
     if scalar:
         return ret[0]
     else:
         return ret
Example #17
def myrincbeta(x,a,b):
# compute the regularized incomplete beta function.
  if a < 0:
      cbf=(sps.gamma(a)*sps.gamma(b))/sps.gamma(a+b)
      res = (x**a * (1.0-x)**b) / (a * cbf)
      return myrincbeta(x,a+1.0,b) + res
  else:
#      cbf=(sps.gamma(a)*sps.gamma(b))/sps.gamma(a+b)
      cbf=1.0 # sps.betainc is the regularized inc. beta fun.
      res=(sps.betainc(a,b,x) / cbf)
      return res
Example #18
def ttest(m1,e1,n1,m2,e2,n2):
    m1 = float(m1)
    e1 = float(e1)
    m2 = float(m2)
    e2 = float(e2)
    v1 = e1**2
    v2 = e2**2
    t  = (m1-m2)/sqrt(v1+v2)
    nu = (v1+v2)**2/(v1**2/(n1-1)+v2**2/(n2-1))
    x = nu/(nu+t**2)
    p = 1.-betainc(nu/2,.5,x)
    return p
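For reference, betainc(nu/2, 0.5, nu/(nu + t**2)) is the two-sided Student-t tail probability, so this routine returns its complement; a sketch of the identity (the numbers are illustrative only):

from scipy import stats
from scipy.special import betainc

t, nu = 1.8, 17.3   # statistic and Welch-Satterthwaite degrees of freedom
x = nu / (nu + t**2)
print(1. - betainc(nu/2, .5, x))          # what ttest() returns
print(1. - 2. * stats.t.sf(abs(t), nu))   # same quantity via the t distribution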
Example #19
def F_int_approx(a, b, c, A, x, k, verbose=False):
#  	print a, b, c, x 	
	total = 0

	if x < 10e-10:
		return 0
	
	
	for i in range(1, k+1):
		if not( special.betainc(A + 1, i+1, x) ):
			beta_inc = 10e-320
		else:
			beta_inc = special.betainc(A + 1, i+1, x)
# 		print A+1, i+1, x, (math.lgamma(a+i) + math.lgamma(b+i) + math.lgamma(c) ) - (math.lgamma(a) + math.lgamma(b) + math.lgamma(c+i) + math.log(math.factorial(i))  ), special.betaln(A + 1, i+1)
		total += \
		math.exp( \
		(math.lgamma(a+i) + math.lgamma(b+i) + math.lgamma(c) ) - \
		(math.lgamma(a) + math.lgamma(b) + math.lgamma(c+i) + math.log(math.factorial(i))  ) +  \
		math.log( beta_inc ) + \
		special.betaln(A + 1, i+1) 
		)
	return total
Example #20
def corrcoef_matrix(matrix):
    # Code originating from http://stackoverflow.com/a/24547964 by http://stackoverflow.com/users/2455058/jingchao

    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betainc(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
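A quick check of one off-diagonal entry against scipy.stats.pearsonr (a sketch; assumes corrcoef_matrix and its imports are in scope):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
m = rng.normal(size=(4, 60))
r, p = corrcoef_matrix(m)
print(r[0, 1], p[0, 1])
print(stats.pearsonr(m[0], m[1]))   # should agree closely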
Example #21
def check_sample_mean(sm, v, n, popmean):
    # from stats.stats.ttest_1samp(a, popmean):
    # Calculates the t-obtained for the independent samples T-test on ONE group
    # of scores a, given a population mean.
    #
    # Returns: t-value, two-tailed prob
    df = n-1
    svar = ((n-1)*v) / float(df)    # looks redundant
    t = (sm-popmean) / np.sqrt(svar*(1.0/n))
    prob = betainc(0.5*df, 0.5, df/(df + t*t))

    # return t,prob
    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m, sm=%f,%f' %
                (t, prob, popmean, sm))
def bdtr(k, n, p):
    if (k < 0):
        return np.nan

    if (k == n):
        return (1.0)

    dn = n - k
    if (k == 0):
        dk = np.exp(dn * np.log(1.0 - p))
    else:
        dk = k + 1
        dk = betainc(dn, dk, 1.0 - p)
    return dk
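bdtr mirrors the binomial CDF, which gives a direct check (a sketch; assumes bdtr and its imports are in scope):

from scipy import stats

k, n, p = 3, 10, 0.35
print(bdtr(k, n, p))
print(stats.binom.cdf(k, n, p))   # should match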
Example #23
def compute_policy(successes, failures, mu, epsilon, threshold_confidence, accept_confidence, reject_confidence):
    p_in_threshold = (
        betainc(successes + 1, failures + 1, mu + epsilon) - 
        betainc(successes + 1, failures + 1, mu - epsilon))

    p_below = betainc(successes + 1, failures + 1, mu)
    p_above = 1 - p_below
    if p_above > accept_confidence:
        result = "a"
    elif p_in_threshold > threshold_confidence:
        result = "t"
    elif p_below > reject_confidence:
        result = "r"
    else:
        result = "c"
    # result = "c"
    if successes == 0 and failures == 0:
        result = 'c'
    elif successes == 0:
        result = 'r'
    
    # if p_above > accept_confidence:
    #     result = "a"
    # else:
    #     result = "c"

    # if p_in_threshold > threshold_confidence:
    #     result = "t"
    # if p_below > reject_confidence:
    #     result = "r"
    # else:
    #     result = "c"




    return result
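A minimal usage sketch (the threshold values below are illustrative assumptions, not values taken from the project; assumes compute_policy and its betainc import are in scope):

# args: successes, failures, mu, epsilon, threshold_conf, accept_conf, reject_conf
print(compute_policy(9, 1, 0.7, 0.1, 0.7, 0.7, 0.95))   # mass well above mu -> "a"
print(compute_policy(1, 9, 0.7, 0.1, 0.7, 0.7, 0.95))   # mass well below mu -> "r"
print(compute_policy(0, 0, 0.7, 0.1, 0.7, 0.7, 0.95))   # no evidence yet -> "c"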
def printThresholds():
    max_idx = 20
    mu = 0.7
    epsilon = 0.2
    threshold_confidence = 0.7
    accept_confidence = 0.7
    reject_confidence = 0.95
    print "".rjust(2),
    for x in na.arange(0, max_idx):
        print ("%d" % x).rjust(2),
    print
    for successes in na.arange(0, max_idx):
        print ("%d" % successes).rjust(2),
        for failures in na.arange(0, max_idx):

            p_in_threshold = (
                betainc(successes + 1, failures + 1, mu + epsilon) - 
                betainc(successes + 1, failures + 1, mu - epsilon))

            p_below = betainc(successes + 1, failures + 1, mu)
            p_above = 1 - p_below
            if p_in_threshold > threshold_confidence:
                result = "t"
            elif p_below > reject_confidence:
                result = "r"
            elif p_above > accept_confidence:
                result = "A"
            else:
                result = "c"
            if (successes + failures <= 10):
                print(str(result).rjust(2), end=" ")
            else:
                print(" ".rjust(2), end=" ")

            #print successes, failures, mu, "p(s): %.4f" %(1 - probability), 
            #print "p(f): %.4f" % probability
        print()
Example #25
    def relative_error(self, confidence=0.98, D=0):
        p = 0
        if D:
            try:
                from scipy import special, optimize
            except ImportError:
                raise Exception("Scipy needed for relative error bounds")
            k = self.k

            u = lambda D, k, e: (k - 1.0) / ((1.0 - e) * D)
            l = lambda D, k, e: (k - 1.0) / ((1.0 + e) * D)
            objective = (
                lambda e, D, k, confidence: special.betainc(k, D - k + 1, u(D, k, e))
                - special.betainc(k, D - k + 1, l(D, k, e))
                - confidence
            )

            try:
                p = optimize.newton(objective, x0=0.05, args=(D, k, confidence))
            except RuntimeError:
                pass
        else:
            p = math.sqrt(2.0 / (math.pi * (self.k - 2)))
        return p
def bdtrc(k, n, p):
    if (k < 0):
        return (1.0)

    if (k == n):
        return (0.0)
    dn = n - k
    if (k == 0):
        if (p < .01):
            dk = -np.expm1(dn * np.log1p(-p))
        else:
            dk = 1.0 - np.exp(dn * np.log(1.0 - p))
    else:
        dk = k + 1
        dk = betainc(dk, dn, p)
    return dk
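bdtrc is the complementary (survival) form; a sketch of the corresponding check (assumes bdtrc and its imports are in scope):

from scipy import stats

k, n, p = 3, 10, 0.35
print(bdtrc(k, n, p))
print(stats.binom.sf(k, n, p))   # should match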
Example #27
def my_t1cdf(x):
    '''    
    cumulative distribution function of a t-dist. with 1 degree of freedom
    function p=my_t1cdf(x)
    input
          x = point
          output
          p = cumulative probability

    see also: tcdf 
    '''
    xsq = x * x
    # scipy's betainc takes (a, b, x), so the evaluation point goes last
    p = betainc(1/2, 1/2, 1 / (1 + xsq)) / 2
    p[x>0]=1-p[x>0]
    
    return p    
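With one degree of freedom the t CDF has a simple reference in scipy.stats, which allows a quick check (a sketch; assumes my_t1cdf, with the (a, b, x) call order above, and its betainc import are in scope):

import numpy as np
from scipy import stats

xs = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(my_t1cdf(xs))
print(stats.t.cdf(xs, df=1))   # the two rows should agree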
Example #28
    def resid_anscombe(self, Y, mu):
        '''
        The Anscombe residuals

        Parameters
        ----------
        Y : array-like
            Endogenous response variable
        mu : array-like
            Fitted mean response variable

        Returns
        -------
        resid_anscombe : array
            The Anscombe residuals as defined below.

        Formulas
        ---------
        sqrt(n)*(cox_snell(Y)-cox_snell(mu))/(mu**(1/6.)*(1-mu)**(1/6.))

        where cox_snell is defined as
        cox_snell(x) = betainc(2/3., 2/3., x)*beta(2/3., 2/3.)
        where betainc is the incomplete beta function

        Notes
        -----
        The name 'cox_snell' is idiosyncratic and is simply used for
        convenience following the approach suggested in Cox and Snell (1968).
        Further note that
        cox_snell(x) = x**(2/3.)/(2/3.)*hyp2f1(2/3.,1/3.,5/3.,x)
        where hyp2f1 is the hypergeometric 2f1 function.  The Anscombe
        residuals are sometimes defined in the literature using the
        hyp2f1 formulation.  Both betainc and hyp2f1 can be found in scipy.

        References
        ----------
        Anscombe, FJ. (1953) "Contribution to the discussion of H. Hotelling's
            paper." Journal of the Royal Statistical Society B. 15, 229-30.

        Cox, DR and Snell, EJ. (1968) "A General Definition of Residuals."
            Journal of the Royal Statistical Society B. 30, 248-75.

        '''
        cox_snell = lambda x: special.betainc(2/3., 2/3., x)\
                            *special.beta(2/3.,2/3.)
        return np.sqrt(self.n)*(cox_snell(Y)-cox_snell(mu))/\
                        (mu**(1/6.)*(1-mu)**(1/6.))
Example #29
	def _ccprmod(self, proba, target, B=20):
		"""
		Cf MATLAB code
		CCPRMOD Classifier competence based on probabilistic modelling
		
		cc = ccprmod(d,j,B)
		
		Input:
		proba - NxC matrix of normalised C class supports produced by the classifier for N objects
		target - Nx1 vector of indices of the correct classes for N objects
		B - number of points used in the calculation of the competence, higher values result in a more accurate estimation (optional, default B=20)
		
		Output:
		competences - Nx1 vector of the classifier competences		
		"""
		n_sample, n_classes = proba.shape

		# Generating points
		x = np.linspace(0, 1, B)
		x = repmat(x, n_sample, n_classes)
		
		# Calculating parameters of the beta pdfs
		a = np.zeros(x.shape)
		b = np.zeros(x.shape)
		betaincj = np.zeros(x.shape)

		for c in range(n_classes):
			a[:, c*B: (c+1)*B] = repmat(n_classes*proba[:,c].reshape(-1, 1),1,B)

		b = n_classes - a
		a[a==0] = 1e-9
		b[b==0] = 1e-9
		x[x==0] = 1e-9
		betaincj = betainc(a, b, x)

		# calculating competences
		cc = np.zeros((n_sample, 1))
		for n in range(n_sample):
			t = range(target[n]*B, (target[n]+1)*B)
			bc = betaincj[n, t]
			setdiff = list(set(range(n_classes*B)) - set(t))
			bi = betaincj[n, setdiff]
			bi = np.reshape(bi, (n_classes-1, B))
			cc[n] = sum((bc[1:] - bc[:-1])*np.prod((bi[:,:-1] + bi[:,1:])/2, axis=0))

		return cc
Example #30
def fv_test(x0,x1):
# taken from IDL library    
    nx0 = len(x0)
    nx1 = len(x1)
    v0 = np.var(x0)
    v1 = np.var(x1)
    if v0 > v1:
        f = v0/v1
        df0 = nx0-1  # numerator dof follows the larger-variance sample
        df1 = nx1-1
    else:
        f = v1/v0
        df0 = nx1-1
        df1 = nx0-1
    prob = 2.0*betainc(0.5*df1,0.5*df0,df1/(df1+df0*f))
    if prob >1:
        return (f,2.0-prob)
    else:
        return (f,prob) 
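The returned probability is twice the F-distribution tail of the returned ratio, which gives an internal consistency check (a sketch; assumes fv_test and its imports are in scope):

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
x0 = rng.normal(scale=2.0, size=40)   # larger variance, so it ends up in the numerator
x1 = rng.normal(scale=1.0, size=30)
f, prob = fv_test(x0, x1)
print(prob)
print(2.0 * stats.f.sf(f, len(x0) - 1, len(x1) - 1))   # same value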
Example #31
def pearsonr(x, y, eps=1e-5):
    r"""
    Calculate a Pearson correlation coefficient and the p-value for testing
    non-correlation.

    The Pearson correlation coefficient measures the linear relationship
    between two datasets. Strictly speaking, Pearson's correlation requires
    that each dataset be normally distributed, and not necessarily zero-mean.
    Like other correlation coefficients, this one varies between -1 and +1
    with 0 implying no correlation. Correlations of -1 or +1 imply an exact
    linear relationship. Positive correlations imply that as x increases, so
    does y. Negative correlations imply that as x increases, y decreases.

    The p-value roughly indicates the probability of an uncorrelated system
    producing datasets that have a Pearson correlation at least as extreme
    as the one computed from these datasets. The p-values are not entirely
    reliable but are probably reasonable for datasets larger than 500 or so.

    Parameters
    ----------
    x : (N,) array_like
        Input
    y : (N,) array_like
        Input

    Returns
    -------
    r : float
        Pearson's correlation coefficient
    p-value : float
        2-tailed p-value

    Notes
    -----

    The correlation coefficient is calculated as follows:

    .. math::

        r_{pb} = \frac{\sum (x - m_x) (y - m_y)}
                      {\sqrt{\sum (x - m_x)^2 \sum (y - m_y)^2}}

    where :math:`m_x` is the mean of the vector :math:`x` and :math:`m_y` is
    the mean of the vector :math:`y`.


    References
    ----------
    http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation

    Examples
    --------
    >>> from scipy import stats
    >>> a = np.array([0, 0, 0, 1, 1, 1, 1])
    >>> b = np.arange(7)
    >>> stats.pearsonr(a, b)
    (0.8660254037844386, 0.011724811003954654)

    >>> stats.pearsonr([1,2,3,4,5], [5,6,7,8,7])
    (0.83205029433784372, 0.080509573298498519)
    """
    # x and y should have same length.
    x = np.asarray(x)
    y = np.asarray(y)
    n = x.shape[-1]
    mx = x.mean(axis=-1, keepdims=True)
    my = y.mean(axis=-1, keepdims=True)
    xm, ym = x - mx, y - my
    r_num = np.sum(xm * ym, axis=-1)
    # r_den = np.sqrt(sum_of_squares(xm) * sum_of_squares(ym))
    xm = np.sum(xm**2, axis=-1)
    ym = np.sum(ym**2, axis=-1)
    r_den = np.sqrt(xm * ym)
    idx = np.where(r_den==0)[0]
    r_den[idx] = eps
    r = r_num / r_den
    r[idx] = 0.0
    # Presumably, if abs(r) > 1, then it is only some small artifact of
    # floating point arithmetic.
    r = np.clip(r, -1.0, 1.0)
    df = n - 2
    idx = np.where(abs(r) == 1.0)[0]
    r[idx] += eps
    t_squared = r**2 * (df / ((1.0 - r) * (1.0 + r)))
    prob = special.betainc(
        0.5*df, 0.5, np.fmin(np.asarray(df / (df + t_squared)), 1.0)
    )
    prob[idx] = 0.0
    r[idx] -= eps

    return r, prob
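A usage sketch for the batched form, checked against scipy.stats.pearsonr on one row (assumes this pearsonr and its imports are in scope):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
x = rng.normal(size=(3, 40))
y = x + rng.normal(size=(3, 40))
r, p = pearsonr(x, y)
print(r[0], p[0])
print(stats.pearsonr(x[0], y[0]))   # should agree closely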
Example #32
def p(k, F):
    g = 2 * F * F + 1
    a = g / (1 + g)
    return np.power(g, -.5 * k) * np.power(a, k) / (
        k * beta(.5 * k, .5 * k + 1)) + 1 - betainc(.5 * k, .5 * k + 1, a)
Example #33
 def cdf(dist, a, b, *args, **kwargs):
     G = dist.cdf(*args, **kwargs)
     return sp.betainc(a, b, G)
Example #34
def _betai(a, b, x):
    x = np.asarray(x)
    x = np.where(x < 1.0, x, 1.0)  # if x > 1 then return 1.0
    return betainc(a, b, x)
Example #35
def _pearsonr(x: xr.DataArray, y: xr.DataArray, monitor: Monitor) -> xr.Dataset:
    """
    Calculates Pearson correlation coefficients and p-values for testing
    non-correlation of lon/lat/time xarray datasets for each lon/lat point.

    Heavily influenced by scipy.stats.pearsonr

    The Pearson correlation coefficient measures the linear relationship
    between two datasets. Strictly speaking, Pearson's correlation requires
    that each dataset be normally distributed, and not necessarily zero-mean.
    Like other correlation coefficients, this one varies between -1 and +1
    with 0 implying no correlation. Correlations of -1 or +1 imply an exact
    linear relationship. Positive correlations imply that as x increases, so
    does y. Negative correlations imply that as x increases, y decreases.

    The p-value roughly indicates the probability of an uncorrelated system
    producing datasets that have a Pearson correlation at least as extreme
    as the one computed from these datasets. The p-values are not entirely
    reliable but are probably reasonable for datasets larger than 500 or so.

    :param x: lon/lat/time xr.DataArray
    :param y: xr.DataArray of the same spatiotemporal extents and resolution as x.
    :param monitor: Monitor to use for monitoring the calculation
    :return: A dataset containing the correlation coefficients and p_values on
    the lon/lat grid of x and y.

    References
    ----------
    http://www.statsoft.com/textbook/glosp.html#Pearson%20Correlation
    """
    with monitor.starting("Calculate Pearson correlation", total_work=6):
        n = len(x['time'])

        xm, ym = x - x.mean(dim='time'), y - y.mean(dim='time')
        xm_ym = xm * ym
        r_num = xm_ym.sum(dim='time')
        xm_squared = xr.ufuncs.square(xm)
        ym_squared = xr.ufuncs.square(ym)
        r_den = xr.ufuncs.sqrt(xm_squared.sum(dim='time') *
                               ym_squared.sum(dim='time'))
        r_den = r_den.where(r_den != 0)
        r = r_num / r_den

        # Presumably, if abs(r) > 1, then it is only some small artifact of floating
        # point arithmetic.
        # At this point r should be a lon/lat dataArray, so it should be safe to
        # load it in memory explicitly. This may take time as it will kick-start
        # deferred processing.
        # Comparing with NaN produces warnings that can be safely ignored
        default_warning_settings = np.seterr(invalid='ignore')
        with monitor.child(1).observing("task 1"):
            negativ_r = r.values < -1.0
        with monitor.child(1).observing("task 2"):
            r.values[negativ_r] = -1.0
        with monitor.child(1).observing("task 3"):
            positiv_r = r.values > 1.0
        with monitor.child(1).observing("task 4"):
            r.values[positiv_r] = 1.0
        np.seterr(**default_warning_settings)
        r.attrs = {'description': 'Correlation coefficients between'
                   ' {} and {}.'.format(x.name, y.name)}

        df = n - 2
        t_squared = xr.ufuncs.square(r) * (df / ((1.0 - r.where(r != 1)) *
                                                 (1.0 + r.where(r != -1))))
        prob = df / (df + t_squared)
        with monitor.child(1).observing("task 5"):
            prob_values_in = prob.values
        with monitor.child(1).observing("task 6"):
            prob.values = betainc(0.5 * df, 0.5, prob_values_in)
        prob.attrs = {'description': 'Rough indicator of probability of an'
                      ' uncorrelated system producing datasets that have a Pearson'
                      ' correlation at least as extreme as the one computed from'
                      ' these datsets. Not entirely reliable, but reasonable for'
                      ' datasets larger than 500 or so.'}

        retset = xr.Dataset({'corr_coef': r,
                             'p_value': prob})
    return retset
Example #36
def zero_beta(x, *args):
    q, r, normal_val = args
    zero_beta = np.absolute(spec.betainc(q, r, x) - normal_val)
    return zero_beta
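zero_beta is shaped as an objective for a scalar minimiser; a sketch of one way to use it, with scipy's closed-form inverse for comparison (assumes zero_beta's module imports scipy.special as spec):

from scipy import optimize, special

q, r, normal_val = 2.0, 3.0, 0.95
res = optimize.minimize_scalar(zero_beta, bounds=(0.0, 1.0), method='bounded',
                               args=(q, r, normal_val))
print(res.x)
print(special.betaincinv(q, r, normal_val))   # direct inverse, same value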
Example #37
def rolling_pr_rmsd(timestamps, data, window_size, center, min_periods):
    """
    DEPRECATED: use the faster version in metrics._fast instead! Only here for
    testing.

    Computation of rolling Pearson R.

    Parameters
    ----------
    timestamps : float64
        Time stamps as julian dates.
    data : numpy.ndarray
        Time series data in 2d array.
    window_size : float
        Window size in fraction of days.
    center : bool
        Set window at the center.
    min_periods : int
        Minimum number of observations in window required for computation.

    Results
    -------
    pr_arr : numpy.array
        Pearson R and p-value.
    """
    pr_arr = np.empty((timestamps.size, 2), dtype=np.float32)
    rmsd_arr = np.empty(timestamps.size, dtype=np.float32)
    ddof = 0

    for i in range(timestamps.size):
        time_diff = timestamps - timestamps[i]

        if center:
            inside_window = np.abs(time_diff) <= window_size
        else:
            inside_window = (time_diff <= 0) & (time_diff > -window_size)

        idx = np.nonzero(inside_window)[0]
        n_obs = inside_window.sum()

        if n_obs == 0 or n_obs < min_periods:
            pr_arr[i, :] = np.nan
        else:
            sub1 = data[idx[0]:idx[-1] + 1, 0]
            sub2 = data[idx[0]:idx[-1] + 1, 1]

            # pearson r
            pr_arr[i, 0] = np.corrcoef(sub1, sub2)[0, 1]

            # p-value
            if np.abs(pr_arr[i, 0]) == 1.0:
                pr_arr[i, 1] = 0.0
            else:
                df = n_obs - 2.
                t_squared = pr_arr[i, 0]*pr_arr[i, 0] * \
                    (df / ((1.0 - pr_arr[i, 0]) * (1.0 + pr_arr[i, 0])))
                x = df / (df + t_squared)
                x = np.ma.where(x < 1.0, x, 1.0)
                pr_arr[i, 1] = betainc(0.5 * df, 0.5, x)

            # rmsd
            rmsd_arr[i] = np.sqrt(
                np.sum((sub1 - sub2)**2) / (sub1.size - ddof))

    return pr_arr, rmsd_arr
Example #38
 def cumulative_probability(self, x):
     # scipy's betainc is already the regularized incomplete beta, so no division by beta(a, b) is needed
     return betainc(self.a, self.b, x)
def OLS_AR1Corr(TheData, TheMDI, TheConfRange):  # ,Lowee=Lowee,Highee=Highee):
    ''' Calculates the linear trend using Ordinary Least Squares regression '''
    ''' Can cope with specified missing data indicator TheMDI '''
    ''' TheData: a numpy array of single time series data  - can have missing data = TheMDI'''
    ''' TheMDI: a number used to identify missing data (not NaN) '''
    ''' TheConfRange: a number between 0 and 1 for the desired confidence interval e.g. 0.9 for 90th pct CI '''
    ''' TheSlope[0]: Outputs the slope at a rate of unit per time step '''
    ''' TheSlope[1:3]: Outputs the 5th and 95th pctile standard error confidence intervals 
        (90pct confidence intervals) around the slope corrected for AR(1) correlation '''
    ''' TheSlope[3]: Outputs the 1 sigma standard error '''
    ''' TheSlope[4]: Outputs the +/- Confidence Interval for the given p-value '''
    ''' TheSlope[5]: Outputs the AR(1) correlation of regression residuals '''
    ''' TheSlope[6]: Outputs the effective degrees of freedom '''
    ''' TheSlope[7]: Outputs the p-value of the trend using two-sided students t test - can we reject H0 of no trend '''
    ''' Santer et al., 2008 - methodology '''
    ''' If Lowee and/or Highee are set they will come out as changed values '''
    ''' This is intended to be identical to Alexey Kaplan's IDL code which is almost identical to Santer et al '''
    ''' I have tested this with TESTDATA/data4S2008comp.dat and the IDL code and get identical values '''
    ''' The Kaplan code invokes a different method for missing data: '''
    '''      Data are compressed to only non-missing and then processed '''
    '''      I will test both running my method with missing data and compressing then running '''
    ''' The Kaplan method adds two caveats which I will add here: '''
    '''      If autocorrelation is -ve then set to 0 '''
    '''      If the effective Deg of Freedom < 3 then Inf or NaN are returned '''
    ''' Something else about the regression residuals using indices but I don't understand '''

    # Check the desired confidence range is between 0-1
    if ((TheConfRange < 0.) | (TheConfRange > 1.)):
        raise Exception("invalid confidnece range - should be between 0 and 1")

    # Set up empty list for returning the output values
    #    - slope per unit time,
    #    - lower (10th pct) CI,
    #    - upper (90th pct) CI]
    #    - 1 sigma SE
    #    - The +/- confidence interval for the given p value
    #    - AR(1) correlation in the residuals
    #    - the effective degrees of freedom
    #    - the p-value for the trend
    TheSlope = np.array([0., 0., 0., 0., 0., 0., 0., 0.])

    # Convert the data to a pandas dataframe?
    # First set any missing values to NaNs
    TheDataNANs = np.copy(TheData)  # copying just to be safe
    gots = np.where(TheDataNANs == TheMDI)

    # ADD A CATCH FOR No. Data points < 3 as in KAPLAN (actually I'm setting it to 50% missing!!!)
    if ((float(len(gots[0])) / float(len(TheData))) > 0.5):
        TheSlope[0:8] = TheMDI
        print('Fewer than 50% valid data points')
        #        pdb.set_trace()
        return TheSlope
    # Tested

    if (len(gots[0]) > 0):
        TheDataNANs[np.where(TheDataNANs == TheMDI)] = float('NaN')
    DataDF = pd.DataFrame(TheDataNANs,
                          index=np.arange(len(TheDataNANs)),
                          columns=['variable'],
                          dtype=float,
                          copy=True)
    # This needs to be a copy otherwise changes to TheData lead to changes to DataDF.variable[]

    # As we are using formula= then we don't need to specify a column of 1s and an intercept is calculated
    olsmod = smf.ols(formula='variable ~ np.arange(len(TheDataNANs))',
                     data=DataDF,
                     missing='drop')  # drop the NaNs
    olsres = olsmod.fit()
    # olsmod.summary() prints all
    # olsmod.params prints the slopes for each item 0 = intercept, 1 = variable
    # olsmod.predict() prints the predicted values using the model fit, including intercept
    # olsmod.bse prints the standard errors (1 sigma using n-2 degrees of freedom) 0 = intercept, 1 = variable
    TheSlope[0] = olsres.params[1]
    #    print('Decadal Trend: ',np.round(TheSlope[0]*120,4))
    #pdb.set_trace()

    # Now get the original 1 sigma standard error which uses n-2 degrees of freedom and then calculate the corrected one
    # First we need to use masked arrays to make sure we account for missing data
    TheDataMSK = np.ma.masked_equal(
        TheData, TheMDI
    )  # better hope that this captures them all and nothing silly with floats

    # Now get the time series of regression residuals for each data point
    # First create a masked array of missing data
    TheResids = np.ma.masked_equal(np.repeat(TheMDI, len(TheDataMSK)), TheMDI)
    # Get a pointer array to the non-missing data but need to test if there are any missing first
    if (np.ma.count(TheDataMSK) == len(TheData)):
        print('no missing')
        # then we do not need to faff with the missing data
        # Subtract the predicted values for each time point from the actual values
        TheResids = TheDataMSK - olsres.predict(
        )  # if there are missing values then these won't be predicted so we need to fill back in
        MaskofPoints = np.arange(len(TheData))

    else:
        # we do need to faff with the missing data
        print('got a missing')
        MaskofPoints = np.where(np.ma.getmask(TheDataMSK) == False)
        # Subtract the predicted values for each time point from the actual values
        TheResids[MaskofPoints] = TheDataMSK[MaskofPoints] - olsres.predict(
        )  # if there are missing values then these won't be predicted so we need to fill back in

    # We need the AR(1) values of the regression residuals and should probably test to make sure the data are autocorrelated
    # Using the np.ma.corrcoef works even if all data are present
    # This ignores missing data points so isn't ideal - better to take the longest continuous period of data?
    Lag1AR = np.ma.corrcoef(TheResids[0:-1], TheResids[1:])[0][1]
    TheSlope[5] = Lag1AR
    #    print('Autocorrelation at lag 1: ',np.round(Lag1AR,4))
    #pdb.set_trace()

    # ADD A CATCH FOR NEGATIVE AR(1) - as in Kaplan
    # ERROR - If AR(1) is negative then it should be given a value of 0 so it has no effect on reducing deg of freedom
    # previously I had set the trends to MDI - IDIOT!!!
    if (Lag1AR < 0.):
        Lag1AR = 0.
#TheSlope[0:5] = TheMDI
#TheSlope[6] = TheMDI
#        print('Negative AR(1)')
#return TheSlope
# Tested

# This is the original number of samples of time NOT INCLUDING MISSING DATA POINTS
    nORIG = np.ma.count(TheDataMSK)  # number of data points

    # Now get the effective number of samples dependent on the degree of autocorrelation at lag 1
    nEFF = nORIG * ((1 - Lag1AR) / (1 + Lag1AR))
    TheSlope[6] = nEFF
    #    print('Original no. time points: ',nORIG)
    #    print('Effective no. time points: ',np.round(nEFF,4))
    #pdb.set_trace()

    # ADD A CATCH FOR nEFF < 3 as in KAPLAN
    if (nEFF < 3):
        TheSlope[1:6] = TheMDI
        TheSlope[7] = TheMDI
        #        print('Fewer than 3 effective degrees of freedom: ',nEFF)
        return TheSlope
    # Tested

    # Now get the variance of the regression residuals s_e^2
    s_eSQ = (1 / (nEFF - 2)) * np.ma.sum(TheResids**2)
    # and just for comparison get it for the original number of samples
    s_eSQORIG = (1 / (nORIG - 2)) * np.ma.sum(TheResids**2)

    #    print('Decade Original variance of regression residuals: ',np.round(s_eSQORIG*120,4))
    #    print('Decade Effective variance of regression residuals: ',np.round(s_eSQ*120,4))
    #pdb.set_trace()

    # Now calculate the 1 sigma standard error
    s_1sig = (s_eSQ / np.sum((MaskofPoints - np.mean(MaskofPoints))**2))**0.5
    # and just for comparison get it for the original number of samples
    s_1sigORIG = (s_eSQORIG / np.sum(
        (MaskofPoints - np.mean(MaskofPoints))**2))**0.5
    TheSlope[3] = s_1sig

    #    print('Decade Original 1 sigma standard error: ',np.round(s_1sigORIG*120,4))
    #    print('Decade Effective 1 sigma standard error: ',np.round(s_1sig*120,4))
    #pdb.set_trace()

    #   Now calculate the p-value to test whether the H0 (no trend) is rejected (if p-value < 0.05)
    t_students_2tail = TheSlope[0] / s_1sig
    integration_lev = (nEFF - 2.0) / ((nEFF - 2.0) + t_students_2tail**2.)
    TheSlope[7] = betainc((nEFF - 2.0) / 2., 0.5, integration_lev)
    #    pdb.set_trace()

    # Now find the 90th percentile confidence intervals by integrating the area under the assumed curve
    # and populate TheSlope array with the lower and upper bound
    # I INCORRECTLY assumed that this is slope - 2*s_1sig and slope + 2*s_1sig - this would actually be 95pct confidence intervals approximately
    # This uses the inverse of students t CDF (quantile function) using the bisection method of the incomplete beta function (scipy.special.betainc)
    # When later the slope may be multiplied to get decadal trend the standard errors should be multiplied likewise
    ConfInt = CI_tINV(s_1sig, TheConfRange, nEFF)
    TheSlope[4] = ConfInt
    #    print('Confidence interval for the p-value ', ThePvalue,' :',np.round(ConfInt*120,4))
    TheSlope[1] = TheSlope[0] - ConfInt
    TheSlope[2] = TheSlope[0] + ConfInt

    #    print('Decade AR(1) corrected 90th pct standard error confidence intervals: ',np.round(TheSlope[1]*120,4),np.round(TheSlope[2]*120,4))
    #pdb.set_trace()

    return TheSlope  # ReadData
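The p-value step above uses the standard identity between the regularized incomplete beta function and the two-sided Student-t tail probability; a stand-alone sketch of that identity (the numbers are illustrative only):

from scipy import stats
from scipy.special import betainc

t, dof = 2.3, 28.0   # e.g. the t statistic and nEFF - 2
print(betainc(dof / 2.0, 0.5, dof / (dof + t**2)))   # incomplete-beta form
print(2.0 * stats.t.sf(abs(t), dof))                 # equivalent two-sided tail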
Example #40
    def _testBetaInc(self, dtype):
        try:
            from scipy import special  # pylint: disable=g-import-not-at-top
            np_dt = dtype.as_numpy_dtype

            # Test random values
            a_s = np.abs(np.random.randn(10, 10) * 30).astype(
                np_dt)  # in (0, infty)
            b_s = np.abs(np.random.randn(10, 10) * 30).astype(
                np_dt)  # in (0, infty)
            x_s = np.random.rand(10, 10).astype(np_dt)  # in (0, 1)
            with self.test_session(use_gpu=self.use_gpu):
                tf_a_s = tf.constant(a_s, dtype=dtype)
                tf_b_s = tf.constant(b_s, dtype=dtype)
                tf_x_s = tf.constant(x_s, dtype=dtype)
                tf_out = tf.betainc(tf_a_s, tf_b_s, tf_x_s).eval()
            scipy_out = special.betainc(a_s, b_s, x_s).astype(np_dt)

            # the scipy version of betainc uses a double-only implementation.
            # TODO(ebrevdo): identify reasons for (sometime) precision loss
            # with doubles
            tol = 1e-4 if dtype == tf.float32 else 5e-5
            self.assertAllCloseAccordingToType(scipy_out,
                                               tf_out,
                                               rtol=tol,
                                               atol=tol)

            # Test out-of-range values (most should return nan output)
            combinations = list(
                itertools.product([-1, 0, 0.5, 1.0, 1.5], repeat=3))
            a_comb, b_comb, x_comb = np.asarray(list(zip(*combinations)),
                                                dtype=np_dt)
            with self.test_session(use_gpu=self.use_gpu):
                tf_comb = tf.betainc(a_comb, b_comb, x_comb).eval()
            scipy_comb = special.betainc(a_comb, b_comb, x_comb).astype(np_dt)
            self.assertAllCloseAccordingToType(scipy_comb, tf_comb)

            # Test broadcasting between scalars and other shapes
            with self.test_session(use_gpu=self.use_gpu):
                self.assertAllCloseAccordingToType(special.betainc(
                    0.1, b_s, x_s).astype(np_dt),
                                                   tf.betainc(0.1, b_s,
                                                              x_s).eval(),
                                                   rtol=tol,
                                                   atol=tol)
                self.assertAllCloseAccordingToType(special.betainc(
                    a_s, 0.1, x_s).astype(np_dt),
                                                   tf.betainc(a_s, 0.1,
                                                              x_s).eval(),
                                                   rtol=tol,
                                                   atol=tol)
                self.assertAllCloseAccordingToType(special.betainc(
                    a_s, b_s, 0.1).astype(np_dt),
                                                   tf.betainc(a_s, b_s,
                                                              0.1).eval(),
                                                   rtol=tol,
                                                   atol=tol)
                self.assertAllCloseAccordingToType(special.betainc(
                    0.1, b_s, 0.1).astype(np_dt),
                                                   tf.betainc(0.1, b_s,
                                                              0.1).eval(),
                                                   rtol=tol,
                                                   atol=tol)
                self.assertAllCloseAccordingToType(special.betainc(
                    0.1, 0.1, 0.1).astype(np_dt),
                                                   tf.betainc(0.1, 0.1,
                                                              0.1).eval(),
                                                   rtol=tol,
                                                   atol=tol)

            with self.assertRaisesRegexp(ValueError,
                                         "Shapes .* are not compatible"):
                tf.betainc(0.5, [0.5], [[0.5]])

            with self.test_session(use_gpu=self.use_gpu):
                with self.assertRaisesOpError("Shapes of .* are inconsistent"):
                    a_p = tf.placeholder(dtype)
                    b_p = tf.placeholder(dtype)
                    x_p = tf.placeholder(dtype)
                    tf.betainc(a_p, b_p, x_p).eval(feed_dict={
                        a_p: 0.5,
                        b_p: [0.5],
                        x_p: [[0.5]]
                    })

        except ImportError as e:
            tf.logging.warn("Cannot test special functions: %s" % str(e))
Example #41
 def f(x):
     return betainc(a, b, x)
Example #42
 def cdf(aa, bb, x):
     return sp.betainc(aa, bb, x) * sp.beta(aa, bb)
Example #43
def betainc(x, a, b):
    return sc_special.betainc(a, b, x)
Example #44
def qso_engine(time,
               data,
               error,
               ltau=3.,
               lvar=-1.7,
               sys_err=0.,
               return_model=False):
    """Calculates the fit quality of a damped random walk to a qso lightcurve.
    The formalism is from Rybicki & Press (1994; arXiv:comp-gas/9405004)

    Data are modelled with a covariance function
        Lij = 0.5*var*tau*exp(-|time_i-time_j|/tau) .

    Input:
        time - measurement times, typically days
        data - measured magnitudes
        error - uncertainty in measured magnitudes

    Output (dictionary):

        chi2/nu - classical variability measure
        chi2_qso/nu - for goodness of fit given fixed parameters
        chi2_qso/nu_extra - for parameter fitting, add to chi2/nu
        chi^2/nu_NULL - expected chi2/nu for non-qso variable

        signif_qso - significance chi^2/nu<chi^2/nu_NULL (rule out false alarm)
        signif_not_qso - significance chi^2/nu>1 (rule out qso)
        signif_vary - significance that source is variable
        class - resulting source type (ambiguous, not_qso, qso)

        model - time series prediction for each datum given all others (iff return_model==True)
        dmodel - model uncertainty, including uncertainty in data

    Notes:
        T = L^(-1)
        Data variance is D
        Full covariance C^(-1) = (L+D)^(-1) = T [T+D^(-1)]^(-1) D^(-1)
        Code takes advantage of the tridiagonality of T and T+D^(-1).
    """

    out_dict = {}
    out_dict['chi2_qso/nu'] = 999
    out_dict['chi2_qso/nu_extra'] = 0.
    out_dict['signif_qso'] = 0.
    out_dict['signif_not_qso'] = 0.
    out_dict['signif_vary'] = 0.
    out_dict['chi2_qso/nu_NULL'] = 0.
    out_dict['chi2/nu'] = 0.
    out_dict['nu'] = 0
    out_dict['model'] = []
    out_dict['dmodel'] = []
    out_dict['class'] = 'ambiguous'

    lvar0 = np.log10(0.5) + lvar + ltau

    ln = len(data)
    dt = abs(time[1:] - time[:-1])

    # first make sure all dt>0
    g = np.where(dt > 0.)[0]
    lg = len(g)
    # must have at least 2 data points
    if lg <= 0:
        return out_dict

    if return_model:
        model = 1. * data
        dmodel = -1. * error

    if lg < ln:
        dt = dt[g]
        gg = np.zeros(lg + 1, dtype='int64')
        gg[1:] = g + 1
        dat = data[gg]
        wt = 1. / (sys_err**2 + error[gg]**2)
        ln = lg + 1
    else:
        dat = 1. * data
        wt = 1. / (sys_err**2 + error**2)

    out_dict['nu'] = ln - 1.
    varx = np.var(dat)
    dat0 = (dat * wt).sum() / wt.sum()
    out_dict['chi2/nu'] = ((dat - dat0)**2 * wt).sum() / out_dict['nu']

    # define tridiagonal matrix T = L^(-1)
    # sparse matrix form: ab[u + i - j, j] == a[i,j]   i<=j, (here u=1)
    T = np.zeros((2, ln), dtype='float64')
    arg = dt * np.exp(-np.log(10) * ltau)
    ri = np.exp(-arg)
    ei = 1. / (1. / ri - ri)
    T[0, 1:] = -ei
    T[1, :-1] = 1. + ri * ei
    T[1, 1:] += ri * ei
    T[1, ln - 1] += 1.
    T0 = np.median(T[1, :])
    T /= T0

    # equation for chi2_qso is [ (dat-x0) T Tp^(-1) D^(-1) (dat-x0) ]  , where Tp=T+D^(-1) and D^(-1)=wt
    fac = np.exp(np.log(10) * lvar0) / T0
    Tp = 1. * T
    Tp[1, :] += wt * fac
    # solve Tp*z=y for z (y=wt*dat)
    # This works for scipy __version__>='0.9.0' on anathem (20120809)
    b1 = (wt * dat).reshape((1, ln))
    b2 = b1.T
    #(Tpc,z) = solveh_banded(Tp,b2)
    z = solveh_banded(Tp, b2)
    Tpc = cholesky_banded(
        Tp
    )  # the solveh_banded() function used to return the Cholesky matrix; now we get it separately
    z = z.T
    z = z[0, :]
    c1 = wt.reshape((1, ln))
    c2 = c1.T
    #(Tpc,z0) = solveh_banded(Tp,c2)
    z0 = solveh_banded(Tp, c2)
    #HAS NOT CHANGED#Tpc2 = cholesky_banded(Tp)
    z0 = z0.T
    z0 = z0[0, :]

    #finally, get u=T*z
    u = T[1, :] * z
    u[1:] += T[0, 1:] * z[:-1]
    u[:-1] += T[0, 1:] * z[1:]
    u0 = T[1, :] * z0
    u0[1:] += T[0, 1:] * z0[:-1]
    u0[:-1] += T[0, 1:] * z0[1:]

    # magnitude offset x0, error = 1./sqrt(u0sum)
    u0sum = u0.sum()
    x0 = u.sum() / u0sum

    # fit statistic
    out_dict['chi2_qso/nu'] = np.dot(dat - x0, u - u0 * x0) / out_dict['nu']

    # -2*log(likelihood) = chi2_qso + ldet_C + log(u0sum)
    #   first term: use chi2_qso/nu for goodness of fit with fixed parameters;
    #   all terms: use chi2_qso/nu + chi2_qso/nu_extra for fitting with variable parameters
    # get log of determinant for use later
    Tc = cholesky_banded(T)
    ldet_Tp = 2 * np.log(Tpc[1, :]).sum()
    ldet_T = 2 * np.log(Tc[1, :]).sum()
    ldet_C = ldet_Tp - ldet_T - np.log(wt).sum()
    out_dict['chi2_qso/nu_extra'] = (ldet_C + np.log(u0sum)) / out_dict['nu']

    # get trace of C^(-1) for significance calculation
    Tpm = chol_inverse_diag(Tpc)
    diagC = T[1, :] * wt * Tpm[1, :]
    diagC[:-1] += T[0, 1:] * wt[0:-1] * Tpm[0, 1:]
    diagC[1:] += T[0, 1:] * wt[1:] * Tpm[0, 1:]
    TrC = diagC.sum()

    # significance in sigma units (large means false alarm unlikely)
    # (expected value of chi2_qso under the NULL hypothesis is TrC*varx)
    out_dict['chi2_qso/nu_NULL'] = TrC * varx / out_dict['nu']
    a = ln / 2.
    x = (out_dict['chi2_qso/nu'] + 1.e-8) / (out_dict['chi2_qso/nu_NULL'] +
                                             out_dict['chi2_qso/nu'] + 1.e-8)
    prob = betainc(a, a, x)
    if prob <= 0:
        lprob = a * np.log(x) - np.log(a) + gammaln(2 * a) - 2 * gammaln(a)
    else:
        lprob = np.log(prob)
    out_dict['signif_qso'] = lprob2sigma(lprob)

    a = ln / 2.
    x = 1. / (1. + out_dict['chi2_qso/nu'])
    prob = betainc(a, a, x)
    if prob <= 0:
        lprob = a * np.log(x) - np.log(a) + gammaln(2 * a) - 2 * gammaln(a)
    else:
        lprob = np.log(prob)
    out_dict['signif_not_qso'] = lprob2sigma(lprob)

    x = out_dict['chi2/nu'] * out_dict['nu']
    prob = gammaincc(0.5 * out_dict['nu'], 0.5 * x)
    if prob <= 0:
        lprob = (0.5 * out_dict['nu'] - 1) * np.log(
            x) - 0.5 * x - 0.5 * out_dict['nu'] * np.log(2) - gammaln(
                0.5 * out_dict['nu'])
    else:
        lprob = np.log(prob)
    out_dict['signif_vary'] = lprob2sigma(lprob)

    if out_dict['signif_vary'] > 3:
        if out_dict['signif_qso'] > 3:
            out_dict['class'] = 'qso'
        elif out_dict['signif_not_qso'] > 3:
            out_dict['class'] = 'not_qso'

    # best-fit model for the lightcurve
    if return_model:
        model[gg] = dat - (u - u0 * x0) / diagC
        dmodel[gg] = 1. / np.sqrt(diagC)
        out_dict['model'] = model
        out_dict['dmodel'] = dmodel

    return out_dict
Example #45
 def f(x):
     y = n / (x**2 + n)
     if x > 0:
         return 0.5 * special.betainc(n / 2, 0.5, y)
     else:
         return 1.0 - 0.5 * special.betainc(n / 2, 0.5, y)
Example #46
 def _cdf(self, x, n, pr):
     k = floor(x)
     vals = (special.betainc(n, k + 1, pr) - special.betainc(n, 1, pr))
     return vals / (1. - pr ** n)
def Binc(a, b, x):
    return betainc(a, b, x) * gamma(a) * gamma(b) / gamma(a + b)
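Binc is the unregularized incomplete beta, so it can be checked against a direct numerical integral (a sketch; assumes Binc's module imports betainc and gamma from scipy.special):

from scipy.integrate import quad

a, b, x = 2.5, 1.5, 0.4
print(Binc(a, b, x))
print(quad(lambda t: t**(a - 1.0) * (1.0 - t)**(b - 1.0), 0.0, x)[0])   # same value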
Example #48
 def cdf(self, x):
     """Evaluates the CDF along the values ``x``."""
     y = 0.5 * betainc(self.m / 2.0, 0.5, np.sin(np.pi * x)**2)
     return np.where(x < 0.5, y, 1 - y)
Example #49
def ccprmod(supports, idx_correct_label, B=20):
    """Python implementation of the ccprmod.m (Classifier competence based on probabilistic modelling)
    function. Matlab code is available at:
    http://www.mathworks.com/matlabcentral/mlc-downloads/downloads/submissions/28391/versions/6/previews/ccprmod.m/index.html

    Parameters
    ----------
    supports: array of shape = [n_samples, n_classes]
              containing the supports obtained by the base classifier for each class.

    idx_correct_label: array of shape = [n_samples]
                       containing the index of the correct class.

    B : int (Default = 20)
        number of points used in the calculation of the competence, higher values result
        in a more accurate estimation.

    Returns
    -------
    C_src : array of shape = [n_samples]
            representing the classifier competences at each data point

    Examples
    --------
    >>> supports = [[0.3, 0.6, 0.1],[1.0/3, 1.0/3, 1.0/3]]
    >>> idx_correct_label = [1,0]
    >>> ccprmod(supports,idx_correct_label)
    ans = [0.784953394056843, 0.332872292262951]

    References
    ----------
    T.Woloszynski, M. Kurzynski, A probabilistic model of classifier competence for dynamic ensemble selection,
    Pattern Recognition 44 (2011) 2656–2668.
    """
    if not isinstance(B, int):
        raise TypeError(
            'Parameter B should be an integer. Currently B is {0}'.format(
                type(B)))

    if B <= 0 or B is None:
        raise ValueError(
            'The parameter B should be higher than 0. Currently B is {0}'.
            format(B))

    supports = np.asarray(supports)
    idx_correct_label = np.array(idx_correct_label)
    supports[supports > 1] = 1

    N, C = supports.shape

    x = np.linspace(0, 1, B)
    x = np.matlib.repmat(x, N, C)

    a = npm.zeros(x.shape)

    for c in range(C):
        a[:, c * B:(c + 1) * B] = C * supports[:, c:c + 1]

    b = C - a

    # For extreme cases, with a or b equal to 0, add a small constant:
    eps = 1e-20
    a[a == 0] = eps
    b[b == 0] = eps
    betaincj = betainc(a, b, x)

    C_src = np.zeros(N)
    for n in range(N):
        t = range((idx_correct_label[n]) * B, (idx_correct_label[n] + 1) * B)
        bc = betaincj[n, t]
        bi = betaincj[n, list(set(range(0, (C * B))) - set(t))]
        bi = npm.transpose(npm.reshape(bi, (B, C - 1), order='F'))
        C_src[n] = np.sum(
            np.multiply((bc[0, 1:] - bc[0, 0:-1]),
                        np.prod((bi[:, 0:-1] + bi[:, 1:]) / 2, 0)))

    return C_src
Example #50
def process(alfa, beta, cov_margin, keep_all_edited, line):
    """
    Calculates, for a single line in a VCF formatted file, the
    confidence score based on depth of coverage and edit fraction %.

    :param line: string
        single vcf formatted line.
    :return confidence: float
        confidence value of the line
    :return return_string: basestring
        full vcf formatted line with confidence
    """
    (chr, pos, dot, ref, alt, qual,
     filter, info, format, cond) = line.split("\t")[:10]

    if chr[0] == "#":
        print(line, end="")
        return

    # retrieve total number of reads mapping to position
    infos = info.split(";")
    (dp, i16) = infos[:2]

    assert dp[:2] == "DP"
    num_reads = int(dp[3:])

    """
    # retrieve numbers of A's and G's on forward and reverse strand
    assert i16[:3] == "I16", i16
    (a_fwd, a_rev, g_fwd, g_rev) = (int(x) for x in i16[4:].split(",")[:4])
    print("warning: i16 not available")
    """

    dp4 = re.findall("DP4\=([\d\,]+)", info)[0]

    (a_fwd, a_rev, g_fwd, g_rev) = (int(x) for x in dp4.split(","))

    a = a_fwd + a_rev
    g = g_fwd + g_rev
    num_reads = a + g
    edit_frac = g / float(num_reads)

    # calc smoothed counts and confidence
    G = g + alfa
    A = a + beta
    theta = G / float(G + A)

    ########  MOST IMPORTANT LINE  ########
    # calculates the confidence of theta as
    # P( theta < cov_margin | A, G) ~ Beta_theta(G, A)
    confidence = 1 - betainc(G, A, cov_margin)

    # keep 100% edited sites or toss
    if A == 0 and not keep_all_edited:
        confidence = 0
        region = 'POSSIBLE_SNP'
    else:
        region = 'PASS'

    # print line in CONF format

    return_string = ("\t".join([chr, pos, str(num_reads), ref, alt, ""]) +
                     "\t".join(str(round(y, 9)) for y in [
                         confidence, theta, edit_frac
                     ]) +
                     "\t".join(["", region, info, format, cond]) +
                     "\n")
    return return_string
Example #51
def qso_engine(time,data,error,ltau=3.,lvar=-1.7,sys_err=0.,return_model=False):
    """Calculates the fit quality of a damped random walk to a qso lightcurve.
    Written by N. Butler ([email protected]), Feb. 2010.
    Version 1.0

    The formalism is from Rybicki & Press (1994; arXiv:comp-gas/9405004)

    Data are modelled with a covariance function
        Lij = 0.5*var*tau*exp(-|time_i-time_j|/tau) .

    Input:
        time - measurement times, typically days
        data - measured magnitudes
        error - uncertainty in measured magnitudes

    Output (dictionary):

        chi2/nu - classical variability measure
        chi2_qso/nu - for goodness of fit given fixed parameters
        chi2_qso/nu_extra - for parameter fitting, add to chi2/nu
        chi^2/nu_NULL - expected chi2/nu for non-qso variable

        signif_qso - significance chi^2/nu<chi^2/nu_NULL (rule out false alarm)
        signif_not_qso - significance chi^2/nu>1 (rule out qso)
        signif_vary - significance that source is variable
        class - resulting source type (ambiguous, not_qso, qso)

        model - time series prediction for each datum given all others (iff return_model==True)
        dmodel - model uncertainty, including uncertainty in data

    Notes:
        T = L^(-1)
        Data variance is D
        Full covariance C^(-1) = (L+D)^(-1) = T [T+D^(-1)]^(-1) D^(-1)
        Code takes advantage of the tridiagonality of T and T+D^(-1)."""


    out_dict={}
    out_dict['chi2_qso/nu']=999; out_dict['chi2_qso/nu_extra']=0.;
    out_dict['signif_qso']=0.; out_dict['signif_not_qso']=0.;  out_dict['signif_vary']=0.
    out_dict['chi2_qso/nu_NULL']=0.; out_dict['chi2/nu']=0.; out_dict['nu']=0
    out_dict['model']=[]; out_dict['dmodel']=[];
    out_dict['class']='ambiguous'

    lvar0 = log10(0.5)+lvar+ltau

    ln = len(data)
    dt = abs(time[1:]-time[:-1])

    # first make sure all dt>0
    g=where(dt>0.)[0]; lg = len(g)
    # must have at least 2 data points
    if (lg<=0):
        return out_dict

    if (return_model):
        model = 1.*data; dmodel = -1.*error

    if (lg<ln):
      dt = dt[g]
      gg = zeros(lg+1,dtype='int64'); gg[1:] = g+1
      dat = data[gg]; wt = 1./(sys_err**2+error[gg]**2)
      ln = lg+1
    else:
      dat = 1.*data
      wt = 1./(sys_err**2+error**2)

    out_dict['nu'] = ln-1.
    varx = var(dat)
    dat0 = (dat*wt).sum()/wt.sum()
    out_dict['chi2/nu'] = ( (dat-dat0)**2*wt ).sum()/out_dict['nu']

    # define tridiagonal matrix T = L^(-1)
    # sparse matrix form: ab[u + i - j, j] == a[i,j]   i<=j, (here u=1)
    T = zeros((2,ln),dtype='float64')
    arg = dt*exp(-log(10)*ltau); ri = exp(-arg); ei = ri/(1.-ri)/(1+ri)
    T[0,1:] = -ei; T[1,:-1] = 1.+ri*ei; T[1,1:] += ri*ei; T[1,ln-1] += 1.
    T0 = median(T[1,:]); T /= T0

    # equation for chi2_qso is [ (dat-x0) T Tp^(-1) D^(-1) (dat-x0) ]  , where Tp=T+D^(-1) and D^(-1)=wt
    fac = exp(log(10)*lvar0)/T0
    Tp = 1.*T; Tp[1,:] += wt*fac

    # solve Tp*z=y for z (y=wt*dat)
    b1 = (wt*dat).reshape((1,ln))
    z = transpose( solveh_banded(Tp,transpose(b1)) ); z = z[0,:]
    c1 = wt.reshape((1,ln))
    z0 = transpose( solveh_banded(Tp,transpose(c1)) ); z0 = z0[0,:]
    # original version which troubles solveh_banded:
    # (Tpc,z) = solveh_banded(Tp,(wt*dat).reshape((1,ln))); z = z[0,:]
    # (Tpc,z0) = solveh_banded(Tp,wt.reshape((1,ln))); z0 = z0[0,:]

    #finally, get u=T*z
    u = T[1,:]*z; u[1:] += T[0,1:]*z[:-1]; u[:-1] += T[0,1:]*z[1:]
    u0 = T[1,:]*z0; u0[1:] += T[0,1:]*z0[:-1]; u0[:-1] += T[0,1:]*z0[1:]

    # magnitude offset x0, error = 1./sqrt(u0sum)
    u0sum = u0.sum(); x0 = u.sum()/u0sum;

    # fit statistic
    out_dict['chi2_qso/nu'] = dot(dat-x0,u-u0*x0)/out_dict['nu']

    # -2*log(likelihood) = chi2_qso + ldet_C + log(u0sum)
    #   first term: use chi2_qso/nu for goodness of fit with fixed parameters;
    #   all terms: use chi2_qso/nu + chi2_qso/nu_extra for fitting with variable parameters
    # get log of determinant for use later
    Tc = cholesky_banded(T)
    Tpc = cholesky_banded(Tp)
    ldet_Tp = 2*log(Tpc[1,:]).sum(); ldet_T = 2*log(Tc[1,:]).sum()
    ldet_C = ldet_Tp-ldet_T-log(wt).sum()
    out_dict['chi2_qso/nu_extra'] = (ldet_C + log(u0sum))/out_dict['nu']

    # get trace of C^(-1) for significance calculation
    Tpm = chol_inverse_diag(Tpc)
    diagC = T[1,:]*wt*Tpm[1,:]
    diagC[:-1] += T[0,1:]*wt[0:-1]*Tpm[0,1:]
    diagC[1:] += T[0,1:]*wt[1:]*Tpm[0,1:]
    TrC = diagC.sum()

    # significance in sigma units (large means false alarm unlikely)
    # (expected value of chi2_qso under the NULL hypothesis is TrC*varx)
    out_dict['chi2_qso/nu_NULL'] = TrC*varx/out_dict['nu']
    a=ln/2.; x = (out_dict['chi2_qso/nu']+1.e-8)/(out_dict['chi2_qso/nu_NULL']+out_dict['chi2_qso/nu']+1.e-8)
    prob = betainc(a,a,x)
    if (prob<=0):
      lprob = a*log(x) - log(a) + gammaln(2*a) - 2*gammaln(a)
    else:
      lprob = log( prob )
    out_dict['signif_qso'] = lprob2sigma(lprob)

    a=ln/2.; x = 1./(1.+out_dict['chi2_qso/nu'])
    prob = betainc(a,a,x)
    if (prob<=0):
      lprob = a*log(x) - log(a) + gammaln(2*a) - 2*gammaln(a)
    else:
      lprob = log( prob )
    out_dict['signif_not_qso'] = lprob2sigma(lprob)

    x = out_dict['chi2/nu']*out_dict['nu']
    prob = gammaincc(0.5*out_dict['nu'],0.5*x)
    if (prob<=0):
      lprob = (0.5*out_dict['nu']-1)*log(x) - 0.5*x - 0.5*out_dict['nu']*log(2) - gammaln(0.5*out_dict['nu'])
    else:
      lprob = log( prob )
    out_dict['signif_vary'] = lprob2sigma(lprob)

    if (out_dict['signif_vary']>3):
        if (out_dict['signif_qso']>3):
            out_dict['class']='qso'
        elif (out_dict['signif_not_qso']>3):
            out_dict['class']='not_qso'

    # best-fit model for the lightcurve
    if (return_model):
      model[gg] = dat - (u-u0*x0)/diagC
      dmodel[gg] = 1./sqrt(diagC)
      out_dict['model'] = model
      out_dict['dmodel'] = dmodel

    return out_dict
Example #52
 def f1(k, n, p):
     return np.log1p(-special.betainc(k + 1, n, 1 - p))
Example #53
def IB1_cdf(x, b, p, q):
    z = (b / x)
    return 1 - betainc(p, q, z)
 def __call__(self, t):
     return betainc(self.in_curve, self.out_curve, t)
Example #55
def betainc1(a, b, x):
    if a == 0: return 1
    return betainc(a, b, x)
Example #56
 def _cdf(self, x, n, p):
     k = floor(x)
     return special.betainc(n, k+1, p)
Example #57
Visualizes the vector field in one 2D-plane of the phase-statemachine state space

"""

from numpy import *
from matplotlib.pylab import *
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from scipy.special import betainc

x0 = dot(linspace(0, 1, 500)[:, newaxis], ones((1, 500)))
x1 = x0.T

n = 25
x0diag = betainc(5, 5, linspace(-0.01, 1.01, n))

isolinevalues = linspace(0, 1, 10)


def f_proposed1(x0, x1, x2=0.001):
    """
    
    proposed function: 
    
        Matrix X:
        X = x^nx1 . 1^1xn
        
        lambda = X @ X.T * 8 * (X*X + (X*X).T) / (X + X.T + 0.01)**4
    
    """
Example #58
 def f(x):
     z = a * x / (a * x + b)
     return 1.0 - special.betainc(a / 2.0, b / 2.0, z)
 def cox_snell(x):
     return special.betainc(2 / 3., 2 / 3., x) * special.beta(
         2 / 3., 2 / 3.)
Example #60
def ibeta(a, b, x):
    return special.betainc(a, b, x) * special.beta(a, b)