Example #1
    def _loglikelihood(params, x, tx, T):

        alpha, beta, gamma, delta = params

        beta_ab = betaln(alpha, beta)
        beta_gd = betaln(gamma, delta)

        indiv_loglike = (betaln(alpha + x, beta + T - x) - beta_ab +
                         betaln(gamma, delta + T) - beta_gd)

        recency_T = T - tx - 1

        J = np.arange(recency_T.max() + 1)

        @np.vectorize
        def _sum(x, tx, recency_T):
            j = J[:recency_T + 1]
            return log(
                np.sum(exp(betaln(alpha + x, beta + tx - x + j) - beta_ab +
                           betaln(gamma + 1, delta + tx + j) - beta_gd)))

        s = _sum(x, tx, recency_T)
        indiv_loglike = logaddexp(indiv_loglike, s)

        return indiv_loglike
	def logLike(self, x):
		alphaSigma, betaSigma, alphaY, betaY, alphaN, betaN, alphaQ, betaQ, alphaTau, betaTau = x
		nu = 0.001
				
		# Priors
		priorSigma = - 2.5 * np.log(alphaSigma + betaSigma)
		priorY = - 2.5 * np.log(alphaY + betaY)
		priorN = - 2.5 * np.log(alphaN + betaN)
		priorQ = - 2.5 * np.log(alphaQ + betaQ)
		priorTau = - 2.5 * np.log(alphaTau+ betaTau)
		
		lnP = priorSigma + priorY + priorN + priorQ + priorTau
		
		cteSigma = (1.0-alphaSigma-betaSigma) * np.log(self.upper[0]-self.lower[0]) - sp.betaln(alphaSigma,betaSigma)
		cteY = (1.0-alphaY-betaY) * np.log(self.upper[1]-self.lower[1]) - sp.betaln(alphaY,betaY)
		cteN = (1.0-alphaN-betaN) * np.log(self.upper[2]-self.lower[2]) - sp.betaln(alphaN,betaN)
		cteQ = (1.0-alphaQ-betaQ) * np.log(self.upper[3]-self.lower[3]) - sp.betaln(alphaQ,betaQ)
		cteTau = (1.0-alphaTau-betaTau) * np.log(self.upper[4]-self.lower[4]) - sp.betaln(alphaTau,betaTau)
						
		vecSigma = cteSigma + (alphaSigma-1.0) * np.log(self.Sigma-self.lower[0]) + (betaSigma-1.0) * np.log(self.upper[0]-self.Sigma)
		vecY = cteY + (alphaY-1.0) * np.log(self.Y-self.lower[1]) + (betaY-1.0) * np.log(self.upper[1]-self.Y)
		vecN = cteN + (alphaN-1.0) * np.log(self.N-self.lower[2]) + (betaN-1.0) * np.log(self.upper[2]-self.N)
		vecQ = cteQ + (alphaQ-1.0) * np.log(self.Q-self.lower[3]) + (betaQ-1.0) * np.log(self.upper[3]-self.Q)
		vecTau = cteTau + (alphaTau-1.0) * np.log(self.Tau-self.lower[4]) + (betaTau-1.0) * np.log(self.upper[4]-self.Tau)
								
		lnP += np.sum(mi.logsumexp(vecSigma + vecY + vecN + vecQ + vecTau - np.log(self.length), axis=1))
		#print lnP
		
		return lnP
def test_pp2():
    for MAX in [1e1, 1e2, 1e3, 1e5, 1e7]:
        N = 20
        ms = np.linspace(1, MAX, N)
        ns = np.linspace(1, MAX, N)
        alphas = np.linspace(1, MAX, N)
        betas = np.linspace(1, MAX, N)

        A = np.zeros(N * N * N * N)
        B = np.zeros(N * N * N * N)

        pos = 0
        for mi, m in enumerate(ms):
            print(mi)
            for ni, n in enumerate(ns):
                for ai, a in enumerate(alphas):
                    for bi, b in enumerate(betas):

                        B[pos] = betaln(m + a, n + b) - betaln(a, b)
                        A[pos] = test_betaln(m + a, n + b) - test_betaln(a, b)

                        pos += 1

        pyplot.figure()
        pyplot.plot(((A - B) / B) * 100)
        pyplot.ylabel("Percent error")
        pyplot.title("MAX=%f" % MAX)

    pyplot.show()
def zprob(counts, zvalues, idx, abeta, bbeta, nmax, zalpha, nclusters):
    counts_sum0 = np.zeros(nclusters)  # sum of counts per cluster, omitting the index idx
    logbeta0 = np.zeros(nclusters)  # log-beta functions values per cluster before adding in index idx
    zcounts0 = np.zeros(nclusters)
    for k in range(nclusters):
        zcounts0[k] = np.sum(zvalues == k)
        counts_sum0[k] = np.sum(counts[zvalues == k])
        if zvalues[idx] == k:
            counts_sum0[k] -= counts[idx]
            zcounts0[k] -= 1
        logbeta0[k] = betaln(counts_sum0[k] + abeta, zcounts0[k] * nmax + bbeta - counts_sum0[k])

    logbeta0_sum = np.sum(logbeta0)
    lnzprob = np.zeros(nclusters)
    for k in range(nclusters):
        # calculate lnzprob by adding in data idx for z[idx] = k one at a time so we don't have to redo the beta
        # function calculations
        this_logbeta = betaln(counts_sum0[k] + counts[idx] + abeta,
            (zcounts0[k] + 1) * nmax + bbeta - counts_sum0[k] - counts[idx])
        logbeta_sum = logbeta0_sum - logbeta0[k] + this_logbeta
        lnzprob[k] = np.log(zcounts0[k] + 1 + zalpha / nclusters) + logbeta_sum

    lnzprob -= lnzprob.max()
    zprob = np.exp(lnzprob) / np.sum(np.exp(lnzprob))

    return zprob
Example #5
    def conditional_expected_number_of_purchases_up_to_time(self, t):
        """
        Conditional expected purchases in future time period.

        The expected number of future transactions across the next t transaction
        opportunities by a customer with purchase history (x, tx, n).

        E(X(n, n+n*)|alpha, beta, gamma, delta, frequency, recency, n)

        See (13) in Fader & Hardie 2010.

        Parameters:
            t: scalar or array of time periods (n+t)

        Returns: scalar or array of predicted transactions

        """

        x = self.data['frequency']
        tx = self.data['recency']
        n = self.data['n']

        params = self._unload_params('alpha','beta','gamma','delta')
        alpha, beta, gamma, delta = params

        p1 = 1 / exp(BetaGeoBetaBinomFitter._loglikelihood(params, x, tx, n))
        p2 = exp(special.betaln(alpha + x + 1, beta + n - x) - special.betaln(alpha, beta))
        p3 = delta / (gamma - 1) * exp(special.gammaln(gamma + delta) - special.gammaln(1 + delta))
        p4 = exp(special.gammaln(1 + delta + n) - special.gammaln(gamma + delta + n))
        p5 = exp(special.gammaln(1 + delta + n + t) - special.gammaln(gamma + delta + n + t))

        return p1 * p2 * p3 * (p4 - p5)
Example #6
  def testBetaBetaKL(self):
    with self.test_session() as sess:
      for shape in [(10,), (4,5)]:
        a1 = 6.0*np.random.random(size=shape) + 1e-4
        b1 = 6.0*np.random.random(size=shape) + 1e-4 
        a2 = 6.0*np.random.random(size=shape) + 1e-4
        b2 = 6.0*np.random.random(size=shape) + 1e-4 
        # Take inverse softplus of values to test BetaWithSoftplusAB
        a1_sp = np.log(np.exp(a1) - 1.0)
        b1_sp = np.log(np.exp(b1) - 1.0)
        a2_sp = np.log(np.exp(a2) - 1.0)
        b2_sp = np.log(np.exp(b2) - 1.0)

        d1 = tf.contrib.distributions.Beta(a=a1, b=b1)
        d2 = tf.contrib.distributions.Beta(a=a2, b=b2)
        d1_sp = tf.contrib.distributions.BetaWithSoftplusAB(a=a1_sp, b=b1_sp)
        d2_sp = tf.contrib.distributions.BetaWithSoftplusAB(a=a2_sp, b=b2_sp)

        kl_expected = (special.betaln(a2, b2) - special.betaln(a1, b1)
                     + (a1 - a2)*special.digamma(a1)
                     + (b1 - b2)*special.digamma(b1)
                     + (a2 - a1 + b2 - b1)*special.digamma(a1 + b1))

        for dist1 in [d1, d1_sp]:
          for dist2 in [d2, d2_sp]:
            kl = tf.contrib.distributions.kl(dist1, dist2)
            kl_val = sess.run(kl)
            self.assertEqual(kl.get_shape(), shape)
            self.assertAllClose(kl_val, kl_expected)
        
        # Make sure KL(d1||d1) is 0
        kl_same = sess.run(tf.contrib.distributions.kl(d1, d1))
        self.assertAllClose(kl_same, np.zeros_like(kl_expected))
Example #7
    def conditional_probability_alive(self, m):
        """
        Conditional probability customer is alive at transaction opportunity n + m.

        P(alive at n + m|alpha, beta, gamma, delta, frequency, recency, n)

        See (A10) in Fader and Hardie 2010.

        Parameters:
            m: scalar or array of transaction opportunities

        Returns: scalar or array of alive probabilities

        """

        params = self._unload_params('alpha','beta','gamma','delta')
        alpha, beta, gamma, delta = params

        x = self.data['frequency']
        tx = self.data['recency']
        n = self.data['n']

        p1 = special.betaln(alpha + x, beta + n - x) - special.betaln(alpha, beta)
        p2 = special.betaln(gamma, delta + n + m) - special.betaln(gamma, delta)
        p3 = BetaGeoBetaBinomFitter._loglikelihood(params, x, tx, n)

        return exp(p1 + p2) / exp(p3)
Example #8
    def _loglikelihood(params, x, tx, T):

        N = x.shape[0]

        alpha, beta, gamma, delta = params

        beta_ab = special.betaln(alpha, beta)
        beta_gd = special.betaln(gamma, delta)

        indiv_loglike = (special.betaln(alpha + x, beta + T - x) - beta_ab +
                        special.betaln(gamma, delta + T) - beta_gd)

        recency_T = T - tx - 1

        def _sum(x, tx, recency_T):

            j = np.arange(recency_T+1)
            return log(
                np.sum(exp(special.betaln(alpha + x, beta + tx - x + j) - beta_ab +
                              special.betaln(gamma + 1, delta + tx + j) - beta_gd)))

        for i in np.arange(N):
            indiv_loglike[i] = logaddexp(indiv_loglike[i],_sum(x[i], tx[i], recency_T[i]))

        return indiv_loglike
Example #9
    def KL_divergence(self, variational_posterior):
        mu, S, gamma, tau = (
            variational_posterior.mean.values,
            variational_posterior.variance.values,
            variational_posterior.gamma_group.values,
            variational_posterior.tau.values,
        )

        var_mean = np.square(mu) / self.variance
        var_S = S / self.variance - np.log(S)
        part1 = (gamma * (np.log(self.variance) - 1.0 + var_mean + var_S)).sum() / 2.0

        ad = self.alpha / self.input_dim
        from scipy.special import betaln, digamma

        part2 = (
            (gamma * np.log(gamma)).sum()
            + ((1.0 - gamma) * np.log(1.0 - gamma)).sum()
            + betaln(ad, 1.0) * self.input_dim
            - betaln(tau[:, 0], tau[:, 1]).sum()
            + ((tau[:, 0] - gamma - ad) * digamma(tau[:, 0])).sum()
            + ((tau[:, 1] + gamma - 2.0) * digamma(tau[:, 1])).sum()
            + ((2.0 + ad - tau[:, 0] - tau[:, 1]) * digamma(tau.sum(axis=1))).sum()
        )

        return part1 + part2
Example #10
    def ss_score(self, ss, hp):
        heads = ss['heads']
        tails = ss['tails']
        alpha = hp['alpha']
        beta = hp['beta']
        logbeta_a_b = betaln(alpha, beta)

        return betaln(alpha + heads, beta + tails) - logbeta_a_b
 def BGNBD_LL(self, pars, freq, rec, age):
     r, alpha, a, b = pars
     a1 = betaln(a, b+freq) - betaln(a, b) + gammaln(r+freq) + r*log(alpha) - gammaln(r) - (r+freq)*log(alpha+age)
     # we only analyze customers with at least one repeat purchase, so the delta indicator in the second term is always 1
     a2 = betaln(a+1, b+freq-1) - betaln(a, b) + gammaln(r+freq) + r*log(alpha) - gammaln(r) - (r+freq)*log(alpha+rec)
     # BGNBD_LL should be maximized to obtain the fitted parameters, but the optimizer performs minimization,
     # so we return the negative (penalized) log-likelihood for model training.
     neg_LL = -(a1 + a2).sum() + self.penalty * log(pars).sum()
     return neg_LL
Example #12
def _beta_KL(a_bar, b_bar, a, b):
    "@return: KL(Beta(a_bar, b_bar)||Beta(a, b))"
    return (
        betaln(a, b)
        - betaln(a_bar, b_bar)
        - (a - a_bar) * digamma(a_bar)
        - (b - b_bar) * digamma(b_bar)
        + (a - a_bar + b - b_bar) * digamma(a_bar + b_bar)
    )
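
A quick way to sanity-check this closed-form Beta-to-Beta KL divergence is that it should be exactly zero when both distributions coincide and non-negative otherwise. A standalone sketch (restating the function above with its imports so it runs on its own):

import numpy as np
from scipy.special import betaln, digamma

def _beta_KL(a_bar, b_bar, a, b):
    "@return: KL(Beta(a_bar, b_bar)||Beta(a, b))"
    return (betaln(a, b) - betaln(a_bar, b_bar)
            - (a - a_bar) * digamma(a_bar)
            - (b - b_bar) * digamma(b_bar)
            + (a - a_bar + b - b_bar) * digamma(a_bar + b_bar))

assert np.isclose(_beta_KL(2.5, 3.0, 2.5, 3.0), 0.0)  # KL(P||P) = 0
assert _beta_KL(2.5, 3.0, 1.0, 1.0) >= 0.0            # KL is never negative
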
def log_beta_binomial_likelihood( k, n, alpha, beta ):
    column_shape = ( k.size, 1 )
    k = k.reshape( column_shape )
    n = n.reshape( column_shape )

    row_shape = ( 1, alpha.size )
    alpha = alpha.reshape( row_shape )
    beta = beta.reshape( row_shape )

    return betaln( k + alpha, n - k + beta ) - betaln( alpha, beta )
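
The reshapes above broadcast the data (as a column) against a grid of (alpha, beta) pairs (as a row); the binomial coefficient is omitted since it is constant in alpha and beta. A minimal usage sketch, assuming the function above is in scope:

import numpy as np

k = np.array([3, 7])               # successes per observation
n = np.array([10, 12])             # trials per observation
alpha = np.array([0.5, 1.0, 2.0])  # one column per candidate (alpha, beta) pair
beta = np.array([0.5, 1.0, 2.0])

ll = log_beta_binomial_likelihood(k, n, alpha, beta)
print(ll.shape)  # (2, 3): rows index observations, columns index parameter pairs
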
def compute_total_likelihood(hps, suffs):

    a = hps.alpha + suffs.heads
    b = hps.beta + suffs.tails

    r = betaln(a, b) - betaln(hps.alpha, hps.beta)

    r += suffs.binomln_accum

    return r
Example #15
 def p2(j, x):
     i = I[j:]
     return np.sum(
         binom(i, x) *
         exp(
             betaln(alpha + x, beta + i - x) -
             betaln(alpha, beta) +
             betaln(gamma + 1, delta + i) -
             betaln(gamma, delta)
         )
     )
 def _get_statistics(self,data=[]):
     n, tot = self._fixedr_distns[0]._get_statistics(data)
     if n > 0:
         data = flattendata(data)
         alphas_n, betas_n = self.alphas_0 + tot, self.betas_0 + self.r_support*n
         log_marg_likelihoods = \
                 special.betaln(alphas_n, betas_n) \
                     - special.betaln(self.alphas_0, self.betas_0) \
                 + (special.gammaln(data[:,na]+self.r_support)
                     - special.gammaln(data[:,na]+1) \
                     - special.gammaln(self.r_support)).sum(0)
     else:
         log_marg_likelihoods = np.zeros_like(self.r_support)
     return log_marg_likelihoods
Example #17
    def expected_number_of_transactions_in_first_n_periods(self, n):
        """
        Expected number of transactions occurring across first n transaction opportunities. Used by Fader
        and Hardie to assess in-sample fit.

        Pr(X(n) = x|alpha, beta, gamma, delta)

        See (7) in Fader & Hardie 2010.

        Parameters:
            n: scalar, number of transaction opportunities

        Returns: DataFrame of predicted values, indexed by x

        """

        params = self._unload_params('alpha', 'beta', 'gamma', 'delta')
        alpha, beta, gamma, delta = params

        x_counts = self.data.groupby('frequency')['n_custs'].sum()
        x = asarray(x_counts.index)

        p1 = special.binom(n, x) * exp(special.betaln(alpha + x, beta + n - x) - special.betaln(alpha, beta) +
                                       special.betaln(gamma, delta + n) - special.betaln(gamma, delta))

        for j in np.arange(x.shape[0]):
            i = np.arange(x[j], n)
            p2 = np.sum(special.binom(i, x[j]) *
                        exp(special.betaln(alpha + x[j], beta + i - x[j]) - special.betaln(alpha, beta) +
                            special.betaln(gamma +1, delta + i) - special.betaln(gamma, delta)))
            p1[j] += p2

        idx = pd.Index(x, name='frequency')
        return DataFrame(p1 * x_counts.sum(), index=idx, columns=['model'])
    def _posterior_hypparams(self,n,tot,normalizers,feasible):
        if n == 0:
            return n, self.alpha_0, self.r_probs, self.r_support
        else:
            r_probs = self.r_probs[feasible]
            r_support = self.r_support[feasible]
            log_marg_likelihoods = special.betaln(self.alpha_0 + tot - n*r_support,
                                                        self.beta_0 + r_support*n) \
                                    - special.betaln(self.alpha_0, self.beta_0) \
                                    + normalizers
            log_marg_probs = np.log(r_probs) + log_marg_likelihoods
            log_marg_probs -= log_marg_probs.max()
            marg_probs = np.exp(log_marg_probs)

            return n, self.alpha_0 + tot, marg_probs, r_support
    def _get_weighted_statistics(self,data,weights):
        n, tot = super(NegativeBinomialIntegerR,self)._get_weighted_statistics(data,weights)
        if n > 0:
            alpha_n, betas_n = self.alpha_0 + tot, self.beta_0 + self.r_support*n
            data, weights = flattendata(data), flattendata(weights)
            log_marg_likelihoods = \
                    special.betaln(alpha_n, betas_n) \
                        - special.betaln(self.alpha_0, self.beta_0) \
                    + (special.gammaln(data[:,na]+self.r_support)
                        - special.gammaln(data[:,na]+1) \
                        - special.gammaln(self.r_support)).dot(weights)
        else:
            log_marg_likelihoods = np.zeros_like(self.r_support)

        return n, tot, log_marg_likelihoods
def minNforHDIpower(genPriorMean, genPriorN, HDImaxwid=None, nullVal=None,
                    ROPE=None, desiredPower=0.8, audPriorMean=0.5,
                    audPriorN=2, HDImass=0.95, initSampSize=1, verbose=True):
    import sys
    import numpy as np
    from HDIofICDF import HDIofICDF, beta
    from scipy.special import binom, betaln

    if (HDImaxwid is None) == (nullVal is None):
        sys.exit('One and only one of HDImaxwid and nullVal must be specified')
    if ROPE is None:
        ROPE = [nullVal, nullVal]
    # Convert prior mean and N to a, b parameter values of beta distribution.
    genPriorA = genPriorMean * genPriorN
    genPriorB = (1.0 - genPriorMean) * genPriorN
    audPriorA = audPriorMean * audPriorN
    audPriorB = (1.0 - audPriorMean) * audPriorN
    # Initialize loop for incrementing sampleSize
    sampleSize = initSampSize
    # Increment sampleSize until desired power is achieved.
    while True:
        zvec = np.arange(0, sampleSize+1) # All possible z values for N flips.
        # Compute probability of each z value for data-generating prior.
        pzvec = np.exp(np.log(binom(sampleSize, zvec))
                   + betaln(zvec + genPriorA, sampleSize - zvec + genPriorB)
                   - betaln(genPriorA, genPriorB))
        # For each z value, compute HDI. hdiMat is min, max of HDI for each z.
        hdiMat = np.zeros((len(zvec), 2))
        for zIdx in range(0, len(zvec)):
            z = zvec[zIdx]
            # Determine the limits of the highest density interval
            # hdp is a function from PyMC package and takes a sample vector as 
            # input, not a function.
            hdiMat[zIdx] = HDIofICDF(beta, credMass=HDImass, a=(z + audPriorA), 
                                     b=(sampleSize - z + audPriorB))
        if HDImaxwid is not None:
            hdiWid = hdiMat[:,1] - hdiMat[:,0]
            powerHDI = np.sum(pzvec[hdiWid < HDImaxwid])
        if nullVal is not None:
            powerHDI = np.sum(pzvec[(hdiMat[:,0] > ROPE[1]) |
                                    (hdiMat[:,1] < ROPE[0])])
        if verbose:
            print(" For sample size = %s\npower = %s\n" % (sampleSize, powerHDI))

        if powerHDI > desiredPower:
            break
        else:
            sampleSize += 1
    return sampleSize
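
The pzvec computed inside the loop is the beta-binomial predictive distribution of z under the data-generating prior, so it must sum to one over z = 0..N. A standalone check of that building block (independent of HDIofICDF), with illustrative prior values:

import numpy as np
from scipy.special import binom, betaln

genPriorA, genPriorB, sampleSize = 1.5, 0.5, 20   # arbitrary illustrative values
zvec = np.arange(0, sampleSize + 1)
pzvec = np.exp(np.log(binom(sampleSize, zvec))
               + betaln(zvec + genPriorA, sampleSize - zvec + genPriorB)
               - betaln(genPriorA, genPriorB))
assert np.isclose(pzvec.sum(), 1.0)  # a valid pmf over all possible z
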
Example #21
 def logpdf(self, f, y, Y_metadata=None):
     D = y.shape[1]
     fv, gv = f[:, :D], f[:, D:]
     ef = np.exp(fv)
     eg = np.exp(gv)
     lnpdf = (ef - 1)*np.log(y) + (eg - 1)*np.log(1-y) - betaln(ef, eg)
     return lnpdf
Example #22
def B_(a, b):
	'''
	log Beta function
	'''

	res = special.betaln(a, b)
	return res
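
Since betaln(a, b) is gammaln(a) + gammaln(b) - gammaln(a + b) evaluated stably in log space, the wrapper can be spot-checked directly against scipy:

import numpy as np
from scipy.special import betaln, gammaln

a, b = 3.5, 7.0
assert np.isclose(betaln(a, b), gammaln(a) + gammaln(b) - gammaln(a + b))
assert np.isclose(betaln(1.0, 1.0), 0.0)  # B(1, 1) = 1, so its log is 0
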
Example #23
    def energy(self):
        sum = 0.0
        # class 0 negative log likelihood
        points = self.data0
        if points.size > 0:
            sum -= logp_normal(points, self.mu0, self.sigma0).sum()

        # class 1 negative log likelihood
        points = self.data1
        if points.size > 0:
            sum -= logp_normal(points, self.mu1, self.sigma1).sum()

        # Class proportion c (from page 3, eq 1)
        sum -= (
            log(self.c) * (self.dist0.alpha + self.dist0.n - 1)
            + log(1 - self.c) * (self.dist1.alpha + self.dist1.n - 1)
            - betaln(self.dist0.alpha + self.dist0.n, self.dist1.alpha + self.dist1.n)
        )

        # Now add in the priors...
        sum -= logp_invwishart(self.sigma0, self.dist0.kappa, self.dist0.S)
        sum -= logp_invwishart(self.sigma1, self.dist1.kappa, self.dist1.S)
        sum -= logp_normal(self.mu0, self.dist0.priormu, self.sigma0, self.dist0.nu)
        sum -= logp_normal(self.mu1, self.dist1.priormu, self.sigma1, self.dist1.nu)

        return sum
Example #24
def peak2sigma(psdpeak,n0):
    """ translates a psd peak height into a multi-trial NULL-hypothesis probability
    NOTE: dstarr replaces '0' with 0.000001 to catch floating-point accuracy bugs
          which I otherwise stumble into.
    """

    # Student's-T
    prob0 = betai( 0.5*n0-2.,0.5,(n0-1.)/(n0-1.+2.*psdpeak) )
    if (0.5*n0-2. <= 0.000001):
        lprob0 = 0.
    elif ((n0-1.)/(n0-1.+2.*psdpeak) <= 0.000001):
        lprob0 = -999.
    elif (prob0 == 0):
        lprob0 = (0.5*n0-2.)*log((n0-1.)/(n0-1.+2.*psdpeak)) - log(0.5*n0-2.) - betaln(0.5*n0-2., 0.5)
    else:
        lprob0 = log(prob0)

    # ballpark number of independent frequencies
    #  (Horne and Baliunas, eq. 13)
    horne = int(-6.362+1.193*n0+0.00098*n0**2.)
    if (horne <= 0): horne=5

    if (lprob0>log(1.e-4) and prob0>0):
        # trials correction, monitoring numerical precision
        lprob = log( 1. - exp( horne*log(1-prob0) ) )
    elif (lprob0+log(horne)>log(1.e-4) and prob0>0):
        lprob = log( 1. - exp( -horne*prob0 ) )
    else:
        lprob = log(horne) + lprob0

    sigma = lprob2sigma(lprob)

    return sigma
Example #25
def BNB_loglike(k,mean,n,sigma):
    #n=min(n,10000)
    #Put variables in beta-NB form (n,a,b)
    mean = max(mean, 0.00001)
    p = np.float64(n)/(n+mean)
    logps = [math.log(n) - math.log(n + mean),
             math.log(mean) - math.log(n + mean)]

    if sigma < 0.00001:
        loglike = -betaln(n,k+1)-math.log(n+k)+n*logps[0]+k*logps[1]
        return loglike

    sigma = (1/sigma)**2

    a = p * sigma + 1
    b = (1-p) * sigma
    
    loglike = 0
    
    #Rising Pochhammer = gamma(k+n)/gamma(n)
    #for j in range(k):
    #    loglike += math.log(j+n)
    if k>0:
        loglike = -lbeta_asymp(n,k) - math.log(k)
        #loglike=scipy.special.gammaln(k+n)-scipy.special.gammaln(n)
    else:
        loglike=0
    
    #Add log(beta(a+n,b+k))
    loglike += lbeta_asymp(a+n,b+k)
    
    #Subtract log(beta(a,b))
    loglike -= lbeta_asymp(a,b)

    return loglike
Example #26
def domination(variations, control=None):
    """
    Calculates P(A > B) using a closed formula
    http://www.evanmiller.org/bayesian-ab-testing.html
    """
    values = OrderedDict()
    a, others = split(variations, control)
    for label, b in others.items():
        total = 0
        for i in range(b.alpha - 1):
            total += np.exp(spc.betaln(a.alpha + i, b.beta + a.beta)
                            - np.log(b.beta + i) - spc.betaln(1 + i, b.beta)
                            - spc.betaln(a.alpha, a.beta))
        values[label] = total

    return values
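
The linked Evan Miller note gives, for an integer alpha parameter, Pr(p_B > p_A) as a finite sum of betaln terms. A standalone sketch of that closed form with plain Beta parameters (rather than the variation objects used above), cross-checked against a Monte Carlo estimate:

import numpy as np
from scipy import special as spc

def prob_b_beats_a(alpha_a, beta_a, alpha_b, beta_b):
    # Pr(p_B > p_A) with p_A ~ Beta(alpha_a, beta_a), p_B ~ Beta(alpha_b, beta_b)
    total = 0.0
    for i in range(alpha_b):
        total += np.exp(spc.betaln(alpha_a + i, beta_a + beta_b)
                        - np.log(beta_b + i)
                        - spc.betaln(1 + i, beta_b)
                        - spc.betaln(alpha_a, beta_a))
    return total

rng = np.random.default_rng(0)
closed = prob_b_beats_a(30, 70, 40, 60)
mc = np.mean(rng.beta(40, 60, 200000) > rng.beta(30, 70, 200000))
print(closed, mc)  # the two estimates should agree to a few decimal places
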
Example #27
def explore_beta_conj_prior():
    max = 10.
    num_points = 100
    step = max / num_points
    possible_taus = (
        -  .1 * np.ones(2),
        - 1.  * np.ones(2),
        np.array([-1., -3]),
        -10.  * np.ones(2),
    )
    possible_nus = (.1, 1., 10.)
    a, b = np.ogrid[step:max:step,step:max:step]
    betaln_a_b = betaln(a, b)
    pl.subplots_adjust(wspace=.0, hspace=.1, top=.9, bottom=.05, left=.07, right=1.)
    for t, tau in enumerate(possible_taus):
        for n, nu in enumerate(possible_nus):
            g = tau[0] * (a-1.) + tau[1] * (b-1.) - nu * betaln_a_b
            subplot = pl.subplot(len(possible_nus), len(possible_taus), n * len(possible_taus) + t + 1)
            pl.imshow(g, origin='lower', extent=[step,max,step,max])
            if 0 == n:
                pl.title('$\\tau = %s$' % tau)
            if n != len(possible_nus) - 1:
                pl.gca().xaxis.set_ticks([])
            if 0 == t:
                pl.gca().set_ylabel('$\\nu = %.1f$' % nu)
            else:
                pl.gca().yaxis.set_ticks([])
            subplot.get_xaxis().set_ticks_position('none')
            subplot.get_yaxis().set_ticks_position('none')
Example #28
def test_beta():
    N = 100
    xs = np.linspace(0.01, 1e5, N)
    ys = np.linspace(0.01, 1e5, N)

    vals = np.zeros((N, N))
    errs = np.zeros(N * N)

    pos = 0
    for xi, x in enumerate(xs):
        for yi, y in enumerate(ys):
            z = betaln(x, y)
            zhat = beta(x, y)
            errs[pos] = (zhat - z) / z
            vals[xi, yi] = z
            pos += 1

    pyplot.figure()
    pyplot.plot(errs)
    # pyplot.imshow(vals)
    # pyplot.figure()
    # PN = 5
    # for i in range(PN):
    #     pyplot.subplot(1, PN, i+1)
    #     pyplot.plot(ys, vals[N/ PN * i, :])
    #     pyplot.title(xs[N/PN * i])
    # pyplot.plot(delta)
    pyplot.show()
 def loglike(self, data, paravec, sign = 1):
     la, lp, lq = 0, 0, 0
     lb = paravec
     loglike = len(data) * sp.log(np.exp(la)) + (np.exp(la)*np.exp(lp)-1) * sum(sp.log(data)) \
              - len(data)*np.exp(la)*np.exp(lp)*sp.log(np.exp(lb)) - len(data) * betaln(np.exp(lp), np.exp(lq)) -\
             (np.exp(lp)+np.exp(lq)) * sum(sp.log(1+(data/np.exp(lb))**np.exp(la)))
     loglike = sign*loglike
     return loglike
def AS_betabinom_loglike(logps, sigma, AS1, AS2, hetp, error):
    a = math.exp(logps[0] + math.log(1/sigma**2 - 1))
    b = math.exp(logps[1] + math.log(1/sigma**2 - 1))
    
    part1 = 0
    part1 += betaln(AS1 + a, AS2 + b)
    part1 -= betaln(a, b)
    
    if hetp==1:
        return part1        

    e1 = math.log(error) * AS1 + math.log(1 - error) * AS2
    e2 = math.log(error) * AS2 + math.log(1 - error) * AS1
    if hetp == 0:
        return addlogs(e1, e2)
    
    return addlogs(math.log(hetp)+part1, math.log(1-hetp) + addlogs(e1,e2))
Example #31
def log_beta_neg_binomial(k, r, a, b):
    return gammaln(r + k) - gammaln(r) - log(factorial(k)) + betaln(
        a + r, b + k) - betaln(a, b)
Example #32
def log_beta_binomial(k, n, a, b):
    # if k < 0 or k > n: return -np.inf # is this check necessary? it makes operations on arrays annoying
    # if k == 0: return r*log(1-p)
    # print(n,k)
    return log(comb(n, k)) + betaln(k + a, n - k + b) - betaln(a, b)
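
This is exactly the log-pmf of the beta-binomial distribution, so for moderate arguments it can be cross-checked against scipy.stats.betabinom (available in SciPy 1.4+). A standalone sketch restating the function with its imports:

import numpy as np
from numpy import log
from scipy.special import comb, betaln
from scipy.stats import betabinom

def log_beta_binomial(k, n, a, b):
    return log(comb(n, k)) + betaln(k + a, n - k + b) - betaln(a, b)

k, n, a, b = 4, 15, 2.0, 3.0
assert np.isclose(log_beta_binomial(k, n, a, b), betabinom.logpmf(k, n, a, b))
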
Example #33
def beta_binomial(k, n, a, b):
    return comb(n, k) * exp(
        betaln(k + a, n - k + b) - betaln(a, b))  # this can avoid overflow
Example #34
 def h(a, b, c, d):
     total = 0.0
     for i in range(1, c):
         total += np.exp(
             betaln(a + i, b + d) - np.log(i) - betaln(a, b) - betaln(i, d))
     return 1 - (np.exp(betaln(a, b + d) - betaln(a, b))) - total
    def _compute_log_p_data(self, n, k, betaln_prior):
        alpha, beta = self.prior

        # see https://www.cs.ubc.ca/~murphyk/Teaching/CS340-Fall06/reading/bernoulli.pdf, equation (42)
        # which can be expressed as a fraction of beta functions
        return betaln(alpha + (n - k), beta + k) - betaln_prior
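
The cited Murphy notes give the Beta-Bernoulli marginal likelihood as p(D) = B(alpha + N1, beta + N0) / B(alpha, beta). A standalone check (independent of the class above and of its heads/tails convention) that this closed form equals the chain of posterior-predictive probabilities:

import numpy as np
from scipy.special import betaln

alpha, beta = 2.0, 5.0
data = np.array([1, 0, 0, 1, 1, 0, 1, 0])  # a Bernoulli sequence
k, n = data.sum(), data.size

# closed form: log p(D) = ln B(alpha + k, beta + n - k) - ln B(alpha, beta)
log_p_closed = betaln(alpha + k, beta + (n - k)) - betaln(alpha, beta)

# sequential form: multiply the posterior-predictive probability of each observation
log_p_seq, h = 0.0, 0
for i, x in enumerate(data):
    p_head = (alpha + h) / (alpha + beta + i)
    log_p_seq += np.log(p_head if x == 1 else 1.0 - p_head)
    h += x

assert np.isclose(log_p_closed, log_p_seq)
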
Example #36
 def get_entropy(self, X=None):
     a, b = self._get_alphabeta(X)
     return (special.betaln(a, b) - special.digamma(a) * (a - 1) -
             special.digamma(b) * (b - 1) + special.digamma(a + b) *
             (a + b - 2))
Example #37
 def ref(self, x, y):
     return special.betaln(x, y)
Example #38
def binomln(n, k):
    "Log of scipy.special.binom calculated entirely in the log domain"
    return -betaln(1 + n - k, 1 + k) - np.log(n + 1)
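
The identity behind this is C(n, k) = 1 / ((n + 1) * B(n - k + 1, k + 1)); taking logs gives the expression above, which keeps working long after the direct binomial coefficient overflows. A quick standalone check:

import numpy as np
from scipy.special import betaln, binom

def binomln(n, k):
    "Log of scipy.special.binom calculated entirely in the log domain"
    return -betaln(1 + n - k, 1 + k) - np.log(n + 1)

assert np.isclose(binomln(20, 7), np.log(binom(20, 7)))
print(binomln(100000, 50000))  # finite in log space, while binom(100000, 50000) overflows to inf
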
Example #39
 def _logpmf(self, x, n, a, b):
     k = floor(x)
     combiln = -log(n + 1) - betaln(n - k + 1, k + 1)
     return combiln + betaln(k + a, n - k + b) - betaln(a, b)
 def _logpmf(self, k, M, n, N):
     tot, good = M, n
     bad = tot - good
     return betaln(good+1, 1) + betaln(bad+1,1) + betaln(tot-N+1, N+1)\
         - betaln(k+1, good-k+1) - betaln(N-k+1,bad-N+k+1)\
         - betaln(tot+1, 1)
Example #41
def BetaEntropy(x):
    # To compute EqlogQmu and EqlogQtheta
    return betaln(x[0], x[1]) - (x[0] - 1) * psi(x[0]) - (x[1] - 1) * psi(
        x[1]) + (x[0] + x[1] - 2) * psi(x[0] + x[1])
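
This expression is the differential entropy of a Beta(x[0], x[1]) distribution, so it can be cross-checked against scipy.stats.beta(...).entropy(). A standalone sketch restating the function:

import numpy as np
from scipy.special import betaln, psi
from scipy.stats import beta as beta_dist

def BetaEntropy(x):
    return (betaln(x[0], x[1]) - (x[0] - 1) * psi(x[0]) - (x[1] - 1) * psi(x[1])
            + (x[0] + x[1] - 2) * psi(x[0] + x[1]))

params = np.array([2.5, 4.0])
assert np.isclose(BetaEntropy(params), beta_dist(params[0], params[1]).entropy())
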
def beta_binom(prior, y):
    alpha, beta = prior
    h = np.sum(y)
    n = len(y)
    p_y = np.exp(betaln(alpha + h, beta + n - h) - betaln(alpha, beta))
    return p_y
Example #43
    def compute_fixations(self, gen, app='beta_tataru', store=True, **kwargs):
        """ this method compute the approximated fixations probabilities of
        a wright-fisher process after gen generations.
        app is a string indicating how to approximate transitions and 
        integrate them in these computations.
        If store is true, all probabilities used in 
        the computations are stored is the fix_proba attribute such that
        self.fix_proba[i, j, k, l, m, n, o, p] is the probability for the 
        wright-fisher process to be fixed (in 0 for i = 0 or in 1 for i = 1) 
        after j generations from an initial frequency
        self.x0[k], were the parameters of the process are : 
        N = self.N[l]
        s = self.s[m]
        h = self.h[n]
        u = self.u[o]
        v = self.v[p]
        """
        # setting the approximation recursion
        self.app = app

        # the last fixation probability computed is
        last_gen_fix = self.fix_proba.shape[1] - 1

        # if the probability is already computed, return it
        if last_gen_fix >= gen:
            return self.fix_proba[:, gen, ]

        # the last moment generation already computed is
        last_gen_mom = self.moments.shape[1] - 1

        # approximated moments until this time are needed
        if gen > last_gen_mom:
            self.compute_moments(gen=gen, store=True)

        # if store  is true, increase the self.fix_proba matrix size to store
        # the new computations
        if store:
            fix_proba_t = np.full(shape=(2, gen + 1,
                                         *self.fix_proba.shape[2:]),
                                  fill_value=np.nan)
            fix_proba_t[:, :(last_gen_fix + 1), ] = self.fix_proba
            self.fix_proba = fix_proba_t

        # getting the fitness function and its derivatives
        fit = self.fitness[0]
        fit1 = self.fitness[1]
        fit2 = self.fitness[2]

        prev_p0 = self.fix_proba[0, last_gen_fix].copy()
        prev_p1 = self.fix_proba[1, last_gen_fix].copy()

        # do the recursion until this time
        while last_gen_fix < gen:

            if app == 'beta_tataru':
                with np.errstate(invalid='ignore', divide='ignore'):
                    scaling = (1 - self.fix_proba[0, last_gen_fix] -
                               self.fix_proba[1, last_gen_fix])
                    cond_mean = ((self.moments[0, last_gen_fix, ] -
                                  self.fix_proba[1, last_gen_fix, ]) / scaling)
                    cond_var = ((self.moments[1, last_gen_fix, ] +
                                 self.moments[0, last_gen_fix, ]**2 -
                                 self.fix_proba[1, last_gen_fix, ]) / scaling -
                                cond_mean**2)
                    const = cond_mean * (1 - cond_mean) / cond_var - 1
                const[scaling <= 0] = 0
                const[cond_var == 0] = 0
                cond_mean[scaling <= 0] = 0
                cond_alpha = cond_mean * const
                cond_beta = (1 - cond_mean) * const
                # this mask captures values where ss.beta is either nan or 0
                mask = (cond_alpha <= 0) | (cond_beta <= 0) | (ss.beta(
                    cond_alpha, cond_beta) == 0)

                # p0n+1 = p0n * (1 - v) ** N + p1n * u ** N
                # + (1 - p0n - p1n) * (1 - u - v) ** N
                # * Beta(cond_alpha, cond_beta + N) / Beta(cond_alpha,
                #                                           cond_beta)
                next_p0 = (prev_p0 *
                           (1 - self.v[np.newaxis, np.newaxis, np.newaxis,
                                       np.newaxis, np.newaxis, :])**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis] + prev_p1 *
                           (self.u[np.newaxis, np.newaxis, np.newaxis,
                                   np.newaxis, :, np.newaxis]**
                            self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                   np.newaxis, np.newaxis]))

                with np.errstate(invalid='ignore', divide='ignore'):
                    next_p0 += (
                        (1 - prev_p1 - prev_p0) *
                        (1 - self.u[np.newaxis, np.newaxis, np.newaxis,
                                    np.newaxis, :, np.newaxis] -
                         self.v[np.newaxis, np.newaxis, np.newaxis, np.newaxis,
                                np.newaxis, :])**
                        self.N[np.newaxis, :, np.newaxis, np.newaxis,
                               np.newaxis, np.newaxis] *
                        ss.beta(
                            cond_alpha, cond_beta +
                            self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                   np.newaxis, np.newaxis]) /
                        ss.beta(cond_alpha, cond_beta))

                next_p0[mask] = prev_p0[mask]

                next_p1 = (prev_p0 *
                           (self.v[np.newaxis, np.newaxis, np.newaxis,
                                   np.newaxis, np.newaxis, :])**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis] + prev_p1 *
                           (1 - self.u[np.newaxis, np.newaxis, np.newaxis,
                                       np.newaxis, :, np.newaxis])**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis])
                with np.errstate(invalid='ignore', divide='ignore'):
                    next_p1 += (
                        (1 - prev_p1 - prev_p0) *
                        (1 - self.u[np.newaxis, np.newaxis, np.newaxis,
                                    np.newaxis, :, np.newaxis] -
                         self.v[np.newaxis, np.newaxis, np.newaxis, np.newaxis,
                                np.newaxis, :])**
                        self.N[np.newaxis, :, np.newaxis, np.newaxis,
                               np.newaxis, np.newaxis] *
                        ss.beta(
                            cond_alpha +
                            self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                   np.newaxis, np.newaxis], cond_beta) /
                        ss.beta(cond_alpha, cond_beta))
                next_p1[mask] = prev_p1[mask]

            elif app == 'beta_custom':
                pass
            elif app == 'beta_numerical':
                scaling = (1 - self.fix_proba[0, last_gen_fix] -
                           self.fix_proba[1, last_gen_fix])
                with np.errstate(invalid='ignore', divide='ignore'):
                    cond_mean = ((self.moments[0, last_gen_fix, ] -
                                  self.fix_proba[1, last_gen_fix, ]) / scaling)
                    cond_var = ((self.moments[1, last_gen_fix, ] +
                                 self.moments[0, last_gen_fix, ]**2 -
                                 self.fix_proba[1, last_gen_fix, ]) / scaling -
                                cond_mean**2)
                cond_mean[scaling <= 0] = 0.5
                cond_var[scaling <= 0] = 0
                cond_var[cond_var < 0] = 0

                with np.errstate(divide='ignore', invalid='ignore'):
                    const = cond_mean * (1 - cond_mean) / cond_var - 1
                const[scaling <= 0] = 0
                const[cond_var == 0] = 0
                cond_alpha = cond_mean * const
                cond_beta = (1 - cond_mean) * const
                mask = (cond_alpha <= 0) | (cond_beta <= 0) | (ss.beta(
                    cond_alpha, cond_beta) == 0)

                next_p0 = (prev_p0 *
                           (1 - fit(np.zeros(shape=(1, 1, 1, 1, 1, 1, 1))))**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis] + prev_p1 *
                           (1 - fit(np.ones(shape=(1, 1, 1, 1, 1, 1, 1))))**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis])

                x = kwargs['grid'][:, np.newaxis, np.newaxis, np.newaxis,
                                   np.newaxis, np.newaxis, np.newaxis]
                with np.errstate(over='ignore'):
                    to_int = np.exp(
                        self.N[np.newaxis, :, np.newaxis, np.newaxis,
                               np.newaxis, np.newaxis] * np.log(1 - fit(x)) +
                        (cond_alpha - 1) * np.log(x) +
                        (cond_beta - 1) * np.log(1 - x) -
                        ss.betaln(cond_alpha, cond_beta))
                    assert np.all((1 - fit(x) < 1))
                    assert np.all((1 - fit(x) > 0))

                # replacing inf values by the upper bound in numpy float
                to_int[to_int == np.inf] = np.finfo(np.float64).max

                integrated = np.sum(
                    (to_int[:-1, ] + to_int[1:, ]) / 2 * (x[1:, ] - x[:-1, ]),
                    axis=0)

                next_p0 += ((1 - prev_p0 - prev_p1) * integrated)
                next_p0 = next_p0[0, ]

                next_p0[mask] = prev_p0[mask]

                next_p1 = (prev_p0 *
                           (fit(np.zeros(shape=(1, 1, 1, 1, 1, 1, 1))))**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis] + prev_p1 *
                           (fit(np.ones(shape=(1, 1, 1, 1, 1, 1, 1))))**
                           self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                  np.newaxis, np.newaxis])

                x = kwargs['grid'][:, np.newaxis, np.newaxis, np.newaxis,
                                   np.newaxis, np.newaxis, np.newaxis]
                to_int = np.exp(self.N[np.newaxis, :, np.newaxis, np.newaxis,
                                       np.newaxis, np.newaxis] *
                                np.log(fit(x)) + (cond_alpha - 1) * np.log(x) +
                                (cond_beta - 1) * np.log(1 - x) -
                                ss.betaln(cond_alpha, cond_beta))

                integrated = np.sum(
                    (to_int[:-1, ] + to_int[1:, ]) / 2 * (x[1:, ] - x[:-1, ]),
                    axis=0)

                next_p1 += ((1 - prev_p0 - prev_p1) * integrated)
                next_p1 = next_p1[0, ]

                next_p1[mask] = prev_p1[mask]

            elif app == 'gauss_numerical':
                pass
            elif app == 'wf_exact':
                pass
            else:
                raise NotImplementedError

            prev_p0 = next_p0
            prev_p1 = next_p1

            last_gen_fix += 1

            if store:
                self.fix_proba[0, last_gen_fix, ] = prev_p0.copy()
                self.fix_proba[1, last_gen_fix, ] = prev_p1.copy()

        ret_mat = np.empty(shape=(2, *self.fix_proba.shape[2:]))
        ret_mat[0, ] = prev_p0
        ret_mat[1, ] = prev_p1

        return ret_mat
Example #44
def lnprior_vtan(vtan, vmax=1500., alpha=default_alpha, beta=default_beta):
    """ broad velocity prior. Peaks at ~180 km/s with a tail """
    if vtan > vmax or vtan < 0: return -np.inf
    return -special.betaln(alpha, beta) + (alpha - 1.) * np.log(
        vtan / vmax) + (beta - 1.) * np.log(1 - vtan / vmax)
Example #45
def sum_log_beta_neg_binomial(ks, r, a, b):
    N = len(ks)
    norm = N * (betaln(a, b) + gammaln(r))
    terms = gammaln(r + ks) - log(factorial(ks)) + betaln(a + r, b + ks)
    return sum(terms) - norm
Example #46
 def _logsf(self, x, alpha):
     return log(x) + special.betaln(x, alpha + 1)
Example #47
def betaincderp(x,
                p,
                q,
                min_iters=3,
                max_iters=200,
                err_threshold=1e-12,
                debug=False):

    if (x == 0):
        return 0

    if (x > p / (p + q)):
        if debug:
            print('Switching to betaincderq')
        return -betaincderq(1 - x, q, p, min_iters, max_iters, err_threshold,
                            debug)

    derp_old = 0
    Am2 = 1
    Am1 = 1
    Bm2 = 0
    Bm1 = 1
    dAm2 = 0
    dAm1 = 0
    dBm2 = 0
    dBm1 = 0

    C1 = exp(p * log(x) + (q - 1) * log(1 - x) - log(p) - betaln(p, q))
    C2 = log(x) - 1 / p + digamma(p + q) - digamma(p)

    for n in range(1, max_iters + 1):
        a_n_ = _a_n(x, p, q, n)
        b_n_ = _b_n(x, p, q, n)
        da_n_dp = _da_n_dp(x, p, q, n)
        db_n_dp = _db_n_dp(x, p, q, n)

        A = Am2 * a_n_ + Am1 * b_n_
        dA = da_n_dp * Am2 + a_n_ * dAm2 + db_n_dp * Am1 + b_n_ * dAm1
        B = Bm2 * a_n_ + Bm1 * b_n_
        dB = da_n_dp * Bm2 + a_n_ * dBm2 + db_n_dp * Bm1 + b_n_ * dBm1

        Am2 = Am1
        Am1 = A
        dAm2 = dAm1
        dAm1 = dA
        Bm2 = Bm1
        Bm1 = B
        dBm2 = dBm1
        dBm1 = dB

        if n < min_iters - 1:
            continue

        dr1 = A / B
        dr2 = (dA - dr1 * dB) / B

        derp = C1 * (dr1 * C2 + dr2)

        # Check for convergence
        errapx = abs(derp_old - derp)
        d_errapx = errapx / max(err_threshold, abs(derp))
        derp_old = derp

        if d_errapx <= err_threshold:
            break

        if n >= max_iters:
            raise RuntimeError('Derivative did not converge')

    if debug:
        # TODO: Add approx error
        print(f'Converged in {n+1} iterations, appx error = {errapx}')
        print(f'Estimated betainc = {C1 * dr1}')

    return derp
Example #48
 def likelihood(self, data):
     h = np.sum(data > 0)
     n = len(data)
     return np.exp(
         betaln(self.belief[0] + h, self.belief[1] + n - h) -
         betaln(self.belief[0], self.belief[1]))
Example #49
def kernel(mu, gam, M):
    return -ss.beta.pdf(mu, gam[0], gam[1]) * betaln(mu * M, (1 - mu) * M)
Example #50
 def _logpmf(self, x, alpha):
     return log(alpha) + special.betaln(x, alpha + 1)
Example #51
 def calc_marginal_logp(N, k, alpha, beta):
     lnck = utils.log_nchoosek(N, k)
     numer = betaln(k+alpha, N-k+beta)
     denom = betaln(alpha, beta)
     return lnck + numer - denom
Example #52
 def f(delta):
     logMean = betaln(alpha + delta, beta) - logBab
     return logMean - logPercentile
def pdf_cdf_prod(x, prior, posterior):
    lnCDF = log(betainc(prior[0], prior[1], x))
    lnPDF = (posterior[0] - 1) * log(x) + (
        posterior[1] - 1) * log(1 - x) - betaln(posterior[0], posterior[1])

    return exp(lnCDF + lnPDF)
Example #54
 def _h(self, aA, bA, aB, bB, i):
     return np.exp(
         betaln(aA + i, bB + bA) - betaln(1 + i, bB) -
         betaln(aA, bA)) / (bB + i)
Example #55
def evalpdf(thetas, postZ, alphaPost):
    p = np.zeros_like(thetas)
    for k in range(M):
        a = alphaPost[k, 0]
        b = alphaPost[k, 1]
        # p += postZ[k] * np.exp(beta.logpdf(thetas, a, b)) # this also works
        p += postZ[k] * beta.pdf(thetas, a, b)

    return p


dataSS = np.array([20, 10])
alphaPrior = np.array([[20, 20], [30, 10]])
M = 2
mixprior = np.array([0.5, 0.5])
logmarglik = np.zeros((2, ))
for i in range(M):
    logmarglik[i] = betaln(alphaPrior[i, 0] + dataSS[0], alphaPrior[i, 1] + dataSS[1]) \
                    - betaln(alphaPrior[i, 0], alphaPrior[i, 1])

mixpost = np.exp(normalizeLogspace(logmarglik + np.log(mixprior)))
alphaPost = np.zeros_like(alphaPrior)
for z in range(M):
    alphaPost[z, :] = alphaPrior[z, :] + dataSS

grid = np.arange(0.0001, 0.9999, 0.01)
post = evalpdf(grid, mixpost, alphaPost)
prior = evalpdf(grid, mixprior, alphaPrior)
fig, axs = plt.subplots(1, 1)
fig.suptitle('mixture of Beta distributions', fontsize=10)
axs.plot(grid, prior, '--r', label='prior')
axs.plot(grid, post, '-b', label='posterior')
axs.legend()
Example #56
 def func(x):
     return (x[1] - 1.) * np.log(x[0]) + (x[2] - 1.) * np.log(1. - x[0]) - sps.betaln(x[1], x[2])
Example #57
def divergence_beta(alpha_1, beta_1, alpha_2, beta_2):
    return betaln(alpha_2, beta_2) - betaln(alpha_1, beta_1) + \
            (alpha_1 - alpha_2) * psi(alpha_1) + \
            (beta_1 - beta_2) * psi(beta_1) + \
            (alpha_2 - alpha_1 + beta_2 - beta_1) * psi(alpha_1 + beta_1)
Example #58
 def p2(j, x):
     i = I[int(j):]
     return np.sum(
         binom(i, x) * exp(
             betaln(alpha + x, beta + i - x) - betaln(alpha, beta) +
             betaln(gamma + 1, delta + i) - betaln(gamma, delta)))
Example #59
def init_brute(CS, NS, KS, NSminusKS, BAFS, minusBAFS):
    global LH
    LH = (lambda S: -np.sum(CS + betaln(KS + np.abs(
        S) * BAFS, NSminusKS + np.abs(S) * minusBAFS) - betaln(
            np.abs(S) * BAFS,
            np.abs(S) * minusBAFS)) if S != 0 else np.inf)
Example #60
 def log_partition(self) -> np.ndarray:
     return betaln(*self.parameters)