Example no. 1
 def __init__(self, d, nu, mu, Lambda):
     self.nu = nu
     self.d = d
     self.mu = mu
     self.precision = inv(Lambda)
     self.logdet = slogdet(Lambda)[1]
     self.Z = gammaln(nu / 2) + d / 2 * (math.log(nu) + math.log(math.pi)) - gammaln((nu + d) / 2)
Example no. 2
def mSel(a, mu, s, k, m):
    hyperg = hyp1f1(k + mu, m + 2.0 * mu, s)
    ret = binom(m, k)
    ret /= zSel(a, mu, s)
    ret *= math.exp(gammaln(k + mu) + gammaln(m - k + mu) - gammaln(m + 2.0 * mu))
    ret *= (1.0 - math.exp(-(1.0 - a) * s) * hyperg)
    return ret
Example no. 3
def lpol_fiar(d, n=20):
    '''AR representation of fractional integration

    .. math:: (1-L)^{d} for |d|<0.5  or |d|<1 (?)

    Parameters
    ----------
    d : float
        fractional power
    n : int
        number of terms to calculate, including lag zero

    Returns
    -------
    ar : array
        coefficients of lag polynomial

    Notes
    -----
    first coefficient is 1, negative signs except for first term,
    ar(L)*x_t
    '''
    #hide import inside function until we use this heavily
    from scipy.special import gamma, gammaln
    j = np.arange(n)
    ar = - np.exp(gammaln(-d+j) - gammaln(j+1) - gammaln(-d))
    ar[0] = 1
    return ar
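A quick check of these coefficients (a sketch, not part of the original source; it assumes lpol_fiar above is importable and that numpy is available as np): the lag-j weight of (1-L)^d is (-1)^j * C(d, j), which scipy.special.binom reproduces directly.

import numpy as np
from scipy.special import binom

d = 0.4
ar = lpol_fiar(d, n=6)
# (1-L)^d = sum_j C(d, j) (-L)^j, so the lag-j coefficient is (-1)^j * C(d, j)
j = np.arange(6)
expected = (-1.0) ** j * binom(d, j)
print(np.allclose(ar, expected))  # True for 0 < d < 0.5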
Example no. 4
    def batch_bound(self, gamma):
        """
        Computes the estimate of held out probability using only the recent
        batch; doesn't try to estimate whole corpus.  If the recent batch isn't
        used to update lambda, then this is the held-out probability.
        """
        wordids = self.recentbatch['wordids']
        wordcts = self.recentbatch['wordcts']
        batchD = len(wordids)

        score = 0
        Elogtheta = dirichlet_expectation(gamma)
        expElogtheta = n.exp(Elogtheta)

        # E[log p(docs | theta, beta)]
        for d in range(0, batchD):
            gammad = gamma[d, :]
            ids = wordids[d]
            cts = n.array(wordcts[d])
            phinorm = n.zeros(len(ids))
            for i in range(0, len(ids)):
                # print d, i, Elogtheta[d, :], self._Elogbeta[:, ids[i]]
                temp = Elogtheta[d, :] + self._Elogbeta[:, ids[i]]
                tmax = max(temp)
                phinorm[i] = n.log(sum(n.exp(temp - tmax))) + tmax
            score += n.sum(cts * phinorm)

        # E[log p(theta | alpha) - log q(theta | gamma)]
        score += n.sum((self._alpha - gamma)*Elogtheta)
        score += n.sum(gammaln(gamma) - gammaln(self._alpha))
        score += sum(gammaln(self._alpha*self._K) - gammaln(n.sum(gamma, 1)))

        return score
Example no. 5
def joint_logdist(pi, alpha, sigma, tau, u):
    abs_pi = len(pi)
    n = np.sum(pi)
    tmp = abs_pi * log(alpha) + (n - 1.) * log(u) - gammaln(n) - (n - sigma * abs_pi) * log(u + tau) \
          - (alpha / sigma) * ((u + tau) ** sigma - tau ** sigma)
    tmp += np.sum(gammaln(pi - sigma) - gammaln(1. - sigma))
    return tmp
Example no. 6
def F(x, dim, dfd=np.inf, dfn=1):
    """
    EC densities for F and Chi^2 (dfd=inf) random fields. 
    """
    
    m = float(dfd)
    n = float(dfn)
    D = float(dim)

    if dim > 0:
        x = np.asarray(x, np.float64)
        k = K(dim=dim, dfd=dfd, dfn=dfn)(x)

        if np.isfinite(m):
            f = x*n/m
            t = -np.log(1 + f) * (m+n-2.) / 2.
            t += np.log(f) * (n-D) / 2.
            t += gammaln((m+n-D)/2.) - gammaln(m/2.)  
        else:
            f = x*n
            t = np.log(f/2.) * (n-D) / 2. - f/2. 
            
        t -= np.log(2*np.pi) * D / 2. + np.log(2) * (D-2)/2. + gammaln(n/2.)
        k *= np.exp(t) 

        return k
    else:
        if np.isfinite(m):
            return scipy.stats.f.sf(x, dfn, dfd)
        else:
            return scipy.stats.chi.sf(x, dfn)
Example no. 7
def log_binomial(n, k):
    """Log of the binomial coefficient based on the gamma function.

    Note that Gamma(n + 1) = n! when n is integer. The use of the log
    scale improves numerical stability.
    """
    return gammaln(n + 1) - gammaln(k + 1) - gammaln(n - k + 1)
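A minimal sanity check (assuming the function above is in scope): exponentiating the result recovers the exact coefficient from scipy.special.comb.

import numpy as np
from scipy.special import comb

# exp(log C(50, 10)) should match the exact integer coefficient
print(np.isclose(np.exp(log_binomial(50, 10)), comb(50, 10, exact=True)))  # True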
Example no. 8
def fx(X=None, m=None, nu=None, a=None, _lambda=None):
    #Compute the pdf of pearson4
    Xx = (X - _lambda) / a
    k = -0.5 * log(pi) - log(a) - gammaln(m - 0.5) + 2 * (real(gammaln(m + (nu / 2) * 1j))) - gammaln(m)
    pearspdf = exp(k - m * log(1 + Xx ** 2) - nu * math.atan(Xx))
    
    return pearspdf
Example no. 9
    def BD_family_vtx_score(n_ijk, a_ijk):
        """
        This function returns the value of a special log Gamma function of
        n_ijk, a_ijk that occurs in both (BDEU and K2) bayesian scoring
        functions.

        Parameters
        ----------
        n_ijk : numpy.array
        a_ijk : numpy.array

        Returns
        -------
        float

        """

        n_ij = n_ijk.sum(axis=-1)
        a_ij = a_ijk.sum(axis=-1)
        # print('n_ijk', n_ijk)
        # print('n_ij', n_ij)

        part1 = sp.gammaln(a_ijk + n_ijk) - sp.gammaln(a_ijk)
        part1 = part1.sum()

        part2 = sp.gammaln(a_ij) - sp.gammaln(a_ij + n_ij)
        part2 = part2.sum()
        return part1 + part2
Example no. 10
    def likelihood_under_the_prior(self, x):
        """ Computes the likelihood of x under the prior

        Parameters
        ----------
        x, array of shape (self.n_samples,self.dim)

        Returns
        -------
        w, the likelihood of x under the prior model (unweighted)
        """
        if self.prior_dens is not None:
            return self.prior_dens * np.ones(x.shape[0])

        a = self._prior_dof
        tau = self._prior_shrinkage
        tau /= (1 + tau)
        m = self._prior_means
        b = self._prior_scale
        ib = np.linalg.inv(b[0])
        ldb = np.log(detsh(b[0]))

        scalar_w = np.log(tau / np.pi) * self.dim
        scalar_w += 2 * gammaln((a + 1) / 2)
        scalar_w -= 2 * gammaln((a - self.dim) / 2)
        scalar_w -= ldb * a
        w = scalar_w * np.ones(x.shape[0])

        for i in range(x.shape[0]):
            w[i] -= (a + 1) * np.log(detsh(ib + tau * (m - x[i:i + 1]) *
                                           (m - x[i:i + 1]).T))

        w /= 2
        return np.exp(w)
Example no. 11
    def _log_v_quotient(n, t, tp, gamma, mu, k_max=1000, diff=50.0, memo={}):
        # TODO: Make it possible to use a custom pmf p_K, e.g.
        #       - Geometric: p_K(k) = (1-r)^(k-1) * r
        #       - Poisson: p_K(k) = mu^(k-1)/(k-1)! * exp(-mu)

        try:

            return memo[(n, t, tp)]

        except KeyError:

            def help(s):
                comp = gammaln(n) - gammaln(n+s*gamma) - diff
                ret = np.empty(k_max, dtype=float)
                for k in range(k_max):
                    ret[k] = gammaln(n) + k*np.log(mu) - gammaln(1.0+k) - \
                            gammaln(1.0+s*gamma) - gammaln(n+(k+s)*gamma) + \
                            gammaln(1.0+(k+s)*gamma)
                    if ret[k] < comp:
                        break
                ret = s*gamma*np.log(n) + _logsumexp(k+1, ret[:k+1])
                return ret

            ret = help(t) - help(tp)
            ret += (t-tp) * (np.log(mu) - gamma*np.log(n))
            ret += gammaln(1.0 + t*gamma) - gammaln(1.0 + tp*gamma)

            memo[(n, t, tp)] = ret

            return ret
Example no. 12
    def __init__(self, vocab, K1, K2, D, eta0,eta1,eta2, tau0, kappa):
        """
        Arguments:
        K1,K2: The size of the first and second hidden layers.
        vocab: A set of words to recognize. Ignore the words not in this set.
        D: Total number of documents in the population, or an estimate 
        of the number in the truly online setting.
        eta0,eta1,eta2: Hyperparameters for the weight matrices in the first, second and third layers respectively.  
        tau0, kappa: Learning parameters.
        """

        self._K1 = K1
        self._K2 = K2
        self._W = len(vocab)
        self._D = D
        self._eta0 = eta0
        self._eta1 = eta1
        self._eta2 = eta2
        self._const = K1 * (self._W * gammaln(eta0) - gammaln(self._W * eta0)) + K2 * (K1 *gammaln(eta1) - gammaln(K1 * eta1))
        self._tau0 = tau0 + 1
        self._kappa = kappa
        self._updatect = 0
        # Initialize the variational distribution q(W|lambda).
        self._lambda0 = np.random.gamma(100., 1./100., (self._K1, self._W))
        self._ElogW0 = dirichlet_expectation(self._lambda0)
        self._lambda1 = np.random.gamma(100., 1./100., (self._K2, self._K1))        
        self._ElogW1 = dirichlet_expectation(self._lambda1)
        with open('dataset/wordids-test-small.p', 'rb') as f:
            self._wordids_test = cPickle.load(f)
        with open('dataset/wordcts-test-small.p', 'rb') as f:
            self._wordcts_test = cPickle.load(f)
Example no. 13
def brillouin_d(counts):
    """Calculate Brillouin index of alpha diversity, which is defined as:

    .. math::

       HB = \\frac{\\ln N!-\\sum^5_{i=1}{\\ln n_i!}}{N}


    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    double
        Brillouin index.

    Notes
    -----
    The implementation here is based on the description given in the SDR-IV
    online manual [1]_.

    References
    ----------
    .. [1] http://www.pisces-conservation.com/sdrhelp/index.html

    """
    counts = _validate(counts)
    nz = counts[counts.nonzero()]
    n = nz.sum()
    return (gammaln(n + 1) - gammaln(nz + 1).sum()) / n
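For intuition, a standalone sketch of the same formula (bypassing the _validate helper, which is not shown here): with two equally abundant species the index reduces to ln C(N, n_1) / N.

import numpy as np
from scipy.special import gammaln

counts = np.array([5, 5])
n = counts.sum()
hb = (gammaln(n + 1) - gammaln(counts + 1).sum()) / n
print(hb)  # ~0.553 = ln(C(10, 5)) / 10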
Example no. 14
def K(dim=4, dfn=7, dfd=np.inf):
    r"""
    Determine the polynomial K in:

        Worsley, K.J. (1994). 'Local maxima and the expected Euler
        characteristic of excursion sets of \chi^2, F and t fields.' Advances in
        Applied Probability, 26:13-42.

    If dfd=inf, return the limiting polynomial.
    """
    def lbinom(n, j):
        return gammaln(n+1) - gammaln(j+1) - gammaln(n-j+1)

    m = dfd
    n = dfn
    D = dim
    k = np.arange(D)
    coef = 0
    for j in range(int(np.floor((D-1)/2.)+1)):
        if np.isfinite(m):
            t = (gammaln((m+n-D)/2.+j) - 
                 gammaln(j+1) -
                 gammaln((m+n-D)/2.))
            t += lbinom(m-1, k-j) - k * np.log(m)
        else:
            _t = np.power(2., -j) / (factorial(k-j) * factorial(j))
            t = np.log(_t)
            t[np.isinf(_t)] = -np.inf
        t += lbinom(n-1, D-1-j-k) 
        coef += (-1)**(D-1) * factorial(D-1) * np.exp(t) * np.power(-1.*n, k) 
    return np.poly1d(coef[::-1])
Example no. 15
 def test2(self):
     # A test of the identity
     #     Gamma_2(a) = sqrt(pi) * Gamma(a) * Gamma(a - 0.5)
     a = np.array([2.5, 10.0])
     result = multigammaln(a, 2)
     expected = np.log(np.sqrt(np.pi)) + gammaln(a) + gammaln(a - 0.5)
     assert_almost_equal(result, expected)
Example no. 16
    def pdf(self, data=None):
        """Probability density function (PDF).

        Parameters
        ----------
        data : array_like
            Grid of point to evaluate PDF at.

            (k,) - one observation, k dimensions

            (T, k) - T observations, k dimensions

        Returns
        -------
        (T, ) array
            PDF values

        """
        ndim = self.lam.size
        if data is None:
            raise ValueError('No data given!')
        self.data = np.atleast_2d(data)
        # (T, k) array
        diff = self.data - self.const_mu()
        # (k, T) array
        diff_norm = scl.solve(self.const_sigma(), diff.T)
        # (T, ) array
        diff_sandwich = (diff.T * diff_norm).sum(0)
        term1 = ((np.pi * self.eta) ** ndim
            * scl.det(self.const_sigma())) **(-.5)
        term2 = np.exp(gammaln((self.eta + self.ndim) / 2)
            - gammaln(self.eta / 2))
        term3 = (1 + diff_sandwich / self.eta) ** (- (self.eta + ndim) / 2)
        return term1 * term2 * term3
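The expression above matches the standard multivariate Student-t density; a self-contained cross-check against scipy.stats.multivariate_t (available in SciPy >= 1.6), with eta, mu and sigma chosen arbitrarily for the sketch:

import numpy as np
import scipy.linalg as scl
from scipy.special import gammaln
from scipy.stats import multivariate_t

eta = 5.0
mu = np.zeros(2)
sigma = np.array([[2.0, 0.3], [0.3, 1.0]])
x = np.array([0.5, -0.2])

diff = x - mu
diff_sandwich = diff @ scl.solve(sigma, diff)
term1 = ((np.pi * eta) ** 2 * scl.det(sigma)) ** (-0.5)
term2 = np.exp(gammaln((eta + 2) / 2) - gammaln(eta / 2))
term3 = (1 + diff_sandwich / eta) ** (-(eta + 2) / 2)
print(np.isclose(term1 * term2 * term3,
                 multivariate_t.pdf(x, loc=mu, shape=sigma, df=eta)))  # True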
Example no. 17
def log_d_pois_like_trunc_5(d,s1,s2,a,p):
    """double poisson w max 5 goals"""
    #dp = np.sign(d)*np.power(np.abs(d),p)
    dp = 1.5*np.arctan(d)    #print(dp)
    return ( log(a)*(s1+s2)+dp*(s1-s2) - 2*a*cosh(dp)
         -gammaln(s1+1) - gammaln(s2+1) 
        -log(gammaincc(6,a*exp(-dp))*gammaincc(6,a*exp(dp)) ) ) 
Example no. 18
def test_errstate_all_but_one():
    olderr = sc.geterr()
    with sc.errstate(all='raise', singular='ignore'):
        sc.gammaln(0)
        with assert_raises(sc.SpecialFunctionError):
            sc.spence(-1.0)
    assert_equal(olderr, sc.geterr())
Example no. 19
def dirichlet_llhood(theta, alpha):
    """Compute the log likelihood of theta under Dirichlet(alpha).

    Arguments
    ---------
    theta : ndarray
        Categorical probability distribution. theta[i] is the probability
        of item i. Elements of the array have to sum to 1.0 (not forced for
        efficiency reasons)

    alpha : ndarray
        Parameters of the Dirichlet distribution

    Returns
    -------
    log_likelihood : float
        Log likelihood of theta given alpha
    """

    # substitute -inf with SMALLEST_FLOAT, so that 0*log(0) is 0 when necessary
    log_theta = ninf_to_num(log(theta))

    #log_theta = np.nan_to_num(log_theta)
    return (gammaln(alpha.sum())
            - (gammaln(alpha)).sum()
            + ((alpha - 1.) * log_theta).sum())
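The returned value is the Dirichlet log density; a self-contained cross-check against scipy.stats.dirichlet (not part of the original snippet):

import numpy as np
from scipy.special import gammaln
from scipy.stats import dirichlet

theta = np.array([0.2, 0.3, 0.5])
alpha = np.array([1.5, 2.0, 3.0])
by_hand = gammaln(alpha.sum()) - gammaln(alpha).sum() + ((alpha - 1.) * np.log(theta)).sum()
print(np.isclose(by_hand, dirichlet.logpdf(theta, alpha)))  # True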
Example no. 20
 def computeLikelihood(self, doc, phi, gamma):
     """
     Compute the document likelihood, given all model parameters.
     """
     gammaSum = numpy.sum(gamma)
     digSum = digamma(gammaSum)
     dig = digamma(gamma) - digSum # precompute the difference
     
     likelihood = gammaln(self.alpha * self.numTopics) - \
                  self.numTopics * gammaln(self.alpha) - \
                  gammaln(gammaSum)
     
     likelihood += numpy.sum((self.alpha - 1) * dig + gammaln(gamma) - (gamma - 1) * dig)
     
     for n, (wordIndex, wordCount) in enumerate(doc):
         try:
             phin, lprob = phi[n], self.logProbW[:, wordIndex]
             code = """
             const int num_terms = Nphin[0];
             double result = 0.0;
             for (int i=0; i < num_terms; i++) {
                 if (phin[i] > 1e-8 || phin[i] < -1e-8)
                     result += phin[i] * (dig[i] - log(phin[i]) + LPROB1(i));
             }
             return_val = wordCount * result;
             """
             likelihood += weave.inline(code, ['dig', 'phin', 'lprob', 'wordCount'])
         except:
             partial = phi[n] * (dig - numpy.log(phi[n]) + self.logProbW[:, wordIndex])
             partial[numpy.isnan(partial)] = 0.0 # replace NaNs (from 0 * log(0) in phi) with 0.0
             likelihood += wordCount * numpy.sum(partial)
     return likelihood
Example no. 21
 def optAlpha(self, max_iter=1000, newton_thresh=1e-5):
     """
     Estimate new topic priors (actually just one scalar shared across all
     topics).
     """
     initA = 100.0
     logA = numpy.log(initA) # keep computations in log space
     logger.debug("optimizing old alpha %s" % self.alpha)
     
     for i in xrange(max_iter):
         a = numpy.exp(logA)
         if not numpy.isfinite(a):
             initA = initA * 10.0
             logger.warning("alpha is NaN; new init alpha=%f" % initA)
             a = initA
             logA = numpy.log(a)
         s = self.state
         f = s.numDocs * (gammaln(self.numTopics * a) - self.numTopics * gammaln(a)) + (a - 1) * s.alphaSuffStats
         df = s.alphaSuffStats + s.numDocs * (self.numTopics * digamma(self.numTopics * a) - self.numTopics * digamma(a))
         d2f = s.numDocs * (self.numTopics * self.numTopics * trigamma(self.numTopics * a) - self.numTopics * trigamma(a))
         logA -= df / (d2f * a + df)
         logger.debug("alpha maximization: f=%f, df=%f" % (f, df))
         if numpy.abs(df) <= newton_thresh:
             break
     result = numpy.exp(logA) # convert back from log space
     logger.info("estimated old alpha %s to new alpha %s" % (self.alpha, result))
     return result
Example no. 22
    def marginal_likelihood(self, log=True, normalize=True):
        n = float(self.n)
        d = self.n_dims
        
        kappa_0 = self.kappa_0
        mu_0 = self.mu_0
        nu_0 = self.nu_0
        sigma2_0 = self.sigma2_0
        x_sum = self.x_sum
        s_sum = self.s_sum

        kappa_n = kappa_0 + n
        nu_n = nu_0 + n
        x_bar = x_sum / n if n > 0 else np.zeros_like(x_sum)
        ss_diff = s_sum - n * x_bar * x_bar
        ms_diff = ((n * kappa_0) / kappa_n) * (x_bar - mu_0)**2

        mu_n = (kappa_0 * mu_0 + x_sum) / kappa_n
        sigma2_n = (nu_0 * sigma2_0 + ss_diff + ms_diff) / nu_n

        loglike = 0.0
        loglike += d * (gammaln(0.5 * nu_n) - gammaln(0.5 * nu_0))
        loglike += d * 0.5 * (np.log(kappa_0) - np.log(kappa_n))
        loglike += (0.5 * nu_0 * (np.log(nu_0) + np.log(sigma2_0))).sum()
        loglike -= (0.5 * nu_n * (np.log(nu_n) + np.log(sigma2_n))).sum()
        loglike -= d * 0.5 * n * np.log(np.pi)

        return loglike if log else np.exp(loglike)
Example no. 23
def _e_log_beta(c0,d0,c,d):
    ''' Calculates expectation of log pdf of beta distributed parameter'''
    log_C    = gammaln(c0 + d0) - gammaln(c0) - gammaln(d0)
    psi_cd   = psi(c+d)
    log_mu   = (c0 - 1) * ( psi(c) - psi_cd )
    log_i_mu = (d0 - 1) * ( psi(d) - psi_cd )
    return np.sum(log_C + log_mu + log_i_mu)
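A Monte-Carlo sanity check of the expectation (a sketch, assuming _e_log_beta above and its gammaln/psi imports are in scope): drawing x ~ Beta(c, d) and averaging log Beta(x; c0, d0) should approach the analytic value.

import numpy as np
from scipy.stats import beta

rng = np.random.default_rng(0)
c0, d0, c, d = 2.0, 3.0, 4.0, 5.0
samples = rng.beta(c, d, size=200_000)
mc_estimate = beta.logpdf(samples, c0, d0).mean()
print(mc_estimate, _e_log_beta(c0, d0, c, d))  # agree to roughly two or three decimals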
Example no. 24
    def uncollapsed_likelihood(self, X, params):
        """
        Calculates the score of the data X under this component model with mean
        mu and precision rho.
        Inputs:
            X: A column of data (numpy)
            params: a dict with the following keys
                weights: a list of category weights (should sum to 1)
        """
        check_data_type_column_data(X)
        check_model_parameters_dict(params)

        hypers = self.get_hypers()

        assert len(params['weights']) == int(hypers[b'K'])

        dirichlet_alpha = hypers[b'dirichlet_alpha']
        K = float(hypers[b'K'])
        check_data_vs_k(X,K)

        weights = numpy.array(params['weights'])

        log_likelihood = self.log_likelihood(X, params)
        logB = gammaln(dirichlet_alpha)*K - gammaln(dirichlet_alpha*K)
        log_prior = -logB + numpy.sum((dirichlet_alpha-1.0)*numpy.log(weights))

        log_p = log_likelihood + log_prior

        return log_p
Example no. 25
    def log_likelihood(X, params):
        """
        Calculates the log likelihood of the data X given mean mu and precision
        rho.
        Inputs:
            X: a column of data (numpy)
            params: a dict with the following keys
                weights: a list of categories weights (should sum to 1)
        """
        check_data_type_column_data(X)
        check_model_parameters_dict(params)

        N = len(X)
        K = len(params['weights'])
        check_data_vs_k(X,K)
        counts= numpy.bincount(X,minlength=K)

        weights = numpy.array(params['weights'])

        A = gammaln(N+1)-numpy.sum(gammaln(counts+1))
        B = numpy.sum(counts*numpy.log(weights));

        log_likelihood = A+B

        return log_likelihood
Example no. 26
    def stdc4(self, n):
        """
        Calculate c4 factor.

        The c4 factor is required to obtain an unbiased estimator
        for the standard deviation.

        It is proportional to the factor B used in Kenney 1940, who
        started from a slightly different definition of the sample
        variance.

        Parameters
        ----------
        n : int
            Number of points in the sample

        Returns
        -------
        c4, ln(c4) : float
            The c4 factor and its natural logarithm.
        """
        lnc4 = 0.5 * (np.log(2.0) - np.log(n - 1.0)) + \
            ss.gammaln(n / 2.0) - ss.gammaln((n - 1.) / 2.)
        c4 = np.exp(lnc4)
        return c4, lnc4
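A standalone version of the same formula for a few sample sizes (a sketch; the helper name c4_factor is just for this illustration, and the printed values are the usual c4 constants):

import numpy as np
import scipy.special as ss

def c4_factor(n):
    lnc4 = 0.5 * (np.log(2.0) - np.log(n - 1.0)) + ss.gammaln(n / 2.0) - ss.gammaln((n - 1.) / 2.)
    return np.exp(lnc4)

print(c4_factor(2))    # sqrt(2/pi) ~ 0.798
print(c4_factor(10))   # ~0.973
print(c4_factor(100))  # ~0.9975, approaches 1 for large n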
Example no. 27
def tstd_lls(y, params, df):
    '''t loglikelihood given observations and mean mu and variance sigma2 = 1

    Parameters
    ----------
    y : array, 1d
        normally distributed random variable
    params: array, (nobs, 2)
        array of mean, variance (mu, sigma2) with observations in rows
    df : integer
        degrees of freedom of the t distribution

    Returns
    -------
    lls : array
        contribution to loglikelihood for each observation

    Notes
    -----
    parameterized for garch
    '''

    mu, sigma2 = params.T
    df = df*1.0
    #lls = gammaln((df+1)/2.) - gammaln(df/2.) - 0.5*np.log((df-2)*np.pi)
    #lls -= (df+1)/2. * np.log(1. + (y-mu)**2/(df-2.)/sigma2) + 0.5 * np.log(sigma2)
    lls = gammaln((df+1)/2.) - gammaln(df/2.) - 0.5*np.log((df-2)*np.pi)
    lls -= (df+1)/2. * np.log(1. + (y-mu)**2/(df-2)/sigma2) + 0.5 * np.log(sigma2)

    return lls
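A cross-check (assuming tstd_lls above is importable): the same value follows from scipy.stats.t with location mu and scale sqrt(sigma2*(df-2)/df), since the parameterization above fixes the variance to sigma2.

import numpy as np
from scipy import stats

y = np.array([0.3])
params = np.array([[0.1, 2.0]])   # mu = 0.1, sigma2 = 2.0
df = 8
scale = np.sqrt(params[0, 1] * (df - 2) / df)
ref = stats.t.logpdf(y[0], df, loc=params[0, 0], scale=scale)
print(np.isclose(tstd_lls(y, params, df)[0], ref))  # True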
Example no. 28
def log_pochhammer(a, b):
    '''
    Pochhammer symbol (a)_b implemented for integer ``b``.
    '''
    assert a > 0
    assert b >= 0
    return gammaln(a+b) - gammaln(a)
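A quick check (assuming the function above is in scope): (3)_4 = 3*4*5*6 = 360, which also matches scipy.special.poch.

import numpy as np
from scipy.special import poch

print(np.exp(log_pochhammer(3, 4)), poch(3, 4))  # 360.0 360.0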
Example no. 29
File: shm.py Project: cnguyen/dipy
def spherical_harmonics(m, n, theta, phi):
    x = np.cos(phi)
    val = lpmv(m, n, x).astype(complex)
    val *= np.sqrt((2 * n + 1) / 4.0 / np.pi)
    val *= np.exp(0.5 * (gammaln(n - m + 1) - gammaln(n + m + 1)))
    val = val * np.exp(1j * m * theta)
    return val
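With the snippet's convention (theta = azimuth, phi = polar angle) this reproduces scipy.special.sph_harm; a hedged check, assuming the function above and its np/lpmv/gammaln imports are in scope (sph_harm is deprecated in recent SciPy in favour of sph_harm_y, but the identity is the same):

import numpy as np
from scipy.special import sph_harm

m, n = 2, 4
theta, phi = 0.7, 1.1
print(np.allclose(spherical_harmonics(m, n, theta, phi),
                  sph_harm(m, n, theta, phi)))  # True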
Example no. 30
    def update_V(self, corpus):
        lb = 0 

        sumLnZ = np.sum(psi(corpus.A) - np.log(corpus.B), 0)        # K dim

        tmp = np.dot(corpus.R, corpus.w)  # M x K
        sum_r_w = np.sum(tmp, 0)
        assert len(sum_r_w) == self.K

        for i in xrange(self.c_a_max_step):
            one_V = 1-self.V
            stickLeft = self.getStickLeft(self.V)       # prod(1-V_(dim-1))
            p = self.V * stickLeft

            psiV = psi(self.beta * p)

            vVec = - self.beta*stickLeft*sum_r_w + self.beta*stickLeft*sumLnZ - corpus.M*self.beta*stickLeft*psiV;

            for k in xrange(self.K):
                tmp1 = self.beta*sum(sum_r_w[k+1:]*p[k+1:]/one_V[k]);
                tmp2 = self.beta*sum(sumLnZ[k+1:]*p[k+1:]/one_V[k]);
                tmp3 = corpus.M*self.beta*sum(psiV[k+1:]*p[k+1:]/one_V[k]);
                vVec[k] = vVec[k] + tmp1 - tmp2;
                vVec[k] = vVec[k] + tmp3;
                vVec[k] = vVec[k] 
            vVec[:self.K-2] -= (self.alpha-1)/one_V[:self.K-2];
            vVec[self.K-1] = 0;
            step_stick = self.getstepSTICK(self.V,vVec,sum_r_w,sumLnZ,self.beta,self.alpha,corpus.M);
            self.V = self.V + step_stick*vVec;
            self.p = self.getP(self.V)

        lb += self.K*gammaln(self.alpha+1) - self.K*gammaln(self.alpha) + np.sum((self.alpha-1)*np.log(1-self.V[:self.K-1]))
        if self.is_verbose:
            print 'p(V)-q(V) %f' % lb
        return lb
Example no. 31
 def calc_predictive_logp(x, N, sum_x, a, b):
     an, bn = Poisson.posterior_hypers(N, sum_x, a, b)
     am, bm = Poisson.posterior_hypers(N + 1, sum_x + x, a, b)
     ZN = Poisson.calc_log_Z(an, bn)
     ZM = Poisson.calc_log_Z(am, bm)
     return ZM - ZN - gammaln(x + 1)
Example no. 32
 def test1(self):
     # A test of the identity
     #     Gamma_1(a) = Gamma(a)
     np.random.seed(1234)
     a = np.abs(np.random.randn())
     assert_array_equal(multigammaln(a, 1), gammaln(a))
Example no. 33
    def log_likelihood(self):
        """
        Compute marginal log likelihood of the model
        """
        ll = self.n_doc * gammaln(self.alpha * self.n_topic)
        ll -= self.n_doc * self.n_topic * gammaln(self.alpha)
        ll += self.n_topic * gammaln(self.beta * self.n_voca)
        ll -= self.n_topic * self.n_voca * gammaln(self.beta)

        for di in xrange(self.n_doc):
            ll += gammaln(self.DT[di]).sum() - gammaln(self.DT[di].sum())

        for ki in xrange(self.n_topic):
            ll += gammaln(self.TW[ki]).sum() - gammaln(self.sum_T[ki])

        if self.n_class != 1:
            ll += (self.n_class - 1) * gammaln(self.gamma * (self.n_class - 1))
            ll -= (self.n_class - 1) * self.n_voca * gammaln(self.gamma)
            ll += (self.n_class + 2) * gammaln(self.eta * (self.n_class + 2))
            ll -= (self.n_class + 2) * (self.n_class + 2) * gammaln(self.eta)

            for ci in xrange(1, self.n_class):
                ll += gammaln(self.CW[ci]).sum() - gammaln(self.sum_C[ci])
            for ci in xrange(self.n_class + 2):
                ll += gammaln(self.T[ci]).sum() - gammaln(self.T[ci].sum())

        return ll
Example no. 34
def _expect_score(alpha, beta, lens, cnt_1):
    m = lens.shape[0]
    score = gammaln(alpha + cnt_1).sum() + gammaln(lens - cnt_1 + beta).sum() - gammaln(lens + alpha + beta).sum() + \
            m * gammaln(alpha + beta) - m * gammaln(alpha) - m * gammaln(beta)
    return score
Example no. 35
def get_log_p0(n,
               k,
               e,
               c0,
               pseudo_alpha=None,
               pseudo_beta=None,
               cutoff_f=None):
    """
    Returns the (log) probability that the variant is absent and present
    :param n: coverage (number of reads)
    :param k: observed number of reads reporting the variant
    :param e: sequencing error rate
    :param c0: prior mixture parameter of delta function and uniform distribution
    :param pseudo_alpha: alpha parameter for the beta distributed part of the prior
    :param pseudo_beta: beta parameter for the beta distributed part of the prior
    :param cutoff_f: cutoff frequency for variants being absent
    :return: tuple (log probability that variant is absent, log probability that variant is present)
    """

    # cutoff frequency
    if cutoff_f is None:
        cutoff_f = 0.05
        logger.warning(
            'Cutoff absent frequency was not set in the Bayesian inference model! Assumed {:1e}.'
            .format(cutoff_f))

    if not isinstance(n, numbers.Real) or not isinstance(k, numbers.Real):
        raise (RuntimeError(
            'Sequencing read counts need to be numbers: n={}, k={}!'.format(
                n, k)))

    if math.isnan(n) or math.isnan(k):
        logger.warning(
            'Sequencing read counts should be numbers: n={}, k={}!'.format(
                n, k))

    if e > cutoff_f:
        raise RuntimeError(
            'Error rate e={} can not be higher than the calculated cutoff absent frequency {}'
            .format(e, cutoff_f))

    if k > n:
        raise RuntimeError(
            'Number of variant reads cannot be higher than the sequencing depth: {} <= {}'
            .format(k, n))

    if pseudo_alpha is None:
        pseudo_alpha = def_sets.PSEUDO_ALPHA
    if pseudo_beta is None:
        pseudo_beta = def_sets.PSEUDO_BETA

    # pseudocounts are added to n and k, but removed for the computation of p0 at the end
    n_new = n + pseudo_alpha - 1 + pseudo_beta - 1
    k_new = k + pseudo_alpha - 1

    # overall weight assigned to the delta spike
    delta_value = math.log(c0) + loglp(n, k, 0, e)
    # whole integral normalizing constant so that c0 is recovered without data when cutoff = 0
    beta_norm_const = math.log(1 - 2 * e) + (
        math.log(1.0 - c0) + gammaln(pseudo_alpha + pseudo_beta) -
        gammaln(pseudo_alpha) - gammaln(pseudo_beta))
    # correct the cutoff for change of variables
    new_cutoff = cutoff_f * (1 - 2 * e) + e
    # compute the integral of allele frequencies below the cutoff, given we are in the beta function
    tmp_beta_inc = betainc(k + pseudo_alpha, n - k + pseudo_beta, new_cutoff)
    if tmp_beta_inc == 0.0:
        fraction_below_cutoff = -1e10  # very small number in log space
    else:
        fraction_below_cutoff = math.log(tmp_beta_inc)

    # compute the total weight of the beta distribution
    total_weight_beta = (-1 * math.log(1 - 2 * e) + gammaln(k_new + 1) +
                         gammaln(n_new - k_new + 1) - gammaln(n_new + 2) +
                         beta_norm_const)

    posterior_term_1 = -logsumexp([
        -fraction_below_cutoff,
        delta_value - fraction_below_cutoff - total_weight_beta
    ])
    posterior_term_2 = -logsumexp([0, total_weight_beta - delta_value])
    # print("Posterior term 1 ", posterior_term_1)
    # print("Posterior term 2 ", posterior_term_2)
    p0 = logsumexp([posterior_term_1, posterior_term_2])
    try:
        if p0 >= 0.0:
            p1 = -1e10
        elif p0 > -1e-10:
            p1 = math.log(-p0)
        else:
            p1 = math.log(-math.expm1(p0))
    except ValueError:
        logger.error('ERROR: {}'.format(p0))
        raise RuntimeError('Posterior probability could not be calculated!')

    return p0, p1
Example no. 36
 def unincorporate(self, rowid):
     x = self.data.pop(rowid)
     self.N -= 1
     self.sum_x -= x
     self.sum_log_fact_x -= gammaln(x + 1)
Example no. 37
 def calc_log_Z(a, b):
     Z = gammaln(a) - a * log(b)
     return Z
Example no. 38
                Lattice_Initial = Square_Lattice_Bare.copy()
                As_component[0] = ComputeAs_component_0_Square(
                    N_x, Lattice_Initial, deg2_weight, loop_weight, rung)

                max = As_component[0]
                max_n_index = 0
                # Highest and lowest number of 'on' faces to be considered
                high = N_faces
                low = 0
                for n in range(1, N_faces + 1):
                    if np.mod(n, 1) == 0:
                        print('n: ' + str(n))

                    # Constructs list of combinations (loop configurations) to analyze
                    if (gammaln(N_faces + 1) - gammaln(n + 1) - gammaln(N_faces - n + 1)) > \
                            np.log(range_samples):  # equivalent to nchoosek(N_faces, n) > samples
                        combs = ComputeRandomUniqueCombinations(
                            N_faces, n, range_samples)
                        avg = 1
                    else:
                        combs = np.reshape(
                            list(combinations(range(1, N_faces + 1), n)),
                            (-1, n))
                        avg = 0

                    # Computes exp(-energy) for each loop config to be analyzed
                    def ComputeAs_component_contribution_Square_parallel(
                            sample):
                        return ComputeAs_component_contribution_Square(
                            N_x, N_y, Lattice_Initial, deg2_weight, gamma,
Example no. 39
def dinvgamma(x,a,b):
    return a * np.log(b) - gammaln(a) - (a+1)*x - b*np.exp(-x)
Example no. 40
def ComputeLoopProperties_Square(N_x, N_y, N_faces, Lattice_Initial,
                                 deg2_weight, CP_weight, CR_weight,
                                 loop_weight, n_low, n_high, samples,
                                 deg4_samples, iteration):
    # Computes average loop number and loop size

    print('Iteration: ' + str(iteration + 1))
    rung = np.mod(iteration, N_y + 1) + 1
    loop_numbers = np.zeros(n_high - n_low + 1)
    loop_sizes = np.zeros(n_high - n_low + 1)
    loop_total_sizes = np.zeros(n_high - n_low + 1)
    Z0_components = np.zeros(n_high - n_low +
                             1)  # Components of Z with no strings

    for n in range(n_low, n_high + 1):
        if np.mod(iteration + 1, 16) == 0:
            print('Iteration: ' + str(iteration + 1) + ' n: ' + str(n))
        if n == 0:
            # Contribution from contractible configuration (no faces flipped)
            Z0_components[0] += 1
            #loop_sizes[0] += 1

            # Contribution from noncontractible configuration
            Lattice_nc = AddNCLoop(Lattice_Initial.copy(), N_x, rung)
            deg2 = Lattice_nc.degree().count(2)
            loops = len([x for x in Lattice_nc.components() if len(x) > 1])
            edges = Lattice_nc.ecount()
            w = (deg2_weight**deg2) * (loop_weight**loops)

            Z0_components[0] += w
            loop_numbers[0] += loops * w
            loop_sizes[0] += edges / loops * w
            loop_total_sizes[0] += edges * w
        else:
            n_index = n - n_low

            # Constructs list of combinations (loop configurations) to analyze
            if (gammaln(N_faces+1) - gammaln(n+1) - gammaln(N_faces-n+1)) > \
                    np.log(samples):  # equivalent to nchoosek(N_faces, n) > samples
                combs = ComputeRandomUniqueCombinations(N_faces, n, samples)
            else:
                combs = np.reshape(
                    list(combinations(range(1, N_faces + 1), n)), (-1, n))

            # Computes exp(-energy) for each loop config to be analyzed
            for i in range(0, np.shape(combs)[0]):
                # Finds coordinates of faces to be flipped in loop configuration
                coords = np.zeros([n, 2])
                for j in range(0, n):
                    coords[j, :] = [
                        np.floor((combs[i, j] - 1) / N_y) + 1,
                        np.mod(combs[i, j] - 1, N_y) + 1
                    ]

                # Flips faces, contractible config
                Lattice_c = FlipSquareLatticeFaces(Lattice_Initial.copy(),
                                                   coords, N_x)
                # Flips faces, noncontractible config
                Lattice_nc = AddNCLoop(Lattice_c.copy(), N_x, rung)

                # Contribution from contractible configuration
                deg2 = Lattice_c.degree().count(2)
                deg4 = Lattice_c.degree().count(4)
                loops0 = len([x for x in Lattice_c.components() if len(x) > 1])
                edges = Lattice_c.ecount()
                if deg4 >= 1:
                    deg4_configs = Compute_deg4_configs(deg4, deg4_samples)
                    deg4_avg_factor = (3**deg4 / np.shape(deg4_configs)[0])
                    for deg4_config in deg4_configs:
                        CP1 = list(deg4_config).count(
                            1)  # Corner passes of type 1
                        CP2 = list(deg4_config).count(
                            3)  # Corner passes of type 2
                        CP = CP1 + CP2
                        CR = list(deg4_config).count(2)  # Crossings

                        loops = loops0 + CP1  # One type of corner pass increases the number of loops
                        w = (deg2_weight**deg2) * (CP_weight**CP) * (
                            CR_weight**CR) * (loop_weight**
                                              loops) * (deg4_avg_factor)

                        Z0_components[n_index] += w
                        loop_numbers[n_index] += loops * w
                        loop_sizes[n_index] += (edges / loops) * w
                        loop_total_sizes[n_index] += edges * w
                else:
                    loops = loops0
                    w = (deg2_weight**deg2) * (loop_weight**loops)

                    Z0_components[n_index] += w
                    loop_numbers[n_index] += loops * w
                    loop_sizes[n_index] += (edges / loops) * w
                    loop_total_sizes[n_index] += edges * w

                # Contribution from noncontractible configuration
                deg2 = Lattice_nc.degree().count(2)
                deg4 = Lattice_nc.degree().count(4)

                loops0 = len(
                    [x for x in Lattice_nc.components() if len(x) > 1])
                edges = Lattice_nc.ecount()
                if deg4 >= 1:
                    deg4_configs = Compute_deg4_configs(deg4, deg4_samples)
                    deg4_avg_factor = (3**deg4 / np.shape(deg4_configs)[0])
                    for deg4_config in deg4_configs:
                        CP1 = list(deg4_config).count(
                            1)  # Corner passes of type 1
                        CP2 = list(deg4_config).count(
                            3)  # Corner passes of type 2
                        CP = CP1 + CP2
                        CR = list(deg4_config).count(2)  # Crossings

                        loops = loops0 + CP1  # One type of corner pass increases the number of loops
                        w = (deg2_weight**deg2) * (CP_weight**CP) * (
                            CR_weight**CR) * (loop_weight**
                                              loops) * deg4_avg_factor

                        Z0_components[n_index] += w
                        loop_numbers[n_index] += loops * w
                        loop_sizes[n_index] += (edges / loops) * w
                        loop_total_sizes[n_index] += edges * w
                else:
                    loops = loops0
                    w = (deg2_weight**deg2) * (loop_weight**loops)

                    Z0_components[n_index] += w
                    loop_numbers[n_index] += loops * w
                    loop_sizes[n_index] += (edges / loops) * w
                    loop_total_sizes[n_index] += edges * w
            if (gammaln(N_faces + 1) - gammaln(n + 1) - gammaln(N_faces - n + 1)) > \
                    np.log(samples):  # equivalent to nchoosek(N_faces, n) > samples
                Z0_components[n_index] *= 1 / samples * np.exp(
                    gammaln(N_faces + 1) - gammaln(n + 1) -
                    gammaln(N_faces - n + 1))
                loop_numbers[n_index] *= 1 / samples * np.exp(
                    gammaln(N_faces + 1) - gammaln(n + 1) -
                    gammaln(N_faces - n + 1))
                loop_sizes[n_index] *= 1 / samples * np.exp(
                    gammaln(N_faces + 1) - gammaln(n + 1) -
                    gammaln(N_faces - n + 1))
                loop_total_sizes[n_index] *= 1 / samples * np.exp(
                    gammaln(N_faces + 1) - gammaln(n + 1) -
                    gammaln(N_faces - n + 1))
    loop_number = sum(loop_numbers) / (sum(Z0_components))
    loop_size = sum(loop_sizes) / (sum(Z0_components))
    loop_total_size = sum(loop_total_sizes) / (sum(Z0_components))
    return loop_number, loop_size, loop_total_size
Example no. 41
 def log_normalizer(S, xi):
     return gammaln(S + xi) - gammaln(xi) - gammaln(S + 1)
Example no. 42
File: bhc.py Project: kishoreb4/bhc
    def build(self):
        n_objects = self.data.shape[0]

        weights = []

        # active nodes
        active_nodes = np.arange(n_objects)
        # assignments - starting each point in its own cluster
        assignments = np.arange(n_objects)
        # stores information from temporary merges
        tmp_merge = None
        hierarchy_cut = False

        # for every single data point
        log_p = np.zeros(n_objects)
        log_d = np.zeros(n_objects)
        n = np.ones(n_objects)
        for i in range(n_objects):
            # compute log(d_k)
            log_d[i] = BayesianHierarchicalClustering.__calc_log_d(
                self.alpha, n[i], None)
            # compute log(p_i)
            log_p[i] = self.model.calc_log_mlh(self.data[i])

        ij = n_objects - 1

        # for every pair of data points
        for i in range(n_objects):
            for j in range(i + 1, n_objects):
                # compute log(d_k)
                n_ch = n[i] + n[j]
                log_d_ch = log_d[i] + log_d[j]
                log_dk = BayesianHierarchicalClustering.__calc_log_d(
                    self.alpha, n_ch, log_d_ch)
                # compute log(pi_k)
                log_pik = np.log(self.alpha) + gammaln(n_ch) - log_dk
                # compute log(p_k)
                data_merged = np.vstack((self.data[i], self.data[j]))
                log_p_k = self.model.calc_log_mlh(data_merged)
                # compute log(r_k)
                log_p_ch = log_p[i] + log_p[j]
                r1 = log_pik + log_p_k
                r2 = log_d_ch - log_dk + log_p_ch
                log_r = r1 - r2
                # store results
                merge_info = [i, j, log_r, r1, r2]
                tmp_merge = merge_info if tmp_merge is None \
                    else np.vstack((tmp_merge, merge_info))

        # find clusters to merge
        arc_list = np.empty(0, dtype=api.Arc)
        while active_nodes.size > 1:
            # find i, j with the highest probability of the merged hypothesis
            max_log_rk = np.max(tmp_merge[:, 2])
            ids_matched = np.argwhere(tmp_merge[:, 2] == max_log_rk)
            position = np.min(ids_matched)
            i, j, log_r, r1, r2 = tmp_merge[position]
            i = int(i)
            j = int(j)
            weights.append(log_r)

            # cut if required and stop
            if self.cut_allowed and log_r < 0:
                hierarchy_cut = True
                break

            # turn nodes i,j off
            tmp_merge[np.argwhere(tmp_merge[:, 0] == i).flatten(), 2] = -np.inf
            tmp_merge[np.argwhere(tmp_merge[:, 1] == i).flatten(), 2] = -np.inf
            tmp_merge[np.argwhere(tmp_merge[:, 0] == j).flatten(), 2] = -np.inf
            tmp_merge[np.argwhere(tmp_merge[:, 1] == j).flatten(), 2] = -np.inf

            # new node ij
            ij = n.size
            n_ch = n[i] + n[j]
            n = np.append(n, n_ch)
            # compute log(d_ij)
            log_d_ch = log_d[i] + log_d[j]
            log_d_ij = BayesianHierarchicalClustering.__calc_log_d(
                self.alpha, n[ij], log_d_ch)
            log_d = np.append(log_d, log_d_ij)
            # update assignments
            assignments[np.argwhere(assignments == i)] = ij
            assignments[np.argwhere(assignments == j)] = ij

            # create arcs from ij to i,j
            arc_i = api.Arc(ij, i)
            arc_j = api.Arc(ij, j)
            arc_list = np.append(arc_list, [arc_i, arc_j])

            # delete i,j from active list and add ij
            i_idx = np.argwhere(active_nodes == i).flatten()
            j_idx = np.argwhere(active_nodes == j).flatten()
            active_nodes = np.delete(active_nodes, [i_idx, j_idx])
            active_nodes = np.append(active_nodes, ij)
            # compute log(p_ij)
            t1 = np.maximum(r1, r2)
            t2 = np.minimum(r1, r2)
            log_p_ij = t1 + np.log(1 + np.exp(t2 - t1))
            log_p = np.append(log_p, log_p_ij)

            # for every pair ij x active
            x_mat_ij = self.data[np.argwhere(assignments == ij).flatten()]
            for k in range(active_nodes.size - 1):
                # compute log(d_k)
                n_ch = n[k] + n[ij]
                log_d_ch = log_d[k] + log_d[ij]
                log_dij = BayesianHierarchicalClustering.__calc_log_d(
                    self.alpha, n_ch, log_d_ch)
                # compute log(pi_k)
                log_pik = np.log(self.alpha) + gammaln(n_ch) - log_dij
                # compute log(p_k)
                data_merged = self.data[np.argwhere(
                    assignments == active_nodes[k]).flatten()]
                log_p_ij = self.model.calc_log_mlh(
                    np.vstack((x_mat_ij, data_merged)))
                # compute log(r_k)
                log_p_ch = log_p[ij] + log_p[active_nodes[k]]
                r1 = log_pik + log_p_ij
                r2 = log_d_ch - log_dij + log_p_ch
                log_r = r1 - r2
                # store results
                merge_info = [ij, active_nodes[k], log_r, r1, r2]
                tmp_merge = np.vstack((tmp_merge, merge_info))

        return api.Result(arc_list,
                          np.arange(0, ij + 1),
                          log_p[-1],
                          np.array(weights),
                          hierarchy_cut,
                          len(np.unique(assignments)))
Example no. 43
 def _loglikelihood(prior, distr, dirichlet_distr, size):
     # calculate log-likelihood
     score = np.sum((prior - distr) * dirichlet_distr)
     score += np.sum(gammaln(distr) - gammaln(prior))
     score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))
     return score
Example no. 44
def _gamma(N):
    from scipy.special import gammaln
    # Note: this is closely approximated by (1 - 0.75 / N) for large N
    return np.sqrt(2 / N) * np.exp(gammaln(N / 2) - gammaln((N - 1) / 2))
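Checking the comment above (assuming _gamma is in scope): the factor is indeed close to 1 - 0.75 / N for large N.

for N in (5, 50, 500):
    print(N, _gamma(N), 1 - 0.75 / N)
# roughly: 5 -> 0.841 vs 0.850, 50 -> 0.985 vs 0.985, 500 -> 0.9985 vs 0.9985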
Example no. 45
                            key=nodes_pop_scaled.__getitem__,
                            reverse=True)[i]
            if importations[node_id][tt] > 0.0:
                add_OutbreakIndivisualDengue(
                    cb, tt, {},
                    importations[node_id][tt] / nodes_scaled[i_node],
                    'Strain_1', [i_node])

    return params_dict


# Just for fun, let the numerical derivative baseline scale with the number of dimensions
volume_fraction = 0.05  # desired fraction of N-sphere area to unit cube area for numerical derivative (automatic radius scaling with N)
num_params = len([p for p in params if p['Dynamic']])
r = math.exp(1 / float(num_params) *
             (math.log(volume_fraction) + gammaln(num_params / 2. + 1) -
              num_params / 2. * math.log(math.pi)))

optimtool = OptimTool(
    params,
    lambda p: p,
    mu_r=
    r,  # <-- radius for numerical derivatve.  CAREFUL not to go too small with integer parameters
    sigma_r=r / 10.,  # <-- stdev of radius
    center_repeats=
    1,  # <-- Number of times to replicate the center (current guess).  Nice to compare intrinsic to extrinsic noise
    samples_per_iteration=
    100  # <-- Samples per iteration, includes center repeats.  Actual number of sims run is this number times number of sites.
)

calib_name = "CHIKV_Calib_Habitat_Imports_admin2nodes_Migration" \
Example no. 46
    def log_normalizer(self, S, n=None):
        if n is not None:
            S = S[:, n]

        return gammaln(S + self.xi) - gammaln(self.xi) - gammaln(S + 1)
Example no. 47
 def log_density(self,ys,K,n=1):
     ldetK=np.linalg.slogdet(K)[1]
     K_inv=np.linalg.inv(K)
     yK_invy=sum([sum([ys[i]*K_inv[i,j]*ys[j] for j in range(n)]) for i in range(n)])
     ld=self.density_constant-ldetK-(self.a0+.5*n)*np.log(self.b0+.5*yK_invy)-gammaln(self.a0+.5*n)
     return(ld)
Example no. 48
def logp_phi_n(n_per_mk, b, gamma, phi):
    logp = ( xlogy(gamma-1,phi) - phi - gammaln(gamma) 
            + np.sum(b * (gammaln(phi+n_per_mk) - gammaln(phi) - gammaln(n_per_mk+1) - (phi+n_per_mk)*np.log(2)), axis=0) )
    return np.log(phi>0) + logp
Example no. 49
    def doc_e_step(self, doc, ss, Elogsticks_1st, word_list, unique_words,
                   doc_word_ids, doc_word_counts, var_converge):
        """
        e step for a single doc
        """
        chunkids = [unique_words[id] for id in doc_word_ids]

        Elogbeta_doc = self.m_Elogbeta[:, doc_word_ids]
        ## very similar to the hdp equations
        v = np.zeros((2, self.m_K - 1))
        v[0] = 1.0
        v[1] = self.m_alpha

        # back to the uniform
        phi = np.ones((len(doc_word_ids), self.m_K)) * 1.0 / self.m_K

        likelihood = 0.0
        old_likelihood = -1e200
        converge = 1.0
        eps = 1e-100

        iter = 0
        max_iter = 100
        # not yet support second level optimization yet, to be done in the future
        while iter < max_iter and (converge < 0.0 or converge > var_converge):
            ### update variational parameters

            # var_phi
            if iter < 3:
                var_phi = np.dot(phi.T, (Elogbeta_doc * doc_word_counts).T)
                (log_var_phi, log_norm) = log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)
            else:
                var_phi = np.dot(
                    phi.T, (Elogbeta_doc * doc_word_counts).T) + Elogsticks_1st
                (log_var_phi, log_norm) = log_normalize(var_phi)
                var_phi = np.exp(log_var_phi)

            # phi
            if iter < 3:
                phi = np.dot(var_phi, Elogbeta_doc).T
                (log_phi, log_norm) = log_normalize(phi)
                phi = np.exp(log_phi)
            else:
                phi = np.dot(var_phi, Elogbeta_doc).T + Elogsticks_2nd
                (log_phi, log_norm) = log_normalize(phi)
                phi = np.exp(log_phi)

            # v
            phi_all = phi * np.array(doc_word_counts)[:, np.newaxis]
            v[0] = 1.0 + np.sum(phi_all[:, :self.m_K - 1], 0)
            phi_cum = np.flipud(np.sum(phi_all[:, 1:], 0))
            v[1] = self.m_alpha + np.flipud(np.cumsum(phi_cum))
            Elogsticks_2nd = expect_log_sticks(v)

            likelihood = 0.0
            # compute likelihood
            # var_phi part/ C in john's notation
            likelihood += np.sum((Elogsticks_1st - log_var_phi) * var_phi)

            # v part/ v in john's notation, john's beta is alpha here
            log_alpha = np.log(self.m_alpha)
            likelihood += (self.m_K - 1) * log_alpha
            dig_sum = sp.psi(np.sum(v, 0))
            likelihood += np.sum(
                (np.array([1.0, self.m_alpha])[:, np.newaxis] - v) *
                (sp.psi(v) - dig_sum))
            likelihood -= np.sum(sp.gammaln(np.sum(v, 0))) - np.sum(
                sp.gammaln(v))

            # Z part
            likelihood += np.sum((Elogsticks_2nd - log_phi) * phi)

            # X part, the data part
            likelihood += np.sum(
                phi.T * np.dot(var_phi, Elogbeta_doc * doc_word_counts))

            converge = (likelihood - old_likelihood) / abs(old_likelihood)
            old_likelihood = likelihood

            if converge < -0.000001:
                logger.warning('likelihood is decreasing!')

            iter += 1

        # update the suff_stat ss
        # this time it only contains information from one doc
        ss.m_var_sticks_ss += np.sum(var_phi, 0)
        ss.m_var_beta_ss[:, chunkids] += np.dot(var_phi.T,
                                                phi.T * doc_word_counts)

        return likelihood
Example no. 50
def log_factorial(x):
    """Returns the logarithm of x!
    Also accepts lists and NumPy arrays in place of x."""
    return gammaln(np.array(x) + 1)
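Usage is straightforward (assuming the function above and its numpy/gammaln imports are available):

import numpy as np

print(np.exp(log_factorial([3, 5, 10])))  # [6. 120. 3628800.] up to float rounding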
Example no. 51
 def ig1fun(nu):
     return np.log(2 * mu**2) - np.log(
         (sigma**2 + mu**2) * (nu - 2)) + 2 * (gammaln(nu / 2) - gammaln(
             (nu - 1) / 2))
Example no. 52
def bfu(a, b, c, d, n, a_plus_b, a_plus_c, alpha, alpha_sum, beta):
    """
    Function for computing Bayes factors unconditional on n
    """

    num = (log(1.0 + 1.0 / beta) +
           gammaln(n + alpha_sum - 1.0) +
           4 * gammaln(alpha) +
           gammaln(a_plus_b + 2 * alpha - 1.0) +
           gammaln(c + d + 2 * alpha - 1.0) +
           gammaln(a_plus_c + 2 * alpha - 1.0) +
           gammaln(b + d + 2 * alpha - 1.0) +
           2 * gammaln(alpha_sum - 2.0))
    den = (gammaln(alpha_sum - 1.0) +
           sum([gammaln(alpha + x) for x in [a, b, c, d]]) +
           2 * gammaln(n + alpha_sum - 2.0) +
           4 * gammaln(2 * alpha - 1.0))

    return exp(num - den)
Example no. 53
def binomln(x1, x2):
    return gammaln(x1+1) - gammaln(x2+1) - gammaln(x1-x2+1)
Example no. 54
def log_beta_pdf(x,a,b):
    o = gammaln(a + b) - gammaln(a) - gammaln(b)
    o = o + (a-1)*np.log(x) + (b-1)*np.log(1-x)
    return o
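This is the Beta log density written out explicitly; a one-line cross-check against scipy.stats.beta (assuming the function above and its imports are in scope):

import numpy as np
from scipy.stats import beta

print(np.isclose(log_beta_pdf(0.3, 2.0, 5.0), beta.logpdf(0.3, 2.0, 5.0)))  # True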
Example no. 55
def newton_cotes(rn, equal=0):
    """
    Return weights and error coefficient for Newton-Cotes integration.

    Suppose we have (N+1) samples of f at the positions
    x_0, x_1, ..., x_N.  Then an N-point Newton-Cotes formula for the
    integral between x_0 and x_N is:

    :math:`\\int_{x_0}^{x_N} f(x)dx = \\Delta x \\sum_{i=0}^{N} a_i f(x_i)
    + B_N (\\Delta x)^{N+2} f^{N+1} (\\xi)`

    where :math:`\\xi \\in [x_0,x_N]`
    and :math:`\\Delta x = \\frac{x_N-x_0}{N}` is the average samples spacing.

    If the samples are equally-spaced and N is even, then the error
    term is :math:`B_N (\\Delta x)^{N+3} f^{N+2}(\\xi)`.

    Parameters
    ----------
    rn : int
        The integer order for equally-spaced data or the relative positions of
        the samples with the first sample at 0 and the last at N, where N+1 is
        the length of `rn`.  N is the order of the Newton-Cotes integration.
    equal : int, optional
        Set to 1 to enforce equally spaced data.

    Returns
    -------
    an : ndarray
        1-D array of weights to apply to the function at the provided sample
        positions.
    B : float
        Error coefficient.

    Notes
    -----
    Normally, the Newton-Cotes rules are used on smaller integration
    regions and a composite rule is used to return the total integral.

    """
    try:
        N = len(rn)-1
        if equal:
            rn = np.arange(N+1)
        elif np.all(np.diff(rn) == 1):
            equal = 1
    except:
        N = rn
        rn = np.arange(N+1)
        equal = 1

    if equal and N in _builtincoeffs:
        na, da, vi, nb, db = _builtincoeffs[N]
        an = na * np.array(vi, dtype=float) / da
        return an, float(nb)/db

    if (rn[0] != 0) or (rn[-1] != N):
        raise ValueError("The sample positions must start at 0"
                         " and end at N")
    yi = rn / float(N)
    ti = 2 * yi - 1
    nvec = np.arange(N+1)
    C = ti ** nvec[:, np.newaxis]
    Cinv = np.linalg.inv(C)
    # improve precision of result
    for i in range(2):
        Cinv = 2*Cinv - Cinv.dot(C).dot(Cinv)
    vec = 2.0 / (nvec[::2]+1)
    ai = Cinv[:, ::2].dot(vec) * (N / 2.)

    if (N % 2 == 0) and equal:
        BN = N/(N+3.)
        power = N+2
    else:
        BN = N/(N+2.)
        power = N+1

    BN = BN - np.dot(yi**power, ai)
    p1 = power+1
    fac = power*math.log(N) - gammaln(p1)
    fac = math.exp(fac)
    return ai, BN*fac
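A typical use of the returned weights (a sketch following the standard SciPy usage pattern): integrate sin on [0, pi] with the 5-point (N=4) rule.

import numpy as np

an, err_coef = newton_cotes(4, equal=1)
x = np.linspace(0, np.pi, 5)
dx = np.pi / 4
approx = dx * np.sum(an * np.sin(x))
print(approx)  # ~1.9986, close to the exact value 2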
Example no. 56
 def __init__(self, a, b):
     self.a = float(a)
     self.b = float(b)
     self.constant = -gammaln(self.a) + a * np.log(b)
Example no. 57
 def p_tau(self, t):
     """ P(τ|a,b) """
     t = row(t)
     lnP = -gammaln(col(self.a)) + col(self.a * np.log(
         self.b)) + col(self.a - 1) * np.log(t) - col(self.b) * t
     return np.exp(lnP)
Example no. 58
    def _lower_bound(self,Y2,XMw,MwXY,XSX,Sigma,E_w_sq,e_tau,e_A,b,d,a_init,b_init,
                                                                            c_init,
                                                                            d_init):
        '''
        Calculates lower bound and writes it to instance variable self.lower_bound.
        Does not include constants that do not change from one iteration to another.
        
        Parameters:
        -----------
        Y2: float
            Dot product Y.T*Y
            
        XMw: float
             L2 norm of X*Mw, where Mw - mean of posterior of weights
            
        MwXY: float
             Product of posterior mean of weights (Mw) and X.T*Y
             
        XSX: float
             Trace of matrix X*Sigma*X.T, where Sigma - covariance of posterior of weights
             
        Sigma: numpy array of size [self.m,self.m]
             Covariance matrix for Qw(w)
             
        E_w_sq: numpy array of size [self.m , 1]
             Vector of weight squares
        
        e_tau: float
             Mean of precision for noise parameter
             
        e_A: numpy array of size [self.m, 1]
             Vector of means of precision parameters for weight distribution
        
        b: numpy array
           Learned rate parameter of Gamma distribution
        
        d: float/int
           Learned rate parameter of Gamma distribution
           
        a_init: numpy array
           Initial shape parameter for Gamma distributed weights
           
        b_init: numpy array
           Initial rate parameter
           
        c_init: float
           Initial shape parameter for Gamma distributed precision of likelihood
           
        d_init: float
           Initial rate parameter
        '''
        # precompute for diffrent parts of lower bound
        e_log_tau       = psi(self.c) - np.log(d)
        e_log_alpha     = psi(self.a) - np.log(b)
        
        # Integration of likelihood Ew[Ealpha[Etau[ log P(Y| X*w, tau^-1)]]]
        like_first      =  0.5 * self.n * e_log_tau
        like_second     =  0.5 * e_tau * (Y2 - 2*MwXY + XMw + XSX)
        like            = like_first - like_second
        
        # Integration of weights Ew[Ealpha[Etau[ log P(w| 0, alpha)]]]
        weights         = 0.5*(np.sum((e_log_alpha)) - np.dot(e_A,E_w_sq))
        
        # Integration of precision parameter for weigts Ew[Ealpha[Etau[ log P(alpha| a, b)]]]
        alpha_prior     = np.dot((a_init-1),e_log_alpha)-np.dot(b_init,e_A)
        
        # Integration of precison parameter for likelihood
        tau_prior       = (c_init - 1)*e_log_tau - e_tau*d_init
        
        # E [ log( q_tau(tau) )]
        q_tau_const     = self.c*np.log(d) - gammaln(self.c)
        q_tau           = q_tau_const - d*e_tau + (self.c-1)*e_log_tau
        
        # E [ log( q_alpha(alpha)]
        q_alpha_const   = np.dot(self.a,np.log(b)) - np.sum(gammaln(self.a))
        q_alpha         = q_alpha_const - np.dot(b,e_A) + np.dot((self.a-1),e_log_alpha)
        
        # E [ log( q_w(w)) ]
        q_w             = -0.5*np.linalg.slogdet(Sigma)[1]

        # lower bound        
        L = like + weights + alpha_prior + tau_prior - q_w - q_alpha - q_tau
        self.lower_bound.append(L)
Example no. 59
def logfactorial(x):
    return gammaln(x + 1)
Example no. 60
 def kldiv(u, w):
     p, q = u.alpha, w.alpha
     return gammaln(p.sum()) - gammaln(q.sum()) \
            - np.sum(gammaln(p) - gammaln(q)) \
            + np.sum((p-q)*(digamma(q)-digamma(q.sum())))