Example #1
def LogJ(m_params, v_params, theano_observed_matrix):
    m_w, s_w, m_r, s_r, m_gamma, s_gamma, m_gamma0, s_gamma0, m_c0, s_c0, msigma, ssigma = v_params
    w, r, gamma, gamma0, c0, sigma = m_params
    is_observed_matrix_numpy = ~np.isnan(observed_matrix)
    is_observed_matrix.set_value(is_observed_matrix_numpy.astype(np.float64))
    theano_observed_matrix.set_value((np.nan_to_num(
        is_observed_matrix_numpy * observed_matrix)).astype(np.float64))
    log_j_t0 = time.clock()
    results, updates = theano.scan(
        fn=LogJointScanFn,
        sequences=[dict(input=np.arange(N), taps=[-1])],
        outputs_info=[dict(initial=np.float64(0), taps=[-1])],
        non_sequences=[is_observed_matrix, theano_observed_matrix])
    log_joint = results[-1]
    log_joint2 = (((D * gamma * T.log(gamma))[0] * r).sum() -
                  (D * T.gammaln(gamma[0] * r)).sum() +
                  ((gamma[0] * r - 1) * T.log(w)).sum() -
                  (gamma[0] * w).sum() +
                  (gamma0 * T.log(c0) -
                   THRESHOLD_RANK * T.gammaln(gamma0 / THRESHOLD_RANK) +
                   (gamma0 / THRESHOLD_RANK - 1)[0] * (T.log(r)).sum() -
                   (c0[0] * r).sum() - gamma - gamma0 - c0)[0])
    log_joint += log_joint2

    return (log_joint)
Example #2
    def shanon_Entropy_studentt(self, log_cov, freedom):
        Nrff, dout = log_cov.shape
        const = T.log(
            ((freedom - 2) * np.pi)**(dout / 2)
        ) + T.gammaln(freedom / 2) - T.gammaln((freedom + dout) / 2) + (T.psi(
            (freedom + dout) / 2) - T.psi(freedom / 2)) * (freedom + dout) / 2

        return 0.5 * T.sum(log_cov) + Nrff * const
Example #3
    def entropy_pi(self):
        log_gamma_term = T.sum( T.gammaln(self.tau_IBP[:,0]) + T.gammaln(self.tau_IBP[:,1]) \
                       - T.gammaln(self.tau_IBP[:,0] + self.tau_IBP[:,1]) )
        digamma_term = T.sum( (1.0-self.tau_IBP[:,0])*T.psi(self.tau_IBP[:,0])
                     + (1.0-self.tau_IBP[:,1])*T.psi(self.tau_IBP[:,1])
                     + (self.tau_IBP[:,0]+self.tau_IBP[:,1]-2.0)*T.psi(self.tau_IBP[:,0]+self.tau_IBP[:,1]) )

        return log_gamma_term + digamma_term
Example #4
 def calc_kl_divergence(self, prior_alpha, prior_beta):
     # use a Taylor approximation for the digamma function: psi(a) ~ log(a) - 1/(2a) - 1/(12a^2)
     psi_a_taylor_approx = T.log(
         self.a) - 1. / (2 * self.a) - 1. / (12 * self.a**2)
     kl = (self.a - prior_alpha) * psi_a_taylor_approx
     kl += -T.gammaln(self.a) + T.gammaln(prior_alpha) + prior_alpha * (
         T.log(self.b) - T.log(prior_beta)) + (
             (self.a * (prior_beta - self.b)) / self.b)
     return kl.sum(axis=1)
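The KL above uses the expansion psi(a) ~ log(a) - 1/(2a) - 1/(12a^2) in place of the exact digamma. A quick numeric look at how tight that expansion is (my own sketch with NumPy/SciPy, not part of the original example):

import numpy as np
from scipy.special import psi

# compare the exact digamma with the truncated expansion used in calc_kl_divergence
for a in [0.5, 1.0, 5.0, 50.0]:
    approx = np.log(a) - 1.0 / (2 * a) - 1.0 / (12 * a**2)
    print(a, psi(a), approx)  # the gap shrinks quickly as a grows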
Example #5
def _log_partition_symfunc():
    natural_params = T.vector()
    log_Z = T.sum(T.gammaln(natural_params + 1.)) -\
        T.gammaln(T.sum(natural_params + 1))

    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
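A small usage sketch for the compiled pair above (assuming Theano and SciPy are installed; the values are illustrative). Since log_Z here is the Dirichlet-style log-normalizer sum(gammaln(eta + 1)) - gammaln(sum(eta + 1)), its gradient should equal psi(eta + 1) - psi(sum(eta + 1)):

import numpy as np
import theano
from scipy.special import psi

func, grad_func = _log_partition_symfunc()
eta = np.array([0.5, 2.0, 3.5], dtype=theano.config.floatX)
print(func(eta))
print(grad_func(eta))
print(psi(eta + 1.0) - psi(np.sum(eta + 1.0)))  # should match the gradient above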
Example #6
def loglik_primary_f(k, y, theta, lower_n):
    logit_p = theta[0]
    logn = theta[1]
    n = lower_n + T.exp(logn)
    k = k[:, 0]

    p = T.nnet.nnet.sigmoid(logit_p)

    combiln = T.gammaln(n + 1) - (T.gammaln(k + 1) + T.gammaln(n - k + 1))
    # add y to stop theano from complaining
    #loglik = combiln + k * T.log(p) + (n - k) * T.log1p(-p) + 0.0 * T.sum(y)
    loglik = combiln + k * T.log(p) + (n - k) * T.log(1.0 - p) + 0.0 * T.sum(y)
    return loglik
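The combiln term above is the log binomial coefficient log C(n, k) written with gammaln. A minimal sanity check of that identity (a sketch using SciPy, not from the original project):

import numpy as np
from scipy.special import gammaln, comb

n, k = 10.0, 3.0
combiln = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))
print(np.isclose(combiln, np.log(comb(n, k))))  # True, since C(10, 3) = 120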
Example #7
    def __init__(self, mu=0.0, beta=None, cov=None, *args, **kwargs):
        super(GeneralizedGaussian, self).__init__(*args, **kwargs)
        # assert(mu.shape[0] == cov.shape[0] == cov.shape[1])
        dim = mu.shape[0]

        self.mu = mu
        self.beta = beta
        self.prec = tt.nlinalg.pinv(cov)
        # self.k = (dim * tt.gamma(dim / 2.0)) / \
        #          ((np.pi**(dim / 2.0)) * tt.gamma(1 + dim / (2 * beta)) * (2**(1 + dim / (2 * beta))))
        self.logk = tt.log(dim) + tt.gammaln(dim / 2.0) - \
                    (dim / 2.0) * tt.log(np.pi) - \
                    tt.gammaln(1 + dim / (2 * beta)) - \
                    (1 + dim / (2 * beta)) * tt.log(2.0)
Example #8
 def _negCLL(self, z, X):#, validation = False):
     """Estimate -log p[x|z]"""
     if self.params['data_type']=='binary':
         p_x_z    = self._conditionalXgivenZ(z)
         negCLL_m = T.nnet.binary_crossentropy(p_x_z,X)
     elif self.params['data_type'] =='bow':
         #Likelihood under a multinomial distribution
         if self.params['likelihood'] == 'mult':
             lsf      = self._conditionalXgivenZ(z)
             p_x_z    = T.exp(lsf) 
             negCLL_m = -1*(X*lsf)
         elif self.params['likelihood'] =='poisson':
             loglambda_p = self._conditionalXgivenZ(z)
             p_x_z       = T.exp(loglambda_p)
             negCLL_m    = -X*loglambda_p+T.exp(loglambda_p)+T.gammaln(X+1)
         else:
             raise ValueError('Invalid choice for likelihood: '+self.params['likelihood'])
     elif self.params['data_type']=='real':
         params   = self._conditionalXgivenZ(z)
         mu,logvar= params[0], params[1]
         p_x_z    = mu  
         negCLL_m = 0.5 * np.log(2 * np.pi) + 0.5*logvar + 0.5 * ((X - mu)**2)/T.exp(logvar)
     else:
         assert False,'Bad data_type: '+str(self.params['data_type'])
     return p_x_z, negCLL_m.sum(1,keepdims=True)
Example #9
    def compute_LogDensity_Yterms(self,
                                  Y=None,
                                  X=None,
                                  padleft=False,
                                  persamp=False):
        """
        
        TODO: The persamp option allows this function to return a list of the costs
        computed for each sample. This is useful for implementing more
        sophisticated optimization procedures such as NVIL. TO BE IMPLEMENTED...
         
        NOTE: Please accompany a compute function with an eval function that
        allows evaluation from an external program. compute functions assume by
        default that the 0th dimension of the data arrays is the trial
        dimension. If you deal with a single trial and the trial dimension is
        omitted, set padleft to True to pad it on the left.
        """
        if Y is None: Y = self.Y
        if X is None: X = self.X

        if padleft: Y = T.shape_padleft(Y, 1)

        Yprime = theano.clone(self.Rate, replace={self.X: X})
        Density = T.sum(Y * T.log(Yprime) - Yprime - T.gammaln(Y + 1))

        return Density
Example #10
    def get_viewed_cost(self, v0, vk_stat):
        # Binary cross-entropy
        cost = 0
        if self.input_type == InputType.binary:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat) + (1 - v0) * T.log(1 - clip_vk_stat), axis=1)

        # Sum square error
        elif self.input_type == InputType.gaussian:
            cost = T.sum((v0 - vk_stat) ** 2, axis=1)

        # Categorical cross-entropy
        elif self.input_type == InputType.categorical:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat), axis=1)

        elif self.input_type == InputType.poisson:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
            cost = -T.sum(-vk_stat + v0 * T.log(clip_vk_stat) - T.gammaln(1 + v0), axis=1)

        elif self.input_type == InputType.replicated_softmax:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
            cost = -T.sum((v0 / self.total_count) * T.log(clip_vk_stat), axis=1)

        return cost
Example #11
 def likelihood(xs):
     return tt.sum(
         tt.log(beta) -
         tt.log(2.0 * std *
                tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta))) -
         tt.gammaln(1.0 / beta) + -tt.power(
             tt.abs_(xs - mu) / std *
             tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta)), beta))
Example #12
 def likelihood(xs):
     return T.sum(
         T.log(beta) -
         T.log(2.0 * std *
               T.sqrt(T.gamma(1. / beta) / T.gamma(3. / beta))) -
         T.gammaln(1.0 / beta) + -T.power(
             T.abs_(xs - mu) / std *
             T.sqrt(T.gamma(1. / beta) / T.gamma(3. / beta)), beta))
Example #13
def log_joint_fn(N, D, K,  m_params, y, cov, mask):

    w, r, gamma, gamma0, c0, sigma = m_params

    results, updates = theano.scan(fn=log_joint_scan_fn,
                                   sequences=np.arange(N),
                                   outputs_info=[dict(initial=np.float64(0), taps=[-1])],
                                   non_sequences=[y, cov, mask])

    log_joint = results[-1]

    log_joint += ((D * gamma * T.log(gamma))[0] * r).sum() - (D * T.gammaln(gamma[0] * r)).sum() + (
    (gamma[0] * r - 1) * T.log(w)).sum() - (gamma[0] * w).sum() + (
                gamma0 * T.log(c0) - K * T.gammaln(gamma0 / K) + (gamma0 / K - 1)[0] * (T.log(r)).sum() - (
                c0[0] * r).sum() - gamma - gamma0 - c0)[0]

    return log_joint
Example #14
def log_negative_binomial(x, p, log_r, eps=0.0):
    """
    Compute the log pdf of a negative binomial distribution, with success probability p
    and number of failures r before the experiment is stopped, at values x.

    The log-factorial terms are evaluated exactly via gammaln, rather than with the
    Stirling approximation log x! = x log x - x.
    """

    x = T.clip(x, eps, x)

    p = T.clip(p, eps, 1.0 - eps)

    r = T.exp(log_r)
    r = T.clip(r, eps, r)

    y = T.gammaln(x + r) - T.gammaln(x + 1) - T.gammaln(r) \
        + x * T.log(p) + r * T.log(1 - p)

    return y
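A hedged usage sketch for log_negative_binomial, checking it against scipy.stats.nbinom (assumes Theano and SciPy are available; not part of the original snippet). With this parametrization, p corresponds to 1 - p in SciPy's convention:

import numpy as np
import theano
import theano.tensor as T
from scipy.stats import nbinom

x_t = T.vector('x')
p_t = T.scalar('p')
log_r_t = T.scalar('log_r')
f = theano.function([x_t, p_t, log_r_t], log_negative_binomial(x_t, p_t, log_r_t),
                    allow_input_downcast=True)

x = np.arange(5.0)
p, r = 0.3, 4.0
print(f(x, p, np.log(r)))
print(nbinom.logpmf(x, r, 1.0 - p))  # should agree elementwise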
Example #15
def log_poisson(x, log_lambda, eps=0.0):

    x = T.clip(x, eps, x)

    lambda_ = T.exp(log_lambda)
    lambda_ = T.clip(lambda_, eps, lambda_)

    y = x * log_lambda - lambda_ - T.gammaln(x + 1)

    return y
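The same kind of check for log_poisson against scipy.stats.poisson (again a sketch assuming Theano and SciPy are installed; x * log_lambda - lambda - gammaln(x + 1) is exactly the Poisson log pmf):

import numpy as np
import theano
import theano.tensor as T
from scipy.stats import poisson

x_t = T.vector('x')
log_lambda_t = T.scalar('log_lambda')
f = theano.function([x_t, log_lambda_t], log_poisson(x_t, log_lambda_t),
                    allow_input_downcast=True)

x = np.arange(6.0)
lam = 2.5
print(f(x, np.log(lam)))
print(poisson.logpmf(x, lam))  # should agree elementwise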
Example #16
def log_gumbel_softmax(y, logits, tau=1):
    shape = logits.shape
    k = shape[-1]
    logits_flat = logits.reshape((-1, k))
    p_flat = T.nnet.softmax(logits_flat)
    p = p_flat.reshape(shape)
    log_gamma = T.gammaln(k)
    logsum = T.log(T.sum(p / (y**tau), axis=-1))
    sumlog = T.sum(T.log(p / (y**(tau + 1))), axis=-1)
    return log_gamma + (k - 1) * T.log(tau) - k * logsum + sumlog
Example #17
def log_gumbel_softmax(x, mu, tau=1.0, eps=1e-6):
    """
    Compute the log pdf of a Gumbel-Softmax (Concrete) distribution with location mu and
    temperature tau, at values x.
        See Appendix B of https://arxiv.org/pdf/1611.01144v2.pdf
    """
    k = mu.shape[-1]
    logpdf = T.gammaln(k) + (k - 1) * T.log(tau + eps) \
        - k * T.log(T.sum(T.exp(mu) / T.power(x, tau), axis=2) + eps) \
        + T.sum(mu - (tau + 1) * T.log(x + eps), axis=2)
    return logpdf
Example #18
    def eval_prior(self, buffers):
        """
        Evaluates prior on the latent variables
        """
        zsamp = self.S[:, buffers[0]:-buffers[1]]
        n_samples = zsamp.shape[0] // self.batch_size
        zsamp = zsamp.reshape((self.batch_size, n_samples, -1))
        ks = zsamp.sum(axis=-1)
        ns = zsamp.shape[-1].astype(config.floatX) * T.ones_like(ks)
        log_nok = T.gammaln(ns + 1) - T.gammaln(ks + 1) - T.gammaln(ns - ks +
                                                                    1)
        log_p = 0
        if self.n_genparams == 1:
            log_p = -0.5 * (T.log(2 * np.pi) + 2 * T.log(self.p_sigma) +
                            ((self.P / self.p_sigma)**2).sum(axis=-1))
            log_p = log_p.reshape((self.batch_size, n_samples))

        return log_nok + ks * T.log(
            self.pz) + (ns - ks) * T.log(1 - self.pz) + log_p
Example #19
 def evaluateLogDensity(self,X,Y):
     # This is the log density of the generative model (*not* negated)
     Ypred = theano.clone(self.rate,replace={self.Xsamp: X})
     resY  = Y-Ypred
     resX  = X[1:]-T.dot(X[:-1],self.A.T)
     resX0 = X[0]-self.x0
     LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     #LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     PoisDensity = T.sum(Y * T.log(Ypred)  - Ypred - T.gammaln(Y + 1))
     LogDensity = LatentDensity + PoisDensity
     return LogDensity
Example #20
 def evaluateLogDensity(self,X,Y):
     # This is the log density of the generative model (*not* negated)
     Ypred = theano.clone(self.rate,replace={self.Xsamp: X})
     resY  = Y-Ypred
     resX  = X[1:]-T.dot(X[:-1],self.A.T)
     resX0 = X[0]-self.x0
     LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     #LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     PoisDensity = T.sum(Y * T.log(Ypred)  - Ypred - T.gammaln(Y + 1))
     LogDensity = LatentDensity + PoisDensity
     return LogDensity
Example #21
    def _get_log_partition_func(dim, nparams):
        np1, np2, np3, np4 = nparams
        idxs = np.arange(dim) + 1
        W = T.nlinalg.matrix_inverse(np1 - (1. / np3) * T.outer(np2, np2))
        log_Z = .5 * (np4 + dim) * T.log(T.nlinalg.det(W))
        log_Z += .5 * (np4 + dim) * dim * np.log(2)
        log_Z += .5 * dim * (dim - 4)
        log_Z += T.sum(T.gammaln(.5 * (np4 + dim + 1 - idxs)))
        log_Z += -.5 * dim * T.log(np3)

        return log_Z, theano.function([], log_Z)
Example #22
    def liklihood_studnet_t(self, target, free_param):
        self.beta = T.exp(self.ls)
        Covariance = self.beta
        LL = self.log_mvns(target, self.output,
                           Covariance)  # - 0.5*T.sum(T.dot(betaI,Ktilda)))

        N, n_out = target.shape

        CH_const = T.gammaln((n_out + n_out + free_param) / 2) - T.log(
            ((free_param + n_out) * np.pi)**(n_out / 2)) - T.gammaln(
                (free_param + n_out) / 2)

        ch_mc, updates = theano.scan(
            fn=lambda a: T.sum(T.log(1 + T.sum(a * a, -1) / free_param)),
            sequences=[W_samples])

        CH_MC = T.mean(ch_mc)

        CH = CH_const * num_FF - CH_MC * (free_param + n_out) / 2

        return LL
Example #23
def mvt_logpdf_theano(x, mu, Li, df):
    import theano.tensor as T
    dim = Li.shape[0]
    Ki = Li.T.dot(Li)
    #determinant is just multiplication of diagonal elements of cholesky
    logdet = 2*T.log(1./T.diag(Li)).sum()
    lpdf_const = (T.gammaln((df + dim) / 2)
                       -(T.gammaln(df/2)
                         + (T.log(df)+T.log(np.pi)) * dim*0.5
                         + logdet * 0.5)
                       )

    d = (x - mu.reshape((1 ,mu.size))).T
    Ki_d_scal = T.dot(Ki, d) / df          #vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1. #scalar
    
    res_pdf = (lpdf_const 
               - 0.5 * (df+dim) * T.log(d_Ki_d_scal_1)).flatten() 
    if res_pdf.size == 1:
        res_pdf = res_pdf[0]  # return a scalar rather than a length-1 vector
    return res_pdf 
Example #24
def _log_partition_symfunc():
    natural_params = T.vector()
    size = natural_params.shape[0] // 4
    np1, np2, np3, np4 = T.split(natural_params, 4 * [size], 4)

    log_Z = T.sum(T.gammaln(.5 * (np4 + 1)))
    log_Z += T.sum(- .5 * (np4 + 1) * T.log(.5 * (np1 - (np2 ** 2) / np3)))
    log_Z += T.sum(-.5 * T.log(np3))

    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
Example #25
File: niw.py Project: afcarl/trmix
def log_partf(b, s, C, v, logdet=None):
    D = b.size

    # multivariate log-gamma function
    g = tt.sum(tt.gammaln((v + 1. - tt.arange(1, D + 1)) /
                          2.)) + D * (D - 1) / 4. * np.log(np.pi)

    # log-partition function
    if logdet is None:
        return -v / 2. * tt.log(tl.det(C - tt.dot(b, b.T) / (4 * s))) \
         + v * np.log(2.) + g - D / 2. * tt.log(s)
    else:
        return -v / 2. * logdet + v * np.log(2.) + g - D / 2. * tt.log(s)
Example #26
    def logp_cho(cls, value, mu, cho, freedom, mapping):
        delta = mapping.inv(value) - mu

        lcho = tsl.solve_lower_triangular(cho, delta)
        beta = lcho.T.dot(lcho)
        n = cho.shape[0].astype(th.config.floatX)

        np5 = np.float32(0.5)
        np2 = np.float32(2.0)
        npi = np.float32(np.pi)

        r1 = -np5 * (freedom + n) * tt.log1p(beta / (freedom - np2))
        r2 = ifelse(
            tt.le(np.float32(1e6), freedom), -n * np5 * np.log(np2 * npi),
            tt.gammaln((freedom + n) * np5) - tt.gammaln(freedom * np5) -
            np5 * n * tt.log((freedom - np2) * npi))
        r3 = -tt.sum(tt.log(tnl.diag(cho)))
        det_m = mapping.logdet_dinv(value)

        r1 = debug(r1, name='r1', force=True)
        r2 = debug(r2, name='r2', force=True)
        r3 = debug(r3, name='r3', force=True)
        det_m = debug(det_m, name='det_m', force=True)

        r = r1 + r2 + r3 + det_m

        cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
        cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
        cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
        cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
        return ifelse(
            cond1, np.float32(-1e30),
            ifelse(
                cond2, np.float32(-1e30),
                ifelse(cond3, np.float32(-1e30),
                       ifelse(cond4, np.float32(-1e30), r))))
Example #27
 def _loglikelihood_step(self, Y_t, L_t, ll_t, W_t, M_t):
     import theano
     import theano.tensor as T
     sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[0,1]) \
         - T.sum(W_t, axis=1) - T.sum(T.gammaln(Y_t+1))
     M_nlc = theano.ifelse.ifelse(T.eq(L_t, -1), T.sum(M_t, axis=0),
                                  M_t[L_t])
     # for numerics: only account for values, where M_nlc is not zero
     a = T.switch(T.eq(M_nlc, 0.), T.min(sum_log_poisson), sum_log_poisson)
     a = T.max(a, keepdims=True)
     logarg = T.switch(
         T.eq(M_nlc, 0.), 0.,
         T.exp(sum_log_poisson - a) * M_nlc /
         T.cast(M_t.shape[0], dtype='float32'))
     logarg = T.sum(logarg)
     return ll_t + a[0] + T.log(logarg)
Example #28
    def logp(self, x):
        alpha = self.alpha
        n = tt.sum(x, axis=-1)
        sum_alpha = tt.sum(alpha, axis=-1)

        const = (tt.gammaln(n + 1) +
                 tt.gammaln(sum_alpha)) - tt.gammaln(n + sum_alpha)
        series = tt.gammaln(x + alpha) - (tt.gammaln(x + 1) +
                                          tt.gammaln(alpha))
        result = const + tt.sum(series, axis=-1)
        return result
Example #29
 def liklihood_studnet_t(self,target,free_param):            
     self.beta = T.exp(self.ls)
     Covariance = self.beta
     LL = self.log_mvns(target, self.output, Covariance)# - 0.5*T.sum(T.dot(betaI,Ktilda)))  
     
     N,n_out=target.shape    
     
     CH_const=T.gammaln((n_out+n_out+free_param)/2)-T.log(((free_param+n_out)*np.pi)**(n_out/2))-T.gammaln((free_param+n_out)/2) 
      
     ch_mc,updates=theano.scan(fn=lambda a: T.sum(T.log(1+T.sum(a*a,-1)/free_param)),
                           sequences=[W_samples])
      
     CH_MC=T.mean(ch_mc)
      
     CH=CH_const*num_FF-CH_MC*(free_param+n_out)/2    
     
 
     return LL
Example #30
def LogJ(m_params,v_params,theano_observed_matrix):
    m_matrix_estimate, s_matrix_estimate, m_w, s_w, m_r, s_r, m_gamma, s_gamma, m_gamma0, s_gamma0, m_c0, s_c0, msigma, ssigma=v_params
    matrix_estimate, w, r, gamma, gamma0, c0, sigma= m_params
    is_observed_matrix_numpy=~np.isnan(observed_matrix)
    is_observed_matrix.set_value(is_observed_matrix_numpy.astype(np.float64))
    theano_observed_matrix.set_value((np.nan_to_num(is_observed_matrix_numpy*observed_matrix)).astype(np.float64))
    log_j_t0=time.clock()
    log_joint=0
    for n in range(observed_count):
        log_joint+= np.power((observation_record[0,n]-matrix_estimate[int(observation_record_numpy[1,n]), int(observation_record_numpy[2,n])]),2)
    log_joint=(-1/(2*ERROR[0]*ERROR[0]))*log_joint
    print("first result")
    print(log_joint.eval())
    log_joint += -(N/2.0)*T.nlinalg.Det()(covariance_matrix) - (1/2.0)*T.nlinalg.trace(T.dot(T.dot(matrix_estimate, matrix_estimate.T), T.nlinalg.MatrixInverse()(covariance_matrix)))
    log_joint2= (((D*gamma*T.log(gamma))[0]*r).sum()-(D*T.gammaln(gamma[0]*r)).sum()+((gamma[0]*r-1)*T.log(w)).sum()-(gamma[0]*w).sum() + (gamma0*T.log(c0)-THRESHOLD_RANK*T.gammaln(gamma0/THRESHOLD_RANK)+(gamma0/THRESHOLD_RANK-1)[0]*(T.log(r)).sum()-(c0[0]*r).sum()-gamma-gamma0-c0)[0])
    log_joint += log_joint2
 
    return(log_joint)
Example #31
    def logp(self, value):
        topology = self.topology
        taxon_count = tt.as_tensor_variable(topology.get_taxon_count())
        root_index = topology.get_root_index()

        r = self.r
        a = self.a
        rho = self.rho

        log_coeff = (taxon_count - 1) * tt.log(2.0) - tt.gammaln(taxon_count)
        tree_logp = log_coeff + (taxon_count - 1) * tt.log(
            r * rho) + taxon_count * tt.log(1 - a)

        mrhs = -r * value
        zs = tt.log(rho + ((1 - rho) - a) * tt.exp(mrhs))
        ls = -2 * zs + mrhs
        root_term = mrhs[root_index] - zs[root_index]

        return tree_logp + tt.sum(ls) + root_term
Example #32
    def likelihood(self, z, y):
        η = z.flatten(min(2, z.ndim)) + self.bias
        Δ = self.binsize
        # 1st part of the likelihood
        L1 = tt.dot(y, η)
        if z.ndim > 1:
            ndim = z.ndim - 1
            shp_z = z.shape[-ndim:]
            L1 = L1.reshape(shp_z, ndim=ndim)
        # 2nd part of the likelihood
        λ = self.invlink(z + self.bias)
        L2 = Δ * tt.sum(λ, axis=0)
        # constant factors
        c1 = tt.sum(y) * tt.log(Δ)
        c2 = -tt.sum(tt.where(y > 1, tt.gammaln(y + 1), 0.0))
        const = c1 - c2

        L = L1 - L2 + const
        return as_tensor_variable(L, name='logL')
Example #33
 def _loglikelihood_step(self, Y_t, L_t, ll_t, W_t, M_t):
     import theano
     import theano.tensor as T
     sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[0,1]) \
         - T.sum(W_t, axis=1) - T.sum(T.gammaln(Y_t+1))
     M_nlc = theano.ifelse.ifelse(
         T.eq(L_t, -1),
         T.sum(M_t, axis=0),
         M_t[L_t]
         )
     # for numerics: only account for values, where M_nlc is not zero
     a = T.switch(
         T.eq(M_nlc, 0.),
         T.min(sum_log_poisson),
         sum_log_poisson
         )
     a = T.max(a, keepdims=True)
     logarg = T.switch(
         T.eq(M_nlc, 0.),
         0.,
         T.exp(sum_log_poisson - a)*M_nlc/T.cast(M_t.shape[0], dtype='float32')
         )
     logarg = T.sum(logarg)
     return ll_t + a[0] + T.log(logarg)
Example #34
 def logprob(self, y_target, n, p):
     coeff = T.gammaln(n + y_target) - T.gammaln(y_target + 1) - T.gammaln(n)
     return - (coeff + n * T.log(p) + y_target * T.log(1-p))
Example #35
def kldiv_gamma(a1, b1, a0=a0, b0=b0):
    return T.sum((a1 - a0)*nnu.Psi()(a1) - T.gammaln(a1) + T.gammaln(a0) + a0*(T.log(b1) - T.log(b0)) + a1*((b0 - b1)/b1))
Example #36
    def __init__(self, rng, input, n_in, n_out, num_MC,num_FF,n_tot,free_param,Domain_number=None,number="1",Domain_consideration=True):
        # input is also expected to come in with shape 100*N*D.
        self.DATA=input
        #N=DATA.shape[1]
        #n_in_D=DATA.shape[2]
        srng = RandomStreams(seed=234)
        self.num_rff=num_FF

        
        #Define hyperparameters
        lhyp_values = np.zeros(n_in+1,dtype=theano.config.floatX)+np.log(0.1,dtype=theano.config.floatX)
        #lhyp_values = np.zeros(n_in+1,dtype=theano.config.floatX)+np.log(1.,dtype=theano.config.floatX)
        self.lhyp = theano.shared(value=lhyp_values, name='lhyp'+number, borrow=True)
        self.sf2,self.l = T.exp(self.lhyp[0]), T.exp(self.lhyp[1:1+n_in])
        
        if Domain_consideration:  # 0.1 worked well in prior work
            ls_value=np.zeros(Domain_number,dtype=theano.config.floatX)+np.log(0.1,dtype=theano.config.floatX)
        else:
            ls_value=np.zeros(1,dtype=theano.config.floatX)+np.log(0.1,dtype=theano.config.floatX)
        
        self.ls = theano.shared(value=ls_value, name='ls'+number, borrow=True)
        
        
        
        #Define prior omega
        #prior_mean_Omega.append(tf.zeros([self.d_in[i],1]))
        self.log_prior_var_Omega=T.tile(1/(self.l)**0.5,(num_FF,1)).T
        
        #Define posterior omega
        
        #get samples from  omega
        
        sample_value = np.random.randn(1,n_in,num_FF)
        
        self.sample_Omega_epsilon_0 = theano.shared(value=sample_value, name='sample_Omega'+number)
        #self.sample_Omega_epsilon_0 = srng.normal((1,n_in,num_FF))
        Omega_sample=self.sample_Omega_epsilon_0*self.log_prior_var_Omega[None,:,:]
        Omega_samples=T.tile(Omega_sample,(num_MC,1,1))
        
        self.samples=Omega_samples
        #Define prior W
        #prior_mean_W = T.zeros(2*num_FF)
        
        #log_prior_var_W = T.ones(2*num_FF)        
        
        #Define posterior W
        
        
        mean_mu_value = np.random.randn(2*num_FF,n_out)#* 1e-2 
        self.mean_mu = theano.shared(value=mean_mu_value, name='mean_mu'+number, borrow=True)
        
        log_var_value = np.zeros((2*num_FF,n_out))
        self.log_var_W = theano.shared(value=log_var_value, name='q_W'+number, borrow=True)
        
        #get samples from W
        sample_Omega_epsilon = srng.normal((num_MC,2*num_FF,n_out))
        f2 = T.cast(free_param, 'int64')
        N=srng.uniform(size=(f2+n_tot,num_MC), low=1e-10,high=1.0)
        gamma_factor=T.sum(T.log(N),0)*(-1)
        
        #gamma_factor=self.gamma_dist(free_param+n_tot,1,num_MC)
        
        
        sample_Omega_epsilon_gamma=((free_param+n_tot)/gamma_factor)[:,None,None]*sample_Omega_epsilon
        #MC*Nrff*dout
        W_samples = sample_Omega_epsilon_gamma * (T.exp(self.log_var_W)**0.5)[None,:,:] + self.mean_mu[None,:,:]
        

        
        # calculate lyaer N_MC*N*D_out
        F_next, updates = theano.scan(fn=lambda a,b,c: self.passage(a,b,c,num_FF),
                              sequences=[input,Omega_samples,W_samples])
        
        #output
        self.output = F_next
        
        #KL-divergence
         #Omega
         
         
         #W
         #cross-entropy-term
        #self.KL_W=self.DKL_gaussian(self.mean_mu, self.log_var_W, prior_mean_W, log_prior_var_W)
        CH_const=T.gammaln((n_out+free_param)/2)-T.log(((free_param-2)*np.pi)**(n_out/2))-T.gammaln(free_param/2) 
         
        ch_mc,updates=theano.scan(fn=lambda a: (T.log(1+T.sum(a*a,-1)/(free_param-2))),
                              sequences=[W_samples])
         
        CH_MC=T.mean(T.sum(ch_mc,-1))
         
        CH=CH_const*num_FF-CH_MC*(free_param+n_out)/2
        
         #entropy-term
        HF = self.shanon_Entropy_studentt(self.log_var_W,free_param+n_tot)
        
        self.KL_W=-HF-CH
        
        #parameter_setting
        self.all_params=[self.lhyp,self.ls,self.mean_mu,self.log_var_W]
        self.hyp_params=[self.lhyp,self.ls]
        self.variational_params=[self.mean_mu,self.log_var_W]
Example #37
 def in_loop(i,prev_res):
     j=i+1
     res = prev_res + T.gammaln(a + 0.5*(1 - j))
     return res
Example #38
 def kldiv_r(self, a1, b1):
     return - ((a1 - self.a0)*nnu.Psi()(a1) - T.gammaln(a1) + T.gammaln(self.a0) +
               self.a0*(T.log(b1) - T.log(self.b0)) + a1*((self.b0 - b1)/b1))[0]
Example #39
 def shanon_Entropy_studentt(self,log_cov,freedom):
     Nrff,dout=log_cov.shape
     const=T.log(((freedom-2)*np.pi)**(dout/2))+T.gammaln(freedom/2)-T.gammaln((freedom+dout)/2)  +   (T.psi((freedom+dout)/2 ) - T.psi(freedom/2))*(freedom+dout)/2 
     
     return 0.5*T.sum(log_cov) + Nrff*const
Example #40
def Beta_fn(a, b):
    return T.exp(T.gammaln(a) + T.gammaln(b) - T.gammaln(a+b))
Example #41
 def _log_beta_vec_func(self, alpha):
     output = 0
     for _k in range(self.k):
         output += T.gammaln(self._slice_last(alpha, _k))
     output -= T.gammaln(T.sum(alpha, axis=-1))
     return output
Example #42
 def _log_beta_func(self, alpha, beta):
     return T.gammaln(alpha) + T.gammaln(beta) - T.gammaln(alpha + beta)
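The expression above is the log Beta function; a one-line numeric check against SciPy (my sketch, not from the original project):

import numpy as np
from scipy.special import gammaln, betaln

a, b = 2.5, 7.0
print(np.isclose(gammaln(a) + gammaln(b) - gammaln(a + b), betaln(a, b)))  # True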
Example #43
    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        if not self.run_already:
            from theano import tensor as t
            import theano
            y = t.matrix(name='y')
            f = t.matrix(name='f')
            g = t.matrix(name='g')

            def theano_logaddexp(x,y):
                #Implementation of logaddexp from numpy, but in theano
                tmp = x - y
                return t.where(tmp > 0, x + t.log1p(t.exp(-tmp)), y + t.log1p(t.exp(tmp)))

            # Full log likelihood before expectations
            logpy_t = -(t.exp(f) + t.exp(g)) + y*theano_logaddexp(f, g) - t.gammaln(y+1)
            logpy_sum_t = t.sum(logpy_t)

            dF_df_t = theano.grad(logpy_sum_t, f)
            d2F_df2_t = 0.5*theano.grad(t.sum(dF_df_t), f)  # This right?
            dF_dg_t = theano.grad(logpy_sum_t, g)
            d2F_dg2_t = 0.5*theano.grad(t.sum(dF_dg_t), g)  # This right?

            self.logpy_func = theano.function([f,g,y],logpy_t)
            self.dF_df_func = theano.function([f,g,y],dF_df_t)  # , mode='DebugMode')
            self.d2F_df2_func = theano.function([f,g,y],d2F_df2_t)
            self.dF_dg_func = theano.function([f,g,y],dF_dg_t)
            self.d2F_dg2_func = theano.function([f,g,y],d2F_dg2_t)
            self.run_already = True

        funcs = [self.logpy_func, self.dF_df_func, self.d2F_df2_func, self.dF_dg_func, self.d2F_dg2_func]

        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]

        F = 0  # Could do analytical components here

        T = self.T
        # Need to get these now to duplicate the censored inputs for quadrature
        gh_x, gh_w = self._gh_points(T)

        (F_quad, dF_dmf, dF_dvf, dF_dmg, dF_dvg) = self.quad2d(funcs=funcs, Y=Y, mf=mf, vf=vf, mg=mg, vg=vg,
                                                               gh_points=gh_points, exp_f=False, exp_g=False)

        F += F_quad
        # gprec = safe_exp(mg - 0.5*vg)
        dF_dmf += 0
        dF_dmg += 0
        dF_dvf += 0
        dF_dvg += 0

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        if np.any(np.isnan(F_quad)):
            raise ValueError("Nan <log p(y|f,g)>_qf_qg")
        if np.any(np.isnan(dF_dmf)):
            raise ValueError("Nan gradients <log p(y|f,g)>_qf_qg wrt to qf mean")
        if np.any(np.isnan(dF_dmg)):
            raise ValueError("Nan gradients <log p(y|f,g)>_qf_qg wrt to qg mean")

        test_integration = False
        if test_integration:
            # Some little code to check the result numerically using quadrature
            from scipy import integrate
            i = 6  # datapoint index

            def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi):
                #link_fi = np.exp(fi)
                #link_gi = np.exp(gi)
                #logpy_fg = -(link_fi + link_gi) + yi*np.logaddexp(fi, gi) - sp.special.gammaln(yi+1)
                logpy_fg = self.logpdf(np.atleast_2d(np.hstack([fi,gi])), np.atleast_2d(yi))
                return (logpy_fg  # log p(y|f,g)
                        * np.exp(-0.5*np.log(2*np.pi*vgi) - 0.5*((gi - mgi)**2)/vgi)  # q(g)
                        * np.exp(-0.5*np.log(2*np.pi*vfi) - 0.5*((fi - mfi)**2)/vfi)  # q(f)
                        )
            quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i], mfi=mf[i], vfi=vf[i])

            def integrl(gi):
                return integrate.quad(quad_func_l, -70, 70, args=(gi))[0]

            print "These should match"

            print "Numeric scipy F quad"
            print integrate.quad(lambda fi: integrl(fi), -70, 70)

            print "2d quad F quad"
            print F[i]

        return F, dF_dm, dF_dv, None
Example #44
 def log_likelihood(self, samples, alpha, beta):
     output = alpha * T.log(beta + epsilon()) - T.gammaln(alpha)
     output += (alpha - 1) * T.log(samples + epsilon())
     output += -beta * samples
     return mean_sum_samples(output)
Example #45
    term3 = map(s.special.gammaln, (degMatrix==0).sum(axis= 0) + alphaNull[0]) - s.special.gammaln(alphaNull[0])
    return  np.array(term1 + term2 + term3)
        
        
###################################################################

###  The following are Theano representations of certain functions
# sum matrix ops

m1 = T.fmatrix()
m2 = T.fmatrix()
add = function([m1, m2], m1 + m2, allow_input_downcast=True)

# declare a function that calculates gammaln on a shared variable on the GPU
aMatrix = shared(np.zeros((65536, 8192)), config.floatX, borrow=True)
gamma_ln = function([ ], T.gammaln(aMatrix))
theanoExp = function([ ], T.exp(aMatrix))
    
alpha = T.fscalar()
gamma_ln_scalar = function([alpha], T.gammaln(alpha), allow_input_downcast=True)

# now compute the second part of the F-score, which is the covariance of mut and deg
mutMatrix = shared(np.ones((32768, 4096)), config.floatX, borrow=True )  
expMatrix = shared(np.ones((8192, 4096)), config.floatX, borrow=True)
mDotE = function([], T.dot(mutMatrix, expMatrix))

nijk_11 = shared(np.zeros((32768, 4096)), config.floatX)
nijk_01 = shared(np.zeros((32768, 4096)), config.floatX)

fscore = shared(np.zeros((32768, 4096)), config.floatX)
tmpLnMatrix = shared(np.zeros((32768, 4096)), config.floatX, borrow=True)
Example #46
 def free_energy(self, v_sample):
     tmp, h = self.propup(v_sample)
     return -T.dot(v_sample, self.vbias) - T.dot(h, self.hbias) + \
            T.sum((-T.dot(v_sample, self.W) * h + T.gammaln(v_sample + 1)), axis=1)
Example #47
 def free_energy(v,h):
     #return -T.dot(v_sample, self.vbias) - T.dot(h, self.hbias) + \
          #  T.sum((-T.dot(v_sample, self.W) * h + T.gammaln(v_sample + 1)), axis=1)
     return -(v * bv).sum() - (h * bh).sum() + T.sum((- T.dot(v, W) * h + T.gammaln(v + 1)))
Example #48
def betaln(alpha, beta):
    return T.gammaln(alpha)+T.gammaln(beta) - T.gammaln(alpha+beta)
Example #49
    def loglikelihood(self, data=None, mode='unsupervised'):
        """
        Calculate the log-likelihood of the given data under the model
        parameters.

        Keyword arguments:
        data: nparray (data,label) or 'None' for input data
        mode=['unsupervised','supervised']: calculate supervised or
              unsupervised loglikelihood
        """
        # To avoid numerical problems, the log-likelihood has to be
        # calculated in a more costly way, using intermediate
        # logarithmic functions.

        # input data
        if data is None:
            Y = self.MultiLayer[0].Layer[0].get_input_data().astype('float32')
        else:
            Y = np.asarray(
                [self.MultiLayer[0].Layer[0].output(y) for y in data[0]],
                dtype='float32')

        # labels
        if (mode == 'supervised'):
            if data is None:
                L = self.MultiLayer[0].Layer[0].get_input_label()
            else:
                L = data[1]
        elif (mode == 'unsupervised'):
            L = (-1)*np.ones(Y.shape[0])
        
        # weights & dimensions
        W = self.MultiLayer[0].Layer[1].get_weights().astype('float32')
        N = Y.shape[0]
        C = W.shape[0]
        D = W.shape[1]
        if (self.number_of_multilayers() == 2):
            try:
                M = self.MultiLayer[1].Layer[1].get_weights()
            except:
                M = None
        elif ((self.number_of_multilayers() == 1) and
              (self.MultiLayer[0].number_of_layers() == 3)):
            M = self.MultiLayer[0].Layer[2].get_weights()
        else:
            M = None
        try:
            K = M.shape[0]
        except:
            K = None

        if not self._theano:
            if M is None:
                ones = np.ones(shape=(C,D), dtype=float)
                log_likelihood = np.empty(N, dtype=float)
                for ninput in xrange(N):
                    sum_log_poisson = np.sum(
                        log_poisson_function(ones*Y[ninput,:], W), axis=1)
                    a = np.max(sum_log_poisson)
                    log_likelihood[ninput] = -np.log(C) + a + \
                        np.log(np.sum(np.exp(sum_log_poisson - a)))
            else:
                ones = np.ones(shape=(C,D), dtype=float)
                log_likelihood = np.empty(N, dtype=float)
                for ninput in xrange(N):
                    sum_log_poisson = np.sum(
                        log_poisson_function(ones*Y[ninput,:], W), axis=1)
                    a = np.max(sum_log_poisson)
                    if (L[ninput] == -1):
                        log_likelihood[ninput] = a + np.log(np.sum(
                            np.exp(sum_log_poisson-a)*np.sum(M,axis=0)/float(K)))
                    else:
                        log_likelihood[ninput] = a + np.log(np.sum(
                            np.exp(sum_log_poisson - a)*
                            M[L[ninput],:]/float(K)))
            mean_log_likelihood = np.mean(log_likelihood)
            sum_log_likelihood = np.zeros_like(mean_log_likelihood)
            MPI.COMM_WORLD.Allreduce(mean_log_likelihood, sum_log_likelihood, op=MPI.SUM)
            mean_log_likelihood = sum_log_likelihood/float(MPI.COMM_WORLD.Get_size())
        else:
            import theano
            import theano.tensor as T
            ml = self.MultiLayer[0]
            if ml._scan_batch_size is None:
                nbatches = 1
                scan_batch_size = ml.Layer[0].get_input_data().shape[0]
            else:
                nbatches = int(np.ceil(
                    ml.Layer[0].get_input_data().shape[0]
                    /float(ml._scan_batch_size)))
                scan_batch_size = ml._scan_batch_size
            batch_log_likelihood = np.zeros(nbatches, dtype='float32')
            if M is None:
                if (self._t_sum_log_likelihood_W is None):
                    Y_t = T.matrix('Y', dtype='float32')
                    L_t = T.vector('L', dtype='int32')
                    W_t = T.matrix('W', dtype='float32')
                    sum_log_poisson = T.tensordot(Y_t,T.log(W_t), axes=[1,1]) \
                        - T.sum(W_t, axis=1) \
                        - T.sum(T.gammaln(Y_t+1), axis=1, keepdims=True)
                    a = T.max(sum_log_poisson, axis=1, keepdims=True)
                    logarg = T.sum(T.exp(sum_log_poisson-a), axis=1)
                    log_likelihood = -T.log(C) + a[:,0] + T.log(logarg)
                    # Compile theano function
                    self._t_sum_log_likelihood_W = theano.function(
                        [Y_t,L_t,W_t],
                        T.sum(log_likelihood),
                        on_unused_input='ignore')
                for nbatch in xrange(nbatches):
                    batch_log_likelihood[nbatch] = self._t_sum_log_likelihood_W(
                        Y[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('float32'),
                        L[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('int32'),
                        W.astype('float32'))
            else:
                if (self._t_sum_log_likelihood is None):
                    Y_t = T.matrix('Y', dtype='float32')
                    L_t = T.vector('L', dtype='int32')
                    W_t = T.matrix('W', dtype='float32')
                    M_t = T.matrix('M', dtype='float32')
                    sum_log_poisson = T.tensordot(Y_t,T.log(W_t), axes=[1,1]) \
                        - T.sum(W_t, axis=1) \
                        - T.sum(T.gammaln(Y_t+1), axis=1, keepdims=True)
                    M_nlc = M_t[L_t]
                    L_index = T.eq(L_t,-1).nonzero()
                    M_nlc = T.set_subtensor(M_nlc[L_index], T.sum(M_t, axis=0))
                    # for numerics: only account for values, where M_nlc is
                    # not zero
                    a = T.switch(
                        T.eq(M_nlc, 0.),
                        T.cast(T.min(sum_log_poisson), dtype = 'int32'),
                        T.cast(sum_log_poisson, dtype = 'int32'))
                    a = T.max(a, axis=1, keepdims=True)
                    # logarg = T.switch(
                    #     T.eq(M_nlc, 0.),
                    #     0.,
                    #     T.exp(sum_log_poisson-a).astype('float32')*M_nlc\
                    #         /M_t.shape[0].astype('float32'))
                    logarg = T.switch(
                        T.eq(M_nlc, 0.),
                        0.,
                        T.exp(sum_log_poisson-a.astype('float32'))
                    )
                    logarg = T.sum(logarg, axis=1)
                    log_likelihood = a[:,0].astype('float32') + T.log(logarg)
                    # Compile theano function
                    self._t_sum_log_likelihood = theano.function(
                        [Y_t,L_t,W_t,M_t],
                        T.sum(log_likelihood),
                        on_unused_input='ignore')
                    """
                    # LL_scan:
                    ll_t = T.scalar('loglikelihood', dtype='float32')
                    sequences = [Y_t, L_t]
                    outputs_info = [ll_t]
                    non_sequences = [W_t, M_t]
                    likelihood, updates = theano.scan(
                        fn=self._loglikelihood_step,
                        sequences=sequences,
                        outputs_info=outputs_info,
                        non_sequences=non_sequences)
                    result = likelihood[-1]
                    # Compile function
                    self._loglikelihood_scan = theano.function(
                        inputs=sequences + outputs_info + non_sequences,
                        outputs=result,
                        name='loglikelihood')
                    """
                for nbatch in xrange(nbatches):
                    batch_log_likelihood[nbatch] = self._t_sum_log_likelihood(
                        Y[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('float32'),
                        L[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('int32'),
                        W.astype('float32'),
                        M.astype('float32'))
            mean_log_likelihood = np.sum(batch_log_likelihood)/float(N)
        return mean_log_likelihood
Example #50
 def __call__(self, X):
     A = T.dot(X[:-1], self.w_trans)
     A = T.exp(T.concatenate([w_init, A], axis=0))
     B = T.sum(T.gammaln(A), axis=-1) - T.gammaln(T.sum(A, axis=-1))
     L = T.dot(A-1, X.dimshuffle(0, 2, 1)) - B
Example #51
def analytical_kl(q1, q2, given, deterministic=False):
    try:
        [x1, x2] = given
    except:
        raise ValueError("The length of given list must be 2, "
                         "got %d" % len(given))

    q1_class = q1.__class__.__name__
    q2_class = q2.__class__.__name__
    if q1_class == "Gaussian" and q2_class == "UnitGaussianSample":
        mean, var = q1.fprop(x1, deterministic=deterministic)
        return gauss_unitgauss_kl(mean, var)

    elif q1_class == "Gaussian" and q2_class == "Gaussian":
        mean1, var1 = q1.fprop(x1, deterministic=deterministic)
        mean2, var2 = q2.fprop(x2, deterministic=deterministic)
        return gauss_gauss_kl(mean1, var1, mean2, var2)

    elif q1_class == "Bernoulli" and q2_class == "UnitBernoulliSample":
        mean = q1.fprop(x1, deterministic=deterministic)
        output = mean * (T.log(mean + epsilon()) + T.log(2)) +\
            (1 - mean) * (T.log(1 - mean + epsilon()) + T.log(2))
        return T.sum(output, axis=1)

    elif q1_class == "Categorical" and q2_class == "UnitCategoricalSample":
        mean = q1.fprop(x1, deterministic=deterministic)
        output = mean * (T.log(mean + epsilon()) + T.log(q1.k))
        return T.sum(output, axis=1)

    elif q1_class == "Kumaraswamy" and q2_class == "UnitBetaSample":
        """
        [Nalisnick+ 2016]
        Deep Generative Models with Stick-Breaking Priors
        """
        M = 10
        euler_gamma = 0.57721

        a, b = q1.fprop(x1, deterministic=deterministic)

        def taylor(m, a, b):
            return 1. / (m + a * b) * q2._beta_func(m / a, b)
        kl, _ = theano.scan(fn=taylor,
                            sequences=T.arange(1, M + 1),
                            non_sequences=[a, b])
        kl = T.sum(kl, axis=0)
        kl *= (q2.beta - 1) * b

        kl += ((a - q2.alpha) / a + epsilon()) *\
              (-euler_gamma - psi(b) - 1. / (b + epsilon()))
        kl += T.log(a * b + epsilon()) +\
            T.log(q2._beta_func(q2.alpha, q2.beta) + epsilon())
        kl += -(b - 1) / (b + epsilon())

        return T.sum(kl, axis=1)

    elif q1_class == "Gamma" and q2_class == "UnitGammaSample":
        """
        https://arxiv.org/pdf/1611.01437.pdf
        """
        alpha1, beta1 = q1.fprop(x1, deterministic=deterministic)
        alpha2 = T.ones_like(alpha1)
        beta2 = T.ones_like(beta1)

        output = (alpha1 - alpha2) * psi(alpha1)
        output += -T.gammaln(alpha1) + T.gammaln(alpha2)
        output += alpha2 * (T.log(beta1 + epsilon()) -
                            T.log(beta2 + epsilon()))
        output += alpha1 * (beta2 - beta1) / (beta1 + epsilon())

        return T.sum(output, axis=1)

    elif q1_class == "Beta" and q2_class == "UnitBetaSample":
        """
        http://bariskurt.com/kullback-leibler-divergence\
        -between-two-dirichlet-and-beta-distributions/
        """
        alpha1, beta1 = q1.fprop(x1, deterministic=deterministic)
        alpha2 = T.ones_like(alpha1) * q2.alpha
        beta2 = T.ones_like(beta1) * q2.beta

        output = T.gammaln(alpha1 + beta1) -\
            T.gammaln(alpha2 + beta2) -\
            (T.gammaln(alpha1) + T.gammaln(beta1)) +\
            (T.gammaln(alpha2) + T.gammaln(beta2)) +\
            (alpha1 - alpha2) * (psi(alpha1) - psi(alpha1 + beta1)) +\
            (beta1 - beta2) * (psi(beta1) - psi(alpha1 + beta1))

        return T.sum(output, axis=1)

    elif q1_class == "Dirichlet" and q2_class == "UnitDirichletSample":
        """
        http://bariskurt.com/kullback-leibler-divergence\
        -between-two-dirichlet-and-beta-distributions/
        """
        alpha1 = q1.fprop(x1, deterministic=deterministic)
        alpha1 = alpha1.reshape((alpha1.shape[0], alpha1.shape[1] / q1.k,
                                 q1.k))

        alpha2 = T.ones_like(alpha1) * q2.alpha

        output = T.gammaln(T.sum(alpha1, axis=-1)) -\
            T.gammaln(T.sum(alpha2, axis=-1)) -\
            T.sum(T.gammaln(alpha1), axis=-1) +\
            T.sum(T.gammaln(alpha2), axis=-1) +\
            T.sum((alpha1 - alpha2) *
                  (psi(alpha1) -
                   psi(T.sum(alpha1, axis=-1,
                             keepdims=True))), axis=-1)

        return T.sum(output, axis=1)

    elif (q1_class == "MultiDistributions") and (
            q2_class == "MultiPriorDistributions"):
        """
        PixelVAE
        https://arxiv.org/abs/1611.05013
        """
        all_kl = 0
        for i, q, p in zip(range(len(q1.distributions[:-1])),
                           q1.distributions[:-1],
                           reversed(q2.distributions)):
            if i == 0:
                _x = x1
            else:
                _x = q1.sample_mean_given_x(x1, layer_id=i - 1)[-1]
            z = q1.sample_given_x(x1, layer_id=i + 1)[-1]
            kl = analytical_kl(q, p, given=[tolist(_x), tolist(z)])
            all_kl += kl

        _x = q1.sample_mean_given_x(x1, layer_id=-2)[-1]
        kl = analytical_kl(
            q1.distributions[-1], q2.prior, given=[tolist(_x), None])
        all_kl += kl

        return all_kl

    elif q1_class == "MultiDistributions":
        if len(q1.distributions) >= 2:
            _x1 = q1.sample_given_x(x1, layer_id=-2)[-1]
        else:
            _x1 = x1
        return analytical_kl(q1.distributions[-1], q2,
                             given=[tolist(_x1), x2],
                             deterministic=deterministic)

    raise Exception("You cannot use this distribution as q or prior, "
                    "got %s and %s." % (q1_class, q2_class))
Example #52
def multinomial_coefficient(Obs, K, num_Obs):
    Ns_p1 = T.dot(Obs,T.ones((K,1))) + T.ones((num_Obs,1))   
    Obs_p1 = Obs + T.ones((num_Obs, K))
    lnmlti = T.gammaln(Ns_p1) - T.dot(T.gammaln(Obs_p1),T.ones((K,1)))
    return T.exp(lnmlti)