def LogJ(m_params, v_params, theano_observed_matrix):
    m_w, s_w, m_r, s_r, m_gamma, s_gamma, m_gamma0, s_gamma0, \
        m_c0, s_c0, msigma, ssigma = v_params
    w, r, gamma, gamma0, c0, sigma = m_params
    is_observed_matrix_numpy = ~np.isnan(observed_matrix)
    is_observed_matrix.set_value(is_observed_matrix_numpy.astype(np.float64))
    theano_observed_matrix.set_value(
        (np.nan_to_num(is_observed_matrix_numpy * observed_matrix)).astype(np.float64))
    log_j_t0 = time.clock()
    results, updates = theano.scan(
        fn=LogJointScanFn,
        sequences=[dict(input=np.arange(N), taps=[-1])],
        outputs_info=[dict(initial=np.float64(0), taps=[-1])],
        non_sequences=[is_observed_matrix, theano_observed_matrix])
    log_joint = results[-1]
    log_joint2 = (((D * gamma * T.log(gamma))[0] * r).sum()
                  - (D * T.gammaln(gamma[0] * r)).sum()
                  + ((gamma[0] * r - 1) * T.log(w)).sum()
                  - (gamma[0] * w).sum()
                  + (gamma0 * T.log(c0)
                     - THRESHOLD_RANK * T.gammaln(gamma0 / THRESHOLD_RANK)
                     + (gamma0 / THRESHOLD_RANK - 1)[0] * (T.log(r)).sum()
                     - (c0[0] * r).sum() - gamma - gamma0 - c0)[0])
    log_joint += log_joint2
    return log_joint
def shanon_Entropy_studentt(self, log_cov, freedom):
    Nrff, dout = log_cov.shape
    const = T.log(((freedom - 2) * np.pi)**(dout / 2)) \
        + T.gammaln(freedom / 2) - T.gammaln((freedom + dout) / 2) \
        + (T.psi((freedom + dout) / 2) - T.psi(freedom / 2)) * (freedom + dout) / 2
    return 0.5 * T.sum(log_cov) + Nrff * const
def entropy_pi(self):
    log_gamma_term = T.sum(T.gammaln(self.tau_IBP[:, 0]) + T.gammaln(self.tau_IBP[:, 1])
                           - T.gammaln(self.tau_IBP[:, 0] + self.tau_IBP[:, 1]))
    digamma_term = T.sum((1.0 - self.tau_IBP[:, 0]) * T.psi(self.tau_IBP[:, 0])
                         + (1.0 - self.tau_IBP[:, 1]) * T.psi(self.tau_IBP[:, 1])
                         + (self.tau_IBP[:, 0] + self.tau_IBP[:, 1] - 2.0)
                         * T.psi(self.tau_IBP[:, 0] + self.tau_IBP[:, 1]))
    return log_gamma_term + digamma_term
def calc_kl_divergence(self, prior_alpha, prior_beta):
    # Use a Taylor approximation for the digamma function.
    psi_a_taylor_approx = T.log(self.a) - 1. / (2 * self.a) - 1. / (12 * self.a**2)
    kl = (self.a - prior_alpha) * psi_a_taylor_approx
    kl += -T.gammaln(self.a) + T.gammaln(prior_alpha) \
        + prior_alpha * (T.log(self.b) - T.log(prior_beta)) \
        + (self.a * (prior_beta - self.b)) / self.b
    return kl.sum(axis=1)
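# A minimal NumPy sanity check, not part of the original code (assumption:
# scipy is available): the digamma Taylor expansion used in calc_kl_divergence
# above can be compared against scipy's exact digamma.
import numpy as np
from scipy.special import psi

a = np.array([0.5, 2.0, 10.0])
approx = np.log(a) - 1. / (2 * a) - 1. / (12 * a**2)
print(np.abs(approx - psi(a)))  # the error shrinks rapidly as a grows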
def _log_partition_symfunc():
    natural_params = T.vector()
    log_Z = T.sum(T.gammaln(natural_params + 1.)) \
        - T.gammaln(T.sum(natural_params + 1))
    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
def loglik_primary_f(k, y, theta, lower_n):
    logit_p = theta[0]
    logn = theta[1]
    n = lower_n + T.exp(logn)
    k = k[:, 0]
    p = T.nnet.sigmoid(logit_p)
    combiln = T.gammaln(n + 1) - (T.gammaln(k + 1) + T.gammaln(n - k + 1))
    # Add a zero term in y to stop theano from complaining about unused inputs.
    # loglik = combiln + k * T.log(p) + (n - k) * T.log1p(-p) + 0.0 * T.sum(y)
    loglik = combiln + k * T.log(p) + (n - k) * T.log(1.0 - p) + 0.0 * T.sum(y)
    return loglik
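# Hedged sketch (assumption: scipy is available): the gammaln-based binomial
# coefficient above can be verified against scipy.stats.binom.logpmf.
import numpy as np
from scipy.special import gammaln
from scipy.stats import binom

n, k, p = 10, 3, 0.4
combiln = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))
loglik = combiln + k * np.log(p) + (n - k) * np.log1p(-p)
print(np.isclose(loglik, binom.logpmf(k, n, p)))  # True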
def __init__(self, mu=0.0, beta=None, cov=None, *args, **kwargs):
    super(GeneralizedGaussian, self).__init__(*args, **kwargs)
    # assert(mu.shape[0] == cov.shape[0] == cov.shape[1])
    dim = mu.shape[0]
    self.mu = mu
    self.beta = beta
    self.prec = tt.nlinalg.pinv(cov)
    # self.k = (dim * tt.gamma(dim / 2.0)) / \
    #     ((np.pi**(dim / 2.0)) * tt.gamma(1 + dim / (2 * beta))
    #      * (2**(1 + dim / (2 * beta))))
    self.logk = tt.log(dim) + tt.gammaln(dim / 2.0) - \
        (dim / 2.0) * tt.log(np.pi) - \
        tt.gammaln(1 + dim / (2 * beta)) - \
        (1 + dim / (2 * beta)) * tt.log(2.0)
def _negCLL(self, z, X):
    """Estimate -log p[x|z]"""
    if self.params['data_type'] == 'binary':
        p_x_z = self._conditionalXgivenZ(z)
        negCLL_m = T.nnet.binary_crossentropy(p_x_z, X)
    elif self.params['data_type'] == 'bow':
        # Likelihood under a multinomial distribution
        if self.params['likelihood'] == 'mult':
            lsf = self._conditionalXgivenZ(z)
            p_x_z = T.exp(lsf)
            negCLL_m = -1 * (X * lsf)
        elif self.params['likelihood'] == 'poisson':
            loglambda_p = self._conditionalXgivenZ(z)
            p_x_z = T.exp(loglambda_p)
            negCLL_m = -X * loglambda_p + T.exp(loglambda_p) + T.gammaln(X + 1)
        else:
            raise ValueError('Invalid choice for likelihood: '
                             + self.params['likelihood'])
    elif self.params['data_type'] == 'real':
        params = self._conditionalXgivenZ(z)
        mu, logvar = params[0], params[1]
        p_x_z = mu
        negCLL_m = 0.5 * np.log(2 * np.pi) + 0.5 * logvar \
            + 0.5 * ((X - mu)**2) / T.exp(logvar)
    else:
        assert False, 'Bad data_type: ' + str(self.params['data_type'])
    return p_x_z, negCLL_m.sum(1, keepdims=True)
def compute_LogDensity_Yterms(self, Y=None, X=None, padleft=False, persamp=False):
    """
    TODO: The persamp option allows this function to return a list of the costs
    computed for each sample. This is useful for implementing more sophisticated
    optimization procedures such as NVIL. TO BE IMPLEMENTED...

    NOTE: Please accompany a compute function with an eval function that allows
    evaluation from an external program. compute functions assume by default
    that the 0th dimension of the data arrays is the trial dimension. If you
    deal with a single trial and the trial dimension is omitted, set padleft to
    True to pad it back in.
    """
    if Y is None: Y = self.Y
    if X is None: X = self.X
    if padleft:
        Y = T.shape_padleft(Y, 1)
    Yprime = theano.clone(self.Rate, replace={self.X: X})
    Density = T.sum(Y * T.log(Yprime) - Yprime - T.gammaln(Y + 1))
    return Density
def get_viewed_cost(self, v0, vk_stat):
    cost = 0
    # Binary cross-entropy
    if self.input_type == InputType.binary:
        clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
        cost = -T.sum(v0 * T.log(clip_vk_stat)
                      + (1 - v0) * T.log(1 - clip_vk_stat), axis=1)
    # Sum of squared errors
    elif self.input_type == InputType.gaussian:
        cost = T.sum((v0 - vk_stat) ** 2, axis=1)
    # Categorical cross-entropy
    elif self.input_type == InputType.categorical:
        clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
        cost = -T.sum(v0 * T.log(clip_vk_stat), axis=1)
    # Poisson negative log-likelihood
    elif self.input_type == InputType.poisson:
        clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
        cost = -T.sum(-vk_stat + v0 * T.log(clip_vk_stat) - T.gammaln(1 + v0), axis=1)
    elif self.input_type == InputType.replicated_softmax:
        clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
        cost = -T.sum((v0 / self.total_count) * T.log(clip_vk_stat), axis=1)
    return cost
def likelihood(xs):
    return tt.sum(
        tt.log(beta)
        - tt.log(2.0 * std * tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta)))
        - tt.gammaln(1.0 / beta)
        - tt.power(tt.abs_(xs - mu) / std
                   * tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta)), beta))
def log_joint_fn(N, D, K, m_params, y, cov, mask):
    w, r, gamma, gamma0, c0, sigma = m_params
    results, updates = theano.scan(
        fn=log_joint_scan_fn,
        sequences=np.arange(N),
        outputs_info=[dict(initial=np.float64(0), taps=[-1])],
        non_sequences=[y, cov, mask])
    log_joint = results[-1]
    log_joint += ((D * gamma * T.log(gamma))[0] * r).sum() \
        - (D * T.gammaln(gamma[0] * r)).sum() \
        + ((gamma[0] * r - 1) * T.log(w)).sum() \
        - (gamma[0] * w).sum() \
        + (gamma0 * T.log(c0) - K * T.gammaln(gamma0 / K)
           + (gamma0 / K - 1)[0] * (T.log(r)).sum()
           - (c0[0] * r).sum() - gamma - gamma0 - c0)[0]
    return log_joint
def log_negative_binomial(x, p, log_r, eps=0.0):
    """
    Compute the log pdf of a negative binomial distribution with success
    probability p and number of failures, r, until the experiment is stopped,
    at values x. The log-binomial coefficient is evaluated exactly via the
    log-gamma function.
    """
    x = T.clip(x, eps, x)
    p = T.clip(p, eps, 1.0 - eps)
    r = T.exp(log_r)
    r = T.clip(r, eps, r)
    y = T.gammaln(x + r) - T.gammaln(x + 1) - T.gammaln(r) \
        + x * T.log(p) + r * T.log(1 - p)
    return y
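# Hedged NumPy check (assumption: under this parameterization scipy's nbinom
# counts x failures with success probability 1 - p):
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

x, r, p = 7.0, 3.0, 0.6
y = gammaln(x + r) - gammaln(x + 1) - gammaln(r) \
    + x * np.log(p) + r * np.log(1 - p)
print(np.isclose(y, nbinom.logpmf(x, r, 1 - p)))  # True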
def log_poisson(x, log_lambda, eps=0.0):
    x = T.clip(x, eps, x)
    lambda_ = T.exp(log_lambda)
    lambda_ = T.clip(lambda_, eps, lambda_)
    y = x * log_lambda - lambda_ - T.gammaln(x + 1)
    return y
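# Hedged check for log_poisson (assumption: scipy is available):
import numpy as np
from scipy.special import gammaln
from scipy.stats import poisson

x, lam = 4, 2.5
y = x * np.log(lam) - lam - gammaln(x + 1)
print(np.isclose(y, poisson.logpmf(x, lam)))  # True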
def log_gumbel_softmax(y, logits, tau=1):
    shape = logits.shape
    k = shape[-1]
    logits_flat = logits.reshape((-1, k))
    p_flat = T.nnet.softmax(logits_flat)
    p = p_flat.reshape(shape)
    log_gamma = T.gammaln(k)
    logsum = T.log(T.sum(p / (y**tau), axis=-1))
    sumlog = T.sum(T.log(p / (y**(tau + 1))), axis=-1)
    return log_gamma + (k - 1) * T.log(tau) - k * logsum + sumlog
def log_gumbel_softmax(x, mu, tau=1.0, eps=1e-6):
    """
    Compute the log pdf of a Gumbel-Softmax distribution with parameters mu,
    at values x.

    .. See Appendix B of https://arxiv.org/pdf/1611.01144v2.pdf
    """
    k = mu.shape[-1]
    logpdf = T.gammaln(k) + (k - 1) * T.log(tau + eps) \
        - k * T.log(T.sum(T.exp(mu) / T.power(x, tau), axis=2) + eps) \
        + T.sum(mu - (tau + 1) * T.log(x + eps), axis=2)
    return logpdf
def eval_prior(self, buffers):
    """ Evaluates the prior on the latent variables """
    zsamp = self.S[:, buffers[0]:-buffers[1]]
    n_samples = zsamp.shape[0] // self.batch_size
    zsamp = zsamp.reshape((self.batch_size, n_samples, -1))
    ks = zsamp.sum(axis=-1)
    ns = zsamp.shape[-1].astype(config.floatX) * T.ones_like(ks)
    log_nok = T.gammaln(ns + 1) - T.gammaln(ks + 1) - T.gammaln(ns - ks + 1)
    log_p = 0
    if self.n_genparams == 1:
        log_p = -0.5 * (T.log(2 * np.pi) + 2 * T.log(self.p_sigma)
                        + ((self.P / self.p_sigma)**2).sum(axis=-1))
        log_p = log_p.reshape((self.batch_size, n_samples))
    return log_nok + ks * T.log(self.pz) + (ns - ks) * T.log(1 - self.pz) + log_p
def evaluateLogDensity(self, X, Y):
    # This is the log density of the generative model (*not* negated)
    Ypred = theano.clone(self.rate, replace={self.Xsamp: X})
    resY = Y - Ypred
    resX = X[1:] - T.dot(X[:-1], self.A.T)
    resX0 = X[0] - self.x0
    LatentDensity = -0.5 * T.dot(T.dot(resX0, self.Lambda0), resX0.T) \
        - 0.5 * (resX * T.dot(resX, self.Lambda)).sum() \
        + 0.5 * T.log(Tla.det(self.Lambda)) * (Y.shape[0] - 1) \
        + 0.5 * T.log(Tla.det(self.Lambda0)) \
        - 0.5 * self.xDim * np.log(2 * np.pi) * Y.shape[0]
    PoisDensity = T.sum(Y * T.log(Ypred) - Ypred - T.gammaln(Y + 1))
    LogDensity = LatentDensity + PoisDensity
    return LogDensity
def _get_log_partition_func(dim, nparams):
    np1, np2, np3, np4 = nparams
    idxs = np.arange(dim) + 1
    W = T.nlinalg.matrix_inverse(np1 - (1. / np3) * T.outer(np2, np2))
    log_Z = .5 * (np4 + dim) * T.log(T.nlinalg.det(W))
    log_Z += .5 * (np4 + dim) * dim * np.log(2)
    log_Z += .5 * dim * (dim - 4)
    log_Z += T.sum(T.gammaln(.5 * (np4 + dim + 1 - idxs)))
    log_Z += -.5 * dim * T.log(np3)
    return log_Z, theano.function([], log_Z)
def liklihood_studnet_t(self, target, free_param):
    self.beta = T.exp(self.ls)
    Covariance = self.beta
    LL = self.log_mvns(target, self.output, Covariance)  # - 0.5*T.sum(T.dot(betaI, Ktilda))
    N, n_out = target.shape
    CH_const = T.gammaln((n_out + n_out + free_param) / 2) \
        - T.log(((free_param + n_out) * np.pi)**(n_out / 2)) \
        - T.gammaln((free_param + n_out) / 2)
    ch_mc, updates = theano.scan(
        fn=lambda a: T.sum(T.log(1 + T.sum(a * a, -1) / free_param)),
        sequences=[W_samples])
    CH_MC = T.mean(ch_mc)
    CH = CH_const * num_FF - CH_MC * (free_param + n_out) / 2
    return LL
def mvt_logpdf_theano(x, mu, Li, df):
    import theano.tensor as T
    dim = Li.shape[0]
    Ki = Li.T.dot(Li)
    # The determinant is just the product of the diagonal elements of the
    # Cholesky factor.
    logdet = 2 * T.log(1. / T.diag(Li)).sum()
    lpdf_const = (T.gammaln((df + dim) / 2)
                  - (T.gammaln(df / 2)
                     + (T.log(df) + T.log(np.pi)) * dim * 0.5
                     + logdet * 0.5))
    d = (x - mu.reshape((1, mu.size))).T
    Ki_d_scal = T.dot(Ki, d) / df                   # vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1.   # scalar
    res_pdf = (lpdf_const - 0.5 * (df + dim) * T.log(d_Ki_d_scal_1)).flatten()
    if res_pdf.size == 1:
        res_pdf = res_pdf[0]  # T.float does not exist; take the scalar element
    return res_pdf
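# Hedged NumPy check of the same log-pdf (assumptions: scipy >= 1.6 for
# multivariate_t, and Li is the inverse lower Cholesky factor of the scale
# matrix, so Li.T.dot(Li) is the precision):
import numpy as np
from numpy.linalg import cholesky, inv
from scipy.special import gammaln
from scipy.stats import multivariate_t

mu = np.array([0.5, -1.0])
Sigma = np.array([[2.0, 0.3], [0.3, 1.0]])
df, x, dim = 5.0, np.array([0.1, 0.2]), 2
Li = inv(cholesky(Sigma))
Ki = Li.T.dot(Li)
logdet = 2 * np.sum(np.log(1. / np.diag(Li)))
const = gammaln((df + dim) / 2) - (gammaln(df / 2)
        + (np.log(df) + np.log(np.pi)) * dim * 0.5 + logdet * 0.5)
d = x - mu
lp = const - 0.5 * (df + dim) * np.log(1 + d.dot(Ki).dot(d) / df)
print(np.isclose(lp, multivariate_t.logpdf(x, mu, Sigma, df)))  # True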
def _log_partition_symfunc():
    natural_params = T.vector()
    size = natural_params.shape[0] // 4
    np1, np2, np3, np4 = T.split(natural_params, 4 * [size], 4)
    log_Z = T.sum(T.gammaln(.5 * (np4 + 1)))
    log_Z += T.sum(-.5 * (np4 + 1) * T.log(.5 * (np1 - (np2 ** 2) / np3)))
    log_Z += T.sum(-.5 * T.log(np3))
    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
def log_partf(b, s, C, v, logdet=None):
    D = b.size
    # multivariate log-gamma function
    g = tt.sum(tt.gammaln((v + 1. - tt.arange(1, D + 1)) / 2.)) \
        + D * (D - 1) / 4. * np.log(np.pi)
    # log-partition function
    if logdet is None:
        return -v / 2. * tt.log(tl.det(C - tt.dot(b, b.T) / (4 * s))) \
            + v * np.log(2.) + g - D / 2. * tt.log(s)
    else:
        return -v / 2. * logdet + v * np.log(2.) + g - D / 2. * tt.log(s)
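# Hedged check (assumption: scipy is available): the g term above equals
# scipy's multivariate log-gamma function, multigammaln(v / 2, D).
import numpy as np
from scipy.special import gammaln, multigammaln

v, D = 7.0, 3
g = np.sum(gammaln((v + 1. - np.arange(1, D + 1)) / 2.)) \
    + D * (D - 1) / 4. * np.log(np.pi)
print(np.isclose(g, multigammaln(v / 2., D)))  # True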
def logp_cho(cls, value, mu, cho, freedom, mapping):
    delta = mapping.inv(value) - mu
    lcho = tsl.solve_lower_triangular(cho, delta)
    beta = lcho.T.dot(lcho)
    n = cho.shape[0].astype(th.config.floatX)

    np5 = np.float32(0.5)
    np2 = np.float32(2.0)
    npi = np.float32(np.pi)
    r1 = -np5 * (freedom + n) * tt.log1p(beta / (freedom - np2))
    r2 = ifelse(tt.le(np.float32(1e6), freedom),
                -n * np5 * np.log(np2 * npi),
                tt.gammaln((freedom + n) * np5) - tt.gammaln(freedom * np5)
                - np5 * n * tt.log((freedom - np2) * npi))
    r3 = -tt.sum(tt.log(tnl.diag(cho)))
    det_m = mapping.logdet_dinv(value)

    r1 = debug(r1, name='r1', force=True)
    r2 = debug(r2, name='r2', force=True)
    r3 = debug(r3, name='r3', force=True)
    det_m = debug(det_m, name='det_m', force=True)

    r = r1 + r2 + r3 + det_m

    cond1 = tt.or_(tt.any(tt.isinf(delta)), tt.any(tt.isnan(delta)))
    cond2 = tt.or_(tt.any(tt.isinf(det_m)), tt.any(tt.isnan(det_m)))
    cond3 = tt.or_(tt.any(tt.isinf(cho)), tt.any(tt.isnan(cho)))
    cond4 = tt.or_(tt.any(tt.isinf(lcho)), tt.any(tt.isnan(lcho)))
    return ifelse(cond1, np.float32(-1e30),
                  ifelse(cond2, np.float32(-1e30),
                         ifelse(cond3, np.float32(-1e30),
                                ifelse(cond4, np.float32(-1e30), r))))
def _loglikelihood_step(self, Y_t, L_t, ll_t, W_t, M_t):
    import theano
    import theano.tensor as T
    sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[0, 1]) \
        - T.sum(W_t, axis=1) - T.sum(T.gammaln(Y_t + 1))
    M_nlc = theano.ifelse.ifelse(T.eq(L_t, -1), T.sum(M_t, axis=0), M_t[L_t])
    # For numerics: only account for values where M_nlc is not zero.
    a = T.switch(T.eq(M_nlc, 0.), T.min(sum_log_poisson), sum_log_poisson)
    a = T.max(a, keepdims=True)
    logarg = T.switch(
        T.eq(M_nlc, 0.),
        0.,
        T.exp(sum_log_poisson - a) * M_nlc / T.cast(M_t.shape[0], dtype='float32'))
    logarg = T.sum(logarg)
    return ll_t + a[0] + T.log(logarg)
def logp(self, x):
    alpha = self.alpha
    n = tt.sum(x, axis=-1)
    sum_alpha = tt.sum(alpha, axis=-1)
    const = (tt.gammaln(n + 1) + tt.gammaln(sum_alpha)) - tt.gammaln(n + sum_alpha)
    series = tt.gammaln(x + alpha) - (tt.gammaln(x + 1) + tt.gammaln(alpha))
    result = const + tt.sum(series, axis=-1)
    return result
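# Hedged check of the Dirichlet-multinomial logp above (assumption:
# scipy >= 1.11, which added scipy.stats.dirichlet_multinomial):
import numpy as np
from scipy.special import gammaln
from scipy.stats import dirichlet_multinomial

x = np.array([2, 3, 5])
alpha = np.array([1.0, 2.0, 3.0])
n = x.sum()
const = (gammaln(n + 1) + gammaln(alpha.sum())) - gammaln(n + alpha.sum())
series = gammaln(x + alpha) - (gammaln(x + 1) + gammaln(alpha))
print(np.isclose(const + series.sum(),
                 dirichlet_multinomial.logpmf(x, alpha, n)))  # True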
def LogJ(m_params, v_params, theano_observed_matrix):
    m_matrix_estimate, s_matrix_estimate, m_w, s_w, m_r, s_r, m_gamma, s_gamma, \
        m_gamma0, s_gamma0, m_c0, s_c0, msigma, ssigma = v_params
    matrix_estimate, w, r, gamma, gamma0, c0, sigma = m_params
    is_observed_matrix_numpy = ~np.isnan(observed_matrix)
    is_observed_matrix.set_value(is_observed_matrix_numpy.astype(np.float64))
    theano_observed_matrix.set_value(
        (np.nan_to_num(is_observed_matrix_numpy * observed_matrix)).astype(np.float64))
    log_j_t0 = time.clock()
    log_joint = 0
    for n in range(observed_count):
        log_joint += np.power(
            (observation_record[0, n]
             - matrix_estimate[int(observation_record_numpy[1, n]),
                               int(observation_record_numpy[2, n])]), 2)
    log_joint = (-1 / (2 * ERROR[0] * ERROR[0])) * log_joint
    print("first result")
    print(log_joint.eval())
    log_joint += -(N / 2.0) * T.nlinalg.Det()(covariance_matrix) \
        - (1 / 2.0) * T.nlinalg.trace(
            T.dot(T.dot(matrix_estimate, matrix_estimate.T),
                  T.nlinalg.MatrixInverse()(covariance_matrix)))
    log_joint2 = (((D * gamma * T.log(gamma))[0] * r).sum()
                  - (D * T.gammaln(gamma[0] * r)).sum()
                  + ((gamma[0] * r - 1) * T.log(w)).sum()
                  - (gamma[0] * w).sum()
                  + (gamma0 * T.log(c0)
                     - THRESHOLD_RANK * T.gammaln(gamma0 / THRESHOLD_RANK)
                     + (gamma0 / THRESHOLD_RANK - 1)[0] * (T.log(r)).sum()
                     - (c0[0] * r).sum() - gamma - gamma0 - c0)[0])
    log_joint += log_joint2
    return log_joint
def logp(self, value):
    topology = self.topology
    taxon_count = tt.as_tensor_variable(topology.get_taxon_count())
    root_index = topology.get_root_index()
    r = self.r
    a = self.a
    rho = self.rho
    log_coeff = (taxon_count - 1) * tt.log(2.0) - tt.gammaln(taxon_count)
    tree_logp = log_coeff + (taxon_count - 1) * tt.log(r * rho) \
        + taxon_count * tt.log(1 - a)
    mrhs = -r * value
    zs = tt.log(rho + ((1 - rho) - a) * tt.exp(mrhs))
    ls = -2 * zs + mrhs
    root_term = mrhs[root_index] - zs[root_index]
    return tree_logp + tt.sum(ls) + root_term
def likelihood(self, z, y):
    η = z.flatten(min(2, z.ndim)) + self.bias
    Δ = self.binsize
    # first part of the likelihood
    L1 = tt.dot(y, η)
    if z.ndim > 1:
        ndim = z.ndim - 1
        shp_z = z.shape[-ndim:]
        L1 = L1.reshape(shp_z, ndim=ndim)
    # second part of the likelihood
    λ = self.invlink(z + self.bias)
    L2 = Δ * tt.sum(λ, axis=0)
    # constant factors
    c1 = tt.sum(y) * tt.log(Δ)
    c2 = -tt.sum(tt.where(y > 1, tt.gammaln(y + 1), 0.0))
    const = c1 - c2
    L = L1 - L2 + const
    return as_tensor_variable(L, name='logL')
def logprob(self, y_target, n, p):
    coeff = T.gammaln(n + y_target) - T.gammaln(y_target + 1) - T.gammaln(n)
    return -(coeff + n * T.log(p) + y_target * T.log(1 - p))
def kldiv_gamma(a1, b1, a0=a0, b0=b0):
    return T.sum((a1 - a0) * nnu.Psi()(a1) - T.gammaln(a1) + T.gammaln(a0)
                 + a0 * (T.log(b1) - T.log(b0)) + a1 * ((b0 - b1) / b1))
def __init__(self, rng, input, n_in, n_out, num_MC, num_FF, n_tot, free_param,
             Domain_number=None, number="1", Domain_consideration=True):
    # input is expected to arrive with shape 100*N*D
    self.DATA = input
    # N = DATA.shape[1]
    # n_in_D = DATA.shape[2]
    srng = RandomStreams(seed=234)
    self.num_rff = num_FF

    # Define hyperparameters
    lhyp_values = np.zeros(n_in + 1, dtype=theano.config.floatX) \
        + np.log(0.1, dtype=theano.config.floatX)
    # lhyp_values = np.zeros(n_in+1, dtype=theano.config.floatX) \
    #     + np.log(1., dtype=theano.config.floatX)
    self.lhyp = theano.shared(value=lhyp_values, name='lhyp' + number, borrow=True)
    self.sf2, self.l = T.exp(self.lhyp[0]), T.exp(self.lhyp[1:1 + n_in])

    if Domain_consideration:  # previous work found 0.1 worked well
        ls_value = np.zeros(Domain_number, dtype=theano.config.floatX) \
            + np.log(0.1, dtype=theano.config.floatX)
    else:
        ls_value = np.zeros(1, dtype=theano.config.floatX) \
            + np.log(0.1, dtype=theano.config.floatX)
    self.ls = theano.shared(value=ls_value, name='ls' + number, borrow=True)

    # Define prior omega
    # prior_mean_Omega.append(tf.zeros([self.d_in[i],1]))
    self.log_prior_var_Omega = T.tile(1 / (self.l)**0.5, (num_FF, 1)).T

    # Define posterior omega and get samples from omega
    sample_value = np.random.randn(1, n_in, num_FF)
    self.sample_Omega_epsilon_0 = theano.shared(value=sample_value,
                                                name='sample_Omega' + number)
    # self.sample_Omega_epsilon_0 = srng.normal((1, n_in, num_FF))
    Omega_sample = self.sample_Omega_epsilon_0 * self.log_prior_var_Omega[None, :, :]
    Omega_samples = T.tile(Omega_sample, (num_MC, 1, 1))
    self.samples = Omega_samples

    # Define prior W
    # prior_mean_W = T.zeros(2*num_FF)
    # log_prior_var_W = T.ones(2*num_FF)

    # Define posterior W
    mean_mu_value = np.random.randn(2 * num_FF, n_out)  # * 1e-2
    self.mean_mu = theano.shared(value=mean_mu_value, name='mean_mu' + number,
                                 borrow=True)
    log_var_value = np.zeros((2 * num_FF, n_out))
    self.log_var_W = theano.shared(value=log_var_value, name='q_W' + number,
                                   borrow=True)

    # Get samples from W
    sample_Omega_epsilon = srng.normal((num_MC, 2 * num_FF, n_out))
    f2 = T.cast(free_param, 'int64')
    N = srng.uniform(size=(f2 + n_tot, num_MC), low=1e-10, high=1.0)
    gamma_factor = T.sum(T.log(N), 0) * (-1)
    # gamma_factor = self.gamma_dist(free_param+n_tot, 1, num_MC)
    sample_Omega_epsilon_gamma = \
        ((free_param + n_tot) / gamma_factor)[:, None, None] \
        * sample_Omega_epsilon  # MC*Nrff*dout
    W_samples = sample_Omega_epsilon_gamma * (T.exp(self.log_var_W)**0.5)[None, :, :] \
        + self.mean_mu[None, :, :]

    # Calculate layer, N_MC*N*D_out
    F_next, updates = theano.scan(
        fn=lambda a, b, c: self.passage(a, b, c, num_FF),
        sequences=[input, Omega_samples, W_samples])

    # Output
    self.output = F_next

    # KL divergence: Omega, W; cross-entropy term
    # self.KL_W = self.DKL_gaussian(self.mean_mu, self.log_var_W,
    #                               prior_mean_W, log_prior_var_W)
    CH_const = T.gammaln((n_out + free_param) / 2) \
        - T.log(((free_param - 2) * np.pi)**(n_out / 2)) \
        - T.gammaln(free_param / 2)
    ch_mc, updates = theano.scan(
        fn=lambda a: (T.log(1 + T.sum(a * a, -1) / (free_param - 2))),
        sequences=[W_samples])
    CH_MC = T.mean(T.sum(ch_mc, -1))
    CH = CH_const * num_FF - CH_MC * (free_param + n_out) / 2

    # Entropy term
    HF = self.shanon_Entropy_studentt(self.log_var_W, free_param + n_tot)
    self.KL_W = -HF - CH

    # Parameter setting
    self.all_params = [self.lhyp, self.ls, self.mean_mu, self.log_var_W]
    self.hyp_params = [self.lhyp, self.ls]
    self.variational_params = [self.mean_mu, self.log_var_W]
def in_loop(i, prev_res):
    j = i + 1
    res = prev_res + T.gammaln(a + 0.5 * (1 - j))
    return res
def kldiv_r(self, a1, b1):
    return -((a1 - self.a0) * nnu.Psi()(a1) - T.gammaln(a1) + T.gammaln(self.a0)
             + self.a0 * (T.log(b1) - T.log(self.b0))
             + a1 * ((self.b0 - b1) / b1))[0]
def Beta_fn(a, b):
    return T.exp(T.gammaln(a) + T.gammaln(b) - T.gammaln(a + b))
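# Note: exponentiating the gammaln sum is the numerically stable way to get
# the Beta function. A hedged check against scipy (assumption: scipy available):
import numpy as np
from scipy.special import gammaln, beta

a, b = 2.5, 4.0
print(np.isclose(np.exp(gammaln(a) + gammaln(b) - gammaln(a + b)),
                 beta(a, b)))  # True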
def _log_beta_vec_func(self, alpha):
    output = 0
    for _k in range(self.k):
        output += T.gammaln(self._slice_last(alpha, _k))
    output -= T.gammaln(T.sum(alpha, axis=-1))
    return output
def _log_beta_func(self, alpha, beta):
    return T.gammaln(alpha) + T.gammaln(beta) - T.gammaln(alpha + beta)
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
    if not self.run_already:
        from theano import tensor as t
        import theano
        y = t.matrix(name='y')
        f = t.matrix(name='f')
        g = t.matrix(name='g')

        def theano_logaddexp(x, y):
            # Implementation of numpy's logaddexp, but in theano
            tmp = x - y
            return t.where(tmp > 0, x + t.log1p(t.exp(-tmp)), y + t.log1p(t.exp(tmp)))

        # Full log likelihood before expectations
        logpy_t = -(t.exp(f) + t.exp(g)) + y * theano_logaddexp(f, g) - t.gammaln(y + 1)
        logpy_sum_t = t.sum(logpy_t)
        dF_df_t = theano.grad(logpy_sum_t, f)
        d2F_df2_t = 0.5 * theano.grad(t.sum(dF_df_t), f)  # Is this right?
        dF_dg_t = theano.grad(logpy_sum_t, g)
        d2F_dg2_t = 0.5 * theano.grad(t.sum(dF_dg_t), g)  # Is this right?

        self.logpy_func = theano.function([f, g, y], logpy_t)
        self.dF_df_func = theano.function([f, g, y], dF_df_t)  # , mode='DebugMode')
        self.d2F_df2_func = theano.function([f, g, y], d2F_df2_t)
        self.dF_dg_func = theano.function([f, g, y], dF_dg_t)
        self.d2F_dg2_func = theano.function([f, g, y], d2F_dg2_t)
        self.run_already = True

    funcs = [self.logpy_func, self.dF_df_func, self.d2F_df2_func,
             self.dF_dg_func, self.d2F_dg2_func]

    D = Y.shape[1]
    mf, mg = m[:, :D], m[:, D:]
    vf, vg = v[:, :D], v[:, D:]
    F = 0  # Could do analytical components here

    T = self.T  # Need these now to duplicate the censored inputs for quadrature
    gh_x, gh_w = self._gh_points(T)

    (F_quad, dF_dmf, dF_dvf, dF_dmg, dF_dvg) = self.quad2d(
        funcs=funcs, Y=Y, mf=mf, vf=vf, mg=mg, vg=vg,
        gh_points=gh_points, exp_f=False, exp_g=False)
    F += F_quad

    # gprec = safe_exp(mg - 0.5*vg)
    dF_dmf += 0
    dF_dmg += 0
    dF_dvf += 0
    dF_dvg += 0
    dF_dm = np.hstack((dF_dmf, dF_dmg))
    dF_dv = np.hstack((dF_dvf, dF_dvg))

    if np.any(np.isnan(F_quad)):
        raise ValueError("Nan <log p(y|f,g)>_qf_qg")
    if np.any(np.isnan(dF_dmf)):
        raise ValueError("Nan gradients <log p(y|f,g)>_qf_qg wrt qf mean")
    if np.any(np.isnan(dF_dmg)):
        raise ValueError("Nan gradients <log p(y|f,g)>_qf_qg wrt qg mean")

    test_integration = False
    if test_integration:
        # Some code to check the result numerically using scipy quadrature
        from scipy import integrate
        i = 6  # datapoint index

        def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi):
            # link_fi = np.exp(fi)
            # link_gi = np.exp(gi)
            # logpy_fg = -(link_fi + link_gi) + yi*np.logaddexp(fi, gi) \
            #     - sp.special.gammaln(yi+1)
            logpy_fg = self.logpdf(np.atleast_2d(np.hstack([fi, gi])),
                                   np.atleast_2d(yi))
            return (logpy_fg  # log p(y|f,g)
                    * np.exp(-0.5 * np.log(2 * np.pi * vgi)
                             - 0.5 * ((gi - mgi)**2) / vgi)   # q(g)
                    * np.exp(-0.5 * np.log(2 * np.pi * vfi)
                             - 0.5 * ((fi - mfi)**2) / vfi))  # q(f)

        quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i],
                              mfi=mf[i], vfi=vf[i])

        def integrl(gi):
            return integrate.quad(quad_func_l, -70, 70, args=(gi))[0]

        print "These should match"
        print "Numeric scipy F quad"
        print integrate.quad(lambda fi: integrl(fi), -70, 70)
        print "2d quad F quad"
        print F[i]

    return F, dF_dm, dF_dv, None
def log_likelihood(self, samples, alpha, beta):
    output = alpha * T.log(beta + epsilon()) - T.gammaln(alpha)
    output += (alpha - 1) * T.log(samples + epsilon())
    output += -beta * samples
    return mean_sum_samples(output)
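# Hedged check (assumption: beta is a rate parameter, so scipy's scale is 1/beta):
import numpy as np
from scipy.special import gammaln
from scipy.stats import gamma

x, a, b = 1.7, 2.0, 3.0
lp = a * np.log(b) - gammaln(a) + (a - 1) * np.log(x) - b * x
print(np.isclose(lp, gamma.logpdf(x, a, scale=1. / b)))  # True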
    # gammaln is a vectorized ufunc, so apply it directly instead of map()
    term3 = s.special.gammaln((degMatrix == 0).sum(axis=0) + alphaNull[0]) \
        - s.special.gammaln(alphaNull[0])
    return np.array(term1 + term2 + term3)


###################################################################
### The following are Theano representations of certain functions

# sum matrix ops
m1 = T.fmatrix()
m2 = T.fmatrix()
add = function([m1, m2], m1 + m2, allow_input_downcast=True)

# declare a function that calculates gammaln on a shared variable on GPU
aMatrix = shared(np.zeros((65536, 8192)), config.floatX, borrow=True)
gamma_ln = function([], T.gammaln(aMatrix))
theanoExp = function([], T.exp(aMatrix))

alpha = T.fscalar()
gamma_ln_scalar = function([alpha], T.gammaln(alpha), allow_input_downcast=True)

# now compute the second part of the F-score, which is the covariance of mut and deg
mutMatrix = shared(np.ones((32768, 4096)), config.floatX, borrow=True)
expMatrix = shared(np.ones((8192, 4096)), config.floatX, borrow=True)
mDotE = function([], T.dot(mutMatrix, expMatrix))

nijk_11 = shared(np.zeros((32768, 4096)), config.floatX)
nijk_01 = shared(np.zeros((32768, 4096)), config.floatX)
fscore = shared(np.zeros((32768, 4096)), config.floatX)
tmpLnMatrix = shared(np.zeros((32768, 4096)), config.floatX, borrow=True)
def free_energy(self, v_sample):
    tmp, h = self.propup(v_sample)
    return -T.dot(v_sample, self.vbias) - T.dot(h, self.hbias) + \
        T.sum((-T.dot(v_sample, self.W) * h + T.gammaln(v_sample + 1)), axis=1)
def free_energy(v, h):
    # return -T.dot(v_sample, self.vbias) - T.dot(h, self.hbias) + \
    #     T.sum((-T.dot(v_sample, self.W) * h + T.gammaln(v_sample + 1)), axis=1)
    return -(v * bv).sum() - (h * bh).sum() \
        + T.sum((-T.dot(v, W) * h + T.gammaln(v + 1)))
def betaln(alpha, beta):
    return T.gammaln(alpha) + T.gammaln(beta) - T.gammaln(alpha + beta)
def loglikelihood(self, data=None, mode='unsupervised'):
    """
    Calculate the log-likelihood of the given data under the model parameters.

    Keyword arguments:
    data: nparray (data, label) or 'None' for input data
    mode=['unsupervised','supervised']: calculate the supervised or
        unsupervised log-likelihood
    """
    # To avoid numerical problems, the log-likelihood has to be calculated in
    # this more costly way, using intermediate logarithmic functions.

    # input data
    if data is None:
        Y = self.MultiLayer[0].Layer[0].get_input_data().astype('float32')
    else:
        Y = np.asarray(
            [self.MultiLayer[0].Layer[0].output(y) for y in data[0]],
            dtype='float32')

    # labels
    if (mode == 'supervised'):
        if data is None:
            L = self.MultiLayer[0].Layer[0].get_input_label()
        else:
            L = data[1]
    elif (mode == 'unsupervised'):
        L = (-1) * np.ones(Y.shape[0])

    # weights & dimensions
    W = self.MultiLayer[0].Layer[1].get_weights().astype('float32')
    N = Y.shape[0]
    C = W.shape[0]
    D = W.shape[1]
    if (self.number_of_multilayers() == 2):
        try:
            M = self.MultiLayer[1].Layer[1].get_weights()
        except:
            M = None
    elif ((self.number_of_multilayers() == 1) and
          (self.MultiLayer[0].number_of_layers() == 3)):
        M = self.MultiLayer[0].Layer[2].get_weights()
    else:
        M = None
    try:
        K = M.shape[0]
    except:
        K = None

    if not self._theano:
        if M is None:
            ones = np.ones(shape=(C, D), dtype=float)
            log_likelihood = np.empty(N, dtype=float)
            for ninput in xrange(N):
                sum_log_poisson = np.sum(
                    log_poisson_function(ones * Y[ninput, :], W), axis=1)
                a = np.max(sum_log_poisson)
                log_likelihood[ninput] = -np.log(C) + a + \
                    np.log(np.sum(np.exp(sum_log_poisson - a)))
        else:
            ones = np.ones(shape=(C, D), dtype=float)
            log_likelihood = np.empty(N, dtype=float)
            for ninput in xrange(N):
                sum_log_poisson = np.sum(
                    log_poisson_function(ones * Y[ninput, :], W), axis=1)
                a = np.max(sum_log_poisson)
                if (L[ninput] == -1):
                    log_likelihood[ninput] = a + np.log(np.sum(
                        np.exp(sum_log_poisson - a) * np.sum(M, axis=0) / float(K)))
                else:
                    log_likelihood[ninput] = a + np.log(np.sum(
                        np.exp(sum_log_poisson - a) * M[L[ninput], :] / float(K)))
        mean_log_likelihood = np.mean(log_likelihood)
        sum_log_likelihood = np.zeros_like(mean_log_likelihood)
        MPI.COMM_WORLD.Allreduce(mean_log_likelihood, sum_log_likelihood,
                                 op=MPI.SUM)
        mean_log_likelihood = sum_log_likelihood / float(MPI.COMM_WORLD.Get_size())
    else:
        import theano
        import theano.tensor as T

        ml = self.MultiLayer[0]
        if ml._scan_batch_size is None:
            nbatches = 1
            scan_batch_size = ml.Layer[0].get_input_data().shape[0]
        else:
            nbatches = int(np.ceil(
                ml.Layer[0].get_input_data().shape[0]
                / float(ml._scan_batch_size)))
            scan_batch_size = ml._scan_batch_size
        batch_log_likelihood = np.zeros(nbatches, dtype='float32')

        if M is None:
            if (self._t_sum_log_likelihood_W is None):
                Y_t = T.matrix('Y', dtype='float32')
                L_t = T.vector('L', dtype='int32')
                W_t = T.matrix('W', dtype='float32')
                sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[1, 1]) \
                    - T.sum(W_t, axis=1) \
                    - T.sum(T.gammaln(Y_t + 1), axis=1, keepdims=True)
                a = T.max(sum_log_poisson, axis=1, keepdims=True)
                logarg = T.sum(T.exp(sum_log_poisson - a), axis=1)
                log_likelihood = -T.log(C) + a[:, 0] + T.log(logarg)
                # Compile theano function
                self._t_sum_log_likelihood_W = theano.function(
                    [Y_t, L_t, W_t], T.sum(log_likelihood),
                    on_unused_input='ignore')

            for nbatch in xrange(nbatches):
                batch_log_likelihood[nbatch] = self._t_sum_log_likelihood_W(
                    Y[nbatch * scan_batch_size:
                      (nbatch + 1) * scan_batch_size].astype('float32'),
                    L[nbatch * scan_batch_size:
                      (nbatch + 1) * scan_batch_size].astype('int32'),
                    W.astype('float32'))
        else:
            if (self._t_sum_log_likelihood is None):
                Y_t = T.matrix('Y', dtype='float32')
                L_t = T.vector('L', dtype='int32')
                W_t = T.matrix('W', dtype='float32')
                M_t = T.matrix('M', dtype='float32')
                sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[1, 1]) \
                    - T.sum(W_t, axis=1) \
                    - T.sum(T.gammaln(Y_t + 1), axis=1, keepdims=True)
                M_nlc = M_t[L_t]
                L_index = T.eq(L_t, -1).nonzero()
                M_nlc = T.set_subtensor(M_nlc[L_index], T.sum(M_t, axis=0))
                # for numerics: only account for values where M_nlc is not zero
                a = T.switch(
                    T.eq(M_nlc, 0.),
                    T.cast(T.min(sum_log_poisson), dtype='int32'),
                    T.cast(sum_log_poisson, dtype='int32'))
                a = T.max(a, axis=1, keepdims=True)
                # logarg = T.switch(
                #     T.eq(M_nlc, 0.),
                #     0.,
                #     T.exp(sum_log_poisson-a).astype('float32')*M_nlc\
                #     /M_t.shape[0].astype('float32'))
                logarg = T.switch(
                    T.eq(M_nlc, 0.),
                    0.,
                    T.exp(sum_log_poisson - a.astype('float32')))
                logarg = T.sum(logarg, axis=1)
                log_likelihood = a[:, 0].astype('float32') + T.log(logarg)
                # Compile theano function
                self._t_sum_log_likelihood = theano.function(
                    [Y_t, L_t, W_t, M_t], T.sum(log_likelihood),
                    on_unused_input='ignore')

                """
                # LL_scan:
                ll_t = T.scalar('loglikelihood', dtype='float32')
                sequences = [Y_t, L_t]
                outputs_info = [ll_t]
                non_sequences = [W_t, M_t]
                likelihood, updates = theano.scan(
                    fn=self._loglikelihood_step,
                    sequences=sequences,
                    outputs_info=outputs_info,
                    non_sequences=non_sequences)
                result = likelihood[-1]
                # Compile function
                self._loglikelihood_scan = theano.function(
                    inputs=sequences + outputs_info + non_sequences,
                    outputs=result,
                    name='loglikelihood')
                """

            for nbatch in xrange(nbatches):
                batch_log_likelihood[nbatch] = self._t_sum_log_likelihood(
                    Y[nbatch * scan_batch_size:
                      (nbatch + 1) * scan_batch_size].astype('float32'),
                    L[nbatch * scan_batch_size:
                      (nbatch + 1) * scan_batch_size].astype('int32'),
                    W.astype('float32'),
                    M.astype('float32'))
        mean_log_likelihood = np.sum(batch_log_likelihood) / float(N)
    return mean_log_likelihood
def __call__(self, X):
    A = T.dot(X[:-1], self.w_trans)
    A = T.exp(T.concatenate([w_init, A], axis=0))
    B = T.sum(T.gammaln(A), axis=-1) - T.gammaln(T.sum(A, axis=-1))
    L = T.dot(A - 1, X.dimshuffle(0, 2, 1)) - B
def analytical_kl(q1, q2, given, deterministic=False):
    try:
        [x1, x2] = given
    except:
        raise ValueError("The length of given list must be 2, "
                         "got %d" % len(given))

    q1_class = q1.__class__.__name__
    q2_class = q2.__class__.__name__
    if q1_class == "Gaussian" and q2_class == "UnitGaussianSample":
        mean, var = q1.fprop(x1, deterministic=deterministic)
        return gauss_unitgauss_kl(mean, var)

    elif q1_class == "Gaussian" and q2_class == "Gaussian":
        mean1, var1 = q1.fprop(x1, deterministic=deterministic)
        mean2, var2 = q2.fprop(x2, deterministic=deterministic)
        return gauss_gauss_kl(mean1, var1, mean2, var2)

    elif q1_class == "Bernoulli" and q2_class == "UnitBernoulliSample":
        mean = q1.fprop(x1, deterministic=deterministic)
        output = mean * (T.log(mean + epsilon()) + T.log(2)) + \
            (1 - mean) * (T.log(1 - mean + epsilon()) + T.log(2))
        return T.sum(output, axis=1)

    elif q1_class == "Categorical" and q2_class == "UnitCategoricalSample":
        mean = q1.fprop(x1, deterministic=deterministic)
        output = mean * (T.log(mean + epsilon()) + T.log(q1.k))
        return T.sum(output, axis=1)

    elif q1_class == "Kumaraswamy" and q2_class == "UnitBetaSample":
        """
        [Naelisnick+ 2016] Deep Generative Models with Stick-Breaking Priors
        """
        M = 10
        euler_gamma = 0.57721

        a, b = q1.fprop(x1, deterministic=deterministic)

        def taylor(m, a, b):
            return 1. / (m + a * b) * q2._beta_func(m / a, b)
        kl, _ = theano.scan(fn=taylor,
                            sequences=T.arange(1, M + 1),
                            non_sequences=[a, b])
        kl = T.sum(kl, axis=0)
        kl *= (q2.beta - 1) * b
        kl += ((a - q2.alpha) / a + epsilon()) * \
            (-euler_gamma - psi(b) - 1. / (b + epsilon()))
        kl += T.log(a * b + epsilon()) + \
            T.log(q2._beta_func(q2.alpha, q2.beta) + epsilon())
        kl += -(b - 1) / (b + epsilon())
        return T.sum(kl, axis=1)

    elif q1_class == "Gamma" and q2_class == "UnitGammaSample":
        """
        https://arxiv.org/pdf/1611.01437.pdf
        """
        alpha1, beta1 = q1.fprop(x1, deterministic=deterministic)
        alpha2 = T.ones_like(alpha1)
        beta2 = T.ones_like(beta1)
        output = (alpha1 - alpha2) * psi(alpha1)
        output += -T.gammaln(alpha1) + T.gammaln(alpha2)
        output += alpha2 * (T.log(beta1 + epsilon()) - T.log(beta2 + epsilon()))
        output += alpha1 * (beta2 - beta1) / (beta1 + epsilon())
        return T.sum(output, axis=1)

    elif q1_class == "Beta" and q2_class == "UnitBetaSample":
        """
        http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/
        """
        alpha1, beta1 = q1.fprop(x1, deterministic=deterministic)
        alpha2 = T.ones_like(alpha1) * q2.alpha
        beta2 = T.ones_like(beta1) * q2.beta
        output = T.gammaln(alpha1 + beta1) - \
            T.gammaln(alpha2 + beta2) - \
            (T.gammaln(alpha1) + T.gammaln(beta1)) + \
            (T.gammaln(alpha2) + T.gammaln(beta2)) + \
            (alpha1 - alpha2) * (psi(alpha1) - psi(alpha1 + beta1)) + \
            (beta1 - beta2) * (psi(beta1) - psi(alpha1 + beta1))
        return T.sum(output, axis=1)

    elif q1_class == "Dirichlet" and q2_class == "UnitDirichletSample":
        """
        http://bariskurt.com/kullback-leibler-divergence-between-two-dirichlet-and-beta-distributions/
        """
        alpha1 = q1.fprop(x1, deterministic=deterministic)
        alpha1 = alpha1.reshape((alpha1.shape[0],
                                 alpha1.shape[1] / q1.k, q1.k))
        alpha2 = T.ones_like(alpha1) * q2.alpha
        output = T.gammaln(T.sum(alpha1, axis=-1)) - \
            T.gammaln(T.sum(alpha2, axis=-1)) - \
            T.sum(T.gammaln(alpha1), axis=-1) + \
            T.sum(T.gammaln(alpha2), axis=-1) + \
            T.sum((alpha1 - alpha2)
                  * (psi(alpha1) - psi(T.sum(alpha1, axis=-1, keepdims=True))),
                  axis=-1)
        return T.sum(output, axis=1)

    elif (q1_class == "MultiDistributions") and (
            q2_class == "MultiPriorDistributions"):
        """
        PixelVAE
        https://arxiv.org/abs/1611.05013
        """
        all_kl = 0
        for i, q, p in zip(range(len(q1.distributions[:-1])),
                           q1.distributions[:-1],
                           reversed(q2.distributions)):
            if i == 0:
                _x = x1
            else:
                _x = q1.sample_mean_given_x(x1, layer_id=i - 1)[-1]
            z = q1.sample_given_x(x1, layer_id=i + 1)[-1]
            kl = analytical_kl(q, p, given=[tolist(_x), tolist(z)])
            all_kl += kl
        _x = q1.sample_mean_given_x(x1, layer_id=-2)[-1]
        kl = analytical_kl(q1.distributions[-1], q2.prior,
                           given=[tolist(_x), None])
        all_kl += kl
        return all_kl

    elif q1_class == "MultiDistributions":
        if len(q1.distributions) >= 2:
            _x1 = q1.sample_given_x(x1, layer_id=-2)[-1]
        else:
            _x1 = x1
        return analytical_kl(q1.distributions[-1], q2,
                             given=[tolist(_x1), x2],
                             deterministic=deterministic)

    raise Exception("You cannot use this distribution as q or prior, "
                    "got %s and %s." % (q1_class, q2_class))
def multinomial_coefficient(Obs, K, num_Obs):
    Ns_p1 = T.dot(Obs, T.ones((K, 1))) + T.ones((num_Obs, 1))
    Obs_p1 = Obs + T.ones((num_Obs, K))
    lnmlti = T.gammaln(Ns_p1) - T.dot(T.gammaln(Obs_p1), T.ones((K, 1)))
    return T.exp(lnmlti)
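# Hedged spot check of multinomial_coefficient's gammaln identity
# (self-contained NumPy version; 10! / (2! 3! 5!) = 2520):
import numpy as np
from scipy.special import gammaln

obs = np.array([2., 3., 5.])
lnmlti = gammaln(obs.sum() + 1) - gammaln(obs + 1).sum()
print(np.isclose(np.exp(lnmlti), 2520.0))  # True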