def gaussian_kl_loss(mx, Sx, mt, St):
    ''' Returns KL ( Normal(mx, Sx) || Normal(mt, St) ) '''
    if St is None:
        target_samples = mt
        mt, St = empirical_gaussian_params(target_samples)

    if Sx is None:
        # evaluate empirical KL (expectation over the rolled out samples)
        x = mx
        mx, Sx = empirical_gaussian_params(x)

        def logprob(x, m, S):
            delta = x - m
            L = cholesky(S)
            beta = solve_lower_triangular(L, delta.T).T
            lp = -0.5 * tt.square(beta).sum(-1)
            lp -= tt.sum(tt.log(tt.diagonal(L)))
            lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(
                theano.config.floatX)
            return lp

        return (logprob(x, mx, Sx) - logprob(x, mt, St)).mean(0)
    else:
        delta = mt - mx
        Stinv = matrix_inverse(St)
        kl = tt.log(det(St)) - tt.log(det(Sx))
        kl += trace(Stinv.dot(delta.T.dot(delta) + Sx - St))
        return 0.5 * kl
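For reference (not part of the original snippet), the analytic branch above implements the standard closed-form KL divergence between k-dimensional Gaussians:

\[
\mathrm{KL}\big(\mathcal{N}(m_x, S_x)\,\|\,\mathcal{N}(m_t, S_t)\big)
= \tfrac{1}{2}\Big[\operatorname{tr}\!\big(S_t^{-1} S_x\big)
+ (m_t - m_x)^{\top} S_t^{-1} (m_t - m_x)
- k + \ln\tfrac{\det S_t}{\det S_x}\Big].
\]

The code folds the quadratic and the "-k" terms into the single trace expression, using tr(S_t^{-1} delta delta^T) = delta^T S_t^{-1} delta (with delta held as a row matrix of mean differences) and tr(S_t^{-1} S_t) = k.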
def quadratic_saturating_loss(mx, Sx, target, Q, *args, **kwargs):
    '''
        Squashing loss penalty function
        c(x) = ( 1 - e^(-0.5*quadratic_loss(x, target)) )
    '''
    if Sx is None:
        if mx.ndim == 1:
            mx = mx[None, :]
        delta = mx - target[None, :]
        deltaQ = delta.dot(Q)
        cost = 1.0 - tt.exp(-0.5 * tt.batched_dot(deltaQ, delta))
        return cost
    else:
        # stochastic case (moment matching)
        delta = mx - target
        SxQ = Sx.dot(Q)
        EyeM = tt.eye(mx.shape[0])
        IpSxQ = EyeM + SxQ
        Ip2SxQ = EyeM + 2 * SxQ
        S1 = tt.dot(Q, matrix_inverse(IpSxQ))
        S2 = tt.dot(Q, matrix_inverse(Ip2SxQ))
        # S1 = solve(IpSxQ.T, Q.T).T
        # S2 = solve(Ip2SxQ.T, Q.T).T
        # mean
        m_cost = -tt.exp(-0.5 * delta.dot(S1).dot(delta)) / tt.sqrt(
            det(IpSxQ))
        # var
        s_cost = tt.exp(-delta.dot(S2).dot(delta)) / tt.sqrt(
            det(Ip2SxQ)) - m_cost**2
        return 1.0 + m_cost, s_cost
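For reference, the moment-matching branch follows the expected saturating cost used in PILCO-style derivations (Deisenroth's thesis): for x ~ N(m, S) and c(x) = 1 - exp(-1/2 (x-t)^T Q (x-t)),

\[
\mathbb{E}[c(x)] = 1 - \big|I + S Q\big|^{-1/2}
\exp\!\Big(-\tfrac{1}{2}(m-t)^{\top} Q (I + S Q)^{-1} (m-t)\Big),
\]

which is exactly 1.0 + m_cost above; s_cost is the corresponding variance E[c(x)^2] - E[c(x)]^2, obtained from the same identity with I + 2SQ in place of I + SQ.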
# Assumed imports for this standalone test (Theano's own test utilities):
import numpy as np
import theano
from theano import config, tensor
from theano.tensor.nlinalg import det
from theano.tests import unittest_tools as utt


def test_det_shape():
    rng = np.random.RandomState(utt.fetch_seed())

    r = rng.randn(5, 5).astype(config.floatX)
    x = tensor.matrix()
    f = theano.function([x], det(x))
    f_shape = theano.function([x], det(x).shape)
    assert np.all(f(r).shape == f_shape(r))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * T.log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * T.log(2) - n * T.log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        T.all(eigh(X)[0] > 0),
        T.eq(X, X.T),
        n > (p - 1))
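The Wishart logp variants collected here all evaluate the same density (they differ only in the positivity and symmetry checks passed to bound and in the tensor-module alias): for a p x p positive-definite X with scale V and degrees of freedom n,

\[
\ln p(X \mid V, n)
= \frac{(n - p - 1)\ln|X| - \operatorname{tr}\!\big(V^{-1}X\big) - n p \ln 2 - n \ln|V|}{2}
- \ln \Gamma_p\!\Big(\frac{n}{2}\Big),
\]

where Gamma_p is the multivariate gamma function (multigammaln returns its logarithm).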
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * tt.log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * tt.log(2) - n * tt.log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        matrix_pos_def(X),
        tt.eq(X, X.T),
        n > (p - 1))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * log(2) - n * log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)),
        all(gt(eigh(X)[0], 0)),
        eq(X, X.T))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * log(2) - n * log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        n > (p - 1))
def logp(self, X):
    nu = self.nu
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((nu - p - 1) * tt.log(IXI)
                  - trace(matrix_inverse(V).dot(X))
                  - nu * p * tt.log(2) - nu * tt.log(IVI)
                  - 2 * multigammaln(nu / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 nu > (p - 1),
                 broadcast_conditions=False)
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((n - p - 1) * tt.log(IXI)
                  - trace(matrix_inverse(V).dot(X))
                  - n * p * tt.log(2) - n * tt.log(IVI)
                  - 2 * multigammaln(n / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 n > (p - 1))
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        ((n - p - 1) * log(IXI)
         - trace(matrix_inverse(V).dot(X))
         - n * p * log(2) - n * log(IVI)
         - 2 * multigammaln(n / 2., p)) / 2,
        gt(n, (p - 1)),
        all(gt(eigh(X)[0], 0)),
        eq(X, X.T))
def logNormalPDFmat(X, Mu, XChol, xDim):
    ''' Use this version when X is a matrix [N x xDim] '''
    Lambda = Tla.matrix_inverse(T.dot(XChol, T.transpose(XChol)))
    XMu = X - Mu
    return (-0.5 * T.dot(XMu, T.dot(Lambda, T.transpose(XMu)))
            + 0.5 * X.shape[0] * T.log(Tla.det(Lambda))
            - 0.5 * np.log(2 * np.pi) * X.shape[0] * xDim)
def _compile_theano_functions(self):
    p = self.number_dense_jacob_columns
    u = tt.vector('u')
    y = self.generator(u, self.constants)
    u_rep = tt.tile(u, (p, 1))
    y_rep = self.generator(u_rep, self.constants)
    diag_jacob = tt.grad(tt.sum(y), u)[p:]
    m = tt.zeros((p, u.shape[0]))
    m = tt.set_subtensor(m[:p, :p], tt.eye(p))
    dense_jacob = tt.Rop(y_rep, u_rep, m).T
    energy = self.base_energy(u) + (
        0.5 * tt.log(nla.det(
            tt.eye(p) + (dense_jacob.T / diag_jacob**2).dot(dense_jacob)
        )) + tt.log(diag_jacob).sum()
    )
    energy_grad = tt.grad(energy, u)
    dy_du = tt.join(1, dense_jacob, tt.diag(diag_jacob))
    self.generator_func = _timed_func_compilation(
        [u], y, 'generator function')
    self.generator_jacob = _timed_func_compilation(
        [u], dy_du, 'generator Jacobian')
    self._energy_grad = _timed_func_compilation(
        [u], energy_grad, 'energy gradient')
    self.base_energy_func = _timed_func_compilation(
        [u], self.base_energy(u), 'base energy function')
def likelihood(f, l, R, mu, eps, sigma2, lambda_1=1e-4):
    # The similarity matrix W is a linear combination of the slices in R
    W = T.tensordot(R, mu, axes=1)

    # The following indices correspond to labeled and unlabeled examples
    labeled = T.eq(l, 1).nonzero()

    # Calculating the graph Laplacian of W
    D = T.diag(W.sum(axis=0))
    L = D - W

    # The covariance (or kernel) matrix is the inverse of the (regularized) Laplacian
    epsI = eps * T.eye(L.shape[0])
    rL = L + epsI
    Sigma = nlinalg.matrix_inverse(rL)

    # The marginal density of labeled examples uses Sigma_LL as covariance (sub-)matrix
    Sigma_LL = Sigma[labeled][:, labeled][:, 0, :]

    # We also consider additive Gaussian noise with variance sigma2
    K_L = Sigma_LL + (sigma2 * T.eye(Sigma_LL.shape[0]))

    # Calculating the inverse and the determinant of K_L
    iK_L = nlinalg.matrix_inverse(K_L)
    dK_L = nlinalg.det(K_L)

    f_L = f[labeled]

    # The (L1-regularized) log-likelihood is the sum of the following four terms
    term_A = - (1 / 2) * f_L.dot(iK_L.dot(f_L))
    term_B = - (1 / 2) * T.log(dK_L)
    term_C = - (1 / 2) * T.log(2 * np.pi)
    term_D = - lambda_1 * T.sum(abs(mu))

    return term_A + term_B + term_C + term_D
def evaluateLogDensity(self, X, Y):
    # This is the log density of the generative model (*not* negated)
    Ypred = theano.clone(self.rate, replace={self.Xsamp: X})
    resY = Y - Ypred
    resX = X[1:] - T.dot(X[:-1], self.A.T)
    resX0 = X[0] - self.x0

    LatentDensity = (
        -0.5 * T.dot(T.dot(resX0, self.Lambda0), resX0.T)
        - 0.5 * (resX * T.dot(resX, self.Lambda)).sum()
        + 0.5 * T.log(Tla.det(self.Lambda)) * (Y.shape[0] - 1)
        + 0.5 * T.log(Tla.det(self.Lambda0))
        - 0.5 * (self.xDim) * np.log(2 * np.pi) * Y.shape[0])
    PoisDensity = T.sum(Y * T.log(Ypred) - Ypred - T.gammaln(Y + 1))
    LogDensity = LatentDensity + PoisDensity
    return LogDensity
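Written out, the latent (linear-dynamics) term computed above is, for x_t in R^{d_x}, t = 0, ..., T-1, initial precision Lambda_0, transition precision Lambda, and initial mean x0:

\[
\log p(x_{0:T-1}) = -\tfrac{1}{2}(x_0 - \mu_0)^{\top}\Lambda_0(x_0 - \mu_0)
- \tfrac{1}{2}\sum_{t=1}^{T-1}(x_t - A x_{t-1})^{\top}\Lambda(x_t - A x_{t-1})
+ \tfrac{T-1}{2}\ln|\Lambda| + \tfrac{1}{2}\ln|\Lambda_0|
- \tfrac{d_x T}{2}\ln 2\pi,
\]

and PoisDensity adds the Poisson observation term \(\sum_{t,i} \big(y_{ti}\ln \hat{y}_{ti} - \hat{y}_{ti} - \ln y_{ti}!\big)\).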
def evaluateLogDensity(self, X, Y):
    Ypred = theano.clone(self.rate, replace={self.Xsamp: X})
    resY = Y - Ypred
    resX = X[1:] - T.dot(X[:(X.shape[0] - 1)], self.A.T)
    resX0 = X[0] - self.x0

    LogDensity = (
        -(0.5 * T.dot(resY.T, resY) * T.diag(self.Rinv)).sum()
        - (0.5 * T.dot(resX.T, resX) * self.Lambda).sum()
        - 0.5 * T.dot(T.dot(resX0, self.Lambda0), resX0.T))
    LogDensity += (
        0.5 * (T.log(self.Rinv)).sum() * Y.shape[0]
        + 0.5 * T.log(Tla.det(self.Lambda)) * (Y.shape[0] - 1)
        + 0.5 * T.log(Tla.det(self.Lambda0))
        - 0.5 * (self.xDim + self.yDim) * np.log(2 * np.pi) * Y.shape[0])
    return LogDensity
def test_det():
    rng = np.random.RandomState(utt.fetch_seed())

    r = rng.randn(5, 5).astype(config.floatX)
    x = tensor.matrix()
    f = theano.function([x], det(x))
    assert np.allclose(np.linalg.det(r), f(r))
def logp(self, value):
    mu = self.mu
    tau = self.tau

    delta = value - mu
    k = tau.shape[0]

    return 1 / 2. * (-k * log(2 * pi) + log(det(tau)) -
                     dot(delta.T, dot(tau, delta)))
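The multivariate-normal logp variants in this collection (and the logp_normal helpers further down) all evaluate the same expression, the Gaussian log-density parameterised by the precision matrix tau:

\[
\ln \mathcal{N}\big(x \mid \mu, \tau^{-1}\big)
= \tfrac{1}{2}\Big[\ln|\tau| - k \ln 2\pi - (x-\mu)^{\top}\tau\,(x-\mu)\Big],
\]

where k is the dimensionality; some variants write +ln|tau| as -ln(1/|tau|), and one switches to logdet when gpu_compat is off.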
def logp(self, X):
    nu = self.nu
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(((nu - p - 1) * tt.log(IXI)
                  - trace(matrix_inverse(V).dot(X))
                  - nu * p * tt.log(2) - nu * tt.log(IVI)
                  - 2 * multigammaln(nu / 2., p)) / 2,
                 matrix_pos_def(X),
                 tt.eq(X, X.T),
                 nu > (p - 1),
                 broadcast_conditions=False)
def logp(self, value):
    mu = self.mu
    tau = self.tau

    delta = value - mu
    k = tau.shape[0]

    return 1 / 2. * (-k * log(2 * pi) + log(det(tau)) -
                     dot(delta.T, dot(tau, delta)))
def compute_LogDensity_Yterms(self, Y=None, X=None, padleft=False,
                              persamp=False):
    """
    TODO: Write docstring

    The persamp option allows this function to return a list of the costs
    computed for each sample. This is useful for implementing more
    sophisticated optimization procedures such as NVIL.

    NOTE: Please accompany every compute function with an eval function
    that allows evaluation from an external program. compute functions
    assume by default that the 0th dimension of the data arrays is the
    trial dimension. If you deal with a single trial and the trial
    dimension is omitted, set padleft to True so that it is padded in.
    """
    if Y is None:
        Y = self.Y
    if X is None:
        X = self.X
    if padleft:
        Y = T.shape_padleft(Y, 1)

    Nsamps = Y.shape[0]
    Tbins = Y.shape[1]

    Mu = theano.clone(self.MuY, replace={self.X: X})
    DeltaY = Y - Mu

    # TODO: Implement SigmaInv dependent on X
    if persamp:
        L1 = -0.5 * T.sum(DeltaY * T.dot(DeltaY, self.SigmaInv),
                          axis=(1, 2))
        L2 = 0.5 * T.log(Tnla.det(self.SigmaInv)) * Tbins
    else:
        L1 = -0.5 * T.sum(DeltaY * T.dot(DeltaY, self.SigmaInv))
        L2 = 0.5 * T.log(Tnla.det(self.SigmaInv)) * Nsamps * Tbins
    L = L1 + L2

    return L, L1, L2
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = T.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.) * T.log(det(X))
    return bound(result,
                 T.all(X <= 1), T.all(X >= -1),
                 n > 0)
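For reference, the LKJ correlation-matrix density (Lewandowski, Kurowicka & Joe, 2009) that these logp methods implement is, up to its normalizing constant,

\[
p(X \mid \eta) \propto |X|^{\eta - 1},
\]

so the log-density is the normalizing constant plus (eta - 1) ln|X|; in the older snippets the shape parameter is named n rather than eta.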
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = t.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.) * log(det(X))
    return bound(result,
                 n > 0,
                 all(le(X, 1)), all(ge(X, -1)))
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = T.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.0) * T.log(det(X))
    return bound(result,
                 T.all(X <= 1), T.all(X >= -1),
                 n > 0)
def evaluateLogDensity(self, X, Y):
    Ypred = theano.clone(self.rate, replace={self.Xsamp: X})
    resY = Y - Ypred
    resX = X[1:] - T.dot(X[:(X.shape[0] - 1)], self.A.T)
    resX0 = X[0] - self.x0

    LogDensity = (
        -(0.5 * T.dot(resY.T, resY) * T.diag(self.Rinv)).sum()
        - (0.5 * T.dot(resX.T, resX) * self.Lambda).sum()
        - 0.5 * T.dot(T.dot(resX0, self.Lambda0), resX0.T))
    LogDensity += (
        0.5 * (T.log(self.Rinv)).sum() * Y.shape[0]
        + 0.5 * T.log(Tla.det(self.Lambda)) * (Y.shape[0] - 1)
        + 0.5 * T.log(Tla.det(self.Lambda0))
        - 0.5 * (self.xDim + self.yDim) * np.log(2 * np.pi) * Y.shape[0])
    return LogDensity
def logp(self, value):
    mu = self.mu
    tau = self.tau

    delta = value - mu
    k = tau.shape[0]

    result = k * tt.log(2 * np.pi) + tt.log(1. / det(tau))
    result += (delta.dot(tau) * delta).sum(axis=delta.ndim - 1)
    return -1 / 2. * result
def logp(self, value):
    mu = self.mu
    tau = self.tau

    delta = value - mu
    k = tau.shape[0]

    result = k * log(2 * pi) + log(1. / det(tau))
    result += (delta.dot(tau) * delta).sum(axis=delta.ndim - 1)
    return -1 / 2. * result
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = t.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.0) * log(det(X))
    return bound(result,
                 n > 0,
                 all(le(X, 1)), all(ge(X, -1)))
def logp_normal(mu, tau, value):
    # log probability of individual samples
    k = tau.shape[0]

    def delta(mu):
        return value - mu
    # delta = lambda mu: value - mu

    return (-1 / 2.) * (k * T.log(2 * np.pi)
                        + T.log(1. / det(tau))
                        + (delta(mu).dot(tau) * delta(mu)).sum(axis=1))
def logp(self, x):
    n = self.n
    p = self.p

    X = x[self.tri_index]
    X = tt.fill_diagonal(X, 1)

    result = self._normalizing_constant(n, p)
    result += (n - 1.) * tt.log(det(X))
    return bound(result,
                 tt.all(X <= 1), tt.all(X >= -1),
                 matrix_pos_def(X),
                 n > 0)
def gaussInit(muin, varin):
    d = muin.shape[0]
    vardet, varinv = nlinalg.det(varin), nlinalg.matrix_inverse(varin)
    logconst = -d / 2. * np.log(2 * PI) - .5 * T.log(vardet)

    def logP(x):
        submu = x - muin
        out = logconst - .5 * T.sum(submu * (T.dot(submu, varinv.T)), axis=1)
        return out

    return logP
def logp(self, X):
    n = self.n
    p = self.p
    V = self.V

    IVI = det(V)
    IXI = det(X)

    return bound(
        (
            (n - p - 1) * T.log(IXI)
            - trace(matrix_inverse(V).dot(X))
            - n * p * T.log(2)
            - n * T.log(IVI)
            - 2 * multigammaln(n / 2.0, p)
        ) / 2,
        T.all(eigh(X)[0] > 0),
        T.eq(X, X.T),
        n > (p - 1),
    )
def second_moments(i, j, M2, beta, R, logk_c, logk_r, z_, Sx, *args):
    # This comes from Deisenroth's thesis ( Eqs 2.51- 2.54 )
    Rij = R[i, j]
    n2 = logk_c[i] + logk_r[j]
    n2 += utils.maha(z_[i], -z_[j], 0.5 * solve(Rij, Sx))

    Q = tt.exp(n2) / tt.sqrt(det(Rij))

    # Eq 2.55
    m2 = matrix_dot(beta[i], Q, beta[j])

    m2 = theano.ifelse.ifelse(tt.eq(i, j), m2 + 1e-6, m2)
    M2 = tt.set_subtensor(M2[i, j], m2)
    return M2
def compute_LogDensity_Xterms(self, X=None, Xprime=None, padleft=False,
                              persamp=False):
    """
    Computes the symbolic log p(X, Y).

    p(X, Y) is computed using Bayes Rule: p(X, Y) = p(Y|X)p(X).
    p(X) is normal as described in help(PNLDS). p(Y|X) is py with output
    self.output(X).

    Inputs:
        X : Symbolic array of latent variables.
        Xprime : Optional symbolic array of one-step predictions of X.

    NOTE: This function is required to accept symbolic inputs not
    necessarily belonging to the class.
    """
    if X is None:
        X = self.X
    if padleft:
        X = T.shape_padleft(X, 1)

    Nsamps, Tbins = X.shape[0], X.shape[1]

    totalApred = theano.clone(self.totalA, replace={self.X: X})
    totalApred = T.reshape(totalApred,
                           [Nsamps * (Tbins - 1), self.xDim, self.xDim])
    Xprime = (T.batched_dot(
        X[:, :-1, :].reshape([Nsamps * (Tbins - 1), self.xDim]),
        totalApred) if Xprime is None else Xprime)
    Xprime = T.reshape(Xprime, [Nsamps, Tbins - 1, self.xDim])

    resX = X[:, 1:, :] - Xprime
    resX0 = X[:, 0, :] - self.x0

    # L = -0.5*(∆X_0^T·Q0^{-1}·∆X_0) - 0.5*Tr[∆X^T·Q^{-1}·∆X]
    #     + 0.5*N*log(Det[Q0^{-1}]) + 0.5*N*(T-1)*log(Det[Q^{-1}])
    #     - 0.5*N*T*d_X*log(2*Pi)
    L1 = -0.5 * (resX0 * T.dot(resX0, self.Q0Inv)).sum()
    L2 = -0.5 * (resX * T.dot(resX, self.QInv)).sum()
    L3 = 0.5 * T.log(Tnla.det(self.Q0Inv)) * Nsamps
    L4 = 0.5 * T.log(Tnla.det(self.QInv)) * (Tbins - 1) * Nsamps
    L5 = -0.5 * (self.xDim) * np.log(2 * np.pi) * Nsamps * Tbins

    LatentDensity = L1 + L2 + L3 + L4 + L5

    return LatentDensity, L1, L2, L3, L4, L5
def logp(self, value):
    mu = self.mu
    tau = self.tau

    delta = value - mu
    k = tau.shape[0]

    result = k * tt.log(2 * np.pi)
    if self.gpu_compat:
        result -= tt.log(det(tau))
    else:
        result -= logdet(tau)
    result += (delta.dot(tau) * delta).sum(axis=delta.ndim - 1)
    return -1 / 2. * result
def logp(self, x):
    n = self.n
    eta = self.eta

    X = x[self.tri_index]
    X = tt.fill_diagonal(X, 1)

    result = _lkj_normalizing_constant(eta, n)
    result += (eta - 1.) * tt.log(det(X))
    return bound(result,
                 tt.all(X <= 1), tt.all(X >= -1),
                 matrix_pos_def(X),
                 eta > 0,
                 broadcast_conditions=False)
def logp(self, x):
    n = self.n
    eta = self.eta

    X = x[self.tri_index]
    X = tt.fill_diagonal(X, 1)

    result = _lkj_normalizing_constant(eta, n)
    result += (eta - 1.) * tt.log(det(X))
    return bound(result,
                 tt.all(X <= 1), tt.all(X >= -1),
                 matrix_pos_def(X),
                 eta > 0,
                 broadcast_conditions=False)
def logp(self, value):
    S = self.Sigma
    nu = self.nu
    mu = self.mu

    d = S.shape[0]

    X = value - mu

    Q = X.dot(matrix_inverse(S)).dot(X.T).sum()
    log_det = tt.log(det(S))
    log_pdf = (gammaln((nu + d) / 2.)
               - 0.5 * (d * tt.log(np.pi * nu) + log_det)
               - gammaln(nu / 2.))
    log_pdf -= 0.5 * (nu + d) * tt.log(1 + Q / nu)
    return log_pdf
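For a single observation x, the expression above is the multivariate Student-t log-density with nu degrees of freedom, location mu, and scale matrix Sigma:

\[
\ln p(x \mid \nu, \mu, \Sigma)
= \ln\Gamma\!\Big(\tfrac{\nu+d}{2}\Big) - \ln\Gamma\!\Big(\tfrac{\nu}{2}\Big)
- \tfrac{d}{2}\ln(\nu\pi) - \tfrac{1}{2}\ln|\Sigma|
- \tfrac{\nu+d}{2}\ln\!\Big(1 + \tfrac{Q}{\nu}\Big),
\qquad Q = (x-\mu)^{\top}\Sigma^{-1}(x-\mu).
\]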
def multiNormInit_sharedParams(mean, varmat, dim):
    '''
    :param mean: theano.tensor.TensorVariable
    :param varmat: theano.tensor.TensorVariable
    :param dim: number
    :return: a function mapping an N x dim input to the per-row log-density
    '''
    d = dim
    const = -d / 2. * np.log(2 * PI) - 0.5 * T.log(T.abs_(tlin.det(varmat)))
    varinv = tlin.matrix_inverse(varmat)

    def loglik(x):
        subx = x - mean
        subxcvt = T.dot(subx, varinv)  # Nxd
        subxsqr = subx * subxcvt  # Nxd
        return -T.sum(subxsqr, axis=1) / 2. + const

    return loglik
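A minimal usage sketch for the function above, assuming the aliases of the surrounding module (T = theano.tensor, tlin = theano.tensor.nlinalg, PI = np.pi); the shared-variable values here are made up for illustration:

import numpy as np
import theano
import theano.tensor as T
import theano.tensor.nlinalg as tlin

PI = np.pi

# standard 2-D Gaussian held in shared variables
mean = theano.shared(np.zeros(2, dtype=theano.config.floatX))
varmat = theano.shared(np.eye(2, dtype=theano.config.floatX))

x = T.matrix('x')
loglik = multiNormInit_sharedParams(mean, varmat, 2)
f = theano.function([x], loglik(x))

# each row of zeros has log-density -log(2*pi) ~ -1.84
print(f(np.zeros((3, 2), dtype=theano.config.floatX)))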
def negative_log_likelihood_symbolic(L, y, mu, R, eta, eps):
    """
    Negative marginal log-likelihood in a Gaussian Process regression model.

    The marginal likelihood for a set of parameters Theta is defined as
    follows:

        \log p(y|X, \Theta) = - 1/2 y^T K_y^{-1} y - 1/2 \log |K_y|
                              - n/2 \log 2\pi,

    where K_y = K_f + \sigma^2_n I is the covariance matrix for the noisy
    targets y, and K_f is the covariance matrix for the noise-free latent f.
    """
    N = L.shape[0]

    W = T.tensordot(R, eta, axes=1)

    large_W = T.zeros((N * 2, N * 2))
    large_W = T.set_subtensor(large_W[:N, :N], 2. * mu * W)
    large_W = T.set_subtensor(large_W[N:, :N], T.diag(L))
    large_W = T.set_subtensor(large_W[:N, N:], T.diag(L))

    large_D = T.diag(T.sum(abs(large_W), axis=0))
    large_M = large_D - large_W

    PrecisionMatrix = T.inc_subtensor(large_M[:N, :N], mu * eps * T.eye(N))

    # Let's try to avoid singular matrices
    _EPSILON = 1e-8
    PrecisionMatrix += _EPSILON * T.eye(N * 2)

    # K matrix in a Gaussian Process regression model
    CovarianceMatrix = nlinalg.matrix_inverse(PrecisionMatrix)

    L_idx = L.nonzero()[0]
    y_l = y[L_idx]

    CovarianceMatrix_L = CovarianceMatrix[N + L_idx, :][:, N + L_idx]

    log_likelihood = 0.
    log_likelihood -= .5 * y_l.T.dot(CovarianceMatrix_L.dot(y_l))
    log_likelihood -= .5 * T.log(nlinalg.det(CovarianceMatrix_L))
    log_likelihood -= .5 * T.log(2 * T.pi)

    return -log_likelihood
def _compile_theano_functions(self):
    u = tt.vector('u')
    y = self.generator(u, self.constants)
    u_rep = tt.tile(u, (y.shape[0], 1))
    y_rep = self.generator(u_rep, self.constants)
    dy_du = tt.grad(
        cost=None, wrt=u_rep,
        known_grads={y_rep: tt.identity_like(y_rep)})
    energy = (self.base_energy(u) +
              0.5 * tt.log(nla.det(dy_du.dot(dy_du.T))))
    dy_du_pinv = tt.matrix('dy_du_pinv')
    energy_grad = u + tt.Lop(dy_du, u_rep, dy_du_pinv).sum(0)
    self.generator_func = _timed_func_compilation(
        [u], y, 'generator function')
    self.generator_jacob = _timed_func_compilation(
        [u], dy_du, 'generator Jacobian')
    self._energy_grad = _timed_func_compilation(
        [u, dy_du_pinv], energy_grad, 'energy gradient')
    self.base_energy_func = _timed_func_compilation(
        [u], self.base_energy(u), 'base energy function')
def _compile_theano_functions(self):
    u = tt.vector('u')
    y = self.generator(u, self.constants)
    # Jacobian dy/du is calculated by forward propagating a batch of
    # repeated input vectors, i.e. a matrix of shape
    # (output_dim, input_dim), to get a batch of repeated output vectors
    # of shape (output_dim, output_dim), and then initialising back
    # propagation of gradients from this repeated output matrix with an
    # identity matrix seed. Although convoluted, this way of computing the
    # Jacobian exploits blocked operations and gives significant speed
    # improvements over the built-in sequential scan-based Jacobian
    # calculation in Theano. See the following issue:
    # https://github.com/Theano/Theano/issues/4087
    u_rep = tt.tile(u, (y.shape[0], 1))
    y_rep = self.generator(u_rep, self.constants)
    dy_du = tt.grad(
        cost=None, wrt=u_rep,
        known_grads={y_rep: tt.identity_like(y_rep)})
    # Direct energy calculation using Jacobian Gram matrix determinant
    energy = (self.base_energy(u) +
              0.5 * tt.log(nla.det(dy_du.dot(dy_du.T))))
    # Alternative energy gradient calculation uses externally calculated
    # pseudo-inverse of Jacobian dy/du
    dy_du_pinv = tt.matrix('dy_du_pinv')
    base_energy_grad = tt.grad(self.base_energy(u), u)
    # Lop term calculates gradient of log|(dy/du) (dy/du)^T| using
    # externally calculated pseudo-inverse [(dy/du)(dy/du)^T]^(-1) (dy/du)
    energy_grad_alt = (
        base_energy_grad + tt.Lop(dy_du, u_rep, dy_du_pinv).sum(0)
    )
    self.generator_func = _timed_func_compilation(
        [u], y, 'generator function')
    self.generator_jacob = _timed_func_compilation(
        [u], dy_du, 'generator Jacobian')
    self.energy_func_direct = _timed_func_compilation(
        [u], energy, 'energy function')
    self.energy_grad_direct = _timed_func_compilation(
        [u], tt.grad(energy, u), 'energy gradient (direct)')
    self.energy_grad_alt = _timed_func_compilation(
        [u, dy_du_pinv], energy_grad_alt, 'energy gradient (alternative)')
    self.base_energy_func = _timed_func_compilation(
        [u], self.base_energy(u), 'base energy function')
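A self-contained sketch of the tile/known_grads Jacobian trick described in the comments above, cross-checked against Theano's scan-based theano.gradient.jacobian; the toy_generator and its weight matrix are made up for illustration:

import numpy as np
import theano
import theano.tensor as tt

W = np.array([[1.0, -0.5, 0.2],
              [0.3, 0.8, -1.0]], dtype=theano.config.floatX)


def toy_generator(u):
    # nonlinear map R^3 -> R^2; also works row-wise on a matrix of inputs
    return tt.tanh(u.dot(W.T))


u = tt.vector('u')
y = toy_generator(u)
u_rep = tt.tile(u, (2, 1))       # one copy of u per output dimension
y_rep = toy_generator(u_rep)     # every row of y_rep equals y
jacobian = tt.grad(cost=None, wrt=u_rep,
                   known_grads={y_rep: tt.identity_like(y_rep)})

jac_blocked = theano.function([u], jacobian)
jac_scan = theano.function([u], theano.gradient.jacobian(y, u))

u_val = np.array([0.3, -0.5, 1.1], dtype=theano.config.floatX)
print(np.allclose(jac_blocked(u_val), jac_scan(u_val)))  # True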
def evaluateLogDensity(self, X, Y):
    # This is the log density of the generative model (*not* negated)
    Ypred = theano.clone(self.rate, replace={self.Xsamp: X})
    resY = Y - Ypred
    resX = X[1:] - T.dot(X[:-1], self.A.T)
    resX0 = X[0] - self.x0

    LatentDensity = (
        -0.5 * T.dot(T.dot(resX0, self.Lambda0), resX0.T)
        - 0.5 * (resX * T.dot(resX, self.Lambda)).sum()
        + 0.5 * T.log(Tla.det(self.Lambda)) * (Y.shape[0] - 1)
        + 0.5 * T.log(Tla.det(self.Lambda0))
        - 0.5 * (self.xDim) * np.log(2 * np.pi) * Y.shape[0])
    PoisDensity = T.sum(Y * T.log(Ypred) - Ypred - T.gammaln(Y + 1))
    LogDensity = LatentDensity + PoisDensity
    return LogDensity
def logp_normal(mu, tau, value):
    # log probability of individual samples
    dim = tau.shape[0]
    delta = lambda mu: value - mu
    return -0.5 * (dim * tt.log(2 * np.pi)
                   + tt.log(1 / det(tau))
                   + (delta(mu).dot(tau) * delta(mu)).sum(axis=1))