import edward as ed
import numpy as np
import tensorflow as tf
from edward.models import (Categorical, Dirichlet, InverseGamma, Mixture,
                           MultivariateNormalDiag, Normal, ParamMixture)

import mvn   # local SciPy-backed multivariate normal wrapper under test
import util  # local helpers (PMI computation)


def test_mvn_same_as_edward_mvn():
    loc = np.zeros(5)
    scale = np.ones(5)
    A = mvn.mvn(loc=loc, scale=scale)
    B = MultivariateNormalDiag(loc=loc, scale_diag=scale)
    M = np.random.rand(5, 5)
    tf.InteractiveSession()
    # Compare summed log-densities. Take the absolute difference: without
    # abs() the assertion would pass whenever A's sum is merely smaller.
    assert abs(tf.reduce_sum(A.log_prob(M)).eval() -
               tf.reduce_sum(B.log_prob(M)).eval()) < 1e-6
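# The `mvn` module imported above is a local wrapper not shown in this file.
# A minimal commented sketch of what it might look like, assuming it mirrors
# the slice of Edward's MultivariateNormalDiag API the tests rely on (a
# diagonal Gaussian with a row-wise log_prob); the real module may differ:
#
#     from scipy.stats import multivariate_normal
#
#     class mvn(object):
#         def __init__(self, loc, scale):
#             # `scale` holds per-dimension standard deviations, so the
#             # diagonal covariance is scale**2.
#             self.dist = multivariate_normal(mean=loc, cov=np.square(scale))
#
#         def log_prob(self, x):
#             # Log density for each row of x, matching Edward's shape.
#             return self.dist.logpdf(x)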
def test_mvn_same_as_edward_log_prob():
    loc = np.zeros(5)
    scale = np.ones(5)
    A = mvn.mvn(loc=loc, scale=scale)
    B = MultivariateNormalDiag(loc=loc, scale_diag=scale)
    samples = np.random.rand(5, 5)
    tf.InteractiveSession()
    print('Log probability of multivariate normal: SciPy vs. Edward')
    print_err(tf.reduce_sum(A.log_prob(samples)).eval(),
              tf.reduce_sum(B.log_prob(samples)).eval())
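# `print_err` is assumed to be a small error-reporting helper defined
# elsewhere in the repository; a hypothetical sketch of its shape:
#
#     def print_err(expected, actual):
#         # Report absolute and relative error between two scalars.
#         err = abs(expected - actual)
#         print('expected %g, actual %g, abs err %g, rel err %g'
#               % (expected, actual, err, err / max(abs(expected), 1e-12)))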
class SimpleGaussianLDA(object):
    """Gaussian LDA: each topic is a diagonal Gaussian over word embeddings."""

    def __init__(self, K, D, N, nu, use_param=False):
        self.K = K    # number of topics
        self.D = D    # number of documents
        self.N = N    # number of words in each document
        self.nu = nu  # word-embedding dimensionality
        self.alpha = alpha = tf.zeros([K]) + 0.1  # symmetric Dirichlet prior
        # Per-topic diagonal Gaussian parameters.
        self.sigmasq = InverseGamma(tf.ones(nu), tf.ones(nu), sample_shape=K)
        self.sigma = sigma = tf.sqrt(self.sigmasq)
        self.mu = mu = Normal(tf.zeros(nu), tf.ones(nu), sample_shape=K)
        self.theta = theta = [None] * D
        self.z = z = [None] * D
        self.w = w = [None] * D
        for d in range(D):
            theta[d] = Dirichlet(alpha)  # per-document topic proportions
            if use_param:
                w[d] = ParamMixture(mixing_weights=theta[d],
                                    component_params={'loc': mu,
                                                      'scale_diag': sigma},
                                    component_dist=MultivariateNormalDiag,
                                    sample_shape=N[d])
                z[d] = w[d].cat
            else:
                z[d] = Categorical(probs=theta[d], sample_shape=N[d])
                components = [
                    MultivariateNormalDiag(loc=tf.gather(mu, k),
                                           scale_diag=tf.gather(sigma, k),
                                           sample_shape=N[d])
                    for k in range(K)]
                w[d] = Mixture(cat=z[d], components=components,
                               sample_shape=N[d])

    def _run_inference(self, T, S=None):
        tf.global_variables_initializer().run()
        for n in range(self.inference.n_iter):
            info_dict = self.inference.update()
            self.inference.print_progress(info_dict)
        self.inference.finalize()

    def klqp(self, docs, S, T, wordVec):
        K = self.K
        D = self.D
        nu = self.nu
        self.latent_vars = latent_vars = {}
        training_data = {}
        # Variational posteriors over the topic means and variances.
        qmu = Normal(loc=tf.Variable(tf.random_normal([K, nu])),
                     scale=tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
        latent_vars[self.mu] = qmu
        qsigmasq = InverseGamma(
            tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))),
            tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
        latent_vars[self.sigmasq] = qsigmasq
        for d in range(D):
            training_data[self.w[d]] = docs[d]
        self.qmu = qmu
        self.qsigma = qsigma = tf.sqrt(qsigmasq)
        self.qw = MultivariateNormalDiag(loc=qmu, scale_diag=qsigma)
        # Log density of every vocabulary word under each fitted topic.
        V = len(wordVec)
        logprobs = [None] * V
        for i in range(V):
            logprobs[i] = self.qw.log_prob(wordVec[i])
        self.qbeta = tf.convert_to_tensor(logprobs)  # shape [V, K]
        self.inference = ed.KLqp(latent_vars, data=training_data)
        self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
        self._run_inference(T)

    def getTopWords(self, wordVec, tokens):
        K = self.K
        V = len(wordVec)
        qbeta_sample = self.qbeta.eval()
        prob = [None] * K
        for k in range(K):
            prob[k] = qbeta_sample[:, k]
        self.tokens_probs = tokens_probs = [None] * K
        self.top_words = [None] * K
        for k in range(K):
            tokens_probs[k] = dict(zip(range(V), prob[k]))
            # The 15 most probable vocabulary ids for topic k.
            newdict = sorted(tokens_probs[k], key=tokens_probs[k].get,
                             reverse=True)[:15]
            self.top_words[k] = newdict
            print('topic %d' % k)
            for Id in newdict:
                print(tokens[Id], tokens_probs[k][Id])

    def getPMI(self, comatrix):
        K = self.K
        self.pmis = pmis = [None] * K
        for k in range(K):
            pmis[k] = util.pmi(comatrix, self.top_words[k])
            print('topic %d pmi: %f' % (k, pmis[k]))
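# Hypothetical usage sketch, not part of the original module: wires the class
# together end to end with random stand-ins. `docs`, `wordVec`, `tokens`, and
# `comatrix` would normally come from the repo's preprocessing; the embedding
# dimensionality nu=50 and all sizes below are illustrative assumptions.
if __name__ == '__main__':
    K, nu = 10, 50
    docs = [np.random.rand(n, nu).astype(np.float32) for n in (120, 80, 200)]
    N = [doc.shape[0] for doc in docs]
    wordVec = np.random.rand(500, nu).astype(np.float32)  # one row per word

    tf.InteractiveSession()
    model = SimpleGaussianLDA(K=K, D=len(docs), N=N, nu=nu, use_param=True)
    model.klqp(docs, S=5, T=1000, wordVec=wordVec)
    # With real data, inspect the learned topics:
    # model.getTopWords(wordVec, tokens)  # tokens: vocab id -> word string
    # model.getPMI(comatrix)              # comatrix: co-occurrence counts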