def klqp(self, docs, S, T, wordVec):
    K = self.K
    D = self.D
    nu = self.nu
    self.latent_vars = latent_vars = {}
    training_data = {}
    qmu = Normal(loc=tf.Variable(tf.random_normal([K, nu])),
                 scale=tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
    latent_vars[self.mu] = qmu
    qpsi0 = tf.Variable(tf.eye(nu, batch_shape=[K]))
    Ltril = tf.linalg.LinearOperatorLowerTriangular(
        ds.matrix_diag_transform(qpsi0, transform=tf.nn.softplus)).to_dense()
    qsigma = WishartCholesky(df=tf.ones([K]) * nu,
                             scale=Ltril,
                             cholesky_input_output_matrices=True)
    latent_vars[self.sigma] = qsigma
    for d in range(D):
        training_data[self.w[d]] = docs[d]
    self.qmu = qmu
    # self.qsigma_inv = qsigma_inv = tf.matrix_inverse(qsigma)
    self.qw = MultivariateNormalTriL(loc=qmu, scale_tril=qsigma)
    V = len(wordVec)
    logprobs = [None] * V
    for i in range(V):
        logprobs[i] = self.qw.log_prob(wordVec[i])
    self.qbeta = tf.convert_to_tensor(logprobs)
    self.inference = ed.KLqp(latent_vars, data=training_data)
    self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
    self.__run_inference__(T)
def main(_):
    ed.set_seed(42)

    # DATA
    x_data = build_toy_dataset(FLAGS.N, FLAGS.V)

    # MODEL
    x_ph = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.V])

    # Form (N, V, V) covariance, one matrix per data point.
    K = tf.stack([rbf(tf.reshape(xn, [FLAGS.V, 1])) + tf.diag([1e-6, 1e-6])
                  for xn in tf.unstack(x_ph)])
    f = MultivariateNormalTriL(loc=tf.zeros([FLAGS.N, FLAGS.V]),
                               scale_tril=tf.cholesky(K))
    x = Poisson(rate=tf.exp(f))

    # INFERENCE
    qf = Normal(loc=tf.get_variable("qf/loc", [FLAGS.N, FLAGS.V]),
                scale=tf.nn.softplus(
                    tf.get_variable("qf/scale", [FLAGS.N, FLAGS.V])))

    inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data})
    inference.run(n_iter=5000)
def gaussian_process_classification_example():
    ed.set_seed(42)

    data, metadata = crabs('~/data')
    X_train = data[:100, 3:]
    y_train = data[:100, 1]

    N = X_train.shape[0]  # Number of data points.
    D = X_train.shape[1]  # Number of features.

    print('Number of data points: {}'.format(N))
    print('Number of features: {}'.format(D))

    # --------------------
    # Model.
    X = tf.placeholder(tf.float32, [N, D])
    f = MultivariateNormalTriL(loc=tf.zeros(N), scale_tril=tf.cholesky(rbf(X)))
    y = Bernoulli(logits=f)

    # --------------------
    # Inference.
    # Perform variational inference.
    qf = Normal(loc=tf.get_variable('qf/loc', [N]),
                scale=tf.nn.softplus(tf.get_variable('qf/scale', [N])))

    inference = ed.KLqp({f: qf}, data={X: X_train, y: y_train})
    inference.run(n_iter=5000)
def __init__(self, latent_vars, data=None): """Create an inference algorithm. Args: latent_vars: list of RandomVariable or dict of RandomVariable to RandomVariable. Collection of random variables to perform inference on. If list, each random variable will be implictly optimized using a `MultivariateNormalTriL` random variable that is defined internally (with unconstrained support). If dictionary, each random variable must be a `MultivariateNormalDiag`, `MultivariateNormalTriL`, or `Normal` random variable. """ if isinstance(latent_vars, list): with tf.variable_scope(None, default_name="posterior"): latent_vars = {rv: MultivariateNormalTriL( loc=tf.Variable(tf.random_normal(rv.batch_shape)), scale_tril=tf.Variable(tf.random_normal( rv.batch_shape.concatenate(rv.batch_shape[-1])))) for rv in latent_vars} elif isinstance(latent_vars, dict): for qz in six.itervalues(latent_vars): if not isinstance( qz, (MultivariateNormalDiag, MultivariateNormalTriL, Normal)): raise TypeError("Posterior approximation must consist of only " "MultivariateNormalDiag, MultivariateTriL, or " "Normal random variables.") # call grandparent's method; avoid parent (MAP) super(MAP, self).__init__(latent_vars, data)
def main(_):
    ed.set_seed(42)

    # MODEL
    z = MultivariateNormalTriL(
        loc=tf.ones(2),
        scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

    # INFERENCE
    qz = Empirical(params=tf.get_variable("qz/params", [1000, 2]))
    inference = ed.HMC({z: qz})
    inference.run()

    # CRITICISM
    sess = ed.get_session()
    mean, stddev = sess.run([qz.mean(), qz.stddev()])
    print("Inferred posterior mean:")
    print(mean)
    print("Inferred posterior stddev:")
    print(stddev)

    fig, ax = plt.subplots()
    trace = sess.run(qz.params)
    ax.scatter(trace[:, 0], trace[:, 1], marker=".")
    mvn_plot_contours(z, ax=ax)
    plt.show()
def test_multivariate_normal_tril(self):
    with self.test_session() as sess:
        N, D, w_true, X_train, y_train, X, w, b, y = self._setup()

        # INFERENCE. Initialize scales at identity to verify if we
        # learned an approximately zero determinant.
        qw = MultivariateNormalTriL(
            loc=tf.Variable(tf.random_normal([D])),
            scale_tril=tf.Variable(tf.diag(tf.ones(D))))
        qb = MultivariateNormalTriL(
            loc=tf.Variable(tf.random_normal([1])),
            scale_tril=tf.Variable(tf.diag(tf.ones(1))))

        inference = ed.Laplace({w: qw, b: qb}, data={X: X_train, y: y_train})
        inference.run(n_iter=100)

        self._test(sess, qw, qb, w_true)
def construct_model():
    nku = len(Ku)
    nkv = len(Kv)
    obs = tf.placeholder(tf.float32, R_.shape)
    Ug = TransformedDistribution(
        distribution=Normal(tf.zeros([nku]), tf.ones([nku])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    Vg = TransformedDistribution(
        distribution=Normal(tf.zeros([nkv]), tf.ones([nkv])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    Ua = TransformedDistribution(
        distribution=Normal(tf.zeros([1]), tf.ones([1])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    Va = TransformedDistribution(
        distribution=Normal(tf.zeros([1]), tf.ones([1])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    cKu = tf.cholesky(Ku + tf.eye(I) / Ua)  # TODO: rank 1 chol update
    cKv = tf.cholesky(Kv + tf.eye(J) / Va)
    Uw1 = MultivariateNormalTriL(
        tf.zeros([L, I]),
        tf.reduce_sum(cKu / tf.reshape(tf.sqrt(Ug), [nku, 1, 1]), axis=0))
    Vw1 = MultivariateNormalTriL(
        tf.zeros([L, J]),
        tf.reduce_sum(cKv / tf.reshape(tf.sqrt(Vg), [nkv, 1, 1]), axis=0))
    logits = nn(Uw1, Vw1)
    R = AugmentedBernoulli(logits=logits, c=c, obs=obs,
                           value=tf.cast(logits > 0, tf.int32))
    qUg = TransformedDistribution(
        distribution=NormalWithSoftplusScale(tf.Variable(tf.zeros([nku])),
                                             tf.Variable(tf.ones([nku]))),
        bijector=tf.contrib.distributions.bijectors.Exp())
    qVg = TransformedDistribution(
        distribution=NormalWithSoftplusScale(tf.Variable(tf.zeros([nkv])),
                                             tf.Variable(tf.ones([nkv]))),
        bijector=tf.contrib.distributions.bijectors.Exp())
    qUa = TransformedDistribution(
        distribution=NormalWithSoftplusScale(tf.Variable(tf.zeros([1])),
                                             tf.Variable(tf.ones([1]))),
        bijector=tf.contrib.distributions.bijectors.Exp())
    qVa = TransformedDistribution(
        distribution=NormalWithSoftplusScale(tf.Variable(tf.zeros([1])),
                                             tf.Variable(tf.ones([1]))),
        bijector=tf.contrib.distributions.bijectors.Exp())
    qUw1 = MultivariateNormalTriL(tf.Variable(tf.zeros([L, I])),
                                  tf.Variable(tf.eye(I)))
    qVw1 = MultivariateNormalTriL(tf.Variable(tf.zeros([L, J])),
                                  tf.Variable(tf.eye(J)))
    return (obs, Ug, Vg, Ua, Va, cKu, cKv, Uw1, Vw1, R,
            qUg, qVg, qUa, qVa, qUw1, qVw1)
def __init__(self, latent_vars, data=None): """ Parameters ---------- latent_vars : list of RandomVariable or dict of RandomVariable to RandomVariable Collection of random variables to perform inference on. If list, each random variable will be implictly optimized using a ``MultivariateNormalTriL`` random variable that is defined internally (with unconstrained support). If dictionary, each random variable must be a ``MultivariateNormalDiag``, ``MultivariateNormalTriL``, or ``Normal`` random variable. Notes ----- If ``MultivariateNormalDiag`` or ``Normal`` random variables are specified as approximations, then the Laplace approximation will only produce the diagonal. This does not capture correlation among the variables but it does not require a potentially expensive matrix inversion. Examples -------- >>> X = tf.placeholder(tf.float32, [N, D]) >>> w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) >>> y = Normal(loc=ed.dot(X, w), scale=tf.ones(N)) >>> >>> qw = MultivariateNormalTriL( >>> loc=tf.Variable(tf.random_normal([D])), >>> scale_tril=tf.Variable(tf.random_normal([D, D]))) >>> >>> inference = ed.Laplace({w: qw}, data={X: X_train, y: y_train}) """ if isinstance(latent_vars, list): with tf.variable_scope("posterior"): latent_vars = {rv: MultivariateNormalTriL( loc=tf.Variable(tf.random_normal(rv.batch_shape)), scale_tril=tf.Variable(tf.random_normal( rv.batch_shape.concatenate(rv.batch_shape[-1])))) for rv in latent_vars} elif isinstance(latent_vars, dict): for qz in six.itervalues(latent_vars): if not isinstance( qz, (MultivariateNormalDiag, MultivariateNormalTriL, Normal)): raise TypeError("Posterior approximation must consist of only " "MultivariateNormalDiag, MultivariateTriL, or " "Normal random variables.") # call grandparent's method; avoid parent (MAP) super(MAP, self).__init__(latent_vars, data)
def __init__(self, latent_vars, data=None): """Create an inference algorithm. Args: latent_vars: list of RandomVariable or dict of RandomVariable to RandomVariable. Collection of random variables to perform inference on. If list, each random variable will be implictly optimized using a `MultivariateNormalTriL` random variable that is defined internally with unconstrained support and is initialized using standard normal draws. If dictionary, each random variable must be a `MultivariateNormalDiag`, `MultivariateNormalTriL`, or `Normal` random variable. """ if isinstance(latent_vars, list): with tf.variable_scope(None, default_name="posterior"): latent_vars_dict = {} for z in latent_vars: # Define location to have constrained support and # unconstrained free parameters. batch_event_shape = z.batch_shape.concatenate( z.event_shape) loc = tf.Variable(tf.random_normal(batch_event_shape)) if hasattr(z, 'support'): z_transform = transform(z) if hasattr(z_transform, 'bijector'): loc = z_transform.bijector.inverse(loc) scale_tril = tf.Variable( tf.random_normal( batch_event_shape.concatenate( batch_event_shape[-1]))) qz = MultivariateNormalTriL(loc=loc, scale_tril=scale_tril) latent_vars_dict[z] = qz latent_vars = latent_vars_dict del latent_vars_dict elif isinstance(latent_vars, dict): for qz in six.itervalues(latent_vars): if not isinstance( qz, (MultivariateNormalDiag, MultivariateNormalTriL, Normal)): raise TypeError( "Posterior approximation must consist of only " "MultivariateNormalDiag, MultivariateTriL, or " "Normal random variables.") # call grandparent's method; avoid parent (MAP) super(MAP, self).__init__(latent_vars, data)
def main(_):
    ed.set_seed(42)

    # MODEL
    z = MultivariateNormalTriL(
        loc=tf.ones(2),
        scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

    # INFERENCE
    qz = Empirical(params=tf.get_variable("qz/params", [2000, 2]))
    inference = ed.SGLD({z: qz})
    inference.run(step_size=5.0)

    # CRITICISM
    sess = ed.get_session()
    mean, stddev = sess.run([qz.mean(), qz.stddev()])
    print("Inferred posterior mean:")
    print(mean)
    print("Inferred posterior stddev:")
    print(stddev)
def __init__(self, K, D, N, nu, use_param=False):
    self.K = K    # number of topics
    self.D = D    # number of documents
    self.N = N    # number of words of each document
    self.nu = nu
    self.alpha = alpha = tf.zeros([K]) + 0.1
    mu0 = tf.constant([0.0] * nu)
    sigma0 = tf.eye(nu)
    self.sigma = sigma = WishartCholesky(
        df=nu,
        scale=sigma0,
        cholesky_input_output_matrices=True,
        sample_shape=K)
    # sigma_inv = tf.matrix_inverse(sigma)
    self.mu = mu = Normal(mu0, tf.ones(nu), sample_shape=K)
    self.theta = theta = [None] * D
    self.z = z = [None] * D
    self.w = w = [None] * D
    for d in range(D):
        theta[d] = Dirichlet(alpha)
        if use_param:
            w[d] = ParamMixture(mixing_weights=theta[d],
                                component_params={'loc': mu,
                                                  'scale_tril': sigma},
                                component_dist=MultivariateNormalTriL,
                                sample_shape=N[d])
            z[d] = w[d].cat
        else:
            z[d] = Categorical(probs=theta[d], sample_shape=N[d])
            components = [
                MultivariateNormalTriL(loc=tf.gather(mu, k),
                                       scale_tril=tf.gather(sigma, k),
                                       sample_shape=N[d])
                for k in range(K)
            ]
            w[d] = Mixture(cat=z[d], components=components,
                           sample_shape=N[d])
def define_prior(N, D, sigma_noise, sigma_signal, lengthscale):
    """Define a Gaussian process prior.

    Parameters
    ----------
    N : int
        The number of observations.
    D : int
        The number of input dimensions.
    sigma_noise : float
        The noise variance.
    sigma_signal : float
        The signal variance.
    lengthscale : float or array-like
        The lengthscale parameter. Can either be a scalar or a vector of
        size D, where D is the number of dimensions of the input space.

    Returns
    -------
    X : tf.placeholder, shape (N, D)
        A placeholder for the input data.
    K : tf.Tensor, shape (N, N)
        The covariance matrix.
    f : edward.RandomVariable, shape (N,)
        The Gaussian process prior.
    """
    # define model
    X = tf.placeholder(tf.float32, [N, D])
    K = (rbf(X, variance=sigma_signal, lengthscale=lengthscale) +
         np.eye(N) * sigma_noise)
    f = MultivariateNormalTriL(loc=tf.zeros(N), scale_tril=tf.cholesky(K))

    # check dimensions
    assert X.shape == (N, D)
    assert K.shape == (N, N)
    assert f.shape == (N,)

    return X, K, f
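Assuming `define_prior` and the imports it relies on (`tf`, `np`, `rbf`, `MultivariateNormalTriL`) are already in scope, a short hypothetical call might look like the following; the hyperparameter values are made up for illustration.

# Hypothetical usage of define_prior; values are illustrative only.
X, K, f = define_prior(N=50, D=3, sigma_noise=1e-2,
                       sigma_signal=1.0, lengthscale=0.5)
print(X.shape, K.shape, f.shape)  # expected: (50, 3) (50, 50) (50,)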
def _initialize_output_model(self):
    with self.sess.as_default():
        with self.sess.graph.as_default():
            if self.output_scale is None:
                output_scale = self.decoder_scale
            else:
                output_scale = self.output_scale

            if self.mv:
                self.out = MultivariateNormalTriL(
                    loc=tf.layers.Flatten()(self.decoder),
                    scale_tril=output_scale)
            else:
                self.out = Normal(loc=self.decoder, scale=output_scale)

            if self.normalize_data and self.constrain_output:
                self.out = TransformedDistribution(
                    self.out,
                    bijector=tf.contrib.distributions.bijectors.Sigmoid())
xn_data = np.random.multivariate_normal(mu, sigma, N)
plt.scatter(xn_data[:, 0], xn_data[:, 1], cmap=cm.gist_rainbow, s=5)
plt.show()
print('mu={}'.format(mu))
print('sigma={}'.format(sigma))

# Prior definition
v_prior = tf.constant(3., dtype=tf.float64)
W_prior = tf.constant(generate_random_positive_matrix(D), dtype=tf.float64)
m_prior = tf.constant(np.array([0.5, 0.5]), dtype=tf.float64)
k_prior = tf.constant(0.6, dtype=tf.float64)

# Posterior inference
# Probabilistic model
sigma = WishartCholesky(df=v_prior, scale=W_prior)
mu = MultivariateNormalTriL(m_prior, k_prior * sigma)
xn = MultivariateNormalTriL(tf.reshape(tf.tile(mu, [N]), [N, D]),
                            tf.reshape(tf.tile(sigma, [N, 1]), [N, 2, 2]))

# Variational model
qmu = MultivariateNormalTriL(
    tf.Variable(tf.random_normal([D], dtype=tf.float64)),
    tf.nn.softplus(tf.Variable(tf.random_normal([D, D], dtype=tf.float64))))
L = tf.Variable(tf.random_normal([D, D], dtype=tf.float64))
qsigma = WishartCholesky(
    tf.nn.softplus(
        tf.Variable(tf.random_normal([], dtype=tf.float64)) + D + 1),
    LinearOperatorTriL(L).to_dense())

# Inference
import numpy as np
import edward as ed
import tensorflow as tf
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from edward.models import Normal, MultivariateNormalTriL, Empirical

ed.set_seed(42)

T = 5000  # number of samples
D = 3     # dimension
cov = [[1.36, 0.62, 0.93],
       [0.80, 1.19, 0.43],
       [0.57, 0.73, 1.06]]

# model
z = MultivariateNormalTriL(loc=tf.ones(D), scale_tril=tf.cholesky(cov))

# inference
qz = MultivariateNormalTriL(
    loc=tf.Variable(tf.zeros(D)),
    scale_tril=tf.nn.softplus(tf.Variable(tf.zeros((D, D)))))
inference = ed.KLqp({z: qz})
# qz = Empirical(tf.Variable(tf.random_normal([T, D])))
# inference = ed.HMC({z: qz})
inference.run()

# criticism
sess = ed.get_session()
mean, stddev = sess.run([qz.mean(), qz.stddev()])
print("Inferred posterior mean: ", mean)
period_pre = tf.Variable(np.log(np.exp(7.0 * len_init) - 1), dtype=tf.float32)
period_len_pre = tf.Variable(1.0)
period_var_pre = tf.Variable(np.log(np.exp(0.5) - 1), dtype=tf.float32)
# period = tf.nn.softplus(period_pre)
period_length = tf.nn.softplus(period_len_pre)

Kuu = kernelfx(xu, xu)

fu_loc = tf.zeros((p, m))
fu_scale = tf.cast(tf.cholesky(Kuu + offset * tf.eye(m, dtype=tf.float64),
                               name='fu_scale'), dtype=tf.float32)

u = MultivariateNormalTriL(loc=fu_loc, scale_tril=fu_scale, name='pu')

x = Normal(loc=tf.zeros((M, Q)), scale=1.0)

Kfu = kernelfx(x, xu)
Kff = kernelfx(x, x)

Kuuinv = tf.matrix_inverse(Kuu + offset * tf.eye(m, dtype=tf.float64))
KfuKuuinv = tf.matmul(Kfu, Kuuinv)
KffKuuinvU = [
    tf.reshape(
        tf.matmul(KfuKuuinv,
                  tf.expand_dims(tf.cast(u[i], dtype=tf.float64), axis=1)),
        [-1]) for i in range(0, p)
]
def val_loocv(X_input, y_input, param_in, sigma_sq_in, max_VI_iter, qf_in,
              mean_prior=0):
    f_pred_all = np.zeros(X_input.shape[0])
    loo = LeaveOneOut()
    temp_sess = tf.Session()
    N = int(X_input.shape[0])
    D = int(X_input.shape[1])
    for train_index, test_index in loo.split(X_input):
        X_star_input = X_input[test_index, :].reshape(1, -1)
        X_other_input = X_input[train_index, :]
        y_other_input = y_input[train_index].reshape(-1, 1)
        k_star = rbf_fun(X_other_input, X_star_input,
                         lengthscale=param_in[0], variance=param_in[1])[0]
        k_star_1 = matern_fun(X_other_input, X_star_input,
                              lengthscale_in=param_in[2],
                              gamma_in=param_in[3])[0]
        k_star_2 = rat_quadratic_fun(X_other_input, X_star_input,
                                     magnitude=param_in[4],
                                     lengthscale=param_in[5],
                                     diffuseness=param_in[6])[0]
        k_star_all = tf.add(tf.add(k_star, k_star_1), k_star_2)
        x_only_part = rbf_fun(X_other_input, lengthscale=param_in[0],
                              variance=param_in[1])[0]
        x_only_part = tf.add(
            x_only_part,
            matern_fun(X_other_input, lengthscale_in=param_in[2],
                       gamma_in=param_in[3])[0])
        x_only_part = tf.add(
            x_only_part,
            rat_quadratic_fun(X_other_input, magnitude=param_in[4],
                              lengthscale=param_in[5],
                              diffuseness=param_in[6])[0])
        x_only_part = tf.add(
            x_only_part,
            tf.multiply(sigma_sq_in, tf.eye(int(X_input.shape[0]))))
        x_only_part_inv = tf.linalg.inv(x_only_part)

        # Inference from Edward Part
        X = tf.placeholder(tf.float32, [N - 1, D])
        f = MultivariateNormalTriL(loc=tf.zeros(N - 1),
                                   scale_tril=tf.cholesky(x_only_part))
        y = Poisson(rate=tf.nn.softplus(f))
        w_mat = tf.matmul(x_only_part_inv, k_star_all)
        y_other_input = tf.reshape(y_other_input, [-1])
        y_other_input = tf.cast(y_other_input, dtype=tf.float32)
        inference_vi = ed.KLqp({f: qf_in},
                               data={X: X_other_input, y: y_other_input})
        inference_vi.run(n_iter=max_VI_iter)
        y_post = ed.copy(y, {f: qf_in})
        m_mat = y_post.eval()
        f_star_each = mean_prior + tf.matmul(
            tf.transpose(w_mat),
            (tf.reshape(y_other_input, [-1, 1]) - m_mat))
        f_pred_all[test_index] = temp_sess.run(f_star_each)
    sum_sq_err = np.sum(np.square(y_input - f_pred_all))
    return f_pred_all, sum_sq_err
# it seems like the test and training data need to have the same N
X_test, y_test = X_test[:-1, :], y_test[:-1]

# unfortunately not sure how to make the linear kernel work at this moment
N, P = X_train.shape
X_tf = tf.placeholder(tf.float32, [N, P])

# latent stochastic function
# ok so here in the loc position is where we can get (x *element-wise* b)
b = Bernoulli(varbvs_prior, dtype=np.float32)  # prior from varbvs
gp_mu = tf.reduce_mean(
    tf.multiply(X_tf, tf.reshape(tf.tile(b, [N]), [N, P])),
    1)  # mean for prior over GP

f = MultivariateNormalTriL(
    loc=gp_mu,
    scale_tril=tf.cholesky(rbf(X_tf))  # uses rbf kernel for covariance of GP for now
)

qf = Normal(loc=tf.get_variable("qf/loc", [N]),
            scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])))

# response
y_tf = Bernoulli(logits=f)

# inference
infer = ed.KLqp({f: qf}, data={X_tf: X_train, y_tf: y_train})
infer.run(n_samples=3, n_iter=5000)

# criticism
y_post = ed.copy(y_tf, {f: qf})
Langevin dynamics.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
import tensorflow as tf

from edward.models import Empirical, MultivariateNormalTriL

ed.set_seed(42)

# MODEL
z = MultivariateNormalTriL(
    loc=tf.ones(2),
    scale_tril=tf.cholesky(tf.constant([[1.0, 0.8], [0.8, 1.0]])))

# INFERENCE
qz = Empirical(params=tf.Variable(tf.random_normal([2000, 2])))

inference = ed.SGLD({z: qz})
inference.run(step_size=5.0)

# CRITICISM
sess = ed.get_session()
mean, stddev = sess.run([qz.mean(), qz.stddev()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior stddev:")
print(stddev)
    for n in range(N):
        f_n = multivariate_normal.rvs(cov=K, size=1)
        for v in range(V):
            x[n, v] = poisson.rvs(mu=np.exp(f_n[v]), size=1)
    return x


ed.set_seed(42)

N = 308  # number of NBA players
V = 2    # number of shot locations

# DATA
x_data = build_toy_dataset(N, V)

# MODEL
x_ph = tf.placeholder(tf.float32, [N, V])  # inputs to Gaussian Process

# Form (N, V, V) covariance, one matrix per data point.
K = tf.stack([rbf(tf.reshape(xn, [V, 1])) + tf.diag([1e-6, 1e-6])
              for xn in tf.unstack(x_ph)])
f = MultivariateNormalTriL(loc=tf.zeros([N, V]), scale_tril=tf.cholesky(K))
x = Poisson(rate=tf.exp(f))

# INFERENCE
qf = Normal(loc=tf.Variable(tf.random_normal([N, V])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([N, V]))))

inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data})
inference.run(n_iter=5000)
                   num_classes=5, num_per_class=30, rate=0.4)
N = data.shape[0]
D = 2   # number of features
K = 2   # number of latent dimensions
H1 = 2
H2 = 2

# Model: deep/shallow GP (generative model)
X = Normal(loc=tf.zeros([N, K]), scale=tf.ones([N, K]))

Kernal = rbf(X) + tf.eye(N) * 1e-6
cholesky = tf.tile(tf.reshape(tf.cholesky(Kernal), [1, N, N]), [H1, 1, 1])
h1 = MultivariateNormalTriL(loc=tf.zeros([H1, N]), scale_tril=cholesky)

Kernal1 = rbf(tf.transpose(h1)) + tf.eye(N) * 1e-6
cholesky1 = tf.tile(tf.reshape(tf.cholesky(Kernal1), [1, N, N]), [H2, 1, 1])
h2 = MultivariateNormalTriL(loc=tf.zeros([H2, N]), scale_tril=cholesky1)

Kernal2 = rbf(tf.transpose(h2)) + tf.eye(N) * 1e-6
cholesky2 = tf.tile(tf.reshape(tf.cholesky(Kernal2), [1, N, N]), [D, 1, 1])
Y = MultivariateNormalTriL(loc=tf.zeros([D, N]), scale_tril=cholesky2)

# Inference (recognition model)
qX = Normal(loc=tf.Variable(tf.random_normal([N, K])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([N, K]))))

inference = ed.KLqp({X: qX}, data={Y: data.transpose()})
inference.run(n_iter=1000)
from edward.models import Normal, Exponential
import tensorflow as tf

# all of these are derived from RandomVariable

# univariate distributions
Normal(loc=tf.constant(0.0), scale=tf.constant(1.0))
Normal(loc=tf.zeros(5), scale=tf.ones(5))
Exponential(rate=tf.ones([2, 3]))

# multivariate distributions
from edward.models import Dirichlet, MultivariateNormalTriL

K = 3
Dirichlet(concentration=tf.constant([0.1] * K))  # K-dimensional Dirichlet distribution
MultivariateNormalTriL(loc=tf.zeros([5, K]), scale_tril=tf.ones([5, K, K]))  # the last dimension of loc gives the dimensionality
MultivariateNormalTriL(loc=tf.zeros([2, 5, K]), scale_tril=tf.ones([2, 5, K, K]))

# each RandomVariable has methods log_prob(), mean(), sample(), and
# corresponds to a tensor in the computational graph

# many operations are supported
from edward.models import Normal

x = Normal(loc=tf.zeros(10), scale=tf.ones(10))
y = tf.constant(5.0)
x + y, x - y, x * y, x / y
tf.tanh(x * y)
tf.gather(x, 2)
print(x[2])

# directed graphical models
from edward.models import Bernoulli, Beta
from edward.models import Bernoulli, MultivariateNormalTriL, Normal
from edward.util import rbf

ys = df['index'].values
xs = df[['sepal length (cm)', 'sepal width (cm)',
         'petal length (cm)', 'petal width (cm)']].values

N = xs.shape[0]
D = xs.shape[1]
print("Number of data points: {}".format(N))
print("Number of features: {}".format(D))


# In[4]:

X = tf.placeholder(tf.float32, [N, D])
f = MultivariateNormalTriL(loc=tf.zeros(N), scale_tril=tf.cholesky(rbf(X)))
y = Bernoulli(logits=f)


# In[ ]:

qf = Normal(loc=tf.get_variable("qf/loc", [N]),
            scale=tf.nn.softplus(tf.get_variable("qf/scale", [N])))


# In[ ]:

inference = ed.KLqp({f: qf}, data={X: xs, y: ys})
inference.run(n_iter=5000)
class GaussianLDA(object):
    def __init__(self, K, D, N, nu, use_param=False):
        self.K = K    # number of topics
        self.D = D    # number of documents
        self.N = N    # number of words of each document
        self.nu = nu
        self.alpha = alpha = tf.zeros([K]) + 0.1
        mu0 = tf.constant([0.0] * nu)
        sigma0 = tf.eye(nu)
        self.sigma = sigma = WishartCholesky(
            df=nu,
            scale=sigma0,
            cholesky_input_output_matrices=True,
            sample_shape=K)
        # sigma_inv = tf.matrix_inverse(sigma)
        self.mu = mu = Normal(mu0, tf.ones(nu), sample_shape=K)
        self.theta = theta = [None] * D
        self.z = z = [None] * D
        self.w = w = [None] * D
        for d in range(D):
            theta[d] = Dirichlet(alpha)
            if use_param:
                w[d] = ParamMixture(mixing_weights=theta[d],
                                    component_params={'loc': mu,
                                                      'scale_tril': sigma},
                                    component_dist=MultivariateNormalTriL,
                                    sample_shape=N[d])
                z[d] = w[d].cat
            else:
                z[d] = Categorical(probs=theta[d], sample_shape=N[d])
                components = [
                    MultivariateNormalTriL(loc=tf.gather(mu, k),
                                           scale_tril=tf.gather(sigma, k),
                                           sample_shape=N[d])
                    for k in range(K)
                ]
                w[d] = Mixture(cat=z[d], components=components,
                               sample_shape=N[d])

    def __run_inference__(self, T, S=None):
        tf.global_variables_initializer().run()
        for n in range(self.inference.n_iter):
            info_dict = self.inference.update()
            self.inference.print_progress(info_dict)
        self.inference.finalize()

    def klqp(self, docs, S, T, wordVec):
        K = self.K
        D = self.D
        nu = self.nu
        self.latent_vars = latent_vars = {}
        training_data = {}
        qmu = Normal(loc=tf.Variable(tf.random_normal([K, nu])),
                     scale=tf.nn.softplus(tf.Variable(tf.zeros([K, nu]))))
        latent_vars[self.mu] = qmu
        qpsi0 = tf.Variable(tf.eye(nu, batch_shape=[K]))
        Ltril = tf.linalg.LinearOperatorLowerTriangular(
            ds.matrix_diag_transform(qpsi0,
                                     transform=tf.nn.softplus)).to_dense()
        qsigma = WishartCholesky(df=tf.ones([K]) * nu,
                                 scale=Ltril,
                                 cholesky_input_output_matrices=True)
        latent_vars[self.sigma] = qsigma
        for d in range(D):
            training_data[self.w[d]] = docs[d]
        self.qmu = qmu
        # self.qsigma_inv = qsigma_inv = tf.matrix_inverse(qsigma)
        self.qw = MultivariateNormalTriL(loc=qmu, scale_tril=qsigma)
        V = len(wordVec)
        logprobs = [None] * V
        for i in range(V):
            logprobs[i] = self.qw.log_prob(wordVec[i])
        self.qbeta = tf.convert_to_tensor(logprobs)
        self.inference = ed.KLqp(latent_vars, data=training_data)
        self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
        self.__run_inference__(T)

    def getTopWords(self, wordVec, tokens):
        K = self.K
        V = len(wordVec)
        qbeta = self.qbeta
        qbeta_sample = qbeta.eval()
        prob = [None] * K
        for k in range(K):
            prob[k] = qbeta_sample[:, k]
        self.tokens_probs = tokens_probs = [None] * K
        self.top_words = [None] * K
        for k in range(K):
            tokens_probs[k] = dict((t, p) for t, p in zip(range(V), prob[k]))
            newdict = sorted(tokens_probs[k],
                             key=tokens_probs[k].get,
                             reverse=True)[:15]
            self.top_words[k] = newdict
            print('topic %d' % k)
            for Id in newdict:
                print(tokens[Id], tokens_probs[k][Id])

    def getPMI(self, comatrix):
        K = self.K
        self.pmis = pmis = [None] * K
        for k in range(K):
            pmis[k] = util.pmi(comatrix, self.top_words[k])
            print('topic %d pmi: %f' % (k, pmis[k]))
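A hypothetical usage sketch for the GaussianLDA class above, using random toy embeddings in place of real word vectors. The sizes, variable names, and iteration counts are assumptions for illustration; a run on random data is not expected to produce meaningful topics.

import numpy as np

nu = 10                                   # embedding dimensionality (assumed)
vocab_size = 50                           # vocabulary size (assumed)
wordVec = np.random.randn(vocab_size, nu).astype(np.float32)
tokens = ['word%d' % i for i in range(vocab_size)]

D = 3                                     # number of documents
N = [20, 15, 25]                          # words per document
# each document is an [N[d], nu] array of word embeddings
docs = [wordVec[np.random.randint(vocab_size, size=N[d])] for d in range(D)]

model = GaussianLDA(K=2, D=D, N=N, nu=nu)
model.klqp(docs, S=5, T=500, wordVec=wordVec)   # variational inference
model.getTopWords(wordVec, tokens)              # print top words per topic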