def reparam_gradient(alpha, m, x, K, alphaz, corr=True, B=0):
    """Reparameterization (RSVI) gradient of the ELBO w.r.t. (alpha, m).

    Returns an (len(alpha), 2) array; column 0 is the alpha-gradient and
    column 1 the m-gradient. With B == 0 the raw gamma draw is used
    (requires alpha >= 1); with B > 0, B shape-augmentation uniforms are
    drawn and the boosted h / h' functions are used instead.
    """
    grad_out = np.zeros((alpha.shape[0], 2))
    if B == 0:
        assert np.min(alpha) >= 1., "Needs shape augmentation"
        lam = npr.gamma(alpha, 1.)
        lam[lam < 1e-300] = 1e-300          # guard against log(0) downstream
        z_draw = m * lam / alpha
        eps = calc_epsilon(lam, alpha)
        h = gamma_h(eps, alpha)
        dh = gamma_grad_h(eps, alpha)
        dlogp = grad_logp(z_draw, K, x, alphaz)
        grad_out[:, 0] = dlogp * m * (alpha * dh - h) / alpha**2
        grad_out[:, 1] = dlogp * h / alpha
        grad_out += grad_entropy(alpha, m)
        if corr:
            # correction term of the gradient estimator
            grad_out[:, 0] += logp(z_draw, K, x, alphaz) * gamma_correction(eps, alpha)
    else:
        lam = npr.gamma(alpha + B, 1.)
        lam[lam < 1e-5] = 1e-5
        u = npr.rand(alpha.shape[0], B)
        eps = calc_epsilon(lam, alpha + B)
        h = gamma_h_boosted(eps, u, alpha)
        dh = gamma_grad_h_boosted(eps, u, alpha)
        z_draw = h * m / alpha
        z_draw[z_draw < 1e-5] = 1e-5
        dlogp = grad_logp(z_draw, K, x, alphaz)
        grad_out[:, 0] = dlogp * m * (alpha * dh - h) / alpha**2
        grad_out[:, 1] = dlogp * h / alpha
        grad_out += grad_entropy(alpha, m)
        if corr:
            grad_out[:, 0] += logp(z_draw, K, x, alphaz) * gamma_correction(eps, alpha + B)
    return grad_out
def sample_y(self, z, x, input=None, tag=None):
    """Sample observations y given discrete states z and continuous states x.

    The noise precision tau is drawn per-dimension from Gamma(nu/2, 2/nu)
    and the emission variance eta is scaled by 1/tau.
    """
    T = z.shape[0]
    z = np.zeros_like(z, dtype=int) if self.single_subspace else z
    mus = self.compute_mus(x)
    etas = np.exp(self.inv_etas)
    # Fix: `nus` was referenced below but never defined (NameError at
    # runtime). Following the sibling samplers, it is exp(self.inv_nus).
    nus = np.exp(self.inv_nus)
    taus = npr.gamma(nus[z] / 2.0, 2.0 / nus[z])
    return mus[np.arange(T), z, :] + np.sqrt(etas[z] / taus) * npr.randn(T, self.N)
def estimate_elbo(alpha, m, K, x, alphaz, S=1):
    """Monte-Carlo estimate of the ELBO: mean log-joint over S gamma
    samples plus the entropy term."""
    logp_vals = np.zeros(S)
    for s in range(S):
        draw = npr.gamma(alpha, 1.) * m / alpha
        draw[draw < 1e-300] = 1e-300    # avoid exact zeros in the log-joint
        logp_vals[s] = logp(draw, K, x, alphaz)
    return np.mean(logp_vals) + np.sum(entropy(alpha, m))
def sample_x(self, z, xhist, input=None, tag=None, with_noise=True):
    """Sample the next continuous state given discrete state z and history.

    Before `lags` observations exist, draws from the initial distribution;
    afterwards the mean is an autoregressive prediction over the last
    `lags` steps and the noise variance is divided by a gamma-distributed
    precision tau ~ Gamma(nu/2, 2/nu).
    """
    D = self.D
    As, bs = self.As, self.bs
    sigmas = np.exp(self.inv_sigmas)
    nus = np.exp(self.inv_nus)
    if xhist.shape[0] < self.lags:
        # Not enough history yet: fall back to the initial condition.
        init_var = np.exp(self.inv_sigma_init) if with_noise else 0
        return self.mu_init + np.sqrt(init_var) * npr.randn(D)
    # Autoregressive mean over the most recent `lags` states.
    mean = bs[z].copy()
    for lag in range(self.lags):
        mean += As[z][:, lag * D:(lag + 1) * D].dot(xhist[-lag - 1])
    tau = npr.gamma(nus[z] / 2.0, 2.0 / nus[z])
    var = sigmas[z] / tau if with_noise else 0
    return mean + np.sqrt(var) * npr.randn(D)
def main(argv):
    """Generate a synthetic GMM dataset and fit it with CAVI."""
    del argv
    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1
    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    # Fix: the original callback closed over an undefined name `itr`
    # (NameError if it were ever invoked); the iteration index is now an
    # explicit parameter. The callback remains disabled below (a no-op
    # lambda is passed), so observable behavior is unchanged.
    def callback(meanparams, itr):
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    start = time.time()
    cavi(log_joint, init_vals, (SIMPLEX, INTEGER, REAL, NONNEGATIVE),
         FLAGS.num_iterations, callback=lambda *args: None)
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)
def grep_gradient(alpha, m, x, K, alphaz):
    """Generalized-reparameterization (G-REP) gradient of the ELBO w.r.t.
    (alpha, m).

    Returns an (len(alpha), 2) array; column 0 is the alpha-gradient
    (including the explicit correction term) and column 1 the m-gradient.
    """
    gradient = np.zeros((alpha.shape[0], 2))
    lmbda = npr.gamma(alpha, 1.)
    lmbda[lmbda < 1e-5] = 1e-5          # keep samples away from zero
    Tinv_val = fun_Tinv(lmbda, alpha)
    h_val = fun_H(Tinv_val, alpha)
    zw = m * lmbda / alpha
    zw[zw < 1e-5] = 1e-5
    logp_der = grad_logp(zw, K, x, alphaz)
    logp_val = logp(zw, K, x, alphaz)
    # Fix: removed two unused computations the original performed and never
    # read (`u_val = fun_U(...)` and `logq_der = grad_logQ_Z(...)`); the
    # correction term below is written out explicitly instead.
    gradient[:, 0] = logp_der * (h_val - lmbda / alpha) * m / alpha
    gradient[:, 1] = logp_der * lmbda / alpha
    gradient[:, 0] += logp_val * (
        np.log(lmbda) + (alpha / lmbda - 1.) * h_val - sp.digamma(alpha)
        + sp.polygamma(2, alpha) / 2. / sp.polygamma(1, alpha))
    gradient += grad_entropy(alpha, m)
    return gradient
def score_estimator(alpha, m, x, K, alphaz, S=100):
    """
    Form score function estimator based on samples lmbda.

    Draws 2*S samples: the second half estimates the optimal control-variate
    coefficient `a`, the first half forms the final gradient estimate.
    Returns an (len(alpha), 2) array (columns: d/d alpha, d/d m).
    """
    # Fix: removed unused locals the original computed and never read
    # (`N`, `D`, `num_z`, `L`, and a `gradient` array that was allocated
    # but never written or returned).
    f = np.zeros((2 * S, alpha.shape[0], 2))
    h = np.zeros((2 * S, alpha.shape[0], 2))
    for s in range(2 * S):
        lmbda = npr.gamma(alpha, 1.)
        lmbda[lmbda < 1e-300] = 1e-300
        zw = m * lmbda / alpha
        lQ = logQ(zw, alpha, m)
        gradLQ = grad_logQ(zw, alpha, m)
        lP = logp(zw, K, x, alphaz)
        temp = lP - np.sum(lQ)
        f[s, :, :] = temp * gradLQ
        h[s, :, :] = gradLQ

    # Control variate: a = Cov(f, h) / Var(h), per parameter and column,
    # estimated on the held-out second half of the samples.
    covFH = np.zeros((alpha.shape[0], 2))
    covFH[:, 0] = np.diagonal(
        np.cov(f[S:, :, 0], h[S:, :, 0],
               rowvar=False)[:alpha.shape[0], alpha.shape[0]:])
    covFH[:, 1] = np.diagonal(
        np.cov(f[S:, :, 1], h[S:, :, 1],
               rowvar=False)[:alpha.shape[0], alpha.shape[0]:])
    a = covFH / np.var(h[S:, :, :], axis=0)
    return np.mean(f[:S, :, :], axis=0) - a * np.mean(h[:S, :, :], axis=0)
def sample_x(self, z, xhist, input=None, tag=None, with_noise=True):
    """Draw one continuous sample for discrete state z.

    The per-dimension precision tau is drawn from Gamma(nu/2, 2/nu) and
    divides the base variance; with_noise=False zeroes the variance so the
    mean is returned (while still consuming the same random draws).
    """
    D = self.D
    location = self.mus[z]
    base_var = np.exp(self.inv_sigmas)
    dofs = np.exp(self.inv_nus)
    tau = npr.gamma(dofs[z] / 2.0, 2.0 / dofs[z])
    var = base_var[z] / tau if with_noise else 0
    return location + np.sqrt(var) * npr.randn(D)
def main(argv):
    """Generate a synthetic GMM dataset and run TensorFlow-based CAVI,
    timing the coordinate-ascent updates and printing the ELBO."""
    del argv
    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1
    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    # Fix: `itr` was an undefined free variable in the original callback
    # (NameError if invoked); it is now an explicit parameter. The callback
    # is still unused below, matching the original behavior.
    def callback(meanparams, itr):
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    supports = (SIMPLEX, INTEGER, REAL, NONNEGATIVE)
    neg_energy, normalizers, _, initializers, _, _ = \
        multilinear_representation(log_joint, init_vals, supports)
    np_natparams = [initializer(10.) for initializer in initializers]
    np_meanparams = [
        grad(normalizer)(natparam)
        for normalizer, natparam in zip(normalizers, np_natparams)
    ]

    # TODO(trandustin) try using feed_dict's to debug
    def tf_get_variable(inputs):
        return tf.get_variable(str(id(inputs)),
                               initializer=tf.constant_initializer(inputs),
                               dtype=tf.float32,
                               shape=inputs.shape)

    tf_meanparams = container_fmap(tf_get_variable, np_meanparams)
    tf_natparams = container_fmap(tf_get_variable, np_natparams)

    # Represent the set of natural/mean parameters for each coordinate update.
    num_updates = len(normalizers)
    all_tf_natparams = [None] * num_updates
    # Fix: the original used `[[]] * n`, which makes every slot alias ONE
    # shared list, so appending ops for coordinate i appended them for every
    # coordinate and each sess.run executed ALL assign ops. Independent
    # lists restore per-coordinate updates.
    all_tf_natparams_assign_ops = [[] for _ in range(num_updates)]
    # all_tf_meanparams = [None] * num_updates
    all_tf_meanparams_assign_ops = [[] for _ in range(num_updates)]
    for i in range(num_updates):
        cast = lambda inputs: tf.cast(inputs, dtype=tf.float32)
        tf_update = make_tffun(grad(neg_energy, i), *np_meanparams)
        values = container_fmap(cast, tf_update(*tf_meanparams))
        for variable, value in zip(tf_meanparams[i], values):
            assign_op = variable.assign(value)
            all_tf_natparams_assign_ops[i].append(assign_op)
        all_tf_natparams[i] = values

        tf_update = make_tffun(grad(normalizers[i]), np_natparams[i])
        values = container_fmap(cast, tf_update(all_tf_natparams[i]))
        # values = container_fmap(cast, tf_update(tf_natparams))
        for variable, value in zip(tf_natparams[i], values):
            assign_op = variable.assign(value)
            all_tf_meanparams_assign_ops[i].append(assign_op)
        # all_tf_meanparams[i] = values

    # Set config for 1 CPU, 1 core, 1 thread(?).
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            device_count={'CPU': 1})
    # Find out device placement.
    # config = tf.ConfigProto(log_device_placement=True)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    natparams = sess.run(tf_natparams)
    print("ELBO ", elbo_fn(neg_energy, normalizers, natparams))

    start = time.time()
    for _ in range(FLAGS.num_iterations):
        for i in range(num_updates):
            _ = sess.run(all_tf_natparams_assign_ops[i])
            _ = sess.run(all_tf_meanparams_assign_ops[i])
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)

    natparams = sess.run(tf_natparams)
    print("ELBO ", elbo_fn(neg_energy, normalizers, natparams))
def negbin_sample(r, p, size):
    """Sample from a negative binomial via its gamma-Poisson mixture:
    NB(r, p) is a Poisson whose rate is Gamma(r, p/(1-p)) distributed."""
    rates = npr.gamma(r, p/(1-p), size=size)
    return npr.poisson(rates)
def negbin_sample(r, p, size):
    # A negative binomial is a gamma-compound-Poisson:
    # draw a Gamma(r, p/(1-p)) rate, then a Poisson with that rate.
    scale = p / (1 - p)
    return npr.poisson(npr.gamma(r, scale, size=size))
def sample_pi(theta, size=(1, )):
    """Sample pi: draw Gamma(alpha, 1/beta) variates and map them through
    h_inverse under the parameters theta."""
    alpha, beta = unwrap(theta)
    raw = npr.gamma(alpha, 1. / beta, size=size)
    return h_inverse(raw, theta)