def __init__(self, K, D, N, nu, use_param=False):
    self.K = K  # number of topics
    self.D = D  # number of documents
    self.N = N  # number of words of each document
    self.nu = nu
    self.alpha = alpha = tf.zeros([K]) + 0.1
    self.sigmasq = InverseGamma(tf.ones(nu), tf.ones(nu), sample_shape=K)
    self.sigma = sigma = tf.sqrt(self.sigmasq)
    self.mu = mu = Normal(tf.zeros(nu), tf.ones(nu), sample_shape=K)
    self.theta = theta = [None] * D
    self.z = z = [None] * D
    self.w = w = [None] * D
    for d in range(D):
        theta[d] = Dirichlet(alpha)
        if use_param:
            w[d] = ParamMixture(mixing_weights=theta[d],
                                component_params={
                                    'loc': mu,
                                    'scale_diag': sigma
                                },
                                component_dist=MultivariateNormalDiag,
                                sample_shape=N[d])
            z[d] = w[d].cat
        else:
            z[d] = Categorical(probs=theta[d], sample_shape=N[d])
            components = [
                MultivariateNormalDiag(loc=tf.gather(mu, k),
                                       scale_diag=tf.gather(sigma, k),
                                       sample_shape=N[d])
                for k in range(K)
            ]
            w[d] = Mixture(cat=z[d], components=components,
                           sample_shape=N[d])
def get_tf_mixture(locs, diags, weights):
    q_comps = [
        MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
        for loc, scale_diag in zip(locs, diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    return Mixture(cat=cat, components=q_comps)
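# Hypothetical usage sketch for get_tf_mixture (the arrays below are
# illustrative, not from the original source): a two-component
# diagonal-Gaussian mixture in 2-D.
locs = [np.zeros(2, dtype=np.float32), np.ones(2, dtype=np.float32)]
diags = [np.ones(2, dtype=np.float32), 0.5 * np.ones(2, dtype=np.float32)]
mix = get_tf_mixture(locs, diags, weights=[0.3, 0.7])
samples = mix.sample(100)  # Tensor of shape [100, 2]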
def make_channel(members):
    """Create the equivalent of a HistFactory Channel().

    This is a composite model of p.d.f.s whose fractional weights sum to
    unity.

    Args:
        members (dict of ed.models): The p.d.f.s that will comprise the
            channel model along with their relative fractional weights.
            The dict should have elements of
            {'pdf_name': [fractional_weight, pdf]}

    Returns:
        channel (ed.models.Mixture): The resulting mixture model from the
            weighted combination of the members
    """
    from edward.models import Categorical, Mixture
    fracs = [v[0] for v in members.values()]
    # compare with a tolerance rather than exact float equality
    assert abs(sum(fracs) - 1.) < 1e-6, \
        "The sum of the p.d.f. samples fractional weights must be unity.\n" \
        "1 != {0}".format(sum(fracs))
    cat = Categorical(probs=fracs)
    components = [v[1] for v in members.values()]
    channel = Mixture(cat=cat, components=components)
    return channel
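# Hypothetical usage sketch for make_channel (the component names and
# weights are illustrative): a channel with a 20% signal and 80%
# background p.d.f.
from edward.models import Normal

members = {
    'signal': [0.2, Normal(loc=0., scale=1.)],
    'background': [0.8, Normal(loc=3., scale=2.)],
}
channel = make_channel(members)  # ed.models.Mixture with weights [0.2, 0.8]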
def main():
    import sys
    import os
    # Don't require pip install to test out
    # sys.path.append(os.getcwd() + '/../src')
    sys.path.append(os.getcwd() + '/../')
    from dfgmark import edwardbench as edbench
    import matplotlib.pyplot as plt

    N = 10000
    mean1 = tf.Variable(0., name='mean1')
    mean2 = tf.Variable(3., name='mean2')
    mu1 = Normal(loc=mean1, scale=1.)
    mu2 = Normal(loc=mean2, scale=1.)
    frac_1 = 0.4
    frac_2 = 1 - frac_1
    cat = Categorical(probs=[frac_1, frac_2])
    components = [mu1, mu2]
    # Gaussian mixture model
    model_template = Mixture(cat=cat, components=components)

    model, samples = edbench.sample_model(model_template, N)
    POI = {'mean1': mean1, 'mean2': mean2}
    fit_result = edbench.fit_model(model, samples, POI)
    print(fit_result)

    plt.hist(samples, bins=50, range=(-3.0, 9.0))
    plt.show()
def define_val_model(self, N, P, K):
    # Define new graph
    self.z_test = Gamma(2. * tf.ones([N, K]), 1. * tf.ones([N, K]))
    self.l_test = TransformedDistribution(
        distribution=Normal(self.mean_llib * tf.ones([N, 1]),
                            np.sqrt(self.std_llib) * tf.ones([N, 1])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    rho_test = tf.matmul(self.z_test, self.W0)
    rho_test = rho_test / tf.reshape(
        tf.reduce_sum(rho_test, axis=1), (-1, 1))  # NxP
    self.lam_test = Gamma(self.r, self.r / (rho_test * self.l_test))
    if self.zero_inflation:
        logit_pi_test = tf.matmul(self.z_test, self.W1)
        pi_test = tf.minimum(
            tf.maximum(tf.nn.sigmoid(logit_pi_test), 1e-7), 1. - 1e-7)
        cat_test = Categorical(
            probs=tf.stack([pi_test, 1. - pi_test], axis=2))
        components_test = [
            Poisson(rate=1e-30 * tf.ones([N, P])),
            Poisson(rate=self.lam_test)
        ]
        self.likelihood_test = Mixture(cat=cat_test,
                                       components=components_test)
    else:
        self.likelihood_test = Poisson(rate=self.lam_test)
def create_target_dist():
    """Create and return target distribution."""
    if FLAGS.dist != 'normal':
        raise NotImplementedError
    pi = np.random.dirichlet([1.] * K)
    mus = np.random.randn(K, D).astype(np.float32)
    stds = softplus(np.random.randn(K, D).astype(np.float32))
    pcomps = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
            scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
        for i in range(K)
    ]
    p = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(pi, dtype=tf.float32)),
        components=pcomps)
    return p, mus, stds
def build_mixture(weights, components):
    # `base` is assumed to be a location-scale distribution class
    # (e.g. ed.models.Normal) defined in the enclosing module.
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    comps = [
        base(loc=tf.convert_to_tensor(c['loc']),
             scale=tf.convert_to_tensor(c['scale']))
        for c in components
    ]
    mix = Mixture(cat=cat, components=comps)
    return mix
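# Hypothetical usage sketch for build_mixture; it assumes `base` resolves
# to a location-scale class such as ed.models.Normal, and the component
# dicts below are illustrative.
mix = build_mixture(
    weights=[0.5, 0.5],
    components=[{'loc': 0., 'scale': 1.}, {'loc': 3., 'scale': 0.5}])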
def line_search(q, s, p):
    s_samples = s.sample(1000).eval()
    q_samples = q.sample(1000).eval()
    gamma = 0.5

    def get_new_weights(gamma):
        weights = q.cat.probs.eval()
        weights *= (1 - gamma)
        weights = np.append(weights, gamma).astype(np.float32)
        return weights

    comps = list(q.components)
    comps.append(s)
    T = 50
    for t in range(T):
        weights = get_new_weights(gamma)
        # pass the weights as probabilities; the first positional argument
        # of Categorical is logits
        mix = Mixture(cat=Categorical(probs=weights), components=comps)
        s_expectation = tf.reduce_sum(
            mix.log_prob(s_samples), axis=1) - p.log_prob(s_samples)
        q_expectation = tf.reduce_sum(
            mix.log_prob(q_samples), axis=1) - p.log_prob(q_samples)
        grad = s_expectation - q_expectation
        grad = tf.reduce_mean(grad).eval()
        print("t", t, "gamma", gamma, "grad", grad)
        step_size = 0.01 / (t + 1)
        gamma_prime = gamma - grad * step_size
        if gamma_prime >= 1 or gamma_prime <= 0:
            gamma_prime = max(min(gamma_prime, 1.), 0.)
        if np.abs(gamma - gamma_prime) < 1e-6:
            gamma = gamma_prime
            break
        gamma = gamma_prime
        if gamma < 1e-5:
            gamma = 1e-5
    print("final t", t, "gamma", gamma, "grad", grad)
    return get_new_weights(gamma)
def _build_model(self, X, y):
    """Implementation of the KMN."""
    # create a placeholder for the target
    self.y_ph = y_ph = tf.placeholder(tf.float32, [None])
    self.n_sample_ph = tf.placeholder(tf.int32, None)

    # store feature dimension size for placeholder
    self.n_features = X.shape[1]

    # if no external estimator is provided, create a default neural network
    if self.estimator is None:
        self.X_ph = tf.placeholder(tf.float32, [None, self.n_features])
        # two dense hidden layers with 15 nodes each
        x = Dense(15, activation='relu')(self.X_ph)
        x = Dense(15, activation='relu')(x)
        self.estimator = x

    # get batch size
    self.batch_size = tf.shape(self.X_ph)[0]

    # locations of the gaussian kernel centers
    n_locs = self.n_centers
    self.locs = locs = sample_center_points(
        y, method=self.center_sampling_method, k=n_locs,
        keep_edges=self.keep_edges)
    self.locs_array = locs_array = tf.unstack(
        tf.transpose(tf.multiply(tf.ones((self.batch_size, n_locs)), locs)))

    # scales of the gaussian kernels
    self.scales = scales = tf.nn.softplus(
        tf.Variable(self.init_scales, dtype=tf.float32,
                    trainable=self.train_scales))
    self.scales_array = scales_array = tf.unstack(
        tf.transpose(
            tf.multiply(tf.ones((self.batch_size, self.n_scales)), scales)))

    # kernel weights, as output by the neural network
    self.weights = weights = Dense(
        n_locs * self.n_scales, activation='softplus')(self.estimator)

    # mixture distribution
    self.cat = cat = Categorical(logits=weights)
    self.components = components = [
        Normal(loc=loc, scale=scale)
        for loc in locs_array for scale in scales_array
    ]
    self.mixtures = mixtures = Mixture(
        cat=cat, components=components, value=tf.zeros_like(y_ph))

    # tensor to store samples
    self.samples = mixtures.sample()

    # store min/max of training target values for a sensible default grid
    # for self.predict_density()
    self.y_min = y.min()
    self.y_max = y.max()

    # placeholder for the grid
    self.y_grid_ph = y_grid_ph = tf.placeholder(tf.float32)
    # tensor to store grid point densities
    self.densities = tf.transpose(
        mixtures.prob(tf.reshape(y_grid_ph, (-1, 1))))
    # tensor to compute likelihoods
    self.likelihoods = mixtures.prob(y_ph)
def test_list(self):
    with self.test_session() as sess:
        x = Normal(tf.constant(0.0), tf.constant(0.1))
        y = Normal(tf.constant(10.0), tf.constant(0.1))
        cat = Categorical(logits=tf.zeros(5))
        components = [Normal(x, tf.constant(0.1)) for _ in range(5)]
        z = Mixture(cat=cat, components=components)
        z_new = ed.copy(z, {x: y.value()})
        self.assertGreater(z_new.value().eval(), 5.0)
def deserialize_target_from_file(filename):
    qt_deserialized = np.load(filename)
    mus = qt_deserialized['mus'].astype(np.float32)
    stds = qt_deserialized['stds'].astype(np.float32)
    pi = qt_deserialized['pi'].astype(np.float32)

    cat = Categorical(probs=tf.convert_to_tensor(pi))
    target_comps = [
        Normal(loc=tf.convert_to_tensor(mus[i]),
               scale=tf.convert_to_tensor(stds[i]))
        for i in range(len(mus))
    ]
    return Mixture(cat=cat, components=target_comps)
def main():
    # build model
    xcomps = [
        Normal(loc=tf.convert_to_tensor(mixture_model_relbo.mus[i]),
               scale=tf.convert_to_tensor(mixture_model_relbo.stds[i]))
        for i in range(len(mixture_model_relbo.mus))
    ]
    x = Mixture(
        cat=Categorical(probs=tf.convert_to_tensor(mixture_model_relbo.pi)),
        components=xcomps,
        sample_shape=mixture_model_relbo.N)
    x_mvns = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mixture_model_relbo.mus[i]),
            scale_diag=tf.convert_to_tensor(mixture_model_relbo.stds[i]))
        for i in range(len(mixture_model_relbo.mus))
    ]
    x_train, components = mixture_model_relbo.build_toy_dataset(
        mixture_model_relbo.N)
    n_examples, n_features = x_train.shape
    qxs = [
        MultivariateNormalDiag(loc=[scipy.stats.norm.rvs(1)],
                               scale_diag=[scipy.stats.norm.rvs(1)])
        for i in range(10)
    ]
    truth = [
        MultivariateNormalDiag(loc=mixture_model_relbo.mus[i],
                               scale_diag=mixture_model_relbo.stds[i])
        for i in range(len(mixture_model_relbo.mus))
    ]
    qxs.extend(truth)
    mix = Mixture(cat=Categorical(probs=[1. / len(qxs)] * len(qxs)),
                  components=qxs)
    sess = tf.InteractiveSession()
    with sess.as_default():
        mixture_model_relbo.fully_corrective(mix, x)
def deserialize_mixture_from_file(filename):
    qt_deserialized = np.load(filename)
    locs = qt_deserialized['locs'].astype(np.float32)
    scale_diags = qt_deserialized['scale_diags'].astype(np.float32)
    weights = qt_deserialized['weights'].astype(np.float32)

    q_comps = [
        Normal(loc=loc[0], scale=scale_diag[0])
        for loc, scale_diag in zip(locs, scale_diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    q_latest = Mixture(cat=cat, components=q_comps)
    return q_latest
def deserialize_mixture_from_file(filename):
    qt_deserialized = np.load(filename)
    locs = qt_deserialized['locs'].astype(np.float32)
    scale_diags = qt_deserialized['scale_diags'].astype(np.float32)
    weights = qt_deserialized['weights'].astype(np.float32)

    q_comps = [
        MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
        for loc, scale_diag in zip(locs, scale_diags)
    ]
    cat = Categorical(probs=tf.convert_to_tensor(weights))
    q_latest = Mixture(cat=cat, components=q_comps)
    return q_latest
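# Hypothetical serialization counterpart (the file path and array shapes
# are illustrative): the .npz archive is assumed to hold the keys 'locs',
# 'scale_diags', and 'weights' read above.
np.savez('/tmp/qt_latest.npz',
         locs=np.random.randn(3, 2).astype(np.float32),
         scale_diags=np.abs(np.random.randn(3, 2)).astype(np.float32),
         weights=np.array([0.5, 0.3, 0.2], dtype=np.float32))
q = deserialize_mixture_from_file('/tmp/qt_latest.npz')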
def target_dist(*args, **kwargs):
    """Build the target distribution."""
    stds = kwargs['stds']
    mus = kwargs['mus']
    pi = kwargs['pi']
    pcomps = [
        MultivariateNormalDiag(
            loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
            scale_diag=tf.convert_to_tensor(stds[i], dtype=tf.float32))
        for i in range(len(mus))
    ]
    p = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi[0])),
                components=pcomps)
    return p
def get_mixture(weights, components):
    """Build a mixture model with the given weights and components.

    Args:
        weights: list or np.array of mixing weights
        components: list of ed.distribution components

    Returns:
        the constructed mixture
    """
    assert len(weights) == len(components), \
        'Weights size %d not same as components size %d' % (
            len(weights), len(components))
    assert math.isclose(1., sum(weights),
                        rel_tol=1e-5), "Weights not normalized"
    return Mixture(
        cat=Categorical(
            probs=tf.convert_to_tensor(weights, dtype=tf.float32)),
        components=components)
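# Hypothetical usage sketch for get_mixture (the components below are
# illustrative): a two-component diagonal-Gaussian mixture in 2-D.
components = [
    MultivariateNormalDiag(loc=tf.zeros(2), scale_diag=tf.ones(2)),
    MultivariateNormalDiag(loc=tf.ones(2), scale_diag=0.5 * tf.ones(2)),
]
q = get_mixture([0.4, 0.6], components)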
def define_stochastic_model(self, P, K):
    M = self.minibatch_size
    self.W0 = Gamma(0.1 * tf.ones([K, P]), 0.3 * tf.ones([K, P]))
    if self.zero_inflation:
        self.W1 = Normal(tf.zeros([K, P]), tf.ones([K, P]))
    self.z = Gamma(2. * tf.ones([M, K]), 1. * tf.ones([M, K]))
    self.r = Gamma(2. * tf.ones([P]), 1. * tf.ones([P]))
    self.l = TransformedDistribution(
        distribution=Normal(self.mean_llib * tf.ones([M, 1]),
                            self.std_llib * tf.ones([M, 1])),
        bijector=tf.contrib.distributions.bijectors.Exp())
    self.rho = tf.matmul(self.z, self.W0)
    self.rho = self.rho / tf.reshape(
        tf.reduce_sum(self.rho, axis=1), (-1, 1))  # MxP
    self.lam = Gamma(self.r, self.r / (self.rho * self.l))
    if self.zero_inflation:
        self.logit_pi = tf.matmul(self.z, self.W1)
        self.pi = tf.minimum(
            tf.maximum(tf.nn.sigmoid(self.logit_pi), 1e-7), 1. - 1e-7)
        self.cat = Categorical(
            probs=tf.stack([self.pi, 1. - self.pi], axis=2))
        self.components = [
            Poisson(rate=1e-30 * tf.ones([M, P])),
            Poisson(rate=self.lam)
        ]
        self.likelihood = Mixture(cat=self.cat, components=self.components)
    else:
        self.likelihood = Poisson(rate=self.lam)
def test_lipschitz_init(pi, mus, stds):
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(FLAGS.seed)
        sess = tf.InteractiveSession()
        with sess.as_default():
            s = construct_normal([1], 0, 's')
            sess.run(tf.global_variables_initializer())
            logger.info('mean of s = %.3f, std = %.3f' %
                        (s.mean().eval(), s.stddev().eval()))
            # build target distribution
            pcomps = [
                MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stds[i],
                                                    dtype=tf.float32))
                for i in range(len(mus))
            ]
            p = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                        components=pcomps)
            lipschitz_init_estimate = opt.adafw_linit(s, p)
            logger.info('L estimate is %.5f' % lipschitz_init_estimate)
def __init__(self, K, D, N, nu, use_param=False):
    self.K = K  # number of topics
    self.D = D  # number of documents
    self.N = N  # number of words of each document
    self.nu = nu
    self.alpha = alpha = tf.zeros([K]) + 0.1
    mu0 = tf.constant([0.0] * nu)
    sigma0 = tf.eye(nu)
    self.sigma = sigma = WishartCholesky(
        df=nu,
        scale=sigma0,
        cholesky_input_output_matrices=True,
        sample_shape=K)
    # sigma_inv = tf.matrix_inverse(sigma)
    self.mu = mu = Normal(mu0, tf.ones(nu), sample_shape=K)
    self.theta = theta = [None] * D
    self.z = z = [None] * D
    self.w = w = [None] * D
    for d in range(D):
        theta[d] = Dirichlet(alpha)
        if use_param:
            w[d] = ParamMixture(mixing_weights=theta[d],
                                component_params={
                                    'loc': mu,
                                    'scale_tril': sigma
                                },
                                component_dist=MultivariateNormalTriL,
                                sample_shape=N[d])
            z[d] = w[d].cat
        else:
            z[d] = Categorical(probs=theta[d], sample_shape=N[d])
            components = [
                MultivariateNormalTriL(loc=tf.gather(mu, k),
                                       scale_tril=tf.gather(sigma, k),
                                       sample_shape=N[d])
                for k in range(K)
            ]
            w[d] = Mixture(cat=z[d], components=components,
                           sample_shape=N[d])
def myNormal(loc, scale):
    # "contaminated normal" robust likelihood: 95% N(loc, scale) plus a
    # 5% wide-tailed N(loc, 10) component to absorb outliers
    nn = Normal(loc, scale)
    tt = Normal(loc, 10.)
    return Mixture(Categorical(probs=[[0.95, 0.05]]),
                   components=[nn, tt])
    outdir = os.path.expanduser(outdir)
    os.makedirs(outdir, exist_ok=True)
    return outdir


if __name__ == "__main__":
    x_train, components = build_toy_dataset(N)
    n_examples, n_features = x_train.shape

    # build model
    xcomps = [
        Normal(loc=tf.convert_to_tensor(mus[i]),
               scale=tf.convert_to_tensor(stds[i]))
        for i in range(len(mus))
    ]
    x = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                components=xcomps,
                sample_shape=N)
    qx = construct_normal([n_features], 42, 'qx')

    inference = ed.KLqp({x: qx})
    inference.run(n_iter=FLAGS.n_iter)

    # save the target
    outdir = setup_outdir()
    np.savez(os.path.join(outdir, 'target_dist.npz'),
             pi=pi, mus=mus, stds=stds)
    # save the approximation
    return locs, scales, logits, hidden1


def myNormal(loc, scale):
    nn = Normal(loc, scale)
    tt = Normal(loc, 10.)
    return Mixture(Categorical(probs=[[0.95, 0.05]]),
                   components=[nn, tt])


locs, scales, logits, hidden1 = neural_network(X_ph)
cat = Categorical(logits=logits)
components = [
    Normal(loc=loc, scale=scale)
    for loc, scale in zip(tf.unstack(tf.transpose(locs)),
                          tf.unstack(tf.transpose(scales)))
]
y = Mixture(cat=cat, components=components, value=tf.zeros_like(y_ph))
# Note: A bug exists in Mixture which prevents samples from it from having
# a shape of [None]. For now fix it using the value argument, as
# sampling is not necessary for MAP estimation anyway.

# There are no latent variables to infer. Thus inference is concerned
# only with training model parameters, which are baked into how we
# specify the neural networks.
n_epoch = 1000
inference = ed.MAP(data={y: y_ph})
inference.initialize(var_list=tf.trainable_variables(), n_iter=n_epoch)

sess = ed.get_session()
def main(argv):
    del argv

    outdir = FLAGS.outdir
    if '~' in outdir:
        outdir = os.path.expanduser(outdir)
    os.makedirs(outdir, exist_ok=True)

    # Files to log metrics
    times_filename = os.path.join(outdir, 'times.csv')
    elbos_filename = os.path.join(outdir, 'elbos.csv')
    objective_filename = os.path.join(outdir, 'kl.csv')
    reference_filename = os.path.join(outdir, 'ref_kl.csv')
    step_filename = os.path.join(outdir, 'steps.csv')
    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        curvature_filename = os.path.join(outdir, 'curvature.csv')
        gap_filename = os.path.join(outdir, 'gap.csv')
        iter_info_filename = os.path.join(outdir, 'iter_info.txt')
    elif FLAGS.fw_variant == 'line_search':
        goutdir = os.path.join(outdir, 'gradients')

    # empty the files already present in the folder
    open(times_filename, 'w').close()
    open(elbos_filename, 'w').close()
    open(objective_filename, 'w').close()
    open(reference_filename, 'w').close()
    open(step_filename, 'w').close()
    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        open(curvature_filename, 'w').close()
        append_to_file(curvature_filename, "c_local,c_global")
        open(gap_filename, 'w').close()
        open(iter_info_filename, 'w').close()
    elif FLAGS.fw_variant == 'line_search':
        os.makedirs(goutdir, exist_ok=True)

    for i in range(FLAGS.n_fw_iter):
        # NOTE: First iteration (t = 0) is initialization
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(FLAGS.seed)
            sess = tf.InteractiveSession()
            with sess.as_default():
                p, mus, stds = create_target_dist()

                # current iterate (solution until now)
                if FLAGS.init == 'random':
                    muq = np.random.randn(D).astype(np.float32)
                    stdq = softplus(np.random.randn(D).astype(np.float32))
                    raise ValueError
                else:
                    muq = mus[0]
                    stdq = stds[0]

                # 1 correct LMO
                t = 1
                comps = [{'loc': muq, 'scale_diag': stdq}]
                weights = [1.0]
                curvature_estimate = opt.adafw_linit()

                qtx = MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(muq, dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stdq,
                                                    dtype=tf.float32))
                fw_iterates = {p: qtx}

                # calculate kl-div with 1 component
                objective_old = kl_divergence(qtx, p).eval()
                logger.info("kl with init %.4f" % (objective_old))
                append_to_file(reference_filename, objective_old)

                # s is the solution to the LMO. It is initialized randomly:
                # mu ~ N(0, 1), std ~ softplus(N(0, 1))
                s = coreutils.construct_multivariatenormaldiag([D], t, 's')
                sess.run(tf.global_variables_initializer())

                total_time = 0
                start_inference_time = time.time()
                if FLAGS.LMO == 'vi':
                    # we have to iterate over parameter space
                    raise ValueError
                inference = relbo.KLqp({p: s},
                                       fw_iterates=fw_iterates,
                                       fw_iter=t)
                inference.run(n_iter=FLAGS.LMO_iter)
                # s now contains the solution to the LMO
                end_inference_time = time.time()

                mu_s = s.mean().eval()
                cov_s = s.stddev().eval()

                # NOTE: keep only step size time
                #total_time += end_inference_time - start_inference_time

                # compute step size to update the next iterate
                step_result = {}
                if FLAGS.fw_variant == 'fixed':
                    gamma = 2. / (t + 2.)
                elif FLAGS.fw_variant == 'line_search':
                    start_line_search_time = time.time()
                    step_result = opt.line_search_dkl(
                        weights, [c['loc'] for c in comps],
                        [c['scale_diag'] for c in comps], qtx, mu_s, cov_s,
                        s, p, t)
                    end_line_search_time = time.time()
                    total_time += (end_line_search_time -
                                   start_line_search_time)
                    gamma = step_result['gamma']
                elif FLAGS.fw_variant == 'adafw':
                    start_adafw_time = time.time()
                    step_result = opt.adaptive_fw(
                        weights, [c['loc'] for c in comps],
                        [c['scale_diag'] for c in comps], qtx, mu_s, cov_s,
                        s, p, t, curvature_estimate)
                    end_adafw_time = time.time()
                    total_time += end_adafw_time - start_adafw_time
                    gamma = step_result['gamma']
                else:
                    raise NotImplementedError

                comps.append({'loc': mu_s, 'scale_diag': cov_s})
                weights = [(1. - gamma), gamma]
                c_global = estimate_global_curvature(comps, qtx)

                q_latest = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(weights)),
                    components=[MultivariateNormalDiag(**c) for c in comps])

                # Log metrics for current iteration
                time_t = float(total_time)
                logger.info('total time %f' % (time_t))
                append_to_file(times_filename, time_t)

                elbo_t = elbo(q_latest, p, n_samples=1000)
                logger.info("iter, %d, elbo, %.2f +/- %.2f" %
                            (t, elbo_t[0], elbo_t[1]))
                append_to_file(elbos_filename,
                               "%f,%f" % (elbo_t[0], elbo_t[1]))

                logger.info('iter %d, gamma %.4f' % (t, gamma))
                append_to_file(step_filename, gamma)

                objective_t = kl_divergence(q_latest, p).eval()
                logger.info("run %d, kl %.4f" % (i, objective_t))
                append_to_file(objective_filename, objective_t)

                if FLAGS.fw_variant.startswith('ada'):
                    curvature_estimate = step_result['c_estimate']
                    append_to_file(gap_filename, step_result['gap'])
                    append_to_file(iter_info_filename,
                                   step_result['step_type'])
                    logger.info(
                        'gap = %.3f, ct = %.5f, iter_type = %s' %
                        (step_result['gap'], step_result['c_estimate'],
                         step_result['step_type']))
                    append_to_file(
                        curvature_filename,
                        '%f,%f' % (curvature_estimate, c_global))
                elif FLAGS.fw_variant == 'line_search':
                    n_line_search_samples = step_result['n_samples']
                    grad_t = step_result['grad_gamma']
                    g_outfile = os.path.join(
                        goutdir, 'line_search_samples_%d.npy.%d' %
                        (n_line_search_samples, t))
                    logger.info('saving line search data to, %s' %
                                g_outfile)
                    np.save(open(g_outfile, 'wb'), grad_t)

            sess.close()
        tf.reset_default_graph()
def f(gamma):
    weights = [(1 - gamma), gamma]
    q_l = Mixture(cat=Categorical(probs=tf.convert_to_tensor(weights)),
                  components=[MultivariateNormalDiag(**c) for c in comps])
    return kl_divergence(q_l, qt).eval()
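# Hypothetical usage sketch: scan f over a coarse grid of candidate step
# sizes and keep the gamma with the smallest KL. `comps` and `qt` come from
# the enclosing scope, and an active TF session is assumed so .eval() works.
gammas = np.linspace(0., 1., 11)
kls = [f(g) for g in gammas]
gamma_star = gammas[int(np.argmin(kls))]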
    locs = tf.layers.dense(net, K, activation=None)
    scales = tf.layers.dense(net, K, activation=tf.exp)
    logits = tf.layers.dense(net, K, activation=None)
    return locs, scales, logits


locs_new, scales_new, logits_new = neural_network(X_ph_new)
cat_new = Categorical(logits=logits_new)
components_new = [
    Normal(loc=loc, scale=scale)
    for loc, scale in zip(tf.unstack(tf.transpose(locs_new)),
                          tf.unstack(tf.transpose(scales_new)))
]
y_new = Mixture(cat=cat_new, components=components_new,
                value=tf.zeros_like(y_ph_new))
## Note: A bug exists in Mixture which prevents samples from it from having
## a shape of [None]. For now fix it using the value argument, as
## sampling is not necessary for MAP estimation anyway.

######################### inference ##############################
# There are no latent variables to infer. Thus inference is concerned
# only with training model parameters, which are baked into how we
# specify the neural networks.
inference_new = ed.MAP(data={y_new: y_ph_new})
optimizer_new = tf.train.AdamOptimizer(learning_rate=learning_rate)
inference_new.initialize(optimizer=optimizer_new,
                         var_list=tf.trainable_variables())
def test_adaptive_gamma():
    pi = np.array([0.2, 0.5, 0.3]).astype(np.float32)
    mus = [[2.], [-1.], [0.]]
    stds = [[.6], [.4], [.5]]
    outfile = os.path.join(FLAGS.outdir, 'gamma.csv')
    g = tf.Graph()
    with g.as_default():
        sess = tf.InteractiveSession()
        with sess.as_default():
            # p = pi[0] * N(mus[0], stds[0]) + ... + pi[2] * N(mus[2], stds[2])
            p = Mixture(
                cat=Categorical(probs=tf.convert_to_tensor(pi)),
                components=[
                    MultivariateNormalDiag(
                        loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                        scale_diag=tf.convert_to_tensor(stds[i],
                                                        dtype=tf.float32))
                    for i in range(len(mus))
                ])
            qt = Mixture(
                cat=Categorical(probs=tf.convert_to_tensor(pi[:2])),
                components=[
                    MultivariateNormalDiag(
                        loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                        scale_diag=tf.convert_to_tensor(stds[i],
                                                        dtype=tf.float32))
                    for i in range(len(mus[:2]))
                ])
            st = MultivariateNormalDiag(
                loc=tf.convert_to_tensor(mus[2], dtype=tf.float32),
                scale_diag=tf.convert_to_tensor(stds[2], dtype=tf.float32))
            if FLAGS.fw_variant == "line_search":
                gamma = opt.line_search_dkl(pi[:2], mus[:2], stds[:2], qt,
                                            mus[2], stds[2], st, p,
                                            FLAGS.init_k, return_gamma=True)
                # seed, n_line_search_iter, n_monte_carlo_samples, b, gamma
                append_to_file(
                    outfile, "%d,%d,%d,%d,%f" %
                    (FLAGS.seed, FLAGS.n_line_search_iter,
                     FLAGS.n_monte_carlo_samples, 1, gamma))
            elif FLAGS.fw_variant == "adafw":
                gamma = opt.adaptive_fw(weights=pi[:2],
                                        locs=mus[:2],
                                        diags=stds[:2],
                                        q_t=qt,
                                        mu_s=mus[2],
                                        cov_s=stds[2],
                                        s_t=st,
                                        p=p,
                                        k=FLAGS.init_k,
                                        l_prev=opt.adafw_linit(qt, p),
                                        return_gamma=True)
                # seed, n_monte_carlo_samples, eta, tau, linit, gamma
                append_to_file(
                    outfile, "%d,%d,%f,%f,%f,%f" %
                    (FLAGS.seed, FLAGS.n_monte_carlo_samples,
                     FLAGS.damping_adafw, FLAGS.exp_adafw,
                     FLAGS.linit_fixed, gamma))
            print_err(pi[2], gamma)
def plot_objective():
    weights_q = [0.6, 0.4]
    # weights_s = gamma is what we iterate on
    gammas = np.arange(0., 1., 0.02)
    # for exact gamma
    mus = [2., -1., 0.]
    stds = [.6, .4, 0.5]
    # for inexact approx
    mus2 = [-1., 1., 0., 2.0]
    stds2 = [3.3, 0.9, 0.5, 0.4]
    g = tf.Graph()
    with g.as_default():
        sess = tf.InteractiveSession()
        with sess.as_default():
            comps = [
                Normal(loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                       scale=tf.convert_to_tensor(stds[i],
                                                  dtype=tf.float32))
                for i in range(len(mus))
            ]
            comps2 = [
                Normal(loc=tf.convert_to_tensor(mus2[i], dtype=tf.float32),
                       scale=tf.convert_to_tensor(stds2[i],
                                                  dtype=tf.float32))
                for i in range(len(mus2))
            ]
            # p = pi[0] * N(mus[0], stds[0]) + ... + pi[2] * N(mus[2], stds[2])
            weight_s = 0.5
            logger.info('true gamma for exact mixture %.2f' % (weight_s))
            final_weights = [(1 - weight_s) * w for w in weights_q]
            final_weights.append(weight_s)
            p = Mixture(
                cat=Categorical(probs=tf.convert_to_tensor(final_weights)),
                components=comps)

            objective_exact = []
            objective_inexact = []
            for gamma in gammas:
                new_weights = [(1 - gamma) * w for w in weights_q]
                new_weights.append(gamma)
                q = Mixture(
                    cat=Categorical(
                        probs=tf.convert_to_tensor(new_weights)),
                    components=comps)
                objective = kl_divergence(q, p,
                                          allow_nan_stats=False).eval()
                objective_exact.append(objective)

                new_weights2 = [(1 - gamma) * w for w in final_weights]
                new_weights2.append(gamma)
                q2 = Mixture(
                    cat=Categorical(
                        probs=tf.convert_to_tensor(new_weights2)),
                    components=comps2)
                objective2 = kl_divergence(q2, p,
                                           allow_nan_stats=False).eval()
                objective_inexact.append(objective2)

                logger.info(
                    'gamma = %.2f, D_kl_exact = %.5f, D_kl_inexact = %.5f'
                    % (gamma, objective, objective2))

            plt.plot(gammas, objective_exact, '-', color='r',
                     linewidth=2.0, label='exact mixture')
            plt.plot(gammas, objective_inexact, '-', color='b',
                     linewidth=2.0, label='inexact mixture')
            plt.legend()
            plt.xlabel('gamma')
            plt.ylabel('kl divergence of mixture')
            plt.show()
def test_exact_gamma():
    pi = mixture_model_relbo.pi
    mus = mixture_model_relbo.mus
    stds = mixture_model_relbo.stds
    outfile = os.path.join(FLAGS.outdir, 'gamma.csv')
    g = tf.Graph()
    with g.as_default():
        tf.set_random_seed(FLAGS.seed)
        sess = tf.InteractiveSession()
        with sess.as_default():
            # Build p = pi[0] * N(mu[0], std[0]) + pi[1] * N(mu[1], std[1]);
            # thus gamma = pi[1] (= 0.6), q_t = N(mu[0], std[0]),
            # s = N(mu[1], std[1])
            pcomps = [
                MultivariateNormalDiag(
                    loc=tf.convert_to_tensor(mus[i], dtype=tf.float32),
                    scale_diag=tf.convert_to_tensor(stds[i],
                                                    dtype=tf.float32))
                for i in range(len(mus))
            ]
            p = Mixture(
                cat=Categorical(probs=tf.convert_to_tensor(pi[0])),
                components=pcomps)
            # build the current iterate q_t
            weights = [1.]
            locs = [mus[0]]
            diags = [stds[0]]
            qt = Mixture(
                cat=Categorical(probs=tf.convert_to_tensor(weights)),
                components=[
                    MultivariateNormalDiag(loc=loc, scale_diag=diag)
                    for loc, diag in zip(locs, diags)
                ])
            s = MultivariateNormalDiag(loc=mus[1], scale_diag=stds[1])
            if FLAGS.fw_variant == "line_search":
                gamma = opt.line_search_dkl(weights, locs, diags, qt,
                                            mus[1], stds[1], s, p,
                                            FLAGS.init_k, return_gamma=True)
                # seed, n_line_search_iter, n_monte_carlo_samples, b, gamma
                append_to_file(
                    outfile, "%d,%d,%d,%d,%f" %
                    (FLAGS.seed, FLAGS.n_line_search_iter,
                     FLAGS.n_monte_carlo_samples, 1, gamma))
            elif FLAGS.fw_variant == "adafw":
                gamma = opt.adaptive_fw(weights=weights,
                                        locs=locs,
                                        diags=diags,
                                        q_t=qt,
                                        mu_s=mus[1],
                                        cov_s=stds[1],
                                        s_t=s,
                                        p=p,
                                        k=FLAGS.init_k,
                                        l_prev=1.,
                                        return_gamma=True)
                # seed, n_monte_carlo_samples, eta, tau, linit, gamma
                append_to_file(
                    outfile, "%d,%d,%f,%f,%f,%f" %
                    (FLAGS.seed, FLAGS.n_monte_carlo_samples,
                     FLAGS.damping_adafw, FLAGS.exp_adafw,
                     FLAGS.linit_fixed, gamma))
            else:
                raise NotImplementedError('other variants not tested yet.')
            print_err(pi[0][1], gamma)
def _test(cat, components, n):
    x = Mixture(cat=cat, components=components)
    val_est = get_dims(x.sample(n))
    val_true = n + get_dims(components[0].mu)
    assert val_est == val_true
x_train = build_toy_dataset(N)

plt.scatter(x_train[:, 0], x_train[:, 1])
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

# MODEL
mu = Normal(mu=tf.zeros([K, D]), sigma=tf.ones([K, D]))
sigma = InverseGamma(alpha=tf.ones([K, D]), beta=tf.ones([K, D]))
cat = Categorical(logits=tf.zeros([N, K]))
components = [
    MultivariateNormalDiag(mu=tf.ones([N, 1]) * tf.gather(mu, k),
                           diag_stdev=tf.ones([N, 1]) * tf.gather(sigma, k))
    for k in range(K)
]
x = Mixture(cat=cat, components=components)

# INFERENCE
qmu = Normal(mu=tf.Variable(tf.random_normal([K, D])),
             sigma=tf.nn.softplus(tf.Variable(tf.zeros([K, D]))))
qsigma = InverseGamma(
    alpha=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))),
    beta=tf.nn.softplus(tf.Variable(tf.random_normal([K, D]))))

inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: x_train})
inference.initialize(n_samples=20, n_iter=4000)

sess = ed.get_session()
init = tf.initialize_all_variables()
init.run()