# NOTE(review): flattened chunk — many statements collapsed onto one physical
# line, and the trailing tf.merge_summary([...]) call is truncated
# mid-expression, so the code is left byte-identical below.
# What the visible statements do:
#   * read (N, D) from the data matrix X; build two Tanh(5) networks —
#     p_network feeds the BernoulliLikelihoodModel (decoder side), q_network
#     feeds the VAE (presumably the encoder — confirm against VAE.__init__);
#   * pick the latent prior: GaussianPrior when args.no_tree is set, otherwise
#     a Dirichlet-diffusion-tree prior (DDTPrior, c=0.1, sigma0=1);
#   * declare symbolic batch inputs (int32 index vector, data matrix, noise
#     matrix; batch_indices is declared but not used in this chunk);
#   * z_sample = vae.sample_z(batch, batch_noise) — looks like a
#     reparameterized draw using batch_noise, TODO confirm;
#   * assemble lower_bound = log_prior + log_likelihood - log_likelihood_z,
#     i.e. an ELBO with vae.log_likelihood(z_sample, batch) presumably playing
#     the role of log q(z|x) — verify against the VAE class;
#   * encode the full dataset X and score it under the tree prior;
#   * start building summaries via tf.merge_summary (pre-1.0 TensorFlow API;
#     tf.summary.merge in TF >= 1.0) — the call is cut off here.
# Assumes `args`, `X`, and the model/prior classes are defined elsewhere.
N, D = X.shape p_network = Tanh(5) q_network = Tanh(5) print("Creating tree prior...") if args.no_tree: tree_prior = GaussianPrior(N, args.embedding_size) else: tree_prior = DDTPrior(N, args.embedding_size, c=0.1, sigma0=1) print("Creating likelihood model...") likelihood_model = BernoulliLikelihoodModel(args.embedding_size, D, p_network) vae = VAE(D, args.embedding_size, q_network, likelihood_model) batch_indices = T.vector(dtype='int32') batch = T.matrix() batch_noise = T.matrix() z_sample = vae.sample_z(batch, batch_noise) encodings = vae.encode(batch) log_likelihood = likelihood_model.log_likelihood(batch, z_sample) log_likelihood_z = vae.log_likelihood(z_sample, batch) log_prior = tree_prior.log_prior(z_sample) lower_bound = log_prior + log_likelihood - log_likelihood_z all_encodings = vae.encode(X) tree_likelihood = tree_prior.log_prior(all_encodings) bound_summary = tf.merge_summary([ tf.summary.scalar("ELBO", lower_bound),
# Diffusion-tree rate helpers and the symbolic encoder graph.
# The helpers close over the module-level scalar `c` defined further down;
# they must not be called before that assignment runs.

def a(t):
    """Divergence rate a(t) = c / (1 - t) of the diffusion tree."""
    return c / (1 - t)

def log_a(t):
    """Log-divergence rate, T.log(a(t))."""
    return T.log(c / (1 - t))

def A(t):
    """Cumulative rate A(t) = int_0^t a(s) ds = -c * log(1 - t)."""
    return -c * T.log(1 - t)

def create_harmonic(M):
    """Harmonic numbers H_1..H_M as a float32 numpy array."""
    return (1.0 / np.arange(1, M + 1)).cumsum().astype(np.float32)

T.set_default_device('/cpu:0')

# Divergence scale used by a/log_a/A above.
c = T.scalar(name='c')

# Segment table, one int32 row per tree segment; columns unpack to
# (a_idx, b_idx, leaf_segment, m, log_fac) — presumably segment endpoints,
# a leaf flag, a count, and a log-factorial term; confirm against the caller.
segments = T.matrix(dtype='int32', name='segments')
a_idx, b_idx, leaf_segment, m, log_fac = (segments[:, col] for col in range(5))

# Symbolic inputs: data batch x and noise e for the reparameterized sample.
x = T.matrix(name='x')
e = T.matrix(name='e')

# Shared encoder trunk: two stacked Tanh(200) layers over the data placeholder.
q_network = Vector(X.shape[1], placeholder=x, is_input=False) >> Repeat(Tanh(200), 2)

# Mean head.
q_mu_network = q_network >> Linear(D)
q_mu = q_mu_network.get_outputs()[0].get_placeholder()

# Scale head: the Linear output is treated as a log-variance,
# so sigma = sqrt(exp(log_var)).
q_sigma_network = q_network >> Linear(D)
log_var = q_sigma_network.get_outputs()[0].get_placeholder()
q_sigma = tf.sqrt(tf.exp(log_var))

# Reparameterized latent sample: z = mu + e * sigma.
z = q_mu + e * q_sigma
# NOTE(review): flattened chunk, left byte-identical. The leading
# `return dist.__class__(...)` is the tail of a function (presumably
# make_variable, which is called just below) whose `def` line is outside this
# chunk; it re-wraps a distribution's natural parameters in a trainable
# T.variable while preserving the distribution class. Additionally, the
# embedded `# stats_net = Relu...` comment, on this single physical line,
# comments out everything after it — the original file clearly had these as
# separate lines; do not reformat without restoring the original line breaks.
# Visible statements:
#   * generate (X, Y) with a fixed seed and fit a no-intercept sklearn
#     LogisticRegression as a reference (coef_, score_);
#   * q_w: variational Gaussian over the weights, initialized from
#     (I, 0) natural parameters via make_variable;
#   * stats network: a GaussianLayer over concat([x, y[..., None]], -1),
#     built under T.initialization('xavier'); its per-example natural
#     parameters are summed over the batch;
#   * natural-gradient step toward next_w using
#     (p_w + num_batches * stats - q_w) / N in natural-parameter space;
#     `p_w` is not defined in this chunk — presumably the prior, defined
#     elsewhere in the file.
# NOTE(review): num_batches = T.to_float(N / batch_size) divides before
# casting; if N is a Python int and batch_size a symbolic int, the division
# semantics depend on the backend — confirm float division is intended.
return dist.__class__(T.variable(T.to_float( dist.get_parameters('natural'))), parameter_type='natural') (X, Y) = generate_data(N, D, seed=3) cf = LogisticRegression(fit_intercept=False) cf.fit(X, Y) coef_ = cf.coef_ score_ = cf.score(X, Y) q_w = make_variable( Gaussian([T.to_float(np.eye(D))[None], T.to_float(np.zeros(D))[None]])) x, y = T.matrix(), T.vector() lr = 1e-4 batch_size = T.shape(x)[0] num_batches = T.to_float(N / batch_size) with T.initialization('xavier'): # stats_net = Relu(D + 1, 20) >> Relu(20) >> GaussianLayer(D) stats_net = GaussianLayer(D + 1, D) net_out = stats_net(T.concat([x, y[..., None]], -1)) stats = T.sum(net_out.get_parameters('natural'), 0)[None] natural_gradient = (p_w.get_parameters('natural') + num_batches * stats - q_w.get_parameters('natural')) / N next_w = Gaussian(q_w.get_parameters('natural') + lr * natural_gradient, parameter_type='natural')