def fit(self, n_iter=250):
    """Fit the model with variational EM.

    TODO:
        * add convergence criteria based on heldout data?
        * add attributes that store loss function
        * add ability to change optimizers or explain default

    Args:
        n_iter: int
            number of epochs of variational EM
    """
    with tf.Session() as sess:
        # Build the computation graph.
        self._build_graph()

        # Genotypes are always observed; positions are only fed when the
        # spatial effect is enabled.
        observed = {self.y: self.dataset.genotypes.y}
        if self.spatial_effect:
            observed[self.x_ph] = self.dataset.positions.x

        # Initialize inference.
        inference = ed.KLqp({self.l: self.ql}, observed)
        inference.initialize(n_iter=n_iter)

        # Initialize variables.
        sess.run(tf.global_variables_initializer())

        # Run inference, recording the loss reported by every update.
        n_steps = inference.n_iter
        self.loss = np.empty(n_steps, dtype=np.float32)
        for step in range(n_steps):
            info = inference.update()
            inference.print_progress(info)
            self.loss[step] = info['loss']

        # Finalize inference.
        inference.finalize()

        # Extract point estimates.
        self.l_hat = self.ql.mean().eval()  # posterior mean
        self.sigma_e_hat = self.sigma_e.eval()  # mle
        if self.sparse_loadings:
            self.tau_hat = self.tau.eval()  # mle
        if self.spatial_effect:
            self.sigma_s_hat = self.sigma_s.eval()  # mle
            self.alpha_hat = self.alpha.eval()  # mle
def main():
    """Fit a Bayesian linear regression with KLqp and report test errors.

    The data comes from ``prepare_scutfbp5500``. Weights ``w`` and bias
    ``b`` get standard-normal priors and mean-field normal posteriors; the
    fitted posterior is then scored with mean squared and mean absolute
    error on the test split.
    """
    import numpy as np  # local import: only needed for the shape report

    (X_train, y_train, X_test, y_test,
     train_filenames, test_filenames) = prepare_scutfbp5500(
        feat_layers=["conv4_1", "conv5_1"])

    # Report the shapes the labels promise rather than dumping whole arrays.
    print('Shape of X_train: {0}'.format(np.shape(X_train)))
    print('Shape of X_test: {0}'.format(np.shape(X_test)))
    print('Shape of y_train: {0}'.format(np.shape(y_train)))
    print('Shape of y_test: {0}'.format(np.shape(y_test)))

    # NOTE(review): N is hard-coded, so the placeholder and the likelihood
    # only accept exactly 3300 rows -- confirm that both the training and
    # the test split fed below have that many rows.
    N = 3300
    D = len(X_train[0])

    # Model.
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

    # Mean-field variational family; softplus keeps the scales positive.
    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
    qb = Normal(loc=tf.get_variable("qb/loc", [1]),
                scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))

    inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
    # NOTE(review): n_samples equals the hard-coded data size; presumably
    # unintended, since it is passed as the sample count -- confirm.
    inference.run(n_samples=3300, n_iter=250)

    # Posterior predictive: same likelihood with priors swapped for posteriors.
    y_post = ed.copy(y, {w: qw, b: qb})

    print("Mean squared error on test data:")
    print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
    print("Mean absolute error on test data:")
    print(ed.evaluate('mean_absolute_error', data={X: X_test, y_post: y_test}))
def train_model(self, games, results, num_train_steps=10000):
    """Train the variational posteriors with KLqp, printing metrics before and after.

    Args:
        games: data fed to the ``[None, 3]`` int32 input placeholder.
        results: observed values bound to the model output.
        num_train_steps: number of KLqp iterations.
    """
    # Pair every prior with its variational posterior.
    params_post = dict(zip(self.prior, self.var_post))

    x = tf.placeholder(tf.int32, shape=[None, 3])
    y = self.predict(x)

    metrics_before = ed.evaluate(['accuracy', 'log_likelihood'],
                                 data={y: results, x: games})
    print('accuracy, log_likelihood', metrics_before)

    inference = ed.KLqp(params_post, data={y: results, x: games})
    inference.run(n_samples=32, n_iter=num_train_steps)

    # Output object that depends on the variational posteriors instead of
    # the priors.
    out_post = ed.copy(y.d2, params_post)

    # Re-evaluate the metrics with the trained posteriors.
    metrics_after = ed.evaluate(['accuracy', 'log_likelihood'],
                                data={out_post: results, x: games})
    print('accuracy, log_likelihood', metrics_after)
def latent_space_model_example(use_map=True):
    """Build a latent space model on the celegans data and set up inference.

    Args:
        use_map: when true (the default) configure MAP estimation of the
            latent positions; otherwise configure KLqp with a mean-field
            normal posterior.

    Returns:
        The configured Edward inference object. It has not been run yet;
        the caller decides how many iterations to run.
    """
    x_train = celegans('~/data')

    # --------------------
    N = x_train.shape[0]  # Number of data points.
    K = 3  # Latent dimensionality.

    z = Normal(loc=tf.zeros([N, K]), scale=tf.ones([N, K]))

    # Calculate N x N distance matrix.
    # 1. Create a vector, [||z_1||^2, ||z_2||^2, ..., ||z_N||^2], and tile
    #    it to create N identical rows.
    xp = tf.tile(tf.reduce_sum(tf.pow(z, 2), 1, keep_dims=True), [1, N])
    # 2. Create a N x N matrix where entry (i, j) is
    #    ||z_i||^2 + ||z_j||^2 - 2 z_i^T z_j.
    xp = xp + tf.transpose(xp) - 2 * tf.matmul(z, z, transpose_b=True)
    # 3. Invert the pairwise distances and make rate along diagonals to be
    #    close to zero.
    xp = 1.0 / tf.sqrt(xp + tf.diag(tf.zeros(N) + 1e3))

    x = Poisson(rate=xp)

    # --------------------
    if use_map:
        # Maximum a posteriori (MAP) estimation is simple in Edward.
        inference = ed.MAP([z], data={x: x_train})
    else:
        # One could run variational inference. The posterior is declared
        # with the same [N, K] shape as z so the pair is shape-compatible.
        qz = Normal(loc=tf.get_variable('qz/loc', [N, K]),
                    scale=tf.nn.softplus(tf.get_variable('qz/scale', [N, K])))
        inference = ed.KLqp({z: qz}, data={x: x_train})

    # Hand the inference object back: it is local to this function, so a
    # caller cannot reach it any other way.
    return inference


def main():
    """Build the latent space model and run its inference for 2500 iterations."""
    inference = latent_space_model_example()
    inference.run(n_iter=2500)
def predict_old(self, target_var, observations):
    """Infer a variational posterior for ``target_var`` given ``observations``.

    Observations on ancestors of the target are swapped into a copy of the
    model; the remaining observations are passed to KLqp as data.

    Note: ``observations`` is updated in place with the posterior of every
    latent variable that the caller did not supply.

    Returns:
        The new q-variable created for the copied target.
    """
    # Add the posterior of the latent variables that were not observed.
    for latent in self.latent_vars:
        if latent not in observations.keys():
            observations[latent] = self.posterior(latent)

    ancestors = target_var.dist.get_ancestors()

    # Observations on ancestors of the target are baked into the copy.
    ancestors_obs = {
        obs: observations[obs]
        for obs in observations.keys()
        if obs.dist in ancestors
    }
    m_pred = self.copy(swap_dict=ancestors_obs)

    # Everything else is re-keyed by the matching variable of the copy.
    non_ancestors_obs = {
        m_pred.get_copy_from(obs): observations[obs]
        for obs in observations.keys()
        if obs.dist not in ancestors
    }

    # Edward needs the underlying distributions rather than the wrappers.
    non_ancestors_obs_ed = {}
    for key, value in iteritems(non_ancestors_obs):
        if isinstance(value, inf.models.RandomVariable):
            non_ancestors_obs_ed[key.dist] = value.dist
        else:
            non_ancestors_obs_ed[key.dist] = value

    copy_target = m_pred.get_copy_from(target_var)
    q_target = inf.Qmodel.new_qvar(copy_target, check_observed=False)

    inference_pred = ed.KLqp({copy_target.dist: q_target.dist},
                             data=non_ancestors_obs_ed)

    # Result is unused; the call is kept exactly as in the original.
    copy_target.dist.get_parents()

    inference_pred.run()
    return q_target
def bayesian_matrix_factorization():
    """Fit a Bayesian matrix factorization to toy data with KLqp.

    Draws true factors, builds a noisy toy matrix from them, fits
    mean-field normal posteriors over both factor matrices and the
    per-column log noise scale, then drops into the debugger.
    """
    n_rows = 10000
    n_cols = 5000
    n_factors = 3
    noise_std = .1

    # True latent factors.
    U_true = np.random.randn(n_rows, n_factors)
    V_true = np.random.randn(n_factors, n_cols)

    # DATA
    R_true, noises = build_matrix_factorization_toy_dataset(
        U_true, V_true, n_rows, n_cols, noise_std)
    print('data laoded')

    # MODEL
    U = Normal(loc=0.0, scale=1.0, sample_shape=[n_rows, n_factors])
    V = Normal(loc=0.0, scale=1.0, sample_shape=[n_factors, n_cols])
    log_sd = Normal(loc=tf.zeros(n_cols), scale=tf.ones(n_cols))
    # Multiplying a matrix of ones by diag(exp(log_sd)) repeats the
    # per-column scale on every row.
    column_scales = tf.matmul(tf.ones([n_rows, n_cols]),
                              tf.matrix_diag(tf.exp(log_sd)))
    R = Normal(loc=tf.matmul(U, V), scale=column_scales)

    # INFERENCE
    qU = Normal(
        loc=tf.get_variable("qU/loc", [n_rows, n_factors]),
        scale=tf.nn.softplus(
            tf.get_variable("qU/scale", [n_rows, n_factors])))
    qV = Normal(
        loc=tf.get_variable("qV/loc", [n_factors, n_cols]),
        scale=tf.nn.softplus(
            tf.get_variable("qV/scale", [n_factors, n_cols])))
    qlog_sd = Normal(
        loc=tf.get_variable("qlog_sd/loc", [n_cols]),
        scale=tf.nn.softplus(tf.get_variable("qlog_sd/scale", [n_cols])))

    inference = ed.KLqp({U: qU, V: qV, log_sd: qlog_sd}, data={R: R_true})
    inference.run()

    # NOTE(review): debugger breakpoint left in; the function returns
    # nothing, so this is the only way the results are inspected.
    pdb.set_trace()
def main(_):
    """Fit the same hierarchical normal model with KLqp and then with HMC.

    The model is written in a non-centered form (``mu + exp(logtau) *
    theta_prime``). Posterior means of all three latent variables are
    printed for each inference method.
    """
    def mean_field_normal(scope, shape):
        # Variables are named '<scope>/loc' and '<scope>/scale'; softplus
        # keeps the scale positive.
        with tf.variable_scope(scope):
            return Normal(tf.get_variable('loc', shape),
                          tf.nn.softplus(tf.get_variable('scale', shape)))

    # Data.
    J = 8
    data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
    data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

    # Model definition.
    mu = Normal(0., 10.)
    logtau = Normal(5., 1.)
    theta_prime = Normal(tf.zeros(J), tf.ones(J))
    sigma = tf.placeholder(tf.float32, J)
    y = Normal(mu + tf.exp(logtau) * theta_prime, sigma * tf.ones([J]))

    data = {y: data_y, sigma: data_sigma}

    # ed.KLqp inference.
    q_logtau = mean_field_normal('q_logtau', [])
    q_mu = mean_field_normal('q_mu', [])
    q_theta_prime = mean_field_normal('q_theta_prime', [J])

    inference = ed.KLqp(
        {logtau: q_logtau, mu: q_mu, theta_prime: q_theta_prime},
        data=data)
    inference.run(n_samples=15, n_iter=60000)

    print("==== ed.KLqp inference ====")
    print("E[mu] = %f" % (q_mu.mean().eval()))
    print("E[logtau] = %f" % (q_logtau.mean().eval()))
    print("E[theta_prime]=")
    print((q_theta_prime.mean().eval()))
    print("==== end ed.KLqp inference ====")
    print("")
    print("")

    # HMC inference.
    S = 400000
    burn = S // 2  # the first half of the chain is dropped from the means

    hq_logtau = Empirical(tf.get_variable('hq_logtau', [S]))
    hq_mu = Empirical(tf.get_variable('hq_mu', [S]))
    hq_theta_prime = Empirical(tf.get_variable('hq_thetaprime', [S, J]))

    inference = ed.HMC(
        {logtau: hq_logtau, mu: hq_mu, theta_prime: hq_theta_prime},
        data=data)
    inference.run()

    print("==== ed.HMC inference ====")
    print("E[mu] = %f" % (hq_mu.params.eval()[burn:].mean()))
    print("E[logtau] = %f" % (hq_logtau.params.eval()[burn:].mean()))
    print("E[theta_prime]=")
    print(hq_theta_prime.params.eval()[burn:, ].mean(0))
    print("==== end ed.HMC inference ====")
    print("")
    print("")
def function(x_data, pi_list, counts, total_counts_per_month, n_states,
             chain_len, n_samples=10, **kwargs):
    """Run KLqp for the chain model, checkpoint it and return the inferred matrix.

    Args:
        x_data: array indexed as ``x_data[i, :]`` for each of the
            ``chain_len`` steps; row ``i`` is the observation for
            ``counts[i]``.
        pi_list: latent variables, paired with ``qpi_list`` in order.
        counts: observed random variables, one per chain step.
        total_counts_per_month, n_states, n_samples: not used here.
        chain_len: number of chain steps to feed.
        **kwargs: must contain ``kwargs['ed_model']['qpi_list']``.

    Returns:
        Tuple ``(inferred_matrix, sess, qpi_list)``; the session is left
        open so the caller can re-use it.
    """
    sess = tf.Session()
    qpi_list = kwargs['ed_model']['qpi_list']
    saver = tf.train.Saver()

    # Make sess the default without closing it, so it can be re-used later.
    with sess.as_default():
        latent_to_posterior = dict(zip(pi_list, qpi_list))
        observed_rows = [x_data[step, :] for step in range(chain_len)]
        inference = ed.KLqp(latent_to_posterior,
                            data=dict(zip(counts, observed_rows)))
        inference.run(n_iter=3000)

        saver.save(sess, join(cache_path, 'experiment5.ckpt'))

        # One flattened row of posterior means per variational factor.
        inferred_probs = [pd.DataFrame(sess.run(qpi.mean()))
                          for qpi in qpi_list]
        flattened = np.array([frame.values.reshape(-1)
                              for frame in inferred_probs])
        inferred_matrix = pd.DataFrame(flattened)
        inferred_matrix = pretty_matrix(inferred_matrix)  # add column names

        # Get rid of row names.
        inferred_matrix = inferred_matrix.reset_index().drop('index', axis=1)
        print()  # hack for printing new line

    return inferred_matrix, sess, qpi_list
def gaussian_process_classification_example():
    """Fit a Gaussian process classifier to the crabs data with KLqp.

    Uses the first 100 rows: columns 3 onwards are the features and
    column 1 is the label.
    """
    ed.set_seed(42)

    data, metadata = crabs('~/data')
    X_train = data[:100, 3:]
    y_train = data[:100, 1]

    # Number of data points and number of features.
    N, D = X_train.shape[0], X_train.shape[1]

    print('Number of data points: {}'.format(N))
    print('Number of features: {}'.format(D))

    # --------------------
    # Model: latent function values with an RBF-kernel covariance, pushed
    # through a Bernoulli likelihood.
    X = tf.placeholder(tf.float32, [N, D])
    kernel_chol = tf.cholesky(rbf(X))
    f = MultivariateNormalTriL(loc=tf.zeros(N), scale_tril=kernel_chol)
    y = Bernoulli(logits=f)

    # --------------------
    # Inference: mean-field normal posterior over the latent function.
    qf_loc = tf.get_variable('qf/loc', [N])
    qf_scale = tf.nn.softplus(tf.get_variable('qf/scale', [N]))
    qf = Normal(loc=qf_loc, scale=qf_scale)

    inference = ed.KLqp({f: qf}, data={X: X_train, y: y_train})
    inference.run(n_iter=5000)
def test_auto_transform_true(self):
    """KLqp with auto_transform=True should match a softplus-transformed normal."""
    with self.test_session() as sess:
        # Match normal || softplus-inverse-normal distribution with
        # automated transformation on latter (assuming it is softplus).
        x = TransformedDistribution(
            distribution=Normal(0.0, 0.5),
            bijector=tf.contrib.distributions.bijectors.Softplus())
        x.support = 'nonnegative'

        qx_loc = tf.Variable(tf.random_normal([]))
        qx_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
        qx = Normal(loc=qx_loc, scale=qx_scale)

        inference = ed.KLqp({x: qx})
        inference.initialize(auto_transform=True, n_samples=5, n_iter=1000)
        tf.global_variables_initializer().run()
        for _ in range(inference.n_iter):
            info_dict = inference.update()

        # Check approximation on constrained space has same moments as
        # target distribution.
        n_samples = 10000
        x_mean, x_var = tf.nn.moments(x.sample(n_samples), 0)

        x_unconstrained = inference.transformations[x]
        inverse_bijector = bijectors.Invert(x_unconstrained.bijector)
        qx_constrained = transform(qx, inverse_bijector)
        qx_mean, qx_var = tf.nn.moments(qx_constrained.sample(n_samples), 0)

        target_mean, approx_mean, target_var, approx_var = sess.run(
            [x_mean, qx_mean, x_var, qx_var])

        self.assertAllClose(info_dict['loss'], 0.0, rtol=0.2, atol=0.2)
        self.assertAllClose(target_mean, approx_mean, rtol=1e-1, atol=1e-1)
        self.assertAllClose(target_var, approx_var, rtol=1e-1, atol=1e-1)
def BuildModelDynamic(x):
    """Build a Bayesian fully-connected classifier and its KLqp inference.

    Layer widths are read from the module-level ``layers`` sequence; every
    consecutive pair of widths gets a weight matrix and a bias vector with
    standard-normal priors and mean-field normal posteriors.

    Args:
        x: input tensor fed to the first layer.

    Returns:
        Tuple ``(inference, y_ph, y, q)``: the KLqp object, the int32 label
        placeholder of length ``N``, the Categorical output variable, and a
        list with one ``{'qw': ..., 'qb': ...}`` dict per layer.
    """
    pq = {}
    q = []

    n_weight_layers = len(layers) - 1
    for i in range(n_weight_layers):
        inputs = layers[i]
        outputs = layers[i + 1]

        w = Normal(tf.zeros([inputs, outputs]), scale=tf.ones(outputs))
        b = Normal(tf.zeros(outputs), scale=tf.ones(outputs))

        if i == n_weight_layers - 1:
            # Output layer: keep the raw affine output as logits. The
            # original softmax branch compared against an index the loop
            # never reached, so the output was ReLU-clamped instead.
            x = tf.matmul(x, w) + b
        else:
            x = tf.nn.relu(tf.matmul(x, w) + b)

        # Softplus keeps both posterior scales positive; previously only
        # the bias posterior had it.
        qw = Normal(loc=tf.get_variable('loc/qw_' + str(i),
                                        [inputs, outputs]),
                    scale=tf.nn.softplus(
                        tf.get_variable('scale/qw_' + str(i),
                                        [inputs, outputs])))
        qb = Normal(loc=tf.get_variable('loc/qb_' + str(i), [outputs]),
                    scale=tf.nn.softplus(
                        tf.get_variable('scale/qb_' + str(i), [outputs])))

        pq[w] = qw
        pq[b] = qb
        q.append({'qw': qw, 'qb': qb})

    # Categorical applies the softmax to logits itself.
    y = Categorical(logits=x)
    y_ph = tf.placeholder(tf.int32, [N])

    inference = ed.KLqp(pq, data={y: y_ph})
    return inference, y_ph, y, q
def _construct_inference(self):
    """Create the KLqp inference, its optimizer and an initialized session.

    Everything is built inside ``self.graph``. A RecursionError raised
    while initializing the inference prints the inference dict and quits.
    """
    self.is_graph_constructed = True
    print('constructing graph')

    with self.graph.as_default():
        # Pair each prior with the posterior at the same position.
        self.inference_dict = {
            prior: self.posteriors[position]
            for position, prior in enumerate(self.priors)
        }

        self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.inference = ed.KLqp(self.inference_dict,
                                 data={self.y: self.y_ph})

        try:
            self.inference.initialize(
                optimizer=self.optimizer,
                n_iter=10**8,
                var_list=tf.trainable_variables())
        except RecursionError:
            print('recursion error')
            print(self.inference_dict)
            quit()

        self.sess = tf.Session(graph=self.graph)
        with self.sess.as_default():
            tf.global_variables_initializer().run()
def bayesian_linear_regression():
    """Fit a Bayesian linear regression to simulated data with KLqp.

    Weights, bias and the log noise scale all get standard-normal priors
    and mean-field normal posteriors. The function ends in a debugger
    breakpoint.
    """
    def mean_field_normal(name, shape):
        # Variables are named '<name>/loc' and '<name>/scale'; softplus
        # keeps the scale positive.
        return Normal(
            loc=tf.get_variable(name + "/loc", shape),
            scale=tf.nn.softplus(tf.get_variable(name + "/scale", shape)))

    # Underlying model parameters.
    N = 5000  # number of data points
    D = 100  # number of features
    noise_std = .1

    # Generate simulated data.
    w_true = np.random.randn(D)
    X_train, y_train = build_lin_reg_toy_dataset(N, w_true, noise_std)
    X_test, y_test = build_lin_reg_toy_dataset(N, w_true, noise_std)

    # Set up the Edward model.
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    log_sd = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.exp(log_sd))

    # Inference in Edward.
    qw = mean_field_normal("qw", [D])
    qb = mean_field_normal("qb", [1])
    qlog_sd = mean_field_normal("qlog_sd", [1])

    inference = ed.KLqp({w: qw, b: qb, log_sd: qlog_sd},
                        data={X: X_train, y: y_train})
    inference.run(n_iter=1000)

    # NOTE(review): debugger breakpoint left in; the function returns
    # nothing, so this is the only way the results are inspected.
    pdb.set_trace()
def klqp(self, docs, S, T, wordVec):
    """Set up and run KLqp with a normal posterior on mu and a Wishart posterior on sigma.

    Args:
        docs: per-document observations; ``docs[d]`` is bound to
            ``self.w[d]`` for each of the ``self.D`` documents.
        S: number of samples per iteration.
        T: number of iterations.
        wordVec: indexable collection of word vectors whose log-probability
            under ``self.qw`` is stored in ``self.qbeta``.
    """
    K, D, nu = self.K, self.D, self.nu

    self.latent_vars = latent_vars = {}

    # Normal posterior over the means.
    qmu_loc = tf.Variable(tf.random_normal([K, nu]))
    qmu_scale = tf.nn.softplus(tf.Variable(tf.zeros([K, nu])))
    qmu = Normal(loc=qmu_loc, scale=qmu_scale)
    latent_vars[self.mu] = qmu

    # Wishart posterior parameterized by a lower-triangular factor whose
    # diagonal goes through softplus.
    qpsi0 = tf.Variable(tf.eye(nu, batch_shape=[K]))
    Ltril = tf.linalg.LinearOperatorLowerTriangular(
        ds.matrix_diag_transform(qpsi0, transform=tf.nn.softplus)).to_dense()
    qsigma = WishartCholesky(df=tf.ones([K]) * nu, scale=Ltril,
                             cholesky_input_output_matrices=True)
    latent_vars[self.sigma] = qsigma

    # Bind every document variable to its observed data.
    training_data = {self.w[d]: docs[d] for d in range(D)}

    self.qmu = qmu
    self.qw = MultivariateNormalTriL(loc=qmu, scale_tril=qsigma)

    # Log-probability of every word vector under the variational word model.
    V = len(wordVec)
    logprobs = [self.qw.log_prob(wordVec[i]) for i in range(V)]
    self.qbeta = tf.convert_to_tensor(logprobs)

    self.inference = ed.KLqp(latent_vars, data=training_data)
    self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
    self.__run_inference__(T)
def klqp(self, docs, S, T, wordVec):
    """Set up and run KLqp with a normal posterior on mu and an inverse-gamma posterior on sigmasq.

    Args:
        docs: per-document observations; ``docs[d]`` is bound to
            ``self.w[d]`` for each of the ``self.D`` documents.
        S: number of samples per iteration.
        T: number of iterations.
        wordVec: indexable collection of word vectors whose log-probability
            under ``self.qw`` is stored in ``self.qbeta``.
    """
    K, D, nu = self.K, self.D, self.nu

    self.latent_vars = latent_vars = {}

    # Normal posterior over the means.
    qmu_loc = tf.Variable(tf.random_normal([K, nu]))
    qmu_scale = tf.nn.softplus(tf.Variable(tf.zeros([K, nu])))
    qmu = Normal(loc=qmu_loc, scale=qmu_scale)
    latent_vars[self.mu] = qmu

    # Inverse-gamma posterior over the variances; softplus keeps both
    # parameters positive.
    qsigmasq_first = tf.nn.softplus(tf.Variable(tf.zeros([K, nu])))
    qsigmasq_second = tf.nn.softplus(tf.Variable(tf.zeros([K, nu])))
    qsigmasq = InverseGamma(qsigmasq_first, qsigmasq_second)
    latent_vars[self.sigmasq] = qsigmasq

    # Bind every document variable to its observed data.
    training_data = {self.w[d]: docs[d] for d in range(D)}

    self.qmu = qmu
    self.qsigma = qsigma = tf.sqrt(qsigmasq)
    self.qw = MultivariateNormalDiag(loc=qmu, scale_diag=qsigma)

    # Log-probability of every word vector under the variational word model.
    V = len(wordVec)
    logprobs = [self.qw.log_prob(wordVec[i]) for i in range(V)]
    self.qbeta = tf.convert_to_tensor(logprobs)

    self.inference = ed.KLqp(latent_vars, data=training_data)
    self.inference.initialize(n_iter=T, n_print=10, n_samples=S)
    self.__run_inference__(T)
def train(self, R, mask, n_iter=2000, n_samples=5):
    """Re-train the model given the true R and a mask.

    Stores the result of ``self._get_rhats()`` in ``self.posterior`` and
    its mean over axis 0 in ``self.posterior_map``.

    Args:
        R: observed matrix bound to ``self.R``.
        mask: mask bound to ``self.mask``.
        n_iter: number of KLqp iterations.
        n_samples: number of samples per iteration.
    """
    # Note: each inference run starts from scratch.
    sess = ed.get_session()
    # NOTE(review): the returned context manager is never entered, so this
    # statement has no effect as written.
    sess.as_default()

    latent_to_posterior = {
        self.U: self.qU,
        self.V: self.qV,
        self.Up: self.qUp,
        self.Vp: self.qVp,
        self.W0: self.qW0,
        self.b0: self.qb0,
        self.W1: self.qW1,
        self.b1: self.qb1,
    }
    observed = {self.R: R, self.mask: mask}

    inference = ed.KLqp(latent_to_posterior, data=observed)
    inference.run(n_iter=n_iter, n_samples=n_samples)

    self.posterior = self._get_rhats()

    # I think the marginals are gaussians, so we can use mean to find MAP.
    self.posterior_map = np.mean(self.posterior, axis=0)
def main(_):
    """Fit a Poisson model with a Gaussian-process log-rate using KLqp.

    Each of the ``FLAGS.N`` data points gets its own ``FLAGS.V x FLAGS.V``
    RBF covariance over its entries; the latent function ``f`` is the
    log-rate of a Poisson likelihood.
    """
    ed.set_seed(42)

    # DATA
    x_data = build_toy_dataset(FLAGS.N, FLAGS.V)

    # MODEL
    x_ph = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.V])

    # Form (N, V, V) covariance, one matrix per data point. The diagonal
    # jitter is sized from FLAGS.V; it used to be a hard-coded
    # two-element diagonal, which only matched V == 2.
    K = tf.stack([
        rbf(tf.reshape(xn, [FLAGS.V, 1])) +
        tf.diag(tf.fill([FLAGS.V], 1e-6))
        for xn in tf.unstack(x_ph)
    ])
    f = MultivariateNormalTriL(loc=tf.zeros([FLAGS.N, FLAGS.V]),
                               scale_tril=tf.cholesky(K))
    x = Poisson(rate=tf.exp(f))

    # INFERENCE
    qf = Normal(
        loc=tf.get_variable("qf/loc", [FLAGS.N, FLAGS.V]),
        scale=tf.nn.softplus(
            tf.get_variable("qf/scale", [FLAGS.N, FLAGS.V])))

    # The same data is both the observation and the kernel input.
    inference = ed.KLqp({f: qf}, data={x: x_data, x_ph: x_data})
    inference.run(n_iter=5000)
def test_normalnormal_run(self):
    """KLqp recovers the normal-normal posterior and ``reset`` rewinds its state."""
    with self.test_session() as sess:
        x_data = np.array([0.0] * 50, dtype=np.float32)

        mu = Normal(loc=0.0, scale=1.0)
        x = Normal(loc=tf.ones(50) * mu, scale=1.0)

        qmu_loc = tf.Variable(tf.random_normal([]))
        qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
        qmu = Normal(loc=qmu_loc, scale=qmu_scale)

        # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
        n_iter = 5000
        inference = ed.KLqp({mu: qmu}, data={x: x_data})
        inference.run(n_iter=n_iter)

        expected_stddev = np.sqrt(1 / 51)
        self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
        self.assertAllClose(qmu.stddev().eval(), expected_stddev,
                            rtol=1e-1, atol=1e-1)

        # Snapshot the iteration counter and the optimizer-scope variables
        # before and after resetting the inference.
        variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer')
        fetches = [inference.t, variables]

        old_t, old_variables = sess.run(fetches)
        self.assertEqual(old_t, n_iter)

        sess.run(inference.reset)

        new_t, new_variables = sess.run(fetches)
        self.assertEqual(new_t, 0)
        self.assertNotEqual(old_variables, new_variables)
def multi_batch_demo():
    """Train a bayesian_dynamics_model on randomly sized batches and plot it.

    The model is visualized once before training and once after.
    """
    n_steps = 1000 * 5

    model = bayesian_dynamics_model(1, 1)
    xeval = np.linspace(-3., 3., 100)[..., np.newaxis]

    latent_to_posterior = {
        model.W_0: model.qW_0, model.b_0: model.qb_0,
        model.W_1: model.qW_1, model.b_1: model.qb_1,
        model.W_2: model.qW_2, model.b_2: model.qb_2,
    }
    inference = ed.KLqp(latent_to_posterior, data={model.y: model.y_ph})
    inference.initialize(n_iter=n_steps, n_samples=5)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Plot the prior.
        model.visualize(sess, xeval)

        # Train the model; every step draws a fresh batch size.
        for _ in range(n_steps):
            batch_size = np.random.randint(low=100)
            x_batch, y_batch = model.get_batch(size=batch_size)
            info_dict = inference.update(
                {model.x: x_batch, model.y_ph: y_batch})
            inference.print_progress(info_dict)

        # Plot the posterior.
        model.visualize(sess, xeval)
def __init__(self, mnist, input_dim=784, output_dim=10, iterations=250,
             batch_size=100):
    """Build a Bayesian softmax-regression model and initialize KLqp for it.

    Args:
        mnist: dataset object; ``mnist.train.num_examples`` is read to
            scale the minibatch likelihood.
        input_dim: number of input features.
        output_dim: number of classes.
        iterations: number of KLqp iterations.
        batch_size: minibatch size used for the likelihood scaling.
    """
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.iterations = iterations
    self.batch_size = batch_size

    self.X_placeholder = tf.placeholder(tf.float32, (None, self.input_dim))
    self.Y_placeholder = tf.placeholder(tf.int32, (None, ))

    w_shape = (input_dim, output_dim)
    n_classes = w_shape[-1]

    # Priors and likelihood.
    self.w = Normal(loc=tf.zeros(w_shape), scale=tf.ones(w_shape))
    self.b = Normal(loc=tf.zeros(n_classes), scale=tf.ones(n_classes))
    self.pred = Categorical(tf.matmul(self.X_placeholder, self.w) + self.b)

    # Mean-field normal posteriors; softplus keeps the scales positive.
    self.qw = Normal(
        loc=tf.Variable(tf.random_normal(w_shape)),
        scale=tf.nn.softplus(tf.Variable(tf.random_normal(w_shape))))
    self.qb = Normal(
        loc=tf.Variable(tf.random_normal([n_classes])),
        scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_classes]))))

    self.inference = ed.KLqp({self.w: self.qw, self.b: self.qb},
                             data={self.pred: self.Y_placeholder})

    # Scale the minibatch likelihood by (examples / batch size).
    likelihood_scale = mnist.train.num_examples / self.batch_size
    self.inference.initialize(n_iter=self.iterations,
                              scale={self.pred: likelihood_scale})
def _test(self, sess, x_data, n_minibatch, x_val=None, is_file=False):
    """Check how KLqp exposes ``x_data`` through ``inference.data``.

    Which check runs depends on the arguments: placeholder feeding
    (``x_val`` given), file reader (``is_file``), preloaded full data
    (``n_minibatch`` is None), or preloaded minibatches.
    """
    mu = Normal(mu=0.0, sigma=1.0)
    batch_len = 10 if n_minibatch is None else n_minibatch
    x = Normal(mu=tf.ones(batch_len) * mu, sigma=1.0)

    qmu = Normal(mu=tf.Variable(tf.random_normal([])), sigma=tf.constant(1.0))

    data = {x: x_data}
    inference = ed.KLqp({mu: qmu}, data)
    inference.initialize(n_minibatch=n_minibatch)

    init = tf.initialize_all_variables()
    init.run()

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    if x_val is not None:
        # Placeholder setting.
        # Check data is same as data fed to it.
        feed_dict = {inference.data[x]: x_val}
        # Avoid directly fetching the placeholder.
        data_id = [tf.identity(v) for v in six.itervalues(inference.data)]
        val = sess.run(data_id, feed_dict)
        assert np.all(val == x_val)
    elif is_file:
        # File reader setting.
        # Check data varies by session run.
        first_read = sess.run(inference.data[x])
        second_read = sess.run(inference.data[x])
        assert not np.all(first_read == second_read)
    elif n_minibatch is None:
        # Preloaded full setting.
        # Check data is full data.
        val = sess.run(inference.data[x])
        assert np.all(val == data[x])
    elif n_minibatch == 1:
        # Preloaded batch setting, with n_minibatch=1.
        # Check data is randomly shuffled.
        matches = [sess.run(inference.data)[x] == data[x][i]
                   for i in range(10)]
        assert not np.all(matches)
    else:
        # Preloaded batch setting.
        # Check data is randomly shuffled.
        first_batch = sess.run(inference.data)
        assert not np.all(first_batch[x] == data[x][:n_minibatch])
        # Check data varies by session run.
        second_batch = sess.run(inference.data)
        assert not np.all(first_batch[x] == second_batch[x])

    inference.finalize()

    coord.request_stop()
    coord.join(threads)
def main(_):
    """Train a variational auto-encoder on MNIST minibatches with KLqp.

    After every epoch the average per-image loss is printed and one prior
    sample per minibatch slot is written to ``FLAGS.out_dir`` as a PNG.
    """
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)

    # MODEL
    # Define a subgraph of the full model, corresponding to a minibatch of
    # size M.
    z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
               scale=tf.ones([FLAGS.M, FLAGS.d]))
    decoder_hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    x = Bernoulli(logits=tf.layers.dense(decoder_hidden, 28 * 28))

    # INFERENCE
    # Define a subgraph of the variational model, corresponding to a
    # minibatch of size M.
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    encoder_hidden = tf.layers.dense(tf.cast(x_ph, tf.float32), 256,
                                     activation=tf.nn.relu)
    qz = Normal(
        loc=tf.layers.dense(encoder_hidden, FLAGS.d),
        scale=tf.layers.dense(encoder_hidden, FLAGS.d,
                              activation=tf.nn.softplus))

    # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            avg_loss += info_dict['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Prior predictive check.
        images = x.eval()
        for m in range(FLAGS.M):
            image_path = os.path.join(FLAGS.out_dir, '%d.png') % m
            imsave(image_path, images[m].reshape(28, 28))
def build_net(x_train, y_train, num_train_steps=10000, x_test=None,
              y_test=None):
    """Train a Bayesian dense network with KLqp and print metrics before and after.

    Args:
        x_train, y_train: training inputs and outcomes.
        num_train_steps: number of KLqp iterations.
        x_test, y_test: evaluation data; each falls back to the
            corresponding training array when omitted.
    """
    # Number of stats currently used to predict outcome - 23 per team +
    # variable for side.
    inputs = 47
    outputs = 1

    if x_test is None:
        x_test = x_train
    if y_test is None:
        y_test = y_train

    # Input data goes here (via feed_dict or equiv).
    x = tf.placeholder(tf.float32, shape=[None, inputs])

    # Widths of fully-connected layers in NN; the output layer is appended
    # after the activations are chosen so that it gets the identity.
    layer_widths = [16, 16, 16, 16, 16, 16]
    activations = [Nets.selu for _ in layer_widths] + [tf.identity]
    layer_widths += [outputs]

    net = Nets.SuperDenseNet(inputs, layer_widths, activations)

    # Construct all parameters of NN, set to independent gaussian priors.
    params = [Nets.gauss_prior(shape) for shape in net.param_space()]
    out = ed.models.Bernoulli(logits=net.apply(x, params))

    # Variational 'posterior's for NN params.
    qparams = [Nets.gauss_var_post(w.shape) for w in params]

    # Unused local kept from the original.
    asd = tf.train.AdamOptimizer

    # Map from random variables to their variational posterior objects.
    params_post = dict(zip(params, qparams))

    # Evaluate accuracy and likelihood of model over the dataset before
    # training.
    metrics_before = ed.evaluate(
        ['accuracy', 'log_likelihood', 'crossentropy'],
        data={out: y_test, x: x_test})
    print('accuracy, log_likelihood, crossentropy', metrics_before)

    # Run variational inference, minimizing KL(q, p) using stochastic
    # gradient descent over variational params.
    inference = ed.KLqp(params_post, data={out: y_train, x: x_train})
    inference.run(n_samples=32, n_iter=num_train_steps)

    # Get output object dependent on variational posteriors rather than
    # priors.
    out_post = ed.copy(out, params_post)

    # Re-evaluate metrics.
    metrics_after = ed.evaluate(
        ['accuracy', 'log_likelihood', 'crossentropy'],
        data={out_post: y_test, x: x_test})
    print('accuracy, log_likelihood, crossentropy', metrics_after)
def main(_):
    """Fit a two-hidden-layer Bayesian neural network with KLqp, logging to 'log'."""

    def neural_network(X):
        # Two tanh hidden layers followed by a linear output, flattened to
        # one value per row. Reads the weight variables of the enclosing
        # scope.
        h = tf.tanh(tf.matmul(X, W_0) + b_0)
        h = tf.tanh(tf.matmul(h, W_1) + b_1)
        h = tf.matmul(h, W_2) + b_2
        return tf.reshape(h, [-1])

    def mean_field_normal(scope, shape):
        # Variables are named '<scope>/loc' and '<scope>/scale' under the
        # enclosing variable scope; softplus keeps the scale positive.
        with tf.variable_scope(scope):
            loc = tf.get_variable("loc", shape)
            scale = tf.nn.softplus(tf.get_variable("scale", shape))
            return Normal(loc=loc, scale=scale)

    ed.set_seed(42)

    # DATA
    X_train, y_train = build_toy_dataset(FLAGS.N)

    # MODEL
    with tf.name_scope("model"):
        W_0 = Normal(loc=tf.zeros([FLAGS.D, 10]),
                     scale=tf.ones([FLAGS.D, 10]), name="W_0")
        W_1 = Normal(loc=tf.zeros([10, 10]), scale=tf.ones([10, 10]),
                     name="W_1")
        W_2 = Normal(loc=tf.zeros([10, 1]), scale=tf.ones([10, 1]),
                     name="W_2")
        b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_0")
        b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name="b_1")
        b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2")

        X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D], name="X")
        y = Normal(loc=neural_network(X), scale=0.1 * tf.ones(FLAGS.N),
                   name="y")

    print(neural_network(X))

    # INFERENCE
    with tf.variable_scope("posterior"):
        qW_0 = mean_field_normal("qW_0", [FLAGS.D, 10])
        qW_1 = mean_field_normal("qW_1", [10, 10])
        qW_2 = mean_field_normal("qW_2", [10, 1])
        qb_0 = mean_field_normal("qb_0", [10])
        qb_1 = mean_field_normal("qb_1", [10])
        qb_2 = mean_field_normal("qb_2", [1])

    latent_to_posterior = {
        W_0: qW_0, b_0: qb_0,
        W_1: qW_1, b_1: qb_1,
        W_2: qW_2, b_2: qb_2,
    }
    inference = ed.KLqp(latent_to_posterior, data={X: X_train, y: y_train})
    inference.run(logdir='log')
def _create_model(self):
    """Build the Bayesian network graph, its normal likelihood and the KLqp inference.

    Hidden layers use tanh; the last layer uses a sigmoid whose output is
    rescaled into ``[lower_rescalings, upper_rescalings]`` to form the
    likelihood mean. The likelihood scale comes from a Gamma-distributed
    precision.
    """
    print('... building graph')

    self.x = tf.convert_to_tensor(self.network_input, dtype = tf.float32)
    self.y = tf.convert_to_tensor(self.network_output, dtype = tf.float32)

    final_layer = self.num_layers - 1
    for layer_index in range(self.num_layers):
        setattr(self, 'w%d' % layer_index,
                self.__get_weights(layer_index, self.weight_shapes[layer_index]))
        setattr(self, 'b%d' % layer_index,
                self.__get_biases(layer_index, self.bias_shapes[layer_index]))

        # The first layer reads the input tensor, later layers read the
        # previous activation.
        if layer_index == 0:
            layer_input = self.x
        else:
            layer_input = getattr(self, 'fc%d' % (layer_index - 1))
        pre_activation = (tf.matmul(layer_input, self.weight(layer_index))
                          + self.bias(layer_index))

        if layer_index == 0 or layer_index < final_layer:
            setattr(self, 'fc%d' % layer_index, tf.nn.tanh(pre_activation))
        else:
            self._loc = tf.nn.sigmoid(pre_activation)

    # Getting the precision / standard deviation / variance.
    obs_shape = (self.num_obs, self.network_input.shape[1])
    self.tau_rescaling = np.zeros(obs_shape)
    for obs_index in range(self.num_obs):
        self.tau_rescaling[obs_index] += self.var_e_ranges

    tau = ed.models.Gamma(tf.zeros(obs_shape) + 12 * self.num_obs,
                          tf.ones(obs_shape))
    self.tau = tau / self.tau_rescaling
    self.scale = ed.models.Deterministic(1. / self.tau**0.5)

    # Learn the floats.
    self.loc = ed.models.Deterministic(
        (self.upper_rescalings - self.lower_rescalings) * self._loc
        + self.lower_rescalings)
    self.out_floats = ed.models.Normal(self.loc, self.scale)

    # Inference: one mean-field normal posterior per weight and bias,
    # paired with its prior as soon as it is created.
    var_dict = {}
    for layer_index in range(self.num_layers):
        w_shape = self.weight_shapes[layer_index]
        b_shape = self.bias_shapes[layer_index]
        setattr(self, 'q_w%d' % layer_index,
                ed.models.Normal(
                    tf.Variable(tf.zeros(w_shape)),
                    tf.nn.softplus(tf.Variable(tf.zeros(w_shape)))))
        setattr(self, 'q_b%d' % layer_index,
                ed.models.Normal(
                    tf.Variable(tf.zeros(b_shape)),
                    tf.nn.softplus(tf.Variable(tf.zeros(b_shape)))))
        var_dict[getattr(self, 'w%d' % layer_index)] = getattr(self, 'q_w%d' % layer_index)
        var_dict[getattr(self, 'b%d' % layer_index)] = getattr(self, 'q_b%d' % layer_index)

    self.inference = ed.KLqp(var_dict, data = {self.out_floats: self.y})
    optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.inference.initialize(optimizer = optimizer)
    tf.global_variables_initializer().run()
def test_tensor(self):
    """KLqp runs one debug iteration when the observed data is a tensor."""
    with self.test_session():
        n_points = 5

        mu = Normal(loc=0.0, scale=1.0)
        x = Normal(loc=tf.ones(n_points) * mu, scale=tf.ones(n_points))
        qmu = Normal(loc=tf.Variable(0.0), scale=tf.constant(1.0))

        # Data is given as a tensor rather than as an array.
        x_data = tf.zeros(n_points)

        inference = ed.KLqp({mu: qmu}, data={x: x_data})
        inference.run(n_iter=1, debug=True)
def test_minibatch(self):
    """With a model wrapper, initializing with n_minibatch leaves ``scale`` empty."""
    N = 10
    M = 5  # minibatch size

    model = NormalNormal()
    qmu = Normal(mu=tf.Variable(0.0), sigma=tf.constant(1.0))
    data = {'x': tf.zeros(10)}

    inference = ed.KLqp({'mu': qmu}, data, model_wrapper=model)
    inference.initialize(n_minibatch=M)

    # Check that the scale mapping is empty.
    assert not inference.scale
def infer(self, X, y, n_samples=5, n_iter=250):
    """Run KLqp for the weight and bias posteriors on the given data.

    Args:
        X: inputs bound to ``self.X``.
        y: targets bound to ``self.y``.
        n_samples: number of samples per iteration.
        n_iter: number of iterations.
    """
    latent_to_posterior = {self.W: self.qW, self.b: self.qb}
    observed = {self.y: y, self.X: X}

    inference = ed.KLqp(latent_to_posterior, data=observed)
    inference.run(n_samples=n_samples, n_iter=n_iter)
def fit(self, X, y, M=None, epochs=1, updates_per_batch=1, samples=30,
        callback=None):
    """Train the network on X and y with minibatched KLqp.

    Args:
        X: training inputs.
        y: training targets; ``y.shape[0]`` is the dataset size.
        M: batch size; defaults to the whole dataset.
        epochs: number of passes over the whole dataset.
        updates_per_batch: number of consecutive updates on the same batch.
        samples: number of samples drawn per update.
        callback: optional function called as ``callback(self, epoch)``
            after every epoch whose index is a multiple of 1000.
    """
    N = y.shape[0]

    # Pair every weight and bias prior with its variational posterior.
    latent_vars = dict(zip(self.priorWs, self.qWs))
    latent_vars.update(zip(self.priorBs, self.qBs))

    if M is None:
        M = N

    n_batch = int(N / M)
    n_epoch = epochs

    data = ut.generator([X, y], M)

    inference = ed.KLqp(latent_vars, data={self.y: self.y_ph})
    # The likelihood is scaled by N / M to account for minibatching.
    inference.initialize(
        n_iter=n_epoch * n_batch * updates_per_batch,
        n_samples=samples,
        scale={self.y: N / M})
    tf.global_variables_initializer().run()

    print("Total iterations: " + str(inference.n_iter))

    for epoch in range(n_epoch):
        total_loss = 0

        batches_this_epoch = inference.n_iter // updates_per_batch // n_epoch
        for _ in range(batches_this_epoch):
            X_batch, y_batch = next(data)
            feed = {self.y_ph: y_batch, self.X: X_batch}
            for _ in range(updates_per_batch):
                info_dict = inference.update(feed)
                total_loss += info_dict['loss']

        print("Epoch " + str(epoch) + " complete. Total loss: " +
              str(total_loss))

        if epoch % 1000 == 0 and callback is not None:
            callback(self, epoch)
def test_scale_1d(self):
    """A per-element 1-D scale tensor is stored unchanged on the inference."""
    with self.test_session():
        N = 10
        M = 5

        mu = Normal(mu=0.0, sigma=1.0)
        x = Normal(mu=tf.ones(M) * mu, sigma=tf.ones(M))
        qmu = Normal(mu=tf.Variable(0.0), sigma=tf.constant(1.0))

        x_ph = tf.placeholder(tf.float32, [M])
        per_element_scale = tf.range(M, dtype=tf.float32)

        inference = ed.KLqp({mu: qmu}, data={x: x_ph})
        inference.initialize(scale={x: per_element_scale})

        self.assertAllEqual(inference.scale[x].eval(), np.arange(M))