def reset(self):
    """Reinitialize all variables in the constructed graph.

    Runs ``self.init_op`` in the current Edward session. If the model has
    already been trained, calling this method resets it.
    """
    session = ed.get_session()
    session.run(self.init_op)
def _init(self):
    """Close the current session, reset the default graph and clear cached state."""
    # The session is closed before the graph it was bound to is discarded.
    session = ed.get_session()
    session.close()
    tf.reset_default_graph()

    # Results of any previous run are no longer valid.
    self.losses_ = self.tensor_map_ = self.criticism_args_ = None
    self.converged_ = False
def load(self, directory, name):
    """Load a model from disk that was saved beforehand.

    Args:
        directory: Directory holding the checkpoint files; ``~`` is expanded.
        name: Base name of the checkpoint (the ``.meta`` suffix is appended).
    """
    sess = ed.get_session()
    directory_exp = os.path.expanduser(directory)
    # Join the components instead of concatenating them so the path is
    # correct whether or not `directory` ends with a path separator.
    meta_path = os.path.join(directory_exp, name + ".meta")
    self.saver = tf.train.import_meta_graph(meta_path)
    self.saver.restore(sess, tf.train.latest_checkpoint(directory_exp))
def main(_):
    """Fit a Beta-Bernoulli model with Metropolis-Hastings and criticize it.

    Prints the inferred posterior mean and standard deviation, then shows a
    posterior predictive check histogram of the sample mean.
    """
    ed.set_seed(42)

    # Observed binary outcomes.
    x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # Model: Beta prior on the success probability of ten Bernoulli draws.
    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    # Inference: empirical approximation holding 1000 samples.
    qp_params = tf.get_variable(
        "qp/params", [1000], initializer=tf.constant_initializer(0.5))
    qp = Empirical(params=qp_params)
    proposal_p = Beta(3.0, 9.0)
    inference = ed.MetropolisHastings(
        {p: qp}, {p: proposal_p}, data={x: x_data})
    inference.run()

    # Criticism. The exact posterior has mean 0.25 and std 0.12.
    sess = ed.get_session()
    posterior_mean, posterior_stddev = sess.run([qp.mean(), qp.stddev()])
    print("Inferred posterior mean:")
    print(posterior_mean)
    print("Inferred posterior stddev:")
    print(posterior_stddev)

    x_post = ed.copy(x, {p: qp})

    def mean_statistic(xs, zs):
        # Test statistic: mean of the (replicated or observed) data.
        return tf.reduce_mean(tf.cast(xs[x_post], tf.float32))

    tx_rep, tx = ed.ppc(mean_statistic, data={x_post: x_data})
    ed.ppc_stat_hist_plot(
        tx[0], tx_rep, stat_name=r'$T \equiv$mean', bins=10)
    plt.show()
def main(_):
    """Train a BiGAN on MNIST, periodically saving sample image grids.

    Every ``n_print`` iterations (offset by one, so that a batch from the
    previous iteration is available) three figures are written to
    ``FLAGS.out_dir``: generated samples, the base batch, and reconstructions.
    """
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)
    x_ph = tf.placeholder(tf.float32, [FLAGS.M, 784])
    z_ph = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.d])

    # MODEL
    with tf.variable_scope("Gen"):
        xf = gen_data(z_ph, FLAGS.hidden_units)
        zf = gen_latent(x_ph, FLAGS.hidden_units)

    # INFERENCE
    optimizer = tf.train.AdamOptimizer()
    optimizer_d = tf.train.AdamOptimizer()
    inference = ed.BiGANInference(
        latent_vars={zf: z_ph},
        data={xf: x_ph},
        discriminator=discriminative_network)
    inference.initialize(
        optimizer=optimizer,
        optimizer_d=optimizer_d,
        n_iter=100000,
        n_print=3000)

    sess = ed.get_session()
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    idx = np.random.randint(FLAGS.M, size=16)

    def save_grid(images, prefix, counter):
        # Plot an image grid and write it as <out_dir>/<prefix><counter>.png.
        fig = plot(images)
        target = os.path.join(FLAGS.out_dir, '{}{}.png').format(
            prefix, str(counter).zfill(3))
        plt.savefig(target, bbox_inches='tight')
        plt.close(fig)

    i = 0
    for t in range(inference.n_iter):
        # `== 1` rather than `== 0`: x_batch / z_batch are first assigned at
        # the end of iteration 0.
        if t % inference.n_print == 1:
            samples = sess.run(xf, feed_dict={z_ph: z_batch})
            save_grid(samples[idx, ], 'Generated', i)

            save_grid(x_batch[idx, ], 'Base', i)

            zsam = sess.run(zf, feed_dict={x_ph: x_batch})
            reconstructions = sess.run(xf, feed_dict={z_ph: zsam})
            save_grid(reconstructions[idx, ], 'Reconstruct', i)

            i += 1

        x_batch = next(x_train_generator)
        z_batch = np.random.normal(0, 1, [FLAGS.M, FLAGS.d])

        info_dict = inference.update(
            feed_dict={x_ph: x_batch, z_ph: z_batch})
        inference.print_progress(info_dict)
def fit_model(model, observations, POI, fit_type='mle'):
    """
    Perform a fit of the model to data

    Args:
        model (ed.models class): An Edward model
        observations (np.ndarray): Data to fit the model to, of shape
            (n_observations, d_features)
        POI (dict): Parameters of interest to return fit results on
        fit_type (str): The minimization technique used. Only maximum
            likelihood is implemented; any other value falls back to it.
    Returns:
        fit_result (dict): A dict of the fitted model parameters of interest
    Raises:
        AssertionError: If the model and the observations differ in shape.
    """
    # The model and the data (observations) need to have the same shape.
    model_shape = model.get_shape()
    # The message is built from adjacent literals; the previous backslash
    # continuation inside the string leaked source indentation into it.
    assert model_shape == observations.shape, (
        "The model and observed data features must be of the same shape.\n"
        "The model passed has shape {0} and the data passed have shape "
        "(n_observations, d_features) = {1}".format(
            model_shape, observations.shape))

    # Normalized for dispatch once other fit types exist; this also rejects
    # non-string values early.
    fit_type = fit_type.lower()

    # http://edwardlib.org/api/ed/MAP
    # MAP with no latent variables is used for every `fit_type` (default mle).
    fit = ed.MAP({}, data={model: observations})
    fit.run()

    sess = ed.get_session()
    return {poi: sess.run(POI[poi]) for poi in POI}
def save_graph_parameters(file):
    """Pickle the current values of all trainable variables.

    Args:
        file: Path of the file to write.
    Returns:
        The `file` argument, unchanged.
    """
    sess = ed.get_session()
    trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    trained_vars = [sess.run(var) for var in trainable]
    # Use a context manager so the handle is flushed and closed even if
    # pickling fails.
    with open(file, 'wb') as handle:
        _pickle.dump(trained_vars, handle)
    return file
def predict(samples, outputs, latent_var_dict, input_ph):
    """
    :param samples: Data to score
    :param outputs: Tensor that represents the outputs
    :param latent_var_dict: Dictionary that contains the latent variables in the model.
    :param input_ph: Placeholder for the inputs
    :return: Predictions, one row per sample with columns
        (2.5% quantile, mean, 97.5% quantile)
    """
    x_post = ed.copy(outputs[-1], latent_var_dict)
    sess = ed.get_session()

    # Build the fetch ops once. Creating them inside the loop would add three
    # new ops to the graph for every scored sample.
    fetches = [x_post.quantile(0.025), x_post.quantile(0.975), x_post.mean()]

    predictions = np.zeros((samples.shape[0], 3))
    for i in range(samples.shape[0]):
        # Each placeholder receives a one-element batch holding its feature.
        feed_dict = {
            key: [value] for key, value in zip(input_ph, samples[i, :])
        }
        quantile_1, quantile_2, mean = sess.run(fetches, feed_dict=feed_dict)
        predictions[i, :] = [quantile_1, mean, quantile_2]
    return predictions
def main(_):
    """Print the complete conditional of a Beta prior under Bernoulli data."""
    ed.set_seed(42)

    # DATA
    x_data = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # MODEL
    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    # COMPLETE CONDITIONAL
    p_cond = ed.complete_conditional(p)

    sess = ed.get_session()
    print('p(probs | x) type:', p_cond.parameters['name'])

    # Only tensor-valued parameters can be fetched from the session.
    tensor_params = {}
    for key, val in six.iteritems(p_cond.parameters):
        if isinstance(val, tf.Tensor):
            tensor_params[key] = val
    param_vals = sess.run(tensor_params, {x: x_data})

    print('parameters:')
    for key, val in six.iteritems(param_vals):
        print('%s:\t%.3f' % (key, val))
def train(self, t_mask, sess=None, batch=50, n_iter=4000):
    """Run `n_iter` inference updates on one sampled minibatch and plot the loss.

    Args:
        t_mask: Mask whose non-zero entries mark the (user, item) pairs that
            may be used for training.
        sess: Accepted for backward compatibility; not used by this method.
            It previously defaulted to ``ed.get_session()``, which was
            evaluated once when the function was defined.
        batch: Number of (user, item) pairs drawn for the minibatch.
        n_iter: Number of inference updates.
    Returns:
        List with the loss reported by each update.
    """
    user_indices, item_indices = np.array(np.where(t_mask))
    rand_idx = np.random.choice(user_indices.shape[0], batch)
    user_indices, item_indices = user_indices[rand_idx], item_indices[rand_idx]
    ratings_selected = self.dataset[user_indices, item_indices]
    feed_dict = {
        self.user_indices: user_indices,
        self.item_indices: item_indices,
        self.ratings_selected: ratings_selected,
    }

    # NOTE(review): the minibatch is drawn once, so every update sees the
    # same pairs - confirm this is intended.
    losses = []
    for _ in range(n_iter):
        info_dict = self.inference.update(feed_dict=feed_dict)
        losses.append(info_dict['loss'])

    plt.plot(losses)
    plt.title('loss curve')
    plt.show()
    return losses
def eval(self):
    """Print test/train MAE for predictors and scores and dump predictions to CSV.

    Predictor columns are the first ``self.M`` columns of the indicator
    matrices; score columns are the remaining ones. Predictions are written
    to ``P_hat.csv`` and ``S_hat.csv``.
    """
    print("Evaluating..")

    # Make predictions
    sess = ed.get_session()

    def report(title, q_matrix, zeros, columns):
        # Predict from `q_matrix`, print MAE on the test and train masks
        # restricted to `columns`, and hand the predictions back.
        print(title)
        predictions = self.make_predictions(sess, q_matrix, 300)
        mae_test = self.compute_mae(
            sess, predictions, zeros,
            (self.I_test[:, columns]).astype(bool), 'mae')
        mae_train = self.compute_mae(
            sess, predictions, zeros,
            (self.I_train[:, columns]).astype(bool), 'mae')
        print("Test:\t", mae_test, "\nTrain:\t", mae_train)
        return predictions

    predictions_predictors = report(
        "Predictors MAE:", self.M_qP, self.predictors_zeros,
        slice(None, self.M))
    predictions_scores = report(
        "Scores MAE:", self.M_qS, self.scores_zeros,
        slice(self.M, None))

    outputs = (
        (predictions_predictors, 'P_hat.csv'),
        (predictions_scores, 'S_hat.csv'),
    )
    for predictions, csv_name in outputs:
        frame = pd.DataFrame(predictions.eval(session=sess))
        frame.to_csv(csv_name)
def train(self, R, mask, n_iter=2000, n_samples=5):
    """Re-train the model given the true R and a mask.

    Args:
        R: Observed values bound to ``self.R``.
        mask: Mask bound to ``self.mask``.
        n_iter: Number of KLqp iterations.
        n_samples: Number of samples passed to ``inference.run``.

    Stores the result of ``self._get_rhats()`` in ``self.posterior`` and its
    mean over axis 0 in ``self.posterior_map``.
    """
    # Note (original author): each inference run starts from scratch.
    # The former `sess.as_default()` call was dropped: outside a `with`
    # statement it only builds a context manager and has no effect.
    latent_vars = {
        self.U: self.qU,
        self.V: self.qV,
        self.Up: self.qUp,
        self.Vp: self.qVp,
        self.W0: self.qW0,
        self.b0: self.qb0,
        self.W1: self.qW1,
        self.b1: self.qb1,
    }
    inference = ed.KLqp(latent_vars, data={self.R: R, self.mask: mask})
    inference.run(n_iter=n_iter, n_samples=n_samples)

    self.posterior = self._get_rhats()

    # Original author's note: the marginals are thought to be Gaussian, so
    # the mean is used to find the MAP.
    self.posterior_map = np.mean(self.posterior, axis=0)
def fit(self, x_train):
    """Run Gibbs sampling on `x_train`, printing running cluster means.

    The inference object is stored on ``self.inference``. Every ``n_print``
    iterations the mean of the first ``t - 1`` rows of ``self.qmu.params``
    is printed.
    """
    latent_vars = {
        self.pi: self.qpi,
        self.mu: self.qmu,
        self.sigmasq: self.qsigmasq,
        self.z: self.qz,
    }
    self.inference = ed.Gibbs(latent_vars, data={self.x: x_train})
    self.inference.initialize()

    session = ed.get_session()
    tf.global_variables_initializer().run()

    # Scalar placeholder selecting how many stored samples to average over.
    step_ph = tf.placeholder(tf.int32, [])
    running_cluster_means = tf.reduce_mean(self.qmu.params[:step_ph], 0)

    for _ in range(self.inference.n_iter):
        info_dict = self.inference.update()
        self.inference.print_progress(info_dict)
        step = info_dict['t']
        if step % self.inference.n_print != 0:
            continue
        print("\nInferred cluster means:")
        print(session.run(running_cluster_means, {step_ph: step - 1}))
def main(_):
    """Sample a correlated 2-d Gaussian with HMC and plot the trace."""
    ed.set_seed(42)

    # MODEL
    covariance = tf.constant([[1.0, 0.8], [0.8, 1.0]])
    z = MultivariateNormalTriL(
        loc=tf.ones(2), scale_tril=tf.cholesky(covariance))

    # INFERENCE
    qz_params = tf.get_variable("qz/params", [1000, 2])
    qz = Empirical(params=qz_params)
    inference = ed.HMC({z: qz})
    inference.run()

    # CRITICISM
    sess = ed.get_session()
    posterior_mean, posterior_stddev = sess.run([qz.mean(), qz.stddev()])
    print("Inferred posterior mean:")
    print(posterior_mean)
    print("Inferred posterior stddev:")
    print(posterior_stddev)

    # Scatter the stored samples and overlay the contours of the target.
    fig, ax = plt.subplots()
    trace = sess.run(qz.params)
    ax.scatter(trace[:, 0], trace[:, 1], marker=".")
    mvn_plot_contours(z, ax=ax)
    plt.show()
def optimize(self, X, Y, epochs, batch_size, X_test=None, Y_test=None,
             n_samples=10, saver=None):
    """Run minibatch inference for `epochs` epochs.

    After each epoch the summed loss is printed, the tensors ``qd_a:0`` and
    ``qd_b:0`` are fetched and recorded, a checkpoint is written when `saver`
    is given, and accuracy on the first 1000 test examples is printed when
    test data is given. The recorded ``qd_a`` / ``qd_b`` values are printed
    at the end.
    """
    print('Optimizing {} training examples'.format(self.data_size))

    losses, accuracies = [], []
    qd_a_list, qd_b_list = [], []
    variables_names = ['qd_a:0', 'qd_b:0']

    for epoch in range(1, epochs + 1):
        print('Optimizing for epoch {}'.format(epoch))

        epoch_loss = 0
        steps = None
        for X_batch, Y_batch in mini_batch(batch_size, X, Y, shuffle=True):
            info_dict = self.inference.update(
                feed_dict={self.x: X_batch, self.y: Y_batch})
            epoch_loss += info_dict['loss']
            steps = info_dict['t']
        print('Loss: {} Steps: {}'.format(epoch_loss, steps))
        losses.append(epoch_loss)

        sess = ed.get_session()
        qd_a, qd_b = sess.run(variables_names)
        qd_a_list.append(qd_a)
        qd_b_list.append(qd_b)

        if saver is not None:
            saver.save(sess, '../checkpoint/beta_dropout.ckpt')

        has_validation_data = X_test is not None and Y_test is not None
        if has_validation_data:
            acc = self.validate(
                X_test[:1000], Y_test[:1000], batch_size, n_samples)
            print('Validation: {}'.format(acc))
            accuracies.append(acc)

    print(qd_a_list)
    print(qd_b_list)
def main(_):
    """Fit a Gaussian mixture with Metropolis-Hastings.

    Prints the running posterior means of the membership probabilities and
    cluster means at the first iteration and every ``n_print`` iterations.
    """
    ed.set_seed(42)

    # DATA
    x_data = build_toy_dataset(FLAGS.N)

    # MODEL
    pi = Dirichlet(concentration=tf.ones(FLAGS.K))
    mu = Normal(0.0, 1.0, sample_shape=[FLAGS.K, FLAGS.D])
    sigma = InverseGamma(
        concentration=1.0, rate=1.0, sample_shape=[FLAGS.K, FLAGS.D])
    c = Categorical(
        logits=tf.log(pi) - tf.log(1.0 - pi), sample_shape=FLAGS.N)
    x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

    # INFERENCE
    def empirical(name, trailing_dims, initializer, **variable_kwargs):
        # Empirical approximation backed by a [T, ...] variable of samples.
        storage = tf.get_variable(
            name, [FLAGS.T] + trailing_dims,
            initializer=initializer, **variable_kwargs)
        return Empirical(params=storage)

    qpi = empirical("qpi/params", [FLAGS.K],
                    tf.constant_initializer(1.0 / FLAGS.K))
    qmu = empirical("qmu/params", [FLAGS.K, FLAGS.D],
                    tf.zeros_initializer())
    qsigma = empirical("qsigma/params", [FLAGS.K, FLAGS.D],
                       tf.ones_initializer())
    qc = empirical("qc/params", [FLAGS.N],
                   tf.zeros_initializer(), dtype=tf.int32)

    # Proposal distributions, one per latent variable.
    gpi = Dirichlet(concentration=tf.constant([1.4, 1.6]))
    gmu = Normal(
        loc=tf.constant([[1.0, 1.0], [-1.0, -1.0]]),
        scale=tf.constant([[0.5, 0.5], [0.5, 0.5]]))
    gsigma = InverseGamma(
        concentration=tf.constant([[1.1, 1.1], [1.1, 1.1]]),
        rate=tf.constant([[1.0, 1.0], [1.0, 1.0]]))
    gc = Categorical(logits=tf.zeros([FLAGS.N, FLAGS.K]))

    latent_vars = {pi: qpi, mu: qmu, sigma: qsigma, c: qc}
    proposal_vars = {pi: gpi, mu: gmu, sigma: gsigma, c: gc}
    inference = ed.MetropolisHastings(
        latent_vars=latent_vars,
        proposal_vars=proposal_vars,
        data={x: x_data})
    inference.initialize()

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for _ in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)

        step = info_dict['t']
        should_report = step == 1 or step % inference.n_print == 0
        if should_report:
            qpi_mean, qmu_mean = sess.run([qpi.mean(), qmu.mean()])
            print("")
            print("Inferred membership probabilities:")
            print(qpi_mean)
            print("Inferred cluster means:")
            print(qmu_mean)
def load_graph_parameters(file):
    """Restore trainable variables from values pickled in `file`.

    Values are assigned to the trainable variables in collection order, so
    the file must have been written for a graph with the same variables.

    Args:
        file: Path of the pickle file to read.
    Returns:
        True once all variables have been assigned.
    Raises:
        IndexError: If the file holds fewer values than there are trainable
            variables. Raised before any variable is modified.
    """
    sess = ed.get_session()
    # NOTE: unpickling executes arbitrary code; only load files you trust.
    with open(file, "rb") as handle:
        trained_vars = _pickle.load(handle)

    variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    # Check up front so a short file cannot leave the graph half-restored.
    if len(trained_vars) < len(variables):
        raise IndexError(
            "{} holds {} values but the graph has {} trainable variables"
            .format(file, len(trained_vars), len(variables)))

    for var, value in zip(variables, trained_vars):
        sess.run(var.assign(value))
    return True
class Runtime():
    """Namespace holding the Edward session and the variable initializer ops.

    There is no ``__init__``: the statements below are class-level, so the
    session is obtained and both initializers are run once, when the class
    body is executed.
    """
    # Flag stored on the class; it is not read within this block.
    tf_run_default = True
    # Session obtained from Edward.
    tf_sess = ed.get_session()
    # Initializer ops for global and local variables.
    init_g = tf.global_variables_initializer()
    init_l = tf.local_variables_initializer()
    # Run both initializers immediately.
    tf_sess.run(init_g)
    tf_sess.run(init_l)
def sample_user_ratings(self, user_index, n_samples=100):
    """Sample ratings of one user for all ``self.M`` items.

    Args:
        user_index: Index fed (repeated ``self.M`` times) as the row index.
        n_samples: Number of samples requested from the graph.
    Returns:
        The fetched ``self.sample_rhats`` with singleton axes squeezed out.
    """
    n_items = self.M
    feed_dict = {
        self.test_idx_i: [user_index for _ in range(n_items)],
        self.test_idx_j: list(range(n_items)),
        self.n_test_samples: n_samples,
    }
    sampled = ed.get_session().run(self.sample_rhats, feed_dict)
    return np.squeeze(sampled)
def sample_user_ratings(self, user_index, num_samples=100, sess=None):
    """Fetch sampled mean ratings for one user.

    Args:
        user_index: Value fed to ``self.test_user_index``.
        num_samples: Value fed to ``self.num_samples``.
        sess: Session to run in. Defaults to the current Edward session,
            looked up at call time. The previous default was evaluated once
            at definition time and could refer to a closed session after a
            session or graph reset.
    Returns:
        The fetched value of ``self.R_mean_samples``.
    """
    if sess is None:
        sess = ed.get_session()
    feed_dict = {
        self.test_user_index: user_index,
        self.num_samples: num_samples,
    }
    return sess.run(self.R_mean_samples, feed_dict=feed_dict)
def save(self, directory, name):
    """Save the graph and all variables to files on disk.

    Everything is written under `directory` (``~`` is expanded), which is
    created if it does not exist yet, using `name` as the checkpoint name.
    """
    target_dir = os.path.expanduser(directory)
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    checkpoint_path = os.path.join(target_dir, name)
    self.saver.save(ed.get_session(), checkpoint_path)
def build(self, input_dim, output_dim, layers_defs=None, examples=50):
    """Construct a TensorFlow graph for the BNN.

    Args:
        input_dim: Number of input features.
        output_dim: Passed to the prior / latent variable generators.
        layers_defs: Layer definition list passed to the generators;
            defaults to ``[3, 3]``. A fresh list is created per call instead
            of sharing one mutable default between calls.
        examples: Length of the observation scale vector.

    Creates the inference and evaluation tensors, runs the global variable
    initializer and creates ``self.saver``.
    """
    if layers_defs is None:
        layers_defs = [3, 3]

    print("Generating prior Variables")
    self.priorWs, self.priorBs = self.generate_prior_vars(
        input_dim, output_dim, layers_defs)

    print("Generating latent Variables")
    self.qWs, self.qBs = self.generate_latent_vars(
        input_dim, output_dim, layers_defs)

    print("Building Network for inference")
    # Final function of the network
    self.X = tf.placeholder(
        shape=[None, input_dim], name="input_placeholder", dtype=tf.float32)
    self.y = Normal(
        loc=self._neural_network(self.X, self.priorWs, self.priorBs),
        scale=0.1 * tf.ones(examples))
    self.y_ph = tf.placeholder(tf.float32, self.y.shape, "output_placeholder")

    print("Building Network for evaluation")
    self.x_evaluation = tf.placeholder(
        shape=[None, input_dim], name="evaluation_placeholder",
        dtype=tf.float32)
    self.evaluation_sample_count = tf.placeholder(
        shape=None, name="evaluation_sample_count", dtype=tf.int32)

    def _sampled_forward_pass(_):
        # One forward pass with weights and biases drawn from the
        # approximate posterior.
        return self._neural_network(
            self.x_evaluation,
            [W.sample() for W in self.qWs],
            [b.sample() for b in self.qBs])

    self.y_evaluation = tf.map_fn(
        _sampled_forward_pass,
        tf.range(self.evaluation_sample_count),
        dtype=tf.float32)
    # Named so the tensor can be looked up as "evaluation:0" later.
    self.y_evaluation = tf.identity(self.y_evaluation, name="evaluation")

    self.init_op = tf.global_variables_initializer()
    ed.get_session().run(self.init_op)
    self.saver = tf.train.Saver()
def get_all_tensors(self, filter_fn=None):
    """Evaluate every ``tf.Tensor`` stored in ``self.tensor_map_``.

    Args:
        filter_fn: Optional predicate on the map key; keys for which it
            returns a falsy value are skipped.
    Returns:
        Dict mapping each selected key to the evaluated tensor value.
        Entries whose value is not a ``tf.Tensor`` are left out.
    """
    session = ed.get_session()
    fetched = {}
    for name in self.tensor_map_:
        if filter_fn is None or filter_fn(name):
            candidate = self.tensor_map_[name]
            if isinstance(candidate, tf.Tensor):
                fetched[name] = session.run(candidate)
    return fetched
def generative_adversarial_network_example():
    """Train a GAN on MNIST with Edward and save generated samples.

    Data is read from ``/tmp/data``. Every ``n_print`` iterations a grid of
    16 generated images is written to ``/tmp/out/NNN.png``.
    """
    ed.set_seed(42)

    data_dir = '/tmp/data'
    out_dir = '/tmp/out'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    M = 128  # Batch size during training.
    d = 100  # Latent dimension.

    (x_train, _), (x_test, _) = mnist(data_dir)
    x_train_generator = generator(x_train, M)
    x_ph = tf.placeholder(tf.float32, [M, 784])

    # GANs posit generative models using an implicit mechanism: given some
    # random noise, the data is assumed to be generated by a deterministic
    # function of that noise.
    with tf.variable_scope('Gen'):
        eps = Uniform(tf.zeros([M, d]) - 1.0, tf.ones([M, d]))
        x = generative_network(eps)

    # GANInference takes the implicit density model on x as input, bound to
    # its realizations x_ph, plus a parameterized discriminator function that
    # distinguishes their samples. ADAM is used for both the generator and
    # the discriminator; training runs for 15,000 iterations and progress is
    # printed every 1,000 iterations.
    optimizer = tf.train.AdamOptimizer()
    optimizer_d = tf.train.AdamOptimizer()

    # The inference object is built once; an earlier, identical construction
    # whose result was immediately overwritten has been removed.
    inference = ed.GANInference(
        data={x: x_ph}, discriminator=discriminative_network)
    inference.initialize(
        optimizer=optimizer, optimizer_d=optimizer_d,
        n_iter=15000, n_print=1000)

    # Main training loop: each iteration takes a minibatch and updates the
    # parameters according to the algorithm.
    sess = ed.get_session()
    tf.global_variables_initializer().run()

    idx = np.random.randint(M, size=16)
    i = 0
    for t in range(inference.n_iter):
        if t % inference.n_print == 0:
            samples = sess.run(x)
            samples = samples[idx, ]

            fig = plot(samples)
            plt.savefig(
                os.path.join(out_dir, '{}.png').format(str(i).zfill(3)),
                bbox_inches='tight')
            plt.close(fig)
            i += 1

        x_batch = next(x_train_generator)
        info_dict = inference.update(feed_dict={x_ph: x_batch})
        inference.print_progress(info_dict)
def run(self, data, method="klqp", **kwargs):
    """Run inference on `data` and plot the per-iteration diagnostic.

    Args:
        data: Data dictionary passed to the Edward inference class.
        method: ``"klqp"`` (plots the loss) or ``"hmc"`` (plots the
            acceptance rate). Any other value does nothing.
        **kwargs: Forwarded to ``inference.initialize``.
    """
    if method == "klqp":
        inference_cls, metric_key, plot_title = ed.KLqp, 'loss', "Loss"
    elif method == "hmc":
        inference_cls, metric_key, plot_title = (
            ed.HMC, 'accept_rate', "Acceptance Rate")
    else:
        # Unknown methods are silently ignored.
        return

    print(">> Initializing ... ", end="")
    inference = inference_cls(self.unwind_latent_vars(), data=data)
    inference.initialize(**kwargs)
    print("ok")

    # RUNNING THE INFERENCE
    ed.get_session()
    tf.global_variables_initializer().run()

    history = []
    for _ in tqdm(range(inference.n_iter)):
        history.append(inference.update()[metric_key])

    plt.figure(figsize=(7, 3))
    plt.title(plot_title)
    plt.semilogy(history)
    plt.show()
def main(_):
    """Probabilistic PCA with data subsampling and alternating KLqp updates.

    Each outer iteration draws one minibatch, runs five updates of the
    local-variable inference and one update of the global-variable inference,
    and prints the inferred principal axes every 100 iterations.
    """
    ed.set_seed(142)

    # DATA
    x_train = build_toy_dataset(FLAGS.N, FLAGS.D, FLAGS.K)

    # MODEL
    w = Normal(loc=0.0, scale=10.0, sample_shape=[FLAGS.D, FLAGS.K])
    z = Normal(loc=0.0, scale=1.0, sample_shape=[FLAGS.M, FLAGS.K])
    x = Normal(
        loc=tf.matmul(w, z, transpose_b=True),
        scale=tf.ones([FLAGS.D, FLAGS.M]))

    # INFERENCE
    qw_loc = tf.get_variable("qw/loc", [FLAGS.D, FLAGS.K])
    qw_scale = tf.get_variable("qw/scale", [FLAGS.D, FLAGS.K])
    qw_variables = [qw_loc, qw_scale]
    qw = Normal(loc=qw_loc, scale=tf.nn.softplus(qw_scale))

    qz_loc = tf.get_variable("qz/loc", [FLAGS.N, FLAGS.K])
    qz_scale = tf.get_variable("qz/scale", [FLAGS.N, FLAGS.K])
    qz_variables = [qz_loc, qz_scale]
    # Only the rows of the local variables belonging to the minibatch are used.
    idx_ph = tf.placeholder(tf.int32, FLAGS.M)
    qz = Normal(
        loc=tf.gather(qz_loc, idx_ph),
        scale=tf.nn.softplus(tf.gather(qz_scale, idx_ph)))

    x_ph = tf.placeholder(tf.float32, [FLAGS.D, FLAGS.M])
    inference_w = ed.KLqp({w: qw}, data={x: x_ph, z: qz})
    inference_z = ed.KLqp({z: qz}, data={x: x_ph, w: qw})

    scale_factor = float(FLAGS.N) / FLAGS.M
    inference_w.initialize(
        scale={x: scale_factor, z: scale_factor},
        var_list=qz_variables,
        n_samples=5)
    inference_z.initialize(
        scale={x: scale_factor, z: scale_factor},
        var_list=qw_variables,
        n_samples=5)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for _ in range(inference_w.n_iter):
        x_batch, idx_batch = next_batch(x_train, FLAGS.M)

        for _ in range(5):
            inference_z.update(
                feed_dict={x_ph: x_batch, idx_ph: idx_batch})

        info_dict = inference_w.update(
            feed_dict={x_ph: x_batch, idx_ph: idx_batch})
        inference_w.print_progress(info_dict)

        step = info_dict['t']
        if step % 100 == 0:
            print("\nInferred principal axes:")
            print(sess.run(qw.mean()))
def main(_):
    """Train a Wasserstein GAN on MNIST, saving sample grids periodically.

    Each loop iteration performs five discriminator updates followed by one
    generator update on the same minibatch.
    """
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)
    x_ph = tf.placeholder(tf.float32, [FLAGS.M, 784])

    # MODEL
    with tf.variable_scope("Gen"):
        eps = Uniform(
            low=tf.zeros([FLAGS.M, FLAGS.d]) - 1.0,
            high=tf.ones([FLAGS.M, FLAGS.d]))
        x = generative_network(eps)

    # INFERENCE
    optimizer = tf.train.RMSPropOptimizer(learning_rate=5e-5)
    optimizer_d = tf.train.RMSPropOptimizer(learning_rate=5e-5)

    inference = ed.WGANInference(
        data={x: x_ph}, discriminator=discriminative_network)
    inference.initialize(
        optimizer=optimizer,
        optimizer_d=optimizer_d,
        n_iter=15000,
        n_print=1000,
        clip=0.01,
        penalty=None)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    idx = np.random.randint(FLAGS.M, size=16)
    i = 0
    for t in range(inference.n_iter):
        if t % inference.n_print == 0:
            samples = sess.run(x)
            fig = plot(samples[idx, ])
            image_path = os.path.join(FLAGS.out_dir, '{}.png').format(
                str(i).zfill(3))
            plt.savefig(image_path, bbox_inches='tight')
            plt.close(fig)
            i += 1

        x_batch = next(x_train_generator)
        for _ in range(5):
            inference.update(feed_dict={x_ph: x_batch}, variables="Disc")

        info_dict = inference.update(
            feed_dict={x_ph: x_batch}, variables="Gen")
        # note: not printing discriminative objective; `info_dict` above
        # does not store it since updating only "Gen"
        # A set of 6 updates is reported as 1 iteration.
        info_dict['t'] = info_dict['t'] // 6
        inference.print_progress(info_dict)
def main(_):
    """Run 10 MAP iterations on a Normal-Normal model and save a checkpoint."""
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=mu, scale=1.0, sample_shape=50)

    with tf.variable_scope("posterior"):
        qmu = PointMass(params=tf.Variable(1.0))

    inference = ed.MAP({mu: qmu}, data={x: x_data})
    inference.run(n_iter=10)

    # Write the variables to the checkpoint "test_saver".
    session = ed.get_session()
    checkpoint_writer = tf.train.Saver()
    checkpoint_writer.save(session, "test_saver")
def __init__(self, n_samples=10, center_sampling_method='k_means',
             n_centers=20, keep_edges=False, init_scales='default',
             estimator=None, X_ph=None, train_scales=False):
    """
    Main class for Kernel Mixture Network

    Args:
        center_sampling_method: String that describes the method to use for finding kernel centers
        n_centers: Number of kernels to use in the output
        keep_edges: Keep the extreme y values as center to keep expressiveness
        init_scales: List, array or scalar that describes (initial) values of
            bandwidth parameter; ``'default'`` selects a single scale of 1
        estimator: Keras or tensorflow network that ends with a dense layer to place kernel mixture output on top off,
                   if None use a standard 15 -> 15 Dense network
        X_ph: Placeholder for input to your custom estimator, currently only supporting one input placeholder,
              but should be easy to extend to a list of placeholders
        train_scales: Boolean that describes whether or not to make the scales trainable
        n_samples: Determine how many samples to return
    """
    self.sess = ed.get_session()
    self.inference = None

    self.estimator = estimator
    self.X_ph = X_ph

    self.n_samples = n_samples
    self.center_sampling_method = center_sampling_method
    self.n_centers = n_centers
    self.keep_edges = keep_edges

    self.train_loss = np.empty(0)
    self.test_loss = np.empty(0)

    # Compare with the sentinel only when a string was passed: for a numpy
    # array `init_scales == 'default'` is not a plain boolean.
    if isinstance(init_scales, str) and init_scales == 'default':
        init_scales = np.array([1])
    # Accept a bare scalar as well as a sequence, as documented.
    init_scales = np.atleast_1d(init_scales)

    # Transform scales so that the softplus will result in passed init_scales
    self.init_scales = [math.log(math.exp(s) - 1) for s in init_scales]
    self.n_scales = len(self.init_scales)
    self.train_scales = train_scales

    self.fitted = False
def __init__(self, n_components=10, n_mc_samples=1, gene_dispersion=True,
             zero_inflation=True, scalings=True, batch_correction=False,
             test_iterations=100, optimizer=None, minibatch_size=None,
             validation=False, X_test=None):
    """Store the model configuration and start from a fresh TensorFlow graph.

    Each enabled model option (zero inflation, batch correction, scalings,
    gene dispersion) is announced on stdout. Validation is only active when
    `validation` is true and `X_test` is provided. The current Edward
    session is closed and the default graph is reset.
    """
    self.n_components = n_components
    self.est_X = None
    self.est_L = None
    self.est_Z = None

    self.zero_inflation = zero_inflation
    if zero_inflation:
        print('Considering zero-inflation.')

    self.batch_correction = batch_correction
    if batch_correction:
        print('Performing batch correction.')

    self.scalings = scalings
    if scalings:
        print('Considering cell-specific scalings.')

    self.gene_dispersion = gene_dispersion
    # Gate on the dispersion flag itself; this message used to be gated on
    # `scalings` by mistake.
    if gene_dispersion:
        print('Considering gene-specific dispersion.')

    self.n_mc_samples = n_mc_samples
    self.test_iterations = test_iterations

    self.optimizer = optimizer
    self.minibatch_size = minibatch_size

    # if validation, use X_test to assess convergence
    self.validation = validation and X_test is not None
    self.X_test = X_test
    self.loss_dict = {'t_loss': [], 'v_loss': []}

    # Discard any state left over from a previously built model.
    sess = ed.get_session()
    sess.close()
    tf.reset_default_graph()
def main(_):
    """Draw two samples from a Dirichlet whose size is itself random."""
    ed.set_seed(42)

    # Prior on scalar hyperparameter to Dirichlet.
    alpha = Gamma(1.0, 1.0)

    # Prior on size of Dirichlet.
    size = 1 + tf.cast(Exponential(0.5), tf.int32)

    # Build a vector of ones whose size is `size`; multiply it by alpha.
    concentration = tf.ones([size]) * alpha
    p = Dirichlet(concentration)

    sess = ed.get_session()
    # Example output of one draw:
    # [ 0.01012419  0.02939712  0.05036638  0.51287931  0.31020424  0.0485355
    #   0.0384932 ]
    for _draw in range(2):
        print(sess.run(p))
def optimize(self, mnist):
    """Run inference on MNIST minibatches, printing ``qd_a`` / ``qd_b`` before and after.

    Args:
        mnist: Dataset object exposing ``train.next_batch(batch_size)``.
    """
    variables_names = ['qd_a:0', 'qd_b:0']
    sess = ed.get_session()

    def report(template):
        # Fetch the two tensors by name and print them with `template`.
        qd_a, qd_b = sess.run(variables_names)
        print(template.format(qd_a, qd_b))

    report('Prior >> alpha: {} beta: {}')

    for _ in range(self.inference.n_iter):
        X_batch, Y_batch = mnist.train.next_batch(self.batch_size)
        feed_dict = {self.x: X_batch, self.y: Y_batch}
        info_dict = self.inference.update(feed_dict=feed_dict)
        self.inference.print_progress(info_dict)

    report('Posterior >> alpha: {} beta: {}')
def evaluate(self, x, samples_count):
    """Draw samples from the model given some unseen data.

    The evaluation tensors are looked up by name in the default graph.

    Args:
        x: Input fed to the ``evaluation_placeholder`` tensor.
        samples_count: Value fed to the ``evaluation_sample_count`` tensor.
    Returns:
        The evaluated ``evaluation`` tensor.
    """
    graph = tf.get_default_graph()
    output_op = graph.get_tensor_by_name("evaluation:0")
    input_ph = graph.get_tensor_by_name("evaluation_placeholder:0")
    count_ph = graph.get_tensor_by_name("evaluation_sample_count:0")

    feed_dict = {input_ph: x, count_ph: samples_count}
    return ed.get_session().run(output_op, feed_dict=feed_dict)
def probabilistic_pca_example():
    """Fit probabilistic PCA to simulated 2-d data with KLqp.

    Shows a scatter plot of the simulated data, prints the inferred principal
    axes, and shows a scatter plot of data generated from the fitted model.
    """
    ed.set_seed(142)

    N = 5000  # Number of data points.
    D = 2  # Data dimensionality.
    K = 1  # Latent dimensionality.

    axis_limits = [-10, 10, -10, 10]

    x_train = build_toy_dataset(N, D, K)

    plt.scatter(x_train[0, :], x_train[1, :], color='blue', alpha=0.1)
    plt.axis(axis_limits)
    plt.title('Simulated data set')
    plt.show()

    # Model.
    w = Normal(loc=tf.zeros([D, K]), scale=2.0 * tf.ones([D, K]))
    z = Normal(loc=tf.zeros([N, K]), scale=tf.ones([N, K]))
    x = Normal(loc=tf.matmul(w, z, transpose_b=True), scale=tf.ones([D, N]))

    # Inference.
    qw = Normal(
        loc=tf.get_variable('qw/loc', [D, K]),
        scale=tf.nn.softplus(tf.get_variable('qw/scale', [D, K])))
    qz = Normal(
        loc=tf.get_variable('qz/loc', [N, K]),
        scale=tf.nn.softplus(tf.get_variable('qz/scale', [N, K])))

    posterior = {w: qw, z: qz}
    inference = ed.KLqp(posterior, data={x: x_train})
    inference.run(n_iter=500, n_print=100, n_samples=10)

    # Criticism.
    sess = ed.get_session()
    print('Inferred principal axes:')
    print(sess.run(qw.mean()))

    # Build and then generate data from the posterior predictive distribution.
    x_post = ed.copy(x, {w: qw, z: qz})
    x_gen = sess.run(x_post)

    plt.scatter(x_gen[0, :], x_gen[1, :], color='red', alpha=0.1)
    plt.axis(axis_limits)
    plt.title('Data generated from model')
    plt.show()
def update(self, feed_dict=None, scope='global'):
    """Run one training step for the given scope.

    Placeholder keys of ``self.local_data`` are added to `feed_dict` (the
    passed dict is modified in place).

    Args:
        feed_dict: Optional feed dictionary.
        scope: ``'global'`` or ``'local'``.
    Returns:
        Dict with keys ``'t'`` and ``'loss'``. For ``'local'`` the ``'t'``
        entry is ``self.increment_t`` itself, not an evaluated value. Returns
        None for any other scope.
    """
    feed_dict = {} if feed_dict is None else feed_dict

    for key, value in six.iteritems(self.local_data):
        is_placeholder = (
            isinstance(key, tf.Tensor) and "Placeholder" in key.op.type)
        if is_placeholder:
            feed_dict[key] = value

    session = ed.get_session()

    if scope == 'local':
        fetches = [self.train_local, self.local_loss]
        _, local_loss = session.run(fetches, feed_dict)
        return {'t': self.increment_t, 'loss': local_loss}

    if scope == 'global':
        fetches = [self.train, self.increment_t, self.loss]
        _, step, loss = session.run(fetches, feed_dict)
        return {'t': step, 'loss': loss}
def _test(data, n_data, x=None, is_file=False):
    """Check how ``inference.data`` behaves in each data-feeding setting.

    Exactly one of four settings is checked: placeholder (`x` given), file
    reader (`is_file`), preloaded full data (`n_data` is None), or preloaded
    batches. The session is closed and the default graph reset afterwards.
    """
    sess = ed.get_session()
    model = NormalModel()
    variational = Variational()
    variational.add(Normal())

    inference = ed.MFVI(model, variational, data)
    inference.initialize(n_data=n_data)

    if x is not None:
        # Placeholder setting.
        # Check data is same as data fed to it.
        feed_dict = {inference.data['x']: x}
        # avoid directly fetching placeholder
        data_id = {}
        for name, tensor in six.iteritems(inference.data):
            data_id[name] = tf.identity(tensor)
        fetched = sess.run(data_id, feed_dict)
        assert np.all(fetched['x'] == x)
    elif is_file:
        # File reader setting.
        # Check data varies by session run.
        first = sess.run(inference.data)
        second = sess.run(inference.data)
        assert not np.all(first['x'] == second['x'])
    elif n_data is None:
        # Preloaded full setting.
        # Check data is full data.
        fetched = sess.run(inference.data)
        assert np.all(fetched['x'] == data['x'])
    else:
        # Preloaded batch setting.
        # Check data is randomly shuffled.
        first = sess.run(inference.data)
        assert not np.all(first['x'] == data['x'][:n_data])
        # Check data varies by session run.
        second = sess.run(inference.data)
        assert not np.all(first['x'] == second['x'])

    inference.finalize()
    sess.close()
    del sess
    tf.reset_default_graph()
def main(_):
    """Infer Dirichlet-Categorical membership probabilities with KLqp."""
    # DATA
    pi_true = np.random.dirichlet(np.array([20.0, 30.0, 10.0, 10.0]))
    draws = [
        np.random.choice(FLAGS.K, 1, p=pi_true)[0]
        for _draw in range(FLAGS.N)
    ]
    z_data = np.array(draws)
    print("pi: {}".format(pi_true))

    # MODEL
    pi = Dirichlet(tf.ones(4))
    z = Categorical(probs=pi, sample_shape=FLAGS.N)

    # INFERENCE
    # Softplus keeps the variational concentration positive.
    qpi_concentration = tf.nn.softplus(
        tf.get_variable("qpi/concentration", [FLAGS.K]))
    qpi = Dirichlet(qpi_concentration)

    inference = ed.KLqp({pi: qpi}, data={z: z_data})
    inference.run(n_iter=1500, n_samples=30)

    sess = ed.get_session()
    inferred_pi = sess.run(qpi.mean())
    print("Inferred pi: {}".format(inferred_pi))
def main(_):
    """Sample a correlated 2-d Gaussian with SGLD and print posterior moments."""
    ed.set_seed(42)

    # MODEL
    covariance = tf.constant([[1.0, 0.8], [0.8, 1.0]])
    z = MultivariateNormalTriL(
        loc=tf.ones(2), scale_tril=tf.cholesky(covariance))

    # INFERENCE
    qz_params = tf.get_variable("qz/params", [2000, 2])
    qz = Empirical(params=qz_params)

    inference = ed.SGLD({z: qz})
    inference.run(step_size=5.0)

    # CRITICISM
    sess = ed.get_session()
    moments = sess.run([qz.mean(), qz.stddev()])
    for label, value in zip(("mean", "stddev"), moments):
        print("Inferred posterior {}:".format(label))
        print(value)
def mvn_plot_contours(z, label=False, ax=None):
    """Plot the density contours of a 2-d multivariate normal.

    The axes are scaled to show 3 standard deviations around the mean.

    Args:
        z: Distribution whose ``parameters`` contain ``'loc'`` and
            ``'scale_tril'`` (the lower-triangular Cholesky factor of the
            covariance) and which provides ``log_prob``.
        label: If true, label the contour lines.
        ax: Matplotlib axes to draw on; a new figure is created if None.
    """
    sess = ed.get_session()
    mu = sess.run(z.parameters['loc'])
    mu_x, mu_y = mu

    # `scale_tril` is the Cholesky factor L, not the covariance. The marginal
    # variances are the diagonal of L L^T, so take the square root of that.
    scale_tril = sess.run(z.parameters['scale_tril'])
    covariance = np.dot(scale_tril, scale_tril.T)
    sigma_x, sigma_y = np.sqrt(np.diag(covariance))

    xmin, xmax = mu_x - 3 * sigma_x, mu_x + 3 * sigma_x
    ymin, ymax = mu_y - 3 * sigma_y, mu_y + 3 * sigma_y
    xs = np.linspace(xmin, xmax, num=100)
    ys = np.linspace(ymin, ymax, num=100)
    X, Y = np.meshgrid(xs, ys)

    # Evaluate the density at every grid point, then restore the grid layout.
    T = tf.cast(np.c_[X.flatten(), Y.flatten()], dtype=tf.float32)
    Z = sess.run(tf.exp(z.log_prob(T))).reshape(X.shape)

    if ax is None:
        fig, ax = plt.subplots()
    cs = ax.contour(X, Y, Z)
    if label:
        plt.clabel(cs, inline=1, fontsize=10)
def main(_):
    """Compute and print the complete conditional p(probs | x) of a Beta-Bernoulli model."""
    ed.set_seed(42)

    # DATA
    observations = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

    # MODEL
    p = Beta(1.0, 1.0)
    x = Bernoulli(probs=p, sample_shape=10)

    # COMPLETE CONDITIONAL
    p_cond = ed.complete_conditional(p)
    conditional_params = p_cond.parameters

    sess = ed.get_session()
    print('p(probs | x) type:', conditional_params['name'])

    # Fetch only the parameters that are tensors, conditioning on the data.
    fetches = {
        name: param
        for name, param in six.iteritems(conditional_params)
        if isinstance(param, tf.Tensor)
    }
    param_vals = sess.run(fetches, {x: observations})

    print('parameters:')
    for name, value in six.iteritems(param_vals):
        print('%s:\t%.3f' % (name, value))
def get_samples(x_ph, num_points=10000, num_bins=100):
    """Return a tuple (db, pd, pg), where
    + db is the discriminator's decision boundary;
    + pd is a histogram of samples from the data distribution;
    + pg is a histogram of samples from the generative model.

    The networks are evaluated in batches of ``FLAGS.M`` points; entries
    beyond the last full batch keep their initial value of zero.
    """
    sess = ed.get_session()
    bins = np.linspace(-8, 8, num_bins)
    batch_size = FLAGS.M
    n_batches = num_points // batch_size

    def run_in_batches(fetch, placeholder, inputs):
        # Evaluate `fetch` over `inputs`, one full batch at a time.
        outputs = np.zeros((num_points, 1))
        for b in range(n_batches):
            window = slice(batch_size * b, batch_size * (b + 1))
            batch = np.reshape(inputs[window], (batch_size, 1))
            outputs[window] = sess.run(fetch, {placeholder: batch})
        return outputs

    # Decision boundary
    with tf.variable_scope("Disc", reuse=True):
        p_true = tf.sigmoid(discriminative_network(x_ph))
    xs = np.linspace(-8, 8, num_points)
    db = run_in_batches(p_true, x_ph, xs)

    # Data samples
    data_samples = next_batch(num_points)
    data_hist, _ = np.histogram(data_samples, bins=bins, density=True)

    # Generated samples
    eps_ph = tf.placeholder(tf.float32, [FLAGS.M, 1])
    with tf.variable_scope("Gen", reuse=True):
        G = generative_network(eps_ph)
    epss = np.linspace(-8, 8, num_points)
    generated = run_in_batches(G, eps_ph, epss)
    generated_hist, _ = np.histogram(generated, bins=bins, density=True)

    return db, data_hist, generated_hist
def main(_):
    """Infer the scale of a Normal with known mean via Metropolis-Hastings.

    Draws data from Normal(FLAGS.loc, FLAGS.scale), places an InverseGamma
    prior on the variance, and prints the square root of the posterior
    mean of the variance as the inferred scale.
    """
    # Data generation (known mean)
    xn_data = np.random.normal(FLAGS.loc, FLAGS.scale, FLAGS.N)
    print("scale: {}".format(FLAGS.scale))

    # Prior definition
    alpha, beta = 0.5, 0.7

    # Posterior inference
    # Probabilistic model
    ig = InverseGamma(alpha, beta)
    xn = Normal(FLAGS.loc, tf.sqrt(ig), sample_shape=FLAGS.N)

    # Inference: the chain of 1000 samples is stored in an Empirical
    # distribution whose parameters start at 0.5.
    chain = tf.get_variable(
        "qig/params", [1000], initializer=tf.constant_initializer(0.5))
    qig = Empirical(params=chain)
    proposal_ig = InverseGamma(2.0, 2.0)
    inference = ed.MetropolisHastings(
        {ig: qig}, {ig: proposal_ig}, data={xn: xn_data})
    inference.run()

    sess = ed.get_session()
    inferred_scale = sess.run(tf.sqrt(qig.mean()))
    print("Inferred scale: {}".format(inferred_scale))
def main(_):
    """Run HMC on a Normal-Normal model and show convergence diagnostics.

    Prints the posterior mean and standard deviation of `mu`, then shows a
    histogram and a trace plot of the collected samples.
    """
    ed.set_seed(42)

    # DATA
    x_data = np.array([0.0] * 50)

    # MODEL: Normal-Normal with known variance
    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=mu, scale=1.0, sample_shape=50)

    # INFERENCE
    chain = tf.get_variable(
        "qmu/params", [1000], initializer=tf.zeros_initializer())
    qmu = Empirical(params=chain)

    # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
    inference = ed.HMC({mu: qmu}, data={x: x_data})
    inference.run()

    # CRITICISM
    sess = ed.get_session()
    posterior_stats = sess.run([qmu.mean(), qmu.stddev()])
    mean, stddev = posterior_stats
    print("Inferred posterior mean:")
    print(mean)
    print("Inferred posterior stddev:")
    print(stddev)

    # Check convergence with visual diagnostics.
    samples = sess.run(qmu.params)

    # Plot histogram.
    plt.hist(samples, bins='auto')
    plt.show()

    # Trace plot.
    plt.plot(samples)
    plt.show()
def main(_):
    """Fit a three-layer Gamma/Poisson model to word counts by variational EM.

    The data are restricted to documents whose name starts with '2011' and
    to sufficiently frequent words. Three layers of Gamma latent variables
    feed a Poisson likelihood. Each iteration alternates a KLqp update of
    the local variables with a MAP update of the weight matrices; after
    every epoch the loss bound, perplexity bound and top words are printed.
    """
    ed.set_seed(42)

    # DATA
    x_train, metadata = nips(FLAGS.data_dir)
    documents = metadata['columns']
    words = metadata['rows']

    # Subset to documents in 2011 and words appearing in at least two
    # documents and have a total word count of at least 10.
    doc_idx = []
    for position, document in enumerate(documents):
        if document.startswith('2011'):
            doc_idx.append(position)
    documents = [documents[position] for position in doc_idx]
    x_train = x_train[:, doc_idx]

    in_two_or_more_docs = np.sum(x_train != 0, 1) >= 2
    count_at_least_ten = np.sum(x_train, 1) >= 10
    word_idx = np.logical_and(in_two_or_more_docs, count_at_least_ten)
    words = [word for word, keep in zip(words, word_idx) if keep]
    x_train = x_train[word_idx, :]
    x_train = x_train.T

    N = x_train.shape[0]  # number of documents
    D = x_train.shape[1]  # vocabulary size

    # MODEL
    W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]])
    W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]])
    W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D])

    z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]])
    z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
    z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
    x = Poisson(tf.matmul(z1, W0))

    # INFERENCE
    qW2 = pointmass_q(W2.shape)
    qW1 = pointmass_q(W1.shape)
    qW0 = pointmass_q(W0.shape)

    # Same construction order (z3, z2, z1) for either variational family.
    make_local_q = gamma_q if FLAGS.q == 'gamma' else lognormal_q
    qz3 = make_local_q(z3.shape)
    qz2 = make_local_q(z2.shape)
    qz1 = make_local_q(z1.shape)

    # We apply variational EM with E-step over local variables
    # and M-step to point estimate the global weight matrices.
    inference_e = ed.KLqp(
        {z1: qz1, z2: qz2, z3: qz3},
        data={x: x_train, W0: qW0, W1: qW1, W2: qW2})
    inference_m = ed.MAP(
        {W0: qW0, W1: qW1, W2: qW2},
        data={x: x_train, z1: qz1, z2: qz2, z3: qz3})

    optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
    optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
    kwargs = {
        'optimizer': optimizer_e,
        'n_print': 100,
        'logdir': FLAGS.logdir,
        'log_timestamp': False,
    }
    if FLAGS.q == 'gamma':
        kwargs['n_samples'] = 30
    inference_e.initialize(**kwargs)
    inference_m.initialize(optimizer=optimizer_m)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    n_epoch = 20
    n_iter_per_epoch = 10000
    for epoch in range(n_epoch):
        print("Epoch {}".format(epoch))
        nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            info_dict_e = inference_e.update()
            info_dict_m = inference_m.update()
            nll += info_dict_e['loss']

        # Compute perplexity averaged over a number of training iterations.
        # The model's negative log-likelihood of data is upper bounded by
        # the variational objective.
        nll /= n_iter_per_epoch
        perplexity = np.exp(nll / np.sum(x_train))
        print("Negative log-likelihood <= {:0.3f}".format(nll))
        print("Perplexity <= {:0.3f}".format(perplexity))

        # Print top 10 words for first 10 topics.
        qW0_vals = sess.run(qW0)
        for k in range(10):
            ranked = qW0_vals[k, :].argsort()
            top_words_idx = ranked[-10:][::-1]
            top_words = " ".join([words[i] for i in top_words_idx])
            print("Topic {}: {}".format(k, top_words))
def main(_):
    """Fit a 3-component Gaussian mixture with a hand-rolled Gibbs sampler.

    Synthetic data are drawn from a known mixture. The complete
    conditionals of the mixture weights, means, variances and assignments
    are derived with `ed.complete_conditional` and sampled in turn for a
    fixed number of sweeps. The final sample is printed next to the true
    parameters, and histograms of the data and of a draw from the model
    under the final sample are plotted.
    """
    # Generate data
    true_mu = np.array([-1.0, 0.0, 1.0], np.float32) * 10
    true_sigmasq = np.array([1.0**2, 2.0**2, 3.0**2], np.float32)
    true_pi = np.array([0.2, 0.3, 0.5], np.float32)
    N = 10000
    K = len(true_mu)
    true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
    x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(true_sigmasq[true_z])

    # Prior hyperparameters
    pi_alpha = np.ones(K, dtype=np.float32)
    mu_sigma = np.std(true_mu)
    sigmasq_alpha = 1.0
    sigmasq_beta = 2.0

    # Model
    pi = Dirichlet(pi_alpha)
    mu = Normal(0.0, mu_sigma, sample_shape=K)
    sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
    x = ParamMixture(pi, {'loc': mu, 'scale': tf.sqrt(sigmasq)}, Normal,
                     sample_shape=N)
    z = x.cat

    # Conditionals
    mu_cond = ed.complete_conditional(mu)
    sigmasq_cond = ed.complete_conditional(sigmasq)
    pi_cond = ed.complete_conditional(pi)
    z_cond = ed.complete_conditional(z)

    sess = ed.get_session()

    # Initialize randomly
    pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z])

    print('Initial parameters:')
    print('pi:', pi_est)
    print('mu:', mu_est)
    print('sigmasq:', sigmasq_est)
    print()

    # Gibbs sampler: `cond_dict` always holds the latest sample of every
    # variable and is fed as the conditioning set for the next draw.
    cond_dict = {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est,
                 z: z_est, x: x_data}
    t0 = time()
    T = 500
    for t in range(T):
        z_est = sess.run(z_cond, cond_dict)
        cond_dict[z] = z_est
        pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict)
        cond_dict[pi] = pi_est
        cond_dict[mu] = mu_est
        sigmasq_est = sess.run(sigmasq_cond, cond_dict)
        cond_dict[sigmasq] = sigmasq_est
    print('took %.3f seconds to run %d iterations' % (time() - t0, T))

    print()
    print('Final sample for parameters::')
    print('pi:', pi_est)
    print('mu:', mu_est)
    print('sigmasq:', sigmasq_est)
    print()

    print()
    print('True parameters:')
    print('pi:', true_pi)
    print('mu:', true_mu)
    print('sigmasq:', true_sigmasq)
    print()

    plt.figure(figsize=[10, 10])
    plt.subplot(2, 1, 1)
    plt.hist(x_data, 50)
    plt.title('Empirical Distribution of $x$')
    plt.xlabel('$x$')
    plt.ylabel('frequency')
    xl = plt.xlim()
    plt.subplot(2, 1, 2)
    plt.hist(sess.run(x, {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est}), 50)
    # Raw strings keep the LaTeX backslashes literal; the non-raw form relied
    # on invalid escape sequences (\m, \p, \s, "\ "), which Python warns on.
    plt.title(r"Predictive distribution $p(x \mid \mathrm{inferred }\ "
              r"\pi, \mu, \sigma^2)$")
    plt.xlabel('$x$')
    plt.ylabel('frequency')
    plt.xlim(xl)
    plt.show()
data = tf.constant(data, dtype=tf.float32) return ed.Data(data) ed.set_seed(42) model = LinearModel() variational = Variational() variational.add(Normal(model.num_vars)) data = build_toy_dataset() # Set up figure fig = plt.figure(figsize=(8,8), facecolor='white') ax = fig.add_subplot(111, frameon=False) plt.ion() plt.show(block=False) sess = ed.get_session() inference = ed.MFVI(model, variational, data) inference.initialize(n_minibatch=5, n_print=5) for t in range(250): loss = inference.update() if t % inference.n_print == 0: print("iter {:d} loss {:.2f}".format(t, loss)) # Sample functions from variational model mean, std = sess.run([variational.layers[0].m, variational.layers[0].s]) rs = np.random.RandomState(0) zs = rs.randn(10, variational.num_vars) * std + mean zs = tf.constant(zs, dtype=tf.float32) inputs = np.linspace(-8, 8, num=400, dtype=np.float32) x = tf.expand_dims(tf.constant(inputs), 1)
def train(self, data, **kwargs):
    """Run inference on `data` until convergence or `self.max_steps` updates.

    Args:
      data: passed to `self.model.inference_args` and to the input function
        that call returns.
      **kwargs: forwarded to `self.model.inference_args`.

    Returns:
      self, with `losses_`, `converged_`, `tensor_map_` and
      `criticism_args_` populated.

    Raises:
      ConvergenceError: if convergence was not reached and
        `self.fail_if_not_converged` is set.
    """
    self._init()
    ed.set_seed(self.random_state)
    sess = ed.get_session()

    input_fn, latent_vars, self.tensor_map_ = self.model.inference_args(data, **kwargs)

    # Initialize inference engine
    inference = self.inference_fn(latent_vars, data=input_fn(data))
    if self.inference_fn == ed.MAP:
        inference_kwargs = {}
    else:
        inference_kwargs = {'n_samples': self.n_samples}
    inference.initialize(
        logdir=self.log_dir,
        n_print=self.n_print_progress,
        optimizer=self.optimizer,
        **inference_kwargs
    )
    tf.global_variables_initializer().run()

    self.losses_ = []
    self.converged_ = False
    recent_losses = collections.deque(maxlen=self.n_loss_buffer + 1)
    loss_change = None

    for step in range(self.max_steps):
        info_dict = inference.update()
        loss = info_dict['loss']

        report_progress = (
            self.n_print_progress and step % self.n_print_progress == 0
        )
        if report_progress:
            logging.info(
                'On iteration {} of at most {} (loss = {}, loss change = {})'
                .format(step, self.max_steps, loss, loss_change)
            )

        # Check for convergence if at least one step has already been run.
        # The buffer holds losses from previous steps only at this point.
        if len(recent_losses) > 1:
            loss_change = np.mean(np.diff(recent_losses))
            if abs(loss_change) < self.tol:
                self.converged_ = True
                if self.n_print_progress:
                    logging.info(
                        'Converged on iteration {} (loss = {}, loss change = {})'
                        .format(step, loss, loss_change)
                    )
        recent_losses.append(loss)

        is_collect_step = step % self.n_collect == 0
        is_last_step = step == self.max_steps - 1
        if is_collect_step or is_last_step or self.converged_:
            # Collect loss as well as all parameter values
            self.losses_.append(info_dict['loss'])

        if self.converged_:
            break

    # If convergence was not reached, either log a warning or throw an error
    # depending on which was configured
    if not self.converged_:
        msg = (
            'Failed to reach convergence after {} steps. '
            'Consider increasing max_steps or altering learning rate / optimizer parameters'
        ).format(self.max_steps)
        if self.fail_if_not_converged:
            raise ConvergenceError(msg)
        logger.warning(msg)

    # Save model results if a log/event directory was set
    if self.log_dir is not None and self.save_tf_model:
        checkpoint_path = os.path.join(self.log_dir, 'model.ckpt')
        tf.train.Saver().save(sess, checkpoint_path)

    # Extract criticism arguments where first is always prediction function
    self.criticism_args_ = self.model.criticism_args(sess, self.tensor_map_)

    inference.finalize()
    sess.graph.finalize()
    return self
X_train, X_test, y_train, y_test = build_toy_dataset()
# Shapes are tuples; a plain "{}" field is used because tuples reject the
# "s" format specification on Python 3 (TypeError).
print("Size of features in training data: {}".format(X_train.shape))
print("Size of output in training data: {}".format(y_train.shape))
print("Size of features in test data: {}".format(X_test.shape))
print("Size of output in test data: {}".format(y_test.shape))

# Scatter plot of the training data, without a fitted regression line.
sns.regplot(X_train, y_train, fit_reg=False)
plt.show()

X = tf.placeholder(tf.float32, shape=(None, 1))
y = tf.placeholder(tf.float32, shape=(None, 1))
data = {'X': X, 'y': y}

model = MixtureDensityNetwork(20)
inference = ed.MAP(model, data)
sess = ed.get_session()  # Start TF session
K.set_session(sess)  # Pass session info to Keras
inference.initialize()

# One training step per epoch on the training set; the loss is also
# evaluated on the test set after every step.
NEPOCH = 1000
train_loss = np.zeros(NEPOCH)
test_loss = np.zeros(NEPOCH)
for i in range(NEPOCH):
    _, train_loss[i] = sess.run([inference.train, inference.loss],
                                feed_dict={X: X_train, y: y_train})
    test_loss[i] = sess.run(inference.loss, feed_dict={X: X_test, y: y_test})

# Mixture parameters predicted for the test inputs.
pred_weights, pred_means, pred_std = sess.run(
    [model.pi, model.mus, model.sigmas], feed_dict={X: X_test})

fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 3.5))
def fit(self, X, y, **kwargs):
    """Fit every configured feature-group model to (X, y).

    Initializes each group model in `self.model_config` on its own column
    slice of `X`, runs `self.inference_fn` for at most `self.max_steps`
    updates, and records losses and parameter values along the way.

    Args:
      X: 2-d feature array; columns are selected per group via
        `m.get_feature_index()`.
      y: response values passed to every group model.
      **kwargs: accepted but not used in this method.

    Returns:
      self, with `params_`, `losses_` and `converged_` populated.

    Raises:
      ConvergenceError: if convergence was not reached and
        `self.fail_if_not_converged` is set.
    """
    self._init()
    ed.set_seed(self.random_state)
    use_map = self.inference_fn == ed.MAP
    model = {}
    for g, m in self.model_config.items():
        # Set the name associated with this group
        # (this is useful for naming tensors and should only be done once)
        m.set_name(g)
        # Configure models to use point mass distributions if MAP
        # inference is being run
        m.set_use_point_mass(use_map)
        # Slice observed data to only fields relevant to this group
        x = X[:, m.get_feature_index()]
        # Initialize the feature group model and retrieve parameters
        # to be optimized for group
        with tf.name_scope(g):
            m.initialize(x, y)
            model[g] = m.get_parameter_map(x, y)
    sess = ed.get_session()
    data = {'X': X, 'Y': y}
    # Initialize inference engine
    inference = self.inference_fn(_flatten_map(model, sep=':'), data, self)
    optimizer = self.optimizer_fn() if self.optimizer_fn else None
    # n_samples is only passed when inference is not MAP
    if use_map:
        inference.initialize(optimizer=optimizer, n_print=self.n_print_progress)
    else:
        inference.initialize(optimizer=optimizer, n_print=self.n_print_progress, n_samples=self.n_samples)
    # It would be much better if inference instances exposed the ability to set the log dir
    # directly but at the moment it's only set in inference.run, and this is how it's used:
    # NOTE(review): tf.train.SummaryWriter, tf.merge_all_summaries and
    # tf.initialize_all_variables look like pre-1.0 TensorFlow names --
    # confirm the targeted TensorFlow version before upgrading.
    if self.log_dir is not None:
        summary_writer = tf.train.SummaryWriter(self.log_dir, tf.get_default_graph())
        # inference.train_writer = tf.train.SummaryWriter(self.log_dir, tf.get_default_graph())
        summary_op = tf.merge_all_summaries()
    init = tf.initialize_all_variables()
    init.run()
    self.params_ = []
    self.losses_ = []
    self.converged_ = False
    # Bounded window of the most recent losses used for the convergence test
    loss_buffer = collections.deque(maxlen=self.n_loss_buffer + 1)
    loss_change = None
    for t in range(self.max_steps):
        info_dict = inference.update()
        loss = info_dict['loss']
        if self.n_print_progress and t % self.n_print_progress == 0:
            logging.info(
                'On iteration {} of at most {} (loss = {}, loss change = {})'
                .format(t, self.max_steps, loss, loss_change)
            )
        # Check for convergence if at least one step has already been run
        # (the current loss is appended only after this check, so the mean
        # change is computed over losses from earlier steps)
        if len(loss_buffer) > 1:
            loss_change = np.mean(np.diff(loss_buffer))
            if abs(loss_change) < self.tol:
                self.converged_ = True
                if self.n_print_progress:
                    logging.info(
                        'Converged on iteration {} (loss = {}, loss change = {})'\
                        .format(t, loss, loss_change)
                    )
        loss_buffer.append(loss)
        # Collect on every n_collect-th step, on the final step, and on convergence
        if t % self.n_collect == 0 or t == self.max_steps - 1 or self.converged_:
            # Collect and write out any summary ops
            # (the log_dir test short-circuits first, so summary_op is only
            # read when it was assigned above)
            if self.log_dir is not None and summary_op is not None:
                summary_writer.add_summary(sess.run([summary_op])[0], t)
            # Collect loss as well as all parameter values
            self.losses_.append(info_dict['loss'])
            params = {g: m.get_parameter_values(sess, model[g]) for g, m in self.model_config.items()}
            self.params_.append(params)
        if self.converged_:
            break
    # If convergence was not reached, either log a warning or throw an error depending on which was configured
    if not self.converged_:
        msg = 'Failed to reach convergence after {} steps. '\
            'Consider increasing max_steps or altering learning rate / optimizer parameters'\
            .format(self.max_steps)
        if self.fail_if_not_converged:
            raise ConvergenceError(msg)
        else:
            logger.warning(msg)
    # Save model results if a log/event directory was set
    if self.log_dir is not None and self.save_tf_model:
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(self.log_dir, 'model.ckpt'))
    # Mark the graph read-only so no further ops can be added after fitting
    sess.graph.finalize()
    return self
def _init(self):
    """Close the Edward session, reset the default graph and clear results."""
    session = ed.get_session()
    session.close()
    tf.reset_default_graph()
    # Fitted state is cleared until the next fit populates it again.
    self.params_ = self.losses_ = None
def main(_):
    """Run ImplicitKLqp on a linear model with a learned ratio estimator.

    Each outer iteration draws one minibatch, performs five "Disc" updates
    followed by one "Gen" update, and reports progress with the set of six
    updates counted as a single iteration.
    """
    def ratio_estimator(data, local_vars, global_vars):
        """Takes as input a dict of data x, local variable samples z, and
        global variable samples beta; outputs real values of shape
        (x.shape[0] + z.shape[0],). In this example, there are no local
        variables.
        """
        # data[y] has shape (M,); global_vars[w] has shape (D,)
        # we concatenate w to each data point y, so input has shape (M, 1 + D)
        y_column = tf.reshape(data[y], [FLAGS.M, 1])
        w_row = tf.reshape(global_vars[w], [1, FLAGS.D])
        w_tiled = tf.tile(w_row, [FLAGS.M, 1])
        features = tf.concat([y_column, w_tiled], 1)
        hidden = tf.layers.dense(features, 64, activation=tf.nn.relu)
        return tf.layers.dense(hidden, 1, activation=None)

    ed.set_seed(42)

    # DATA
    w_true = np.ones(FLAGS.D) * 5.0
    X_train, y_train = build_toy_dataset(FLAGS.N, w_true)
    X_test, y_test = build_toy_dataset(FLAGS.N, w_true)
    data = generator([X_train, y_train], FLAGS.M)

    # MODEL
    X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D])
    y_ph = tf.placeholder(tf.float32, [FLAGS.M])
    w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
    y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M))

    # INFERENCE
    qw_loc = tf.get_variable("qw/loc", [FLAGS.D]) + 1.0
    qw_scale = tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))
    qw = Normal(loc=qw_loc, scale=qw_scale)

    inference = ed.ImplicitKLqp(
        {w: qw}, data={y: y_ph},
        discriminator=ratio_estimator, global_vars={w: qw})
    inference.initialize(n_iter=5000, n_print=100,
                         scale={y: float(FLAGS.N) / FLAGS.M})

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for _outer in range(inference.n_iter):
        X_batch, y_batch = next(data)
        batch_feed = {X: X_batch, y_ph: y_batch}

        # Five discriminator updates per generator update.
        for _inner in range(5):
            info_dict_d = inference.update(
                variables="Disc", feed_dict={X: X_batch, y_ph: y_batch})

        info_dict = inference.update(variables="Gen", feed_dict=batch_feed)
        info_dict['loss_d'] = info_dict_d['loss_d']
        info_dict['t'] = info_dict['t'] // 6  # say set of 6 updates is 1 iteration

        t = info_dict['t']
        inference.print_progress(info_dict)
        if t == 1 or t % inference.n_print == 0:
            # Check inferred posterior parameters.
            mean, std = sess.run([qw.mean(), qw.stddev()])
            print("\nInferred mean & std:")
            print(mean)
            print(std)
def main(_):
    """Train a layered Bernoulli latent-variable model with KLqp.

    The generative model stacks Bernoulli layers top-down, each with logits
    from a dense layer on the layer above; the variational model mirrors
    it bottom-up from the data. After every epoch the averaged training
    loss and a sampled test negative log-likelihood are printed and one
    batch of model samples is written out as PNG images.
    """
    ed.set_seed(42)

    # DATA
    (x_train, _), (x_test, _), (x_valid, _) = caltech101_silhouettes(
        FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.batch_size)
    x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

    # MODEL
    n_layers = len(FLAGS.hidden_sizes)
    zs = [0] * n_layers
    for layer in range(n_layers - 1, -1, -1):
        if layer == n_layers - 1:
            # Top layer: zero logits, with batch size taken from the input.
            logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[layer]])
        else:
            parent = tf.cast(zs[layer + 1], tf.float32)
            logits = tf.layers.dense(
                parent, FLAGS.hidden_sizes[layer], activation=None)
        zs[layer] = Bernoulli(logits=logits)

    x = Bernoulli(logits=tf.layers.dense(tf.cast(zs[0], tf.float32),
                                         28 * 28, activation=None))

    # INFERENCE
    # Define variational model with reverse ordering as probability model:
    # if p is 15-100-300 from top-down, q is 300-100-15 from bottom-up.
    qzs = [0] * n_layers
    for layer in range(n_layers):
        below = x_ph if layer == 0 else qzs[layer - 1]
        logits = tf.layers.dense(tf.cast(below, tf.float32),
                                 FLAGS.hidden_sizes[layer], activation=None)
        qzs[layer] = Bernoulli(logits=logits)

    inference = ed.KLqp(dict(zip(zs, qzs)), data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
    inference.initialize(optimizer=optimizer,
                         n_samples=FLAGS.n_train_samples)

    # Build tensor for log-likelihood given one variational sample to run
    # on test data.
    x_post = ed.copy(x, dict(zip(zs, qzs)))
    n_rows = tf.cast(tf.shape(x_ph)[0], tf.float32)
    x_neg_log_prob = -tf.reduce_sum(x_post.log_prob(x_ph)) / n_rows

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for epoch in range(FLAGS.n_epoch):
        print("Epoch {}".format(epoch))
        train_loss = 0.0

        pbar = Progbar(FLAGS.n_iter_per_epoch)
        for t in range(1, FLAGS.n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            train_loss += info_dict['loss']

        # Print per-data point loss, averaged over training epoch.
        train_loss /= FLAGS.n_iter_per_epoch
        train_loss /= FLAGS.batch_size
        print("Training negative log-likelihood: {:0.3f}".format(train_loss))

        test_draws = []
        for _ in range(FLAGS.n_test_samples):
            test_draws.append(sess.run(x_neg_log_prob, {x_ph: x_test}))
        test_loss = np.mean(test_draws)
        print("Test negative log-likelihood: {:0.3f}".format(test_loss))

        # Prior predictive check.
        images = sess.run(x, {x_ph: x_batch})  # feed ph to determine sample size
        # NOTE(review): `out_dir` is not defined inside this function --
        # presumably a module-level name (or meant to be a FLAGS entry);
        # confirm it exists before running.
        for m in range(FLAGS.batch_size):
            imsave("{}/{}.png".format(out_dir, m), images[m].reshape(28, 28))
def get_tensor(self, tensor):
    """Evaluate a tensor in the Edward session and return its value.

    `tensor` is either a `tf.Tensor` or a key that is looked up in
    `self.tensor_map_` to obtain the tensor to run.
    """
    sess = ed.get_session()
    target = tensor if isinstance(tensor, tf.Tensor) else self.tensor_map_[tensor]
    return sess.run(target)
def main(_):
    """Train a character-level language model on text8 with MAP inference.

    Every half epoch prints the average training bits per character, the
    average negative log-likelihood per test sequence, and a few generated
    samples decoded back to text.
    """
    ed.set_seed(42)

    # DATA
    x_train, _, x_test = text8(FLAGS.data_dir)
    vocab = string.ascii_lowercase + ' '
    vocab_size = len(vocab)
    encoder = dict(zip(vocab, range(vocab_size)))
    decoder = {v: k for k, v in encoder.items()}

    data = generator(x_train, FLAGS.batch_size, FLAGS.timesteps, encoder)

    # MODEL
    x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
    with tf.variable_scope("language_model"):
        # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
        x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
        x = language_model(x_ph_shift, vocab_size)

    with tf.variable_scope("language_model", reuse=True):
        x_gen = language_model_gen(5, vocab_size)

    # Cut the test text into non-overlapping windows of `timesteps` characters.
    imb = range(0, len(x_test) - FLAGS.timesteps, FLAGS.timesteps)
    encoded_x_test = np.asarray(
        [[encoder[c] for c in x_test[i:(i + FLAGS.timesteps)]] for i in imb],
        dtype=np.int32)
    test_size = encoded_x_test.shape[0]
    print("Test set shape: {}".format(encoded_x_test.shape))
    test_nll = -tf.reduce_sum(x.log_prob(x_ph))

    # INFERENCE
    inference = ed.MAP({}, {x: x_ph})

    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
    inference.initialize(optimizer=optimizer,
                         logdir=FLAGS.log_dir,
                         log_timestamp=False)

    print("Number of sets of parameters: {}".format(
        len(tf.trainable_variables())))
    print("Number of parameters: {}".format(
        np.sum([np.prod(v.shape.as_list()) for v in tf.trainable_variables()])))
    for v in tf.trainable_variables():
        print(v)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    # Double n_epoch and print progress every half an epoch.
    n_iter_per_epoch = len(x_train) // (FLAGS.batch_size * FLAGS.timesteps * 2)
    epoch = 0.0
    for _ in range(FLAGS.n_epoch * 2):
        epoch += 0.5
        print("Epoch: {0}".format(epoch))
        avg_nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(data)
            info_dict = inference.update({x_ph: x_batch})
            avg_nll += info_dict['loss']

        # Print average bits per character over epoch.
        avg_nll /= (n_iter_per_epoch * FLAGS.batch_size * FLAGS.timesteps *
                    np.log(2))
        print("Train average bits/char: {:0.8f}".format(avg_nll))

        # Print per-data point log-likelihood on test set.
        # The batch size comes from FLAGS, as everywhere else in this
        # function; the bare name `batch_size` is not defined here.
        avg_nll = 0.0
        for start in range(0, test_size, FLAGS.batch_size):
            end = min(test_size, start + FLAGS.batch_size)
            x_batch = encoded_x_test[start:end]
            avg_nll += sess.run(test_nll, {x_ph: x_batch})

        avg_nll /= test_size
        print("Test average NLL: {:0.8f}".format(avg_nll))

        # Generate samples from model.
        samples = sess.run(x_gen)
        samples = [''.join([decoder[c] for c in sample]) for sample in samples]
        print("Samples:")
        for sample in samples:
            print(sample)