def _test(p, n):
    """Compare Bernoulli.log_prob with scipy's Bernoulli log-pmf.

    Draws ``n`` samples from ``Bernoulli(p=p)``, evaluates them, and asserts
    that the log-probabilities computed by the random variable agree with
    ``stats.bernoulli.logpmf`` on the same draws.

    Args:
        p: object passed as the ``p`` parameter; it must expose ``.eval()``.
        n: sample size forwarded to ``sample``.
    """
    bern = Bernoulli(p=p)
    draws = bern.sample(n).eval()
    draws_tf = tf.constant(draws, dtype=tf.float32)
    p_value = p.eval()
    actual = bern.log_prob(draws_tf).eval()
    expected = stats.bernoulli.logpmf(draws, p_value)
    assert np.allclose(actual, expected)
class Joint:
    """Vectorized evaluation of the joint log density of data and weights.

    Computes log p(y, w | X) = log [ p(y | X, w) * p(w) ] for a whole batch
    of weight samples at once. It should give the same values as the slow
    per-sample version, but faster.

    The log-probability ops are built once in ``__init__``. Building them
    inside ``log_prob`` would add new nodes to the TensorFlow graph on every
    call.
    """

    def __init__(self, Xtrain, ytrain, sess, n_samples=1000):
        """Build the graph used to score weight samples.

        Args:
            Xtrain: array whose ``shape`` unpacks to ``(N, D)``.
            ytrain: 1-D array of targets; it is repeated along a new second
                axis ``n_samples`` times. The repeated copy is baked into the
                graph here, so later changes to ``self.ytrain`` have no
                effect on ``log_prob``.
            sess: TensorFlow session used to run the ops.
            n_samples: number of weight samples scored per ``log_prob`` call.
                Defaults to 1000, the value that used to be hard coded; it
                must match the value used by the callers (elbo and fc).
        """
        self.Xtrain = Xtrain
        self.ytrain = ytrain
        self.sess = sess
        self.n_samples = n_samples

        N, D = Xtrain.shape
        self.w = tf.placeholder(tf.float32, [D, self.n_samples])
        self.X = tf.placeholder(tf.float32, [N, D])
        self.y = Bernoulli(logits=tf.matmul(self.X, self.w))
        # Standard normal prior on every weight (scale is hard coded to 1).
        self.prior = Normal(loc=tf.zeros([self.n_samples, D]),
                            scale=1.0 * tf.ones([self.n_samples, D]))

        copied_ytrain = np.repeat(ytrain[:, np.newaxis], self.n_samples,
                                  axis=1)
        # Ops built once and reused by every log_prob call.
        self._per_sample_ll = self.y.log_prob(copied_ytrain)
        # self.w holds samples.T, so transpose back to [n_samples, D].
        self._prior_lp = self.prior.log_prob(tf.transpose(self.w))

    def log_prob(self, samples):
        """Return the joint log density for each weight sample.

        Args:
            samples: array of shape ``[n_samples, D]``; its transpose is fed
                to the ``[D, n_samples]`` placeholder.

        Returns:
            Array with one entry per sample: the log likelihood summed over
            the first axis of the per-datum values plus the prior log density
            summed over the weight dimensions.
        """
        per_sample, prior_terms = self.sess.run(
            [self._per_sample_ll, self._prior_lp],
            feed_dict={
                self.X: self.Xtrain,
                self.w: samples.T
            })
        lik = np.sum(per_sample.astype(np.float32), axis=0)
        prior = np.sum(prior_terms, axis=1)
        return lik + prior
class Joint:
    '''Wrapper to handle calculating the joint probability of data

    log p(y, w | X) = log [ p(y | X, w) * p(w) ]
    '''

    def __init__(self, X, y, sess, n_samples, logger=None):
        """Initialize the distribution.

        Constructs the graph for evaluation of joint probabilities
        of data X and weights (latent vars) w

        Args:
            X: [N x D] data
            y: [N] target variable (one entry per row of X); it is repeated
                along a new second axis to shape [N, n_samples]
            sess: tensorflow session
            n_samples: number of monte carlo samples to compute expectation
            logger: optional logger; stored as ``self.logger`` (``None`` when
                not given, so the attribute always exists)
        """
        self.sess = sess
        self.n_samples = n_samples
        # Always define the attribute so later access cannot raise
        # AttributeError when no logger was supplied.
        self.logger = logger

        # (N, ) -> (N, n_samples)
        y_matrix = np.repeat(y[:, np.newaxis], self.n_samples, axis=1)

        # Define the model graph
        _, D = X.shape
        self.X = tf.convert_to_tensor(X, dtype=tf.float32)
        self.Y = tf.convert_to_tensor(y_matrix, dtype=tf.float32)
        # Variable holding the current batch of weight samples; log_prob
        # overwrites it with the samples being scored.
        self.W = tf.get_variable('samples', (self.n_samples, D),
                                 tf.float32,
                                 initializer=tf.zeros_initializer())
        # logits: (N, n_samples)
        self.py = Bernoulli(logits=tf.matmul(self.X, tf.transpose(self.W)))
        self.w_prior = Normal(loc=tf.zeros([self.n_samples, D], tf.float32),
                              scale=tf.ones([self.n_samples, D], tf.float32))
        # prior log probability is summed across the D features
        # [n_samples, D] -> [n_samples]
        self.prior = tf.reduce_sum(self.w_prior.log_prob(self.W), axis=1)
        log_likelihoods = self.py.log_prob(self.Y)  # (N, n_samples)
        self.ll = tf.reduce_sum(log_likelihoods, axis=0)  # (n_samples, )
        self.joint = self.ll + self.prior

    def log_prob(self, samples):
        """Log probability of samples.

        X is already given, so ``samples`` are always samples of w (from the
        target distribution, from base distributions of the approximation,
        or from individual atoms).

        Assigns ``samples`` to the ``W`` variable in ``self.sess`` and returns
        the joint tensor. The tensor is returned un-evaluated, so evaluating
        it later reflects whatever was assigned to ``W`` most recently.

        Args:
            samples: [self.n_samples x D] tensor

        Returns:
            [self.n_samples, ] joint log probability tensor of samples, X, y

        Raises:
            AssertionError: if the first dimension of ``samples`` differs
                from ``self.n_samples``.
        """
        assert samples.shape[0] == self.n_samples, \
            'Different number of samples'
        # NOTE: this creates a fresh assign op per call; it is kept because
        # `samples` may be a tensor, which cannot be fed through a placeholder.
        self.sess.run(self.W.assign(samples))
        return self.joint
def _test(shape, n):
    """Check Distribution.log_prob against a per-index scipy reference.

    Builds a Bernoulli of the given shape with every probability set to 0.5,
    draws ``n`` samples, and asserts that ``log_prob`` equals the sum over
    the first-axis indices of ``stats.bernoulli.logpmf`` applied column by
    column.

    Args:
        shape: shape of the random variable; ``shape[0]`` sets the number of
            columns summed over.
        n: sample size forwarded to ``sample``.
    """
    bern = Bernoulli(shape, p=tf.zeros(shape) + 0.5)
    draws = bern.sample(n).eval()
    draws_tf = tf.constant(draws, dtype=tf.float32)
    probs = bern.p.eval()
    actual = bern.log_prob(draws_tf).eval()
    # Reference: add the independent per-column log-pmfs.
    expected = sum(
        (stats.bernoulli.logpmf(draws[:, col], probs[col])
         for col in range(shape[0])),
        0.0,
    )
    assert np.allclose(actual, expected)
def _test(shape, n):
    """Check Distribution.log_prob against a per-index scipy reference.

    Same check as summing ``stats.bernoulli.logpmf`` column by column, with
    all tensor evaluations performed under ``sess`` as the default session.

    Args:
        shape: shape of the random variable; ``shape[0]`` sets the number of
            columns summed over.
        n: sample size forwarded to ``sample``.
    """
    bern = Bernoulli(shape, p=tf.zeros(shape) + 0.5)
    sample_op = bern.sample(n)
    with sess.as_default():
        draws = sample_op.eval()
        draws_tf = tf.constant(draws, dtype=tf.float32)
        probs = bern.p.eval()
        actual = bern.log_prob(draws_tf).eval()
        # Reference: add the independent per-column log-pmfs.
        expected = sum(
            (stats.bernoulli.logpmf(draws[:, col], probs[col])
             for col in range(shape[0])),
            0.0,
        )
        assert np.allclose(actual, expected)
def main(_):
    """Run the iterative mixture-building loop for Bayesian logistic regression.

    For each of ``FLAGS.n_fw_iter`` iterations a fresh graph and session are
    created, a new component ``s`` is fitted with ``relbo.KLqp`` (the LMO
    step, per the comments below), a step-size routine chosen by
    ``FLAGS.fw_variant`` updates the mixture weights and parameters, and a
    set of metrics is appended to CSV/text files under ``FLAGS.outdir``.

    Args:
        _: unused positional argument.

    Raises:
        AssertionError: if train and test feature dimensions differ.
        NotImplementedError: if ``FLAGS.fw_variant`` is not a handled variant.
    """
    ed.set_seed(FLAGS.seed)

    # setting up output directory
    outdir = FLAGS.outdir
    if '~' in outdir:
        outdir = os.path.expanduser(outdir)
    os.makedirs(outdir, exist_ok=True)

    # Multivariate base families are handled as vector-valued distributions.
    is_vector = FLAGS.base_dist in ['mvnormal', 'mvlaplace']

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape
    assert D_test == D, 'Test dimension %d different than train %d' % (D_test, D)
    logger.info('D = %d, Ntrain = %d, Ntest = %d' % (D, N, N_test))

    # Solution components: mixture weights and per-component loc/scale dicts
    weights, q_params = [], []
    # Lipschitz estimate carried across iterations by the adaptive variants
    lipschitz_estimate = None

    # Metrics to log. Each file is truncated here and appended to per
    # iteration below.
    times_filename = os.path.join(outdir, 'times.csv')
    open(times_filename, 'w').close()

    # one row per iteration: (elbo, KLqp loss)
    elbos_filename = os.path.join(outdir, 'elbos.csv')
    logger.info('saving elbos to, %s' % elbos_filename)
    open(elbos_filename, 'w').close()

    rocs_filename = os.path.join(outdir, 'roc.csv')
    logger.info('saving rocs to, %s' % rocs_filename)
    open(rocs_filename, 'w').close()

    gap_filename = os.path.join(outdir, 'gap.csv')
    open(gap_filename, 'w').close()

    step_filename = os.path.join(outdir, 'steps.csv')
    open(step_filename, 'w').close()

    # two columns per row, logged as (mean, std); see the NOTE at tf.nn.moments
    ll_train_filename = os.path.join(outdir, 'll_train.csv')
    open(ll_train_filename, 'w').close()
    ll_test_filename = os.path.join(outdir, 'll_test.csv')
    open(ll_test_filename, 'w').close()

    # (bin_ac_train, bin_ac_test)
    bin_ac_filename = os.path.join(outdir, 'bin_ac.csv')
    open(bin_ac_filename, 'w').close()

    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        lipschitz_filename = os.path.join(outdir, 'lipschitz.csv')
        open(lipschitz_filename, 'w').close()

        iter_info_filename = os.path.join(outdir, 'iter_info.txt')
        open(iter_info_filename, 'w').close()

    for t in range(FLAGS.n_fw_iter):
        # A fresh graph and session per iteration; only the numpy-side state
        # (weights, q_params, lipschitz_estimate) carries over.
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                tf.set_random_seed(FLAGS.seed)

                # Build Model
                w = Normal(loc=tf.zeros(D, tf.float32), scale=tf.ones(D, tf.float32))
                X = tf.placeholder(tf.float32, [None, D])
                y = Bernoulli(logits=ed.dot(X, w))

                p_joint = blr_utils.Joint(Xtrain, ytrain, sess, FLAGS.n_monte_carlo_samples, logger)

                # vectorized Model evaluations
                n_test_samples = 100
                W = tf.placeholder(tf.float32, [n_test_samples, D])
                y_data = tf.placeholder(tf.float32, [None])
                # N -> (N, n_test)
                y_data_matrix = tf.tile(tf.expand_dims(y_data, 1), (1, n_test_samples))
                pred_logits = tf.matmul(X, tf.transpose(W))  # (N, n_test)
                # Prediction: sigmoid of the logit averaged over weight samples
                ypred = tf.sigmoid(tf.reduce_mean(pred_logits, axis=1))
                pY = Bernoulli(logits=pred_logits)  # (N, n_test)
                log_likelihoods = pY.log_prob(y_data_matrix)  # (N, n_test)
                log_likelihood_expectation = tf.reduce_mean(log_likelihoods, axis=1)  # (N, )
                # NOTE(review): tf.nn.moments returns (mean, variance), so
                # `ll_std` holds a variance although it is named and logged
                # as a standard deviation -- confirm the intended statistic.
                ll_mean, ll_std = tf.nn.moments(log_likelihood_expectation, axes=[0])

                if t == 0:
                    fw_iterates = {}
                else:
                    # Current solution: rebuild the mixture from stored params
                    prev_components = [
                        coreutils.base_loc_scale(FLAGS.base_dist, c['loc'], c['scale'], multivariate=is_vector)
                        for c in q_params
                    ]
                    qtw_prev = coreutils.get_mixture(weights, prev_components)
                    fw_iterates = {w: qtw_prev}

                # s is the solution to LMO, random initialization
                s = coreutils.construct_base(FLAGS.base_dist, [D], t, 's', multivariate=is_vector)

                sess.run(tf.global_variables_initializer())

                # Reset every iteration, so times.csv records per-iteration time.
                total_time = 0.
                inference_time_start = time.time()
                # Run relbo to solve LMO problem
                # If the first atom is being selected through running LMO
                # it is equivalent to running vi on a uniform prior
                # Since uniform is not in our variational family try
                # only random element (without LMO inference) as initial iterate
                if FLAGS.iter0 == 'vi' or t > 0:
                    inference = relbo.KLqp({w: s}, fw_iterates=fw_iterates, data={
                        X: Xtrain, y: ytrain
                    }, fw_iter=t)
                    inference.run(n_iter=FLAGS.LMO_iter)
                inference_time_end = time.time()
                # Only step-size selection time is counted; the inference
                # time measured above is deliberately not added.
                #total_time += float(inference_time_end - inference_time_start)

                loc_s = s.mean().eval()
                scale_s = s.stddev().eval()

                # Evaluate the next step
                step_result = {}
                if t == 0:
                    # Initialization, q_0
                    q_params.append({'loc': loc_s, 'scale': scale_s})
                    weights.append(1.)
                    if FLAGS.fw_variant.startswith('ada'):
                        lipschitz_estimate = opt.adafw_linit(s, p_joint)
                    step_type = 'init'
                elif FLAGS.fw_variant == 'fixed':
                    start_step_time = time.time()
                    step_result = opt.fixed(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                elif FLAGS.fw_variant == 'adafw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_fw(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    # The estimate is refreshed only for the step types listed
                    if step_type == 'adaptive':
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_pfw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_pfw(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_afw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_afw(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'away', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'line_search':
                    start_step_time = time.time()
                    step_result = opt.line_search_dkl(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                else:
                    raise NotImplementedError(
                        'Step size variant %s not implemented' % FLAGS.fw_variant)

                if t == 0:
                    # The first iterate is the single component s with weight 1
                    gamma = 1.
                    new_components = [s]
                else:
                    q_params = step_result['params']
                    weights = step_result['weights']
                    gamma = step_result['gamma']
                    new_components = [
                        coreutils.base_loc_scale(FLAGS.base_dist, c['loc'], c['scale'], multivariate=is_vector)
                        for c in q_params
                    ]
                qtw_new = coreutils.get_mixture(weights, new_components)

                # Log metrics for current iteration
                logger.info('total time %f' % total_time)
                append_to_file(times_filename, total_time)

                elbo_t = elbo(qtw_new, p_joint, return_std=False)
                # cross-check: also compute the loss directly through KLqp
                elbo_loss = elboModel.KLqp({w: qtw_new}, data={
                    X: Xtrain, y: ytrain
                })
                res_update = elbo_loss.run()
                logger.info("iter, %d, elbo, %.2f loss %.2f" % (t, elbo_t, res_update['loss']))
                append_to_file(elbos_filename, "%f,%f" % (elbo_t, res_update['loss']))

                logger.info('iter %d, gamma %.4f' % (t, gamma))
                append_to_file(step_filename, gamma)

                # No step result (and hence no gap) exists for the first iterate
                if t > 0:
                    gap_t = step_result['gap']
                    logger.info('iter %d, gap %.4f' % (t, gap_t))
                    append_to_file(gap_filename, gap_t)

                if FLAGS.fw_variant.startswith('ada'):
                    append_to_file(lipschitz_filename, lipschitz_estimate)
                    append_to_file(iter_info_filename, step_type)
                    logger.info('lt = %.5f, iter_type = %s' % (lipschitz_estimate, step_type))

                # get weight samples to evaluate expectations
                w_samples = qtw_new.sample([n_test_samples]).eval()
                ll_train_mean, ll_train_std = sess.run([ll_mean, ll_std], feed_dict={
                    W: w_samples, X: Xtrain, y_data: ytrain
                })
                logger.info("iter, %d, train ll, %.2f +/- %.2f" % (t, ll_train_mean, ll_train_std))
                append_to_file(ll_train_filename, "%f,%f" % (ll_train_mean, ll_train_std))

                # The same weight samples are reused for the test metrics
                ll_test_mean, ll_test_std, y_test_pred = sess.run(
                    [ll_mean, ll_std, ypred], feed_dict={
                        W: w_samples, X: Xtest, y_data: ytest
                    })
                logger.info("iter, %d, test ll, %.2f +/- %.2f" % (t, ll_test_mean, ll_test_std))
                append_to_file(ll_test_filename, "%f,%f" % (ll_test_mean, ll_test_std))

                roc_score = roc_auc_score(ytest, y_test_pred)
                logger.info("iter %d, roc %.4f" % (t, roc_score))
                append_to_file(rocs_filename, roc_score)

                y_post = ed.copy(y, {w: qtw_new})
                # eq. to y = Bernoulli(logits=ed.dot(X, qtw_new))
                ed_train_ll = ed.evaluate('log_likelihood', data={
                    X: Xtrain, y_post: ytrain,
                })
                ed_test_ll = ed.evaluate('log_likelihood', data={
                    X: Xtest, y_post: ytest,
                })
                logger.info("edward train ll %.2f test ll %.2f" % (ed_train_ll, ed_test_ll))

                bin_ac_train = ed.evaluate('binary_accuracy', data={
                    X: Xtrain, y_post: ytrain,
                })
                bin_ac_test = ed.evaluate('binary_accuracy', data={
                    X: Xtest, y_post: ytest,
                })
                append_to_file(bin_ac_filename, "%f,%f" % (bin_ac_train, bin_ac_test))
                logger.info(
                    "edward binary accuracy train ll %.2f test ll %.2f" % (bin_ac_train, bin_ac_test))

                mse_test = ed.evaluate('mean_squared_error', data={
                    X: Xtest, y_post: ytest,
                })
                logger.info("edward mse test ll %.2f" % (mse_test))

                # Release this iteration's session before the graph is reset
                sess.close()

        tf.reset_default_graph()
def main(_):
    """Build a mixture approximation for Bayesian logistic regression.

    Each of the ``FLAGS.n_fw_iter`` iterations creates a fresh graph and
    session, fits a new component with ``relbo.KLqp``, updates the mixture
    weights (fixed 2/(iter+2) step, optionally replaced by the fully
    corrective or line-search variants), and writes train/test
    log-likelihoods, ROC scores, duality gaps, prediction plots and
    cumulative inference times into the output directory.

    Args:
        _: unused positional argument.
    """
    outdir = setup_outdir()
    ed.set_seed(FLAGS.seed)

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape

    print("Xtrain")
    print(Xtrain)
    print(Xtrain.shape)

    if 'synthetic' in FLAGS.exp:
        # HMC reference run on the synthetic data.
        w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
        X = tf.placeholder(tf.float32, [N, D])
        y = Bernoulli(logits=ed.dot(X, w))

        # Kept tiny here; a value of 100000 was used at some point.
        n_posterior_samples = 10
        qw_empirical = Empirical(
            params=tf.get_variable("qw/params", [n_posterior_samples, D]))
        inference = ed.HMC({w: qw_empirical}, data={X: Xtrain, y: ytrain})
        inference.initialize(n_print=10, step_size=0.6)

        tf.global_variables_initializer().run()
        inference.run()

        # Currently unused below; the draw is kept so that the sequence of
        # ops executed in this branch is unchanged.
        empirical_samples = qw_empirical.sample(50).eval()

    weights, q_components = [], []
    ll_trains, ll_tests, rocs, gaps = [], [], [], []
    total_time, times = 0., []
    for fw_iter in range(0, FLAGS.n_fw_iter):
        print("iter %d" % fw_iter)
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                try:
                    tf.set_random_seed(FLAGS.seed)

                    # MODEL
                    w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))

                    X = tf.placeholder(tf.float32, [N, D])
                    y = Bernoulli(logits=ed.dot(X, w))

                    X_test = tf.placeholder(tf.float32, [N_test, D_test])
                    y_test = Bernoulli(logits=ed.dot(X_test, w))

                    qw = construct_base_dist([D], fw_iter, 'qw')
                    inference_time_start = time.time()
                    inference = relbo.KLqp({w: qw},
                                           fw_iterates=get_fw_iterates(
                                               weights, w, q_components),
                                           data={
                                               X: Xtrain,
                                               y: ytrain
                                           },
                                           fw_iter=fw_iter)
                    tf.global_variables_initializer().run()
                    inference.run(n_iter=FLAGS.LMO_iter)
                    inference_time_end = time.time()
                    # total_time accumulates across iterations.
                    total_time += float(inference_time_end -
                                        inference_time_start)

                    joint = Joint(Xtrain, ytrain, sess)
                    if fw_iter > 0:
                        qtw_prev = build_mixture(weights, q_components)
                        gap = compute_duality_gap(joint, qtw_prev, qw)
                        gaps.append(gap)
                        np.savetxt(os.path.join(outdir, "gaps.csv"),
                                   gaps,
                                   delimiter=',')
                        print("duality gap", gap)

                    # update weights
                    gamma = 2. / (fw_iter + 2.)
                    # The comprehension variable must not be called `w`: that
                    # is the model's random variable, and on Python 2 the
                    # comprehension variable leaks into this scope.
                    weights = [(1. - gamma) * wt for wt in weights]
                    weights.append(gamma)

                    # update components
                    q_components = update_iterate(q_components, qw)

                    if len(q_components) > 1 and FLAGS.fw_variant == 'fc':
                        print("running fully corrective")
                        # overwrite the weights
                        weights = fully_corrective(
                            build_mixture(weights, q_components), joint)

                        # remove inactive iterates; go through a list so
                        # that elements can be deleted, iterating in reverse
                        # so earlier indices stay valid
                        weights = list(weights)
                        for i in reversed(range(len(weights))):
                            if weights[i] == 0:
                                del weights[i]
                                del q_components[i]
                        weights = np.array(weights)
                    elif (len(q_components) > 1
                          and FLAGS.fw_variant == 'line_search'):
                        print("running line search")
                        weights = line_search(
                            build_mixture(weights[:-1], q_components[:-1]),
                            qw, joint)

                    qtw_new = build_mixture(weights, q_components)

                    # Log-likelihoods averaged over 50 draws of the weights
                    train_lls = [
                        sess.run(y.log_prob(ytrain),
                                 feed_dict={
                                     X: Xtrain,
                                     w: qtw_new.sample().eval()
                                 }) for _ in range(50)
                    ]
                    train_lls = np.mean(train_lls, axis=0)
                    ll_trains.append((np.mean(train_lls), np.std(train_lls)))

                    test_lls = [
                        sess.run(y_test.log_prob(ytest),
                                 feed_dict={
                                     X_test: Xtest,
                                     w: qtw_new.sample().eval()
                                 }) for _ in range(50)
                    ]
                    test_lls = np.mean(test_lls, axis=0)
                    ll_tests.append((np.mean(test_lls), np.std(test_lls)))

                    # Predictions: sigmoid of the logits averaged over draws
                    logits = np.mean([
                        np.dot(Xtest,
                               qtw_new.sample().eval()) for _ in range(50)
                    ],
                                     axis=0)
                    ypred = tf.sigmoid(logits).eval()
                    roc_score = roc_auc_score(ytest, ypred)
                    rocs.append(roc_score)

                    print('roc_score', roc_score)
                    print('ytrain', np.mean(train_lls), np.std(train_lls))
                    print('ytest', np.mean(test_lls), np.std(test_lls))

                    # Plot predictions ordered by the true label
                    order = np.argsort(ytest)
                    plt.scatter(range(len(ypred)),
                                ypred[order],
                                c=ytest[order])
                    plt.savefig(os.path.join(outdir,
                                             'ypred%d.pdf' % fw_iter))
                    plt.close()

                    np.savetxt(os.path.join(outdir, "train_lls.csv"),
                               ll_trains,
                               delimiter=',')
                    np.savetxt(os.path.join(outdir, "test_lls.csv"),
                               ll_tests,
                               delimiter=',')
                    np.savetxt(os.path.join(outdir, "rocs.csv"),
                               rocs,
                               delimiter=',')

                    x_post = ed.copy(y, {w: qtw_new})
                    x_post_t = ed.copy(y_test, {w: qtw_new})

                    print(
                        'log lik train',
                        ed.evaluate('log_likelihood',
                                    data={
                                        x_post: ytrain,
                                        X: Xtrain
                                    }))
                    print(
                        'log lik test',
                        ed.evaluate('log_likelihood',
                                    data={
                                        x_post_t: ytest,
                                        X_test: Xtest
                                    }))

                    bin_ac_train = ed.evaluate('binary_accuracy',
                                               data={
                                                   x_post: ytrain,
                                                   X: Xtrain
                                               })
                    bin_ac_test = ed.evaluate('binary_accuracy',
                                              data={
                                                  x_post_t: ytest,
                                                  X_test: Xtest
                                              })
                    print('binary accuracy train', bin_ac_train)
                    print('binary accuracy test', bin_ac_test)

                    times.append(total_time)
                    # Reuse the directory resolved at the top so all outputs
                    # of a run land in the same place.
                    np.savetxt(os.path.join(outdir, 'times.csv'), times)
                finally:
                    # Release the per-iteration session (and the default
                    # graph/session contexts InteractiveSession installs),
                    # even if the iteration fails.
                    sess.close()

        tf.reset_default_graph()
def _test(self, probs, n):
    """Assert that the wrapped Bernoulli matches ``ds.Bernoulli`` log-probs.

    Draws ``n`` samples from ``Bernoulli(probs)`` and checks that both
    distributions assign exactly equal log-probabilities to them.

    Args:
        probs: probabilities passed to both constructors.
        n: sample size forwarded to ``sample``.
    """
    wrapped = Bernoulli(probs)
    reference = ds.Bernoulli(probs)
    draws = wrapped.sample(n).eval()
    got = wrapped.log_prob(draws).eval()
    want = reference.log_prob(draws).eval()
    self.assertAllEqual(got, want)
class bern_emb_model():
    """Bernoulli embedding model with positive and negative-sample terms.

    Builds a TensorFlow graph with two embedding matrices (``rho`` and
    ``alpha``), a Normal prior on both, Bernoulli likelihoods for target
    words (label 1) and sampled negatives (label 0), an Adam training op,
    and TensorBoard summaries plus embedding-projector configuration.
    """

    def __init__(self, d, K, sig, sess, logdir):
        """Construct the graph and initialize all variables.

        Args:
            d: data object; the attributes read here are ``cs``,
                ``n_minibatch``, ``L``, ``unigram``, ``ns`` and ``N``.
                ``cs`` is halved with integer division, so it is assumed to
                be an int.
            K: embedding dimension (second axis of ``rho`` and ``alpha``).
            sig: scale of the Normal prior on the embeddings.
            sess: TensorFlow session used for initialization and evaluation.
            logdir: directory for the summary writer.
        """
        self.K = K
        self.sig = sig
        self.sess = sess
        self.logdir = logdir

        # Integer division keeps the index arithmetic and tile multiples
        # integral on Python 3 as well (identical to `/` for ints on Py2).
        half_cs = d.cs // 2

        with tf.name_scope('model'):
            # Data Placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index Masks
            with tf.name_scope('context_mask'):
                # Positions of the target words inside the fed word window
                self.p_mask = tf.cast(
                    tf.range(half_cs, d.n_minibatch + half_cs), tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, half_cs), [0]),
                            [d.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, d.n_minibatch), [1]),
                            [1, half_cs]), tf.int32)
                # Context positions: half_cs before and half_cs after each
                # target position (the +1 skips the target itself)
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + half_cs + 1], 1)

            with tf.name_scope('embeddings'):
                # Embedding vectors
                self.rho = tf.Variable(
                    tf.random_normal([d.L, self.K]) / self.K, name='rho')

                # Context vectors
                self.alpha = tf.Variable(
                    tf.random_normal([d.L, self.K]) / self.K, name='alpha')

                with tf.name_scope('priors'):
                    prior = Normal(loc=0.0, scale=self.sig)
                    self.log_prior = tf.reduce_sum(
                        prior.log_prob(self.rho) + prior.log_prob(self.alpha))

            with tf.name_scope('natural_param'):
                # Target and Context Indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples drawn from the unigram distribution
                with tf.name_scope('negative_samples'):
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(d.unigram)), [0]),
                        [d.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, d.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter: inner product of the word embedding
                # with the summed context vectors
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, d.ns, 1])),
                    -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)

            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))

            self.log_likelihood = self.ll_pos + self.ll_neg

            # Rescale the minibatch likelihood by N / n_minibatch
            scale = 1.0 * d.N / d.n_minibatch
            self.loss = -(scale * self.log_likelihood + self.log_prior)

        # Training
        optimizer = tf.train.AdamOptimizer()
        self.train = optimizer.minimize(self.loss)
        with self.sess.as_default():
            tf.global_variables_initializer().run()

        variable_summaries('rho', self.rho)
        variable_summaries('alpha', self.alpha)
        with tf.name_scope('objective'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('priors', self.log_prior)
            tf.summary.scalar('ll_pos', self.ll_pos)
            tf.summary.scalar('ll_neg', self.ll_neg)
        self.summaries = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(self.logdir,
                                                  self.sess.graph)
        self.saver = tf.train.Saver()

        # Register both embedding matrices with the TensorBoard projector
        config = projector.ProjectorConfig()

        alpha = config.embeddings.add()
        alpha.tensor_name = 'model/embeddings/alpha'
        alpha.metadata_path = '../vocab.tsv'

        rho = config.embeddings.add()
        rho.tensor_name = 'model/embeddings/rho'
        rho.metadata_path = '../vocab.tsv'

        projector.visualize_embeddings(self.train_writer, config)

    def dump(self, fname):
        """Append a pickle of the current ``rho`` and ``alpha`` to ``fname``.

        The file is opened in binary append mode (pickle writes bytes) and
        closed when the write finishes.

        Args:
            fname: path of the file to append to.
        """
        with self.sess.as_default():
            dat = {'rho': self.rho.eval(), 'alpha': self.alpha.eval()}
        with open(fname, "ab+") as out:
            pickle.dump(dat, out)

    def plot_params(self, dir_name, labels):
        """Save 2-D t-SNE plots of the first ``len(labels)`` embeddings.

        Writes ``alpha.eps`` and ``rho.eps`` into ``dir_name``.

        Args:
            dir_name: output directory (joined with ``'/'``).
            labels: labels for the plotted points; its length sets how many
                rows of each embedding matrix are plotted.
        """
        plot_only = len(labels)

        with self.sess.as_default():
            tsne = TSNE(perplexity=30, n_components=2, init='pca',
                        n_iter=5000)
            low_dim_embs_alpha2 = tsne.fit_transform(
                self.alpha.eval()[:plot_only])
            plot_with_labels(low_dim_embs_alpha2[:plot_only],
                             labels[:plot_only], dir_name + '/alpha.eps')

            tsne = TSNE(perplexity=30, n_components=2, init='pca',
                        n_iter=5000)
            low_dim_embs_rho2 = tsne.fit_transform(
                self.rho.eval()[:plot_only])
            plot_with_labels(low_dim_embs_rho2[:plot_only],
                             labels[:plot_only], dir_name + '/rho.eps')