def init_eval_model(self):
    with tf.name_scope('eval_model'):
        self.eval_alpha_state = tf.placeholder(tf.float32)
        self.eval_rho_state = tf.placeholder(tf.float32)
        self.eval_n_test = tf.placeholder(tf.int32)
        eval_n_minibatch = self.eval_n_test - self.cs

        # Data placeholder
        with tf.name_scope('input'):
            self.eval_ph = tf.placeholder(tf.int32)
            words = self.eval_ph

        # Index masks (int(self.cs / 2) instead of self.cs / 2: Python 3
        # true division yields floats, which breaks the integer index math)
        with tf.name_scope('context_mask'):
            p_mask = tf.cast(
                tf.range(int(self.cs / 2),
                         eval_n_minibatch + int(self.cs / 2)), tf.int32)
            rows = tf.cast(
                tf.tile(tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                        [eval_n_minibatch, 1]), tf.int32)
            columns = tf.cast(
                tf.tile(tf.expand_dims(tf.range(0, eval_n_minibatch), [1]),
                        [1, int(self.cs / 2)]), tf.int32)
            ctx_mask = tf.concat(
                [rows + columns, rows + columns + int(self.cs / 2) + 1], 1)

        with tf.name_scope('natural_param'):
            with tf.name_scope('target_word'):
                p_idx = tf.gather(words, p_mask)
                p_rho = tf.squeeze(tf.gather(self.eval_rho_state, p_idx))

            # Negative samples
            with tf.name_scope('negative_samples'):
                self.eval_n_idx = tf.placeholder(tf.int32)
                n_rho = tf.gather(self.eval_rho_state, self.eval_n_idx)

            with tf.name_scope('context'):
                ctx_idx = tf.squeeze(tf.gather(words, ctx_mask))
                ctx_alphas = tf.gather(self.eval_alpha_state, ctx_idx)

            # Natural parameter: inner product of the target vector with
            # the sum of its context vectors
            ctx_sum = tf.reduce_sum(ctx_alphas, [1])
            p_eta = tf.expand_dims(
                tf.reduce_sum(tf.multiply(p_rho, ctx_sum), -1), 1)
            n_eta = tf.reduce_sum(
                tf.multiply(
                    n_rho,
                    tf.tile(tf.expand_dims(ctx_sum, 1), [1, self.ns, 1])), -1)

        # Conditional likelihood
        y_pos = Bernoulli(logits=p_eta)
        y_neg = Bernoulli(logits=n_eta)
        ll_pos = y_pos.log_prob(1.0)
        ll_neg = tf.reduce_mean(y_neg.log_prob(0.0), axis=1)
        # tf.nn.moments returns (mean, variance) of the held-out log-likelihood
        self.eval_ll = tf.nn.moments(ll_pos + ll_neg, axes=[0, 1])
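# Sanity check of the context-mask construction used above and in the model
# classes below: a minimal NumPy sketch with assumed toy values cs=4 and
# n_minibatch=3 (not part of the original code). Row i of ctx_mask holds the
# cs context positions surrounding target position p_mask[i].
import numpy as np

cs, n_minibatch = 4, 3
half = int(cs / 2)
p_mask = np.arange(half, n_minibatch + half)                  # [2 3 4]
rows = np.tile(np.arange(half)[None, :], [n_minibatch, 1])    # window offsets
columns = np.tile(np.arange(n_minibatch)[:, None], [1, half])  # window starts
ctx_mask = np.concatenate(
    [rows + columns, rows + columns + half + 1], axis=1)
print(ctx_mask)
# [[0 1 3 4]    <- context of target position 2
#  [1 2 4 5]    <- context of target position 3
#  [2 3 5 6]]   <- context of target position 4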
class bern_emb_model(emb_model):
    def __init__(self, args, d, logdir):
        super(bern_emb_model, self).__init__(args, d, logdir)
        self.n_minibatch = self.n_minibatch.sum()

        with tf.name_scope('model'):
            # Data placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index masks
            with tf.name_scope('context_mask'):
                self.p_mask = tf.cast(
                    tf.range(int(self.cs / 2),
                             self.n_minibatch + int(self.cs / 2)), tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(self.cs / 2)), [0]),
                            [self.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, self.n_minibatch), [1]),
                            [1, int(self.cs / 2)]), tf.int32)
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(self.cs / 2) + 1], 1)

            with tf.name_scope('embeddings'):
                self.rho = tf.Variable(self.rho_init, name='rho')
                self.alpha = tf.Variable(self.alpha_init, name='alpha',
                                         trainable=self.alpha_trainable)

            with tf.name_scope('priors'):
                prior = Normal(loc=0.0, scale=self.sig)
                if self.alpha_trainable:
                    self.log_prior = tf.reduce_sum(
                        prior.log_prob(self.rho) + prior.log_prob(self.alpha))
                else:
                    self.log_prior = tf.reduce_sum(prior.log_prob(self.rho))

            with tf.name_scope('natural_param'):
                # Target and context indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(self.unigram)), [0]),
                        [self.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, self.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1),
                                [1, self.ns, 1])), -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)
            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))
            self.log_likelihood = self.ll_pos + self.ll_neg

            scale = 1.0 * self.N / self.n_minibatch
            # Note: `scale` is computed but unused here; the loss weights the
            # minibatch likelihood by the number of epochs instead.
            self.loss = -(self.n_epochs * self.log_likelihood + self.log_prior)

    def dump(self, fname):
        with self.sess.as_default():
            dat = {'rho': self.rho.eval(), 'alpha': self.alpha.eval()}
            pickle.dump(dat, open(fname, "ab+"))
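# The negative-sampling step above draws `ns` word ids per target position
# from the unigram distribution via tf.multinomial. A NumPy equivalent with
# an assumed toy unigram distribution (not part of the original code):
import numpy as np

unigram = np.array([0.5, 0.3, 0.2])   # assumed vocabulary frequencies
ns, n_minibatch = 4, 2
n_idx = np.stack([np.random.choice(len(unigram), size=ns, p=unigram)
                  for _ in range(n_minibatch)])
print(n_idx.shape)  # (n_minibatch, ns), matching tf.multinomial's output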
def neural_network(x, w_1, w_2):
    # NOTE: only the return statement of this function survives in the
    # source; the single tanh hidden layer below is an assumed
    # reconstruction so the fragment runs.
    hidden = tf.tanh(tf.matmul(x, w_1))
    return tf.matmul(hidden, w_2)


### LOGISTIC REGRESSION MODEL
# (w_1, w_2, mb, K, and log_prior are defined in a truncated portion
# of the source)
left_ex = tf.placeholder(tf.float32, shape=(mb, K))
right_ex = tf.placeholder(tf.float32, shape=(mb, K))
g_ex = tf.placeholder(tf.float32, shape=(mb, K))

left_eta = neural_network(left_ex, w_1, w_2)
right_eta = neural_network(right_ex, w_1, w_2)
g_eta = neural_network(g_ex, w_1, w_2)

left_y = Bernoulli(logits=left_eta)
right_y = Bernoulli(logits=right_eta)
g_y = Bernoulli(logits=g_eta)

# Push "left" examples toward y=1, "right" examples toward y=0, and
# neutral examples toward probability 0.5
left_bias = tf.reduce_mean(left_y.log_prob(1.0))
right_bias = tf.reduce_mean(right_y.log_prob(0.0))
neutral = tf.reduce_mean(g_y.log_prob(0.5))

loss = -(log_prior + 1000.0 * (left_bias + right_bias + neutral))

### TRAINING
optimizer = tf.train.AdamOptimizer()
train = optimizer.minimize(loss)

sess = tf.Session()
with sess.as_default():
    tf.global_variables_initializer().run()

saver = tf.train.Saver()
with tf.name_scope('objective'):
    tf.summary.scalar('loss', loss)
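# Hypothetical training loop for the graph above; the feed arrays left_X,
# right_X, g_X (each of shape (mb, K)) and n_steps are assumptions, not part
# of the original code.
with sess.as_default():
    for step in range(n_steps):
        _, cur_loss = sess.run([train, loss],
                               feed_dict={left_ex: left_X,
                                          right_ex: right_X,
                                          g_ex: g_X})
        if step % 100 == 0:
            print('step', step, 'loss', cur_loss)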
# Assumed module-level imports (not shown in this excerpt):
#   import os
#   import numpy as np
#   import tensorflow as tf
#   import matplotlib.pyplot as plt
#   from datetime import datetime as dt
#   from sklearn.utils import shuffle
#   from tensorflow.contrib.distributions import Bernoulli, Normal
#   from tensorflow.contrib.layers import (batch_norm, dropout,
#       fully_connected, variance_scaling_initializer)
#   from tensorflow.contrib.rnn import LayerNormBasicLSTMCell, MultiRNNCell
def fit(self, data, epochs=1000, max_seconds=600, activation=tf.nn.elu,
        batch_norm_decay=0.9, learning_rate=1e-5, batch_sz=1024,
        adapt_lr=False, print_progress=True, show_fig=True):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Static features
    X = data['X_train_static_mins']
    N, D = X.shape
    self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')

    # Time-series features
    X_time = data['X_train_time_0']
    T1, N1, D1 = X_time.shape
    assert N == N1
    self.X_time = tf.placeholder(tf.float32, shape=(T1, None, D1),
                                 name='X_time')

    self.train = tf.placeholder(tf.bool, shape=(), name='train')
    # NOTE: these keep-prob placeholders are fed below, but the LSTM cells
    # are built with the Python-level dropout constants instead.
    self.rnn_keep_p_encode = tf.placeholder(tf.float32, shape=(),
                                            name='rnn_keep_p_encode')
    self.rnn_keep_p_decode = tf.placeholder(tf.float32, shape=(),
                                            name='rnn_keep_p_decode')
    adp_learning_rate = tf.placeholder(tf.float32, shape=(),
                                       name='adp_learning_rate')

    he_init = variance_scaling_initializer()
    bn_params = {
        'is_training': self.train,
        'decay': batch_norm_decay,
        'updates_collections': None
    }
    latent_size = self.encoder_layer_sizes[-1]

    inputs = self.X
    with tf.variable_scope('static_encoder'):
        for layer_size, keep_p in zip(self.encoder_layer_sizes[:-1],
                                      self.encoder_dropout[:-1]):
            inputs = dropout(inputs, keep_p, is_training=self.train)
            inputs = fully_connected(inputs, layer_size,
                                     weights_initializer=he_init,
                                     activation_fn=activation,
                                     normalizer_fn=batch_norm,
                                     normalizer_params=bn_params)

    if self.rnn_encoder_layer_sizes:
        with tf.variable_scope('rnn_encoder'):
            rnn_cell = MultiRNNCell([
                LayerNormBasicLSTMCell(
                    s, activation=tf.tanh,
                    dropout_keep_prob=self.rnn_encoder_dropout)
                for s in self.rnn_encoder_layer_sizes
            ])
            time_inputs, states = tf.nn.dynamic_rnn(rnn_cell, self.X_time,
                                                    swap_memory=True,
                                                    time_major=True,
                                                    dtype=tf.float32)
            # (T1, batch, units) -> (batch, T1 * units)
            time_inputs = tf.transpose(time_inputs, perm=(1, 0, 2))
            time_inputs = tf.reshape(
                time_inputs,
                shape=(-1, self.rnn_encoder_layer_sizes[-1] * T1))
            inputs = tf.concat([inputs, time_inputs], axis=1)

    with tf.variable_scope('latent_space'):
        inputs = dropout(inputs, self.encoder_dropout[-1],
                         is_training=self.train)
        loc = fully_connected(inputs, latent_size,
                              weights_initializer=he_init,
                              activation_fn=None,
                              normalizer_fn=batch_norm,
                              normalizer_params=bn_params)
        scale = fully_connected(inputs, latent_size,
                                weights_initializer=he_init,
                                activation_fn=tf.nn.softplus,
                                normalizer_fn=batch_norm,
                                normalizer_params=bn_params)
        # Reparameterization trick: z = loc + scale * e, with e ~ N(0, I)
        standard_normal = Normal(loc=np.zeros(latent_size, dtype=np.float32),
                                 scale=np.ones(latent_size, dtype=np.float32))
        e = standard_normal.sample(tf.shape(loc)[0])
        outputs = e * scale + loc

    static_output_size = self.decoder_layer_sizes[0]
    if self.rnn_decoder_layer_sizes:
        time_output_size = self.rnn_decoder_layer_sizes[0] * T1
        output_size = static_output_size + time_output_size
    else:
        output_size = static_output_size
    outputs = fully_connected(outputs, output_size,
                              weights_initializer=he_init,
                              activation_fn=activation,
                              normalizer_fn=batch_norm,
                              normalizer_params=bn_params)
    if self.rnn_decoder_layer_sizes:
        outputs, time_outputs = tf.split(
            outputs, [static_output_size, time_output_size], axis=1)

    with tf.variable_scope('static_decoder'):
        for layer_size, keep_p in zip(self.decoder_layer_sizes,
                                      self.decoder_dropout[:-1]):
            outputs = dropout(outputs, keep_p, is_training=self.train)
            outputs = fully_connected(outputs, layer_size,
                                      weights_initializer=he_init,
                                      activation_fn=activation,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)
        outputs = dropout(outputs, self.decoder_dropout[-1],
                          is_training=self.train)
        outputs = fully_connected(outputs, D,
                                  weights_initializer=he_init,
                                  activation_fn=None,
                                  normalizer_fn=batch_norm,
                                  normalizer_params=bn_params)
    X_hat = Bernoulli(logits=outputs)
    self.posterior_predictive = X_hat.sample()
    self.posterior_predictive_probs = tf.nn.sigmoid(outputs)

    if self.rnn_decoder_layer_sizes:
        with tf.variable_scope('rnn_decoder'):
            self.rnn_decoder_layer_sizes.append(D1)
            time_output_size = self.rnn_decoder_layer_sizes[0]
            # (batch, T1 * units) -> (T1, batch, units)
            time_outputs = tf.reshape(time_outputs,
                                      shape=(-1, T1, time_output_size))
            time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
            rnn_cell = MultiRNNCell([
                LayerNormBasicLSTMCell(
                    s, activation=tf.tanh,
                    dropout_keep_prob=self.rnn_decoder_dropout)
                for s in self.rnn_decoder_layer_sizes
            ])
            time_outputs, states = tf.nn.dynamic_rnn(rnn_cell, time_outputs,
                                                     swap_memory=True,
                                                     time_major=True,
                                                     dtype=tf.float32)
            time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
            time_outputs = tf.reshape(time_outputs, shape=(-1, T1 * D1))
            X_hat_time = Bernoulli(logits=time_outputs)
            posterior_predictive_time = X_hat_time.sample()
            posterior_predictive_time = tf.reshape(
                posterior_predictive_time, shape=(-1, T1, D1))
            self.posterior_predictive_time = tf.transpose(
                posterior_predictive_time, perm=(1, 0, 2))
            self.posterior_predictive_probs_time = tf.nn.sigmoid(
                time_outputs)

    # Closed-form KL(N(loc, scale) || N(0, 1)), summed over latent dimensions
    kl_div = -tf.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
    kl_div = tf.reduce_sum(kl_div, axis=1)

    expected_log_likelihood = tf.reduce_sum(X_hat.log_prob(self.X), axis=1)

    X_time_trans = tf.transpose(self.X_time, perm=(1, 0, 2))
    X_time_reshape = tf.reshape(X_time_trans, shape=(-1, T1 * D1))
    if self.rnn_encoder_layer_sizes:
        expected_log_likelihood_time = tf.reduce_sum(
            X_hat_time.log_prob(X_time_reshape), axis=1)
        # Negative ELBO: minimizing this maximizes the evidence lower bound
        elbo = -tf.reduce_sum(expected_log_likelihood +
                              expected_log_likelihood_time - kl_div)
    else:
        elbo = -tf.reduce_sum(expected_log_likelihood - kl_div)

    train_op = tf.train.AdamOptimizer(
        learning_rate=adp_learning_rate).minimize(elbo)

    tf.summary.scalar('elbo', elbo)
    if self.save_file:
        saver = tf.train.Saver()
    if self.tensorboard:
        for v in tf.trainable_variables():
            tf.summary.histogram(v.name, v)
        train_merge = tf.summary.merge_all()
        writer = tf.summary.FileWriter(self.tensorboard)

    self.init_op = tf.global_variables_initializer()

    n = 0
    n_batches = N // batch_sz
    costs = list()
    min_cost = np.inf
    t0 = dt.now()
    with tf.Session() as sess:
        sess.run(self.init_op)
        for epoch in range(epochs):
            # was shuffle(range(N)); an ndarray is needed for the fancy
            # indexing below
            idxs = shuffle(np.arange(N))
            X_train = X[idxs]
            X_train_time = X_time[:, idxs]
            for batch in range(n_batches):
                n += 1
                X_batch = X_train[batch * batch_sz:(batch + 1) * batch_sz]
                X_batch_time = X_train_time[:, batch * batch_sz:
                                            (batch + 1) * batch_sz]
                sess.run(train_op,
                         feed_dict={
                             self.X: X_batch,
                             self.X_time: X_batch_time,
                             self.rnn_keep_p_encode: self.rnn_encoder_dropout,
                             self.rnn_keep_p_decode: self.rnn_decoder_dropout,
                             self.train: True,
                             adp_learning_rate: learning_rate
                         })
                if n % 100 == 0 and print_progress:
                    cost = sess.run(elbo,
                                    feed_dict={
                                        self.X: X,
                                        self.X_time: X_time,
                                        self.rnn_keep_p_encode: 1.0,
                                        self.rnn_keep_p_decode: 1.0,
                                        self.train: False
                                    })
                    cost /= N
                    costs.append(cost)
                    if adapt_lr and epoch > 0:
                        if cost < min_cost:
                            min_cost = cost
                        elif cost > min_cost * 1.01:
                            learning_rate *= 0.75
                            if print_progress:
                                print('Updating Learning Rate', learning_rate)
                    print('Epoch:', epoch, 'Batch:', batch, 'Cost:', cost)
                    if self.tensorboard:
                        train_sum = sess.run(train_merge,
                                             feed_dict={
                                                 self.X: X,
                                                 self.X_time: X_time,
                                                 self.rnn_keep_p_encode: 1.0,
                                                 self.rnn_keep_p_decode: 1.0,
                                                 self.train: False
                                             })
                        writer.add_summary(train_sum, n)
                seconds = (dt.now() - t0).seconds
                if seconds > max_seconds:
                    if print_progress:
                        print('Breaking after', seconds, 'seconds')
                    break
        if self.save_file:
            saver.save(sess, self.save_file)
        if self.tensorboard:
            writer.add_graph(sess.graph)

    if show_fig:
        plt.plot(costs)
        plt.title('Costs and Scores')
        plt.show()
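# The fit() method above uses the closed-form KL divergence between the
# approximate posterior N(loc, scale) and the standard normal prior:
#   KL = -log(scale) + (scale^2 + loc^2) / 2 - 1/2   (per dimension).
# A quick Monte Carlo check with assumed toy values (not part of the
# original code):
import numpy as np

loc, scale = 0.7, 1.3
z = np.random.normal(loc, scale, size=1000000)
log_q = -0.5 * np.log(2 * np.pi * scale**2) - (z - loc)**2 / (2 * scale**2)
log_p = -0.5 * np.log(2 * np.pi) - z**2 / 2
mc_kl = (log_q - log_p).mean()
closed_form = -np.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
print(mc_kl, closed_form)  # agree to roughly three decimal places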
class bern_emb_model():
    def __init__(self, d, K, sig, sess, logdir):
        self.K = K
        self.sig = sig
        self.sess = sess
        self.logdir = logdir

        with tf.name_scope('model'):
            # Data placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index masks (int(d.cs / 2) instead of d.cs / 2: Python 3 true
            # division yields floats, which breaks the integer index math)
            with tf.name_scope('context_mask'):
                self.p_mask = tf.cast(
                    tf.range(int(d.cs / 2), d.n_minibatch + int(d.cs / 2)),
                    tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, int(d.cs / 2)), [0]),
                            [d.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, d.n_minibatch), [1]),
                            [1, int(d.cs / 2)]), tf.int32)
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + int(d.cs / 2) + 1], 1)

            with tf.name_scope('embeddings'):
                # Embedding vectors
                self.rho = tf.Variable(
                    tf.random_normal([d.L, self.K]) / self.K, name='rho')
                # Context vectors
                self.alpha = tf.Variable(
                    tf.random_normal([d.L, self.K]) / self.K, name='alpha')

            with tf.name_scope('priors'):
                prior = Normal(loc=0.0, scale=self.sig)
                self.log_prior = tf.reduce_sum(
                    prior.log_prob(self.rho) + prior.log_prob(self.alpha))

            with tf.name_scope('natural_param'):
                # Target and context indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(d.unigram)), [0]),
                        [d.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, d.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1),
                                [1, d.ns, 1])), -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)
            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))
            self.log_likelihood = self.ll_pos + self.ll_neg

            # Scale the minibatch log-likelihood up to the full corpus
            scale = 1.0 * d.N / d.n_minibatch
            self.loss = -(scale * self.log_likelihood + self.log_prior)

            # Training
            optimizer = tf.train.AdamOptimizer()
            self.train = optimizer.minimize(self.loss)

        with self.sess.as_default():
            tf.global_variables_initializer().run()

        variable_summaries('rho', self.rho)
        variable_summaries('alpha', self.alpha)
        with tf.name_scope('objective'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('priors', self.log_prior)
            tf.summary.scalar('ll_pos', self.ll_pos)
            tf.summary.scalar('ll_neg', self.ll_neg)
        self.summaries = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(self.logdir,
                                                  self.sess.graph)
        self.saver = tf.train.Saver()

        # TensorBoard embedding-projector setup
        config = projector.ProjectorConfig()
        alpha = config.embeddings.add()
        alpha.tensor_name = 'model/embeddings/alpha'
        alpha.metadata_path = '../vocab.tsv'
        rho = config.embeddings.add()
        rho.tensor_name = 'model/embeddings/rho'
        rho.metadata_path = '../vocab.tsv'
        projector.visualize_embeddings(self.train_writer, config)

    def dump(self, fname):
        with self.sess.as_default():
            dat = {'rho': self.rho.eval(), 'alpha': self.alpha.eval()}
            # was open(fname, "a+"); pickle requires a binary-mode file
            pickle.dump(dat, open(fname, "ab+"))

    def plot_params(self, dir_name, labels):
        plot_only = len(labels)
        with self.sess.as_default():
            tsne = TSNE(perplexity=30, n_components=2, init='pca',
                        n_iter=5000)
            low_dim_embs_alpha2 = tsne.fit_transform(
                self.alpha.eval()[:plot_only])
            plot_with_labels(low_dim_embs_alpha2[:plot_only],
                             labels[:plot_only], dir_name + '/alpha.eps')
            tsne = TSNE(perplexity=30, n_components=2, init='pca',
                        n_iter=5000)
            low_dim_embs_rho2 = tsne.fit_transform(
                self.rho.eval()[:plot_only])
            plot_with_labels(low_dim_embs_rho2[:plot_only],
                             labels[:plot_only], dir_name + '/rho.eps')
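# `plot_with_labels` is called above but not defined in this excerpt; this is
# a minimal sketch of what such a helper presumably does, with the signature
# inferred from the call sites (2-D embeddings, matching labels, output path).
import matplotlib.pyplot as plt

def plot_with_labels(low_dim_embs, labels, fname):
    plt.figure(figsize=(18, 18))
    for (x, y), label in zip(low_dim_embs, labels):
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2),
                     textcoords='offset points', ha='right', va='bottom')
    plt.savefig(fname)
    plt.close()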