def q_net_font(observed, x, is_training):
    with zs.BayesianNet(observed=observed) as encoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        ladder0 = layers.conv2d(
            x, ngf, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0, ngf * 2, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0, ngf * 4, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0, ngf * 8, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.flatten(ladder0)
        font_mean = layers.fully_connected(ladder0, font_dim,
                                           activation_fn=tf.identity)
        font_std = layers.fully_connected(ladder0, font_dim,
                                          activation_fn=tf.sigmoid)
        z_font = zs.Normal('z_font', mean=font_mean, std=font_std,
                           n_samples=1, group_event_ndims=1)
    return encoder, z_font

def test_normal(self):
    tensor_shape = (8, 12, 99)
    with self.cached_session():
        self._runner(
            init_ops.RandomNormal(mean=0, stddev=1, seed=153),
            tensor_shape, target_mean=0., target_std=1)

def __call__(self, shape, dtype=None, partition_info=None):
    """
    :param tuple[int] shape: [..., key_dim, num_hashes // 2]
    :param None|tf.DType dtype:
    :param partition_info:
    :rtype: tf.Tensor
    """
    import tensorflow as tf
    assert len(shape) >= 2
    key_dim, num_hashes = shape[-2:]
    sampled_hash_gen_top = tf.stack(
        [self.base_initializer(shape=shape, dtype=dtype, partition_info=partition_info)
         for _ in range(self.num_hash_init_samples)],
        axis=len(shape) - 2)  # [..., init_sample, key_dim, num_hashes // 2]
    # sample keys (same key for each hash init)
    sampled_keys = init_ops.RandomNormal()(
        shape=(self.num_key_samples, key_dim), dtype=dtype)  # [key_sample, key_dim]
    # compute distribution for each and choose the one where the size of the
    # smallest hash class is maximized
    sampled_hash_gen = tf.concat(
        [sampled_hash_gen_top, -sampled_hash_gen_top],
        axis=-1)  # [..., init_sample, key_dim, num_hashes]
    sampled_hash_dense = tf.matmul(
        sampled_keys, sampled_hash_gen)  # [..., init_sample, key_sample, num_hashes]
    sampled_hash = tf.argmax(
        sampled_hash_dense, axis=-1, output_type='int32')  # [..., init_sample, key_sample]
    sampled_hash_counts = bincount_nd(
        sampled_hash, axis=-1, minlength=num_hashes)  # [..., init_sample, num_hashes]
    from returnn.tf.util.basic import py_print
    sampled_hash_std = tf.math.reduce_std(
        tf.cast(sampled_hash_counts, 'float32') / self.num_key_samples,
        axis=-1)  # [..., init_sample]
    best_sample = tf.argmin(sampled_hash_std, axis=-1)  # [...]
    best_sampled_hash_gen_top = tf.gather(
        params=sampled_hash_gen_top, indices=best_sample,
        axis=len(shape) - 2, batch_dims=len(shape) - 2)  # [..., key_dim, num_hashes // 2]
    assert best_sampled_hash_gen_top.shape == shape
    return best_sampled_hash_gen_top

def vae(observed, n, y, is_training):
    with zs.BayesianNet(observed=observed) as decoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        z_mean = tf.zeros([n, n_z])
        z = zs.Normal('z', z_mean, std=1., n_samples=1, group_event_ndims=1)
        z = tf.reshape(z, [-1, n_z])
        yb = tf.reshape(y, [-1, 1, 1, n_code])
        lx_z = layers.fully_connected(
            tf.concat([z, y], 1), 1024, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.fully_connected(
            tf.concat([lx_z, y], 1), ngf * 8 * 4 * 4, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = tf.reshape(lx_z, [-1, 4, 4, ngf * 8])
        # assert tf.shape(lx_z)[0] == n
        lx_z = layers.conv2d_transpose(
            conv_cond_concat(lx_z, yb), ngf * 4, 5, stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.conv2d_transpose(
            conv_cond_concat(lx_z, yb), ngf * 2, 5, stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.conv2d_transpose(
            conv_cond_concat(lx_z, yb), ngf * 1, 5, stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.conv2d_transpose(
            conv_cond_concat(lx_z, yb), 1, 5, stride=2, activation_fn=None,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        x_logits = tf.reshape(lx_z, [1, -1, n_x])
        x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
    return decoder, x_logits

def q_net(observed, x, y, is_training):
    with zs.BayesianNet(observed=observed) as encoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        yb = tf.reshape(y, [-1, 1, 1, n_code])
        lz_x = layers.conv2d(
            conv_cond_concat(x, yb), ngf, 5, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.conv2d(
            conv_cond_concat(lz_x, yb), ngf * 2, 5, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.conv2d(
            conv_cond_concat(lz_x, yb), ngf * 4, 5, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.conv2d(
            conv_cond_concat(lz_x, yb), ngf * 8, 5, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.flatten(lz_x)
        # assert tf.shape(lz_x)[0] == tf.shape(y)[0]
        lz_x = layers.fully_connected(
            tf.concat([lz_x, y], 1), 1024, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.fully_connected(
            tf.concat([lz_x, y], 1), 2 * n_z, activation_fn=None,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        mu, logstd = lz_x[:, :n_z], lz_x[:, n_z:]
        lz_x = zs.Normal('z', mu, logstd, n_samples=1, group_event_ndims=1)
    return encoder, lz_x

def q_net(observed, x, is_training):
    with zs.BayesianNet(observed=observed) as encoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        # ladder0 = layers.conv2d(x, ngf, 4, stride=2, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = layers.conv2d(ladder0, ngf, 4, stride=1, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = layers.flatten(ladder0)
        # latent0_mean = layers.fully_connected(ladder0, font_dim, activation_fn=tf.identity)
        # latent0_std = layers.fully_connected(ladder0, font_dim, activation_fn=tf.sigmoid)
        inference0 = layers.conv2d(
            x, ngf, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.conv2d(
            inference0, ngf, 4, stride=1, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.conv2d(
            inference0, ngf * 2, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.conv2d(
            ladder1, ngf * 2, 4, stride=1, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.conv2d(
            ladder1, ngf * 4, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.flatten(ladder1)
        latent1_mean = layers.fully_connected(ladder1, font_dim,
                                              activation_fn=tf.identity)
        latent1_std = layers.fully_connected(ladder1, font_dim,
                                             activation_fn=tf.sigmoid)
        inference1 = layers.conv2d(
            inference0, ngf * 2, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.conv2d(
            inference1, ngf * 2, 4, stride=1, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.conv2d(
            inference1, ngf * 4, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.conv2d(
            inference1, ngf * 4, 4, stride=1, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.conv2d(
            ladder2, ngf * 4, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.conv2d(
            ladder2, ngf * 8, 4, stride=1, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.flatten(ladder2)
        latent2_mean = layers.fully_connected(ladder2, char_dim,
                                              activation_fn=tf.identity)
        latent2_std = layers.fully_connected(ladder2, char_dim,
                                             activation_fn=tf.sigmoid)
        # inference2 = layers.conv2d(inference1, ngf * 4, 4, stride=2, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # inference2 = layers.conv2d(inference2, ngf * 4, 4, stride=1, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # inference2 = layers.conv2d(inference2, ngf * 8, 4, stride=2, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder3 = layers.conv2d(inference2, ngf * 8, 4, stride=2, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder3 = layers.conv2d(ladder3, ngf * 8, 4, stride=1, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder3 = layers.conv2d(ladder3, ngf * 16, 4, stride=2, activation_fn=lrelu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder3 = layers.flatten(ladder3)
        # latent3_mean = layers.fully_connected(ladder3, 20, activation_fn=tf.identity)
        # latent3_std = layers.fully_connected(ladder3, 20, activation_fn=tf.sigmoid)
        z_char = zs.Normal('z_char', mean=latent2_mean, std=latent2_std,
                           n_samples=1, group_event_ndims=1)
        z_font = zs.Normal('z_font', mean=latent1_mean, std=latent1_std,
                           n_samples=1, group_event_ndims=1)
    return encoder, z_font, z_char

def VLAE(observed, n, is_training):
    with zs.BayesianNet(observed=observed) as decoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        z_char_mean = tf.zeros([n, char_dim])
        z_char = zs.Normal('z_char', z_char_mean, std=1., n_samples=1,
                           group_event_ndims=1)
        z_char = tf.reshape(z_char, [-1, char_dim])
        z_font_mean = tf.zeros([n, font_dim])
        z_font = zs.Normal('z_font', z_font_mean, std=1., n_samples=1,
                           group_event_ndims=1)
        z_font = tf.reshape(z_font, [-1, font_dim])
        latent2 = z_char
        latent1 = z_font
        ladder2 = layers.fully_connected(
            latent2, ngf * 8 * 4 * 4, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = tf.reshape(ladder2, [-1, 4, 4, ngf * 8])
        ladder2 = layers.conv2d_transpose(
            ladder2, ngf * 8, 4, stride=1, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.conv2d_transpose(
            ladder2, ngf * 4, 4, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.conv2d_transpose(
            ladder2, ngf * 4, 4, stride=1, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.fully_connected(
            latent1, ngf * 4 * 8 * 8, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = tf.reshape(ladder1, [-1, 8, 8, ngf * 4])
        ladder1 = tf.concat([ladder1, inference1], 3)
        ladder1 = layers.conv2d_transpose(
            ladder1, ngf * 2, 4, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.conv2d_transpose(
            ladder1, ngf * 2, 4, stride=1, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.conv2d_transpose(
            ladder1, ngf, 4, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = layers.fully_connected(latent0, ngf * 1 * 32 * 32, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = tf.reshape(ladder0, [-1, 32, 32, ngf * 1])
        # ladder0 = tf.concat([ladder0, inference0], 3)
        ladder0 = layers.conv2d_transpose(
            inference0, ngf, 4, stride=1, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        x_logits = layers.conv2d_transpose(
            ladder0, 1, 4, stride=2, activation_fn=tf.identity,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        x_logits = tf.reshape(x_logits, [1, -1, n_x])
        x = zs.Bernoulli('x', x_logits, n_samples=1, group_event_ndims=1)
    return decoder, x_logits

def run_decatt(train, val, test, word_to_index, intra_sent, emb_size, emb_glove,
               context_size, n_intra, n_intra_bias, n_attend, n_compare,
               n_classif, dropout_rate, pred_thres, lr, batch_size, epoch_size):
    # special words
    word_to_index['\0'] = len(word_to_index)

    # network
    tf.reset_default_graph()
    X_doc_1 = tf.placeholder(tf.int32, [None, None, 2 * context_size + 1])
    X_doc_2 = tf.placeholder(tf.int32, [None, None, 2 * context_size + 1])
    mask_1 = tf.placeholder(tf.float32, [None, None])
    mask_2 = tf.placeholder(tf.float32, [None, None])
    y = tf.placeholder(tf.int32, [None])
    training = tf.placeholder(tf.bool, [])
    emb_shape = [len(word_to_index), emb_size]
    emb = tf.Variable(tf.zeros(emb_shape) if emb_glove
                      else tf.random_normal(emb_shape, 0, 0.01))
    ngram, mask_n = [None, None], [None, None]
    for i in range(2):
        X_doc = [X_doc_1, X_doc_2][i]
        batch_size_, n_words_ = tf.shape(X_doc)[0], tf.shape(X_doc)[1]
        X_doc_pad = tf.fill([batch_size_, 2 * context_size], word_to_index['\0'])
        X_doc_n = tf.concat([X_doc_pad, X_doc, X_doc_pad], 1)
        X_doc_n = tf.map_fn(
            lambda j: X_doc_n[:, j:j + n_words_ + 2 * context_size],
            tf.range(2 * context_size + 1))
        X_doc_n = tf.transpose(X_doc_n, [1, 0, 2])
        mask_n[i] = tf.concat(
            [tf.ones([batch_size_, 2 * context_size]), [mask_1, mask_2][i]], 1)
        ngram[i] = tf.nn.embedding_lookup(emb, X_doc_n)
        ngram[i] = tf.reshape(
            ngram[i], [batch_size_, -1, (2 * context_size + 1) * emb_size])
    if intra_sent:
        l_intra = sum([[
            Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0, 0.01)),
            Dropout(rate=dropout_rate),
        ] for n in n_intra], [])
        long_dist_bias = tf.Variable(tf.zeros([]))
        dist_biases = tf.Variable(tf.zeros([2 * n_intra_bias + 1]))
        for i in range(2):
            intra_d = apply_layers(l_intra, ngram[i], training=training)
            intra_w = tf.matmul(intra_d, tf.transpose(intra_d, [0, 2, 1]))
            ngram[i] = tf.concat([
                ngram[i],
                attend_intra(intra_w, ngram[i], mask_n[i], n_intra_bias,
                             long_dist_bias, dist_biases)
            ], 2)
    l_attend = sum([[
        Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0, 0.01)),
        Dropout(rate=dropout_rate),
    ] for n in n_attend], [])
    attend_d_1 = apply_layers(l_attend, ngram[0], training=training)
    attend_d_2 = apply_layers(l_attend, ngram[1], training=training)
    attend_w = tf.matmul(attend_d_1, tf.transpose(attend_d_2, [0, 2, 1]))
    attend_1 = attend_inter(attend_w, ngram[1], mask_n[1])
    attend_2 = attend_inter(tf.transpose(attend_w, [0, 2, 1]), ngram[0], mask_n[0])
    l_compare = sum([[
        Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0, 0.01)),
        Dropout(rate=dropout_rate),
    ] for n in n_compare], [])
    compare_1 = apply_layers(l_compare, tf.concat([ngram[0], attend_1], 2),
                             training=training)
    compare_2 = apply_layers(l_compare, tf.concat([ngram[1], attend_2], 2),
                             training=training)
    agg_1 = tf.reduce_sum(tf.expand_dims(mask_n[0], -1) * compare_1, 1)
    agg_2 = tf.reduce_sum(tf.expand_dims(mask_n[1], -1) * compare_2, 1)
    l_classif = sum([[
        Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0, 0.01)),
        Dropout(rate=dropout_rate),
    ] for n in n_classif], [])
    logits = apply_layers(l_classif, tf.concat([agg_1, agg_2], 1),
                          training=training)
    logits = tf.layers.dense(logits, 2,
                             kernel_initializer=init_ops.RandomNormal(0, 0.01))
    probs = tf.nn.softmax(logits)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    opt = tf.train.AdagradOptimizer(learning_rate=lr)
    grads = opt.compute_gradients(loss)
    grads = [(grad, var) for grad, var in grads if var != emb] if emb_glove else grads
    train_op = opt.apply_gradients(grads)

    # run
    with tf.Session() as sess:
        # start sampling
        qs = {name: Queue(1) for name in ('train', 'val', 'test')}
        for name, docs in ('train', train), ('val', val), ('test', test):
            Process(target=sample,
                    args=(docs, word_to_index, context_size, epoch_size,
                          batch_size, qs[name])).start()

        # initialize variables
        sess.run(tf.global_variables_initializer())
        if emb_glove:
            emb_0 = tf.Variable(0., validate_shape=False)
            saver = tf.train.Saver({'emb': emb_0})
            saver.restore(sess, '__cache__/tf/emb/model.ckpt')
            sess.run(emb[:tf.shape(emb_0)[0]].assign(emb_0))

        # train
        print(datetime.datetime.now(), 'started training')
        for i in range(epoch_size):
            total_loss, correct = 0, {'val': 0, 'test': 0}
            for X_doc_1_, X_doc_2_, mask_1_, mask_2_, y_ in qs['train'].get():
                _, batch_loss = sess.run([train_op, loss], feed_dict={
                    X_doc_1: X_doc_1_, X_doc_2: X_doc_2_,
                    mask_1: mask_1_, mask_2: mask_2_,
                    y: y_, training: True,
                })
                total_loss += len(y_) * batch_loss
            for name in 'val', 'test':
                for X_doc_1_, X_doc_2_, mask_1_, mask_2_, y_ in qs[name].get():
                    probs_ = sess.run(probs, feed_dict={
                        X_doc_1: X_doc_1_, X_doc_2: X_doc_2_,
                        mask_1: mask_1_, mask_2: mask_2_,
                        training: False,
                    })
                    correct[name] += np.sum((probs_[:, 1] >= pred_thres) == y_)
            print(datetime.datetime.now(),
                  f'finished epoch {i}, loss: {total_loss / len(train):f}, '
                  f'val acc: {correct["val"] / len(val):f}, '
                  f'test acc: {correct["test"] / len(test):f}')

def VLAE(observed, n, n_x, n_z_0, n_z_1, n_z_2, n_particles, is_training):
    with zs.BayesianNet(observed=observed) as model:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        z_2_mean = tf.zeros([n, n_z_2])
        z_2 = zs.Normal('z_2', z_2_mean, std=1., n_samples=n_particles,
                        group_event_ndims=1)
        z_2 = tf.reshape(z_2, [-1, n_z_2])
        z_1_mean = tf.zeros([n, n_z_1])
        z_1 = zs.Normal('z_1', z_1_mean, std=1., n_samples=n_particles,
                        group_event_ndims=1)
        z_1 = tf.reshape(z_1, [-1, n_z_1])
        z_0_mean = tf.zeros([n, n_z_0])
        z_0 = zs.Normal('z_0', z_0_mean, std=1., n_samples=n_particles,
                        group_event_ndims=1)
        z_0 = tf.reshape(z_0, [-1, n_z_0])
        latent2 = z_2
        latent1 = z_1
        latent0 = z_0
        ladder2 = layers.fully_connected(
            latent2, ngf * 16, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.fully_connected(
            ladder2, ngf * 16, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.fully_connected(ladder2, ngf * 16,
                                            activation_fn=tf.identity)
        ladder1 = layers.fully_connected(
            latent1, ngf * 16, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = tf.concat([ladder1, inference1], 1)
        ladder1 = layers.fully_connected(
            ladder1, ngf * 16, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.fully_connected(
            ladder1, ngf * 16, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.fully_connected(ladder1, ngf * 16,
                                            activation_fn=tf.identity)
        ladder0 = layers.fully_connected(
            latent0, ngf * 16, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = tf.concat([ladder0, inference0], 1)
        ladder0 = layers.fully_connected(
            ladder0, int(ngf * 2 * 7 * 7), activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = tf.reshape(ladder0, [-1, 7, 7, ngf * 2])
        ladder0 = layers.conv2d_transpose(
            ladder0, ngf, 4, stride=2, activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        x_logits = layers.conv2d_transpose(
            ladder0, 1, 4, stride=2, activation_fn=tf.identity,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        x_logits = tf.reshape(x_logits, [-1, n_x])
        x = zs.Bernoulli('x', x_logits, n_samples=n_particles,
                         group_event_ndims=1)
    return model, x_logits

def q_net(observed, x, n_z_0, n_z_1, n_z_2, n_particles, is_training):
    with zs.BayesianNet(observed=observed) as variational:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        # lz_x = layers.fully_connected(tf.to_float(x), 500, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params)
        # lz_x = layers.fully_connected(lz_x, 500, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params)
        # z_mean = layers.fully_connected(lz_x, n_z, activation_fn=None)
        # z_logstd = layers.fully_connected(lz_x, n_z, activation_fn=None)
        # z = zs.Normal('z', z_mean, logstd=z_logstd, n_samples=n_particles, group_event_ndims=1)
        # return variational
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        ladder0 = layers.conv2d(
            x, ngf, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0, ngf * 2, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.flatten(ladder0)
        latent0_mean = layers.fully_connected(ladder0, n_z_0,
                                              activation_fn=tf.identity)
        latent0_std = layers.fully_connected(ladder0, n_z_0,
                                             activation_fn=tf.sigmoid)
        inference0 = layers.conv2d(
            x, ngf, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.conv2d(
            inference0, ngf, 4, stride=2, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.flatten(inference0)
        inference0 = layers.fully_connected(inference0, ngf * 16,
                                            activation_fn=tf.identity)
        ladder1 = layers.fully_connected(
            inference0, ngf * 16, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.fully_connected(
            ladder1, ngf * 16, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        latent1_mean = layers.fully_connected(ladder1, n_z_1,
                                              activation_fn=tf.identity)
        latent1_std = layers.fully_connected(ladder1, n_z_1,
                                             activation_fn=tf.sigmoid)
        inference1 = layers.fully_connected(
            inference0, ngf * 16, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.fully_connected(
            inference1, ngf * 16, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.fully_connected(inference1, ngf * 16,
                                            activation_fn=tf.identity)
        ladder2 = layers.fully_connected(
            inference1, ngf * 16, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.fully_connected(
            ladder2, ngf * 16, activation_fn=lrelu,
            normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        latent2_mean = layers.fully_connected(ladder2, n_z_2,
                                              activation_fn=tf.identity)
        latent2_std = layers.fully_connected(ladder2, n_z_2,
                                             activation_fn=tf.sigmoid)
        z_0 = zs.Normal('z_0', mean=latent0_mean, std=latent0_std,
                        n_samples=n_particles, group_event_ndims=1)
        z_1 = zs.Normal('z_1', mean=latent1_mean, std=latent1_std,
                        n_samples=n_particles, group_event_ndims=1)
        z_2 = zs.Normal('z_2', mean=latent2_mean, std=latent2_std,
                        n_samples=n_particles, group_event_ndims=1)
    return variational, z_0, z_1, z_2