Example #1
File: step.py Project: sdy1106/VLAE
def q_net_font(observed, x, is_training):
    with zs.BayesianNet(observed=observed) as encoder:
        normalizer_params = {
            'is_training': is_training,
            'updates_collections': None
        }
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        ladder0 = layers.conv2d(
            x,
            ngf,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0,
            ngf * 2,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0,
            ngf * 4,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0,
            ngf * 8,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.flatten(ladder0)
        font_mean = layers.fully_connected(ladder0,
                                           font_dim,
                                           activation_fn=tf.identity)
        font_std = layers.fully_connected(ladder0,
                                          font_dim,
                                          activation_fn=tf.sigmoid)

        z_font = zs.Normal('z_font',
                           mean=font_mean,
                           std=font_std,
                           n_samples=1,
                           group_event_ndims=1)
    return encoder, z_font
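The q_net_font encoder above relies on module-level names (ngf, n_xl, font_dim) and a leaky-ReLU helper lrelu defined elsewhere in the project. A minimal sketch of what such a helper typically looks like (an assumed DCGAN-style definition, not the project's verified code):

import tensorflow as tf

def lrelu(x, leak=0.2, name="lrelu"):
    # Leaky ReLU: max(x, leak * x); a common DCGAN-style activation helper.
    # Assumed definition -- the VLAE project's own helper may differ.
    return tf.maximum(x, leak * x, name=name)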
Example #2
def test_normal(self):
    tensor_shape = (8, 12, 99)
    with self.cached_session():
        self._runner(init_ops.RandomNormal(mean=0, stddev=1, seed=153),
                     tensor_shape,
                     target_mean=0.,
                     target_std=1)
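The _runner helper comes from TensorFlow's initializer test utilities; it draws a tensor from the initializer and checks its sample statistics. A rough self-contained equivalent, assuming TensorFlow 1.x sessions and an arbitrary tolerance chosen here for illustration:

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import init_ops

init = init_ops.RandomNormal(mean=0.0, stddev=1.0, seed=153)
t = init(shape=(8, 12, 99))  # initializers are callable and return a tensor of samples
with tf.Session() as sess:
    samples = sess.run(t)
# With roughly 9500 samples the empirical statistics should sit close to the targets.
assert abs(samples.mean() - 0.0) < 0.05
assert abs(samples.std() - 1.0) < 0.05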
Example #3
  def __call__(self, shape, dtype=None, partition_info=None):
    """
    :param tuple[int] shape: [..., key_dim, num_hashes // 2]
    :param None|tf.DType dtype:
    :param partition_info:
    :rtype: tf.Tensor
    """
    import tensorflow as tf
    assert len(shape) >= 2
    key_dim, num_hashes = shape[-2:]

    sampled_hash_gen_top = tf.stack([
      self.base_initializer(shape=shape, dtype=dtype, partition_info=partition_info)
      for _ in range(self.num_hash_init_samples)], axis=len(shape) - 2)  # [..., init_sample, key_dim, num_hashes // 2]

    # sample keys (same key for each hash init)
    sampled_keys = init_ops.RandomNormal()(shape=(self.num_key_samples, key_dim), dtype=dtype)  # [key_sample, key_dim]

    # compute distribution for each and choose the one where the size of the smallest hash class is maximized
    sampled_hash_gen = tf.concat(
      [sampled_hash_gen_top, -sampled_hash_gen_top], axis=-1)  # [..., init_sample, key_dim, num_hashes]
    sampled_hash_dense = tf.matmul(sampled_keys, sampled_hash_gen)  # [..., init_sample, key_sample, num_hashes]
    sampled_hash = tf.argmax(sampled_hash_dense, axis=-1, output_type='int32')  # [..., init_sample, key_sample]
    sampled_hash_counts = bincount_nd(sampled_hash, axis=-1, minlength=num_hashes)  # [..., init_sample, num_hashes]
    sampled_hash_std = tf.math.reduce_std(
      tf.cast(sampled_hash_counts, 'float32') / self.num_key_samples, axis=-1)  # [..., init_sample]
    best_sample = tf.argmin(sampled_hash_std, axis=-1)  # [...]
    best_sampled_hash_gen_top = tf.gather(
      params=sampled_hash_gen_top, indices=best_sample, axis=len(shape) - 2,
      batch_dims=len(shape) - 2)  # [..., key_dim, num_hashes // 2]
    assert best_sampled_hash_gen_top.shape == shape
    return best_sampled_hash_gen_top
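The initializer above draws several candidate hash generators, hashes a batch of random keys with each, and keeps the candidate whose hash classes are filled most evenly (lowest standard deviation of class frequencies). A NumPy-only sketch of the same selection criterion, with illustrative sample counts that are not taken from the code above:

import numpy as np

def pick_balanced_hash_gen(key_dim, num_hashes, num_init_samples=8,
                           num_key_samples=1024, seed=0):
    rng = np.random.default_rng(seed)
    # Candidate generators: [init_sample, key_dim, num_hashes // 2], mirrored
    # to [..., num_hashes] as in the initializer above.
    half = rng.normal(size=(num_init_samples, key_dim, num_hashes // 2))
    gens = np.concatenate([half, -half], axis=-1)        # [init, key_dim, num_hashes]
    keys = rng.normal(size=(num_key_samples, key_dim))   # [key_sample, key_dim]
    scores = keys @ gens                                  # [init, key_sample, num_hashes]
    hashes = scores.argmax(axis=-1)                       # [init, key_sample]
    counts = np.stack([np.bincount(h, minlength=num_hashes) for h in hashes])
    stds = (counts / num_key_samples).std(axis=-1)        # per-candidate class imbalance
    return half[stds.argmin()]                            # best "top half" generator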
Example #4
def vae(observed, n, y, is_training):
    with zs.BayesianNet(observed=observed) as decoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        z_mean = tf.zeros([n, n_z])
        z = zs.Normal('z', z_mean, std=1., n_samples=1, group_event_ndims=1)
        z = tf.reshape(z, [-1, n_z])
        yb = tf.reshape(y, [-1, 1, 1, n_code])

        lx_z = layers.fully_connected(tf.concat([z, y], 1), 1024, activation_fn=tf.nn.relu,
                                      normalizer_fn=layers.batch_norm,
                                      normalizer_params=normalizer_params,
                                      weights_initializer=init_ops.RandomNormal(stddev=0.02)
                                      )
        lx_z = layers.fully_connected(tf.concat([lx_z, y], 1), ngf * 8 * 4 * 4, activation_fn=tf.nn.relu,
                                      normalizer_fn=layers.batch_norm,
                                      normalizer_params=normalizer_params,
                                      weights_initializer=init_ops.RandomNormal(stddev=0.02)
                                      )
        lx_z = tf.reshape(lx_z, [-1, 4, 4, ngf * 8])

        # assert tf.shape(lx_z)[0] == n

        lx_z = layers.conv2d_transpose(conv_cond_concat(lx_z, yb), ngf * 4, 5, stride=2, activation_fn=tf.nn.relu,
                                       normalizer_fn=layers.batch_norm,
                                       normalizer_params=normalizer_params,
                                       weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.conv2d_transpose(conv_cond_concat(lx_z, yb), ngf * 2, 5, stride=2, activation_fn=tf.nn.relu,
                                       normalizer_fn=layers.batch_norm,
                                       normalizer_params=normalizer_params,
                                       weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.conv2d_transpose(conv_cond_concat(lx_z, yb), ngf * 1, 5, stride=2, activation_fn=tf.nn.relu,
                                       normalizer_fn=layers.batch_norm,
                                       normalizer_params=normalizer_params,
                                       weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lx_z = layers.conv2d_transpose(conv_cond_concat(lx_z, yb), 1, 5, stride=2, activation_fn=None,
                                       weights_initializer=init_ops.RandomNormal(stddev=0.02))
        x_logits = tf.reshape(lx_z, [1, -1, n_x])
        x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
    return decoder, x_logits
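conv_cond_concat is not shown in these snippets; in DCGAN-style conditional networks it tiles the condition code yb (shape [batch, 1, 1, n_code]) across the spatial dimensions and concatenates it to the feature maps along the channel axis. A typical definition, assumed rather than taken from this project:

import tensorflow as tf

def conv_cond_concat(x, y):
    # x: [batch, h, w, c] feature maps; y: [batch, 1, 1, n_code] condition maps.
    x_shape = tf.shape(x)
    y_tiled = y * tf.ones([x_shape[0], x_shape[1], x_shape[2], tf.shape(y)[3]], dtype=y.dtype)
    return tf.concat([x, y_tiled], axis=3)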
Example #5
def q_net(observed, x, y, is_training):
    with zs.BayesianNet(observed=observed) as encoder:
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        yb = tf.reshape(y, [-1, 1, 1, n_code])
        lz_x = layers.conv2d(conv_cond_concat(x, yb), ngf, 5, stride=2, activation_fn=tf.nn.relu,
                             normalizer_fn=layers.batch_norm,
                             normalizer_params=normalizer_params,
                             weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.conv2d(conv_cond_concat(lz_x, yb), ngf * 2, 5, stride=2, activation_fn=tf.nn.relu,
                             normalizer_fn=layers.batch_norm,
                             normalizer_params=normalizer_params,
                             weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.conv2d(conv_cond_concat(lz_x, yb), ngf * 4, 5, stride=2, activation_fn=tf.nn.relu,
                             normalizer_fn=layers.batch_norm,
                             normalizer_params=normalizer_params,
                             weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.conv2d(conv_cond_concat(lz_x, yb), ngf * 8, 5, stride=2, activation_fn=tf.nn.relu,
                             normalizer_fn=layers.batch_norm,
                             normalizer_params=normalizer_params,
                             weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.flatten(lz_x)
        # assert tf.shape(lz_x)[0] == tf.shape(y)[0]
        lz_x = layers.fully_connected(tf.concat([lz_x, y], 1), 1024, activation_fn=tf.nn.relu,
                                      normalizer_fn=layers.batch_norm,
                                      normalizer_params=normalizer_params,
                                      weights_initializer=init_ops.RandomNormal(stddev=0.02))
        lz_x = layers.fully_connected(tf.concat([lz_x, y], 1), 2 * n_z, activation_fn=None,
                                      normalizer_fn=layers.batch_norm,
                                      normalizer_params=normalizer_params,
                                      weights_initializer=init_ops.RandomNormal(stddev=0.02))

        mu, logstd = lz_x[:, :n_z], lz_x[:, n_z:]
        lz_x = zs.Normal('z', mu, logstd, n_samples=1, group_event_ndims=1)
    return encoder, lz_x
Example #6
def q_net(observed, x, is_training):
    with zs.BayesianNet(observed=observed) as encoder:
        normalizer_params = {
            'is_training': is_training,
            'updates_collections': None
        }
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        # ladder0 = layers.conv2d(x, ngf, 4, stride=2, activation_fn=lrelu,
        #                         normalizer_fn=layers.batch_norm,
        #                         normalizer_params=normalizer_params,
        #                         weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = layers.conv2d(ladder0, ngf, 4, stride=1, activation_fn=lrelu,
        #                         normalizer_fn=layers.batch_norm,
        #                         normalizer_params=normalizer_params,
        #                         weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = layers.flatten(ladder0)
        # latent0_mean = layers.fully_connected(ladder0, font_dim, activation_fn=tf.identity)
        # latent0_std = layers.fully_connected(ladder0, font_dim, activation_fn=tf.sigmoid)

        inference0 = layers.conv2d(
            x,
            ngf,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.conv2d(
            inference0,
            ngf,
            4,
            stride=1,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.conv2d(
            inference0,
            ngf * 2,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.conv2d(
            ladder1,
            ngf * 2,
            4,
            stride=1,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.conv2d(
            ladder1,
            ngf * 4,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = layers.flatten(ladder1)
        latent1_mean = layers.fully_connected(ladder1,
                                              font_dim,
                                              activation_fn=tf.identity)
        latent1_std = layers.fully_connected(ladder1,
                                             font_dim,
                                             activation_fn=tf.sigmoid)

        inference1 = layers.conv2d(
            inference0,
            ngf * 2,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.conv2d(
            inference1,
            ngf * 2,
            4,
            stride=1,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.conv2d(
            inference1,
            ngf * 4,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder2 = layers.conv2d(
            inference1,
            ngf * 4,
            4,
            stride=1,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder2 = layers.conv2d(
            ladder2,
            ngf * 4,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder2 = layers.conv2d(
            ladder2,
            ngf * 8,
            4,
            stride=1,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.flatten(ladder2)
        latent2_mean = layers.fully_connected(ladder2,
                                              char_dim,
                                              activation_fn=tf.identity)
        latent2_std = layers.fully_connected(ladder2,
                                             char_dim,
                                             activation_fn=tf.sigmoid)

        # inference2 = layers.conv2d(inference1, ngf * 4, 4, stride=2, activation_fn=lrelu,
        #                            normalizer_fn=layers.batch_norm,
        #                            normalizer_params=normalizer_params,
        #                            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # inference2 = layers.conv2d(inference2, ngf * 4, 4, stride=1, activation_fn=lrelu,
        #                            normalizer_fn=layers.batch_norm,
        #                            normalizer_params=normalizer_params,
        #                            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # inference2 = layers.conv2d(inference2, ngf * 8, 4, stride=2, activation_fn=lrelu,
        #                            normalizer_fn=layers.batch_norm,
        #                            normalizer_params=normalizer_params,
        #                            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        #
        # ladder3 = layers.conv2d(inference2, ngf * 8, 4, stride=2, activation_fn=lrelu,
        #                         normalizer_fn=layers.batch_norm,
        #                         normalizer_params=normalizer_params,
        #                         weights_initializer=init_ops.RandomNormal(stddev=0.02))
        #
        # ladder3 = layers.conv2d(ladder3, ngf * 8, 4, stride=1, activation_fn=lrelu,
        #                         normalizer_fn=layers.batch_norm,
        #                         normalizer_params=normalizer_params,
        #                         weights_initializer=init_ops.RandomNormal(stddev=0.02))
        #
        # ladder3 = layers.conv2d(ladder3, ngf * 16, 4, stride=2, activation_fn=lrelu,
        #                         normalizer_fn=layers.batch_norm,
        #                         normalizer_params=normalizer_params,
        #                         weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder3 = layers.flatten(ladder3)
        # latent3_mean = layers.fully_connected(ladder3, 20, activation_fn=tf.identity)
        # latent3_std = layers.fully_connected(ladder3, 20, activation_fn=tf.sigmoid)
        z_char = zs.Normal('z_char',
                           mean=latent2_mean,
                           std=latent2_std,
                           n_samples=1,
                           group_event_ndims=1)
        z_font = zs.Normal('z_font',
                           mean=latent1_mean,
                           std=latent1_std,
                           n_samples=1,
                           group_event_ndims=1)
        return encoder, z_font, z_char
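In the encoders above, each latent head outputs a mean (identity activation) and a positive std (sigmoid activation), which zs.Normal turns into a sample. Conceptually the draw is a reparameterized Gaussian sample, roughly as sketched below with illustrative shapes (a sketch of the idea, not ZhuSuan's actual implementation; ZhuSuan additionally prepends an n_samples axis):

import tensorflow as tf

batch, dim = 32, 64                      # illustrative sizes, not from the code above
mean = tf.zeros([batch, dim])            # stands in for latent1_mean / latent2_mean
std = tf.fill([batch, dim], 0.5)         # stands in for latent1_std / latent2_std
eps = tf.random_normal(tf.shape(mean))   # unit Gaussian noise
z_sample = mean + std * eps              # one reparameterized sample per row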
Example #7
def VLAE(observed, n, is_training):
    with zs.BayesianNet(observed=observed) as decoder:
        normalizer_params = {
            'is_training': is_training,
            'updates_collections': None
        }
        z_char_mean = tf.zeros([n, char_dim])
        z_char = zs.Normal('z_char',
                           z_char_mean,
                           std=1.,
                           n_samples=1,
                           group_event_ndims=1)
        z_char = tf.reshape(z_char, [-1, char_dim])
        z_font_mean = tf.zeros([n, font_dim])
        z_font = zs.Normal('z_font',
                           z_font_mean,
                           std=1.,
                           n_samples=1,
                           group_event_ndims=1)
        z_font = tf.reshape(z_font, [-1, font_dim])
        latent2 = z_char
        latent1 = z_font
        ladder2 = layers.fully_connected(
            latent2,
            ngf * 8 * 4 * 4,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = tf.reshape(ladder2, [-1, 4, 4, ngf * 8])
        ladder2 = layers.conv2d_transpose(
            ladder2,
            ngf * 8,
            4,
            stride=1,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder2 = layers.conv2d_transpose(
            ladder2,
            ngf * 4,
            4,
            stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        inference1 = layers.conv2d_transpose(
            ladder2,
            ngf * 4,
            4,
            stride=1,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.fully_connected(
            latent1,
            ngf * 4 * 8 * 8,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder1 = tf.reshape(ladder1, [-1, 8, 8, ngf * 4])

        ladder1 = tf.concat([ladder1, inference1], 3)
        ladder1 = layers.conv2d_transpose(
            ladder1,
            ngf * 2,
            4,
            stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.conv2d_transpose(
            ladder1,
            ngf * 2,
            4,
            stride=1,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        inference0 = layers.conv2d_transpose(
            ladder1,
            ngf,
            4,
            stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        # ladder0 = layers.fully_connected(latent0, ngf * 1 * 32 * 32, activation_fn=tf.nn.relu,
        #                                  normalizer_fn=layers.batch_norm,
        #                                  normalizer_params=normalizer_params,
        #                                  weights_initializer=init_ops.RandomNormal(stddev=0.02))
        # ladder0 = tf.reshape(ladder0, [-1, 32, 32, ngf * 1])
        # ladder0 = tf.concat([ladder0, inference0], 3)

        ladder0 = layers.conv2d_transpose(
            inference0,
            ngf,
            4,
            stride=1,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        x_logits = layers.conv2d_transpose(
            ladder0,
            1,
            4,
            stride=2,
            activation_fn=tf.identity,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        x_logits = tf.reshape(x_logits, [1, -1, n_x])
        x = zs.Bernoulli('x', x_logits, n_samples=1, group_event_ndims=1)
    return decoder, x_logits
Example #8
def run_decatt(train, val, test, word_to_index, intra_sent, emb_size,
               emb_glove, context_size, n_intra, n_intra_bias, n_attend,
               n_compare, n_classif, dropout_rate, pred_thres, lr, batch_size,
               epoch_size):
    # special words
    word_to_index['\0'] = len(word_to_index)

    # network
    tf.reset_default_graph()
    X_doc_1 = tf.placeholder(tf.int32, [None, None, 2 * context_size + 1])
    X_doc_2 = tf.placeholder(tf.int32, [None, None, 2 * context_size + 1])
    mask_1 = tf.placeholder(tf.float32, [None, None])
    mask_2 = tf.placeholder(tf.float32, [None, None])
    y = tf.placeholder(tf.int32, [None])
    training = tf.placeholder(tf.bool, [])

    emb_shape = [len(word_to_index), emb_size]
    emb = tf.Variable(
        tf.zeros(emb_shape) if emb_glove
        else tf.random_normal(emb_shape, 0, 0.01))

    ngram, mask_n = [None, None], [None, None]
    for i in range(2):
        X_doc = [X_doc_1, X_doc_2][i]
        batch_size_, n_words_ = tf.shape(X_doc)[0], tf.shape(X_doc)[1]
        X_doc_pad = tf.fill([batch_size_, 2 * context_size],
                            word_to_index['\0'])
        X_doc_n = tf.concat([X_doc_pad, X_doc, X_doc_pad], 1)
        X_doc_n = tf.map_fn(
            lambda j: X_doc_n[:, j:j + n_words_ + 2 * context_size],
            tf.range(2 * context_size + 1))
        X_doc_n = tf.transpose(X_doc_n, [1, 0, 2])
        mask_n[i] = tf.concat(
            [tf.ones([batch_size_, 2 * context_size]), [mask_1, mask_2][i]], 1)
        ngram_emb = tf.nn.embedding_lookup(emb, X_doc_n)
        ngram[i] = tf.reshape(
            ngram_emb, [batch_size_, -1, (2 * context_size + 1) * emb_size])

    if intra_sent:
        l_intra = sum([[
            Dense(n,
                  tf.nn.relu,
                  kernel_initializer=init_ops.RandomNormal(0, 0.01)),
            Dropout(rate=dropout_rate),
        ] for n in n_intra], [])
        long_dist_bias = tf.Variable(tf.zeros([]))
        dist_biases = tf.Variable(tf.zeros([2 * n_intra_bias + 1]))
        for i in range(2):
            intra_d = apply_layers(l_intra, ngram[i], training=training)
            intra_w = tf.matmul(intra_d, tf.transpose(intra_d, [0, 2, 1]))
            ngram[i] = tf.concat([
                ngram[i],
                attend_intra(intra_w, ngram[i], mask_n[i], n_intra_bias,
                             long_dist_bias, dist_biases)
            ], 2)

    l_attend = sum([[
        Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0,
                                                                      0.01)),
        Dropout(rate=dropout_rate),
    ] for n in n_attend], [])
    attend_d_1 = apply_layers(l_attend, ngram[0], training=training)
    attend_d_2 = apply_layers(l_attend, ngram[1], training=training)
    attend_w = tf.matmul(attend_d_1, tf.transpose(attend_d_2, [0, 2, 1]))
    attend_1 = attend_inter(attend_w, ngram[1], mask_n[1])
    attend_2 = attend_inter(tf.transpose(attend_w, [0, 2, 1]), ngram[0],
                            mask_n[0])

    l_compare = sum([[
        Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0,
                                                                      0.01)),
        Dropout(rate=dropout_rate),
    ] for n in n_compare], [])
    compare_1 = apply_layers(l_compare,
                             tf.concat([ngram[0], attend_1], 2),
                             training=training)
    compare_2 = apply_layers(l_compare,
                             tf.concat([ngram[1], attend_2], 2),
                             training=training)

    agg_1 = tf.reduce_sum(tf.expand_dims(mask_n[0], -1) * compare_1, 1)
    agg_2 = tf.reduce_sum(tf.expand_dims(mask_n[1], -1) * compare_2, 1)
    l_classif = sum([[
        Dense(n, tf.nn.relu, kernel_initializer=init_ops.RandomNormal(0,
                                                                      0.01)),
        Dropout(rate=dropout_rate),
    ] for n in n_classif], [])
    logits = apply_layers(l_classif,
                          tf.concat([agg_1, agg_2], 1),
                          training=training)
    logits = tf.layers.dense(logits,
                             2,
                             kernel_initializer=init_ops.RandomNormal(0, 0.01))
    probs = tf.nn.softmax(logits)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=y))

    opt = tf.train.AdagradOptimizer(learning_rate=lr)
    grads = opt.compute_gradients(loss)
    grads = [(grad, var) for grad, var in grads
             if var != emb] if emb_glove else grads
    train_op = opt.apply_gradients(grads)

    # run
    with tf.Session() as sess:
        # start sampling
        qs = {name: Queue(1) for name in ('train', 'val', 'test')}
        for name, docs in ('train', train), ('val', val), ('test', test):
            Process(target=sample,
                    args=(docs, word_to_index, context_size, epoch_size,
                          batch_size, qs[name])).start()

        # initialize variables
        sess.run(tf.global_variables_initializer())
        if emb_glove:
            emb_0 = tf.Variable(0., validate_shape=False)
            saver = tf.train.Saver({'emb': emb_0})
            saver.restore(sess, '__cache__/tf/emb/model.ckpt')
            sess.run(emb[:tf.shape(emb_0)[0]].assign(emb_0))

        # train
        print(datetime.datetime.now(), 'started training')
        for i in range(epoch_size):
            total_loss, correct = 0, {'val': 0, 'test': 0}
            for X_doc_1_, X_doc_2_, mask_1_, mask_2_, y_ in qs['train'].get():
                _, batch_loss = sess.run(
                    [train_op, loss],
                    feed_dict={
                        X_doc_1: X_doc_1_,
                        X_doc_2: X_doc_2_,
                        mask_1: mask_1_,
                        mask_2: mask_2_,
                        y: y_,
                        training: True,
                    })
                total_loss += len(y_) * batch_loss
            for name in 'val', 'test':
                for X_doc_1_, X_doc_2_, mask_1_, mask_2_, y_ in qs[name].get():
                    probs_ = sess.run(probs,
                                      feed_dict={
                                          X_doc_1: X_doc_1_,
                                          X_doc_2: X_doc_2_,
                                          mask_1: mask_1_,
                                          mask_2: mask_2_,
                                          training: False,
                                      })
                    correct[name] += np.sum((probs_[:, 1] >= pred_thres) == y_)
            print(
                datetime.datetime.now(),
                f'finished epoch {i}, loss: {total_loss / len(train):f}, '
                f'val acc: {correct["val"] / len(val):f}, test acc: {correct["test"] / len(test):f}'
            )
Example #9
def VLAE(observed, n, n_x, n_z_0, n_z_1, n_z_2, n_particles, is_training):
    with zs.BayesianNet(observed=observed) as model:
        normalizer_params = {
            'is_training': is_training,
            'updates_collections': None
        }

        z_2_mean = tf.zeros([n, n_z_2])
        z_2 = zs.Normal('z_2',
                        z_2_mean,
                        std=1.,
                        n_samples=n_particles,
                        group_event_ndims=1)
        z_2 = tf.reshape(z_2, [-1, n_z_2])
        z_1_mean = tf.zeros([n, n_z_1])
        z_1 = zs.Normal('z_1',
                        z_1_mean,
                        std=1.,
                        n_samples=n_particles,
                        group_event_ndims=1)
        z_1 = tf.reshape(z_1, [-1, n_z_1])
        z_0_mean = tf.zeros([n, n_z_0])
        z_0 = zs.Normal('z_0',
                        z_0_mean,
                        std=1.,
                        n_samples=n_particles,
                        group_event_ndims=1)
        z_0 = tf.reshape(z_0, [-1, n_z_0])
        latent2 = z_2
        latent1 = z_1
        latent0 = z_0

        ladder2 = layers.fully_connected(
            latent2,
            ngf * 16,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder2 = layers.fully_connected(
            ladder2,
            ngf * 16,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        inference1 = layers.fully_connected(ladder2,
                                            ngf * 16,
                                            activation_fn=tf.identity)

        ladder1 = layers.fully_connected(
            latent1,
            ngf * 16,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = tf.concat([ladder1, inference1], 1)
        ladder1 = layers.fully_connected(
            ladder1,
            ngf * 16,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.fully_connected(
            ladder1,
            ngf * 16,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        inference0 = layers.fully_connected(ladder1,
                                            ngf * 16,
                                            activation_fn=tf.identity)

        ladder0 = layers.fully_connected(
            latent0,
            ngf * 16,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = tf.concat([ladder0, inference0], 1)

        ladder0 = layers.fully_connected(
            ladder0,
            int(ngf * 2 * 7 * 7),
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder0 = tf.reshape(ladder0, [-1, 7, 7, ngf * 2])

        ladder0 = layers.conv2d_transpose(
            ladder0,
            ngf,
            4,
            stride=2,
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        x_logits = layers.conv2d_transpose(
            ladder0,
            1,
            4,
            stride=2,
            activation_fn=tf.identity,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        x_logits = tf.reshape(x_logits, [-1, n_x])
        x = zs.Bernoulli('x',
                         x_logits,
                         n_samples=n_particles,
                         group_event_ndims=1)
    return model, x_logits
Example #10
def q_net(observed, x, n_z_0, n_z_1, n_z_2, n_particles, is_training):
    with zs.BayesianNet(observed=observed) as variational:
        normalizer_params = {
            'is_training': is_training,
            'updates_collections': None
        }
        #     lz_x = layers.fully_connected(
        #         tf.to_float(x), 500, normalizer_fn=layers.batch_norm,
        #         normalizer_params=normalizer_params)
        #     lz_x = layers.fully_connected(
        #         lz_x, 500, normalizer_fn=layers.batch_norm,
        #         normalizer_params=normalizer_params)
        #     z_mean = layers.fully_connected(lz_x, n_z, activation_fn=None)
        #     z_logstd = layers.fully_connected(lz_x, n_z, activation_fn=None)
        #     z = zs.Normal('z', z_mean, logstd=z_logstd, n_samples=n_particles,
        #                   group_event_ndims=1)
        # return variational
        x = tf.reshape(tf.to_float(x), [-1, n_xl, n_xl, 1])
        ladder0 = layers.conv2d(
            x,
            ngf,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.conv2d(
            ladder0,
            ngf * 2,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        ladder0 = layers.flatten(ladder0)
        latent0_mean = layers.fully_connected(ladder0,
                                              n_z_0,
                                              activation_fn=tf.identity)
        latent0_std = layers.fully_connected(ladder0,
                                             n_z_0,
                                             activation_fn=tf.sigmoid)

        inference0 = layers.conv2d(
            x,
            ngf,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.conv2d(
            inference0,
            ngf,
            4,
            stride=2,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference0 = layers.flatten(inference0)
        inference0 = layers.fully_connected(inference0,
                                            ngf * 16,
                                            activation_fn=tf.identity)

        ladder1 = layers.fully_connected(
            inference0,
            ngf * 16,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder1 = layers.fully_connected(
            ladder1,
            ngf * 16,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        latent1_mean = layers.fully_connected(ladder1,
                                              n_z_1,
                                              activation_fn=tf.identity)
        latent1_std = layers.fully_connected(ladder1,
                                             n_z_1,
                                             activation_fn=tf.sigmoid)

        inference1 = layers.fully_connected(
            inference0,
            ngf * 16,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.fully_connected(
            inference1,
            ngf * 16,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))
        inference1 = layers.fully_connected(inference1,
                                            ngf * 16,
                                            activation_fn=tf.identity)

        ladder2 = layers.fully_connected(
            inference1,
            ngf * 16,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        ladder2 = layers.fully_connected(
            ladder2,
            ngf * 16,
            activation_fn=lrelu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=normalizer_params,
            weights_initializer=init_ops.RandomNormal(stddev=0.02))

        latent2_mean = layers.fully_connected(ladder2,
                                              n_z_2,
                                              activation_fn=tf.identity)
        latent2_std = layers.fully_connected(ladder2,
                                             n_z_2,
                                             activation_fn=tf.sigmoid)

        z_0 = zs.Normal('z_0',
                        mean=latent0_mean,
                        std=latent0_std,
                        n_samples=n_particles,
                        group_event_ndims=1)
        z_1 = zs.Normal('z_1',
                        mean=latent1_mean,
                        std=latent1_std,
                        n_samples=n_particles,
                        group_event_ndims=1)
        z_2 = zs.Normal('z_2',
                        mean=latent2_mean,
                        std=latent2_std,
                        n_samples=n_particles,
                        group_event_ndims=1)

    return variational, z_0, z_1, z_2