Example #1
 def build_word_decoder(self, word_vectors_3, char_ids_3):
     config = self.config
     with tf.variable_scope('word_condition_projection'):
         word_vectors_3 = layers.mlp(word_vectors_3, self.config['sentence_decoder_projection'])
     with tf.variable_scope('word_decoder'):
         spell_vector_len = config['spell_vector_len']
         spell_vector_size = spell_vector_len * config['char_embed_size']
         spell_vector_size *= 2 # TODO make this factor configurable
         # Grab char embeds and concat them to spelling vector representations of words
         char_ids_3 = self.add_go(char_ids_3, axis=2)
         char_embeds_4 = layers.embedding(self.num_chars, config['char_embed_size'], char_ids_3)
         spell_vectors_3 = self.create_spell_vector(char_embeds_4, spell_vector_len)
         # Pass spelling vector through a layer that can see previous chars, but can't see ahead
         with tf.variable_scope('future_masked_spelling'):
             spell_vectors_projected_3 = layers.feed_forward(spell_vectors_3,
                 num_nodes=spell_vector_size, seq_len_for_future_mask=spell_vector_len)
         # Reshape word representation into individual char representations
         batch_size, sentence_len, word_len = tf.unstack(tf.shape(char_ids_3))
         char_size = spell_vectors_projected_3.shape.as_list()[-1] // spell_vector_len  # int division: tf.reshape needs ints
         char_vectors_4 = tf.reshape(spell_vectors_projected_3,
             [batch_size, sentence_len, spell_vector_len, char_size])
         char_vectors_4 = char_vectors_4[:, :, :word_len, :]
         # Project each char_vector up to the size of the conditioning word_vector
         with tf.variable_scope('char_projection'):
             word_depth = word_vectors_3.shape.as_list()[-1]
             char_vectors_4 = layers.feed_forward(char_vectors_4, num_nodes=word_depth)
         # Add the conditioning word_vector to each char and pass result through an mlp
         char_vectors_4 += tf.expand_dims(word_vectors_3, axis=2)
         char_vectors_4 = layers.mlp(char_vectors_4, config['word_decoder_mlp'])
     with tf.variable_scope('logits'):
         char_logits_4 = layers.feed_forward(char_vectors_4, num_nodes=self.num_chars,
             noise_level=config['noise_level'])
     return char_logits_4
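Example #1 leans on two helpers that are not shown here: add_go (presumably prepends a GO symbol along the char axis) and create_spell_vector. A minimal sketch of the latter, assuming it pads each word's character embeddings out to the fixed spell_vector_len and flattens them into one spelling vector per word; the padding scheme is inferred from the surrounding shapes, not confirmed:

import tensorflow as tf

def create_spell_vector(self, char_embeds_4, spell_vector_len):
    # Pad the word-length axis up to the fixed spell_vector_len, then fold
    # the char axis into the depth axis: one fixed-size vector per word,
    # matching the spell_vector_len * char_embed_size depth assumed above.
    batch_size, sentence_len, word_len, _ = tf.unstack(tf.shape(char_embeds_4))
    char_embed_size = char_embeds_4.shape.as_list()[-1]
    padded_4 = tf.pad(char_embeds_4,
        [[0, 0], [0, 0], [0, spell_vector_len - word_len], [0, 0]])
    return tf.reshape(padded_4,
        [batch_size, sentence_len, spell_vector_len * char_embed_size])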
Example #2
 def test_compiles(self):
     tf.reset_default_graph()
     with tf.Session() as sess:
         inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
         outputs = layers.feed_forward(inputs, num_nodes=20)
         initialize_vars(sess)
         outputs = sess.run(outputs)
         self.assertEqual(outputs.shape, (3, 20))
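The tests call an initialize_vars helper whose definition is not shown. A plausible one-liner, assuming it simply wraps the TF1 global initializer (Example #5 below calls tf.global_variables_initializer() directly):

import tensorflow as tf

def initialize_vars(sess):
    # Assumed test helper: give every variable created by the layers
    # under test a concrete value before the first sess.run.
    sess.run(tf.global_variables_initializer())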
Example #3
    def test_layer_norm(self):
        tf.reset_default_graph()

        def check_for_var(varname, count):
            # Match on the requested variable name (the original hardcoded
            # 'gamma') and materialize the filter so len() works on Python 3.
            matches = [var for var in tf.trainable_variables()
                       if varname in var.name]
            self.assertEqual(len(matches), count)

        with tf.Session() as sess:
            inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
            outputs = layers.feed_forward(inputs,
                                          num_nodes=20,
                                          layer_norm=False)
            check_for_var('gamma', 0)
            check_for_var('beta', 0)
            with tf.variable_scope('norm'):
                outputs = layers.feed_forward(outputs,
                                              num_nodes=10,
                                              layer_norm=True)
                check_for_var('gamma', 1)
                check_for_var('beta', 1)
Example #4
    def test_activation_fn(self):
        tf.reset_default_graph()

        def to_zero(tensor):
            return tensor * 0

        with tf.Session() as sess:
            inputs = tf.constant([[0, 0], [1, 1], [2, 2]], dtype=tf.float32)
            outputs = layers.feed_forward(inputs,
                                          num_nodes=20,
                                          activation_fn=to_zero)
            initialize_vars(sess)
            outputs = sess.run(outputs)
            self.assertEqual(np.sum(outputs), 0.0)
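The zeroing stub confirms that activation_fn is applied to the layer's output; any tensor-in, tensor-out callable should work. Hypothetical usage with a stock nonlinearity in place of the test stub, reusing the same inputs:

outputs = layers.feed_forward(inputs, num_nodes=20, activation_fn=tf.nn.relu)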
Example #5
 def test_dropout(self):
     tf.reset_default_graph()
     with tf.Session() as sess:
         inputs = tf.constant([[0.5, 0.5], [1, 1], [2, 2]],
                              dtype=tf.float32)
         with tf.variable_scope('dropout_test'):
             outputs_dropped = layers.feed_forward(inputs,
                                                   num_nodes=1024,
                                                   keep_prob=0.5)
         with tf.variable_scope('dropout_test', reuse=True):
             outputs_kept = layers.feed_forward(inputs,
                                                num_nodes=1024,
                                                keep_prob=1.0)
         sess.run(tf.global_variables_initializer())
         outputs_dropped = sess.run(outputs_dropped)
         outputs_kept = sess.run(outputs_kept)
         # values that haven't been dropped should be the same (after scaling),
         # therefore the only values that differ should be the dropped ones.
         zero_indices = np.where(
             np.not_equal(outputs_dropped, 2 * outputs_kept))
         self.assertEqual(np.sum(outputs_dropped[zero_indices]), 0)
         # roughly half of the values should be dropped
         drop_rate = float(zero_indices[0].size) / outputs_kept.size
         self.assertTrue(abs(0.5 - drop_rate) < 0.05)
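Taken together, the tests pin down the surface of layers.feed_forward: a dense layer over the last axis with optional layer norm, activation, inverted dropout, and additive noise. A minimal sketch consistent with them; treating noise_level as the stddev of additive Gaussian noise is an assumption, and seq_len_for_future_mask (used in Example #1) is omitted:

import tensorflow as tf

def feed_forward(inputs, num_nodes, activation_fn=None, layer_norm=False,
                 keep_prob=1.0, noise_level=0.0):
    depth = inputs.shape.as_list()[-1]
    weights = tf.get_variable('weights', [depth, num_nodes])
    biases = tf.get_variable('biases', [num_nodes],
                             initializer=tf.zeros_initializer())
    # Dense projection over the last axis (works for rank >= 2 inputs).
    outputs = tf.tensordot(inputs, weights, axes=1) + biases
    if layer_norm:
        outputs = layer_norm_fn(outputs)  # e.g. the sketch after Example #3
    if activation_fn is not None:
        outputs = activation_fn(outputs)
    if keep_prob < 1.0:
        # Inverted dropout: survivors are scaled by 1/keep_prob, which is
        # why test_dropout compares against 2 * outputs_kept.
        outputs = tf.nn.dropout(outputs, keep_prob=keep_prob)
    if noise_level:
        outputs += tf.random_normal(tf.shape(outputs), stddev=noise_level)
    return outputs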