예제 #1
0
  def testEmbeddingRNNDecoder(self):
    """Checks the output and final-state shapes of `embedding_rnn_decoder`.

    An LSTM encoder is run over two constant inputs; its final state seeds a
    decoder that is fed three batches of integer symbols. The test asserts
    the number and shape of the decoder outputs and the shape of both parts
    (`c` and `h`) of the returned state.
    """
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):

        def make_cell():
          return core_rnn_cell_impl.BasicLSTMCell(2)

        # The very same constant tensor is fed at both encoder time steps.
        encoder_step = constant_op.constant(0.5, shape=[2, 2])
        encoder_inputs = [encoder_step, encoder_step]
        encoder_cell = make_cell()
        _, encoder_final_state = core_rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtypes.float32)

        # Three decoder steps, each a length-2 int32 batch holding the
        # symbol ids 0, 1 and 2 respectively.
        decoder_inputs = [
            constant_op.constant(symbol, dtypes.int32, shape=[2])
            for symbol in range(3)
        ]

        # The decoder gets its own cell instance instead of sharing the
        # encoder's, because it runs under a different variable scope.
        decoder_outputs, decoder_state = seq2seq_lib.embedding_rnn_decoder(
            decoder_inputs,
            encoder_final_state,
            make_cell(),
            num_symbols=4,
            embedding_size=2)

        sess.run([variables.global_variables_initializer()])

        output_values = sess.run(decoder_outputs)
        self.assertEqual(3, len(output_values))
        self.assertEqual((2, 2), output_values[0].shape)

        # Fetching a one-element list yields a one-element list of states.
        state_values = sess.run([decoder_state])
        self.assertEqual(1, len(state_values))
        final_state = state_values[0]
        self.assertEqual((2, 2), final_state.c.shape)
        self.assertEqual((2, 2), final_state.h.shape)
예제 #2
0
  def testEmbeddingRNNDecoder(self):
    """Checks the output and final-state shapes of `embedding_rnn_decoder`.

    An LSTM encoder is run over two constant inputs; its final state seeds a
    decoder that is fed three batches of integer symbols. The test asserts
    the number and shape of the decoder outputs and the shape of both parts
    (`c` and `h`) of the returned state.
    """
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):

        def make_cell():
          return core_rnn_cell_impl.BasicLSTMCell(2)

        # The very same constant tensor is fed at both encoder time steps.
        encoder_step = constant_op.constant(0.5, shape=[2, 2])
        encoder_inputs = [encoder_step, encoder_step]
        encoder_cell = make_cell()
        _, encoder_final_state = core_rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtypes.float32)

        # Three decoder steps, each a length-2 int32 batch holding the
        # symbol ids 0, 1 and 2 respectively.
        decoder_inputs = [
            constant_op.constant(symbol, dtypes.int32, shape=[2])
            for symbol in range(3)
        ]

        # The decoder gets its own cell instance instead of sharing the
        # encoder's, because it runs under a different variable scope.
        decoder_outputs, decoder_state = seq2seq_lib.embedding_rnn_decoder(
            decoder_inputs,
            encoder_final_state,
            make_cell(),
            num_symbols=4,
            embedding_size=2)

        sess.run([variables.global_variables_initializer()])

        output_values = sess.run(decoder_outputs)
        self.assertEqual(3, len(output_values))
        self.assertEqual((2, 2), output_values[0].shape)

        # Fetching a one-element list yields a one-element list of states.
        state_values = sess.run([decoder_state])
        self.assertEqual(1, len(state_values))
        final_state = state_values[0]
        self.assertEqual((2, 2), final_state.c.shape)
        self.assertEqual((2, 2), final_state.h.shape)
예제 #3
0
def getDecoding(encoder_state, inputs, cell,
                num_symbols, embedding_size,
                feed_previous=True, output_prejection=None,
                dtype=s2s.dtypes.float32, scope=None):
    """Model for producing probabilities over x from z.

    The decoder is built inside the variable scope ``scope`` (``'seq2seq'``
    when ``scope`` is falsy), which is opened with ``reuse=True``.

    Args:
        encoder_state: Initial state handed to the decoder.
        inputs: Decoder inputs forwarded to ``s2s.embedding_rnn_decoder``.
        cell: RNN cell used for decoding. When ``output_prejection`` is
            ``None`` it is first wrapped in an ``OutputProjectionWrapper``
            with ``num_symbols`` outputs.
        num_symbols: Symbol count forwarded to the decoder (and used as the
            projection size when the cell is wrapped).
        embedding_size: Embedding size forwarded to the decoder.
        feed_previous: Forwarded to the decoder unchanged.
        output_prejection: Forwarded to the decoder as its
            ``output_projection`` argument (the parameter name's spelling is
            kept for existing callers).
        dtype: Accepted but not used by this function.
        scope: Optional variable-scope name.

    Returns:
        The first element of the pair returned by
        ``s2s.embedding_rnn_decoder``.
    """
    scope_name = scope or 'seq2seq'
    with s2s.variable_scope.variable_scope(scope_name, reuse=True):
        if output_prejection is None:
            # No explicit projection supplied: let the cell itself emit
            # num_symbols-sized outputs.
            decoder_cell = s2s.core_rnn_cell.OutputProjectionWrapper(
                cell, num_symbols)
        else:
            decoder_cell = cell
        probabilities, _ = s2s.embedding_rnn_decoder(
            inputs,
            encoder_state,
            decoder_cell,
            num_symbols,
            embedding_size,
            output_projection=output_prejection,
            feed_previous=feed_previous)
    return probabilities
예제 #4
0
def createVariationalVar(inputs, cell, num_symbols, embedding_size,
                         feed_previous=False, output_projection=None,
                         dtype=s2s.dtypes.float32, scope=None):
    """Create TensorFlow variables which can be reused later.

    An embedding encoder and an embedding decoder are both built inside the
    variable scope ``scope`` (``'seq2seq'`` when ``scope`` is falsy). The
    tensors they produce are discarded; the function is called only for
    the graph construction it performs.

    Args:
        inputs: Sequence fed to both the encoder and the decoder.
        cell: RNN cell. It is wrapped in an ``EmbeddingWrapper`` for the
            encoder and, when ``output_projection`` is ``None``, in an
            ``OutputProjectionWrapper`` for the decoder.
        num_symbols: Number of embedding classes / decoder symbols.
        embedding_size: Embedding size for the encoder and the decoder.
        feed_previous: Forwarded to the decoder unchanged.
        output_projection: Forwarded to the decoder unchanged.
        dtype: dtype passed to ``s2s.rnn.static_rnn`` for the encoder.
        scope: Optional variable-scope name.

    Returns:
        None.
    """
    scope_name = scope or 'seq2seq'
    with s2s.variable_scope.variable_scope(scope_name):
        embedding_encoder = s2s.core_rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_symbols,
            embedding_size=embedding_size)
        # Final encoder state: batch_size x cell.state_size
        _, final_encoder_state = s2s.rnn.static_rnn(
            embedding_encoder, inputs, dtype=dtype)

        if output_projection is None:
            decoder_cell = s2s.core_rnn_cell.OutputProjectionWrapper(
                cell, num_symbols)
        else:
            decoder_cell = cell

        # The decoder outputs are intentionally unused.
        _unused_probs, _ = s2s.embedding_rnn_decoder(
            inputs,
            final_encoder_state,
            decoder_cell,
            num_symbols,
            embedding_size,
            output_projection=output_projection,
            feed_previous=feed_previous)
    return None
예제 #5
0
    def build(self):
        """Assemble the model graph and publish its handles on ``self``.

        Builds, in order: the placeholders, the input and question modules
        (sharing one GRU cell and one variable scope), the masked fact
        vectors, the episodic memory loop, the answer logits, the loss and
        accuracy tensors, and the Adadelta training op.

        On completion the following attributes are set: ``x``, ``q``, ``y``,
        ``mask``, ``is_training`` (placeholders) and ``total_loss``,
        ``num_corrects``, ``accuracy``, ``opt_op`` (tensors / ops).
        """
        params = self.params
        batch_size = params.batch_size
        sent_len = params.max_sent_size
        ques_len = params.max_ques_size
        fact_count = params.max_fact_count
        embed_size = params.embed_size
        hidden_size = params.hidden_size
        vocab_size = self.words.vocab_size

        # --- Placeholders -------------------------------------------------
        # Sentence word ids: [batch_size, sent_len]
        sentence_ph = tf.placeholder(
            'int32', shape=[batch_size, sent_len], name='x')
        # Question word ids: [batch_size, ques_len]
        question_ph = tf.placeholder(
            'int32', shape=[batch_size, ques_len], name='q')
        # One-word answer ids: [batch_size]
        answer_ph = tf.placeholder('int32', shape=[batch_size], name='y')
        # Boolean mask over sentence positions: [batch_size, sent_len]
        mask_ph = tf.placeholder(
            tf.bool, shape=[batch_size, sent_len], name='x_mask')
        training_ph = tf.placeholder(tf.bool)

        # --- Shared recurrent cell ----------------------------------------
        gru = rnn_cell.GRUCell(hidden_size)

        # --- Input and question modules -----------------------------------
        with tf.variable_scope('input') as input_scope:
            sentence_steps = tf.unstack(tf.transpose(sentence_ph))
            input_states, _ = seq2seq.embedding_rnn_decoder(
                sentence_steps,
                gru.zero_state(batch_size, tf.float32),
                gru,
                vocab_size,
                embed_size)

            # The question is run in the same scope with variable reuse
            # switched on.
            input_scope.reuse_variables()

            question_steps = tf.unstack(tf.transpose(question_ph))
            question_outputs, _ = seq2seq.embedding_rnn_decoder(
                question_steps,
                gru.zero_state(batch_size, tf.float32),
                gru,
                vocab_size,
                embed_size)
            # The last element of the question outputs represents the
            # whole question.
            question_vec = question_outputs[-1]

        # --- Fact extraction ----------------------------------------------
        # Masking: extract fact vectors at end of sentence (details in the
        # paper). Stack the per-step states and put the batch axis first.
        states_by_example = tf.transpose(tf.stack(input_states), [1, 0, 2])
        padded_facts = []
        for example in range(batch_size):
            example_states = states_by_example[example, :, :]
            example_mask = mask_ph[example, :]
            kept = tf.boolean_mask(example_states, example_mask)  # [?, D]
            missing_rows = fact_count - tf.shape(kept)[0]
            zero_rows = tf.zeros(tf.stack([missing_rows, hidden_size]))
            # Pad with zero rows up to fact_count rows.
            padded_facts.append(tf.concat(0, [kept, zero_rows]))

        # Stack so the axes can be swapped, then split into one tensor per
        # fact slot.
        stacked_facts = tf.stack(padded_facts)
        facts = tf.unstack(
            tf.transpose(stacked_facts, [1, 0, 2]), num=fact_count)

        # --- Episodic memory ----------------------------------------------
        with tf.variable_scope('episodic') as episodic_scope:
            episode = EpisodeModule(hidden_size, question_vec, facts)

            memory = tf.identity(question_vec)
            for _ in range(params.memory_step):
                memory = gru(episode.new(memory), memory)[0]
                # Every pass after the first reuses the same variables.
                episodic_scope.reuse_variables()

        # --- Regularization -----------------------------------------------
        if params.batch_norm:
            memory = batch_norm(memory, is_training=training_ph)
        memory = dropout(memory, params.keep_prob, training_ph)

        # --- Answer module (feed-forward, since the answer is one word) ---
        with tf.name_scope('Answer'):
            w_a = weight('w_a', [hidden_size, vocab_size])
            logits = tf.matmul(memory, w_a)  # [batch_size, vocab_size]

        # --- Loss ----------------------------------------------------------
        with tf.name_scope('Loss'):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits, answer_ph)
            loss = tf.reduce_mean(cross_entropy)
            l2_penalty = params.weight_decay * tf.add_n(tf.get_collection('l2'))
            total_loss = loss + l2_penalty

        # --- Accuracy ------------------------------------------------------
        with tf.variable_scope('Accuracy'):
            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, answer_ph)
            num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
            accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

        # --- Training ------------------------------------------------------
        optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
        opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

        # Publish placeholders.
        self.x = sentence_ph
        self.q = question_ph
        self.y = answer_ph
        self.mask = mask_ph
        self.is_training = training_ph

        # Publish tensors and ops.
        self.total_loss = total_loss
        self.num_corrects = num_corrects
        self.accuracy = accuracy
        self.opt_op = opt_op
# dec_state_in = tf.placeholder(tf.float32, [None, memory_dim])

# Pass the encoder state through tanh; the result is what the decoder below
# receives as its initial-state argument.
encoder_state = tf.nn.tanh(encoder_state)

# First decoder, built under its own "decoder1" variable scope.
with tf.variable_scope("decoder1", reuse=None) as scope:
    # Disabled device pinning, kept for reference:
    # with tf.device('/cpu:0'):

    # GRU cell of size memory_dim whose outputs are projected to
    # num_decoder_symbols values per step.
    cell = tf.contrib.rnn.GRUCell(memory_dim)
    cell = tf.contrib.rnn.OutputProjectionWrapper(cell, num_decoder_symbols)

    # Disabled experiment (a non-trainable embedding variable), kept for
    # reference:
    # W2 = tf.Variable(tf.constant(0.0, shape=[num_encoder_symbols, embedding_dim]),
    #         trainable=False, name="embedding_rnn_decoder/embedding")

    # NOTE(review): feed_previous=True presumably makes the decoder feed its
    # own previous prediction back in instead of reading decoder_inputs past
    # the first step -- confirm against the embedding_rnn_decoder in use.
    dec_outputs1, dec_memory1 = embedding_rnn_decoder(decoder_inputs,
                                                      encoder_state,
                                                      cell,
                                                      num_decoder_symbols,
                                                      embedding_dim,
                                                      feed_previous=True)
with tf.variable_scope("decoder2", reuse=None) as scope:
    # with tf.device('/cpu:0'):

    cell = tf.contrib.rnn.GRUCell(memory_dim)
    cell = tf.contrib.rnn.OutputProjectionWrapper(cell, num_decoder_symbols)

    # W2 = tf.Variable(tf.constant(0.0, shape=[num_encoder_symbols, embedding_dim]),
    #         trainable=False, name="embedding_rnn_decoder/embedding")

    dec_outputs2, dec_memory2 = embedding_rnn_decoder(decoder_inputs,
                                                      encoder_state,
                                                      cell,
                                                      num_decoder_symbols,