def create_scopes():
     self.descs = tf.constant(descs,
                              dtype=tf.float32,
                              shape=[self.num_types, self.num_types],
                              name='descs')
     max_val = np.sqrt(6. / (self.num_types + self.num_types))
     if self.attention_type == self.CHANGE_OF_VARIABLES:
         self.attn_aggrW = tf.Variable(tf.random_uniform(
             [self.num_types, self.num_types], -1. * max_val, max_val),
                                       name="attn_aggrW")
     self.type_embedding = tf.get_variable(
         'type_embedding',
         shape=[self.num_types, self.num_type_dim],
         initializer=tf.truncated_normal_initializer(0.0, 1.0))
     max_val = np.sqrt(6. / (self.hidden_size + self.num_type_dim))
     self.type_context_scope = "type_context_scope"
     self.type_context_W = tf.Variable(tf.random_uniform(
         [self.hidden_size, self.num_type_dim], -1. * max_val, max_val),
                                       name="W_type_context")
     self.type_context_attnW = tf.Variable(tf.random_uniform(
         [self.hidden_size, self.num_type_dim], -1. * max_val, max_val),
                                           name="attnW_type_context")
     self.type_mention_scope = "type_mention_scope"
     self.type_mention_W = tf.Variable(tf.random_uniform(
         [self.hidden_size, self.num_type_dim], -1. * max_val, max_val),
                                       name="W_type_mention")
     self.type_mention_attnW = tf.Variable(tf.random_uniform(
         [self.hidden_size, self.num_type_dim], -1. * max_val, max_val),
                                           name="attnW_type_mention")
     self.enc_rnn_left_context = rnn_cell.EmbeddingWrapper(
         rnn_cell.GRUCell(self.hidden_size), self.num_words,
         self.num_word_dim)
     self.enc_scope_left_context = "encoder_left_context"
     self.enc_rnn_right_context = rnn_cell.EmbeddingWrapper(
         rnn_cell.GRUCell(self.hidden_size), self.num_words,
         self.num_word_dim)
     self.enc_scope_right_context = "encoder_right_context"
     self.enc_rnn_mention = rnn_cell.EmbeddingWrapper(
         rnn_cell.GRUCell(self.hidden_size), self.num_mention_words,
         self.num_mention_dim)
     self.enc_scope_mention = "encoder_mention"
     self.dec_cells = rnn_cell.GRUCell(self.hidden_size)
     self.dec_scope = "decoder"
     max_val = np.sqrt(6. / (self.num_types + self.hidden_size))
     self.dec_weights = tf.get_variable(
         "dec_weights", [self.hidden_size, self.num_type_dim],
         initializer=tf.random_uniform_initializer(
             -1. * max_val, max_val))
     self.dec_biases = tf.get_variable(
         "dec_biases", [self.num_type_dim],
         initializer=tf.constant_initializer(0.0))
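The repeated max_val = np.sqrt(6. / (fan_in + fan_out)) expression above is the Glorot/Xavier uniform initialization bound. A minimal, self-contained sketch of the same pattern in plain NumPy (the sizes and the helper name are illustrative, not taken from the snippet above):

import numpy as np

def xavier_uniform(fan_in, fan_out, rng=np.random):
    # Same bound as above: sqrt(6 / (fan_in + fan_out)).
    max_val = np.sqrt(6. / (fan_in + fan_out))
    return rng.uniform(-max_val, max_val, size=(fan_in, fan_out)).astype(np.float32)

# e.g. a [hidden_size, num_type_dim] projection like type_context_W above.
W = xavier_uniform(512, 128)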
Example #2
def embedding_attention_encoder_seq2seq(enc_inp, cell, num_encoder_symbols, embedding_size):
    with variable_scope.variable_scope("embedding_attention_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, embedding_classes=num_encoder_symbols, embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell, enc_inp, dtype=dtypes.float32)
    return encoder_outputs, encoder_state
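A minimal usage sketch for the encoder-only helper above, assuming the legacy TF 0.x API these examples use (tf.placeholder, rnn_cell.GRUCell, list-of-steps inputs); the sequence length, vocabulary size, and embedding size below are illustrative.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

seq_len, vocab_size, emb_size, hidden = 10, 5000, 128, 256
# One 1D int32 tensor of token ids per time step, as rnn.rnn expects.
enc_inp = [tf.placeholder(tf.int32, shape=[None], name="enc%d" % t)
           for t in range(seq_len)]
cell = rnn_cell.GRUCell(hidden)
encoder_outputs, encoder_state = embedding_attention_encoder_seq2seq(
    enc_inp, cell, num_encoder_symbols=vocab_size, embedding_size=emb_size)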
Example #3
 def _rnn(self, name, enc_inputs):
     encoder_cell = rnn_cell.EmbeddingWrapper(self.cell, self.dict_size)
     _, encoder_states = rnn.rnn(encoder_cell, enc_inputs, dtype=tf.float32)
     w = tf.get_variable(name + '-w', (self.cell.state_size, self.num_outputs),
                         initializer=tf.random_normal_initializer(stddev=0.1))
     b = tf.get_variable(name + 'b', (self.num_outputs,), initializer=tf.constant_initializer())
     return tf.matmul(encoder_states[-1], w) + b
 def create_cell_scopes():
     self.enc_cells_text = rnn_cell.EmbeddingWrapper(self.cell_type(self.cell_size), self.decoder_words,
                                                     self.text_embedding_size)
     self.enc_scope_text = "encoder_text"
     max_val = np.sqrt(6. / (self.image_rep_size + self.image_embedding_size))
     self.W_enc_img = tf.Variable(
         tf.random_uniform([self.image_rep_size, self.image_embedding_size], -1. * max_val, max_val),
         name="W_enc_img")
     self.b_enc_img = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]), name="b_enc_img")
     self.enc_scope_img = "encoder_img"
     self.enc_cells_utter = self.cell_type(self.cell_size)
     self.enc_scope_utter = "encoder_utter"
     if self.task_type == "text":
         self.dec_cells_text = self.cell_type(self.cell_size)
         self.dec_scope_text = "decoder_text"
     if self.task_type == "image":
         self.tgt_scope_img = "target_encoder_img"
         self.W_enc_tgt_img = tf.Variable(
             tf.random_uniform([self.image_rep_size, self.image_embedding_size], -1. * max_val, max_val),
             name="W_enc_tgt_img")
         self.b_enc_tgt_img = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]),
                                          name="b_enc_tgt_img")
         max_val = np.sqrt(6. / (self.cell_size + self.image_embedding_size))
         self.proj_scope_utter = "proj_utter"
         self.W_proj_utter = tf.Variable(
             tf.random_uniform([self.cell_size, self.image_embedding_size], -1. * max_val, max_val),
             name="W_proj_utter")
         self.b_proj_utter = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]), name="b_proj_utter")
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=dtypes.float32,
                          scope=None, beam_search=True, beam_size=10):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    beam_search: Boolean; forwarded to embedding_rnn_decoder to enable
      beam-search decoding.
    beam_size: Integer beam width, forwarded to embedding_rnn_decoder.
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)


    return embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          embedding_size, output_projection=output_projection,
          feed_previous=feed_previous, beam_search=beam_search, beam_size=beam_size)
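A usage sketch for the beam-search variant above (the exact return structure depends on that code base's embedding_rnn_decoder, so the result is left unpacked); the placeholder names, sizes, and beam width are illustrative assumptions.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

enc_len, dec_len, src_vocab, tgt_vocab, emb, hidden = 15, 15, 8000, 8000, 128, 256
encoder_inputs = [tf.placeholder(tf.int32, [None], name="enc%d" % t) for t in range(enc_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name="dec%d" % t) for t in range(dec_len)]
cell = rnn_cell.GRUCell(hidden)
# feed_previous=True: decode from the GO symbol, feeding back predictions;
# beam_search and beam_size are forwarded to embedding_rnn_decoder.
decoded = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=src_vocab, num_decoder_symbols=tgt_vocab,
    embedding_size=emb, feed_previous=True, beam_search=True, beam_size=5)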
Example #6
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=dtypes.float32,
                          scope=None):
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                                         encoder_state,
                                         cell,
                                         num_decoder_symbols,
                                         embedding_size,
                                         output_projection=output_projection,
                                         feed_previous=feed_previous)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs,
                    encoder_state,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
 def testEmbeddingWrapper(self):
     with self.test_session() as sess:
         with tf.variable_scope("root",
                                initializer=tf.constant_initializer(0.5)):
             x = tf.zeros([1, 1], dtype=tf.int32)
             m = tf.zeros([1, 2])
             g, new_m = rnn_cell.EmbeddingWrapper(rnn_cell.GRUCell(2), 3)(x,
                                                                          m)
             sess.run([tf.initialize_all_variables()])
             res = sess.run([g, new_m], {
                 x.name: np.array([[1]]),
                 m.name: np.array([[0.1, 0.1]])
             })
             self.assertEqual(res[1].shape, (1, 2))
             # The numbers in results were not calculated, this is just a smoke test.
             self.assertAllClose(res[0], [[0.17139, 0.17139]])
        def create_cell_scopes():
            '''Create the cells and scopes for the word-level encoder and the decoder. The word-level and utterance-level encoders use separate scopes (no parameter sharing).'''
            for i in range(self.enc_layers):
                if i == self.enc_layers - 1:
                    #Bidirectional RNN at utterance level, forward and backward cell.
                    self.enc_cells.append([
                        self.cell_type(self.cell_size),
                        self.cell_type(self.cell_size)
                    ])
                else:
                    self.enc_cells.append(self.cell_type(self.cell_size))

            self.enc_cells[0] = rnn_cell.EmbeddingWrapper(
                self.enc_cells[0], self.decoder_words, self.embedding_size)
            self.enc_scopes.append(
                "encoder_{}".format(0))  #Word level encoder scope
            self.dec_scopes.append("decoder_{}".format(0))  #Decoder scope
            self.dec_cells.append(self.cell_type(self.cell_size))
Example #9
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None,
                                initial_state_attention=False,
                                scheduling_rate=1.0):
    """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    scheduling_rate: Scheduled-sampling rate; forwarded to
      embedding_attention_decoder.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                 encoder_inputs,
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        # feed_previous: feed the previous prediction back as the next decoder
        # input (only the GO symbol is read from decoder_inputs). This
        # forward-only path is used for validation/testing.
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                scheduling_rate=scheduling_rate)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    scheduling_rate=scheduling_rate)
                #print (outputs)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
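When feed_previous is a scalar bool Tensor instead of a Python bool, the function above builds both decoder graphs and picks one with control_flow_ops.cond, so a single graph can switch between teacher forcing and feeding back predictions. A usage sketch under the same legacy TF 0.x API (all names and sizes are illustrative):

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

enc_len, dec_len, vocab, emb, hidden = 20, 20, 10000, 128, 256
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(enc_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)]
# Scalar bool tensor: False for teacher forcing at train time,
# True to feed back predictions at eval time, chosen per session.run call.
feed_previous = tf.placeholder(tf.bool, shape=[], name="feed_previous")
cell = rnn_cell.GRUCell(hidden)
outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    embedding_size=emb, feed_previous=feed_previous, scheduling_rate=0.9)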
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):

    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                 encoder_inputs,
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            print("~~~~~~~~~~~")
            outputs, state = embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                scope=scope)
            return outputs, state, encoder_state

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    scope=scope)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state, encoder_state
Example #11
def embedding_attention_pointer_seq2seq_states(encoder_inputs,
                                               decoder_inputs,
                                               cell,
                                               num_encoder_symbols,
                                               num_decoder_symbols,
                                               embedding_size,
                                               num_heads=1,
                                               output_projection=None,
                                               feed_previous=False,
                                               dtype=tf.float32,
                                               scope=None,
                                               initial_state_attention=False):
    with variable_scope.variable_scope(
            scope or "embedding_attention_pointer_seq2seq_states") as scope:
        # Encoder.
        encoder_initial_state = tf.placeholder(dtype, [None, cell.state_size],
                                               "encoder_initial_state")
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(
            encoder_cell,
            encoder_inputs,
            initial_state=encoder_initial_state,
            dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            raise Exception("feed_previous must be a tensor!")
        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, decoder_state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                return outputs, decoder_state

        true_outputs, true_decoder_state = decoder(True)
        false_outputs, false_decoder_state = decoder(False)
        outputs = tf.cond(feed_previous, lambda: true_outputs,
                          lambda: false_outputs)
        return outputs, encoder_initial_state, encoder_state, (
            true_decoder_state, false_decoder_state)
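The stateful pointer variant above requires feed_previous to be a bool Tensor and, besides the outputs, also returns the placeholder for the encoder's initial state together with the final encoder and decoder states, so state can be threaded across calls. An illustrative call sketch (names and sizes are assumptions):

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

enc_len, dec_len, vocab, emb, hidden = 8, 8, 4000, 64, 128
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(enc_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)]
feed_previous = tf.placeholder(tf.bool, shape=[])
cell = rnn_cell.GRUCell(hidden)
outputs, enc_init_state, enc_state, dec_states = \
    embedding_attention_pointer_seq2seq_states(
        encoder_inputs, decoder_inputs, cell,
        num_encoder_symbols=vocab, num_decoder_symbols=vocab,
        embedding_size=emb, feed_previous=feed_previous)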
Example #12
def one2many_rnn_seq2seq(encoder_inputs, decoder_inputs_dict, cell,
                         num_encoder_symbols, num_decoder_symbols_dict,
                         embedding_size, feed_previous=False,
                         dtype=dtypes.float32, scope=None):
  """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and multiple
  decoders. Reference to multi-task sequence-to-sequence learning can be found
  here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionary mapping decoder name (string) to
      the corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to an
      integer specifying number of symbols for the corresponding decoder;
      len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of
      decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq"

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the same
        length as decoder_inputs_dict[name]; each element in the list is a 2D
        Tensors with shape [batch_size x num_decoder_symbol_list[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of the
        corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
  outputs_dict = {}
  state_dict = {}

  with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]

      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn_cell.OutputProjectionWrapper(cell,
                                                        num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs, encoder_state, decoder_cell, num_decoder_symbols,
              embedding_size, feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs, encoder_state, decoder_cell,
                  num_decoder_symbols, embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            return outputs + [state]
          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          outputs = outputs_and_state[:-1]
          state = outputs_and_state[-1]

      outputs_dict[name] = outputs
      state_dict[name] = state

  return outputs_dict, state_dict
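A usage sketch for the multi-task one2many_rnn_seq2seq above, with two hypothetical decoders named "fr" and "de"; the vocabulary sizes and sequence lengths are illustrative.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

enc_len, dec_len, hidden, emb = 12, 12, 256, 128
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(enc_len)]
decoder_inputs_dict = {
    "fr": [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)],
    "de": [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)],
}
num_decoder_symbols_dict = {"fr": 9000, "de": 11000}
cell = rnn_cell.GRUCell(hidden)
outputs_dict, state_dict = one2many_rnn_seq2seq(
    encoder_inputs, decoder_inputs_dict, cell,
    num_encoder_symbols=7000,
    num_decoder_symbols_dict=num_decoder_symbols_dict,
    embedding_size=emb, feed_previous=True)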
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                num_layers=3,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    with tf.device('/gpu:0'):
        single_cell_1 = rnn_cell.LSTMCell(embedding_size)
    with tf.device('/gpu:1'):
        single_cell_2 = rnn_cell.LSTMCell(embedding_size)

    encoder_fw_cell = rnn_cell.EmbeddingWrapper(single_cell_1, embedding_classes=num_encoder_symbols,
                                                embedding_size=embedding_size)
    encoder_bw_cell = rnn_cell.EmbeddingWrapper(single_cell_2, embedding_classes=num_encoder_symbols,
                                                embedding_size=embedding_size)
    outputs, _, _ = rnn.bidirectional_rnn(encoder_fw_cell, encoder_bw_cell, encoder_inputs, dtype=dtype)

    list_of_cell = []
    for layer in xrange(num_layers):
      if layer % 2 == 0:
        with tf.device('/gpu:0'):
          single_cell = tf.nn.rnn_cell.LSTMCell(embedding_size)
        list_of_cell.append(single_cell)
      else:
        with tf.device('/gpu:1'):
          single_cell = tf.nn.rnn_cell.LSTMCell(embedding_size)
        list_of_cell.append(single_cell)

    cell2 = Stack_Residual_RNNCell.Stack_Residual_RNNCell(list_of_cell)

    encoder_outputs, encoder_state = rnn.rnn(
        cell2, outputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
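The variant above alternates its LSTM layers across two GPUs with tf.device before stacking them. A minimal sketch of that placement pattern (device strings and sizes are illustrative; Stack_Residual_RNNCell is specific to that code base, so a plain MultiRNNCell stands in for it here):

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

num_layers, units = 4, 256
cells = []
for layer in range(num_layers):
    # Even layers on GPU 0, odd layers on GPU 1, as in the example above.
    with tf.device('/gpu:%d' % (layer % 2)):
        cells.append(rnn_cell.LSTMCell(units))
stacked_cell = rnn_cell.MultiRNNCell(cells)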
Example #14
def embedding_attention_seq2seq_context(encoder_inputs, decoder_inputs, cell,
                                        num_encoder_symbols, num_decoder_symbols,
                                        num_heads=1, output_projection=None,
                                        feed_previous=False, dtype=dtypes.float32,
                                        scope=None):
    """A seq2seq architecture with two encoders, one for context, one for input DA. The decoder
    uses twice the cell size. Code adapted from TensorFlow examples."""

    with vs.variable_scope(scope or "embedding_attention_seq2seq_context"):

        # split context and real inputs into separate vectors
        context_inputs = encoder_inputs[0:len(encoder_inputs)/2]
        encoder_inputs = encoder_inputs[len(encoder_inputs)/2:]

        # build separate encoders
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        with vs.variable_scope("context_rnn") as scope:
            context_outputs, context_states = rnn.rnn(
                    encoder_cell, context_inputs, dtype=dtype, scope=scope)
        with vs.variable_scope("input_rnn") as scope:
            encoder_outputs, encoder_states = rnn.rnn(
                    encoder_cell, encoder_inputs, dtype=dtype, scope=scope)

        # concatenate outputs & states
        encoder_outputs = [array_ops.concat(1, [co, eo], name="context-and-encoder-output")
                           for co, eo in zip(context_outputs, encoder_outputs)]
        encoder_states = [array_ops.concat(1, [cs, es], name="context-and-encoder-state")
                          for cs, es in zip(context_states, encoder_states)]

        # calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)

        # change the decoder cell to accommodate wider input
        # TODO this will work for BasicLSTMCell and GRUCell, but not for others
        cell = type(cell)(num_units=(cell.input_size * 2))

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                    decoder_inputs, encoder_states[-1], attention_states, cell,
                    num_decoder_symbols, num_heads, output_size, output_projection,
                    feed_previous)
        else:    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, states1 = embedding_attention_decoder(
                    decoder_inputs, encoder_states[-1], attention_states, cell,
                    num_decoder_symbols, num_heads, output_size, output_projection, True)
            vs.get_variable_scope().reuse_variables()
            outputs2, states2 = embedding_attention_decoder(
                    decoder_inputs, encoder_states[-1], attention_states, cell,
                    num_decoder_symbols, num_heads, output_size, output_projection, False)

            outputs = control_flow_ops.cond(feed_previous,
                                            lambda: outputs1, lambda: outputs2)
            states = control_flow_ops.cond(feed_previous,
                                           lambda: states1, lambda: states2)
            return outputs, states
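The context-aware variant above expects the context tokens and the input DA tokens concatenated into a single encoder_inputs list (context first, input second, equal halves), and it takes no embedding_size argument. An illustrative call sketch (names and sizes are assumptions):

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

ctx_len, inp_len, dec_len, vocab, hidden = 10, 10, 12, 6000, 128
# Context tokens first, then the input DA tokens; the function splits halfway.
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(ctx_len + inp_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(dec_len)]
cell = rnn_cell.GRUCell(hidden)
outputs, states = embedding_attention_seq2seq_context(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab, feed_previous=True)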
Example #15
def embedding_attention_encoder(encoder_inputs,
                                cell,
                                num_encoder_symbols,
                                embedding_size,
                                dtype=None,
                                scope=None):
    """Embedding sequence-to-sequence model with attention.

  This encoder-only helper embeds encoder_inputs by a newly created embedding
  (of shape [num_encoder_symbols x embedding_size]), runs an RNN over the
  embedded inputs, and keeps the outputs of this RNN at every step so that a
  separate attention decoder can attend to them later.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    dtype: The dtype of the initial RNN state; defaults to the scope's dtype.
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_encoder".

  Returns:
    A tuple of the form (encoder_state, attention_states), where:
      encoder_state: The final state of the encoder RNN; a 2D Tensor of shape
        [batch_size x cell.state_size].
      attention_states: A 3D Tensor of shape
        [batch_size x len(encoder_inputs) x cell.output_size] containing the
        concatenated encoder outputs to put attention on.
  """
    with variable_scope.variable_scope(scope or "embedding_attention_encoder",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                 encoder_inputs,
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        return encoder_state, attention_states
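A usage sketch for the encoder-only helper above; it returns the final encoder state plus the stacked attention_states that a separate attention decoder can consume (names and sizes are illustrative).

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy TF 0.x import path (assumption)

enc_len, vocab, emb, hidden = 30, 20000, 128, 512
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(enc_len)]
cell = rnn_cell.GRUCell(hidden)
# attention_states has shape [batch_size, enc_len, cell.output_size].
encoder_state, attention_states = embedding_attention_encoder(
    encoder_inputs, cell, num_encoder_symbols=vocab, embedding_size=emb)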
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq"

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
        output is of shape [batch_size x cell.output_size] when
        output_projection is not None (and represents the dense representation
        of predicted tokens). It is of shape [batch_size x num_decoder_symbols]
        when output_projection is None.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
Example #17
def embedding_kv_attention_seq2seq(encoder_inputs,
                                   decoder_inputs,
                                   kb_inputs,
                                   kb_mask_inputs,
                                   cell,
                                   num_encoder_symbols,
                                   num_decoder_symbols,
                                   embedding_size,
                                   output_projection=None,
                                   feed_previous=False,
                                   attn_type="linear",
                                   enc_attn=False,
                                   use_types=False,
                                   type_to_idx=None,
                                   use_bidir=False,
                                   seq_lengths=None,
                                   enc_query=False,
                                   dtype=None,
                                   scope=None):
    """Embedding sequence-to-sequence model with attention over a KB.

    This model first embeds encoder_inputs by a newly created embedding
    (of shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs
    as well as an embedded KB.

    Warning: when output_projection is None, the size of the attention vectors
    and variables will be made proportional to num_decoder_symbols, which can be large.

    Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    kb_inputs: Kbs for the given batch of dialogues
    kb_col_inputs: Column indices for given batch of kbs
    kb_mask_inputs: Kb masks for the given batch of dialogues
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_kb_attention_seq2seq".

    Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    if type_to_idx is not None:
        # Mapping from entity type to idx for augmenting encoder input
        num_entity_types = len(type_to_idx.keys())
        entity_encoding = np.zeros((num_entity_types, num_entity_types - 1),
                                   dtype=np.float32)
        for idx in range(num_entity_types - 1):
            entity_encoding[idx, idx] = 1.

    with variable_scope.variable_scope(scope
                                       or "embedding_kb_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        if use_types:
            print "Typed Encoder Inputs..."
            # Augment encoder inputs
            encoder_cell = CustomEmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size,
                entity_encoding=entity_encoding)
        else:
            print "Regular encoding..."
            # Just regular encoding
            encoder_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)

        # Use bidirectional encoding
        if use_bidir:
            encoder_cell_backward = copy.deepcopy(encoder_cell)
            encoder_outputs, encoder_state_fw, encoder_state_bw =\
                rnn.bidirectional_rnn(encoder_cell, encoder_cell_backward,
                                      encoder_inputs, dtype=dtype,
                                      sequence_length=seq_lengths)
            combined_c = tf.concat(1, [encoder_state_fw.c, encoder_state_bw.c])
            combined_h = tf.concat(1, [encoder_state_fw.h, encoder_state_bw.h])
            encoder_state = rnn_cell.LSTMStateTuple(c=combined_c, h=combined_h)
        else:
            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs,
                                                     dtype=dtype)

        # First calculate a concatenation of encoder outputs
        # to put attention on.
        if use_bidir:
            top_states = [
                array_ops.reshape(e, [-1, 1, 2 * cell.output_size])
                for e in encoder_outputs
            ]
        else:
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]

        attention_states = array_ops.concat(1, top_states)
        if output_projection is None:
            if use_bidir:
                # Modify dimension of decoder rnn_size
                cell = rnn_cell.BasicLSTMCell(2 * cell.output_size,
                                              state_is_tuple=True)
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = cell.output_size

        if isinstance(feed_previous, bool):
            return kv_attention_decoder(cell,
                                        decoder_inputs,
                                        kb_inputs,
                                        kb_mask_inputs,
                                        encoder_state,
                                        attention_states,
                                        num_decoder_symbols,
                                        embedding_size=embedding_size,
                                        output_size=output_size,
                                        feed_previous=feed_previous,
                                        attn_type=attn_type,
                                        enc_attn=enc_attn,
                                        enc_query=enc_query,
                                        dtype=dtype)
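
# A minimal sketch (not part of the original code) of the forward/backward
# state merge used above when use_bidir is set. The helper name and the sizes
# (batch 8, 64 units) are hypothetical; it assumes the same TF 0.x-era
# rnn_cell API used throughout this file.
def _bidir_state_merge_sketch():
    fw = rnn_cell.LSTMStateTuple(c=tf.zeros([8, 64]), h=tf.zeros([8, 64]))
    bw = rnn_cell.LSTMStateTuple(c=tf.zeros([8, 64]), h=tf.zeros([8, 64]))
    # Concatenate cell and hidden states along the feature axis, so the merged
    # state is twice as wide as a single direction.
    merged = rnn_cell.LSTMStateTuple(c=tf.concat(1, [fw.c, bw.c]),
                                     h=tf.concat(1, [fw.h, bw.h]))
    # merged.c / merged.h are [8, 128]; this is why the decoder cell above is
    # rebuilt as BasicLSTMCell(2 * cell.output_size) before the projection.
    return merged
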
Exemple #18
0
def dialog_attention_seq2seq(encoder_inputs,
                             decoder_inputs,
                             cell,
                             vocab_size,
                             num_heads=1,
                             output_projection=None,
                             feed_previous=False,
                             dtype=dtypes.float32,
                             scope=None,
                             initial_state_attention=False):
    if len(encoder_inputs) != len(decoder_inputs):
        raise ValueError("encoder_inputs and decoder_inputs must contain the "
                         "same number of dialog turns.")

    with variable_scope.variable_scope(scope or "dialog_attention_seq2seq"):

        encoder_cell = rnn_cell.EmbeddingWrapper(cell, vocab_size)
        outputs = []

        fixed_batch_size = encoder_inputs[0][0].get_shape().with_rank_at_least(
            1)[0]
        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(encoder_inputs[0][0])[0]

        drnn_state = cell.zero_state(batch_size, dtype)

        # Build the projection-wrapped decoder cell once, outside the per-turn
        # loop, so it is not re-wrapped on every dialog turn; the raw `cell`
        # is kept for the dialog-level DRNN state updates.
        output_size = None
        dec_cell = cell
        if output_projection is None:
            dec_cell = rnn_cell.OutputProjectionWrapper(cell, vocab_size)
            output_size = vocab_size

        for i in range(0, len(encoder_inputs)):
            if i > 0: variable_scope.get_variable_scope().reuse_variables()

            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs[i],
                                                     dtype=dtype)

            # First calculate a concatenation of encoder outputs to put attention on.
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(1, top_states)

            with variable_scope.variable_scope("DRNN"):
                drnn_out, drnn_state = cell(encoder_state, drnn_state)

            # Decoder (uses the projection-wrapped cell built before the loop).
            answer_output, answer_state = embedding_attention_decoder(
                decoder_inputs[i],
                drnn_state,
                attention_states,
                dec_cell,
                vocab_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

            outputs.append(answer_output)
            with variable_scope.variable_scope("DRNN", reuse=True):
                drnn_out, drnn_state = cell(answer_state, drnn_state)

        return outputs, drnn_state
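
# A hypothetical call sketch for dialog_attention_seq2seq above (not from the
# original repository): each dialog turn is a separate list of int32 id
# tensors, with one decoder turn per encoder turn. The sizes below (3 turns,
# length 15, batch 16, vocab 5000, 256 units) are made up, and the TF 0.x-era
# rnn_cell API used in this file is assumed.
def _dialog_seq2seq_usage_sketch():
    num_turns, seq_len, batch_size, vocab = 3, 15, 16, 5000
    enc_inp = [[tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
               for _ in range(num_turns)]
    dec_inp = [[tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
               for _ in range(num_turns)]
    cell = rnn_cell.GRUCell(256)
    # Returns one list of decoder outputs per turn plus the final DRNN state.
    outputs, final_drnn_state = dialog_attention_seq2seq(
        enc_inp, dec_inp, cell, vocab_size=vocab, feed_previous=True)
    return outputs, final_drnn_state
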
Exemple #19
0
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          output_projection=None, feed_previous=False,
                          dtype=dtypes.float32, scope=None):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq"

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    state: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with vs.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(decoder_inputs, encoder_state, cell,
                                   num_decoder_symbols, output_projection,
                                   feed_previous)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, state1 = embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          output_projection, True)
      vs.get_variable_scope().reuse_variables()
      outputs2, state2 = embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          output_projection, False)

      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      state = control_flow_ops.cond(feed_previous,
                                    lambda: state1, lambda: state2)
      return outputs, state
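
# A hypothetical usage sketch for embedding_rnn_seq2seq above (not from the
# original code). feed_previous=True corresponds to decoding from the "GO"
# symbol while feeding back previous outputs; the sizes (batch 32, length 10,
# vocab 1000, 128 units) are made up, and the TF 0.x-era rnn_cell API used in
# this file is assumed.
def _embedding_rnn_seq2seq_usage_sketch():
    batch_size, seq_len, vocab = 32, 10, 1000
    enc_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    dec_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    cell = rnn_cell.GRUCell(128)
    outputs, state = embedding_rnn_seq2seq(
        enc_inp, dec_inp, cell,
        num_encoder_symbols=vocab, num_decoder_symbols=vocab,
        feed_previous=True)
    # Each element of `outputs` is a [batch_size, num_decoder_symbols] logit
    # tensor, one per decoder time-step.
    return outputs, state
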
def embedding_attention_seq2seq(encoder_inputs,
                                context_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
    #print("Inside Method Embedding Attention Seq2Seq")

    #print("Shape of encoder input {0}".format(len(encoder_inputs)))
    #print("Shape of decoder input {0}".format(len(decoder_inputs)))
    #print("num_encoder_symbols = {0}".format(num_encoder_symbols))
    ###print("num_decoder_symbols {0}".format(num_decoder_symbols))
    #print("embedding_size {0}".format(embedding_size))
    print("output_projection {0}".format(output_projection))

    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype,
                                       reuse=None) as scope:
        dtype = scope.dtype

        with variable_scope.variable_scope("encoder") as scope2:  # Encoder.
            encoder_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)
            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs,
                                                     dtype=dtype)
            #print(type(encoder_outputs))
            #np.savetxt('encoder_output.txt', encoder_outputs)
            #np.savetxt('encoder_state.txt', encoder_state)

        with variable_scope.variable_scope("context") as scope3:
            context_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)
            context_outputs, context_state = rnn.rnn(context_cell,
                                                     context_inputs,
                                                     dtype=dtype)

            #target = open("context_output.txt", 'w')
            #for out in context_outputs:
            #target.write(str(out))
            #target.write("\n")
            #target.close()

            #np.savetxt('context_output.txt', context_outputs)
            #np.savetxt('context_state.txt', context_state)

        #print("The dimension of context state {0}".format(context_state))
        #context_state = tf.Print(context_state,[context_state],message="Printing the context State")
        #context_state.eval()
        #tf.add(context_state,context_state).eval()

        #with tf.session as session_c:
        #session_c.run(context_state)

        #print("Inside method embedding_attention_seq2seq. Encoder Outputs {0} Encode State {1}".format(
        #np.shape(encoder_outputs), np.shape(encoder_state)))

        #encoder_outputs = tf.add(encoder_outputs, context_outputs)
        #encoder_state = tf.add(encoder_state, context_state)

        for i in range(len(encoder_outputs)):
            encoder_outputs[i] = tf.add(encoder_outputs[i], context_outputs[i])

        temp = []
        for i in range(len(encoder_state)):
            temp.append(tf.add(encoder_state[i], context_state[i]))
        encoder_state = tuple(temp)

        #print(type(encoder_outputs))
        #print(type(encoder_outputs[0]))
        #print(type(encoder_state))
        #print(encoder_state)

        print(
            "Inside method embedding_attention_seq2seq. Encoder Outputs {0} Encode State {1}"
            .format(np.shape(encoder_outputs), np.shape(encoder_state)))

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        print("Attention States has been created of size {0}".format(
            np.shape(attention_states)))
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
            print("The output size is {0}".format(output_size))

        if isinstance(feed_previous, bool):
            print("Number of heads {0}".format(num_heads))
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
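
# A hypothetical call sketch for the two-encoder variant above (not from the
# original code): the utterance and its context are encoded separately and
# fused by elementwise addition of their outputs and states. Because the code
# iterates over encoder_state, the cell is assumed to be a multi-layer cell
# whose state is a tuple of per-layer tensors; all sizes below are made up.
def _context_fusion_usage_sketch():
    seq_len, batch_size, vocab, dim = 20, 32, 8000, 256
    enc_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    ctx_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    dec_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    cell = rnn_cell.MultiRNNCell([rnn_cell.GRUCell(dim)] * 2,
                                 state_is_tuple=True)
    outputs, state = embedding_attention_seq2seq(
        enc_inp, ctx_inp, dec_inp, cell,
        num_encoder_symbols=vocab, num_decoder_symbols=vocab,
        embedding_size=dim, feed_previous=True)
    return outputs, state
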
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                batch_size,
                                state_size,
                                decoder_inputs_positions=None,
                                decoder_inputs_maps=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None):
    """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    batch_size: integer; the batch size, needed by the decoder when decoding.
    state_size: integer; dimensionality of the environment vectors returned
      by the decoder.
    decoder_inputs_positions: a list of 2D Tensors of shape [batch_size, 3].
    decoder_inputs_maps: a 1D Tensor of length batch_size.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
    attentions: a list of 2D Tensors of shape [batch_size, cell.state_size].
    environments: a list of 2D Tensors of shape [batch_size, state_size].

  """
    with vs.variable_scope(scope or "embedding_attention_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            num_encoder_symbols,
            initializer=init_ops.random_uniform_initializer(-0.08, 0.08))
        encoder_outputs, encoder_states = rnn.rnn(encoder_cell,
                                                  encoder_inputs,
                                                  dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_states[-1],
                attention_states,
                cell,
                num_decoder_symbols,
                batch_size,
                state_size,
                decoder_inputs_positions=decoder_inputs_positions,
                decoder_inputs_maps=decoder_inputs_maps,
                output_size=output_size,
                feed_previous=feed_previous)
        else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            # We don't consider this case.
            raise ValueError("Imcompatible variable feed_previous.\n")
Exemple #22
0
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=dtypes.float32,
                                scope=None, initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    state: The state of each decoder cell at the final time-step.
      It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with vs.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    encoder_outputs, encoder_state = rnn.rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection,
          feed_previous, initial_state_attention=initial_state_attention)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, state1 = embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection, True,
          initial_state_attention=initial_state_attention)
      vs.get_variable_scope().reuse_variables()
      outputs2, state2 = embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection, False,
          initial_state_attention=initial_state_attention)

      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      state = control_flow_ops.cond(feed_previous,
                                    lambda: state1, lambda: state2)
      return outputs, state
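
# A hypothetical sketch (not from the original code) of the second branch
# above: when feed_previous is a scalar bool tensor rather than a Python bool,
# a single graph can switch between teacher forcing and feeding back its own
# predictions at run time. The placeholder names and sizes are made up.
def _feed_previous_switch_sketch():
    batch_size, seq_len, vocab = 32, 10, 1000
    enc_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    dec_inp = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
    cell = rnn_cell.GRUCell(128)
    feed_prev = tf.placeholder(tf.bool, [], name="feed_previous")
    outputs, state = embedding_attention_seq2seq(
        enc_inp, dec_inp, cell, num_encoder_symbols=vocab,
        num_decoder_symbols=vocab, feed_previous=feed_prev)
    # At run time: feed {feed_prev: False} while training (use the given
    # decoder inputs) and {feed_prev: True} when decoding from the model's
    # own predictions.
    return outputs, state
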
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):

    with variable_scope.variable_scope(scope
                                       or "embedding_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype

        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                                         encoder_state,
                                         cell,
                                         num_decoder_symbols,
                                         embedding_size,
                                         output_projection=output_projection,
                                         feed_previous=feed_previous,
                                         scope=scope)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs,
                    encoder_state,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state