Example 1
def create_cell_scopes(self):
    # Text encoder: an embedding lookup wrapped around the recurrent cell.
    self.enc_cells_text = EmbeddingWrapper(self.cell_type(self.cell_size), self.decoder_words,
                                           self.text_embedding_size)
    self.enc_scope_text = "encoder_text"
    # Glorot/Xavier uniform bound for the image-encoder projection.
    max_val = np.sqrt(6. / (self.image_rep_size + self.image_embedding_size))
    self.W_enc_img = tf.Variable(
        tf.random_uniform([self.image_rep_size, self.image_embedding_size], -1. * max_val, max_val),
        name="W_enc_img")
    self.b_enc_img = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]), name="b_enc_img")
    self.enc_scope_img = "encoder_img"
    # Two-layer utterance-level encoder.
    self.enc_cells_utter = [self.cell_type(self.cell_size), self.cell_type(self.cell_size)]
    self.enc_scope_utter = "encoder_utter"
    if self.task_type == "text":
        self.dec_cells_text = self.cell_type(self.cell_size)
        self.dec_scope_text = "decoder_text"
    if self.task_type == "image":
        # Target-side image encoder, initialized like the input-side one.
        self.tgt_scope_img = "target_encoder_img"
        max_val = np.sqrt(6. / (self.image_rep_size + self.image_embedding_size))
        self.W_enc_tgt_img = tf.Variable(
            tf.random_uniform([self.image_rep_size, self.image_embedding_size], -1. * max_val, max_val),
            name="W_enc_tgt_img")
        self.b_enc_tgt_img = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]),
                                         name="b_enc_tgt_img")

        # Projection from the utterance-encoder state into the image embedding space.
        max_val = np.sqrt(6. / (self.cell_size + self.image_embedding_size))
        self.proj_scope_utter = "proj_utter"
        self.W_proj_utter = tf.Variable(
            tf.random_uniform([self.cell_size, self.image_embedding_size], -1. * max_val, max_val),
            name="W_proj_utter")
        self.b_proj_utter = tf.Variable(tf.constant(0., shape=[self.image_embedding_size]), name="b_proj_utter")
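
The repeated max_val expression above is the Glorot (Xavier) uniform bound, sqrt(6 / (fan_in + fan_out)). A minimal sketch of a helper that would factor out this pattern (the helper name is hypothetical, not part of the original code):

import numpy as np
import tensorflow as tf

def glorot_uniform_variable(fan_in, fan_out, name):
    # Glorot/Xavier uniform: limit = sqrt(6 / (fan_in + fan_out)).
    limit = np.sqrt(6. / (fan_in + fan_out))
    return tf.Variable(
        tf.random_uniform([fan_in, fan_out], -limit, limit), name=name)

# e.g.: self.W_enc_img = glorot_uniform_variable(
#           self.image_rep_size, self.image_embedding_size, "W_enc_img")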
Example 2
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=dtypes.float32,
                                scope=None):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x embedding_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  embedding_size]). Then it runs an attention decoder, initialized with the
  last encoder state, on embedded decoder_inputs, attending to encoder outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    embedding_size: integer; length of the embedding vector for each symbol.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with vs.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = EmbeddingWrapper(cell, num_encoder_symbols, embedding_size)
    encoder_outputs, encoder_states = rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads, output_size,
          output_projection, feed_previous)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, states1 = embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads, output_size,
          output_projection, True)
      vs.get_variable_scope().reuse_variables()
      outputs2, states2 = embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, embedding_size, num_heads, output_size,
          output_projection, False)

      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      states = control_flow_ops.cond(feed_previous,
                                     lambda: states1, lambda: states2)
      return outputs, states
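
A minimal usage sketch for this function (the placeholder counts, vocabulary sizes, and GRU cell below are illustrative assumptions, using the TF 0.x-era API this example targets):

import tensorflow as tf

encoder_inputs = [tf.placeholder(tf.int32, shape=[None]) for _ in range(10)]
decoder_inputs = [tf.placeholder(tf.int32, shape=[None]) for _ in range(12)]
cell = tf.nn.rnn_cell.GRUCell(128)
outputs, states = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=10000, num_decoder_symbols=10000,
    embedding_size=64)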
Example 3
def embedding_attention_seq2seq_context(encoder_inputs,
                                        decoder_inputs,
                                        cell,
                                        num_encoder_symbols,
                                        num_decoder_symbols,
                                        embedding_size,
                                        num_heads=1,
                                        output_projection=None,
                                        feed_previous=False,
                                        dtype=dtypes.float32,
                                        scope=None):
    """A seq2seq architecture with two encoders, one for context, one for input DA. The decoder
    uses twice the cell size. Code adapted from TensorFlow examples."""

    with vs.variable_scope(scope or "embedding_attention_seq2seq_context"):

        # split context and real inputs into separate vectors
        # (integer division, so the split also works under Python 3)
        context_inputs = encoder_inputs[0:len(encoder_inputs) // 2]
        encoder_inputs = encoder_inputs[len(encoder_inputs) // 2:]

        # build separate encoders
        encoder_cell = EmbeddingWrapper(cell, num_encoder_symbols,
                                        embedding_size)
        with vs.variable_scope("context_rnn") as scope:
            context_outputs, context_states = tf06s2s.rnn(encoder_cell,
                                                          context_inputs,
                                                          dtype=dtype,
                                                          scope=scope)
        with vs.variable_scope("input_rnn") as scope:
            encoder_outputs, encoder_states = tf06s2s.rnn(encoder_cell,
                                                          encoder_inputs,
                                                          dtype=dtype,
                                                          scope=scope)

        # concatenate the outputs & states of the two encoders,
        # joining outputs, cell states and hidden states along the feature axis
        encoder_outputs = [
            array_ops.concat([co, eo],
                             axis=1,
                             name="context-and-encoder-output")
            for co, eo in zip(context_outputs, encoder_outputs)
        ]
        encoder_states = [
            (array_ops.concat([c1, c2],
                              axis=1), array_ops.concat([h1, h2], axis=1))
            for (c1, h1), (c2, h2) in zip(context_states, encoder_states)
        ]

        # calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size * 2])
            for e in encoder_outputs
        ]
        # use keyword arguments so the axis is not mistaken for the values
        attention_states = array_ops.concat(axis=1, values=top_states)

        # change the decoder cell to accommodate wider input
        # TODO this will work for BasicLSTMCell and GRUCell, but not for others
        cell = type(cell)(num_units=(cell.output_size * 2))

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return tf06s2s.embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads, output_size,
                output_projection, feed_previous)
        else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, states1 = tf06s2s.embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads, output_size,
                output_projection, True)
            vs.get_variable_scope().reuse_variables()
            outputs2, states2 = tf06s2s.embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, embedding_size, num_heads, output_size,
                output_projection, False)

            outputs = control_flow_ops.cond(feed_previous, lambda: outputs1,
                                            lambda: outputs2)
            states = control_flow_ops.cond(feed_previous, lambda: states1,
                                           lambda: states2)
            return outputs, states
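
Because the function splits encoder_inputs in half, the caller passes the context steps and the input-DA steps concatenated into one list, context first. A hypothetical calling sketch (context_steps, da_steps, and the sizes are illustrative):

# both lists hold equal-length sequences of [batch_size] int32 tensors
outputs, states = embedding_attention_seq2seq_context(
    context_steps + da_steps,  # context half first, then the input DA half
    decoder_inputs, cell,
    num_encoder_symbols=5000, num_decoder_symbols=5000,
    embedding_size=50)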
Example 4
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                copy=False,
                                attn_type="linear"):
    """Embedding sequence-to-sequence model with attention.

    This model first embeds encoder_inputs by a newly created embedding (of shape
    [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs.

    Warning: when output_projection is None, the size of the attention vectors
    and variables will be made proportional to num_decoder_symbols, which can
    be large.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each symbol.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and added B.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other decoder
        inputs will be taken from previous outputs (as in embedding_rnn_decoder).
        If False, decoder_inputs are used as given (the standard decoder case).
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are zero.
        If True, initialize the attentions from the initial state and attention
        states.
      copy: If True, use a copy mechanism during decoding to copy tokens from
        the encoder inputs.
      attn_type: The type of attention to use (defaults to "linear").
    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
          shape [batch_size x num_decoder_symbols] containing the generated
          outputs.
        state: The state of each decoder cell at the final time-step.
          It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = EmbeddingWrapper(cell,
                                        embedding_classes=num_encoder_symbols,
                                        embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        # Modify num_decoder_symbols to include the length of the source
        # (for the copy mechanism).
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                copy=copy,
                attn_type=attn_type)

        def decoder(feed_previous_bool):
            # Reuse variables on the second cond branch.
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    copy=copy,
                    attn_type=attn_type)
                # tf.cond needs both branches to return matching flat lists,
                # so flatten a (possibly nested) state into the output list.
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length matches decoder_inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
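
The decoder closure above exists because tf.cond requires both branches to return matching flat lists of tensors; the (possibly nested) decoder state is therefore flattened into the output list and repacked afterwards. A minimal, standalone sketch of that flatten/repack round-trip, using an illustrative LSTM-style (c, h) tuple:

import tensorflow as tf
from tensorflow.python.util import nest

state = (tf.zeros([2, 4]), tf.ones([2, 4]))  # illustrative (c, h) state pair
flat = nest.flatten(state)                   # -> [c, h], a flat list
rebuilt = nest.pack_sequence_as(structure=state, flat_sequence=flat)
# rebuilt has the same nested structure as the original state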
Example 5
import tensorflow as tf
from tensorflow.contrib.rnn import BasicLSTMCell
from tensorflow.contrib.rnn import DropoutWrapper
from tensorflow.contrib.rnn import MultiRNNCell
from tensorflow.contrib.rnn import EmbeddingWrapper
from tensorflow.contrib.rnn import static_rnn
import tensorflow.contrib.seq2seq as seq2seq

tf.reset_default_graph()

sess = tf.Session()

# Illustrative sizes and encoder inputs (assumed; the original snippet
# leaves these names undefined).
embedding_dim = 128
input_vocab_size = 10000
input_seq_length = 20
encode_input = [tf.placeholder(tf.int32, shape=[None])
                for _ in range(input_seq_length)]

# encoder == RNN(EmbeddingWrapper(cell))
lstm_cell = BasicLSTMCell(num_units=embedding_dim)

encoder_cell = EmbeddingWrapper(cell=lstm_cell,
                                embedding_classes=input_vocab_size,
                                embedding_size=embedding_dim)

encoder_outputs, encoder_state = static_rnn(cell=encoder_cell,
                                            inputs=encode_input,
                                            dtype=tf.float32)

# Attention mechanism over the encoder outputs. BahdanauAttention expects
# memory shaped [batch_size, max_time, depth], so stack the per-step outputs.
attn_mech = seq2seq.BahdanauAttention(num_units=input_seq_length,
                                      memory=tf.stack(encoder_outputs, axis=1),
                                      normalize=False,
                                      name='BahdanauAttention')

lstm_cell_decoder = BasicLSTMCell(embedding_dim)

attn_cell = seq2seq.DynamicAttentionWrapper(cell=lstm_cell_decoder,
                                            attention_mechanism=attn_mech,
                                            # the original snippet is truncated
                                            # here; attention_size is an assumed
                                            # completion matching the TF 1.1 API
                                            attention_size=embedding_dim)
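
Note that DynamicAttentionWrapper was a short-lived name in tf.contrib.seq2seq (around TensorFlow 1.1); later releases renamed it AttentionWrapper and attention_size became attention_layer_size, so the final call may need those spellings depending on the installed version.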