Example #1
def translate_model(X, y):
    byte_list = skflow.ops.one_hot_matrix(X, 256)
    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(
        byte_list, y, MAX_DOCUMENT_LENGTH, MAX_DOCUMENT_LENGTH)
    cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(HIDDEN_SIZE), 256)
    decoding, _, sampling_decoding, _ = rnn_seq2seq(in_X, in_y, cell)
    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)
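A minimal sketch of how such a model function would typically be hooked up, assuming the module-level constants and the old skflow estimator API that this snippet appears to target (the constant values and the commented-out wiring below are illustrative, not taken from the original):

MAX_DOCUMENT_LENGTH = 10   # assumed value, for illustration only
HIDDEN_SIZE = 128          # assumed value, for illustration only

# Hypothetical wiring; the skflow of this era accepted a model_fn like translate_model.
# translator = skflow.TensorFlowEstimator(model_fn=translate_model, n_classes=256)
# translator.fit(X_train, y_train)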
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=dtypes.float32,
                          scope=None, beam_search=True, beam_size=10):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    return embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          embedding_size, output_projection=output_projection,
          feed_previous=feed_previous, beam_search=beam_search, beam_size=beam_size)
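A hedged usage sketch of the inputs this function expects: lists of per-time-step 1D int32 tensors, one list for the encoder and one for the decoder. Step counts, symbol counts, and the commented-out call (including the beam arguments specific to this variant) are illustrative:

import tensorflow as tf

enc_steps, dec_steps = 8, 8
encoder_inputs = [tf.placeholder(tf.int32, [None], name="enc%d" % i)
                  for i in range(enc_steps)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name="dec%d" % i)
                  for i in range(dec_steps)]
cell = tf.nn.rnn_cell.GRUCell(128)
# outputs, state = embedding_rnn_seq2seq(
#     encoder_inputs, decoder_inputs, cell,
#     num_encoder_symbols=256, num_decoder_symbols=256, embedding_size=64,
#     feed_previous=True, beam_search=False)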
Example #3
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell, num_encoder_symbols,
                          num_decoder_symbols, embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None,
                          beam_search=True,
                          beam_size=10):
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = tf.contrib.rnn.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = tf.contrib.rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          embedding_size, output_projection=output_projection,
          feed_previous=feed_previous, scope=scope,
          beam_search=beam_search, beam_size=beam_size)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            beam_search=beam_search,
            beam_size=beam_size)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
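The second half of this variant handles feed_previous given as a scalar boolean Tensor rather than a Python bool; a sketch of how that is typically driven (the placeholder name is illustrative):

import tensorflow as tf

# A scalar boolean that can be flipped per session.run call.
feed_previous = tf.placeholder_with_default(False, shape=[], name="feed_previous")
# outputs, state = embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
#                                        num_encoder_symbols, num_decoder_symbols,
#                                        embedding_size, feed_previous=feed_previous)
# Feed {feed_previous: False} for teacher forcing during training and
# {feed_previous: True} at decode time so the decoder consumes its own predictions.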
Example #4
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=dtypes.float32,
                          scope=None):
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                                         encoder_state,
                                         cell,
                                         num_decoder_symbols,
                                         embedding_size,
                                         output_projection=output_projection,
                                         feed_previous=feed_previous)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs,
                    encoder_state,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
def testOutputProjectionWrapper(self):
    with self.test_session() as sess:
        with tf.variable_scope("root",
                               initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 3])
            m = tf.zeros([1, 3])
            cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(3), 2)
            g, new_m = cell(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run(
                [g, new_m], {
                    x.name: np.array([[1., 1., 1.]]),
                    m.name: np.array([[0.1, 0.1, 0.1]])
                })
            self.assertEqual(res[1].shape, (1, 3))
            # The numbers in the results were not calculated; this is just a smoke test.
            self.assertAllClose(res[0], [[0.231907, 0.231907]])
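For reference, what OutputProjectionWrapper adds on top of the wrapped GRUCell is a single learned linear layer; a small NumPy sketch of that projection step (the values are illustrative and unrelated to the smoke-test numbers above):

import numpy as np

h = np.ones((1, 3), dtype=np.float32)       # inner GRU output, 3 units
W = np.full((3, 2), 0.5, dtype=np.float32)  # illustrative projection weights
b = np.full((2,), 0.5, dtype=np.float32)    # illustrative projection bias
print(h.dot(W) + b)                          # projected output, shape (1, 2)
# The wrapper returns this projection as its output, while the state keeps the
# inner cell's shape [batch, 3], which the assertEqual above verifies.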
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                num_layers=3,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    num_layers: Integer; number of LSTM layers in the stacked residual encoder
      built on top of the bidirectional encoder outputs.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.
    with tf.device('/gpu:0'):
        single_cell_1 = rnn_cell.LSTMCell(embedding_size)
    with tf.device('/gpu:1'):
        single_cell_2 = rnn_cell.LSTMCell(embedding_size)

    encoder_fw_cell = rnn_cell.EmbeddingWrapper(single_cell_1, embedding_classes=num_encoder_symbols,
                                                embedding_size=embedding_size)
    encoder_bw_cell = rnn_cell.EmbeddingWrapper(single_cell_2, embedding_classes=num_encoder_symbols,
                                                embedding_size=embedding_size)
    outputs, _, _ = rnn.bidirectional_rnn(encoder_fw_cell, encoder_bw_cell, encoder_inputs, dtype=dtype)

    list_of_cell = []
    for layer in xrange(num_layers):
      if layer % 2 == 0:
        with tf.device('/gpu:0'):
          single_cell = tf.nn.rnn_cell.LSTMCell(embedding_size)
        list_of_cell.append(single_cell)
      else:
        with tf.device('/gpu:1'):
          single_cell = tf.nn.rnn_cell.LSTMCell(embedding_size)
        list_of_cell.append(single_cell)

    cell2 = Stack_Residual_RNNCell.Stack_Residual_RNNCell(list_of_cell)

    encoder_outputs, encoder_state = rnn.rnn(
        cell2, outputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
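A shape sketch (NumPy, illustrative sizes) of the attention_states construction used above: each per-step encoder output is reshaped to [batch, 1, output_size] and the steps are concatenated along the time axis:

import numpy as np

batch, steps, out_size = 4, 7, 32
encoder_outputs = [np.zeros((batch, out_size), np.float32) for _ in range(steps)]
top_states = [e.reshape(batch, 1, out_size) for e in encoder_outputs]
attention_states = np.concatenate(top_states, axis=1)
print(attention_states.shape)  # (4, 7, 32) -> [batch_size, attn_length, attn_size]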
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               embedding_size,
                               num_decoder_symbols=None,
                               output_projection=None,
                               feed_previous=False,
                               dtype=None,
                               scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_symbols x input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs. The decoder output is over symbols
  from 0 to num_decoder_symbols - 1 if num_decoder_symbols is provided; otherwise
  it is over 0 to num_symbols - 1.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: Integer; number of symbols for both encoder and decoder.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_decoder_symbols: Integer; number of output symbols for decoder. If
      provided, the decoder output is over symbols 0 to num_decoder_symbols - 1.
      Otherwise, decoder output is over symbols 0 to num_symbols - 1. Note that
      this assumes that the vocabulary is set up such that the first
      num_decoder_symbols of num_symbols are part of decoding.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_symbols] and B has
      shape [num_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_symbols] containing the generated
        outputs where output_symbols = num_decoder_symbols if
        num_decoder_symbols is not None otherwise output_symbols = num_symbols.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: When output_projection has the wrong shape.
  """
  with variable_scope.variable_scope(
      scope or "embedding_tied_rnn_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    if output_projection is not None:
      proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
      proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
      proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
      proj_biases.get_shape().assert_is_compatible_with([num_symbols])

    embedding = variable_scope.get_variable(
        "embedding", [num_symbols, embedding_size], dtype=dtype)

    emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in encoder_inputs]
    emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in decoder_inputs]

    output_symbols = num_symbols
    if num_decoder_symbols is not None:
      output_symbols = num_decoder_symbols
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, output_symbols)

    if isinstance(feed_previous, bool):
      loop_function = _extract_argmax_and_embed(
          embedding, output_projection, True) if feed_previous else None
      return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                              loop_function=loop_function, dtype=dtype)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      loop_function = _extract_argmax_and_embed(
        embedding, output_projection, False) if feed_previous_bool else None
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                         reuse=reuse):
        outputs, state = tied_rnn_seq2seq(
            emb_encoder_inputs, emb_decoder_inputs, cell,
            loop_function=loop_function, dtype=dtype)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    # Calculate zero-state to know its structure.
    static_batch_size = encoder_inputs[0].get_shape()[0]
    for inp in encoder_inputs[1:]:
      static_batch_size.merge_with(inp.get_shape()[0])
    batch_size = static_batch_size.value
    if batch_size is None:
      batch_size = array_ops.shape(encoder_inputs[0])[0]
    zero_state = cell.zero_state(batch_size, dtype)
    if nest.is_sequence(zero_state):
      state = nest.pack_sequence_as(structure=zero_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
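A hedged sketch of a valid output_projection pair for this function; the shape checks at the top of the body require W of shape [output_size, num_symbols] and B of shape [num_symbols] (the sizes and the commented-out call are illustrative):

import tensorflow as tf

num_symbols, output_size = 1000, 128
w = tf.get_variable("proj_w", [output_size, num_symbols])
b = tf.get_variable("proj_b", [num_symbols])
# outputs, state = embedding_tied_rnn_seq2seq(
#     encoder_inputs, decoder_inputs, cell, num_symbols, embedding_size=64,
#     output_projection=(w, b), feed_previous=True)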
Example #8
def basic_rnn_seq2seq(encoder_inputs,
                      en_seq_length,
                      decoder_inputs,
                      cell,
                      num_decoder_symbols,
                      output_projection=None,
                      feed_previous=False,
                      dtype=None,
                      scope=None):
    """Embedding RNN sequence-to-sequence model.

    This model first embeds encoder_inputs by a newly created embedding
    (of shape [num_encoder_symbols x input_size]). Then it runs an RNN to
    encode embedded encoder_inputs into a state vector. Next, it embeds
    decoder_inputs by another newly created embedding (of shape
    [num_decoder_symbols x input_size]). Then it runs RNN decoder, initialized
    with the last encoder state, on embedded decoder_inputs.

    Args:
        encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
        en_seq_length: Specifies the length of each sequence in encoder inputs [batch_size].
        decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        num_decoder_symbols: Integer; number of symbols on the decoder side
            (used for the output projection).
        output_projection: None or a pair (W, B) of output projection weights
            and biases; W has shape [output_size x num_decoder_symbols] and B
            has shape [num_decoder_symbols]; if provided and
            feed_previous=True, each fed previous output will first be
            multiplied by W and added B.
        feed_previous: Boolean or scalar Boolean Tensor; if True, only the
            first of decoder_inputs will be used (the "GO" symbol), and all
            other decoder inputs will be taken from previous outputs (as in
            embedding_rnn_decoder). If False, decoder_inputs are used as given
            (the standard decoder case).
        dtype: The dtype of the initial state for both the encoder and decoder
            rnn cells (default: tf.float32).
        scope: VariableScope for the created subgraph; defaults to
            "basic_rnn_seq2seq".

    Returns:
        A tuple of the form (outputs, state), where:
            outputs: A list of the same length as decoder_inputs of 2D
                Tensors. The output is of shape [batch_size x
                cell.output_size] when output_projection is not None (and
                represents the dense representation of predicted tokens).
                It is of shape [batch_size x num_decoder_symbols]
                when output_projection is None.
            state: The state of each decoder cell in each time-step. This is a
                list with length len(decoder_inputs) -- one item for each
                time-step.
                It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with variable_scope.variable_scope(scope or "basic_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype

        # Encoder.
        _, encoder_state = rnn.rnn(cell,
                                   encoder_inputs,
                                   sequence_length=en_seq_length,
                                   scope='rnn_encoder',
                                   dtype=dtype)

        # Decoder.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
        # TODO(ysu): embedding and feed_previous
        # loop_function = _extract_argmax_and_embed(
        #     embedding, output_projection,
        #     update_embedding_for_previous) if feed_previous else None
        return rnn_decoder(decoder_inputs,
                           encoder_state,
                           cell,
                           scope='rnn_decoder')
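A brief note on en_seq_length, sketched with an assumed placeholder: passing it as sequence_length lets the encoder RNN stop updating state past each example's true length, so padded steps do not affect the final encoder_state.

import tensorflow as tf
import numpy as np

# Illustrative placeholder: one true (unpadded) length per batch element.
en_seq_length = tf.placeholder(tf.int32, [None], name="encoder_lengths")
# e.g. feed np.array([5, 8, 3]) for a batch of three sequences padded to length 8.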
Example #9
def embedding_tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                               num_symbols, output_projection=None,
                               feed_previous=False, dtype=dtypes.float32,
                               scope=None):
  """Embedding RNN sequence-to-sequence model with tied (shared) parameters.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_symbols x cell.input_size]). Then it runs an RNN to encode embedded
  encoder_inputs into a state vector. Next, it embeds decoder_inputs using
  the same embedding. Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_symbols: integer; number of symbols for both encoder and decoder.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_symbols] and B has
      shape [num_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype to use for the initial RNN states (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_tied_rnn_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_symbols] containing the generated outputs.
    state: The state of each decoder cell at the final time-step.
      It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when output_projection has the wrong shape.
  """
  if output_projection is not None:
    proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
    proj_weights.get_shape().assert_is_compatible_with([cell.output_size,
                                                        num_symbols])
    proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
    proj_biases.get_shape().assert_is_compatible_with([num_symbols])

  with vs.variable_scope(scope or "embedding_tied_rnn_seq2seq"):
    with ops.device("/cpu:0"):
      embedding = vs.get_variable("embedding", [num_symbols, cell.input_size])

    emb_encoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in encoder_inputs]
    emb_decoder_inputs = [embedding_ops.embedding_lookup(embedding, x)
                          for x in decoder_inputs]

    def extract_argmax_and_embed(prev, _):
      """Loop_function that extracts the symbol from prev and embeds it."""
      if output_projection is not None:
        prev = nn_ops.xw_plus_b(
            prev, output_projection[0], output_projection[1])
      prev_symbol = array_ops.stop_gradient(math_ops.argmax(prev, 1))
      return embedding_ops.embedding_lookup(embedding, prev_symbol)

    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)

    if isinstance(feed_previous, bool):
      loop_function = extract_argmax_and_embed if feed_previous else None
      return tied_rnn_seq2seq(emb_encoder_inputs, emb_decoder_inputs, cell,
                              loop_function=loop_function, dtype=dtype)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, state1 = tied_rnn_seq2seq(
          emb_encoder_inputs, emb_decoder_inputs, cell,
          loop_function=extract_argmax_and_embed, dtype=dtype)
      vs.get_variable_scope().reuse_variables()
      outputs2, state2 = tied_rnn_seq2seq(
          emb_encoder_inputs, emb_decoder_inputs, cell, dtype=dtype)

      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      state = control_flow_ops.cond(feed_previous,
                                    lambda: state1, lambda: state2)
      return outputs, state
Example #10
def many2one_attention_seq2seq(encoder_inputs_list,
                               decoder_inputs,
                               text_len,
                               speech_len,
                               feat_dim,
                               text_cell,
                               speech_cell,
                               parse_cell,
                               num_encoder_symbols,
                               num_decoder_symbols,
                               embedding_size,
                               attention_vec_size,
                               fixed_word_length,
                               filter_sizes,
                               num_filters,
                               output_projection=None,
                               feed_previous=False,
                               dtype=dtypes.float32,
                               scope=None,
                               initial_state_attention=False):

    text_encoder_inputs, speech_encoder_inputs = encoder_inputs_list
    encoder_size = len(text_encoder_inputs)
    #speech_encoder_inputs is size [seq_len, batch_size, fixed_word_length, feat_dim]

    with variable_scope.variable_scope(scope or "many2one_attention_seq2seq"):
        with ops.device("/cpu:0"):
            embedding_words = variable_scope.get_variable(
                "embedding_words", [num_encoder_symbols, embedding_size])

        text_encoder_inputs = [
            embedding_ops.embedding_lookup(embedding_words, i)
            for i in text_encoder_inputs
        ]
        # Encoder.
        with variable_scope.variable_scope(scope or "text_encoder"):
            text_encoder_outputs, text_encoder_state = rnn.rnn(
                text_cell,
                text_encoder_inputs,
                sequence_length=text_len,
                dtype=dtype)

        #print(text_encoder_inputs)
        #print(speech_encoder_inputs)

        # Convolution stuff happens here for speech inputs
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            print(i, filter_size)
            #with tf.name_scope("conv-maxpool-%s" % filter_size):
            with variable_scope.variable_scope(
                    scope or "conv-maxpool-%s" % filter_size):
                filter_shape = [filter_size, feat_dim, 1, num_filters]
                #W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                #b = tf.Variable(tf.truncated_normal(shape=[num_filters], stddev=0.1), name="b")
                W = variable_scope.get_variable("W-%d" % i, filter_shape)
                b = variable_scope.get_variable("B-%d" % i, num_filters)
                pooled_words = []
                for j in range(encoder_size):
                    feats = speech_encoder_inputs[j]
                    feats_conv = tf.expand_dims(feats, -1)
                    conv = tf.nn.conv2d(feats_conv,
                                        W,
                                        strides=[1, 1, 1, 1],
                                        padding="VALID",
                                        name="conv")
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    pooled = tf.nn.max_pool(
                        h,
                        ksize=[1, fixed_word_length - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name="pool")
                    pooled_words.append(pooled)
                pooled_outputs.append(pooled_words)
                #print(pooled_words)

        num_filters_total = num_filters * len(filter_sizes)
        out_seq = tf.unpack(tf.concat(1, pooled_outputs))
        speech_conv_outputs = [
            tf.reshape(x, [-1, num_filters_total]) for x in out_seq
        ]

        #print("pooled_outputs:", [x[0].get_shape() for x in pooled_outputs])
        #h_pool = tf.concat(3, pooled_outputs)
        #h_pool_squeeze = tf.squeeze(h_pool)
        #speech_conv_outputs = tf.transpose(h_pool_squeeze, perm=[1,0,2])
        #print(h_pool)
        #speech_conv_outputs = tf.unstack(speech_conv_outputs)

        with variable_scope.variable_scope(scope or "speech_encoder"):
            speech_encoder_outputs, speech_encoder_state = rnn.rnn(
                speech_cell,
                speech_conv_outputs,
                sequence_length=speech_len,
                dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        text_top_states = [
            array_ops.reshape(e, [-1, 1, text_cell.output_size])
            for e in text_encoder_outputs
        ]
        # h_states =  attention_states in original code
        h_states = array_ops.concat(1, text_top_states)

        speech_top_states = [
            array_ops.reshape(e, [-1, 1, speech_cell.output_size])
            for e in speech_encoder_outputs
        ]
        m_states = array_ops.concat(1, speech_top_states)

        attention_states = [h_states, m_states]
        both_encoder_states = [text_encoder_state, speech_encoder_state]

        # Decoder.
        output_size = None
        if output_projection is None:
            parse_cell = rnn_cell.OutputProjectionWrapper(
                parse_cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return many2one_embedding_attention_decoder(
                decoder_inputs,
                both_encoder_states,
                attention_states,
                parse_cell,
                num_decoder_symbols,
                embedding_size,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                attention_vec_size=attention_vec_size)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = many2one_embedding_attention_decoder(
                    decoder_inputs,
                    both_encoder_states,
                    attention_states,
                    parse_cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    attention_vec_size=attention_vec_size)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
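A shape walkthrough of the convolution/max-pool block above, with illustrative sizes; each word's speech features collapse to a single num_filters-dimensional vector per filter size:

import tensorflow as tf

batch, fixed_word_length, feat_dim = 2, 16, 40
filter_size, num_filters = 3, 8
feats = tf.zeros([batch, fixed_word_length, feat_dim])
feats_conv = tf.expand_dims(feats, -1)                   # [2, 16, 40, 1]
W = tf.zeros([filter_size, feat_dim, 1, num_filters])    # [3, 40, 1, 8]
conv = tf.nn.conv2d(feats_conv, W, strides=[1, 1, 1, 1], padding="VALID")
# conv: [2, 16 - 3 + 1, 1, 8] = [2, 14, 1, 8]
pooled = tf.nn.max_pool(conv, ksize=[1, fixed_word_length - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1], padding="VALID")
# pooled: [2, 1, 1, 8] -- one 8-dim vector per word for this filter size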
Example #11
def one2many_rnn_seq2seq(encoder_inputs, decoder_inputs_dict, cell,
                         num_encoder_symbols, num_decoder_symbols_dict,
                         embedding_size, feed_previous=False,
                         dtype=dtypes.float32, scope=None):
  """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and multiple
  decoders. Reference to multi-task sequence-to-sequence learning can be found
  here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionary mapping decoder name (string) to
      the corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to an
      integer specifying number of symbols for the corresponding decoder;
      len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of
      decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the same
        length as decoder_inputs_dict[name]; each element in the list is a 2D
        Tensors with shape [batch_size x num_decoder_symbol_list[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of the
        corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
  outputs_dict = {}
  state_dict = {}

  with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]

      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn_cell.OutputProjectionWrapper(cell,
                                                        num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs, encoder_state, decoder_cell, num_decoder_symbols,
              embedding_size, feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs, encoder_state, decoder_cell,
                  num_decoder_symbols, embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            return outputs + [state]
          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          outputs = outputs_and_state[:-1]
          state = outputs_and_state[-1]

      outputs_dict[name] = outputs
      state_dict[name] = state

  return outputs_dict, state_dict
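A hedged usage sketch for the multi-task setup: one shared encoder and one decoder per task, keyed by name. Task names, vocabulary sizes, step counts, and the commented-out call are illustrative:

import tensorflow as tf

steps = 10
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(steps)]
decoder_inputs_dict = {
    "pos": [tf.placeholder(tf.int32, [None]) for _ in range(steps)],
    "chunk": [tf.placeholder(tf.int32, [None]) for _ in range(steps)],
}
num_decoder_symbols_dict = {"pos": 45, "chunk": 23}
cell = tf.nn.rnn_cell.GRUCell(128)
# outputs_dict, state_dict = one2many_rnn_seq2seq(
#     encoder_inputs, decoder_inputs_dict, cell, num_encoder_symbols=10000,
#     num_decoder_symbols_dict=num_decoder_symbols_dict, embedding_size=64)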
Example #12
def embedding_attention_bidirectional_seq2seq(encoder_inputs,
                                              decoder_inputs,
                                              encoder_input_length,
                                              list_of_mask,
                                              encoder_cell,
                                              decoder_cell,
                                              num_encoder_symbols,
                                              num_decoder_symbols,
                                              embedding_size,
                                              num_heads=1,
                                              beam_size=1,
                                              output_projection=None,
                                              dtype=None,
                                              scope=None,
                                              initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

    This model first embeds encoder_inputs by a newly created embedding (of shape
    [num_encoder_symbols x input_size]). Then it runs a bidirectional RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      encoder_cell: rnn_cell.RNNCell defining the cell function and size.
      decoder_cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each symbol.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and added B.
      encoder_input_length: A 1D int32 Tensor of shape [batch_size] giving the
        true (unpadded) length of each encoder sequence.
      beam_size: Integer; beam width used by the attention decoder.
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are zero.
        If True, initialize the attentions from the initial state and attention
        states.

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
          shape [batch_size x num_decoder_symbols] containing the generated
          outputs.
        state: The state of each decoder_cell at the final time-step.
          It is a 2D Tensor of shape [batch_size x decoder_cell.state_size].
    """
    with variable_scope.variable_scope(
            scope or "embedding_attention_bidirectional_seq2seq",
            dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        # encoder_cell = rnn_cell.EmbeddingWrapper(
        #     encoder_cell, embedding_classes=num_encoder_symbols,
        #     embedding_size=embedding_size)
        embedding = variable_scope.get_variable(
            "encoder_embedding", [num_encoder_symbols, embedding_size])
        encoder_inputs = array_ops.pack([
            embedding_ops.embedding_lookup(embedding, i)
            for i in encoder_inputs
        ])
        # encoder_inputs = array_ops.reshape(array_ops.pack(encoder_inputs), [50, -1, 1])
        encoder_outputs, encoder_states = rnn.bidirectional_dynamic_rnn(
            cell_fw=encoder_cell,
            cell_bw=encoder_cell,
            inputs=encoder_inputs,
            sequence_length=encoder_input_length,
            time_major=True,
            dtype=dtype)
        encoder_state_fw, encoder_state_bw = encoder_states

        # Concatenate output_fw and output_bw => [step, batch_size, cell.out_size * 2].
        concat_encoder_outputs = array_ops.concat(2, encoder_outputs)
        # Transpose to [batch_size, step, cell.out_size * 2].
        attention_states = array_ops.transpose(concat_encoder_outputs,
                                               [1, 0, 2])

        # Decoder.
        output_size = None
        if output_projection is None:
            decoder_cell = rnn_cell.OutputProjectionWrapper(
                decoder_cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        return embedding_attention_decoder(
            decoder_inputs,
            list_of_mask,
            encoder_state_bw,
            attention_states,
            decoder_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            beam_size=beam_size,
            output_size=output_size,
            output_projection=output_projection,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
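A shape sketch (NumPy, illustrative sizes) of how the time-major bidirectional outputs above become attention_states: concatenate the forward and backward outputs on the feature axis, then transpose to batch-major:

import numpy as np

steps, batch, units = 7, 4, 32
out_fw = np.zeros((steps, batch, units), np.float32)
out_bw = np.zeros((steps, batch, units), np.float32)
concat = np.concatenate([out_fw, out_bw], axis=2)    # [steps, batch, 2 * units]
attention_states = np.transpose(concat, (1, 0, 2))   # [batch, steps, 2 * units]
print(attention_states.shape)                         # (4, 7, 64)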
Example #13
def maxpool_attention_seq2seq(encoder_inputs_list,
                              decoder_inputs,
                              seq_len,
                              feat_dim,
                              encoder_cell,
                              parse_cell,
                              num_encoder_symbols,
                              num_pause_symbols,
                              num_decoder_symbols,
                              embedding_size,
                              pause_size,
                              use_conv,
                              conv_filter_width,
                              conv_num_channels,
                              attention_vec_size,
                              fixed_word_length,
                              filter_sizes,
                              num_filters,
                              output_projection=None,
                              feed_previous=False,
                              dtype=dtypes.float32,
                              scope=None,
                              initial_state_attention=False,
                              use_speech=False,
                              use_pause=False,
                              use_wd=False):

    text_encoder_inputs, speech_encoder_inputs, pause_bef, pause_aft, word_durs = encoder_inputs_list
    encoder_size = len(text_encoder_inputs)
    #print(encoder_size)
    #speech_encoder_inputs is size [seq_len, batch_size, fixed_word_length, feat_dim]

    with variable_scope.variable_scope(scope or "many2one_attention_seq2seq"):
        with ops.device("/cpu:0"):
            embedding_words = variable_scope.get_variable(
                "embedding_words", [num_encoder_symbols, embedding_size])

        if use_pause:
            with ops.device("/cpu:0"):
                embedding_pauses = variable_scope.get_variable(
                    "embedding_pauses", [num_pause_symbols, pause_size])

        ## We need to do the embedding beforehand so that the rnn infers the input type
        ## to be float and doesn't cause trouble in copying state after sequence length
        ## This issue has been fixed in 0.10 version
        ## The issue is referred here - https://github.com/tensorflow/tensorflow/issues/3322
        text_encoder_inputs = [
            embedding_ops.embedding_lookup(embedding_words, i)
            for i in text_encoder_inputs
        ]
        if use_pause:
            pause_bef = [
                embedding_ops.embedding_lookup(embedding_pauses, i)
                for i in pause_bef
            ]
            pause_aft = [
                embedding_ops.embedding_lookup(embedding_pauses, i)
                for i in pause_aft
            ]

        if use_pause and not use_wd:
            text_encoder_inputs = [tf.concat(1, [text_encoder_inputs[i], pause_bef[i], pause_aft[i], \
                ]) for i in range(encoder_size)]
        elif use_wd and not use_pause:
            text_encoder_inputs = [tf.concat(1, [text_encoder_inputs[i], \
                    tf.expand_dims(word_durs[i],-1) ]) for i in range(encoder_size)]
        elif use_pause and use_wd:
            text_encoder_inputs = [tf.concat(1, [text_encoder_inputs[i], pause_bef[i], pause_aft[i], \
                tf.expand_dims(word_durs[i],-1) ]) for i in range(encoder_size)]

        if use_speech:
            # Convolution stuff happens here for speech inputs
            pooled_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                print(i, filter_size)
                #with tf.name_scope("conv-maxpool-%s" % filter_size):
                with variable_scope.variable_scope(
                        scope or "conv-maxpool-%s" % filter_size):
                    filter_shape = [filter_size, feat_dim, 1, num_filters]
                    W = variable_scope.get_variable("W-%d" % i, filter_shape)
                    b = variable_scope.get_variable("B-%d" % i, num_filters)
                    pooled_words = []
                    for j in range(encoder_size):
                        feats = speech_encoder_inputs[j]
                        feats_conv = tf.expand_dims(feats, -1)
                        conv = tf.nn.conv2d(feats_conv,
                                            W,
                                            strides=[1, 1, 1, 1],
                                            padding="VALID",
                                            name="conv")
                        # Apply nonlinearity
                        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                        pooled = tf.nn.max_pool(h,
                                                ksize=[
                                                    1, fixed_word_length -
                                                    filter_size + 1, 1, 1
                                                ],
                                                strides=[1, 1, 1, 1],
                                                padding='VALID',
                                                name="pool")
                        pooled_words.append(pooled)
                    pooled_outputs.append(pooled_words)

            num_filters_total = num_filters * len(filter_sizes)
            out_seq = tf.unpack(tf.concat(2, pooled_outputs))
            speech_conv_outputs = [
                tf.reshape(x, [-1, num_filters_total]) for x in out_seq
            ]

            # concat text_encoder_inputs and speech_conv_outputs
            both_encoder_inputs = [tf.concat(1, [text_encoder_inputs[i], speech_conv_outputs[i]]) \
                    for i in range(encoder_size)]
        else:
            both_encoder_inputs = text_encoder_inputs

        # Encoder.
        with variable_scope.variable_scope(scope or "encoder"):
            encoder_outputs, encoder_states = rnn.rnn(encoder_cell,
                                                      both_encoder_inputs,
                                                      sequence_length=seq_len,
                                                      dtype=dtype)


        # with variable_scope.variable_scope(scope or "speech_encoder"):
        #     speech_encoder_outputs, speech_encoder_state = rnn.rnn(
        #         speech_cell, speech_conv_outputs, sequence_length=speech_len, dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        #speech_top_states = [array_ops.reshape(e, [-1, 1, speech_cell.output_size])
        #              for e in speech_encoder_outputs]
        #m_states = array_ops.concat(1, speech_top_states)
        #attention_states = [h_states, m_states]
        #both_encoder_states = [text_encoder_state, speech_encoder_state]

        # Decoder.
        output_size = None
        if output_projection is None:
            parse_cell = rnn_cell.OutputProjectionWrapper(
                parse_cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_states,
                attention_states,
                parse_cell,
                seq_len,
                num_decoder_symbols,
                embedding_size,
                use_conv,
                conv_filter_width,
                conv_num_channels,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                attention_vec_size=attention_vec_size)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_states,
                    attention_states,
                    parse_cell,
                    seq_len,
                    num_decoder_symbols,
                    embedding_size,
                    use_conv,
                    conv_filter_width,
                    conv_num_channels,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    attention_vec_size=attention_vec_size)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
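
The per-timestep fusion above simply concatenates each text embedding with the corresponding speech convolution output along the feature axis before the shared encoder RNN. Below is a small numpy sketch of that step; the batch and feature sizes are illustrative only.

import numpy as np

batch_size, text_dim, speech_dim, encoder_size = 2, 4, 3, 5   # illustrative sizes

# One [batch_size, dim] array per timestep, as in the lists above.
text_encoder_inputs = [np.random.randn(batch_size, text_dim) for _ in range(encoder_size)]
speech_conv_outputs = [np.random.randn(batch_size, speech_dim) for _ in range(encoder_size)]

# Feature-axis concatenation, the numpy analogue of tf.concat(1, [...]).
both_encoder_inputs = [np.concatenate([t, s], axis=1)
                       for t, s in zip(text_encoder_inputs, speech_conv_outputs)]
assert both_encoder_inputs[0].shape == (batch_size, text_dim + speech_dim)
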
Beispiel #14
0
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None,
                                initial_state_attention=False,
                                scheduling_rate=1.0):
    """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    scheduling_rate: Rate forwarded to embedding_attention_decoder (used there
      for scheduled sampling of previous predictions).

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                 encoder_inputs,
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        ## feed_previous: if True, run forward propagation only; each previous
        ## prediction is fed back as the next decoder input (this is the role of
        ## the GO-symbol boolean). The forward path is used for validation/test.
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                scheduling_rate=scheduling_rate)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    scheduling_rate=scheduling_rate)
                #print (outputs)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
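
A minimal call sketch for the function above, assuming the legacy TF 0.x API (tf.placeholder, rnn_cell.GRUCell) that these examples use; the sequence length, vocabulary size, cell size, and scheduling_rate value are illustrative only.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy 0.x-style import, as in the examples

seq_length, vocab_size, embedding_dim = 10, 4000, 128   # illustrative values

encoder_inputs = [tf.placeholder(tf.int32, [None], name="enc%d" % i) for i in range(seq_length)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name="dec%d" % i) for i in range(seq_length)]
cell = rnn_cell.GRUCell(256)

# feed_previous=False trains with teacher forcing; scheduling_rate is simply
# forwarded to the decoder.
outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=vocab_size, num_decoder_symbols=vocab_size,
    embedding_size=embedding_dim, feed_previous=False, scheduling_rate=0.9)
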
Beispiel #15
0
def embedding_kv_attention_seq2seq(encoder_inputs,
                                   decoder_inputs,
                                   kb_inputs,
                                   kb_mask_inputs,
                                   cell,
                                   num_encoder_symbols,
                                   num_decoder_symbols,
                                   embedding_size,
                                   output_projection=None,
                                   feed_previous=False,
                                   attn_type="linear",
                                   enc_attn=False,
                                   use_types=False,
                                   type_to_idx=None,
                                   use_bidir=False,
                                   seq_lengths=None,
                                   enc_query=False,
                                   dtype=None,
                                   scope=None):
    """Embedding sequence-to-sequence model with attention over a KB.

    This model first embeds encoder_inputs by a newly created embedding
    (of shape [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs
    as well as an embedded KB.

    Warning: when output_projection is None, the size of the attention vectors
    and variables will be made proportional to num_decoder_symbols, which can be large.

    Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    kb_inputs: KBs for the given batch of dialogues.
    kb_mask_inputs: KB masks for the given batch of dialogues.
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_kb_attention_seq2seq".

    Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    if type_to_idx is not None:
        # Mapping from entity type to idx for augmenting encoder input
        num_entity_types = len(type_to_idx.keys())
        entity_encoding = np.zeros((num_entity_types, num_entity_types - 1),
                                   dtype=np.float32)
        for idx in range(num_entity_types - 1):
            entity_encoding[idx, idx] = 1.

    with variable_scope.variable_scope(scope
                                       or "embedding_kb_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        if use_types:
            print "Typed Encoder Inputs..."
            # Augment encoder inputs
            encoder_cell = CustomEmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size,
                entity_encoding=entity_encoding)
        else:
            print "Regular encoding..."
            # Just regular encoding
            encoder_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)

        # Use bidirectional encoding
        if use_bidir:
            encoder_cell_backward = copy.deepcopy(encoder_cell)
            encoder_outputs, encoder_state_fw, encoder_state_bw =\
                rnn.bidirectional_rnn(encoder_cell, encoder_cell_backward,
                                      encoder_inputs, dtype=dtype,
                                      sequence_length=seq_lengths)
            combined_c = tf.concat(1, [encoder_state_fw.c, encoder_state_bw.c])
            combined_h = tf.concat(1, [encoder_state_fw.h, encoder_state_bw.h])
            encoder_state = rnn_cell.LSTMStateTuple(c=combined_c, h=combined_h)
        else:
            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs,
                                                     dtype=dtype)

        # First calculate a concatenation of encoder outputs
        # to put attention on.
        if use_bidir:
            top_states = [
                array_ops.reshape(e, [-1, 1, 2 * cell.output_size])
                for e in encoder_outputs
            ]
        else:
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]

        attention_states = array_ops.concat(1, top_states)
        if output_projection is None:
            if use_bidir:
                # Modify dimension of decoder rnn_size
                cell = rnn_cell.BasicLSTMCell(2 * cell.output_size,
                                              state_is_tuple=True)
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = cell.output_size

        if isinstance(feed_previous, bool):
            return kv_attention_decoder(cell,
                                        decoder_inputs,
                                        kb_inputs,
                                        kb_mask_inputs,
                                        encoder_state,
                                        attention_states,
                                        num_decoder_symbols,
                                        embedding_size=embedding_size,
                                        output_size=output_size,
                                        feed_previous=feed_previous,
                                        attn_type=attn_type,
                                        enc_attn=enc_attn,
                                        enc_query=enc_query,
                                        dtype=dtype)
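
The entity_encoding matrix built at the top of this example assigns each entity type a one-hot row, leaving the last type (index num_entity_types - 1) as the all-zero row. A plain numpy sketch of that construction, using a purely illustrative type_to_idx mapping:

import numpy as np

type_to_idx = {"poi": 0, "event": 1, "weather": 2, "none": 3}   # illustrative mapping
num_entity_types = len(type_to_idx)

entity_encoding = np.zeros((num_entity_types, num_entity_types - 1), dtype=np.float32)
for idx in range(num_entity_types - 1):
    entity_encoding[idx, idx] = 1.0

# Rows 0..n-2 are one-hot; the final type gets the all-zero row.
print(entity_encoding)
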
def embedding_attention_seq2seq(encoder_inputs,
                                context_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
    #print("Inside Method Embedding Attention Seq2Seq")

    #print("Shape of encoder input {0}".format(len(encoder_inputs)))
    #print("Shape of decoder input {0}".format(len(decoder_inputs)))
    #print("num_encoder_symbols = {0}".format(num_encoder_symbols))
    ###print("num_decoder_symbols {0}".format(num_decoder_symbols))
    #print("embedding_size {0}".format(embedding_size))
    print("output_projection {0}".format(output_projection))

    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype,
                                       reuse=None) as scope:
        dtype = scope.dtype

        with variable_scope.variable_scope("encoder") as scope2:  # Encoder.
            encoder_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)
            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs,
                                                     dtype=dtype)
            #print(type(encoder_outputs))
            #np.savetxt('encoder_output.txt', encoder_outputs)
            #np.savetxt('encoder_state.txt', encoder_state)

        with variable_scope.variable_scope("context") as scope3:
            context_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)
            context_outputs, context_state = rnn.rnn(context_cell,
                                                     context_inputs,
                                                     dtype=dtype)

            #target = open("context_output.txt", 'w')
            #for out in context_outputs:
            #target.write(str(out))
            #target.write("\n")
            #target.close()

            #np.savetxt('context_output.txt', context_outputs)
            #np.savetxt('context_state.txt', context_state)

        #print("The dimension of context state {0}".format(context_state))
        #context_state = tf.Print(context_state,[context_state],message="Printing the context State")
        #context_state.eval()
        #tf.add(context_state,context_state).eval()

        #with tf.session as session_c:
        #session_c.run(context_state)

        #print("Inside method embedding_attention_seq2seq. Encoder Outputs {0} Encode State {1}".format(
        #np.shape(encoder_outputs), np.shape(encoder_state)))

        #encoder_outputs = tf.add(encoder_outputs, context_outputs)
        #encoder_state = tf.add(encoder_state, context_state)

        for i in range(len(encoder_outputs)):
            encoder_outputs[i] = tf.add(encoder_outputs[i], context_outputs[i])

        temp = []
        for i in range(len(encoder_state)):
            temp.append(tf.add(encoder_state[i], context_state[i]))
        encoder_state = tuple(temp)

        #print(type(encoder_outputs))
        #print(type(encoder_outputs[0]))
        #print(type(encoder_state))
        #print(encoder_state)

        print(
            "Inside method embedding_attention_seq2seq. Encoder Outputs {0} Encode State {1}"
            .format(np.shape(encoder_outputs), np.shape(encoder_state)))

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        print("Attention States has been created of size {0}".format(
            np.shape(attention_states)))
        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
            print("The output size is {0}".format(output_size))

        if isinstance(feed_previous, bool):
            print("Number of heads {0}".format(num_heads))
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
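
In contrast to the concatenation-based variants elsewhere in this file, this example fuses the context encoder into the main encoder by element-wise addition of the per-timestep outputs and of the final state components. A numpy sketch of that fusion with illustrative shapes:

import numpy as np

batch_size, hidden, steps = 2, 8, 5   # illustrative sizes
encoder_outputs = [np.random.randn(batch_size, hidden) for _ in range(steps)]
context_outputs = [np.random.randn(batch_size, hidden) for _ in range(steps)]

# Element-wise addition per timestep, mirroring the tf.add loop above.
fused_outputs = [e + c for e, c in zip(encoder_outputs, context_outputs)]

# The state tuple is fused the same way, component by component.
encoder_state = (np.random.randn(batch_size, hidden), np.random.randn(batch_size, hidden))
context_state = (np.random.randn(batch_size, hidden), np.random.randn(batch_size, hidden))
fused_state = tuple(e + c for e, c in zip(encoder_state, context_state))
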
Beispiel #17
0
    def __init__(self,
                 embedding,
                 max_length,
                 initial_state,
                 attention_states,
                 cell,
                 num_samples=512,
                 feed_previous=False,
                 update_embedding_for_previous=True,
                 dtype=dtypes.float32,
                 scope=None,
                 initial_state_attention=False,
                 **kwargs):
        # account for _GO and _EOS
        self.max_length = max_length + 2

        self.lengths = kwargs.get(
            'lengths',
            tf.placeholder(tf.int32, shape=[None], name="decoder_lengths"))
        self.inputs = kwargs.get('inputs', [
            tf.placeholder(
                tf.int32, shape=[None], name="decoder_input{0}".format(i))
            for i in xrange(self.max_length)
        ])
        self.weights = kwargs.get('weights', [
            tf.placeholder(
                tf.float32, shape=[None], name="decoder_weight{0}".format(i))
            for i in xrange(self.max_length)
        ])

        self.targets = [
            self.inputs[i + 1] for i in xrange(len(self.inputs) - 1)
        ]
        self.targets.append(tf.zeros_like(self.targets[0]))

        num_symbols = embedding.get_shape()[0].value
        output_projection = None
        loss_function = None
        self.cell = cell
        self.feed_previous = feed_previous

        if num_samples > 0 and num_samples < num_symbols:
            with tf.device('/cpu:0'):
                w = tf.get_variable('proj_w', [cell.output_size, num_symbols])
                w_t = tf.transpose(w)
                b = tf.get_variable('proj_b', [num_symbols])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device('/cpu:0'):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                      num_samples, num_symbols)

            loss_function = sampled_loss

        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)
            output_size = num_symbols

        if output_size is None:
            output_size = cell.output_size
        if output_projection is not None:
            proj_weights = ops.convert_to_tensor(output_projection[0],
                                                 dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with(
                [cell.output_size, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1],
                                                dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        with variable_scope.variable_scope(scope
                                           or "embedding_attention_decoder"):
            loop_function = self._extract_argmax_and_embed(
                embedding, output_projection,
                update_embedding_for_previous) if feed_previous else None

            emb_inp = [
                embedding_ops.embedding_lookup(embedding, i)
                for i in self.inputs
            ]
            self.outputs, self.state = attention_decoder(
                emb_inp,
                self.lengths,
                initial_state,
                attention_states,
                cell,
                output_size=output_size,
                loop_function=loop_function,
                initial_state_attention=initial_state_attention)

        targets = [self.inputs[i + 1] for i in xrange(len(self.inputs) - 1)]
        targets.append(tf.zeros_like(self.inputs[-1]))

        # loss for each instance in batch
        self.instance_loss = sequence_loss_by_example(
            self.outputs,
            targets,
            self.weights,
            softmax_loss_function=loss_function)

        # aggregated average loss per instance for batch
        self.loss = tf.reduce_sum(self.instance_loss) / math_ops.cast(
            array_ops.shape(targets[0])[0], self.instance_loss.dtype)

        if output_projection is not None:
            self.projected_output = [
                tf.matmul(o, output_projection[0]) + output_projection[1]
                for o in self.outputs
            ]
            self.decoded_outputs = tf.unpack(
                tf.argmax(tf.pack(self.projected_output), 2))
        else:
            self.decoded_outputs = tf.unpack(
                tf.argmax(tf.pack(self.outputs), 2))
        self.decoded_lenghts = tf.reduce_sum(
            tf.sign(tf.transpose(tf.pack(self.decoded_outputs))), 1)
        self.decoded_batch = tf.transpose(tf.pack(self.decoded_outputs))
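
The sampled_loss closure above avoids evaluating the full softmax over num_symbols classes by scoring only the true label plus a set of sampled classes. Below is a rough numpy sketch of that idea (uniform sampling, no correction for accidental hits or for the sampling distribution, both of which tf.nn.sampled_softmax_loss handles); all sizes are illustrative.

import numpy as np

rng = np.random.RandomState(0)
num_symbols, num_samples, hidden = 10000, 512, 64   # illustrative sizes

w = rng.randn(num_symbols, hidden) * 0.01            # analogue of the transposed proj_w
b = np.zeros(num_symbols)
h = rng.randn(hidden)                                 # decoder output for one position
label = 42

# Score only the true class plus num_samples sampled classes instead of all 10000.
sampled = rng.choice(num_symbols, size=num_samples, replace=False)
classes = np.concatenate(([label], sampled))
logits = w[classes].dot(h) + b[classes]

# Softmax cross-entropy over the reduced class set; index 0 is the true label.
m = logits.max()
loss = -(logits[0] - (m + np.log(np.exp(logits - m).sum())))
print(loss)
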
Beispiel #18
0
def attention_seq2seq(encoder_inputs,
                      en_seq_length,
                      decoder_inputs,
                      cell,
                      num_decoder_symbols,
                      num_heads=1,
                      output_projection=None,
                      feed_previous=False,
                      dtype=None,
                      scope=None,
                      initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

    This model first embeds encoder_inputs by a newly created embedding (of shape
    [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs.

    Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

    Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with variable_scope.variable_scope(scope or "attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_outputs, encoder_state = rnn.rnn(cell,
                                                 encoder_inputs,
                                                 sequence_length=en_seq_length,
                                                 scope='attention_rnn_encoder',
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        loop_function = None
        # if feed_previous:
        #     loop_function = _extract_argmax_and_embed(
        #         embedding, output_projection,
        #         update_embedding_for_previous) if feed_previous else None

        return attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            output_size=output_size,
            num_heads=num_heads,
            loop_function=loop_function,
            initial_state_attention=initial_state_attention)
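
Because this variant omits the embedding wrappers, both input lists must already be real-valued (for example, pre-embedded) feature tensors. A minimal call sketch under that assumption, with illustrative shapes and the legacy placeholder/GRUCell API:

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

steps, input_size, num_decoder_symbols = 8, 64, 500   # illustrative sizes

encoder_inputs = [tf.placeholder(tf.float32, [None, input_size]) for _ in range(steps)]
decoder_inputs = [tf.placeholder(tf.float32, [None, input_size]) for _ in range(steps)]
en_seq_length = tf.placeholder(tf.int32, [None])       # true encoder lengths per batch element
cell = rnn_cell.GRUCell(128)

outputs, state = attention_seq2seq(
    encoder_inputs, en_seq_length, decoder_inputs, cell,
    num_decoder_symbols, feed_previous=False)
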
Beispiel #19
0
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                seq_len,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                use_conv=False,              # forwarded to embedding_attention_decoder
                                conv_filter_width=None,
                                conv_num_channels=None,
                                attention_vec_size=None,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=dtypes.float32,
                                scope=None,
                                initial_state_attention=False):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
        with ops.device("/cpu:0"):
            embedding_words = variable_scope.get_variable(
                "embedding_words", [num_encoder_symbols, embedding_size])

        ## We need to do the embedding beforehand so that the rnn infers the input type
        ## to be float and doesn't cause trouble in copying state after sequence length
        ## This issue has been fixed in 0.10 version
        ## The issue is referred here - https://github.com/tensorflow/tensorflow/issues/3322
        encoder_inputs = [
            embedding_ops.embedding_lookup(embedding_words, i)
            for i in encoder_inputs
        ]
        encoder_outputs, encoder_state = rnn.rnn(cell,
                                                 encoder_inputs,
                                                 sequence_length=seq_len,
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                seq_len,
                num_decoder_symbols,
                embedding_size,
                use_conv,
                conv_filter_width,
                conv_num_channels,
                attention_vec_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    seq_len,
                    num_decoder_symbols,
                    embedding_size,
                    use_conv,
                    conv_filter_width,
                    conv_num_channels,
                    attention_vec_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
Beispiel #20
0
def embedding_attention_seq2seq_context(encoder_inputs, decoder_inputs, cell,
                                        num_encoder_symbols, num_decoder_symbols,
                                        num_heads=1, output_projection=None,
                                        feed_previous=False, dtype=dtypes.float32,
                                        scope=None):
    """A seq2seq architecture with two encoders, one for context, one for input DA. The decoder
    uses twice the cell size. Code adapted from TensorFlow examples."""

    with vs.variable_scope(scope or "embedding_attention_seq2seq_context"):

        # split context and real inputs into separate vectors
        context_inputs = encoder_inputs[0:len(encoder_inputs) // 2]
        encoder_inputs = encoder_inputs[len(encoder_inputs) // 2:]

        # build separate encoders
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        with vs.variable_scope("context_rnn") as scope:
            context_outputs, context_states = rnn.rnn(
                    encoder_cell, context_inputs, dtype=dtype, scope=scope)
        with vs.variable_scope("input_rnn") as scope:
            encoder_outputs, encoder_states = rnn.rnn(
                    encoder_cell, encoder_inputs, dtype=dtype, scope=scope)

        # concatenate outputs & states
        encoder_outputs = [array_ops.concat(1, [co, eo], name="context-and-encoder-output")
                           for co, eo in zip(context_outputs, encoder_outputs)]
        encoder_states = [array_ops.concat(1, [cs, es], name="context-and-encoder-state")
                          for cs, es in zip(context_states, encoder_states)]

        # calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)

        # change the decoder cell to accommodate wider input
        # TODO this will work for BasicLSTMCell and GRUCell, but not for others
        cell = type(cell)(num_units=(cell.input_size * 2))

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                    decoder_inputs, encoder_states[-1], attention_states, cell,
                    num_decoder_symbols, num_heads, output_size, output_projection,
                    feed_previous)
        else:    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
            outputs1, states1 = embedding_attention_decoder(
                    decoder_inputs, encoder_states[-1], attention_states, cell,
                    num_decoder_symbols, num_heads, output_size, output_projection, True)
            vs.get_variable_scope().reuse_variables()
            outputs2, states2 = embedding_attention_decoder(
                    decoder_inputs, encoder_states[-1], attention_states, cell,
                    num_decoder_symbols, num_heads, output_size, output_projection, False)

            outputs = control_flow_ops.cond(feed_previous,
                                            lambda: outputs1, lambda: outputs2)
            states = control_flow_ops.cond(feed_previous,
                                           lambda: states1, lambda: states2)
            return outputs, states
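
This variant expects a single encoder_inputs list whose first half is the context and whose second half is the input DA, so both parts must be padded to the same length. A tiny sketch of that layout, with integer ids standing in for the per-timestep tensors:

# Illustrative padded id sequences of equal length (here 4 steps each).
context_ids = [5, 7, 0, 0]
da_ids = [12, 3, 9, 0]

# The combined list handed to embedding_attention_seq2seq_context:
# first half = context, second half = input DA.
encoder_inputs = context_ids + da_ids
half = len(encoder_inputs) // 2
assert encoder_inputs[:half] == context_ids and encoder_inputs[half:] == da_ids
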
Beispiel #21
0
def embedding_attention_pointer_seq2seq_states(encoder_inputs,
                                               decoder_inputs,
                                               cell,
                                               num_encoder_symbols,
                                               num_decoder_symbols,
                                               embedding_size,
                                               num_heads=1,
                                               output_projection=None,
                                               feed_previous=False,
                                               dtype=tf.float32,
                                               scope=None,
                                               initial_state_attention=False):
    with variable_scope.variable_scope(
            scope or "embedding_attention_pointer_seq2seq_states") as scope:
        # Encoder.
        encoder_initial_state = tf.placeholder(dtype, [None, cell.state_size],
                                               "encoder_initial_state")
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(
            encoder_cell,
            encoder_inputs,
            initial_state=encoder_initial_state,
            dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            raise Exception("feed_previous must be a tensor!")
        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, decoder_state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                return outputs, decoder_state

        true_outputs, true_decoder_state = decoder(True)
        false_outputs, false_decoder_state = decoder(False)
        outputs = tf.cond(feed_previous, lambda: true_outputs,
                          lambda: false_outputs)
        return outputs, encoder_initial_state, encoder_state, (
            true_decoder_state, false_decoder_state)
Beispiel #22
0
def many2one_attention_seq2seq(encoder_inputs_list,
                               decoder_inputs,
                               text_len,
                               text_cell,
                               speech_cell,
                               parse_cell,
                               num_encoder_symbols,
                               num_decoder_symbols,
                               embedding_size,
                               output_projection=None,
                               feed_previous=False,
                               dtype=dtypes.float32,
                               scope=None,
                               initial_state_attention=False,
                               attention_vec_size=None):

    text_encoder_inputs, speech_encoder_inputs = encoder_inputs_list
    with variable_scope.variable_scope(scope or "many2one_attention_seq2seq"):
        with ops.device("/cpu:0"):
            embedding_words = variable_scope.get_variable(
                "embedding_words", [num_encoder_symbols, embedding_size])

        text_encoder_inputs = [
            embedding_ops.embedding_lookup(embedding_words, i)
            for i in text_encoder_inputs
        ]
        # Encoder.
        with variable_scope.variable_scope(scope or "text_encoder"):
            text_encoder_outputs, text_encoder_state = rnn.rnn(
                text_cell,
                text_encoder_inputs,
                sequence_length=text_len,
                dtype=dtype)

        with variable_scope.variable_scope(scope or "speech_encoder"):
            speech_encoder_outputs, speech_encoder_state = rnn.rnn(
                speech_cell, speech_encoder_inputs, dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        text_top_states = [
            array_ops.reshape(e, [-1, 1, text_cell.output_size])
            for e in text_encoder_outputs
        ]
        # h_states =  attention_states in original code
        h_states = array_ops.concat(1, text_top_states)

        speech_top_states = [
            array_ops.reshape(e, [-1, 1, speech_cell.output_size])
            for e in speech_encoder_outputs
        ]
        m_states = array_ops.concat(1, speech_top_states)

        attention_states = [h_states, m_states]
        both_encoder_states = [text_encoder_state, speech_encoder_state]

        # Decoder.
        output_size = None
        if output_projection is None:
            parse_cell = rnn_cell.OutputProjectionWrapper(
                parse_cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return many2one_embedding_attention_decoder(
                decoder_inputs,
                both_encoder_states,
                attention_states,
                parse_cell,
                num_decoder_symbols,
                embedding_size,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                attention_vec_size=attention_vec_size)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = many2one_embedding_attention_decoder(
                    decoder_inputs,
                    both_encoder_states,
                    attention_states,
                    parse_cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    attention_vec_size=attention_vec_size)
                return outputs + [state]

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        return outputs_and_state[:-1], outputs_and_state[-1]
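
A minimal call sketch for the two-encoder model above: the text encoder consumes int32 word ids (embedded inside the function), while the speech encoder consumes real-valued frame features directly. The cell sizes, feature dimension, vocabulary sizes, and sequence length below are illustrative, and the legacy placeholder/GRUCell API is assumed.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

steps, vocab, embed_dim, feat_dim = 10, 3000, 128, 40   # illustrative sizes

text_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(steps)]
speech_inputs = [tf.placeholder(tf.float32, [None, feat_dim]) for _ in range(steps)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(steps)]
text_len = tf.placeholder(tf.int32, [None])

text_cell = rnn_cell.GRUCell(128)
speech_cell = rnn_cell.GRUCell(64)
parse_cell = rnn_cell.GRUCell(128)

outputs, state = many2one_attention_seq2seq(
    [text_inputs, speech_inputs], decoder_inputs, text_len,
    text_cell, speech_cell, parse_cell,
    num_encoder_symbols=vocab, num_decoder_symbols=200,
    embedding_size=embed_dim, feed_previous=False)
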
Beispiel #23
0
def embedding_attention_seq2seq_beam(dec_inp, use_initial, supplied_prev,
                                     supplied_state, supplied_attns, cell,
                                     num_decoder_symbols, embedding_size,
                                     encoder_outputs, encoder_state):
    with variable_scope.variable_scope("embedding_attention_seq2seq"):
        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size]) for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

        with variable_scope.variable_scope("embedding_attention_decoder"):
            embedding = variable_scope.get_variable("embedding", [num_decoder_symbols, embedding_size])
            loop_function = _extract_argmax_and_embed(embedding)
            emb_inp = [embedding_ops.embedding_lookup(embedding, i) for i in dec_inp]

            decoder_inputs = emb_inp
            initial_state = encoder_state
            output_size = cell.output_size

            # Attention Decoder
            with variable_scope.variable_scope("attention_decoder"):
                batch_size_ = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
                attn_length = attention_states.get_shape()[1].value
                attn_size = attention_states.get_shape()[2].value

                # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
                hidden = array_ops.reshape(attention_states, [-1, attn_length, 1, attn_size])
                hidden_features = []
                v = []
                attention_vec_size = attn_size  # Size of query vectors for attention.

                for a in xrange(1):
                    k = variable_scope.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
                    hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
                    v.append(variable_scope.get_variable("AttnV_%d" % a, [attention_vec_size]))

                state = tf.cond(use_initial > 0, lambda: initial_state, lambda: supplied_state)

                def attention(query):
                    """Put attention masks on hidden using hidden_features and query."""
                    ds = []  # Results of attention reads will be stored here.

                    for a in xrange(1):
                        with variable_scope.variable_scope("Attention_%d" % a):
                            y = linear(query, attention_vec_size, True)
                            y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])

                            # Attention mask is a softmax of v^T * tanh(...).
                            s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                            a = nn_ops.softmax(s)

                            # Now calculate the attention-weighted vector d.
                            d = math_ops.reduce_sum(array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
                            ds.append(array_ops.reshape(d, [-1, attn_size]))

                    return ds

                outputs = []
                prev = None
                batch_attn_size = array_ops.pack([batch_size_, attn_size])
                attns = [tf.cond(use_initial > 0, lambda: array_ops.zeros(batch_attn_size, dtype=dtypes.float32), lambda: supplied_attns) for _ in xrange(1)]

                for a in attns:  # Ensure the second shape of attention vectors is set.
                    a.set_shape([None, attn_size])

                with variable_scope.variable_scope("loop_function", reuse=True):
                    #inp = tf.cond(use_initial > 0, lambda: decoder_inputs[0], lambda: loop_function(supplied_prev, 0))
                    inp = decoder_inputs[0]

                input_size = inp.get_shape().with_rank(2)[1]

                if input_size.value is None:
                    raise ValueError("Could not infer input size from input: %s" % inp.name)

                x = linear([inp] + attns, input_size, True)

                # Run the RNN.
                cell_output, state = cell(x, state)

                # Run the attention mechanism.
                attns = attention(state)

                with variable_scope.variable_scope("AttnOutputProjection"):
                    output = linear([cell_output] + attns, output_size, True)

            return output, state, attns[0]
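
The single-step decoder above emits one step of projected logits per call and leaves the beam bookkeeping to the caller. A small numpy sketch of that external step: expand every beam by the vocabulary, add log-probabilities, and keep the top beam_size continuations (sizes are illustrative).

import numpy as np

rng = np.random.RandomState(0)
beam_size, vocab = 3, 7                                   # illustrative sizes

beam_log_probs = np.log(np.array([0.5, 0.3, 0.2]))        # running scores of the beams
step_logits = rng.randn(beam_size, vocab)                  # one decoder step per beam
step_log_probs = step_logits - np.log(np.exp(step_logits).sum(axis=1, keepdims=True))

# Total score of every (beam, token) continuation, then keep the best beam_size.
total = beam_log_probs[:, None] + step_log_probs
flat = total.ravel()
best = np.argsort(-flat)[:beam_size]
next_beams = [(idx // vocab, idx % vocab, flat[idx]) for idx in best]
print(next_beams)    # (parent beam, token id, new log-prob) triples
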
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):

    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                 encoder_inputs,
                                                 dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(1, top_states)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            print("~~~~~~~~~~~")
            outputs, state = embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                scope=scope)
            return outputs, state, encoder_state

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    scope=scope)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state, encoder_state
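
Unlike the stock embedding_attention_seq2seq, this variant also returns encoder_state, so a call unpacks three values. A minimal sketch of the call shape, with illustrative placeholders and the legacy API:

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

steps, vocab, embed_dim = 8, 2000, 64                     # illustrative sizes
enc = [tf.placeholder(tf.int32, [None]) for _ in range(steps)]
dec = [tf.placeholder(tf.int32, [None]) for _ in range(steps)]

# Three return values: decoder outputs, final decoder state, and the encoder state.
outputs, state, encoder_state = embedding_attention_seq2seq(
    enc, dec, rnn_cell.GRUCell(128),
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    embedding_size=embed_dim, feed_previous=True)
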
Beispiel #25
0
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          output_projection=None, feed_previous=False,
                          dtype=dtypes.float32, scope=None):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq"

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    state: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with vs.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(decoder_inputs, encoder_state, cell,
                                   num_decoder_symbols, output_projection,
                                   feed_previous)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, state1 = embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          output_projection, True)
      vs.get_variable_scope().reuse_variables()
      outputs2, state2 = embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          output_projection, False)

      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      state = control_flow_ops.cond(feed_previous,
                                    lambda: state1, lambda: state2)
      return outputs, state
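A minimal, hypothetical usage sketch for the variant above (the older API without an explicit embedding_size, where the embedding dimension is taken from the cell). It assumes a TensorFlow version in which tensorflow.python.ops.rnn_cell and the legacy placeholder API are available; all sizes are illustrative.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

batch_size, seq_len, vocab = 32, 10, 1000
cell = rnn_cell.GRUCell(128)

# One int32 batch per time step, as the docstring above requires.
encoder_inputs = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]
decoder_inputs = [tf.placeholder(tf.int32, [batch_size]) for _ in range(seq_len)]

# Teacher forcing: feed_previous=False uses the given decoder_inputs.
outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    feed_previous=False)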
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs an RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq"

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
        output is of shape [batch_size x cell.output_size] when
        output_projection is not None (and represents the dense representation
        of predicted tokens). It is of shape [batch_size x num_decoder_symbols]
        when output_projection is None.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
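This newer variant takes an explicit embedding_size and accepts feed_previous as a scalar boolean Tensor, in which case it builds both decoder graphs under cond so a single graph can switch between teacher forcing and feeding back its own predictions at run time. A minimal, hypothetical sketch of that usage (legacy TF API, illustrative names and sizes):

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

cell = rnn_cell.GRUCell(128)
enc = [tf.placeholder(tf.int32, [None]) for _ in range(10)]
dec = [tf.placeholder(tf.int32, [None]) for _ in range(10)]

# A scalar boolean tensor triggers the cond-based path above, so the same
# graph can be fed feed_previous=True at decode time and False at train time.
feed_previous = tf.placeholder(tf.bool, [])

outputs, state = embedding_rnn_seq2seq(
    enc, dec, cell,
    num_encoder_symbols=1000, num_decoder_symbols=1000,
    embedding_size=64,
    feed_previous=feed_previous)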
Example #27
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=dtypes.float32,
                                scope=None, initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs an attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    state: The state of each decoder cell at the final time-step.
      It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with vs.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    encoder_outputs, encoder_state = rnn.rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection,
          feed_previous, initial_state_attention=initial_state_attention)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, state1 = embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection, True,
          initial_state_attention=initial_state_attention)
      vs.get_variable_scope().reuse_variables()
      outputs2, state2 = embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection, False,
          initial_state_attention=initial_state_attention)

      outputs = control_flow_ops.cond(feed_previous,
                                      lambda: outputs1, lambda: outputs2)
      state = control_flow_ops.cond(feed_previous,
                                    lambda: state1, lambda: state2)
      return outputs, state
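A minimal, hypothetical sketch of calling the attention model above with an explicit output projection, as one typically does when pairing the decoder with a sampled-softmax loss. The projection shapes follow the docstring above; all other names and sizes are illustrative assumptions.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

vocab, hidden = 20000, 256
cell = rnn_cell.GRUCell(hidden)

# W: [cell.output_size x num_decoder_symbols], B: [num_decoder_symbols],
# exactly as the docstring above specifies.
w = tf.get_variable("proj_w", [hidden, vocab])
b = tf.get_variable("proj_b", [vocab])

enc = [tf.placeholder(tf.int32, [None]) for _ in range(15)]
dec = [tf.placeholder(tf.int32, [None]) for _ in range(15)]

outputs, state = embedding_attention_seq2seq(
    enc, dec, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    num_heads=1,
    output_projection=(w, b),  # decoder outputs stay at hidden size
    feed_previous=True)        # decode from the model's own predictions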
Example #28
def dialog_attention_seq2seq(encoder_inputs,
                             decoder_inputs,
                             cell,
                             vocab_size,
                             num_heads=1,
                             output_projection=None,
                             feed_previous=False,
                             dtype=dtypes.float32,
                             scope=None,
                             initial_state_attention=False):
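    """Multi-turn dialog sequence-to-sequence model with attention.

    encoder_inputs and decoder_inputs are lists over dialog turns; each turn
    is itself a list of 1D int32 Tensors of shape [batch_size]. Every turn is
    encoded with an embedding RNN, a dialog-level RNN ("DRNN") consumes the
    encoder state to carry context across turns, and an attention decoder
    conditioned on the DRNN state generates that turn's answer.

    Returns:
      outputs: a list with one list of decoder outputs per dialog turn.
      drnn_state: the final state of the dialog-level RNN.
    """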
    if len(encoder_inputs) != len(decoder_inputs):
        raise ValueError("encoder_inputs and decoder_inputs must contain the "
                         "same number of dialog turns.")

    with variable_scope.variable_scope(scope or "dialog_attention_seq2seq"):

        encoder_cell = rnn_cell.EmbeddingWrapper(cell, vocab_size)
        outputs = []

        fixed_batch_size = encoder_inputs[0][0].get_shape().with_rank_at_least(
            1)[0]
        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(encoder_inputs[0][0])[0]

        drnn_state = cell.zero_state(batch_size, dtype)

        # Decoder cell: apply the output projection once, outside the per-turn
        # loop, so the wrapper is not re-applied (and its variables not
        # re-created under a reused scope) on every turn.
        decoder_cell = cell
        output_size = None
        if output_projection is None:
            decoder_cell = rnn_cell.OutputProjectionWrapper(cell, vocab_size)
            output_size = vocab_size

        for i in range(0, len(encoder_inputs)):
            if i > 0: variable_scope.get_variable_scope().reuse_variables()

            encoder_outputs, encoder_state = rnn.rnn(encoder_cell,
                                                     encoder_inputs[i],
                                                     dtype=dtype)

            # First calculate a concatenation of encoder outputs to put attention on.
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(1, top_states)

            with variable_scope.variable_scope("DRNN"):
                drnn_out, drnn_state = cell(encoder_state, drnn_state)

            # Decoder for this turn, using the projected decoder_cell set up
            # above and conditioned on the current DRNN state.

            answer_output, answer_state = embedding_attention_decoder(
                decoder_inputs[i],
                drnn_state,
                attention_states,
                decoder_cell,
                vocab_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

            outputs.append(answer_output)
            with variable_scope.variable_scope("DRNN", reuse=True):
                drnn_out, drnn_state = cell(answer_state, drnn_state)

        return outputs, drnn_state
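A minimal, hypothetical usage sketch for dialog_attention_seq2seq (legacy TF 0.x API). encoder_inputs and decoder_inputs are lists over dialog turns, and each turn is itself a list of per-time-step int32 batches; turn counts, lengths, and vocabulary size below are illustrative only.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell

num_turns, turn_len, vocab = 3, 12, 5000
cell = rnn_cell.GRUCell(256)

encoder_inputs = [[tf.placeholder(tf.int32, [None]) for _ in range(turn_len)]
                  for _ in range(num_turns)]
decoder_inputs = [[tf.placeholder(tf.int32, [None]) for _ in range(turn_len)]
                  for _ in range(num_turns)]

# outputs holds one list of decoder outputs per turn; drnn_state is the final
# state of the dialog-level RNN that links the turns together.
outputs, drnn_state = dialog_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell, vocab_size=vocab,
    feed_previous=False)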