Example #1
  def testEmbeddingAttentionDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell_fn = lambda: core_rnn_cell_impl.GRUCell(2)
        cell = cell_fn()
        enc_outputs, enc_state = core_rnn.static_rnn(
            cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs
        ], 1)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_attention_decoder(
            dec_inp,
            enc_state,
            attn_states,
            cell_fn(),
            num_symbols=4,
            embedding_size=2,
            output_size=3)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 3), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)
Example #2
  def testEmbeddingAttentionDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell = core_rnn_cell_impl.GRUCell(2)
        enc_outputs, enc_state = core_rnn.static_rnn(
            cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs
        ], 1)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        dec, mem = seq2seq_lib.embedding_attention_decoder(
            dec_inp,
            enc_state,
            attn_states,
            cell,
            num_symbols=4,
            embedding_size=2,
            output_size=3)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 3), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)
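Note: unlike Example #1, this variant passes the same GRUCell instance to both the encoder RNN and embedding_attention_decoder. On older TensorFlow 1.x releases the cell simply creates a second set of variables under the decoder's scope; later 1.x releases raise an error when a single RNNCell object is used in two different variable scopes, which is why Example #1 builds the decoder cell from a separate cell_fn() call.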
Example #3
def decoder(feed_previous_bool):
  reuse = None if feed_previous_bool else True
  with variable_scope.variable_scope(
      variable_scope.get_variable_scope(), reuse=reuse):
    outputs, state = seq2seq.embedding_attention_decoder(
        decoder_inputs,
        encoder_state,
        attention_states,
        dec_cell,
        num_decoder_symbols,
        embedding_size,
        num_heads=num_heads,
        output_size=output_size,
        output_projection=output_projection,
        feed_previous=feed_previous_bool,
        update_embedding_for_previous=False,
        initial_state_attention=initial_state_attention)
    state_list = [state]
    if nest.is_sequence(state):
      state_list = nest.flatten(state)
    return outputs + state_list
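This helper is traced twice by control_flow_ops.cond, once with reuse=None (creating the decoder variables) and once with reuse=True (sharing them), so the run-time branch choice does not duplicate weights. A minimal self-contained sketch of that pattern, assuming the TensorFlow 1.x graph API (the names below are illustrative, not from the original code):

import tensorflow as tf

def branch(first_call):
  # tf.cond builds the true branch first, so the first trace creates
  # the variable and the second trace reuses it.
  with tf.variable_scope(
      tf.get_variable_scope(), reuse=None if first_call else True):
    w = tf.get_variable("w", shape=[], initializer=tf.ones_initializer())
    return w * (1.0 if first_call else 2.0)

flag = tf.placeholder(tf.bool, shape=[])
out = tf.cond(flag, lambda: branch(True), lambda: branch(False))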
Example #4
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can be
  large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell defining the encoder cell function and size.
    dec_cell: tf.nn.rnn_cell.RNNCell defining the decoder cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder: embed the integer inputs, then run the encoder cell over them.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        enc_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      dec_cell = core_rnn_cell.OutputProjectionWrapper(
          dec_cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return seq2seq.embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          dec_cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = seq2seq.embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            dec_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
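A minimal sketch of calling this variant, assuming a TensorFlow 1.x graph session, GRUCell from tf.contrib.rnn, and the module imports the function above already relies on (batch size, sequence lengths, and vocabulary sizes below are illustrative):

import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell

batch_size = 2
# Integer symbol ids: a length-4 source and a length-3 target sequence.
encoder_inputs = [tf.placeholder(tf.int32, [batch_size]) for _ in range(4)]
decoder_inputs = [tf.placeholder(tf.int32, [batch_size]) for _ in range(3)]

outputs, state = embedding_attention_seq2seq(
    encoder_inputs,
    decoder_inputs,
    enc_cell=GRUCell(8),  # separate cell instances for encoder...
    dec_cell=GRUCell(8),  # ...and decoder, matching the split signature
    num_encoder_symbols=10,
    num_decoder_symbols=10,
    embedding_size=8,
    feed_previous=True)   # decode from previous outputs after the GO symbol
# outputs is a list of 3 Tensors of shape [batch_size, num_decoder_symbols].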
Example #5
def embedding_attention_seq2seq(
		encoder_inputs,
		decoder_inputs,
		cell,
		num_encoder_symbols,
		num_decoder_symbols,
		embedding_size,
		num_heads=1,
		output_projection=None,
		feed_previous=False,
		dtype=None,
		scope=None,
		initial_state_attention=False):

	with variable_scope.variable_scope(scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
		dtype = scope.dtype
		# Encoder.
		#####################
		# modified deepcopy #
		#####################
		# encoder_cell = copy.deepcopy(cell)
		encoder_cell = core_rnn_cell.EmbeddingWrapper(
			# encoder_cell,
			cell,
			embedding_classes=num_encoder_symbols,
			embedding_size=embedding_size
		)
		encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

		# First calculate a concatenation of encoder outputs to put attention on.
		top_states = [
			array_ops.reshape(e, [-1, 1, cell.output_size]) for e in encoder_outputs
		]
		attention_states = array_ops.concat(top_states, 1)

		# Decoder.
		output_size = None
		if output_projection is None:
			cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
			output_size = num_decoder_symbols

		if isinstance(feed_previous, bool):
			return embedding_attention_decoder(
				decoder_inputs,
				encoder_state,
				attention_states,
				cell,
				num_decoder_symbols,
				embedding_size,
				num_heads=num_heads,
				output_size=output_size,
				output_projection=output_projection,
				feed_previous=feed_previous,
				initial_state_attention=initial_state_attention)

		# If feed_previous is a Tensor, we construct 2 graphs and use cond.
		def decoder(feed_previous_bool):
			reuse = None if feed_previous_bool else True
			with variable_scope.variable_scope(variable_scope.get_variable_scope(), reuse=reuse):
				print(variable_scope.get_variable_scope()._name, variable_scope.get_variable_scope()._name_scope)  # debug: show the active scope
				outputs, state = embedding_attention_decoder(
					decoder_inputs,
					encoder_state,
					attention_states,
					cell,
					num_decoder_symbols,
					embedding_size,
					num_heads=num_heads,
					output_size=output_size,
					output_projection=output_projection,
					feed_previous=feed_previous_bool,
					update_embedding_for_previous=False,
					initial_state_attention=initial_state_attention)
				state_list = [state]
				if nest.is_sequence(state):
					state_list = nest.flatten(state)
				return outputs + state_list

		outputs_and_state = control_flow_ops.cond(
			feed_previous, lambda: decoder(True), lambda: decoder(False)
		)
		outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
		state_list = outputs_and_state[outputs_len:]
		state = state_list[0]
		if nest.is_sequence(encoder_state):
			state = nest.pack_sequence_as(structure=encoder_state, flat_sequence=state_list)
		return outputs_and_state[:outputs_len], state
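The point of this variant is the removed copy.deepcopy(cell) call: the stock tf.contrib implementation deep-copied the cell so the encoder and decoder would use independent cell objects, but that copy can fail (e.g. with a TypeError) once the cell holds non-copyable graph state. Here the original cell is passed to both sides, relying on the distinct encoder and decoder variable scopes to keep their weights separate. A call then needs only one cell, e.g., reusing the GRUCell import and placeholder inputs from the sketch after Example #4 (sizes illustrative, TensorFlow 1.x assumed):

cell = GRUCell(8)  # one cell object, shared by encoder and decoder
outputs, state = embedding_attention_seq2seq(
    encoder_inputs,
    decoder_inputs,
    cell,
    num_encoder_symbols=10,
    num_decoder_symbols=10,
    embedding_size=8,
    feed_previous=True)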