Esempio n. 1
0
    def zero_state(self, batch_size, dtype):
        """Build the initial `GMMTacoDecoderCellState` for this cell.

        Args:
          batch_size: batch size used to shape the zero-filled tensors and
            handed to `self.check_batch_size`.
          dtype: data type of the zero-filled tensors and of the
            alignment-history `TensorArray`.

        Returns:
          A `GMMTacoDecoderCellState` with the RNN cell state (passed through
          identity ops guarded by the batch-size checks), a scalar int32
          `time` of zero, zero-filled `attention` and `mu` tensors, and an
          empty, dynamically sized `TensorArray` as `alignment_history`.

        Raises:
          AssertionError: if `self.attention_mechanism` is not None (only
            while Python assertions are enabled).
        """
        # For GMM attention, we only need attention_computer
        assert self.attention_mechanism is None

        scope_name = type(self).__name__ + "ZeroState"
        with ops.name_scope(scope_name, values=[batch_size]):
            # A preset decoder_rnn_init_state takes precedence over the
            # wrapped cell's own zero state.
            if self.decoder_rnn_init_state is None:
                initial_rnn_state = self.rnn_cell._cell.zero_state(
                    batch_size, dtype)
            else:
                initial_rnn_state = self.decoder_rnn_init_state

            def _as_checked(tensor):
                return array_ops.identity(
                    tensor, name="checked_rnn_cell_state")

            # Whatever check_batch_size returns becomes a control dependency
            # of the identity ops created inside this context.
            batch_checks = self.check_batch_size(batch_size)
            with ops.control_dependencies(batch_checks):
                initial_rnn_state = nest.map_structure(
                    _as_checked, initial_rnn_state)

            start_time = array_ops.zeros([], dtype=tf.int32)
            zero_attention = rnn_cell_impl._zero_state_tensors(
                self.attention_layer_size, batch_size, dtype)
            zero_mu = rnn_cell_impl._zero_state_tensors(
                self.num_gmm_mixture, batch_size, dtype)
            empty_history = tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True)

            return GMMTacoDecoderCellState(
                rnn_cell_state=initial_rnn_state,
                time=start_time,
                attention=zero_attention,
                mu=zero_mu,
                alignment_history=empty_history)
Esempio n. 2
0
 def zero_state(self, batch_size, dtype):
     """Return the initial `CoverageAttentionWrapperState` for this wrapper.

     **NOTE** When decoding with a `BeamSearchDecoder`, the encoder output
     must have been tiled to the beam width and `batch_size` must be
     `batch_size * beam_width` (see the error message assembled below).

     Args:
       batch_size: `0D` integer tensor: the batch size.
       dtype: The internal state data type.

     Returns:
       A `CoverageAttentionWrapperState` holding the (checked) cell state,
       a scalar int32 `time` of zero, a zero-filled `attention`, and, for
       each attention mechanism, zero-filled `coverages`, the mechanism's
       initial `alignments`, and either an empty `TensorArray` or `()` as
       `alignment_history`.

     Raises:
       Whatever `self._batch_size_checks` produces. Per the original
       documentation this is a ValueError (or, possibly at runtime,
       InvalidArgument) when `batch_size` does not match the output size
       of the encoder passed to the wrapper at initialization time.
     """
     scope_name = type(self).__name__ + "ZeroState"
     with ops.name_scope(scope_name, values=[batch_size]):
         # A preset initial cell state takes precedence over zeros.
         if self._initial_cell_state is None:
             wrapped_state = self._cell.zero_state(batch_size, dtype)
         else:
             wrapped_state = self._initial_cell_state

         message_prefix = (
             "When calling zero_state of AttentionWrapper %s: " %
             self._base_name)
         message_body = (
             "Non-matching batch sizes between the memory "
             "(encoder output) and the requested batch size.  Are you using "
             "the BeamSearchDecoder?  If so, make sure your encoder output has "
             "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
             "the batch_size= argument passed to zero_state is "
             "batch_size * beam_width.")
         error_message = message_prefix + message_body

         # The identity ops are created under the batch-size checks so the
         # checks become control dependencies of the returned cell state.
         batch_checks = self._batch_size_checks(batch_size, error_message)
         with tf.control_dependencies(batch_checks):
             wrapped_state = nest.map_structure(
                 lambda s: tf.identity(s, name="checked_cell_state"),
                 wrapped_state)

         def _fresh_history():
             # The alignment history is read several times, so
             # clear_after_read has to be False.
             if not self._alignment_history:
                 return ()
             return tf.TensorArray(dtype=dtype,
                                   size=0,
                                   clear_after_read=False,
                                   dynamic_size=True)

         start_time = tf.zeros([], dtype=tf.int32)
         zero_attention = _zero_state_tensors(self._attention_layer_size,
                                              batch_size, dtype)
         zero_coverages = self._item_or_tuple(
             _zero_state_tensors(mechanism.alignments_size, batch_size, dtype)
             for mechanism in self._attention_mechanisms)
         first_alignments = self._item_or_tuple(
             mechanism.initial_alignments(batch_size, dtype)
             for mechanism in self._attention_mechanisms)
         histories = self._item_or_tuple(
             _fresh_history() for _ in self._attention_mechanisms)

         return CoverageAttentionWrapperState(
             cell_state=wrapped_state,
             time=start_time,
             attention=zero_attention,
             coverages=zero_coverages,
             alignments=first_alignments,
             alignment_history=histories)
Esempio n. 3
0
 def zero_state(self, batch_size, dtype):
     """Return the initial `AttentionWrapperState` for this wrapper.

     Args:
       batch_size: `0D` integer tensor: the batch size.
       dtype: The internal state data type.

     Returns:
       A `tf.contrib.seq2seq.AttentionWrapperState` made of the initial
       cell state, a scalar int32 `time` of zero, a zero-filled
       `attention`, the attention mechanism's initial alignments and
       initial state, and an empty tuple as `alignment_history`.

     Note:
       This method performs no explicit batch-size check of its own.
     """
     with tf.name_scope(type(self).__name__ + "ZeroState",
                        values=[batch_size]):
         # A preset initial cell state takes precedence over zeros.
         cell_state = (self._cell.zero_state(batch_size, dtype)
                       if self._initial_cell_state is None
                       else self._initial_cell_state)

         start_alignments = self._attention_mechanism.initial_alignments(
             batch_size, dtype)
         start_time = tf.zeros([], dtype=tf.int32)
         zero_attention = rnn_cell_impl._zero_state_tensors(
             self._attention_layer_size, batch_size, dtype)
         start_attention_state = self._attention_mechanism.initial_state(
             batch_size, dtype)

         return tf.contrib.seq2seq.AttentionWrapperState(
             cell_state=cell_state,
             time=start_time,
             attention=zero_attention,
             alignments=start_alignments,
             attention_state=start_attention_state,
             alignment_history=())
Esempio n. 4
0
    def zero_state(self, batch_size, dtype):
        """Return zero-filled state tensor(s), cached while in eager mode.

        Args:
          batch_size: int, float, or unit Tensor representing the batch size.
          dtype: the data type to use for the state.

        Returns:
          The result of `_zero_state_tensors(self.state_size, batch_size,
          dtype)`. Per the original documentation: for an int or
          TensorShape `state_size` this is an `N-D` zero tensor of shape
          `[batch_size, state_size]`; for a nested list or tuple it is the
          same structure of `2-D` zero tensors shaped `[batch_size, s]`.
        """
        state_size = self.state_size
        eager = context.in_eager_mode()

        # In eager mode, reuse the previous result when the request is the
        # same, so the zeros are not rebuilt on every call.
        if eager and hasattr(self, "_last_zero_state"):
            (cached_size, cached_batch, cached_dtype,
             cached_zeros) = self._last_zero_state
            same_request = (cached_batch == batch_size
                            and cached_dtype == dtype
                            and cached_size == state_size)
            if same_request:
                return cached_zeros

        scope_name = type(self).__name__ + "ZeroState"
        with ops.name_scope(scope_name, values=[batch_size]):
            zeros = _zero_state_tensors(state_size, batch_size, dtype)

        if eager:
            # Remember the request alongside its result for the next call.
            self._last_zero_state = (state_size, batch_size, dtype, zeros)
        return zeros
Esempio n. 5
0
    def _create(self, encoder_output, decoder_state_size, **kwargs):
        """ Creates decoder's initial RNN states according to
        `decoder_state_size`.

        Passes the final state of encoder to each layer in decoder.

        Args:
            encoder_output: An instance of `collections.namedtuple`
              from `Encoder.encode()`. Its `attention_length` and
              `final_states` fields are read here.
            decoder_state_size: RNN decoder state size. When it is a
              sequence, its first element is taken as the per-layer state
              size; otherwise the value itself is the per-layer state size.
            **kwargs: Not used by this method.

        Returns: The decoder states with the structure determined
          by `decoder_state_size`: a tuple repeating the encoder's final
          state once per element when `decoder_state_size` is a sequence,
          otherwise the encoder's final state itself.

        Raises:
            ValueError: per the original contract, if the structure of
              encoder RNN state does not have the same structure of decoder
              RNN state (raised by `assert_state_is_compatible`).
        """
        batch_size = tf.shape(encoder_output.attention_length)[0]
        # Expected to be an LSTMStateTuple (original author's note).
        enc_final_state = _final_state(
            encoder_output.final_states, direction=self.params["direction"])

        # Only index into the state size when it really is a sequence; the
        # original indexed unconditionally, which fails for a plain int size
        # and made the non-sequence return below unreachable for it.
        is_multi_layer = nest.is_sequence(decoder_state_size)
        if is_multi_layer:
            layer_state_size = decoder_state_size[0]
        else:
            layer_state_size = decoder_state_size

        # A zero state of one decoder layer serves as the structural template
        # the encoder's final state is checked against.
        assert_state_is_compatible(
            rnn_cell_impl._zero_state_tensors(
                layer_state_size, batch_size, tf.float32),
            enc_final_state)

        if is_multi_layer:
            return tuple(enc_final_state for _ in decoder_state_size)
        return enc_final_state
def make_decoder_cell(rnn_size, num_layers, encoder_output, source_seq_len,
                      keep_prob, batch_size, encoder_state):
    """Build an attention-wrapped decoder cell and its initial state.

    Args:
        rnn_size: size passed to each `LSTMCell`, to `BahdanauAttention`,
          to the attention wrapper, and to `_zero_state_tensors`.
        num_layers: number of iterations of the cell-building loop.
        encoder_output: second argument of `BahdanauAttention`.
        source_seq_len: third argument of `BahdanauAttention`.
        keep_prob: `input_keep_prob` of the `DropoutWrapper`.
        batch_size: batch size of the zero-filled tensor in the initial
          state.
        encoder_state: indexable; element 0 becomes the first field of the
          initial `DynamicAttentionWrapperState`.

    Returns:
        A `(dec_cell, initial_state)` pair: the `DynamicAttentionWrapper`
        and the `DynamicAttentionWrapperState` to start decoding from.
    """
    # NOTE: `dec_cell` is rebound on every iteration, so the cell handed to
    # the attention wrapper below is the one built in the last iteration.
    for layer_index in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer_index)):
            uniform_init = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
            lstm = tf.contrib.rnn.LSTMCell(rnn_size,
                                           initializer=uniform_init)
            dec_cell = tf.contrib.rnn.DropoutWrapper(
                lstm, input_keep_prob=keep_prob)

    bahdanau = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size,
        encoder_output,
        source_seq_len,
        normalize=False,
        name='BahdanauAttention')

    attentive_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
        dec_cell, bahdanau, rnn_size)

    first_encoder_state = encoder_state[0]
    zero_attention = _zero_state_tensors(rnn_size, batch_size, tf.float32)
    start_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        first_encoder_state, zero_attention)
    return attentive_cell, start_state
Esempio n. 7
0
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state,
                   vocab_size, inputs_length, targets_length,
                   max_target_length, rnn_size, TEXT_2_INT, keep_prob,
                   batch_size, num_layers, direction):
    """Create the decoder cell with attention and build both decoders.

    Args:
        dec_embed_input: forwarded to `training_decoding_layer`.
        embeddings: forwarded to `inference_decoding_layer`.
        enc_output: second argument of `BahdanauAttention`.
        enc_state: first field of the initial
          `DynamicAttentionWrapperState`.
        vocab_size: unit count of the `Dense` output layer.
        inputs_length: third argument of `BahdanauAttention`.
        targets_length: forwarded to `training_decoding_layer`.
        max_target_length: forwarded to both decoding helpers.
        rnn_size: size of the LSTM cells, the attention mechanism, the
          attention wrapper and the zero-filled initial attention.
        TEXT_2_INT: mapping that must contain the keys '<GO>' and '<EOS>'.
        keep_prob: `input_keep_prob` of the `DropoutWrapper`.
        batch_size: batch size for the zero-filled tensor and for inference.
        num_layers: number of iterations of the cell-building loop.
        direction: not used in this function.

    Returns:
        A `(training_logits, inference_logits)` pair, i.e. the results of
        `training_decoding_layer` and `inference_decoding_layer`.
    """
    with tf.name_scope("RNN_Decoder_Cell"):
        # NOTE: `dec_cell` is rebound on every iteration, so the attention
        # wrapper below receives the cell built in the last iteration.
        for layer_index in range(num_layers):
            with tf.variable_scope('decoder_{}'.format(layer_index)):
                base_cell = tf.contrib.rnn.LSTMCell(rnn_size)
                dec_cell = tf.contrib.rnn.DropoutWrapper(
                    base_cell, input_keep_prob=keep_prob)

    projection_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
    output_layer = Dense(vocab_size, kernel_initializer=projection_init)

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size,
        enc_output,
        inputs_length,
        normalize=False,
        name='BahdanauAttention')

    with tf.name_scope("Attention_Wrapper"):
        dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
            dec_cell, attn_mech, rnn_size)

    zero_attention = _zero_state_tensors(rnn_size, batch_size, tf.float32)
    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        enc_state, zero_attention)

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(
            dec_embed_input, targets_length, dec_cell, initial_state,
            output_layer, vocab_size, max_target_length)

    # Second pass over the same "decode" scope, this time with reuse=True.
    with tf.variable_scope("decode", reuse=True):
        go_token = TEXT_2_INT['<GO>']
        eos_token = TEXT_2_INT['<EOS>']
        inference_logits = inference_decoding_layer(
            embeddings, go_token, eos_token, dec_cell, initial_state,
            output_layer, max_target_length, batch_size)

    return training_logits, inference_logits
Esempio n. 8
0
  def zero_state(self, batch_size, dtype):
    """Return the initial `TacotronDecoderCellState` for this cell.

    Args:
      batch_size: `0D` integer tensor: the batch size.
      dtype: The internal state data type.

    Returns:
      A `TacotronDecoderCellState` holding the wrapped cell's zero state
      (passed through identity ops guarded by the batch-size checks), a
      scalar int32 `time` of zero, a zero-filled `attention`, the attention
      mechanism's initial alignments, and an empty, dynamically sized
      `TensorArray` as `alignment_history`.

    Raises:
      Whatever `self._batch_size_checks` produces. Per the original
      documentation this is a ValueError (or, possibly at runtime,
      InvalidArgument) when `batch_size` does not match the output size of
      the encoder passed to the wrapper at initialization time.
    """
    scope_name = type(self).__name__ + "ZeroState"
    with ops.name_scope(scope_name, values=[batch_size]):
      wrapped_state = self._cell.zero_state(batch_size, dtype)

      message_prefix = (
        "When calling zero_state of TacotronDecoderCell %s: " %
        self._base_name)
      message_body = (
        "Non-matching batch sizes between the memory "
        "(encoder output) and the requested batch size.")
      error_message = message_prefix + message_body

      def _as_checked(tensor):
        return array_ops.identity(tensor, name="checked_cell_state")

      # The identity ops are created under the batch-size checks so the
      # checks become control dependencies of the returned cell state.
      batch_checks = self._batch_size_checks(batch_size, error_message)
      with ops.control_dependencies(batch_checks):
        wrapped_state = nest.map_structure(_as_checked, wrapped_state)

      start_time = array_ops.zeros([], dtype=tf.int32)
      zero_attention = rnn_cell_impl._zero_state_tensors(
        self._attention_layer_size, batch_size, dtype)
      start_alignments = self._attention_mechanism.initial_alignments(
        batch_size, dtype)
      empty_history = tensor_array_ops.TensorArray(
        dtype=dtype, size=0, dynamic_size=True)

      return TacotronDecoderCellState(
        cell_state=wrapped_state,
        time=start_time,
        attention=zero_attention,
        alignments=start_alignments,
        alignment_history=empty_history)
    def zero_state(self, batch_size, dtype):
        """Return a zero state (code modeled on `AttentionWrapper`).

        Args:
          batch_size: the batch size.
          dtype: data type of the zero-filled tensors and `TensorArray`s.

        Returns:
          A `tf.contrib.seq2seq.AttentionWrapperState` holding the wrapped
          cell's zero state (passed through identity ops guarded by the
          batch-size checks), a scalar int32 `time` of zero, a zero-filled
          `attention`, the attention mechanism's initial alignments, and
          empty, dynamically sized `TensorArray`s for both
          `alignment_history` and `attention_state`.
        """
        scope_name = type(self).__name__ + "ZeroState"
        with ops.name_scope(scope_name, values=[batch_size]):
            wrapped_state = self._cell.zero_state(batch_size, dtype)

            message_prefix = (
                "When calling zero_state of TacotronDecoderCell %s: " %
                self._base_name)
            message_body = (
                "Non-matching batch sizes between the memory "
                "(encoder output) and the requested batch size.")
            error_message = message_prefix + message_body

            def _as_checked(tensor):
                return array_ops.identity(tensor, name="checked_cell_state")

            # The identity ops are created under the batch-size checks so
            # the checks become control dependencies of the cell state.
            batch_checks = self._batch_size_checks(batch_size, error_message)
            with ops.control_dependencies(batch_checks):
                wrapped_state = nest.map_structure(_as_checked, wrapped_state)

            start_time = array_ops.zeros([], dtype=tf.int32)
            zero_attention = rnn_cell_impl._zero_state_tensors(
                self._attention_layer_size, batch_size, dtype)
            start_alignments = self._attention_mechanism.initial_alignments(
                batch_size, dtype)
            empty_history = tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True)
            empty_attention_state = tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True)

            return tf.contrib.seq2seq.AttentionWrapperState(
                cell_state=wrapped_state,
                time=start_time,
                attention=zero_attention,
                alignments=start_alignments,
                alignment_history=empty_history,
                attention_state=empty_attention_state)
Esempio n. 10
0
    def zero_state(self, batch_size, dtype):
        """Assemble the initial `DoubleStateTuple` from the stored sentences.

        Args:
          batch_size: batch size of the zero-filled tensor.
          dtype: not used; the zero tensor is always built as `tf.float32`.

        Returns:
          A `DoubleStateTuple` of `self.sent1` and `self.sent2`, each
          reshaped to `[-1, length * self.dim]`, followed by one zero-filled
          tensor built from `self.num_units`.
        """
        scope_name = type(self).__name__ + "ZeroState"
        with tf.name_scope(scope_name, values=[batch_size]):
            # Flatten each sentence to one row of length * dim values.
            flat_sent1 = tf.reshape(
                self.sent1, [-1, self.sent1_length * self.dim])
            flat_sent2 = tf.reshape(
                self.sent2, [-1, self.sent2_length * self.dim])

            zero_slots = _zero_state_tensors(
                [self.num_units], batch_size, dtype=tf.float32)

            return DoubleStateTuple(flat_sent1, flat_sent2, zero_slots[0])
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size, text_length, summary_length,
                   max_summary_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers):
    """Create the decoding cell and attention for the training and inference decoding layers.

    Args:
        dec_embed_input: forwarded to `training_decoding_layer`.
        embeddings: forwarded to `inference_decoding_layer`.
        enc_output: second argument of `BahdanauAttention`.
        enc_state: indexable; element 0 becomes the first field of the
          initial `DynamicAttentionWrapperState`.
        vocab_size: unit count of the `Dense` output layer.
        text_length: third argument of `BahdanauAttention`.
        summary_length: forwarded to `training_decoding_layer`.
        max_summary_length: forwarded to both decoding helpers.
        rnn_size: size of the LSTM cells, the attention mechanism, the
          attention wrapper and the zero-filled initial attention.
        vocab_to_int: mapping that must contain the keys '<GO>' and '<EOS>'.
        keep_prob: `input_keep_prob` of the `DropoutWrapper`.
        batch_size: batch size for the zero-filled tensor and for inference.
        num_layers: number of iterations of the cell-building loop.

    Returns:
        A `(training_logits, inference_logits)` pair, i.e. the results of
        `training_decoding_layer` and `inference_decoding_layer`.
    """
    # NOTE: `dec_cell` is rebound on every iteration, so the attention
    # wrapper below receives the cell built in the last iteration.
    for layer_index in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer_index)):
            uniform_init = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
            lstm = tf.contrib.rnn.LSTMCell(rnn_size, initializer=uniform_init)
            dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)

    projection_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
    output_layer = Dense(vocab_size, kernel_initializer=projection_init)

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size,
        enc_output,
        text_length,
        normalize=False,
        name='BahdanauAttention')

    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(dec_cell, attn_mech, rnn_size)

    first_encoder_state = enc_state[0]
    zero_attention = _zero_state_tensors(rnn_size, batch_size, tf.float32)
    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        first_encoder_state, zero_attention)

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(
            dec_embed_input, summary_length, dec_cell, initial_state,
            output_layer, vocab_size, max_summary_length)

    # Second pass over the same "decode" scope, this time with reuse=True.
    with tf.variable_scope("decode", reuse=True):
        go_token = vocab_to_int['<GO>']
        eos_token = vocab_to_int['<EOS>']
        inference_logits = inference_decoding_layer(
            embeddings, go_token, eos_token, dec_cell, initial_state,
            output_layer, max_summary_length, batch_size)

    return training_logits, inference_logits
Esempio n. 12
0
    def zero_state(self, batch_size, dtype):
        """Assemble the initial state list from the three stored sentences.

        Args:
          batch_size: batch size of the zero-filled tensors.
          dtype: data type of the zero-filled tensors.

        Returns:
          A list of six tensors: `self.sent1`, `self.sent2` and `self.sent3`,
          each reshaped to `[-1, length * self.dim]`, followed by three
          zero-filled tensors built from `self.num_units`.
        """
        scope_name = type(self).__name__ + "ZeroState"
        with tf.name_scope(scope_name, values=[batch_size]):
            # Flatten each sentence to one row of length * dim values.
            flat_sent1 = tf.reshape(
                self.sent1, [-1, self.sent1_length * self.dim])
            flat_sent2 = tf.reshape(
                self.sent2, [-1, self.sent2_length * self.dim])
            flat_sent3 = tf.reshape(
                self.sent3, [-1, self.sent3_length * self.dim])

            # One zero slot per sentence. (An earlier variant, commented out
            # in the original, tiled `self.keys` here instead of zeros.)
            zero_slots = _zero_state_tensors(
                [self.num_units] * 3, batch_size, dtype)

            return [flat_sent1, flat_sent2, flat_sent3] + zero_slots
Esempio n. 13
0
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size, text_length, summary_length,
                   max_summary_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers):
    """Create the decoding cell and attention for the training and inference decoding layers.

    Args:
        dec_embed_input: forwarded to `training_decoding_layer`.
        embeddings: forwarded to `inference_decoding_layer`.
        enc_output: second argument of `BahdanauAttention`.
        enc_state: indexable; element 0 becomes the first field of the
          initial `DynamicAttentionWrapperState`.
        vocab_size: unit count of the `Dense` output layer.
        text_length: third argument of `BahdanauAttention`.
        summary_length: forwarded to `training_decoding_layer`.
        max_summary_length: forwarded to both decoding helpers.
        rnn_size: size of the LSTM cells, the attention mechanism, the
          attention wrapper and the zero-filled initial attention.
        vocab_to_int: mapping that must contain the keys '<GO>' and '<EOS>'.
        keep_prob: `input_keep_prob` of the `DropoutWrapper`.
        batch_size: batch size for the zero-filled tensor and for inference.
        num_layers: number of iterations of the cell-building loop.

    Returns:
        A `(training_logits, inference_logits)` pair, i.e. the results of
        `training_decoding_layer` and `inference_decoding_layer`.
    """
    # NOTE: `dec_cell` is rebound on every iteration, so only the cell from
    # the last iteration reaches the attention wrapper below.
    for layer_index in range(num_layers):
        layer_scope = 'decoder_{}'.format(layer_index)
        with tf.variable_scope(layer_scope):
            cell_init = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
            lstm = tf.contrib.rnn.LSTMCell(rnn_size, initializer=cell_init)
            dec_cell = tf.contrib.rnn.DropoutWrapper(lstm, input_keep_prob=keep_prob)

    dense_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
    output_layer = Dense(vocab_size, kernel_initializer=dense_init)

    attn_mech = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size,
        enc_output,
        text_length,
        normalize=False,
        name='BahdanauAttention')

    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(dec_cell, attn_mech, rnn_size)

    encoder_start = enc_state[0]
    attention_start = _zero_state_tensors(rnn_size, batch_size, tf.float32)
    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        encoder_start, attention_start)

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(
            dec_embed_input, summary_length, dec_cell, initial_state,
            output_layer, vocab_size, max_summary_length)

    # Second pass over the same "decode" scope, this time with reuse=True.
    with tf.variable_scope("decode", reuse=True):
        start_token = vocab_to_int['<GO>']
        end_token = vocab_to_int['<EOS>']
        inference_logits = inference_decoding_layer(
            embeddings, start_token, end_token, dec_cell, initial_state,
            output_layer, max_summary_length, batch_size)

    return training_logits, inference_logits
Esempio n. 14
0
    def zero_state(self, batch_size, dtype):
        """Return the initial `TacotronDecoderCellState` for this cell.

        Args:
          batch_size: the batch size.
          dtype: data type of the zero-filled tensors and `TensorArray`.

        Returns:
          A `TacotronDecoderCellState` holding the wrapped cell's zero state
          (passed through identity ops guarded by the batch-size checks), a
          scalar int32 `time` of zero, a zero-filled `attention`, the
          attention mechanism's initial alignments, and an empty,
          dynamically sized `TensorArray` as `alignment_history`.
        """
        scope_name = type(self).__name__ + "ZeroState"
        with ops.name_scope(scope_name, values=[batch_size]):
            wrapped_state = self._cell.zero_state(batch_size, dtype)

            def _as_checked(tensor):
                return array_ops.identity(tensor, name="checked_cell_state")

            # The identity ops are created under the batch-size checks so
            # the checks become control dependencies of the cell state.
            batch_checks = self._batch_size_checks(batch_size)
            with ops.control_dependencies(batch_checks):
                wrapped_state = nest.map_structure(_as_checked, wrapped_state)

            start_time = array_ops.zeros([], dtype=tf.int32)
            zero_attention = rnn_cell_impl._zero_state_tensors(
                self._attention_layer_size, batch_size, dtype)
            start_alignments = self._attention_mechanism.initial_alignments(
                batch_size, dtype)
            empty_history = tensor_array_ops.TensorArray(
                dtype=dtype, size=0, dynamic_size=True)

            return TacotronDecoderCellState(
                cell_state=wrapped_state,
                time=start_time,
                attention=zero_attention,
                alignments=start_alignments,
                alignment_history=empty_history)
Esempio n. 15
0
    def _create(self, decoder_state_size, **kwargs):
        """ Creates zero-filled initial RNN states for the decoder.

        Per the original documentation: if `decoder_state_size` is
        int/LSTMStateTuple(int, int), the result is a Tensor with shape
        [batch_size, int] or LSTMStateTuple([batch_size, int],
        [batch_size, int]); if it is a tuple of int/LSTMStateTuple, the
        result is a tuple whose elements match its structure respectively.

        Args:
            decoder_state_size: RNN decoder state size.
            **kwargs: Not used by this method.

        Returns: The float32 zero states produced by
          `rnn_cell_impl._zero_state_tensors` for `decoder_state_size`,
          using `self.batch_size` as the batch size.
        """
        zero_states = rnn_cell_impl._zero_state_tensors(
            decoder_state_size, self.batch_size, tf.float32)
        return zero_states
  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s) matching `self.state_size`.

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      The result of `rnn_cell_impl._zero_state_tensors`. Per the original
      documentation: for an int or TensorShape `state_size`, an `N-D` zero
      tensor of shape `[batch_size x state_size]`; for a nested list or
      tuple, the same structure of `2-D` zero tensors with the shapes
      `[batch_size x s]` for each s in `state_size`.
    """
    # Keep scope for backwards compatibility.
    scope_name = type(self).__name__ + "ZeroState"
    with tf.name_scope(scope_name, values=[batch_size]):
      zeros = rnn_cell_impl._zero_state_tensors(  # pylint: disable=protected-access
          self.state_size, batch_size, dtype)
    return zeros
Esempio n. 17
0
  def zero_state(self, batch_size, dtype):
    """Build the zero-filled initial state for `self.state_size`.

    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.

    Returns:
      Whatever `rnn_cell_impl._zero_state_tensors` yields. Per the original
      documentation: an `N-D` zero tensor of shape
      `[batch_size x state_size]` when `state_size` is an int or
      TensorShape; otherwise a nested list or tuple of the same structure
      holding `2-D` zero tensors of shape `[batch_size x s]` for each s in
      `state_size`.
    """
    zero_state_fn = rnn_cell_impl._zero_state_tensors  # pylint: disable=protected-access
    # Keep scope for backwards compatibility.
    with tf.name_scope(type(self).__name__ + "ZeroState",
                       values=[batch_size]):
      initial_state = zero_state_fn(self.state_size, batch_size, dtype)
    return initial_state
Esempio n. 18
0
    def _create(self, encoder_output, decoder_state_size, **kwargs):
        """ Creates zero-filled initial RNN states for the decoder.

        Per the original documentation: if `decoder_state_size` is
        int/LSTMStateTuple(int, int), the result is a Tensor with shape
        [batch_size, int] or LSTMStateTuple([batch_size, int],
        [batch_size, int]); if it is a tuple of int/LSTMStateTuple, the
        result is a tuple whose elements match its structure respectively.

        Args:
            encoder_output: An instance of `collections.namedtuple`
              from `Encoder.encode()`. Only its `attention_length` field is
              read, to obtain the batch size.
            decoder_state_size: RNN decoder state size.
            **kwargs: Not used by this method.

        Returns: The float32 zero states produced by
          `rnn_cell_impl._zero_state_tensors` for `decoder_state_size`.
        """
        # The batch size is the first dimension of attention_length.
        attention_length_shape = tf.shape(encoder_output.attention_length)
        dynamic_batch_size = attention_length_shape[0]
        zero_states = rnn_cell_impl._zero_state_tensors(
            decoder_state_size, dynamic_batch_size, tf.float32)
        return zero_states
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state,
                   vocab_size, TEXT_LENGTH, SUMMARY_LENGTH, MAX_SUMMARY_LENGTH,
                   RNN_SIZE, VOCAB_TO_INT, KEEP_PROB, BATCH_SIZE, NUM_LAYERS):
    """Build the attention decoder and return training and inference logits.

    An LSTM cell with input dropout is created per layer, wrapped with
    Bahdanau attention over `enc_output`, and passed to
    `training_decoding_layer` and `inference_decoding_layer`. Both helpers run
    under the "decode" variable scope; the second call reuses the variables
    of the first.

    NOTE(review): every loop iteration rebinds the same cell variable, so
    only the cell built in the last iteration reaches the attention wrapper,
    and with `NUM_LAYERS == 0` no cell is bound at all. Confirm whether a
    stacked cell was intended.

    Args:
        dec_embed_input: Forwarded to `training_decoding_layer`.
        embeddings: Forwarded to `inference_decoding_layer`.
        enc_output: Memory given to the attention mechanism.
        enc_state: Encoder state; its element `[0]` seeds the decoder cell
            state.
        vocab_size: Number of units of the output projection.
        TEXT_LENGTH: Memory sequence lengths for the attention mechanism.
        SUMMARY_LENGTH: Forwarded to `training_decoding_layer`.
        MAX_SUMMARY_LENGTH: Forwarded to both decoding helpers.
        RNN_SIZE: LSTM size; also used as attention size.
        VOCAB_TO_INT: Mapping that must contain '<GO>' and '<EOS>'.
        KEEP_PROB: Input keep probability of the dropout wrapper.
        BATCH_SIZE: Batch size of the zero attention state; also forwarded
            to `inference_decoding_layer`.
        NUM_LAYERS: Number of loop iterations that create a cell.

    Returns:
        The pair `(training_logits, inference_logits)` as returned by the two
        decoding helpers.
    """
    for layer_index in range(NUM_LAYERS):
        layer_scope = 'decoder_{}'.format(layer_index)
        with tf.variable_scope(layer_scope):
            weight_init = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
            base_cell = tf.contrib.rnn.LSTMCell(RNN_SIZE,
                                                initializer=weight_init)
            dropout_cell = tf.contrib.rnn.DropoutWrapper(
                base_cell, input_keep_prob=KEEP_PROB)

    projection_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
    projection = Dense(vocab_size, kernel_initializer=projection_init)

    attention = tf.contrib.seq2seq.BahdanauAttention(
        RNN_SIZE,
        enc_output,
        TEXT_LENGTH,
        normalize=False,
        name='BahdanauAttention')

    attention_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
        dropout_cell, attention, RNN_SIZE)

    # Decoder starts from the first encoder state plus an all-zero attention.
    encoder_cell_state = enc_state[0]
    zero_attention = _zero_state_tensors(RNN_SIZE, BATCH_SIZE, tf.float32)
    start_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
        encoder_cell_state, zero_attention)

    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(
            dec_embed_input, SUMMARY_LENGTH, attention_cell, start_state,
            projection, vocab_size, MAX_SUMMARY_LENGTH)

    # Same scope with reuse=True so inference shares the training variables.
    with tf.variable_scope("decode", reuse=True):
        go_id = VOCAB_TO_INT['<GO>']
        eos_id = VOCAB_TO_INT['<EOS>']
        inference_logits = inference_decoding_layer(
            embeddings, go_id, eos_id, attention_cell, start_state,
            projection, MAX_SUMMARY_LENGTH, BATCH_SIZE)

    return training_logits, inference_logits
Esempio n. 20
0
    def decoding_layer(self, input, encoder_output, encoder_state):
        """Build the attention decoder; return (train, inference) logits.

        Creates an LSTM cell with input dropout per layer, wraps it with
        Bahdanau attention over `encoder_output`, and runs
        `self.train_decoding_layer` and `self.inference_decoding_layer` under
        the "decode" variable scope (the second call reuses variables).

        NOTE(review): each loop iteration rebinds the same cell variable, so
        only the cell from the last iteration is wrapped with attention, and
        with `self.num_layers == 0` no cell is bound at all. Confirm whether
        stacking was intended.

        Args:
            input: Forwarded to `self.train_decoding_layer`.
            encoder_output: Memory for the attention mechanism.
            encoder_state: Encoder state; element `[0]` seeds the decoder
                cell state.

        Returns:
            The pair `(train_logits, inference_logits)` as returned by the
            two decoding helpers.
        """
        for layer_index in range(self.num_layers):
            layer_scope = 'decoder_{}'.format(layer_index)
            with tf.variable_scope(layer_scope):
                weight_init = tf.random_uniform_initializer(-0.1, 0.1, seed=2)
                lstm_cell = rnn.LSTMCell(self.cell_size,
                                         initializer=weight_init)
                dropout_cell = rnn.DropoutWrapper(
                    lstm_cell, input_keep_prob=self.keep_prob)

        projection_init = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.1)
        projection = Dense(self.vocab_length,
                           kernel_initializer=projection_init)

        attention = seq2seq.BahdanauAttention(
            self.cell_size, encoder_output, self.in_length, normalize=False)
        attention_cell = seq2seq.DynamicAttentionWrapper(
            dropout_cell, attention, self.cell_size)

        # Start from the first encoder state with an all-zero attention.
        zero_attention = _zero_state_tensors(self.cell_size, self.batch_size,
                                             tf.float32)
        first_encoder_state = encoder_state[0]
        start_state = seq2seq.DynamicAttentionWrapperState(
            first_encoder_state, zero_attention)

        with tf.variable_scope("decode"):
            logits_for_training = self.train_decoding_layer(
                input, attention_cell, start_state, projection)

        # Re-enter the scope with reuse=True to share the trained variables.
        with tf.variable_scope("decode", reuse=True):
            logits_for_inference = self.inference_decoding_layer(
                self.embeddings, attention_cell, start_state, projection)

        return logits_for_training, logits_for_inference
Esempio n. 21
0
 def initial_alignments(self, batch_size, dtype):
     """Return initial alignments with all weight on the first position.

     Args:
         batch_size: Batch size (Python int or scalar tensor).
         dtype: Data type of the returned alignments.

     Returns:
         The concatenation along axis 1 of a `[batch_size, 1]` column of
         ones and the zero tensor(s) built for the remaining
         `self._alignments_size - 1` positions, all in `dtype`.
     """
     max_time = self._alignments_size
     alignments = _zero_state_tensors(max_time - 1, batch_size, dtype)
     # Build the leading column in `dtype` as well: a hard-coded float32
     # column makes tf.concat fail whenever `dtype` is not float32.
     first_column = tf.ones([batch_size, 1], dtype=dtype)
     return tf.concat([first_column, alignments], 1)
Esempio n. 22
0
 def zero_state(self, batch_size, dtype):
     """Return the initial state: wrapped-cell zero state plus zero attention.

     Args:
         batch_size: Batch size passed to the wrapped cell and used for the
             attention zeros.
         dtype: Data type for both the cell state and the attention.

     Returns:
         A `SeqMatchSeqAttentionState` whose `cell_state` comes from
         `self._cell.zero_state` and whose `attention` is zero-filled
         according to `self.state_size.attention`.
     """
     cell_state = self._cell.zero_state(batch_size, dtype)
     # Honour the requested dtype for the attention too; it was previously
     # pinned to float32, which disagreed with the cell state for any other
     # dtype.
     attention = rnn_cell_impl._zero_state_tensors(
         self.state_size.attention, batch_size, dtype)
     return SeqMatchSeqAttentionState(cell_state=cell_state,
                                      attention=attention)
Esempio n. 23
0
 def zero_input(self, batch_size, dtype):
   """Return zero tensor(s) built from `self._input_shape`.

   Args:
     batch_size: Batch size forwarded to `_zero_state_tensors`.
     dtype: Data type forwarded to `_zero_state_tensors`.

   Returns:
     Whatever `_zero_state_tensors` produces for `self._input_shape`,
     created inside a "<ClassName>ZeroInput" name scope.
   """
   scope_name = type(self).__name__ + "ZeroInput"
   with tf.name_scope(scope_name, values=[batch_size]):
     return rnn_cell_impl._zero_state_tensors(
         self._input_shape, batch_size, dtype)
Esempio n. 24
0
 def _create(s, d):
     """Return zero tensor(s) for size spec `s` with data type `d`.

     `batch_size` is not a parameter; it is resolved from the surrounding
     scope.
     """
     zeros = rnn_cell_impl._zero_state_tensors(s, batch_size, d)
     return zeros
Esempio n. 25
0
    def _init_decoder(self):
        """Build the attention decoder for training and greedy inference.

        Side effects (attributes set on `self`):
            dec_embeddings: Decoder embedding matrix of shape
                [vocab_size_y, embedding_size], initialised uniformly in
                [-1, 1).
            decoder_train: `rnn_output` of the training decoder.
            decoder_inference: `sample_id` of the greedy inference decoder.

        Both results are additionally exposed in the graph through identity
        ops named 'decoder_train' and 'decoder_inference'. The training and
        inference decoders share variables through the "decoding" scope.
        """
        decoder_token_ids = process_decoding_input(
            self.data_y, self.vocab_to_int_y, self.batch_size)

        embedding_init = tf.random_uniform(
            [self.vocab_size_y, self.embedding_size], -1.0, 1.0)
        self.dec_embeddings = tf.Variable(embedding_init, dtype=tf.float32)

        embedded_inputs = tf.nn.embedding_lookup(self.dec_embeddings,
                                                 decoder_token_ids)

        with tf.variable_scope("decoder"):
            base_cell = rnn_cell(self.cell_size, self.dec_num_layers,
                                 self.dec_keep_prob)

        projection_init = tf.truncated_normal_initializer(mean=0.0,
                                                          stddev=0.1)
        projection = Dense(self.vocab_size_y,
                           kernel_initializer=projection_init)

        attention = seq2seq.BahdanauAttention(
            self.cell_size, self.enc_outputs, self.x_length, normalize=False)

        attention_cell = seq2seq.DynamicAttentionWrapper(
            base_cell, attention, attention_size=self.cell_size)

        # Decoder starts from the first encoder state and a zero attention.
        first_encoder_state = self.enc_states[0]
        zero_attention = _zero_state_tensors(self.cell_size, self.batch_size,
                                             tf.float32)
        start_state = seq2seq.DynamicAttentionWrapperState(
            cell_state=first_encoder_state, attention=zero_attention)

        with tf.variable_scope("decoding"):
            teacher_helper = seq2seq.TrainingHelper(
                embedded_inputs,
                sequence_length=self.y_length,
                time_major=False)

            teacher_decoder = seq2seq.BasicDecoder(
                attention_cell, teacher_helper, start_state, projection)

            teacher_output, _ = seq2seq.dynamic_decode(
                teacher_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_length,
                swap_memory=True)

            self.decoder_train = teacher_output.rnn_output

        # Same scope with reuse=True: inference shares the trained variables.
        with tf.variable_scope("decoding", reuse=True):
            start_id = tf.constant([self.vocab_to_int_y[START]],
                                   dtype=tf.int32)
            start_tokens = tf.tile(start_id, [self.batch_size])

            greedy_helper = seq2seq.GreedyEmbeddingHelper(
                embedding=self.dec_embeddings,
                start_tokens=start_tokens,
                end_token=self.vocab_to_int_y[STOP])

            greedy_decoder = seq2seq.BasicDecoder(
                attention_cell, greedy_helper, start_state, projection)

            greedy_output, _ = seq2seq.dynamic_decode(
                greedy_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.max_length)

            self.decoder_inference = greedy_output.sample_id

        # Give both results stable, well-known op names in the graph.
        tf.identity(self.decoder_train, 'decoder_train')
        tf.identity(self.decoder_inference, 'decoder_inference')
Esempio n. 26
0
    def add_decoder(self):
        """Build the attention decoder sub-graph under the 'Decoder' scope.

        Shared pieces (built for every mode): the decoder embedding variable
        `self.dec_Wemb` (placed on '/cpu:0'), the decoder cell wrapped with
        Luong attention over `self.enc_outputs`, an initial state made of
        `self.enc_last_state` plus zero attention, and the output projection.

        When `self.mode == 'training'` this sets `self.max_dec_len`,
        `self.dec_emb_inputs`, `self.train_dec_outputs`, `self.logits`,
        `self.targets`, `self.masks`, `self.batch_loss` and
        `self.valid_predictions`.

        When `self.mode == 'inference'` this sets `self.predictions`.

        For any other mode only the shared pieces are built.

        NOTE(review): `dec_vocab_size` and `dec_sentence_length` are neither
        parameters nor attributes; they are resolved from the surrounding
        scope, which is not visible here.
        """
        with tf.variable_scope('Decoder') as scope:
            # Embedding matrix is pinned to the CPU; the two extra rows
            # (`+ 2`) presumably hold special symbols — TODO confirm.
            with tf.device('/cpu:0'):
                self.dec_Wemb = tf.get_variable('embedding',
                                                initializer=tf.random_uniform([
                                                    dec_vocab_size + 2,
                                                    self.dec_emb_size
                                                ]),
                                                dtype=tf.float32)

            # Dynamic batch size, read from the encoder inputs at run time.
            batch_size = tf.shape(self.enc_inputs)[0]

            dec_cell = self.cell(self.hidden_size)

            attn_mech = tf.contrib.seq2seq.LuongAttention(
                num_units=self.attn_size,
                memory=self.enc_outputs,
                memory_sequence_length=self.enc_sequence_length,
                normalize=False,
                name='LuongAttention')

            dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
                cell=dec_cell,
                attention_mechanism=attn_mech,
                attention_size=self.attn_size,
                # attention_history=False (in ver 1.2)
                name='Attention_Wrapper')

            # Start decoding from the encoder's last state with an all-zero
            # (float32) attention vector.
            initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
                cell_state=self.enc_last_state,
                attention=_zero_state_tensors(self.attn_size, batch_size,
                                              tf.float32))

            # output projection (replacing `OutputProjectionWrapper`)
            output_layer = Dense(dec_vocab_size + 2, name='output_projection')

            if self.mode == 'training':

                # maximum unrollings in current batch = max(dec_sent_len) + 1 (GO symbol)
                self.max_dec_len = tf.reduce_max(self.dec_sequence_length + 1,
                                                 name='max_dec_len')

                self.dec_emb_inputs = tf.nn.embedding_lookup(self.dec_Wemb,
                                                             self.dec_inputs,
                                                             name='emb_inputs')

                training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=self.dec_emb_inputs,
                    sequence_length=self.dec_sequence_length + 1,
                    time_major=False,
                    name='training_helper')

                training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=dec_cell,
                    helper=training_helper,
                    initial_state=initial_state,
                    output_layer=output_layer)

                # The final decoder state is returned but not used afterwards.
                self.train_dec_outputs, train_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                    training_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.max_dec_len)

                # train_dec_outputs exposes `rnn_output` and `sample_id`.
                # Original author's shape notes (not verifiable from here —
                # TODO confirm):
                #   rnn_output: [batch_size x max(dec_sequence_len) x dec_vocab_size+2], tf.float32
                #   sample_id: tf.int32

                # logits: [batch_size x max_dec_len x dec_vocab_size+2]
                # (per the original author's note)
                self.logits = tf.identity(self.train_dec_outputs.rnn_output,
                                          name='logits')

                # targets: the first `max_dec_len` columns of `dec_inputs`,
                # i.e. a 2-D [batch_size x max_dec_len] slice (not 3-D).
                self.targets = tf.slice(self.dec_inputs, [0, 0],
                                        [-1, self.max_dec_len], 'targets')

                # masks: [batch_size x max_dec_len]
                # => ignore outputs after `dec_sequence_length+1` when calculating loss
                self.masks = tf.sequence_mask(self.dec_sequence_length + 1,
                                              self.max_dec_len,
                                              dtype=tf.float32,
                                              name='masks')

                # Control loss dimensions with `average_across_timesteps` and `average_across_batch`
                # internal: `tf.nn.sparse_softmax_cross_entropy_with_logits`
                self.batch_loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.logits,
                    targets=self.targets,
                    weights=self.masks,
                    name='batch_loss')

                # prediction sample for validation
                self.valid_predictions = tf.identity(
                    self.train_dec_outputs.sample_id, name='valid_preds')

                # List of training variables
                # self.training_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

            elif self.mode == 'inference':

                # One start token per batch element.
                start_tokens = tf.tile(tf.constant([self.start_token],
                                                   dtype=tf.int32),
                                       [batch_size],
                                       name='start_tokens')

                inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=self.dec_Wemb,
                    start_tokens=start_tokens,
                    end_token=self.end_token)

                inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=dec_cell,
                    helper=inference_helper,
                    initial_state=initial_state,
                    output_layer=output_layer)

                # Decoding is capped at `dec_sentence_length` steps; the
                # final decoder state is returned but not used afterwards.
                infer_dec_outputs, infer_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                    inference_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=dec_sentence_length)

                # [batch_size x dec_sentence_length], tf.int32
                # (per the original author's note)
                self.predictions = tf.identity(infer_dec_outputs.sample_id,
                                               name='predictions')
 def initial_alignments(self, batch_size, dtype):
     """Return all-zero initial alignments.

     The zeros are built by `_zero_state_tensors` from
     `self._alignments_size`, `batch_size` and `dtype`.
     """
     return rnn_cell_impl._zero_state_tensors(
         self._alignments_size, batch_size, dtype)
 def initial_state(self, batch_size, dtype):
     """Return an all-zero initial state.

     The zeros are built by `_zero_state_tensors` from `self.state_size`,
     `batch_size` and `dtype`.
     """
     zeros = rnn_cell_impl._zero_state_tensors(
         self.state_size, batch_size, dtype)
     return zeros
Esempio n. 29
0
    def _init_decoder(self, forward_only):
        """Build the Luong-attention decoder under the "decoder" scope.

        Always sets `self.batch_size`, `self.attn_mech`, `self.dec_cell`,
        `self.initial_state` and `self.output_layer`.

        Args:
            forward_only: When truthy, build the greedy inference decoder and
                set `self.predictions`. Otherwise build the training decoder
                and set `self.max_dec_len`, `self.training_helper`,
                `self.training_decoder`, `self.decoder_outputs`,
                `self.decoder_state`, `self.logits`, `self.targets`,
                `self.masks` and `self.loss`.
        """
        with tf.variable_scope("decoder") as scope:

            # NOTE(review): `output_fn` is not referenced anywhere in the
            # lines visible here (the projection actually used is
            # `self.output_layer` below) — confirm whether it is dead code.
            def output_fn(outputs):
                """Linearly project `outputs` to `self.target_vocab_size` units."""
                return tf.contrib.layers.linear(outputs,
                                                self.target_vocab_size,
                                                scope=scope)

            # attention_states: size [batch_size, max_time, num_units]
            #attention_states = tf.transpose(self.encoder_outputs, [1, 0, 2])
            # Dynamic batch size, read from the encoder inputs at run time.
            self.batch_size = tf.shape(self.encoder_inputs)[0]

            self.attn_mech = tf.contrib.seq2seq.LuongAttention(
                num_units=self.dec_hidden_size,
                memory=self.encoder_outputs,
                memory_sequence_length=self.encoder_inputs_length,
                normalize=False,
                name='LuongAttention')

            self.dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
                cell=self.decoder_cell,
                attention_mechanism=self.attn_mech,
                attention_size=self.dec_hidden_size,
                # attention_history=False (in ver 1.2)
                name='Attention_Wrapper')

            # Start decoding from the encoder state with an all-zero
            # (float32) attention vector.
            self.initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
                cell_state=self.encoder_state,
                attention=_zero_state_tensors(self.dec_hidden_size,
                                              self.batch_size, tf.float32))

            # Projection has `target_vocab_size + 2` units, whereas
            # `output_fn` above projects to `target_vocab_size`.
            self.output_layer = Dense(self.target_vocab_size + 2,
                                      name='output_projection')

            if forward_only:
                # NOTE(review): the start token is `model_config.PAD_ID` —
                # confirm that PAD (rather than a GO symbol) is intended.
                start_tokens = tf.tile(tf.constant([model_config.PAD_ID],
                                                   dtype=tf.int32),
                                       [self.batch_size],
                                       name='start_tokens')

                inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=self.dec_embedding_matrix,
                    start_tokens=start_tokens,
                    end_token=model_config.EOS_ID)

                inference_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.dec_cell,
                    helper=inference_helper,
                    initial_state=self.initial_state,
                    output_layer=self.output_layer)

                # NOTE(review): decoding is capped at `target_vocab_size`
                # steps, i.e. a vocabulary size is used as a length limit —
                # confirm this is the intended bound.
                infer_dec_outputs, infer_dec_last_state = tf.contrib.seq2seq.dynamic_decode(
                    inference_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.target_vocab_size)

                # [batch_size x dec_sentence_length], tf.int32
                # (per the original author's note)
                self.predictions = tf.identity(infer_dec_outputs.sample_id,
                                               name='predictions')
            else:
                # maximum unrollings in current batch = max(dec_sent_len) + 1 (GO symbol)
                self.max_dec_len = tf.reduce_max(self.decoder_inputs_length +
                                                 1,
                                                 name='max_dec_len')

                self.training_helper = tf.contrib.seq2seq.TrainingHelper(
                    inputs=self.decoder_inputs_embedded,
                    sequence_length=self.decoder_inputs_length + 1,
                    time_major=False,
                    name='training_helper')

                self.training_decoder = tf.contrib.seq2seq.BasicDecoder(
                    cell=self.dec_cell,
                    helper=self.training_helper,
                    initial_state=self.initial_state,
                    output_layer=self.output_layer)

                self.decoder_outputs, self.decoder_state = tf.contrib.seq2seq.dynamic_decode(
                    self.training_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=self.max_dec_len)

                # logits: [batch_size x max_dec_len x dec_vocab_size+2]
                # (per the original author's note)
                self.logits = tf.identity(self.decoder_outputs.rnn_output,
                                          name='logits')

                # targets: the first `max_dec_len` columns of
                # `decoder_inputs`, i.e. a 2-D [batch_size x max_dec_len]
                # slice (not 3-D).
                self.targets = tf.slice(self.decoder_inputs, [0, 0],
                                        [-1, self.max_dec_len], 'targets')

                # masks: [batch_size x max_dec_len]
                # => ignore outputs after `decoder_inputs_length+1` when calculating loss
                self.masks = tf.sequence_mask(self.decoder_inputs_length + 1,
                                              self.max_dec_len,
                                              dtype=tf.float32,
                                              name='masks')

                # internal: `tf.nn.sparse_softmax_cross_entropy_with_logits`
                self.loss = tf.contrib.seq2seq.sequence_loss(
                    logits=self.logits,
                    targets=self.targets,
                    weights=self.masks,
                    name='batch_loss')
                cell_decode = []
                for a in range(SIZE_RNN_LAYER):
                    cell = rnn.BasicLSTMCell(SIZE_RNN_STATE)
                    cell = rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
                    cell_decode.append(cell)
                multi_rnn_decode = rnn.MultiRNNCell(cell_decode, state_is_tuple=True)

                dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(
                    cell=multi_rnn_decode,
                    attention_mechanism=attn_luong,
                    attention_size=SIZE_ATTN,
                    name="attention_wrapper")

                initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(
                    cell_state=state_enc,
                    attention=_zero_state_tensors(SIZE_ATTN, batch_size, tf.float32))

                output_layer = Dense(voc_size_kor, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

            # train mode
            with tf.variable_scope("decoder_layer"):
                train_helper = tf.contrib.seq2seq.TrainingHelper(inputs=embed_dec,
                                                                 sequence_length=dec_pad_len,
                                                                 time_major=False)
                train_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, train_helper, initial_state, output_layer)

                output_train_dec, state_train_dec = tf.contrib.seq2seq.dynamic_decode(
                    decoder=train_decoder,
                    output_time_major=False,
                    impute_finished=True,
                    maximum_iterations=padded_kor_len)