Beispiel #1
0
    def build(self, hparams, is_training=True, name_or_scope='encoder'):
        """Construct the forward/backward cell stacks for the bidirectional encoder.

        Builds one cell per entry in `hparams.enc_rnn_size` for each direction,
        using fused cuDNN layers when `hparams.use_cudnn` is set and plain RNN
        cells otherwise, and stores them as `self._cells = (cells_fw, cells_bw)`.
        """
        self._is_training = is_training
        self._name_or_scope = name_or_scope
        self._use_cudnn = hparams.use_cudnn

        tf.logging.info('\nEncoder Cells (bidirectional):\n'
                        '  units: %s\n', hparams.enc_rnn_size)

        # Accept either an existing VariableScope or a plain name string.
        if isinstance(name_or_scope, tf.VariableScope):
            name, reuse = name_or_scope.name, name_or_scope.reuse
        else:
            name, reuse = name_or_scope, None

        cells_fw = []
        cells_bw = []
        for layer_idx, units in enumerate(hparams.enc_rnn_size):
            if self._use_cudnn:
                # One cuDNN LSTM per direction, scoped to mirror the variable
                # names a bidirectional RNN would create for this layer.
                for direction, cell_list in (('fw', cells_fw), ('bw', cells_bw)):
                    scope = tf.VariableScope(
                        reuse,
                        name + '/cell_%d/bidirectional_rnn/' % layer_idx + direction)
                    cell_list.append(
                        lstm_utils.cudnn_lstm_layer(
                            [units],
                            hparams.dropout_keep_prob,
                            is_training,
                            name_or_scope=scope))
            else:
                # Identical plain cells for both directions.
                for cell_list in (cells_fw, cells_bw):
                    cell_list.append(
                        lstm_utils.rnn_cell([units],
                                            hparams.dropout_keep_prob,
                                            is_training))

        self._cells = (cells_fw, cells_bw)
Beispiel #2
0
  def build(self, hparams, is_training=True, name_or_scope='encoder'):
    """Initialize the unidirectional encoder's recurrent cell from hparams."""
    self._is_training = is_training
    self._name_or_scope = name_or_scope

    if hparams.use_cudnn:
      # cuDNN support was removed; fall back to the standard LSTM cell.
      tf.logging.warning('cuDNN LSTM no longer supported. Using regular LSTM.')

    tf.logging.info('\nEncoder Cells (unidirectional):\n'
                    '  units: %s\n',
                    hparams.enc_rnn_size)

    # Stacked LSTM with dropout and optional residual connections.
    self._cell = lstm_utils.rnn_cell(
        hparams.enc_rnn_size,
        hparams.dropout_keep_prob,
        hparams.residual_encoder,
        is_training)
Beispiel #3
0
    def _hierarchical_decode(self, z=None):
        """Hierarchically expand a latent vector into per-step embeddings.

        Starting from `z` (or a trainable seed embedding when `z` is None),
        each level listed in `hparams.hierarchical_output_sizes` runs an RNN
        over every current embedding, expanding it into `num_steps` outputs,
        so the number of embeddings grows multiplicatively across levels.

        Args:
            z: Optional latent tensor; presumably `[batch_size, z_size]` —
                TODO confirm against callers. If None, a learned initial
                embedding is tiled across the batch instead.

        Returns:
            A list of output embedding tensors for the final level.

        Raises:
            ValueError: If a level's output size is not evenly divisible by
                the number of embeddings from the previous level.
        """
        hparams = self.hparams
        batch_size = hparams.batch_size

        if z is None:
            # No latent provided: use a trainable seed, tiled across the batch.
            learned_initial_embedding = tf.get_variable(
                'learned_initial_embedding',
                shape=hparams.z_size,
                initializer=tf.random_normal_initializer(stddev=0.001))
            embeddings = [tf.stack([learned_initial_embedding] * batch_size)]
        else:
            embeddings = [z]

        for i, h_size in enumerate(hparams.hierarchical_output_sizes):
            if h_size % len(embeddings) != 0:
                # Fix: the format args were previously passed as extra
                # ValueError arguments, so the message was never formatted.
                raise ValueError(
                    'Each size in `hierarchical_output_sizes` must be evenly divisible '
                    'by the previous. Got: %d !/ %d' % (h_size, len(embeddings)))
            num_steps = h_size // len(embeddings)
            all_outputs = []
            with tf.variable_scope('hierarchical_layer_%d' % i) as scope:
                cell = lstm_utils.rnn_cell(hparams.dec_rnn_size,
                                           hparams.dropout_keep_prob,
                                           self._is_training)
                cudnn_cell = lstm_utils.cudnn_lstm_layer(
                    hparams.dec_rnn_size, hparams.dropout_keep_prob,
                    self._is_training)
                for e in embeddings:
                    e.set_shape([batch_size] + e.shape[1:].as_list())
                    # Each embedding seeds the RNN's initial state; inputs are
                    # zeros so the state alone drives the expansion.
                    initial_state = lstm_utils.initial_cell_state_from_embedding(
                        cell, e, name='e_to_initial_state')
                    if hparams.use_cudnn:
                        input_ = tf.zeros([num_steps, batch_size, 1])
                        outputs, _ = cudnn_cell(
                            input_,
                            initial_state=lstm_utils.cudnn_lstm_state(
                                initial_state),
                            training=self._is_training)
                        outputs = tf.unstack(outputs)
                    else:
                        input_ = [tf.zeros([batch_size, 1])] * num_steps
                        outputs, _ = tf.nn.static_rnn(
                            cell, input_, initial_state=initial_state)
                    all_outputs.extend(outputs)
                    # Reuse layer next time.
                    scope.reuse_variables()
            embeddings = all_outputs
        return embeddings
Beispiel #4
0
  def build(self, hparams, is_training=True, name_or_scope='encoder'):
    """Create the unidirectional encoder's RNN (cuDNN or plain LSTM stack)."""
    self._is_training = is_training
    self._name_or_scope = name_or_scope
    self._use_cudnn = hparams.use_cudnn

    tf.logging.info('\nEncoder Cells (unidirectional):\n'
                    '  units: %s\n',
                    hparams.enc_rnn_size)

    if not self._use_cudnn:
      # Plain stacked LSTM cell with dropout.
      self._cell = lstm_utils.rnn_cell(
          hparams.enc_rnn_size, hparams.dropout_keep_prob, is_training)
    else:
      # Fused cuDNN implementation of the same stack.
      self._cudnn_lstm = lstm_utils.cudnn_lstm_layer(
          hparams.enc_rnn_size,
          hparams.dropout_keep_prob,
          is_training,
          name_or_scope=self._name_or_scope)
Beispiel #5
0
  def build(self, hparams, output_depth, is_training=False):
    """Set up the decoder: sampling probability, output projection, and cells."""
    self._is_training = is_training

    tf.logging.info('\nDecoder Cells:\n'
                    '  units: %s\n',
                    hparams.dec_rnn_size)

    # Scheduled-sampling probability derived from hparams and training mode.
    self._sampling_probability = lstm_utils.get_sampling_probability(
        hparams, is_training)
    self._output_depth = output_depth
    self._output_layer = layers_core.Dense(
        output_depth, name='output_projection')
    self._dec_cell = lstm_utils.rnn_cell(
        hparams.dec_rnn_size, hparams.dropout_keep_prob, is_training)
    if hparams.use_cudnn:
      # Optional fused cuDNN decoder LSTM mirroring `_dec_cell`.
      self._cudnn_dec_lstm = lstm_utils.cudnn_lstm_layer(
          hparams.dec_rnn_size, hparams.dropout_keep_prob, is_training,
          name_or_scope='decoder')
    else:
      self._cudnn_dec_lstm = None
Beispiel #6
0
  def build(self, hparams, output_depth, is_training=True):
    """Build the decoder's projection layer and recurrent cell."""
    if hparams.use_cudnn:
      # cuDNN path was removed upstream; warn and use the standard cell.
      tf.logging.warning('cuDNN LSTM no longer supported. Using regular LSTM.')

    self._is_training = is_training

    tf.logging.info('\nDecoder Cells:\n'
                    '  units: %s\n',
                    hparams.dec_rnn_size)

    self._output_depth = output_depth
    self._output_layer = tf.layers.Dense(
        output_depth, name='output_projection')
    # Scheduled-sampling probability derived from hparams and training mode.
    self._sampling_probability = lstm_utils.get_sampling_probability(
        hparams, is_training)
    self._dec_cell = lstm_utils.rnn_cell(
        hparams.dec_rnn_size,
        hparams.dropout_keep_prob,
        hparams.residual_decoder,
        is_training)
Beispiel #7
0
  def build(self, hparams, output_depth, is_training):
    """Validate level lengths, then build per-level cells and the core decoder."""
    self.hparams = hparams
    self._output_depth = output_depth
    self._total_length = hparams.max_seq_len

    # The hierarchy must tile the padded sequence exactly.
    expected_length = np.prod(self._level_lengths)
    if self._total_length != expected_length:
      raise ValueError(
          'The product of the HierarchicalLstmDecoder level lengths (%d) must '
          'equal the padded input sequence length (%d).' % (
              expected_length, self._total_length))
    tf.logging.info('\nHierarchical Decoder:\n'
                    '  input length: %d\n'
                    '  level output lengths: %s\n',
                    self._total_length,
                    self._level_lengths)

    # One RNN cell per hierarchy level.
    self._hier_cells = []
    for _ in self._level_lengths:
      self._hier_cells.append(
          lstm_utils.rnn_cell(
              hparams.dec_rnn_size,
              dropout_keep_prob=hparams.dropout_keep_prob))

    with tf.variable_scope('core_decoder', reuse=tf.AUTO_REUSE):
      self._core_decoder.build(hparams, output_depth, is_training)