def build(self, hparams, is_training=True, name_or_scope='encoder'):
  """Builds the stacked bidirectional LSTM cells for this encoder.

  Args:
    hparams: Hyperparameters object; reads `enc_rnn_size`,
      `dropout_keep_prob`, and `use_cudnn`.
    is_training: Whether cells are built in training mode (affects dropout).
    name_or_scope: A name string or `tf.VariableScope` used to derive the
      per-layer cuDNN variable scopes.
  """
  self._is_training = is_training
  self._name_or_scope = name_or_scope
  self._use_cudnn = hparams.use_cudnn

  tf.logging.info('\nEncoder Cells (bidirectional):\n'
                  ' units: %s\n',
                  hparams.enc_rnn_size)

  # Resolve the scope name/reuse flag whether a scope object or a plain
  # string name was supplied.
  if isinstance(name_or_scope, tf.VariableScope):
    name, reuse = name_or_scope.name, name_or_scope.reuse
  else:
    name, reuse = name_or_scope, None

  def _cudnn_layer(layer_size, scope_suffix):
    # Pin each cuDNN layer to an explicit scope so variable names line up
    # with the bidirectional-RNN naming used by the non-cuDNN path.
    return lstm_utils.cudnn_lstm_layer(
        [layer_size], hparams.dropout_keep_prob, is_training,
        name_or_scope=tf.VariableScope(reuse, name + scope_suffix))

  cells_fw = []
  cells_bw = []
  for i, layer_size in enumerate(hparams.enc_rnn_size):
    if self._use_cudnn:
      cells_fw.append(
          _cudnn_layer(layer_size, '/cell_%d/bidirectional_rnn/fw' % i))
      cells_bw.append(
          _cudnn_layer(layer_size, '/cell_%d/bidirectional_rnn/bw' % i))
    else:
      cells_fw.append(
          lstm_utils.rnn_cell(
              [layer_size], hparams.dropout_keep_prob, is_training))
      cells_bw.append(
          lstm_utils.rnn_cell(
              [layer_size], hparams.dropout_keep_prob, is_training))

  self._cells = (cells_fw, cells_bw)
def build(self, hparams, is_training=True, name_or_scope='encoder'):
  """Constructs the unidirectional encoder LSTM cell from `hparams`.

  Args:
    hparams: Hyperparameters object; reads `enc_rnn_size`,
      `dropout_keep_prob`, `residual_encoder`, and `use_cudnn`.
    is_training: Whether the cell is built in training mode.
    name_or_scope: Name or scope for the encoder; stored for later use.
  """
  # cuDNN support was removed upstream; warn but fall through to the
  # regular LSTM implementation.
  if hparams.use_cudnn:
    tf.logging.warning('cuDNN LSTM no longer supported. Using regular LSTM.')

  self._is_training = is_training
  self._name_or_scope = name_or_scope

  tf.logging.info('\nEncoder Cells (unidirectional):\n'
                  ' units: %s\n',
                  hparams.enc_rnn_size)

  self._cell = lstm_utils.rnn_cell(
      hparams.enc_rnn_size,
      hparams.dropout_keep_prob,
      hparams.residual_encoder,
      is_training)
def _hierarchical_decode(self, z=None):
  """Unrolls the hierarchical decoder, returning embeddings per output step.

  Starting from `z` (or a learned initial embedding when `z` is None), each
  hierarchical layer expands the current list of embeddings by running an
  RNN for `h_size // len(embeddings)` steps per input embedding.

  Args:
    z: Optional latent tensor of shape [batch_size, z_size]. If None, a
      learned variable is tiled across the batch instead.

  Returns:
    A list of output embedding tensors from the final hierarchical layer.

  Raises:
    ValueError: If a size in `hierarchical_output_sizes` is not evenly
      divisible by the number of embeddings from the previous layer.
  """
  hparams = self.hparams
  batch_size = hparams.batch_size

  if z is None:
    # No latent provided: use a learned embedding, tiled across the batch.
    learned_initial_embedding = tf.get_variable(
        'learned_initial_embedding',
        shape=hparams.z_size,
        initializer=tf.random_normal_initializer(stddev=0.001))
    embeddings = [tf.stack([learned_initial_embedding] * batch_size)]
  else:
    embeddings = [z]

  for i, h_size in enumerate(hparams.hierarchical_output_sizes):
    if h_size % len(embeddings) != 0:
      # Bug fix: the original passed lazy %-style args to ValueError, which
      # does not format them, so the message never contained the values.
      # Interpolate explicitly instead.
      raise ValueError(
          'Each size in `hierarchical_output_sizes` must be evenly divisible '
          'by the previous. Got: %d !/ %d' % (h_size, len(embeddings)))
    num_steps = h_size // len(embeddings)
    all_outputs = []
    with tf.variable_scope('hierarchical_layer_%d' % i) as scope:
      cell = lstm_utils.rnn_cell(
          hparams.dec_rnn_size, hparams.dropout_keep_prob, self._is_training)
      cudnn_cell = lstm_utils.cudnn_lstm_layer(
          hparams.dec_rnn_size, hparams.dropout_keep_prob, self._is_training)
      for e in embeddings:
        e.set_shape([batch_size] + e.shape[1:].as_list())
        initial_state = lstm_utils.initial_cell_state_from_embedding(
            cell, e, name='e_to_initial_state')
        if hparams.use_cudnn:
          # cuDNN expects time-major input: [num_steps, batch, 1] zeros.
          input_ = tf.zeros([num_steps, batch_size, 1])
          outputs, _ = cudnn_cell(
              input_,
              initial_state=lstm_utils.cudnn_lstm_state(initial_state),
              training=self._is_training)
          outputs = tf.unstack(outputs)
        else:
          input_ = [tf.zeros([batch_size, 1])] * num_steps
          outputs, _ = tf.nn.static_rnn(
              cell, input_, initial_state=initial_state)
        all_outputs.extend(outputs)
        # Reuse layer next time.
        scope.reuse_variables()
    # Outputs of this layer become the inputs (embeddings) of the next.
    embeddings = all_outputs
  return embeddings
def build(self, hparams, is_training=True, name_or_scope='encoder'):
  """Builds the unidirectional encoder, choosing cuDNN or regular LSTM.

  Args:
    hparams: Hyperparameters object; reads `enc_rnn_size`,
      `dropout_keep_prob`, and `use_cudnn`.
    is_training: Whether the layer/cell is built in training mode.
    name_or_scope: Name or scope passed through to the cuDNN layer.
  """
  self._is_training = is_training
  self._name_or_scope = name_or_scope
  self._use_cudnn = hparams.use_cudnn

  tf.logging.info('\nEncoder Cells (unidirectional):\n'
                  ' units: %s\n',
                  hparams.enc_rnn_size)

  if not self._use_cudnn:
    self._cell = lstm_utils.rnn_cell(
        hparams.enc_rnn_size, hparams.dropout_keep_prob, is_training)
  else:
    self._cudnn_lstm = lstm_utils.cudnn_lstm_layer(
        hparams.enc_rnn_size,
        hparams.dropout_keep_prob,
        is_training,
        name_or_scope=self._name_or_scope)
def build(self, hparams, output_depth, is_training=False):
  """Builds the decoder cell, output projection, and sampling schedule.

  Args:
    hparams: Hyperparameters object; reads `dec_rnn_size`,
      `dropout_keep_prob`, `use_cudnn`, and sampling-schedule settings.
    output_depth: Size of the final output dimension.
    is_training: Whether layers are built in training mode.
  """
  self._is_training = is_training

  tf.logging.info('\nDecoder Cells:\n'
                  ' units: %s\n',
                  hparams.dec_rnn_size)

  self._sampling_probability = lstm_utils.get_sampling_probability(
      hparams, is_training)
  self._output_depth = output_depth
  self._output_layer = layers_core.Dense(
      output_depth, name='output_projection')
  self._dec_cell = lstm_utils.rnn_cell(
      hparams.dec_rnn_size, hparams.dropout_keep_prob, is_training)

  # The cuDNN decoder is only materialized when requested; callers check
  # this attribute against None to pick the execution path.
  if hparams.use_cudnn:
    self._cudnn_dec_lstm = lstm_utils.cudnn_lstm_layer(
        hparams.dec_rnn_size,
        hparams.dropout_keep_prob,
        is_training,
        name_or_scope='decoder')
  else:
    self._cudnn_dec_lstm = None
def build(self, hparams, output_depth, is_training=True):
  """Builds the decoder cell, projection layer, and sampling schedule.

  Args:
    hparams: Hyperparameters object; reads `dec_rnn_size`,
      `dropout_keep_prob`, `residual_decoder`, `use_cudnn`, and
      sampling-schedule settings.
    output_depth: Size of the final output dimension.
    is_training: Whether layers are built in training mode.
  """
  # cuDNN support was removed upstream; warn but fall through to the
  # regular LSTM implementation.
  if hparams.use_cudnn:
    tf.logging.warning('cuDNN LSTM no longer supported. Using regular LSTM.')

  self._is_training = is_training
  self._output_depth = output_depth

  tf.logging.info('\nDecoder Cells:\n'
                  ' units: %s\n',
                  hparams.dec_rnn_size)

  self._sampling_probability = lstm_utils.get_sampling_probability(
      hparams, is_training)
  self._output_layer = tf.layers.Dense(
      output_depth, name='output_projection')
  self._dec_cell = lstm_utils.rnn_cell(
      hparams.dec_rnn_size,
      hparams.dropout_keep_prob,
      hparams.residual_decoder,
      is_training)
def build(self, hparams, output_depth, is_training):
  """Builds the hierarchical decoder: one cell per level plus the core.

  Args:
    hparams: Hyperparameters object; reads `max_seq_len`, `dec_rnn_size`,
      and `dropout_keep_prob`.
    output_depth: Size of the final output dimension.
    is_training: Whether layers are built in training mode.

  Raises:
    ValueError: If the product of `self._level_lengths` does not equal the
      padded input sequence length (`hparams.max_seq_len`).
  """
  self.hparams = hparams
  self._output_depth = output_depth
  self._total_length = hparams.max_seq_len

  # The per-level lengths must multiply out to exactly the padded input
  # length, or the hierarchy cannot tile the sequence.
  expected_length = np.prod(self._level_lengths)
  if self._total_length != expected_length:
    raise ValueError(
        'The product of the HierarchicalLstmDecoder level lengths (%d) must '
        'equal the padded input sequence length (%d).' % (
            expected_length, self._total_length))

  tf.logging.info('\nHierarchical Decoder:\n'
                  ' input length: %d\n'
                  ' level output lengths: %s\n',
                  self._total_length,
                  self._level_lengths)

  # One RNN cell per hierarchy level.
  self._hier_cells = [
      lstm_utils.rnn_cell(
          hparams.dec_rnn_size,
          dropout_keep_prob=hparams.dropout_keep_prob)
      for _ in self._level_lengths]

  with tf.variable_scope('core_decoder', reuse=tf.AUTO_REUSE):
    self._core_decoder.build(hparams, output_depth, is_training)