# Example 1
    def _build_all_encoder_layers(self, bi_encoder_outputs, num_uni_layers,
                                  dtype, hparams, base_gpu=1):
        """Build all unidirectional encoder layers in one dynamic_rnn call.

        Runs a stack of `num_uni_layers` unidirectional RNN cells over the
        outputs of the bidirectional encoder.

        Args:
          bi_encoder_outputs: output tensor of the bidirectional encoder,
            used as the input sequence to the unidirectional stack.
          num_uni_layers: int, number of unidirectional layers to create.
          dtype: dtype passed to tf.nn.dynamic_rnn.
          hparams: hyperparameters; unit_type, num_units, forget_bias and
            dropout are read here.
          base_gpu: first GPU index for cell placement. Defaults to 1, which
            was previously hard-coded; exposed so callers can control device
            placement without changing existing behavior.

        Returns:
          A tuple (encoder_state, encoder_outputs).

        Side effects:
          Sets self.encoder_state_list to [encoder_outputs] (top layer only).
        """
        uni_cell = model_helper.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=hparams.num_units,
            num_layers=num_uni_layers,
            num_residual_layers=self.num_encoder_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            num_gpus=self.num_gpus,
            base_gpu=base_gpu,
            mode=self.mode,
            single_cell_fn=self.single_cell_fn)
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            uni_cell,
            bi_encoder_outputs,
            dtype=dtype,
            sequence_length=self.iterator.source_sequence_length,
            time_major=self.time_major)

        # Use the top layer for now
        self.encoder_state_list = [encoder_outputs]

        return encoder_state, encoder_outputs
# Example 2
    def _build_encoder_cell(self,
                            hparams,
                            num_layers,
                            num_residual_layers,
                            base_gpu=0):
        """Create the (possibly multi-layer, residual) RNN cell for the encoder.

        Args:
          hparams: hyperparameters; unit_type, forget_bias, dropout and
            num_gpus are read here.
          num_layers: int, total number of RNN layers.
          num_residual_layers: int, how many of those layers get residual
            connections.
          base_gpu: first GPU index for device placement.

        Returns:
          An RNN cell built by model_helper.create_rnn_cell.
        """
        # NOTE(review): num_gpus comes from hparams here, while sibling
        # methods use self.num_gpus — confirm they are kept in sync.
        cell_kwargs = dict(
            unit_type=hparams.unit_type,
            num_units=self.num_units,
            num_layers=num_layers,
            num_residual_layers=num_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            num_gpus=hparams.num_gpus,
            mode=self.mode,
            base_gpu=base_gpu,
            single_cell_fn=self.single_cell_fn,
        )
        return model_helper.create_rnn_cell(**cell_kwargs)
# Example 3
    def _build_decoder_cell(self,
                            hparams,
                            encoder_outputs,
                            encoder_state,
                            source_sequence_length,
                            base_gpu=0):
        """Create the decoder RNN cell and its initial state.

        encoder_outputs is accepted for interface compatibility but unused:
        only attention-based models consume it, and this model rejects
        attention outright.

        Returns:
          A tuple (cell, decoder_initial_state).
        """
        if hparams.attention:
            raise ValueError("BasicModel doesn't support attention.")

        decoder_cell = model_helper.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=self.num_units,
            num_layers=self.num_decoder_layers,
            num_residual_layers=self.num_decoder_residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            num_gpus=self.num_gpus,
            mode=self.mode,
            single_cell_fn=self.single_cell_fn,
            base_gpu=base_gpu)

        if hparams.language_model:
            # A language model ignores the encoder: start from a zero state.
            encoder_state = decoder_cell.zero_state(self.batch_size, self.dtype)
        elif not hparams.pass_hidden_state:
            raise ValueError("For non-attentional model, "
                             "pass_hidden_state needs to be set to True")

        beam_decoding = (self.mode == tf.contrib.learn.ModeKeys.INFER
                         and hparams.infer_mode == "beam_search")
        if beam_decoding:
            # Beam search needs beam_width replicas of the encoder state.
            initial_state = tf.contrib.seq2seq.tile_batch(
                encoder_state, multiplier=hparams.beam_width)
        else:
            initial_state = encoder_state

        return decoder_cell, initial_state
# Example 4
    def _build_decoder_cell(self, hparams, encoder_outputs, encoder_state,
                            source_sequence_length):
        """Create the attention-wrapped decoder cell and its initial state.

        Returns:
          A tuple (cell, decoder_initial_state).
        """
        # Without attention, fall back to the base model's decoder cell.
        if not self.has_attention:
            return super(AttentionModel,
                         self)._build_decoder_cell(hparams, encoder_outputs,
                                                   encoder_state,
                                                   source_sequence_length)
        elif hparams.attention_architecture != "standard":
            raise ValueError("Unknown attention architecture %s" %
                             hparams.attention_architecture)

        units = hparams.num_units
        layers = self.num_decoder_layers
        residual_layers = self.num_decoder_residual_layers
        infer_mode = hparams.infer_mode
        dtype = tf.float32

        # Attention memory must be batch-major.
        memory = (tf.transpose(encoder_outputs, [1, 0, 2])
                  if self.time_major else encoder_outputs)

        beam_decoding = (self.mode == tf.contrib.learn.ModeKeys.INFER
                         and infer_mode == "beam_search")
        if beam_decoding:
            # Beam search needs beam_width replicas of memory, lengths and state.
            (memory, source_sequence_length, encoder_state,
             batch_size) = self._prepare_beam_search_decoder_inputs(
                 hparams.beam_width, memory, source_sequence_length,
                 encoder_state)
        else:
            batch_size = self.batch_size

        attention_mechanism = self.attention_mechanism_fn(
            hparams.attention, units, memory, source_sequence_length,
            self.mode)

        base_cell = model_helper.create_rnn_cell(
            unit_type=hparams.unit_type,
            num_units=units,
            num_layers=layers,
            num_residual_layers=residual_layers,
            forget_bias=hparams.forget_bias,
            dropout=hparams.dropout,
            num_gpus=self.num_gpus,
            mode=self.mode,
            single_cell_fn=self.single_cell_fn)

        # Alignment history is only recorded in greedy/sample INFER mode.
        keep_alignments = (self.mode == tf.contrib.learn.ModeKeys.INFER
                           and infer_mode != "beam_search")
        wrapped_cell = tf.contrib.seq2seq.AttentionWrapper(
            base_cell,
            attention_mechanism,
            attention_layer_size=units,
            alignment_history=keep_alignments,
            output_attention=hparams.output_attention,
            name="attention")

        # TODO(thangluong): do we need num_layers, num_gpus?
        wrapped_cell = tf.contrib.rnn.DeviceWrapper(
            wrapped_cell,
            model_helper.get_device_str(layers - 1, self.num_gpus))

        if hparams.pass_hidden_state:
            # Seed the attention wrapper's state with the encoder's final state.
            initial_state = wrapped_cell.zero_state(
                batch_size, dtype).clone(cell_state=encoder_state)
        else:
            initial_state = wrapped_cell.zero_state(batch_size, dtype)

        return wrapped_cell, initial_state