Example 1
    def forward(self, src, src_length):
        """Encode a source batch and decode it with beam search.

        Builds an additive attention mask so padded positions are ignored,
        tiles the encoder tensors across the beam dimension, then runs the
        beam-search decoder and returns its predictions.
        """
        # Encode the source batch.
        enc_out, enc_final = self.encoder(src, src_length)

        # Decoder initial states: the encoder's final state plus a fresh
        # zero state for the attention LSTM cell.
        init_states = [
            enc_final,
            self.decoder.lstm_attention.cell.get_initial_states(
                batch_ref=enc_out, shape=[self.hidden_size]),
        ]

        # Additive attention bias: valid positions contribute 0, padded
        # positions a large negative value so softmax attention skips them.
        seq_mask = layers.sequence_mask(
            src_length,
            maxlen=layers.shape(src)[1],
            dtype=enc_out.dtype)
        attn_bias = layers.unsqueeze((seq_mask - 1.0) * 1e9, [1])

        # Replicate every batch entry beam_size times for beam search.
        enc_out = BeamSearchDecoder.tile_beam_merge_with_batch(
            enc_out, self.beam_size)
        attn_bias = BeamSearchDecoder.tile_beam_merge_with_batch(
            attn_bias, self.beam_size)

        # Dynamic decoding with beam search.
        predictions, _ = self.beam_search_decoder(
            inits=init_states,
            encoder_output=enc_out,
            encoder_padding_mask=attn_bias)
        return predictions
Example 2
    def _build_decoder(self, enc_final_state, mode='train', beam_size=10):
        """Build the decoder sub-graph.

        Args:
            enc_final_state: final state(s) from the encoder, used as part
                of the decoder's initial states.
            mode: 'train' builds a teacher-forced RNN decoder over
                `self.tar_emb`; 'beam_search' builds a beam-search
                inference decoder.
            beam_size: beam width, used only when mode == 'beam_search'.

        Returns:
            Output logits in 'train' mode, or the beam-search decode
            outputs in 'beam_search' mode.

        Raises:
            ValueError: if `mode` is not 'train' or 'beam_search'.
        """
        # Shared output projection to the target vocabulary; the parameter
        # name "output_w" ties train and inference projections together.
        output_layer = lambda x: layers.fc(
            x,
            size=self.tar_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(
                name="output_w",
                initializer=fluid.initializer.UniformInitializer(
                    low=-self.init_scale, high=self.init_scale)),
            bias_attr=False)

        dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                        self.dropout, self.init_scale)
        # Initial states: encoder final state plus a zero state for the
        # attention cell's extra recurrent input.
        dec_initial_states = [
            enc_final_state,
            dec_cell.get_initial_states(batch_ref=self.enc_output,
                                        shape=[self.hidden_size])
        ]
        # Mask out source padding: valid positions -> 0.0, padding -> -1.0.
        # NOTE(review): unlike other models here this bias is not scaled by
        # 1e9 — presumably the attention cell scales it; confirm.
        max_src_seq_len = layers.shape(self.src)[1]
        src_mask = layers.sequence_mask(self.src_sequence_length,
                                        maxlen=max_src_seq_len,
                                        dtype='float32')
        enc_padding_mask = (src_mask - 1.0)
        if mode == 'train':
            # Teacher-forced decoding over the ground-truth target embeddings.
            dec_output, _ = rnn(cell=dec_cell,
                                inputs=self.tar_emb,
                                initial_states=dec_initial_states,
                                sequence_length=None,
                                enc_output=self.enc_output,
                                enc_padding_mask=enc_padding_mask)

            dec_output = output_layer(dec_output)
            return dec_output

        elif mode == 'beam_search':
            # Re-declare the projection without an initializer: at inference
            # time "output_w" already exists and is only looked up by name.
            output_layer = lambda x: layers.fc(
                x,
                size=self.tar_vocab_size,
                num_flatten_dims=len(x.shape) - 1,
                param_attr=fluid.ParamAttr(name="output_w"),
                bias_attr=False)
            beam_search_decoder = BeamSearchDecoder(
                dec_cell,
                self.beam_start_token,
                self.beam_end_token,
                beam_size,
                embedding_fn=self.tar_embeder,
                output_fn=output_layer)
            # Tile encoder tensors across the beam dimension.
            enc_output = beam_search_decoder.tile_beam_merge_with_batch(
                self.enc_output, beam_size)
            enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
                enc_padding_mask, beam_size)
            outputs, _ = dynamic_decode(beam_search_decoder,
                                        inits=dec_initial_states,
                                        max_step_num=self.beam_max_step_num,
                                        enc_output=enc_output,
                                        enc_padding_mask=enc_padding_mask)
            return outputs

        # Previously an unknown mode fell through to `return dec_output`,
        # raising a confusing NameError; fail loudly instead.
        raise ValueError(
            "mode must be 'train' or 'beam_search', got %r" % (mode,))
Example 3
    def forward(self, inputs, *args):
        """Encode `inputs` and run the inference decoder (beam search if
        `self.beam_size` is truthy)."""
        backward_states, enc_vec, enc_proj = self.encoder(inputs)
        # Boot the decoder from the first step of the backward GRU.
        boot_state = self.fc(backward_states[:, 0])

        if self.beam_size:
            # Expand the batch dimension by beam_size for beam search.
            enc_vec = BeamSearchDecoder.tile_beam_merge_with_batch(
                enc_vec, self.beam_size)
            enc_proj = BeamSearchDecoder.tile_beam_merge_with_batch(
                enc_proj, self.beam_size)

        # Dynamic decoding.
        result, _ = self.infer_decoder(inits=boot_state,
                                       encoder_vec=enc_vec,
                                       encoder_proj=enc_proj)
        return result
Example 4
    def _build_net(self):
        """Build the attention-decoder network over `self.feature`.

        In the non-predict phase, builds a teacher-forced RNN decoder over
        `self.dec_input` and returns softmax probabilities; in the predict
        phase, builds a beam-search decoder and returns its decoded ids.
        """
        self.seq_len = fluid.layers.data(name="seq_len",
                                         shape=[1],
                                         dtype='int64',
                                         lod_level=0)
        self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
        # Mask source padding: valid positions -> 0.0, padding -> -1.0
        # after the subtraction below.
        src_mask = fluid.layers.sequence_mask(self.seq_len_used,
                                              maxlen=self.max_seq_len,
                                              dtype='float32')
        enc_padding_mask = (src_mask - 1.0)

        # Define decoder and initialize it.
        dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                        self.dropout)
        # Project the (pooled) feature into the decoder's hidden size to
        # seed the initial hidden state.
        dec_init_hidden = fluid.layers.fc(
            input=self.feature,
            size=self.hidden_size,
            num_flatten_dims=1,
            param_attr=fluid.ParamAttr(
                name="dec_init_hidden_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="dec_init_hidden_b",
                initializer=fluid.initializer.Constant(0.)))
        # Per-layer [hidden, cell] pairs plus an extra attention-input state.
        # NOTE(review): `[[...]] * self.num_layers` repeats references to the
        # SAME inner list for every layer — presumably harmless because
        # static-graph tensors are read-only here, but confirm intent.
        dec_initial_states = [[[
            dec_init_hidden,
            dec_cell.get_initial_states(batch_ref=self.feature,
                                        shape=[self.hidden_size])
        ]] * self.num_layers,
                              dec_cell.get_initial_states(
                                  batch_ref=self.feature,
                                  shape=[self.hidden_size])]
        tar_vocab_size = len(self._label_list)
        # Embedding lookup for target tokens; shared by train and inference
        # via the parameter name 'target_embedding'.
        tar_embeder = lambda x: fluid.embedding(
            input=x,
            size=[tar_vocab_size, self.hidden_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(name='target_embedding',
                                       initializer=fluid.initializer.
                                       UniformInitializer(low=-0.1, high=0.1)))
        # Start/end token ids are positions in the label list.
        start_token_id = self._label_list.index(self.start_token)
        end_token_id = self._label_list.index(self.end_token)
        if not self.is_predict_phase:
            self.dec_input = fluid.layers.data(name="dec_input",
                                               shape=[self.max_seq_len],
                                               dtype='int64')
            tar_emb = tar_embeder(self.dec_input)
            # Teacher-forced decoding over the ground-truth target sequence.
            dec_output, _ = rnn(cell=dec_cell,
                                inputs=tar_emb,
                                initial_states=dec_initial_states,
                                sequence_length=None,
                                enc_output=self.token_feature,
                                enc_padding_mask=enc_padding_mask)
            # Project decoder outputs onto the target vocabulary.
            self.logits = fluid.layers.fc(
                dec_output,
                size=tar_vocab_size,
                num_flatten_dims=len(dec_output.shape) - 1,
                param_attr=fluid.ParamAttr(
                    name="output_w",
                    initializer=fluid.initializer.UniformInitializer(
                        low=-0.1, high=0.1)))
            # Greedy per-step predictions, flattened to a column vector.
            self.ret_infers = fluid.layers.reshape(x=fluid.layers.argmax(
                self.logits, axis=2),
                                                   shape=[-1, 1])
            logits = self.logits
            logits = fluid.layers.softmax(logits)
            return [logits]
        else:
            # Inference-time projection reuses "output_w" by name only.
            output_layer = lambda x: fluid.layers.fc(
                x,
                size=tar_vocab_size,
                num_flatten_dims=len(x.shape) - 1,
                param_attr=fluid.ParamAttr(name="output_w"))
            beam_search_decoder = BeamSearchDecoder(dec_cell,
                                                    start_token_id,
                                                    end_token_id,
                                                    self.beam_size,
                                                    embedding_fn=tar_embeder,
                                                    output_fn=output_layer)
            # Tile encoder tensors across the beam dimension.
            enc_output = beam_search_decoder.tile_beam_merge_with_batch(
                self.token_feature, self.beam_size)
            enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
                enc_padding_mask, self.beam_size)
            self.ret_infers, _ = dynamic_decode(
                beam_search_decoder,
                inits=dec_initial_states,
                max_step_num=self.beam_max_step_num,
                enc_output=enc_output,
                enc_padding_mask=enc_padding_mask)
            return self.ret_infers