def forward(self, src, src_length):
    # encoding
    encoder_output, encoder_final_state = self.encoder(src, src_length)

    # decoder initial states
    decoder_initial_states = [
        encoder_final_state,
        self.decoder.lstm_attention.cell.get_initial_states(
            batch_ref=encoder_output, shape=[self.hidden_size])
    ]

    # attention mask to avoid attending to paddings
    src_mask = layers.sequence_mask(
        src_length,
        maxlen=layers.shape(src)[1],
        dtype=encoder_output.dtype)
    encoder_padding_mask = (src_mask - 1.0) * 1e9
    encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])

    # tile the batch dimension with beam_size
    encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch(
        encoder_output, self.beam_size)
    encoder_padding_mask = BeamSearchDecoder.tile_beam_merge_with_batch(
        encoder_padding_mask, self.beam_size)

    # dynamic decoding with beam search
    rs, _ = self.beam_search_decoder(
        inits=decoder_initial_states,
        encoder_output=encoder_output,
        encoder_padding_mask=encoder_padding_mask)
    return rs
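# --- Illustration (NumPy, not part of the model above): how the additive
# attention mask built by `(src_mask - 1.0) * 1e9` works. It is 0 at valid
# positions and -1e9 at padding positions, so adding it to the attention
# logits before softmax drives the weights on padded positions to ~0.
import numpy as np

src_mask = np.array([[1., 1., 1., 0., 0.]])      # 3 valid tokens, 2 pads
padding_mask = (src_mask - 1.0) * 1e9            # [0, 0, 0, -1e9, -1e9]
scores = np.array([[0.5, 1.2, -0.3, 0.9, 2.0]])  # raw attention scores
masked = scores + padding_mask
weights = np.exp(masked) / np.exp(masked).sum(-1, keepdims=True)
# weights[..., 3:] is ~0: the decoder never attends to padding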
def _build_decoder(self, enc_final_state, mode='train', beam_size=10):
    output_layer = lambda x: layers.fc(
        x,
        size=self.tar_vocab_size,
        num_flatten_dims=len(x.shape) - 1,
        param_attr=fluid.ParamAttr(
            name="output_w",
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale)),
        bias_attr=False)

    dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                    self.dropout, self.init_scale)
    dec_initial_states = [
        enc_final_state,
        dec_cell.get_initial_states(
            batch_ref=self.enc_output, shape=[self.hidden_size])
    ]

    max_src_seq_len = layers.shape(self.src)[1]
    src_mask = layers.sequence_mask(
        self.src_sequence_length, maxlen=max_src_seq_len, dtype='float32')
    enc_padding_mask = (src_mask - 1.0)

    if mode == 'train':
        dec_output, _ = rnn(
            cell=dec_cell,
            inputs=self.tar_emb,
            initial_states=dec_initial_states,
            sequence_length=None,
            enc_output=self.enc_output,
            enc_padding_mask=enc_padding_mask)
        dec_output = output_layer(dec_output)
    elif mode == 'beam_search':
        output_layer = lambda x: layers.fc(
            x,
            size=self.tar_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(name="output_w"),
            bias_attr=False)
        beam_search_decoder = BeamSearchDecoder(
            dec_cell,
            self.beam_start_token,
            self.beam_end_token,
            beam_size,
            embedding_fn=self.tar_embeder,
            output_fn=output_layer)
        enc_output = beam_search_decoder.tile_beam_merge_with_batch(
            self.enc_output, beam_size)
        enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
            enc_padding_mask, beam_size)
        outputs, _ = dynamic_decode(
            beam_search_decoder,
            inits=dec_initial_states,
            max_step_num=self.beam_max_step_num,
            enc_output=enc_output,
            enc_padding_mask=enc_padding_mask)
        return outputs
    return dec_output
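# --- Illustration (NumPy, not part of the models above): the semantics of
# `BeamSearchDecoder.tile_beam_merge_with_batch` as documented in fluid: a
# tensor shaped [batch_size, ...] is expanded to [batch_size * beam_size, ...]
# by repeating each batch entry beam_size times, so every beam hypothesis of
# an example sees the same encoder output.
import numpy as np

def tile_beam_merge_with_batch_np(x, beam_size):
    # [batch, ...] -> [batch * beam_size, ...]
    return np.repeat(x, beam_size, axis=0)

enc_output = np.arange(6).reshape(2, 3)  # batch=2, hidden=3
print(tile_beam_merge_with_batch_np(enc_output, 2))
# [[0 1 2]
#  [0 1 2]
#  [3 4 5]
#  [3 4 5]]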
def forward(self, inputs, *args):
    gru_backward, encoded_vector, encoded_proj = self.encoder(inputs)
    decoder_boot = self.fc(gru_backward[:, 0])

    if self.beam_size:
        # tile the batch dimension with beam_size
        encoded_vector = BeamSearchDecoder.tile_beam_merge_with_batch(
            encoded_vector, self.beam_size)
        encoded_proj = BeamSearchDecoder.tile_beam_merge_with_batch(
            encoded_proj, self.beam_size)

    # dynamic decoding with beam search
    rs, _ = self.infer_decoder(
        inits=decoder_boot,
        encoder_vec=encoded_vector,
        encoder_proj=encoded_proj)
    return rs
def __init__(self,
             src_vocab_size,
             trg_vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             dropout_prob=0.,
             bos_id=0,
             eos_id=1,
             beam_size=4,
             max_out_len=256):
    args = dict(locals())
    args.pop("self")
    args.pop("__class__", None)  # py3
    self.bos_id = args.pop("bos_id")
    self.eos_id = args.pop("eos_id")
    self.beam_size = args.pop("beam_size")
    self.max_out_len = args.pop("max_out_len")
    super(AttentionInferModel, self).__init__(**args)

    # dynamic decoder for inference
    decoder = BeamSearchDecoder(
        self.decoder.lstm_attention.cell,
        start_token=bos_id,
        end_token=eos_id,
        beam_size=beam_size,
        embedding_fn=self.decoder.embedder,
        output_fn=self.decoder.output_layer)
    self.beam_search_decoder = DynamicDecode(
        decoder, max_step_num=max_out_len, is_test=True)
def __init__(self,
             in_channels=1,
             encoder_size=200,
             decoder_size=128,
             emb_dim=128,
             num_classes=None,
             beam_size=0,
             bos_id=0,
             eos_id=1,
             max_out_len=20):
    super(Seq2SeqAttInferModel, self).__init__(
        in_channels, encoder_size, decoder_size, emb_dim, num_classes)
    self.beam_size = beam_size

    # dynamic decoder for inference
    decoder = BeamSearchDecoder(
        self.decoder.decoder_attention.cell,
        start_token=bos_id,
        end_token=eos_id,
        beam_size=beam_size,
        embedding_fn=self.embedding,
        output_fn=self.decoder.fc)
    self.infer_decoder = DynamicDecode(
        decoder, max_step_num=max_out_len, is_test=True)
def _build_decoder(self, enc_final_state, mode='train', beam_size=10):
    dec_cell = DecoderCell(self.num_layers, self.hidden_size, self.dropout,
                           self.init_scale)
    output_layer = lambda x: layers.fc(
        x,
        size=self.tar_vocab_size,
        num_flatten_dims=len(x.shape) - 1,
        param_attr=fluid.ParamAttr(
            name="output_w",
            initializer=uniform_initializer(self.init_scale)),
        bias_attr=False)

    if mode == 'train':
        dec_output, dec_final_state = rnn(
            cell=dec_cell,
            inputs=self.tar_emb,
            initial_states=enc_final_state)
        dec_output = output_layer(dec_output)
        return dec_output
    elif mode == 'beam_search':
        beam_search_decoder = BeamSearchDecoder(
            dec_cell,
            self.beam_start_token,
            self.beam_end_token,
            beam_size,
            embedding_fn=self.tar_embeder,
            output_fn=output_layer)
        outputs, _ = dynamic_decode(
            beam_search_decoder,
            inits=enc_final_state,
            max_step_num=self.beam_max_step_num)
        return outputs
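# --- Conceptual sketch (plain Python, not the fluid implementation): what
# `dynamic_decode` does with a decoder such as BeamSearchDecoder. The
# initialize/step/finalize names mirror the fluid Decoder interface; the loop
# below is a simplified, hypothetical rendering of the real control flow.
def dynamic_decode_sketch(decoder, inits, max_step_num):
    inputs, states, finished = decoder.initialize(inits)
    step_outputs = []
    for step in range(max_step_num):
        outputs, states, inputs, finished = decoder.step(step, inputs, states)
        step_outputs.append(outputs)
        if all(finished):  # every batch/beam entry has emitted end_token
            break
    return decoder.finalize(step_outputs, states, None)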
def model_init(self,
               vocab_size,
               embed_dim,
               hidden_size,
               bos_id=0,
               eos_id=1,
               beam_size=4,
               max_step_num=20):
    embedder = Embedding(size=[vocab_size, embed_dim])
    output_layer = Linear(hidden_size, vocab_size)
    cell = BasicLSTMCell(embed_dim, hidden_size)
    decoder = BeamSearchDecoder(
        cell,
        start_token=bos_id,
        end_token=eos_id,
        beam_size=beam_size,
        embedding_fn=embedder,
        output_fn=output_layer)
    self.beam_search_decoder = DynamicDecode(
        decoder, max_step_num=max_step_num, is_test=True)
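# --- Usage sketch (hypothetical, mirroring the forward methods shown above):
# once model_init has run, inference is a single call. `initial_states` is an
# assumed name for whatever LSTM states the caller prepares for the cell; it
# is not part of the snippet above.
#
#   rs, _ = self.beam_search_decoder(inits=initial_states)
#
# `rs` holds the predicted token ids for each beam hypothesis.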
def _build_net(self):
    self.seq_len = fluid.layers.data(
        name="seq_len", shape=[1], dtype='int64', lod_level=0)
    self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
    src_mask = fluid.layers.sequence_mask(
        self.seq_len_used, maxlen=self.max_seq_len, dtype='float32')
    enc_padding_mask = (src_mask - 1.0)

    # Define decoder and initialize it.
    dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                    self.dropout)
    dec_init_hidden = fluid.layers.fc(
        input=self.feature,
        size=self.hidden_size,
        num_flatten_dims=1,
        param_attr=fluid.ParamAttr(
            name="dec_init_hidden_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="dec_init_hidden_b",
            initializer=fluid.initializer.Constant(0.)))
    dec_initial_states = [
        [[
            dec_init_hidden,
            dec_cell.get_initial_states(
                batch_ref=self.feature, shape=[self.hidden_size])
        ]] * self.num_layers,
        dec_cell.get_initial_states(
            batch_ref=self.feature, shape=[self.hidden_size])
    ]
    tar_vocab_size = len(self._label_list)
    tar_embeder = lambda x: fluid.embedding(
        input=x,
        size=[tar_vocab_size, self.hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='target_embedding',
            initializer=fluid.initializer.UniformInitializer(
                low=-0.1, high=0.1)))
    start_token_id = self._label_list.index(self.start_token)
    end_token_id = self._label_list.index(self.end_token)

    if not self.is_predict_phase:
        self.dec_input = fluid.layers.data(
            name="dec_input", shape=[self.max_seq_len], dtype='int64')
        tar_emb = tar_embeder(self.dec_input)
        dec_output, _ = rnn(
            cell=dec_cell,
            inputs=tar_emb,
            initial_states=dec_initial_states,
            sequence_length=None,
            enc_output=self.token_feature,
            enc_padding_mask=enc_padding_mask)
        self.logits = fluid.layers.fc(
            dec_output,
            size=tar_vocab_size,
            num_flatten_dims=len(dec_output.shape) - 1,
            param_attr=fluid.ParamAttr(
                name="output_w",
                initializer=fluid.initializer.UniformInitializer(
                    low=-0.1, high=0.1)))
        self.ret_infers = fluid.layers.reshape(
            x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
        logits = self.logits
        logits = fluid.layers.softmax(logits)
        return [logits]
    else:
        output_layer = lambda x: fluid.layers.fc(
            x,
            size=tar_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(name="output_w"))
        beam_search_decoder = BeamSearchDecoder(
            dec_cell,
            start_token_id,
            end_token_id,
            self.beam_size,
            embedding_fn=tar_embeder,
            output_fn=output_layer)
        enc_output = beam_search_decoder.tile_beam_merge_with_batch(
            self.token_feature, self.beam_size)
        enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
            enc_padding_mask, self.beam_size)
        self.ret_infers, _ = dynamic_decode(
            beam_search_decoder,
            inits=dec_initial_states,
            max_step_num=self.beam_max_step_num,
            enc_output=enc_output,
            enc_padding_mask=enc_padding_mask)
        return self.ret_infers
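# --- Note on the nested `dec_initial_states` above (explanatory, hedged: the
# exact layout is dictated by AttentionDecoderCell, whose code is not shown).
# The first element is a per-layer list of [hidden, cell] state pairs, each
# layer starting from `dec_init_hidden` and a zero cell state; the second is
# an extra per-batch zero state, presumably the attention/input-feed vector:
#
#   lstm_states = [[dec_init_hidden, zero_state]] * num_layers
#   dec_initial_states = [lstm_states, extra_zero_state]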
def _build_decoder(self,
                   z_mean=None,
                   z_log_var=None,
                   enc_output=None,
                   mode='train',
                   beam_size=10):
    dec_input = layers.dropout(
        self.tar_emb,
        dropout_prob=self.dec_dropout_in,
        dropout_implementation="upscale_in_train")

    # `output_layer` will be used within BeamSearchDecoder
    output_layer = lambda x: layers.fc(
        x,
        size=self.tar_vocab_size,
        num_flatten_dims=len(x.shape) - 1,
        name="output_w")

    # `sample_output_layer` samples an id from the logits distribution instead
    # of taking argmax(logits); it will be used within BeamSearchDecoder
    sample_output_layer = lambda x: layers.unsqueeze(
        fluid.one_hot(
            layers.unsqueeze(
                layers.sampling_id(
                    layers.softmax(layers.squeeze(output_layer(x), [1])),
                    dtype='int'), [1]),
            depth=self.tar_vocab_size), [1])

    if mode == 'train':
        latent_z = self._sampling(z_mean, z_log_var)
    else:
        latent_z = layers.gaussian_random_batch_size_like(
            self.tar, shape=[-1, self.latent_size])
    dec_first_hidden_cell = layers.fc(
        latent_z, 2 * self.hidden_size * self.num_layers, name='fc_hc')
    dec_first_hidden, dec_first_cell = layers.split(dec_first_hidden_cell, 2)
    if self.num_layers > 1:
        dec_first_hidden = layers.split(dec_first_hidden, self.num_layers)
        dec_first_cell = layers.split(dec_first_cell, self.num_layers)
    else:
        dec_first_hidden = [dec_first_hidden]
        dec_first_cell = [dec_first_cell]
    dec_initial_states = [
        [h, c] for h, c in zip(dec_first_hidden, dec_first_cell)
    ]

    dec_cell = DecoderCell(self.num_layers, self.hidden_size, latent_z,
                           self.param_attr_initializer,
                           self.param_attr_scale, self.dec_dropout_out)

    if mode == 'train':
        dec_output, _ = rnn(
            cell=dec_cell,
            inputs=dec_input,
            initial_states=dec_initial_states,
            sequence_length=self.tar_sequence_length)
        dec_output = output_layer(dec_output)
        return dec_output
    elif mode == 'greedy':
        start_token = 1
        end_token = 2
        max_length = 100
        beam_search_decoder = BeamSearchDecoder(
            dec_cell,
            start_token,
            end_token,
            beam_size=1,
            embedding_fn=self.tar_embeder,
            output_fn=output_layer)
        outputs, _ = dynamic_decode(
            beam_search_decoder,
            inits=dec_initial_states,
            max_step_num=max_length)
        return outputs
    elif mode == 'sampling':
        start_token = 1
        end_token = 2
        max_length = 100
        beam_search_decoder = BeamSearchDecoder(
            dec_cell,
            start_token,
            end_token,
            beam_size=1,
            embedding_fn=self.tar_embeder,
            output_fn=sample_output_layer)
        outputs, _ = dynamic_decode(
            beam_search_decoder,
            inits=dec_initial_states,
            max_step_num=max_length)
        return outputs
    else:
        raise ValueError("unsupported mode: %s" % mode)
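# --- Illustration (NumPy, not part of the model above): why
# `sample_output_layer` wraps the sampled id in a one-hot vector. With
# beam_size=1, BeamSearchDecoder greedily takes argmax over its output_fn's
# scores, and argmax of one_hot(sampled_id) is exactly sampled_id, so sampling
# replaces greedy search without changing the decoder itself.
import numpy as np

rng = np.random.default_rng(0)
logits = np.array([1.0, 3.0, 0.5])
probs = np.exp(logits) / np.exp(logits).sum()
sampled_id = rng.choice(len(probs), p=probs)  # layers.sampling_id analogue
one_hot = np.eye(len(probs))[sampled_id]      # fluid.one_hot analogue
assert one_hot.argmax() == sampled_id         # greedy argmax recovers the sample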