def model_build_fun(self, model, forward_only=False, loss_scale=None): encoder_inputs = model.net.AddExternalInput( workspace.GetNameScope() + 'encoder_inputs', ) encoder_lengths = model.net.AddExternalInput( workspace.GetNameScope() + 'encoder_lengths', ) decoder_inputs = model.net.AddExternalInput( workspace.GetNameScope() + 'decoder_inputs', ) decoder_lengths = model.net.AddExternalInput( workspace.GetNameScope() + 'decoder_lengths', ) targets = model.net.AddExternalInput( workspace.GetNameScope() + 'targets', ) target_weights = model.net.AddExternalInput( workspace.GetNameScope() + 'target_weights', ) attention_type = self.model_params['attention'] assert attention_type in ['none', 'regular', 'dot'] ( encoder_outputs, weighted_encoder_outputs, final_encoder_hidden_states, final_encoder_cell_states, encoder_units_per_layer, ) = seq2seq_util.build_embedding_encoder( model=model, encoder_params=self.encoder_params, num_decoder_layers=len(self.model_params['decoder_layer_configs']), inputs=encoder_inputs, input_lengths=encoder_lengths, vocab_size=self.source_vocab_size, embeddings=self.encoder_embeddings, embedding_size=self.model_params['encoder_embedding_size'], use_attention=(attention_type != 'none'), num_gpus=self.num_gpus, ) ( decoder_outputs, decoder_output_size, ) = seq2seq_util.build_embedding_decoder( model, decoder_layer_configs=self.model_params['decoder_layer_configs'], inputs=decoder_inputs, input_lengths=decoder_lengths, encoder_lengths=encoder_lengths, encoder_outputs=encoder_outputs, weighted_encoder_outputs=weighted_encoder_outputs, final_encoder_hidden_states=final_encoder_hidden_states, final_encoder_cell_states=final_encoder_cell_states, encoder_units_per_layer=encoder_units_per_layer, vocab_size=self.target_vocab_size, embeddings=self.decoder_embeddings, embedding_size=self.model_params['decoder_embedding_size'], attention_type=attention_type, forward_only=False, num_gpus=self.num_gpus, ) output_logits = seq2seq_util.output_projection( model=model, decoder_outputs=decoder_outputs, decoder_output_size=decoder_output_size, target_vocab_size=self.target_vocab_size, decoder_softmax_size=self.model_params['decoder_softmax_size'], ) targets, _ = model.net.Reshape( [targets], ['targets', 'targets_old_shape'], shape=[-1], ) target_weights, _ = model.net.Reshape( [target_weights], ['target_weights', 'target_weights_old_shape'], shape=[-1], ) _, loss_per_word = model.net.SoftmaxWithLoss( [output_logits, targets, target_weights], ['OutputProbs_INVALID', 'loss_per_word'], only_loss=True, ) num_words = model.net.SumElements( [target_weights], 'num_words', ) total_loss_scalar = model.net.Mul( [loss_per_word, num_words], 'total_loss_scalar', ) total_loss_scalar_weighted = model.net.Scale( [total_loss_scalar], 'total_loss_scalar_weighted', scale=1.0 / self.batch_size, ) return [total_loss_scalar_weighted]
def model_build_fun(self, model, forward_only=False, loss_scale=None): encoder_inputs = model.net.AddExternalInput( workspace.GetNameScope() + 'encoder_inputs', ) encoder_lengths = model.net.AddExternalInput( workspace.GetNameScope() + 'encoder_lengths', ) decoder_inputs = model.net.AddExternalInput( workspace.GetNameScope() + 'decoder_inputs', ) decoder_lengths = model.net.AddExternalInput( workspace.GetNameScope() + 'decoder_lengths', ) targets = model.net.AddExternalInput( workspace.GetNameScope() + 'targets', ) target_weights = model.net.AddExternalInput( workspace.GetNameScope() + 'target_weights', ) attention_type = self.model_params['attention'] assert attention_type in ['none', 'regular'] ( encoder_outputs, weighted_encoder_outputs, final_encoder_hidden_state, final_encoder_cell_state, encoder_output_dim, ) = seq2seq_util.build_embedding_encoder( model=model, encoder_params=self.encoder_params, inputs=encoder_inputs, input_lengths=encoder_lengths, vocab_size=self.source_vocab_size, embeddings=self.encoder_embeddings, embedding_size=self.model_params['encoder_embedding_size'], use_attention=(attention_type != 'none'), num_gpus=self.num_gpus, ) assert len(self.model_params['decoder_layer_configs']) == 1 decoder_num_units = ( self.model_params['decoder_layer_configs'][0]['num_units']) initial_states = seq2seq_util.build_initial_rnn_decoder_states( model=model, encoder_num_units=encoder_output_dim, decoder_num_units=decoder_num_units, final_encoder_hidden_state=final_encoder_hidden_state, final_encoder_cell_state=final_encoder_cell_state, use_attention=(attention_type != 'none'), ) if self.num_gpus == 0: embedded_decoder_inputs = model.net.Gather( [self.decoder_embeddings, decoder_inputs], ['embedded_decoder_inputs'], ) else: with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)): embedded_decoder_inputs_cpu = model.net.Gather( [self.decoder_embeddings, decoder_inputs], ['embedded_decoder_inputs_cpu'], ) embedded_decoder_inputs = model.CopyCPUToGPU( embedded_decoder_inputs_cpu, 'embedded_decoder_inputs', ) # seq_len x batch_size x decoder_embedding_size if attention_type == 'none': decoder_outputs, _, _, _ = rnn_cell.LSTM( model=model, input_blob=embedded_decoder_inputs, seq_lengths=decoder_lengths, initial_states=initial_states, dim_in=self.model_params['decoder_embedding_size'], dim_out=decoder_num_units, scope='decoder', outputs_with_grads=[0], ) decoder_output_size = decoder_num_units else: (decoder_outputs, _, _, _, attention_weighted_encoder_contexts, _) = rnn_cell.LSTMWithAttention( model=model, decoder_inputs=embedded_decoder_inputs, decoder_input_lengths=decoder_lengths, initial_decoder_hidden_state=initial_states[0], initial_decoder_cell_state=initial_states[1], initial_attention_weighted_encoder_context=initial_states[2], encoder_output_dim=encoder_output_dim, encoder_outputs=encoder_outputs, decoder_input_dim=self.model_params['decoder_embedding_size'], decoder_state_dim=decoder_num_units, scope='decoder', outputs_with_grads=[0, 4], ) decoder_outputs, _ = model.net.Concat( [decoder_outputs, attention_weighted_encoder_contexts], [ 'states_and_context_combination', '_states_and_context_combination_concat_dims', ], axis=2, ) decoder_output_size = decoder_num_units + encoder_output_dim # we do softmax over the whole sequence # (max_length in the batch * batch_size) x decoder embedding size # -1 because we don't know max_length yet decoder_outputs_flattened, _ = model.net.Reshape( [decoder_outputs], [ 'decoder_outputs_flattened', 'decoder_outputs_and_contexts_combination_old_shape', ], shape=[-1, decoder_output_size], ) output_logits = seq2seq_util.output_projection( model=model, decoder_outputs=decoder_outputs_flattened, decoder_output_size=decoder_output_size, target_vocab_size=self.target_vocab_size, decoder_softmax_size=self.model_params['decoder_softmax_size'], ) targets, _ = model.net.Reshape( [targets], ['targets', 'targets_old_shape'], shape=[-1], ) target_weights, _ = model.net.Reshape( [target_weights], ['target_weights', 'target_weights_old_shape'], shape=[-1], ) output_probs = model.net.Softmax( [output_logits], ['output_probs'], engine=('CUDNN' if self.num_gpus > 0 else None), ) label_cross_entropy = model.net.LabelCrossEntropy( [output_probs, targets], ['label_cross_entropy'], ) weighted_label_cross_entropy = model.net.Mul( [label_cross_entropy, target_weights], 'weighted_label_cross_entropy', ) total_loss_scalar = model.net.SumElements( [weighted_label_cross_entropy], 'total_loss_scalar', ) total_loss_scalar_weighted = model.net.Scale( [total_loss_scalar], 'total_loss_scalar_weighted', scale=1.0 / self.batch_size, ) return [total_loss_scalar_weighted]