def _pack_output(self, encoder_output, decoder_output, decoding_result, target_modality, **kwargs):
    """ Assembles the mode-specific outputs of the model.

    Args:
        encoder_output: An instance of `collections.namedtuple`
          from `Encoder.encode()`. Its `encoder_self_attention`
          field, if present, is collected.
        decoder_output: An instance of `collections.namedtuple`
          whose element types are defined by `Decoder.output_dtype`
          property. Its `encoder_decoder_attention` and
          `decoder_self_attention` fields, if present, are collected.
        decoding_result: A dict containing hypothesis, log
          probabilities, beam ids and decoding length if
          mode==INFER, else, a logits Tensor with shape
          [timesteps, batch_size, vocab_size].
        target_modality: An instance of `Modality`, forwarded to
          `self._compute_loss`.
        **kwargs: Input fields. `Constants.LABEL_IDS_NAME` and
          `Constants.LABEL_LENGTH_NAME` are read in TRAIN/EVAL mode,
          `Constants.FEATURE_IDS_NAME` is read in INFER mode.

    Returns: The loss if mode==TRAIN; a tuple `(loss, attentions)` if
      mode==EVAL; if mode==INFER, the dictionary returned by
      `process_beam_predictions` extended with the keys "attentions"
      and "source".
    """
    mode = self.mode
    needs_loss = mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL
    if needs_loss:
        loss = self._compute_loss(
            logits=decoding_result,  # [timesteps, batch_size, dim]
            label_ids=kwargs[Constants.LABEL_IDS_NAME],
            label_length=kwargs[Constants.LABEL_LENGTH_NAME],
            target_modality=target_modality)
        if mode == ModeKeys.TRAIN:
            return loss

    # (holder, field name) pairs, visited in this order so that the
    # resulting dict keeps a stable key order.
    # NOTE: per the original author, encoder self attention can currently
    # only be MultiHeadAttention with shape
    # [batch_size, num_heads, length_q, length_k].
    attention_sources = (
        (encoder_output, "encoder_self_attention"),
        (decoder_output, "encoder_decoder_attention"),
        (decoder_output, "decoder_self_attention"),
    )
    attentions = {}
    for holder, att_name in attention_sources:
        if not hasattr(holder, att_name):
            continue
        att_value = getattr(holder, att_name)
        if isinstance(att_value, list):
            # multi-layer: one entry per layer, keyed by name + layer index
            for layer_idx, layer_att in enumerate(att_value):
                attentions[att_name + str(layer_idx)] = layer_att
        else:
            attentions[att_name] = att_value

    if mode == ModeKeys.EVAL:
        return loss, attentions

    assert mode == ModeKeys.INFER
    predict_out = process_beam_predictions(
        decoding_result=decoding_result,
        beam_size=self.params["inference.beam_size"],
        alpha=self.params["inference.length_penalty"])
    predict_out["attentions"] = attentions
    predict_out["source"] = kwargs[Constants.FEATURE_IDS_NAME]
    return predict_out
def _pack_output(self, encoder_output, decoder_output, decoding_result, **kwargs):
    """ Packs model outputs according to the running mode.

    Args:
        encoder_output: An instance of `collections.namedtuple`
          from `Encoder.encode()`.
        decoder_output: An instance of `collections.namedtuple`
          whose element types are defined by `Decoder.output_dtype`
          property.
        decoding_result: A dict containing hypothesis, log
          probabilities, beam ids and decoding length if
          mode==INFER, else, a logits Tensor with shape
          [timesteps, batch_size, vocab_size].
        **kwargs: e.g. input fields. The label ids / label length
          entries are read in TRAIN/EVAL mode and the feature ids
          entry is read in INFER mode.

    Returns: `(loss_sum, weight_sum)` if mode==TRAIN;
      `((loss_sum, weight_sum), attentions)` if mode==EVAL; if
      mode==INFER, the dictionary returned by `process_beam_predictions`
      with the extra keys "attentions" and "source".
    """
    def _collect(into, holder, field):
        # Copies `holder.<field>` into `into` when the field exists; a list
        # (multi-layer) is flattened into one entry per layer index.
        if not hasattr(holder, field):
            return
        value = getattr(holder, field)
        if not isinstance(value, list):
            into[field] = value
            return
        for layer_no, layer_value in enumerate(value):
            into[field + str(layer_no)] = layer_value

    in_train = self.mode == ModeKeys.TRAIN
    if in_train or self.mode == ModeKeys.EVAL:
        loss_sum, weight_sum = self._compute_loss(
            logits=decoding_result,  # [timesteps, batch_size, dim]
            label_ids=kwargs[Constants.LABEL_IDS_NAME],
            label_length=kwargs[Constants.LABEL_LENGTH_NAME])
    if in_train:
        return loss_sum, weight_sum

    attentions = {}
    # NOTE: per the original author, this can currently only be
    # MultiHeadAttention with shape
    # [batch_size, num_heads, length_q, length_k].
    _collect(attentions, encoder_output, "encoder_self_attention")
    _collect(attentions, decoder_output, "encoder_decoder_attention")
    _collect(attentions, decoder_output, "decoder_self_attention")

    if self.mode == ModeKeys.EVAL:
        return (loss_sum, weight_sum), attentions

    assert self.mode == ModeKeys.INFER
    beam_size = self.params["inference.beam_size"]
    length_penalty = self.params["inference.length_penalty"]
    predict_out = process_beam_predictions(
        decoding_result=decoding_result,
        beam_size=beam_size,
        alpha=length_penalty)
    predict_out["attentions"] = attentions
    predict_out["source"] = kwargs[Constants.FEATURE_IDS_NAME]
    return predict_out
def build(self, base_models, vocab_target, input_fields):
    """ Builds the ensemble model.

    Every base model gets its own modalities, encoder, bridge and decoder,
    created inside the nested variable scope
    `<ENSEMBLE_VARNAME_PREFIX><index>/<model.name>`. The decoder and
    target modality names are then prefixed with that scope name, and all
    models are decoded jointly with beam search.

    Args:
        base_models: A list of `BaseSeq2Seq` instances.
        vocab_target: An instance of `Vocab`.
        input_fields: A dict of placeholders.

    Returns: A dictionary of inference status: the output of
      `process_beam_predictions` plus a "source" entry holding
      `input_fields[Constants.FEATURE_IDS_NAME]`.
    """
    # Scope-qualified names must be "/"-separated on every platform.
    # `os.path.join` would insert "\\" on Windows, so the POSIX flavour is
    # used explicitly (identical behaviour on POSIX systems).
    import posixpath

    encoder_outputs = []
    encdec_bridges = []
    decoders = []
    target_modalities = []
    # prepare for decoding of each model
    for index, model in enumerate(base_models):
        with tf.variable_scope(Constants.ENSEMBLE_VARNAME_PREFIX + str(index)):
            with tf.variable_scope(model.name):
                input_modality, target_modality = model._create_modalities()
                encoder = model._create_encoder()
                encoder_output = model._encode(
                    encoder=encoder,
                    input_modality=input_modality,
                    input_fields=input_fields)
                bridge = model._create_bridge(encoder_output)
                decoder = model._create_decoder()
                # Qualify the names with the current scope; presumably so
                # that each ensemble member resolves its own variables
                # during decoding -- verify against the decode helpers.
                vs_name = tf.get_variable_scope().name
                decoder.name = posixpath.join(vs_name, decoder.name)
                target_modality.name = posixpath.join(
                    vs_name, target_modality.name)
        encoder_outputs.append(encoder_output)
        encdec_bridges.append(bridge)
        decoders.append(decoder)
        target_modalities.append(target_modality)

    helper = BeamFeedback(
        vocab=vocab_target,
        batch_size=tf.shape(input_fields[Constants.FEATURE_IDS_NAME])[0],
        maximum_labels_length=self._maximum_labels_length,
        beam_size=self._beam_size,
        alpha=self._length_penalty,
        ensemble_weight=self.get_ensemble_weights(len(base_models)))
    decoding_result = dynamic_ensemble_decode(
        decoders=decoders,
        encoder_outputs=encoder_outputs,
        bridges=encdec_bridges,
        target_modalities=target_modalities,
        helper=helper,
        beam_size=self._beam_size)
    predict_out = process_beam_predictions(
        decoding_result=decoding_result,
        beam_size=self._beam_size,
        alpha=self._length_penalty)
    predict_out["source"] = input_fields[Constants.FEATURE_IDS_NAME]
    return predict_out
def build(self, input_fields):
    """ Builds the ensemble model on top of `self._base_models`.

    Args:
        input_fields: A dict of placeholders. The entry under
          `Constants.FEATURE_IDS_NAME` provides the batch size and is
          returned as the "source" of the predictions.

    Returns: A dictionary of inference status: the output of
      `process_beam_predictions` plus a "source" entry.
    """
    def _decoding_parts(model):
        # The per-model pieces needed by the ensemble decoder.
        return (model._decoder,
                model._encoder_decoder_bridge,
                model._target_to_embedding_fn,
                model._outputs_to_logits_fn)

    # prepare for decoding of each model
    encoder_outputs = [
        base_model._encode(input_fields=input_fields)
        for base_model in self._base_models]

    batch_size = tf.shape(input_fields[Constants.FEATURE_IDS_NAME])[0]
    ensemble_weight = self.get_ensemble_weights(len(self._base_models))
    helper = BeamFeedback(
        vocab=self._vocab_target,
        batch_size=batch_size,
        maximum_labels_length=self._maximum_labels_length,
        beam_size=self._beam_size,
        alpha=self._length_penalty,
        ensemble_weight=ensemble_weight)

    # Unpacks into four sequences: decoders, bridges, embedding fns and
    # logits fns (presumably one element per base model -- see
    # `repeat_n_times`).
    gathered = repeat_n_times(
        len(self._base_models), _decoding_parts, self._base_models)
    decoders, bridges, target_to_emb_fns, outputs_to_logits_fns = gathered

    decoding_result = dynamic_ensemble_decode(
        decoders=decoders,
        encoder_outputs=encoder_outputs,
        bridges=bridges,
        helper=helper,
        target_to_embedding_fns=target_to_emb_fns,
        outputs_to_logits_fns=outputs_to_logits_fns,
        beam_size=self._beam_size)
    predict_out = process_beam_predictions(
        decoding_result=decoding_result,
        beam_size=self._beam_size,
        alpha=self._length_penalty)
    predict_out["source"] = input_fields[Constants.FEATURE_IDS_NAME]
    return predict_out
def build(self, input_fields):
    """ Creates the beam-search inference outputs of the ensemble.

    The inputs are encoded by every model in `self._base_models`, the
    models are decoded jointly through `dynamic_ensemble_decode`, and the
    raw decoding result is post-processed by `process_beam_predictions`.

    Args:
        input_fields: A dict of placeholders.

    Returns: A dictionary of inference status, with the input feature ids
      stored under the key "source".
    """
    models = self._base_models

    # prepare for decoding of each model
    encoder_outputs = []
    for model in models:
        encoder_outputs.append(model._encode(input_fields=input_fields))

    feedback_args = dict(
        vocab=self._vocab_target,
        batch_size=tf.shape(input_fields[Constants.FEATURE_IDS_NAME])[0],
        maximum_labels_length=self._maximum_labels_length,
        beam_size=self._beam_size,
        alpha=self._length_penalty,
        ensemble_weight=self.get_ensemble_weights(len(models)))
    helper = BeamFeedback(**feedback_args)

    def pick_components(m):
        # decoder, bridge, target embedding fn and logits fn of one model
        return (m._decoder, m._encoder_decoder_bridge,
                m._target_to_embedding_fn, m._outputs_to_logits_fn)

    decoders, bridges, target_to_emb_fns, outputs_to_logits_fns = \
        repeat_n_times(len(models), pick_components, models)

    decoding_result = dynamic_ensemble_decode(
        decoders=decoders,
        encoder_outputs=encoder_outputs,
        bridges=bridges,
        helper=helper,
        target_to_embedding_fns=target_to_emb_fns,
        outputs_to_logits_fns=outputs_to_logits_fns,
        beam_size=self._beam_size)

    predict_out = process_beam_predictions(
        decoding_result=decoding_result,
        beam_size=self._beam_size,
        alpha=self._length_penalty)
    predict_out["source"] = input_fields[Constants.FEATURE_IDS_NAME]
    return predict_out