def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence."""
    batch_size = tf.shape(input=encoder_outputs)[0]
    input_length = tf.shape(input=encoder_outputs)[1]
    max_decode_length = input_length + self.params.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params.hidden_size],
                          dtype=tf.bfloat16),
            "v": tf.zeros([batch_size, 0, self.params.hidden_size],
                          dtype=tf.bfloat16),
        } for layer in range(self.params.num_hidden_layers)
    }

    # Instead of adding the encoder output and attention bias to the beam
    # search cache, initialize the decoder stack's encoder-decoder
    # projection cache once up front.
    self.decoder_stack.cache_encdec(encoder_outputs,
                                    encoder_decoder_attention_bias)

    # Use beam search to find the top beam_size sequences and scores.
    mlperf_log.transformer_print(
        key=mlperf_log.MODEL_HP_SEQ_BEAM_SEARCH,
        value={
            "vocab_size": self.params.vocab_size,
            "beam_size": self.params.beam_size,
            "alpha": self.params.alpha,
            "extra_decode_length": self.params.extra_decode_length
        })
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params.vocab_size,
        beam_size=self.params.beam_size,
        alpha=self.params.alpha,
        max_decode_length=max_decode_length,
        eos_id=EOS_ID)

    # Get the top sequence for each batch element.
    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]

    return {"outputs": top_decoded_ids, "scores": top_scores}
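# The variant above delegates encoder-side caching to
# `self.decoder_stack.cache_encdec(...)` rather than storing the encoder
# outputs in the beam-search cache dict. That method is not shown in this
# file; the sketch below is a hypothetical illustration of the idea only
# (precompute the per-layer encoder-decoder attention inputs once so they
# are not recomputed at every decode step). `enc_out_cache` exists in the
# commented-out lines of the original; `precompute_kv` is an invented name,
# not a real API.
def cache_encdec(self, encoder_outputs, encoder_decoder_attention_bias):
    """Hypothetical sketch: stash encoder-side tensors for fast decoding."""
    self.enc_out_cache = {
        "encoder_outputs": encoder_outputs,
        "encoder_decoder_attention_bias": encoder_decoder_attention_bias,
    }
    for layer in self.layers:
        # Project encoder outputs to K/V once per layer (invented helper).
        layer.encdec_attention.precompute_kv(encoder_outputs)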
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence."""
    batch_size = encoder_outputs.shape[0]
    input_length = encoder_outputs.shape[1]
    max_decode_length = input_length + self.param.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    initial_ids = nd.zeros(shape=(batch_size,), ctx=mx.cpu())

    # Create cache storing decoder attention values for each layer. Each
    # "k"/"v" entry is seeded with a single zero slice plus an "init" flag
    # rather than a zero-length time dimension.
    cache = {}
    for layer in range(self.param.num_hidden_layers):
        cache["layer_%d" % layer] = {
            "k": nd.zeros(shape=(batch_size, 1, self.param.hidden_size),
                          ctx=mx.cpu()),
            "v": nd.zeros(shape=(batch_size, 1, self.param.hidden_size),
                          ctx=mx.cpu()),
            "init": 1,
        }

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs.as_in_context(mx.cpu())
    cache["encoder_decoder_attention_bias"] = (
        encoder_decoder_attention_bias.as_in_context(mx.cpu()))

    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.param.vocab_size,
        beam_size=self.param.beam_size,
        alpha=self.param.alpha,
        max_decode_length=max_decode_length,
        eos_id=EOS_ID)

    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]
    return {"outputs": top_decoded_ids, "scores": top_scores}
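# Unlike the TF variants, the MXNet variant above cannot start from
# zero-length "k"/"v" tensors (zero-sized dimensions are not supported by
# the ndarray ops it relies on), so it seeds a placeholder row and marks it
# with "init": 1. A minimal sketch of how a consumer might honor that flag
# on the first decode step; the real attention layer is not shown in this
# file, and `append_kv`/`new_k` are illustrative names only.
from mxnet import nd

def append_kv(layer_cache, new_k):
    """Hypothetical helper: grow the cached keys by one decode step."""
    if layer_cache["init"]:
        # First step: replace the zero placeholder row seeded in predict().
        layer_cache["k"] = new_k
        layer_cache["init"] = 0
    else:
        # Later steps: append along the time dimension as usual.
        layer_cache["k"] = nd.concat(layer_cache["k"], new_k, dim=1)
    return layer_cache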
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence."""
    batch_size = tf.shape(encoder_outputs)[0]
    input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params["extra_decode_length"]

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    # <BOS>: 0
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            # tf.Tensor([], shape=(batch_size, 0, hidden_size), dtype=float32)
            "k": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
            "v": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
        } for layer in range(self.params["num_hidden_layers"])
    }

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Use beam search to find the top beam_size sequences and scores.
    # decoded_ids shape: [batch_size, beam_size, max_decode_length]
    # scores shape: [batch_size, beam_size]
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params["vocab_size"],
        beam_size=self.params["beam_size"],
        alpha=self.params["alpha"],
        max_decode_length=max_decode_length,
        eos_id=EOS_ID)

    # Get the top sequence for each batch element.
    top_decoded_ids = decoded_ids[:, 0, 1:]  # without <BOS>
    top_scores = scores[:, 0]
    return {"outputs": top_decoded_ids, "scores": top_scores}
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """
    :param encoder_outputs: [batch_size, input_length, hidden_size]
    :param encoder_decoder_attention_bias: [batch_size, 1, 1, length]
    :return: dict
    """
    batch_size = tf.shape(encoder_outputs)[0]
    max_decode_length = self.params.get('max_decode_length')

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
            "v": tf.zeros([batch_size, 0, self.params["hidden_size"]]),
        } for layer in range(self.params["num_blocks"])
    }

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Top decoded sequences: [batch_size, beam_size, max_decode_length]
    # Sequence scores: [batch_size, beam_size]
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params.get('vocab_size'),
        beam_size=self.params.get('beam_size'),
        alpha=self.params.get('alpha'),
        max_decode_length=max_decode_length,
        eos_id=self.params.get('eos_id'),
    )

    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]
    return {"outputs": top_decoded_ids, "scores": top_scores}
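# The variant above differs from the others in two ways: max_decode_length
# is a fixed hyperparameter rather than input_length + extra_decode_length,
# and the EOS id comes from params instead of a module-level EOS_ID. A
# hypothetical params dict it would accept, covering exactly the keys the
# function reads (all values are illustrative, not from the source):
params = {
    "max_decode_length": 128,
    "hidden_size": 512,
    "num_blocks": 6,
    "vocab_size": 32000,
    "beam_size": 4,
    "alpha": 0.6,
    "eos_id": 2,
}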
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence."""
    batch_size = tf.shape(encoder_outputs)[0]
    input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params.hidden_size]),
            "v": tf.zeros([batch_size, 0, self.params.hidden_size]),
        } for layer in range(self.params.num_hidden_layers)
    }

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Use beam search to find the top beam_size sequences and scores.
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params.vocab_size,
        beam_size=self.params.beam_size,
        alpha=self.params.alpha,
        max_decode_length=max_decode_length,
        eos_id=EOS_ID)

    # Get the top sequence for each batch element.
    top_decoded_ids = decoded_ids[:, 0, 1:]
    top_scores = scores[:, 0]
    return {"outputs": top_decoded_ids, "scores": top_scores}
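# Every variant in this file hands beam search a closure built by
# `_get_symbols_to_logits_fn`, which is never shown. The sketch below
# follows the shape that method has in the TensorFlow official-models
# Transformer (a best-effort reconstruction, not verbatim source): embed
# only the last predicted token, add the position signal for step i, run
# one incremental decoder step against the cached K/V, and project to
# vocabulary logits. `model_utils`, `embedding_softmax_layer`, and
# `decoder_stack` are assumed from that codebase.
def _get_symbols_to_logits_fn(self, max_decode_length):
    """Returns a decoding callback with signature (ids, i, cache)."""
    timing_signal = model_utils.get_position_encoding(
        max_decode_length + 1, self.params.hidden_size)
    decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
        max_decode_length)

    def symbols_to_logits_fn(ids, i, cache):
        # Only the last generated id is embedded; earlier steps live in cache.
        decoder_input = ids[:, -1:]
        decoder_input = self.embedding_softmax_layer(decoder_input)
        decoder_input += timing_signal[i:i + 1]
        # Mask out positions after step i for self-attention.
        self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
        decoder_outputs = self.decoder_stack(
            decoder_input, cache.get("encoder_outputs"), self_attention_bias,
            cache.get("encoder_decoder_attention_bias"), cache)
        logits = self.embedding_softmax_layer.linear(decoder_outputs)
        logits = tf.squeeze(logits, axis=[1])
        return logits, cache

    return symbols_to_logits_fn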
def predict(self, encoder_outputs, encoder_decoder_attention_bias, eos_id):
    """Return predicted sequence."""
    batch_size = tf.shape(encoder_outputs)[0]
    input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params.hidden_size]),
            "v": tf.zeros([batch_size, 0, self.params.hidden_size]),
        } for layer in range(self.params.num_hidden_layers)
    }

    # Add encoder output and attention bias to the cache.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Use beam search to find the top beam_size sequences and scores.
    decoded_ids, scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=symbols_to_logits_fn,
        initial_ids=initial_ids,
        initial_cache=cache,
        vocab_size=self.params.vocab_size,
        beam_size=self.params.beam_size,
        alpha=self.params.alpha,
        max_decode_length=max_decode_length,
        eos_id=eos_id)

    # Keep every beam's sequence for each batch element (unlike the other
    # variants, this does not select only the top beam), dropping the
    # initial id; scores still report only the top beam.
    top_decoded_ids = decoded_ids[:, :, 1:]
    top_scores = scores[:, 0]
    return {"outputs": top_decoded_ids, "scores": top_scores}
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence."""
    if ModeKeys.is_predict_one(self.mode):
        batch_size = 1
    else:
        batch_size = tf.shape(encoder_outputs)[0]
    input_length = tf.shape(encoder_outputs)[1]
    max_decode_length = input_length + self.params.extra_decode_length

    symbols_to_logits_fn = self._get_symbols_to_logits_fn(max_decode_length)

    # Create initial set of IDs that will be passed into symbols_to_logits_fn.
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    # Create cache storing decoder attention values for each layer.
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, self.params.hidden_size]),
            "v": tf.zeros([batch_size, 0, self.params.hidden_size]),
        } for layer in range(self.params.num_hidden_layers)
    }

    # Add encoder output to the cache; the attention bias is omitted in
    # predict-one mode.
    cache["encoder_outputs"] = encoder_outputs
    if not ModeKeys.is_predict_one(self.mode):
        cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    if self.params.beam_size > 1:
        # Use beam search to find the top beam_size sequences and scores.
        decoded_ids, scores = beam_search.sequence_beam_search(
            symbols_to_logits_fn=symbols_to_logits_fn,
            initial_ids=initial_ids,
            initial_cache=cache,
            vocab_size=self.params.target_vocab_size,
            beam_size=self.params.beam_size,
            alpha=self.params.alpha,
            max_decode_length=max_decode_length,
            eos_id=EOS_ID)

        # Get the top sequence for each batch element.
        top_decoded_ids = decoded_ids[:, 0, 1:]
        top_scores = scores[:, 0]
        return {"outputs": top_decoded_ids, "scores": top_scores}
    else:
        # Greedy decoding: emit the argmax token at each step until every
        # sequence in the batch has produced EOS or the length limit is hit.
        def inner_loop(i, finished, next_id, decoded_ids, cache):
            """One step of greedy decoding."""
            logits, cache = symbols_to_logits_fn(next_id, i, cache)
            next_id = tf.argmax(logits, -1, output_type=tf.int32)
            finished |= tf.equal(next_id, EOS_ID)
            next_id = tf.reshape(next_id, shape=[-1, 1])
            decoded_ids = tf.concat([decoded_ids, next_id], axis=1)
            return i + 1, finished, next_id, decoded_ids, cache

        def is_not_finished(i, finished, *_):
            return (i < max_decode_length) & tf.logical_not(
                tf.reduce_all(finished))

        decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int32)
        finished = tf.fill([batch_size], False)
        next_id = tf.zeros([batch_size, 1], dtype=tf.int32)
        _, _, _, decoded_ids, _ = tf.while_loop(
            is_not_finished,
            inner_loop,
            [tf.constant(0), finished, next_id, decoded_ids, cache],
            shape_invariants=[
                tf.TensorShape([]),
                tf.TensorShape([None]),
                tf.TensorShape([None, None]),
                tf.TensorShape([None, None]),
                nest.map_structure(get_state_shape_invariants, cache),
            ])
        return {"outputs": decoded_ids, "scores": tf.ones([batch_size, 1])}
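# The while_loop above references `get_state_shape_invariants`, which is not
# defined in this snippet. In the TF official-models Transformer it is
# implemented roughly as below (a reconstruction, shown so the greedy branch
# reads as self-contained; assumes tf is imported as in the snippets above):
# keep the batch and depth dimensions, relax every middle dimension so the
# growing K/V cache passes the loop's shape check as it lengthens.
def get_state_shape_invariants(tensor):
    """Returns the tensor's shape with all middle dimensions set to None."""
    shape = tensor.shape.as_list()
    for i in range(1, len(shape) - 1):
        shape[i] = None
    return tf.TensorShape(shape)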