def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence.

    Args:
      encoder_outputs: encoder hidden states; assumed shape
        [batch, input_length, hidden_size] -- TODO confirm with encoder.
      encoder_decoder_attention_bias: bias tensor used in the decoder's
        encoder-decoder attention.

    Returns:
      Dict with "outputs" (best decoded id sequence per batch element)
      and "scores" (its beam-search score).
    """
    src_shape = tf.shape(encoder_outputs)
    batch, src_len = src_shape[0], src_shape[1]
    # Allow the output to run a configured margin past the input length.
    longest = src_len + self.params["extra_decode_length"]

    # Closure that yields next-token logits at each decode step.
    logits_fn = self._get_symbols_to_logits_fn(longest)

    # Decoding starts from all-zero ids for every batch element.
    start_ids = tf.zeros([batch], dtype=tf.int32)

    # Empty per-layer key/value tensors; beam search appends one position
    # per step so past attention values are reused, not recomputed.
    hidden = self.params["hidden_size"]
    cache = {}
    for layer in range(self.params["num_hidden_layers"]):
        cache["layer_%d" % layer] = {
            "k": tf.zeros([batch, 0, hidden]),
            "v": tf.zeros([batch, 0, hidden]),
        }
    # Encoder results ride along in the cache for the attention layers.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Beam search returns the beam_size best candidates with scores.
    seq_ids, seq_scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=logits_fn,
        initial_ids=start_ids,
        initial_cache=cache,
        vocab_size=self.params["vocab_size"],
        beam_size=self.params["beam_size"],
        alpha=self.params["alpha"],
        max_decode_length=longest,
        eos_id=EOS_ID)

    # Keep only the top beam and drop the leading dummy token.
    return {"outputs": seq_ids[:, 0, 1:], "scores": seq_scores[:, 0]}
def predict(self, encoder_outputs, encoder_decoder_attention_bias, training):
    """Return predicted sequence.

    Args:
      encoder_outputs: encoder hidden states; assumed shape
        [batch, input_length, hidden_size] -- TODO confirm with encoder.
      encoder_decoder_attention_bias: bias tensor used in the decoder's
        encoder-decoder attention.
      training: bool, forwarded to the symbols-to-logits closure.

    Returns:
      Dict with "outputs" (best decoded id sequence per batch element)
      and "scores" (its beam-search score).
    """
    src_shape = tf.shape(encoder_outputs)
    batch = src_shape[0]
    # Output may run a configured margin past the input length.
    longest = src_shape[1] + self.params["extra_decode_length"]

    # Closure that yields next-token logits at each decode step.
    logits_fn = self._get_symbols_to_logits_fn(longest, training)

    # All sequences begin from id 0.
    start_ids = tf.zeros([batch], dtype=tf.int32)

    def _empty_kv():
        # Zero-length key/value tensors; beam search grows them one
        # position per step so past attention values are reused.
        empty = tf.zeros([batch, 0, self.params["hidden_size"]])
        return {"k": empty, "v": empty}

    cache = {
        "layer_%d" % layer: _empty_kv()
        for layer in range(self.params["num_hidden_layers"])
    }
    # Encoder results ride along in the cache for the attention layers.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Beam search returns the beam_size best candidates with scores.
    seq_ids, seq_scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=logits_fn,
        initial_ids=start_ids,
        initial_cache=cache,
        vocab_size=self.params["vocab_size"],
        beam_size=self.params["beam_size"],
        alpha=self.params["alpha"],
        max_decode_length=longest,
        eos_id=EOS_ID)

    # Keep only the top beam and drop the leading dummy token.
    return {"outputs": seq_ids[:, 0, 1:], "scores": seq_scores[:, 0]}
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence.

    Args:
      encoder_outputs: encoder hidden states; assumed shape
        [batch, input_length, hidden_size] -- TODO confirm with encoder.
      encoder_decoder_attention_bias: bias tensor used in the decoder's
        encoder-decoder attention.

    Returns:
      Dict with "outputs" (best decoded id sequence per batch element)
      and "scores" (its beam-search score).
    """
    n_batch = tf.shape(encoder_outputs)[0]
    n_src = tf.shape(encoder_outputs)[1]
    # Decode length is bounded by input length plus a configured margin.
    decode_cap = n_src + self.params["extra_decode_length"]

    # Function computing per-step logits under this length cap (consumed
    # by beam search).
    step_logits_fn = self._get_symbols_to_logits_fn(decode_cap)

    # Initial ids are all zero (nothing predicted yet).
    first_ids = tf.zeros([n_batch], dtype=tf.int32)

    # Cache holds the keys/values produced at each decode step, starting
    # empty (zero-length) for every decoder layer.
    width = self.params["hidden_size"]
    cache = {}
    for idx in range(self.params["num_hidden_layers"]):
        cache["layer_%d" % idx] = {
            "k": tf.zeros([n_batch, 0, width]),
            "v": tf.zeros([n_batch, 0, width]),
        }
    # Stash encoder results in the cache for the attention layers.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Run beam search to obtain the beam_size best sequences and scores.
    all_ids, all_scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=step_logits_fn,
        initial_ids=first_ids,
        initial_cache=cache,
        vocab_size=self.params["vocab_size"],
        beam_size=self.params["beam_size"],
        alpha=self.params["alpha"],
        max_decode_length=decode_cap,
        eos_id=EOS_ID)

    # Best beam only; skip the initial dummy token.
    best_ids = all_ids[:, 0, 1:]
    best_scores = all_scores[:, 0]
    return {"outputs": best_ids, "scores": best_scores}
def predict(self, encoder_outputs, encoder_decoder_attention_bias):
    """Return predicted sequence.

    Args:
      encoder_outputs: encoder hidden states; assumed shape
        [batch, input_length, hidden_size] -- TODO confirm with encoder.
      encoder_decoder_attention_bias: bias tensor used in the decoder's
        encoder-decoder attention.

    Returns:
      Dict with "outputs" (best decoded id sequence per batch element)
      and "scores" (its beam-search score).
    """
    dims = tf.shape(encoder_outputs)
    n_batch, n_src = dims[0], dims[1]
    # Output may extend a configured margin beyond the input length.
    decode_cap = n_src + self.params.extra_decode_length

    # Per-step logits function consumed by beam search.
    step_logits_fn = self._get_symbols_to_logits_fn(decode_cap)

    # Every sequence starts from id 0.
    first_ids = tf.zeros([n_batch], dtype=tf.int32)

    # Zero-length per-layer key/value tensors; beam search appends one
    # position per decode step so past attention values are reused.
    width = self.params.hidden_size
    cache = {
        "layer_%d" % idx: {
            "k": tf.zeros([n_batch, 0, width]),
            "v": tf.zeros([n_batch, 0, width]),
        }
        for idx in range(self.params.num_hidden_layers)
    }
    # Encoder results travel in the cache for the attention layers.
    cache["encoder_outputs"] = encoder_outputs
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    # Beam search yields the beam_size best candidates with scores.
    all_ids, all_scores = beam_search.sequence_beam_search(
        symbols_to_logits_fn=step_logits_fn,
        initial_ids=first_ids,
        initial_cache=cache,
        vocab_size=self.params.vocab_size,
        beam_size=self.params.beam_size,
        alpha=self.params.alpha,
        max_decode_length=decode_cap,
        eos_id=EOS_ID)

    # Top beam only; the leading dummy token is dropped.
    return {"outputs": all_ids[:, 0, 1:], "scores": all_scores[:, 0]}