import tensorflow as tf


def beam_decode(model, batch, vocab, params):

    def decode_onestep(enc_inp, enc_outputs, dec_input, dec_state,
                       enc_extended_inp, batch_oov_len, enc_pad_mask,
                       use_coverage, prev_coverage):
        """Decode one step (used for beam-search decoding).

        Args:
            enc_inp : encoder input ids, shape = (beam_size, max_enc_len)
            enc_outputs : hidden outputs computed by the encoder LSTM,
                shape = (beam_size, max_enc_len, hidden_size)
            dec_input : the previously decoded token of each beam,
                shape = (beam_size, 1)
            dec_state : beam_size-many previous decoder states,
                shape = (beam_size, hidden_size)
            enc_extended_inp : encoder input ids where in-article OOVs keep
                their extended ids (for the pointer mechanism)
            batch_oov_len : number of in-article OOVs, scalar
            enc_pad_mask : encoder padding mask, shape = (beam_size, max_enc_len)
            use_coverage : whether the coverage mechanism is enabled
            prev_coverage : beam_size-many previous coverage vectors,
                shape = (beam_size, max_enc_len, 1)

        Returns:
            A dictionary with the results of the step (see below).
        """
        final_dists, dec_hidden, attentions, p_gens = model(
            enc_outputs,       # shape=(3, 115, 256)
            dec_state,         # shape=(3, 256)
            enc_inp,           # shape=(3, 115)
            enc_extended_inp,  # shape=(3, 115)
            dec_input,         # shape=(3, 1)
            batch_oov_len,     # shape=()
            enc_pad_mask,      # shape=(3, 115)
            use_coverage,
            prev_coverage)     # shape=(3, 115, 1)
        # Keep the 2 * beam_size most likely tokens and take the log of
        # their probabilities.
        top_k_probs, top_k_ids = tf.nn.top_k(tf.squeeze(final_dists),
                                             k=params["beam_size"] * 2)
        top_k_log_probs = tf.math.log(top_k_probs)
        return {"dec_state": dec_hidden,
                "attention_vec": attentions,  # [batch_sz, max_len_x, 1]
                "top_k_ids": top_k_ids,
                "top_k_log_probs": top_k_log_probs,
                "p_gen": p_gens}

    # We run the encoder once and then reuse its outputs at every decoding
    # step. state shape=(3, 256), enc_outputs shape=(3, 115, 256)
    enc_outputs, state = model.call_encoder(batch[0]["enc_input"])

    # Initial hypotheses (a beam_size-many list). Every beam starts from the
    # [START] token with log prob 0 and the same initial decoder state.
    hyps = [Hypothesis(tokens=[vocab.word_to_id('[START]')],
                       log_probs=[0.0],
                       state=state[0],
                       p_gens=[],
                       attn_dists=[])
            for _ in range(params['batch_size'])]  # batch_size == beam_size

    results = []  # list to hold the top beam_size finished hypotheses
    steps = 0     # initial step
    while steps < params['max_dec_steps'] and len(results) < params['beam_size']:
        # Latest token of each hypothesis, shape: [beam_size].
        latest_tokens = [h.latest_token for h in hyps]
        # Replace all in-article OOV ids with the [UNK] token id.
        latest_tokens = [t if t in range(params['vocab_size'])
                         else vocab.word_to_id('[UNK]')
                         for t in latest_tokens]
        # Collect the last decoder state of each hypothesis.
        states = [h.state for h in hyps]

        # Decode the 2 * beam_size most likely tokens at step t for each
        # hypothesis.
        dec_input = tf.expand_dims(latest_tokens, axis=1)  # shape=(3, 1)
        dec_states = tf.stack(states, axis=0)
        returns = decode_onestep(
            batch[0]['enc_input'],                # shape=(3, 115)
            enc_outputs,                          # shape=(3, 115, 256)
            dec_input,                            # shape=(3, 1)
            dec_states,                           # shape=(3, 256)
            batch[0]['extended_enc_input'],       # shape=(3, 115)
            batch[0]['max_oov_len'],              # shape=()
            batch[0]['sample_encoder_pad_mask'],  # shape=(3, 115)
            params['is_coverage'],
            prev_coverage=None)
        topk_ids = returns['top_k_ids']
        topk_log_probs = returns['top_k_log_probs']
        new_states = returns['dec_state']
        attn_dists = returns['attention_vec']
        p_gens = returns['p_gen']

        all_hyps = []
        # On the first step all beams are identical, so only one of them is
        # extended; afterwards every surviving hypothesis is extended.
        num_orig_hyps = 1 if steps == 0 else len(hyps)
        for i in range(num_orig_hyps):
            h, new_state, attn_dist, p_gen = (hyps[i], new_states[i],
                                              attn_dists[i], p_gens[i])
            for j in range(params['beam_size'] * 2):
                # Extend each hypothesis with each of the top k tokens (this
                # gives 2 x beam_size new hypotheses for each of the
                # beam_size old hypotheses).
                new_hyp = h.extend(token=topk_ids[i, j].numpy(),
                                   log_prob=topk_log_probs[i, j],
                                   state=new_state,
                                   attn_dist=attn_dist,
                                   p_gen=p_gen)
                all_hyps.append(new_hyp)

        # Sort all the hypotheses and keep only the beam_size most likely
        # ones; finished hypotheses (ending in [STOP]) move to results.
        hyps = []
        sorted_hyps = sorted(all_hyps, key=lambda h: h.avg_log_prob,
                             reverse=True)
        for h in sorted_hyps:
            if h.latest_token == vocab.word_to_id('[STOP]'):
                if steps >= params['min_dec_steps']:
                    results.append(h)
            else:
                hyps.append(h)
            if (len(hyps) == params['beam_size']
                    or len(results) == params['beam_size']):
                break
        steps += 1

    if len(results) == 0:
        results = hyps

    # At the end of the loop we return the most likely hypothesis, which
    # holds the most likely output sequence given the input fed to the model.
    hyps_sorted = sorted(results, key=lambda h: h.avg_log_prob, reverse=True)
    best_hyp = hyps_sorted[0]
    best_hyp.abstract = " ".join(
        output_to_words(best_hyp.tokens, vocab,
                        batch[0]["article_oovs"][0])[1:-1])
    best_hyp.text = batch[0]["article"].numpy()[0].decode()
    print('best_hyp is ', best_hyp.abstract)
    return best_hyp
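# None of the decoders in this file define `Hypothesis`; it lives elsewhere in
# the project. The following is a minimal sketch reconstructed from how it is
# used here (constructor arguments, `extend`, `latest_token`, `avg_log_prob`);
# the project's real class may differ in detail.
class Hypothesis:
    """One partial decoding: a token sequence plus its running score and state."""

    def __init__(self, tokens, log_probs, state, attn_dists=None, p_gens=None):
        self.tokens = tokens        # decoded token ids so far
        self.log_probs = log_probs  # one log probability per decoded token
        self.state = state          # decoder hidden state after the last token
        self.attn_dists = attn_dists if attn_dists is not None else []
        self.p_gens = p_gens if p_gens is not None else []
        self.abstract = ""          # filled in by the decoders after the search
        self.text = ""

    def extend(self, token, log_prob, state, attn_dist=None, p_gen=None):
        # Return a NEW hypothesis one token longer; the original is left
        # untouched so it can also be extended with the other top-k candidates.
        return Hypothesis(tokens=self.tokens + [token],
                          log_probs=self.log_probs + [log_prob],
                          state=state,
                          attn_dists=self.attn_dists + [attn_dist],
                          p_gens=self.p_gens + [p_gen])

    @property
    def latest_token(self):
        return self.tokens[-1]

    @property
    def avg_log_prob(self):
        # Length-normalised score, so hypotheses of different lengths can be
        # compared fairly when the beams are sorted.
        return sum(self.log_probs) / len(self.tokens)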
# Variant of beam_decode for a plain seq2seq model (no pointer or coverage):
# the batch is decoded one sample at a time, with the encoder input tiled
# beam_size times so that batch_size == beam_size inside the search.
def beam_decode(model, batch, vocab, params):

    def decode_onestep(enc_inp, enc_outputs, dec_input, dec_state,
                       enc_extended_inp, batch_oov_len, enc_pad_mask,
                       use_coverage, prev_coverage):
        """Decode one step (used for beam-search decoding).

        Only enc_outputs, dec_input and dec_state are actually used by this
        plain seq2seq model; enc_extended_inp, batch_oov_len, enc_pad_mask,
        use_coverage and prev_coverage are kept for interface compatibility
        with the pointer-generator version above and are ignored.

        Returns:
            A dictionary with the decoder state and the top 2 * beam_size
            token ids and log probabilities.
        """
        # Placeholder decoder target; the model's call signature requires it.
        dec_tar = tf.ones(shape=(params["beam_size"], 1))
        final_dists, dec_hidden = model(
            enc_outputs,  # shape=(beam_size, max_enc_len, hidden_size)
            dec_input,    # shape=(beam_size, 1)
            dec_state,    # shape=(beam_size, hidden_size)
            dec_tar)
        # Keep the 2 * beam_size most likely tokens and take the log of
        # their probabilities.
        top_k_probs, top_k_ids = tf.nn.top_k(tf.squeeze(final_dists),
                                             k=params["beam_size"] * 2)
        top_k_log_probs = tf.math.log(top_k_probs)
        return {"dec_state": dec_hidden,
                "top_k_ids": top_k_ids,
                "top_k_log_probs": top_k_log_probs}

    params["batch_size"] = params["beam_size"]
    dataset = batch
    res = []
    for k in range(params["batch_size"]):
        # Tile the k-th sample beam_size times and encode it all at once.
        enc_input = dataset["enc_input"][k]
        enc_ = tf.squeeze(tf.stack([[enc_input] * params["beam_size"]], axis=0))
        enc_outputs, state = model.call_encoder(enc_)

        hyps = [Hypothesis(tokens=[vocab.word_to_id('[START]')],
                           log_probs=[0.0],
                           state=state[0],
                           p_gens=[],
                           attn_dists=[])
                for _ in range(params['beam_size'])]
        results = []  # list to hold the top beam_size finished hypotheses
        steps = 0     # initial step
        # One full beam search for this sample.
        while (steps < params['max_dec_steps']
               and len(results) < params['beam_size']):
            # Latest token for each hypothesis, shape: [beam_size]; replace
            # in-article OOV ids with the [UNK] token id.
            latest_tokens = [h.latest_token for h in hyps]
            latest_tokens = [t if t in range(params['vocab_size'])
                             else vocab.word_to_id('[UNK]')
                             for t in latest_tokens]
            states = [h.state for h in hyps]
            dec_input = tf.expand_dims(latest_tokens, axis=1)  # shape=(beam, 1)
            dec_states = tf.stack(states, axis=0)              # shape=(beam, 128)
            returns = decode_onestep(
                dataset['enc_input'][k],
                enc_outputs,
                dec_input,
                dec_states,
                dataset['extended_enc_input'][k],
                dataset['max_oov_len'],
                dataset['sample_encoder_pad_mask'][k],
                True,
                prev_coverage=None)
            topk_ids = returns['top_k_ids']
            topk_log_probs = returns['top_k_log_probs']
            new_states = returns['dec_state']

            all_hyps = []
            # Collect the beam_size x (2 * beam_size) candidate extensions.
            num_orig_hyps = 1 if steps == 0 else len(hyps)
            for i in range(num_orig_hyps):
                h, new_state = hyps[i], new_states[i]
                for j in range(params['beam_size'] * 2):
                    new_hyp = h.extend(token=topk_ids[i, j].numpy(),
                                       log_prob=topk_log_probs[i, j],
                                       state=new_state,
                                       attn_dist=None,
                                       p_gen=[])
                    all_hyps.append(new_hyp)

            # Keep only the beam_size most likely hypotheses.
            hyps = []
            sorted_hyps = sorted(all_hyps, key=lambda h: h.avg_log_prob,
                                 reverse=True)
            for h in sorted_hyps:
                if h.latest_token == vocab.word_to_id('[STOP]'):
                    if steps >= params['min_dec_steps']:
                        results.append(h)
                else:
                    hyps.append(h)
                if (len(hyps) == params['beam_size']
                        or len(results) == params['beam_size']):
                    break
            steps += 1

        if len(results) == 0:
            results = hyps
        # Take the most likely hypothesis for this sample.
        hyps_sorted = sorted(results, key=lambda h: h.avg_log_prob,
                             reverse=True)
        best_hyp = hyps_sorted[0]
        best_hyp.abstract = " ".join(
            output_to_words(best_hyp.tokens, vocab,
                            dataset["article_oovs"][0])[1:-1])
        best_hyp.text = dataset["article"].numpy()[0].decode()
        print('best_hyp is ', best_hyp.abstract)
        res.append(best_hyp.abstract)
    return res
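# `output_to_words` is also defined elsewhere in the project. A minimal sketch
# of what it has to do, reconstructed from its call sites above; the names
# `vocab.size()` and `vocab.id_to_word()` are assumptions standing in for
# whatever the project's Vocab class actually provides. The callers slice the
# result with [1:-1] to strip the [START] and [STOP] tokens.
def output_to_words(ids, vocab, article_oovs):
    words = []
    for i in ids:
        if i < vocab.size():
            words.append(vocab.id_to_word(i))  # in-vocabulary token
        else:
            # Ids >= vocab size index into this article's OOV list; this is
            # what lets the pointer mechanism emit copied source words.
            words.append(article_oovs[i - vocab.size()])
    return words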
def batch_beam_decode(model, enc_data, vocab, params):
    # Compared with the versions above, this one drops the unused arguments
    # (e.g. enc_pad_mask) and keeps only the useful ones:
    # enc_inp, enc_outputs, dec_input, dec_state.
    def decode_onestep(enc_inp, enc_outputs, dec_input, dec_state):
        """Decode one step (used for beam-search decoding).

        Args:
            enc_inp : encoder input ids, shape = (batch_size, seq_len)
            enc_outputs : hidden outputs computed by the encoder,
                shape = (batch_size, max_enc_len, hidden_size)
            dec_input : the previously decoded tokens,
                shape = (batch_size, 1)
            dec_state : previous decoder states,
                shape = (batch_size, hidden_size)

        Returns:
            A dictionary with the decoder state and the top 2 * beam_size
            token ids and log probabilities.
        """
        # Here batch_size must equal beam_size so the whole batch runs as one
        # matrix operation on the GPU (e.g. a 30000 x 9 computation reshaped
        # to 90000 x 3 for parallelism).
        final_dists, dec_hidden, attentions, p_gens = model(
            enc_outputs,  # shape=(3, 115, 256)
            dec_state,    # shape=(3, 256)
            enc_inp,      # shape=(3, 115)
            dec_input)    # shape=(3, 1)
        # Take the largest probabilities and their token ids, then take the
        # log of the probabilities.
        top_k_probs, top_k_ids = tf.nn.top_k(tf.squeeze(final_dists),
                                             k=params["beam_size"] * 2)
        top_k_log_probs = tf.math.log(top_k_probs)
        return {"dec_state": dec_hidden,
                "top_k_ids": top_k_ids,
                "top_k_log_probs": top_k_log_probs}

    # Determine the input size.
    batch_data = enc_data["enc_input"]  # shape=(batch_size, actual input seq len)
    batch_size = enc_data["enc_input"].shape[0]
    # Allocate a list to store the results; the whole batch is decoded
    # together, so its size is batch_size.
    predicts = [''] * batch_size
    inputs = tf.convert_to_tensor(batch_data)  # shape=(batch_size, seq_len)
    # We run the encoder once and then use the results to decode each step.
    enc_outputs, state = model.call_encoder(inputs)

    hyps = [Hypothesis(tokens=[vocab.word_to_id('[START]')],
                       log_probs=[0.0],
                       state=state[0])
            for _ in range(params['batch_size'])]
    results = []  # list to hold the top beam_size finished hypotheses
    steps = 0     # initial step
    while steps < params['max_dec_steps']:
        # Latest token for each hypothesis, shape: [beam_size]; replace
        # in-article OOV ids with the [UNK] token id.
        latest_tokens = [h.latest_token for h in hyps]
        latest_tokens = [t if t in range(params['vocab_size'])
                         else vocab.word_to_id('[UNK]')
                         for t in latest_tokens]
        # Token history of each hypothesis, with OOV ids mapped to [UNK];
        # this is fed back in as the encoder-side ids.
        tokens = [[t if t in range(params['vocab_size'])
                   else vocab.word_to_id('[UNK]') for t in h.tokens]
                  for h in hyps]
        states = [h.state for h in hyps]

        # Decode the 2 * beam_size most likely tokens at step t for each
        # hypothesis.
        dec_input = tf.expand_dims(latest_tokens, axis=1)  # shape=(batch, 1)
        enc_input = tf.convert_to_tensor(tokens)           # shape=(batch, steps + 1)
        dec_states = tf.stack(states, axis=0)
        returns = decode_onestep(enc_input,
                                 enc_outputs,
                                 dec_input,
                                 dec_states)
        topk_ids = returns['top_k_ids']
        topk_log_probs = returns['top_k_log_probs']
        new_states = returns['dec_state']

        all_hyps = []
        num_orig_hyps = 1 if steps == 0 else len(hyps)
        for i in range(num_orig_hyps):
            h, new_state = hyps[i], new_states[i]
            for j in range(params['beam_size'] * 2):
                # Extend each hypothesis with each of the top k tokens (this
                # gives 2 x beam_size new hypotheses per old hypothesis).
                new_hyp = h.extend(token=topk_ids[i, j].numpy(),
                                   log_prob=topk_log_probs[i, j],
                                   state=new_state)
                all_hyps.append(new_hyp)

        # Sort all the hypotheses and keep only the beam_size most likely ones.
        hyps = []
        sorted_hyps = sorted(all_hyps, key=lambda h: h.avg_log_prob,
                             reverse=True)
        for h in sorted_hyps:
            if h.latest_token == vocab.word_to_id('[STOP]'):
                if steps >= params['min_dec_steps']:
                    results.append(h)
            else:
                hyps.append(h)
            if (len(hyps) == params['beam_size']
                    or len(results) == params['beam_size']):
                break
        steps += 1

    if len(results) == 0:
        results = hyps

    # At the end of the loop we return the most likely hypothesis, which
    # holds the most likely output sequence given the input fed to the model.
    hyps_sorted = sorted(results, key=lambda h: h.avg_log_prob, reverse=True)
    best_hyp = hyps_sorted[0]
    best_hyp.abstract = " ".join(
        output_to_words(best_hyp.tokens, vocab,
                        enc_data["article_oovs"][0])[1:-1])
    best_hyp.text = enc_data["article"].numpy()[0].decode()
    print('best_hyp is ', best_hyp.abstract)
    return best_hyp
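# A minimal usage sketch. `model`, `vocab` and `test_batches` are assumptions
# standing in for whatever the surrounding project provides; the param keys
# mirror the ones read by the decoders above, but the values shown here are
# illustrative only.
if __name__ == "__main__":
    params = {"beam_size": 3,
              "batch_size": 3,   # the decoders assume batch_size == beam_size
              "vocab_size": 30000,
              "max_dec_steps": 40,
              "min_dec_steps": 10,
              "is_coverage": True}
    for batch in test_batches:  # hypothetical iterable of batch dicts
        best = batch_beam_decode(model, batch, vocab, params)
        print(best.abstract)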