def eval_one(self, input_tokens):
    """Build the autoregressive inference graph for one tokenised sentence.

    The tokens are mapped to ids through ``self.token2idx`` (unknown tokens
    fall back to the ``"<UNK>"`` id) and wrapped into a batch of one.
    Zero-filled placeholders stand in for the target-side tuple; at
    inference the input ys is ignored.

    Layout of the tuples handed to ``self.encode`` / ``self.decode`` (as
    recorded by the original author):
        xs: (x, x_seqlens, sents1)
        ys: (decoder_input, y, y_seqlen, sents2)

    Args:
        input_tokens: iterable of tokens making up the source sentence.

    Returns:
        The result of ``convert_idx_to_token_tensor`` applied to the first
        row of the last ``y_hat`` returned by ``self.decode``.
    """
    vocab = self.token2idx
    token_ids = [vocab.get(tok, vocab["<UNK>"]) for tok in input_tokens]

    # Source side: a batch that contains only this sentence.
    x = [token_ids]
    x_seqlens = len(token_ids)
    sent1 = convert_idx_to_token_tensor(token_ids, self.idx2token)
    xs = (x, x_seqlens, sent1)

    # Target side: zero placeholders sized from the hyper-parameters.
    batch_size = self.hp.batch_size
    target_len = self.hp.maxlen2
    ys = (
        tf.zeros([batch_size, target_len], dtype=tf.int32),
        tf.zeros([batch_size, target_len], dtype=tf.int32),
        tf.zeros([batch_size], dtype=tf.int32),
        tf.zeros([batch_size], dtype=tf.string),
    )
    _, y, y_seqlen, sents2 = ys

    # Decoding starts from a single "<GO>" column, one row per source row.
    go_id = self.token2idx["<GO>"]
    decoder_inputs = tf.ones((tf.shape(x)[0], 1), tf.int32) * go_id
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1, src_masks = self.encode(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    # Decoding loop.
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<PAD>"]:
            break
        next_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (next_inputs, y, y_seqlen, sents2)

    # Sentence produced by the model for the only row of the batch.
    return convert_idx_to_token_tensor(y_hat[0], self.idx2token)
def eval(self, xs, ys):
    """Predict autoregressively and log one random sample of the batch.

    At inference, the decoder input carried in ``ys`` is ignored and
    replaced by a column of ``"<s>"`` ids.

    Args:
        xs: source-side tuple passed unchanged to ``self.encode``; ``xs[0]``
            supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.

    Returns:
        ``(y_hat, summaries)``: the last ``y_hat`` returned by
        ``self.decode`` (documented by the original author as (N, T2)) and
        the merged summary op.
    """
    _, y, y_seqlen, sents2 = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<s>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1, src_masks = self.encode(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<pad>"]:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries
def predict(self, xs, ys):
    """Predict autoregressively and log one random sample of the batch.

    The decoder input carried in ``ys`` is replaced by a column of
    ``"<s>"`` ids taken from ``self.char_index``.

    Args:
        xs: source-side tuple passed unchanged to ``self.encoder``;
            ``xs[0]`` supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.

    Returns:
        ``(y_hat, summaries)``: the last ``y_hat`` returned by
        ``self.decode`` and the merged summary op.
    """
    _, y, y_seqlen, sents2 = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.char_index["<s>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1, src_masks = self.encoder(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    for _ in tqdm(range(self.sequence_length)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.char_index["<pad>"]:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.index_char)
    sent2 = sents2[n]

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries
def eval(self, xs, ys):
    """Predict autoregressively and log one random sample of the batch.

    The decoder input carried in ``ys`` is replaced by a column of
    ``'<S>'`` ids.

    Args:
        xs: source-side tuple passed unchanged to ``self.encode``; ``xs[0]``
            supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.

    Returns:
        ``(y_hat, summaries)``: the last ``y_hat`` returned by
        ``self.decode`` and the merged summary op.
    """
    _, y, y_seqlen, sents2 = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx['<S>']
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1 = self.encode(xs, False)

    logging.info('Infernece graph is building, Please be patient.')
    for _ in tqdm(range(self.hp.maxlen)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, False)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx['<PAD>']:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    # The source sentence comes from the encoder output; the previous code
    # indexed sents2 here, so "sent1" and "sent2" logged the same text.
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    tf.summary.text('sent1', sent1)
    tf.summary.text('pred', pred)
    tf.summary.text('sent2', sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries
def eval(self, xs, ys, mode):
    """Decode autoregressively from a latent sampled off the VAE encoder.

    ``self.encoder_vae`` yields ``mu`` and ``sigma``; a latent
    ``z = mu + sigma * eps`` (``eps`` drawn from a standard normal) is fed
    to ``self.decoder_vae`` at every step. The decoder input carried in
    ``ys`` is replaced by a column of ``"<S>"`` ids.

    Args:
        xs: source-side tuple passed unchanged to ``self.encoder_vae``;
            ``xs[0]`` supplies the batch size.
        ys: tuple ``(input_ids_vae_decoder_enc, input_ids_vae_decoder_dec,
            output_ids_vae_decoder_dec, sents1, sents2)``.
        mode: must be ``"TPAGE"`` or ``"PPAGE"``.

    Returns:
        ``(y_hat, summaries)``: the last ``y_hat`` returned by
        ``self.decoder_vae`` and the merged summary op.

    Raises:
        ValueError: if ``mode`` is neither ``"TPAGE"`` nor ``"PPAGE"``.
    """
    # Per the original author, this call is mainly needed for the embedding.
    mu, sigma = self.encoder_vae(xs, training=False, mode=mode)

    if mode not in ("TPAGE", "PPAGE"):
        # A bare string cannot be raised (it fails with TypeError), so raise
        # a real exception carrying the same message.
        raise ValueError("许海明在这里提醒你:出现非法mode")

    # VAE path; author's reminder to self: set all embedding flags to True.
    z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)

    (input_ids_vae_decoder_enc, _, output_ids_vae_decoder_dec,
     sents1, sents2) = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<S>"]
    ys = (input_ids_vae_decoder_enc, decoder_inputs,
          output_ids_vae_decoder_dec, sents1, sents2)

    logging.info("Inference graph is being built. Please be patient.")
    for _ in tqdm(range(self.hp.maxlen_vae_Decoder_de)):
        logits, y_hat, y, sents2 = self.decoder_vae(
            ys, z, training=False, mode=mode)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["[PAD]"]:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (input_ids_vae_decoder_enc, _decoder_inputs,
              output_ids_vae_decoder_dec, sents1, sents2)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries
def eval_smaple(self, xs, ys, top_k=100, top_p=1, temperature=0.5):
    """Predict autoregressively by sampling and log one random sample.

    At every step the last-position logits are divided by ``temperature``,
    filtered through ``self.top_k_logits`` and ``self.top_p_logits``, and
    one token per row is drawn with ``tf.multinomial``. The decoder input
    carried in ``ys`` is ignored and replaced by a column of ``"<GO>"`` ids.

    Args:
        xs: source-side tuple passed unchanged to ``self.encode``; ``xs[0]``
            supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.
        top_k: forwarded to ``self.top_k_logits`` as ``k``.
        top_p: forwarded to ``self.top_p_logits`` as ``p``.
        temperature: divisor applied to the logits before filtering.

    Returns:
        ``(pred, summaries)``: the token tensor built from the sampled ids
        of the monitored row, and the merged summary op.
    """
    _, y, y_seqlen, sents2 = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<GO>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1, src_masks = self.encode(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    # Decoding loop.
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        logits = logits[:, -1, :] / tf.to_float(temperature)
        logits = self.top_k_logits(logits, k=top_k)
        logging.debug("logits is : %s", logits)
        logits = self.top_p_logits(logits, p=top_p)
        # y_hat is the single token sampled at this step: one column per row.
        y_hat = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<PAD>"]:
            break
        # Append the new token to everything decoded so far. The previous
        # code always concatenated onto the initial "<GO>" column, so the
        # decoder never saw more than the most recent token.
        decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (decoder_inputs, y, y_seqlen, sents2)

    # Sampled sequence without the leading "<GO>" column.
    sampled = decoder_inputs[:, 1:]

    # Pick the row written to the summaries. maxval is exclusive, so pass
    # the batch size itself: every row can be drawn and a batch of one
    # stays valid.
    n = tf.random_uniform((), 0, tf.shape(sampled)[0], tf.int32)
    sent1 = sents1[n]  # source sentence
    pred = convert_idx_to_token_tensor(sampled[n], self.idx2token)  # model
    sent2 = sents2[n]  # target sentence

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return pred, summaries
def eval_gen(self, xs, ys):
    """Run three encode/decode rounds, extending both inputs each round.

    After each round ``random_id(logits)`` is concatenated onto the
    original source ids ``x`` and onto the initial ``"<s>"`` decoder column,
    and the next round encodes and decodes those extended inputs.

    Args:
        xs: tuple ``(x, seqlens, sents1)``; the original author documents
            ``x`` as (N, T1).
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``; the decoder
            input is replaced by a column of ``"<s>"`` ids.

    Returns:
        ``(y_hat, summaries, logits)``: ``y_hat`` and ``logits`` from the
        last ``self.decode`` call, plus the merged summary op.
    """
    x, seqlens, sents1 = xs
    _, y, y_seqlen, sents2 = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<s>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    logging.info("Inference graph is being built. Please be patient.")
    for _ in tqdm(range(3)):
        memory, sents1, src_masks = self.encode(xs, False)
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<pad>"]:
            break

        # Concatenate generated ids onto the source and decoder inputs.
        # NOTE(review): random_id is called twice; if it is stochastic the
        # two sides receive different ids -- confirm this is intended.
        _x = tf.concat((x, random_id(logits)), 1)
        xs = (_x, seqlens, sents1)
        _decoder_inputs = tf.concat((decoder_inputs, random_id(logits)), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries, logits
def eval(self, inputs, targets):
    """Run inference and log one random example as a text summary.

    The summary string joins, with ``"|||"``, the activated prediction of
    the chosen row and the token rendering of that row for every input.

    Args:
        inputs: sequence of input tensors; passed to ``self.infer`` and
            indexed per row for the summary. Input ``i`` is rendered with
            ``self._idx2tokens[i]``.
        targets: not used by this method.

    Returns:
        ``(activated_infr, summaries)``: the output of ``self.activate`` and
        the merged summary op.
    """
    inferences = self.infer(inputs)
    activated_infr = self.activate(inferences)

    # maxval is exclusive, so pass the batch size itself: every row can be
    # drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(inferences)[0], tf.int32)

    tfstrings = [
        tf.convert_to_tensor("pred:", tf.string)
        + tf.as_string(activated_infr[n])
    ]
    for i, inputt in enumerate(inputs):
        tokens = convert_idx_to_token_tensor(inputt[n], self._idx2tokens[i])
        prefix = tf.convert_to_tensor("input_%s:" % i, tf.string)
        tfstrings.append(prefix + tokens)

    tf.summary.text("eval result",
                    tf.strings.join(tfstrings, separator="|||"))
    summaries = tf.summary.merge_all()

    return activated_infr, summaries
def eval(self, xs, ys):
    """Predict autoregressively; log one random sample and the eval loss.

    At inference, the decoder input carried in ``ys`` is ignored and
    replaced by a column of ``"<s>"`` ids. ``y_hat`` is the argmax of the
    decoder logits over the last axis.

    Args:
        xs: source-side tuple passed to ``self.encode`` and ``self.decode``;
            ``xs[0]`` supplies the batch size.
        ys: tuple ``(decoder_inputs, y, sents2)``.

    Returns:
        ``(y_hat, summaries, sent2, pred)``: the last predictions
        (documented by the original author as (N, T2)), the merged summary
        op, and the target sentence and token rendering of the monitored
        row.
    """
    _, y, sents2 = ys
    # One "<s>" per row, i.e. [[<s>], [<s>], ...].
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<s>"]
    ys = (decoder_inputs, y, sents2)

    memory, sents1 = self.encode(xs, False)

    y_hat = None
    logits = None
    logging.info("Inference graph is being built. Please be patient.")
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y, sents2 = self.decode(xs, ys, memory, False)
        y_hat = tf.to_int32(tf.argmax(logits, axis=-1))
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<pad>"]:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, sents2)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    eval_loss = self._calc_loss(y, logits)
    tf.summary.scalar('eval_loss', eval_loss)

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries, sent2, pred
def eval(self, xs, ys):
    """Predict autoregressively and log one random sample of the batch.

    At inference, the decoder input carried in ``ys`` is ignored and
    replaced by a column of ``"<s>"`` ids.

    Args:
        xs: source-side tuple passed unchanged to ``self.encode``; ``xs[0]``
            supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.

    Returns:
        ``(y_hat, summaries)``: the last ``y_hat`` returned by
        ``self.decode`` (documented by the original author as (N, T2)) and
        the merged summary op.
    """
    _, y, y_seqlen, sents2 = ys
    # Author's notes: xs[0] is the first tensor, x, with shape [N, T], so
    # tf.shape(xs[0])[0] is the batch size. Unlike training, evaluation
    # decodes sequentially, one target token per pass. At t = 0 the decoder
    # input is therefore (N, 1), every row starting with <s>; after each
    # pass every row is one token longer ((N, 2) at t = 1, and so on). The
    # extended decoder inputs go back into ys for the next pass.
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<s>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1, src_masks = self.encode(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    # Sequential prediction: maxlen2 passes (the length of a full
    # sentence), each feeding the previous decoder output into the next
    # decoder input.
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<pad>"]:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Monitor a random sample: sents1[n] is the source sentence, pred the
    # decoder's prediction and sents2[n] the reference output.
    # maxval is exclusive, so pass the batch size itself: every row can be
    # drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries
def eval(self, xs, ys):
    """Predict autoregressively and log one random sample of the batch.

    At inference, the decoder input carried in ``ys`` is ignored and
    replaced by a column of ``"<GO>"`` ids.

    Args:
        xs: source-side tuple passed unchanged to ``self.encode``; ``xs[0]``
            supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.

    Returns:
        ``(y_hat, summaries)``: the last ``y_hat`` returned by
        ``self.decode`` (documented by the original author as (N, T2)) and
        the merged summary op.
    """
    _, y, y_seqlen, sents2 = ys
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<GO>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1, src_masks = self.encode(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    # Decoding loop.
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, src_masks, False)
        # NOTE(review): y_hat is symbolic while the graph is being built, so
        # this check is probably not a data-dependent stop -- confirm.
        if tf.reduce_sum(y_hat, 1) == self.token2idx["<PAD>"]:
            break
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Monitor a random sample. tf.random_uniform draws uniformly from
    # [minval, maxval); maxval is exclusive, so pass the batch size itself:
    # every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]  # source sentence
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)  # model
    sent2 = sents2[n]  # target sentence

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries
def eval(self, xs, ys):
    """Predict autoregressively; return predictions, summaries and a loss.

    At inference, the decoder input carried in ``ys`` is ignored and
    replaced by a column of ``"<s>"`` ids. The loss is the label-smoothed
    softmax cross-entropy between the final logits (cut to the target
    length) and ``y``, averaged over positions that are not ``"<pad>"``.

    Args:
        xs: source-side tuple passed unchanged to ``self.encode``; ``xs[0]``
            supplies the batch size.
        ys: tuple ``(decoder_inputs, y, y_seqlen, sents2)``.

    Returns:
        ``(y_hat, summaries, loss)``: the last ``y_hat`` returned by
        ``self.decode`` (documented by the original author as (N, T2)), the
        merged summary op, and the scalar loss.
    """
    _, y, y_seqlen, sents2 = ys
    # decoder_inputs: (N, 1)
    decoder_inputs = tf.ones(
        (tf.shape(xs[0])[0], 1), tf.int32) * self.token2idx["<s>"]
    ys = (decoder_inputs, y, y_seqlen, sents2)

    memory, sents1 = self.encode(xs, False)

    logging.info("Inference graph is being built. Please be patient.")
    for _ in tqdm(range(self.hp.maxlen2)):
        logits, y_hat, y, sents2 = self.decode(ys, memory, False)
        # The former tf.cond around this concat built the same tensor in
        # both branches, so the plain concat is equivalent.
        _decoder_inputs = tf.concat((decoder_inputs, y_hat), 1)
        ys = (_decoder_inputs, y, y_seqlen, sents2)

    # Loss on the final logits.
    y_ = label_smoothing(tf.one_hot(y, depth=self.hp.vocab_size))
    ce = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits[:, :tf.shape(y_)[1], :], labels=y_)
    nonpadding = tf.to_float(
        tf.not_equal(y, self.token2idx["<pad>"]))  # 0 where y is <pad>
    # The epsilon guards against division by zero when every position is
    # padding.
    loss = tf.reduce_sum(ce * nonpadding) / (tf.reduce_sum(nonpadding) + 1e-7)

    # Monitor a random sample. maxval is exclusive, so pass the batch size
    # itself: every row can be drawn and a batch of one stays valid.
    n = tf.random_uniform((), 0, tf.shape(y_hat)[0], tf.int32)
    sent1 = sents1[n]
    pred = convert_idx_to_token_tensor(y_hat[n], self.idx2token)
    sent2 = sents2[n]

    tf.summary.text("sent1", sent1)
    tf.summary.text("pred", pred)
    tf.summary.text("sent2", sent2)
    summaries = tf.summary.merge_all()

    return y_hat, summaries, loss