def f_eval(self, pred, target, length):
    """Trim predicted and target token sequences to their true lengths, then return corpus BLEU.

    pred/target are tensors of token ids (rows padded to a common width);
    length gives the valid length of each row.
    """
    trimmed_preds = []
    for idx, row in enumerate(pred.tolist()):
        trimmed_preds.append(row[:length[idx]])

    trimmed_targets = []
    for idx, row in enumerate(target.tolist()):
        trimmed_targets.append(row[:length[idx]])

    return get_bleu(trimmed_preds, trimmed_targets)
def f_inference(self, eval_data):
    """Spot-check generation: beam-decode from random latent vectors for the
    first two batches of `eval_data`, printing inputs, outputs, and BLEU.
    """
    self.m_mean_loss = 0
    bleu_score_list = []
    hidden_size = self.m_network.m_hidden_size
    batch_size = self.m_batch_size
    batch_index = 0

    # Added: eval mode + no autograd, consistent with the other evaluation
    # routines in this file.
    self.m_network.eval()
    with torch.no_grad():
        for input_batch, target_batch, length_batch in eval_data:
            # Fix: the original used `continue`, which kept iterating the
            # entire loader while doing nothing; `break` stops after the two
            # inspected batches with identical output.
            if batch_index > 1:
                break
            batch_index += 1

            input_batch = input_batch.to(self.m_device)
            length_batch = length_batch.to(self.m_device)
            target_batch = target_batch.to(self.m_device)

            print("->" * 10, *idx2word(input_batch, i2w=self.m_i2w, pad_idx=self.m_pad_idx), sep='\n')

            # Sample a pure-noise latent as the decoder conditioning.
            # NOTE(review): assumes every batch has exactly self.m_batch_size
            # rows — a partial last batch would mismatch; confirm the loader.
            hidden = torch.randn([batch_size, hidden_size]).to(self.m_device)
            mean = hidden

            max_seq_len = max(length_batch)
            samples, scores = self.f_decode_text_beam(mean, max_seq_len)

            pred = samples
            lens = length_batch.cpu().tolist()  # hoisted: one transfer, not one per row
            target = target_batch.cpu().tolist()
            target = [target_i[:lens[index]] for index, target_i in enumerate(target)]

            bleu_score_batch = get_bleu(pred, target)
            print("batch bleu score", bleu_score_batch)
            bleu_score_list.append(bleu_score_batch)

            print("<-" * 10, *idx2word(samples, i2w=self.m_i2w, pad_idx=self.m_pad_idx), sep='\n')

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)
def f_eval_new(self, train_data, eval_data):
    """Decode reviews from averaged user/item latent means and print corpus BLEU."""
    self.f_init_user_item(eval_data)
    self.f_get_user_item(train_data, eval_data)

    eval_user2uid = eval_data.m_user2uid
    eval_item2iid = eval_data.m_item2iid

    batch_index = 0
    bleu_score_list = []

    self.m_network.eval()
    with torch.no_grad():
        for input_batch, target_batch, length_batch, user_batch, item_batch in eval_data:
            input_batch_gpu = input_batch.to(self.m_device)
            length_batch_gpu = length_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)

            z_mean_gpu = self.m_user_embedding[user_batch].to(self.m_device)
            s_mean_gpu = self.m_item_embedding[item_batch].to(self.m_device)

            # Average the user and item latent means as decoder conditioning.
            mean = (z_mean_gpu + s_mean_gpu) / 2

            max_seq_len = max(length_batch)
            samples, z = self.f_decode_text(mean, max_seq_len)

            lens = length_batch.tolist()
            decoded = samples.cpu().tolist()
            reference = target_batch.tolist()
            preds = [row[:lens[i]] for i, row in enumerate(decoded)]
            targets = [row[:lens[i]] for i, row in enumerate(reference)]

            bleu_score_list.append(get_bleu(preds, targets))

    print("new bleu score", np.mean(bleu_score_list))
def f_eval(self, eval_data):
    """Run each batch through the network, decode from its output logits, and
    print the mean corpus BLEU over `eval_data`.
    """
    self.m_mean_loss = 0
    bleu_score_list = []

    # Added: the original ran the forward pass with gradient tracking and no
    # .eval() — inconsistent with the other evaluation routines here and
    # needlessly memory-hungry during evaluation.
    self.m_network.eval()
    with torch.no_grad():
        for input_batch, user_batch, target_batch, ARe_batch, RRe_batch, length_batch in eval_data:
            input_batch_gpu = input_batch.to(self.m_device)
            user_batch_gpu = user_batch.to(self.m_device)
            length_batch_gpu = length_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)
            RRe_batch_gpu = RRe_batch.to(self.m_device)
            ARe_batch_gpu = ARe_batch.to(self.m_device)

            logits, z_mean, z_logv, z = self.m_network(input_batch_gpu, user_batch_gpu, length_batch_gpu)

            # Decode conditioned on the network's output logits.
            mean = logits
            max_seq_len = max(length_batch)
            samples, z = self.f_decode_text(mean, max_seq_len)

            lens = length_batch.tolist()
            preds = samples.cpu().tolist()
            target_rows = target_batch.tolist()
            preds = [pred_i[:lens[index]] for index, pred_i in enumerate(preds)]
            targets = [target_i[:lens[index]] for index, target_i in enumerate(target_rows)]

            bleu_score_list.append(get_bleu(preds, targets))

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)
def f_rec_bleu(self, eval_data):
    """Reconstruct each batch from the posterior mean and print mean corpus BLEU.

    Removed dead locals from the original (`infer_loss_list`, `hidden_size`,
    `batch_size`, `batch_index`) — none were used.
    """
    self.m_mean_loss = 0
    bleu_score_list = []

    self.m_network.eval()
    with torch.no_grad():
        for input_batch, target_batch, length_batch, _, _ in eval_data:
            input_batch_gpu = input_batch.to(self.m_device)
            length_batch_gpu = length_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)

            logp, z_mean, z_logv, z, _, _ = self.m_network(
                input_batch_gpu, length_batch_gpu)

            # Deterministic reconstruction: decode from the posterior mean,
            # not a sampled z.
            mean = z_mean
            max_seq_len = max(length_batch)
            samples, z = self.f_decode_text(mean, max_seq_len)

            lens = length_batch.tolist()
            preds = samples.cpu().tolist()
            target_rows = target_batch.tolist()
            preds = [pred_i[:lens[index]] for index, pred_i in enumerate(preds)]
            targets = [target_i[:lens[index]] for index, target_i in enumerate(target_rows)]

            bleu_score_list.append(get_bleu(preds, targets))

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)
def f_eval(self, eval_data):
    """Beam-decode from random latent vectors over all batches and print mean BLEU.

    NOTE(review): the latent is pure noise (torch.randn), so this measures
    unconditional generation against the references, not reconstruction.
    """
    self.m_mean_loss = 0
    bleu_score_list = []
    hidden_size = self.m_network.m_hidden_size
    batch_size = self.m_batch_size

    # Added: eval mode + no autograd; the original tracked gradients for no
    # reason, unlike the other evaluation routines in this file.
    self.m_network.eval()
    with torch.no_grad():
        for input_batch, target_batch, length_batch in eval_data:
            input_batch = input_batch.to(self.m_device)
            target_batch = target_batch.to(self.m_device)
            length_batch = length_batch.to(self.m_device)

            # NOTE(review): assumes each batch has exactly self.m_batch_size
            # rows — a partial final batch would mismatch; confirm the loader
            # drops the last batch.
            hidden = torch.randn([batch_size, hidden_size]).to(self.m_device)
            mean = hidden

            max_seq_len = max(length_batch)
            samples, scores = self.f_decode_text_beam(mean, max_seq_len)

            pred = samples
            lens = length_batch.cpu().tolist()  # hoisted: was indexing a CPU tensor per row
            target = target_batch.cpu().tolist()
            target = [target_i[:lens[index]] for index, target_i in enumerate(target)]

            bleu_score_list.append(get_bleu(pred, target))

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)
def f_eval_new(self, train_data, eval_data):
    """Generate reviews from learned user/item hidden vectors; print corpus
    BLEU plus total target word and review counts.
    """
    self.f_init_user_item(eval_data)
    self.f_get_user_item(train_data, eval_data)

    eval_user2uid = eval_data.m_user2uid
    eval_item2iid = eval_data.m_item2iid

    batch_index = 0
    bleu_score_list = []
    total_target_word_num = 0
    total_review_num = 0

    self.m_network.eval()
    with torch.no_grad():
        for input_batch, input_length_batch, user_batch, item_batch, target_batch, target_length_batch, random_flag in eval_data:
            batch_size = input_batch.size(0)

            input_batch_gpu = input_batch.to(self.m_device)
            input_length_batch_gpu = input_length_batch.to(self.m_device)
            user_batch_gpu = user_batch.to(self.m_device)
            item_batch_gpu = item_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)
            target_length_batch_gpu = target_length_batch.to(self.m_device)

            # Decoder-side input drops the final token; lengths shrink by one.
            input_de_batch_gpu = target_batch_gpu[:, :-1]
            input_de_length_batch_gpu = target_length_batch_gpu - 1

            user_hidden_gpu = self.m_user_embedding(user_batch_gpu)
            item_hidden_gpu = self.m_item_embedding(item_batch_gpu)

            max_seq_len = max(target_length_batch - 1)
            samples, z = self.f_decode_text(user_hidden_gpu, item_hidden_gpu, max_seq_len)

            lens = (target_length_batch - 1).tolist()
            decoded = samples.cpu().tolist()
            reference = target_batch[:, 1:].tolist()  # drop SOS to align with decoder output
            preds = [row[:lens[i]] for i, row in enumerate(decoded)]
            targets = [row[:lens[i]] for i, row in enumerate(reference)]

            bleu_score_list.append(get_bleu(preds, targets))

            # One token per review is excluded from the word count
            # (presumably the EOS token — confirm).
            total_target_word_num += sum(lens) - batch_size
            total_review_num += batch_size
            batch_index += 1

    print("generating new reviews bleu score", np.mean(bleu_score_list))
    print("total_target_word_num", total_target_word_num)
    print("total_review_num", total_review_num)
def f_eval_rec(self, train_data, eval_data):
    """Reconstruct target reviews through the full network and print mean corpus BLEU."""
    self.m_mean_loss = 0
    bleu_score_list = []

    # Added: the original used no_grad but never switched the network to eval
    # mode, unlike the sibling evaluation routines.
    self.m_network.eval()
    with torch.no_grad():
        for input_batch, input_length_batch, user_batch, item_batch, target_batch, target_length_batch, random_flag in eval_data:
            input_batch_gpu = input_batch.to(self.m_device)
            input_length_batch_gpu = input_length_batch.to(self.m_device)
            user_batch_gpu = user_batch.to(self.m_device)
            item_batch_gpu = item_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)
            target_length_batch_gpu = target_length_batch.to(self.m_device)

            # Decoder-side input drops the final token; lengths shrink by one.
            input_de_batch_gpu = target_batch_gpu[:, :-1]
            input_de_length_batch_gpu = target_length_batch_gpu - 1

            logits, z_prior, z_mean, z_logv, z, s_prior, s_mean, s_logv, s, l_mean, l_logv, l, variational_hidden = self.m_network(input_batch_gpu, input_length_batch_gpu, input_de_batch_gpu, input_de_length_batch_gpu, user_batch_gpu, item_batch_gpu, random_flag)

            # NOTE(review): the original computed a random_flag-dependent
            # `mean` (e.g. z_mean+s_mean+l_mean) here but never used it — the
            # decoder is always fed the three means separately. Dead code
            # removed.
            max_seq_len = max(target_length_batch - 1)
            samples, z = self.f_decode_text(z_mean, s_mean, l_mean, max_seq_len)

            lens = (target_length_batch - 1).tolist()
            preds = samples.cpu().tolist()
            # References drop the leading token to align with decoder output.
            target_rows = target_batch[:, 1:].tolist()
            preds = [pred_i[:lens[index]] for index, pred_i in enumerate(preds)]
            targets = [target_i[:lens[index]] for index, target_i in enumerate(target_rows)]

            bleu_score_list.append(get_bleu(preds, targets))

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)
def f_inference(self, eval_data):
    """Spot-check reconstruction: encode the first two batches, decode from the
    encoder's final hidden state, and print inputs, outputs, and BLEU.
    """
    self.m_mean_loss = 0
    bleu_score_list = []
    batch_index = 0

    # Added: eval mode + no autograd, matching the other eval routines here.
    self.m_network.eval()
    with torch.no_grad():
        for input_batch, target_batch, length_batch in eval_data:
            # Fix: `continue` in the original kept draining the loader while
            # doing nothing; `break` stops after the two inspected batches
            # with identical output.
            if batch_index > 1:
                break
            batch_index += 1

            input_batch = input_batch.to(self.m_device)
            length_batch = length_batch.to(self.m_device)
            target_batch = target_batch.to(self.m_device)

            logp, _, _, _, last_en_hidden, hidden_0 = self.m_network(
                input_batch, length_batch)

            print("->" * 10, *idx2word(input_batch, i2w=self.m_i2w, pad_idx=self.m_pad_idx), sep='\n')

            # Decode from the encoder's final hidden state.
            mean = last_en_hidden
            max_seq_len = max(length_batch)
            samples, scores = self.f_decode_text(mean, hidden_0, max_seq_len)

            pred = samples
            lens = length_batch.cpu().tolist()  # hoisted out of the comprehension
            target = target_batch.cpu().tolist()
            target = [target_i[:lens[index]] for index, target_i in enumerate(target)]

            bleu_score_batch = get_bleu(pred, target)
            print("batch bleu score", bleu_score_batch)
            bleu_score_list.append(bleu_score_batch)

            print("<-" * 10, *idx2word(samples, i2w=self.m_i2w, pad_idx=self.m_pad_idx), sep='\n')

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)
def f_eval_new(self, train_data, eval_data):
    """Generate reviews from stored user/item/local embeddings and print corpus BLEU."""
    self.f_init_user_item(eval_data)
    self.f_get_user_item(train_data, eval_data)

    eval_user2uid = eval_data.m_user2uid
    eval_item2iid = eval_data.m_item2iid

    batch_index = 0
    bleu_score_list = []

    self.m_network.eval()
    with torch.no_grad():
        for input_batch, input_length_batch, user_batch, item_batch, target_batch, target_length_batch, random_flag in eval_data:
            batch_size = input_batch.size(0)

            input_batch_gpu = input_batch.to(self.m_device)
            input_length_batch_gpu = input_length_batch.to(self.m_device)
            user_batch_gpu = user_batch.to(self.m_device)
            item_batch_gpu = item_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)
            target_length_batch_gpu = target_length_batch.to(self.m_device)

            # Decoder-side input drops the final token; lengths shrink by one.
            input_de_batch_gpu = target_batch_gpu[:, :-1]
            input_de_length_batch_gpu = target_length_batch_gpu - 1

            z_mean_gpu = self.m_user_embedding[user_batch].to(self.m_device)
            s_mean_gpu = self.m_item_embedding[item_batch].to(self.m_device)
            # Tile the single local embedding across the batch.
            l_mean_gpu = torch.cat(batch_size * [self.m_local_embedding]).to(self.m_device)

            # Kept for parity with the original; the decoder below receives
            # the three means separately.
            mean = z_mean_gpu + s_mean_gpu + l_mean_gpu

            max_seq_len = max(target_length_batch - 1)
            samples, z = self.f_decode_text(z_mean_gpu, s_mean_gpu, l_mean_gpu, max_seq_len)

            lens = (target_length_batch - 1).tolist()
            decoded = samples.cpu().tolist()
            reference = target_batch[:, 1:].tolist()
            preds = [row[:lens[i]] for i, row in enumerate(decoded)]
            targets = [row[:lens[i]] for i, row in enumerate(reference)]

            bleu_score_list.append(get_bleu(preds, targets))
            batch_index += 1

    print("bleu score", np.mean(bleu_score_list))
def f_eval_new(self, train_data, eval_data):
    """Generate reviews from user cluster probabilities and item embeddings;
    print corpus BLEU plus total target word and review counts.
    """
    self.f_init_user_item(eval_data)
    self.f_get_user_item(train_data, eval_data)

    eval_user2uid = eval_data.m_user2uid
    eval_item2iid = eval_data.m_item2iid

    batch_index = 0
    bleu_score_list = []
    total_target_word_num = 0
    total_review_num = 0

    self.m_network.eval()
    with torch.no_grad():
        for input_batch, input_length_batch, user_batch, item_batch, target_batch, target_length_batch, random_flag in eval_data:
            batch_size = input_batch.size(0)

            input_batch_gpu = input_batch.to(self.m_device)
            input_length_batch_gpu = input_length_batch.to(self.m_device)
            user_batch_gpu = user_batch.to(self.m_device)
            item_batch_gpu = item_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)
            target_length_batch_gpu = target_length_batch.to(self.m_device)

            # Decoder-side input drops the final token; lengths shrink by one.
            input_de_batch_gpu = target_batch_gpu[:, :-1]
            input_de_length_batch_gpu = target_length_batch_gpu - 1

            item_hidden_gpu = self.m_item_embedding(item_batch_gpu)
            input_user_cluster_prob_gpu = self.m_user_cluster_prob[user_batch_gpu]

            # Debug output for the first two users of the batch.
            print("---" * 20)
            print("input_user_cluster_prob_gpu 0", input_user_cluster_prob_gpu[0])
            print("input_user_cluster_prob_gpu 1", input_user_cluster_prob_gpu[1])

            max_seq_len = max(target_length_batch - 1)
            samples = self.f_decode_text(input_user_cluster_prob_gpu, item_hidden_gpu, max_seq_len)

            lens = (target_length_batch - 1).tolist()
            decoded = samples.cpu().tolist()
            reference = target_batch[:, 1:].tolist()
            preds = [row[:lens[i]] for i, row in enumerate(decoded)]
            targets = [row[:lens[i]] for i, row in enumerate(reference)]

            bleu_score_list.append(get_bleu(preds, targets))

            # One token per review is excluded from the word count
            # (presumably the EOS token — confirm).
            total_target_word_num += sum(lens) - batch_size
            total_review_num += batch_size
            batch_index += 1

    print("generating new reviews bleu score", np.mean(bleu_score_list))
    print("total_target_word_num", total_target_word_num)
    print("total_review_num", total_review_num)
def f_eval(self, train_data, eval_data):
    """Reconstruct reviews from concatenated user/item embeddings; print
    per-batch BLEU and the final mean corpus BLEU.
    """
    self.m_mean_loss = 0
    bleu_score_list = []

    # Build the user/item embedding tables before decoding.
    self.f_init_user_item(eval_data)
    self.f_get_user_item(train_data, eval_data)

    eval_user2uid = eval_data.m_user2uid
    eval_item2iid = eval_data.m_item2iid

    batch_index = 0
    self.m_network.eval()
    with torch.no_grad():
        for input_batch, user_batch, item_batch, target_batch, ARe_batch, RRe_batch, length_batch in eval_data:
            print("batch index", batch_index)
            batch_index += 1

            input_batch_gpu = input_batch.to(self.m_device)
            user_batch_gpu = user_batch.to(self.m_device)
            item_batch_gpu = item_batch.to(self.m_device)
            length_batch_gpu = length_batch.to(self.m_device)
            target_batch_gpu = target_batch.to(self.m_device)
            RRe_batch_gpu = RRe_batch.to(self.m_device)
            ARe_batch_gpu = ARe_batch.to(self.m_device)

            z_mean_gpu = self.m_user_embedding[user_batch].to(self.m_device)
            s_mean_gpu = self.m_item_embedding[item_batch].to(self.m_device)

            # Concatenate user and item latents as the decoder conditioning.
            mean = torch.cat([z_mean_gpu, s_mean_gpu], dim=1)

            max_seq_len = max(length_batch)
            samples, z = self.f_decode_text(mean, max_seq_len)

            lens = length_batch.tolist()
            decoded = samples.cpu().tolist()
            reference = target_batch.tolist()
            preds = [row[:lens[i]] for i, row in enumerate(decoded)]
            targets = [row[:lens[i]] for i, row in enumerate(reference)]

            bleu_score_batch = get_bleu(preds, targets)
            print("bleu_score_batch", bleu_score_batch)
            bleu_score_list.append(bleu_score_batch)

    mean_bleu_score = np.mean(bleu_score_list)
    print("bleu score", mean_bleu_score)