def f_eval_epoch(self, eval_data, network, optimizer, logger_obj, local_rank):
    """Evaluate ``network`` on ``eval_data`` and record mean precision/recall.

    A batch contributes to the averages only when both its precision and its
    recall are non-zero. If no batch qualifies, ``np.mean`` of the empty list
    yields ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        eval_data: Iterable yielding ``(pos_tag_batch, mask_batch, user_batch,
            item_batch)`` tuples.
        network: Model to evaluate. It is switched to eval mode for the call
            and put back into train mode on exit, even if evaluation raises.
        optimizer: Not used by this method.
        logger_obj: Object exposing ``f_add_output2IO``.
        local_rank: Forwarded unchanged to ``self.f_get_pred``.

    Side effects:
        Sets ``self.m_eval_iteration`` (copied from ``self.m_train_iteration``),
        ``self.m_mean_eval_precision`` and ``self.m_mean_eval_recall``.
    """
    precision_list = []
    recall_list = []

    self.m_eval_iteration = self.m_train_iteration
    logger_obj.f_add_output2IO(" "*10+" eval the user and item encoder"+" "*10)

    network.eval()
    try:
        with torch.no_grad():
            for pos_tag_batch, mask_batch, user_batch, item_batch in eval_data:
                user_batch_gpu = user_batch.to(self.m_device)
                item_batch_gpu = item_batch.to(self.m_device)

                preds = self.f_get_pred(network, user_batch_gpu, item_batch_gpu, local_rank)
                precision, recall = get_precision_recall(preds.cpu(), pos_tag_batch, mask_batch, k=3)

                # Batches with a zero metric are excluded from the averages.
                if precision != 0 and recall != 0:
                    precision_list.append(precision)
                    recall_list.append(recall)

        # Compute each mean once and reuse it for logging and bookkeeping.
        mean_precision = np.mean(precision_list)
        mean_recall = np.mean(recall_list)

        logger_obj.f_add_output2IO("%d, precision:%.4f, recall:%.4f"%(self.m_eval_iteration, mean_precision, mean_recall))

        self.m_mean_eval_precision = mean_precision
        self.m_mean_eval_recall = mean_recall
    finally:
        # Always hand the model back in training mode.
        network.train()
def f_eval_epoch(self, eval_data, network, optimizer, logger_obj):
    """Evaluate ``network`` on ``eval_data``; record mean loss/precision/recall.

    A batch contributes to the averages (loss included) only when both its
    precision and its recall are non-zero. If no batch qualifies, ``np.mean``
    of the empty lists yields ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        eval_data: Iterable yielding 12-element batches in the order
            ``(attr_item, attr_tf_item, attr_length_item, item, attr_user,
            attr_tf_user, attr_length_user, user, pos_target, pos_length,
            neg_target, neg_length)``; every element must support
            ``.to(device)``.
        network: Model to evaluate; must also expose ``f_pred_forward``. It is
            switched to eval mode for the call and put back into train mode on
            exit, even if evaluation raises.
        optimizer: Not used by this method.
        logger_obj: Object exposing ``f_add_output2IO`` and
            ``f_add_scalar2tensorboard``.

    Side effects:
        Sets ``self.m_eval_iteration`` (copied from ``self.m_train_iteration``),
        ``self.m_mean_eval_loss``, ``self.m_mean_eval_precision`` and
        ``self.m_mean_eval_recall``.
    """
    loss_list = []
    precision_list = []
    recall_list = []

    self.m_eval_iteration = self.m_train_iteration
    logger_obj.f_add_output2IO(" "*10+" eval the user and item encoder"+" "*10)

    network.eval()
    try:
        with torch.no_grad():
            for batch in eval_data:
                # Move the whole batch to the target device; the unpacking
                # still enforces that exactly 12 elements are present.
                (attr_item_gpu, attr_tf_item_gpu, attr_length_item_gpu, item_gpu,
                 attr_user_gpu, attr_tf_user_gpu, attr_length_user_gpu, user_gpu,
                 pos_target_gpu, pos_length_gpu,
                 neg_target_gpu, neg_length_gpu) = (
                    tensor.to(self.m_device) for tensor in batch)

                output = network(attr_item_gpu, attr_tf_item_gpu, attr_length_item_gpu, item_gpu,
                                 attr_user_gpu, attr_tf_user_gpu, attr_length_user_gpu, user_gpu)
                logits, mask, targets = network.f_pred_forward(
                    output, pos_target_gpu, pos_length_gpu, neg_target_gpu, neg_length_gpu)

                loss = self.m_rec_loss(logits, targets, mask)

                precision, recall = get_precision_recall(logits.cpu(), targets.cpu(), mask.cpu(), k=3)

                # Batches with a zero metric are excluded from the averages.
                if precision != 0 and recall != 0:
                    loss_list.append(loss.item())
                    precision_list.append(precision)
                    recall_list.append(recall)

        # Compute each mean once and reuse it for logging and bookkeeping.
        mean_loss = np.mean(loss_list)
        mean_precision = np.mean(precision_list)
        mean_recall = np.mean(recall_list)

        logger_obj.f_add_output2IO("%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f"%(self.m_eval_iteration, mean_loss, mean_precision, mean_recall))

        logger_obj.f_add_scalar2tensorboard("eval/loss", mean_loss, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/precision", mean_precision, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/recall", mean_recall, self.m_eval_iteration)

        self.m_mean_eval_loss = mean_loss
        self.m_mean_eval_precision = mean_precision
        self.m_mean_eval_recall = mean_recall
    finally:
        # Always hand the model back in training mode.
        network.train()
def f_eval_new(self, train_data, eval_data):
    """Evaluate ``self.m_network`` on ``eval_data`` and print mean precision/recall.

    ``self.f_get_user_item(train_data, eval_data)`` is called first. Batches
    whose precision and recall are both 0 are skipped. If every batch is
    skipped, ``np.mean`` of the empty lists prints ``nan`` (and NumPy emits a
    RuntimeWarning).

    Note that the network is left in eval mode when this method returns.

    Args:
        train_data: Forwarded to ``self.f_get_user_item``.
        eval_data: Forwarded to ``self.f_get_user_item`` and then iterated;
            must yield ``(pos_tag_batch, user_batch, item_batch)`` tuples.
    """
    self.f_get_user_item(train_data, eval_data)

    precision_list = []
    recall_list = []
    topk = 3

    print('--'*10)

    self.m_network.eval()
    with torch.no_grad():
        for pos_tag_batch, user_batch, item_batch in eval_data:
            # One device transfer per tensor is enough.
            user_batch_gpu = user_batch.to(self.m_device)
            item_batch_gpu = item_batch.to(self.m_device)

            preds = self.f_get_pred(self.m_network, user_batch_gpu, item_batch_gpu)
            precision, recall = get_precision_recall(preds.cpu(), pos_tag_batch, k=topk)

            if precision == 0 and recall == 0:
                continue

            precision_list.append(precision)
            recall_list.append(recall)

    print("precision: ", np.mean(precision_list))
    print("recall: ", np.mean(recall_list))
def f_train_epoch(self, train_data, network, optimizer, logger_obj):
    """Train ``network`` for one pass over ``train_data``.

    Every batch triggers an optimizer step. A batch contributes to the
    reported averages (loss included) only when both its precision and its
    recall are non-zero. If no batch qualifies, ``np.mean`` of the empty lists
    yields ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        train_data: Iterable yielding 12-element batches in the order
            ``(attr_item, attr_tf_item, attr_length_item, item, attr_user,
            attr_tf_user, attr_length_user, user, pos_target, pos_length,
            neg_target, neg_length)``; every element must support
            ``.to(device)``.
        network: Model to train; must also expose ``f_pred_forward``.
        optimizer: Optimizer stepped once per batch.
        logger_obj: Object exposing ``f_add_output2IO`` and
            ``f_add_scalar2tensorboard``.

    Side effects:
        Increments ``self.m_train_iteration`` once per batch, logs a windowed
        summary every ``self.m_print_interval`` batches, and sets
        ``self.m_mean_train_loss``, ``self.m_mean_train_precision`` and
        ``self.m_mean_train_recall``.
    """
    loss_list = []
    precision_list = []
    recall_list = []

    # Windowed statistics, reset after every periodic log line.
    tmp_loss_list = []
    tmp_precision_list = []
    tmp_recall_list = []

    iteration = 0

    logger_obj.f_add_output2IO(" "*10+"training the user and item encoder"+" "*10)

    network.train()
    for batch in train_data:
        # Move the whole batch to the target device; the unpacking still
        # enforces that exactly 12 elements are present.
        (attr_item_gpu, attr_tf_item_gpu, attr_length_item_gpu, item_gpu,
         attr_user_gpu, attr_tf_user_gpu, attr_length_user_gpu, user_gpu,
         pos_target_gpu, pos_length_gpu,
         neg_target_gpu, neg_length_gpu) = (
            tensor.to(self.m_device) for tensor in batch)

        output = network(attr_item_gpu, attr_tf_item_gpu, attr_length_item_gpu, item_gpu,
                         attr_user_gpu, attr_tf_user_gpu, attr_length_user_gpu, user_gpu)
        logits, mask, targets = network.f_pred_forward(
            output, pos_target_gpu, pos_length_gpu, neg_target_gpu, neg_length_gpu)

        loss = self.m_rec_loss(logits, targets, mask)

        # Metrics need values only, so keep them out of the autograd graph.
        precision, recall = get_precision_recall(logits.detach().cpu(), targets.cpu(), mask.cpu(), k=3)

        # NOTE(review): the flattened source does not show whether the
        # windowed (tmp_*) lists sit inside this filter; they are kept inside
        # it here -- confirm against the original file.
        if precision != 0 and recall != 0:
            loss_value = loss.item()  # single host sync per batch

            loss_list.append(loss_value)
            precision_list.append(precision)
            recall_list.append(recall)

            tmp_loss_list.append(loss_value)
            tmp_precision_list.append(precision)
            tmp_recall_list.append(recall)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        self.m_train_iteration += 1
        iteration += 1

        if iteration % self.m_print_interval == 0:
            logger_obj.f_add_output2IO("%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f"%(iteration, np.mean(tmp_loss_list), np.mean(tmp_precision_list), np.mean(tmp_recall_list)))

            tmp_loss_list = []
            tmp_precision_list = []
            tmp_recall_list = []

    # Compute each epoch mean once and reuse it for logging and bookkeeping.
    mean_loss = np.mean(loss_list)
    mean_precision = np.mean(precision_list)
    mean_recall = np.mean(recall_list)

    logger_obj.f_add_output2IO("%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f"%(self.m_train_iteration, mean_loss, mean_precision, mean_recall))

    logger_obj.f_add_scalar2tensorboard("train/loss", mean_loss, self.m_train_iteration)
    logger_obj.f_add_scalar2tensorboard("train/precision", mean_precision, self.m_train_iteration)
    logger_obj.f_add_scalar2tensorboard("train/recall", mean_recall, self.m_train_iteration)

    self.m_mean_train_loss = mean_loss
    self.m_mean_train_precision = mean_precision
    self.m_mean_train_recall = mean_recall
def f_eval_epoch(self, eval_data, network, optimizer, logger_obj):
    """Evaluate ``network`` on ``eval_data``; record mean loss/precision/recall.

    A batch contributes to the averages (loss included) only when both its
    precision and its recall are non-zero. If no batch qualifies, ``np.mean``
    of the empty lists yields ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        eval_data: Iterable yielding ``(input_batch, input_freq_batch,
            input_length_batch, user_batch, item_batch, target_batch)`` tuples.
        network: Model to evaluate; called with the five input tensors and
            expected to return ``(logits, mask)``. It is switched to eval mode
            for the call and put back into train mode on exit, even if
            evaluation raises.
        optimizer: Not used by this method.
        logger_obj: Object exposing ``f_add_output2IO`` and
            ``f_add_scalar2tensorboard``.

    Side effects:
        Sets ``self.m_eval_iteration`` (copied from ``self.m_train_iteration``),
        ``self.m_mean_eval_loss``, ``self.m_mean_eval_precision`` and
        ``self.m_mean_eval_recall``.
    """
    loss_list = []
    precision_list = []
    recall_list = []

    self.m_eval_iteration = self.m_train_iteration
    logger_obj.f_add_output2IO(" "*10+" eval the user and item encoder"+" "*10)

    network.eval()
    try:
        with torch.no_grad():
            for input_batch, input_freq_batch, input_length_batch, user_batch, item_batch, target_batch in eval_data:
                input_batch_gpu = input_batch.to(self.m_device)
                input_freq_batch_gpu = input_freq_batch.to(self.m_device)
                input_length_batch_gpu = input_length_batch.to(self.m_device)
                user_batch_gpu = user_batch.to(self.m_device)
                item_batch_gpu = item_batch.to(self.m_device)
                target_batch_gpu = target_batch.to(self.m_device)

                user_item_attr_logits, mask = network(
                    input_batch_gpu, input_freq_batch_gpu, input_length_batch_gpu,
                    user_batch_gpu, item_batch_gpu)

                loss = self.m_rec_loss(user_item_attr_logits, target_batch_gpu, mask)

                # The metric helper receives the original (non-transferred)
                # target batch, as in the original code.
                precision, recall = get_precision_recall(user_item_attr_logits.cpu(), target_batch, k=3)

                # Batches with a zero metric are excluded from the averages.
                if precision != 0 and recall != 0:
                    loss_list.append(loss.item())
                    precision_list.append(precision)
                    recall_list.append(recall)

        # Compute each mean once and reuse it for logging and bookkeeping.
        mean_loss = np.mean(loss_list)
        mean_precision = np.mean(precision_list)
        mean_recall = np.mean(recall_list)

        logger_obj.f_add_output2IO("%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f"%(self.m_eval_iteration, mean_loss, mean_precision, mean_recall))

        logger_obj.f_add_scalar2tensorboard("eval/loss", mean_loss, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/precision", mean_precision, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/recall", mean_recall, self.m_eval_iteration)

        self.m_mean_eval_loss = mean_loss
        self.m_mean_eval_precision = mean_precision
        self.m_mean_eval_recall = mean_recall
    finally:
        # Always hand the model back in training mode.
        network.train()
def f_train_epoch(self, train_data, network, optimizer, logger_obj):
    """Train ``network`` for one pass over ``train_data``.

    Every batch triggers an optimizer step. A batch contributes to the
    reported averages (loss included) only when both its precision and its
    recall are non-zero. If no batch qualifies, ``np.mean`` of the empty lists
    yields ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        train_data: Iterable yielding ``(input_batch, input_freq_batch,
            input_length_batch, user_batch, item_batch, target_batch)`` tuples.
        network: Model to train; called with the five input tensors and
            expected to return ``(logits, mask)``.
        optimizer: Optimizer stepped once per batch.
        logger_obj: Object exposing ``f_add_output2IO`` and
            ``f_add_scalar2tensorboard``.

    Side effects:
        Increments ``self.m_train_iteration`` once per batch, logs a windowed
        summary every ``self.m_print_interval`` batches, and sets
        ``self.m_mean_train_loss``, ``self.m_mean_train_precision`` and
        ``self.m_mean_train_recall``.
    """
    loss_list = []
    precision_list = []
    recall_list = []

    # Windowed statistics, reset after every periodic log line.
    tmp_loss_list = []
    tmp_precision_list = []
    tmp_recall_list = []

    iteration = 0

    logger_obj.f_add_output2IO(" "*10+"training the user and item encoder"+" "*10)

    network.train()
    for input_batch, input_freq_batch, input_length_batch, user_batch, item_batch, target_batch in train_data:
        input_batch_gpu = input_batch.to(self.m_device)
        input_freq_batch_gpu = input_freq_batch.to(self.m_device)
        input_length_batch_gpu = input_length_batch.to(self.m_device)
        user_batch_gpu = user_batch.to(self.m_device)
        item_batch_gpu = item_batch.to(self.m_device)
        target_batch_gpu = target_batch.to(self.m_device)

        user_item_attr_logits, mask = network(
            input_batch_gpu, input_freq_batch_gpu, input_length_batch_gpu,
            user_batch_gpu, item_batch_gpu)

        loss = self.m_rec_loss(user_item_attr_logits, target_batch_gpu, mask)

        # Metrics need values only, so keep them out of the autograd graph.
        precision, recall = get_precision_recall(user_item_attr_logits.detach().cpu(), target_batch, k=3)

        # NOTE(review): the flattened source does not show whether the
        # windowed (tmp_*) lists sit inside this filter; they are kept inside
        # it here -- confirm against the original file.
        if precision != 0 and recall != 0:
            loss_value = loss.item()  # single host sync per batch

            loss_list.append(loss_value)
            precision_list.append(precision)
            recall_list.append(recall)

            tmp_loss_list.append(loss_value)
            tmp_precision_list.append(precision)
            tmp_recall_list.append(recall)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        self.m_train_iteration += 1
        iteration += 1

        if iteration % self.m_print_interval == 0:
            logger_obj.f_add_output2IO("%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f"%(iteration, np.mean(tmp_loss_list), np.mean(tmp_precision_list), np.mean(tmp_recall_list)))

            tmp_loss_list = []
            tmp_precision_list = []
            tmp_recall_list = []

    # Compute each epoch mean once and reuse it for logging and bookkeeping.
    mean_loss = np.mean(loss_list)
    mean_precision = np.mean(precision_list)
    mean_recall = np.mean(recall_list)

    logger_obj.f_add_output2IO("%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f"%(self.m_train_iteration, mean_loss, mean_precision, mean_recall))

    logger_obj.f_add_scalar2tensorboard("train/loss", mean_loss, self.m_train_iteration)
    logger_obj.f_add_scalar2tensorboard("train/precision", mean_precision, self.m_train_iteration)
    logger_obj.f_add_scalar2tensorboard("train/recall", mean_recall, self.m_train_iteration)

    self.m_mean_train_loss = mean_loss
    self.m_mean_train_precision = mean_precision
    self.m_mean_train_recall = mean_recall
def f_eval_epoch(self, eval_data, network, optimizer, logger_obj):
    """Evaluate ``network`` on ``eval_data``; record mean loss/precision/recall.

    A batch contributes to the averages (loss included) only when both its
    precision and its recall are non-zero. If no batch qualifies, ``np.mean``
    of the empty lists yields ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        eval_data: Iterable yielding 14-element batches in the order
            ``(attr_item, attr_tf_item, attr_length_item, attr_index_item,
            attr_index_item_iter_debug, item, attr_user, attr_tf_user,
            attr_length_user, attr_index_user, user, attr_input, attr_length,
            target)``. ``attr_input`` is unpacked but not used.
        network: Model to evaluate; expected to return a 6-tuple whose last
            two entries are ``(logits, mask)``. It is switched to eval mode
            for the call and put back into train mode on exit, even if
            evaluation raises.
        optimizer: Not used by this method.
        logger_obj: Object exposing ``f_add_output2IO`` and
            ``f_add_scalar2tensorboard``.

    Side effects:
        Sets ``self.m_eval_iteration`` (copied from ``self.m_train_iteration``),
        ``self.m_mean_eval_loss``, ``self.m_mean_eval_precision`` and
        ``self.m_mean_eval_recall``.
    """
    loss_list = []
    precision_list = []
    recall_list = []

    self.m_eval_iteration = self.m_train_iteration
    logger_obj.f_add_output2IO(" " * 10 + " eval the user and item encoder" + " " * 10)

    network.eval()
    try:
        with torch.no_grad():
            for (attr_item_batch, attr_tf_item_batch, attr_length_item_batch,
                 attr_index_item_batch, attr_index_item_iter_debug_batch, item_batch,
                 attr_user_batch, attr_tf_user_batch, attr_length_user_batch,
                 attr_index_user_batch, user_batch,
                 attr_input_batch, attr_length_batch, target_batch) in eval_data:

                attr_item_gpu = attr_item_batch.to(self.m_device)
                attr_tf_item_gpu = attr_tf_item_batch.to(self.m_device)
                attr_length_item_gpu = attr_length_item_batch.to(self.m_device)
                attr_index_item_gpu = attr_index_item_batch.to(self.m_device)
                item_gpu = item_batch.to(self.m_device)

                attr_index_item_iter_debug_gpu = attr_index_item_iter_debug_batch.to(self.m_device)

                attr_user_gpu = attr_user_batch.to(self.m_device)
                attr_tf_user_gpu = attr_tf_user_batch.to(self.m_device)
                attr_length_user_gpu = attr_length_user_batch.to(self.m_device)
                attr_index_user_gpu = attr_index_user_batch.to(self.m_device)
                user_gpu = user_batch.to(self.m_device)

                # attr_input_batch is never consumed below, so it is not
                # copied to the device.
                attr_length_gpu = attr_length_batch.to(self.m_device)
                target_gpu = target_batch.to(self.m_device)

                # Zero-initialised float buffer shaped like the targets; it is
                # passed to the network as its last argument.
                logits = torch.zeros_like(target_gpu).float()

                (user_attr_item_logits, attr_item_mask,
                 item_attr_user_logits, attr_user_mask,
                 logits, mask) = network(
                    attr_item_gpu, attr_tf_item_gpu, attr_length_item_gpu, attr_index_item_gpu, item_gpu,
                    attr_user_gpu, attr_tf_user_gpu, attr_length_user_gpu, attr_index_user_gpu, user_gpu,
                    attr_length_gpu, logits)

                loss = self.m_rec_loss(logits, target_gpu, mask, attr_index_item_iter_debug_gpu)

                precision, recall = get_precision_recall(logits.cpu(), target_batch, mask.cpu(), k=3)

                # Batches with a zero metric are excluded from the averages.
                if precision != 0 and recall != 0:
                    loss_list.append(loss.item())
                    precision_list.append(precision)
                    recall_list.append(recall)

        # Compute each mean once and reuse it for logging and bookkeeping.
        mean_loss = np.mean(loss_list)
        mean_precision = np.mean(precision_list)
        mean_recall = np.mean(recall_list)

        logger_obj.f_add_output2IO(
            "%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f" % (
                self.m_eval_iteration, mean_loss, mean_precision, mean_recall))

        logger_obj.f_add_scalar2tensorboard("eval/loss", mean_loss, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/precision", mean_precision, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/recall", mean_recall, self.m_eval_iteration)

        self.m_mean_eval_loss = mean_loss
        self.m_mean_eval_precision = mean_precision
        self.m_mean_eval_recall = mean_recall
    finally:
        # Always hand the model back in training mode.
        network.train()
def f_eval_epoch(self, eval_data, network, optimizer, logger_obj):
    """Evaluate ``network`` on ``eval_data``; record mean precision/recall.

    No loss is computed in this variant: a constant ``0.0`` is recorded per
    kept batch, so the reported loss is ``0.0`` whenever at least one batch is
    kept. A batch is kept only when both its precision and its recall are
    non-zero. If no batch is kept, ``np.mean`` of the empty lists yields
    ``nan`` (and NumPy emits a RuntimeWarning).

    Args:
        eval_data: Iterable yielding ``(pos_tag_batch, mask_batch, user_batch,
            item_batch)`` tuples.
        network: Model to evaluate. It is switched to eval mode for the call
            and put back into train mode on exit, even if evaluation raises.
        optimizer: Not used by this method.
        logger_obj: Object exposing ``f_add_output2IO`` and
            ``f_add_scalar2tensorboard``.

    Side effects:
        Sets ``self.m_eval_iteration`` (copied from ``self.m_train_iteration``),
        ``self.m_mean_eval_loss``, ``self.m_mean_eval_precision`` and
        ``self.m_mean_eval_recall``.
    """
    loss_list = []
    precision_list = []
    recall_list = []

    # Placeholder: this evaluation path does not compute a loss.
    placeholder_loss = 0.0

    self.m_eval_iteration = self.m_train_iteration
    logger_obj.f_add_output2IO(" " * 10 + " eval the user and item encoder" + " " * 10)

    network.eval()
    try:
        with torch.no_grad():
            for pos_tag_batch, mask_batch, user_batch, item_batch in eval_data:
                user_batch_gpu = user_batch.to(self.m_device)
                item_batch_gpu = item_batch.to(self.m_device)

                preds = self.f_get_pred(network, user_batch_gpu, item_batch_gpu)
                precision, recall = get_precision_recall(preds.cpu(), pos_tag_batch, mask_batch, k=3)

                # Batches with a zero metric are excluded from the averages.
                if precision != 0 and recall != 0:
                    loss_list.append(placeholder_loss)
                    precision_list.append(precision)
                    recall_list.append(recall)

        # Compute each mean once and reuse it for logging and bookkeeping.
        mean_loss = np.mean(loss_list)
        mean_precision = np.mean(precision_list)
        mean_recall = np.mean(recall_list)

        logger_obj.f_add_output2IO(
            "%d, NLL_loss:%.4f, precision:%.4f, recall:%.4f" % (
                self.m_eval_iteration, mean_loss, mean_precision, mean_recall))

        logger_obj.f_add_scalar2tensorboard("eval/loss", mean_loss, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/precision", mean_precision, self.m_eval_iteration)
        logger_obj.f_add_scalar2tensorboard("eval/recall", mean_recall, self.m_eval_iteration)

        self.m_mean_eval_loss = mean_loss
        self.m_mean_eval_precision = mean_precision
        self.m_mean_eval_recall = mean_recall
    finally:
        # Always hand the model back in training mode.
        network.train()