def run_eval(self, filename, num_ngs):
    """Evaluate the given file and return some evaluation metrics.

    Args:
        filename (str): A file name that will be evaluated.
        num_ngs (int): The number of negative samples for each positive instance.

    Returns:
        dict: A dictionary that contains evaluation metrics.
    """
    load_sess = self.sess
    preds = []
    labels = []
    group_preds = []
    group_labels = []
    # Each positive instance is followed by its num_ngs negatives, so every
    # consecutive block of num_ngs + 1 rows forms one ranking group.
    group = num_ngs + 1

    for batch_data_input in self.iterator.load_data_from_file(
        filename, min_seq_length=self.min_seq_length, batch_num_ngs=0
    ):
        if batch_data_input:
            step_pred, step_labels = self.eval(load_sess, batch_data_input)
            preds.extend(np.reshape(step_pred, -1))
            labels.extend(np.reshape(step_labels, -1))
            group_preds.extend(np.reshape(step_pred, (-1, group)))
            group_labels.extend(np.reshape(step_labels, (-1, group)))

    res = cal_metric(labels, preds, self.hparams.metrics)
    res_pairwise = cal_metric(
        group_labels, group_preds, self.hparams.pairwise_metrics
    )
    res.update(res_pairwise)
    return res
def run_eval(self, filename):
    """Evaluate the given file and return some evaluation metrics.

    Args:
        filename (str): A file name that will be evaluated.

    Returns:
        dict: A dictionary that contains evaluation metrics.
    """
    load_sess = self.sess
    preds = []
    labels = []
    imp_indexes = []

    for batch_data_input, imp_index, data_size in self.iterator.load_data_from_file(
        filename
    ):
        step_pred, step_labels = self.eval(load_sess, batch_data_input)
        preds.extend(np.reshape(step_pred, -1))
        labels.extend(np.reshape(step_labels, -1))
        imp_indexes.extend(np.reshape(imp_index, -1))

    res = cal_metric(labels, preds, self.hparams.metrics)
    if self.hparams.pairwise_metrics is not None:
        group_labels, group_preds = self.group_labels(labels, preds, imp_indexes)
        res_pairwise = cal_metric(
            group_labels, group_preds, self.hparams.pairwise_metrics
        )
        res.update(res_pairwise)
    return res
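# `group_labels` is called above but not defined in this section. A minimal
# sketch of such a helper, assuming it only needs to bucket the flat label and
# prediction lists by impression index (it mirrors the inline grouping done in
# run_test further below; the actual method on the model class may differ):
def group_labels(labels, preds, group_keys):
    """Group labels and predictions by impression index.

    Args:
        labels (list): Flat list of ground-truth labels.
        preds (list): Flat list of predicted scores, aligned with labels.
        group_keys (list): Impression index for each (label, pred) pair.

    Returns:
        tuple: (grouped labels, grouped predictions), one sublist per impression.
    """
    all_keys = list(set(group_keys))
    group_labels = {k: [] for k in all_keys}
    group_preds = {k: [] for k in all_keys}
    for label, pred, key in zip(labels, preds, group_keys):
        group_labels[key].append(label)
        group_preds[key].append(pred)
    return (
        [group_labels[k] for k in all_keys],
        [group_preds[k] for k in all_keys],
    )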
def run_eval(self, news_filename, behaviors_file):
    """Evaluate the given files and return some evaluation metrics.

    Args:
        news_filename (str): A file that contains the news to be evaluated.
        behaviors_file (str): A file that contains the user behaviors to be evaluated.

    Returns:
        dict: A dictionary that contains evaluation metrics.
    """
    if self.support_quick_scoring:
        group_labels, group_preds = self.run_fast_eval(
            news_filename, behaviors_file
        )
    else:
        preds = []
        labels = []
        imp_indexes = []
        for batch_data_input in self.test_iterator.load_data_from_file(
            news_filename, behaviors_file
        ):
            step_pred, step_labels, step_imp_index = self.eval(batch_data_input)
            preds.extend(np.reshape(step_pred, -1))
            labels.extend(np.reshape(step_labels, -1))
            imp_indexes.extend(np.reshape(step_imp_index, -1))
        group_labels, group_preds = self.group_labels(labels, preds, imp_indexes)

    res = cal_metric(group_labels, group_preds, self.hparams.metrics)
    return res
def run_eval(self, filename):
    """Evaluate the given file and return pairwise evaluation metrics.

    Args:
        filename (str): A file name that will be evaluated.

    Returns:
        dict: A dictionary that contains evaluation metrics.
    """
    load_sess = self.sess
    group_preds = []
    group_labels = []

    for (
        batch_data_input,
        newsid_list,
        data_size,
    ) in self.iterator.load_data_from_file(filename):
        if batch_data_input:
            step_pred, step_labels = self.eval(load_sess, batch_data_input)
            group_preds.extend(step_pred)
            group_labels.extend(step_labels)

    res = cal_metric(group_labels, group_preds, self.hparams.pairwise_metrics)
    return res
def run_eval(self, news_filename, behaviors_file):
    """Evaluate the given files and return some evaluation metrics.

    Args:
        news_filename (str): A file that contains the news to be evaluated.
        behaviors_file (str): A file that contains the user behaviors to be evaluated.

    Returns:
        dict: A dictionary that contains evaluation metrics.
    """
    self.model.eval()
    with torch.no_grad():
        if self.support_quick_scoring:
            _, group_labels, group_preds = self.run_fast_eval(
                news_filename, behaviors_file
            )
        else:
            _, group_labels, group_preds = self.run_slow_eval(
                news_filename, behaviors_file
            )
    res = cal_metric(group_labels, group_preds, self.hparams.metrics)
    return res
def run_eval(self, filename):
    """Evaluate the given file and return some evaluation metrics.

    Args:
        filename (str): A file name that will be evaluated.

    Returns:
        dict: A dictionary that contains evaluation metrics.
    """
    load_sess = self.sess
    preds = []
    labels = []

    for batch_data_input in self.iterator.load_data_from_file(filename):
        step_pred, step_labels = self.eval(load_sess, batch_data_input)
        preds.extend(np.reshape(step_pred, -1))
        labels.extend(np.reshape(step_labels, -1))

    res = cal_metric(labels, preds, self.hparams.metrics)
    return res
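# For reference, the pointwise metrics typically passed to cal_metric above
# (e.g. "auc", "logloss") can be reproduced with scikit-learn. Illustrative
# sketch only, not the reco_utils implementation:
from sklearn.metrics import log_loss, roc_auc_score

def pointwise_metrics(labels, preds):
    """Compute AUC and log loss over flat label/score lists."""
    return {
        "auc": round(roc_auc_score(labels, preds), 4),
        "logloss": round(log_loss(labels, preds), 4),
    }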
def run_test(self, filename, length, save_model=True, validate=False):
    """Evaluate the given file and optionally write predictions to disk.

    Args:
        filename (str): A file name that will be evaluated.
        length (int): The number of instances in the file.
        save_model (bool): Whether to write per-impression scores and ranks
            under ./result/.
        validate (bool): Whether to also compute and print group metrics.
    """
    load_sess = self.sess
    preds = []
    labels = []
    imp_indexes = []

    for batch_data_input, imp_index, data_size in tqdm(
        self.iterator.load_data_from_file(filename),
        total=math.ceil(length / self.hparams.batch_size),
    ):
        step_pred, step_labels = self.eval(load_sess, batch_data_input)
        preds.extend(np.reshape(step_pred, -1))
        labels.extend(np.reshape(step_labels, -1))
        imp_indexes.extend(np.reshape(imp_index, -1))

    print(len(preds))
    if length < 2000:
        print(imp_indexes)

    # The last batch may be padded, so truncate to the true instance count.
    preds = preds[:length]
    labels = labels[:length]
    imp_indexes = imp_indexes[:length]
    if length < 2000:
        print(labels)
    print(len(preds))

    # Group labels and predictions by impression index.
    all_keys = sorted(int(ip) for ip in set(imp_indexes))
    group_labels = {k: [] for k in all_keys}
    group_preds = {k: [] for k in all_keys}
    for label, pred, key in zip(labels, preds, imp_indexes):
        group_labels[int(key)].append(label)
        group_preds[int(key)].append(pred)

    if validate:
        all_labels = [group_labels[k] for k in all_keys]
        all_preds = [group_preds[k] for k in all_keys]
        metric_list = [
            x.strip() for x in "group_auc || mean_mrr || ndcg@5;10".split("||")
        ]
        ret = cal_metric(all_labels, all_preds, metric_list)
        for metric, val in ret.items():
            print("Epoch: {}, {}: {}".format(1, metric, val))

    if save_model:
        final_arr = []
        for k in all_keys:
            new_row = [k]
            new_row.append(
                ",".join(map(str, np.array(group_labels[k]).astype(int)))
            )
            new_row.append(
                ",".join(map(str, np.array(group_preds[k]).astype(float)))
            )
            # Rank items within the impression by descending score.
            rank = ss.rankdata(-np.array(group_preds[k])).astype(int).tolist()
            new_row.append("[" + ",".join(map(str, rank)) + "]")
            assert len(rank) == len(group_labels[k])
            final_arr.append(new_row)
        output_path = "./result/"
        fdf = pd.DataFrame(
            final_arr, columns=["impression", "labels", "preds", "ranks"]
        )
        fdf.drop(columns=["labels", "ranks"]).to_csv(
            output_path + "score.txt", sep=" ", index=False
        )
        fdf.drop(columns=["labels", "preds"]).to_csv(
            output_path + "result.txt", header=None, sep=" ", index=False
        )
    return
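# Quick check of the ranking convention used above: scipy.stats.rankdata on
# the negated scores assigns rank 1 to the highest score.
import numpy as np
import scipy.stats as ss

scores = np.array([0.2, 0.9, 0.5])
print(ss.rankdata(-scores).astype(int).tolist())  # [3, 1, 2]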
from reco_utils.recommender.deeprec.deeprec_utils import cal_metric

# Recompute group metrics from a saved prediction file: each line holds the
# comma-joined labels and scores of one impression, separated by a tab.
group_labels, group_preds = [], []
metrics = ["group_auc", "mean_mrr", "ndcg@5;10"]
with open("examples/test_mind/results/npa-valid.txt", "r") as f:
    for line in f:
        row = line.strip().split("\t")
        group_labels.append([int(float(x)) for x in row[0].split(",")])
        group_preds.append([float(x) for x in row[1].split(",")])

res = cal_metric(group_labels, group_preds, metrics)
print(res)
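# For reference, the group metrics named above can be reproduced directly from
# the grouped lists. Illustrative sketch, not the reco_utils implementation;
# it assumes each impression has at least one positive label:
import numpy as np
from sklearn.metrics import roc_auc_score

def mrr_score(y_true, y_score):
    """Mean reciprocal rank of the clicked items within one impression."""
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order)
    rr_score = y_true / (np.arange(len(y_true)) + 1)
    return np.sum(rr_score) / np.sum(y_true)

def group_auc(group_labels, group_preds):
    """Mean per-impression AUC, skipping impressions with a single class."""
    aucs = [
        roc_auc_score(labels, preds)
        for labels, preds in zip(group_labels, group_preds)
        if 0 < sum(labels) < len(labels)
    ]
    return np.mean(aucs)

print(group_auc(group_labels, group_preds))
print(np.mean([mrr_score(l, p) for l, p in zip(group_labels, group_preds)]))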
def fit(
    self,
    train_news_file,
    train_behaviors_file,
    valid_news_file,
    valid_behaviors_file,
    test_news_file=None,
    test_behaviors_file=None,
):
    """Fit the model with the training files. Evaluate the model on the
    validation files per epoch to observe the training status. If
    test_news_file is not None, evaluate on the test files too.

    Args:
        train_news_file (str): News file of the training set.
        train_behaviors_file (str): Behaviors file of the training set.
        valid_news_file (str): News file of the validation set.
        valid_behaviors_file (str): Behaviors file of the validation set.
        test_news_file (str): News file of the test set.
        test_behaviors_file (str): Behaviors file of the test set.

    Returns:
        obj: An instance of self.
    """
    for epoch in range(1, self.hparams.epochs + 1):
        step = 0
        self.hparams.current_epoch = epoch
        epoch_loss = 0
        train_start = time.time()

        tqdm_util = tqdm(
            self.train_iterator.load_data_from_file(
                train_news_file, train_behaviors_file
            )
        )
        for batch_data_input in tqdm_util:
            step_data_loss = self.train(batch_data_input)
            epoch_loss += step_data_loss
            step += 1
            if step % self.hparams.show_step == 0:
                tqdm_util.set_description(
                    "step {0:d} , total_loss: {1:.4f}, data_loss: {2:.4f}".format(
                        step, epoch_loss / step, step_data_loss
                    )
                )

        train_end = time.time()
        train_time = train_end - train_start

        # Checkpoint the weights after each epoch.
        self.model.save_weights("./para/dkn_" + str(epoch) + ".h5")

        eval_start = time.time()
        train_info = ",".join(
            str(item[0]) + ":" + str(item[1])
            for item in [("logloss loss", epoch_loss / step)]
        )
        _, group_labels, group_preds = self.run_slow_eval(
            valid_news_file, valid_behaviors_file
        )
        eval_res = cal_metric(group_labels, group_preds, self.hparams.metrics)
        eval_info = ", ".join(
            str(item[0]) + ":" + str(item[1])
            for item in sorted(eval_res.items(), key=lambda x: x[0])
        )
        if test_news_file is not None:
            test_res = self.run_eval(test_news_file, test_behaviors_file)
            test_info = ", ".join(
                str(item[0]) + ":" + str(item[1])
                for item in sorted(test_res.items(), key=lambda x: x[0])
            )
        eval_end = time.time()
        eval_time = eval_end - eval_start

        if test_news_file is not None:
            print(
                "at epoch {0:d}".format(epoch)
                + "\ntrain info: " + train_info
                + "\neval info: " + eval_info
                + "\ntest info: " + test_info
            )
        else:
            print(
                "at epoch {0:d}".format(epoch)
                + "\ntrain info: " + train_info
                + "\neval info: " + eval_info
            )
        print(
            "at epoch {0:d} , train time: {1:.1f} eval time: {2:.1f}".format(
                epoch, train_time, eval_time
            )
        )
    return self
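# Minimal usage sketch, assuming `model` is an instance of the class the
# methods above belong to; the MIND-style file paths are hypothetical:
model.fit(
    "train/news.tsv",
    "train/behaviors.tsv",
    "valid/news.tsv",
    "valid/behaviors.tsv",
)
res = model.run_eval("valid/news.tsv", "valid/behaviors.tsv")
print(res)  # e.g. {"group_auc": ..., "mean_mrr": ..., "ndcg@5": ..., "ndcg@10": ...}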