def test_one_user(x):
    """Evaluate ranking metrics for a single user.

    Args:
        x: tuple ``(rating, u, item_num, gen_idx)`` where
            ``rating`` — per-item predicted scores for user ``u`` (indexable
            by item id),
            ``u`` — user id,
            ``item_num`` — total number of items,
            ``gen_idx`` — index into the global ``data_generator_all``.

    Returns:
        np.ndarray of 10 values:
        [recall@20, 40, 60, 80, 100, AP@20, 40, 60, 80, 100].
    """
    rating, u, item_num = x[0], x[1], x[2]
    data_generator = data_generator_all[x[3]]
    # Items seen during training are excluded from the candidate ranking.
    training_items = data_generator.train_items[u]
    # Ground-truth positives for this user in the test split.
    user_pos_test = data_generator.test_set[u]
    # Set copy for O(1) membership tests inside the relevance loop below
    # (len() is still taken from the original container, as before).
    pos_set = set(user_pos_test)

    test_items = list(set(range(item_num)) - set(training_items))

    # Rank candidates by score. Sort ascending then reverse (rather than
    # sorted(reverse=True)) to preserve the original tie-breaking order.
    item_score = sorted(
        ((i, rating[i]) for i in test_items),
        key=lambda pair: pair[1],
    )
    item_score.reverse()
    item_sort = [i for i, _ in item_score]

    # Binary relevance vector aligned with the ranked candidate list.
    r = [1 if i in pos_set else 0 for i in item_sort]

    ks = [20, 40, 60, 80, 100]
    recalls = [ut.recall_at_k(r, k, len(user_pos_test)) for k in ks]
    aps = [ut.average_precision(r, k) for k in ks]
    return np.array(recalls + aps)
def simple_test_one_user(id, reclist):
    """Evaluate recommendation accuracy for a single user.

    Returns recall, precision, MRR and NDCG at top-3, top-5 and top-10
    (12 values in total). NOTE: mutates ``reclist`` in place by zeroing
    the scores of items already seen in training.

    :param id: user id
    :param reclist: score vector over all items for this user
    :return: [recall, precision, MRR, NDCG] for k = 3, 5, 10
    """
    # Mask training items so they cannot be re-recommended.
    if id in trainset_dict:
        for item in trainset_dict[id]:
            reclist[item] = 0
    # Item ids ranked by descending score.
    recom = np.argsort(-reclist)
    gnd = set(testset_dict[id])
    # pos[rank] == 1 iff the item at that rank is a test positive.
    pos = np.zeros(n_items)
    for rank, item in enumerate(recom):
        if item in gnd:
            pos[rank] = 1
    result = []
    for k in [3, 5, 10]:
        result.extend([
            utils.recall_at_k(pos, k, len(gnd)),
            utils.precision_at_k(pos, k),
            utils.mrr_at_k(pos, k),
            utils.ndcg_at_k(pos, k),
        ])
    return result
def tfidf():
    """TF-IDF retrieval baseline.

    Scores each (context, response) pair by the cosine similarity of their
    tf-idf vectors and prints recall@k on the test set for candidate-group
    sizes 2 and 10.
    """
    print('Loading data ...')
    train_c, train_r, train_l, \
        dev_c, dev_r, dev_l, \
        test_c, test_r, test_l = load_data(ratio=0.05)
    n_dev = dev_c.shape[0]
    n_test = test_c.shape[0]

    # Convert id matrices back into text corpora for the vectorizer.
    print('Transforming to text corpuses ...')
    _, inv_word_index, _ = load_dictionary()
    train_c_corpus = dataset_to_corpus(train_c, inv_word_index)
    train_r_corpus = dataset_to_corpus(train_r, inv_word_index)
    dev_c_corpus = dataset_to_corpus(dev_c, inv_word_index)
    dev_r_corpus = dataset_to_corpus(dev_r, inv_word_index)
    test_c_corpus = dataset_to_corpus(test_c, inv_word_index)
    test_r_corpus = dataset_to_corpus(test_r, inv_word_index)

    print('Fitting a tfidf model ...')
    vectorizer = TfidfVectorizer()
    # Only the fitted vocabulary/idf weights are needed here; the transformed
    # training matrix produced by fit_transform was never used, so fit() is
    # enough and avoids materialising it.
    vectorizer.fit(train_c_corpus + train_r_corpus)

    print('Transforming to tfidf features ...')
    X_dev_c = vectorizer.transform(dev_c_corpus)
    X_dev_r = vectorizer.transform(dev_r_corpus)
    X_test_c = vectorizer.transform(test_c_corpus)
    X_test_r = vectorizer.transform(test_r_corpus)

    # Predict by cosine similarity between context and response vectors.
    print('Predicting ...')

    def cos_sim(x, y):
        # x and y are 1-row sparse matrices: x * y.T is their dot product.
        return x * y.transpose() / np.sqrt(
            x * x.transpose() * y * y.transpose())

    # Dev predictions are kept for the (currently disabled) validation
    # report below.
    y_pred_dev = np.array(
        [cos_sim(X_dev_c[i, :], X_dev_r[i, :]) for i in range(n_dev)])
    y_pred_test = np.array(
        [cos_sim(X_test_c[i, :], X_test_r[i, :]) for i in range(n_test)])

    # Evaluate results
    print('Results:\n')
    #print('Validation set')
    #for group_size in [2, 10]:
    #    for k in [1, 2, 5]:
    #        if k >= group_size:
    #            break
    #        r = recall_at_k(y_pred_dev, k, group_size)
    #        print('recall@{} ({} options): {}'.format(k, group_size-1, r))
    #print('Testing set')
    for group_size in [2, 10]:
        for k in [1, 2, 5]:
            if k >= group_size:
                break
            r = recall_at_k(y_pred_test, k, group_size)
            print('recall@{} ({} options): {}'.format(k, group_size - 1, r))
def get_full_sort_score(self, epoch, answers, pred_list):
    """Compute HIT (recall) and NDCG at k = 5, 10, 20, print/append them to
    the log file, and return them.

    :param epoch: current epoch number (used only in the log line)
    :param answers: ground-truth items per user
    :param pred_list: ranked predicted items per user
    :return: ([HIT@5, NDCG@5, HIT@10, NDCG@10, HIT@20, NDCG@20], log string)
    """
    # k = 15 used to be computed here but was never reported or returned,
    # so it is skipped entirely.
    recall, ndcg = [], []
    for k in [5, 10, 20]:
        recall.append(recall_at_k(answers, pred_list, k))
        ndcg.append(ndcg_k(answers, pred_list, k))
    post_fix = {
        "Epoch": epoch,
        "HIT@5": '{:.4f}'.format(recall[0]),
        "NDCG@5": '{:.4f}'.format(ndcg[0]),
        "HIT@10": '{:.4f}'.format(recall[1]),
        "NDCG@10": '{:.4f}'.format(ndcg[1]),
        "HIT@20": '{:.4f}'.format(recall[2]),
        "NDCG@20": '{:.4f}'.format(ndcg[2])
    }
    print(post_fix)
    with open(self.args.log_file, 'a') as f:
        f.write(str(post_fix) + '\n')
    return [recall[0], ndcg[0], recall[1], ndcg[1],
            recall[2], ndcg[2]], str(post_fix)
def simple_test_one_user(x):
    """Evaluate recommendation accuracy for a single user.

    Returns recall, precision, MRR and NDCG at top-3, top-5 and top-10
    (12 values in total). NOTE: mutates the score vector ``x[1]`` in place
    by zeroing the scores of items already seen in training.

    :param x: tuple (user id, score vector over all items, index) —
        the third element is accepted for caller compatibility but unused.
    :return: [recall, precision, MRR, NDCG] for k = 3, 5, 10
    """
    id, lst = x[0], x[1]
    # Mask training items so they cannot be re-recommended.
    if id in loader.trainset_dict:
        for item in loader.trainset_dict[id]:
            lst[item] = 0
    # Item ids ranked by descending score.
    recom = np.argsort(-lst)
    gnd = set(loader.testset_dict[id])
    # pos[rank] == 1 iff the item at that rank is a test positive.
    pos = np.zeros(opt.n_item)
    for rank, item in enumerate(recom):
        if item in gnd:
            pos[rank] = 1
    result = []
    for k in [3, 5, 10]:
        result.extend([
            utils.recall_at_k(pos, k, len(gnd)),
            utils.precision_at_k(pos, k),
            utils.mrr_at_k(pos, k),
            utils.ndcg_at_k(pos, k),
        ])
    return result