def simple_test_one_user(id, reclist):
    """Evaluate recommendation accuracy for a single user.

    Produces a flat list of [recall, precision, MRR, NDCG] for each of
    top-3, top-5 and top-10 (12 values total).

    :param id: user id (NOTE(review): shadows the ``id`` builtin; name kept
        for interface compatibility with existing callers)
    :param reclist: score vector over all items; mutated in place to zero
        out the user's training-set items
    :return: list of 12 metric values
    """
    # Mask items already seen in training so they cannot be "recommended".
    if id in trainset_dict:
        for item in trainset_dict[id]:
            reclist[item] = 0
    # Item ids ordered by descending score.
    recom = np.argsort(-reclist)
    gnd = set(testset_dict[id])
    # pos[r] == 1 iff the item at rank r is a held-out positive.
    pos = np.zeros(n_items)
    for rank, item in enumerate(recom):
        if item in gnd:
            pos[rank] = 1
    result = []
    for k in [3, 5, 10]:
        result.extend([
            utils.recall_at_k(pos, k, len(gnd)),
            utils.precision_at_k(pos, k),
            utils.mrr_at_k(pos, k),
            utils.ndcg_at_k(pos, k),
        ])
    return result
def evaluate_score(all_scores, exc_idx, rating_pos_idx, test_pos_list, num_item, Ks=None):
    """Rank all items for one user and score the ranking against test positives.

    :param all_scores: model scores for every item; the rank extraction uses
        ``argsort(axis=0)[::-1][:, 0]``, so this is assumed to be a
        (num_item, 1) column vector — TODO confirm against callers
    :param exc_idx: indices of items to exclude from the ranking entirely
    :param rating_pos_idx: indices of known (training) positives, also excluded
    :param test_pos_list: held-out positive item indices
    :param num_item: total number of items
    :param Ks: cutoffs for recall/NDCG; defaults to [50]
    :return: tuple (positive probabilities, positive ranks, recalls, ndcgs)
    """
    if Ks is None:  # avoid a mutable default argument
        Ks = [50]
    exp_score = np.exp(all_scores)
    # Excluded and already-rated items get probability 0 and a very low
    # score so they sink to the bottom of the ranking.
    if len(exc_idx) > 0:
        all_scores[exc_idx] = -30.0
        exp_score[exc_idx] = 0.0
    if len(rating_pos_idx) > 0:
        exp_score[rating_pos_idx] = 0
        all_scores[rating_pos_idx] = -30.0
    # Softmax probability over the remaining items.
    exp_prob = exp_score / exp_score.sum()
    # all_ranks[r] = item id at rank r (best first).
    all_ranks = all_scores.argsort(axis=0)[::-1][:, 0]
    # all_rank_array[item] = 1-based rank of that item.
    all_rank_array = np.zeros(num_item)
    for idx in range(num_item):
        all_rank_array[all_ranks[idx]] = idx + 1
    t_pos_prob = (exp_prob[np.array(test_pos_list)] + 1e-6).tolist()
    t_recalls = []
    t_ndcgs = []
    t_item_ranks = all_rank_array[np.array(test_pos_list)]
    # Binary relevance indexed by rank. np.int was removed in NumPy 1.24;
    # the builtin int is the supported spelling.
    rank_relevant_array = np.zeros(num_item).astype(int)
    # NOTE(review): ranks are 1-based, so a positive ranked last
    # (rank == num_item) would index out of bounds here — confirm intended.
    rank_relevant_array[t_item_ranks.astype(int)] = 1
    t_item_ranks = t_item_ranks.tolist()
    for k in Ks:
        top_at_set = set(all_ranks[:k].tolist())
        pos_set = set(test_pos_list)
        t_recalls.append(len(top_at_set & pos_set) * 1.0 / len(pos_set))
        t_ndcgs.append(ndcg_at_k(rank_relevant_array.tolist(), k, 1))
    return t_pos_prob, t_item_ranks, t_recalls, t_ndcgs
def simple_test_one_user(x):
    """Evaluate recommendation accuracy for one user (worker-style entry).

    :param x: triple of (user id, score vector over all items, index);
        the third element is unpacked but unused here.
    :return: flat list of [recall, precision, MRR, NDCG] for k in (3, 5, 10)
    """
    uid, scores, _idx = x[0], x[1], x[2]  # _idx: carried by callers, unused
    # Zero out items already seen during training so they are not
    # re-recommended. `loader` is a module-level object; read-only access
    # needs no `global` declaration.
    if uid in loader.trainset_dict:
        for item in loader.trainset_dict[uid]:
            scores[item] = 0
    # Item ids ordered by descending score.
    recom = np.argsort(-scores)
    gnd = set(loader.testset_dict[uid])
    # pos[r] == 1 iff the item at rank r is a held-out positive.
    pos = np.zeros(opt.n_item)
    for rank, item in enumerate(recom):
        if item in gnd:
            pos[rank] = 1
    result = []
    for k in [3, 5, 10]:
        result.extend([
            utils.recall_at_k(pos, k, len(gnd)),
            utils.precision_at_k(pos, k),
            utils.mrr_at_k(pos, k),
            utils.ndcg_at_k(pos, k),
        ])
    return result
# Fragment of the evaluation script (interior of a loop over u_idx; the
# enclosing loop header is outside this view).
# all_rank_array[item] = 1-based rank of that item under the current scores.
for idx in range(num_item):
    all_rank_array[all_ranks[idx]] = idx + 1
pos_prob.extend((exp_prob[np.array(test_pos_list)] + 1e-10).tolist())
item_ranks.extend(all_rank_array[np.array(test_pos_list)].tolist())
# Progress heartbeat every 3000 users.
if u_idx > 0 and u_idx % 3000 == 0:
    print(str(u_idx) + "/" + str(num_user))
pos_set = set(test_user_obs_list[u_idx])
t_item_ranks = all_rank_array[np.array(test_pos_list)]
# Binary relevance indexed by rank. np.int was removed in NumPy 1.24;
# the builtin int is the supported spelling.
rank_relevant_array = np.zeros(num_item).astype(int)
# NOTE(review): ranks are 1-based — a positive ranked last
# (rank == num_item) would index out of bounds here; confirm intended.
rank_relevant_array[t_item_ranks.astype(int)] = 1
t_item_ranks = t_item_ranks.tolist()
for (k_idx, k) in enumerate(Ks):
    top_at_set = set(all_ranks[:k].tolist())
    k_recalls[k_idx].append(len(top_at_set & pos_set) * 1.0 / len(pos_set))
    k_ndcgs[k_idx].append(ndcg_at_k(rank_relevant_array.tolist(), k, 1))
# Final summary over all users.
print("NLL:%f" % np.log(np.array(pos_prob)).mean())
for (k_idx, k) in enumerate(Ks):
    print("Recall@%d:%f" % (k, np.array(k_recalls[k_idx]).mean()))
for (k_idx, k) in enumerate(Ks):
    print("NDCG@%d:%f" % (k, np.array(k_ndcgs[k_idx]).mean()))
# Save propensity scores for every training (user, item) pair.
with open("propensity_scores/scores_hpf_%s_%d" % (args.dataset, num_train), "w") as fout:
    for t_data in train_list:
        user, item = t_data[0], t_data[1]
        pred_p = recommender.predict(user=user, item=item)
        out_line = str(user) + "," + str(item) + "," + str(pred_p) + "\n"
        fout.write(out_line)
def predict(self, mode):
    """Evaluate ranking quality on the validation or test split.

    For every user, ranks ``batch_size - 1`` sampled negatives together
    with the user's positives by predicted distance (smaller is better),
    then averages HR, MAP and NDCG at k in {1, 3, 5, 7, 10} over users
    and prints the result.

    Args:
        mode (str): "valid" evaluates the validation set; otherwise the
            test set is used.

    Returns:
        None
    """
    # Accumulator for the 15 averaged metrics: 5 HR, 5 AP, 5 NDCG.
    # (The original built an unused multiprocessing.Pool here, which
    # leaked worker processes — removed.)
    result = np.array([0.] * 15)
    if mode == 'valid':
        test_users = list(self.data.valid_set.keys())
    else:
        test_users = list(self.data.test_set.keys())
    test_user_num = len(test_users)
    for u in test_users:
        users = [u] * self.batch_size
        user_pos_test = (self.data.test_set[u] if mode == 'test'
                         else self.data.valid_set[u])
        # Candidate negatives: items unseen in train/valid/test for this user.
        neg_items = set(range(self.data.n_items)) - set(
            self.data.train_items[u])
        if u in self.data.valid_set:
            neg_items = neg_items - set(self.data.valid_set[u])
        if u in self.data.test_set:
            neg_items = neg_items - set(self.data.test_set[u])
        # random.sample requires a sequence since Python 3.11 (sampling
        # directly from a set raises TypeError there).
        neg_items = rd.sample(list(neg_items), self.batch_size - 1)
        items_to_test = neg_items + user_pos_test
        ratings = self.sess.run(self.distance, {
            self.users: users,
            self.pos_items: items_to_test
        })
        item_score = [(items_to_test[i], ratings[i])
                      for i in range(len(items_to_test))]
        # Distance: smaller is better, so ascending sort ranks best first.
        item_score = sorted(item_score, key=lambda x: x[1])
        item_sort = [x[0] for x in item_score]
        # Binary relevance in ranked order.
        r = []
        for i in item_sort:
            if i in user_pos_test:
                r.append(1)
            else:
                r.append(0)
        ret = np.array(
            [ut.hr_at_k(r, k) for k in (1, 3, 5, 7, 10)]
            + [ut.average_precision(r, k) for k in (1, 3, 5, 7, 10)]
            + [ut.ndcg_at_k(r, k) for k in (1, 3, 5, 7, 10)])
        result += ret
    ret = result / test_user_num
    hr_1, hr_3, hr_5, hr_7, hr_10 = ret[0:5]
    map_1, map_3, map_5, map_7, map_10 = ret[5:10]
    ndcg_1, ndcg_3, ndcg_5, ndcg_7, ndcg_10 = ret[10:15]
    print('Test:') if mode == 'test' else print('Valid:')
    print('hr@1 %f hr@3 %f hr@5 %f hr@7 %f hr@10 %f' %
          (hr_1, hr_3, hr_5, hr_7, hr_10))
    print('MAP@1 %f MAP@3 %f MAP@5 %f MAP@7 %f MAP@10 %f' %
          (map_1, map_3, map_5, map_7, map_10))
    print('ndcg@1 %f ndcg@3 %f ndcg@5 %f ndcg@7 %f ndcg@10 %f' %
          (ndcg_1, ndcg_3, ndcg_5, ndcg_7, ndcg_10))