os.makedirs(result_save_path, exist_ok=True)  # don't crash if the folder already exists

res = pd.DataFrame({'metric@K': ['pre', 'rec', 'hr', 'map', 'mrr', 'ndcg']})

for k in [1, 5, 10, 20, 30, 50]:
    if k > args.topk:
        continue
    # truncate each user's (binary) rank list to the current cutoff k
    tmp_preds = preds.copy()
    tmp_preds = {key: rank_list[:k] for key, rank_list in tmp_preds.items()}

    pre_k = np.mean([precision_at_k(r, k) for r in tmp_preds.values()])
    rec_k = recall_at_k(tmp_preds, test_ur, k)
    hr_k = hr_at_k(tmp_preds, test_ur)
    map_k = map_at_k(tmp_preds.values())
    mrr_k = mrr_at_k(tmp_preds, k)
    ndcg_k = np.mean([ndcg_at_k(r, k) for r in tmp_preds.values()])

    if k == 10:
        print(f'Precision@{k}: {pre_k:.4f}')
        print(f'Recall@{k}: {rec_k:.4f}')
        print(f'HR@{k}: {hr_k:.4f}')
        print(f'MAP@{k}: {map_k:.4f}')
        print(f'MRR@{k}: {mrr_k:.4f}')
        print(f'NDCG@{k}: {ndcg_k:.4f}')

    res[k] = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])

res.to_csv(f'{result_save_path}{args.prepro}_{args.test_method}_pairneumf{args.loss_type}_{args.sample_method}.csv',
           index=False)
print('=' * 20, ' Done ', '=' * 20)
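# For reference, the two metric helpers averaged inline above operate on
# binary relevance lists (1 = hit, 0 = miss). A minimal sketch assuming the
# conventional top-k definitions; the repo's own implementations may differ
# in detail.
import numpy as np

def precision_at_k(r, k):
    # fraction of the top-k ranked items that are relevant
    return np.mean(np.asarray(r, dtype=float)[:k])

def dcg_at_k(r, k):
    # discounted cumulative gain with log2 position discount
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    return 0.

def ndcg_at_k(r, k):
    # normalize by the DCG of an ideal (descending) ordering
    idcg = dcg_at_k(sorted(r, reverse=True), k)
    return dcg_at_k(r, k) / idcg if idcg > 0 else 0.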
def opt_func(params, mi=args.sc_met, topk=args.topk):
    sim_method, maxk = params['sim_method'], int(params['maxk'])
    print(f'Parameter Settings: sim_method: {sim_method}, maxk: {maxk}')

    # store metrics result for the validation set
    fnl_metric = []
    for fold in range(fn):
        print(f'Start Validation [{fold + 1}]......')
        train = train_set_list[fold]
        validation = val_set_list[fold]

        # get ground truth
        train_ur = get_ur(train)
        val_ur = get_ur(validation)

        # build recommender model with the sampled hyper-parameters
        # (previously args.maxk / args.sim_method were passed by mistake,
        # so every trial evaluated the same model)
        model = ItemKNNCF(user_num, item_num, maxk=maxk, min_k=args.mink,
                          similarity=sim_method, tune_or_not=True,
                          serial=f'{args.dataset}-{args.prepro}-{args.val_method}-{fold}-{sim_method}')
        model.fit(train)

        # build candidates set: ground-truth items plus random negatives
        val_ucands = defaultdict(list)
        for k, v in val_ur.items():
            sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
            sub_item_pool = item_pool - v - train_ur[k]  # remove GT & interacted
            sample_num = min(len(sub_item_pool), sample_num)
            samples = random.sample(list(sub_item_pool), sample_num)  # sample() needs a sequence
            val_ucands[k] = list(v | set(samples))

        # score each user's candidates in parallel, one task per user
        cores = 32
        preds = {}

        def func(u):
            pred_rates = [model.predict(u, i) for i in val_ucands[u]]
            rec_idx = np.argsort(pred_rates)[::-1][:topk]
            preds[u] = np.array(val_ucands[u])[rec_idx]

        with ThreadPoolExecutor(max_workers=cores) as pool:
            # submit all tasks first, then wait; calling .result() right
            # after each submit would serialize the work
            futures = [pool.submit(func, u) for u in val_ucands.keys()]
            for fut in tqdm(futures):
                fut.result()

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, topk)
        ndcg_k = np.mean([ndcg_at_k(r, topk) for r in preds.values()])

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # get final validation metrics by averaging over folds
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('=' * 20, 'Metrics for All Validation', '=' * 20)
    print(f'Precision@{topk}: {fnl_metric[0]:.4f}')
    print(f'Recall@{topk}: {fnl_metric[1]:.4f}')
    print(f'HR@{topk}: {fnl_metric[2]:.4f}')
    print(f'MAP@{topk}: {fnl_metric[3]:.4f}')
    print(f'MRR@{topk}: {fnl_metric[4]:.4f}')
    print(f'NDCG@{topk}: {fnl_metric[5]:.4f}')

    score = fnl_metric[metric_idx[mi]]

    # record all tuning results and settings
    fnl_metric = [f'{mt:.4f}' for mt in fnl_metric]
    line = ','.join(fnl_metric) + f',{sim_method},{maxk}' + '\n'
    f.write(line)
    f.flush()

    # the optimizer minimizes, so return the negative score
    return -score
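# Since opt_func takes a dict of sampled parameters and returns -score, it can
# be wired into hyperopt's fmin. A minimal sketch: the search ranges and the
# similarity names below are illustrative assumptions, not values from the repo.
from hyperopt import Trials, fmin, hp, tpe

space = {
    'sim_method': hp.choice('sim_method', ['cosine', 'jaccard', 'pearson']),
    'maxk': hp.quniform('maxk', 10, 100, 10),  # opt_func casts this to int
}
trials = Trials()
# TPE minimizes the objective, which is why opt_func returns the negated score
best = fmin(opt_func, space, algo=tpe.suggest, max_evals=30, trials=trials)
print(best)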
def opt_func(params, mi=args.sc_met, topk=args.topk):
    num_ng, factor_num, num_layers = int(params['num_ng']), int(params['factor_num']), int(params['num_layers'])
    dropout, lr, batch_size, lamda = params['dropout'], params['lr'], params['batch_size'], params['lamda']
    print(f'Parameter Settings: num_ng: {num_ng}, factors: {factor_num}, layers: {num_layers}, '
          f'dropout: {dropout}, lr: {lr}, batch_size: {batch_size}, lamda: {lamda}')

    # store metrics result for the final validation set
    fnl_metric = []
    for fold in range(fn):
        print(f'Start Validation [{fold + 1}]......')
        train = train_set_list[fold]
        validation = val_set_list[fold]

        # get ground truth
        train_ur = get_ur(train)
        val_ur = get_ur(validation)

        # start negative sampling
        train_sampled = negative_sampling(user_num, item_num, train, num_ng)

        # format training data
        train_dataset = PointMFData(train_sampled)
        train_loader = data.DataLoader(train_dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=4)

        # whether to load a pre-trained model
        model_name = args.model_name
        assert model_name in ['MLP', 'GMF', 'NeuMF-end', 'NeuMF-pre']
        GMF_model_path = f'./tmp/{args.dataset}/CL/GMF.pt'
        MLP_model_path = f'./tmp/{args.dataset}/CL/MLP.pt'
        NeuMF_model_path = f'./tmp/{args.dataset}/CL/NeuMF.pt'

        if model_name == 'NeuMF-pre':
            assert os.path.exists(GMF_model_path), 'lack of GMF model'
            assert os.path.exists(MLP_model_path), 'lack of MLP model'
            GMF_model = torch.load(GMF_model_path)
            MLP_model = torch.load(MLP_model_path)
        else:
            GMF_model = None
            MLP_model = None

        # build recommender model
        model = PointNeuMF(user_num, item_num, factor_num, num_layers, dropout,
                           lr, args.epochs, lamda, args.model_name,
                           GMF_model, MLP_model, args.gpu, args.loss_type)
        model.fit(train_loader)

        # build candidates set: ground-truth items plus random negatives
        val_ucands = defaultdict(list)
        for k, v in val_ur.items():
            sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
            sub_item_pool = item_pool - v - train_ur[k]  # remove GT & interacted
            sample_num = min(len(sub_item_pool), sample_num)
            samples = random.sample(list(sub_item_pool), sample_num)  # sample() needs a sequence
            val_ucands[k] = list(v | set(samples))

        # get predict result
        print('')
        print('Generate recommend list...')
        print('')
        preds = {}
        for u in tqdm(val_ucands.keys()):
            # build a validation MF dataset for a certain user u
            tmp = pd.DataFrame({
                'user': [u for _ in val_ucands[u]],
                'item': val_ucands[u],
                'rating': [0. for _ in val_ucands[u]],  # fake label, never used
            })
            tmp_dataset = PointMFData(tmp)
            tmp_loader = data.DataLoader(tmp_dataset, batch_size=candidates_num,
                                         shuffle=False, num_workers=0)

            # get top-N list with torch; all candidates fit in one batch
            for user_u, item_i, _ in tmp_loader:
                if torch.cuda.is_available():
                    user_u = user_u.cuda()
                    item_i = item_i.cuda()
                prediction = model.predict(user_u, item_i)
                _, indices = torch.topk(prediction, topk)
                # move indices back to CPU before indexing the CPU tensor
                top_n = torch.take(torch.tensor(val_ucands[u]), indices.cpu()).numpy()
            preds[u] = top_n

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, topk)
        ndcg_k = np.mean([ndcg_at_k(r, topk) for r in preds.values()])

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # get final validation metrics by averaging over folds
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('=' * 20, 'Metrics for All Validation', '=' * 20)
    print(f'Precision@{topk}: {fnl_metric[0]:.4f}')
    print(f'Recall@{topk}: {fnl_metric[1]:.4f}')
    print(f'HR@{topk}: {fnl_metric[2]:.4f}')
    print(f'MAP@{topk}: {fnl_metric[3]:.4f}')
    print(f'MRR@{topk}: {fnl_metric[4]:.4f}')
    print(f'NDCG@{topk}: {fnl_metric[5]:.4f}')

    score = fnl_metric[metric_idx[mi]]

    # record all tuning results and settings
    fnl_metric = [f'{mt:.4f}' for mt in fnl_metric]
    line = ','.join(fnl_metric) + f',{num_ng},{factor_num},{num_layers},{dropout},{lr},{batch_size},{lamda}' + '\n'
    f.write(line)
    f.flush()

    # the optimizer minimizes, so return the negative score
    return -score
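# The negative_sampling helper above is not shown in this section. A minimal,
# hypothetical sketch of what point-wise negative sampling typically does here:
# keep every observed (user, item) pair as a positive and pair it with num_ng
# uniformly drawn unobserved items as negatives. Column names and labels are
# assumptions chosen to match PointMFData's (user, item, rating) layout.
def negative_sampling(user_num, item_num, df, num_ng):
    ur = df.groupby('user')['item'].apply(set).to_dict()
    rows = []
    for u, i in df[['user', 'item']].values:
        rows.append((u, i, 1.))  # observed interaction -> positive label
        for _ in range(num_ng):
            j = np.random.randint(item_num)
            while j in ur[u]:  # resample until the item is unobserved
                j = np.random.randint(item_num)
            rows.append((u, j, 0.))  # sampled negative
    return pd.DataFrame(rows, columns=['user', 'item', 'rating'])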
def opt_func(params, mi=args.sc_met, topk=args.topk):
    # this tuner receives params as a positional list, not a dict
    factors = int(params[0])
    print(f'Parameter Settings: factors: {factors}')

    # store metrics result for the final validation set
    fnl_metric = []
    for fold in range(fn):
        print(f'Start Validation [{fold + 1}]......')
        train = train_set_list[fold]
        validation = val_set_list[fold]

        # get ground truth
        train_ur = get_ur(train)
        val_ur = get_ur(validation)

        # build recommender model
        model = PureSVD(user_num, item_num, factors)
        model.fit(train)

        # build candidates set: ground-truth items plus random negatives
        val_ucands = defaultdict(list)
        for k, v in val_ur.items():
            sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
            sub_item_pool = item_pool - v - train_ur[k]  # remove GT & interacted
            sample_num = min(len(sub_item_pool), sample_num)
            samples = random.sample(list(sub_item_pool), sample_num)  # sample() needs a sequence
            val_ucands[k] = list(v | set(samples))

        # get predict result
        print('')
        print('Generate recommend list...')
        print('')
        preds = {}
        for u in tqdm(val_ucands.keys()):
            pred_rates = [model.predict(u, i) for i in val_ucands[u]]
            rec_idx = np.argsort(pred_rates)[::-1][:topk]
            preds[u] = np.array(val_ucands[u])[rec_idx]

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, topk)
        ndcg_k = np.mean([ndcg_at_k(r, topk) for r in preds.values()])

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # get final validation metrics by averaging over folds
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('=' * 20, 'Metrics for All Validation', '=' * 20)
    print(f'Precision@{topk}: {fnl_metric[0]:.4f}')
    print(f'Recall@{topk}: {fnl_metric[1]:.4f}')
    print(f'HR@{topk}: {fnl_metric[2]:.4f}')
    print(f'MAP@{topk}: {fnl_metric[3]:.4f}')
    print(f'MRR@{topk}: {fnl_metric[4]:.4f}')
    print(f'NDCG@{topk}: {fnl_metric[5]:.4f}')

    score = fnl_metric[metric_idx[mi]]

    # record all tuning results and settings
    fnl_metric = [f'{mt:.4f}' for mt in fnl_metric]
    line = ','.join(fnl_metric) + f',{factors}' + '\n'
    f.write(line)
    f.flush()

    # the optimizer minimizes, so return the negative score
    return -score
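# A minimal sketch of the PureSVD baseline consumed above, assuming the
# constructor/fit/predict signatures seen in this tuner: truncated SVD of the
# sparse user-item rating matrix, with scores recovered as dot products of the
# user and item factors. The repo's own class may differ in detail.
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

class PureSVD:
    def __init__(self, user_num, item_num, factors):
        # factors must be smaller than min(user_num, item_num) for svds
        self.user_num, self.item_num, self.factors = user_num, item_num, factors

    def fit(self, df):
        # build the sparse user-item matrix from a (user, item, rating) frame
        mat = csr_matrix((df['rating'], (df['user'], df['item'])),
                         shape=(self.user_num, self.item_num), dtype=float)
        u, s, vt = svds(mat, k=self.factors)
        self.user_vec = u * s   # (user_num, factors), singular values folded in
        self.item_vec = vt.T    # (item_num, factors)

    def predict(self, u, i):
        return float(self.user_vec[u] @ self.item_vec[i])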
model.fit(train)

# get predict result
preds = model.predict(val_ur, train_ur, args.topk)

# convert rank list to binary-interaction
for u in preds.keys():
    preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

# calculate metrics for validation set
pre_k = np.mean([precision_at_k(r, args.topk) for r in preds.values()])
rec_k = recall_at_k(preds, val_ur, args.topk)
hr_k = hr_at_k(preds, val_ur)
map_k = map_at_k(preds.values())
mrr_k = mrr_at_k(preds, args.topk)
ndcg_k = np.mean([ndcg_at_k(r, args.topk) for r in preds.values()])

print('-' * 20)
print(f'Precision@{args.topk}: {pre_k:.4f}')
print(f'Recall@{args.topk}: {rec_k:.4f}')
print(f'HR@{args.topk}: {hr_k:.4f}')
print(f'MAP@{args.topk}: {map_k:.4f}')
print(f'MRR@{args.topk}: {mrr_k:.4f}')
print(f'NDCG@{args.topk}: {ndcg_k:.4f}')

tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
fnl_metric.append(tmp_metric)

# get final validation metrics by averaging over folds
fnl_metric = np.array(fnl_metric).mean(axis=0)
print('=' * 20, 'Metrics for All Validation', '=' * 20)
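# Every snippet above relies on get_ur to build per-user ground truth. The
# set operations applied to its output (v | set(samples), item_pool - v)
# imply it maps each user to the set of items they interacted with. A
# plausible one-liner, stated as an assumption rather than the repo's code:
def get_ur(df):
    # group the interaction frame by user and collect each user's items
    return df.groupby('user')['item'].apply(set).to_dict()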