Code example #1
        os.makedirs(result_save_path)

    res = pd.DataFrame({'metric@K': ['pre', 'rec', 'hr', 'map', 'mrr', 'ndcg']})

    for k in [1, 5, 10, 20, 30, 50]:
        if k > args.topk:
            continue
        tmp_preds = preds.copy()        
        tmp_preds = {key: rank_list[:k] for key, rank_list in tmp_preds.items()}

        pre_k = np.mean([precision_at_k(r, k) for r in tmp_preds.values()])
        rec_k = recall_at_k(tmp_preds, test_ur, k)
        hr_k = hr_at_k(tmp_preds, test_ur)
        map_k = map_at_k(tmp_preds.values())
        mrr_k = mrr_at_k(tmp_preds, k)
        ndcg_k = np.mean([ndcg_at_k(r, k) for r in tmp_preds.values()])

        if k == 10:
            print(f'Precision@{k}: {pre_k:.4f}')
            print(f'Recall@{k}: {rec_k:.4f}')
            print(f'HR@{k}: {hr_k:.4f}')
            print(f'MAP@{k}: {map_k:.4f}')
            print(f'MRR@{k}: {mrr_k:.4f}')
            print(f'NDCG@{k}: {ndcg_k:.4f}')

        res[k] = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])

    res.to_csv(f'{result_save_path}{args.prepro}_{args.test_method}_pairneumf{args.loss_type}_{args.sample_method}.csv', 
               index=False)
    print('='* 20, ' Done ', '='*20)
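
The metric helpers called throughout these snippets (precision_at_k, recall_at_k, hr_at_k, map_at_k, mrr_at_k, ndcg_at_k) come from the project's own utility module and are not shown here. The sketch below gives plausible binary-relevance definitions that match how they are called above (each r is a 0/1 list truncated to the top-k positions, and test_ur maps a user to the set of held-out items); treat it as an illustration, not the project's actual implementation.

import numpy as np

def precision_at_k(r, k):
    # fraction of the top-k slots that are hits
    return float(np.mean(np.asarray(r)[:k]))

def recall_at_k(preds, test_ur, k):
    # hits in the top-k divided by the number of held-out items, averaged over users
    return float(np.mean([np.sum(r[:k]) / max(len(test_ur[u]), 1) for u, r in preds.items()]))

def hr_at_k(preds, test_ur):
    # share of users with at least one hit (test_ur kept only to mirror the call signature)
    return float(np.mean([1. if np.sum(r) > 0 else 0. for r in preds.values()]))

def average_precision(r):
    # mean of precision at every position that holds a hit
    hits = [precision_at_k(r, i + 1) for i, rel in enumerate(r) if rel]
    return float(np.mean(hits)) if hits else 0.

def map_at_k(rs):
    # mean average precision over all users' binary lists
    return float(np.mean([average_precision(r) for r in rs]))

def mrr_at_k(preds, k):
    # reciprocal rank of the first hit within the top-k positions, averaged over users
    rr = []
    for r in preds.values():
        first_hit = next((i for i, rel in enumerate(r[:k]) if rel), None)
        rr.append(0. if first_hit is None else 1. / (first_hit + 1))
    return float(np.mean(rr))

def ndcg_at_k(r, k):
    # binary-relevance NDCG: DCG of the list over the DCG of an ideal reordering
    r = np.asarray(r, dtype=float)[:k]
    if r.size == 0:
        return 0.
    discounts = np.log2(np.arange(2, r.size + 2))
    idcg = np.sum(np.sort(r)[::-1] / discounts)
    return float(np.sum(r / discounts) / idcg) if idcg > 0 else 0.
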
Code example #2
def opt_func(params, mi=args.sc_met, topk=args.topk):
    sim_method, maxk = params['sim_method'], int(params['maxk'])
    print(f'Parameter Settings: sim_method:{sim_method}, maxk: {maxk}')

    # store metric results for each validation fold
    fnl_metric = []
    for fold in range(fn):
        print(f'Start Validation [{fold + 1}]......')
        train = train_set_list[fold]
        validation = val_set_list[fold]

        # get ground truth
        train_ur = get_ur(train)
        val_ur = get_ur(validation)

        # build recommender model
        # use the sampled hyper-parameters (not the CLI defaults) so the tuner actually varies them
        model = ItemKNNCF(user_num, item_num,
                          maxk=maxk,
                          min_k=args.mink,
                          similarity=sim_method,
                          tune_or_not=True,
                          serial=f'{args.dataset}-{args.prepro}-{args.val_method}-{fold}-{sim_method}')
        model.fit(train)

        # build candidates set
        val_ucands = defaultdict(list)
        for k, v in val_ur.items():
            sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
            sub_item_pool = item_pool - v - train_ur[k] # remove GT & interacted
            sample_num = min(len(sub_item_pool), sample_num)
            samples = random.sample(list(sub_item_pool), sample_num)
            val_ucands[k] = list(v | set(samples))

        # get predict result
        # preds = {}
        # for u in tqdm(val_ucands.keys()):
        #     pred_rates = [model.predict(u, i) for i in val_ucands[u]]
        #     rec_idx = np.argsort(pred_rates)[::-1][:topk]
        #     top_n = np.array(val_ucands[u])[rec_idx]
        #     preds[u] = top_n
        cores = 32
        pool = ThreadPoolExecutor(cores)

        preds = {}
        ct = 0
        def func(u):
            pred_rates = [model.predict(u, i) for i in val_ucands[u]]
            rec_idx = np.argsort(pred_rates)[::-1][:topk]
            top_n = np.array(val_ucands[u])[rec_idx]
            preds[u] = top_n
            return 1

        # NOTE: calling result() right after submit() waits for each task to finish,
        # so this loop effectively runs the per-user predictions sequentially
        for u in tqdm(val_ucands.keys()):
            c_r = pool.submit(func, u)
            ct += c_r.result()

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, topk)
        ndcg_k = np.mean([ndcg_at_k(r, topk) for r in preds.values()])

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # average the per-fold metrics to get the final validation result
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('='*20, 'Metrics for All Validation', '='*20)
    print(f'Precision@{topk}: {fnl_metric[0]:.4f}')
    print(f'Recall@{topk}: {fnl_metric[1]:.4f}')
    print(f'HR@{topk}: {fnl_metric[2]:.4f}')
    print(f'MAP@{topk}: {fnl_metric[3]:.4f}')
    print(f'MRR@{topk}: {fnl_metric[4]:.4f}')
    print(f'NDCG@{topk}: {fnl_metric[5]:.4f}')

    score = fnl_metric[metric_idx[mi]]

    # record all tuning result and settings
    fnl_metric = [f'{mt:.4f}' for mt in fnl_metric]
    line = ','.join(fnl_metric) + f',{sim_method},{maxk}' + '\n'

    f.write(line)
    f.flush()

    return -score
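
opt_func returns the negative of the selected validation metric, so maximizing the metric becomes a minimization problem for a black-box tuner. A minimal driver sketch, assuming hyperopt as the optimizer; the search-space keys mirror what opt_func reads, but the actual library, value ranges, and trial budget used by the project are not shown in the snippet, and opt_func still relies on the globals (train_set_list, val_set_list, f, ...) prepared by the tuning script.

from hyperopt import fmin, tpe, hp, Trials

# illustrative search space; the candidate values here are an assumption
space = {
    'sim_method': hp.choice('sim_method', ['cosine', 'jaccard', 'pearson']),
    'maxk': hp.quniform('maxk', 10, 100, 1),
}

trials = Trials()
best = fmin(fn=opt_func,        # opt_func returns -metric, so fmin effectively maximizes the metric
            space=space,
            algo=tpe.suggest,
            max_evals=30,
            trials=trials)
print('best parameters found:', best)
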
Code example #3
def opt_func(params, mi=args.sc_met, topk=args.topk):
    num_ng, factor_num, num_layers = int(params['num_ng']), int(params['factor_num']), int(params['num_layers'])
    dropout, lr, batch_size, lamda = params['dropout'], params['lr'], params['batch_size'], params['lamda']
    print(f'Parameter Settings: num_ng:{num_ng},factors:{factor_num},layers:{num_layers},dropout:{dropout},lr:{lr},batch_size:{batch_size},lamda:{lamda}')

    # store metrics result for final validation set
    fnl_metric = []
    for fold in range(fn):
        print(f'Start Validation [{fold + 1}]......')
        train = train_set_list[fold]
        validation = val_set_list[fold]

        # get ground truth
        train_ur = get_ur(train)
        val_ur = get_ur(validation)

        # start negative sampling
        train_sampled = negative_sampling(user_num, item_num, train, num_ng)
        # format training data
        train_dataset = PointMFData(train_sampled)
        train_loader = data.DataLoader(train_dataset, batch_size=batch_size, 
                                    shuffle=True, num_workers=4)

        # decide whether to load pre-trained GMF/MLP sub-models
        model_name = args.model_name
        assert model_name in ['MLP', 'GMF', 'NeuMF-end', 'NeuMF-pre']
        GMF_model_path = f'./tmp/{args.dataset}/CL/GMF.pt'
        MLP_model_path = f'./tmp/{args.dataset}/CL/MLP.pt'
        NeuMF_model_path = f'./tmp/{args.dataset}/CL/NeuMF.pt'

        if model_name == 'NeuMF-pre':
            assert os.path.exists(GMF_model_path), 'lack of GMF model'    
            assert os.path.exists(MLP_model_path), 'lack of MLP model'
            GMF_model = torch.load(GMF_model_path)
            MLP_model = torch.load(MLP_model_path)
        else:
            GMF_model = None
            MLP_model = None

        # build recommender model
        model = PointNeuMF(user_num, item_num, factor_num, num_layers, dropout, 
                            lr, args.epochs, lamda, args.model_name, GMF_model, MLP_model,
                            args.gpu, args.loss_type)
        model.fit(train_loader)

        # build candidates set
        val_ucands = defaultdict(list)
        for k, v in val_ur.items():
            sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
            sub_item_pool = item_pool - v - train_ur[k] # remove GT & interacted
            sample_num = min(len(sub_item_pool), sample_num)
            samples = random.sample(list(sub_item_pool), sample_num)
            val_ucands[k] = list(v | set(samples))
        
        # get predict result
        print('')
        print('Generate recommend list...')
        print('')
        preds = {}
        for u in tqdm(val_ucands.keys()):
            # build a validation MF dataset for certain user u
            tmp = pd.DataFrame({'user': [u for _ in val_ucands[u]], 
                                'item': val_ucands[u], 
                                'rating': [0. for _ in val_ucands[u]], # placeholder rating, never used for ranking
                                })
            tmp_dataset = PointMFData(tmp)
            tmp_loader = data.DataLoader(tmp_dataset, batch_size=candidates_num, 
                                        shuffle=False, num_workers=0)

            # get top-N list with torch method 
            for user_u, item_i, _ in tmp_loader:
                if torch.cuda.is_available():
                    user_u = user_u.cuda()
                    item_i = item_i.cuda()
                else:
                    user_u = user_u.cpu()
                    item_i = item_i.cpu()

                prediction = model.predict(user_u, item_i)
                _, indices = torch.topk(prediction, topk)
                # indices may live on the GPU; move them to CPU before indexing the CPU candidate tensor
                top_n = torch.take(torch.tensor(val_ucands[u]), indices.cpu()).numpy()

            preds[u] = top_n

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, topk)
        ndcg_k = np.mean([ndcg_at_k(r, topk) for r in preds.values()])

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # average the per-fold metrics to get the final validation result
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('='*20, 'Metrics for All Validation', '='*20)
    print(f'Precision@{topk}: {fnl_metric[0]:.4f}')
    print(f'Recall@{topk}: {fnl_metric[1]:.4f}')
    print(f'HR@{topk}: {fnl_metric[2]:.4f}')
    print(f'MAP@{topk}: {fnl_metric[3]:.4f}')
    print(f'MRR@{topk}: {fnl_metric[4]:.4f}')
    print(f'NDCG@{topk}: {fnl_metric[5]:.4f}')

    score = fnl_metric[metric_idx[mi]]

    # record all tuning result and settings
    fnl_metric = [f'{mt:.4f}' for mt in fnl_metric]
    line = ','.join(fnl_metric) + f',{num_ng},{factor_num},{num_layers},{dropout},{lr},{batch_size},{lamda}' + '\n'

    f.write(line)
    f.flush()

    return -score
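
The candidate-set construction repeated in these snippets pads each user's ground-truth items with randomly sampled negatives up to candidates_num, excluding anything the user interacted with during training. The helper below isolates that logic; the name build_candidates is ours, and item_pool is assumed to be the set of all item ids, as the surrounding code implies.

import random
from collections import defaultdict

def build_candidates(val_ur, train_ur, item_pool, candidates_num):
    # val_ur / train_ur: dicts mapping a user id to a set of interacted item ids
    val_ucands = defaultdict(list)
    for user, gt_items in val_ur.items():
        # pad the ground-truth items with sampled negatives up to candidates_num
        sample_num = max(candidates_num - len(gt_items), 0)
        # never sample an item the user already saw in train or validation
        negatives = item_pool - gt_items - train_ur.get(user, set())
        sample_num = min(len(negatives), sample_num)
        samples = random.sample(list(negatives), sample_num)
        val_ucands[user] = list(gt_items | set(samples))
    return val_ucands
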
Code example #4
File: tune_puresvd.py  Project: daisy-dd/code-submit
def opt_func(params, mi=args.sc_met, topk=args.topk):
    factors = int(params[0])
    print(f'Parameter Settings: factors:{factors}')

    # store metrics result for final validation set
    fnl_metric = []
    for fold in range(fn):
        print(f'Start Validation [{fold + 1}]......')
        train = train_set_list[fold]
        validation = val_set_list[fold]

        # get ground truth
        train_ur = get_ur(train)
        val_ur = get_ur(validation)

        # build recommender model
        model = PureSVD(user_num, item_num, factors)
        model.fit(train)

        # build candidates set
        val_ucands = defaultdict(list)
        for k, v in val_ur.items():
            sample_num = candidates_num - len(v) if len(v) < candidates_num else 0
            sub_item_pool = item_pool - v - train_ur[k]  # remove GT & interacted
            sample_num = min(len(sub_item_pool), sample_num)
            samples = random.sample(list(sub_item_pool), sample_num)
            val_ucands[k] = list(v | set(samples))

        # get predict result
        print('')
        print('Generate recommend list...')
        print('')
        preds = {}
        for u in tqdm(val_ucands.keys()):
            pred_rates = [model.predict(u, i) for i in val_ucands[u]]
            rec_idx = np.argsort(pred_rates)[::-1][:topk]
            top_n = np.array(val_ucands[u])[rec_idx]
            preds[u] = top_n

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, topk)
        ndcg_k = np.mean([ndcg_at_k(r, topk) for r in preds.values()])

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # average the per-fold metrics to get the final validation result
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('=' * 20, 'Metrics for All Validation', '=' * 20)
    print(f'Precision@{topk}: {fnl_metric[0]:.4f}')
    print(f'Recall@{topk}: {fnl_metric[1]:.4f}')
    print(f'HR@{topk}: {fnl_metric[2]:.4f}')
    print(f'MAP@{topk}: {fnl_metric[3]:.4f}')
    print(f'MRR@{topk}: {fnl_metric[4]:.4f}')
    print(f'NDCG@{topk}: {fnl_metric[5]:.4f}')

    score = fnl_metric[metric_idx[mi]]

    # record all tuning result and settings
    fnl_metric = [f'{mt:.4f}' for mt in fnl_metric]
    line = ','.join(fnl_metric) + f',{factors}' + '\n'

    f.write(line)
    f.flush()

    return -score
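
PureSVD's class body is not part of this snippet. A common formulation factorizes the implicit user-item matrix with a truncated SVD and scores a user-item pair by the reconstructed entry; the rough sketch below follows that assumption (the class name PureSVDSketch and the 'user'/'item' DataFrame columns are ours, chosen to mirror the call sites above) and is not the project's actual class.

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds

class PureSVDSketch:
    def __init__(self, user_num, item_num, factors=50):
        self.user_num, self.item_num, self.factors = user_num, item_num, factors

    def fit(self, train):
        # build an implicit-feedback matrix from a DataFrame with integer-encoded
        # 'user' and 'item' columns (an assumption about the training frame)
        mat = sp.csr_matrix(
            (np.ones(len(train)), (train['user'].values, train['item'].values)),
            shape=(self.user_num, self.item_num), dtype=np.float32)
        u, s, vt = svds(mat, k=self.factors)
        self.user_vec = u * s      # (user_num, factors)
        self.item_vec = vt.T       # (item_num, factors)

    def predict(self, u, i):
        # reconstructed score for a single user-item pair
        return float(self.user_vec[u] @ self.item_vec[i])
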
Code example #5
        model.fit(train)

        # get predict result
        preds = model.predict(val_ur, train_ur, args.topk)

        # convert rank list to binary-interaction
        for u in preds.keys():
            preds[u] = [1 if i in val_ur[u] else 0 for i in preds[u]]

        # calculate metrics for validation set
        pre_k = np.mean([precision_at_k(r, args.topk) for r in preds.values()])
        rec_k = recall_at_k(preds, val_ur, args.topk)
        hr_k = hr_at_k(preds, val_ur)
        map_k = map_at_k(preds.values())
        mrr_k = mrr_at_k(preds, args.topk)
        ndcg_k = np.mean([ndcg_at_k(r, args.topk) for r in preds.values()])

        print('-'*20)
        print(f'Precision@{args.topk}: {pre_k:.4f}')
        print(f'Recall@{args.topk}: {rec_k:.4f}')
        print(f'HR@{args.topk}: {hr_k:.4f}')
        print(f'MAP@{args.topk}: {map_k:.4f}')
        print(f'MRR@{args.topk}: {mrr_k:.4f}')
        print(f'NDCG@{args.topk}: {ndcg_k:.4f}')

        tmp_metric = np.array([pre_k, rec_k, hr_k, map_k, mrr_k, ndcg_k])
        fnl_metric.append(tmp_metric)

    # average the per-fold metrics to get the final validation result
    fnl_metric = np.array(fnl_metric).mean(axis=0)
    print('='*20, 'Metrics for All Validation', '='*20)