Beispiel #1
0
def main():
    """
    Evaluate an RCF model by the share of correctly ordered test pairs.

    The model is obtained from ``helper.get_pretrained_RCF_model`` using the
    ``pretrain-rcf`` directory located next to this file. For every user, a
    test pair counts as correct when the score of its ``pos_item`` is strictly
    greater than the score of its ``neg_item``.

    Prints one line per user (``user, correct, pairs, ratio``; users without
    any test pair are reported as ``user, raw_user_id`` and skipped) followed
    by one summary line (``correct, total_pairs, ratio``). Returns nothing.
    """
    data = Dataset()
    args = helper.parse_args()
    model = helper.get_pretrained_RCF_model(data,
                                            args,
                                            path=os.path.join(
                                                os.path.dirname(__file__),
                                                'pretrain-rcf'))

    correct = 0
    for user in range(data.num_users):
        tmp = data.test_data[data.test_data['user'] == user]
        n_pairs = tmp.shape[0]
        if n_pairs == 0:
            # Check before scoring: a user without test pairs needs no
            # model evaluation at all.
            print(user, data.raw_user_id[user])
            continue
        scores = model.get_scores_per_user(user, data, args)
        scores_pos = scores[tmp['pos_item']]
        scores_neg = scores[tmp['neg_item']]
        u_correct = sum(scores_pos > scores_neg)
        print(user, u_correct, n_pairs, u_correct / n_pairs)
        correct += u_correct

    total_pairs = data.test_data.shape[0]
    # An empty test set would otherwise end the run with ZeroDivisionError.
    overall = correct / total_pairs if total_pairs else float('nan')
    print(correct, total_pairs, overall)
Beispiel #2
0
def get_new_scores(ks):
    """
    Get the new scores after retraining for the given values of k.

    Builds one result-file name ``<algo>_<k>.csv`` per value of k, where
    ``algo`` is read from ``parse_args()``, and passes the list to
    ``get_new_scores_main`` together with the ``pretrain-rcf-counterfactual``
    directory under the user's home directory and the ``get_scores`` callback.

    Args:
        ks: values of k to consider
    """
    algo = parse_args().algo
    result_files = [f"{algo}_{k}.csv" for k in ks]

    retrained_root = f"{Path.home()}/pretrain-rcf-counterfactual"
    get_new_scores_main(retrained_root, result_files, get_scores)
def retrain(ks):
    """
    Retrain models without the counterfactual sets for the given values of k.

    Reads ``<algo>_<k>.csv`` for every k, concatenates the rows, and for each
    row that has a counterfactual set trains one model per seed on a dataset
    built with that user/item set ignored. Each model receives a save path
    built by ``prepare_path`` from the ``pretrain-rcf-counterfactual``
    directory under the user's home directory.

    Args:
        ks: values of k to consider
    """
    algo = parse_args().algo
    frames = [pd.read_csv(f"{algo}_{k}.csv") for k in ks]
    results = pd.concat(frames, ignore_index=True)
    print(results)

    home_dir = f"{Path.home()}/pretrain-rcf-counterfactual"

    # Five seeds drawn from a fixed RNG state; the first one is then
    # overridden with 2512.
    np.random.seed(1802)
    seeds = np.random.randint(1000, 10000, 5)
    seeds[0] = 2512

    for record in results.itertuples():
        (idx, user_id, item_id, topk, counterfactual, predicted_scores,
         replacement) = read_row_from_result_file(record)
        if counterfactual is None:
            # Nothing to remove for this row, so no retraining is done.
            continue

        data = Dataset(ignored_user=user_id, ignored_items=counterfactual)
        args = parse_args()
        args.pretrain = -1

        for run, seed in enumerate(seeds):
            run_dir = prepare_path(home_dir, user_id, counterfactual, seed)
            save_file = run_dir + f'ml1M_{args.hidden_factor}'
            model = get_new_RCF_model(data, args, save_file=save_file)
            print('begin retraining', idx, user_id, item_id, topk,
                  counterfactual, predicted_scores, replacement, run, seed)
            started = time()
            model.train(data, args, seed=seed)
            print(f"done retraining {time() - started}")
Beispiel #4
0
def generate_cf(ks):
    """
    Generate counterfactual explanations for multiple values of k.

    Obtains the model via ``helper.get_pretrained_RCF_model`` (using the
    ``pretrain-rcf`` directory next to this file), picks the explainer
    selected by ``args.algo`` and runs it for every user index in
    ``range(data.num_users)``.

    Args:
        ks: values of k to consider

    Returns:
        None. One CSV file named ``<algo>_<k>.csv`` is written per value of k.
    """
    data = Dataset()
    args = helper.parse_args()
    pretrained_dir = os.path.join(os.path.dirname(__file__), 'pretrain-rcf')
    model = helper.get_pretrained_RCF_model(data, args, path=pretrained_dir)

    n_users = data.num_users
    all_results = init_all_results(ks)

    # Any algorithm name not listed here falls back to Accent.
    explainer_types = {
        'pure_att': PureAttention,
        'attention': Attention,
        'pure_fia': PureFIA,
        'fia': FIA,
    }
    explainer = explainer_types.get(args.algo, Accent)()

    for user_id in range(n_users):
        print(f'testing user {user_id}/{n_users}: {user_id}')
        found = explainer.find_counterfactual_multiple_k(
            user_id, ks, model, data, args)
        append_result(ks, all_results, user_id, found)

    for j, k in enumerate(ks):
        pd.DataFrame(all_results[j]).to_csv(f'{args.algo}_{k}.csv',
                                            index=False)
Beispiel #5
0
def get_scores(idx, user_id, item_id, topk, counterfactual, predicted_scores,
               replacement, item2scores, home_dir):
    """
    Get the scores of all items after retraining.

    Results are cached in ``item2scores`` under the key returned by
    ``counterfactual2path(user_id, counterfactual)``; a cached entry is
    returned without loading any model.

    Args:
        idx: test number (only used in the progress message)
        user_id: ID of user
        item_id: ID of item (only used in the progress message)
        topk: the top-k items (only used in the progress message)
        counterfactual: the counterfactual set
        predicted_scores: the predicted scores (only used in the progress
            message)
        replacement: the replacement item (only used in the progress message)
        item2scores: a dict for caching, updated in place
        home_dir: the directory where trained models are stored

    Returns:
        a 2d array where each row is the scores of all items in one retrain,
        or None when ``prepare_new_scores`` returns None.
    """
    cache_key = counterfactual2path(user_id, counterfactual)
    if cache_key in item2scores:
        return item2scores[cache_key]

    model_dirs = prepare_new_scores(user_id, cache_key, home_dir)
    if model_dirs is None:
        return None

    data = Dataset(ignored_user=user_id, ignored_items=counterfactual)
    args = parse_args()
    args.pretrain = -1

    # NOTE(review): the row count is fixed at 5. More than 5 model
    # directories would raise IndexError below and fewer would leave all-zero
    # rows -- confirm prepare_new_scores always yields exactly 5.
    retrained_scores = np.zeros((5, data.num_items))
    for run, model_dir in enumerate(model_dirs):
        model = get_pretrained_RCF_model(data, args, model_dir)
        print('begin scoring', idx, user_id, item_id, topk, counterfactual,
              predicted_scores, replacement, run, model_dir)

        retrained_scores[run] = model.get_scores_per_user(user_id, data, args)

    item2scores[cache_key] = retrained_scores
    return retrained_scores
Beispiel #6
0
import os
from time import time

from RCF.src.dataset import Dataset
from RCF.src.helper import parse_args, get_new_RCF_model
from RCF.src.test_rcf import main

if __name__ == '__main__':
    # Train a new RCF model on the full dataset, then run the evaluation
    # entry point, printing wall-clock timestamps around both phases.
    args = parse_args()
    args.pretrain = -1
    data = Dataset()

    # The model's save file lives in the pretrain-rcf folder next to this
    # script.
    save_file = os.path.join(
        os.path.dirname(__file__),
        'pretrain-rcf/%s_%d' % ('ml1M', args.hidden_factor))
    model = get_new_RCF_model(data, args, save_file)

    train_start = time()
    print(f"begin train {train_start}")
    model.train(data, args, seed=2512)
    train_end = time()
    print(f"end train {train_end} {train_end - train_start}")

    test_start = time()
    print(f"begin test {test_start}")
    main()
    test_end = time()
    print(f"finish {test_end} {test_end - test_start}")