def test_read_uir():
    """Test read_uir function.

    Verifies the parsed (user, item, rating) triplets from the fixture file
    and that an out-of-range column index raises IndexError.
    """
    data_file = './tests/data.txt'
    triplet_data = reader.read_uir(data_file)
    assert len(triplet_data) == 10
    assert triplet_data[4][2] == 3
    assert triplet_data[6][1] == '478'
    assert triplet_data[8][0] == '543'
    # The original `except IndexError: assert True` pattern silently passed
    # even when no exception was raised; fail explicitly in that case.
    try:
        reader.read_uir(data_file, 10)
    except IndexError:
        pass
    else:
        raise AssertionError('expected IndexError for out-of-range column index')
def test_with_ratio_split():
    """Run an Experiment with RatioSplit and check the aggregated results.

    Also verifies that Experiment rejects a missing eval_method / metrics
    with ValueError.
    """
    data_file = './tests/data.txt'
    data = reader.read_uir(data_file)
    exp = Experiment(eval_method=RatioSplit(data, verbose=True),
                     models=[PMF(1, 0)],
                     metrics=[MAE(), RMSE(), Recall(1), FMeasure(1)],
                     verbose=True)
    exp.run()

    assert (1, 4) == exp.results.avg.shape

    assert 1 == len(exp.results.per_user)
    assert 4 == len(exp.results.per_user['PMF'])
    assert 2 == len(exp.results.per_user['PMF']['MAE'])
    assert 2 == len(exp.results.per_user['PMF']['RMSE'])
    assert 2 == len(exp.results.per_user['PMF']['Recall@1'])
    assert 2 == len(exp.results.per_user['PMF']['F1@1'])

    # The original `except ValueError: assert True` pattern silently passed
    # even when no exception was raised; fail explicitly in that case.
    try:
        Experiment(None, None, None)
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError for Experiment(None, None, None)')

    try:
        Experiment(None, [PMF(1, 0)], None)
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError for missing eval_method/metrics')
def test_with_cross_validation():
    """Smoke-test an Experiment driven by CrossValidation."""
    ratings = reader.read_uir('./tests/data.txt')
    experiment = Experiment(eval_method=CrossValidation(ratings),
                            models=[PMF(1, 0)],
                            metrics=[MAE(), RMSE(), Recall(1), FMeasure(1)],
                            verbose=True)
    experiment.run()
def test_get_train_test_sets_next_fold():
    """Walk every fold and check the fold counter and train-matrix shape."""
    ratings = reader.read_uir('./tests/data.txt')
    cv = CrossValidation(data=ratings, n_folds=5)
    for fold_idx in range(cv.n_folds):
        cv._get_train_test()
        assert cv.current_fold == fold_idx
        # 10 ratings split 5-fold leaves an 8x8 training matrix each time.
        assert cv.train_set.matrix.shape == (8, 8)
        cv._next_fold()
def test_splits():
    """Check that RatioSplit marks itself as split and tolerates a re-split."""
    ratings = reader.read_uir('./tests/data.txt')
    splitter = RatioSplit(ratings, test_size=0.1, val_size=0.1,
                          seed=123, verbose=True)
    splitter.split()
    assert splitter._split_ran
    # Calling split() again on an already-split method must be harmless.
    splitter.split()
def test_matrix_trainset_uir_iter():
    """Check the (user, item, rating) batch iterator of MatrixTrainSet."""
    triplets = reader.read_uir('./tests/data.txt')
    ts = MatrixTrainSet.from_uir(triplets,
                                 global_uid_map={},
                                 global_iid_map={},
                                 global_ui_set=set(),
                                 verbose=True)

    got_users = [u for u, _, _ in ts.uir_iter()]
    assert all(a == b for a, b in zip(got_users, range(10)))

    got_items = [i for _, i, _ in ts.uir_iter()]
    assert all(a == b for a, b in zip(got_items, range(10)))

    expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]
    got_ratings = [r for _, _, r in ts.uir_iter()]
    assert all(a == b for a, b in zip(got_ratings, expected_ratings))
def test_matrix_trainset_uij_iter():
    """Check the (user, positive-item, negative-item) batch iterator."""
    triplets = reader.read_uir('./tests/data.txt')
    ts = MatrixTrainSet.from_uir(triplets,
                                 global_uid_map={},
                                 global_iid_map={},
                                 global_ui_set=set(),
                                 verbose=True)

    got_users = [u for u, _, _ in ts.uij_iter()]
    assert all(a == b for a, b in zip(got_users, range(10)))

    got_pos = [p for _, p, _ in ts.uij_iter()]
    assert all(a == b for a, b in zip(got_pos, range(10)))

    # Sampled negatives must differ from the positive item at each position.
    got_neg = [n for _, _, n in ts.uij_iter()]
    assert all(a != b for a, b in zip(got_neg, range(10)))
def test_partition_data():
    """Check that CrossValidation assigns every rating to a balanced fold."""
    ratings = reader.read_uir('./tests/data.txt')
    n_folds = 5
    cv = CrossValidation(data=ratings, n_folds=n_folds)

    assert len(cv.partition) == len(ratings)
    # Every fold id 0..n_folds-1 must appear in the partition.
    assert set(cv.partition) == set(range(n_folds))
    # 10 ratings over 5 folds => exactly 2 per fold.
    counts = np.unique(cv.partition, return_counts=True)[1]
    assert np.all(counts == 2)
def test_testset():
    """Test TestSet construction, id mapping, and pair exclusion."""
    triplets = reader.read_uir('./tests/data.txt')

    ts = TestSet.from_uir(triplets,
                          global_uid_map={},
                          global_iid_map={},
                          global_ui_set=set())
    assert ts.get_uid('768') == 1
    assert ts.get_iid('195') == 7
    assert all(a == b for a, b in zip(ts.users, range(10)))
    assert all(a == b for a, b in zip(ts.get_ratings(2), [(2, 4)]))

    # Excluding one known (user, item) pair drops that user from the set.
    ts = TestSet.from_uir(triplets,
                          global_uid_map={},
                          global_iid_map={},
                          global_ui_set={('76', '93')},
                          verbose=True)
    assert len(ts.users) == 9
def test_uir_tuple():
    """Check that uir_tuple rejects a malformed tuple and num_batches works.

    Assigning a 2-element tuple must raise ValueError; the original
    `except ValueError: assert True` pattern silently passed even when no
    exception was raised.
    """
    triplet_data = reader.read_uir('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir(triplet_data,
                                        global_uid_map=None,
                                        global_iid_map=None,
                                        global_ui_set=None,
                                        verbose=True)
    try:
        train_set.uir_tuple = ([], [])
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError for a 2-element uir_tuple')

    # 10 ratings with batch_size=5 => 2 batches.
    assert 2 == train_set.num_batches(batch_size=5)
def test_validate_partition():
    """Check that _validate_partition rejects invalid partitions.

    The original code used a bare `except:` with `assert True`, which both
    swallowed every exception type and silently passed when no exception was
    raised. Narrowed to Exception and made the no-exception path fail.
    """
    data = reader.read_uir('./tests/data.txt')
    nfolds = 5
    cv = CrossValidation(data=data, n_folds=nfolds)

    # Wrong length: partition must cover all 10 ratings.
    try:
        cv._validate_partition([0, 0, 1, 1])
    except Exception:
        pass
    else:
        raise AssertionError('expected failure for a too-short partition')

    # Right length but fold 4 is missing from the assignment.
    try:
        cv._validate_partition([0, 0, 1, 1, 2, 2, 2, 2, 3, 3])
    except Exception:
        pass
    else:
        raise AssertionError('expected failure for a partition missing a fold')
def test_matrix_trainset():
    """Test MatrixTrainSet: stats, id maps, unknown checks, exclusion."""
    triplets = reader.read_uir('./tests/data.txt')
    ts = MatrixTrainSet.from_uir(triplets,
                                 global_uid_map={},
                                 global_iid_map={},
                                 global_ui_set=set(),
                                 verbose=True)

    assert ts.matrix.shape == (10, 10)
    assert ts.min_rating == 3
    assert ts.max_rating == 5
    # Fixture ratings: two 3s, seven 4s, one 5.
    assert int(ts.global_mean) == int((3 * 2 + 4 * 7 + 5) / 10)

    expected_ppl = [7, 9, 6, 5, 3, 2, 1, 0, 8, 4]
    assert all(a == b for a, b in zip(ts.item_ppl_rank, expected_ppl))

    assert ts.num_users == 10
    assert ts.num_items == 10

    assert ts.is_unk_user(7) == False
    assert ts.is_unk_user(13) == True
    assert ts.is_unk_item(3) == False
    assert ts.is_unk_item(16) == True

    assert ts.get_uid('768') == 1
    assert ts.get_iid('195') == 7

    raw_uids = ['76', '768', '642', '930', '329',
                '633', '716', '871', '543', '754']
    raw_iids = ['93', '257', '795', '709', '705',
                '226', '478', '195', '737', '282']
    assert all(a == b for a, b in zip(ts.uid_list, range(10)))
    assert all(a == b for a, b in zip(ts.raw_uid_list, raw_uids))
    assert all(a == b for a, b in zip(ts.iid_list, range(10)))
    assert all(a == b for a, b in zip(ts.raw_iid_list, raw_iids))

    # Excluding one known (user, item) pair removes that user and item.
    ts = MatrixTrainSet.from_uir(triplets,
                                 global_uid_map={},
                                 global_iid_map={},
                                 global_ui_set=set([('76', '93')]),
                                 verbose=True)
    assert ts.num_users == 9
    assert ts.num_items == 9
def test_from_splits():
    """Check BaseMethod.from_splits validation and resulting totals.

    Missing train/test data must raise ValueError; the original
    `except ValueError: assert True` pattern silently passed even when no
    exception was raised.
    """
    data = reader.read_uir('./tests/data.txt')

    try:
        BaseMethod.from_splits(train_data=None, test_data=None)
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError when train_data is None')

    try:
        BaseMethod.from_splits(train_data=data, test_data=None)
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError when test_data is None')

    bm = BaseMethod.from_splits(train_data=data, test_data=data)
    assert bm.total_users == 10
    assert bm.total_items == 10

    bm = BaseMethod.from_splits(train_data=data, test_data=data,
                                val_data=data, verbose=True)
    assert bm.total_users == 10
    assert bm.total_items == 10
def test_evaluate():
    """Smoke-test RatioSplit.evaluate with and without unknown exclusion."""
    ratings = reader.read_uir('./tests/data.txt')

    for exclude in (True, False):
        rs = RatioSplit(ratings, exclude_unknowns=exclude, verbose=True)
        rs.evaluate(MF(), [MAE(), Recall()], user_based=False)

    # Densify the data: add a rating of 5 for every observed user/item pair.
    seen_users = [u for u, _, _ in ratings]
    seen_items = [i for _, i, _ in ratings]
    for u in seen_users:
        for i in seen_items:
            ratings.append((u, i, 5))

    for exclude in (True, False):
        rs = RatioSplit(ratings, exclude_unknowns=exclude, verbose=True)
        rs.evaluate(MF(), [MAE(), Recall()], user_based=True)
@author: Quoc-Tuan Truong <*****@*****.**> """ from cornac.data import reader from cornac.eval_methods import BaseMethod from cornac.models import MF from cornac.metrics import MAE, RMSE from cornac.utils import cache # Download MovieLens 100K provided training and test splits train_path = cache( url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.base') test_path = cache( url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.test') train_data = reader.read_uir(train_path) test_data = reader.read_uir(test_path) eval_method = BaseMethod.from_splits(train_data=train_data, test_data=test_data, exclude_unknowns=False, verbose=True) mf = MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, early_stop=True, verbose=True)