def test_trainset_testset():
    """Check the trainset produced from the custom train/test fold files.

    Loads the single (custom_train, custom_test) fold located next to this
    test module and verifies the trainset's rating containers (``rm``,
    ``ur``, ``ir``), its summary attributes, and the raw-id to inner-id
    mappings.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    train_path = here + '/custom_train'
    test_path = here + '/custom_test'
    data = Dataset.load_from_folds(folds_files=[(train_path, test_path)],
                                   reader=reader)

    # Iterate over the folds only to bind the names: the trainset inspected
    # below is whatever the last iteration left behind.
    for trainset, _testset in data.folds():
        pass

    # rm: looked up with a pair of inner ids.
    ratings = trainset.rm
    expected_ratings = (
        ((0, 0), 4),
        ((1, 0), 4),
        ((3, 1), 5),
        ((40, 20000), 0),  # not in the trainset
    )
    for key, rating in expected_ratings:
        assert ratings[key] == rating

    # ur: looked up with a single inner id, yields a list of pairs.
    by_user = trainset.ur
    expected_by_user = (
        (0, [(0, 4)]),
        (1, [(0, 4), (1, 2)]),
        (40, []),  # not in the trainset
    )
    for inner_id, pairs in expected_by_user:
        assert by_user[inner_id] == pairs

    # ir: same shape as ur, looked up with a single inner id.
    by_item = trainset.ir
    expected_by_item = (
        (0, [(0, 4), (1, 4), (2, 1)]),
        (1, [(1, 2), (2, 1), (3, 5)]),
        (20000, []),  # not in the trainset
    )
    for inner_id, pairs in expected_by_item:
        assert by_item[inner_id] == pairs

    # Summary attributes: n_users, n_items, r_min, r_max.
    expected_attrs = (
        ('n_users', 4),
        ('n_items', 2),
        ('r_min', 1),
        ('r_max', 5),
    )
    for attr_name, value in expected_attrs:
        assert getattr(trainset, attr_name) == value

    # raw2inner: inner ids must be handed out in proper order.
    user_ids = trainset._raw2inner_id_users
    for inner_id in range(4):
        assert user_ids['user' + str(inner_id)] == inner_id

    item_ids = trainset._raw2inner_id_items
    for inner_id in range(2):
        assert item_ids['item' + str(inner_id)] == inner_id
if it were not built-in. """ from __future__ import (absolute_import, division, print_function, unicode_literals) from recsys import BaselineOnly from recsys import Dataset from recsys import evaluate from recsys import Reader # path to dataset folder files_dir = '/home/nico/.recsys_data/ml-100k/ml-100k/' # change this # This time, we'll use the built-in reader. reader = Reader('ml-100k') # folds_files is a list of tuples containing file paths: # [(u1.base, u1.test), (u2.base, u2.test), ... (u5.base, u5.test)] train_file = files_dir + 'u%d.base' test_file = files_dir + 'u%d.test' folds_files = [(train_file % i, test_file % i) for i in (1, 2, 3, 4, 5)] data = Dataset.load_from_folds(folds_files, reader=reader) # We'll use an algorithm that predicts baseline estimates. algo = BaselineOnly() # Evaluate performances of our algorithm on the dataset. evaluate(algo, data)
def test_wrong_file_name():
    """Loading a (custom) Dataset from nonexistent fold files must fail.

    ``Dataset.load_from_folds`` is expected to raise ``ValueError`` when
    the given train/test paths do not point to existing files.
    """
    missing_fold = ('does_not_exist', 'does_not_either')

    with pytest.raises(ValueError):
        Dataset.load_from_folds(folds_files=[missing_fold], reader=reader)