def test_dump():
    """Train an algorithm, compute its predictions then dump them.

    Ensure that the predictions that are loaded back are the correct ones, and
    that the predictions of the dumped algorithm are also equal to the other
    ones.
    """
    random.seed(0)

    # Predefined train/test fold files shipped alongside this test module.
    train_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_train')
    test_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_folds([(train_file, test_file)],
                                   Reader('ml-100k'))
    pkf = PredefinedKFold()

    trainset, testset = next(pkf.split(data))

    algo = BaselineOnly()
    algo.fit(trainset)
    predictions = algo.test(testset)

    with tempfile.NamedTemporaryFile() as tmp_file:
        # Round-trip both the predictions and the fitted algorithm.
        dump.dump(tmp_file.name, predictions, algo)
        predictions_dumped, algo_dumped = dump.load(tmp_file.name)

        # The reloaded algorithm must produce the same predictions as before.
        predictions_algo_dumped = algo_dumped.test(testset)
        assert predictions == predictions_dumped
        assert predictions == predictions_algo_dumped
def test_method_field():
    """Ensure the method field is taken into account."""
    # ALS and SGD baselines should yield different RMSE values.
    algo_als = BaselineOnly(bsl_options={'method': 'als'})
    rmse_als = cross_validate(algo_als, data, ['rmse'], pkf)['test_rmse']

    algo_sgd = BaselineOnly(bsl_options={'method': 'sgd'})
    rmse_sgd = cross_validate(algo_sgd, data, ['rmse'], pkf)['test_rmse']

    assert rmse_als != rmse_sgd

    # An unknown method name must raise at fit time.
    with pytest.raises(ValueError):
        algo_bad = BaselineOnly(bsl_options={'method': 'wrong_name'})
        cross_validate(algo_bad, data, ['rmse'], pkf)['test_rmse']
def test_als_reg_i_field():
    """Ensure the reg_i field is taken into account."""
    # Two different item regularizations must lead to different RMSEs.
    options_no_reg = {
        'method': 'als',
        'reg_i': 0,
    }
    rmse_als_regi_0 = cross_validate(
        BaselineOnly(bsl_options=options_no_reg), data, ['rmse'],
        pkf)['test_rmse']

    options_reg = {
        'method': 'als',
        'reg_i': 10,
    }
    rmse_als_regi_10 = cross_validate(
        BaselineOnly(bsl_options=options_reg), data, ['rmse'],
        pkf)['test_rmse']

    assert rmse_als_regi_0 != rmse_als_regi_10
def test_sgd_n_epoch_field():
    """Ensure the n_epochs field is taken into account."""
    bsl_options = {
        'method': 'sgd',
        'n_epochs': 1,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_n_epoch_1 = cross_validate(algo, data, ['rmse'],
                                        pkf)['test_rmse']

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 20,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    # NOTE(review): variable was previously misnamed rmse_sgd_n_epoch_5
    # although n_epochs is 20 here.
    rmse_sgd_n_epoch_20 = cross_validate(algo, data, ['rmse'],
                                         pkf)['test_rmse']

    # Running more epochs must change the resulting RMSE.
    assert rmse_sgd_n_epoch_1 != rmse_sgd_n_epoch_20
def test_sgd_reg_field():
    """Ensure the reg field is taken into account."""
    # Same SGD setup, two regularization strengths: RMSEs must differ.
    weak_reg = {
        'method': 'sgd',
        'n_epochs': 1,
        'reg': 0.02,
    }
    rmse_sgd_reg_002 = cross_validate(
        BaselineOnly(bsl_options=weak_reg), data, ['rmse'],
        pkf)['test_rmse']

    strong_reg = {
        'method': 'sgd',
        'n_epochs': 1,
        'reg': 1,
    }
    rmse_sgd_reg_1 = cross_validate(
        BaselineOnly(bsl_options=strong_reg), data, ['rmse'],
        pkf)['test_rmse']

    assert rmse_sgd_reg_002 != rmse_sgd_reg_1
def test_sgd_learning_rate_field():
    """Ensure the learning_rate field is taken into account."""
    # Two learning rates differing by two orders of magnitude should not
    # converge to the same RMSE after a single epoch.
    fast_lr = {
        'method': 'sgd',
        'n_epochs': 1,
        'learning_rate': .005,
    }
    rmse_sgd_lr_005 = cross_validate(
        BaselineOnly(bsl_options=fast_lr), data, ['rmse'],
        pkf)['test_rmse']

    slow_lr = {
        'method': 'sgd',
        'n_epochs': 1,
        'learning_rate': .00005,
    }
    rmse_sgd_lr_00005 = cross_validate(
        BaselineOnly(bsl_options=slow_lr), data, ['rmse'],
        pkf)['test_rmse']

    assert rmse_sgd_lr_005 != rmse_sgd_lr_00005
def test_trainset_testset():
    """Test the construct_trainset and construct_testset methods."""
    current_dir = os.path.dirname(os.path.realpath(__file__))
    folds_files = [(current_dir + '/custom_train',
                    current_dir + '/custom_test')]
    data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)

    with pytest.warns(UserWarning):
        trainset, testset = next(data.folds())

    # test ur
    ur = trainset.ur
    assert ur[0] == [(0, 4)]
    assert ur[1] == [(0, 4), (1, 2)]
    assert ur[40] == []  # not in the trainset

    # test ir
    ir = trainset.ir
    assert ir[0] == [(0, 4), (1, 4), (2, 1)]
    assert ir[1] == [(1, 2), (2, 1), (3, 5)]
    assert ir[20000] == []  # not in the trainset

    # test n_users, n_items, n_ratings, rating_scale
    assert trainset.n_users == 4
    assert trainset.n_items == 2
    assert trainset.n_ratings == 6
    assert trainset.rating_scale == (1, 5)

    # test raw2inner: known ids map to their index, unknown ids raise.
    for uid in range(4):
        assert trainset.to_inner_uid('user' + str(uid)) == uid
    with pytest.raises(ValueError):
        trainset.to_inner_uid('unkown_user')

    for iid in range(2):
        assert trainset.to_inner_iid('item' + str(iid)) == iid
    with pytest.raises(ValueError):
        trainset.to_inner_iid('unkown_item')

    # test inner2raw: mappings are built lazily on first access.
    assert trainset._inner2raw_id_users is None
    assert trainset._inner2raw_id_items is None
    for uid in range(4):
        assert trainset.to_raw_uid(uid) == 'user' + str(uid)
    for iid in range(2):
        assert trainset.to_raw_iid(iid) == 'item' + str(iid)
    assert trainset._inner2raw_id_users is not None
    assert trainset._inner2raw_id_items is not None

    # Test the build_testset() method
    algo = BaselineOnly()
    algo.fit(trainset)
    testset = trainset.build_testset()
    algo.test(testset)  # ensure an algorithm can manage the data
    assert ('user0', 'item0', 4) in testset
    assert ('user3', 'item1', 5) in testset
    assert ('user3', 'item1', 0) not in testset

    # Test the build_anti_testset() method
    algo = BaselineOnly()
    algo.fit(trainset)
    testset = trainset.build_anti_testset()
    algo.test(testset)  # ensure an algorithm can manage the data
    assert ('user0', 'item0', trainset.global_mean) not in testset
    assert ('user3', 'item1', trainset.global_mean) not in testset
    assert ('user0', 'item1', trainset.global_mean) in testset
    assert ('user3', 'item0', trainset.global_mean) in testset