def test_old_style_algo(small_ml):
    '''Test that old algorithms (i.e. algorithms that only define train())
    can support both calls to fit() and to train()
    - supporting algo.fit() is needed so that custom algorithms that only
    define train() can still use up to date tools (such as evaluate, which
    has been updated to use fit()).
    - algo.train() is the old way, and must still be supported for custom
    algorithms and tools.
    '''

    class CustomAlgoTrain(AlgoBase):

        def __init__(self):
            AlgoBase.__init__(self)
            self.cnt = -1

        def train(self, trainset):
            AlgoBase.train(self, trainset)
            self.est = 3
            self.bu, self.bi = 1, 1
            self.cnt += 1

        def estimate(self, u, i):
            return self.est

    with pytest.warns(UserWarning):
        algo = CustomAlgoTrain()

    kf = KFold(n_splits=2)
    for i, (trainset, testset) in enumerate(kf.split(small_ml)):
        with pytest.warns(UserWarning):
            algo.fit(trainset)
        predictions = algo.test(testset)

        # Make sure AlgoBase.fit has been called
        assert hasattr(algo, 'trainset')
        # Make sure CustomAlgoTrain.train has been called
        assert all(est == 3 for (_, _, _, est, _) in predictions)
        # Make sure AlgoBase.fit is finished before CustomAlgoTrain.train
        assert (algo.bu, algo.bi) == (1, 1)
        # Make sure the rest of train() is only called once
        assert algo.cnt == i

    with pytest.warns(UserWarning):
        algo = CustomAlgoTrain()

    for i, (trainset, testset) in enumerate(kf.split(small_ml)):
        with pytest.warns(UserWarning):
            algo.train(trainset)
        predictions = algo.test(testset)

        # Make sure AlgoBase.fit has been called
        assert hasattr(algo, 'trainset')
        # Make sure CustomAlgoTrain.train has been called
        assert all(est == 3 for (_, _, _, est, _) in predictions)
        # Make sure AlgoBase.fit is finished before CustomAlgoTrain.train
        assert (algo.bu, algo.bi) == (1, 1)
        # Make sure the rest of train() is only called once
        assert algo.cnt == i
def test_new_style_algo(small_ml):
    '''Test that new algorithms (i.e. algorithms that only define fit()) can
    support both calls to fit() and to train()
    - algo.fit() is the new way of doing things
    - supporting algo.train() is needed for the (unlikely?) case where a user
    has defined custom tools that use algo.train().
    '''

    class CustomAlgoFit(AlgoBase):

        def __init__(self):
            AlgoBase.__init__(self)
            self.cnt = -1

        def fit(self, trainset):
            AlgoBase.fit(self, trainset)
            self.est = 3
            self.bu, self.bi = 1, 1
            self.cnt += 1

        def estimate(self, u, i):
            return self.est

    algo = CustomAlgoFit()
    kf = KFold(n_splits=2)
    for i, (trainset, testset) in enumerate(kf.split(small_ml)):
        algo.fit(trainset)
        predictions = algo.test(testset)

        # Make sure AlgoBase.fit has been called
        assert hasattr(algo, 'trainset')
        # Make sure CustomAlgoFit.fit has been called
        assert all(est == 3 for (_, _, _, est, _) in predictions)
        # Make sure AlgoBase.fit is finished before CustomAlgoFit.fit
        assert (algo.bu, algo.bi) == (1, 1)
        # Make sure the rest of fit() is only called once
        assert algo.cnt == i

    algo = CustomAlgoFit()
    for i, (trainset, testset) in enumerate(kf.split(small_ml)):
        with pytest.warns(UserWarning):
            algo.train(trainset)
        predictions = algo.test(testset)

        # Make sure AlgoBase.fit has been called
        assert hasattr(algo, 'trainset')
        # Make sure CustomAlgoFit.fit has been called
        assert all(est == 3 for (_, _, _, est, _) in predictions)
        # Make sure AlgoBase.fit is finished before CustomAlgoFit.fit
        assert (algo.bu, algo.bi) == (1, 1)
        # Make sure the rest of fit() is only called once
        assert algo.cnt == i
def test_gridsearchcv_same_splits():
    """Ensure that all parameter combinations are tested on the same splits
    (we check their RMSE scores are the same once averaged over the splits,
    which should be enough). We use as much parallelism as possible."""

    data_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_file(data_file, reader=Reader('ml-100k'),
                                  rating_scale=(1, 5))
    kf = KFold(3, shuffle=True, random_state=4)

    # all RMSE should be the same (as param combinations are the same)
    param_grid = {'n_epochs': [5], 'lr_all': [.2, .2],
                  'reg_all': [.4, .4], 'n_factors': [5], 'random_state': [0]}
    gs = GridSearchCV(SVD, param_grid, measures=['RMSE'], cv=kf, n_jobs=1)
    gs.fit(data)

    rmse_scores = [m for m in gs.cv_results['mean_test_rmse']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal

    # Note: actually, even when setting random_state=None in kf, the same
    # folds are used because we use product(param_comb, kf.split(...)).
    # However, it's needed to have the same folds when calling fit again:
    gs.fit(data)
    rmse_scores += [m for m in gs.cv_results['mean_test_rmse']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal
def test_randomizedsearchcv_cv_results():
    """Test the cv_results attribute"""

    f = os.path.join(os.path.dirname(__file__), './u1_ml100k_test')
    data = Dataset.load_from_file(f, Reader('ml-100k'), rating_scale=(1, 5))
    kf = KFold(3, shuffle=True, random_state=4)
    param_distributions = {'n_epochs': [5], 'lr_all': uniform(.2, .3),
                           'reg_all': uniform(.4, .3), 'n_factors': [5],
                           'random_state': [0]}
    n_iter = 5
    rs = RandomizedSearchCV(SVD, param_distributions, n_iter=n_iter,
                            measures=['RMSE', 'mae'], cv=kf,
                            return_train_measures=True)
    rs.fit(data)

    # test keys split*_test_rmse, mean and std dev.
    assert rs.cv_results['split0_test_rmse'].shape == (n_iter,)
    assert rs.cv_results['split1_test_rmse'].shape == (n_iter,)
    assert rs.cv_results['split2_test_rmse'].shape == (n_iter,)
    assert rs.cv_results['mean_test_rmse'].shape == (n_iter,)
    assert np.allclose(rs.cv_results['mean_test_rmse'],
                       np.mean([rs.cv_results['split0_test_rmse'],
                                rs.cv_results['split1_test_rmse'],
                                rs.cv_results['split2_test_rmse']],
                               axis=0))
    assert np.allclose(rs.cv_results['std_test_rmse'],
                       np.std([rs.cv_results['split0_test_rmse'],
                               rs.cv_results['split1_test_rmse'],
                               rs.cv_results['split2_test_rmse']],
                              axis=0))

    # test keys split*_train_rmse, mean and std dev.
    assert rs.cv_results['split0_train_rmse'].shape == (n_iter,)
    assert rs.cv_results['split1_train_rmse'].shape == (n_iter,)
    assert rs.cv_results['split2_train_rmse'].shape == (n_iter,)
    assert rs.cv_results['mean_train_rmse'].shape == (n_iter,)
    assert np.allclose(rs.cv_results['mean_train_rmse'],
                       np.mean([rs.cv_results['split0_train_rmse'],
                                rs.cv_results['split1_train_rmse'],
                                rs.cv_results['split2_train_rmse']],
                               axis=0))
    assert np.allclose(rs.cv_results['std_train_rmse'],
                       np.std([rs.cv_results['split0_train_rmse'],
                               rs.cv_results['split1_train_rmse'],
                               rs.cv_results['split2_train_rmse']],
                              axis=0))

    # test fit and train times dimensions.
    assert rs.cv_results['mean_fit_time'].shape == (n_iter,)
    assert rs.cv_results['std_fit_time'].shape == (n_iter,)
    assert rs.cv_results['mean_test_time'].shape == (n_iter,)
    assert rs.cv_results['std_test_time'].shape == (n_iter,)

    assert rs.cv_results['params'] is rs.param_combinations

    # assert that best parameter in rs.cv_results['rank_test_measure'] is
    # indeed the best_param attribute.
    best_index = np.argmin(rs.cv_results['rank_test_rmse'])
    assert rs.cv_results['params'][best_index] == rs.best_params['rmse']
    best_index = np.argmin(rs.cv_results['rank_test_mae'])
    assert rs.cv_results['params'][best_index] == rs.best_params['mae']
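# Note: cv_results is a plain dict of lists/arrays keyed by name, so it can
# also be inspected outside of asserts. A minimal sketch, assuming pandas is
# installed (pandas is not required by these tests; the column names below
# are the keys checked above):
#
#     import pandas as pd
#     df = pd.DataFrame.from_dict(rs.cv_results)
#     print(df[['mean_test_rmse', 'rank_test_rmse', 'mean_fit_time']])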
def test_get_cv():

    get_cv(None)
    get_cv(4)
    get_cv(KFold())

    with pytest.raises(ValueError):
        get_cv(23.2)
    with pytest.raises(ValueError):
        get_cv('bad')
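# The test above only pins down the interface of get_cv: None, an int and an
# existing CV iterator are accepted, anything else raises ValueError. A rough
# sketch of that contract (an assumption drawn from the test, not the
# library's actual implementation) could look like:
#
#     def get_cv(cv):
#         if cv is None:
#             return KFold(n_splits=5)  # assumed default
#         if isinstance(cv, numbers.Integral):
#             return KFold(n_splits=cv)
#         if hasattr(cv, 'split') and not isinstance(cv, str):
#             return cv
#         raise ValueError('Wrong CV object.')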
def main():

    class MyParser(argparse.ArgumentParser):
        '''A parser which prints the help message when an error occurs. Taken
        from
        http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu.'''  # noqa

        def error(self, message):
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    parser = MyParser(
        description='Evaluate the performance of a rating prediction ' +
        'algorithm ' +
        'on a given dataset using cross validation. You can use a built-in ' +
        'or a custom dataset, and you can choose to automatically split the ' +
        'dataset into folds, or manually specify train and test files. ' +
        'Please refer to the documentation page ' +
        '(http://amaze.readthedocs.io/) for more details.',
        epilog="""Example:\n
        amaze -algo SVD -params "{'n_epochs': 5, 'verbose': True}"
        -load-builtin ml-100k -n-folds 3""")

    algo_choices = {
        'NormalPredictor': NormalPredictor,
        'BaselineOnly': BaselineOnly,
        'KNNBasic': KNNBasic,
        'KNNBaseline': KNNBaseline,
        'KNNWithMeans': KNNWithMeans,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'NMF': NMF,
        'SlopeOne': SlopeOne,
        'CoClustering': CoClustering,
    }

    parser.add_argument('-algo', type=str,
                        choices=algo_choices,
                        help='The prediction algorithm to use. ' +
                        'Allowed values are ' +
                        ', '.join(algo_choices.keys()) + '.',
                        metavar='<prediction algorithm>')

    parser.add_argument('-params', type=str, metavar='<algorithm parameters>',
                        default='{}',
                        help='A kwargs dictionary that contains all the ' +
                        'algorithm parameters. ' +
                        'Example: "{\'n_epochs\': 10}".')

    parser.add_argument('-load-builtin', type=str, dest='load_builtin',
                        metavar='<dataset name>', default='ml-100k',
                        help='The name of the built-in dataset to use. ' +
                        'Allowed values are ' +
                        ', '.join(dataset.BUILTIN_DATASETS.keys()) +
                        '. Default is ml-100k.')

    parser.add_argument(
        '-load-custom', type=str, dest='load_custom', metavar='<file path>',
        default=None,
        help='A file path to custom dataset to use. ' +
        'Ignored if -load-builtin is set. The -reader parameter needs ' +
        'to be set.')

    parser.add_argument('-folds-files', type=str, dest='folds_files',
                        metavar='<train1 test1 train2 test2... >',
                        default=None,
                        help='A list of custom train and test files. ' +
                        'Ignored if -load-builtin or -load-custom is set. ' +
                        'The -reader parameter needs to be set.')

    parser.add_argument('-reader', type=str, metavar='<reader>',
                        default=None,
                        help='A Reader to read the custom dataset. Example: ' +
                        '"Reader(line_format=\'user item rating timestamp\',' +
                        ' sep=\'\\t\')"')

    parser.add_argument('-n-folds', type=int, dest='n_folds',
                        metavar='<number of folds>',
                        default=5,
                        help='The number of folds for cross-validation. ' +
                        'Default is 5.')

    parser.add_argument('-seed', type=int, metavar='<random seed>',
                        default=None,
                        help='The seed to use for RNG. ' +
                        'Default is the current system time.')

    parser.add_argument('--with-dump', dest='with_dump', action='store_true',
                        help='Dump the algorithm ' +
                        'results in a file (one file per fold). ' +
                        'Default is False.')

    parser.add_argument('-dump-dir', dest='dump_dir', type=str,
                        metavar='<dir>',
                        default=None,
                        help='Where to dump the files. Ignored if ' +
                        '--with-dump is not set. Default is ' +
                        os.path.join(get_dataset_dir(), 'dumps/'))

    parser.add_argument('--clean', dest='clean', action='store_true',
                        help='Remove the ' + get_dataset_dir() +
                        ' directory and exit.')

    parser.add_argument('-v', '--version', action='version',
                        version=__version__)

    args = parser.parse_args()

    if args.clean:
        folder = get_dataset_dir()
        shutil.rmtree(folder)
        print('Removed', folder)
        exit()

    # setup RNG
    rd.seed(args.seed)
    np.random.seed(args.seed)

    # setup algorithm
    params = eval(args.params)
    if args.algo is None:
        parser.error('No algorithm was specified.')
    algo = algo_choices[args.algo](**params)

    # setup dataset
    if args.load_custom is not None:  # load custom and split
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)
        data = Dataset.load_from_file(args.load_custom, reader=reader)
        cv = KFold(n_splits=args.n_folds, random_state=args.seed)

    elif args.folds_files is not None:  # load from files
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)
        folds_files = args.folds_files.split()
        folds_files = [(folds_files[i], folds_files[i + 1])
                       for i in range(0, len(folds_files) - 1, 2)]
        data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)
        cv = PredefinedKFold()

    else:  # load builtin dataset and split
        data = Dataset.load_builtin(args.load_builtin)
        cv = KFold(n_splits=args.n_folds, random_state=args.seed)

    cross_validate(algo, data, cv=cv, verbose=True)
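# For reference, a hypothetical invocation using a custom dataset (the file
# path and the Reader arguments below are placeholders, not values taken from
# this repository; the Reader pattern follows the -reader help string above):
#
#     amaze -algo KNNBasic -load-custom ./ratings.csv \
#           -reader "Reader(line_format='user item rating', sep=',')" \
#           -n-folds 3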
    stable +
    'basic_algorithms.html#amaze.prediction_algorithms.random_pred.NormalPredictor'),
    'ml-100k':
    '[{}]({})'.format('Movielens 100k',
                      'http://grouplens.org/datasets/movielens/100k'),
    'ml-1m':
    '[{}]({})'.format('Movielens 1M',
                      'http://grouplens.org/datasets/movielens/1m'),
}

# set RNG
np.random.seed(0)
random.seed(0)

dataset = 'ml-1m'
data = Dataset.load_builtin(dataset)
kf = KFold(random_state=0)  # folds will be the same for all algorithms.

table = []
for klass in classes:
    start = time.time()
    out = cross_validate(klass(), data, ['rmse', 'mae'], kf)
    cv_time = str(datetime.timedelta(seconds=int(time.time() - start)))
    link = LINK[klass.__name__]
    mean_rmse = '{:.3f}'.format(np.mean(out['test_rmse']))
    mean_mae = '{:.3f}'.format(np.mean(out['test_mae']))

    new_line = [link, mean_rmse, mean_mae, cv_time]
    print(tabulate([new_line], tablefmt="pipe"))  # print current algo perf
    table.append(new_line)

header = [LINK[dataset],
def test_KFold(toy_data):

    # Test n_folds parameter
    kf = KFold(n_splits=5)
    assert len(list(kf.split(toy_data))) == 5

    with pytest.raises(ValueError):
        kf = KFold(n_splits=10)
        next(kf.split(toy_data))  # Too big (greater than number of ratings)

    with pytest.raises(ValueError):
        kf = KFold(n_splits=1)
        next(kf.split(toy_data))  # Too low (must be >= 2)

    # Make sure data has not been shuffled. If not shuffled, the users in the
    # testsets are 0, 1, 2... 4 (in that order).
    kf = KFold(n_splits=5, shuffle=False)
    users = [int(testset[0][0][-1])
             for (_, testset) in kf.split(toy_data)]
    assert users == list(range(5))

    # Make sure that when called two times without shuffling, folds are the
    # same.
    kf = KFold(n_splits=5, shuffle=False)
    testsets_a = [testset for (_, testset) in kf.split(toy_data)]
    testsets_b = [testset for (_, testset) in kf.split(toy_data)]
    assert testsets_a == testsets_b
    # test once again with another KFold instance
    kf = KFold(n_splits=5, shuffle=False)
    testsets_a = [testset for (_, testset) in kf.split(toy_data)]
    assert testsets_a == testsets_b

    # We'll now shuffle b and check that folds are different.
    # (this is conditioned by seed setting at the beginning of file)
    kf = KFold(n_splits=5, random_state=None, shuffle=True)
    testsets_b = [testset for (_, testset) in kf.split(toy_data)]
    assert testsets_a != testsets_b
    # test once again: two calls to kf.split make different splits when
    # random_state=None
    testsets_a = [testset for (_, testset) in kf.split(toy_data)]
    assert testsets_a != testsets_b

    # Make sure that folds are the same when the same KFold instance is used
    # with shuffle=True but random_state is set to some value
    kf = KFold(n_splits=5, random_state=1, shuffle=True)
    testsets_a = [testset for (_, testset) in kf.split(toy_data)]
    testsets_b = [testset for (_, testset) in kf.split(toy_data)]
    assert testsets_a == testsets_b

    # Make sure raw ratings are not shuffled by KFold
    old_raw_ratings = copy(toy_data.raw_ratings)
    kf = KFold(n_splits=5, shuffle=True)
    next(kf.split(toy_data))
    assert old_raw_ratings == toy_data.raw_ratings

    # Make sure kf.split() and the old toy_data.split() have the same folds.
    np.random.seed(3)
    with pytest.warns(UserWarning):
        toy_data.split(2, shuffle=True)
        testsets_a = [testset for (_, testset) in toy_data.folds()]
    kf = KFold(n_splits=2, random_state=3, shuffle=True)
    testsets_b = [testset for (_, testset) in kf.split(toy_data)]
        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        # Number of recommended items in top k
        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r >= threshold) and (est >= threshold))
                              for (est, true_r) in user_ratings[:k])

        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1

        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1

    return precisions, recalls


data = Dataset.load_builtin('ml-100k')
kf = KFold(n_splits=5)
algo = SVD()

for trainset, testset in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4)

    # Precision and recall can then be averaged over all users
    print(sum(prec for prec in precisions.values()) / len(precisions))
    print(sum(rec for rec in recalls.values()) / len(recalls))
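    # A possible follow-up sketch (an addition, not part of the original
    # snippet): collapse the two per-fold averages into a single F1@K value
    # using the usual formula F1 = 2 * P * R / (P + R).
    avg_prec = sum(prec for prec in precisions.values()) / len(precisions)
    avg_rec = sum(rec for rec in recalls.values()) / len(recalls)
    if avg_prec + avg_rec > 0:
        print('F1@5:', 2 * avg_prec * avg_rec / (avg_prec + avg_rec))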
from amaze import accuracy
from amaze import Dataset
from amaze import SVD
from amaze.model_selection import KFold

data = Dataset.load_builtin('ml-100k')

algo = SVD()

trainset = data.build_full_trainset()
algo.fit(trainset)
testset = trainset.build_testset()
predictions = algo.test(testset)
# RMSE should be low as we are biased
accuracy.rmse(predictions, verbose=True)  # ~ 0.68 (which is low)

# We can also do this during a cross-validation procedure!
print('CV procedure:')

kf = KFold(n_splits=3)
for i, (trainset_cv, testset_cv) in enumerate(kf.split(data)):
    print('fold number', i + 1)
    algo.fit(trainset_cv)

    print('On testset,', end=' ')
    predictions = algo.test(testset_cv)
    accuracy.rmse(predictions, verbose=True)

    print('On trainset,', end=' ')
    predictions = algo.test(trainset_cv.build_testset())
    accuracy.rmse(predictions, verbose=True)