def test_cross_validate(toy_data):
    """Sanity-check the result dict returned by cross_validate.

    A predefined single-fold CV must yield one entry per measure; cv=None
    must fall back to 5 folds; train_* keys must only be present when
    return_train_measures is True.
    """

    # Case 1: user-supplied CV iterator over one predefined train/test pair.
    here = os.path.dirname(os.path.realpath(__file__))
    folds_files = [(here + '/custom_train', here + '/custom_test')]
    reader = Reader(line_format='user item rating', sep=' ', skip_lines=3)
    data = Dataset.load_from_folds(folds_files=folds_files, reader=reader,
                                   rating_scale=(1, 5))
    algo = NormalPredictor()
    pkf = ms.PredefinedKFold()
    res = ms.cross_validate(algo, data, measures=['rmse', 'mae'], cv=pkf,
                            verbose=1)
    # One fold -> exactly one value per reported quantity.
    for key in ('test_rmse', 'test_mae', 'fit_time', 'test_time'):
        assert len(res[key]) == 1
    # Quantities that were not requested must be absent.
    for key in ('test_fcp', 'train_rmse', 'train_mae'):
        assert key not in res

    # Case 2: cv=None defaults to 5-fold CV, and return_train_measures=True
    # adds the train_* keys alongside the test_* ones.
    res = ms.cross_validate(algo, toy_data, measures=['rmse', 'mae'], cv=None,
                            return_train_measures=True, verbose=True)
    for key in ('test_rmse', 'test_mae', 'fit_time', 'test_time',
                'train_rmse', 'train_mae'):
        assert len(res[key]) == 5
def test_user_based_field(u1_ml100k, pkf):
    """Ensure that the user_based field is taken into account (only) when
    needed."""
    for klass in (KNNBasic, KNNWithMeans, KNNBaseline):
        # User-based and item-based similarities must lead to different
        # scores for every KNN flavour.
        user_algo = klass(sim_options={'user_based': True})
        rmses_user = cross_validate(user_algo, u1_ml100k, ['rmse'],
                                    pkf)['test_rmse']
        item_algo = klass(sim_options={'user_based': False})
        rmses_item = cross_validate(item_algo, u1_ml100k, ['rmse'],
                                    pkf)['test_rmse']
        assert rmses_user != rmses_item
def test_SVDpp_parameters(u1_ml100k, pkf): """Ensure that all parameters are taken into account.""" # The baseline against which to compare. algo = SVDpp(n_factors=1, n_epochs=1, random_state=1) rmse_default = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse'] # n_factors algo = SVDpp(n_factors=2, n_epochs=1, random_state=1) rmse_factors = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse'] assert rmse_default != rmse_factors # The rest is OK but just takes too long for now... """
def test_sgd_n_epoch_field(u1_ml100k, pkf):
    """Ensure the n_epochs field is taken into account.

    Trains BaselineOnly with SGD for 1 epoch and for 20 epochs and checks
    that the cross-validated RMSE differs.
    """
    bsl_options = {'method': 'sgd',
                   'n_epochs': 1,
                   }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_n_epoch_1 = cross_validate(algo, u1_ml100k, ['rmse'],
                                        pkf)['test_rmse']

    bsl_options = {'method': 'sgd',
                   'n_epochs': 20,
                   }
    algo = BaselineOnly(bsl_options=bsl_options)
    # Fix: this local was previously misnamed rmse_sgd_n_epoch_5 even though
    # the run uses n_epochs=20.
    rmse_sgd_n_epoch_20 = cross_validate(algo, u1_ml100k, ['rmse'],
                                         pkf)['test_rmse']

    assert rmse_sgd_n_epoch_1 != rmse_sgd_n_epoch_20
def test_als_reg_i_field(u1_ml100k, pkf):
    """Ensure the reg_i field is taken into account."""

    def rmse_for(reg_i):
        # Run ALS baselines with the given item regularization.
        algo = BaselineOnly(bsl_options={'method': 'als', 'reg_i': reg_i})
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # Two different regularization strengths must give different scores.
    assert rmse_for(0) != rmse_for(10)
def test_method_field(u1_ml100k, pkf):
    """Ensure the method field is taken into account."""

    def rmse_for(method):
        # Build and cross-validate a BaselineOnly with the given solver.
        algo = BaselineOnly(bsl_options={'method': method})
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # ALS and SGD baselines must produce different scores.
    assert rmse_for('als') != rmse_for('sgd')

    # An unknown solver name is rejected.
    with pytest.raises(ValueError):
        rmse_for('wrong_name')
def test_shrinkage_field(u1_ml100k, pkf):
    """Ensure the shrinkage field is taken into account.

    Both runs use the same baseline options (n_epochs=1) so that the only
    difference between them is the shrinkage value.
    """
    sim_options = {'name': 'pearson_baseline',
                   'shrinkage': 0
                   }
    bsl_options = {'n_epochs': 1}
    # Fix: bsl_options was previously defined here but not passed to
    # KNNBasic, so the two runs differed in baseline epochs as well as in
    # shrinkage, weakening the test.
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_0 = cross_validate(algo, u1_ml100k, ['rmse'],
                                      pkf)['test_rmse']

    sim_options = {'name': 'pearson_baseline',
                   'shrinkage': 100
                   }
    bsl_options = {'n_epochs': 1}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_100 = cross_validate(algo, u1_ml100k, ['rmse'],
                                        pkf)['test_rmse']

    assert rmse_shrinkage_0 != rmse_shrinkage_100
def test_sgd_reg_field(u1_ml100k, pkf):
    """Ensure the reg field is taken into account."""

    def rmse_for(reg):
        # One SGD epoch with the given regularization strength.
        options = {'method': 'sgd', 'n_epochs': 1, 'reg': reg}
        algo = BaselineOnly(bsl_options=options)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The default-ish and a heavy regularization must score differently.
    assert rmse_for(0.02) != rmse_for(1)
def test_sgd_learning_rate_field(u1_ml100k, pkf):
    """Ensure the learning_rate field is taken into account."""

    def rmse_for(learning_rate):
        # One SGD epoch with the given step size.
        options = {'method': 'sgd',
                   'n_epochs': 1,
                   'learning_rate': learning_rate}
        algo = BaselineOnly(bsl_options=options)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # Two learning rates two orders of magnitude apart must score
    # differently.
    assert rmse_for(.005) != rmse_for(.00005)
def test_CoClustering_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account."""

    def rmse_for(**overrides):
        # Seeded one-epoch run unless the override says otherwise.
        kwargs = {'n_epochs': 1, 'random_state': 1}
        kwargs.update(overrides)
        algo = CoClustering(**kwargs)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = rmse_for()

    # Changing any single parameter must change the score.
    assert rmse_default != rmse_for(n_cltr_u=1)
    assert rmse_default != rmse_for(n_cltr_i=1)
    assert rmse_default != rmse_for(n_epochs=2)
def test_name_field(u1_ml100k, pkf):
    """Ensure the name field is taken into account."""

    def rmse_for(sim_options, bsl_options=None):
        # pearson_baseline needs baseline options; the other similarities
        # are built without them, exactly as the callers below request.
        if bsl_options is None:
            algo = KNNBasic(sim_options=sim_options)
        else:
            algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    rmses = [
        rmse_for({'name': 'cosine'}),
        rmse_for({'name': 'msd'}),
        rmse_for({'name': 'pearson'}),
        rmse_for({'name': 'pearson_baseline'}, {'n_epochs': 1}),
    ]

    # Every similarity measure must yield a distinct score.
    for rmse_a, rmse_b in combinations(rmses, 2):
        assert (rmse_a != rmse_b)

    # An unknown similarity name is rejected.
    with pytest.raises(NameError):
        rmse_for({'name': 'wrong_name'})
def test_gridsearchcv_best_estimator(u1_ml100k):
    """Ensure that the best estimator is the one giving the best score (by
    re-running it)"""

    param_grid = {'n_epochs': [5], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1],
                  'init_std_dev': [0]}
    grid_search = GridSearchCV(SVD, param_grid, measures=['mae'],
                               cv=PredefinedKFold(), joblib_verbose=100)
    grid_search.fit(u1_ml100k)
    best = grid_search.best_estimator['mae']

    # Re-running the winning estimator on the same split must reproduce the
    # reported best score.
    recomputed_mae = cross_validate(best, u1_ml100k, measures=['MAE'],
                                    cv=PredefinedKFold())['test_mae']
    assert recomputed_mae == grid_search.best_score['mae']
def test_randomizedsearchcv_best_estimator(u1_ml100k):
    """Ensure that the best estimator is the one that gives the best score
    (by re-running it)"""

    param_distributions = {'n_epochs': [5], 'lr_all': uniform(0.002, 0.003),
                           'reg_all': uniform(0.04, 0.02), 'n_factors': [1],
                           'init_std_dev': [0]}
    search = RandomizedSearchCV(SVD, param_distributions, measures=['mae'],
                                cv=PredefinedKFold(), joblib_verbose=100)
    search.fit(u1_ml100k)
    best = search.best_estimator['mae']

    # Re-running the winning estimator on the same split must reproduce the
    # reported best score.
    recomputed_mae = cross_validate(best, u1_ml100k, measures=['MAE'],
                                    cv=PredefinedKFold())['test_mae']
    assert recomputed_mae == search.best_score['mae']
def main():
    """Command-line entry point: parse arguments, build the requested
    algorithm and dataset, then run cross-validation.
    """

    class MyParser(argparse.ArgumentParser):
        '''A parser which prints the help message when an error occurs. Taken from http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu.'''  # noqa

        def error(self, message):
            # Print the error and the full help, then exit with argparse's
            # conventional error code (2).
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    parser = MyParser(
        description='Evaluate the performance of a rating prediction ' +
        'algorithm ' +
        'on a given dataset using cross validation. You can use a built-in ' +
        'or a custom dataset, and you can choose to automatically split the ' +
        'dataset into folds, or manually specify train and test files. ' +
        'Please refer to the documentation page ' +
        '(http://amaze.readthedocs.io/) for more details.',
        epilog="""Example:\n amaze -algo SVD -params "{'n_epochs': 5, 'verbose': True}" -load-builtin ml-100k -n-folds 3""")

    # User-facing algorithm names mapped to their classes; also used as the
    # argparse 'choices' for -algo.
    algo_choices = {
        'NormalPredictor': NormalPredictor,
        'BaselineOnly': BaselineOnly,
        'KNNBasic': KNNBasic,
        'KNNBaseline': KNNBaseline,
        'KNNWithMeans': KNNWithMeans,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'NMF': NMF,
        'SlopeOne': SlopeOne,
        'CoClustering': CoClustering,
    }

    parser.add_argument('-algo', type=str,
                        choices=algo_choices,
                        help='The prediction algorithm to use. ' +
                        'Allowed values are ' +
                        ', '.join(algo_choices.keys()) + '.',
                        metavar='<prediction algorithm>')

    parser.add_argument('-params', type=str,
                        metavar='<algorithm parameters>',
                        default='{}',
                        help='A kwargs dictionary that contains all the ' +
                        'algorithm parameters.' +
                        'Example: "{\'n_epochs\': 10}".')

    parser.add_argument('-load-builtin', type=str, dest='load_builtin',
                        metavar='<dataset name>',
                        default='ml-100k',
                        help='The name of the built-in dataset to use.' +
                        'Allowed values are ' +
                        ', '.join(dataset.BUILTIN_DATASETS.keys()) +
                        '. Default is ml-100k.')

    parser.add_argument('-load-custom', type=str, dest='load_custom',
                        metavar='<file path>',
                        default=None,
                        help='A file path to custom dataset to use. ' +
                        'Ignored if ' +
                        '-loadbuiltin is set. The -reader parameter needs ' +
                        'to be set.')

    parser.add_argument('-folds-files', type=str, dest='folds_files',
                        metavar='<train1 test1 train2 test2... >',
                        default=None,
                        help='A list of custom train and test files. ' +
                        'Ignored if -load-builtin or -load-custom is set. '
                        'The -reader parameter needs to be set.')

    parser.add_argument('-reader', type=str,
                        metavar='<reader>',
                        default=None,
                        help='A Reader to read the custom dataset. Example: '
                        + '"Reader(line_format=\'user item rating timestamp\','
                        + ' sep=\'\\t\')"')

    parser.add_argument('-n-folds', type=int, dest='n_folds',
                        metavar="<number of folds>",
                        default=5,
                        help='The number of folds for cross-validation. ' +
                        'Default is 5.')

    parser.add_argument('-seed', type=int,
                        metavar='<random seed>',
                        default=None,
                        help='The seed to use for RNG. ' +
                        'Default is the current system time.')

    parser.add_argument('--with-dump', dest='with_dump', action='store_true',
                        help='Dump the algorithm ' +
                        'results in a file (one file per fold). ' +
                        'Default is False.')

    parser.add_argument('-dump-dir', dest='dump_dir', type=str,
                        metavar='<dir>',
                        default=None,
                        help='Where to dump the files. Ignored if ' +
                        'with-dump is not set. Default is ' +
                        os.path.join(get_dataset_dir(), 'dumps/'))

    parser.add_argument('--clean', dest='clean', action='store_true',
                        help='Remove the ' + get_dataset_dir() +
                        ' directory and exit.')

    parser.add_argument('-v', '--version', action='version',
                        version=__version__)

    # NOTE(review): args.with_dump / args.dump_dir are parsed but never read
    # below — presumably consumed elsewhere, or dead options; verify.
    args = parser.parse_args()

    if args.clean:
        # Wipe the whole dataset directory and stop.
        folder = get_dataset_dir()
        shutil.rmtree(folder)
        print('Removed', folder)
        exit()

    # setup RNG
    rd.seed(args.seed)
    np.random.seed(args.seed)

    # setup algorithm
    # NOTE(review): eval of a CLI-provided string is acceptable for a local
    # tool run by its own user, but is unsafe on untrusted input.
    params = eval(args.params)
    if args.algo is None:
        parser.error('No algorithm was specified.')
    algo = algo_choices[args.algo](**params)

    # setup dataset
    if args.load_custom is not None:  # load custom and split
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)  # same eval caveat as -params above
        data = Dataset.load_from_file(args.load_custom, reader=reader)
        cv = KFold(n_splits=args.n_folds, random_state=args.seed)

    elif args.folds_files is not None:  # load from files
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)
        folds_files = args.folds_files.split()
        # Pair consecutive entries as (train_file, test_file).
        folds_files = [(folds_files[i], folds_files[i + 1])
                       for i in range(0, len(folds_files) - 1, 2)]
        data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)
        cv = PredefinedKFold()

    else:  # load builtin dataset and split
        data = Dataset.load_builtin(args.load_builtin)
        cv = KFold(n_splits=args.n_folds, random_state=args.seed)

    cross_validate(algo, data, cv=cv, verbose=True)
'http://grouplens.org/datasets/movielens/1m'), } # set RNG np.random.seed(0) random.seed(0) dataset = 'ml-1m' data = Dataset.load_builtin(dataset) kf = KFold(random_state=0) # folds will be the same for all algorithms. table = [] for klass in classes: start = time.time() out = cross_validate(klass(), data, ['rmse', 'mae'], kf) cv_time = str(datetime.timedelta(seconds=int(time.time() - start))) link = LINK[klass.__name__] mean_rmse = '{:.3f}'.format(np.mean(out['test_rmse'])) mean_mae = '{:.3f}'.format(np.mean(out['test_mae'])) new_line = [link, mean_rmse, mean_mae, cv_time] print(tabulate([new_line], tablefmt="pipe")) # print current algo perf table.append(new_line) header = [LINK[dataset], 'RMSE', 'MAE', 'Time' ] print(tabulate(table, header, tablefmt="pipe"))
"""
This module describes how to load a custom dataset from a single file.

As a custom dataset we will actually use the movielens-100k dataset, but act
as if it were not built-in.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import os

from amaze import BaselineOnly
from amaze import Dataset
from amaze import Reader
from amaze.model_selection import cross_validate

# Path to the raw ratings file.
ratings_file = os.path.expanduser('~/.amaze_data/ml-100k/ml-100k/u.data')

# A custom dataset needs an explicit Reader. In the movielens-100k dataset,
# each line has the following format:
# 'user item rating timestamp', separated by '\t' characters.
reader = Reader(line_format='user item rating timestamp', sep='\t')

data = Dataset.load_from_file(ratings_file, reader=reader,
                              rating_scale=(1, 5))

# The loaded dataset behaves like any built-in one, e.g. with cross_validate.
cross_validate(BaselineOnly(), data, verbose=True)
""" This module descibes how to load a dataset from a pandas dataframe. """ from __future__ import (absolute_import, division, print_function, unicode_literals) import pandas as pd from amaze import NormalPredictor from amaze import Dataset from amaze.model_selection import cross_validate # Creation of the dataframe. Column names are irrelevant. ratings_dict = { 'itemID': [1, 1, 1, 2, 2], 'userID': [9, 32, 2, 45, 'user_foo'], 'rating': [3, 2, 4, 3, 1] } df = pd.DataFrame(ratings_dict) # The columns must correspond to user id, item id and ratings (in that order). data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], rating_scale=(1, 5)) # We can now use this dataset as we please, e.g. calling cross_validate cross_validate(NormalPredictor(), data, cv=2)
def test_SVD_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account.

    A tiny seeded baseline run (1 factor, 1 epoch) is compared against runs
    where exactly one parameter is changed; every change must alter the
    cross-validated RMSE.
    """

    def _rmse(**overrides):
        # Build an SVD with the shared tiny/seeded defaults plus overrides.
        kwargs = {'n_factors': 1, 'n_epochs': 1, 'random_state': 1}
        kwargs.update(overrides)
        algo = SVD(**kwargs)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = _rmse()

    # Each entry changes a single parameter relative to the baseline.
    # (Table-driven form replaces the previous copy-pasted blocks and
    # reports the offending parameter on failure.)
    for change in ({'n_factors': 2}, {'n_epochs': 2}, {'biased': False},
                   {'lr_all': 5}, {'reg_all': 5},
                   {'lr_bu': 5}, {'lr_bi': 5}, {'lr_pu': 5}, {'lr_qi': 5},
                   {'reg_bu': 5}, {'reg_bi': 5}, {'reg_pu': 5},
                   {'reg_qi': 5}):
        assert rmse_default != _rmse(**change), change
computation. """ from __future__ import (absolute_import, division, print_function, unicode_literals) from amaze import KNNBasic from amaze import Dataset from amaze.model_selection import cross_validate # Load the movielens-100k dataset. data = Dataset.load_builtin('ml-100k') # Example using cosine similarity sim_options = { 'name': 'cosine', 'user_based': False # compute similarities between items } algo = KNNBasic(sim_options=sim_options) cross_validate(algo, data, verbose=True) # Example using pearson_baseline similarity sim_options = { 'name': 'pearson_baseline', 'shrinkage': 0 # no shrinkage } algo = KNNBasic(sim_options=sim_options) cross_validate(algo, data, verbose=True)
"""
This module describes the most basic usage of Amaze: you define a prediction
algorithm, (down)load a dataset and run a cross-validation procedure.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from amaze import SVD
from amaze import Dataset
from amaze.model_selection import cross_validate

# Load the movielens-100k dataset (download it if needed),
data = Dataset.load_builtin('ml-100k')

# ... pick the famous SVD algorithm ...
algo = SVD()

# ... and run 5-fold cross-validation, printing the results.
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
def test_NMF_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account.

    A tiny seeded baseline run (1 factor, 1 epoch) is compared against runs
    where one parameter is changed (bias-related parameters also enable
    biased=True so they have an effect); every change must alter the
    cross-validated RMSE.
    """

    def _rmse(**overrides):
        # Build an NMF with the shared tiny/seeded defaults plus overrides.
        kwargs = {'n_factors': 1, 'n_epochs': 1, 'random_state': 1}
        kwargs.update(overrides)
        algo = NMF(**kwargs)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = _rmse()

    # Each entry changes one parameter relative to the baseline.
    # (Table-driven form replaces the previous copy-pasted blocks and
    # reports the offending parameter on failure.)
    for change in ({'n_factors': 2}, {'n_epochs': 2}, {'biased': True},
                   {'reg_pu': 1}, {'reg_qi': 1},
                   {'reg_bu': 1, 'biased': True},
                   {'reg_bi': 1, 'biased': True},
                   {'lr_bu': 1, 'biased': True},
                   {'lr_bi': 1, 'biased': True},
                   {'init_low': .5}, {'init_high': .5}):
        assert rmse_default != _rmse(**change), change

    # A negative init_low must be rejected at construction time.
    with pytest.raises(ValueError):
        NMF(n_factors=1, n_epochs=1, init_low=-1, random_state=1)