def test_user_based_field():
    """Check that flipping ``user_based`` changes the RMSE of each KNN class.

    For every KNN algorithm listed below, one run is configured with
    ``user_based=True`` and one with ``user_based=False``; the two
    cross-validated RMSE results must differ.
    """
    for knn_class in (KNNBasic, KNNWithMeans, KNNBaseline):
        user_algo = knn_class(sim_options={'user_based': True})
        user_rmse = cross_validate(user_algo, data, ['rmse'], pkf)['test_rmse']

        item_algo = knn_class(sim_options={'user_based': False})
        item_rmse = cross_validate(item_algo, data, ['rmse'], pkf)['test_rmse']

        assert user_rmse != item_rmse
def test_shrinkage_field():
    """Ensure the shrinkage field is taken into account.

    Two ``KNNBasic`` runs use the ``pearson_baseline`` similarity with
    ``shrinkage`` set to 0 and to 100. Both runs receive the same baseline
    options, so ``shrinkage`` is the only setting that differs between them
    and a difference in RMSE can be attributed to it.
    """
    # Fresh dicts are built for each algorithm so that neither run can be
    # affected by the other through a shared options object.
    sim_options = {'name': 'pearson_baseline', 'shrinkage': 0}
    bsl_options = {'n_epochs': 1}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_0 = cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    sim_options = {'name': 'pearson_baseline', 'shrinkage': 100}
    bsl_options = {'n_epochs': 1}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_100 = cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    assert rmse_shrinkage_0 != rmse_shrinkage_100
def test_SVDpp_parameters():
    """Ensure that all parameters are taken into account.

    Only ``n_factors`` is currently exercised: a baseline SVDpp run is
    compared with a run that differs solely in ``n_factors``, and the two
    cross-validated RMSE results must differ.
    """
    # The baseline against which to compare.
    algo = SVDpp(n_factors=1, n_epochs=1, random_state=1)
    rmse_default = cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    # n_factors
    algo = SVDpp(n_factors=2, n_epochs=1, random_state=1)
    rmse_factors = cross_validate(algo, data, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_factors

    # The rest is OK but just takes too long for now...
def test_gridsearchcv_best_estimator():
    """Check that re-running the best estimator reproduces the best score.

    A grid search over SVD parameters is fitted with the MAE measure on a
    predefined train/test fold. The estimator reported as best for MAE is
    then cross-validated again on the same fold, and the resulting MAE is
    compared with ``gs.best_score['mae']``.
    """
    tests_dir = os.path.dirname(__file__)
    folds = [(os.path.join(tests_dir, './u1_ml100k_train'),
              os.path.join(tests_dir, './u1_ml100k_test'))]
    data = Dataset.load_from_folds(folds, Reader('ml-100k'))

    param_grid = {
        'n_epochs': [5],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.4, 0.6],
        'n_factors': [1],
        'init_std_dev': [0],
    }
    gs = GridSearchCV(SVD, param_grid, measures=['mae'],
                      cv=PredefinedKFold(), joblib_verbose=100)
    gs.fit(data)

    # Recompute the MAE of the estimator the search reported as best.
    rerun = cross_validate(gs.best_estimator['mae'], data, measures=['MAE'],
                           cv=PredefinedKFold())
    assert rerun['test_mae'] == gs.best_score['mae']
def test_method_field():
    """Check that the baseline ``method`` option affects the RMSE.

    The 'als' and 'sgd' methods must give different cross-validated RMSE
    results, and an unknown method name is expected to raise ``ValueError``.
    """
    rmse_by_method = {}
    for method in ('als', 'sgd'):
        algo = BaselineOnly(bsl_options={'method': method})
        rmse_by_method[method] = cross_validate(
            algo, data, ['rmse'], pkf)['test_rmse']

    assert rmse_by_method['als'] != rmse_by_method['sgd']

    with pytest.raises(ValueError):
        bad_algo = BaselineOnly(bsl_options={'method': 'wrong_name'})
        cross_validate(bad_algo, data, ['rmse'], pkf)['test_rmse']
def test_als_reg_i_field():
    """Check that the ALS ``reg_i`` baseline option affects the RMSE."""

    def als_rmse(reg_i):
        # Cross-validated RMSE of BaselineOnly using ALS with this reg_i.
        options = {'method': 'als', 'reg_i': reg_i}
        algo = BaselineOnly(bsl_options=options)
        return cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    assert als_rmse(0) != als_rmse(10)
def test_sgd_n_epoch_field():
    """Check that the SGD ``n_epochs`` baseline option affects the RMSE.

    A 1-epoch run and a 20-epoch run must give different results.
    """
    one_epoch_algo = BaselineOnly(
        bsl_options={'method': 'sgd', 'n_epochs': 1})
    rmse_1_epoch = cross_validate(
        one_epoch_algo, data, ['rmse'], pkf)['test_rmse']

    twenty_epoch_algo = BaselineOnly(
        bsl_options={'method': 'sgd', 'n_epochs': 20})
    rmse_20_epochs = cross_validate(
        twenty_epoch_algo, data, ['rmse'], pkf)['test_rmse']

    assert rmse_1_epoch != rmse_20_epochs
def test_cross_validate():
    """Exercise ``ms.cross_validate`` with an explicit CV iterator and with
    ``cv=None``, checking which keys the returned mapping contains and how
    many entries each one holds.
    """
    here = os.path.dirname(os.path.realpath(__file__))

    # Part 1: a specified CV iterator over a single predefined fold.
    fold_paths = [(here + '/custom_train', here + '/custom_test')]
    reader = Reader(line_format='user item rating', sep=' ', skip_lines=3,
                    rating_scale=(1, 5))
    fold_data = Dataset.load_from_folds(folds_files=fold_paths, reader=reader)

    algo = NormalPredictor()
    predefined_cv = ms.PredefinedKFold()
    results = ms.cross_validate(algo, fold_data, measures=['rmse', 'mae'],
                                cv=predefined_cv, verbose=1)

    # Requested measures and timings each have one entry (one fold) ...
    for key in ('test_rmse', 'test_mae', 'fit_time', 'test_time'):
        assert len(results[key]) == 1
    # ... while unrequested measures and train measures are absent.
    for key in ('test_fcp', 'train_rmse', 'train_mae'):
        assert key not in results

    # Part 2: cv=None must yield 5 folds, and train_* keys must be present
    # when return_train_measures is True.
    file_data = Dataset.load_from_file(here + '/custom_dataset', reader)
    results = ms.cross_validate(algo, file_data, measures=['rmse', 'mae'],
                                cv=None, return_train_measures=True,
                                verbose=True)

    for key in ('test_rmse', 'test_mae', 'fit_time', 'test_time',
                'train_rmse', 'train_mae'):
        assert len(results[key]) == 5
def test_sgd_reg_field():
    """Check that the SGD ``reg`` baseline option affects the RMSE."""
    rmses = []
    for reg in (0.02, 1):
        options = {'method': 'sgd', 'n_epochs': 1, 'reg': reg}
        algo = BaselineOnly(bsl_options=options)
        rmses.append(cross_validate(algo, data, ['rmse'], pkf)['test_rmse'])

    rmse_reg_002, rmse_reg_1 = rmses
    assert rmse_reg_002 != rmse_reg_1
def test_sgd_learning_rate_field():
    """Check that the SGD ``learning_rate`` baseline option affects the RMSE."""

    def sgd_rmse(learning_rate):
        # Cross-validated RMSE of a 1-epoch SGD baseline at this rate.
        options = {
            'method': 'sgd',
            'n_epochs': 1,
            'learning_rate': learning_rate,
        }
        algo = BaselineOnly(bsl_options=options)
        return cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    assert sgd_rmse(.005) != sgd_rmse(.00005)
def test_CoClustering_parameters():
    """Check that each CoClustering parameter changes the RMSE.

    Every variant is compared against a baseline run configured with
    ``n_epochs=1`` and ``random_state=1``.
    """

    def rmse_for(**params):
        # All runs share random_state=1; only the given params vary.
        algo = CoClustering(random_state=1, **params)
        return cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = rmse_for(n_epochs=1)

    # n_cltr_u
    assert rmse_default != rmse_for(n_cltr_u=1, n_epochs=1)

    # n_cltr_i
    assert rmse_default != rmse_for(n_cltr_i=1, n_epochs=1)

    # n_epochs
    assert rmse_default != rmse_for(n_epochs=2)
def test_name_field():
    """Check that the similarity ``name`` option affects the RMSE.

    The four similarity names below must give pairwise different
    cross-validated RMSE results, and an unknown name is expected to raise
    ``NameError``.
    """
    rmses = []
    for sim_name in ('cosine', 'msd', 'pearson'):
        algo = KNNBasic(sim_options={'name': sim_name})
        rmses.append(cross_validate(algo, data, ['rmse'], pkf)['test_rmse'])

    # pearson_baseline is additionally given baseline options.
    algo = KNNBasic(sim_options={'name': 'pearson_baseline'},
                    bsl_options={'n_epochs': 1})
    rmses.append(cross_validate(algo, data, ['rmse'], pkf)['test_rmse'])

    for rmse_a, rmse_b in combinations(rmses, 2):
        assert (rmse_a != rmse_b)

    with pytest.raises(NameError):
        bad_algo = KNNBasic(sim_options={'name': 'wrong_name'})
        cross_validate(bad_algo, data, ['rmse'], pkf)
def test_SVD_parameters():
    """Check that each SVD parameter changes the RMSE.

    A baseline run uses ``n_factors=1, n_epochs=1, random_state=1``. Each
    variant overrides or adds exactly one keyword on top of that baseline,
    and its cross-validated RMSE must differ from the baseline's.
    """
    base_params = {'n_factors': 1, 'n_epochs': 1, 'random_state': 1}

    def rmse_with(**overrides):
        # Baseline keywords, with the given overrides taking precedence.
        algo = SVD(**dict(base_params, **overrides))
        return cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = rmse_with()

    # One entry per parameter under test, checked in this order.
    variations = (
        {'n_factors': 2},
        {'n_epochs': 2},
        {'biased': False},
        {'lr_all': 5},
        {'reg_all': 5},
        {'lr_bu': 5},
        {'lr_bi': 5},
        {'lr_pu': 5},
        {'lr_qi': 5},
        {'reg_bu': 5},
        {'reg_bi': 5},
        {'reg_pu': 5},
        {'reg_qi': 5},
    )
    for overrides in variations:
        assert rmse_default != rmse_with(**overrides)