Exemple #1
0
def test_user_based_field():
    """Check that toggling ``user_based`` changes the cross-validated RMSE.

    For each of KNNBasic, KNNWithMeans and KNNBaseline, the algorithm is
    cross-validated on ``data`` with ``pkf`` twice: once with
    ``user_based=True`` and once with ``user_based=False``. The two
    ``test_rmse`` results must differ.
    """

    for knn_class in (KNNBasic, KNNWithMeans, KNNBaseline):
        # User-based similarities first, then item-based ones.
        user_algo = knn_class(sim_options={'user_based': True})
        user_scores = cross_validate(user_algo, data, ['rmse'], pkf)

        item_algo = knn_class(sim_options={'user_based': False})
        item_scores = cross_validate(item_algo, data, ['rmse'], pkf)

        assert user_scores['test_rmse'] != item_scores['test_rmse']
Exemple #2
0
def test_shrinkage_field():
    """Ensure the shrinkage field is taken into account.

    KNNBasic is cross-validated twice with the ``pearson_baseline``
    similarity, once with ``shrinkage=0`` and once with ``shrinkage=100``.
    Both runs receive the same ``bsl_options`` so that the shrinkage value
    is the only thing that differs between them; otherwise a difference in
    RMSE could come from the baseline options rather than from shrinkage.
    """

    # Shared by both runs: a single baseline epoch keeps the test fast.
    bsl_options = {'n_epochs': 1}

    sim_options = {'name': 'pearson_baseline', 'shrinkage': 0}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_0 = cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    sim_options = {'name': 'pearson_baseline', 'shrinkage': 100}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_100 = cross_validate(algo, data, ['rmse'], pkf)['test_rmse']

    assert rmse_shrinkage_0 != rmse_shrinkage_100
Exemple #3
0
def test_SVDpp_parameters():
    """Check that changing an SVDpp parameter changes the test RMSE."""

    def rmse_for(n_factors):
        # Everything except n_factors is pinned (one epoch, fixed seed).
        model = SVDpp(n_factors=n_factors, n_epochs=1, random_state=1)
        return cross_validate(model, data, ['rmse'], pkf)['test_rmse']

    # Reference score that the variations are compared against.
    reference = rmse_for(1)

    # n_factors
    with_two_factors = rmse_for(2)
    assert reference != with_two_factors

    # The rest is OK but just takes too long for now...
    """
Exemple #4
0
def test_gridsearchcv_best_estimator():
    """Check that re-running the best estimator reproduces the best score.

    A grid search over SVD parameters is fitted on a single predefined
    train/test fold using MAE. The estimator reported as best for 'mae' is
    then cross-validated again on the same fold, and its MAE must equal
    ``best_score['mae']``.
    """

    here = os.path.dirname(__file__)
    folds = [(os.path.join(here, './u1_ml100k_train'),
              os.path.join(here, './u1_ml100k_test'))]
    data = Dataset.load_from_folds(folds, Reader('ml-100k'))

    search = GridSearchCV(
        SVD,
        {
            'n_epochs': [5],
            'lr_all': [0.002, 0.005],
            'reg_all': [0.4, 0.6],
            'n_factors': [1],
            'init_std_dev': [0],
        },
        measures=['mae'],
        cv=PredefinedKFold(),
        joblib_verbose=100,
    )
    search.fit(data)

    # Recompute the MAE of the winning estimator on the same fold.
    winner = search.best_estimator['mae']
    rerun = cross_validate(winner,
                           data,
                           measures=['MAE'],
                           cv=PredefinedKFold())

    assert rerun['test_mae'] == search.best_score['mae']
Exemple #5
0
def test_method_field():
    """Check that the baseline ``method`` option affects the result.

    BaselineOnly is cross-validated with ``method='als'`` and
    ``method='sgd'``; the two RMSEs must differ. An unknown method name is
    expected to raise ``ValueError``.
    """

    rmse_by_method = {}
    for method in ('als', 'sgd'):
        algo = BaselineOnly(bsl_options={'method': method})
        scores = cross_validate(algo, data, ['rmse'], pkf)
        rmse_by_method[method] = scores['test_rmse']

    assert rmse_by_method['als'] != rmse_by_method['sgd']

    with pytest.raises(ValueError):
        invalid = BaselineOnly(bsl_options={'method': 'wrong_name'})
        cross_validate(invalid, data, ['rmse'], pkf)['test_rmse']
Exemple #6
0
def test_als_reg_i_field():
    """Check that the ALS ``reg_i`` option affects the result.

    BaselineOnly is cross-validated with ``reg_i=0`` and ``reg_i=10``
    (method 'als' in both cases); the two RMSEs must differ.
    """

    def rmse_for(reg_i):
        options = {'method': 'als', 'reg_i': reg_i}
        model = BaselineOnly(bsl_options=options)
        return cross_validate(model, data, ['rmse'], pkf)['test_rmse']

    without_reg = rmse_for(0)
    with_reg = rmse_for(10)

    assert without_reg != with_reg
Exemple #7
0
def test_sgd_n_epoch_field():
    """Check that the SGD ``n_epochs`` option affects the result.

    BaselineOnly is cross-validated with 1 epoch and with 20 epochs
    (method 'sgd' in both cases); the two RMSEs must differ.
    """

    rmse_by_epochs = {}
    for n_epochs in (1, 20):
        options = {'method': 'sgd', 'n_epochs': n_epochs}
        model = BaselineOnly(bsl_options=options)
        result = cross_validate(model, data, ['rmse'], pkf)
        rmse_by_epochs[n_epochs] = result['test_rmse']

    assert rmse_by_epochs[1] != rmse_by_epochs[20]
Exemple #8
0
def test_cross_validate():
    """Check the keys and lengths of the dict returned by cross_validate.

    First run: an explicit PredefinedKFold over one custom train/test pair;
    every reported list must have one entry and no train measures or
    unrequested measures may appear. Second run: ``cv=None`` with
    ``return_train_measures=True``; every reported list, including the
    train measures, must have five entries.
    """

    here = os.path.dirname(os.path.realpath(__file__))
    reader = Reader(line_format='user item rating',
                    sep=' ',
                    skip_lines=3,
                    rating_scale=(1, 5))
    algo = NormalPredictor()

    # First test with a specified CV iterator.
    train_path = here + '/custom_train'
    test_path = here + '/custom_test'
    data = Dataset.load_from_folds(folds_files=[(train_path, test_path)],
                                   reader=reader)
    pkf = ms.PredefinedKFold()
    ret = ms.cross_validate(algo,
                            data,
                            measures=['rmse', 'mae'],
                            cv=pkf,
                            verbose=1)

    # Basically just test that keys (don't) exist as they should.
    for present_key in ('test_rmse', 'test_mae', 'fit_time', 'test_time'):
        assert len(ret[present_key]) == 1
    for absent_key in ('test_fcp', 'train_rmse', 'train_mae'):
        assert absent_key not in ret

    # Test that 5 fold CV is used when cv=None.
    # Also check that train_* keys exist when return_train_measures is True.
    data = Dataset.load_from_file(here + '/custom_dataset', reader)
    ret = ms.cross_validate(algo,
                            data,
                            measures=['rmse', 'mae'],
                            cv=None,
                            return_train_measures=True,
                            verbose=True)
    expected_keys = ('test_rmse', 'test_mae', 'fit_time', 'test_time',
                     'train_rmse', 'train_mae')
    for key in expected_keys:
        assert len(ret[key]) == 5
Exemple #9
0
def test_sgd_reg_field():
    """Check that the SGD ``reg`` option affects the result.

    BaselineOnly is cross-validated with ``reg=0.02`` and ``reg=1``
    (method 'sgd', one epoch in both cases); the two RMSEs must differ.
    """

    def rmse_for(reg):
        options = {'method': 'sgd', 'n_epochs': 1, 'reg': reg}
        model = BaselineOnly(bsl_options=options)
        return cross_validate(model, data, ['rmse'], pkf)['test_rmse']

    small_reg_rmse = rmse_for(0.02)
    large_reg_rmse = rmse_for(1)

    assert small_reg_rmse != large_reg_rmse
Exemple #10
0
def test_sgd_learning_rate_field():
    """Check that the SGD ``learning_rate`` option affects the result.

    BaselineOnly is cross-validated with learning rates 0.005 and 0.00005
    (method 'sgd', one epoch in both cases); the two RMSEs must differ.
    """

    results = []
    for learning_rate in (0.005, 0.00005):
        options = {
            'method': 'sgd',
            'n_epochs': 1,
            'learning_rate': learning_rate,
        }
        model = BaselineOnly(bsl_options=options)
        scores = cross_validate(model, data, ['rmse'], pkf)
        results.append(scores['test_rmse'])

    faster_rmse, slower_rmse = results
    assert faster_rmse != slower_rmse
def test_CoClustering_parameters():
    """Check that each CoClustering parameter changes the test RMSE.

    A reference run (one epoch, fixed seed) is compared against runs that
    change ``n_cltr_u``, ``n_cltr_i`` and ``n_epochs`` one at a time; each
    variation must produce a different RMSE from the reference.
    """

    def rmse_with(**params):
        # The seed is pinned so only the listed parameters vary.
        model = CoClustering(random_state=1, **params)
        return cross_validate(model, data, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    reference = rmse_with(n_epochs=1)

    variations = (
        {'n_cltr_u': 1, 'n_epochs': 1},  # n_cltr_u
        {'n_cltr_i': 1, 'n_epochs': 1},  # n_cltr_i
        {'n_epochs': 2},                 # n_epochs
    )
    for variation in variations:
        varied = rmse_with(**variation)
        assert reference != varied
Exemple #12
0
def test_name_field():
    """Check that the similarity ``name`` option affects the result.

    KNNBasic is cross-validated with the 'cosine', 'msd', 'pearson' and
    'pearson_baseline' similarities; every pair of RMSEs must differ. An
    unknown similarity name is expected to raise ``NameError``.
    """

    def rmse_for(sim_name, **algo_kwargs):
        model = KNNBasic(sim_options={'name': sim_name}, **algo_kwargs)
        return cross_validate(model, data, ['rmse'], pkf)['test_rmse']

    scores = (
        rmse_for('cosine'),
        rmse_for('msd'),
        rmse_for('pearson'),
        rmse_for('pearson_baseline', bsl_options={'n_epochs': 1}),
    )

    for left, right in combinations(scores, 2):
        assert (left != right)

    with pytest.raises(NameError):
        unknown = KNNBasic(sim_options={'name': 'wrong_name'})
        cross_validate(unknown, data, ['rmse'], pkf)
Exemple #13
0
def test_SVD_parameters():
    """Check that each SVD parameter changes the test RMSE.

    A reference run (one factor, one epoch, fixed seed) is compared against
    runs that override a single parameter each; every override must produce
    a different RMSE from the reference.
    """

    reference_params = {'n_factors': 1, 'n_epochs': 1, 'random_state': 1}

    def rmse_with(**overrides):
        params = dict(reference_params, **overrides)
        model = SVD(**params)
        return cross_validate(model, data, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = rmse_with()

    # One override per run, checked in this order.
    single_overrides = (
        ('n_factors', 2),
        ('n_epochs', 2),
        ('biased', False),
        ('lr_all', 5),
        ('reg_all', 5),
        ('lr_bu', 5),
        ('lr_bi', 5),
        ('lr_pu', 5),
        ('lr_qi', 5),
        ('reg_bu', 5),
        ('reg_bi', 5),
        ('reg_pu', 5),
        ('reg_qi', 5),
    )
    for param_name, param_value in single_overrides:
        rmse_changed = rmse_with(**{param_name: param_value})
        assert rmse_default != rmse_changed