Esempio n. 1
0
def test_cross_validate(toy_data):
    """Check cross_validate's result keys for an explicit CV iterator and
    for the default 5-fold fallback."""

    # Case 1: an explicit CV iterator (PredefinedKFold over one fold pair).
    here = os.path.dirname(os.path.realpath(__file__))
    folds_files = [(here + '/custom_train',
                    here + '/custom_test')]

    reader = Reader(line_format='user item rating', sep=' ', skip_lines=3)
    data = Dataset.load_from_folds(folds_files=folds_files, reader=reader,
                                   rating_scale=(1, 5))

    algo = NormalPredictor()
    predefined_cv = ms.PredefinedKFold()
    results = ms.cross_validate(algo, data, measures=['rmse', 'mae'],
                                cv=predefined_cv, verbose=1)
    # Only the requested keys should exist, each with one entry per fold.
    for key in ('test_rmse', 'test_mae', 'fit_time', 'test_time'):
        assert len(results[key]) == 1
    for absent_key in ('test_fcp', 'train_rmse', 'train_mae'):
        assert absent_key not in results

    # Case 2: cv=None must fall back to 5-fold CV, and the train_* keys
    # must appear when return_train_measures is True.
    results = ms.cross_validate(algo, toy_data, measures=['rmse', 'mae'],
                                cv=None, return_train_measures=True,
                                verbose=True)
    for key in ('test_rmse', 'test_mae', 'fit_time', 'test_time',
                'train_rmse', 'train_mae'):
        assert len(results[key]) == 5
Esempio n. 2
0
def test_user_based_field(u1_ml100k, pkf):
    """Ensure that the user_based field is taken into account (only) when
    needed."""

    for knn_class in (KNNBasic, KNNWithMeans, KNNBaseline):
        user_algo = knn_class(sim_options={'user_based': True})
        user_rmse = cross_validate(user_algo, u1_ml100k, ['rmse'],
                                   pkf)['test_rmse']
        item_algo = knn_class(sim_options={'user_based': False})
        item_rmse = cross_validate(item_algo, u1_ml100k, ['rmse'],
                                   pkf)['test_rmse']
        # Switching user_based changes the similarity matrix, so the
        # scores must differ.
        assert user_rmse != item_rmse
Esempio n. 3
0
def test_SVDpp_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account."""

    # Reference score to compare each variation against.
    baseline_algo = SVDpp(n_factors=1, n_epochs=1, random_state=1)
    rmse_default = cross_validate(baseline_algo, u1_ml100k, ['rmse'],
                                  pkf)['test_rmse']

    # Changing n_factors must change the score.
    variant = SVDpp(n_factors=2, n_epochs=1, random_state=1)
    rmse_factors = cross_validate(variant, u1_ml100k, ['rmse'],
                                  pkf)['test_rmse']
    assert rmse_default != rmse_factors

    # The rest is OK but just takes too long for now...
    """
Esempio n. 4
0
def test_sgd_n_epoch_field(u1_ml100k, pkf):
    """Ensure the n_epoch field is taken into account.

    Runs the SGD baseline with 1 epoch and with 20 epochs and checks that
    the cross-validated RMSE differs.
    """

    bsl_options = {'method': 'sgd',
                   'n_epochs': 1,
                   }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_n_epoch_1 = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    bsl_options = {'method': 'sgd',
                   'n_epochs': 20,
                   }
    algo = BaselineOnly(bsl_options=bsl_options)
    # Renamed from rmse_sgd_n_epoch_5: this run actually uses 20 epochs.
    rmse_sgd_n_epoch_20 = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # More training epochs must change the learned baselines, hence the score.
    assert rmse_sgd_n_epoch_1 != rmse_sgd_n_epoch_20
Esempio n. 5
0
def test_als_reg_i_field(u1_ml100k, pkf):
    """Ensure the reg_i field is taken into account."""

    def rmse_for_reg_i(reg_i):
        # Cross-validate an ALS baseline with the given item regularization.
        options = {'method': 'als',
                   'reg_i': reg_i,
                   }
        algo = BaselineOnly(bsl_options=options)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # Different item regularizations must yield different scores.
    assert rmse_for_reg_i(0) != rmse_for_reg_i(10)
Esempio n. 6
0
def test_method_field(u1_ml100k, pkf):
    """Ensure the method field is taken into account."""

    algo_als = BaselineOnly(bsl_options={'method': 'als'})
    rmse_als = cross_validate(algo_als, u1_ml100k, ['rmse'], pkf)['test_rmse']

    algo_sgd = BaselineOnly(bsl_options={'method': 'sgd'})
    rmse_sgd = cross_validate(algo_sgd, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The two optimization methods must not produce identical scores.
    assert rmse_als != rmse_sgd

    # An unknown method name must raise when fitting.
    with pytest.raises(ValueError):
        bad_algo = BaselineOnly(bsl_options={'method': 'wrong_name'})
        cross_validate(bad_algo, u1_ml100k, ['rmse'], pkf)
Esempio n. 7
0
def test_shrinkage_field(u1_ml100k, pkf):
    """Ensure the shrinkage field is taken into account.

    Both runs use the same baseline options so that the only difference
    between them is the shrinkage value.
    """

    bsl_options = {'n_epochs': 1}

    sim_options = {'name': 'pearson_baseline',
                   'shrinkage': 0
                   }
    # Bug fix: bsl_options was previously built but not passed here, so the
    # two runs differed in their baseline options as well as in shrinkage.
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_0 = cross_validate(algo, u1_ml100k, ['rmse'],
                                      pkf)['test_rmse']

    sim_options = {'name': 'pearson_baseline',
                   'shrinkage': 100
                   }
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_100 = cross_validate(algo, u1_ml100k, ['rmse'],
                                        pkf)['test_rmse']

    # Different shrinkage values must yield different scores.
    assert rmse_shrinkage_0 != rmse_shrinkage_100
Esempio n. 8
0
def test_sgd_reg_field(u1_ml100k, pkf):
    """Ensure the reg field is taken into account."""

    def rmse_for_reg(reg):
        # Cross-validate a one-epoch SGD baseline with the given
        # regularization constant.
        algo = BaselineOnly(bsl_options={'method': 'sgd',
                                         'n_epochs': 1,
                                         'reg': reg})
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # Different regularization values must yield different scores.
    assert rmse_for_reg(0.02) != rmse_for_reg(1)
Esempio n. 9
0
def test_sgd_learning_rate_field(u1_ml100k, pkf):
    """Ensure the learning_rate field is taken into account."""

    def rmse_for_lr(learning_rate):
        # Cross-validate a one-epoch SGD baseline with the given
        # learning rate.
        algo = BaselineOnly(bsl_options={'method': 'sgd',
                                         'n_epochs': 1,
                                         'learning_rate': learning_rate})
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # Different learning rates must yield different scores.
    assert rmse_for_lr(.005) != rmse_for_lr(.00005)
Esempio n. 10
0
def test_CoClustering_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account."""

    def run_cv(**kwargs):
        # Cross-validate a CoClustering instance built with the given
        # keyword arguments (random_state fixed for reproducibility).
        algo = CoClustering(random_state=1, **kwargs)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = run_cv(n_epochs=1)

    # Changing any single parameter must change the score.
    assert rmse_default != run_cv(n_cltr_u=1, n_epochs=1)  # n_cltr_u
    assert rmse_default != run_cv(n_cltr_i=1, n_epochs=1)  # n_cltr_i
    assert rmse_default != run_cv(n_epochs=2)              # n_epochs
Esempio n. 11
0
def test_name_field(u1_ml100k, pkf):
    """Ensure the name field is taken into account."""

    scores = []
    for similarity_name in ('cosine', 'msd', 'pearson'):
        algo = KNNBasic(sim_options={'name': similarity_name})
        scores.append(cross_validate(algo, u1_ml100k, ['rmse'],
                                     pkf)['test_rmse'])

    # pearson_baseline additionally needs baseline options.
    algo = KNNBasic(sim_options={'name': 'pearson_baseline'},
                    bsl_options={'n_epochs': 1})
    scores.append(cross_validate(algo, u1_ml100k, ['rmse'],
                                 pkf)['test_rmse'])

    # Every pair of similarity measures must yield distinct scores.
    for rmse_a, rmse_b in combinations(scores, 2):
        assert (rmse_a != rmse_b)

    # An unknown similarity name must raise.
    with pytest.raises(NameError):
        bad_algo = KNNBasic(sim_options={'name': 'wrong_name'})
        cross_validate(bad_algo, u1_ml100k, ['rmse'], pkf)
Esempio n. 12
0
def test_gridsearchcv_best_estimator(u1_ml100k):
    """Ensure that the best estimator is the one giving the best score (by
    re-running it)"""

    param_grid = {'n_epochs': [5], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1],
                  'init_std_dev': [0]}
    grid_search = GridSearchCV(SVD, param_grid, measures=['mae'],
                               cv=PredefinedKFold(), joblib_verbose=100)
    grid_search.fit(u1_ml100k)

    # Re-running the reported best estimator must reproduce the best score.
    best = grid_search.best_estimator['mae']
    recomputed_mae = cross_validate(best, u1_ml100k, measures=['MAE'],
                                    cv=PredefinedKFold())['test_mae']
    assert recomputed_mae == grid_search.best_score['mae']
Esempio n. 13
0
def test_randomizedsearchcv_best_estimator(u1_ml100k):
    """Ensure that the best estimator is the one that gives the best score (by
    re-running it)"""

    param_distributions = {'n_epochs': [5], 'lr_all': uniform(0.002, 0.003),
                           'reg_all': uniform(0.04, 0.02), 'n_factors': [1],
                           'init_std_dev': [0]}
    search = RandomizedSearchCV(SVD, param_distributions, measures=['mae'],
                                cv=PredefinedKFold(), joblib_verbose=100)
    search.fit(u1_ml100k)

    # Re-running the reported best estimator must reproduce the best score.
    best = search.best_estimator['mae']
    recomputed_mae = cross_validate(best, u1_ml100k, measures=['MAE'],
                                    cv=PredefinedKFold())['test_mae']
    assert recomputed_mae == search.best_score['mae']
Esempio n. 14
0
def main():
    """Command-line entry point.

    Parses CLI arguments, builds the requested prediction algorithm and
    dataset (built-in, single custom file, or predefined fold files), then
    runs cross-validation and prints the results.
    """

    class MyParser(argparse.ArgumentParser):
        '''A parser which prints the help message when an error occurs. Taken from
        http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu.'''  # noqa

        def error(self, message):
            # Print the error followed by the full help text, then exit
            # with status 2 (argparse's conventional error code).
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    parser = MyParser(
        description='Evaluate the performance of a rating prediction ' +
        'algorithm ' +
        'on a given dataset using cross validation. You can use a built-in ' +
        'or a custom dataset, and you can choose to automatically split the ' +
        'dataset into folds, or manually specify train and test files. ' +
        'Please refer to the documentation page ' +
        '(http://amaze.readthedocs.io/) for more details.',
        epilog="""Example:\n
        amaze -algo SVD -params "{'n_epochs': 5, 'verbose': True}"
        -load-builtin ml-100k -n-folds 3""")

    # Maps the CLI algorithm name to its class.
    algo_choices = {
        'NormalPredictor': NormalPredictor,
        'BaselineOnly': BaselineOnly,
        'KNNBasic': KNNBasic,
        'KNNBaseline': KNNBaseline,
        'KNNWithMeans': KNNWithMeans,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'NMF': NMF,
        'SlopeOne': SlopeOne,
        'CoClustering': CoClustering,
    }

    parser.add_argument('-algo',
                        type=str,
                        choices=algo_choices,
                        help='The prediction algorithm to use. ' +
                        'Allowed values are ' +
                        ', '.join(algo_choices.keys()) + '.',
                        metavar='<prediction algorithm>')

    parser.add_argument('-params',
                        type=str,
                        metavar='<algorithm parameters>',
                        default='{}',
                        help='A kwargs dictionary that contains all the ' +
                        'algorithm parameters.' +
                        'Example: "{\'n_epochs\': 10}".')

    parser.add_argument('-load-builtin',
                        type=str,
                        dest='load_builtin',
                        metavar='<dataset name>',
                        default='ml-100k',
                        help='The name of the built-in dataset to use.' +
                        'Allowed values are ' +
                        ', '.join(dataset.BUILTIN_DATASETS.keys()) +
                        '. Default is ml-100k.')

    parser.add_argument(
        '-load-custom',
        type=str,
        dest='load_custom',
        metavar='<file path>',
        default=None,
        help='A file path to custom dataset to use. ' + 'Ignored if ' +
        '-loadbuiltin is set. The -reader parameter needs ' + 'to be set.')

    parser.add_argument('-folds-files',
                        type=str,
                        dest='folds_files',
                        metavar='<train1 test1 train2 test2... >',
                        default=None,
                        help='A list of custom train and test files. ' +
                        'Ignored if -load-builtin or -load-custom is set. '
                        'The -reader parameter needs to be set.')

    parser.add_argument('-reader',
                        type=str,
                        metavar='<reader>',
                        default=None,
                        help='A Reader to read the custom dataset. Example: ' +
                        '"Reader(line_format=\'user item rating timestamp\',' +
                        ' sep=\'\\t\')"')

    parser.add_argument('-n-folds',
                        type=int,
                        dest='n_folds',
                        metavar="<number of folds>",
                        default=5,
                        help='The number of folds for cross-validation. ' +
                        'Default is 5.')

    parser.add_argument('-seed',
                        type=int,
                        metavar='<random seed>',
                        default=None,
                        help='The seed to use for RNG. ' +
                        'Default is the current system time.')

    parser.add_argument('--with-dump',
                        dest='with_dump',
                        action='store_true',
                        help='Dump the algorithm ' +
                        'results in a file (one file per fold). ' +
                        'Default is False.')

    parser.add_argument('-dump-dir',
                        dest='dump_dir',
                        type=str,
                        metavar='<dir>',
                        default=None,
                        help='Where to dump the files. Ignored if ' +
                        'with-dump is not set. Default is ' +
                        os.path.join(get_dataset_dir(), 'dumps/'))

    parser.add_argument('--clean',
                        dest='clean',
                        action='store_true',
                        help='Remove the ' + get_dataset_dir() +
                        ' directory and exit.')

    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=__version__)

    args = parser.parse_args()

    # --clean: wipe the local dataset directory and stop.
    if args.clean:
        folder = get_dataset_dir()
        shutil.rmtree(folder)
        print('Removed', folder)
        exit()

    # setup RNG
    rd.seed(args.seed)
    np.random.seed(args.seed)

    # setup algorithm
    # SECURITY NOTE(review): eval() executes arbitrary code from the command
    # line. Acceptable for a local CLI, but never expose this to untrusted
    # input (ast.literal_eval would be safer for plain literal dicts).
    params = eval(args.params)
    if args.algo is None:
        parser.error('No algorithm was specified.')
    algo = algo_choices[args.algo](**params)

    # setup dataset
    if args.load_custom is not None:  # load custom and split
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        # SECURITY NOTE(review): same eval() caveat as for -params above.
        reader = eval(args.reader)
        data = Dataset.load_from_file(args.load_custom, reader=reader)
        cv = KFold(n_splits=args.n_folds, random_state=args.seed)

    elif args.folds_files is not None:  # load from files
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)
        # Pair up the whitespace-separated file list as (train, test) tuples.
        folds_files = args.folds_files.split()
        folds_files = [(folds_files[i], folds_files[i + 1])
                       for i in range(0,
                                      len(folds_files) - 1, 2)]
        data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)
        cv = PredefinedKFold()

    else:  # load builtin dataset and split
        data = Dataset.load_builtin(args.load_builtin)
        cv = KFold(n_splits=args.n_folds, random_state=args.seed)

    cross_validate(algo, data, cv=cv, verbose=True)
Esempio n. 15
0
                                   'http://grouplens.org/datasets/movielens/1m'),
        }


# Seed every RNG so the benchmark is reproducible.
np.random.seed(0)
random.seed(0)

dataset = 'ml-1m'
data = Dataset.load_builtin(dataset)
# One shared KFold instance: folds will be the same for all algorithms.
kf = KFold(random_state=0)

table = []
for klass in classes:
    start = time.time()
    out = cross_validate(klass(), data, ['rmse', 'mae'], kf)
    elapsed_seconds = int(time.time() - start)
    cv_time = str(datetime.timedelta(seconds=elapsed_seconds))
    new_line = [LINK[klass.__name__],
                '{:.3f}'.format(np.mean(out['test_rmse'])),
                '{:.3f}'.format(np.mean(out['test_mae'])),
                cv_time]
    # Print this algorithm's row as soon as it is available.
    print(tabulate([new_line], tablefmt="pipe"))
    table.append(new_line)

header = [LINK[dataset],
          'RMSE',
          'MAE',
          'Time'
          ]
print(tabulate(table, header, tablefmt="pipe"))
Esempio n. 16
0
"""
This module descibes how to load a custom dataset from a single file.

As a custom dataset we will actually use the movielens-100k dataset, but act as
if it were not built-in.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import os

from amaze import BaselineOnly
from amaze import Dataset
from amaze import Reader
from amaze.model_selection import cross_validate

# path to dataset file
file_path = os.path.expanduser('~/.amaze_data/ml-100k/ml-100k/u.data')

# As we're loading a custom dataset, we need to define a reader. In the
# movielens-100k dataset, each line has the following format:
# 'user item rating timestamp', separated by '\t' characters.
reader = Reader(line_format='user item rating timestamp', sep='\t')

data = Dataset.load_from_file(file_path, reader=reader, rating_scale=(1, 5))

# We can now use this dataset as we please, e.g. calling cross_validate
cross_validate(BaselineOnly(), data, verbose=True)
Esempio n. 17
0
"""
This module descibes how to load a dataset from a pandas dataframe.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import pandas as pd

from amaze import NormalPredictor
from amaze import Dataset
from amaze.model_selection import cross_validate

# Creation of the dataframe. Column names are irrelevant.
ratings_dict = {
    'itemID': [1, 1, 1, 2, 2],
    'userID': [9, 32, 2, 45, 'user_foo'],
    'rating': [3, 2, 4, 3, 1]
}
df = pd.DataFrame(ratings_dict)

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']],
                            rating_scale=(1, 5))

# We can now use this dataset as we please, e.g. calling cross_validate
cross_validate(NormalPredictor(), data, cv=2)
Esempio n. 18
0
def test_SVD_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account."""

    def run_cv(**overrides):
        # Cross-validate an SVD built from the default reference parameters
        # with the given overrides applied on top.
        params = dict(n_factors=1, n_epochs=1, random_state=1)
        params.update(overrides)
        algo = SVD(**params)
        return cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # The baseline against which to compare.
    rmse_default = run_cv()

    # Changing any single parameter must move the score away from the
    # baseline. Each dict below overrides exactly one parameter.
    overrides_to_try = (dict(n_factors=2),
                        dict(n_epochs=2),
                        dict(biased=False),
                        dict(lr_all=5),
                        dict(reg_all=5),
                        dict(lr_bu=5),
                        dict(lr_bi=5),
                        dict(lr_pu=5),
                        dict(lr_qi=5),
                        dict(reg_bu=5),
                        dict(reg_bi=5),
                        dict(reg_pu=5),
                        dict(reg_qi=5))
    for overrides in overrides_to_try:
        assert rmse_default != run_cv(**overrides)
Esempio n. 19
0
computation.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from amaze import KNNBasic
from amaze import Dataset
from amaze.model_selection import cross_validate

# Load the movielens-100k dataset.
data = Dataset.load_builtin('ml-100k')

# First run: cosine similarity, computed between items.
sim_options = {
    'name': 'cosine',
    'user_based': False  # compute  similarities between items
}
algo = KNNBasic(sim_options=sim_options)

cross_validate(algo, data, verbose=True)

# Second run: pearson_baseline similarity.
sim_options = {
    'name': 'pearson_baseline',
    'shrinkage': 0  # no shrinkage
}
algo = KNNBasic(sim_options=sim_options)

cross_validate(algo, data, verbose=True)
Esempio n. 20
0
"""
This module describes the most basic usage of Amaze: you define a prediction
algorithm, (down)load a dataset and run a cross-validation procedure.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from amaze import SVD
from amaze import Dataset
from amaze.model_selection import cross_validate


# Load the movielens-100k dataset (download it if needed),
data = Dataset.load_builtin('ml-100k')

# We'll use the famous SVD algorithm.
algo = SVD()

# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
Esempio n. 21
0
def test_NMF_parameters(u1_ml100k, pkf):
    """Ensure that all parameters are taken into account.

    Strategy: compute a reference RMSE with fixed parameters, then change
    one parameter at a time and check that the score moves.
    """

    # The baseline against which to compare.
    algo = NMF(n_factors=1, n_epochs=1, random_state=1)
    rmse_default = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']

    # n_factors
    algo = NMF(n_factors=2, n_epochs=1, random_state=1)
    rmse_factors = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_factors

    # n_epochs
    algo = NMF(n_factors=1, n_epochs=2, random_state=1)
    rmse_n_epochs = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_n_epochs

    # biased
    algo = NMF(n_factors=1, n_epochs=1, biased=True, random_state=1)
    rmse_biased = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_biased

    # reg_pu
    algo = NMF(n_factors=1, n_epochs=1, reg_pu=1, random_state=1)
    rmse_reg_pu = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_reg_pu

    # reg_qi
    algo = NMF(n_factors=1, n_epochs=1, reg_qi=1, random_state=1)
    rmse_reg_qi = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_reg_qi

    # reg_bu (biased=True so the bias terms are actually trained)
    algo = NMF(n_factors=1, n_epochs=1, reg_bu=1, biased=True, random_state=1)
    rmse_reg_bu = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_reg_bu

    # reg_bi (biased=True so the bias terms are actually trained)
    algo = NMF(n_factors=1, n_epochs=1, reg_bi=1, biased=True, random_state=1)
    rmse_reg_bi = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_reg_bi

    # lr_bu (biased=True so the bias terms are actually trained)
    algo = NMF(n_factors=1, n_epochs=1, lr_bu=1, biased=True, random_state=1)
    rmse_lr_bu = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_lr_bu

    # lr_bi (biased=True so the bias terms are actually trained)
    algo = NMF(n_factors=1, n_epochs=1, lr_bi=1, biased=True, random_state=1)
    rmse_lr_bi = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_lr_bi

    # init_low
    algo = NMF(n_factors=1, n_epochs=1, init_low=.5, random_state=1)
    rmse_init_low = cross_validate(algo, u1_ml100k, ['rmse'], pkf)['test_rmse']
    assert rmse_default != rmse_init_low

    # an out-of-range init_low must raise
    with pytest.raises(ValueError):
        algo = NMF(n_factors=1, n_epochs=1, init_low=-1, random_state=1)

    # init_high
    algo = NMF(n_factors=1, n_epochs=1, init_high=.5, random_state=1)
    rmse_init_high = cross_validate(algo, u1_ml100k, ['rmse'],
                                    pkf)['test_rmse']
    assert rmse_default != rmse_init_high