Example #1
def test_user_based_field():
    """Ensure that the user_based field is taken into account (only) when
    needed."""

    algorithms = (KNNBasic, KNNWithMeans, KNNBaseline)
    for klass in algorithms:
        algo = klass(sim_options={'user_based': True})
        rmses_user_based = evaluate(algo, data, measures=['rmse'])['rmse']
        algo = klass(sim_options={'user_based': False})
        rmses_item_based = evaluate(algo, data, measures=['rmse'])['rmse']
        assert rmses_user_based != rmses_item_based
Example #2
def test_SVDpp_parameters():
    """Ensure that all parameters are taken into account."""

    # The baseline against which to compare.
    algo = SVDpp(n_factors=1, n_epochs=1)
    rmse_default = evaluate(algo, data, measures=['rmse'])['rmse']

    # n_factors
    algo = SVDpp(n_factors=2, n_epochs=1)
    rmse_factors = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_factors

    # The rest is OK but just takes too long for now...
    """
Example #3
def test_shrinkage_field():
    """Ensure the shrinkage field is taken into account."""

    sim_options = {'name': 'pearson_baseline', 'shrinkage': 0}
    bsl_options = {'n_epochs': 1}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_0 = evaluate(algo, data, measures=['rmse'])['rmse']

    sim_options = {'name': 'pearson_baseline', 'shrinkage': 100}
    bsl_options = {'n_epochs': 1}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_shrinkage_100 = evaluate(algo, data, measures=['rmse'])['rmse']

    assert rmse_shrinkage_0 != rmse_shrinkage_100
Example #4
def test_performances():
    """Test the returned dict. Also do dumping."""

    current_dir = os.path.dirname(os.path.realpath(__file__))
    folds_files = [(current_dir + '/custom_train',
                    current_dir + '/custom_test')]

    reader = Reader(line_format='user item rating',
                    sep=' ',
                    skip_lines=3,
                    rating_scale=(1, 5))
    data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)

    algo = NormalPredictor()
    tmp_dir = tempfile.mkdtemp()  # create tmp dir
    performances = evaluate(algo,
                            data,
                            measures=['RmSe', 'Mae'],
                            with_dump=True,
                            dump_dir=tmp_dir,
                            verbose=2)
    shutil.rmtree(tmp_dir)  # remove tmp dir

    print(performances)
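    # evaluate() returns a case-insensitive dict, so 'RMSE', 'RmSe' and 'rmse'
    # all refer to the same list of per-fold scores.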
    assert performances['RMSE'] is performances['rmse']
    assert performances['MaE'] is performances['mae']
Example #5
def test_method_field():
    """Ensure the method field is taken into account."""

    bsl_options = {'method': 'als'}
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_als = evaluate(algo, data, measures=['rmse'])['rmse']

    bsl_options = {'method': 'sgd'}
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd = evaluate(algo, data, measures=['rmse'])['rmse']

    assert rmse_als != rmse_sgd

    with pytest.raises(ValueError):
        bsl_options = {'method': 'wrong_name'}
        algo = BaselineOnly(bsl_options=bsl_options)
        evaluate(algo, data)
Example #6
def test_sgd_n_epoch_field():
    """Ensure the n_epoch field is taken into account."""

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 1,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_n_epoch_1 = evaluate(algo, data, measures=['rmse'])['rmse']

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 20,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_n_epoch_20 = evaluate(algo, data, measures=['rmse'])['rmse']

    assert rmse_sgd_n_epoch_1 != rmse_sgd_n_epoch_20
Example #7
def test_als_reg_i_field():
    """Ensure the reg_i field is taken into account."""

    bsl_options = {
        'method': 'als',
        'reg_i': 0,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_als_regi_0 = evaluate(algo, data, measures=['rmse'])['rmse']

    bsl_options = {
        'method': 'als',
        'reg_i': 10,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_als_regi_10 = evaluate(algo, data, measures=['rmse'])['rmse']

    assert rmse_als_regi_0 != rmse_als_regi_10
Example #8
def test_sgd_reg_field():
    """Ensure the reg field is taken into account."""

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 1,
        'reg': 0.02,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_reg_002 = evaluate(algo, data, measures=['rmse'])['rmse']

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 1,
        'reg': 1,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_reg_1 = evaluate(algo, data, measures=['rmse'])['rmse']

    assert rmse_sgd_reg_002 != rmse_sgd_reg_1
Example #9
def test_sgd_learning_rate_field():
    """Ensure the learning_rate field is taken into account."""

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 1,
        'learning_rate': .005,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_lr_005 = evaluate(algo, data, measures=['rmse'])['rmse']

    bsl_options = {
        'method': 'sgd',
        'n_epochs': 1,
        'learning_rate': .00005,
    }
    algo = BaselineOnly(bsl_options=bsl_options)
    rmse_sgd_lr_00005 = evaluate(algo, data, measures=['rmse'])['rmse']

    assert rmse_sgd_lr_005 != rmse_sgd_lr_00005
Example #10
def test_name_field():
    """Ensure the name field is taken into account."""

    sim_options = {'name': 'cosine'}
    algo = KNNBasic(sim_options=sim_options)
    rmse_cosine = evaluate(algo, data, measures=['rmse'])['rmse']

    sim_options = {'name': 'msd'}
    algo = KNNBasic(sim_options=sim_options)
    rmse_msd = evaluate(algo, data, measures=['rmse'])['rmse']

    sim_options = {'name': 'pearson'}
    algo = KNNBasic(sim_options=sim_options)
    rmse_pearson = evaluate(algo, data, measures=['rmse'])['rmse']

    sim_options = {'name': 'pearson_baseline'}
    bsl_options = {'n_epochs': 1}
    algo = KNNBasic(sim_options=sim_options, bsl_options=bsl_options)
    rmse_pearson_bsl = evaluate(algo, data, measures=['rmse'])['rmse']

    for rmse_a, rmse_b in combinations(
        (rmse_cosine, rmse_msd, rmse_pearson, rmse_pearson_bsl), 2):
        assert (rmse_a != rmse_b)

    with pytest.raises(NameError):
        sim_options = {'name': 'wrong_name'}
        algo = KNNBasic(sim_options=sim_options)
        evaluate(algo, data)
Example #11
def main():

    class MyParser(argparse.ArgumentParser):
        '''A parser which prints the help message when an error occurs. Taken from
        http://stackoverflow.com/questions/4042452/display-help-message-with-python-argparse-when-script-is-called-without-any-argu.'''  # noqa

        def error(self, message):
            sys.stderr.write('error: %s\n' % message)
            self.print_help()
            sys.exit(2)

    parser = MyParser(
        description='Evaluate the performance of a rating prediction ' +
        'algorithm ' +
        'on a given dataset using cross validation. You can use a built-in ' +
        'or a custom dataset, and you can choose to automatically split the ' +
        'dataset into folds, or manually specify train and test files. ' +
        'Please refer to the documentation page ' +
        '(http://surprise.readthedocs.io/) for more details.',
        epilog="""Example:\n
        surprise -algo SVD -params "{'n_epochs': 5, 'verbose': True}"
        -load-builtin ml-100k -n-folds 3""")

    algo_choices = {
        'NormalPredictor': NormalPredictor,
        'BaselineOnly': BaselineOnly,
        'KNNBasic': KNNBasic,
        'KNNBaseline': KNNBaseline,
        'KNNWithMeans': KNNWithMeans,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'QSVD': QSVD,
        'QSVDp': QSVDp,
        'QSVDpp': QSVDpp,
        'NMF': NMF,
        'SlopeOne': SlopeOne,
        'WeightedSlopeOne': WeightedSlopeOne,
        'BiPolarSlopeOne': BiPolarSlopeOne,
        'CoClustering': CoClustering,
        'CoClusteringRegression': CoClusteringRegression,
        'SoftBoundBiPolarSlopeOne': SoftBoundBiPolarSlopeOne
    }

    parser.add_argument('-algo', type=str,
                        choices=algo_choices,
                        help='The prediction algorithm to use. ' +
                        'Allowed values are ' +
                        ', '.join(algo_choices.keys()) + '.',
                        metavar='<prediction algorithm>')

    parser.add_argument('-params', type=str,
                        metavar='<algorithm parameters>',
                        default='{}',
                        help='A kwargs dictionary that contains all the ' +
                        'algorithm parameters. ' +
                        'Example: "{\'n_epochs\': 10}".'
                        )

    parser.add_argument('-load-builtin', type=str, dest='load_builtin',
                        metavar='<dataset name>',
                        default='ml-100k',
                        help='The name of the built-in dataset to use. ' +
                        'Allowed values are ' +
                        ', '.join(dataset.BUILTIN_DATASETS.keys()) +
                        '. Default is ml-100k.'
                        )

    parser.add_argument('-load-custom', type=str, dest='load_custom',
                        metavar='<file path>',
                        default=None,
                        help='A file path to a custom dataset to use. ' +
                        'Ignored if ' +
                        '-load-builtin is set. The -reader parameter needs ' +
                        'to be set.'
                        )

    parser.add_argument('-folds-files', type=str, dest='folds_files',
                        metavar='<train1 test1 train2 test2... >',
                        default=None,
                        help='A list of custom train and test files. ' +
                        'Ignored if -load-builtin or -load-custom is set. '
                        'The -reader parameter needs to be set.'
                        )

    parser.add_argument('-reader', type=str,
                        metavar='<reader>',
                        default=None,
                        help='A Reader to read the custom dataset. Example: ' +
                        '"Reader(line_format=\'user item rating timestamp\',' +
                        ' sep=\'\\t\')"'
                        )

    parser.add_argument('-n-folds', type=int, dest='n_folds',
                        metavar="<number of folds>",
                        default=5,
                        help='The number of folds for cross-validation. ' +
                        'Default is 5.'
                        )

    parser.add_argument('-seed', type=int,
                        metavar='<random seed>',
                        default=None,
                        help='The seed to use for RNG. ' +
                        'Default is the current system time.'
                        )

    parser.add_argument('--with-dump', dest='with_dump', action='store_true',
                        help='Dump the algorithm ' +
                        'results in a file (one file per fold). ' +
                        'Default is False.'
                        )

    parser.add_argument('-dump-dir', dest='dump_dir', type=str,
                        metavar='<dir>',
                        default=None,
                        help='Where to dump the files. Ignored if ' +
                        '--with-dump is not set. Default is ' +
                        '~/.surprise_data/dumps.'
                        )

    parser.add_argument('--clean', dest='clean', action='store_true',
                        help='Remove the ' + dataset.DATASETS_DIR +
                        ' directory and exit.'
                        )

    parser.add_argument('-v', '--version', action='version',
                        version=__version__)

    args = parser.parse_args()

    if args.clean:
        shutil.rmtree(dataset.DATASETS_DIR)
        print('Removed', dataset.DATASETS_DIR)
        exit()

    # setup RNG
    rd.seed(args.seed)
    np.random.seed(args.seed)

    # setup algorithm
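    # args.params is a Python dict literal (e.g. "{'n_epochs': 10}") that is
    # eval'd and unpacked as keyword arguments of the chosen algorithm.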
    params = eval(args.params)
    if args.algo is None:
        parser.error('No algorithm was specified.')
    algo = algo_choices[args.algo](**params)

    # setup dataset
    if args.load_custom is not None:  # load custom and split
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)
        data = Dataset.load_from_file(args.load_custom, reader=reader)
        data.split(n_folds=args.n_folds)

    elif args.folds_files is not None:  # load from files
        if args.reader is None:
            parser.error('-reader parameter is needed.')
        reader = eval(args.reader)
        folds_files = args.folds_files.split()
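        # Pair consecutive paths into (train_file, test_file) tuples.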
        folds_files = [(folds_files[i], folds_files[i + 1])
                       for i in range(0, len(folds_files) - 1, 2)]
        data = Dataset.load_from_folds(folds_files=folds_files, reader=reader)

    else:  # load builtin dataset and split
        data = Dataset.load_builtin(args.load_builtin)
        data.split(n_folds=args.n_folds)

    evaluate(algo, data, with_dump=args.with_dump, dump_dir=args.dump_dir)
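
For reference, here is a minimal programmatic sketch of what the built-in-dataset
path of main() boils down to. The import line and the n_folds value are
assumptions mirroring the epilog example, not part of the original script:

from surprise import SVD, Dataset, evaluate  # assumed import path

data = Dataset.load_builtin('ml-100k')  # load (downloading if needed) the built-in dataset
data.split(n_folds=3)                   # split into 3 cross-validation folds
algo = SVD()                            # SVD with default parameters
evaluate(algo, data, measures=['rmse', 'mae'])
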
Example #12
def test_SVD_parameters():
    """Ensure that all parameters are taken into account."""

    # The baseline against which to compare.
    algo = SVD(n_factors=1, n_epochs=1)
    rmse_default = evaluate(algo, data, measures=['rmse'])['rmse']

    # n_factors
    algo = SVD(n_factors=2, n_epochs=1)
    rmse_factors = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_factors

    # n_epochs
    algo = SVD(n_factors=1, n_epochs=2)
    rmse_n_epochs = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_n_epochs

    # lr_all
    algo = SVD(n_factors=1, n_epochs=1, lr_all=5)
    rmse_lr_all = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_lr_all

    # reg_all
    algo = SVD(n_factors=1, n_epochs=1, reg_all=5)
    rmse_reg_all = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_reg_all

    # lr_bu
    algo = SVD(n_factors=1, n_epochs=1, lr_bu=5)
    rmse_lr_bu = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_lr_bu

    # lr_bi
    algo = SVD(n_factors=1, n_epochs=1, lr_bi=5)
    rmse_lr_bi = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_lr_bi

    # lr_pu
    algo = SVD(n_factors=1, n_epochs=1, lr_pu=5)
    rmse_lr_pu = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_lr_pu

    # lr_qi
    algo = SVD(n_factors=1, n_epochs=1, lr_qi=5)
    rmse_lr_qi = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_lr_qi

    # reg_bu
    algo = SVD(n_factors=1, n_epochs=1, reg_bu=5)
    rmse_reg_bu = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_reg_bu

    # reg_bi
    algo = SVD(n_factors=1, n_epochs=1, reg_bi=5)
    rmse_reg_bi = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_reg_bi

    # reg_pu
    algo = SVD(n_factors=1, n_epochs=1, reg_pu=5)
    rmse_reg_pu = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_reg_pu

    # reg_qi
    algo = SVD(n_factors=1, n_epochs=1, reg_qi=5)
    rmse_reg_qi = evaluate(algo, data, measures=['rmse'])['rmse']
    assert rmse_default != rmse_reg_qi