예제 #1
0
def test_trainset_testset():
    """Check the trainset produced from the custom train/test fold.

    A single fold is loaded from the ``custom_train`` and ``custom_test``
    files located next to this test module.  The trainset obtained from it
    is then checked attribute by attribute: ``rm``, ``ur``, ``ir``, the
    user/item counts, the rating bounds and the raw-to-inner id mappings.
    The testset is retrieved as well but is not inspected here.
    """

    here = os.path.dirname(os.path.realpath(__file__))
    train_path = here + '/custom_train'
    test_path = here + '/custom_test'

    data = Dataset.load_from_folds(folds_files=[(train_path, test_path)],
                                   reader=reader)

    # Run through the folds only to bind trainset and testset; the names
    # stay available after the loop.
    for trainset, testset in data.folds():
        pass

    # rm: judging by the expected values below, indexed as [user, item].
    rm = trainset.rm
    expected_rm = [
        ((0, 0), 4),
        ((1, 0), 4),
        ((3, 1), 5),
        ((40, 20000), 0),  # not in the trainset
    ]
    for cell, rating in expected_rm:
        assert rm[cell] == rating

    # ur: one list of pairs per user id.
    ur = trainset.ur
    expected_ur = [
        (0, [(0, 4)]),
        (1, [(0, 4), (1, 2)]),
        (40, []),  # not in the trainset
    ]
    for user, pairs in expected_ur:
        assert ur[user] == pairs

    # ir: one list of pairs per item id.
    ir = trainset.ir
    expected_ir = [
        (0, [(0, 4), (1, 4), (2, 1)]),
        (1, [(1, 2), (2, 1), (3, 5)]),
        (20000, []),  # not in the trainset
    ]
    for item, pairs in expected_ir:
        assert ir[item] == pairs

    # Sizes and rating bounds.
    assert trainset.n_users == 4
    assert trainset.n_items == 2
    assert trainset.r_min == 1
    assert trainset.r_max == 5

    # Raw ids must be mapped to inner ids in order: 'user0' -> 0, ...
    user_ids = trainset._raw2inner_id_users
    for inner_id in range(4):
        assert user_ids['user' + str(inner_id)] == inner_id

    # ... and likewise 'item0' -> 0, 'item1' -> 1.
    item_ids = trainset._raw2inner_id_items
    for inner_id in range(2):
        assert item_ids['item' + str(inner_id)] == inner_id
예제 #2
0
if it were not built-in.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import os

from recsys import BaselineOnly
from recsys import Dataset
from recsys import evaluate
from recsys import Reader

# Path to the dataset folder. It is resolved against the current user's home
# directory instead of being tied to one particular account; change this if
# the ml-100k files are stored somewhere else.
files_dir = os.path.expanduser('~/.recsys_data/ml-100k/ml-100k/')

# This time, we'll use the built-in reader.
reader = Reader('ml-100k')

# folds_files is a list of (train file, test file) path tuples:
# [(u1.base, u1.test), (u2.base, u2.test), ... (u5.base, u5.test)]
train_file = files_dir + 'u%d.base'
test_file = files_dir + 'u%d.test'
folds_files = [(train_file % i, test_file % i) for i in range(1, 6)]

data = Dataset.load_from_folds(folds_files, reader=reader)

# We'll use an algorithm that predicts baseline estimates.
algo = BaselineOnly()

# Evaluate performances of our algorithm on the dataset.
evaluate(algo, data)
예제 #3
0
def test_wrong_file_name():
    """Loading folds from file names that do not exist must fail.

    ``Dataset.load_from_folds`` is expected to raise ``ValueError`` when
    given a (train, test) pair of nonexistent paths.
    """
    missing_fold = ('does_not_exist', 'does_not_either')

    with pytest.raises(ValueError):
        Dataset.load_from_folds(folds_files=[missing_fold], reader=reader)