예제 #1
0
def test_testset():
    """Exercise TestSet built from the sample (user, item, rating) triplets.

    Reads ``./tests/data.txt`` as tab-separated triplets, builds a TestSet
    with empty pre-existing maps, and checks the raw-id to index mapping,
    the user listing and the ratings of user index 2. A second TestSet is
    then built with the pair ``('76', '93')`` supplied in ``pre_ui_set`` and
    is expected to expose 9 users.
    """
    triplets = Reader.read_uir_triplets('./tests/data.txt',
                                        0,  # user column
                                        1,  # item column
                                        2,  # rating column
                                        '\t',
                                        skip_lines=0)

    full_set = TestSet.from_uir_triplets(triplets,
                                         pre_uid_map={},
                                         pre_iid_map={},
                                         pre_ui_set=set())

    # Raw string ids map to integer indices.
    assert full_set.get_uid('768') == 1
    assert full_set.get_iid('195') == 7

    # zip() stops at the shorter sequence, so at most ten users are compared.
    for user_idx, expected_idx in zip(full_set.get_users(), range(10)):
        assert user_idx == expected_idx

    for rating_pair, expected_pair in zip(full_set.get_ratings(2), [(2, 4)]):
        assert rating_pair == expected_pair

    # Rebuild with one (user, item) pair already registered in pre_ui_set.
    reduced_set = TestSet.from_uir_triplets(triplets,
                                            pre_uid_map={},
                                            pre_iid_map={},
                                            pre_ui_set={('76', '93')},
                                            verbose=True)
    assert len(reduced_set.get_users()) == 9
예제 #2
0
def test_txt_to_triplets():
    """Test Reader.read_uir_triplets on the tab-separated sample file.

    Checks the number of triplets read and a few individual fields, then
    verifies that asking for an out-of-range user column raises IndexError.
    """

    data_file = './tests/data.txt'
    u_col = 0
    i_col = 1
    r_col = 2
    sep = '\t'

    triplet_data = Reader.read_uir_triplets(data_file, u_col, i_col, r_col, sep, skip_lines=0)

    assert len(triplet_data) == 10
    assert triplet_data[4][2] == 3
    assert triplet_data[6][1] == '478'
    assert triplet_data[8][0] == '543'

    # Column 10 is passed as the user column and must raise IndexError.
    # The else branch makes the test fail when no exception is raised at
    # all, instead of passing silently.
    try:
        Reader.read_uir_triplets(data_file, 10)
    except IndexError:
        pass
    else:
        raise AssertionError(
            'read_uir_triplets did not raise IndexError for u_col=10')
예제 #3
0
def test_from_provided():
    """Test BaseMethod.from_provided with missing and with complete data.

    Missing train and/or test data must raise ValueError; with both given,
    the resulting method is expected to report 10 users and 10 items.
    """
    data_file = './tests/data.txt'
    data = Reader.read_uir_triplets(data_file)

    # Each incomplete combination must raise ValueError. The else branches
    # fail the test when the call returns normally instead of raising.
    try:
        BaseMethod.from_provided(train_data=None, test_data=None)
    except ValueError:
        pass
    else:
        raise AssertionError(
            'from_provided did not raise ValueError without train/test data')

    try:
        BaseMethod.from_provided(train_data=data, test_data=None)
    except ValueError:
        pass
    else:
        raise AssertionError(
            'from_provided did not raise ValueError without test data')

    bm = BaseMethod.from_provided(train_data=data, test_data=data)

    assert bm.total_users == 10
    assert bm.total_items == 10
예제 #4
0
def test_matrix_trainset_uir_iter():
    """Check the (user, item, rating) batches yielded by MatrixTrainSet.uir_iter.

    Builds a train set from ``./tests/data.txt`` and compares each component
    of the iterated batches, in order, against the expected sequence.
    """
    triplets = Reader.read_uir_triplets('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir_triplets(triplets,
                                                 pre_uid_map={},
                                                 pre_iid_map={},
                                                 pre_ui_set=set(),
                                                 verbose=True)

    # Each component is collected with its own pass over uir_iter().
    user_batches = [u for u, _, _ in train_set.uir_iter()]
    for got, want in zip(user_batches, range(10)):
        assert got == want

    item_batches = [i for _, i, _ in train_set.uir_iter()]
    for got, want in zip(item_batches, range(10)):
        assert got == want

    expected_ratings = [4, 4, 4, 4, 3, 4, 4, 5, 3, 4]
    rating_batches = [r for _, _, r in train_set.uir_iter()]
    for got, want in zip(rating_batches, expected_ratings):
        assert got == want
예제 #5
0
def test_matrix_trainset_uij_iter():
    """Check the (user, positive item, negative item) batches from uij_iter.

    Users and positive items are compared, in order, with 0..9; every
    negative item must differ from the index at the same position.
    """
    triplets = Reader.read_uir_triplets('./tests/data.txt')
    train_set = MatrixTrainSet.from_uir_triplets(triplets,
                                                 pre_uid_map={},
                                                 pre_iid_map={},
                                                 pre_ui_set=set(),
                                                 verbose=True)

    # Each component is collected with its own pass over uij_iter().
    user_batches = [u for u, _, _ in train_set.uij_iter()]
    for got, want in zip(user_batches, range(10)):
        assert got == want

    positive_batches = [pos for _, pos, _ in train_set.uij_iter()]
    for got, want in zip(positive_batches, range(10)):
        assert got == want

    negative_batches = [neg for _, _, neg in train_set.uij_iter()]
    for got, forbidden in zip(negative_batches, range(10)):
        assert got != forbidden
예제 #6
0
from cornac.data import Reader
from cornac.eval_methods import BaseMethod
from cornac.utils.download_utils import DownloadItem

# Download the MovieLens 100K pre-defined training (u1.base) and test
# (u1.test) splits; each call yields the path later passed to the Reader.
# NOTE(review): the meaning of the positional `True` given to
# download_if_needed is not visible here (presumably a verbosity flag) -- confirm.
train_path = DownloadItem(
    url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.base',
    relative_path='u1.base',
    sub_dir='datasets/ml_100k').download_if_needed(True)
test_path = DownloadItem(
    url='http://files.grouplens.org/datasets/movielens/ml-100k/u1.test',
    relative_path='u1.test',
    sub_dir='datasets/ml_100k').download_if_needed(True)

# Load both splits with the Reader, using its default column/separator settings
train_data = Reader.read_uir_triplets(train_path)
test_data = Reader.read_uir_triplets(test_path)

# Construct the base evaluation method from the provided train/test data
# rather than letting it split the data itself.
# NOTE(review): exclude_unknowns=False presumably keeps test users/items
# that are absent from the training data -- confirm against BaseMethod.
eval_method = BaseMethod.from_provided(train_data=train_data,
                                       test_data=test_data,
                                       exclude_unknowns=False,
                                       verbose=True)

# Model
mf = cn.models.MF(k=10,
                  max_iter=25,
                  learning_rate=0.01,
                  lambda_reg=0.02,
                  use_bias=True,
                  early_stop=True,
예제 #7
0
def test_matrix_trainset():
    """Test MatrixTrainSet built from the tab-separated sample triplets.

    Covers the matrix shape, rating statistics, item popularity ranking,
    user/item counts, unknown-id checks, raw-id to index mapping and the
    id listings. A second train set is then built with the pair
    ``('76', '93')`` supplied in ``pre_ui_set`` and is expected to contain
    9 users and 9 items.
    """

    data_file = './tests/data.txt'
    u_col = 0
    i_col = 1
    r_col = 2
    sep = '\t'

    triplet_data = Reader.read_uir_triplets(data_file,
                                            u_col,
                                            i_col,
                                            r_col,
                                            sep,
                                            skip_lines=0)

    train_set = MatrixTrainSet.from_uir_triplets(triplet_data,
                                                 pre_uid_map={},
                                                 pre_iid_map={},
                                                 pre_ui_set=set(),
                                                 verbose=True)

    assert train_set.matrix.shape == (10, 10)
    assert train_set.min_rating == 3
    assert train_set.max_rating == 5

    # Two ratings of 3, seven of 4 and one of 5 give a mean of 3.9.
    # Compare with a tolerance rather than truncating both sides to int,
    # which would accept any mean in [3, 4).
    expected_mean = (3 * 2 + 4 * 7 + 5) / 10.0
    assert abs(train_set.global_mean - expected_mean) < 1e-6

    assert all(
        a == b for a, b in zip(train_set.item_ppl_rank,
                               [7, 9, 6, 5, 3, 2, 1, 0, 8, 4]))

    assert train_set.num_users == 10
    assert train_set.num_items == 10

    # Indices below the user/item count are known; larger ones are unknown.
    assert not train_set.is_unk_user(7)
    assert train_set.is_unk_user(13)

    assert not train_set.is_unk_item(3)
    assert train_set.is_unk_item(16)

    assert train_set.get_uid('768') == 1
    assert train_set.get_iid('195') == 7

    assert all(a == b for a, b in zip(train_set.get_uid_list(), range(10)))
    assert all(
        a == b for a, b in zip(train_set.get_raw_uid_list(), [
            '76', '768', '642', '930', '329', '633', '716', '871', '543', '754'
        ]))

    assert all(a == b for a, b in zip(train_set.get_iid_list(), range(10)))
    assert all(
        a == b for a, b in zip(train_set.get_raw_iid_list(), [
            '93', '257', '795', '709', '705', '226', '478', '195', '737', '282'
        ]))

    # Rebuild with one (user, item) pair already registered in pre_ui_set.
    train_set = MatrixTrainSet.from_uir_triplets(triplet_data,
                                                 pre_uid_map={},
                                                 pre_iid_map={},
                                                 pre_ui_set={('76', '93')},
                                                 verbose=True)
    assert train_set.num_users == 9
    assert train_set.num_items == 9