예제 #1
0
def test_test_loader(dataset_ncf_files_sorted):
    """Check that ``Dataset.test_loader`` walks the whole leave-one-out test file.

    Verifies that:

    * the users exposed by ``dataset.test_full_datafile`` are exactly the users
      found in the leave-one-out test CSV;
    * the loader yields one batch per row of that CSV;
    * with ``yield_id=True`` the first elements of the user, item and rating
      sequences are plain ``int``, ``int`` and ``float`` respectively;
    * after mapping ids back through ``id2user`` / ``id2item``, the concatenated
      batches have the expected number of rows and cover every test user.

    Args:
        dataset_ncf_files_sorted: fixture unpacked as a 3-tuple; the first and
            third elements are used as the train and leave-one-out test file
            paths (the second is ignored here).
    """
    train_path, _, leave_one_out_test_path = dataset_ncf_files_sorted
    leave_one_out_test = pd.read_csv(leave_one_out_test_path)
    test_users = leave_one_out_test[DEFAULT_USER_COL].unique()

    n_neg = 1
    n_neg_test = 1
    dataset = Dataset(
        train_path,
        test_file=leave_one_out_test_path,
        n_neg=n_neg,
        n_neg_test=n_neg_test,
    )
    assert set(dataset.test_full_datafile.users) == set(test_users)

    # test number of batches and data size is as expected after loading all test data
    expected_test_batches = leave_one_out_test.shape[0]
    assert max(dataset.test_full_datafile.batch_indices_range) + 1 == expected_test_batches

    batch_records = []
    for batch in dataset.test_loader(yield_id=True):
        users, items, ratings = batch[0], batch[1], batch[2]

        # Exact type identity on purpose: isinstance() would also accept
        # subclasses such as numpy.float64 and weaken the check.
        assert type(users[0]) is int
        assert type(items[0]) is int
        assert type(ratings[0]) is float

        # Map the yielded ids back to the original user/item values so the
        # result can be compared against the raw test file.
        batch_records.append(
            pd.DataFrame(
                {
                    DEFAULT_USER_COL: [dataset.id2user[user] for user in users],
                    DEFAULT_ITEM_COL: [dataset.id2item[item] for item in items],
                    DEFAULT_RATING_COL: ratings,
                }
            )
        )

    assert len(batch_records) == expected_test_batches
    test_loader_df = pd.concat(batch_records).reset_index(drop=True)
    # Assumes each test batch holds one positive plus n_neg_test sampled
    # negatives, i.e. n_neg_test + 1 rows. Writing this as `n_neg_test * 2`
    # would only be correct for n_neg_test == 1.
    assert test_loader_df.shape[0] == expected_test_batches * (n_neg_test + 1)
    assert set(test_loader_df[DEFAULT_USER_COL]) == set(test_users)
예제 #2
0
def test_test_loader(python_dataset_ncf):
    """Check the shape and labelling of every batch yielded by ``test_loader``.

    For each batch the test asserts that:

    * users, items and labels each contain ``N_NEG_TEST + 1`` entries;
    * an entry with a truthy label is an item the user interacted with in
      train or test, and an entry with a falsy label is not;
    * the labels sum to 1 (leave-one-out evaluation).

    Args:
        python_dataset_ncf: fixture unpacked as ``(train, test)``; both are
            treated as pandas DataFrames with user and item columns.
    """
    # Local import keeps this block self-contained: DataFrame.append was
    # removed in pandas 2.0, so the frames are combined with pd.concat.
    import pandas as pd

    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)

    # positive user-item dict, noting that the pool is train+test
    df = pd.concat([train, test])
    # A single groupby pass replaces re-filtering the whole frame per user.
    positive_pool = {
        u: set(user_items)
        for u, user_items in df.groupby(DEFAULT_USER_COL)[DEFAULT_ITEM_COL]
    }

    for batch in data.test_loader():
        user, item, labels = batch
        # shape
        assert len(user) == N_NEG_TEST + 1
        assert len(item) == N_NEG_TEST + 1
        assert len(labels) == N_NEG_TEST + 1

        label_list = []

        for u, i, is_pos in zip(user, item, labels):
            if is_pos:
                # positives must come from the user's known interactions
                assert i in positive_pool[u]
            else:
                # negatives must never be an item the user interacted with
                assert i not in positive_pool[u]

            label_list.append(is_pos)

        # leave-one-out
        assert sum(label_list) == 1
        # right labels
        assert len(label_list) == (N_NEG_TEST + 1) * sum(label_list)