def test_test_loader(dataset_ncf_files_sorted):
    """Check ``Dataset.test_loader`` against the leave-one-out test file.

    Verifies that:

    * the users held by ``dataset.test_full_datafile`` are exactly the
      unique users found in the test CSV;
    * the highest batch index plus one equals the number of rows in the
      test CSV;
    * every batch yielded with ``yield_id=True`` starts with a plain ``int``
      user id, a plain ``int`` item id and a plain ``float`` rating;
    * the concatenated batches have the expected row count and cover the
      same set of users once ids are mapped back through
      ``dataset.id2user`` / ``dataset.id2item``.

    Args:
        dataset_ncf_files_sorted: fixture unpacked as a 3-tuple; the first
            element is the training file path and the third the
            leave-one-out test file path (the second is unused here).
    """
    # NOTE(review): a second ``test_test_loader`` appears later in this
    # source; if both live in the same module the later definition replaces
    # this one and pytest will never collect it -- confirm and rename.
    train_file, _, loo_test_file = dataset_ncf_files_sorted
    loo_test_df = pd.read_csv(loo_test_file)
    expected_users = set(loo_test_df[DEFAULT_USER_COL].unique())

    n_neg = 1
    n_neg_test = 1
    dataset = Dataset(
        train_file,
        test_file=loo_test_file,
        n_neg=n_neg,
        n_neg_test=n_neg_test,
    )
    assert set(dataset.test_full_datafile.users) == expected_users

    # The number of batches and the total data size must match what the
    # test file contains once everything has been loaded.
    n_expected_batches = len(loo_test_df)
    last_batch_index = max(dataset.test_full_datafile.batch_indices_range)
    assert last_batch_index + 1 == n_expected_batches

    frames = []
    for batch in dataset.test_loader(yield_id=True):
        user_ids, item_ids, ratings = batch[0], batch[1], batch[2]

        # Exact type checks on purpose: subclasses must not pass.
        assert type(user_ids[0]) is int
        assert type(item_ids[0]) is int
        assert type(ratings[0]) is float

        frames.append(
            pd.DataFrame(
                {
                    DEFAULT_USER_COL: [dataset.id2user[uid] for uid in user_ids],
                    DEFAULT_ITEM_COL: [dataset.id2item[iid] for iid in item_ids],
                    DEFAULT_RATING_COL: ratings,
                }
            )
        )

    assert len(frames) == n_expected_batches

    loaded_df = pd.concat(frames).reset_index(drop=True)
    assert loaded_df.shape[0] == n_expected_batches * n_neg_test * 2
    assert set(loaded_df[DEFAULT_USER_COL]) == expected_users
def test_test_loader(python_dataset_ncf):
    """Check the batches produced by ``Dataset.test_loader``.

    For every batch the test asserts that:

    * users, items and labels each have ``N_NEG_TEST + 1`` entries;
    * an item flagged as positive belongs to that user's positive pool and
      an item flagged as negative does not, where the pool is built from
      train and test interactions together;
    * exactly one entry is positive (leave-one-out).

    Args:
        python_dataset_ncf: fixture unpacked as a ``(train, test)`` pair of
            pandas DataFrames.
    """
    # Function-scope import so the block does not rely on a module-level
    # ``pd`` binding it never used before.
    import pandas as pd

    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)

    # Positive user -> items mapping; note that the pool is train + test.
    # ``DataFrame.append`` was removed in pandas 2.0, so use ``pd.concat``.
    df = pd.concat([train, test])
    # A single groupby pass instead of re-filtering the frame per user.
    positive_pool = {
        user: set(items)
        for user, items in df.groupby(DEFAULT_USER_COL)[DEFAULT_ITEM_COL]
    }

    expected_batch_len = N_NEG_TEST + 1
    for batch in data.test_loader():
        user, item, labels = batch

        # shape
        assert len(user) == expected_batch_len
        assert len(item) == expected_batch_len
        assert len(labels) == expected_batch_len

        for u, i, is_pos in zip(user, item, labels):
            if is_pos:
                assert i in positive_pool[u]
            else:
                assert i not in positive_pool[u]

        label_list = list(labels)

        # leave-one-out: exactly one positive per batch
        assert sum(label_list) == 1
        # right labels
        assert len(label_list) == expected_batch_len * sum(label_list)