def test_permutation(requests, the_truth):
    """Check a 'permutations' run against the known ground truth.

    Uploads both parties' CLKs, starts a run at the truth's threshold, then
    fetches the mask (no explicit result token) and each party's permutation
    (using that party's receipt token). Every (permutation_a, permutation_b,
    mask) row is compared with the pairing derived from
    ``the_truth['groups']``.
    """
    clks = (the_truth['clks_a'], the_truth['clks_b'])
    project_data, (r_a, r_b) = create_project_upload_data(
        requests, clks, result_type='permutations')
    run = post_run(requests, project_data, threshold=the_truth['threshold'])
    mask_result = get_run_result(requests, project_data, run, timeout=240)

    # Party A's permutation is fetched first, then party B's, without
    # waiting (presumably the run is complete once the mask is available).
    perm_a_result, perm_b_result = [
        get_run_result(requests,
                       project_data,
                       run,
                       result_token=receipt['receipt_token'],
                       wait=False)
        for receipt in (r_a, r_b)
    ]

    # Invert the returned permutations before comparing them row by row.
    permutation_a, permutation_b = [
        inverse_of_permutation(party_result['permutation'])
        for party_result in (perm_a_result, perm_b_result)
    ]

    # A dict built from the ground-truth pairs keeps the checks below simple.
    mapping = dict(anonlink.solving.pairs_from_groups(the_truth['groups']))

    # NB: Anonlink enforces the k parameter more strictly, so there is a
    # small chance these checks fail on noisier problems.
    rows = zip(permutation_a, permutation_b, mask_result['mask'])
    for a, b, mask_bit in rows:
        if mask_bit != 1:
            # Masked-out rows must not correspond to a true link.
            assert a not in mapping, f"Expected link was masked out - run {run}"
            continue
        assert a in mapping, f"Unexpected link was included - run {run}"
        assert mapping[
            a] == b, f"Expected link from {a} was incorrect - run {run}"
def test_groups_correctness(requests):
    """Compare the service's 'groups' output with anonlink run locally.

    The filters pickled at ``DATA_PATH`` are solved locally with anonlink
    (assumed to be correct) and also uploaded in binary form to the service.
    The two sets of groups must be equal.
    """
    # Pre-built filters stored as a local test fixture.
    with open(DATA_PATH, 'rb') as fixture:
        filters = pickle.load(fixture)

    # Reference answer computed directly with anonlink.
    similarity = anonlink.similarities.dice_coefficient_accelerated
    candidate_pairs = anonlink.candidate_generation.find_candidate_pairs(
        filters, similarity, THRESHOLD)
    expected_groups = anonlink.solving.greedy_solve(candidate_pairs)

    # Every filter in every dataset must have the same length as the first.
    filter_size = len(filters[0][0])
    for dataset in filters:
        for filter_ in dataset:
            assert len(filter_) == filter_size

    # One concatenated binary payload per dataset.
    packed_filters = []
    for dataset in filters:
        packed_filters.append(
            b''.join(binary_pack_for_upload(dataset, filter_size)))

    project_data, _ = create_project_upload_data(requests,
                                                 packed_filters,
                                                 result_type='groups',
                                                 binary=True,
                                                 hash_size=DATA_HASH_SIZE)
    try:
        run = post_run(requests, project_data, threshold=THRESHOLD)
        service_groups = get_run_result(requests, project_data, run)['groups']
    finally:
        # Remove the project whether or not the run succeeded.
        delete_project(requests, project_data)

    # Normalise both sides to sets of frozensets so ordering is irrelevant.
    service_group_set = {
        frozenset(tuple(member) for member in group)
        for group in service_groups
    }
    expected_group_set = {frozenset(group) for group in expected_groups}
    assert service_group_set == expected_group_set
# Example #3
# 0
def test_project_json_data_upload_with_mismatched_encoded_size(
        requests, result_type_number_parties):
    """Expect a run to fail when parties upload differently sized CLKs.

    The first party's data is generated with a size argument of 64 and every
    other party's with 256 (presumably the encoding size, per the test name).
    Posting a run and waiting for its result must raise ``AssertionError``.
    """
    result_type, number_parties = result_type_number_parties

    # Only party 0 gets the smaller size; all later parties get 256.
    data = []
    for party in range(number_parties):
        size = 256 if party != 0 else 64
        data.append(generate_json_serialized_clks(500, size))

    new_project_data, _ = create_project_upload_data(
        requests, data, result_type=result_type)

    # Either posting the run or fetching its result may trip the assertion.
    with pytest.raises(AssertionError):
        run_id = post_run(requests, new_project_data, 0.9)
        get_run_result(requests, new_project_data, run_id, wait=True)
def test_groups(requests, the_truth):
    """Check a 'groups' run against the known ground truth.

    Uploads both parties' CLKs, runs at the truth's threshold and compares
    the returned groups with ``the_truth['groups']``, ignoring the order of
    groups and of members within a group.
    """
    clks = (the_truth['clks_a'], the_truth['clks_b'])
    project_data, _ = create_project_upload_data(
        requests, clks, result_type='groups')
    run = post_run(requests, project_data, threshold=the_truth['threshold'])
    result = get_run_result(requests, project_data, run, timeout=240)

    # Members of the returned groups are converted to tuples so they are
    # hashable; both sides then become frozensets of frozensets.
    observed = frozenset(
        frozenset(tuple(member) for member in group)
        for group in result['groups'])
    expected = frozenset(frozenset(group) for group in the_truth['groups'])

    assert observed == expected
def test_similarity_scores(requests, the_truth):
    """Check a 'similarity_scores' run against the known ground truth.

    Uploads both parties' CLKs, runs at the truth's threshold and verifies
    that every pair in ``the_truth['similarity_scores']`` appears in the
    service's result with an identical score.

    The project is deleted in a ``finally`` clause so it is cleaned up even
    when the run or one of the assertions fails.
    """
    project_data, _ = create_project_upload_data(
        requests,
        (the_truth['clks_a'], the_truth['clks_b']),
        result_type='similarity_scores')
    try:
        run = post_run(requests, project_data,
                       threshold=the_truth['threshold'])
        result = get_run_result(requests, project_data, run, timeout=60)

        true_scores = the_truth['similarity_scores']
        # Each result entry unpacks to (a, b, score), where a and b are
        # two-item sequences -- presumably (dataset index, record index).
        # Sorting the two puts them in a canonical order, and the key keeps
        # only the second item of each.
        result_scores = {
            tuple(index for _, index in sorted([a, b])): score
            for a, b, score in result['similarity_scores']
        }

        # Anonlink is more strict on enforcing the k parameter. Hence the
        # subset check rather than equality.
        assert true_scores.keys() <= result_scores.keys()

        for pair in true_scores:
            assert true_scores[pair] == result_scores[pair]
    finally:
        # Always remove the project so failures do not leak server state.
        delete_project(requests, project_data)