def test_groups_correctness(requests):
    """Check that a 'groups' run returns the same groups as anonlink.

    anonlink is treated as the reference: the expected groups are solved
    locally from the pickled filters, the same filters are uploaded in
    packed binary form, and the groups returned by the run must match the
    local ones when both are compared as sets of frozensets.
    """
    # Pre-generated filters, loaded from the test data file.
    # NOTE(review): pickle.load is only acceptable because DATA_PATH is
    # presumably a trusted file shipped with the tests — confirm.
    with open(DATA_PATH, 'rb') as data_file:
        filters = pickle.load(data_file)

    # Reference answer, computed directly with anonlink.
    expected_groups = anonlink.solving.greedy_solve(
        anonlink.candidate_generation.find_candidate_pairs(
            filters,
            anonlink.similarities.dice_coefficient_accelerated,
            THRESHOLD))

    # Every filter of every dataset must have the same length as the first
    # one, because a single size is passed to the packing helper below.
    filter_size = len(filters[0][0])
    for dataset in filters:
        for filter_ in dataset:
            assert len(filter_) == filter_size

    packed_filters = []
    for dataset in filters:
        packed_filters.append(
            b''.join(binary_pack_for_upload(dataset, filter_size)))

    project_data, _ = create_project_upload_data(
        requests,
        packed_filters,
        result_type='groups',
        binary=True,
        hash_size=DATA_HASH_SIZE)
    try:
        run = post_run(requests, project_data, threshold=THRESHOLD)
        run_result = get_run_result(requests, project_data, run)
        result_groups = run_result['groups']
    finally:
        # Always remove the project, even if the run or the fetch failed.
        delete_project(requests, project_data)

    # Compare the service result with anonlink, ignoring ordering both
    # between groups and inside each group.
    actual = {
        frozenset(tuple(member) for member in group)
        for group in result_groups
    }
    expected = {frozenset(group) for group in expected_groups}
    assert actual == expected
コード例 #2
0
def test_delete_project_after_creating_run_with_clks(
        requests, result_type_number_parties):
    """Delete a project obtained from ``_create_data_linkage_run``.

    Once the project has been deleted, ``get_project_description`` is
    expected to raise ``AssertionError`` for it.
    """
    # Only the project is needed here; the second returned value is unused.
    project, _ = _create_data_linkage_run(
        requests, result_type_number_parties)

    delete_project(requests, project)

    with pytest.raises(AssertionError):
        get_project_description(requests, project)
コード例 #3
0
def project(request, requests, result_type_number_parties):
    """Yield a data-less project, then delete it.

    ``result_type_number_parties`` must unpack into
    ``(result_type, number_parties)``; both values are forwarded to
    ``create_project_no_data``. The code after the ``yield`` deletes the
    project when the generator is resumed.
    """
    kind, party_count = result_type_number_parties
    new_project = create_project_no_data(
        requests, result_type=kind, number_parties=party_count)

    yield new_project

    # Release the project resource.
    delete_project(requests, new_project)
コード例 #4
0
def a_blocking_project(request, requests):
    """Yield a two-party 'groups' project created with ``uses_blocking=True``.

    The project is created without data through ``create_project_no_data``
    and deleted when the generator is resumed after the ``yield``.
    """
    blocking_project = create_project_no_data(
        requests, result_type="groups", number_parties=2, uses_blocking=True)

    yield blocking_project

    # Release the project resource.
    delete_project(requests, blocking_project)
コード例 #5
0
def test_similarity_scores(requests, the_truth):
    """Check a 'similarity_scores' run against the expected scores.

    ``the_truth`` supplies the two CLK datasets (``clks_a``, ``clks_b``), the
    ``threshold`` for the run and the expected ``similarity_scores`` mapping.
    Every expected pair must be present in the result with exactly the same
    score; the result may contain additional pairs.

    The project is deleted in a ``finally`` clause so that a failing run or a
    failing assertion does not leave the project behind.
    """
    project_data, _ = create_project_upload_data(
        requests,
        (the_truth['clks_a'], the_truth['clks_b']),
        result_type='similarity_scores')
    try:
        run = post_run(
            requests, project_data, threshold=the_truth['threshold'])
        result = get_run_result(requests, project_data, run, timeout=60)

        true_scores = the_truth['similarity_scores']
        # Each result entry is (a, b, score) where a and b are two-element
        # sequences. Sorting [a, b] orders them by their first element
        # (presumably the dataset index — confirm); the key keeps only the
        # second element of each, in that order.
        result_scores = {
            tuple(index for _, index in sorted([a, b])): score
            for a, b, score in result['similarity_scores']
        }

        # Anonlink is more strict on enforcing the k parameter. Hence the
        # subset.
        assert true_scores.keys() <= result_scores.keys()

        for pair, expected_score in true_scores.items():
            assert expected_score == result_scores[pair]
    finally:
        # Release the project whether or not the checks above succeeded.
        delete_project(requests, project_data)
コード例 #6
0
def test_delete_project_types(requests, project):
    """Delete the supplied ``project``; passes if ``delete_project`` does not raise.

    NOTE(review): if ``project`` comes from a generator that also calls
    ``delete_project`` after yielding, the project is deleted twice — confirm
    that ``delete_project`` tolerates an already-deleted project.
    """
    delete_project(requests, project)
コード例 #7
0
def groups_project(request, requests):
    """Yield a 'groups' project built from ``request.param``, then delete it.

    ``request.param`` must unpack into
    ``(size, overlap, encoding_size, uses_blocking)``; the values are passed
    positionally to ``create_project_response`` around the ``'groups'``
    result type.
    """
    size, overlap, encoding_size, uses_blocking = request.param
    created = create_project_response(
        requests, size, overlap, 'groups', encoding_size, uses_blocking)

    yield created

    # Runs when the generator is resumed: release the project.
    delete_project(requests, created)
コード例 #8
0
def permutations_project(request, requests):
    """Yield a 'permutations' project built from ``request.param``, then delete it.

    ``request.param`` must unpack into ``(size, overlap, encoding_size)``;
    the values are passed positionally to ``create_project_response`` around
    the ``'permutations'`` result type.
    """
    size, overlap, encoding_size = request.param
    created = create_project_response(
        requests, size, overlap, 'permutations', encoding_size)

    yield created

    # Runs when the generator is resumed: release the project.
    delete_project(requests, created)
コード例 #9
0
def similarity_scores_project(request, requests):
    """Yield a 'similarity_scores' project built from ``request.param``, then delete it.

    ``request.param`` must unpack into ``(size, overlap, encoding_size)``;
    the values are passed positionally to ``create_project_response`` around
    the ``'similarity_scores'`` result type.
    """
    size, overlap, encoding_size = request.param
    created = create_project_response(
        requests, size, overlap, 'similarity_scores', encoding_size)

    yield created

    # Runs when the generator is resumed: release the project.
    delete_project(requests, created)