Ejemplo n.º 1
0
def test_groups_correctness(requests):
    """Check the service's 'groups' result against anonlink's own answer.

    anonlink is taken as the reference: its greedy solver is run locally
    on the same filters, and the set of groups reported by the service
    must equal the set it produces.
    """
    # Load the pre-generated filters from DATA_PATH.
    with open(DATA_PATH, 'rb') as data_file:
        datasets = pickle.load(data_file)

    # Reference grouping, computed locally with anonlink.
    expected_groups = anonlink.solving.greedy_solve(
        anonlink.candidate_generation.find_candidate_pairs(
            datasets,
            anonlink.similarities.dice_coefficient_accelerated,
            THRESHOLD))

    # Every filter must have the same length as the very first one,
    # because a single size is used for packing below.
    encoding_length = len(datasets[0][0])
    for dataset in datasets:
        for encoding in dataset:
            assert len(encoding) == encoding_length

    packed_datasets = [
        b''.join(binary_pack_filters(dataset, encoding_length))
        for dataset in datasets
    ]
    project_data, _ = create_project_upload_data(
        requests,
        packed_datasets,
        result_type='groups',
        binary=True,
        hash_size=DATA_HASH_SIZE)
    try:
        run = post_run(requests, project_data, threshold=THRESHOLD)
        service_groups = get_run_result(requests, project_data, run)['groups']
    finally:
        # The project is removed whether or not the run succeeded.
        delete_project(requests, project_data)

    # Order is irrelevant both within a group and between groups, so
    # compare as sets of frozensets.
    actual = {
        frozenset(tuple(member) for member in group)
        for group in service_groups
    }
    expected = {frozenset(group) for group in expected_groups}
    assert actual == expected
Ejemplo n.º 2
0
def project(request, requests, result_type_number_parties):
    """Create a project via ``create_project_no_data``, yield it, then delete it.

    ``result_type_number_parties`` is a ``(result_type, number_parties)``
    pair that is forwarded to the project creation helper.
    """
    kind, party_count = result_type_number_parties
    created = create_project_no_data(
        requests, result_type=kind, number_parties=party_count)
    yield created
    # Reached when the generator is resumed: release the project.
    delete_project(requests, created)
Ejemplo n.º 3
0
def test_delete_project_after_creating_run_with_clks(
        requests, result_type_number_parties):
    """Delete a project that already has uploaded data and a posted run.

    ``result_type_number_parties`` is a ``(result_type, number_parties)``
    pair; one fake dataset of size 100 is uploaded per party.
    """
    result_type, number_parties = result_type_number_parties
    project, _ = create_project_upload_fake_data(requests,
                                                 [100] * number_parties,
                                                 overlap=0.5,
                                                 result_type=result_type)
    try:
        post_run(requests, project, 0.9)
    finally:
        # Delete in ``finally`` so the project is not left behind when
        # posting the run fails; on the success path this is still the
        # deletion being tested.
        delete_project(requests, project)
Ejemplo n.º 4
0
def test_similarity_scores(requests, the_truth):
    """Compare the service's similarity scores with the expected ones.

    ``the_truth`` supplies the two CLK datasets, the threshold for the run
    and the expected ``similarity_scores`` rows.
    """
    project_data, _, _ = create_project_upload_data(requests, the_truth['clks_a'], the_truth['clks_b'],
                                                    result_type='similarity_scores')
    try:
        run = post_run(requests, project_data, threshold=the_truth['threshold'])
        result = get_run_result(requests, project_data, run, timeout=60)
    finally:
        # Clean up as soon as the result has been fetched, so a failed run
        # or a failed assertion below does not leave the project behind.
        delete_project(requests, project_data)

    # compare the result with the truth
    ss = result['similarity_scores']
    ts = the_truth['similarity_scores']
    assert len(ss) == len(ts)
    for es_score, true_score in zip(ss, ts):
        # The two row layouts differ: positions 1 and 2 are swapped between
        # the service rows and the truth rows (presumably (a, b, score)
        # versus (a, score, b) -- confirm against the fixture).
        assert es_score[0] == true_score[0] and es_score[1] == true_score[2]
        assert es_score[2] == pytest.approx(true_score[1], rel=1e-10), 'similarity scores are different'
Ejemplo n.º 5
0
def test_similarity_scores(requests, the_truth):
    """Check that every expected similarity score is reported by the service.

    ``the_truth['similarity_scores']`` is used as a mapping from an index
    pair to its score; the service result is converted to the same shape
    before comparing.
    """
    project_data, _ = create_project_upload_data(
        requests, (the_truth['clks_a'], the_truth['clks_b']),
        result_type='similarity_scores')
    try:
        run = post_run(requests, project_data, threshold=the_truth['threshold'])
        result = get_run_result(requests, project_data, run, timeout=60)
    finally:
        # Clean up as soon as the result has been fetched, so a failed run
        # or a failed assertion below does not leave the project behind.
        delete_project(requests, project_data)

    true_scores = the_truth['similarity_scores']
    result_scores = {(a, b): sim for a, b, sim in result['similarity_scores']}

    # Anonlink is more strict on enforcing the k parameter. Hence the
    # subset.
    assert true_scores.keys() <= result_scores.keys()

    for pair in true_scores:
        assert true_scores[pair] == result_scores[pair]
Ejemplo n.º 6
0
def mapping_project(request, requests):
    """Yield a 'mapping' project built from ``request.param``; delete it afterwards.

    ``request.param`` must unpack into ``(size, overlap, encoding_size)``.
    """
    data_size, data_overlap, enc_size = request.param
    created = create_project_response(
        requests, data_size, data_overlap, 'mapping', enc_size)
    yield created
    # Reached when the generator is resumed: release the project.
    delete_project(requests, created)
Ejemplo n.º 7
0
def permutations_project(request, requests):
    """Yield a 'permutations' project built from ``request.param``; delete it afterwards.

    ``request.param`` must unpack into ``(size, overlap, encoding_size)``.
    """
    data_size, data_overlap, enc_size = request.param
    created = create_project_response(
        requests, data_size, data_overlap, 'permutations', enc_size)
    yield created
    # Reached when the generator is resumed: release the project.
    delete_project(requests, created)
Ejemplo n.º 8
0
def project(request, requests):
    """Create a project via ``create_project_no_data``, yield it, then delete it.

    The result type is taken from ``request.param``.
    """
    created = create_project_no_data(requests, request.param)
    yield created
    # Reached when the generator is resumed: release the project.
    delete_project(requests, created)
Ejemplo n.º 9
0
def similarity_scores_project(request, requests):
    """Yield a 'similarity_scores' project built from ``request.param``; delete it afterwards.

    ``request.param`` must unpack into ``(size, overlap, encoding_size)``.
    """
    data_size, data_overlap, enc_size = request.param
    created = create_project_response(
        requests, data_size, data_overlap, 'similarity_scores', enc_size)
    yield created
    # Reached when the generator is resumed: release the project.
    delete_project(requests, created)
Ejemplo n.º 10
0
def test_delete_project_types(requests, project):
    """Delete the project supplied by the ``project`` argument.

    The test body makes no assertion of its own; it relies on
    ``delete_project`` to signal a failure.
    """
    # NOTE(review): if ``project`` comes from a fixture that also deletes
    # the project afterwards, that second delete must tolerate an
    # already-removed project -- confirm against the fixture in use.
    delete_project(requests, project)
def test_delete_project_after_creating_run_with_clks(requests, result_type):
    """Delete a two-party project that already has uploaded data and a posted run.

    Two fake datasets of size 100 are uploaded for the given ``result_type``
    before the run is posted.
    """
    # Only the project is needed; the other two returned values are unused.
    project, _dp1, _dp2 = create_project_upload_fake_data(
        requests, [100, 100], overlap=0.5, result_type=result_type)
    try:
        post_run(requests, project, 0.9)
    finally:
        # Delete in ``finally`` so the project is not left behind when
        # posting the run fails; on the success path this is still the
        # deletion being tested.
        delete_project(requests, project)