def test_permutation(requests, the_truth):
    """Check a 'permutations' run against the ground-truth groups.

    Uploads both parties' CLKs, runs the linkage at the ground-truth
    threshold, fetches the mask plus each party's permutation, and then
    verifies every position against a mapping built from
    ``the_truth['groups']``.
    """
    clks = (the_truth['clks_a'], the_truth['clks_b'])
    project_data, (r_a, r_b) = create_project_upload_data(
        requests, clks, result_type='permutations')
    run = post_run(requests, project_data, threshold=the_truth['threshold'])

    def fetch_party_result(receipt):
        # Each data provider reads its own result with its receipt token;
        # the run has already been waited on when the mask was fetched.
        return get_run_result(
            requests, project_data, run,
            result_token=receipt['receipt_token'], wait=False)

    mask_result = get_run_result(requests, project_data, run, timeout=240)
    party_results = [fetch_party_result(receipt) for receipt in (r_a, r_b)]

    # Compare permutations and mask against a mapping of the truth.
    permutation_a, permutation_b = (
        inverse_of_permutation(party_result['permutation'])
        for party_result in party_results)

    # A pairwise mapping is easier to check against than raw groups.
    mapping = dict(anonlink.solving.pairs_from_groups(the_truth['groups']))

    # NB: Anonlink is more strict on enforcing the k parameter, so there
    # is a small chance the checks below won't hold. This should only be
    # the case for more noisy problems.
    for a, b, bit in zip(permutation_a, permutation_b, mask_result['mask']):
        if bit != 1:
            # Masked-out position: no true link may start here.
            assert a not in mapping, f"Expected link was masked out - run {run}"
            continue
        assert a in mapping, f"Unexpected link was included - run {run}"
        assert mapping[a] == b, f"Expected link from {a} was incorrect - run {run}"
def test_groups_correctness(requests):
    """Check that the service's 'groups' output matches anonlink's own.

    Anonlink is assumed to compute the right result: the pickled filters
    at ``DATA_PATH`` are solved locally with anonlink, uploaded in binary
    form to the service, and the two sets of groups must be equal.
    """
    # Pre-generated filters; the pickle is a local test fixture.
    with open(DATA_PATH, 'rb') as fixture_file:
        filters = pickle.load(fixture_file)

    # Reference answer, computed directly with anonlink.
    similarity_fn = anonlink.similarities.dice_coefficient_accelerated
    candidate_pairs = anonlink.candidate_generation.find_candidate_pairs(
        filters, similarity_fn, THRESHOLD)
    true_groups = anonlink.solving.greedy_solve(candidate_pairs)

    # All filters must share the size of the first one before packing.
    filter_size = len(filters[0][0])
    assert not any(
        len(clk) != filter_size
        for dataset in filters
        for clk in dataset)

    packed_filters = []
    for dataset in filters:
        packed_filters.append(
            b''.join(binary_pack_for_upload(dataset, filter_size)))

    project_data, _ = create_project_upload_data(
        requests,
        packed_filters,
        result_type='groups',
        binary=True,
        hash_size=DATA_HASH_SIZE)
    try:
        run = post_run(requests, project_data, threshold=THRESHOLD)
        result_groups = get_run_result(requests, project_data, run)['groups']
    finally:
        # Remove the project whether or not the run succeeded.
        delete_project(requests, project_data)

    # Compare the entity service result with anonlink, ignoring order.
    result_group_set = set(
        frozenset(tuple(member) for member in group)
        for group in result_groups)
    true_group_set = {frozenset(group) for group in true_groups}
    assert result_group_set == true_group_set
def test_project_json_data_upload_with_mismatched_encoded_size(
        requests, result_type_number_parties):
    """Expect a run to fail when the parties' uploads differ in size.

    The first party's JSON CLKs are generated with 64 as the second
    argument and every other party's with 256 (presumably the encoding
    size, going by the test name - confirm against
    ``generate_json_serialized_clks``). Posting a run and waiting for its
    result must then raise ``AssertionError``.
    """
    result_type, number_parties = result_type_number_parties

    data = []
    for party in range(number_parties):
        # Only party 0 gets the smaller value, creating the mismatch.
        size = 256 if party else 64
        data.append(generate_json_serialized_clks(500, size))

    new_project_data, _ = create_project_upload_data(
        requests, data, result_type=result_type)

    # Either posting the run or fetching its result may raise.
    with pytest.raises(AssertionError):
        run_id = post_run(requests, new_project_data, 0.9)
        get_run_result(requests, new_project_data, run_id, wait=True)
def test_groups(requests, the_truth):
    """Check a 'groups' run against the ground-truth groups.

    Uploads both parties' CLKs, runs the linkage at the ground-truth
    threshold, and requires the returned groups to equal
    ``the_truth['groups']`` irrespective of ordering.
    """
    clks = (the_truth['clks_a'], the_truth['clks_b'])
    project_data, _ = create_project_upload_data(
        requests, clks, result_type='groups')
    run = post_run(requests, project_data, threshold=the_truth['threshold'])
    result = get_run_result(requests, project_data, run, timeout=240)

    raw_result_groups = result['groups']
    raw_true_groups = the_truth['groups']

    # Normalise both sides to frozensets so neither group order nor member
    # order matters; result members are converted to tuples to be hashable.
    observed = frozenset(
        frozenset(tuple(member) for member in group)
        for group in raw_result_groups)
    expected = frozenset(frozenset(group) for group in raw_true_groups)

    assert observed == expected
def test_similarity_scores(requests, the_truth):
    """Check a 'similarity_scores' run against the ground-truth scores.

    Uploads both parties' CLKs, runs the linkage at the ground-truth
    threshold, and requires every ground-truth pair to appear in the
    result with exactly the same score.

    The project is deleted in a ``finally`` clause so it is cleaned up
    even when the run fails, following the same pattern as
    ``test_groups_correctness``.
    """
    project_data, _ = create_project_upload_data(
        requests,
        (the_truth['clks_a'], the_truth['clks_b']),
        result_type='similarity_scores')
    try:
        run = post_run(requests, project_data, threshold=the_truth['threshold'])
        result = get_run_result(requests, project_data, run, timeout=60)
    finally:
        # The result is already in memory (or the run failed); either way
        # the project is no longer needed on the server.
        delete_project(requests, project_data)

    true_scores = the_truth['similarity_scores']
    # Key each score by the second element of each sorted endpoint
    # (presumably the record index within its dataset - confirm against
    # the API's result format).
    result_scores = {
        tuple(index for _, index in sorted([a, b])): score
        for a, b, score in result['similarity_scores']
    }

    # Anonlink is more strict on enforcing the k parameter. Hence the
    # subset check rather than equality.
    assert true_scores.keys() <= result_scores.keys()

    for pair in true_scores:
        assert true_scores[pair] == result_scores[pair]