def test_unsupported_shape(datasets_n):
    """Expect ``greedy_solve`` to raise ``NotImplementedError``.

    The candidate triple holds one double value, followed by two tuples
    that each contain ``datasets_n`` single-element unsigned-int arrays.
    """
    # NOTE(review): presumably (similarities, dataset indices, record
    # indices) -- confirm against greedy_solve's input contract.
    first_component = array('d', [.5])
    second_component = tuple(array('I', [3]) for _ in range(datasets_n))
    third_component = tuple(array('I', [2]) for _ in range(datasets_n))
    candidates = (first_component, second_component, third_component)

    with pytest.raises(NotImplementedError):
        greedy_solve(candidates)
def test_greedy_threeparty():
    """Check ``greedy_solve`` against hand-written groupings.

    Every scenario is a ``(candidate list, expected groups)`` pair. The
    candidate list is passed through ``_zip_candidates`` and solved, and
    the result is handed to ``_compare_matching`` together with the
    expected groups. Scenarios run in the order listed.
    """
    # NOTE(review): each entry looks like
    # (similarity, ((dataset, record), (dataset, record))) -- confirm.
    scenarios = (
        (
            [
                (.9, ((1, 0), (2, 0))),
                (.8, ((0, 0), (1, 1))),
                (.8, ((0, 0), (2, 1))),
                (.8, ((1, 1), (2, 1))),
                (.7, ((0, 0), (1, 0))),
                (.7, ((0, 0), (2, 0))),
            ],
            [{(0, 0), (1, 1), (2, 1)}, {(1, 0), (2, 0)}],
        ),
        (
            [
                (.8, ((0, 0), (1, 0))),
                (.8, ((0, 1), (2, 1))),
                (.8, ((1, 1), (2, 1))),
                (.7, ((0, 0), (2, 0))),
                (.7, ((0, 1), (1, 1))),
            ],
            [{(0, 0), (1, 0)}, {(0, 1), (1, 1), (2, 1)}],
        ),
        (
            # The last pair has both members starting with 2; the expected
            # result keeps it as a group of its own.
            [
                (1., ((0, 0), (1, 0))),
                (1., ((0, 0), (2, 0))),
                (1., ((2, 0), (2, 1))),
            ],
            [{(0, 0), (1, 0)}, {(2, 0), (2, 1)}],
        ),
        (
            # Ten equally-scored pairs over five first-components (0..4);
            # the expected result is one group containing all five.
            [
                (1., ((0, 0), (1, 0))),
                (1., ((2, 0), (3, 0))),
                (1., ((2, 0), (4, 0))),
                (1., ((3, 0), (4, 0))),
                (1., ((0, 0), (2, 0))),
                (1., ((0, 0), (3, 0))),
                (1., ((0, 0), (4, 0))),
                (1., ((1, 0), (2, 0))),
                (1., ((1, 0), (3, 0))),
                (1., ((1, 0), (4, 0))),
            ],
            [{(0, 0), (1, 0), (2, 0), (3, 0), (4, 0)}],
        ),
    )

    for candidate_list, expected_groups in scenarios:
        result = greedy_solve(_zip_candidates(candidate_list))
        _compare_matching(result, expected_groups)
def test_inconsistent_dataset_number():
    """Expect ``ValueError`` when the two array tuples differ in length.

    The second component of the candidate triple holds two arrays while
    the third holds three.
    """
    first_component = array('d', [.5])
    two_arrays = (array('I', [3]), array('I', [4]))
    three_arrays = (array('I', [2]), array('I', [6]), array('I', [7]))
    candidates = (first_component, two_arrays, three_arrays)

    with pytest.raises(ValueError):
        greedy_solve(candidates)
def test_greedy_twoparty():
    """Check ``greedy_solve`` on small candidate lists with known groupings.

    Each case zips a candidate list with ``_zip_candidates``, solves it,
    and passes the result plus the expected groups to
    ``_compare_matching``.
    """
    def check(candidate_list, expected_groups):
        # One solve-and-compare round for a single scenario.
        result = greedy_solve(_zip_candidates(candidate_list))
        _compare_matching(result, expected_groups)

    # A single candidate pair is expected to come back as one group.
    check(
        [(.8, ((0, 0), (1, 0)))],
        [{(0, 0), (1, 0)}],
    )

    # Two pairs share (1, 0); only the .8 pair is expected in the result.
    check(
        [(.8, ((0, 0), (1, 0))),
         (.7, ((0, 1), (1, 0)))],
        [{(0, 0), (1, 0)}],
    )

    # No candidates: an empty result is expected.
    check([], [])

    # All four combinations are candidates; two disjoint groups expected.
    check(
        [(.8, ((0, 0), (1, 0))),
         (.7, ((0, 0), (1, 1))),
         (.7, ((0, 1), (1, 0))),
         (.6, ((0, 1), (1, 1)))],
        [{(0, 0), (1, 0)}, {(0, 1), (1, 1)}],
    )
def test_greedy_fourparty():
    """Check that six pairs over four first-components form one group.

    The candidate list is zipped with ``_zip_candidates``, solved with
    ``greedy_solve``, and compared via ``_compare_matching`` against a
    single expected group of four members.
    """
    candidate_list = [
        (.9, ((0, 0), (1, 0))),
        (.9, ((2, 0), (3, 0))),
        (.7, ((0, 0), (2, 0))),
        (.7, ((1, 0), (3, 0))),
        (.7, ((0, 0), (3, 0))),
        (.7, ((1, 0), (2, 0))),
    ]
    expected_groups = [{(0, 0), (1, 0), (2, 0), (3, 0)}]

    result = greedy_solve(_zip_candidates(candidate_list))
    _compare_matching(result, expected_groups)
def test_probabilistic_nonprobabilistic_match_ndedup(candidate_pairs):
    """Check both solvers yield the same groups on the same candidates.

    ``probabilistic_greedy_solve`` is called with ``merge_threshold=1`` and
    ``deduplicated=False``, then ``greedy_solve`` is called on the same
    zipped candidates. The two solutions are compared as sets of sets, so
    neither group order nor member order affects the outcome.
    """
    candidates = _zip_candidates(candidate_pairs)

    probabilistic_groups = probabilistic_greedy_solve(
        candidates,
        merge_threshold=1,
        deduplicated=False,
    )
    plain_groups = greedy_solve(candidates)

    # Normalise away ordering before comparing.
    probabilistic_as_sets = frozenset(
        frozenset(group) for group in probabilistic_groups)
    plain_as_sets = frozenset(
        frozenset(group) for group in plain_groups)

    assert probabilistic_as_sets == plain_as_sets
def test_greedy_2p(candidate_pairs):
    """Check properties of ``greedy_solve`` output on ``candidate_pairs``.

    ``candidate_pairs`` is iterated as ``(similarity, (i, j))`` entries.
    The checks are:

    * no group has more than two members;
    * every candidate pair has at least one member in some group;
    * every candidate pair is either a group itself, or one of its members
      sits in a group whose similarity is at least the pair's similarity.
    """
    solution = greedy_solve(_zip_candidates(candidate_pairs))

    for group in solution:
        assert len(group) <= 2

    # Map each candidate pair to its similarity (a later duplicate wins).
    similarity_of = {pair: sim for sim, pair in candidate_pairs}

    # Similarity of each chosen group, keyed by its sorted member tuple.
    matches = {}
    for group in solution:
        key = tuple(sorted(group))
        matches[key] = similarity_of[key]

    # Every record that could have a match does have a match
    matched = set()
    for group in solution:
        matched.update(group)
    for _, (i, j) in candidate_pairs:
        assert i in matched or j in matched

    # Every pair is taken unless either of the candidates have a better match
    similarity_of_record = {}
    for records, sim in matches.items():
        for record in records:
            similarity_of_record[record] = sim

    unmatched_sim = float('-inf')
    for sim, (i, j) in candidate_pairs:
        assert ((i, j) in matches
                or similarity_of_record.get(i, unmatched_sim) >= sim
                or similarity_of_record.get(j, unmatched_sim) >= sim)
def test_greedy_np(candidate_pairs):
    """Check properties of ``greedy_solve`` output on ``candidate_pairs``.

    ``candidate_pairs`` is iterated as ``(similarity, (i, j))`` entries.
    The checks are:

    * no record appears in more than one group;
    * for any two groups (records absent from the solution count as
      singleton groups), at least one cross pair of records is not a
      candidate pair.
    """
    candidates = _zip_candidates(candidate_pairs)

    candidate_pair_set = {pair for _, pair in candidate_pairs}
    every_record = set()
    for pair in candidate_pair_set:
        every_record.update(pair)

    solution = list(greedy_solve(candidates))
    occurrences = Counter(
        record for group in solution for record in group)

    # Every record is in at most one group
    for _, (i, j) in candidate_pairs:
        assert occurrences[i] <= 1 and occurrences[j] <= 1

    # Include singleton groups
    unmatched_records = every_record - occurrences.keys()
    all_groups = [*solution, *([record] for record in unmatched_records)]

    # All groups that can be merged have been merged.
    for group_a, group_b in itertools.combinations(all_groups, 2):
        fully_linked = all(
            tuple(sorted((rec_a, rec_b))) in candidate_pair_set
            for rec_a in group_a
            for rec_b in group_b)
        assert not fully_linked
def test_inconsistent_entry_number():
    """Expect ``ValueError`` when the candidate arrays differ in length.

    Each case is a candidate triple in which one or more arrays have two
    entries while the rest have one. ``greedy_solve`` must raise
    ``ValueError`` for every case; cases run in the order listed.
    """
    def u(*values):
        # Shorthand for an unsigned-int array with the given entries.
        return array('I', list(values))

    mismatched_cases = (
        # Two entries in the leading double array only.
        (array('d', [.5, .3]), (u(3), u(4)), (u(2), u(6))),
        # Extra entries somewhere in the second component.
        (array('d', [.5]), (u(3, 3), u(4)), (u(2), u(6))),
        (array('d', [.5]), (u(3, 3), u(4, 6)), (u(2), u(6))),
        (array('d', [.5]), (u(3), u(4, 6)), (u(2), u(6))),
        # Extra entries somewhere in the third component.
        (array('d', [.5]), (u(3), u(4)), (u(2), u(6, 3))),
        (array('d', [.5]), (u(3), u(4)), (u(2, 1), u(6, 3))),
        (array('d', [.5]), (u(3), u(4)), (u(2, 1), u(6))),
    )

    for candidates in mismatched_cases:
        with pytest.raises(ValueError):
            greedy_solve(candidates)