def test_sorted_neighborhood_blocks(self):
    """Check the id blocks yielded by a sorted-neighborhood blocking.

    The blocking is configured on attribute index 1 of both sets with
    ``window_width=1`` and fitted on the soundex fixtures. The test
    passes when exactly as many blocks are produced as expected and
    every expected ``(ref_ids, target_ids)`` pair is among them; the
    order in which blocks are yielded is not checked.
    """
    blocker = SortedNeighborhoodBlocking(
        ref_attr_index=1,
        target_attr_index=1,
        window_width=1,
    )
    blocker.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
    found = list(blocker.iter_id_blocks())
    expected = [
        (['a6'], ['b7', 'b5']),
        (['a3'], ['b5', 'b1']),
        (['a2'], ['b2']),
        (['a5'], ['b4']),
        (['a7'], ['b4', 'b6']),
        (['a1'], ['b6', 'b3']),
        (['a4'], ['b3']),
    ]
    # Compare sizes first so that an unexpected extra block is caught
    # even when every expected block is present.
    self.assertEqual(len(found), len(expected))
    for wanted in expected:
        self.assertIn(wanted, found)
def test_sorted_neighborhood_keyfunc(self):
    """Check sorted-neighborhood blocking with a reversing ``key_func``.

    Same setup as the plain sorted-neighborhood test (attribute index 1
    on both sets, ``window_width=1``, soundex fixtures), except that a
    ``key_func`` returning its argument reversed is supplied. The test
    passes when the number of blocks matches and every expected
    ``(ref_ids, target_ids)`` pair is produced, in any order.
    """
    def reverse_value(value):
        # Equivalent to the slice ``value[::-1]``.
        return value[::-1]

    blocker = SortedNeighborhoodBlocking(
        ref_attr_index=1,
        target_attr_index=1,
        key_func=reverse_value,
        window_width=1,
    )
    blocker.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
    found = list(blocker.iter_id_blocks())
    expected = [
        (['a1'], ['b3']),
        (['a2'], ['b6']),
        (['a5'], ['b4']),
        (['a3'], ['b7', 'b1']),
        (['a6'], ['b2', 'b5']),
        (['a4'], ['b5']),
    ]
    # Compare sizes first so that an unexpected extra block is caught
    # even when every expected block is present.
    self.assertEqual(len(found), len(expected))
    for wanted in expected:
        self.assertIn(wanted, found)