def test_soundex_couples(self):
    """Check the id pairs yielded by a fitted SoundexBlocking.

    The blocking is configured on attribute index 1 of both the reference
    and target sets, with ``language='english'``. After fitting on the
    SOUNDEX fixtures, exactly 8 id pairs must be produced and every entry
    of ``SOUNDEX_PAIRS`` must be among them.
    """
    blocker = SoundexBlocking(
        ref_attr_index=1,
        target_attr_index=1,
        language='english',
    )
    blocker.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)

    # Materialize the iterator once so it can be both counted and searched.
    id_pairs = [id_pair for id_pair in blocker.iter_id_pairs()]

    self.assertEqual(len(id_pairs), 8)
    for expected_pair in SOUNDEX_PAIRS:
        self.assertIn(expected_pair, id_pairs)
def test_soundex_blocks(self):
    """Check the id blocks yielded by a fitted SoundexBlocking.

    The blocking is configured on attribute index 1 of both the reference
    and target sets, with ``language='english'``. After fitting on the
    SOUNDEX fixtures, exactly 3 blocks must be produced, each a
    ``(reference ids, target ids)`` tuple of lists matching the expected
    values below.
    """
    blocker = SoundexBlocking(
        ref_attr_index=1,
        target_attr_index=1,
        language='english',
    )
    blocker.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)

    # Materialize the iterator once so it can be both counted and searched.
    id_blocks = [id_block for id_block in blocker.iter_id_blocks()]
    self.assertEqual(len(id_blocks), 3)

    # Checked in this order; the comparison is list equality, so the order
    # of ids inside each block matters.
    expected_blocks = (
        (['a1', 'a7'], ['b3', 'b6']),
        (['a2', 'a5'], ['b4']),
        (['a3'], ['b1', 'b2']),
    )
    for expected_block in expected_blocks:
        self.assertIn(expected_block, id_blocks)