def test_soundex_census(self):
    """Test abydos.phonetic.Soundex (Census variant method).

    Each name is encoded with ``Soundex(var='Census')`` and compared
    against its expected value, which is either a single code string or
    a 2-tuple of codes depending on the name.
    """
    census_encoder = Soundex(var='Census')
    # (input name, expected encoding) pairs, checked in order.
    expectations = (
        ('Vandeusen', ('V532', 'D250')),
        ('van Deusen', ('V532', 'D250')),
        ('McDonald', 'M235'),
        ('la Cruz', ('L262', 'C620')),
        ('vanDamme', ('V535', 'D500')),
    )
    for name, expected in expectations:
        self.assertEqual(census_encoder.encode(name), expected)
def test_soundex_special(self):
    """Test abydos.phonetic.Soundex (special 1880-1910 variant method).

    Each name is encoded with ``Soundex(var='special')`` and compared
    against its expected four-character code.
    """
    special_encoder = Soundex(var='special')
    # (input name, expected code) pairs, checked in order.
    expectations = (
        ('Ashcroft', 'A226'),
        ('Asicroft', 'A226'),
        ('AsWcroft', 'A226'),
        ('Rupert', 'R163'),
        ('Rubin', 'R150'),
    )
    for name, expected in expectations:
        self.assertEqual(special_encoder.encode(name), expected)
def test_caversham(self):
    """Test using Caversham test set (SoundEx, Metaphone, & Caverphone).

    Reads ``variantNames.csv`` from the corpus directory, skips its first
    line, and for every remaining row checks three encoders in turn
    (Soundex, Metaphone, then ``self.pa``):

    * each of the two names encodes to the code recorded in the row;
    * the two encodings are equal when the row's "same" flag is ``'1'``
      and different otherwise.
    """
    soundex_encode = Soundex().encode
    metaphone_encode = Metaphone().encode

    with open(_corpus_file('variantNames.csv')) as corpus:
        next(corpus)  # skip the first (header) line
        for row in corpus:
            fields = row.strip().split(',')
            # Exactly eleven comma-separated columns are expected per row.
            (
                name1,
                soundex1,
                metaphone1,
                caverphone1,
                name2,
                soundex2,
                metaphone2,
                caverphone2,
                soundex_same,
                metaphone_same,
                caverphone_same,
            ) = fields

            # One entry per algorithm, kept in the order in which the
            # assertions must run: Soundex, Metaphone, then self.pa.
            checks = (
                (soundex_encode, soundex1, soundex2, soundex_same),
                (metaphone_encode, metaphone1, metaphone2, metaphone_same),
                (self.pa.encode, caverphone1, caverphone2, caverphone_same),
            )
            for encode, code1, code2, same_flag in checks:
                self.assertEqual(encode(name1), code1)
                self.assertEqual(encode(name2), code2)
                if same_flag == '1':
                    compare = self.assertEqual
                else:
                    compare = self.assertNotEqual
                compare(encode(name1), encode(name2))
'sfinxbis_ml6': lambda _: ', '.join(sfinxbis_6.encode(_)), 'sound_d': SoundD().encode, 'sound_d_ml8': SoundD(max_length=8).encode, 'soundex': Soundex().encode, 'soundex_reverse': Soundex(reverse=True).encode, 'soundex_0pad_ml6': Soundex(zero_pad=True, max_length=6).encode, 'soundex_special': Soundex(var='special').encode, 'soundex_census': lambda _: ', '.join(soundex_census.encode(_)), 'soundex_br': SoundexBR().encode, 'spanish_metaphone': SpanishMetaphone().encode, 'spanish_metaphone_modified': SpanishMetaphone(modified=True).encode, 'spanish_metaphone_ml4': SpanishMetaphone(max_length=4).encode, 'spfc': lambda _: spfc.encode(_ + ' ' + _), 'statistics_canada': StatisticsCanada().encode, 'statistics_canada_ml8': StatisticsCanada(max_length=8).encode, 'waahlin':
spanish_metaphone = SpanishMetaphone() spfc = SPFC() statistics_canada = StatisticsCanada() algorithms = { 'russell_index': lambda _: str(russell.encode(_)), 'russell_index_num_to_alpha': lambda _: russell._to_alpha( # noqa: SF01 russell.encode(_)), 'russell_index_alpha': russell.encode_alpha, 'soundex': soundex.encode, 'reverse_soundex': lambda _: soundex.encode(_, reverse=True), 'soundex_0pad_ml6': lambda _: soundex.encode(_, zero_pad=True, max_length=6), 'soundex_special': lambda _: soundex.encode(_, var='special'), 'soundex_census': lambda _: ', '.join(soundex.encode(_, var='Census')), 'refined_soundex': refined_soundex.encode, 'refined_soundex_vowels': lambda _: refined_soundex.encode(_, retain_vowels=True), 'refined_soundex_0pad_ml6': lambda _: refined_soundex.encode(_, zero_pad=True, max_length=6), 'dm_soundex': lambda _: ', '.join(sorted(dm.encode(_))), 'koelner_phonetik':