def test_fuzzy_soundex(self):
    """Test abydos.phonetic.FuzzySoundex.

    Exercises ``encode`` through the ``self.pa`` and ``self.pa_4``
    encoders (supplied by the enclosing test case, which is not visible
    here; the expected values below are 5- and 4-character codes
    respectively), then explicit ``max_length`` / ``zero_pad``
    configurations, ``encode_alpha``, and finally the ``fuzzy_soundex``
    wrapper function.

    Every group is a table of ``(input, expected)`` rows checked in the
    order listed, so the first mismatch aborts the test.
    """
    # http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf
    paper_cases = (
        ('', '00000'),
        ('Kristen', 'K6935'),
        ('Krissy', 'K6900'),
        ('Christen', 'K6935'),
    )
    for word, expected in paper_cases:
        self.assertEqual(self.pa.encode(word), expected)

    # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false
    book_cases = (
        ('peter', 'P360'),
        ('pete', 'P300'),
        ('pedro', 'P360'),
        ('stephen', 'S315'),
        ('steve', 'S310'),
        ('smith', 'S530'),
        ('smythe', 'S530'),
        ('gail', 'G400'),
        ('gayle', 'G400'),
        ('christine', 'K693'),
        ('christina', 'K693'),
        ('kristina', 'K693'),
    )
    for word, expected in book_cases:
        self.assertEqual(self.pa_4.encode(word), expected)

    # etc. (for code coverage)
    coverage_cases = (
        ('Wight', 'W3000'),
        ('Hardt', 'H6000'),
        ('Knight', 'N3000'),
        ('Czech', 'S7000'),
        ('Tsech', 'S7000'),
        ('gnomic', 'N5900'),
        ('Wright', 'R3000'),
        ('Hrothgar', 'R3760'),
        ('Hwaet', 'W3000'),
        ('Grant', 'G6300'),
        ('Hart', 'H6000'),
        ('Hardt', 'H6000'),  # repeated case
    )
    for word, expected in coverage_cases:
        self.assertEqual(self.pa.encode(word), expected)

    # Each row builds its own encoder inside the loop, right before the
    # assertion that uses it.
    option_cases = (
        # max_length bounds tests
        (
            {'max_length': -1},
            'Niall',
            'N400000000000000000000000000000000000000000000000000000000000000',
        ),
        ({'max_length': 0}, 'Niall', 'N400'),
        # zero_pad tests
        ({'max_length': -1, 'zero_pad': False}, 'Niall', 'N4'),
        ({'max_length': 0, 'zero_pad': False}, 'Niall', 'N4'),
        ({'max_length': 0, 'zero_pad': True}, 'Niall', 'N400'),
        ({'max_length': 4, 'zero_pad': False}, '', '0'),
        ({'max_length': 4, 'zero_pad': True}, '', '0000'),
    )
    for options, word, expected in option_cases:
        self.assertEqual(FuzzySoundex(**options).encode(word), expected)

    # encode_alpha
    alpha_cases = (
        ('pete', 'PT'),
        ('pedro', 'PTR'),
        ('stephen', 'STPN'),
        ('steve', 'STP'),
    )
    for word, expected in alpha_cases:
        self.assertEqual(self.pa.encode_alpha(word), expected)

    # Test wrapper
    self.assertEqual(fuzzy_soundex('Kristen'), 'K6935')
'davidson': Davidson().encode, 'dolby': Dolby().encode, 'dolby_ml4': Dolby(max_length=4).encode, 'dolby_vowels': Dolby(keep_vowels=True).encode, 'double_metaphone': lambda _: ', '.join(double_metaphone.encode(_)), 'eudex': Eudex().encode, 'fonem': FONEM().encode, 'fuzzy_soundex': FuzzySoundex().encode, 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode, 'haase_phonetik': lambda _: ', '.join(haase.encode(_)), 'haase_phonetik_primary': lambda _: haase_primary.encode(_)[0], 'henry_early': HenryEarly().encode, 'henry_early_ml8': HenryEarly(max_length=8).encode, 'koelner_phonetik': koelner.encode, 'koelner_phonetik_num_to_alpha': ( lambda _: koelner._to_alpha(koelner.encode(_)) # noqa: SF01 ),
'bmpm_gen_exact': BeiderMorse(match_mode='exact').encode, 'bmpm_ash_approx': BeiderMorse(name_mode='ash').encode, 'bmpm_ash_exact': BeiderMorse(name_mode='ash', match_mode='exact').encode, 'bmpm_sep_approx': BeiderMorse(name_mode='sep').encode, 'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode, 'caverphone_1': Caverphone(version=1).encode, 'caverphone_2': Caverphone().encode, 'daitch_mokotoff_soundex': DaitchMokotoff().encode, 'davidson': Davidson().encode, 'dolby': Dolby().encode, 'dolby_ml4': Dolby(max_length=4).encode, 'dolby_vowels': Dolby(keep_vowels=True).encode, 'double_metaphone': DoubleMetaphone().encode, 'eudex': Eudex().encode, 'fonem': FONEM().encode, 'fuzzy_soundex': FuzzySoundex().encode, 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode, 'haase_phonetik': Haase().encode, 'haase_phonetik_primary': Haase(primary_only=True).encode, 'henry_early': HenryEarly().encode, 'henry_early_ml8': HenryEarly(max_length=8).encode, 'koelner_phonetik': Koelner().encode, 'koelner_phonetik_alpha': Koelner().encode_alpha, 'lein': LEIN().encode, 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode, 'metasoundex': MetaSoundex().encode, 'metasoundex_es': MetaSoundex(lang='es').encode, 'metaphone': Metaphone().encode, 'mra': MRA().encode, 'norphone': Norphone().encode, 'nrl': NRL().encode,
SpanishMetaphone, StatisticsCanada, ) from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char alpha_sis = AlphaSIS() bm = BeiderMorse() caverphone = Caverphone() davidson = Davidson() dm = DaitchMokotoff() dolby = Dolby() double_metaphone = DoubleMetaphone() eudex = Eudex() fonem = FONEM() fuzzy_soundex = FuzzySoundex() haase = Haase() henry_early = HenryEarly() koelner = Koelner() lein = Lein() metaphone = Metaphone() metasoundex = MetaSoundex() mra = MRA() norphone = Norphone() nrl = NRL() nysiis = NYSIIS() onca = ONCA() parmar_kumbharana = ParmarKumbharana() phonem = Phonem() phonet = Phonet() phonetic_spanish = PhoneticSpanish()