Example #1
0
    def test_fuzzy_soundex(self):
        """Test abydos.phonetic.FuzzySoundex.

        Each section below is a table of ``(input, expected)`` pairs that is
        checked in order against the relevant encoder.
        """
        # The empty string still produces a code.
        self.assertEqual(self.pa.encode(''), '00000')

        # http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf
        for name, code in (
            ('Kristen', 'K6935'),
            ('Krissy', 'K6900'),
            ('Christen', 'K6935'),
        ):
            self.assertEqual(self.pa.encode(name), code)

        # http://books.google.com/books?id=LZrT6eWf9NMC&lpg=PA76&ots=Tex3FqNwGP&dq=%22phonix%20algorithm%22&pg=PA75#v=onepage&q=%22phonix%20algorithm%22&f=false
        for name, code in (
            ('peter', 'P360'),
            ('pete', 'P300'),
            ('pedro', 'P360'),
            ('stephen', 'S315'),
            ('steve', 'S310'),
            ('smith', 'S530'),
            ('smythe', 'S530'),
            ('gail', 'G400'),
            ('gayle', 'G400'),
            ('christine', 'K693'),
            ('christina', 'K693'),
            ('kristina', 'K693'),
        ):
            self.assertEqual(self.pa_4.encode(name), code)

        # etc. (for code coverage)
        # 'Hardt' is listed twice, exactly as in the original sequence.
        for name, code in (
            ('Wight', 'W3000'),
            ('Hardt', 'H6000'),
            ('Knight', 'N3000'),
            ('Czech', 'S7000'),
            ('Tsech', 'S7000'),
            ('gnomic', 'N5900'),
            ('Wright', 'R3000'),
            ('Hrothgar', 'R3760'),
            ('Hwaet', 'W3000'),
            ('Grant', 'G6300'),
            ('Hart', 'H6000'),
            ('Hardt', 'H6000'),
        ):
            self.assertEqual(self.pa.encode(name), code)

        # max_length bounds tests
        negative_length = FuzzySoundex(max_length=-1)
        self.assertEqual(
            negative_length.encode('Niall'),
            'N400000000000000000000000000000000000000000000000000000000000000',
        )
        zero_length = FuzzySoundex(max_length=0)
        self.assertEqual(zero_length.encode('Niall'), 'N400')

        # zero_pad tests: (max_length, zero_pad, input, expected code)
        for max_length, zero_pad, word, expected in (
            (-1, False, 'Niall', 'N4'),
            (0, False, 'Niall', 'N4'),
            (0, True, 'Niall', 'N400'),
            (4, False, '', '0'),
            (4, True, '', '0000'),
        ):
            encoder = FuzzySoundex(max_length=max_length, zero_pad=zero_pad)
            self.assertEqual(encoder.encode(word), expected)

        # encode_alpha
        for name, alpha in (
            ('pete', 'PT'),
            ('pedro', 'PTR'),
            ('stephen', 'STPN'),
            ('steve', 'STP'),
        ):
            self.assertEqual(self.pa.encode_alpha(name), alpha)

        # Test wrapper
        self.assertEqual(fuzzy_soundex('Kristen'), 'K6935')
Example #2
0
 'davidson':
 Davidson().encode,
 'dolby':
 Dolby().encode,
 'dolby_ml4':
 Dolby(max_length=4).encode,
 'dolby_vowels':
 Dolby(keep_vowels=True).encode,
 'double_metaphone':
 lambda _: ', '.join(double_metaphone.encode(_)),
 'eudex':
 Eudex().encode,
 'fonem':
 FONEM().encode,
 'fuzzy_soundex':
 FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8':
 FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik':
 lambda _: ', '.join(haase.encode(_)),
 'haase_phonetik_primary':
 lambda _: haase_primary.encode(_)[0],
 'henry_early':
 HenryEarly().encode,
 'henry_early_ml8':
 HenryEarly(max_length=8).encode,
 'koelner_phonetik':
 koelner.encode,
 'koelner_phonetik_num_to_alpha': (
     lambda _: koelner._to_alpha(koelner.encode(_))  # noqa: SF01
 ),
Example #3
0
 'bmpm_gen_exact': BeiderMorse(match_mode='exact').encode,
 'bmpm_ash_approx': BeiderMorse(name_mode='ash').encode,
 'bmpm_ash_exact': BeiderMorse(name_mode='ash', match_mode='exact').encode,
 'bmpm_sep_approx': BeiderMorse(name_mode='sep').encode,
 'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode,
 'caverphone_1': Caverphone(version=1).encode,
 'caverphone_2': Caverphone().encode,
 'daitch_mokotoff_soundex': DaitchMokotoff().encode,
 'davidson': Davidson().encode,
 'dolby': Dolby().encode,
 'dolby_ml4': Dolby(max_length=4).encode,
 'dolby_vowels': Dolby(keep_vowels=True).encode,
 'double_metaphone': DoubleMetaphone().encode,
 'eudex': Eudex().encode,
 'fonem': FONEM().encode,
 'fuzzy_soundex': FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik': Haase().encode,
 'haase_phonetik_primary': Haase(primary_only=True).encode,
 'henry_early': HenryEarly().encode,
 'henry_early_ml8': HenryEarly(max_length=8).encode,
 'koelner_phonetik': Koelner().encode,
 'koelner_phonetik_alpha': Koelner().encode_alpha,
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
 'nrl': NRL().encode,
Example #4
0
    SpanishMetaphone,
    StatisticsCanada,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Module-level encoder instances: each name below is bound once, at import
# time, to an object created by calling its class with no arguments.
# NOTE(review): presumably these are shared by the tests/tables later in this
# module so that each encoder is constructed only once -- confirm against
# their usage, which is outside this excerpt.
alpha_sis = AlphaSIS()
bm = BeiderMorse()
caverphone = Caverphone()
davidson = Davidson()
dm = DaitchMokotoff()
dolby = Dolby()
double_metaphone = DoubleMetaphone()
eudex = Eudex()
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()