def test_henry_early(self):
        """Verify abydos.phonetic.HenryEarly codes for a table of names.

        Each (name, expected code) pair is run through ``self.pa.encode``
        in the listed order; the first mismatch fails the test.
        """
        # An empty input yields an empty code.
        self.assertEqual(self.pa.encode(''), '')

        # Name/code pairs taken from the Legare 1972 paper.
        legare_examples = (
            ('Descarry', 'DKR'),
            ('Descaries', 'DKR'),
            ('Campo', 'KP'),
            ('Campot', 'KP'),
            ('Gausselin', 'GSL'),
            ('Gosselin', 'GSL'),
            ('Bergeron', 'BRJ'),
            ('Bergereau', 'BRJ'),
            ('Bosseron', 'BSR'),
            ('Cicire', 'SSR'),
            ('Lechevalier', 'LCV'),
            ('Chevalier', 'CVL'),
            ('Peloy', 'PL'),
            ('Beloy', 'BL'),
            ('Beret', 'BR'),
            ('Benet', 'BN'),
            ('Turcot', 'TRK'),
            ('Turgot', 'TRG'),
            ('Vigier', 'VJ'),
            ('Vigiere', 'VJR'),
            ('Dodin', 'DD'),
            ('Dodelin', 'DDL'),
        )
        for name, expected in legare_examples:
            self.assertEqual(self.pa.encode(name), expected)

        # Extra inputs that exist to complete branch coverage.
        coverage_examples = (
            ('Anil', 'ANL'),
            ('Emmanuel', 'AMN'),
            ('Ainu', 'EN'),
            ('Oeuf', 'OF'),
            ('Yves', 'IV'),
            ('Yo', 'I'),
            ('Umman', 'EM'),
            ('Omman', 'OM'),
            ('Zoe', 'S'),
            ('Beauchamp', 'BCP'),
            ('Chloe', 'KL'),
            ('Gerard', 'JRR'),
            ('Agnes', 'ANN'),
            ('Pinot', 'PN'),
            ('Philo', 'FL'),
            ('Quisling', 'GL'),
            ('Qualite', 'KLT'),
            ('Sainte-Marie', 'XMR'),
            ('Saint-Jean', 'XJ'),
            ('Ste-Marie', 'XMR'),
            ('St-Jean', 'XJ'),
            ('Cloe', 'KL'),
            ('Ahch-To', 'AKT'),
            ('Zdavros', 'SDV'),
            ('Sdavros', 'DVR'),
            ('Coulomb', 'KLB'),
            ('Calm', 'K'),
            ('Omnia', 'ON'),
            ('Ramps', 'RPS'),
            ('Renault', 'RN'),
            ('Czech', 'CSK'),
            ('Imran', 'ER'),
        )
        for name, expected in coverage_examples:
            self.assertEqual(self.pa.encode(name), expected)

        # A separately constructed encoder with max_length=-1 is checked
        # against a four-letter code.
        unbounded = HenryEarly(max_length=-1)
        self.assertEqual(unbounded.encode('Christopher'), 'KRXF')
 'double_metaphone':
 lambda _: ', '.join(double_metaphone.encode(_)),
 'eudex':
 Eudex().encode,
 'fonem':
 FONEM().encode,
 'fuzzy_soundex':
 FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8':
 FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik':
 lambda _: ', '.join(haase.encode(_)),
 'haase_phonetik_primary':
 lambda _: haase_primary.encode(_)[0],
 'henry_early':
 HenryEarly().encode,
 'henry_early_ml8':
 HenryEarly(max_length=8).encode,
 'koelner_phonetik':
 koelner.encode,
 'koelner_phonetik_num_to_alpha': (
     lambda _: koelner._to_alpha(koelner.encode(_))  # noqa: SF01
 ),
 'koelner_phonetik_alpha':
 koelner.encode_alpha,
 'lein':
 LEIN().encode,
 'lein_nopad_ml8':
 LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex':
 MetaSoundex().encode,
Exemple #3
0
 'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode,
 'caverphone_1': Caverphone(version=1).encode,
 'caverphone_2': Caverphone().encode,
 'daitch_mokotoff_soundex': DaitchMokotoff().encode,
 'davidson': Davidson().encode,
 'dolby': Dolby().encode,
 'dolby_ml4': Dolby(max_length=4).encode,
 'dolby_vowels': Dolby(keep_vowels=True).encode,
 'double_metaphone': DoubleMetaphone().encode,
 'eudex': Eudex().encode,
 'fonem': FONEM().encode,
 'fuzzy_soundex': FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik': Haase().encode,
 'haase_phonetik_primary': Haase(primary_only=True).encode,
 'henry_early': HenryEarly().encode,
 'henry_early_ml8': HenryEarly(max_length=8).encode,
 'koelner_phonetik': Koelner().encode,
 'koelner_phonetik_alpha': Koelner().encode_alpha,
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
 'nrl': NRL().encode,
 'nysiis': NYSIIS().encode,
 'nysiis_modified': NYSIIS(modified=True).encode,
 'nysiis_ml_inf': NYSIIS(max_length=-1).encode,
 'onca': ONCA().encode,
Exemple #4
0
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Module-level encoder instances, one per algorithm class, each built with
# no constructor arguments (i.e. whatever defaults the class defines).
# NOTE(review): the classes are presumably brought in by the import list
# that closes just above; that list is not fully visible here -- confirm
# that every name (e.g. ``Lein`` vs. ``LEIN``) matches what is imported.
alpha_sis = AlphaSIS()
bm = BeiderMorse()
caverphone = Caverphone()
davidson = Davidson()
dm = DaitchMokotoff()
dolby = Dolby()
double_metaphone = DoubleMetaphone()
eudex = Eudex()
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()
phonex = Phonex()
phonix = Phonix()