'bmpm_french': BeiderMorse(language_arg='french').encode, 'bmpm_gen_exact': BeiderMorse(match_mode='exact').encode, 'bmpm_ash_approx': BeiderMorse(name_mode='ash').encode, 'bmpm_ash_exact': BeiderMorse(name_mode='ash', match_mode='exact').encode, 'bmpm_sep_approx': BeiderMorse(name_mode='sep').encode, 'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode, 'caverphone_1': Caverphone(version=1).encode, 'caverphone_2': Caverphone().encode, 'daitch_mokotoff_soundex': DaitchMokotoff().encode, 'davidson': Davidson().encode, 'dolby': Dolby().encode, 'dolby_ml4': Dolby(max_length=4).encode, 'dolby_vowels': Dolby(keep_vowels=True).encode, 'double_metaphone': DoubleMetaphone().encode, 'eudex': Eudex().encode, 'fonem': FONEM().encode, 'fuzzy_soundex': FuzzySoundex().encode, 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode, 'haase_phonetik': Haase().encode, 'haase_phonetik_primary': Haase(primary_only=True).encode, 'henry_early': HenryEarly().encode, 'henry_early_ml8': HenryEarly(max_length=8).encode, 'koelner_phonetik': Koelner().encode, 'koelner_phonetik_alpha': Koelner().encode_alpha, 'lein': LEIN().encode, 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode, 'metasoundex': MetaSoundex().encode, 'metasoundex_es': MetaSoundex(lang='es').encode, 'metaphone': Metaphone().encode, 'mra': MRA().encode, 'norphone': Norphone().encode,
class FONEMTestCases(unittest.TestCase):
    """Test FONEM functions.

    Test cases for abydos.phonetic.FONEM, plus one call through the
    ``fonem`` wrapper at the end of the test.
    """

    # One encoder instance, created at class definition and reused below.
    pa = FONEM()

    def test_fonem(self):
        """Test abydos.phonetic.FONEM."""
        encode = self.pa.encode

        # Base case: the empty string is expected to encode to itself.
        self.assertEqual(encode(''), '')

        # (input name, expected encoding) pairs. Mostly from the FONEM
        # specification, but copied from Talisman:
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/french/fonem.js
        expected_encodings = (
            ('BEAULAC', 'BOLAK'),
            ('BAULAC', 'BOLAK'),
            ('IMBEAULT', 'INBO'),
            ('DUFAUT', 'DUFO'),
            ('THIBOUTOT', 'TIBOUTOT'),
            ('DEVAUX', 'DEVO'),
            ('RONDEAUX', 'RONDO'),
            ('BOURGAULX', 'BOURGO'),
            ('PINCHAUD', 'PINCHO'),
            ('PEDNAULD', 'PEDNO'),
            ('MAZENOD', 'MASENOD'),
            ('ARNOLD', 'ARNOL'),
            ('BERTOLD', 'BERTOL'),
            ('BELLAY', 'BELE'),
            ('SANDAY', 'SENDE'),
            ('GAY', 'GAI'),
            ('FAYARD', 'FAYAR'),
            ('LEMIEUX', 'LEMIEU'),
            ('LHEUREUX', 'LEUREU'),
            ('BELLEY', 'BELE'),
            ('WELLEY', 'WELE'),
            ('MEYER', 'MEYER'),
            ('BOILY', 'BOILI'),
            ('LOYSEAU', 'LOISO'),
            ('MAYRAND', 'MAIREN'),
            ('GUYON', 'GUYON'),
            ('FAILLARD', 'FAYAR'),
            ('FAIARD', 'FAYAR'),
            ('MEIER', 'MEYER'),
            ('MEILLER', 'MEYER'),
            ('GUILLON', 'GUYON'),
            ('LAVILLE', 'LAVILLE'),
            ('COUET', 'CWET'),
            ('EDOUARD', 'EDWAR'),
            ('GIROUARD', 'JIRWAR'),
            ('OZOUADE', 'OSWADE'),  # differs from test set
            ('BOUILLE', 'BOUYE'),
            ('POUYEZ', 'POUYES'),  # differs from test set
            ('LEMEE', 'LEME'),
            ('ABRAAM', 'ABRAM'),
            ('ARCHEMBAULT', 'ARCHENBO'),
            ('AMTHIME', 'ENTIME'),
            ('ROMPRE', 'RONPRE'),
            ('BOMSECOURS', 'BONSECOURS'),
            ('BOULANGER', 'BOULENJER'),
            ('TANCREDE', 'TENKREDE'),
            ('BLAIN', 'BLIN'),
            ('BLAINVILLE', 'BLINVILLE'),
            ('MAINARD', 'MAINAR'),
            ('RAIMOND', 'RAIMON'),
            ('BLACKBORN', 'BLAKBURN'),
            ('SEABOURNE', 'SEABURN'),
            ('IMBO', 'INBO'),
            ('RIMFRET', 'RINFRET'),
            ('LEFEBVRE', 'LEFEVRE'),
            ('MACE', 'MASSE'),
            ('MACON', 'MACON'),
            ('MARCELIN', 'MARSELIN'),
            ('MARCEAU', 'MARSO'),
            ('VINCELETTE', 'VINSELETE'),
            ('FORCADE', 'FORCADE'),
            ('CELINE', 'SELINE'),
            ('CERAPHIN', 'SERAFIN'),
            ('CAMILLE', 'KAMILLE'),
            ('CAYETTE', 'KAYETE'),
            ('CARINE', 'KARINE'),
            ('LUC', 'LUK'),
            ('LEBLANC', 'LEBLEN'),
            ('VICTOR', 'VIKTOR'),
            ('LACCOULINE', 'LAKOULINE'),
            ('MACCIMILIEN', 'MAXIMILIEN'),
            ('MAGELLA', 'MAJELA'),
            ('GINETTE', 'JINETE'),
            ('GANDET', 'GANDET'),
            ('GEORGES', 'JORJES'),
            ('GEOFFROID', 'JOFROID'),
            ('PAGEAU', 'PAJO'),
            ('GAGNION', 'GAGNON'),
            ('MIGNIER', 'MIGNER'),
            ('HALLEY', 'ALE'),
            ('GAUTHIER', 'GOTIER'),
            ('CHARTIER', 'CHARTIER'),
            ('JEANNE', 'JANE'),
            ('MACGREGOR', 'MACGREGOR'),
            ('MACKAY', 'MACKE'),
            ('MCNICOL', 'MACNICOL'),
            ('MCNEIL', 'MACNEIL'),
            ('PHANEUF', 'FANEUF'),
            ('PHILIPPE', 'FILIPE'),
            ('QUENNEVILLE', 'KENEVILLE'),
            ('LAROCQUE', 'LAROKE'),
            ('SCIPION', 'SIPION'),
            ('ASCELIN', 'ASSELIN'),
            ('VASCO', 'VASKO'),
            ('PASCALINE', 'PASKALINE'),
            ('ESHEMBACK', 'ECHENBAK'),
            ('ASHED', 'ACHED'),
            ('GRATIA', 'GRASSIA'),
            ('PATRITIA', 'PATRISSIA'),
            ('BERTIO', 'BERTIO'),
            ('MATIEU', 'MATIEU'),
            ('BERTIAUME', 'BERTIOME'),
            ('MUNROW', 'MUNRO'),
            ('BRANISLAW', 'BRANISLA'),
            ('LOWMEN', 'LOMEN'),
            ('ANDREW', 'ENDREW'),
            ('EXCEL', 'EXEL'),
            ('EXCERINE', 'EXERINE'),
            ('EXSILDA', 'EXILDA'),
            ('EXZELDA', 'EXELDA'),
            ('CAZEAU', 'KASO'),
            ('BRAZEAU', 'BRASO'),
            ('FITZPATRICK', 'FITSPATRIK'),
            ('SINGELAIS', 'ST-JELAIS'),
            ('CINQMARS', 'ST-MARS'),
            ('SAINT-AMAND', 'ST-AMEN'),
            ('SAINTECROIX', 'STE-KROIX'),
            ('ST-HILAIRE', 'ST-ILAIRE'),
            ('STE-CROIX', 'STE-KROIX'),
            ('LAVALLEE', 'LAVALE'),
            ('CORINNE', 'KORINE'),
            ('DUTILE', 'DUTILLE'),
        )

        # Check the pairs in table order; the first mismatch fails the test.
        for surname, expected in expected_encodings:
            self.assertEqual(encode(surname), expected)

        # Test wrapper: `fonem` is called directly rather than via self.pa.
        self.assertEqual(fonem('MARCEAU'), 'MARSO')
'daitch_mokotoff_soundex': lambda _: ', '.join(sorted(daitch_mokotoff.encode(_))), 'davidson': Davidson().encode, 'dolby': Dolby().encode, 'dolby_ml4': Dolby(max_length=4).encode, 'dolby_vowels': Dolby(keep_vowels=True).encode, 'double_metaphone': lambda _: ', '.join(double_metaphone.encode(_)), 'eudex': Eudex().encode, 'fonem': FONEM().encode, 'fuzzy_soundex': FuzzySoundex().encode, 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode, 'haase_phonetik': lambda _: ', '.join(haase.encode(_)), 'haase_phonetik_primary': lambda _: haase_primary.encode(_)[0], 'henry_early': HenryEarly().encode, 'henry_early_ml8': HenryEarly(max_length=8).encode, 'koelner_phonetik': koelner.encode, 'koelner_phonetik_num_to_alpha': (
SoundexBR, SpanishMetaphone, StatisticsCanada, ) from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char alpha_sis = AlphaSIS() bm = BeiderMorse() caverphone = Caverphone() davidson = Davidson() dm = DaitchMokotoff() dolby = Dolby() double_metaphone = DoubleMetaphone() eudex = Eudex() fonem = FONEM() fuzzy_soundex = FuzzySoundex() haase = Haase() henry_early = HenryEarly() koelner = Koelner() lein = Lein() metaphone = Metaphone() metasoundex = MetaSoundex() mra = MRA() norphone = Norphone() nrl = NRL() nysiis = NYSIIS() onca = ONCA() parmar_kumbharana = ParmarKumbharana() phonem = Phonem() phonet = Phonet()