Example #1
0
 'bmpm_french': BeiderMorse(language_arg='french').encode,
 'bmpm_gen_exact': BeiderMorse(match_mode='exact').encode,
 'bmpm_ash_approx': BeiderMorse(name_mode='ash').encode,
 'bmpm_ash_exact': BeiderMorse(name_mode='ash', match_mode='exact').encode,
 'bmpm_sep_approx': BeiderMorse(name_mode='sep').encode,
 'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode,
 'caverphone_1': Caverphone(version=1).encode,
 'caverphone_2': Caverphone().encode,
 'daitch_mokotoff_soundex': DaitchMokotoff().encode,
 'davidson': Davidson().encode,
 'dolby': Dolby().encode,
 'dolby_ml4': Dolby(max_length=4).encode,
 'dolby_vowels': Dolby(keep_vowels=True).encode,
 'double_metaphone': DoubleMetaphone().encode,
 'eudex': Eudex().encode,
 'fonem': FONEM().encode,
 'fuzzy_soundex': FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik': Haase().encode,
 'haase_phonetik_primary': Haase(primary_only=True).encode,
 'henry_early': HenryEarly().encode,
 'henry_early_ml8': HenryEarly(max_length=8).encode,
 'koelner_phonetik': Koelner().encode,
 'koelner_phonetik_alpha': Koelner().encode_alpha,
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
Example #2
0
class FONEMTestCases(unittest.TestCase):
    """Exercise the FONEM phonetic encoder.

    Covers abydos.phonetic.FONEM.encode and the fonem() wrapper.
    """

    pa = FONEM()

    def test_fonem(self):
        """Check FONEM encodings against a table of known name/code pairs."""
        encode = self.pa.encode

        # An empty name must encode to an empty string.
        self.assertEqual(encode(''), '')

        # (name, expected code) pairs. Most originate in the FONEM
        # specification; the list itself was copied from Talisman:
        # https://github.com/Yomguithereal/talisman/blob/master/test/phonetics/french/fonem.js
        expected_codes = (
            ('BEAULAC', 'BOLAK'),
            ('BAULAC', 'BOLAK'),
            ('IMBEAULT', 'INBO'),
            ('DUFAUT', 'DUFO'),
            ('THIBOUTOT', 'TIBOUTOT'),
            ('DEVAUX', 'DEVO'),
            ('RONDEAUX', 'RONDO'),
            ('BOURGAULX', 'BOURGO'),
            ('PINCHAUD', 'PINCHO'),
            ('PEDNAULD', 'PEDNO'),
            ('MAZENOD', 'MASENOD'),
            ('ARNOLD', 'ARNOL'),
            ('BERTOLD', 'BERTOL'),
            ('BELLAY', 'BELE'),
            ('SANDAY', 'SENDE'),
            ('GAY', 'GAI'),
            ('FAYARD', 'FAYAR'),
            ('LEMIEUX', 'LEMIEU'),
            ('LHEUREUX', 'LEUREU'),
            ('BELLEY', 'BELE'),
            ('WELLEY', 'WELE'),
            ('MEYER', 'MEYER'),
            ('BOILY', 'BOILI'),
            ('LOYSEAU', 'LOISO'),
            ('MAYRAND', 'MAIREN'),
            ('GUYON', 'GUYON'),
            ('FAILLARD', 'FAYAR'),
            ('FAIARD', 'FAYAR'),
            ('MEIER', 'MEYER'),
            ('MEILLER', 'MEYER'),
            ('GUILLON', 'GUYON'),
            ('LAVILLE', 'LAVILLE'),
            ('COUET', 'CWET'),
            ('EDOUARD', 'EDWAR'),
            ('GIROUARD', 'JIRWAR'),
            ('OZOUADE', 'OSWADE'),  # differs from test set
            ('BOUILLE', 'BOUYE'),
            ('POUYEZ', 'POUYES'),  # differs from test set
            ('LEMEE', 'LEME'),
            ('ABRAAM', 'ABRAM'),
            ('ARCHEMBAULT', 'ARCHENBO'),
            ('AMTHIME', 'ENTIME'),
            ('ROMPRE', 'RONPRE'),
            ('BOMSECOURS', 'BONSECOURS'),
            ('BOULANGER', 'BOULENJER'),
            ('TANCREDE', 'TENKREDE'),
            ('BLAIN', 'BLIN'),
            ('BLAINVILLE', 'BLINVILLE'),
            ('MAINARD', 'MAINAR'),
            ('RAIMOND', 'RAIMON'),
            ('BLACKBORN', 'BLAKBURN'),
            ('SEABOURNE', 'SEABURN'),
            ('IMBO', 'INBO'),
            ('RIMFRET', 'RINFRET'),
            ('LEFEBVRE', 'LEFEVRE'),
            ('MACE', 'MASSE'),
            ('MACON', 'MACON'),
            ('MARCELIN', 'MARSELIN'),
            ('MARCEAU', 'MARSO'),
            ('VINCELETTE', 'VINSELETE'),
            ('FORCADE', 'FORCADE'),
            ('CELINE', 'SELINE'),
            ('CERAPHIN', 'SERAFIN'),
            ('CAMILLE', 'KAMILLE'),
            ('CAYETTE', 'KAYETE'),
            ('CARINE', 'KARINE'),
            ('LUC', 'LUK'),
            ('LEBLANC', 'LEBLEN'),
            ('VICTOR', 'VIKTOR'),
            ('LACCOULINE', 'LAKOULINE'),
            ('MACCIMILIEN', 'MAXIMILIEN'),
            ('MAGELLA', 'MAJELA'),
            ('GINETTE', 'JINETE'),
            ('GANDET', 'GANDET'),
            ('GEORGES', 'JORJES'),
            ('GEOFFROID', 'JOFROID'),
            ('PAGEAU', 'PAJO'),
            ('GAGNION', 'GAGNON'),
            ('MIGNIER', 'MIGNER'),
            ('HALLEY', 'ALE'),
            ('GAUTHIER', 'GOTIER'),
            ('CHARTIER', 'CHARTIER'),
            ('JEANNE', 'JANE'),
            ('MACGREGOR', 'MACGREGOR'),
            ('MACKAY', 'MACKE'),
            ('MCNICOL', 'MACNICOL'),
            ('MCNEIL', 'MACNEIL'),
            ('PHANEUF', 'FANEUF'),
            ('PHILIPPE', 'FILIPE'),
            ('QUENNEVILLE', 'KENEVILLE'),
            ('LAROCQUE', 'LAROKE'),
            ('SCIPION', 'SIPION'),
            ('ASCELIN', 'ASSELIN'),
            ('VASCO', 'VASKO'),
            ('PASCALINE', 'PASKALINE'),
            ('ESHEMBACK', 'ECHENBAK'),
            ('ASHED', 'ACHED'),
            ('GRATIA', 'GRASSIA'),
            ('PATRITIA', 'PATRISSIA'),
            ('BERTIO', 'BERTIO'),
            ('MATIEU', 'MATIEU'),
            ('BERTIAUME', 'BERTIOME'),
            ('MUNROW', 'MUNRO'),
            ('BRANISLAW', 'BRANISLA'),
            ('LOWMEN', 'LOMEN'),
            ('ANDREW', 'ENDREW'),
            ('EXCEL', 'EXEL'),
            ('EXCERINE', 'EXERINE'),
            ('EXSILDA', 'EXILDA'),
            ('EXZELDA', 'EXELDA'),
            ('CAZEAU', 'KASO'),
            ('BRAZEAU', 'BRASO'),
            ('FITZPATRICK', 'FITSPATRIK'),
            ('SINGELAIS', 'ST-JELAIS'),
            ('CINQMARS', 'ST-MARS'),
            ('SAINT-AMAND', 'ST-AMEN'),
            ('SAINTECROIX', 'STE-KROIX'),
            ('ST-HILAIRE', 'ST-ILAIRE'),
            ('STE-CROIX', 'STE-KROIX'),
            ('LAVALLEE', 'LAVALE'),
            ('CORINNE', 'KORINE'),
            ('DUTILE', 'DUTILLE'),
        )
        # Checked in table order; the first mismatch fails the test.
        for surname, code in expected_codes:
            self.assertEqual(encode(surname), code)

        # The function-style wrapper must agree with the class encoder.
        self.assertEqual(fonem('MARCEAU'), 'MARSO')
Example #3
0
 'daitch_mokotoff_soundex':
 lambda _: ', '.join(sorted(daitch_mokotoff.encode(_))),
 'davidson':
 Davidson().encode,
 'dolby':
 Dolby().encode,
 'dolby_ml4':
 Dolby(max_length=4).encode,
 'dolby_vowels':
 Dolby(keep_vowels=True).encode,
 'double_metaphone':
 lambda _: ', '.join(double_metaphone.encode(_)),
 'eudex':
 Eudex().encode,
 'fonem':
 FONEM().encode,
 'fuzzy_soundex':
 FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8':
 FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik':
 lambda _: ', '.join(haase.encode(_)),
 'haase_phonetik_primary':
 lambda _: haase_primary.encode(_)[0],
 'henry_early':
 HenryEarly().encode,
 'henry_early_ml8':
 HenryEarly(max_length=8).encode,
 'koelner_phonetik':
 koelner.encode,
 'koelner_phonetik_num_to_alpha': (
Example #4
0
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Module-level encoder instances: one object per phonetic algorithm class,
# each constructed with no arguments (i.e. that class's defaults).
# NOTE(review): presumably these are shared by the tests defined later in
# this module so each encoder is built only once -- confirm against usage.
alpha_sis = AlphaSIS()
bm = BeiderMorse()
caverphone = Caverphone()
davidson = Davidson()
dm = DaitchMokotoff()
dolby = Dolby()
double_metaphone = DoubleMetaphone()
eudex = Eudex()
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()