예제 #1
0
class HaaseTestCases(unittest.TestCase):
    """Exercise the Haase Phonetik encoder.

    test cases for abydos.phonetic.Haase
    """

    # One default-configured encoder shared by every assertion below.
    pa = Haase()

    def test_haase_phonetik(self):
        """Check Haase.encode, Haase.encode_alpha and the function wrapper.

        Every encoding is compared against a tuple of code strings.
        """
        encode = self.pa.encode

        # Base cases
        self.assertEqual(encode(''), ('', ))

        # equivalents: each pair of spellings must encode identically
        equivalent_spellings = (
            ('Häschen', 'Haeschen'),
            ('Schloß', 'Schloss'),
            ('üben', 'ueben'),
            ('Eichörnchen', 'Eichoernchen'),
        )
        for spelling, respelling in equivalent_spellings:
            self.assertEqual(encode(spelling), encode(respelling))

        # coverage completion
        self.assertEqual(encode('Häschen'), ('9896', '9496'))
        primary_encoder = Haase(primary_only=True)
        self.assertEqual(primary_encoder.encode('Häschen'), ('9896', ))

        numeric_expectations = (
            ('Eichörnchen', ('94976496', )),
            ('Hexe', ('9489', )),
            ('Chemie', ('4969', '8969')),
            ('Brille', ('17959', '179')),
            ('Brilleille', ('1795959', '17959', '179')),
            ('Niveau', ('6939', )),
            ('Korb', ('4971', '4973')),
            ('Heino', ('969', '9693')),
            ('Nekka', ('6949', '69497')),
            ('Aleph', ('9593', )),
            ('Aleppo', ('95919', '959193')),
            ('Endzipfel', ('96891395', )),
            ('verbrandt', ('39717962', '39737962')),
            ('Cent', ('8962', )),
            ('addiscendae', ('92989629', )),
            ('kickx', ('4948', )),
            ('sanctionen', ('896829696', )),
        )
        for word, codes in numeric_expectations:
            self.assertEqual(encode(word), codes)

        # encode_alpha
        encode_alpha = self.pa.encode_alpha
        alpha_expectations = (
            ('Niveau', ('NAFA', )),
            ('Korb', ('KARP', 'KARF')),
            ('Heino', ('ANA', 'ANAF')),
            ('Nekka', ('NAKA', 'NAKAR')),
        )
        for word, codes in alpha_expectations:
            self.assertEqual(encode_alpha(word), codes)

        # Test wrapper
        self.assertEqual(haase_phonetik('Häschen'), ('9896', '9496'))
예제 #2
0
    def test_haase_phonetik(self):
        """Check Haase.encode and Haase.encode_alpha.

        Every encoding is compared against a single string; where several
        codes are expected they are joined with commas.
        """
        encode = self.pa.encode

        # Base cases
        self.assertEqual(encode(''), '')

        # equivalents: each pair of spellings must encode identically
        equivalent_spellings = (
            ('Häschen', 'Haeschen'),
            ('Schloß', 'Schloss'),
            ('üben', 'ueben'),
            ('Eichörnchen', 'Eichoernchen'),
        )
        for spelling, respelling in equivalent_spellings:
            self.assertEqual(encode(spelling), encode(respelling))

        # coverage completion
        self.assertEqual(encode('Häschen'), '9896,9496')
        primary_encoder = Haase(primary_only=True)
        self.assertEqual(primary_encoder.encode('Häschen'), '9896')

        numeric_expectations = (
            ('Eichörnchen', '94976496'),
            ('Hexe', '9489'),
            ('Chemie', '4969,8969'),
            ('Brille', '17959,179'),
            ('Brilleille', '1795959,17959,179'),
            ('Niveau', '6939'),
            ('Korb', '4971,4973'),
            ('Heino', '969,9693'),
            ('Nekka', '6949,69497'),
            ('Aleph', '9593'),
            ('Aleppo', '95919,959193'),
            ('Endzipfel', '96891395'),
            ('verbrandt', '39717962,39737962'),
            ('Cent', '8962'),
            ('addiscendae', '92989629'),
            ('kickx', '4948'),
            ('sanctionen', '896829696'),
        )
        for word, code in numeric_expectations:
            self.assertEqual(encode(word), code)

        # encode_alpha
        encode_alpha = self.pa.encode_alpha
        alpha_expectations = (
            ('Niveau', 'NAFA'),
            ('Korb', 'KARP,KARF'),
            ('Heino', 'ANA,ANAF'),
            ('Nekka', 'NAKA,NAKAR'),
        )
        for word, code in alpha_expectations:
            self.assertEqual(encode_alpha(word), code)
예제 #3
0
 'bmpm_ash_exact': BeiderMorse(name_mode='ash', match_mode='exact').encode,
 'bmpm_sep_approx': BeiderMorse(name_mode='sep').encode,
 'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode,
 'caverphone_1': Caverphone(version=1).encode,
 'caverphone_2': Caverphone().encode,
 'daitch_mokotoff_soundex': DaitchMokotoff().encode,
 'davidson': Davidson().encode,
 'dolby': Dolby().encode,
 'dolby_ml4': Dolby(max_length=4).encode,
 'dolby_vowels': Dolby(keep_vowels=True).encode,
 'double_metaphone': DoubleMetaphone().encode,
 'eudex': Eudex().encode,
 'fonem': FONEM().encode,
 'fuzzy_soundex': FuzzySoundex().encode,
 'fuzzy_soundex_0pad_ml8': FuzzySoundex(max_length=8, zero_pad=True).encode,
 'haase_phonetik': Haase().encode,
 'haase_phonetik_primary': Haase(primary_only=True).encode,
 'henry_early': HenryEarly().encode,
 'henry_early_ml8': HenryEarly(max_length=8).encode,
 'koelner_phonetik': Koelner().encode,
 'koelner_phonetik_alpha': Koelner().encode_alpha,
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
 'nrl': NRL().encode,
 'nysiis': NYSIIS().encode,
 'nysiis_modified': NYSIIS(modified=True).encode,
예제 #4
0
    SPFC,
    SfinxBis,
    SoundD,
    Soundex,
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
    Waahlin,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Encoder instances built once at module import time and bound to
# module-level names. `alpha_sis` is used by a lambda in the `algorithms`
# table that follows.
alpha_sis = AlphaSIS()
daitch_mokotoff = DaitchMokotoff()
double_metaphone = DoubleMetaphone()
haase = Haase()
# Same encoder class as `haase`, constructed with primary_only=True.
haase_primary = Haase(primary_only=True)
koelner = Koelner()
russell = RussellIndex()
sfinxbis = SfinxBis()
# Same encoder class as `sfinxbis`, constructed with max_length=6.
sfinxbis_6 = SfinxBis(max_length=6)
# Soundex constructed with var='Census'.
soundex_census = Soundex(var='Census')
spfc = SPFC()

algorithms = {
    'ainsworth':
    Ainsworth().encode,
    'alpha_sis':
    lambda _: ', '.join(alpha_sis.encode(_)),
    'bmpm':
    BeiderMorse().encode,
예제 #5
0
    StatisticsCanada,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Encoder instances built once at module import time, each constructed
# with no arguments and bound to a module-level name.
alpha_sis = AlphaSIS()
bm = BeiderMorse()
caverphone = Caverphone()
davidson = Davidson()
dm = DaitchMokotoff()
dolby = Dolby()
double_metaphone = DoubleMetaphone()
eudex = Eudex()
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()
phonex = Phonex()