Ejemplo n.º 1
0
class PhoneticTestCases(unittest.TestCase):
    """Exercise the phonetic fingerprint.

    Covers abydos.fingerprint.Phonetic.
    """

    # Fingerprinters are built once at class-definition time and shared.
    fp = Phonetic()
    fp_phonet = Phonetic(Phonet())
    fp_soundex = Phonetic(Soundex())
    soundex = Soundex()

    def test_phonetic_fingerprint(self):
        """Check fingerprints from a no-argument and two explicit encoders."""
        # An empty string fingerprints to an empty string.
        self.assertEqual(self.fp.fingerprint(''), '')

        # Every remaining check fingerprints the same space-joined name list.
        phrase = ' '.join(NIALL)
        expected_phonet = (
            'knile makneil maknele neil nel nele nial nigeli '
            'nigl nil noigialach oneil ui'
        )
        expected_soundex = 'k540 m254 n240 n242 n400 o540 u000'

        self.assertEqual(self.fp.fingerprint(phrase), 'a anl mknl njl nklk nl')
        self.assertEqual(self.fp_phonet.fingerprint(phrase), expected_phonet)
        self.assertEqual(self.fp_soundex.fingerprint(phrase), expected_soundex)
Ejemplo n.º 2
0
    def test_phonet_nolang(self):
        """Test abydos.phonetic.Phonet (no language).

        The same inputs are run through both no-language encoders
        (self.pa_1none and self.pa_2none), and every input is expected to
        encode identically under the two of them.
        """
        self.assertEqual(Phonet(lang='none').encode(''), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        self.assertEqual(self.pa_1none.encode(''), '')
        self.assertEqual(self.pa_1none.encode('Zedlitz'), 'ZEDLITZ')
        self.assertEqual(self.pa_1none.encode('Bremerhaven'), 'BREMERHAVEN')
        # This group exercises pa_1none; the check previously went through
        # pa_2none, which only duplicated the assertion in the group below.
        self.assertEqual(self.pa_1none.encode('Schönberg'), 'SCHOENBERG')
        self.assertEqual(self.pa_1none.encode('Brückmann'), 'BRUECKMAN')
        self.assertEqual(self.pa_1none.encode('Krauße'), 'KRAUSE')

        self.assertEqual(self.pa_2none.encode(''), '')
        self.assertEqual(self.pa_2none.encode('Zedlitz'), 'ZEDLITZ')
        self.assertEqual(self.pa_2none.encode('Bremerhaven'), 'BREMERHAVEN')
        self.assertEqual(self.pa_2none.encode('Schönberg'), 'SCHOENBERG')
        self.assertEqual(self.pa_2none.encode('Brückmann'), 'BRUECKMAN')
        self.assertEqual(self.pa_2none.encode('Krauße'), 'KRAUSE')
Ejemplo n.º 3
0
 'lein': LEIN().encode,
 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode,
 'metasoundex': MetaSoundex().encode,
 'metasoundex_es': MetaSoundex(lang='es').encode,
 'metaphone': Metaphone().encode,
 'mra': MRA().encode,
 'norphone': Norphone().encode,
 'nrl': NRL().encode,
 'nysiis': NYSIIS().encode,
 'nysiis_modified': NYSIIS(modified=True).encode,
 'nysiis_ml_inf': NYSIIS(max_length=-1).encode,
 'onca': ONCA().encode,
 'onca_nopad_ml8': ONCA(max_length=8, zero_pad=False).encode,
 'parmar_kumbharana': ParmarKumbharana().encode,
 'phonem': Phonem().encode,
 'phonet_1': Phonet().encode,
 'phonet_2': Phonet(mode=2).encode,
 'phonet_1_none': Phonet(lang='none').encode,
 'phonet_2_none': Phonet(mode=2, lang='none').encode,
 'phonetic_spanish': PhoneticSpanish().encode,
 'phonetic_spanish_ml4': PhoneticSpanish(max_length=4).encode,
 'phonex': Phonex().encode,
 'phonex_0pad_ml6': Phonex(max_length=6, zero_pad=True).encode,
 'phonic': PHONIC().encode,
 'phonic_0pad_ml6': PHONIC(max_length=6, zero_pad=True).encode,
 'phonic_ext': PHONIC(extended=True).encode,
 'phonix': Phonix().encode,
 'phonix_0pad_ml6': Phonix(max_length=6, zero_pad=True).encode,
 'pshp_soundex_first': PSHPSoundexFirst().encode,
 'pshp_soundex_first_german': PSHPSoundexFirst(german=True).encode,
 'pshp_soundex_first_ml8': PSHPSoundexFirst(max_length=8).encode,
Ejemplo n.º 4
0
 'nysiis':
 NYSIIS().encode,
 'nysiis_modified':
 NYSIIS(modified=True).encode,
 'nysiis_ml_inf':
 NYSIIS(max_length=-1).encode,
 'onca':
 ONCA().encode,
 'onca_nopad_ml8':
 ONCA(max_length=8, zero_pad=False).encode,
 'parmar_kumbharana':
 ParmarKumbharana().encode,
 'phonem':
 Phonem().encode,
 'phonet_1':
 Phonet().encode,
 'phonet_2':
 Phonet(mode=2).encode,
 'phonet_1_none':
 Phonet(lang='none').encode,
 'phonet_2_none':
 Phonet(mode=2, lang='none').encode,
 'phonetic_spanish':
 PhoneticSpanish().encode,
 'phonetic_spanish_ml4':
 PhoneticSpanish(max_length=4).encode,
 'phonex':
 Phonex().encode,
 'phonex_0pad_ml6':
 Phonex(max_length=6, zero_pad=True).encode,
 'phonic':
Ejemplo n.º 5
0
class PhonetTestCases(unittest.TestCase):
    """Test Phonet functions.

    Test cases for abydos.phonetic.Phonet, covering the encoder class, the
    phonet() wrapper function and, when ALLOW_RANDOM is set, randomly
    sampled rows of two corpus files.
    """

    # Encoders are built once at class-definition time and shared by all
    # tests.  NOTE(review): the positional arguments are presumably
    # (mode, lang), matching the phonet() wrapper calls below -- confirm
    # against the Phonet constructor.
    pa = Phonet()
    pa_1 = Phonet(1)
    pa_2 = Phonet(2)
    pa_1none = Phonet(1, 'none')
    pa_2none = Phonet(2, 'none')

    def _check_corpus_sample(self, corpus_name, odds):
        """Check pa_1 and pa_2 against a random sample of a corpus file.

        corpus_name is handed to _corpus_file() to locate the file, which is
        read as UTF-8.  Lines starting with '#' are ignored.  Every other
        line is split on ',' into (term, expected pa_1 code, expected pa_2
        code) and is only checked when _one_in(odds) is true.
        """
        with codecs.open(_corpus_file(corpus_name),
                         encoding='utf-8') as testset:
            for line in testset:
                if line.startswith('#'):
                    continue
                fields = line.strip().split(',')
                if len(fields) >= 3 and _one_in(odds):
                    # The guard admits rows with more than three fields, so
                    # take just the first three instead of unpacking the
                    # whole row (which would raise ValueError on extras).
                    term, ph1, ph2 = fields[:3]
                    self.assertEqual(self.pa_1.encode(term), ph1)
                    self.assertEqual(self.pa_2.encode(term), ph2)

    def test_phonet_german(self):
        """Test abydos.phonetic.Phonet (German)."""
        self.assertEqual(self.pa.encode(''), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        self.assertEqual(self.pa_1.encode(''), '')
        self.assertEqual(self.pa_1.encode('Zedlitz'), 'ZETLIZ')
        self.assertEqual(self.pa_1.encode('Bremerhaven'), 'BREMAHAFN')
        self.assertEqual(self.pa_1.encode('Hamburger Hafen'), 'HAMBURGA HAFN')
        self.assertEqual(self.pa_1.encode('Jesper'), 'IESPA')
        self.assertEqual(self.pa_1.encode('elisabeth'), 'ELISABET')
        self.assertEqual(self.pa_1.encode('elisabet'), 'ELISABET')
        self.assertEqual(self.pa_1.encode('Ziegler'), 'ZIKLA')
        self.assertEqual(self.pa_1.encode('Scherer'), 'SHERA')
        self.assertEqual(self.pa_1.encode('Bartels'), 'BARTLS')
        self.assertEqual(self.pa_1.encode('Jansen'), 'IANSN')
        self.assertEqual(self.pa_1.encode('Sievers'), 'SIWAS')
        self.assertEqual(self.pa_1.encode('Michels'), 'MICHLS')
        self.assertEqual(self.pa_1.encode('Ewers'), 'EWERS')
        self.assertEqual(self.pa_1.encode('Evers'), 'EWERS')
        self.assertEqual(self.pa_1.encode('Wessels'), 'WESLS')
        self.assertEqual(self.pa_1.encode('Gottschalk'), 'GOSHALK')
        self.assertEqual(self.pa_1.encode('Brückmann'), 'BRÜKMAN')
        self.assertEqual(self.pa_1.encode('Blechschmidt'), 'BLECHSHMIT')
        self.assertEqual(self.pa_1.encode('Kolodziej'), 'KOLOTZI')
        self.assertEqual(self.pa_1.encode('Krauße'), 'KRAUSE')
        self.assertEqual(self.pa_1.encode('Cachel'), 'KESHL')

        self.assertEqual(self.pa_2.encode(''), '')
        self.assertEqual(self.pa_2.encode('Zedlitz'), 'ZETLIZ')
        self.assertEqual(self.pa_2.encode('Bremerhaven'), 'BRENAFN')
        self.assertEqual(self.pa_2.encode('Schönberg'), 'ZÖNBAK')
        self.assertEqual(self.pa_2.encode('Hamburger Hafen'), 'ANBURKA AFN')
        self.assertEqual(self.pa_2.encode('Ziegler'), 'ZIKLA')
        self.assertEqual(self.pa_2.encode('Scherer'), 'ZERA')
        self.assertEqual(self.pa_2.encode('Jansen'), 'IANZN')
        self.assertEqual(self.pa_2.encode('Eberhardt'), 'EBART')
        self.assertEqual(self.pa_2.encode('Gottschalk'), 'KUZALK')
        self.assertEqual(self.pa_2.encode('Brückmann'), 'BRIKNAN')
        self.assertEqual(self.pa_2.encode('Blechschmidt'), 'BLEKZNIT')
        self.assertEqual(self.pa_2.encode('Kolodziej'), 'KULUTZI')
        self.assertEqual(self.pa_2.encode('Krauße'), 'KRAUZE')

        # etc. (for code coverage)
        self.assertEqual(self.pa_1.encode('Jesper'), 'IESPA')
        self.assertEqual(self.pa_1.encode('Glacéhandschuh'), 'GLAZANSHU')
        self.assertEqual(self.pa_1.encode('Blechschmidt'), 'BLECHSHMIT')
        self.assertEqual(self.pa_1.encode('Burgdorf'), 'BURKDORF')
        self.assertEqual(self.pa_1.encode('Holzschuh'), 'HOLSHU')
        self.assertEqual(self.pa_1.encode('Aachen'), 'ACHN')
        self.assertEqual(self.pa_1.encode('Abendspaziergang'),
                         'ABENTSPAZIRGANK')

        # Test wrapper
        self.assertEqual(phonet('Bremerhaven', 1), 'BREMAHAFN')

    def test_phonet_nolang(self):
        """Test abydos.phonetic.Phonet (no language)."""
        self.assertEqual(Phonet(lang='none').encode(''), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        self.assertEqual(self.pa_1none.encode(''), '')
        self.assertEqual(self.pa_1none.encode('Zedlitz'), 'ZEDLITZ')
        self.assertEqual(self.pa_1none.encode('Bremerhaven'), 'BREMERHAVEN')
        # This group exercises pa_1none; the check previously went through
        # pa_2none, which only duplicated the assertion in the group below.
        self.assertEqual(self.pa_1none.encode('Schönberg'), 'SCHOENBERG')
        self.assertEqual(self.pa_1none.encode('Brückmann'), 'BRUECKMAN')
        self.assertEqual(self.pa_1none.encode('Krauße'), 'KRAUSE')

        self.assertEqual(self.pa_2none.encode(''), '')
        self.assertEqual(self.pa_2none.encode('Zedlitz'), 'ZEDLITZ')
        self.assertEqual(self.pa_2none.encode('Bremerhaven'), 'BREMERHAVEN')
        self.assertEqual(self.pa_2none.encode('Schönberg'), 'SCHOENBERG')
        self.assertEqual(self.pa_2none.encode('Brückmann'), 'BRUECKMAN')
        self.assertEqual(self.pa_2none.encode('Krauße'), 'KRAUSE')

        # Test wrapper
        self.assertEqual(phonet('Bremerhaven', 1, 'none'), 'BREMERHAVEN')

    def test_phonet_nachnamen(self):
        """Test abydos.phonetic.Phonet (Nachnamen set)."""
        if not ALLOW_RANDOM:
            return
        # This test set is very large (~10000 entries)
        # so let's just randomly select about 100 for testing
        self._check_corpus_sample('nachnamen.csv', 100)

    def test_phonet_ngerman(self):
        """Test abydos.phonetic.Phonet (ngerman set)."""
        if not ALLOW_RANDOM:
            return
        # This test set is very large (~3000000 entries)
        # so let's just randomly select about 30 for testing
        self._check_corpus_sample('ngerman.csv', 10000)
Ejemplo n.º 6
0
# Module-level encoder objects: each name below is bound to one instance of
# the corresponding class, constructed with no arguments.
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()
phonex = Phonex()
phonix = Phonix()
pshp_soundex_first = PSHPSoundexFirst()
pshp_soundex_last = PSHPSoundexLast()
refined_soundex = RefinedSoundex()
reth_schek = RethSchek()
roger_root = RogerRoot()
russell = RussellIndex()
sfinxbis = SfinxBis()
sound_d = SoundD()
soundex = Soundex()
soundex_br = SoundexBR()
spanish_metaphone = SpanishMetaphone()
spfc = SPFC()