class PhoneticTestCases(unittest.TestCase):
    """Exercise the phonetic fingerprint class.

    abydos.fingerprint.Phonetic
    """

    # Fingerprinters shared by the tests: the default one, plus one wrapping
    # a Phonet instance and one wrapping a Soundex instance.
    fp = Phonetic()
    fp_phonet = Phonetic(Phonet())
    fp_soundex = Phonetic(Soundex())
    soundex = Soundex()

    def test_phonetic_fingerprint(self):
        """Test abydos.fingerprint.Phonetic."""
        # Base case: an empty string fingerprints to an empty string.
        self.assertEqual(self.fp.fingerprint(''), '')

        # Every remaining check fingerprints the same space-joined phrase.
        phrase = ' '.join(NIALL)
        expectations = (
            (self.fp, 'a anl mknl njl nklk nl'),
            (
                self.fp_phonet,
                'knile makneil maknele neil nel nele nial nigeli '
                + 'nigl nil noigialach oneil ui',
            ),
            (self.fp_soundex, 'k540 m254 n240 n242 n400 o540 u000'),
        )
        for fingerprinter, expected in expectations:
            self.assertEqual(fingerprinter.fingerprint(phrase), expected)
def test_phonet_nolang(self):
    """Test abydos.phonetic.Phonet (no language).

    Runs the same inputs through the two language-less encoders held on the
    test case (``self.pa_1none`` and ``self.pa_2none``) and checks that both
    return the same expected encodings.
    """
    self.assertEqual(Phonet(lang='none').encode(''), '')

    # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
    # First encoder (pa_1none).
    self.assertEqual(self.pa_1none.encode(''), '')
    self.assertEqual(self.pa_1none.encode('Zedlitz'), 'ZEDLITZ')
    self.assertEqual(self.pa_1none.encode('Bremerhaven'), 'BREMERHAVEN')
    # This check previously called pa_2none, so pa_1none was never
    # exercised on 'Schönberg' and the pa_2none check ran twice.
    self.assertEqual(self.pa_1none.encode('Schönberg'), 'SCHOENBERG')
    self.assertEqual(self.pa_1none.encode('Brückmann'), 'BRUECKMAN')
    self.assertEqual(self.pa_1none.encode('Krauße'), 'KRAUSE')

    # Second encoder (pa_2none): same inputs, same expected outputs.
    self.assertEqual(self.pa_2none.encode(''), '')
    self.assertEqual(self.pa_2none.encode('Zedlitz'), 'ZEDLITZ')
    self.assertEqual(self.pa_2none.encode('Bremerhaven'), 'BREMERHAVEN')
    self.assertEqual(self.pa_2none.encode('Schönberg'), 'SCHOENBERG')
    self.assertEqual(self.pa_2none.encode('Brückmann'), 'BRUECKMAN')
    self.assertEqual(self.pa_2none.encode('Krauße'), 'KRAUSE')
'lein': LEIN().encode, 'lein_nopad_ml8': LEIN(max_length=8, zero_pad=False).encode, 'metasoundex': MetaSoundex().encode, 'metasoundex_es': MetaSoundex(lang='es').encode, 'metaphone': Metaphone().encode, 'mra': MRA().encode, 'norphone': Norphone().encode, 'nrl': NRL().encode, 'nysiis': NYSIIS().encode, 'nysiis_modified': NYSIIS(modified=True).encode, 'nysiis_ml_inf': NYSIIS(max_length=-1).encode, 'onca': ONCA().encode, 'onca_nopad_ml8': ONCA(max_length=8, zero_pad=False).encode, 'parmar_kumbharana': ParmarKumbharana().encode, 'phonem': Phonem().encode, 'phonet_1': Phonet().encode, 'phonet_2': Phonet(mode=2).encode, 'phonet_1_none': Phonet(lang='none').encode, 'phonet_2_none': Phonet(mode=2, lang='none').encode, 'phonetic_spanish': PhoneticSpanish().encode, 'phonetic_spanish_ml4': PhoneticSpanish(max_length=4).encode, 'phonex': Phonex().encode, 'phonex_0pad_ml6': Phonex(max_length=6, zero_pad=True).encode, 'phonic': PHONIC().encode, 'phonic_0pad_ml6': PHONIC(max_length=6, zero_pad=True).encode, 'phonic_ext': PHONIC(extended=True).encode, 'phonix': Phonix().encode, 'phonix_0pad_ml6': Phonix(max_length=6, zero_pad=True).encode, 'pshp_soundex_first': PSHPSoundexFirst().encode, 'pshp_soundex_first_german': PSHPSoundexFirst(german=True).encode, 'pshp_soundex_first_ml8': PSHPSoundexFirst(max_length=8).encode,
'nysiis': NYSIIS().encode, 'nysiis_modified': NYSIIS(modified=True).encode, 'nysiis_ml_inf': NYSIIS(max_length=-1).encode, 'onca': ONCA().encode, 'onca_nopad_ml8': ONCA(max_length=8, zero_pad=False).encode, 'parmar_kumbharana': ParmarKumbharana().encode, 'phonem': Phonem().encode, 'phonet_1': Phonet().encode, 'phonet_2': Phonet(mode=2).encode, 'phonet_1_none': Phonet(lang='none').encode, 'phonet_2_none': Phonet(mode=2, lang='none').encode, 'phonetic_spanish': PhoneticSpanish().encode, 'phonetic_spanish_ml4': PhoneticSpanish(max_length=4).encode, 'phonex': Phonex().encode, 'phonex_0pad_ml6': Phonex(max_length=6, zero_pad=True).encode, 'phonic':
class PhonetTestCases(unittest.TestCase):
    """Test Phonet functions.

    test cases for abydos.phonetic.Phonet
    """

    # Shared encoders. The positional arguments are presumably (mode, lang);
    # confirm against the Phonet constructor.
    pa = Phonet()
    pa_1 = Phonet(1)
    pa_2 = Phonet(2)
    pa_1none = Phonet(1, 'none')
    pa_2none = Phonet(2, 'none')

    def _check_corpus_sample(self, filename, odds):
        """Check a random sample of a corpus file against both encoders.

        Each non-comment line of the corpus is split on commas into
        ``term, ph1, ph2``; ``ph1`` is the expected ``pa_1`` encoding and
        ``ph2`` the expected ``pa_2`` encoding. A line is tested only when
        ``_one_in(odds)`` selects it. Does nothing unless ``ALLOW_RANDOM``
        is truthy.

        Parameters
        ----------
        filename : str
            Name of the corpus file, resolved through ``_corpus_file``.
        odds : int
            Argument handed to ``_one_in`` for each candidate line.

        """
        if not ALLOW_RANDOM:
            return
        with codecs.open(_corpus_file(filename), encoding='utf-8') as testset:
            for line in testset:
                # Lines beginning with '#' are comments in the corpus.
                if line.startswith('#'):
                    continue
                fields = line.strip().split(',')
                # _one_in is consulted only for lines with enough fields,
                # exactly as the two original tests did.
                if len(fields) >= 3 and _one_in(odds):
                    # NOTE(review): a line with more than three fields
                    # passes the length check but fails this unpacking
                    # with ValueError; kept as in the original.
                    (term, ph1, ph2) = fields
                    self.assertEqual(self.pa_1.encode(term), ph1)
                    self.assertEqual(self.pa_2.encode(term), ph2)

    def test_phonet_german(self):
        """Test abydos.phonetic.Phonet (German)."""
        self.assertEqual(self.pa.encode(''), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        self.assertEqual(self.pa_1.encode(''), '')
        self.assertEqual(self.pa_1.encode('Zedlitz'), 'ZETLIZ')
        self.assertEqual(self.pa_1.encode('Bremerhaven'), 'BREMAHAFN')
        self.assertEqual(self.pa_1.encode('Hamburger Hafen'), 'HAMBURGA HAFN')
        self.assertEqual(self.pa_1.encode('Jesper'), 'IESPA')
        self.assertEqual(self.pa_1.encode('elisabeth'), 'ELISABET')
        self.assertEqual(self.pa_1.encode('elisabet'), 'ELISABET')
        self.assertEqual(self.pa_1.encode('Ziegler'), 'ZIKLA')
        self.assertEqual(self.pa_1.encode('Scherer'), 'SHERA')
        self.assertEqual(self.pa_1.encode('Bartels'), 'BARTLS')
        self.assertEqual(self.pa_1.encode('Jansen'), 'IANSN')
        self.assertEqual(self.pa_1.encode('Sievers'), 'SIWAS')
        self.assertEqual(self.pa_1.encode('Michels'), 'MICHLS')
        self.assertEqual(self.pa_1.encode('Ewers'), 'EWERS')
        self.assertEqual(self.pa_1.encode('Evers'), 'EWERS')
        self.assertEqual(self.pa_1.encode('Wessels'), 'WESLS')
        self.assertEqual(self.pa_1.encode('Gottschalk'), 'GOSHALK')
        self.assertEqual(self.pa_1.encode('Brückmann'), 'BRÜKMAN')
        self.assertEqual(self.pa_1.encode('Blechschmidt'), 'BLECHSHMIT')
        self.assertEqual(self.pa_1.encode('Kolodziej'), 'KOLOTZI')
        self.assertEqual(self.pa_1.encode('Krauße'), 'KRAUSE')
        self.assertEqual(self.pa_1.encode('Cachel'), 'KESHL')

        self.assertEqual(self.pa_2.encode(''), '')
        self.assertEqual(self.pa_2.encode('Zedlitz'), 'ZETLIZ')
        self.assertEqual(self.pa_2.encode('Bremerhaven'), 'BRENAFN')
        self.assertEqual(self.pa_2.encode('Schönberg'), 'ZÖNBAK')
        self.assertEqual(self.pa_2.encode('Hamburger Hafen'), 'ANBURKA AFN')
        self.assertEqual(self.pa_2.encode('Ziegler'), 'ZIKLA')
        self.assertEqual(self.pa_2.encode('Scherer'), 'ZERA')
        self.assertEqual(self.pa_2.encode('Jansen'), 'IANZN')
        self.assertEqual(self.pa_2.encode('Eberhardt'), 'EBART')
        self.assertEqual(self.pa_2.encode('Gottschalk'), 'KUZALK')
        self.assertEqual(self.pa_2.encode('Brückmann'), 'BRIKNAN')
        self.assertEqual(self.pa_2.encode('Blechschmidt'), 'BLEKZNIT')
        self.assertEqual(self.pa_2.encode('Kolodziej'), 'KULUTZI')
        self.assertEqual(self.pa_2.encode('Krauße'), 'KRAUZE')

        # etc. (for code coverage)
        self.assertEqual(self.pa_1.encode('Jesper'), 'IESPA')
        self.assertEqual(self.pa_1.encode('Glacéhandschuh'), 'GLAZANSHU')
        self.assertEqual(self.pa_1.encode('Blechschmidt'), 'BLECHSHMIT')
        self.assertEqual(self.pa_1.encode('Burgdorf'), 'BURKDORF')
        self.assertEqual(self.pa_1.encode('Holzschuh'), 'HOLSHU')
        self.assertEqual(self.pa_1.encode('Aachen'), 'ACHN')
        self.assertEqual(
            self.pa_1.encode('Abendspaziergang'), 'ABENTSPAZIRGANK'
        )

        # Test wrapper
        self.assertEqual(phonet('Bremerhaven', 1), 'BREMAHAFN')

    def test_phonet_nolang(self):
        """Test abydos.phonetic.Phonet (no language)."""
        self.assertEqual(Phonet(lang='none').encode(''), '')

        # https://code.google.com/p/phonet4java/source/browse/trunk/src/test/java/com/googlecode/phonet4java/Phonet1Test.java
        self.assertEqual(self.pa_1none.encode(''), '')
        self.assertEqual(self.pa_1none.encode('Zedlitz'), 'ZEDLITZ')
        self.assertEqual(self.pa_1none.encode('Bremerhaven'), 'BREMERHAVEN')
        # This check previously called pa_2none, so pa_1none was never
        # exercised on 'Schönberg' and the pa_2none check ran twice.
        self.assertEqual(self.pa_1none.encode('Schönberg'), 'SCHOENBERG')
        self.assertEqual(self.pa_1none.encode('Brückmann'), 'BRUECKMAN')
        self.assertEqual(self.pa_1none.encode('Krauße'), 'KRAUSE')

        self.assertEqual(self.pa_2none.encode(''), '')
        self.assertEqual(self.pa_2none.encode('Zedlitz'), 'ZEDLITZ')
        self.assertEqual(self.pa_2none.encode('Bremerhaven'), 'BREMERHAVEN')
        self.assertEqual(self.pa_2none.encode('Schönberg'), 'SCHOENBERG')
        self.assertEqual(self.pa_2none.encode('Brückmann'), 'BRUECKMAN')
        self.assertEqual(self.pa_2none.encode('Krauße'), 'KRAUSE')

        # Test wrapper
        self.assertEqual(phonet('Bremerhaven', 1, 'none'), 'BREMERHAVEN')

    def test_phonet_nachnamen(self):
        """Test abydos.phonetic.Phonet (Nachnamen set)."""
        # This test set is very large (~10000 entries)
        # so let's just randomly select about 100 for testing
        self._check_corpus_sample('nachnamen.csv', 100)

    def test_phonet_ngerman(self):
        """Test abydos.phonetic.Phonet (ngerman set)."""
        # This test set is very large (~3000000 entries)
        # so let's just randomly select about 30 for testing
        self._check_corpus_sample('ngerman.csv', 10000)
# One instance of each phonetic algorithm class, constructed with no
# arguments and bound to a lowercase name.
fonem = FONEM()
fuzzy_soundex = FuzzySoundex()
haase = Haase()
henry_early = HenryEarly()
koelner = Koelner()
lein = Lein()
metaphone = Metaphone()
metasoundex = MetaSoundex()
mra = MRA()
norphone = Norphone()
nrl = NRL()
nysiis = NYSIIS()
onca = ONCA()
parmar_kumbharana = ParmarKumbharana()
phonem = Phonem()
phonet = Phonet()
phonetic_spanish = PhoneticSpanish()
phonex = Phonex()
phonix = Phonix()
pshp_soundex_first = PSHPSoundexFirst()
pshp_soundex_last = PSHPSoundexLast()
refined_soundex = RefinedSoundex()
reth_schek = RethSchek()
roger_root = RogerRoot()
russell = RussellIndex()
sfinxbis = SfinxBis()
sound_d = SoundD()
soundex = Soundex()
soundex_br = SoundexBR()
spanish_metaphone = SpanishMetaphone()
spfc = SPFC()