Esempio n. 1
0
 def test_soundex_census(self):
     """Check the codes returned by Soundex built with var='Census'.

     Each entry pairs an input name with the exact value ``encode`` is
     expected to return: a 2-tuple of codes for most of the names and a
     single code string for 'McDonald'.
     """
     encoder = Soundex(var='Census')
     expectations = (
         ('Vandeusen', ('V532', 'D250')),
         ('van Deusen', ('V532', 'D250')),
         ('McDonald', 'M235'),
         ('la Cruz', ('L262', 'C620')),
         ('vanDamme', ('V535', 'D500')),
     )
     # Assertions run in the listed order; the first mismatch fails the test.
     for surname, expected in expectations:
         self.assertEqual(encoder.encode(surname), expected)
Esempio n. 2
0
 def test_soundex_special(self):
     """Check the codes returned by Soundex built with var='special'.

     Each entry pairs an input name with the exact code string that
     ``encode`` is expected to return for it.
     """
     encoder = Soundex(var='special')
     expectations = (
         ('Ashcroft', 'A226'),
         ('Asicroft', 'A226'),
         ('AsWcroft', 'A226'),
         ('Rupert', 'R163'),
         ('Rubin', 'R150'),
     )
     # Assertions run in the listed order; the first mismatch fails the test.
     for surname, expected in expectations:
         self.assertEqual(encoder.encode(surname), expected)
Esempio n. 3
0
 def test_soundex_census(self):
     """Verify Soundex(var='Census') output for a fixed list of names.

     ``names`` and ``codes`` are parallel sequences: the i-th code is the
     exact value ``encode`` must return for the i-th name.
     """
     census = Soundex(var='Census')
     names = ('Vandeusen', 'van Deusen', 'McDonald', 'la Cruz', 'vanDamme')
     codes = (
         ('V532', 'D250'),
         ('V532', 'D250'),
         'M235',
         ('L262', 'C620'),
         ('V535', 'D500'),
     )
     for name, code in zip(names, codes):
         self.assertEqual(census.encode(name), code)
Esempio n. 4
0
 def test_soundex_special(self):
     """Verify Soundex(var='special') output for a fixed list of names.

     ``names`` and ``codes`` are parallel sequences: the i-th code is the
     exact string ``encode`` must return for the i-th name.
     """
     special = Soundex(var='special')
     names = ('Ashcroft', 'Asicroft', 'AsWcroft', 'Rupert', 'Rubin')
     codes = ('A226', 'A226', 'A226', 'R163', 'R150')
     for name, code in zip(names, codes):
         self.assertEqual(special.encode(name), code)
Esempio n. 5
0
    def test_caversham(self):
        """Check three encoders against the rows of variantNames.csv.

        The first line of the corpus file is skipped. Every later line is
        split on commas into exactly eleven fields: a name followed by its
        three expected codes, a second name followed by its three expected
        codes, and three flags. For Soundex, Metaphone, and the encoder held
        in ``self.pa`` (compared against the caverphone fields), the test
        asserts that each name encodes to the expected code and that the two
        encodings are equal when the matching flag is ``'1'`` and unequal
        otherwise.
        """
        soundex = Soundex()
        metaphone = Metaphone()

        def verify(encoder, first, second, code_a, code_b, same_flag):
            """Assert one encoder's results for a name pair match the row."""
            self.assertEqual(encoder.encode(first), code_a)
            self.assertEqual(encoder.encode(second), code_b)
            if same_flag != '1':
                self.assertNotEqual(
                    encoder.encode(first), encoder.encode(second)
                )
                return
            self.assertEqual(encoder.encode(first), encoder.encode(second))

        with open(_corpus_file('variantNames.csv')) as corpus:
            # Discard the first line (presumably a header row).
            next(corpus)
            for row in corpus:
                # Unpacking fails with ValueError unless there are exactly
                # eleven comma-separated fields on the line.
                (
                    first,
                    sdx_a,
                    mph_a,
                    cav_a,
                    second,
                    sdx_b,
                    mph_b,
                    cav_b,
                    sdx_flag,
                    mph_flag,
                    cav_flag,
                ) = row.strip().split(',')

                verify(soundex, first, second, sdx_a, sdx_b, sdx_flag)
                verify(metaphone, first, second, mph_a, mph_b, mph_flag)
                verify(self.pa, first, second, cav_a, cav_b, cav_flag)
Esempio n. 6
0
 'sfinxbis_ml6':
 lambda _: ', '.join(sfinxbis_6.encode(_)),
 'sound_d':
 SoundD().encode,
 'sound_d_ml8':
 SoundD(max_length=8).encode,
 'soundex':
 Soundex().encode,
 'soundex_reverse':
 Soundex(reverse=True).encode,
 'soundex_0pad_ml6':
 Soundex(zero_pad=True, max_length=6).encode,
 'soundex_special':
 Soundex(var='special').encode,
 'soundex_census':
 lambda _: ', '.join(soundex_census.encode(_)),
 'soundex_br':
 SoundexBR().encode,
 'spanish_metaphone':
 SpanishMetaphone().encode,
 'spanish_metaphone_modified':
 SpanishMetaphone(modified=True).encode,
 'spanish_metaphone_ml4':
 SpanishMetaphone(max_length=4).encode,
 'spfc':
 lambda _: spfc.encode(_ + ' ' + _),
 'statistics_canada':
 StatisticsCanada().encode,
 'statistics_canada_ml8':
 StatisticsCanada(max_length=8).encode,
 'waahlin':
Esempio n. 7
0
# Module-level encoder instances, each built with default constructor
# arguments.
spanish_metaphone = SpanishMetaphone()
spfc = SPFC()
statistics_canada = StatisticsCanada()

algorithms = {
    'russell_index':
    lambda _: str(russell.encode(_)),
    'russell_index_num_to_alpha':
    lambda _: russell._to_alpha(  # noqa: SF01
        russell.encode(_)),
    'russell_index_alpha':
    russell.encode_alpha,
    'soundex':
    soundex.encode,
    'reverse_soundex':
    lambda _: soundex.encode(_, reverse=True),
    'soundex_0pad_ml6':
    lambda _: soundex.encode(_, zero_pad=True, max_length=6),
    'soundex_special':
    lambda _: soundex.encode(_, var='special'),
    'soundex_census':
    lambda _: ', '.join(soundex.encode(_, var='Census')),
    'refined_soundex':
    refined_soundex.encode,
    'refined_soundex_vowels':
    lambda _: refined_soundex.encode(_, retain_vowels=True),
    'refined_soundex_0pad_ml6':
    lambda _: refined_soundex.encode(_, zero_pad=True, max_length=6),
    'dm_soundex':
    lambda _: ', '.join(sorted(dm.encode(_))),
    'koelner_phonetik':