예제 #1
0
    def test_alpha_sis_encode(self):
        """Exercise AlphaSIS encoding through the shared ``self.pa`` instance.

        Every check compares only the first element (``[0]``) of the value
        returned by the encoder against a fixed expected string.
        """
        # (input, expected first code) pairs, evaluated top to bottom:
        # the empty string, ordinary names, then runs of repeated letters
        numeric_cases = (
            ('', '00000000000000'),
            ('Rodgers', '04740000000000'),
            ('Rogers', '04740000000000'),
            ('Kant', '07210000000000'),
            ('Knuth', '02100000000000'),
            ('Harper', '24940000000000'),
            ('Collier', '07540000000000'),
            ('Schultz', '06500000000000'),
            ('Livingston', '05827012000000'),
            ('Colllier', '07554000000000'),
            ('Collllier', '07554000000000'),
            ('Colllllier', '07555400000000'),
            ('Collllllier', '07555400000000'),
            ('Colalalier', '07555400000000'),
        )
        for name, expected in numeric_cases:
            self.assertEqual(self.pa.encode(name)[0], expected)

        # boundary values of max_length: a negative value, then zero
        negative_len = AlphaSIS(max_length=-1)
        self.assertEqual(
            negative_len.encode('Niall')[0],
            '0250000000000000000000000000000000000000000000000000000000000000',
        )
        zero_len = AlphaSIS(max_length=0)
        self.assertEqual(zero_len.encode('Niall')[0], '0250')

        # alphabetic form of the code
        alpha_cases = (
            ('Rogers', 'RKR'),
            ('Kant', 'KNT'),
            ('Knuth', 'NT'),
            ('Harper', 'HRPR'),
        )
        for name, expected in alpha_cases:
            self.assertEqual(self.pa.encode_alpha(name)[0], expected)

        # the function-style wrapper is checked against the same code as
        # the 'Livingston' case above
        self.assertEqual(alpha_sis('Livingston')[0], '05827012000000')
예제 #2
0
class AlphaSISTestCases(unittest.TestCase):
    """Unit tests for abydos.phonetic.AlphaSIS.

    A single AlphaSIS instance built with default arguments is shared by
    the test through the class attribute ``pa``.
    """

    pa = AlphaSIS()

    def test_alpha_sis_encode(self):
        """Check ``encode`` and ``encode_alpha`` against fixed expectations.

        Return values are compared whole; several expected values hold two
        codes joined by a comma.
        """
        # (input, expected) pairs, evaluated top to bottom: the empty
        # string, ordinary names, then runs of repeated letters
        numeric_cases = (
            ('', '00000000000000'),
            ('Rodgers', '04740000000000'),
            ('Rogers', '04740000000000'),
            ('Kant', '07210000000000,06210000000000'),
            ('Knuth', '02100000000000'),
            ('Harper', '24940000000000'),
            ('Collier', '07540000000000,06540000000000'),
            ('Schultz', '06500000000000,06510000000000'),
            ('Livingston', '05827012000000'),
            ('Colllier', '07554000000000,06554000000000'),
            ('Collllier', '07554000000000,06554000000000'),
            ('Colllllier', '07555400000000,06555400000000'),
            ('Collllllier', '07555400000000,06555400000000'),
            ('Colalalier', '07555400000000,06555400000000'),
        )
        for name, expected in numeric_cases:
            self.assertEqual(self.pa.encode(name), expected)

        # boundary values of max_length: a negative value, then zero
        negative_len = AlphaSIS(max_length=-1)
        self.assertEqual(
            negative_len.encode('Niall'),
            '0250000000000000000000000000000000000000000000000000000000000000',
        )
        zero_len = AlphaSIS(max_length=0)
        self.assertEqual(zero_len.encode('Niall'), '0250')

        # alphabetic form of the code
        alpha_cases = (
            ('Rogers', 'RKR'),
            ('Kant', 'KNT,JNT'),
            ('Knuth', 'NT'),
            ('Harper', 'HRPR'),
        )
        for name, expected in alpha_cases:
            self.assertEqual(self.pa.encode_alpha(name), expected)
예제 #3
0
    SfinxBis,
    SoundD,
    Soundex,
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
    Waahlin,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Module-level SPFC instance, constructed once with default arguments when
# this module is imported.
spfc = SPFC()

algorithms = {
    'ainsworth': Ainsworth().encode,
    'alpha_sis': AlphaSIS().encode,
    'bmpm': BeiderMorse().encode,
    'bmpm_german': BeiderMorse(language_arg='german').encode,
    'bmpm_french': BeiderMorse(language_arg='french').encode,
    'bmpm_gen_exact': BeiderMorse(match_mode='exact').encode,
    'bmpm_ash_approx': BeiderMorse(name_mode='ash').encode,
    'bmpm_ash_exact': BeiderMorse(name_mode='ash', match_mode='exact').encode,
    'bmpm_sep_approx': BeiderMorse(name_mode='sep').encode,
    'bmpm_sep_exact': BeiderMorse(name_mode='sep', match_mode='exact').encode,
    'caverphone_1': Caverphone(version=1).encode,
    'caverphone_2': Caverphone().encode,
    'daitch_mokotoff_soundex': DaitchMokotoff().encode,
    'davidson': Davidson().encode,
    'dolby': Dolby().encode,
    'dolby_ml4': Dolby(max_length=4).encode,
    'dolby_vowels': Dolby(keep_vowels=True).encode,
예제 #4
0
    RethSchek,
    RogerRoot,
    RussellIndex,
    SPFC,
    SfinxBis,
    SoundD,
    Soundex,
    SoundexBR,
    SpanishMetaphone,
    StatisticsCanada,
    Waahlin,
)

from . import EXTREME_TEST, _corpus_file, _fuzz, _random_char

# Module-level encoder instances, each constructed once when this module is
# imported. Instances built with non-default arguments are noted inline.
# NOTE(review): presumably these are referenced by the `algorithms` table
# defined below -- confirm against the full file.
alpha_sis = AlphaSIS()
daitch_mokotoff = DaitchMokotoff()
double_metaphone = DoubleMetaphone()
haase = Haase()
haase_primary = Haase(primary_only=True)  # built with primary_only=True
koelner = Koelner()
russell = RussellIndex()
sfinxbis = SfinxBis()
sfinxbis_6 = SfinxBis(max_length=6)  # built with max_length=6
soundex_census = Soundex(var='Census')  # built with var='Census'
spfc = SPFC()

algorithms = {
    'ainsworth':
    Ainsworth().encode,
    'alpha_sis':