def test_snowball_german(self):
        """Check SnowballGerman.stem against the Snowball German test set.

        The word/stem pairs are taken from
        http://snowball.tartarus.org/algorithms/german/diffs.txt
        """
        stem_word = self.stmr.stem

        # An empty word must stem to an empty string.
        self.assertEqual(stem_word(''), '')

        # Run every word/stem pair of the Snowball German corpus file.
        corpus_path = _corpus_file('snowball_german.csv')
        with codecs.open(corpus_path, encoding='utf-8') as corpus:
            # Discard the first line (presumably a header row).
            next(corpus)
            for row in corpus:
                # Lines starting with '#' are treated as comments.
                if row.startswith('#'):
                    continue
                fields = row.strip().split(',')
                word, expected = fields[0], fields[1]
                self.assertEqual(stem_word(word), expected.lower())

        # Extra case added to cover a branch the corpus does not reach.
        self.assertEqual(stem_word('ikeit'), 'ikeit')

        # The functional wrapper must be callable directly as well.
        self.assertEqual(sb_german('dämmerung'), 'dammer')
Exemple #2
0
    def test_sb_german_snowball(self):
        """Check sb_german against the Snowball German test set.

        The word/stem pairs are taken from
        http://snowball.tartarus.org/algorithms/german/diffs.txt
        """
        # An empty word must stem to an empty string.
        self.assertEqual(sb_german(''), '')

        # Run every word/stem pair of the Snowball German corpus file.
        corpus_path = TESTDIR + '/corpora/snowball_german.csv'
        with codecs.open(corpus_path, encoding='utf-8') as corpus:
            # Discard the first line (presumably a header row).
            next(corpus)
            for row in corpus:
                # Lines starting with '#' are treated as comments.
                if row.startswith('#'):
                    continue
                fields = row.strip().split(',')
                word, expected = fields[0], fields[1]
                self.assertEqual(sb_german(word), expected.lower())

        # Extra case added to cover a branch the corpus does not reach.
        self.assertEqual(sb_german('ikeit'), 'ikeit')
Exemple #3
0
    def test_sb_german_snowball(self):
        """Verify sb_german on the word/stem pairs of the Snowball test set.

        These test cases are from
        http://snowball.tartarus.org/algorithms/german/diffs.txt
        """
        # Base case: the empty string is returned unchanged.
        self.assertEqual(sb_german(''), '')

        # Snowball German test set, one "word,stem" pair per line.
        with codecs.open(
            TESTDIR+'/corpora/snowball_german.csv', encoding='utf-8'
        ) as testset:
            # The first line is skipped unconditionally.
            next(testset)
            for raw in testset:
                if raw.startswith('#'):
                    # Comment line; nothing to check.
                    continue
                columns = raw.strip().split(',')
                source, target = columns[0], columns[1]
                self.assertEqual(sb_german(source), target.lower())

        # Case targeting a branch the corpus leaves uncovered.
        self.assertEqual(sb_german('ikeit'), 'ikeit')
    def test_snowball_german(self):
        """Verify SnowballGerman.stem on the Snowball German test set.

        These test cases are from
        http://snowball.tartarus.org/algorithms/german/diffs.txt
        """
        stemmer = self.stmr

        # Base case: the empty string is returned unchanged.
        self.assertEqual(stemmer.stem(''), '')

        # Snowball German test set, one "word,stem" pair per line.
        with codecs.open(
            _corpus_file('snowball_german.csv'), encoding='utf-8'
        ) as testset:
            # The first line is skipped unconditionally.
            next(testset)
            for raw in testset:
                if raw.startswith('#'):
                    # Comment line; nothing to check.
                    continue
                columns = raw.strip().split(',')
                source, target = columns[0], columns[1]
                self.assertEqual(stemmer.stem(source), target.lower())

        # Case targeting a branch the corpus leaves uncovered.
        self.assertEqual(stemmer.stem('ikeit'), 'ikeit')

        # Exercise the sb_german wrapper function too.
        self.assertEqual(sb_german('dämmerung'), 'dammer')
Exemple #5
0
    def test_sb_german_snowball_alt(self):
        """Test abydos.stemmer.sb_german with and without alternate vowels.

        Each word is checked in two spellings (the second one using
        'ae'/'oe'/'ue' letter pairs), first with alternate_vowels=True and
        then with the default arguments.
        """
        # An empty word must stem to an empty string.
        self.assertEqual(sb_german('', alternate_vowels=True), '')

        # Columns: first spelling, second spelling,
        #          stems with alternate_vowels=True (first, second),
        #          stems with default arguments (first, second).
        cases = (
            ('dämmerung', 'daemmerung',
             'dammer', 'dammer', 'dammer', 'daemmer'),
            ('brötchen', 'broetchen',
             'brotch', 'brotch', 'brotch', 'broetch'),
            ('büro', 'buero', 'buro', 'buro', 'buro', 'buero'),
            ('häufen', 'haeufen', 'hauf', 'hauf', 'hauf', 'haeuf'),
            # 'quelle' and 'feuer' contain 'ue' that is not an umlaut
            # substitute; the first spellings here are artificial.
            ('qülle', 'quelle', 'qull', 'quell', 'qull', 'quell'),
            ('feür', 'feuer', 'feur', 'feu', 'feur', 'feu'),
            ('über', 'ueber', 'uber', 'uber', 'uber', 'ueb'),
        )

        for first, second, alt_1, alt_2, std_1, std_2 in cases:
            self.assertEqual(sb_german(first, alternate_vowels=True), alt_1)
            self.assertEqual(sb_german(second, alternate_vowels=True), alt_2)
            self.assertEqual(sb_german(first), std_1)
            self.assertEqual(sb_german(second), std_2)
Exemple #6
0
    def test_sb_german_snowball_alt(self):
        """Check sb_german with the alternate_vowels option on and off.

        Every entry pairs two spellings of a word with the stems expected
        when alternate_vowels=True and the stems expected by default.
        """
        # Base case: the empty string is returned unchanged.
        self.assertEqual(sb_german('', alternate_vowels=True), '')

        # (spellings, stems with alternate_vowels=True, default stems)
        table = [
            (('dämmerung', 'daemmerung'),
             ('dammer', 'dammer'), ('dammer', 'daemmer')),
            (('brötchen', 'broetchen'),
             ('brotch', 'brotch'), ('brotch', 'broetch')),
            (('büro', 'buero'), ('buro', 'buro'), ('buro', 'buero')),
            (('häufen', 'haeufen'), ('hauf', 'hauf'), ('hauf', 'haeuf')),
            (('qülle', 'quelle'), ('qull', 'quell'), ('qull', 'quell')),
            (('feür', 'feuer'), ('feur', 'feu'), ('feur', 'feu')),
            (('über', 'ueber'), ('uber', 'uber'), ('uber', 'ueb')),
        ]

        for spellings, alt_stems, default_stems in table:
            # First both spellings with the option enabled ...
            for word, expected in zip(spellings, alt_stems):
                self.assertEqual(
                    sb_german(word, alternate_vowels=True), expected
                )
            # ... then both spellings with the default arguments.
            for word, expected in zip(spellings, default_stems):
                self.assertEqual(sb_german(word), expected)