def test_snowball_german(self):
    """Check SnowballGerman stems against the Snowball reference corpus.

    Covers abydos.stemmer.SnowballGerman. The word/stem pairs are read from
    the snowball_german.csv corpus file; the reference data comes from
    http://snowball.tartarus.org/algorithms/german/diffs.txt
    """
    stemmer = self.stmr

    # An empty word stems to an empty word.
    self.assertEqual(stemmer.stem(''), '')

    # Walk the Snowball German corpus, one "word,stem" pair per row.
    corpus_path = _corpus_file('snowball_german.csv')
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        # Discard the first line (presumably a header row).
        next(corpus)
        for row in corpus:
            # Rows starting with '#' are comments, not test cases.
            if row[0] == '#':
                continue
            fields = row.strip().split(',')
            word, expected = fields[0], fields[1]
            self.assertEqual(stemmer.stem(word), expected.lower())

    # Extra case exercising a branch the corpus does not reach.
    self.assertEqual(stemmer.stem('ikeit'), 'ikeit')

    # The functional wrapper should agree with the class-based stemmer.
    self.assertEqual(sb_german('dämmerung'), 'dammer')
def test_sb_german_snowball(self):
    """Check sb_german stems against the Snowball reference corpus.

    Covers abydos.stemmer.sb_german. The word/stem pairs are read from the
    snowball_german.csv corpus under TESTDIR; the reference data comes from
    http://snowball.tartarus.org/algorithms/german/diffs.txt
    """
    # An empty word stems to an empty word.
    self.assertEqual(sb_german(''), '')

    # Walk the Snowball German corpus, one "word,stem" pair per row.
    corpus_path = TESTDIR + '/corpora/snowball_german.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        # Discard the first line (presumably a header row).
        next(corpus)
        for row in corpus:
            # Rows starting with '#' are comments, not test cases.
            if row[0] == '#':
                continue
            fields = row.strip().split(',')
            word, expected = fields[0], fields[1]
            self.assertEqual(sb_german(word), expected.lower())

    # Extra case exercising a branch the corpus does not reach.
    self.assertEqual(sb_german('ikeit'), 'ikeit')
def test_sb_german_snowball(self):
    """Check sb_german stems against the Snowball reference corpus.

    Covers abydos.stemmer.sb_german. The word/stem pairs are read from the
    snowball_german.csv corpus under TESTDIR; the reference data comes from
    http://snowball.tartarus.org/algorithms/german/diffs.txt
    """
    # An empty word stems to an empty word.
    self.assertEqual(sb_german(''), '')

    # Walk the Snowball German corpus, one "word,stem" pair per row.
    corpus_path = TESTDIR+'/corpora/snowball_german.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        # Discard the first line (presumably a header row).
        next(corpus)
        # Lazily drop '#' comment rows and split the rest on commas.
        parsed_rows = (
            raw.strip().split(',') for raw in corpus if raw[0] != '#'
        )
        for fields in parsed_rows:
            word, expected = fields[0], fields[1]
            self.assertEqual(sb_german(word), expected.lower())

    # Extra case exercising a branch the corpus does not reach.
    self.assertEqual(sb_german('ikeit'), 'ikeit')
def test_snowball_german(self):
    """Check SnowballGerman stems against the Snowball reference corpus.

    Covers abydos.stemmer.SnowballGerman. The word/stem pairs are read from
    the snowball_german.csv corpus file; the reference data comes from
    http://snowball.tartarus.org/algorithms/german/diffs.txt
    """
    stemmer = self.stmr

    # An empty word stems to an empty word.
    self.assertEqual(stemmer.stem(''), '')

    # Walk the Snowball German corpus, one "word,stem" pair per row.
    corpus_path = _corpus_file('snowball_german.csv')
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        # Discard the first line (presumably a header row).
        next(corpus)
        # Lazily drop '#' comment rows and split the rest on commas.
        parsed_rows = (
            raw.strip().split(',') for raw in corpus if raw[0] != '#'
        )
        for fields in parsed_rows:
            word, expected = fields[0], fields[1]
            self.assertEqual(stemmer.stem(word), expected.lower())

    # Extra case exercising a branch the corpus does not reach.
    self.assertEqual(stemmer.stem('ikeit'), 'ikeit')

    # The functional wrapper should agree with the class-based stemmer.
    self.assertEqual(sb_german('dämmerung'), 'dammer')
def test_sb_german_snowball_alt(self):
    """Check abydos.stemmer.sb_german with and without alternate_vowels.

    Each case pairs two spellings of a word with four expected stems,
    asserted in this order: first spelling with alternate_vowels=True,
    second spelling with alternate_vowels=True, first spelling with default
    arguments, second spelling with default arguments.
    """
    # An empty word stems to an empty word.
    self.assertEqual(sb_german('', alternate_vowels=True), '')

    cases = (
        # first, second, alt(first), alt(second), plain(first), plain(second)
        ('dämmerung', 'daemmerung', 'dammer', 'dammer', 'dammer', 'daemmer'),
        ('brötchen', 'broetchen', 'brotch', 'brotch', 'brotch', 'broetch'),
        ('büro', 'buero', 'buro', 'buro', 'buro', 'buero'),
        ('häufen', 'haeufen', 'hauf', 'hauf', 'hauf', 'haeuf'),
        ('qülle', 'quelle', 'qull', 'quell', 'qull', 'quell'),
        ('feür', 'feuer', 'feur', 'feu', 'feur', 'feu'),
        ('über', 'ueber', 'uber', 'uber', 'uber', 'ueb'),
    )
    for first, second, alt_1, alt_2, plain_1, plain_2 in cases:
        self.assertEqual(sb_german(first, alternate_vowels=True), alt_1)
        self.assertEqual(sb_german(second, alternate_vowels=True), alt_2)
        self.assertEqual(sb_german(first), plain_1)
        self.assertEqual(sb_german(second), plain_2)
def test_sb_german_snowball_alt(self):
    """Check abydos.stemmer.sb_german with and without alternate_vowels.

    Every row of the table is (word, use_alt, expected stem). When use_alt
    is true the stemmer is called with alternate_vowels=True; otherwise it
    is called with default arguments. Rows are asserted in table order.
    """
    checks = [
        # base case
        ('', True, ''),
        # dämmerung,dammer
        ('dämmerung', True, 'dammer'),
        ('daemmerung', True, 'dammer'),
        ('dämmerung', False, 'dammer'),
        ('daemmerung', False, 'daemmer'),
        # brötchen,brotch
        ('brötchen', True, 'brotch'),
        ('broetchen', True, 'brotch'),
        ('brötchen', False, 'brotch'),
        ('broetchen', False, 'broetch'),
        # büro,buro
        ('büro', True, 'buro'),
        ('buero', True, 'buro'),
        ('büro', False, 'buro'),
        ('buero', False, 'buero'),
        # häufen,hauf
        ('häufen', True, 'hauf'),
        ('haeufen', True, 'hauf'),
        ('häufen', False, 'hauf'),
        ('haeufen', False, 'haeuf'),
        # quelle,quell
        ('qülle', True, 'qull'),
        ('quelle', True, 'quell'),
        ('qülle', False, 'qull'),
        ('quelle', False, 'quell'),
        # feuer,feuer
        ('feür', True, 'feur'),
        ('feuer', True, 'feu'),
        ('feür', False, 'feur'),
        ('feuer', False, 'feu'),
        # über,uber
        ('über', True, 'uber'),
        ('ueber', True, 'uber'),
        ('über', False, 'uber'),
        ('ueber', False, 'ueb'),
    ]
    for word, use_alt, expected in checks:
        if use_alt:
            actual = sb_german(word, alternate_vowels=True)
        else:
            # Omit the keyword entirely so the function's own default applies.
            actual = sb_german(word)
        self.assertEqual(actual, expected)