Ejemplo n.º 1
0
    def test_slicing(self):
        # Integer indexing is expected to give a single Word, while a
        # slice is expected to give another WordList.
        word_list = tb.WordList(self.words)

        head = word_list[0]
        assert_true(isinstance(head, tb.Word))
        assert_equal(head, 'Schön')

        first_two = word_list[0:2]
        assert_true(isinstance(first_two, tb.WordList))
        expected = tb.WordList(['Schön', 'ist'])
        assert_equal(first_two, expected)
Ejemplo n.º 2
0
 def test_empty_sentence(self):
     # Every derived view of the empty sentence fixture should be empty.
     sentence = self.empty_sentence

     # Tagging and tokenization.
     assert_equal(sentence.tags, [])
     assert_equal(sentence.tokens, tb.WordList([]))
     assert_equal(sentence.words, tb.WordList([]))
     assert_equal(sentence.noun_phrases, tb.WordList([]))

     # Frequency tables.
     assert_equal(sentence.np_counts, {})
     assert_equal(sentence.word_counts, {})

     # Method-based views.
     assert_equal(sentence.ngrams(), [])
     assert_equal(sentence.parse(), "")
Ejemplo n.º 3
0
 def test_singularize(self):
     # 'Menschen' and 'Mäuse' are deliberately left out: they are not
     # processed correctly (the results are 'Menschen' and 'Mäus'
     # instead of 'Mensch' and 'Maus').
     plurals = tb.WordList(['Hunde', 'Katzen', 'Büffel'])
     singularized = plurals.singularize()
     expected = tb.WordList(['Hund', 'Katze', 'Büffel'])
     assert_equal(singularized, expected)
Ejemplo n.º 4
0
 def test_slice_repr(self):
     # repr() of a sliced WordList. On Python 2 the expected text has
     # u-prefixed elements and an escaped umlaut, hence the branch.
     first_two = tb.WordList(['Schön', 'ist', 'besser'])[:2]
     if not PY2:
         assert_equal(repr(first_two), "WordList(['Schön', 'ist'])")
     else:
         assert_equal(unicode(repr(first_two)),
                      u"WordList([u'Sch\\xf6n', u'ist'])")
Ejemplo n.º 5
0
 def test_overrides(self):
     # A tokenizer passed to the blobber should be used by the blobs
     # that the blobber creates.
     blobber = tb.BlobberDE(tokenizer=SentenceTokenizer())

     first = blobber("Was nun? Dumme Kuh?")
     assert_true(isinstance(first.tokenizer, SentenceTokenizer))
     assert_equal(
         first.tokens,
         tb.WordList(["Was nun?", "Dumme Kuh?"]),
     )

     second = blobber("Ein anderer Blob")
     # Both blobs hold the very same tokenizer instance ...
     assert_true(first.tokenizer is second.tokenizer)
     # ... while the blobs themselves compare as unequal.
     assert_not_equal(first, second)
Ejemplo n.º 6
0
 def test_repr(self):
     # Checks repr() of a complete WordList.
     #
     # The Python 2 branch exists because
     # `from __future__ import unicode_literals` turns the whole expected
     # value into one single unicode string. Without the branch, PY2
     # fails with an AssertionError:
     # "WordList([u'Sch\\xf6n', u'ist', u'besser'])" != \
     # u"WordList(['Sch\xf6n', 'ist', 'besser'])"
     word_list = tb.WordList(['Schön', 'ist', 'besser'])
     if not PY2:
         assert_equal(repr(word_list),
                      "WordList(['Schön', 'ist', 'besser'])")
     else:
         assert_equal(unicode(repr(word_list)),
                      u"WordList([u'Sch\\xf6n', u'ist', u'besser'])")
Ejemplo n.º 7
0
 def test_len(self):
     # len() of a three-word WordList should be 3.
     word_list = tb.WordList(['Schön', 'ist', 'besser'])
     word_total = len(word_list)
     assert_equal(word_total, 3)
Ejemplo n.º 8
0
 def test_extend(self):
     # After extend(), an added string should be a Word while an added
     # int should still be an int.
     animals = tb.WordList(["Hunde", "Katzen"])
     animals.extend(["Büffel", 4])

     added_string = animals[2]
     assert_true(isinstance(added_string, tb.Word))

     added_number = animals[3]
     assert_true(isinstance(added_number, int))
Ejemplo n.º 9
0
 def test_append(self):
     # After append(), an added string should be a Word while an added
     # tuple should still be a tuple.
     animals = tb.WordList(['Hund'])

     animals.append("Katze")
     added_string = animals[1]
     assert_true(isinstance(added_string, tb.Word))

     animals.append(('ein', 'Tupel'))
     added_tuple = animals[2]
     assert_true(isinstance(added_tuple, tuple))
Ejemplo n.º 10
0
 def test_convert_to_list(self):
     # list() applied to a WordList should equal the source word list.
     as_plain_list = list(tb.WordList(self.words))
     assert_equal(as_plain_list, self.words)
Ejemplo n.º 11
0
 def test_count(self):
     # count() is expected to ignore case by default, to respect case
     # when case_sensitive=True, and not to count a partial word.
     names = tb.WordList(['monty', 'python', 'Python', 'Monty'])

     ignoring_case = names.count('monty')
     assert_equal(ignoring_case, 2)

     respecting_case = names.count('monty', case_sensitive=True)
     assert_equal(respecting_case, 1)

     partial_word = names.count('mon')
     assert_equal(partial_word, 0)
Ejemplo n.º 12
0
 def test_lower(self):
     # lower() should give a WordList with every word lower-cased.
     mixed_case = tb.WordList(['Philosophie', 'voN', 'PYTHON'])
     lowered = mixed_case.lower()
     expected = tb.WordList(['philosophie', 'von', 'python'])
     assert_equal(lowered, expected)
Ejemplo n.º 13
0
 def test_upper(self):
     # upper() should match upper-casing each fixture word individually.
     word_list = tb.WordList(self.words)
     uppered = word_list.upper()
     expected = tb.WordList([word.upper() for word in self.words])
     assert_equal(uppered, expected)
Ejemplo n.º 14
0
 def test_lemmatize(self):
     # lemmatize() should give a WordList holding the expected lemmas.
     inflected = tb.WordList(["Katze", "Hunde", "Ochsen"])
     lemmas = inflected.lemmatize()
     expected = tb.WordList(['Katze', 'Hund', 'Ochse'])
     assert_equal(lemmas, expected)
Ejemplo n.º 15
0
 def test_pluralize(self):
     # pluralize() should give a WordList holding the expected plurals
     # ('Büffel' is expected to stay as it is).
     singulars = tb.WordList(['Hund', 'Katze', 'Büffel'])
     plurals = singulars.pluralize()
     expected = tb.WordList(['Hunde', 'Katzen', 'Büffel'])
     assert_equal(plurals, expected)
Ejemplo n.º 16
0
 def test_str(self):
     # str() of a WordList should equal str() of the plain word list.
     plain_words = self.words
     word_list = tb.WordList(plain_words)
     assert_equal(str(word_list), str(plain_words))