Example 1: tokenize() with the default and a custom tokenizer
 def test_tokenize_method(self):
     tokenizer = nltk.tokenize.TabTokenizer()
     blob = tb.TextBlob("This is\ttext.")
     # If called without arguments, should default to WordTokenizer
     assert_equal(blob.tokenize(), tb.WordList(["This", "is", "text", "."]))
     # Pass in the TabTokenizer
     assert_equal(blob.tokenize(tokenizer), tb.WordList(["This is", "text."]))
Example 2: slicing a WordList
 def test_slicing(self):
     wl = tb.WordList(self.words)
     first = wl[0]
     assert_true(isinstance(first, tb.Word))
     assert_equal(first, 'Beautiful')

     dogs = wl[0:2]
     assert_true(isinstance(dogs, tb.WordList))
     assert_equal(dogs, tb.WordList(['Beautiful', 'is']))
Example 3: n-grams with the default n=3 and with n=4
 def test_ngrams(self):
     blob = tb.TextBlob("I am eating a pizza.")
     three_grams = blob.ngrams()
     assert_equal(three_grams, [
         tb.WordList(('I', 'am', 'eating')),
         tb.WordList(('am', 'eating', 'a')),
         tb.WordList(('eating', 'a', 'pizza'))
     ])
     four_grams = blob.ngrams(n=4)
     assert_equal(four_grams, [
         tb.WordList(('I', 'am', 'eating', 'a')),
         tb.WordList(('am', 'eating', 'a', 'pizza'))
     ])
Example 4: joining a list or WordList into a TextBlob
 def test_join(self):
     l = ['explicit', 'is', 'better']
     wl = tb.WordList(l)
     assert_equal(
         tb.TextBlob(' ').join(l), tb.TextBlob('explicit is better'))
     assert_equal(
         tb.TextBlob(' ').join(wl), tb.TextBlob('explicit is better'))
Example 5: repr() of a WordList on Python 2 and 3
 def test_repr(self):
     wl = tb.WordList(['Beautiful', 'is', 'better'])
     if PY2:
         assert_equal(repr(wl),
                      "WordList([u'Beautiful', u'is', u'better'])")
     else:
         assert_equal(repr(wl), "WordList(['Beautiful', 'is', 'better'])")
Example 6: overriding Blobber components
 def test_overrides(self):
     b = tb.Blobber(tokenizer=SentenceTokenizer(),
                    np_extractor=ConllExtractor())
     blob = b("How now? Brown cow?")
     assert_true(isinstance(blob.tokenizer, SentenceTokenizer))
     assert_equal(blob.tokens, tb.WordList(["How now?", "Brown cow?"]))
     blob2 = b("Another blob")
     # blobs have the same tokenizer
     assert_true(blob.tokenizer is blob2.tokenizer)
     # but aren't the same object
     assert_not_equal(blob, blob2)
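Outside a test, the same Blobber pattern is useful when many documents should share one set of components. A minimal sketch, assuming only the stock SentenceTokenizer from textblob.tokenizers (the ConllExtractor used above additionally requires the conll2000 corpus to be downloaded):

from textblob import Blobber
from textblob.tokenizers import SentenceTokenizer

# One tokenizer instance is created once and shared by every blob the
# Blobber produces, instead of being configured per TextBlob.
blobber = Blobber(tokenizer=SentenceTokenizer())

for text in ("How now? Brown cow?", "Another blob"):
    blob = blobber(text)
    print(blob.tokens)  # sentence-level tokens, because of SentenceTokenizer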
Example 7: the words property of a TextBlob
 def test_words(self):
     blob = tb.TextBlob('Beautiful is better than ugly. '
                         'Explicit is better than implicit.')
     assert_true(isinstance(blob.words, tb.WordList))
     assert_equal(blob.words, tb.WordList([
         'Beautiful',
         'is',
         'better',
         'than',
         'ugly',
         'Explicit',
         'is',
         'better',
         'than',
         'implicit',
         ]))
     short = tb.TextBlob("Just a bundle of words")
     assert_equal(short.words, tb.WordList([
         'Just', 'a', 'bundle', 'of', 'words'
         ]))
Example 8: WordList.count() and case sensitivity
 def test_count(self):
     wl = tb.WordList(['monty', 'python', 'Python', 'Monty'])
     assert_equal(wl.count('monty'), 2)
     assert_equal(wl.count('monty', case_sensitive=True), 1)
     assert_equal(wl.count('mon'), 0)
Example 9: strip() followed by words
 def test_strip_and_words(self):
     blob = tb.TextBlob('Beautiful is better! ')
     assert_equal(blob.strip().words,
                  tb.WordList(['Beautiful', 'is', 'better']))
Example 10: WordList.upper()
 def test_upper(self):
     wl = tb.WordList(self.words)
     assert_equal(wl.upper(), tb.WordList([w.upper() for w in self.words]))
Example 11: WordList.lower()
 def test_lower(self):
     wl = tb.WordList(['Zen', 'oF', 'PYTHON'])
     assert_equal(wl.lower(), tb.WordList(['zen', 'of', 'python']))
Example 12: passing a custom tokenizer to TextBlob
 def test_can_use_an_different_tokenizer(self):
     tokenizer = nltk.tokenize.TabTokenizer()
     blob = tb.TextBlob("This is\ttext.", tokenizer=tokenizer)
     assert_equal(blob.tokens, tb.WordList(["This is", "text."]))
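Examples 1 and 12 show the two ways to plug in a custom tokenizer. A minimal sketch outside the test harness, assuming NLTK is installed (TabTokenizer ships with NLTK and splits on tab characters only):

import nltk
from textblob import TextBlob

tokenizer = nltk.tokenize.TabTokenizer()

# Option 1: pass the tokenizer when the blob is constructed
blob = TextBlob("This is\ttext.", tokenizer=tokenizer)
print(blob.tokens)  # WordList(['This is', 'text.'])

# Option 2: keep the default tokenizer and override it per call
print(TextBlob("This is\ttext.").tokenize(tokenizer))  # same result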
Example 13: WordList.lemmatize()
 def test_lemmatize(self):
     wl = tb.WordList(["cat", "dogs", "oxen"])
     assert_equal(wl.lemmatize(), tb.WordList(['cat', 'dog', 'ox']))
Example 14: the tokens property
 def test_tokens_property(self):
     assert_equal(self.blob.tokens,
                  tb.WordList(WordTokenizer().tokenize(self.text)))
Example 15: WordList.pluralize()
 def test_pluralize(self):
     wl = tb.WordList(['dog', 'cat', 'buffalo'])
     assert_equal(wl.pluralize(), tb.WordList(['dogs', 'cats',
                                               'buffaloes']))
Example 16: WordList.extend()
 def test_extend(self):
     wl = tb.WordList(["cats", "dogs"])
     wl.extend(["buffalo", 4])
     # Strings are coerced to Word; other types are stored unchanged
     assert_true(isinstance(wl[2], tb.Word))
     assert_true(isinstance(wl[3], int))
Example 17: converting a WordList to a plain list
 def test_convert_to_list(self):
     wl = tb.WordList(self.words)
     assert_equal(list(wl), self.words)
Example 18: len() of a WordList
 def test_len(self):
     wl = tb.WordList(['Beautiful', 'is', 'better'])
     assert_equal(len(wl), 3)
Example 19: upper() followed by words
 def test_upper_and_words(self):
     blob = tb.TextBlob('beautiful is better')
     assert_equal(blob.upper().words,
                  tb.WordList(['BEAUTIFUL', 'IS', 'BETTER']))
Example 20: TextBlob.split()
 def test_split(self):
     blob = tb.TextBlob('Beautiful is better')
     assert_equal(blob.split(), tb.WordList(['Beautiful', 'is', 'better']))
Example 21: WordList.append()
 def test_append(self):
     wl = tb.WordList(['dog'])
     # Appended strings become Word objects
     wl.append("cat")
     assert_true(isinstance(wl[1], tb.Word))
     # Non-string items keep their original type
     wl.append(('a', 'tuple'))
     assert_true(isinstance(wl[2], tuple))
Example 22: WordList.singularize()
 def test_singularize(self):
     wl = tb.WordList(['dogs', 'cats', 'buffaloes', 'men', 'mice'])
     assert_equal(wl.singularize(),
                  tb.WordList(['dog', 'cat', 'buffalo', 'man', 'mouse']))
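Taken together, the tests above cover the public WordList surface. For reference, a short non-test sketch of the same calls; it assumes a recent TextBlob release that exports WordList from the top-level package and that the NLTK corpora have been fetched (python -m textblob.download_corpora):

from textblob import TextBlob, WordList

blob = TextBlob("Beautiful is better than ugly. Explicit is better than implicit.")

# blob.words is a WordList: a list subclass whose items are Word objects
print(blob.words.lower())      # WordList(['beautiful', 'is', ...])
print(blob.words.count('is'))  # 2 -- counting is case-insensitive by default
print(blob.ngrams(n=2)[:2])    # [WordList(['Beautiful', 'is']), WordList(['is', 'better'])]

# A WordList can also be built directly and supports the inflection helpers
animals = WordList(['dog', 'cat', 'buffalo'])
print(animals.pluralize())                               # WordList(['dogs', 'cats', 'buffaloes'])
print(WordList(['dogs', 'cats', 'mice']).singularize())  # WordList(['dog', 'cat', 'mouse'])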