def test_str(self):
    """str() of a WordList matches str() of the underlying plain list."""
    word_list = tb.WordList(self.words)
    assert_equal(str(word_list), str(self.words))
def test_split(self):
    """Splitting a TextBlob on whitespace yields a WordList of tokens."""
    sentence = tb.TextBlob('Beautiful is better')
    expected = tb.WordList(['Beautiful', 'is', 'better'])
    assert_equal(sentence.split(), expected)
def test_strip_and_words(self):
    """strip() composes with the .words property on a TextBlob."""
    padded = tb.TextBlob('Beautiful is better! ')
    expected = tb.WordList(['Beautiful', 'is', 'better'])
    assert_equal(padded.strip().words, expected)
def test_reverse(self):
    """reverse() mutates the WordList in place, like list.reverse()."""
    items = tb.WordList(['head', 'shoulders', 'knees', 'toes'])
    items.reverse()
    assert_equal(list(items), ['toes', 'knees', 'shoulders', 'head'])
def test_upper_and_words(self):
    """upper() composes with the .words property on a TextBlob."""
    lowercase = tb.TextBlob('beautiful is better')
    expected = tb.WordList(['BEAUTIFUL', 'IS', 'BETTER'])
    assert_equal(lowercase.upper().words, expected)
def test_singularize(self):
    """singularize() handles regular and irregular plural forms."""
    plurals = tb.WordList(['dogs', 'cats', 'buffaloes', 'men', 'mice'])
    singulars = tb.WordList(['dog', 'cat', 'buffalo', 'man', 'mouse'])
    assert_equal(plurals.singularize(), singulars)
def test_lemmatize(self):
    """lemmatize() reduces each word in the list to its lemma."""
    inflected = tb.WordList(["cat", "dogs", "oxen"])
    assert_equal(inflected.lemmatize(), tb.WordList(['cat', 'dog', 'ox']))
def test_count(self):
    """count() is case-insensitive by default and matches whole words only."""
    names = tb.WordList(['monty', 'python', 'Python', 'Monty'])
    # default: case-insensitive, so 'monty' and 'Monty' both match
    assert_equal(names.count('monty'), 2)
    # exact-case match only
    assert_equal(names.count('monty', case_sensitive=True), 1)
    # substrings are not counted
    assert_equal(names.count('mon'), 0)
def test_convert_to_list(self):
    """list() on a WordList round-trips back to the source words."""
    word_list = tb.WordList(self.words)
    assert_equal(list(word_list), self.words)
def test_slice_repr(self):
    """Slicing a WordList keeps the WordList repr (u-prefixed on PY2)."""
    words = tb.WordList(['Beautiful', 'is', 'better'])
    expected = ("WordList([u'Beautiful', u'is'])" if PY2
                else "WordList(['Beautiful', 'is'])")
    assert_equal(repr(words[:2]), expected)
def test_lower(self):
    """lower() lowercases every word in the list."""
    mixed_case = tb.WordList(['Zen', 'oF', 'PYTHON'])
    assert_equal(mixed_case.lower(), tb.WordList(['zen', 'of', 'python']))
def test_stem(self):
    """stem() applies the default stemmer (only PorterStemmer tested)."""
    inflected = tb.WordList(["cat", "dogs", "oxen"])
    # Porter does not reduce 'oxen' — irregular plurals are untouched
    assert_equal(inflected.stem(), tb.WordList(['cat', 'dog', 'oxen']))
def test_join(self):
    """TextBlob.join accepts plain lists and WordLists alike."""
    tokens = ['explicit', 'is', 'better']
    word_list = tb.WordList(tokens)
    expected = tb.TextBlob('explicit is better')
    assert_equal(tb.TextBlob(' ').join(tokens), expected)
    assert_equal(tb.TextBlob(' ').join(word_list), expected)
def test_words_includes_apostrophes_in_contractions(self):
    """Tokenization splits contractions but keeps the apostrophe tokens."""
    blob = tb.TextBlob("Let's test this.")
    assert_equal(blob.words, tb.WordList(['Let', "'s", "test", "this"]))
    blob2 = tb.TextBlob("I can't believe it's not butter.")
    expected2 = tb.WordList(
        ['I', 'ca', "n't", "believe", 'it', "'s", "not", "butter"])
    assert_equal(blob2.words, expected2)
def test_tokens_property(self):
    """The .tokens property equals the WordTokenizer output for the text.

    Bug fix: the original used ``assert_true(a, b)``, which only checks the
    truthiness of ``a`` and treats ``b`` as the failure *message* — the two
    values were never actually compared. ``assert_equal`` performs the
    intended equality check (matching every other test in this suite).
    """
    assert_equal(self.blob.tokens,
                 tb.WordList(WordTokenizer().tokenize(self.text)))
def test_append(self):
    """append() coerces strings to Word but leaves other types alone."""
    word_list = tb.WordList(['dog'])
    word_list.append("cat")
    assert_true(isinstance(word_list[1], tb.Word))
    word_list.append(('a', 'tuple'))
    assert_true(isinstance(word_list[2], tuple))
def test_can_use_an_different_tokenizer(self):
    """A custom tokenizer passed to TextBlob drives .tokens."""
    tab_tokenizer = nltk.tokenize.TabTokenizer()
    blob = tb.TextBlob("This is\ttext.", tokenizer=tab_tokenizer)
    assert_equal(blob.tokens, tb.WordList(["This is", "text."]))
def test_extend(self):
    """extend() coerces strings to Word but leaves other types alone."""
    word_list = tb.WordList(["cats", "dogs"])
    word_list.extend(["buffalo", 4])
    assert_true(isinstance(word_list[2], tb.Word))
    assert_true(isinstance(word_list[3], int))
def test_pluralize(self):
    """pluralize() maps each word in the list to its plural form."""
    singulars = tb.WordList(['dog', 'cat', 'buffalo'])
    plurals = tb.WordList(['dogs', 'cats', 'buffaloes'])
    assert_equal(singulars.pluralize(), plurals)
def test_len(self):
    """len() of a WordList is the number of words it holds."""
    words = tb.WordList(['Beautiful', 'is', 'better'])
    assert_equal(len(words), 3)
def test_upper(self):
    """upper() uppercases every word, matching str.upper elementwise."""
    word_list = tb.WordList(self.words)
    uppercased = tb.WordList([word.upper() for word in self.words])
    assert_equal(word_list.upper(), uppercased)
def test_setitem(self):
    """Item assignment replaces the word at the given index."""
    languages = tb.WordList(['I', 'love', 'JavaScript'])
    languages[2] = tb.Word('Python')
    assert_equal(languages[2], tb.Word('Python'))