Example #1
 def test_join(self):
     l = ['explicit', 'is', 'better']
     wl = tb.WordList(l)
     assert_equal(
         tb.TextBlob(' ').join(l), tb.TextBlob('explicit is better'))
     assert_equal(
         tb.TextBlob(' ').join(wl), tb.TextBlob('explicit is better'))
Example #2
 def test_bad_init(self):
     with assert_raises(TypeError):
         tb.TextBlob(['bad'])
     with assert_raises(ValueError):
         tb.TextBlob("this is fine", np_extractor="this is not fine")
     with assert_raises(ValueError):
         tb.TextBlob("this is fine", pos_tagger="this is not fine")
Example #3
 def test_indices_with_multiple_punctuations(self):
     blob = tb.TextBlob(
         "Hello world. How do you do?! This has an ellipses...")
     sent1, sent2, sent3 = blob.sentences
     assert_equal(blob[sent2.start:sent2.end],
                  tb.TextBlob("How do you do?!"))
     assert_equal(blob[sent3.start:sent3.end],
                  tb.TextBlob("This has an ellipses..."))
Example #4
    def test_translate_non_ascii(self):
        blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
        translated = blob.translate(from_lang="ar", to="en")
        assert_equal(translated, "With full sovereignty")

        chinese_blob = tb.TextBlob(unicode("美丽优于丑陋"))
        translated = chinese_blob.translate(from_lang="zh-CN", to='en')
        assert_equal(translated, "Beautiful is better than ugly")
Example #5
 def test_sentiment(self):
     positive = tb.TextBlob('This is the best, most amazing '
                         'text-processing library ever!')
     assert_true(positive.sentiment[0] > 0.0)
     negative = tb.TextBlob("bad bad bitches that's my muthufuckin problem.")
     assert_true(negative.sentiment[0] < 0.0)
     zen = tb.TextBlob(self.text)
     assert_equal(round(zen.sentiment[0], 1), 0.2)
Example #6
    def test_cmp(self):
        blob1 = tb.TextBlob('lorem ipsum')
        blob2 = tb.TextBlob('lorem ipsum')
        blob3 = tb.TextBlob('dolor sit amet')

        assert_true(blob1 == blob2)  # test ==
        assert_true(blob1 > blob3)  # test >
        assert_true(blob3 < blob2)  # test <
Example #7
 def test_translate(self):
     blob = tb.TextBlob("This is a sentence.")
     translated = blob.translate(to="es")
     assert_true(isinstance(translated, tb.TextBlob))
     assert_equal(translated, "Esta es una frase.")
     es_blob = tb.TextBlob("Esta es una frase.")
     to_en = es_blob.translate(from_lang="es", to="en")
     assert_equal(to_en, "This is a phrase .")
Example #8
 def test_multiple_punctuation_at_end_of_sentence(self):
     '''Test sentences that have multiple punctuation marks
     at the end of the sentence.'''
     blob = tb.TextBlob('Get ready! This has an ellipses...')
     assert_equal(len(blob.sentences), 2)
     assert_equal(blob.sentences[1].raw, 'This has an ellipses...')
     blob2 = tb.TextBlob('OMG! I am soooo LOL!!!')
     assert_equal(len(blob2.sentences), 2)
     assert_equal(blob2.sentences[1].raw, 'I am soooo LOL!!!')
Example #9
 def test_correct(self):
     blob = tb.TextBlob("I havv bad speling.")
     assert_true(isinstance(blob.correct(), tb.TextBlob))
     assert_equal(blob.correct(), tb.TextBlob("I have bad spelling."))
     blob2 = tb.TextBlob("I am so exciited!!!")
     assert_equal(blob2.correct(), "I am so excited!!!")
     blob3 = tb.TextBlob("The meaning of life is 42.0.")
     assert_equal(blob3.correct(), "The meaning of life is 42.0.")
     blob4 = tb.TextBlob("?")
     assert_equal(blob4.correct(), "?")
Example #10
 def test_passing_bad_init_params(self):
     tagger = PatternTagger()
     with assert_raises(ValueError):
         tb.TextBlob("blah", parser=tagger)
     with assert_raises(ValueError):
         tb.TextBlob("blah", np_extractor=tagger)
     with assert_raises(ValueError):
         tb.TextBlob("blah", tokenizer=tagger)
     with assert_raises(ValueError):
         tb.TextBlob("blah", analyzer=tagger)
     analyzer = PatternAnalyzer
     with assert_raises(ValueError):
         tb.TextBlob("blah", pos_tagger=analyzer)
Example #11
    def test_add(self):
        blob1 = tb.TextBlob('Hello, world! ')
        blob2 = tb.TextBlob('Hola mundo!')
        # Can add two text blobs
        assert_equal(blob1 + blob2, tb.TextBlob('Hello, world! Hola mundo!'))
        # Can also add a string to a tb.TextBlob
        assert_equal(blob1 + 'Hola mundo!',
                     tb.TextBlob('Hello, world! Hola mundo!'))
        # Or both
        assert_equal(blob1 + blob2 + ' Goodbye!',
                     tb.TextBlob('Hello, world! Hola mundo! Goodbye!'))

        # operands must be strings
        assert_raises(TypeError, blob1.__add__, ['hello'])
Example #12
 def test_sentiment_of_foreign_text(self):
     blob = tb.TextBlob(
         u'Nous avons cherch\xe9 un motel dans la r\xe9gion de '
         'Madison, mais les motels ne sont pas nombreux et nous avons '
         'finalement choisi un Motel 6, attir\xe9s par le bas '
         'prix de la chambre.')
     assert_true(isinstance(blob.sentiment[0], float))
Example #13
 def test_classify(self):
     blob = tb.TextBlob(
         "This is an amazing library. What an awesome classifier!",
         classifier=classifier)
     assert_equal(blob.classify(), 'pos')
     for s in blob.sentences:
         assert_equal(s.classify(), 'pos')
Example #14
 def test_tokenize_method(self):
     tokenizer = nltk.tokenize.TabTokenizer()
     blob = tb.TextBlob("This is\ttext.")
     # If called without arguments, should default to WordTokenizer
     assert_equal(blob.tokenize(), tb.WordList(["This", "is", "text", "."]))
     # Pass in the TabTokenizer
     assert_equal(blob.tokenize(tokenizer), tb.WordList(["This is", "text."]))
Example #15
 def test_clean_html(self):
     html = '<b>Python</b> is a widely used <a href="/wiki/General-purpose_programming_language" title="General-purpose programming language">general-purpose</a>, <a href="/wiki/High-level_programming_language" title="High-level programming language">high-level programming language</a>.'
     blob = tb.TextBlob(html, clean_html=True)
     assert_equal(
         blob.string,
         'Python is a widely used general-purpose , high-level programming language .'
     )
Example #16
 def test_blob_with_no_sentences(self):
     text = "this isn't really a sentence it's just a long string of words"
     blob = tb.TextBlob(text)
     # the blob just has one sentence
     assert_equal(len(blob.sentences), 1)
     # the start index is 0, the end index is len(text) - 1
     assert_equal(blob.sentences[0].start_index, 0)
     assert_equal(blob.sentences[0].end_index, len(text))
Example #17
    def test_init(self):
        blob = tb.TextBlob('Wow I love this place. It really rocks my socks!!!')
        assert_equal(len(blob.sentences), 2)
        assert_equal(blob.sentences[1].stripped, 'it really rocks my socks')
        assert_equal(blob.string, blob.raw)

        # Must initialize with a string
        assert_raises(TypeError, tb.TextBlob.__init__, ['invalid'])
Example #18
    def setUp(self):
        self.text = \
            """Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!"""
        self.blob = tb.TextBlob(self.text)

        self.np_test_text = '''
Python is a widely used general-purpose, high-level programming language.
Its design philosophy emphasizes code readability, and its syntax allows
programmers to express concepts in fewer
lines of code than would be possible in languages such as C.
The language provides constructs intended to enable clear programs on both a small and large scale.
Python supports multiple programming paradigms, including object-oriented,
imperative and functional programming or procedural styles.
It features a dynamic type system and automatic memory management and
has a large and comprehensive standard library. Like other dynamic languages, Python is often used as a scripting language,
but is also used in a wide range of non-scripting contexts.
Using third-party tools, Python code can be packaged into standalone executable
programs. Python interpreters are available for many operating systems. CPython, the reference implementation of Python, is free and open source software and
has a community-based development model, as do nearly all of its alternative implementations. CPython
is managed by the non-profit Python Software Foundation.'''
        self.np_test_blob = tb.TextBlob(self.np_test_text)

        self.short = "Beautiful is better than ugly. "
        self.short_blob = tb.TextBlob(self.short)
Example #19
 def test_words(self):
     blob = tb.TextBlob('Beautiful is better than ugly. '
                         'Explicit is better than implicit.')
     assert_true(isinstance(blob.words, tb.WordList))
     assert_equal(blob.words, tb.WordList([
         'Beautiful',
         'is',
         'better',
         'than',
         'ugly',
         'Explicit',
         'is',
         'better',
         'than',
         'implicit',
         ]))
     short = tb.TextBlob("Just a bundle of words")
     assert_equal(short.words, tb.WordList([
         'Just', 'a', 'bundle', 'of', 'words'
         ]))
Example #20
 def test_json(self):
     blob = tb.TextBlob('Beautiful is better than ugly. ')
     assert_equal(blob.json, blob.to_json())
     blob_dict = json.loads(blob.json)[0]
     assert_equal(blob_dict['stripped'], 'beautiful is better than ugly')
     assert_equal(blob_dict['noun_phrases'], blob.sentences[0].noun_phrases)
     assert_equal(blob_dict['start_index'], blob.sentences[0].start)
     assert_equal(blob_dict['end_index'], blob.sentences[0].end)
     assert_almost_equal(blob_dict['polarity'],
                         blob.sentences[0].polarity, places=4)
     assert_almost_equal(blob_dict['subjectivity'],
                         blob.sentences[0].subjectivity, places=4)
Example #21
    def test_word_counts(self):
        blob = tb.TextBlob('Buffalo buffalo ate my blue buffalo.')
        assert_equal(dict(blob.word_counts), {
            'buffalo': 3,
            'ate': 1,
            'my': 1,
            'blue': 1
        })
        assert_equal(blob.word_counts['buffalo'], 3)
        assert_equal(blob.words.count('buffalo'), 3)
        assert_equal(blob.words.count('buffalo', case_sensitive=True), 2)
        assert_equal(blob.word_counts['blue'], 1)
        assert_equal(blob.words.count('blue'), 1)
        assert_equal(blob.word_counts['ate'], 1)
        assert_equal(blob.words.count('ate'), 1)
        assert_equal(blob.word_counts['buff'], 0)
        assert_equal(blob.words.count('buff'), 0)

        blob2 = tb.TextBlob(self.text)
        assert_equal(blob2.words.count('special'), 2)
        assert_equal(blob2.words.count('special', case_sensitive=True), 1)
Example #22
 def test_ngrams(self):
     blob = tb.TextBlob("I am eating a pizza.")
     three_grams = blob.ngrams()
     assert_equal(three_grams, [
         tb.WordList(('I', 'am', 'eating')),
         tb.WordList(('am', 'eating', 'a')),
         tb.WordList(('eating', 'a', 'pizza'))
     ])
     four_grams = blob.ngrams(n=4)
     assert_equal(four_grams, [
         tb.WordList(('I', 'am', 'eating', 'a')),
         tb.WordList(('am', 'eating', 'a', 'pizza'))
     ])
Example #23
 def test_pos_tags(self):
     blob = tb.TextBlob('Simple is better than complex. '
                        'Complex is better than complicated.')
     assert_equal(blob.pos_tags, [
         ('Simple', 'NN'),
         ('is', 'VBZ'),
         ('better', 'JJR'),
         ('than', 'IN'),
         ('complex', 'NN'),
         ('Complex', 'NNP'),
         ('is', 'VBZ'),
         ('better', 'RBR'),
         ('than', 'IN'),
         ('complicated', 'VBN'),
     ])
Example #24
 def tokenize(self, text):
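     # Fragment of the CharTokenizer class shown in full in Example #25;
     # 'blob' here refers to the text.blob module imported in that example.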
     all_chars = []
     words = blob.TextBlob(text).words
     for w in words:
         all_chars.extend(list('^' + w + '$'))
     return all_chars
Example #25
import text.blob as blob
import text.tokenizers as tokenizers


class CharTokenizer(tokenizers.BaseTokenizer):
    def tokenize(self, text):
        all_chars = []
        words = blob.TextBlob(text).words
        for w in words:
            all_chars.extend(list('^' + w + '$'))
        return all_chars


eg_text = "Textblob is amazingly simple to use. What great fun!"
bigrams = blob.TextBlob(eg_text, tokenizer=CharTokenizer())
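A brief usage sketch (not part of the original example): assuming the blob's tokens property delegates to the tokenizer passed at construction, as Example #28 demonstrates, the character-level output of CharTokenizer can be inspected directly; the expected values below are illustrative.

# Illustrative check: CharTokenizer wraps each word in '^'...'$' markers,
# so the first word "Textblob" expands into character tokens.
assert list(bigrams.tokens[:6]) == ['^', 'T', 'e', 'x', 't', 'b']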
Example #26
 def test_classify_without_classifier(self):
     blob = tb.TextBlob("This isn't gonna be good")
     with assert_raises(NameError):
         blob.classify()
Example #27
 def test_parse(self):
     blob = tb.TextBlob("And now for something completely different.")
     assert_equal(blob.parse(), PatternParser().parse(blob.string))
Example #28
 def test_can_use_a_different_tokenizer(self):
     tokenizer = nltk.tokenize.TabTokenizer()
     blob = tb.TextBlob("This is\ttext.", tokenizer=tokenizer)
     assert_equal(blob.tokens, tb.WordList(["This is", "text."]))
Example #29
 def test_detect_non_ascii(self):
     blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
     assert_equal(blob.detect_language(), "ar")
Example #30
 def test_detect(self):
     es_blob = tb.TextBlob("Hola")
     assert_equal(es_blob.detect_language(), "es")
     en_blob = tb.TextBlob("Hello")
     assert_equal(en_blob.detect_language(), "en")