def test_detect_non_ascii(self):
     """Check language detection on Chinese, Bulgarian and Russian text."""
     # (sample text, expected language code), checked in this order.
     samples = (
         ("关于中文维基百科", 'zh-CN'),
         ("известен още с псевдонимите", "bg"),
         ("Избранная статья", "ru"),
     )
     for text, expected_code in samples:
         detected = self.translator.detect(unicode(text))
         assert_equal(detected, expected_code)
Exemple #2
0
 def test_detect_non_ascii(self):
     """The translator should detect the language of non-ASCII input."""
     detect = self.translator.detect

     # Simplified Chinese
     assert_equal(detect(unicode("关于中文维基百科")), 'zh-CN')
     # Bulgarian
     assert_equal(detect(unicode("известен още с псевдонимите")), "bg")
     # Russian
     assert_equal(detect(unicode("Избранная статья")), "ru")
Exemple #3
0
    def test_translate_non_ascii(self):
        """Translate Arabic and Chinese blobs into English."""
        arabic_blob = tb.TextBlob(unicode("ذات سيادة كاملة"))
        assert_equal(arabic_blob.translate(from_lang="ar", to="en"),
                     "With full sovereignty")

        zh_blob = tb.TextBlob(unicode("美丽优于丑陋"))
        assert_equal(zh_blob.translate(from_lang="zh-CN", to='en'),
                     "Beautiful is better than ugly")
Exemple #4
0
    def test_translate_non_ascii(self):
        """Non-ASCII blobs should translate to the expected English text."""
        # (source text, source language code, expected English translation)
        cases = (
            ("ذات سيادة كاملة", "ar", "With full sovereignty"),
            ("美丽优于丑陋", "zh-CN", "Beautiful is better than ugly"),
        )
        for text, source_lang, expected in cases:
            blob = tb.TextBlob(unicode(text))
            result = blob.translate(from_lang=source_lang, to="en")
            assert_equal(result, expected)
Exemple #5
0
    def pos_tags(self):
        '''Return a list of (word, POS tag) tuples for the raw text.

        Entries whose tag matches ``PUNCTUATION_REGEX`` are left out.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        '''
        tagged = []
        for token, tag in self.pos_tagger.tag(self.raw):
            # Skip entries whose tag is matched by the punctuation pattern.
            if PUNCTUATION_REGEX.match(unicode(tag)):
                continue
            tagged.append((Word(token, pos_tag=tag), unicode(tag)))
        return tagged
Exemple #6
0
 def __repr__(self):
     '''Return a debugging representation of the collection.

     Collections longer than 60 items are abbreviated to their first
     three and last three entries.
     '''
     name = self.__class__.__name__
     # Text form of every item in the underlying collection
     items = [unicode(item) for item in self._collection]
     if len(self) <= 60:
         return '{cls}({lst})'.format(cls=name, lst=items)
     head, tail = items[:3], items[-3:]
     return '{cls}({beginning}...{end})'.format(cls=name,
                                                beginning=head,
                                                end=tail)
Exemple #7
0
 def test_detect_non_ascii(self):
     """An Arabic blob should be detected as language code "ar"."""
     arabic_text = unicode("ذات سيادة كاملة")
     detected = tb.TextBlob(arabic_text).detect_language()
     assert_equal(detected, "ar")
Exemple #8
0
 def test_translate_detects_language_by_default(self):
     """translate() with no arguments should still yield the English text."""
     arabic_text = unicode("ذات سيادة كاملة")
     translated = tb.TextBlob(arabic_text).translate()
     assert_equal(translated, "With full sovereignty")
Exemple #9
0
def parse(s, *args, **kwargs):
    """ Returns a tagged Unicode string.

    The input is converted with unicode() and handed to parser.parse()
    together with any extra positional and keyword arguments.
    """
    text = unicode(s)
    return parser.parse(text, *args, **kwargs)
Exemple #10
0
def positive(s, threshold=0.1, **kwargs):
    """ Returns True if the polarity of the given sentence is at least
        the threshold (polarity >= threshold).
    """
    score = polarity(unicode(s), **kwargs)
    return score >= threshold
Exemple #11
0
def subjectivity(s, **kwargs):
    """ Returns the subjectivity of the sentence (objective/subjective,
        between 0.0 and 1.0): the second item of the sentiment() result.
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[1]
Exemple #12
0
def polarity(s, **kwargs):
    """ Returns the polarity of the sentence (positive/negative,
        between -1.0 and 1.0): the first item of the sentiment() result.
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[0]
Exemple #13
0
def parsetree(s, *args, **kwargs):
    """ Returns a Text built from the parsed form of the given string.

    Extra positional and keyword arguments are forwarded to parse().
    """
    tagged = parse(unicode(s), *args, **kwargs)
    return Text(tagged)
Exemple #14
0
 def test_detect_non_ascii(self):
     """Language detection on an Arabic TextBlob should report "ar"."""
     sample = tb.TextBlob(unicode("ذات سيادة كاملة"))
     language_code = sample.detect_language()
     assert_equal(language_code, "ar")
Exemple #15
0
 def test_translate_detects_language_by_default(self):
     """Calling translate() without a source language should still work."""
     sample = tb.TextBlob(unicode("ذات سيادة كاملة"))
     english = sample.translate()
     assert_equal(english, "With full sovereignty")