Exemple #1
0
 def test_detect_non_ascii(self):
     # Each pair is (non-ASCII sample text, language code detect() must return).
     samples = (
         ("关于中文维基百科", 'zh-CN'),
         ("известен още с псевдонимите", "bg"),
         ("Избранная статья", "ru"),
     )
     for raw_text, expected_code in samples:
         detected = self.translator.detect(unicode(raw_text))
         assert_equal(detected, expected_code)
Exemple #2
0
 def test_detect(self, mock_get_json5):
     # mock_get_json5 is supplied by the caller (presumably a patch decorator
     # outside this block); each canned payload below is what it hands back
     # while detect() runs.
     english_payload = unicode(
         '[[["This is a sentence",'
         '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]')
     mock_get_json5.return_value = english_payload
     assert_equal(self.translator.detect(self.sentence), "en")

     # Second payload: a response whose language field is "es".
     spanish_payload = unicode(
         '[[["Hello","Hola","",""]],[["interjection",'
         '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
         '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
         '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
         '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
         '["Hi!",["\xa1Hola!"],,0.24506053],'
         '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
         ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
         '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
         '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
         '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
         '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
         '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
         '"es",,[["Hello",[1],true,false,783,0,1,0]],'
         '[["Hola",1,[["Hello",783,true,false],'
         '["Hi",214,true,false],["Hola",1,true,false],'
         '["Hey",0,true,false],["Welcome",0,true,false]],'
         '[[0,4]],"Hola"]],,,[],4]')
     mock_get_json5.return_value = spanish_payload
     assert_equal(self.translator.detect("Hola"), "es")
Exemple #3
0
 def test_detect_non_ascii(self):
     # Each pair is (non-ASCII sample text, language code detect() must return).
     samples = (
         ("关于中文维基百科", 'zh-CN'),
         ("известен още с псевдонимите", "bg"),
         ("Избранная статья", "ru"),
     )
     for raw_text, expected_code in samples:
         detected = self.translator.detect(unicode(raw_text))
         assert_equal(detected, expected_code)
Exemple #4
0
 def test_detect(self, mock_get_json5):
     # mock_get_json5 is supplied by the caller (presumably a patch decorator
     # outside this block); each canned payload below is what it hands back
     # while detect() runs.
     english_payload = unicode(
         '[[["This is a sentence",'
         '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]'
     )
     mock_get_json5.return_value = english_payload
     assert_equal(self.translator.detect(self.sentence), "en")

     # Second payload: a response whose language field is "es".
     spanish_payload = unicode(
         '[[["Hello","Hola","",""]],[["interjection",'
         '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
         '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
         '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
         '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
         '["Hi!",["\xa1Hola!"],,0.24506053],'
         '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
         ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
         '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
         '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
         '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
         '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
         '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
         '"es",,[["Hello",[1],true,false,783,0,1,0]],'
         '[["Hola",1,[["Hello",783,true,false],'
         '["Hi",214,true,false],["Hola",1,true,false],'
         '["Hey",0,true,false],["Welcome",0,true,false]],'
         '[[0,4]],"Hola"]],,,[],4]'
     )
     mock_get_json5.return_value = spanish_payload
     assert_equal(self.translator.detect("Hola"), "es")
Exemple #5
0
    def test_translate_non_ascii(self):
        # Arabic source text translated into English.
        arabic = unicode("ذات سيادة كاملة")
        from_arabic = self.translator.translate(
            arabic, from_lang='ar', to_lang='en')
        assert_equal(from_arabic, "With full sovereignty")

        # Simplified-Chinese source text translated into English.
        chinese = unicode("美丽优于丑陋")
        from_chinese = self.translator.translate(
            chinese, from_lang="zh-CN", to_lang='en')
        assert_equal(from_chinese, "Beautiful is better than ugly")
Exemple #6
0
    def test_translate_non_ascii(self):
        # Arabic source text translated into English.
        arabic = unicode("ذات سيادة كاملة")
        from_arabic = self.translator.translate(
            arabic, from_lang='ar', to_lang='en')
        assert_equal(from_arabic, "With full sovereignty")

        # Simplified-Chinese source text translated into English.
        chinese = unicode("美丽优于丑陋")
        from_chinese = self.translator.translate(
            chinese, from_lang="zh-CN", to_lang='en')
        assert_equal(from_chinese, "Beautiful is better than ugly")
Exemple #7
0
    def test_translate_non_ascii(self):
        # (source text, source language code, expected English translation)
        cases = (
            ("ذات سيادة كاملة", "ar", "With full sovereignty"),
            ("美丽优于丑陋", "zh-CN", "Beautiful is better than ugly"),
        )
        for raw_text, source_lang, expected in cases:
            source_blob = tb.TextBlob(unicode(raw_text))
            english = source_blob.translate(from_lang=source_lang, to="en")
            assert_equal(english, expected)
Exemple #8
0
    def test_translate_non_ascii(self):
        # (source text, source language code, expected English translation)
        cases = (
            ("ذات سيادة كاملة", "ar", "With full sovereignty"),
            ("美丽优于丑陋", "zh-CN", "Beautiful is better than ugly"),
        )
        for raw_text, source_lang, expected in cases:
            source_blob = tb.TextBlob(unicode(raw_text))
            english = source_blob.translate(from_lang=source_lang, to="en")
            assert_equal(english, expected)
Exemple #9
0
 def test_failed_translation_raises_not_translated(self, mock_get_json5):
     # Canned payload in which the "trans" field is identical to "orig",
     # i.e. the text comes back unchanged.
     mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                     '"n0tv\\u0026l1d","orig":'
                                     '"n0tv\\u0026l1d","translit":"",'
                                     '"src_translit":""}],'
                                     '"src":"en","server_time":2}')
     text = unicode(' n0tv&l1d ')
     assert_raises(NotTranslated,
                   self.translator.translate, text, to_lang="es")
     # `mock.called_once` is not part of the Mock API: it is an auto-created
     # child mock that is always truthy (and raises AttributeError on
     # Python 3.12+), so the old assertion could never fail. Check the
     # recorded call count instead.
     assert_true(mock_get_json5.call_count == 1)
Exemple #10
0
    def pos_tags(self):
        """Return the part-of-speech tags as a list of ``(word, tag)`` tuples.

        The tagger is run on ``self.raw``; any token whose tag matches
        ``PUNCTUATION_REGEX`` is left out of the result.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        tagged_words = []
        for token, tag in self.pos_tagger.tag(self.raw):
            tag_text = unicode(tag)
            if PUNCTUATION_REGEX.match(tag_text):
                # Punctuation tags are not reported.
                continue
            tagged_words.append((Word(token, pos_tag=tag), tag_text))
        return tagged_words
Exemple #11
0
 def test_detect_parses_json5(self, mock_get_json5):
     # Each canned payload carries a different "src" value; detect() is
     # expected to report exactly that value.
     english_payload = unicode(
         '{"sentences":[{"trans":'
         '"This is a sentence.","orig":'
         '"This is a sentence.","translit":"",'
         '"src_translit":""}],"src":"en",'
         '"server_time":1}')
     mock_get_json5.return_value = english_payload
     assert_equal(self.translator.detect(self.sentence), "en")

     spanish_payload = unicode(
         '{"sentences":[{"trans":'
         '"Hello","orig":"Hola",'
         '"translit":"","src_translit":""}],'
         '"src":"es","server_time":2}')
     mock_get_json5.return_value = spanish_payload
     assert_equal(self.translator.detect("Hola"), "es")
Exemple #12
0
 def test_detect_parses_json5(self, mock_get_json5):
     # Each canned payload carries a different "src" value; detect() is
     # expected to report exactly that value.
     english_payload = unicode(
         '{"sentences":[{"trans":'
         '"This is a sentence.","orig":'
         '"This is a sentence.","translit":"",'
         '"src_translit":""}],"src":"en",'
         '"server_time":1}')
     mock_get_json5.return_value = english_payload
     assert_equal(self.translator.detect(self.sentence), "en")

     spanish_payload = unicode(
         '{"sentences":[{"trans":'
         '"Hello","orig":"Hola",'
         '"translit":"","src_translit":""}],'
         '"src":"es","server_time":2}')
     mock_get_json5.return_value = spanish_payload
     assert_equal(self.translator.detect("Hola"), "es")
Exemple #13
0
    def pos_tags(self):
        '''Return the part-of-speech tags as a list of ``(word, tag)`` tuples.

        The tagger is run on ``self.raw``; any token whose tag matches
        ``PUNCTUATION_REGEX`` is left out of the result.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        '''
        tagged_words = []
        for token, tag in self.pos_tagger.tag(self.raw):
            tag_text = unicode(tag)
            if PUNCTUATION_REGEX.match(tag_text):
                # Punctuation tags are not reported.
                continue
            tagged_words.append((Word(token, pos_tag=tag), tag_text))
        return tagged_words
Exemple #14
0
 def test_translate_detects_language_by_default(self, mock_detect,
         mock_get_json5, mock_get_language, mock_get_translation):
     # All four mocks are injected by the caller (presumably patch decorators
     # outside this block -- confirm their targets when reviewing).
     mock_get_language.return_value = 'ar'
     mock_get_translation.return_value = 'Fully sovereign'
     text = unicode("ذات سيادة كاملة")
     blob = tb.TextBlob(text)
     # The original never invoked translate(), so detection could not have
     # been triggered by this test at all.
     blob.translate()
     # `called_once_with` is not a Mock assertion: it is an auto-created
     # child mock whose return value is always truthy, so the previous
     # `assert_true(mock_detect.called_once_with(text))` could never fail.
     mock_detect.assert_called_once_with(text)
Exemple #15
0
 def test_translate_detects_language_by_default(self, mock_detect,
         mock_get_json5, mock_get_language, mock_get_translation):
     # All four mocks are injected by the caller (presumably patch decorators
     # outside this block -- confirm their targets when reviewing).
     mock_get_language.return_value = 'ar'
     mock_get_translation.return_value = 'Fully sovereign'
     text = unicode("ذات سيادة كاملة")
     blob = tb.TextBlob(text)
     # The original never invoked translate(), so detection could not have
     # been triggered by this test at all.
     blob.translate()
     # `called_once_with` is not a Mock assertion: it is an auto-created
     # child mock whose return value is always truthy, so the previous
     # `assert_true(mock_detect.called_once_with(text))` could never fail.
     mock_detect.assert_called_once_with(text)
Exemple #16
0
    def pos_tags(self):
        """Return the part-of-speech tags as a list of ``(word, tag)`` tuples.

        A ``TextBlob`` concatenates the ``pos_tags`` of each of its
        sentences, in order. Any other instance is tagged directly, and
        tokens whose tag matches ``PUNCTUATION_REGEX`` are left out.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        if not isinstance(self, TextBlob):
            tagged_words = []
            for token, tag in self.pos_tagger.tag(self):
                tag_text = unicode(tag)
                if PUNCTUATION_REGEX.match(tag_text):
                    # Punctuation tags are not reported.
                    continue
                tagged_words.append((Word(unicode(token), pos_tag=tag), tag_text))
            return tagged_words

        # Gather every sentence's tags first, then flatten them into one list.
        per_sentence = [sentence.pos_tags for sentence in self.sentences]
        flattened = []
        for sentence_tags in per_sentence:
            flattened.extend(sentence_tags)
        return flattened
Exemple #17
0
 def test_translate(self, mock_get_json5):
     # Canned payload whose "trans" field is the expected Spanish sentence.
     mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                     '"Esta es una frase.","orig":'
                                     '"This is a sentence.","translit":"",'
                                     '"src_translit":""}],"src":"en",'
                                     '"server_time":2}')
     t = self.translator.translate(self.sentence, to_lang="es")
     assert_equal(t, "Esta es una frase.")
     # `mock.called_once` is not part of the Mock API: it is an auto-created
     # child mock that is always truthy (and raises AttributeError on
     # Python 3.12+), so the old assertion could never fail. Check the
     # recorded call count instead.
     assert_equal(mock_get_json5.call_count, 1)
Exemple #18
0
 def test_translate(self, mock_get_json5):
     # Canned payload whose "trans" field is the expected Spanish sentence.
     mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                     '"Esta es una frase.","orig":'
                                     '"This is a sentence.","translit":"",'
                                     '"src_translit":""}],"src":"en",'
                                     '"server_time":2}')
     t = self.translator.translate(self.sentence, to_lang="es")
     assert_equal(t, "Esta es una frase.")
     # `mock.called_once` is not part of the Mock API: it is an auto-created
     # child mock that is always truthy (and raises AttributeError on
     # Python 3.12+), so the old assertion could never fail. Check the
     # recorded call count instead.
     assert_equal(mock_get_json5.call_count, 1)
Exemple #19
0
    def pos_tags(self):
        """Return the part-of-speech tags as a list of ``(word, tag)`` tuples.

        A ``TextBlob`` concatenates the ``pos_tags`` of each of its
        sentences, in order. Any other instance is tagged directly, and
        tokens whose tag matches ``PUNCTUATION_REGEX`` are left out.

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        if not isinstance(self, TextBlob):
            tagged_words = []
            for token, tag in self.pos_tagger.tag(self):
                tag_text = unicode(tag)
                if PUNCTUATION_REGEX.match(tag_text):
                    # Punctuation tags are not reported.
                    continue
                tagged_words.append((Word(token, pos_tag=tag), tag_text))
            return tagged_words

        # Gather every sentence's tags first, then flatten them into one list.
        per_sentence = [sentence.pos_tags for sentence in self.sentences]
        flattened = []
        for sentence_tags in per_sentence:
            flattened.extend(sentence_tags)
        return flattened
Exemple #20
0
 def __repr__(self):
     '''Build a debugging representation of the collection.

     Every item is converted with ``unicode()``. When ``len(self)`` exceeds
     60 only the first three and last three items are shown, joined by
     ``...``; otherwise the full list is shown.
     '''
     items = [unicode(item) for item in self._collection]
     name = self.__class__.__name__
     if len(self) <= 60:
         return '{cls}({lst})'.format(cls=name, lst=items)
     head = items[:3]
     tail = items[-3:]
     return '{cls}({beginning}...{end})'.format(
         cls=name, beginning=head, end=tail)
Exemple #21
0
 def __repr__(self):
     '''Build a debugging representation of the collection.

     Every item is converted with ``unicode()``. When ``len(self)`` exceeds
     60 only the first three and last three items are shown, joined by
     ``...``; otherwise the full list is shown.
     '''
     items = [unicode(item) for item in self._collection]
     name = self.__class__.__name__
     if len(self) <= 60:
         return '{cls}({lst})'.format(cls=name, lst=items)
     head = items[:3]
     tail = items[-3:]
     return '{cls}({beginning}...{end})'.format(
         cls=name, beginning=head, end=tail)
Exemple #22
0
 def test_translate(self, mock_get_json5):
     # Canned array-style payload; its first entry pairs the Spanish
     # sentence with the English original.
     mock_get_json5.return_value = unicode('[[["Esta es una frase","This is a '
         'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
         ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
         ' una",374,true,false],["Se trata de una",6,true,false],'
         '["Este es un",0,true,false],["Se trata de un",0,true,false],'
         '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
         '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
         '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
         ',0,true,false]],[[10,18]],""]],,,[["en"]],29]')
     t = self.translator.translate(self.sentence, to_lang="es")
     assert_equal(t, "Esta es una frase")
     # `mock.called_once` is not part of the Mock API: it is an auto-created
     # child mock that is always truthy (and raises AttributeError on
     # Python 3.12+), so the old assertion could never fail. Check the
     # recorded call count instead.
     assert_equal(mock_get_json5.call_count, 1)
Exemple #23
0
 def test_translate(self, mock_get_json5):
     # Canned array-style payload; its first entry pairs the Spanish
     # sentence with the English original.
     mock_get_json5.return_value = unicode(
         '[[["Esta es una frase","This is a '
         'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
         ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
         ' una",374,true,false],["Se trata de una",6,true,false],'
         '["Este es un",0,true,false],["Se trata de un",0,true,false],'
         '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
         '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
         '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
         ',0,true,false]],[[10,18]],""]],,,[["en"]],29]')
     t = self.translator.translate(self.sentence, to_lang="es")
     assert_equal(t, "Esta es una frase")
     # `mock.called_once` is not part of the Mock API: it is an auto-created
     # child mock that is always truthy (and raises AttributeError on
     # Python 3.12+), so the old assertion could never fail. Check the
     # recorded call count instead.
     assert_equal(mock_get_json5.call_count, 1)
Exemple #24
0
 def test_translate_detects_language_by_default(self, mock_translate):
     # mock_translate is injected by the caller (presumably a patch decorator
     # outside this block).
     text = unicode("ذات سيادة كاملة")
     mock_translate.return_value = "With full sovereignty"
     blob = tb.TextBlob(text)
     blob.translate()
     # `called_once_with` is not a Mock assertion: it is an auto-created
     # child mock whose return value is always truthy, so the previous
     # `assert_true(mock_translate.called_once_with(...))` could never fail.
     # NOTE(review): the intended argument check (text, from_lang='auto') was
     # never enforced; tighten this to assert_called_once_with(...) once the
     # exact call signature used by TextBlob.translate is confirmed.
     assert_true(mock_translate.call_count == 1)
Exemple #25
0
 def test_translate_detects_language_by_default(self):
     # translate() is called with no arguments, so every setting is a default.
     arabic_text = unicode("ذات سيادة كاملة")
     translated = tb.TextBlob(arabic_text).translate()
     assert_equal(translated, "With full sovereignty")
Exemple #26
0
 def test_detect_non_ascii(self):
     # Arabic sample text; the blob is expected to report the code "ar".
     arabic_text = unicode("ذات سيادة كاملة")
     detected = tb.TextBlob(arabic_text).detect_language()
     assert_equal(detected, "ar")
Exemple #27
0
def parse(s, *args, **kwargs):
    """ Returns a tagged Unicode string.
        The input is coerced with unicode() and handed to parser.parse();
        extra positional and keyword arguments are forwarded unchanged.
    """
    text = unicode(s)
    return parser.parse(text, *args, **kwargs)
Exemple #28
0
def parsetree(s, *args, **kwargs):
    """ Returns a parsed Text from the given string.
        The input is coerced with unicode(), run through parse() with any
        extra arguments, and the result is wrapped in Text.
    """
    tagged = parse(unicode(s), *args, **kwargs)
    return Text(tagged)
Exemple #29
0
 def test_detect_non_ascii(self):
     # Arabic sample text; the blob is expected to report the code "ar".
     arabic_text = unicode("ذات سيادة كاملة")
     detected = tb.TextBlob(arabic_text).detect_language()
     assert_equal(detected, "ar")
Exemple #30
0
def positive(s, threshold=0.1, **kwargs):
    """ Returns True if the given sentence has a positive sentiment (polarity >= threshold).
        Keyword arguments are forwarded to polarity().
    """
    score = polarity(unicode(s), **kwargs)
    return score >= threshold
Exemple #31
0
def parse(s, *args, **kwargs):
    """ Returns a tagged Unicode string.
        The input is coerced with unicode() and handed to parser.parse();
        extra positional and keyword arguments are forwarded unchanged.
    """
    text = unicode(s)
    return parser.parse(text, *args, **kwargs)
Exemple #32
0
def positive(s, threshold=0.1, **kwargs):
    """ Returns True if the given sentence has a positive sentiment (polarity >= threshold).
        Keyword arguments are forwarded to polarity().
    """
    score = polarity(unicode(s), **kwargs)
    return score >= threshold
Exemple #33
0
def subjectivity(s, **kwargs):
    """ Returns the sentence subjectivity (objective/subjective) between 0.0 and 1.0.
        This is the second element of the result of sentiment().
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[1]
Exemple #34
0
def subjectivity(s, **kwargs):
    """ Returns the sentence subjectivity (objective/subjective) between 0.0 and 1.0.
        This is the second element of the result of sentiment().
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[1]
Exemple #35
0
def polarity(s, **kwargs):
    """ Returns the sentence polarity (positive/negative) between -1.0 and 1.0.
        This is the first element of the result of sentiment().
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[0]
Exemple #36
0
 def test_translate_detects_language_by_default(self):
     # translate() is called with no arguments, so every setting is a default.
     arabic_text = unicode("ذات سيادة كاملة")
     translated = tb.TextBlob(arabic_text).translate()
     assert_equal(translated, "With full sovereignty")
Exemple #37
0
def parsetree(s, *args, **kwargs):
    """ Returns a parsed Text from the given string.
        The input is coerced with unicode(), run through parse() with any
        extra arguments, and the result is wrapped in Text.
    """
    tagged = parse(unicode(s), *args, **kwargs)
    return Text(tagged)
Exemple #38
0
def polarity(s, **kwargs):
    """ Returns the sentence polarity (positive/negative) between -1.0 and 1.0.
        This is the first element of the result of sentiment().
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[0]