def test_detect_non_ascii(self):
    """Check ``translator.detect`` on three non-ASCII samples.

    Each sample is converted with ``unicode`` and the detected language
    code is compared with the expected one (``zh-CN``, ``bg``, ``ru``).
    No mock is injected into this test, so it presumably talks to the
    real detection backend.
    """
    samples = (
        ("关于中文维基百科", 'zh-CN'),
        ("известен още с псевдонимите", "bg"),
        ("Избранная статья", "ru"),
    )
    for sample, expected_code in samples:
        detected = self.translator.detect(unicode(sample))
        assert_equal(detected, expected_code)
def test_detect(self, mock_get_json5):
    """Check that ``detect`` reads the language code from a canned payload.

    ``mock_get_json5`` is injected from outside this block (presumably by a
    patch decorator) and is primed with two array-style responses: one whose
    source language is ``en`` and one whose source language is ``es``.
    """
    english_payload = (
        '[[["This is a sentence",'
        '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]'
    )
    spanish_payload = (
        '[[["Hello","Hola","",""]],[["interjection",'
        '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
        '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
        '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
        '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
        '["Hi!",["\xa1Hola!"],,0.24506053],'
        '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
        ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
        '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
        '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
        '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
        '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
        '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
        '"es",,[["Hello",[1],true,false,783,0,1,0]],'
        '[["Hola",1,[["Hello",783,true,false],'
        '["Hi",214,true,false],["Hola",1,true,false],'
        '["Hey",0,true,false],["Welcome",0,true,false]],'
        '[[0,4]],"Hola"]],,,[],4]'
    )

    # First canned response: the fixture sentence should be reported as "en".
    mock_get_json5.return_value = unicode(english_payload)
    detected_english = self.translator.detect(self.sentence)
    assert_equal(detected_english, "en")

    # Second canned response: "Hola" should be reported as "es".
    mock_get_json5.return_value = unicode(spanish_payload)
    detected_spanish = self.translator.detect("Hola")
    assert_equal(detected_spanish, "es")
def test_detect(self, mock_get_json5):
    """Drive ``detect`` with two canned responses and check the language.

    ``mock_get_json5`` comes from outside this block (presumably a patch
    decorator). Each case primes it with a raw payload, runs ``detect`` on
    the paired input and compares the result with the expected code.
    """
    cases = (
        (
            '[[["This is a sentence",'
            '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]',
            self.sentence,
            "en",
        ),
        (
            '[[["Hello","Hola","",""]],[["interjection",'
            '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
            '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
            '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
            '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
            '["Hi!",["\xa1Hola!"],,0.24506053],'
            '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
            ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
            '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
            '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
            '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
            '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
            '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
            '"es",,[["Hello",[1],true,false,783,0,1,0]],'
            '[["Hola",1,[["Hello",783,true,false],'
            '["Hi",214,true,false],["Hola",1,true,false],'
            '["Hey",0,true,false],["Welcome",0,true,false]],'
            '[[0,4]],"Hola"]],,,[],4]',
            "Hola",
            "es",
        ),
    )
    for raw_payload, source_text, expected_code in cases:
        # Convert inside the loop so each payload is only built right
        # before the call that consumes it.
        mock_get_json5.return_value = unicode(raw_payload)
        assert_equal(self.translator.detect(source_text), expected_code)
def test_translate_non_ascii(self):
    """Translate an Arabic and a Chinese sample into English.

    Each sample is passed to ``translator.translate`` with an explicit
    source language and ``to_lang='en'``, and the result is compared with
    the expected English text. No mock is injected here, so this presumably
    depends on the real translation backend.
    """
    cases = (
        ("ذات سيادة كاملة", 'ar', "With full sovereignty"),
        ("美丽优于丑陋", "zh-CN", "Beautiful is better than ugly"),
    )
    for sample, source_lang, expected in cases:
        result = self.translator.translate(unicode(sample),
                                           from_lang=source_lang,
                                           to_lang='en')
        assert_equal(result, expected)
def test_translate_non_ascii(self):
    """Translate Arabic and Chinese ``TextBlob`` objects into English.

    Builds a blob from each non-ASCII sample, calls ``translate`` with an
    explicit ``from_lang`` and ``to="en"``, and compares the result with the
    expected English text.
    """
    cases = (
        ("ذات سيادة كاملة", "ar", "With full sovereignty"),
        ("美丽优于丑陋", "zh-CN", "Beautiful is better than ugly"),
    )
    for sample, source_lang, expected in cases:
        sample_blob = tb.TextBlob(unicode(sample))
        result = sample_blob.translate(from_lang=source_lang, to="en")
        assert_equal(result, expected)
def test_failed_translation_raises_not_translated(self, mock_get_json5):
    """Check that an unchanged translation raises ``NotTranslated``.

    ``mock_get_json5`` is injected from outside this block (presumably by a
    patch decorator). It is primed with a payload whose ``trans`` and
    ``orig`` fields are identical, and ``translate`` is expected to raise
    ``NotTranslated`` for it.
    """
    mock_get_json5.return_value = unicode(
        '{"sentences":[{"trans":'
        '"n0tv\\u0026l1d","orig":'
        '"n0tv\\u0026l1d","translit":"",'
        '"src_translit":""}],'
        '"src":"en","server_time":2}'
    )
    text = unicode(' n0tv&l1d ')
    assert_raises(NotTranslated,
                  self.translator.translate, text, to_lang="es")
    # `mock.called_once` is not part of the Mock API: it returns an
    # auto-created child mock that is always truthy (and recent
    # unittest.mock versions raise AttributeError for it), so the old
    # assertion could never fail. Check the real call counter instead.
    assert_equal(mock_get_json5.call_count, 1)
def pos_tags(self):
    """Return a list of ``(word, POS tag)`` tuples for the raw text.

    Entries whose tag matches ``PUNCTUATION_REGEX`` are left out. Each word
    is wrapped in ``Word`` (carrying its tag) and each tag is converted with
    ``unicode``.

    Example:
    ::

        [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                ('Thursday', 'NNP'), ('morning', 'NN')]

    :rtype: list of tuples
    """
    tagged_words = []
    for token, tag in self.pos_tagger.tag(self.raw):
        tag_text = unicode(tag)
        if PUNCTUATION_REGEX.match(tag_text):
            continue
        tagged_words.append((Word(token, pos_tag=tag), tag_text))
    return tagged_words
def test_detect_parses_json5(self, mock_get_json5):
    """Check that ``detect`` returns the ``src`` code of a canned payload.

    ``mock_get_json5`` is injected from outside this block (presumably by a
    patch decorator). Two object-style payloads are used, one with
    ``"src":"en"`` and one with ``"src":"es"``.
    """
    english_payload = (
        '{"sentences":[{"trans":'
        '"This is a sentence.","orig":'
        '"This is a sentence.","translit":"",'
        '"src_translit":""}],"src":"en",'
        '"server_time":1}'
    )
    spanish_payload = (
        '{"sentences":[{"trans":'
        '"Hello","orig":"Hola",'
        '"translit":"","src_translit":""}],'
        '"src":"es","server_time":2}'
    )

    mock_get_json5.return_value = unicode(english_payload)
    detected_english = self.translator.detect(self.sentence)
    assert_equal(detected_english, "en")

    mock_get_json5.return_value = unicode(spanish_payload)
    detected_spanish = self.translator.detect("Hola")
    assert_equal(detected_spanish, "es")
def pos_tags(self):
    '''Return a list of ``(word, POS tag)`` tuples for the raw text.

    The tagger output for ``self.raw`` is walked pair by pair; pairs whose
    tag matches ``PUNCTUATION_REGEX`` are dropped.

    Example:
    ::

        [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                ('Thursday', 'NNP'), ('morning', 'NN')]

    :rtype: list of tuples
    '''
    result = []
    raw_pairs = self.pos_tagger.tag(self.raw)
    for token, tag in raw_pairs:
        tag_as_text = unicode(tag)
        if not PUNCTUATION_REGEX.match(tag_as_text):
            tagged_word = Word(token, pos_tag=tag)
            result.append((tagged_word, tag_as_text))
    return result
def test_translate_detects_language_by_default(self, mock_detect,
                                               mock_get_json5,
                                               mock_get_language,
                                               mock_get_translation):
    """Check that translating without a source language calls ``detect``.

    All four mocks are injected from outside this block (presumably by patch
    decorators); what exactly they replace is not visible here.

    NOTE(review): the previous version never called ``blob.translate()``
    and asserted on ``mock_detect.called_once_with(...)``, which is not a
    Mock assertion (it yields an always-truthy child mock), so the test
    could not fail. This version performs the translation and uses a real
    assertion; confirm against the patch targets that ``translate()`` is
    expected to call ``detect`` with the blob's text.
    """
    mock_get_language.return_value = 'ar'
    mock_get_translation.return_value = 'Fully sovereign'
    text = unicode("ذات سيادة كاملة")
    blob = tb.TextBlob(text)
    # Trigger the behaviour under test; without this call nothing is
    # exercised and `mock_detect` is never touched.
    blob.translate()
    mock_detect.assert_called_once_with(text)
def pos_tags(self):
    """Return a list of ``(word, POS tag)`` tuples.

    For a ``TextBlob`` the ``pos_tags`` of every sentence are concatenated
    in order. For any other object the tagger is run on ``self`` directly,
    and pairs whose tag matches ``PUNCTUATION_REGEX`` are dropped.

    Example:
    ::

        [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                ('Thursday', 'NNP'), ('morning', 'NN')]

    :rtype: list of tuples
    """
    if not isinstance(self, TextBlob):
        pairs = []
        for token, tag in self.pos_tagger.tag(self):
            tag_text = unicode(tag)
            if not PUNCTUATION_REGEX.match(tag_text):
                pairs.append((Word(unicode(token), pos_tag=tag), tag_text))
        return pairs

    # Collect every sentence's tags first, then flatten them into one list.
    per_sentence = [sentence.pos_tags for sentence in self.sentences]
    flattened = []
    for sentence_tags in per_sentence:
        flattened.extend(sentence_tags)
    return flattened
def test_translate(self, mock_get_json5):
    """Check that ``translate`` returns the ``trans`` text of the payload.

    ``mock_get_json5`` is injected from outside this block (presumably by a
    patch decorator) and is primed with an object-style payload whose
    translation is ``"Esta es una frase."``.
    """
    mock_get_json5.return_value = unicode(
        '{"sentences":[{"trans":'
        '"Esta es una frase.","orig":'
        '"This is a sentence.","translit":"",'
        '"src_translit":""}],"src":"en",'
        '"server_time":2}'
    )
    t = self.translator.translate(self.sentence, to_lang="es")
    assert_equal(t, "Esta es una frase.")
    # `mock.called_once` is not part of the Mock API: it returns an
    # auto-created child mock that is always truthy (and recent
    # unittest.mock versions raise AttributeError for it), so the old
    # assertion could never fail. Check the real call counter instead.
    assert_equal(mock_get_json5.call_count, 1)
def pos_tags(self):
    """Return a list of ``(word, POS tag)`` tuples.

    A ``TextBlob`` returns the ``pos_tags`` of all its sentences joined
    into one flat list. Anything else is tagged directly via
    ``self.pos_tagger``, skipping pairs whose tag matches
    ``PUNCTUATION_REGEX``.

    Example:
    ::

        [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                ('Thursday', 'NNP'), ('morning', 'NN')]

    :rtype: list of tuples
    """
    if isinstance(self, TextBlob):
        sentence_results = [sentence.pos_tags for sentence in self.sentences]
        combined = []
        for tags in sentence_results:
            for pair in tags:
                combined.append(pair)
        return combined

    tagged = []
    for token, tag in self.pos_tagger.tag(self):
        tag_text = unicode(tag)
        if PUNCTUATION_REGEX.match(tag_text):
            continue
        tagged.append((Word(token, pos_tag=tag), tag_text))
    return tagged
def __repr__(self):
    '''Return a debugging representation of the collection.

    The result is the class name wrapping the list of items converted with
    ``unicode``. When ``len(self)`` exceeds 60 only the first three and the
    last three items are shown, separated by ``...``.
    '''
    items_as_text = [unicode(item) for item in self._collection]
    name = self.__class__.__name__
    if len(self) <= 60:
        return '{cls}({lst})'.format(cls=name, lst=items_as_text)
    head = items_as_text[:3]
    tail = items_as_text[-3:]
    return '{cls}({beginning}...{end})'.format(cls=name,
                                               beginning=head,
                                               end=tail)
def test_translate(self, mock_get_json5):
    """Check that ``translate`` extracts the translation from the payload.

    ``mock_get_json5`` is injected from outside this block (presumably by a
    patch decorator) and is primed with an array-style payload whose first
    entry pairs ``"Esta es una frase"`` with ``"This is a sentence"``.
    """
    mock_get_json5.return_value = unicode(
        '[[["Esta es una frase","This is a '
        'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
        ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
        ' una",374,true,false],["Se trata de una",6,true,false],'
        '["Este es un",0,true,false],["Se trata de un",0,true,false],'
        '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
        '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
        '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
        ',0,true,false]],[[10,18]],""]],,,[["en"]],29]'
    )
    t = self.translator.translate(self.sentence, to_lang="es")
    assert_equal(t, "Esta es una frase")
    # `mock.called_once` is not part of the Mock API: it returns an
    # auto-created child mock that is always truthy (and recent
    # unittest.mock versions raise AttributeError for it), so the old
    # assertion could never fail. Check the real call counter instead.
    assert_equal(mock_get_json5.call_count, 1)
def test_translate(self, mock_get_json5):
    """Check that ``translate`` extracts the translation from the payload.

    ``mock_get_json5`` is injected from outside this block (presumably by a
    patch decorator). The canned array-style payload pairs
    ``"Esta es una frase"`` with ``"This is a sentence"``.
    """
    canned_response = (
        '[[["Esta es una frase","This is a '
        'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
        ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
        ' una",374,true,false],["Se trata de una",6,true,false],'
        '["Este es un",0,true,false],["Se trata de un",0,true,false],'
        '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
        '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
        '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
        ',0,true,false]],[[10,18]],""]],,,[["en"]],29]'
    )
    mock_get_json5.return_value = unicode(canned_response)
    t = self.translator.translate(self.sentence, to_lang="es")
    assert_equal(t, "Esta es una frase")
    # `mock.called_once` is not part of the Mock API: attribute access
    # yields an always-truthy child mock (or AttributeError on recent
    # unittest.mock versions), so the previous check verified nothing.
    assert_equal(mock_get_json5.call_count, 1)
def test_translate_detects_language_by_default(self, mock_translate):
    """Check that ``TextBlob.translate()`` defaults to ``from_lang='auto'``.

    ``mock_translate`` is injected from outside this block (presumably a
    patch of the translator's ``translate``). After calling
    ``blob.translate()`` with no arguments, the mock must have been called
    exactly once, with the blob's text and ``from_lang='auto'``.
    """
    text = unicode("ذات سيادة كاملة")
    mock_translate.return_value = "With full sovereignty"
    blob = tb.TextBlob(text)
    blob.translate()
    # `mock.called_once_with(...)` is not a Mock assertion: it returns an
    # always-truthy child mock (or raises AttributeError on recent
    # unittest.mock versions), so the old check could never fail.
    assert_equal(mock_translate.call_count, 1)
    call_args, call_kwargs = mock_translate.call_args
    # Only the arguments the original assertion named are checked; other
    # keyword arguments forwarded by `translate()` are deliberately ignored.
    # NOTE(review): assumes `from_lang` is forwarded as a keyword -- confirm
    # against TextBlob.translate.
    assert_true(text in call_args or text in call_kwargs.values())
    assert_equal(call_kwargs.get('from_lang'), 'auto')
def test_translate_detects_language_by_default(self):
    """Translate an Arabic blob without naming the source language.

    ``translate()`` is called with no arguments and its result is compared
    with the expected English text.
    """
    arabic_text = unicode("ذات سيادة كاملة")
    arabic_blob = tb.TextBlob(arabic_text)
    translated = arabic_blob.translate()
    assert_equal(translated, "With full sovereignty")
def test_detect_non_ascii(self):
    """Check that ``detect_language`` reports ``ar`` for an Arabic blob."""
    arabic_text = unicode("ذات سيادة كاملة")
    arabic_blob = tb.TextBlob(arabic_text)
    detected = arabic_blob.detect_language()
    assert_equal(detected, "ar")
def parse(s, *args, **kwargs):
    """ Converts s with unicode() and returns parser.parse() of it.
        Extra positional and keyword arguments are forwarded unchanged.
    """
    text = unicode(s)
    tagged = parser.parse(text, *args, **kwargs)
    return tagged
def parsetree(s, *args, **kwargs):
    """ Returns a Text built from the parse() output for the given string.
        Extra positional and keyword arguments are forwarded to parse().
    """
    text = unicode(s)
    tagged = parse(text, *args, **kwargs)
    return Text(tagged)
def positive(s, threshold=0.1, **kwargs):
    """ Returns True if the polarity of the given sentence is at least
        the threshold (polarity >= threshold), False otherwise.
        Keyword arguments are forwarded to polarity().
    """
    score = polarity(unicode(s), **kwargs)
    return score >= threshold
def subjectivity(s, **kwargs):
    """ Returns the sentence subjectivity (objective/subjective) between 0.0 and 1.0.
        This is the second item of the sentiment() result for the string.
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[1]
def polarity(s, **kwargs):
    """ Returns the sentence polarity (positive/negative) between -1.0 and 1.0.
        This is the first item of the sentiment() result for the string.
    """
    scores = sentiment(unicode(s), **kwargs)
    return scores[0]