예제 #1
0
class TestTranslator(unittest.TestCase):
    """Integration-style tests for Translator's translate/detect API."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    def test_translate(self):
        """An English sentence translates to the expected Spanish."""
        translated = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(translated, "Esta es una frase.")

    def test_detect(self):
        """detect() identifies the language of short ASCII samples."""
        detect = self.translator.detect
        assert_equal(detect(self.sentence), "en")
        assert_equal(detect("Hola"), "es")
        assert_equal(detect("Kumusta ka na?"), "tl")
        assert_equal(detect("Programmiersprache"), 'de')

    def test_detect_non_ascii(self):
        """detect() handles non-ASCII (CJK and Cyrillic) input."""
        samples = [
            (unicode("关于中文维基百科"), 'zh-CN'),
            (unicode("известен още с псевдонимите"), "bg"),
            (unicode("Избранная статья"), "ru"),
        ]
        for text, expected in samples:
            assert_equal(self.translator.detect(text), expected)

    def test_get_language_from_json5(self):
        """The source language is parsed out of a raw json5 payload."""
        json5 = '[[["This is a sentence.","This is a sentence.","",""]],,"en",,,,,,[["en"]],0]'
        assert_equal(self.translator._get_language_from_json5(json5), "en")
예제 #2
0
    def analyze(self, text):
        """Return the sentiment as a tuple of the form:
        ``(polarity, subjectivity)``

        The Arabic text is first translated to English using textblob's
        base Translator; the English text is then scored by
        PatternAnalyzer.
        """
        english = Translator().translate(text, from_lang='ar')
        return PA().analyze(english)
예제 #3
0
class TestTranslator(unittest.TestCase):
    """Unit tests with external requests mocked out."""
    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._request')
    def test_translate(self, mock_request):
        """translate() returns the text from the mocked response."""
        mock_request.return_value = '["Esta es una frase.","en"]'
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase.")
        # BUG FIX: `mock_request.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_request.call_count, 1)

    @mock.patch('textblob.translate.Translator._request')
    def test_failed_translation_raises_not_translated(self, mock_request):
        """Every malformed response makes translate() raise NotTranslated."""
        failed_responses = ['""', '[""]', '["",""]', '" n0tv&l1d "']
        mock_request.side_effect = failed_responses
        text = ' n0tv&l1d '
        # One translate() call per queued side-effect response.
        for _ in failed_responses:
            assert_raises(NotTranslated,
                          self.translator.translate,
                          text,
                          to_lang="es")
        assert_equal(mock_request.call_count, len(failed_responses))

    @mock.patch("textblob.translate.Translator._request")
    def test_tk_parameter_included_in_request_url(self, mock_request):
        """The request URL ends with a ``tk=<num>.<num>`` token parameter."""
        mock_request.return_value = '["Esta es una frase.","en"]'
        self.translator.translate(self.sentence, to_lang="es")
        assert_equal(mock_request.call_count, 1)
        args, kwargs = mock_request.call_args
        url = args[0]
        # Raw string avoids the invalid-escape-sequence warning for \d.
        assert_true(re.match(r'.+&tk=\d+\.\d+$', url))

    @mock.patch('textblob.translate.Translator._request')
    def test_detect(self, mock_request):
        """detect() returns the language code from the mocked response."""
        mock_request.return_value = '["Esta es una frase.","en"]'
        language = self.translator.detect(self.sentence)
        assert_equal(language, "en")
        assert_equal(mock_request.call_count, 1)

    def test_detect_requires_more_than_two_characters(self):
        """Inputs shorter than three characters are rejected."""
        assert_raises(TranslatorError, lambda: self.translator.detect('f'))
        assert_raises(TranslatorError, lambda: self.translator.detect('fo'))
예제 #4
0
class TestTranslator(unittest.TestCase):

    """Unit tests with external requests mocked out."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._request')
    def test_translate(self, mock_request):
        """translate() returns the text from the mocked response."""
        mock_request.return_value = '["Esta es una frase.","en"]'
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase.")
        # BUG FIX: `mock_request.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_request.call_count, 1)

    @mock.patch('textblob.translate.Translator._request')
    def test_failed_translation_raises_not_translated(self, mock_request):
        """Every malformed response makes translate() raise NotTranslated."""
        failed_responses = ['""', '[""]', '["",""]', '" n0tv&l1d "']
        mock_request.side_effect = failed_responses
        text = ' n0tv&l1d '
        # One translate() call per queued side-effect response.
        for _ in failed_responses:
            assert_raises(NotTranslated,
                          self.translator.translate, text, to_lang="es")
        assert_equal(mock_request.call_count, len(failed_responses))

    @mock.patch("textblob.translate.Translator._request")
    def test_tk_parameter_included_in_requests(self, mock_request):
        """The request data includes a ``tk`` token of the form num.num."""
        mock_request.return_value = '["Esta es una frase.","en"]'
        self.translator.translate(self.sentence, to_lang="es")
        assert_equal(mock_request.call_count, 1)
        args, kwargs = mock_request.call_args
        tk = kwargs['data']['tk']
        assert_true(re.match(r'^\d+\.\d+$', tk))

    @mock.patch('textblob.translate.Translator._request')
    def test_detect(self, mock_request):
        """detect() returns the language code from the mocked response."""
        mock_request.return_value = '["Esta es una frase.","en"]'
        language = self.translator.detect(self.sentence)
        assert_equal(language, "en")
        assert_equal(mock_request.call_count, 1)

    def test_detect_requires_more_than_two_characters(self):
        """Inputs shorter than three characters are rejected."""
        assert_raises(TranslatorError, lambda: self.translator.detect('f'))
        assert_raises(TranslatorError, lambda: self.translator.detect('fo'))
예제 #5
0
 def setUp(self):
     """Create a fresh Translator and a shared sample sentence per test."""
     self.translator = Translator()
     self.sentence = "This is a sentence."
예제 #6
0
class TestTranslator(unittest.TestCase):
    """Tests for Translator: mocked json5 parsing plus live-API checks."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_translate(self, mock_get_json5):
        """translate() extracts the translation from a mocked payload."""
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"Esta es una frase.","orig":'
                                        '"This is a sentence.","translit":"",'
                                        '"src_translit":""}],"src":"en",'
                                        '"server_time":2}')
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase.")
        # BUG FIX: `mock_get_json5.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_get_json5.call_count, 1)

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_detect_parses_json5(self, mock_get_json5):
        """detect() reads the ``src`` field out of the json5 payload."""
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"This is a sentence.","orig":'
                                        '"This is a sentence.","translit":"",'
                                        '"src_translit":""}],"src":"en",'
                                        '"server_time":1}')
        lang = self.translator.detect(self.sentence)
        assert_equal(lang, "en")
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"Hello","orig":"Hola",'
                                        '"translit":"","src_translit":""}],'
                                        '"src":"es","server_time":2}')
        lang2 = self.translator.detect("Hola")
        assert_equal(lang2, "es")

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_failed_translation_raises_not_translated(self, mock_get_json5):
        """An unchanged translation raises NotTranslated."""
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"n0tv\\u0026l1d","orig":'
                                        '"n0tv\\u0026l1d","translit":"",'
                                        '"src_translit":""}],'
                                        '"src":"en","server_time":2}')
        text = unicode(' n0tv&l1d ')
        assert_raises(NotTranslated,
                      self.translator.translate, text, to_lang="es")
        # BUG FIX: same `called_once` always-true assertion as above.
        assert_equal(mock_get_json5.call_count, 1)

    @attr("requires_internet")
    def test_detect(self):
        """Live API: short words detect as Spanish and English."""
        assert_equal(self.translator.detect('Hola'), "es")
        assert_equal(self.translator.detect('Hello'), "en")

    @attr('requires_internet')
    def test_detect_non_ascii(self):
        """Live API: non-ASCII samples map to expected language codes."""
        lang = self.translator.detect(unicode("关于中文维基百科"))
        assert_equal(lang, 'zh-CN')
        lang2 = self.translator.detect(unicode("известен още с псевдонимите"))
        assert_equal(lang2, "bg")
        lang3 = self.translator.detect(unicode("Избранная статья"))
        assert_equal(lang3, "ru")

    @attr("requires_internet")
    def test_translate_spaces(self):
        """Live API: accented, punctuated Spanish translates cleanly."""
        es_text = u"Hola, me llamo Adrián! Cómo estás? Yo bien"
        to_en = self.translator.translate(es_text, from_lang="es", to_lang="en")
        assert_equal(to_en, "Hello, my name is Adrian! How are you? I am good")

    @attr("requires_internet")
    def test_translate_missing_from_language_auto_detects(self):
        """Live API: omitting from_lang auto-detects the source language."""
        text = u"Ich besorge das Bier"
        translated = self.translator.translate(text, to_lang="en")
        assert_equal(translated, u"I'll get the beer")

    @attr("requires_internet")
    def test_translate_text(self):
        """Live API: English<->Spanish translation in both directions."""
        text = "This is a sentence."
        translated = self.translator.translate(text, to_lang="es")
        assert_equal(translated, "Esta es una frase.")
        es_text = "Esta es una frase."
        to_en = self.translator.translate(es_text, from_lang="es", to_lang="en")
        assert_equal(to_en, "This is a sentence.")

    @attr("requires_internet")
    def test_translate_non_ascii(self):
        """Live API: Arabic and Chinese sources translate to English."""
        text = unicode("ذات سيادة كاملة")
        translated = self.translator.translate(text, from_lang='ar', to_lang='en')
        assert_equal(translated, "With full sovereignty")

        text2 = unicode("美丽优于丑陋")
        translated = self.translator.translate(text2, from_lang="zh-CN", to_lang='en')
        assert_equal(translated, "Beautiful is better than ugly")

    @attr("requires_internet")
    @mock.patch('textblob.translate.Translator._translation_successful')
    def test_translate_unicode_escape(self, trans_success_mock):
        """Live API: '&' survives a same-language round trip unescaped."""
        # Force the success check so the unchanged text is not rejected.
        trans_success_mock.return_value = True
        text = "Jenner & Block LLP"
        translated = self.translator.translate(text, from_lang="en", to_lang="en")
        assert_equal(translated, "Jenner & Block LLP")

    def test_detect_requires_more_than_two_characters(self):
        """Inputs shorter than three characters are rejected."""
        assert_raises(TranslatorError, lambda: self.translator.detect('f'))
        assert_raises(TranslatorError, lambda: self.translator.detect('fo'))

    def test_get_language_from_json5(self):
        """The source language is parsed out of a raw json5 payload."""
        json5 = ('{"sentences":[{"trans":"This is a sentence.",'
                 '"orig":"This is a sentence.","translit":"",'
                 '"src_translit":""}],"src":"en","server_time":1}')
        lang = self.translator._get_language_from_json5(json5)
        assert_equal(lang, "en")
예제 #7
0
 def setUp(self):
     """Create a fresh Translator and a shared sample sentence per test."""
     self.translator = Translator()
     self.sentence = "This is a sentence."
예제 #8
0
class TestTranslator(unittest.TestCase):
    """Tests for Translator against the json5 array response format."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_translate(self, mock_get_json5):
        """translate() extracts the translation from a mocked payload."""
        mock_get_json5.return_value = unicode(
            '[[["Esta es una frase","This is a '
            'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
            ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
            ' una",374,true,false],["Se trata de una",6,true,false],'
            '["Este es un",0,true,false],["Se trata de un",0,true,false],'
            '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
            '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
            '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
            ',0,true,false]],[[10,18]],""]],,,[["en"]],29]')
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase")
        # BUG FIX: `mock_get_json5.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_get_json5.call_count, 1)

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_detect(self, mock_get_json5):
        """detect() reads the source language out of the json5 payload."""
        mock_get_json5.return_value = unicode(
            '[[["This is a sentence",'
            '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]')
        lang = self.translator.detect(self.sentence)
        assert_equal(lang, "en")
        mock_get_json5.return_value = unicode(
            '[[["Hello","Hola","",""]],[["interjection",'
            '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
            '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
            '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
            '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
            '["Hi!",["\xa1Hola!"],,0.24506053],'
            '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
            ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
            '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
            '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
            '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
            '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
            '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
            '"es",,[["Hello",[1],true,false,783,0,1,0]],'
            '[["Hola",1,[["Hello",783,true,false],'
            '["Hi",214,true,false],["Hola",1,true,false],'
            '["Hey",0,true,false],["Welcome",0,true,false]],'
            '[[0,4]],"Hola"]],,,[],4]')
        lang2 = self.translator.detect("Hola")
        assert_equal(lang2, "es")

    @attr('requires_internet')
    def test_detect_non_ascii(self):
        """Live API: non-ASCII samples map to expected language codes."""
        lang = self.translator.detect(unicode("关于中文维基百科"))
        assert_equal(lang, 'zh-CN')
        lang2 = self.translator.detect(unicode("известен още с псевдонимите"))
        assert_equal(lang2, "bg")
        lang3 = self.translator.detect(unicode("Избранная статья"))
        assert_equal(lang3, "ru")

    def test_get_language_from_json5(self):
        """The source language is parsed out of a raw json5 payload."""
        json5 = '[[["This is a sentence.","This is a sentence.","",""]],,"en",,,,,,[["en"]],0]'
        lang = self.translator._get_language_from_json5(json5)
        assert_equal(lang, "en")
예제 #9
0
class TestTranslator(unittest.TestCase):
    """Tests for Translator against the json5 array response format."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_translate(self, mock_get_json5):
        """translate() extracts the translation from a mocked payload."""
        mock_get_json5.return_value = unicode('[[["Esta es una frase","This is a '
            'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
            ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
            ' una",374,true,false],["Se trata de una",6,true,false],'
            '["Este es un",0,true,false],["Se trata de un",0,true,false],'
            '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
            '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
            '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
            ',0,true,false]],[[10,18]],""]],,,[["en"]],29]')
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase")
        # BUG FIX: `mock_get_json5.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_get_json5.call_count, 1)

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_detect(self, mock_get_json5):
        """detect() reads the source language out of the json5 payload."""
        mock_get_json5.return_value = unicode('[[["This is a sentence",'
            '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]')
        lang = self.translator.detect(self.sentence)
        assert_equal(lang, "en")
        mock_get_json5.return_value = unicode('[[["Hello","Hola","",""]],[["interjection",'
                                        '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
                                        '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
                                        '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
                                        '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
                                        '["Hi!",["\xa1Hola!"],,0.24506053],'
                                        '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
                                        ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
                                        '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
                                        '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
                                        '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
                                        '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
                                        '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
                                        '"es",,[["Hello",[1],true,false,783,0,1,0]],'
                                        '[["Hola",1,[["Hello",783,true,false],'
                                        '["Hi",214,true,false],["Hola",1,true,false],'
                                        '["Hey",0,true,false],["Welcome",0,true,false]],'
                                        '[[0,4]],"Hola"]],,,[],4]')
        lang2 = self.translator.detect("Hola")
        assert_equal(lang2, "es")

    @attr('requires_internet')
    def test_detect_non_ascii(self):
        """Live API: non-ASCII samples map to expected language codes."""
        lang = self.translator.detect(unicode("关于中文维基百科"))
        assert_equal(lang, 'zh-CN')
        lang2 = self.translator.detect(unicode("известен още с псевдонимите"))
        assert_equal(lang2, "bg")
        lang3 = self.translator.detect(unicode("Избранная статья"))
        assert_equal(lang3, "ru")

    def test_get_language_from_json5(self):
        """The source language is parsed out of a raw json5 payload."""
        json5 = '[[["This is a sentence.","This is a sentence.","",""]],,"en",,,,,,[["en"]],0]'
        lang = self.translator._get_language_from_json5(json5)
        assert_equal(lang, "en")
예제 #10
0
 def setUp(self):
     """Create a fresh Translator for each test."""
     self.translator = Translator()
예제 #11
0
class TestTranslator(unittest.TestCase):
    """Mocked json5-parsing tests plus live-API integration tests."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_translate(self, mock_get_json5):
        """translate() extracts the translation from a mocked payload."""
        mock_get_json5.return_value = unicode(
            '[[["Esta es una frase","This is a '
            'sentence","",""]],,"en",,[["Esta es una",[1],true,false,374,0,3,0]'
            ',["frase",[2],true,false,470,3,4,0]],[["This is a",1,[["Esta es'
            ' una",374,true,false],["Se trata de una",6,true,false],'
            '["Este es un",0,true,false],["Se trata de un",0,true,false],'
            '["Esto es un",0,true,false]],[[0,9]],"This is a sentence"],'
            '["sentence",2,[["frase",470,true,false],["sentencia",6,true,false],'
            '["oraci\xf3n",0,true,false],["pena",0,true,false],["condena"'
            ',0,true,false]],[[10,18]],""]],,,[["en"]],29]')
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase")
        # BUG FIX: `mock_get_json5.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_get_json5.call_count, 1)

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_detect_parses_json5(self, mock_get_json5):
        """detect() reads the source language out of the json5 payload."""
        mock_get_json5.return_value = unicode(
            '[[["This is a sentence",'
            '"This is a sentence","",""]],,"en",,,,,,[["en"]],4]')
        lang = self.translator.detect(self.sentence)
        assert_equal(lang, "en")
        mock_get_json5.return_value = unicode(
            '[[["Hello","Hola","",""]],[["interjection",'
            '["Hello!","Hi!","Hey!","Hullo!","Hallo!",'
            '"Hoy!","Hail!"],[["Hello!",["\xa1Hola!","'
            '\xa1Caramba!","\xa1Oiga!","\xa1Diga!","'
            '\xa1Bueno!","\xa1Vale!"],,0.39160562],'
            '["Hi!",["\xa1Hola!"],,0.24506053],'
            '["Hey!",["\xa1Hola!","\xa1Eh!"],,0.038173068]'
            ',["Hullo!",["\xa1Hola!","\xa1Caramba!",'
            '"\xa1Oiga!","\xa1Diga!","\xa1Bueno!",'
            '"\xa1Al\xf3!"]],["Hallo!",["\xa1Hola!",'
            '"\xa1Caramba!","\xa1Oiga!","\xa1Bueno!"]],'
            '["Hoy!",["\xa1Eh!","\xa1Hola!"]],["Hail!",'
            '["\xa1Salve!","\xa1Hola!"]]],"\xa1Hola!",9]],'
            '"es",,[["Hello",[1],true,false,783,0,1,0]],'
            '[["Hola",1,[["Hello",783,true,false],'
            '["Hi",214,true,false],["Hola",1,true,false],'
            '["Hey",0,true,false],["Welcome",0,true,false]],'
            '[[0,4]],"Hola"]],,,[],4]')
        lang2 = self.translator.detect("Hola")
        assert_equal(lang2, "es")

    @attr("requires_internet")
    def test_detect(self):
        """Live API: short words detect as Spanish and English."""
        assert_equal(self.translator.detect('Hola'), "es")
        assert_equal(self.translator.detect('Hello'), "en")

    @attr('requires_internet')
    def test_detect_non_ascii(self):
        """Live API: non-ASCII samples map to expected language codes."""
        lang = self.translator.detect(unicode("关于中文维基百科"))
        assert_equal(lang, 'zh-CN')
        lang2 = self.translator.detect(unicode("известен още с псевдонимите"))
        assert_equal(lang2, "bg")
        lang3 = self.translator.detect(unicode("Избранная статья"))
        assert_equal(lang3, "ru")

    @attr("requires_internet")
    def test_translate_text(self):
        """Live API: English<->Spanish translation in both directions.

        BUG FIX: this method was also named ``test_translate``, which
        silently shadowed (replaced) the mocked test above so it never
        ran. Renamed to match sibling test modules.
        """
        text = "This is a sentence."
        translated = self.translator.translate(text, to_lang="es")
        assert_equal(translated, "Esta es una frase.")
        es_text = "Esta es una frase."
        to_en = self.translator.translate(es_text,
                                          from_lang="es",
                                          to_lang="en")
        assert_equal(to_en, "This is a sentence.")

    @attr("requires_internet")
    def test_translate_non_ascii(self):
        """Live API: Arabic and Chinese sources translate to English."""
        text = unicode("ذات سيادة كاملة")
        translated = self.translator.translate(text,
                                               from_lang='ar',
                                               to_lang='en')
        assert_equal(translated, "Fully sovereign")

        text2 = unicode("美丽优于丑陋")
        translated = self.translator.translate(text2,
                                               from_lang="zh-CN",
                                               to_lang='en')
        assert_equal(translated, "Beautiful is better than ugly")

    @attr("requires_internet")
    def test_translate_unicode_escape(self):
        """Live API: '&' survives a same-language round trip unescaped."""
        text = "Jenner & Block LLP"
        translated = self.translator.translate(text,
                                               from_lang="en",
                                               to_lang="en")
        assert_equal(translated, "Jenner & Block LLP")

    def test_detect_requires_more_than_two_characters(self):
        """Inputs shorter than three characters are rejected."""
        assert_raises(TranslatorError, lambda: self.translator.detect('f'))
        assert_raises(TranslatorError, lambda: self.translator.detect('fo'))

    def test_get_language_from_json5(self):
        """The source language is parsed out of a raw json5 payload."""
        json5 = '[[["This is a sentence.","This is a sentence.","",""]],,"en",,,,,,[["en"]],0]'
        lang = self.translator._get_language_from_json5(json5)
        assert_equal(lang, "en")
예제 #12
0
class TestTranslator(unittest.TestCase):
    """Mocked json5-parsing tests plus live-API integration tests."""

    def setUp(self):
        # Fresh translator and a shared English sample for every test.
        self.translator = Translator()
        self.sentence = "This is a sentence."

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_translate(self, mock_get_json5):
        """translate() extracts the translation from a mocked payload."""
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"Esta es una frase.","orig":'
                                        '"This is a sentence.","translit":"",'
                                        '"src_translit":""}],"src":"en",'
                                        '"server_time":2}')
        t = self.translator.translate(self.sentence, to_lang="es")
        assert_equal(t, "Esta es una frase.")
        # BUG FIX: `mock_get_json5.called_once` is not a Mock attribute; it
        # auto-creates a truthy child mock, so the old assertion always
        # passed. Assert the real call count instead.
        assert_equal(mock_get_json5.call_count, 1)

    @mock.patch('textblob.translate.Translator._get_json5')
    def test_detect_parses_json5(self, mock_get_json5):
        """detect() reads the ``src`` field out of the json5 payload."""
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"This is a sentence.","orig":'
                                        '"This is a sentence.","translit":"",'
                                        '"src_translit":""}],"src":"en",'
                                        '"server_time":1}')
        lang = self.translator.detect(self.sentence)
        assert_equal(lang, "en")
        mock_get_json5.return_value = unicode('{"sentences":[{"trans":'
                                        '"Hello","orig":"Hola",'
                                        '"translit":"","src_translit":""}],'
                                        '"src":"es","server_time":2}')
        lang2 = self.translator.detect("Hola")
        assert_equal(lang2, "es")

    @attr("requires_internet")
    def test_detect(self):
        """Live API: short words detect as Spanish and English."""
        assert_equal(self.translator.detect('Hola'), "es")
        assert_equal(self.translator.detect('Hello'), "en")

    @attr('requires_internet')
    def test_detect_non_ascii(self):
        """Live API: non-ASCII samples map to expected language codes."""
        lang = self.translator.detect(unicode("关于中文维基百科"))
        assert_equal(lang, 'zh-CN')
        lang2 = self.translator.detect(unicode("известен още с псевдонимите"))
        assert_equal(lang2, "bg")
        lang3 = self.translator.detect(unicode("Избранная статья"))
        assert_equal(lang3, "ru")

    # BUG FIX: attr tag was "requires internet" (space), so this test was
    # not excluded with the other networked tests; underscore matches the
    # tag used everywhere else in this class.
    @attr("requires_internet")
    def test_translate_spaces(self):
        """Live API: accented, punctuated Spanish translates cleanly."""
        es_text = u"Hola, me llamo Adrián! Cómo estás? Yo bien"
        to_en = self.translator.translate(es_text, from_lang="es", to_lang="en")
        assert_equal(to_en, "Hello, my name is Adrian! How are you? I'm fine")

    @attr("requires_internet")
    def test_translate_text(self):
        """Live API: English<->Spanish translation in both directions."""
        text = "This is a sentence."
        translated = self.translator.translate(text, to_lang="es")
        assert_equal(translated, "Esta es una frase.")
        es_text = "Esta es una frase."
        to_en = self.translator.translate(es_text, from_lang="es", to_lang="en")
        assert_equal(to_en, "This is a sentence.")

    @attr("requires_internet")
    def test_translate_non_ascii(self):
        """Live API: Arabic and Chinese sources translate to English."""
        text = unicode("ذات سيادة كاملة")
        translated = self.translator.translate(text, from_lang='ar', to_lang='en')
        assert_equal(translated, "With full sovereignty")

        text2 = unicode("美丽优于丑陋")
        translated = self.translator.translate(text2, from_lang="zh-CN", to_lang='en')
        assert_equal(translated, "Beautiful is better than ugly")

    @attr("requires_internet")
    def test_translate_unicode_escape(self):
        """Live API: '&' survives a same-language round trip unescaped."""
        text = "Jenner & Block LLP"
        translated = self.translator.translate(text, from_lang="en", to_lang="en")
        assert_equal(translated, "Jenner & Block LLP")

    def test_detect_requires_more_than_two_characters(self):
        """Inputs shorter than three characters are rejected."""
        assert_raises(TranslatorError, lambda: self.translator.detect('f'))
        assert_raises(TranslatorError, lambda: self.translator.detect('fo'))

    def test_get_language_from_json5(self):
        """The source language is parsed out of a raw json5 payload."""
        json5 = ('{"sentences":[{"trans":"This is a sentence.",'
                 '"orig":"This is a sentence.","translit":"",'
                 '"src_translit":""}],"src":"en","server_time":1}')
        lang = self.translator._get_language_from_json5(json5)
        assert_equal(lang, "en")
예제 #13
0
 def setUp(self):
     """Create a fresh Translator for each test."""
     self.translator = Translator()
예제 #14
0
class TestTranslatorIntegration(unittest.TestCase):

    """Integration tests that actually call the translation API."""

    def setUp(self):
        # One fresh translator per test case.
        self.translator = Translator()

    def test_detect(self):
        """Short words are detected as Spanish and English."""
        assert_equal(self.translator.detect('Hola'), "es")
        assert_equal(self.translator.detect('Hello'), "en")

    def test_detect_non_ascii(self):
        """Non-ASCII samples map to the expected language codes."""
        cases = [
            ("关于中文维基百科", 'zh-CN'),
            ("известен още с псевдонимите", "bg"),
            ("Избранная статья", "ru"),
        ]
        for text, expected in cases:
            assert_equal(self.translator.detect(text), expected)

    def test_translate_spaces(self):
        """Accented, punctuated Spanish translates to clean English."""
        es_text = "Hola, me llamo Adrián! Cómo estás? Yo bien"
        result = self.translator.translate(es_text, from_lang="es", to_lang="en")
        assert_equal(result, "Hello, my name is Adrian! How are you? I am good")

    def test_translate_missing_from_language_auto_detects(self):
        """Omitting from_lang auto-detects the source language."""
        translated = self.translator.translate("Ich besorge das Bier",
                                               to_lang="en")
        assert_equal(translated, "I'll get the beer")

    def test_translate_text(self):
        """English<->Spanish translation works in both directions."""
        assert_equal(
            self.translator.translate("This is a sentence.", to_lang="es"),
            "Esta es una frase.")
        assert_equal(
            self.translator.translate("Esta es una frase.",
                                      from_lang="es", to_lang="en"),
            "This is a sentence.")

    def test_translate_non_ascii(self):
        """Arabic and Chinese sources translate to English."""
        arabic = "ذات سيادة كاملة"
        assert_equal(
            self.translator.translate(arabic, from_lang='ar', to_lang='en'),
            "With full sovereignty")

        chinese = "美丽优于丑陋"
        assert_equal(
            self.translator.translate(chinese, from_lang="zh-CN", to_lang='en'),
            "Beautiful is better than ugly")

    @mock.patch('textblob.translate.Translator._validate_translation', mock.MagicMock())
    def test_translate_unicode_escape(self):
        """'&' survives a same-language round trip without escaping."""
        text = "Jenner & Block LLP"
        translated = self.translator.translate(text, from_lang="en", to_lang="en")
        assert_equal(translated, "Jenner & Block LLP")
예제 #15
0
class BaseBlob(StringlikeMixin, BlobComparableMixin):
    """An abstract base class that all textblob classes will inherit from.
    Includes words, POS tag, NP, and word count properties. Also includes
    basic dunder and string methods for making objects like Python strings.

    :param text: A string.
    :param tokenizer: (optional) A tokenizer instance. If ``None``,
        defaults to :class:`WordTokenizer() <textblob.tokenizers.WordTokenizer>`.
    :param np_extractor: (optional) An NPExtractor instance. If ``None``,
        defaults to :class:`FastNPExtractor() <textblob.en.np_extractors.FastNPExtractor>`.
    :param pos_tagger: (optional) A Tagger instance. If ``None``,
        defaults to :class:`NLTKTagger <textblob.en.taggers.NLTKTagger>`.
    :param analyzer: (optional) A sentiment analyzer. If ``None``,
        defaults to :class:`PatternAnalyzer <textblob.en.sentiments.PatternAnalyzer>`.
    :param parser: A parser. If ``None``, defaults to
        :class:`PatternParser <textblob.en.parsers.PatternParser>`.
    :param classifier: A classifier.

    .. versionchanged:: 0.6.0
        ``clean_html`` parameter deprecated, as it was in NLTK.
    """
    # Class-level defaults shared by all blobs; instances may override via
    # the constructor arguments (see _initialize_models).
    np_extractor = FastNPExtractor()
    pos_tagger = NLTKTagger()
    tokenizer = WordTokenizer()
    translator = Translator()
    analyzer = PatternAnalyzer()
    parser = PatternParser()

    def __init__(self,
                 text,
                 tokenizer=None,
                 pos_tagger=None,
                 np_extractor=None,
                 analyzer=None,
                 parser=None,
                 classifier=None,
                 clean_html=False):
        if not isinstance(text, basestring):
            raise TypeError('The `text` argument passed to `__init__(text)` '
                            'must be a string, not {0}'.format(type(text)))
        if clean_html:
            raise NotImplementedError(
                "clean_html has been deprecated. "
                "To remove HTML markup, use BeautifulSoup's "
                "get_text() function")
        self.raw = self.string = text
        self.stripped = lowerstrip(self.raw, all=True)
        _initialize_models(self, tokenizer, pos_tagger, np_extractor, analyzer,
                           parser, classifier)

    @cached_property
    def words(self):
        """Return a list of word tokens. This excludes punctuation characters.
        If you want to include punctuation characters, access the ``tokens``
        property.

        :returns: A :class:`WordList <WordList>` of word tokens.
        """
        return WordList(word_tokenize(self.raw, include_punc=False))

    @cached_property
    def tokens(self):
        """Return a list of tokens, using this blob's tokenizer object
        (defaults to :class:`WordTokenizer <textblob.tokenizers.WordTokenizer>`).
        """
        return WordList(self.tokenizer.tokenize(self.raw))

    def tokenize(self, tokenizer=None):
        """Return a list of tokens, using ``tokenizer``.

        :param tokenizer: (optional) A tokenizer object. If None, defaults to
            this blob's default tokenizer.
        """
        t = tokenizer if tokenizer is not None else self.tokenizer
        return WordList(t.tokenize(self.raw))

    def parse(self, parser=None):
        """Parse the text.

        :param parser: (optional) A parser instance. If ``None``, defaults to
            this blob's default parser.

        .. versionadded:: 0.6.0
        """
        p = parser if parser is not None else self.parser
        return p.parse(self.raw)

    def classify(self):
        """Classify the blob using the blob's ``classifier``."""
        if self.classifier is None:
            raise NameError("This blob has no classifier. Train one first!")
        return self.classifier.classify(self.raw)

    @cached_property
    def sentiment(self):
        """Return a tuple of form (polarity, subjectivity) where polarity
        is a float within the range [-1.0, 1.0] and subjectivity is a float
        within the range [0.0, 1.0] where 0.0 is very objective and 1.0 is
        very subjective.

        :rtype: namedtuple of the form ``Sentiment(polarity, subjectivity)``
        """
        return self.analyzer.analyze(self.raw)

    @cached_property
    def sentiment_assessments(self):
        """Return a tuple of form (polarity, subjectivity, assessments) where
        polarity is a float within the range [-1.0, 1.0], subjectivity is a
        float within the range [0.0, 1.0] where 0.0 is very objective and 1.0
        is very subjective, and assessments is a list of polarity and
        subjectivity scores for the assessed tokens.

        :rtype: namedtuple of the form ``Sentiment(polarity, subjectivity,
        assessments)``
        """
        return self.analyzer.analyze(self.raw, keep_assessments=True)

    @cached_property
    def polarity(self):
        """Return the polarity score as a float within the range [-1.0, 1.0]

        :rtype: float
        """
        # Uses a fresh PatternAnalyzer (not self.analyzer) so the score is
        # always Pattern-based regardless of the configured analyzer.
        return PatternAnalyzer().analyze(self.raw)[0]

    @cached_property
    def subjectivity(self):
        """Return the subjectivity score as a float within the range [0.0, 1.0]
        where 0.0 is very objective and 1.0 is very subjective.

        :rtype: float
        """
        # See note on ``polarity``: intentionally Pattern-based.
        return PatternAnalyzer().analyze(self.raw)[1]

    @cached_property
    def noun_phrases(self):
        """Returns a list of noun phrases for this blob."""
        return WordList([
            phrase.strip().lower()
            for phrase in self.np_extractor.extract(self.raw)
            if len(phrase) > 1
        ])

    @cached_property
    def pos_tags(self):
        """Returns a list of tuples of the form (word, POS tag).

        Example:
        ::

            [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
                    ('Thursday', 'NNP'), ('morning', 'NN')]

        :rtype: list of tuples
        """
        if isinstance(self, TextBlob):
            # Tag sentence-by-sentence, then flatten into one list.
            return [
                val for sublist in [s.pos_tags for s in self.sentences]
                for val in sublist
            ]
        else:
            return [(Word(word, pos_tag=t), unicode(t))
                    for word, t in self.pos_tagger.tag(self)
                    if not PUNCTUATION_REGEX.match(unicode(t))]

    # Alias so ``blob.tags`` behaves exactly like ``blob.pos_tags``.
    tags = pos_tags

    @cached_property
    def word_counts(self):
        """Dictionary of word frequencies in this text.
        """
        counts = defaultdict(int)
        stripped_words = [lowerstrip(word) for word in self.words]
        for word in stripped_words:
            counts[word] += 1
        return counts

    @cached_property
    def np_counts(self):
        """Dictionary of noun phrase frequencies in this text.
        """
        counts = defaultdict(int)
        for phrase in self.noun_phrases:
            counts[phrase] += 1
        return counts

    def ngrams(self, n=3):
        """Return a list of n-grams (tuples of n successive words) for this
        blob.

        :rtype: List of :class:`WordLists <WordList>`
        """
        if n <= 0:
            return []
        grams = [
            WordList(self.words[i:i + n])
            for i in range(len(self.words) - n + 1)
        ]
        return grams

    def translate(self, from_lang="auto", to="en"):
        """Translate the blob to another language.
        Uses the Google Translate API. Returns a new TextBlob.

        Requires an internet connection.

        Usage:
        ::

            >>> b = TextBlob("Simple is better than complex")
            >>> b.translate(to="es")
            TextBlob('Lo simple es mejor que complejo')

        Language code reference:
            https://developers.google.com/translate/v2/using_rest#language-params

        .. versionadded:: 0.5.0.

        :param str from_lang: Language to translate from. If ``None``, will attempt
            to detect the language.
        :param str to: Language to translate to.
        :rtype: :class:`BaseBlob <BaseBlob>`
        """
        return self.__class__(
            self.translator.translate(self.raw,
                                      from_lang=from_lang,
                                      to_lang=to))

    def detect_language(self):
        """Detect the blob's language using the Google Translate API.

        Requires an internet connection.

        Usage:
        ::

            >>> b = TextBlob("bonjour")
            >>> b.detect_language()
            u'fr'

        Language code reference:
            https://developers.google.com/translate/v2/using_rest#language-params

        .. versionadded:: 0.5.0

        :rtype: str
        """
        return self.translator.detect(self.raw)

    def correct(self):
        """Attempt to correct the spelling of a blob.

        .. versionadded:: 0.6.0

        :rtype: :class:`BaseBlob <BaseBlob>`
        """
        # regex matches: word or punctuation or whitespace
        # Raw string: non-raw "\w" / "\s" are invalid escape sequences
        # (SyntaxWarning in Python 3.12+).
        tokens = nltk.tokenize.regexp_tokenize(self.raw, r"\w+|[^\w\s]|\s")
        corrected = (Word(w).correct() for w in tokens)
        ret = ''.join(corrected)
        return self.__class__(ret)

    def _cmpkey(self):
        """Key used by ComparableMixin to implement all rich comparison
        operators.
        """
        return self.raw

    def _strkey(self):
        """Key used by StringlikeMixin to implement string methods."""
        return self.raw

    def __hash__(self):
        return hash(self._cmpkey())

    def __add__(self, other):
        '''Concatenates two text objects the same way Python strings are
        concatenated.

        Arguments:
        - `other`: a string or a text object
        '''
        if isinstance(other, basestring):
            return self.__class__(self.raw + other)
        elif isinstance(other, BaseBlob):
            return self.__class__(self.raw + other.raw)
        else:
            raise TypeError(
                'Operands must be either strings or {0} objects'.format(
                    self.__class__.__name__))

    def split(self, sep=None, maxsplit=sys.maxsize):
        """Behaves like the built-in str.split() except returns a
        WordList.

        :rtype: :class:`WordList <WordList>`
        """
        return WordList(self._strkey().split(sep, maxsplit))
# Example #16
class TestTranslatorIntegration(unittest.TestCase):
    """Integration tests that actually call the translation API."""

    def setUp(self):
        self.translator = Translator()

    def test_detect(self):
        """Detect the language of short Latin-script snippets."""
        for text, expected in (('Hola', "es"), ('Hello', "en")):
            assert_equal(self.translator.detect(text), expected)

    def test_detect_non_ascii(self):
        """Detection should handle non-Latin scripts."""
        samples = [
            ("关于中文维基百科", 'zh-CN'),
            ("известен още с псевдонимите", "bg"),
            ("Избранная статья", "ru"),
        ]
        for text, expected in samples:
            assert_equal(self.translator.detect(text), expected)

    def test_translate_spaces(self):
        """Spacing and punctuation survive translation."""
        result = self.translator.translate(
            "Hola, me llamo Adrián! Cómo estás? Yo bien",
            from_lang="es",
            to_lang="en")
        assert_equal(result, "Hi, my name is Adrián! How are you? I am good")

    def test_translate_missing_from_language_auto_detects(self):
        """Omitting ``from_lang`` falls back to auto-detection."""
        result = self.translator.translate("Ich hole das Bier", to_lang="en")
        assert_equal(result, "I'll get the beer")

    def test_translate_text(self):
        """Round-trip a simple sentence between English and Spanish."""
        assert_equal(
            self.translator.translate("This is a sentence.", to_lang="es"),
            "Esta es una frase.")
        assert_equal(
            self.translator.translate("Esta es una frase.",
                                      from_lang="es",
                                      to_lang="en"),
            "This is a phrase.")

    def test_translate_non_ascii(self):
        """Translate from Arabic and Chinese into English."""
        assert_equal(
            self.translator.translate("ذات سيادة كاملة",
                                      from_lang='ar',
                                      to_lang='en'),
            "Fully sovereign")
        assert_equal(
            self.translator.translate("美丽比丑陋更好",
                                      from_lang="zh-CN",
                                      to_lang='en'),
            "Beautiful is better than ugly")

    @mock.patch('textblob.translate.Translator._validate_translation',
                mock.MagicMock())
    def test_translate_unicode_escape(self):
        """Ampersands must not come back HTML-escaped."""
        assert_equal(
            self.translator.translate("Jenner & Block LLP",
                                      from_lang="en",
                                      to_lang="en"),
            "Jenner & Block LLP")
# Example #17
class Word(unicode):
    """A simple word representation. Includes methods for inflection,
    translation, and WordNet integration.
    """

    translator = Translator()

    def __new__(cls, string, pos_tag=None):
        """Create the underlying string instance.

        Overridden so that the extra ``pos_tag`` argument accepted by
        ``__init__`` never reaches the immutable base constructor.
        """
        return super(Word, cls).__new__(cls, string)

    def __init__(self, string, pos_tag=None):
        self.string = string
        self.pos_tag = pos_tag

    def __repr__(self):
        return repr(self.string)

    def __str__(self):
        return self.string

    def singularize(self):
        """Return the singular version of the word as a string."""
        return Word(_singularize(self.string))

    def pluralize(self):
        """Return the plural version of the word as a string."""
        return Word(_pluralize(self.string))

    def translate(self, from_lang='auto', to="en"):
        """Translate the word to another language using Google's
        Translate API.

        .. versionadded:: 0.5.0
        """
        return self.translator.translate(self.string,
                                         from_lang=from_lang,
                                         to_lang=to)

    def detect_language(self):
        """Detect the word's language using Google's Translate API.

        .. versionadded:: 0.5.0
        """
        return self.translator.detect(self.string)

    def spellcheck(self):
        """Return a list of (word, confidence) tuples of spelling corrections.

        Based on: Peter Norvig, "How to Write a Spelling Corrector"
        (http://norvig.com/spell-correct.html) as implemented in the pattern
        library.

        .. versionadded:: 0.6.0
        """
        return suggest(self.string)

    def correct(self):
        """Return the spelling correction with the highest confidence.

        .. versionadded:: 0.6.0
        """
        return Word(self.spellcheck()[0][0])

    @cached_property
    @requires_nltk_corpus
    def lemma(self):
        """Return the lemma of this word using Wordnet's morphy function.
        """
        if self.pos_tag is None:
            wn_pos = None
        else:
            wn_pos = _penn_to_wordnet(self.pos_tag)
        return self.lemmatize(pos=wn_pos)

    @requires_nltk_corpus
    def lemmatize(self, pos=None):
        """Return the lemma for a word using WordNet's morphy function.

        :param pos: Part of speech to filter upon. If `None`, defaults to
            ``_wordnet.NOUN``.

        .. versionadded:: 0.8.1
        """
        wn_pos = _wordnet.NOUN if pos is None else pos
        return nltk.stem.WordNetLemmatizer().lemmatize(self.string, wn_pos)

    # Shared stemmer instances; also usable as the ``stemmer`` argument to
    # ``stem`` below.
    PorterStemmer = nltk.stem.porter.PorterStemmer()
    LancasterStemmer = nltk.stem.lancaster.LancasterStemmer()
    SnowballStemmer = nltk.stem.snowball.SnowballStemmer("english")

    def stem(self, stemmer=PorterStemmer):
        """Stem a word using various NLTK stemmers. (Default: Porter Stemmer)

        .. versionadded:: 0.12.0
        """
        return stemmer.stem(self.string)

    @cached_property
    def synsets(self):
        """The list of Synset objects for this Word.

        :rtype: list of Synsets

        .. versionadded:: 0.7.0
        """
        return self.get_synsets(pos=None)

    @cached_property
    def definitions(self):
        """The list of definitions for this word, one per synset.

        .. versionadded:: 0.7.0
        """
        return self.define(pos=None)

    def get_synsets(self, pos=None):
        """Return a list of Synset objects for this word.

        :param pos: A part-of-speech tag to filter upon. If ``None``, all
            synsets for all parts of speech will be loaded.

        :rtype: list of Synsets

        .. versionadded:: 0.7.0
        """
        return _wordnet.synsets(self.string, pos)

    def define(self, pos=None):
        """Return a list of definitions for this word, one per synset.

        :param pos: A part-of-speech tag to filter upon. If ``None``,
            definitions for all parts of speech will be loaded.
        :rtype: List of strings

        .. versionadded:: 0.7.0
        """
        return [s.definition() for s in self.get_synsets(pos=pos)]
# Example #18
class Word(unicode):
    """A simple word representation. Includes methods for inflection,
    translation, and WordNet integration.
    """

    translator = Translator()

    def __new__(cls, string, pos_tag=None):
        """Create the underlying string instance.

        Overridden so that the extra ``pos_tag`` argument accepted by
        ``__init__`` never reaches the immutable base constructor.
        """
        return super(Word, cls).__new__(cls, string)

    def __init__(self, string, pos_tag=None):
        self.string = string
        self.pos_tag = pos_tag

    def __repr__(self):
        return repr(self.string)

    def __str__(self):
        return self.string

    def singularize(self):
        """Return the singular version of the word as a string."""
        return Word(_singularize(self.string))

    def pluralize(self):
        """Return the plural version of the word as a string."""
        return Word(_pluralize(self.string))

    def translate(self, from_lang=None, to="en"):
        """Translate the word to another language using Google's
        Translate API.

        .. versionadded:: 0.5.0
        """
        # Detect the source language up front when the caller omits it.
        lang = from_lang
        if lang is None:
            lang = self.translator.detect(self.string)
        return self.translator.translate(self.string,
                                         from_lang=lang,
                                         to_lang=to)

    def detect_language(self):
        """Detect the word's language using Google's Translate API.

        .. versionadded:: 0.5.0
        """
        return self.translator.detect(self.string)

    def spellcheck(self):
        """Return a list of (word, confidence) tuples of spelling corrections.

        Based on: Peter Norvig, "How to Write a Spelling Corrector"
        (http://norvig.com/spell-correct.html) as implemented in the pattern
        library.

        .. versionadded:: 0.6.0
        """
        return suggest(self.string)

    def correct(self):
        """Return the spelling correction with the highest confidence.

        .. versionadded:: 0.6.0
        """
        return Word(self.spellcheck()[0][0])

    @cached_property
    @requires_nltk_corpus
    def lemma(self):
        """Return the lemma for a word using WordNet's morphy function."""
        return nltk.stem.WordNetLemmatizer().lemmatize(self.string)

    @cached_property
    def synsets(self):
        """The list of Synset objects for this Word.

        :rtype: list of Synsets

        .. versionadded:: 0.7.0
        """
        return self.get_synsets(pos=None)

    @cached_property
    def definitions(self):
        """The list of definitions for this word, one per synset.

        .. versionadded:: 0.7.0
        """
        return self.define(pos=None)

    def get_synsets(self, pos=None):
        """Return a list of Synset objects for this word.

        :param pos: A part-of-speech tag to filter upon. If ``None``, all
            synsets for all parts of speech will be loaded.

        :rtype: list of Synsets

        .. versionadded:: 0.7.0
        """
        return _wordnet.synsets(self.string, pos)

    def define(self, pos=None):
        """Return a list of definitions for this word, one per synset.

        :param pos: A part-of-speech tag to filter upon. If ``None``,
            definitions for all parts of speech will be loaded.

        .. versionadded:: 0.7.0
        """
        # NOTE(review): ``definition`` is read as an attribute here, while a
        # sibling version of this class calls it as ``definition()``.
        # Presumably this targets an older NLTK API — confirm before changing.
        return [s.definition for s in self.get_synsets(pos=pos)]