예제 #1
0
def total_translate():
    """Translate every line of ``cn.txt`` and write the results to ``en.txt``.

    One request URL is built per stripped input line (the token comes from
    ``translator.token_acquirer``) and all URLs are handed to ``work`` in a
    single batch.  For every result that exposes a ``status_code`` attribute
    exactly one line is written: the joined translation when the request
    succeeded and both source and target text could be extracted, an empty
    line otherwise.  Results without a ``status_code`` write nothing.
    """
    # Both files are managed by ``with`` so the output is flushed and closed
    # even when a request or the parsing raises.
    with open('en.txt', mode='w', encoding='utf-8') as file2, \
            open('cn.txt', mode='r', encoding='utf-8') as f:
        urls = []
        for line in f:
            line = line.strip()
            token = translator.token_acquirer.do(line)
            url = "https://translate.google.cn/translate_a/single?client=t&sl=zh-cn&tl=en&hl=en&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=3&tsel=0&kc=1&tk={0}&q={1}".format(
                token, line)
            urls.append(url)

        res = work(urls)
        for num, r in enumerate(res, 1):
            if hasattr(r, 'status_code'):
                translated = ''
                if r.status_code == 200:
                    try:
                        parsed = format_json(r.text)
                        target = ''.join(d[0] or '' for d in parsed[0])
                        source = ''.join(d[1] or '' for d in parsed[0])
                    except Exception as e:
                        logger.error('when format:%s', e)
                        # One placeholder per argument; the previous
                        # two-placeholder format string could never be
                        # rendered, so the payload was lost.
                        logger.error('response text:\n%s', r.text)
                    else:
                        # Only keep the translation when both halves of the
                        # response were non-empty.
                        if source and target:
                            translated = target
                file2.write(translated + '\n')
            logger.info('finish %s sentence, now at %s', len(res), num)
예제 #2
0
    def _translate(self, text, dest='en', src='auto'):
        """Translate ``text`` and return the translated text as one string.

        Texts shorter than 200 characters are requested with GET.  Longer
        texts are sent with POST: ``q`` travels in the request body and the
        remaining parameters are appended to the URL by hand.
        """
        params = utils.build_params(query=text,
                                    src=src,
                                    dest=dest,
                                    token=self.token_acquirer.do(text))
        endpoint = urls.TRANSLATE.format(host=self._pick_service_url())

        if len(text) >= 200:
            body = {'q': params.pop('q')}
            pairs = []
            for name in params:
                entry = params[name]
                if type(entry) is list:
                    # A list value contributes one ``name=item&`` pair per item.
                    pairs.extend(name + '=' + item + '&' for item in entry)
                else:
                    # str() returns plain strings unchanged, so a single
                    # branch covers both string and non-string values.
                    pairs.append(name + '=' + str(entry) + '&')
            response = self.session.post(endpoint + '?' + ''.join(pairs),
                                         data=body)
        else:
            response = self.session.get(endpoint, params=params)

        parsed = utils.format_json(response.text)
        # The first element holds the translated chunks; falsy chunks are
        # treated as empty strings.
        return ''.join([chunk[0] if chunk[0] else '' for chunk in parsed[0]])
예제 #3
0
    def _translate(self, text, dest='en', src='auto'):
        """Validate the language codes, request a translation and return the parsed response.

        ``src`` may be ``'auto'``; any other source code and every
        destination code must be a key of ``LANGUAGES`` or of
        ``SPECIAL_CASES`` (in which case it is replaced by the mapped value).

        Raises:
            ValueError: if ``src`` or ``dest`` is found in neither table.
        """
        if src != 'auto' and src not in LANGUAGES:
            if src not in SPECIAL_CASES:
                raise ValueError('invalid source language')
            src = SPECIAL_CASES[src]

        if dest not in LANGUAGES:
            if dest not in SPECIAL_CASES:
                raise ValueError('invalid destination language')
            dest = SPECIAL_CASES[dest]

        # On Python 2 a byte string is decoded before it is used.
        if isinstance(text, str) and not PY3:  # pragma: nocover
            text = text.decode('utf-8')

        query = utils.build_params(query=text,
                                   src=src,
                                   dest=dest,
                                   token=self.token_acquirer.do(text))
        endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
        response = self.session.get(endpoint, params=query)

        return utils.format_json(response.text)
예제 #4
0
    def _translate(self, text, dest, src, override):
        """Request a translation and return ``(data, response)``.

        On HTTP 200 ``data`` is the parsed response body.  On any other
        status an exception is raised when ``self.raise_exception`` is set;
        otherwise ``data`` is a copy of ``DUMMY_DATA`` whose first slot
        (``[0][0][0]``) is set to the untranslated ``text``.

        Raises:
            Exception: on a non-200 status when ``self.raise_exception`` is
                truthy.
        """
        token = 'xxxx'  # dummy default value here as it is not used by api client
        if self.client_type == 'webapp':
            token = self.token_acquirer.do(text)

        params = utils.build_params(client=self.client_type,
                                    query=text,
                                    src=src,
                                    dest=dest,
                                    token=token,
                                    override=override)

        url = urls.TRANSLATE.format(host=self._pick_service_url())
        r = self.client.get(url, params=params)

        if r.status_code == 200:
            data = utils.format_json(r.text)
            return data, r

        if self.raise_exception:
            raise Exception('Unexpected status code "{}" from {}'.format(
                r.status_code, self.service_urls))

        # Fill in a private copy: writing into the module-level DUMMY_DATA
        # and returning it would make every earlier fallback result change
        # whenever a later call fails.
        import copy
        fallback = copy.deepcopy(DUMMY_DATA)
        fallback[0][0][0] = text
        return fallback, r
예제 #5
0
def _translate(text, dest, src):
    """Send a translation request for ``text`` and return the raw response.

    The request URL is printed, and the response body is run through
    ``utils.format_json`` (so a body it cannot parse still raises), but the
    parsed result is discarded: the caller receives the response object.
    """
    query = build_params(query=text, src=src, dest=dest,
                         token=token_acquirer.do(text))
    host = _pick_service_url()
    endpoint = 'https://{host}/translate_a/single'.format(host=host)
    response = session.get(endpoint, params=query)
    print(endpoint)
    # Parsed only for its side effect of failing on an unparsable body.
    utils.format_json(response.text)
    return response
예제 #6
0
def test_format_json():
    """``format_json`` must turn the empty slots between commas into ``None``."""
    raw = '[,,"en",,,,0.96954316,,[["en"],,[0.96954316]]]'
    expected = [
        None, None, 'en', None, None, None, 0.96954316, None,
        [['en'], None, [0.96954316]],
    ]

    assert utils.format_json(raw) == expected
예제 #7
0
    def _translate(self, text, dest, src, override):
        """Request a translation of ``text`` and return the parsed response body.

        ``override`` is forwarded unchanged to ``utils.build_params``.
        """
        query = utils.build_params(query=text,
                                   src=src,
                                   dest=dest,
                                   token=self.token_acquirer.do(text),
                                   override=override)
        endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
        response = self.session.get(endpoint, params=query)
        return utils.format_json(response.text)
예제 #8
0
    def _translate(self, text, dest, src):
        """Request a translation of ``text`` and return the parsed response body."""
        # On Python 2 a byte string is decoded before it is used.
        if isinstance(text, str) and not PY3:  # pragma: nocover
            text = text.decode('utf-8')

        query = utils.build_params(query=text,
                                   src=src,
                                   dest=dest,
                                   token=self.token_acquirer.do(text))
        endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
        response = self.session.get(endpoint, params=query)

        return utils.format_json(response.text)
예제 #9
0
    def _translate(self, text, dest, src):
        """Request a translation of ``text`` and return the parsed response body."""
        # On Python 2 a byte string is decoded before it is used.
        if isinstance(text, str) and not PY3:  # pragma: nocover
            text = text.decode('utf-8')

        query = utils.build_params(query=text,
                                   src=src,
                                   dest=dest,
                                   token=self.token_acquirer.do(text))
        endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
        # NOTE(review): if self.session is a requests session, verify=False
        # switches off TLS certificate verification for this call - confirm
        # that this is intentional.
        response = self.session.get(endpoint, verify=False, params=query)

        return utils.format_json(response.text)
예제 #10
0
    def _translate(self, text, dest, src):
        """Request a translation with ``client=webapp``.

        Returns the parsed response body on HTTP 200 and ``None`` for any
        other status code.
        """
        # On Python 2 a byte string is decoded before it is used.
        if isinstance(text, str) and not PY3:  # pragma: nocover
            text = text.decode('utf-8')

        query = utils.build_params(query=text,
                                   src=src,
                                   dest=dest,
                                   token=self.token_acquirer.do(text))
        query['client'] = 'webapp'
        endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
        response = self.session.get(endpoint, params=query)

        if response.status_code != 200:
            return None
        return utils.format_json(response.text)
예제 #11
0
 def _translate(self, text, dest, src):
     """Request a translation with ``client=webapp`` and return the parsed data.

     On HTTP 200 the parsed response body is returned.  On any other status
     an exception is raised when ``self.raise_exception`` is set; otherwise
     a copy of ``DUMMY_DATA`` is returned whose first slot (``[0][0][0]``)
     is set to the untranslated ``text``.

     Raises:
         Exception: on a non-200 status when ``self.raise_exception`` is
             truthy.
     """
     token = self.token_acquirer.do(text)
     params = utils.build_params(query=text,
                                 src=src,
                                 dest=dest,
                                 token=token)
     params['client'] = 'webapp'
     url = urls.TRANSLATE.format(host=self._pick_service_url())
     r = self.session.get(url, params=params)

     if r.status_code == 200:
         return utils.format_json(r.text)

     if self.raise_exception:
         raise Exception('Unexpected status code "{}" from {}'.format(
             r.status_code, self.service_urls))

     # Fill in a private copy: writing into the module-level DUMMY_DATA and
     # returning it would make every earlier fallback result change
     # whenever a later call fails.
     import copy
     fallback = copy.deepcopy(DUMMY_DATA)
     fallback[0][0][0] = text
     return fallback
예제 #12
0
    def _translate(self, text, dest, src):
        """Request a translation of ``text`` and return the parsed data.

        On HTTP 200 the parsed response body is returned.  On any other
        status an exception is raised when ``self.raise_exception`` is set;
        otherwise a copy of ``DUMMY_DATA`` is returned whose first slot
        (``[0][0][0]``) is set to the untranslated ``text``.

        Raises:
            Exception: on a non-200 status when ``self.raise_exception`` is
                truthy.
        """
        # On Python 2 a byte string is decoded before it is used.
        if not PY3 and isinstance(text, str):  # pragma: nocover
            text = text.decode('utf-8')

        token = self.token_acquirer.do(text)
        params = utils.build_params(query=text,
                                    src=src,
                                    dest=dest,
                                    token=token)
        url = urls.TRANSLATE.format(host=self._pick_service_url())
        r = self.session.get(url, params=params)

        if r.status_code == 200:
            return utils.format_json(r.text)

        if self.raise_exception:
            raise Exception('Unexpected status code "{}" from {}'.format(
                r.status_code, self.service_urls))

        # Fill in a private copy: writing into the module-level DUMMY_DATA
        # and returning it would make every earlier fallback result change
        # whenever a later call fails.
        import copy
        fallback = copy.deepcopy(DUMMY_DATA)
        fallback[0][0][0] = text
        return fallback
예제 #13
0
def test_format_malformed_json():
    """``format_json`` must raise ``ValueError`` for input with unbalanced brackets."""
    malformed = '[,,"en",,,,0.96954316,,[["en"],,0.96954316]]]'

    with raises(ValueError):
        utils.format_json(malformed)
예제 #14
0
def totaltranslate():
    """Batch-translate article files line by line.

    For every article number ``i`` in ``range(start, end)`` each line of
    ``id_article_content_<i>.json`` is sent for translation and the result
    is appended to ``id_article_<i>.json``:

    * HTTP 200: the translation is written, or an empty line when nothing
      could be extracted from the response.
    * HTTP 403 / 413: the line is split on commas, every piece is
      translated on its own and the pieces are written re-joined with
      commas.
    * any other status: ``error!`` is printed and nothing is written.

    Results without a ``status_code`` attribute are skipped.  Random pauses
    of 1-3 seconds between requests are meant to avoid getting blocked.
    """
    start = 1  # number of the first article to translate
    end = 100  # upper bound of the article range; range() stops at end - 1

    for i in range(start, end):
        print(i)
        # ``with`` closes the output file even when a request or the parsing
        # raises; it is opened in append mode, so earlier output is kept.
        with io.open(
                'C:/Users/86135/Desktop/id_work/final_data/id_article_translate/id_article_'
                + str(i) + '.json',
                mode='a',
                encoding='utf-8') as file2:
            with io.open(
                    'C:/Users/86135/Desktop/id_work/final_data/id_article_content/id_article_content_'
                    + str(i) + '.json',
                    mode='r',
                    encoding='utf-8') as f:
                for line in f:
                    line = line.strip()  # drop leading/trailing whitespace
                    res = work([_build_translate_url(line)])
                    print(res)
                    for r in res:
                        if not hasattr(r, 'status_code'):
                            continue
                        if r.status_code == 200:  # request succeeded
                            source, target = _extract_translation(r)
                            if source and target:
                                file2.write(target + '\n')
                            else:
                                file2.write('\n')
                        elif r.status_code in (403, 413):
                            # NOTE(review): the original called
                            # line.replace(" #|\\|\"|& ", "") here and threw
                            # the result away, so it never changed the text.
                            # If stripping those characters was intended it
                            # still has to be implemented.
                            pieces = _translate_in_parts(line)
                            file2.write(",".join(pieces) + "\n")
                        else:
                            print("error!")
                        # pause between requests to avoid getting blocked
                        time.sleep(random.randint(1, 3))

                    logger.info('finished articles: %s', i)
                    time.sleep(random.randint(1, 3))


def _build_translate_url(sentence):
    """Build the request URL (``sl=id``, ``tl=zh-cn``) for ``sentence``."""
    token = translator.token_acquirer.do(sentence)
    return "https://translate.google.cn/translate_a/single?client=t&sl=id&tl=zh-cn&hl=zh-cn&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=3&tsel=0&kc=1&tk={0}&q={1}".format(
        token, sentence)


def _extract_translation(response):
    """Return ``(source, target)`` parsed from ``response.text``.

    The target text is printed as soon as it has been extracted.  When the
    payload cannot be parsed, the error and the raw payload are logged and
    two empty strings are returned.
    """
    try:
        parsed = format_json(response.text)
        target = ''.join([d[0] if d[0] else '' for d in parsed[0]])
        print(target)
        source = ''.join([d[1] if d[1] else '' for d in parsed[0]])
    except Exception as e:
        logger.error('when format:%s', e)
        # One placeholder per argument; the previous two-placeholder format
        # string could never be rendered, so the payload was lost.
        logger.error('response text:\n%s', response.text)
        return '', ''
    return source, target


def _translate_in_parts(sentence):
    """Translate ``sentence`` one comma-separated piece at a time.

    Returns one entry per successful (HTTP 200) response: the translation,
    or ``" "`` when nothing could be extracted.  Pieces whose request did
    not succeed contribute no entry.
    """
    results = []
    for part in sentence.split(","):
        res = work([_build_translate_url(part.strip())])
        print(res)
        for r in res:
            if hasattr(r, 'status_code') and r.status_code == 200:
                source, target = _extract_translation(r)
                results.append(target if source and target else " ")
    return results