def total_translate():
    """Translate every line of ``cn.txt`` (zh-cn) to English into ``en.txt``.

    One request URL is built per input line and the whole list is handed to
    ``work`` (defined elsewhere), which is expected to return one
    response-like object per URL.  Exactly one line is written to ``en.txt``
    for every returned result: the translated text on success, or an empty
    line when the request failed, the payload could not be parsed, or the
    parsed source/target text is empty.  This keeps the output aligned
    line-for-line with the results.
    """
    from urllib.parse import quote

    url_template = "https://translate.google.cn/translate_a/single?client=t&sl=zh-cn&tl=en&hl=en&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=3&tsel=0&kc=1&tk={0}&q={1}"

    # Both files are managed by ``with`` so they are closed even if a
    # request or a write raises.
    with open('en.txt', mode='w', encoding='utf-8') as file2, \
            open('cn.txt', mode='r', encoding='utf-8') as f:
        urls = []
        for line in f:
            line = line.strip()
            # The token is derived from the raw text; only the copy placed in
            # the URL is percent-encoded, so characters such as '&', '#' or
            # '%' cannot truncate or corrupt the query string.
            token = translator.token_acquirer.do(line)
            urls.append(url_template.format(token, quote(line, safe='')))

        res = work(urls)
        num = 0
        for r in res:
            num += 1
            target = ''
            # getattr() also covers results that carry no status code at all
            # (e.g. a failed request); those produce an empty line too.
            if getattr(r, 'status_code', None) == 200:
                try:
                    a = format_json(r.text)
                    target = ''.join([d[0] if d[0] else '' for d in a[0]])
                    source = ''.join([d[1] if d[1] else '' for d in a[0]])
                except Exception as e:
                    logger.error('when format:%s', e)
                    logger.error('%s', r.text)
                    target = ''
                else:
                    if not source:
                        target = ''
            file2.write(target + '\n')
        logger.info('finish %s sentence, now at %s', len(res), num)
def _translate(self, text, dest='en', src='auto'):
    """Translate *text* and return the translated string.

    Texts shorter than 200 characters are sent with GET.  Longer texts are
    sent with POST: the text travels in the request body as ``q`` and the
    remaining parameters go in the query string.

    Args:
        text: The text to translate.
        dest: Destination language code passed to ``utils.build_params``.
        src: Source language code passed to ``utils.build_params``.

    Returns:
        The concatenation of the first element of every entry in the first
        item of the parsed response (empty/None elements are skipped).
    """
    token = self.token_acquirer.do(text)
    params = utils.build_params(query=text, src=src, dest=dest, token=token)
    url = urls.TRANSLATE.format(host=self._pick_service_url())

    if len(text) < 200:
        response = self.session.get(url, params=params)
    else:
        body = {'q': params.pop('q')}
        # Let the session build the query string, exactly as the GET branch
        # does: list values become repeated keys and every value is properly
        # escaped, instead of being concatenated by hand.
        response = self.session.post(url, params=params, data=body)

    result = utils.format_json(response.text)
    return ''.join([d[0] or '' for d in result[0]])
def _translate(self, text, dest='en', src='auto'):
    """Validate the language codes, query the service and return parsed data.

    Args:
        text: The text to translate.
        dest: Destination language; must be a key of ``LANGUAGES`` or of
            ``SPECIAL_CASES`` (in which case it is mapped through it).
        src: Source language; ``'auto'`` is accepted as-is, otherwise the
            same rule as for ``dest`` applies.

    Returns:
        The value produced by ``utils.format_json`` for the response body.

    Raises:
        ValueError: If ``src`` or ``dest`` is not a recognised language.
    """
    if src != 'auto' and src not in LANGUAGES:
        if src not in SPECIAL_CASES:
            raise ValueError('invalid source language')
        src = SPECIAL_CASES[src]

    if dest not in LANGUAGES:
        if dest not in SPECIAL_CASES:
            raise ValueError('invalid destination language')
        dest = SPECIAL_CASES[dest]

    if not PY3 and isinstance(text, str):  # pragma: nocover
        text = text.decode('utf-8')

    tk = self.token_acquirer.do(text)
    query = utils.build_params(query=text, src=src, dest=dest, token=tk)
    endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
    response = self.session.get(endpoint, params=query)
    return utils.format_json(response.text)
def _translate(self, text, dest, src, override):
    """Request a translation and return ``(data, response)``.

    A real token is acquired only when ``self.client_type`` is ``'webapp'``;
    otherwise a placeholder token is sent.

    Args:
        text: The text to translate.
        dest: Destination language code.
        src: Source language code.
        override: Extra parameters forwarded to ``utils.build_params``.

    Returns:
        A tuple ``(data, r)``.  On HTTP 200, ``data`` is the parsed response
        body.  Otherwise, when ``self.raise_exception`` is false, ``data`` is
        a private copy of ``DUMMY_DATA`` whose ``[0][0][0]`` slot holds the
        original ``text``.

    Raises:
        Exception: On a non-200 status when ``self.raise_exception`` is set.
    """
    import copy

    token = 'xxxx'  # dummy default value here as it is not used by api client
    if self.client_type == 'webapp':
        token = self.token_acquirer.do(text)

    params = utils.build_params(
        client=self.client_type,
        query=text,
        src=src,
        dest=dest,
        token=token,
        override=override,
    )
    url = urls.TRANSLATE.format(host=self._pick_service_url())
    r = self.client.get(url, params=params)

    if r.status_code == 200:
        data = utils.format_json(r.text)
        return data, r

    if self.raise_exception:
        raise Exception('Unexpected status code "{}" from {}'.format(
            r.status_code, self.service_urls))

    # Work on a copy: writing into the shared module-level DUMMY_DATA would
    # silently rewrite every fallback result handed out earlier.
    fallback = copy.deepcopy(DUMMY_DATA)
    fallback[0][0][0] = text
    return fallback, r
def _translate(text, dest, src):
    """Send *text* to the translate endpoint and return the HTTP response.

    The request URL (without query parameters) is printed.  The response body
    is passed through ``utils.format_json``, but the parsed value is not
    returned; callers receive the response object itself.
    """
    tk = token_acquirer.do(text)
    query = build_params(query=text, src=src, dest=dest, token=tk)
    endpoint = 'https://{host}/translate_a/single'.format(
        host=_pick_service_url())
    response = session.get(endpoint, params=query)
    print(endpoint)
    # The parsed result is discarded; the call is kept so that any error it
    # raises still surfaces here.
    utils.format_json(response.text)
    return response
def test_format_json():
    """``format_json`` turns empty array slots into ``None`` entries."""
    raw = '[,,"en",,,,0.96954316,,[["en"],,[0.96954316]]]'
    expected = [
        None,
        None,
        'en',
        None,
        None,
        None,
        0.96954316,
        None,
        [['en'], None, [0.96954316]],
    ]
    assert utils.format_json(raw) == expected
def _translate(self, text, dest, src, override):
    """Request a translation of *text* and return the parsed response.

    Args:
        text: The text to translate.
        dest: Destination language code.
        src: Source language code.
        override: Extra parameters forwarded to ``utils.build_params``.

    Returns:
        The value produced by ``utils.format_json`` for the response body.
    """
    tk = self.token_acquirer.do(text)
    request_params = utils.build_params(
        query=text,
        src=src,
        dest=dest,
        token=tk,
        override=override,
    )
    endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
    response = self.session.get(endpoint, params=request_params)
    return utils.format_json(response.text)
def _translate(self, text, dest, src):
    """Request a translation of *text* and return the parsed response.

    When not running under ``PY3``, a ``str`` input is first decoded from
    UTF-8.

    Returns:
        The value produced by ``utils.format_json`` for the response body.
    """
    if not PY3 and isinstance(text, str):  # pragma: nocover
        text = text.decode('utf-8')

    tk = self.token_acquirer.do(text)
    request_params = utils.build_params(
        query=text, src=src, dest=dest, token=tk)
    endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
    response = self.session.get(endpoint, params=request_params)
    return utils.format_json(response.text)
def _translate(self, text, dest, src):
    """Request a translation of *text* and return the parsed response.

    When not running under ``PY3``, a ``str`` input is first decoded from
    UTF-8.  The request is issued with ``verify=False``.

    Returns:
        The value produced by ``utils.format_json`` for the response body.
    """
    if not PY3 and isinstance(text, str):  # pragma: nocover
        text = text.decode('utf-8')

    tk = self.token_acquirer.do(text)
    request_params = utils.build_params(
        query=text, src=src, dest=dest, token=tk)
    endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
    # NOTE(review): with a requests-style session, verify=False turns off TLS
    # certificate verification -- confirm this is intentional.
    response = self.session.get(
        endpoint, verify=False, params=request_params)
    return utils.format_json(response.text)
def _translate(self, text, dest, src):
    """Request a translation of *text* using the ``webapp`` client.

    When not running under ``PY3``, a ``str`` input is first decoded from
    UTF-8.

    Returns:
        The value produced by ``utils.format_json`` for the response body,
        or ``None`` when the response status is not 200.
    """
    if not PY3 and isinstance(text, str):  # pragma: nocover
        text = text.decode('utf-8')

    tk = self.token_acquirer.do(text)
    request_params = utils.build_params(
        query=text, src=src, dest=dest, token=tk)
    request_params['client'] = 'webapp'
    endpoint = urls.TRANSLATE.format(host=self._pick_service_url())
    response = self.session.get(endpoint, params=request_params)

    if response.status_code != 200:
        return None
    return utils.format_json(response.text)
def _translate(self, text, dest, src):
    """Request a translation of *text* using the ``webapp`` client.

    Args:
        text: The text to translate.
        dest: Destination language code.
        src: Source language code.

    Returns:
        On HTTP 200, the parsed response body.  Otherwise, when
        ``self.raise_exception`` is false, a private copy of ``DUMMY_DATA``
        whose ``[0][0][0]`` slot holds the original ``text``.

    Raises:
        Exception: On a non-200 status when ``self.raise_exception`` is set.
    """
    import copy

    token = self.token_acquirer.do(text)
    params = utils.build_params(query=text, src=src, dest=dest, token=token)
    params['client'] = 'webapp'
    url = urls.TRANSLATE.format(host=self._pick_service_url())
    r = self.session.get(url, params=params)

    if r.status_code == 200:
        return utils.format_json(r.text)

    if self.raise_exception:
        raise Exception('Unexpected status code "{}" from {}'.format(
            r.status_code, self.service_urls))

    # Work on a copy: writing into the shared module-level DUMMY_DATA would
    # silently rewrite every fallback result handed out earlier.
    fallback = copy.deepcopy(DUMMY_DATA)
    fallback[0][0][0] = text
    return fallback
def _translate(self, text, dest, src):
    """Request a translation of *text* and return the parsed response.

    When not running under ``PY3``, a ``str`` input is first decoded from
    UTF-8.

    Args:
        text: The text to translate.
        dest: Destination language code.
        src: Source language code.

    Returns:
        On HTTP 200, the parsed response body.  Otherwise, when
        ``self.raise_exception`` is false, a private copy of ``DUMMY_DATA``
        whose ``[0][0][0]`` slot holds the original ``text``.

    Raises:
        Exception: On a non-200 status when ``self.raise_exception`` is set.
    """
    import copy

    if not PY3 and isinstance(text, str):  # pragma: nocover
        text = text.decode('utf-8')

    token = self.token_acquirer.do(text)
    params = utils.build_params(query=text, src=src, dest=dest, token=token)
    url = urls.TRANSLATE.format(host=self._pick_service_url())
    r = self.session.get(url, params=params)

    if r.status_code == 200:
        return utils.format_json(r.text)

    if self.raise_exception:
        raise Exception('Unexpected status code "{}" from {}'.format(
            r.status_code, self.service_urls))

    # Work on a copy: writing into the shared module-level DUMMY_DATA would
    # silently rewrite every fallback result handed out earlier.
    fallback = copy.deepcopy(DUMMY_DATA)
    fallback[0][0][0] = text
    return fallback
def test_format_malformed_json():
    """``format_json`` raises ``ValueError`` for unbalanced brackets."""
    malformed = '[,,"en",,,,0.96954316,,[["en"],,0.96954316]]]'
    with raises(ValueError):
        utils.format_json(malformed)
def _totaltranslate_url(text):
    """Return the Indonesian -> zh-cn request URL for *text*."""
    try:
        from urllib.parse import quote
    except ImportError:  # Python 2
        from urllib import quote

    # The token is derived from the raw text; only the copy placed in the URL
    # is percent-encoded, so characters such as '#', '\\', '"' or '&' cannot
    # truncate or corrupt the query string.
    token = translator.token_acquirer.do(text)
    return "https://translate.google.cn/translate_a/single?client=t&sl=id&tl=zh-cn&hl=zh-cn&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&ssel=3&tsel=0&kc=1&tk={0}&q={1}".format(
        token, quote(text.encode('utf-8'), safe=''))


def _totaltranslate_parse(response):
    """Return the translated text in *response*, or '' if unusable."""
    try:
        parsed = format_json(response.text)
        target = ''.join([d[0] if d[0] else '' for d in parsed[0]])
        print(target)
        source = ''.join([d[1] if d[1] else '' for d in parsed[0]])
    except Exception as e:
        logger.error('when format:%s', e)
        logger.error('%s', response.text)
        return ''
    # A translation only counts when both source and target are non-empty.
    return target if source and target else ''


def totaltranslate():
    """Batch-translate article files from Indonesian to Chinese.

    For each article index, every line of the content file is sent as its own
    request through ``work`` (defined elsewhere).  A successful response
    appends the translated line to the article's translation file.  A 403 or
    413 response triggers a fallback: the line is split on commas, each piece
    is translated separately, and the pieces are written re-joined with
    commas.  Any other outcome prints ``error!`` and writes nothing.
    """
    start = 1  # index of the first article to translate
    # NOTE(review): range() below excludes ``end``, so article 100 itself is
    # not processed -- confirm whether that is intended.
    end = 100  # index bound of the last article to translate
    for i in range(start, end):
        print(i)
        out_path = ('C:/Users/86135/Desktop/id_work/final_data/id_article_translate/id_article_'
                    + str(i) + '.json')
        in_path = ('C:/Users/86135/Desktop/id_work/final_data/id_article_content/id_article_content_'
                   + str(i) + '.json')
        # Output is opened in append mode; both files are closed by ``with``
        # even if a request or a write raises.
        with io.open(out_path, mode='a', encoding='utf-8') as file2, \
                io.open(in_path, mode='r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()  # drop leading/trailing whitespace
                res = work([_totaltranslate_url(line)])
                print(res)
                for r in res:
                    # getattr() also covers results without a status code
                    # (e.g. a failed request), which are reported as errors.
                    status = getattr(r, 'status_code', None)
                    if status == 200:  # request succeeded
                        file2.write(_totaltranslate_parse(r) + '\n')
                    elif status in (403, 413):
                        # Fallback: translate the line piece by piece.
                        pieces = []
                        for part in line.split(","):
                            part_res = work(
                                [_totaltranslate_url(part.strip())])
                            print(part_res)
                            for part_r in part_res:
                                if getattr(part_r, 'status_code', None) == 200:
                                    pieces.append(
                                        _totaltranslate_parse(part_r) or " ")
                        file2.write(",".join(pieces) + "\n")
                    else:
                        print("error!")
                # Pause between requests to avoid being blocked.
                time.sleep(random.randint(1, 3))
            logger.info('finished articles: %s', i)
            time.sleep(random.randint(1, 3))