def parse_result():
    """Joins the translated pieces of a submitted raw result into one string.

    Reads the 'raw' form field, runs it through parse_javascript(), takes the
    first item of every entry in the first element of the parsed value, drops
    the falsy ones (e.g. None) and concatenates the rest.

    Returns a JSON response of the form {'translated': <joined text>}.
    """
    parsed = parse_javascript(request.form['raw'])

    # The first item of each entry is the translated sentence.
    candidates = (entry[0] for entry in parsed[0])

    # Skip None/empty pieces before concatenating.
    joined = ''.join(piece for piece in candidates if piece)

    return jsonify({'translated': joined})
def __translate__(text, source, target, client='x',
                  user_agent=DEFAULT_USER_AGENT):
    """Translates text by POSTing it to the Google Translate web endpoint.

    text: text to be translated
    source: source language
    target: target language
    client: value sent as the 'client' form field; 'x' makes this function
        decode the response as JSON, 't' hands the response body to
        parse_javascript()
    user_agent: User-Agent header value; wrapped as 'Mozilla/5.0 (<value>)'
        when it does not already look like a Mozilla-style string

    Returns `text` unchanged when source == target. For client 'x' returns
    the translated sentences joined by spaces, with every line stripped of
    surrounding whitespace. For client 't' returns whatever
    parse_javascript() produces.

    Raises HTTPException when the endpoint answers with a non-200 status and
    Exception when `client` is neither 'x' nor 't'.
    """
    if source == target:
        return text

    if not re.match(r'Mozilla/\d+\.\d+ \(.*', user_agent):
        user_agent = 'Mozilla/5.0 (%s)' % user_agent

    # Content-Length is intentionally not set here: sys.getsizeof(text) is
    # the in-memory size of the Python object, not the byte length of the
    # form-encoded body. requests derives the correct value from `data`.
    headers = {
        'Referer': 'http://translate.google.com',
        'User-Agent': user_agent,
    }
    payload = {
        'client': client,
        'sl': source,
        'tl': target,
        'text': text,
    }
    url = 'http://translate.google.com/translate_a/t'

    req = requests.post(url, headers=headers, data=payload)

    if req.status_code != 200:
        raise HTTPException(
            ('Google Translate returned HTTP {}'.format(req.status_code)),
            req.status_code)

    if client == 'x':
        data = json.loads(req.text)

        # The sentences are either at the top level or nested under the
        # first entry of 'results'. Only lookup failures trigger the
        # fallback; anything else (e.g. KeyboardInterrupt) propagates.
        try:
            sentences = data['sentences']
        except (KeyError, TypeError):
            sentences = data['results'][0]['sentences']

        result = ' '.join(sentence['trans'] for sentence in sentences)

        # Remove unnecessary white spaces
        return '\n'.join(line.strip() for line in result.split('\n'))

    elif client == 't':
        return parse_javascript(req.text)

    else:
        raise Exception("Unsupported client '{}'".format(client))
def corpus_raw():
    """Stores a submitted raw translation payload via CorpusRaw.insert().

    Reads the 'raw', 'sl' and 'tl' form fields. The raw value must be
    accepted by parse_javascript(); its parsed form is saved as JSON together
    with the SHA-1 hex digest of the UTF-8 encoded raw text and both language
    codes.

    Returns an empty string on success. On any exception the error is logged,
    the database session is rolled back and (error message, 500) is returned.
    """
    raw, source_lang, target_lang = (
        request.form[field] for field in ('raw', 'sl', 'tl'))

    try:
        # Parsing doubles as validation: a value parse_javascript() rejects
        # never reaches the database.
        parsed = parse_javascript(raw)

        digest = hashlib.sha1(raw.encode('utf-8')).hexdigest()
        serialized = json.dumps(parsed)

        CorpusRaw.insert(
            hash=digest,
            raw=serialized,
            source_lang=source_lang,
            target_lang=target_lang,
        )
    except Exception as error:
        logger.exception(error)
        db.session.rollback()
        return str(error), 500

    return ''
def _parse_javascript():
    """Runs parse_javascript() on the 'raw' form field.

    Returns a JSON response of the form {'parsed': <parsed value>}.
    """
    parsed = parse_javascript(request.form['raw'])
    response_body = {'parsed': parsed}
    return jsonify(response_body)
def __translate__(text, source, target, client='x',
                  user_agent=DEFAULT_USER_AGENT):
    """Translates text by POSTing it to the Google Translate web endpoint.

    text: text to be translated
    source: source language
    target: target language
    client: value sent as the 'client' form field; 'x' makes this function
        decode the response as JSON, 't' hands the response body to
        parse_javascript()
    user_agent: User-Agent header value; wrapped as 'Mozilla/5.0 (<value>)'
        when it does not already look like a Mozilla-style string

    Returns `text` unchanged when source == target. For client 'x' returns
    either the decoded JSON value itself when it is a plain string, or the
    translated sentences joined by spaces with every line stripped of
    surrounding whitespace. For client 't' returns whatever
    parse_javascript() produces.

    Raises HTTPException when the endpoint answers with a non-200 status and
    Exception when `client` is neither 'x' nor 't'.
    """
    if source == target:
        return text

    if not re.match(r'Mozilla/\d+\.\d+ \(.*', user_agent):
        user_agent = 'Mozilla/5.0 (%s)' % user_agent

    # Content-Length is intentionally not set here: sys.getsizeof(text) is
    # the in-memory size of the Python object, not the byte length of the
    # form-encoded body. requests derives the correct value from `data`.
    headers = {
        'Referer': 'http://translate.google.com',
        'User-Agent': user_agent,
    }
    payload = {
        'client': client,
        'sl': source,
        'tl': target,
        'text': text,
    }
    url = 'http://translate.google.com/translate_a/t'

    req = requests.post(url, headers=headers, data=payload)

    if req.status_code != 200:
        raise HTTPException(
            ('Google Translate returned HTTP {}'.format(req.status_code)),
            req.status_code)

    if client == 'x':
        data = json.loads(req.text)

        # It appears in some cases the Google Translate returns a string
        # rather than a dictionary. `unicode` only exists on Python 2, so
        # fall back to `str` on Python 3.
        try:
            string_type = unicode
        except NameError:
            string_type = str
        if isinstance(data, string_type):
            return data

        # The sentences are either at the top level or nested under the
        # first entry of 'results'. A dict without 'sentences' raises
        # KeyError and a non-dict raises TypeError; both must reach the
        # fallback lookup.
        try:
            sentences = data['sentences']
        except (KeyError, TypeError):
            sentences = data['results'][0]['sentences']

        result = ' '.join(sentence['trans'] for sentence in sentences)

        # Remove unnecessary white spaces
        return '\n'.join(line.strip() for line in result.split('\n'))

    elif client == 't':
        return parse_javascript(req.text)

    else:
        raise Exception("Unsupported client '{}'".format(client))