This repository has been archived by the owner on Feb 19, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
linguist.py
102 lines (73 loc) · 3.21 KB
/
linguist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import re
import json
import argparse
from os.path import join, isdir
from sys import argv, exit
from urllib2 import HTTPError
from goslate import Goslate
from settings import USE_SSL, GOOGLE_DOMAINS
# One translate endpoint per configured Google domain; the scheme is chosen
# by the USE_SSL setting. The resulting list is handed to the shared Goslate
# client used by the rest of this module.
service_urls = [
    ''.join(('https://' if USE_SSL else 'http://', 'translate', domain))
    for domain in GOOGLE_DOMAINS
]
gs = Goslate(service_urls=service_urls)
def wrap(string):
    """Enclose every ``{{...}}`` placeholder delimiter pair in a ``<span>``.

    Each ``{{`` is rewritten to ``<span>{{`` and each ``}}`` to
    ``}}</span>``; all other text is returned untouched.
    """
    # Opening delimiters are rewritten first, then the closing ones.
    with_opening_tags = re.sub(r'\{\{', '<span>{{', string)
    return re.sub(r'\}\}', '}}</span>', with_opening_tags)
def unwrap(string):
    """Strip the ``<span>`` tags that ``wrap`` adds around placeholders.

    ``<span>`` followed by ``{{`` collapses to ``{{`` and ``}}`` followed by
    ``</span>`` collapses to ``}}``. Tag matching is case-insensitive and
    any amount of whitespace (including none) is accepted between the tag
    and the braces, so text whose spacing was left untouched is unwrapped
    as well as text where a space was inserted.

    Returns the string with the wrappers removed; other text is unchanged.
    """
    # Normalise closing tags written as "</ span>" before matching them.
    # NOTE(review): presumably the translation service emits these -- confirm.
    string = string.replace('</ ', '</')
    string = re.sub(r'<span>\s*\{\{', '{{', string, flags=re.IGNORECASE)
    string = re.sub(r'\}\}\s*</span>', '}}', string, flags=re.IGNORECASE)
    return string
def translate(string, source, target):
    """Translate ``string`` from ``source`` to ``target`` via the Goslate client.

    Args:
        string: Text to translate.
        source: Source language code.
        target: Target language code.

    Returns:
        The translated text encoded as UTF-8.

    Raises:
        HTTPError: Re-raised for any HTTP failure other than a 404.
        SystemExit: On a 404, with a message naming the failing URL.
    """
    try:
        # Note the argument order: the client takes the target language first.
        return gs.translate(string, target, source).encode('utf-8')
    except HTTPError as err:
        if err.code == 404:
            exit('Bad Google Translate Domain: {0}'.format(err.url))
        # Bare raise keeps the original traceback intact.
        raise
def translate_all(strings):
    """Translate every non-dict value of a (possibly nested) dict in place.

    Nested dicts are processed recursively. Every other value is wrapped,
    translated from the module-level ``source_language`` to the module-level
    ``target_language``, unwrapped, and stored back under the same key.

    After translation, each ``{{...}}`` placeholder in the result is replaced
    by the placeholder found at the same position in the original text, so a
    placeholder that was altered in translation is restored verbatim.
    Placeholders in the result beyond the number present in the original are
    left as returned.

    Args:
        strings: Dict mapping keys to strings or to further dicts; mutated.
    """
    # Non-greedy, so a string with several placeholders yields one match per
    # placeholder instead of a single span from the first "{{" to the last "}}".
    placeholder = re.compile(r'\{\{.*?\}\}')

    for key in strings:
        value = strings[key]
        if isinstance(value, dict):
            translate_all(value)
            continue

        original_string = wrap(value)
        translated_string = unwrap(
            translate(original_string, source_language, target_language))

        originals = iter(placeholder.findall(original_string))

        def restore(match):
            # Substitute by position; when the original has no placeholder
            # left for this position, keep the translated text as it is.
            original = next(originals, None)
            return match.group(0) if original is None else str(original)

        strings[key] = placeholder.sub(restore, translated_string)
if __name__ == '__main__':
    # Command-line entry point: read every JSON file found under
    # <input>/<source_language>, translate its strings, and write the result
    # to <output>/<target_language>/<same file name>.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help='input help')
    parser.add_argument('-o', '--output', help='output help')
    parser.add_argument('source_language', type=str)
    parser.add_argument('target_language', type=str)
    args = parser.parse_args()

    # Both directories default to 'i18n' when not given on the command line.
    input_dir = args.input if args.input is not None else 'i18n'
    output_dir = args.output if args.output is not None else 'i18n'

    # These two names must stay module-level: translate_all() reads them.
    source_language = args.source_language.lower()
    target_language = args.target_language.lower()

    # Language codes are compared case-insensitively against the client's list.
    supported = [x.lower() for x in gs.get_languages().keys()]
    if source_language not in supported:
        exit('{0} not supported.'.format(source_language))
    if target_language not in supported:
        exit('{0} not supported.'.format(target_language))

    source_dir = join(input_dir, source_language)
    target_dir = join(output_dir, target_language)

    for root, dirs, files in os.walk(source_dir):
        for filename in files:
            filepath = join(root, filename)
            # NOTE: files found in subdirectories of source_dir are written
            # directly into target_dir; the subdirectory part is not kept.
            outpath = join(target_dir, filename)
            if not isdir(target_dir):
                # makedirs also creates missing parents, e.g. when the
                # output directory itself does not exist yet.
                os.makedirs(target_dir)
            with open(filepath, 'r') as f:
                strings = json.load(f)
            translate_all(strings)
            with open(outpath, 'w') as f:
                json.dump(strings, f, indent=4, separators=(',', ': '), ensure_ascii=False)