def run():
    """Main function to query Microsoft Translator API and translate words

    Credentials are read from the ``MS_TRANSLATOR_CLIENT_ID`` and
    ``MS_TRANSLATOR_API_KEY`` environment variables; any missing value is
    requested interactively.  The word list is translated once per language
    reported by the API, and the per-language results are transposed so that
    each written row holds one word across all languages.

    Exits the process with status 2 if any step fails.

    :return:
    """
    try:
        client_id = getenv('MS_TRANSLATOR_CLIENT_ID', None)
        api_key = getenv('MS_TRANSLATOR_API_KEY', None)

        # Prompt for whichever credential is missing (previously a missing
        # client id alone was never prompted for).
        if not client_id:
            client_id = str(raw_input('Microsoft Translator Client ID: '))
        if not api_key:
            api_key = str(raw_input('Microsoft Translator API Key: '))

        if client_id and api_key:
            translator = Translator(client_id, api_key)
            en_word_list = read_word_list()

            result_matrix = []
            for lang in translator.get_languages():
                # NOTE(review): this call was commented out in the original,
                # which left `result` undefined and made every run fail with
                # a NameError (hidden by a bare except).  Restored from the
                # commented-out line; confirm get_translated_text is the
                # intended helper.
                result = get_translated_text(en_word_list, lang, client_id, api_key)
                result_matrix.append(result)

            # Transpose the matrix for csv writer
            to_write_matrix = [list(row) for row in zip(*result_matrix)]
            write_result(to_write_matrix)
    except Exception as exc:
        # Keep the original exit status, but say why instead of failing
        # silently.
        sys.stderr.write('Translation run failed: {0}\n'.format(exc))
        sys.exit(2)
class Mangle:
    """Scramble a message by round-tripping it through random languages."""

    def __init__(self):
        # client_id / client_secret are read from the enclosing module.
        self.translator = Translator(client_id, client_secret)
        self.langs = self.translator.get_languages()

    def mangle(self, message_text, language='en', times=0):
        """Return message_text after `times` translation round trips.

        Each round translates from `language` into a randomly chosen
        language and straight back again.  A `times` of 0 means "pick a
        random count between the module-level `low` and `high`".
        """
        rounds = random.randint(low, high) if times == 0 else times

        scrambled = message_text
        for _ in range(rounds):
            hop = random.choice(self.langs)
            outbound = self.translator.translate(scrambled,
                                                 from_lang=language,
                                                 to_lang=hop)
            scrambled = self.translator.translate(outbound,
                                                  from_lang=hop,
                                                  to_lang=language)
        return scrambled
class MicrosoftTranslator(AbstractTranslator):
    """Translator backend backed by the Microsoft Translator API.

    When ``self.options`` is None the backend stays disabled: ``client`` is
    None, ``languages`` is empty and ``_translate`` returns no translations.
    """

    name = "microsoft"

    def __init__(self):
        """Create the API client from ``self.options``.

        Raises:
            ValueError: if options are present but lack ``client_id`` or
                ``client_secret``.
        """
        super(MicrosoftTranslator, self).__init__()

        if self.options is not None:
            client_id = self.options.get("client_id")
            client_secret = self.options.get("client_secret")
            if client_id is None or client_secret is None:
                raise ValueError(
                    "Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret"
                )
            self.client = MSTranslator(client_id=client_id, client_secret=client_secret)
        else:
            self.client = None

        # Filled on first access of ``languages``.
        self._languages = None

    @property
    def languages(self):
        """Language codes supported by the API, fetched once and cached.

        Returns an empty list when no client is configured or when the API
        call fails; a failure is not cached, so the next access retries.
        """
        if self._languages is not None:
            return self._languages

        if self.client is None:
            # Return here: falling through would call get_languages() on
            # None and raise AttributeError.
            self._languages = []
            return self._languages

        try:
            self._languages = self.client.get_languages()
        except MSTranslatorApiException:
            return []
        return self._languages

    def _translate(self, texts, language, origin_language="en"):
        """ [ 'Hello' ], 'es' => { 'Hello' : 'Hola' }

        Returns an empty dict when the backend is disabled, the language is
        not supported, or the API call fails.  Texts whose translation comes
        back empty are omitted from the result.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        app.logger.debug("Translating %r to %r using Microsoft Translator API" % (texts, language))
        try:
            ms_translations = self.client.translate_array(texts=texts, to_lang=language, from_lang=origin_language)
        except MSTranslatorApiException as e:
            traceback.print_exc()
            app.logger.warning("Error translating using Microsoft Translator API: %s" % e, exc_info=True)
            return {}

        app.logger.debug("Translated %s sentences using Microsoft Translator API" % len(ms_translations))

        translations = {}
        for text, translation in zip(texts, ms_translations):
            translated_text = translation.get("TranslatedText")
            if translated_text:
                translations[text] = translated_text

        return translations
class Mangle:
    """Garble text by bouncing it through randomly picked languages."""

    def __init__(self):
        # Credentials come from module-level client_id / client_secret.
        self.translator = Translator(client_id, client_secret)
        self.langs = self.translator.get_languages()

    def _round_trip(self, text, home, away):
        """Translate text from `home` to `away` and back to `home`."""
        there = self.translator.translate(text, from_lang=home, to_lang=away)
        return self.translator.translate(there, from_lang=away, to_lang=home)

    def mangle(self, message_text, language='en', times=0):
        """Apply `times` random round trips to message_text and return it.

        When `times` is 0 the number of round trips is drawn at random from
        the module-level `low`..`high` range.
        """
        if times == 0:
            # Caller did not specify a count: choose one at random.
            times = random.randint(low, high)

        for _unused in range(times):
            message_text = self._round_trip(message_text, language,
                                            random.choice(self.langs))
        return message_text
# -*- coding: utf-8 -*-
"""Translate a celiac-disease notice into every language code listed in
``country_codes_to_countries.txt`` and print the resulting mapping."""
from microsofttranslator import Translator

# SECURITY NOTE(review): these API credentials are committed in source.
# They should be rotated and loaded from the environment or a secrets store.
translator = Translator('celiacPass', '+gpq62pRXYbq0eAQVb6RhvO4qUpCZKGdaSi+Prov/og=')
languages = translator.get_languages()

CELIAC_MESSAGE = 'I have Celiac Disease, a very serious intolerance to gluten, which is found in wheat, oats, barley and rye.'

codeToTranslation = {}
# The context manager closes the file even if a translation call raises.
with open('country_codes_to_countries.txt') as f:
    for line in f:
        # The code is the first space-separated field; strip() drops the
        # trailing newline on lines that contain no space.
        code = line.split(' ')[0].strip()
        if not code:
            # Skip blank lines rather than requesting an empty language code.
            continue
        codeToTranslation[code] = translator.translate(CELIAC_MESSAGE, code)

# Parenthesised single-argument form prints identically on Python 2 and 3.
print(codeToTranslation)
def test_get_languages(self):
    """get_languages() returns a plain list containing every default language."""
    supported = Translator(client_id, client_secret, debug=True).get_languages()
    self.assertEqual(type(supported), list)
    expected = set(default_languages)
    self.assertTrue(expected.issubset(set(supported)))
class MicrosoftTranslator(AbstractTranslator):
    """Translator backend backed by the Microsoft Translator API.

    Texts are sent in slices of roughly 2000 UTF-8 bytes.  When
    ``self.options`` is None the backend stays disabled: ``client`` is None,
    ``languages`` is empty and ``_translate`` returns no translations.
    """

    name = 'microsoft'

    def __init__(self):
        """Create the API client from ``self.options``.

        Raises:
            ValueError: if options are present but lack ``client_id`` or
                ``client_secret``.
        """
        super(MicrosoftTranslator, self).__init__()

        if self.options is not None:
            client_id = self.options.get('client_id')
            client_secret = self.options.get('client_secret')
            if client_id is None or client_secret is None:
                raise ValueError("Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret")
            self.client = MSTranslator(client_id = client_id, client_secret = client_secret)
        else:
            self.client = None

        # Filled on first access of ``languages``.
        self._languages = None

    @property
    def languages(self):
        """Language codes supported by the API, fetched once and cached.

        Returns an empty list when no client is configured or when the API
        call fails; a failure is not cached, so the next access retries.
        """
        if self._languages is not None:
            return self._languages

        if self.client is None:
            # Return here instead of falling through to a call on None.
            self._languages = []
            return self._languages

        try:
            self._languages = self.client.get_languages()
        except Exception:
            # Best effort: report no languages, but leave a trace in the log.
            app.logger.warning("Error retrieving languages from Microsoft Translator API", exc_info = True)
            return []
        return self._languages

    def _translate(self, texts, language, origin_language = 'en'):
        """ [ 'Hello' ], 'es' => { 'Hello' : 'Hola' }

        Returns a dict with the texts that could be translated.  A slice
        whose API call fails is logged and skipped; texts from other slices
        are still returned.  Texts whose translation comes back empty are
        omitted.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        # the size of a slice can't be over 10k characters in theory
        # (we try to keep them under 5k in practice)
        # [ element1, element2, element3 ...]
        slices = [[]]
        current_slice = slices[0]
        for text in texts:
            current_slice.append(text)
            # Start a new slice once this one has grown past 2000 bytes.
            if len(u''.join(current_slice).encode('utf8')) > 2000:
                current_slice = []
                slices.append(current_slice)

        app.logger.debug("Texts splitted in {} slices".format(len(slices)))
        for pos, text_slice in enumerate(slices):
            app.logger.debug(" slice: {}: {} characters".format(pos, len(''.join(text_slice).encode('utf8'))))

        translations = {}
        for current_slice in slices:
            if not current_slice:
                continue

            app.logger.debug("Translating %r to %r using Microsoft Translator API" % (current_slice, language))
            try:
                current_ms_translations = list(self.client.translate_array(texts = current_slice, to_lang = language, from_lang = origin_language))
            except Exception as e:
                traceback.print_exc()
                app.logger.warning("Error translating using Microsoft Translator API: %s" % e, exc_info = True)
                continue

            app.logger.debug("Translated %s sentences using Microsoft Translator API" % len(current_ms_translations))

            # Pair results with the texts of THIS slice.  Zipping against the
            # full `texts` list shifts every pairing after a failed slice and
            # attaches translations to the wrong source text.
            for text, translation in zip(current_slice, current_ms_translations):
                translated_text = translation.get('TranslatedText')
                if translated_text:
                    translations[text] = translated_text

        return translations
class MicrosoftTranslator(AbstractTranslator):
    """Translator backend that delegates to the Microsoft Translator API.

    Requests are split into slices of roughly 2000 UTF-8 bytes.  With
    ``self.options`` set to None the backend is disabled: ``client`` is
    None, ``languages`` is empty and ``_translate`` yields nothing.
    """

    name = 'microsoft'

    def __init__(self):
        """Build the API client from ``self.options``.

        Raises:
            ValueError: if options are present but ``client_id`` or
                ``client_secret`` is missing.
        """
        super(MicrosoftTranslator, self).__init__()

        if self.options is not None:
            client_id = self.options.get('client_id')
            client_secret = self.options.get('client_secret')
            if client_id is None or client_secret is None:
                raise ValueError(
                    "Misconfigured application. If you use the Microsoft Translator, provide a client_id and a client_secret"
                )
            self.client = MSTranslator(client_id=client_id,
                                       client_secret=client_secret)
        else:
            self.client = None

        # Populated lazily by the ``languages`` property.
        self._languages = None

    @property
    def languages(self):
        """Supported language codes, fetched on first use and then cached.

        An empty list is returned when there is no client or the API call
        fails; failures are not cached, so a later access tries again.
        """
        if self._languages is not None:
            return self._languages

        if self.client is None:
            # Stop here; continuing would call get_languages() on None.
            self._languages = []
            return self._languages

        try:
            self._languages = self.client.get_languages()
        except Exception:
            # Best effort: no languages, but record the failure.
            app.logger.warning(
                "Error retrieving languages from Microsoft Translator API",
                exc_info=True)
            return []
        return self._languages

    def _translate(self, texts, language, origin_language='en'):
        """ [ 'Hello' ], 'es' => { 'Hello' : 'Hola' }

        Returns a dict of the texts that were translated.  If the API call
        for one slice fails, that slice is logged and skipped while the
        remaining slices are still processed.  Texts with an empty
        translation are left out.
        """
        if self.client is None:
            return {}

        if language not in self.languages:
            return {}

        # the size of a slice can't be over 10k characters in theory
        # (we try to keep them under 5k in practice)
        # [ element1, element2, element3 ...]
        slices = [[]]
        current_slice = slices[0]
        for text in texts:
            current_slice.append(text)
            # Open a fresh slice once the current one exceeds 2000 bytes.
            if len(u''.join(current_slice).encode('utf8')) > 2000:
                current_slice = []
                slices.append(current_slice)

        app.logger.debug("Texts splitted in {} slices".format(len(slices)))
        for pos, text_slice in enumerate(slices):
            app.logger.debug(" slice: {}: {} characters".format(
                pos, len(''.join(text_slice).encode('utf8'))))

        translations = {}
        for current_slice in slices:
            if not current_slice:
                continue

            app.logger.debug(
                "Translating %r to %r using Microsoft Translator API" %
                (current_slice, language))
            try:
                current_ms_translations = list(
                    self.client.translate_array(texts=current_slice,
                                                to_lang=language,
                                                from_lang=origin_language))
            except Exception as e:
                traceback.print_exc()
                app.logger.warning(
                    "Error translating using Microsoft Translator API: %s" % e,
                    exc_info=True)
                continue

            app.logger.debug(
                "Translated %s sentences using Microsoft Translator API" %
                len(current_ms_translations))

            # Match results against the texts of this slice only.  Zipping
            # against all of `texts` misaligns every pair that follows a
            # failed slice.
            for text, translation in zip(current_slice,
                                         current_ms_translations):
                translated_text = translation.get('TranslatedText')
                if translated_text:
                    translations[text] = translated_text

        return translations
def main(): parser = argparse.ArgumentParser(description='Automatically translate and synchronize .strings files from defined base language.') parser.add_argument('-b','--base-lang-name', help='A base(or source) localizable resource name.(default=\'Base\'), (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default='Base', required=False) parser.add_argument('-x','--excluding-lang-names', type=str, help='A localizable resource name that you want to exclude. (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default=[], required=False, nargs='+') parser.add_argument('-c','--client-id', help='Client ID for MS Translation API', required=True) parser.add_argument('-s','--client-secret', help='Client Secret key for MS Translation API', required=True) parser.add_argument('-f','--force-translate-keys', type=str, help='Keys in the strings to update and translate by force. (input nothing for all keys.)', default=[], required=False, nargs='*') parser.add_argument('-fb','--following-base-keys', type=str, help='Keys in the strings to follow from "Base".', default=[], required=False, nargs='+') parser.add_argument('target path', help='Target localizable resource path. 
(root path of Base.lproj, default=./)', default='./', nargs='?') args = vars(parser.parse_args()) reload(sys) sys.setdefaultencoding('utf-8') # configure arguments __LANG_SEP__ = '-' __DIR_SUFFIX__ = ".lproj" __FILE_SUFFIX__ = ".strings" __RESOURCE_PATH__ = expanduser(args['target path']) __BASE_LANG__ = args['base_lang_name'] __EXCLUDING_LANGS__ = args['excluding_lang_names'] __KEYS_FORCE_TRANSLATE__ = args['force_translate_keys'] __KEYS_FORCE_TRANSLATE_ALL__ = ('--force-translate-keys' in sys.argv or '-f' in sys.argv) and not __KEYS_FORCE_TRANSLATE__ __KEYS_FOLLOW_BASE__ = args['following_base_keys'] __BASE_RESOUCE_DIR__ = None __LITERNAL_FORMAT__ = "%@" __LITERNAL_FORMAT_RE__ = re.compile(r"(%\s{1,}@)|(@\s{0,}%)") __LITERNAL_REPLACEMENT__ = "**" __LITERNAL_REPLACEMENT_RE__ = re.compile(r"\*\s{0,}\*") __QUOTES_RE__ = re.compile(r"\"") __QUOTES_REPLACEMENT__ = "'" if __BASE_LANG__.endswith(__DIR_SUFFIX__): __BASE_RESOUCE_DIR__ = __BASE_LANG__ __BASE_LANG__ = __BASE_LANG__.split(__DIR_SUFFIX__)[0] else: __BASE_RESOUCE_DIR__ = __BASE_LANG__+__DIR_SUFFIX__ # setup Translator & langs # read ios langs print '(i) Fetching supported locale codes for ios9 ...' __IOS9_CODES__ = [lang_row[0] for lang_row in csv.reader(open(resolve_file_path('lc_ios9.tsv'),'rb'), delimiter='\t')] print '(i) Supported numbers of locale code :', len(__IOS9_CODES__) __MS_CODE_ALIASES__ = { # MS API Supported : ios9 supported ISO639 1-2 codes 'zh-CHS' : ['zh-Hans', 'zh-CN', 'zh-SG'], 'zh-CHT' : ['zh-Hant', 'zh-MO', 'zh-HK', 'zh-TW'], 'en' : ['en-AU', 'en-GB'], 'es' : ['es-MX'], 'fr' : ['fr-CA'], 'pt' : ['pt-BR','pt-PT'] } # read mst langs print '(i) Fetching supported locales from Microsoft Translation API...' 
trans = Translator(args['client_id'], args['client_secret']) __MS_LANG_FILE__ = resolve_file_path('lc_ms.cached.tsv') __MS_SUPPORTED_CODES__ = None if os.path.exists(__MS_LANG_FILE__): __MS_SUPPORTED_CODES__ = [l.strip() for l in open(__MS_LANG_FILE__,'rb').readlines()] else: __MS_SUPPORTED_CODES__ = trans.get_languages() cfile = open(__MS_LANG_FILE__,'w') codes = '' for code in __MS_SUPPORTED_CODES__: codes += code+'\n' cfile.write(codes) cfile.close() print '(i) Supported numbers of locale code :', len(__MS_SUPPORTED_CODES__) # methods def supported_lang(code): alias = [ms for ms, ios in __MS_CODE_ALIASES__.items() if code in ios] # check es-{Custom defined alias} if len(alias)==1: return alias[0] # check es-MX elif code in __MS_SUPPORTED_CODES__: return code # check es elif code.split(__LANG_SEP__)[0] in __MS_SUPPORTED_CODES__: return code.split(__LANG_SEP__)[0] else: return None def preprocessing_translate_strs(strs): return [__LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, s.strip()).replace(__LITERNAL_FORMAT__, __LITERNAL_REPLACEMENT__) for s in strs] def postprocessing_translate_str(str): str = str.strip() # remove Quotes str = __QUOTES_RE__.sub(__QUOTES_REPLACEMENT__, str) # replace tp liternal replacement str = validate_liternal_replacement(str) # liternal replacement to liternal for format str = str.replace(__LITERNAL_REPLACEMENT__, __LITERNAL_FORMAT__) return str def validate_liternal_format(str): return __LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, str) def validate_liternal_replacement(str): return __LITERNAL_REPLACEMENT_RE__.sub(__LITERNAL_FORMAT__, str) def translate_ms(strs, to): lang = supported_lang(to) strs = preprocessing_translate_strs(strs) return [postprocessing_translate_str(r['TranslatedText']) for r in trans.translate_array(strs, lang)] if lang else strs def strings_obj_from_file(file): return strsparser.parse_strings(filename=file) def merge_two_dicts(x, y): '''Given two dicts, merge them into a new dict as a shallow copy.''' z = 
x.copy() z.update(y) return z # core function def insert_or_translate(target_file, lc): #parse target file target_kv = {} target_error_lines = [] if not notexist_or_empty_file(target_file): parsed_strings = strsparser.parse_strings(filename=target_file) for item in parsed_strings: k, v, e = item['key'], item['value'], item['error'] # line error if e: target_error_lines.append(e) if not target_error_lines: target_kv[k] = v #parsing complete or return. if target_error_lines: print '(!) Syntax error - Skip' return False, None, None, target_error_lines #base base_content = base_dict[os.path.basename(target_file)] base_kv = {} for item in base_content: base_kv[item['key']] = item['value'] force_adding_keys = base_kv.keys() if __KEYS_FORCE_TRANSLATE_ALL__ else __KEYS_FORCE_TRANSLATE__ adding_keys = list(((set(base_kv.keys()) - set(target_kv.keys())) | (set(base_kv.keys()) & set(force_adding_keys))) - set(__KEYS_FOLLOW_BASE__)) removing_keys = list(set(target_kv.keys()) - set(base_kv.keys())) existing_keys = list(set(base_kv.keys()) - (set(adding_keys) | set(removing_keys))) updated_keys = [] """ perform translate """ translated_kv = {}; if len(adding_keys): print 'Translating...' 
translated_kv = dict(zip(adding_keys, translate_ms([base_kv[k] for k in adding_keys], lc))) updated_content = [] for item in base_content: k = item['key'] newitem = dict.fromkeys(item.keys()) newitem['key'] = k #added if k in adding_keys: if k in translated_kv: newitem['value'] = translated_kv[k] newitem['comment'] = 'Translated from: {0}'.format(base_kv[k]) print '[Add] "{0}" = "{1}" <- {2}'.format(k, newitem['value'], base_kv[k]) else: newitem['value'] = target_kv[k] newitem['comment'] = 'Translate failed from: {0}'.format(base_kv[k]) print '[Error] "{0}" = "{1}" X <- {2}'.format(k, newitem['value'], base_kv[k]) #exists elif k in existing_keys: target_value = target_kv.get(k) if k in __KEYS_FOLLOW_BASE__: newitem['value'] = base_kv[k] if target_value != base_kv[k]: updated_keys.append(k) else: newitem['value'] = target_value or base_kv[k] if not target_value: updated_keys.append(k) updated_content.append(newitem) #removed or wrong for k in removing_keys: print '[Remove]', k if len(adding_keys) or len(removing_keys): print '(i) Changed Keys: Added {0}, Updated {1}, Removed {2}'.format(len(adding_keys), len(updated_keys), len(removing_keys)) return updated_content and (len(adding_keys)>0 or len(updated_keys)>0 or len(removing_keys)>0), updated_content, translated_kv, target_error_lines def write_file(target_file, list_of_content): suc = False try: f = codecs.open(target_file, "w", "utf-8") contents = '' for content in list_of_content: if content['comment']: contents += '/* {0} */'.format(content['comment']) + '\n' contents += '"{0}" = "{1}";'.format(content['key'], content['value']) + '\n' f.write(contents) suc = True except IOError: print 'IOError to open', target_file finally: f.close() return suc def remove_file(target_file): try: os.rename(target_file, target_file+'.deleted') return True except IOError: print 'IOError to rename', target_file return False def create_file(target_file): open(target_file, 'a').close() def notexist_or_empty_file(target_file): return 
not os.path.exists(target_file) or os.path.getsize(target_file)==0 def resolve_file_names(target_file_names): return map(lambda f: f.decode('utf-8'), filter(lambda f: f.endswith(__FILE_SUFFIX__), target_file_names)) base_dict = {} results_dict = {} # Get Base Language Specs walked = list(os.walk(__RESOURCE_PATH__, topdown=True)) for dir, subdirs, files in walked: if os.path.basename(dir)==__BASE_RESOUCE_DIR__: for _file in resolve_file_names(files): f = os.path.join(dir, _file) if notexist_or_empty_file(f): continue base_dict[_file] = strings_obj_from_file(f) if not base_dict: print '[!] Not found "{0}" in target path "{1}"'.format(__BASE_RESOUCE_DIR__, __RESOURCE_PATH__) sys.exit(0) print 'Start synchronizing...' for file in base_dict: print 'Target:', file for dir, subdirs, files in walked: files = resolve_file_names(files) if dir.endswith((__DIR_SUFFIX__)): lc = os.path.basename(dir).split(__DIR_SUFFIX__)[0] if lc.find('_'): lc = lc.replace('_', __LANG_SEP__) if lc == __BASE_LANG__: continue if lc in __EXCLUDING_LANGS__: print 'Skip: ', lc continue # lc = supported_lang(lc) results_dict[lc] = { 'deleted_files' : [], 'added_files' : [], 'updated_files' : [], 'skipped_files' : [], 'translated_files_lines' : {}, 'error_lines_kv' : {} } if not supported_lang(lc): print 'Does not supported: ', lc results_dict[lc]['skipped_files'] = join_path_all(dir, files) continue print '\n', 'Analayzing localizables... 
{1} (at {0})'.format(dir, lc) added_files = list(set(base_dict.keys()) - set(files)) removed_files = list(set(files) - set(base_dict.keys())) existing_files = list(set(files) - (set(added_files) | set(removed_files))) added_files = join_path_all(dir, added_files) removed_files = join_path_all(dir, removed_files) existing_files = join_path_all(dir, existing_files) added_cnt, updated_cnt, removed_cnt = 0, 0, 0 translated_files_lines = results_dict[lc]['translated_files_lines'] error_files = results_dict[lc]['error_lines_kv'] #remove - file for removed_file in removed_files: print 'Removing File... {0}'.format(removed_file) if remove_file(removed_file): removed_cnt+=1 #add - file for added_file in added_files: print 'Adding File... {0}'.format(added_file) create_file(added_file) u, c, t, e = insert_or_translate(added_file, lc) #error if e: error_files[added_file] = e #normal elif u and write_file(added_file, c): added_cnt+=1 translated_files_lines[added_file] = t #exist - lookup lines for ext_file in existing_files: u, c, t, e = insert_or_translate(ext_file, lc) #error if e: error_files[ext_file] = e #normal elif u: print 'Updating File... {0}'.format(ext_file) if write_file(ext_file, c): updated_cnt=+1 translated_files_lines[ext_file] = t if added_cnt or updated_cnt or removed_cnt or error_files: print '(i) Changed Files : Added {0}, Updated {1}, Removed {2}, Error {3}'.format(added_cnt, updated_cnt, removed_cnt, len(error_files.keys())) else: print 'Nothing to translate or add.' 
""" Results """ results_dict[lc]['deleted_files'] = removed_files results_dict[lc]['added_files'] = list(set(added_files) & set(translated_files_lines.keys())) results_dict[lc]['updated_files'] = list(set(existing_files) & set(translated_files_lines.keys())) if error_files: print error_files results_dict[lc]['error_lines_kv'] = error_files # print total Results print '' t_file_cnt, t_line_cnt = 0, 0 file_add_cnt, file_remove_cnt, file_update_cnt, file_skip_cnt = 0,0,0,0 for lc in results_dict.keys(): result_lc = results_dict[lc] file_add_cnt += len(result_lc['added_files']) file_remove_cnt += len(result_lc['deleted_files']) file_update_cnt += len(result_lc['updated_files']) file_skip_cnt += len(result_lc['skipped_files']) for f in result_lc['added_files']: print 'Added',f for f in result_lc['deleted_files']: print 'Removed',f for f in result_lc['updated_files']: print 'Updated',f for f in result_lc['skipped_files']: print 'Skiped',f tfiles = result_lc['translated_files_lines'] if tfiles: # print '============ Results for langcode : {0} ============='.format(lc) for f in tfiles: t_file_cnt += 1 if len(tfiles[f]): # print '', f for key in tfiles[f]: t_line_cnt += 1 # print key, ' = ', tfiles[f][key] print '' found_warining = filter(lambda i: i or None, rget(results_dict, 'error_lines_kv')) if file_add_cnt or file_update_cnt or file_remove_cnt or file_skip_cnt or found_warining: print 'Total New Translated Strings : {0}'.format(t_line_cnt) print 'Changed Files Total : Added {0}, Updated {1}, Removed {2}, Skipped {3}'.format(file_add_cnt, file_update_cnt, file_remove_cnt, file_skip_cnt) print "Synchronized." if found_warining: print '\n[!!] WARNING: Found strings that contains the syntax error. Please confirm.' for a in found_warining: for k in a: print 'at', k for i in a[k]: print ' ', i else: print "All strings are already synchronized. Nothing to translate or add." return
class Mangle:
    """Garble a message by translating it through a chain of languages.

    The chain always starts and ends at the instance's home ``language``.
    """

    def __init__(self, client_id, client_secret, language, low, high,
                 language_blacklist):
        """Set up the translator and the pool of usable languages.

        :param client_id: Microsoft Translator client id.
        :param client_secret: Microsoft Translator client secret.
        :param language: home language of the messages.
        :param low: lower bound for a randomly chosen ``times``.
        :param high: upper bound for a randomly chosen ``times``.
        :param language_blacklist: set of language codes never to use.
        """
        self.language = language
        self.translator = Translator(client_id, client_secret)
        self.languages = set(
            self.translator.get_languages()) - language_blacklist
        self.low = low
        self.high = high

    def mangle(self, message_text, times=0, method=None, language_list=None):
        """Translate message_text along a language chain.

        :param message_text: text to mangle.
        :param times: amount of mangling; 0 picks a random value between
            ``low`` and ``high``.  ``flipflop`` performs ``times // 2`` round
            trips, ``straight`` visits ``times`` distinct languages.
        :param method: a ``MangleMethod``; None picks a random non-manual one.
        :param language_list: intermediate languages for the manual method.
            The list is not modified.
        :return: dict with ``method`` (str), ``languages`` (the full chain)
            and ``all_messages`` (every intermediate text, or False if any
            translation failed).
        :raises ValueError: manual method without languages, or negative
            ``times``.
        :raises NotImplementedError: unknown method.
        """
        if method == MangleMethod.manual and not language_list:
            raise ValueError("No language list given.")

        if method is None:
            method = random.choice(
                [m for m in MangleMethod if m != MangleMethod.manual])

        if times < 0:
            raise ValueError("Parameter times must be greater than 0.")
        if times == 0:
            times = random.randint(self.low, self.high)

        # random.sample() no longer accepts sets (TypeError on 3.11+), so
        # sample from a list snapshot of the pool.
        pool = list(self.languages)

        if method == MangleMethod.manual:
            # Build a new list; the caller's list used to be mutated in place.
            language_list = ([self.language] + list(language_list) +
                             [self.language])
        elif method == MangleMethod.flipflop:
            language_list = [self.language]
            for _ in range(int(times / 2)):
                language_list.extend([random.choice(pool), self.language])
        elif method == MangleMethod.straight:
            language_list = ([self.language] + random.sample(pool, times) +
                             [self.language])
        else:
            raise NotImplementedError(
                "MangleMethod {} not implemented.".format(method))

        all_messages = [message_text]
        # Walk consecutive (source, target) pairs of the chain.
        for source, target in zip(language_list, language_list[1:]):
            try:
                text = self.translator.translate(all_messages[-1],
                                                 from_lang=source,
                                                 to_lang=target)
                all_messages.append(text)
            except Exception:
                # Existing contract: any failed hop is reported as False.
                all_messages = False
                break

        message_info = {
            'method': str(method),
            'languages': language_list,
            'all_messages': all_messages
        }
        return message_info
def main(): parser = argparse.ArgumentParser( description= 'Automatically translate and synchronize .strings files from defined base language.' ) parser.add_argument( '-b', '--base-lang-name', help= 'A base(or source) localizable resource name.(default=\'Base\'), (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default='Base', required=False) parser.add_argument( '-x', '--excluding-lang-names', type=str, help= 'A localizable resource name that you want to exclude. (e.g. "Base" via \'Base.lproj\', "en" via \'en.lproj\')', default=[], required=False, nargs='+') parser.add_argument('-c', '--client-id', help='Client ID for MS Translation API', required=True) parser.add_argument('-s', '--client-secret', help='Client Secret key for MS Translation API', required=True) parser.add_argument( '-f', '--force-translate-keys', type=str, help= 'Keys in the strings to update and translate by force. (input nothing for all keys.)', default=[], required=False, nargs='*') parser.add_argument('-fb', '--following-base-keys', type=str, help='Keys in the strings to follow from "Base".', default=[], required=False, nargs='+') parser.add_argument( 'target path', help= 'Target localizable resource path. 
(root path of Base.lproj, default=./)', default='./', nargs='?') args = vars(parser.parse_args()) reload(sys) sys.setdefaultencoding('utf-8') # configure arguments __LANG_SEP__ = '-' __DIR_SUFFIX__ = ".lproj" __FILE_SUFFIX__ = ".strings" __RESOURCE_PATH__ = expanduser(args['target path']) __BASE_LANG__ = args['base_lang_name'] __EXCLUDING_LANGS__ = args['excluding_lang_names'] __KEYS_FORCE_TRANSLATE__ = args['force_translate_keys'] __KEYS_FORCE_TRANSLATE_ALL__ = ( '--force-translate-keys' in sys.argv or '-f' in sys.argv) and not __KEYS_FORCE_TRANSLATE__ __KEYS_FOLLOW_BASE__ = args['following_base_keys'] __BASE_RESOUCE_DIR__ = None __LITERNAL_FORMAT__ = "%@" __LITERNAL_FORMAT_RE__ = re.compile(r"(%\s{1,}@)|(@\s{0,}%)") __LITERNAL_REPLACEMENT__ = "**" __LITERNAL_REPLACEMENT_RE__ = re.compile(r"\*\s{0,}\*") __QUOTES_RE__ = re.compile(r"\"") __QUOTES_REPLACEMENT__ = "'" if __BASE_LANG__.endswith(__DIR_SUFFIX__): __BASE_RESOUCE_DIR__ = __BASE_LANG__ __BASE_LANG__ = __BASE_LANG__.split(__DIR_SUFFIX__)[0] else: __BASE_RESOUCE_DIR__ = __BASE_LANG__ + __DIR_SUFFIX__ # setup Translator & langs # read ios langs print '(i) Fetching supported locale codes for ios9 ...' __IOS9_CODES__ = [ lang_row[0] for lang_row in csv.reader( open(resolve_file_path('lc_ios9.tsv'), 'rb'), delimiter='\t') ] print '(i) Supported numbers of locale code :', len(__IOS9_CODES__) __MS_CODE_ALIASES__ = { # MS API Supported : ios9 supported ISO639 1-2 codes 'zh-CHS': ['zh-Hans', 'zh-CN', 'zh-SG'], 'zh-CHT': ['zh-Hant', 'zh-MO', 'zh-HK', 'zh-TW'], 'en': ['en-AU', 'en-GB'], 'es': ['es-MX'], 'fr': ['fr-CA'], 'pt': ['pt-BR', 'pt-PT'] } # read mst langs print '(i) Fetching supported locales from Microsoft Translation API...' 
trans = Translator(args['client_id'], args['client_secret']) __MS_LANG_FILE__ = resolve_file_path('lc_ms.cached.tsv') __MS_SUPPORTED_CODES__ = None if os.path.exists(__MS_LANG_FILE__): __MS_SUPPORTED_CODES__ = [ l.strip() for l in open(__MS_LANG_FILE__, 'rb').readlines() ] else: __MS_SUPPORTED_CODES__ = trans.get_languages() cfile = open(__MS_LANG_FILE__, 'w') codes = '' for code in __MS_SUPPORTED_CODES__: codes += code + '\n' cfile.write(codes) cfile.close() print '(i) Supported numbers of locale code :', len(__MS_SUPPORTED_CODES__) # methods def supported_lang(code): alias = [ms for ms, ios in __MS_CODE_ALIASES__.items() if code in ios] # check es-{Custom defined alias} if len(alias) == 1: return alias[0] # check es-MX elif code in __MS_SUPPORTED_CODES__: return code # check es elif code.split(__LANG_SEP__)[0] in __MS_SUPPORTED_CODES__: return code.split(__LANG_SEP__)[0] else: return None def preprocessing_translate_strs(strs): return [ __LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, s.strip()).replace( __LITERNAL_FORMAT__, __LITERNAL_REPLACEMENT__) for s in strs ] def postprocessing_translate_str(str): str = str.strip() # remove Quotes str = __QUOTES_RE__.sub(__QUOTES_REPLACEMENT__, str) # replace tp liternal replacement str = validate_liternal_replacement(str) # liternal replacement to liternal for format str = str.replace(__LITERNAL_REPLACEMENT__, __LITERNAL_FORMAT__) return str def validate_liternal_format(str): return __LITERNAL_FORMAT_RE__.sub(__LITERNAL_FORMAT__, str) def validate_liternal_replacement(str): return __LITERNAL_REPLACEMENT_RE__.sub(__LITERNAL_FORMAT__, str) def translate_ms(strs, to): lang = supported_lang(to) strs = preprocessing_translate_strs(strs) return [ postprocessing_translate_str(r['TranslatedText']) for r in trans.translate_array(strs, lang) ] if lang else strs def strings_obj_from_file(file): return strsparser.parse_strings(filename=file) def merge_two_dicts(x, y): '''Given two dicts, merge them into a new dict as a shallow 
copy.''' z = x.copy() z.update(y) return z # core function def insert_or_translate(target_file, lc): #parse target file target_kv = {} target_error_lines = [] if not notexist_or_empty_file(target_file): parsed_strings = strsparser.parse_strings(filename=target_file) for item in parsed_strings: k, v, e = item['key'], item['value'], item['error'] # line error if e: target_error_lines.append(e) if not target_error_lines: target_kv[k] = v #parsing complete or return. if target_error_lines: print '(!) Syntax error - Skip' return False, None, None, target_error_lines #base base_content = base_dict[os.path.basename(target_file)] base_kv = {} for item in base_content: base_kv[item['key']] = item['value'] force_adding_keys = base_kv.keys( ) if __KEYS_FORCE_TRANSLATE_ALL__ else __KEYS_FORCE_TRANSLATE__ adding_keys = list(((set(base_kv.keys()) - set(target_kv.keys())) | (set(base_kv.keys()) & set(force_adding_keys))) - set(__KEYS_FOLLOW_BASE__)) removing_keys = list(set(target_kv.keys()) - set(base_kv.keys())) existing_keys = list( set(base_kv.keys()) - (set(adding_keys) | set(removing_keys))) updated_keys = [] """ perform translate """ translated_kv = {} if len(adding_keys): print 'Translating...' 
translated_kv = dict( zip(adding_keys, translate_ms([base_kv[k] for k in adding_keys], lc))) updated_content = [] for item in base_content: k = item['key'] newitem = dict.fromkeys(item.keys()) newitem['key'] = k #added if k in adding_keys: if k in translated_kv: newitem['value'] = translated_kv[k] newitem['comment'] = 'Translated from: {0}'.format( base_kv[k]) print '[Add] "{0}" = "{1}" <- {2}'.format( k, newitem['value'], base_kv[k]) else: newitem['value'] = target_kv[k] newitem['comment'] = 'Translate failed from: {0}'.format( base_kv[k]) print '[Error] "{0}" = "{1}" X <- {2}'.format( k, newitem['value'], base_kv[k]) #exists elif k in existing_keys: target_value = target_kv.get(k) if k in __KEYS_FOLLOW_BASE__: newitem['value'] = base_kv[k] if target_value != base_kv[k]: updated_keys.append(k) else: newitem['value'] = target_value or base_kv[k] if not target_value: updated_keys.append(k) updated_content.append(newitem) #removed or wrong for k in removing_keys: print '[Remove]', k if len(adding_keys) or len(removing_keys): print '(i) Changed Keys: Added {0}, Updated {1}, Removed {2}'.format( len(adding_keys), len(updated_keys), len(removing_keys)) return updated_content and ( len(adding_keys) > 0 or len(updated_keys) > 0 or len(removing_keys) > 0), updated_content, translated_kv, target_error_lines def write_file(target_file, list_of_content): suc = False try: f = codecs.open(target_file, "w", "utf-8") contents = '' for content in list_of_content: if content['comment']: contents += '/* {0} */'.format(content['comment']) + '\n' contents += '"{0}" = "{1}";'.format(content['key'], content['value']) + '\n' f.write(contents) suc = True except IOError: print 'IOError to open', target_file finally: f.close() return suc def remove_file(target_file): try: os.rename(target_file, target_file + '.deleted') return True except IOError: print 'IOError to rename', target_file return False def create_file(target_file): open(target_file, 'a').close() def 
notexist_or_empty_file(target_file): return not os.path.exists(target_file) or os.path.getsize( target_file) == 0 def resolve_file_names(target_file_names): return map( lambda f: f.decode('utf-8'), filter(lambda f: f.endswith(__FILE_SUFFIX__), target_file_names)) base_dict = {} results_dict = {} # Get Base Language Specs walked = list(os.walk(__RESOURCE_PATH__, topdown=True)) for dir, subdirs, files in walked: if os.path.basename(dir) == __BASE_RESOUCE_DIR__: for _file in resolve_file_names(files): f = os.path.join(dir, _file) if notexist_or_empty_file(f): continue base_dict[_file] = strings_obj_from_file(f) if not base_dict: print '[!] Not found "{0}" in target path "{1}"'.format( __BASE_RESOUCE_DIR__, __RESOURCE_PATH__) sys.exit(0) print 'Start synchronizing...' for file in base_dict: print 'Target:', file for dir, subdirs, files in walked: files = resolve_file_names(files) if dir.endswith((__DIR_SUFFIX__)): lc = os.path.basename(dir).split(__DIR_SUFFIX__)[0] if lc.find('_'): lc = lc.replace('_', __LANG_SEP__) if lc == __BASE_LANG__: continue if lc in __EXCLUDING_LANGS__: print 'Skip: ', lc continue # lc = supported_lang(lc) results_dict[lc] = { 'deleted_files': [], 'added_files': [], 'updated_files': [], 'skipped_files': [], 'translated_files_lines': {}, 'error_lines_kv': {} } if not supported_lang(lc): print 'Does not supported: ', lc results_dict[lc]['skipped_files'] = join_path_all(dir, files) continue print '\n', 'Analayzing localizables... 
{1} (at {0})'.format( dir, lc) added_files = list(set(base_dict.keys()) - set(files)) removed_files = list(set(files) - set(base_dict.keys())) existing_files = list( set(files) - (set(added_files) | set(removed_files))) added_files = join_path_all(dir, added_files) removed_files = join_path_all(dir, removed_files) existing_files = join_path_all(dir, existing_files) added_cnt, updated_cnt, removed_cnt = 0, 0, 0 translated_files_lines = results_dict[lc]['translated_files_lines'] error_files = results_dict[lc]['error_lines_kv'] #remove - file for removed_file in removed_files: print 'Removing File... {0}'.format(removed_file) if remove_file(removed_file): removed_cnt += 1 #add - file for added_file in added_files: print 'Adding File... {0}'.format(added_file) create_file(added_file) u, c, t, e = insert_or_translate(added_file, lc) #error if e: error_files[added_file] = e #normal elif u and write_file(added_file, c): added_cnt += 1 translated_files_lines[added_file] = t #exist - lookup lines for ext_file in existing_files: u, c, t, e = insert_or_translate(ext_file, lc) #error if e: error_files[ext_file] = e #normal elif u: print 'Updating File... {0}'.format(ext_file) if write_file(ext_file, c): updated_cnt = +1 translated_files_lines[ext_file] = t if added_cnt or updated_cnt or removed_cnt or error_files: print '(i) Changed Files : Added {0}, Updated {1}, Removed {2}, Error {3}'.format( added_cnt, updated_cnt, removed_cnt, len(error_files.keys())) else: print 'Nothing to translate or add.' 
""" Results """ results_dict[lc]['deleted_files'] = removed_files results_dict[lc]['added_files'] = list( set(added_files) & set(translated_files_lines.keys())) results_dict[lc]['updated_files'] = list( set(existing_files) & set(translated_files_lines.keys())) if error_files: print error_files results_dict[lc]['error_lines_kv'] = error_files # print total Results print '' t_file_cnt, t_line_cnt = 0, 0 file_add_cnt, file_remove_cnt, file_update_cnt, file_skip_cnt = 0, 0, 0, 0 for lc in results_dict.keys(): result_lc = results_dict[lc] file_add_cnt += len(result_lc['added_files']) file_remove_cnt += len(result_lc['deleted_files']) file_update_cnt += len(result_lc['updated_files']) file_skip_cnt += len(result_lc['skipped_files']) for f in result_lc['added_files']: print 'Added', f for f in result_lc['deleted_files']: print 'Removed', f for f in result_lc['updated_files']: print 'Updated', f for f in result_lc['skipped_files']: print 'Skiped', f tfiles = result_lc['translated_files_lines'] if tfiles: # print '============ Results for langcode : {0} ============='.format(lc) for f in tfiles: t_file_cnt += 1 if len(tfiles[f]): # print '', f for key in tfiles[f]: t_line_cnt += 1 # print key, ' = ', tfiles[f][key] print '' found_warining = filter(lambda i: i or None, rget(results_dict, 'error_lines_kv')) if file_add_cnt or file_update_cnt or file_remove_cnt or file_skip_cnt or found_warining: print 'Total New Translated Strings : {0}'.format(t_line_cnt) print 'Changed Files Total : Added {0}, Updated {1}, Removed {2}, Skipped {3}'.format( file_add_cnt, file_update_cnt, file_remove_cnt, file_skip_cnt) print "Synchronized." if found_warining: print '\n[!!] WARNING: Found strings that contains the syntax error. Please confirm.' for a in found_warining: for k in a: print 'at', k for i in a[k]: print ' ', i else: print "All strings are already synchronized. Nothing to translate or add." return
def test_get_languages(self):
    """Check that ``get_languages`` returns a list containing the defaults.

    Builds a ``Translator`` in debug mode from ``client_id`` and
    ``client_secret`` (names defined outside this method), calls
    ``get_languages()`` and verifies that the result is a list holding
    every entry of ``default_languages``.
    """
    client = Translator(client_id, client_secret, debug=True)
    languages = client.get_languages()
    # assertIsInstance reports the offending type on failure and, unlike an
    # exact ``type(...) == list`` comparison, does not reject list subclasses.
    self.assertIsInstance(languages, list)
    # Compute the missing entries explicitly so a failure names them instead
    # of producing the bare "False is not true" that assertTrue gives.
    missing = set(default_languages) - set(languages)
    self.assertFalse(
        missing,
        'default languages missing from get_languages(): %s'
        % sorted(missing))
else: global commandPrefix message.Chat.SendMessage('Changed the command prefix from "%s" to "%s".' % (commandPrefix, arguments[1])) commandPrefix = arguments[1] elif arguments[0] == 'reboot': message.Chat.SendMessage('Attempting reboot...') try: os.execv(__file__, sys.argv) except Exception, error: message.Chat.SendMessage('There was an error: %s' % error) elif arguments[0] == 'translate': translator = Translator('JEREMY-skype-bot', 'iEvV4ZWgMjAM45Jub+WHXHHI9CZ4QJspduxTrFOXrkc=') if arguments[1] == 'get-langs': languages = translator.get_languages() message.Chat.SendMessage(', '.join(languages.sort())) elif arguments[1] == 'detect': langOut = arguments[2] restOfMessage = arguments del restOfMessage[0] del restOfMessage[1] del restOfMessage[2] messageToTranslate = ' '.join(restOfMessage) try: translatedMessage = translator.translate(messageToTranslate, langOut) message.Chat.SendMessage('Language detected: ' + translator.translate(messageToTranslate) + '\nLanguage to translate to: ' + langOut + '\nMessage to translate: ' + messageToTranslate + '\nTranslated message: ' + translatedMessage) except Exception, error: