def CollectTranslatedStrings():
    """Gathers every available translation of the strings in kStringIds.

    The source text of each string id is read from google_chrome_strings.grd;
    its fingerprint (FP.FingerPrint) is the id used to look the string up in
    each google_chrome_strings*.xtb file.

    Returns:
        A list of TranslationStruct(string_id + '_' + LANG, LANG, text)
        entries: one EN_US entry per string id taken from the .grd file, plus
        one entry per string id for every .xtb file (falling back to the
        en-US text when the .xtb file lacks the string). The list is ordered
        by list.sort(), i.e. by TranslationStruct's own ordering; the original
        documentation describes this as sorted by language code.
    """
    grd_path = os.path.join(path_utils.ScriptDir(), '..', '..', '..',
                            'app/google_chrome_strings.grd')
    xtb_dir = os.path.join(path_utils.ScriptDir(), '..', '..', '..',
                           'app', 'resources')
    xtb_paths = glob.glob(os.path.join(xtb_dir, 'google_chrome_strings*.xtb'))

    # Pull the source (en-US) messages out of the .grd file.
    grd_dom = minidom.parse(grd_path)
    grd_messages = grd_dom.getElementsByTagName('message')

    # Pick the node for each wanted id, keeping the same order as kStringIds
    # so ids, texts and fingerprints can be paired up by position.
    wanted_nodes = []
    for wanted_id in kStringIds:
        candidates = [node for node in grd_messages
                      if node.getAttribute('name') == wanted_id]
        wanted_nodes.append(candidates[0])
    source_texts = [node.firstChild.nodeValue.strip()
                    for node in wanted_nodes]

    # The .xtb files key each translation by the fingerprint of the source
    # text.
    fingerprints = [str(FP.FingerPrint(text)) for text in source_texts]

    # en-US has no .xtb file, so its entries come straight from the .grd text.
    results = [TranslationStruct(string_id + '_EN_US', 'EN_US', text)
               for string_id, text in zip(kStringIds, source_texts)]

    for xtb_path in xtb_paths:
        xtb_dom = minidom.parse(xtb_path)
        raw_language = xtb_dom.documentElement.getAttribute('lang')
        language = raw_language.replace('-', '_').upper()

        # Keep only the translations whose id is one of our fingerprints.
        found = {}
        for node in xtb_dom.getElementsByTagName('translation'):
            node_id = node.getAttribute('id')
            if node_id in fingerprints:
                found[node_id] = node.firstChild.nodeValue.strip()

        # Missing translations fall back to the en-US source text.
        for string_id, fingerprint, fallback in zip(kStringIds, fingerprints,
                                                    source_texts):
            results.append(
                TranslationStruct(string_id + '_' + language, language,
                                  found.get(fingerprint, fallback)))

    results.sort()
    return results
def get_fingerprint_for_xtb(message_tag):
    """Obtains the fingerprint meant for xtb files from a message tag.

    The hashed text is the tag's leading text followed, for every direct
    <ph> child, by the upper-cased placeholder name and the text that trails
    that placeholder. The result is stripped, UTF-8 encoded and passed
    through clean_triple_quoted_string before being fingerprinted.

    Args:
        message_tag: a <message> element exposing the ElementTree-style
            API used here (.text, .findall, and .get/.tail on its children).

    Returns:
        FP.FingerPrint of the cleaned text with the high-order bit cleared
        (masked to the low 63 bits), so the id is never negative.
    """
    # ElementTree-style APIs report absent text as None (for example when
    # the message starts or ends with a placeholder); treat it as empty
    # instead of failing on the concatenation.
    string_to_hash = message_tag.text or ''
    for string_ph in message_tag.findall('ph'):
        string_to_hash += (string_ph.get('name').upper() +
                           (string_ph.tail or ''))
    string_to_hash = string_to_hash.strip().encode('utf-8')
    string_to_hash = clean_triple_quoted_string(string_to_hash)
    # No Python 2 'L' suffix on the mask: the value is unchanged and the
    # literal stays valid syntax on Python 3.
    return FP.FingerPrint(string_to_hash) & 0x7fffffffffffffff
def UpdateBraveIds(grd_file):
    """Records Chrome-id -> Brave-id pairs for the branded messages of a grd.

    Walks release/messages/message in grd_file. For each message the text to
    fingerprint is the leading text plus, per <ph> child, the upper-cased
    placeholder name and the text trailing it. Messages containing
    brave_company (or, failing that, brave_brand_string) get their Brave
    fingerprint computed, and the fingerprint of every Chrome-branded variant
    is mapped to it in the module-level brave_ids unless that Chrome id is
    already present. Each fingerprint/text pair is printed.

    Args:
        grd_file: path (or file object) of the .grd file to parse.

    NOTE(review): the text is UTF-8 encoded before the substring tests and
    the prints; as written that relies on encode() returning a str
    (Python 2) - confirm before running under Python 3.
    """
    # Clears the high-order bit of a fingerprint. Written without the
    # Python 2 'L' suffix: same value, and valid syntax on Python 3.
    id_mask = 0x7fffffffffffffff

    messages = xml.etree.ElementTree.parse(grd_file).getroot().find(
        'release').find('messages')
    for message_tag in messages.findall('message'):
        message_text = message_tag.text
        placeholders = message_tag.findall('ph')
        # Nothing to fingerprint: no text and no placeholders.
        if message_text is None and not placeholders:
            continue

        # ElementTree reports missing text/tail as None (message starting or
        # ending with a placeholder); treat those parts as empty.
        brave_string = message_text or ''
        for placeholder in placeholders:
            brave_string += (placeholder.get('name').upper() +
                             (placeholder.tail or ''))
        brave_string = brave_string.strip().encode('utf-8')

        if brave_company in brave_string:
            # Calculate Brave string id
            brave_string_fp = FP.FingerPrint(brave_string) & id_mask
            print(str(brave_string_fp) + ' - ' + brave_string)
            chrome_string = brave_string.replace(brave_company,
                                                 google_company)
            # Calculate Chrome string id
            # TODO: the id computed here is incorrect; the cause still needs
            # to be figured out. For now it is replaced manually.
            chrome_string_fp = FP.FingerPrint(chrome_string) & id_mask
            print(str(chrome_string_fp) + ' - ' + chrome_string)
            if chrome_string_fp not in brave_ids:
                brave_ids[chrome_string_fp] = brave_string_fp
            print('\n')
        elif brave_brand_string in brave_string:
            # Calculate Brave string id
            brave_string_fp = FP.FingerPrint(brave_string) & id_mask
            print(str(brave_string_fp) + ' - ' + brave_string)
            # One Chrome variant per known Chrome brand string.
            for chrome_brand_string in chrome_brand_strings:
                chrome_string = brave_string.replace(brave_brand_string,
                                                     chrome_brand_string)
                # Calculate Chrome string id
                chrome_string_fp = FP.FingerPrint(chrome_string) & id_mask
                print(str(chrome_string_fp) + ' - ' + chrome_string)
                if chrome_string_fp not in brave_ids:
                    brave_ids[chrome_string_fp] = brave_string_fp
            print('\n')
def get_fingerprint_for_xtb(message_tag):
    """Obtains the fingerprint meant for xtb files from a message tag.

    The hashed text is the tag's leading text followed, for every direct
    <ph> child, by the upper-cased placeholder name and the text that trails
    that placeholder. The result is stripped and passed (UTF-8 encoded)
    through clean_triple_quoted_string before being fingerprinted. When the
    tag carries a non-empty 'meaning' attribute, the fingerprint of the
    meaning is combined with the message fingerprint.

    Args:
        message_tag: a <message> element exposing the ElementTree-style
            API used here (.text, .findall, .get, and .tail on children).

    Returns:
        The id as a decimal string, with the high-order bit cleared so it
        is never negative.
    """
    # ElementTree-style APIs report absent text as None (for example when
    # the message starts with a placeholder). Default it up front so the
    # concatenation in the loop cannot fail on None.
    string_to_hash = message_tag.text or ''
    for string_ph in message_tag.findall('ph'):
        string_to_hash += (string_ph.get('name').upper() +
                           (string_ph.tail or ''))
    string_to_hash = string_to_hash.strip().encode('utf-8')
    string_to_hash = clean_triple_quoted_string(string_to_hash)
    fp = FP.FingerPrint(string_to_hash.decode('utf-8'))

    # .get() already yields None when the attribute is absent.
    meaning = message_tag.get('meaning')
    if meaning:
        # combine the fingerprints of message and meaning
        fp2 = FP.FingerPrint(meaning)
        if fp < 0:
            fp = fp2 + (fp << 1) + 1
        else:
            fp = fp2 + (fp << 1)

    # To avoid negative ids we strip the high-order bit. The mask has no
    # Python 2 'L' suffix: same value, and valid syntax on Python 3.
    return str(fp & 0x7fffffffffffffff)
# Gives translation ids for messages.
#
# Messages with placeholders should be written like this:
# original message = "I'll buy a <ph name="WAVELENGTH">%d<ex>200</ex></ph> nm laser at <ph name="STORE_NAME">%s<ex>the grocery store</ex></ph>."
# message to get id = "I'll buy a WAVELENGTH nm laser at STORE_NAME."
#
# Messages with line breaks should be written like this:
# original message = "She gathered
# wood, charcoal, and
# a sledge hammer."
# message to get id = "She gathered\nwood, charcoal, and\na sledge hammer."

import FP

oldString = "old string"
newString = "new string"
fp = FP.FingerPrint(newString)
fp2 = FP.FingerPrint(oldString)

# Write "<id> - <string>" for the old and then the new string. The mask
# keeps the low 63 bits of each fingerprint; it is written without the
# Python 2 'L' suffix (same value, valid on Python 3). The with-block closes
# the file even if a write fails.
with open('strings.txt', 'w') as file_:
    file_.write(str(fp2 & 0x7fffffffffffffff) + ' - ' + oldString + '\r\n')
    file_.write(str(fp & 0x7fffffffffffffff) + ' - ' + newString + '\r\n')
def SyncTransifexToTranslations():
    """Copies Transifex string values into the matching .xtb translations.

    Reads every <string> of base_strings_file and fingerprints its text to
    obtain its translation id. Then, for each .xtb file directly inside
    translations_folder, loads transifex_folder/stringsxml_<lang>.xml and,
    for each of its strings, either overwrites the text of the
    <translation> with the matching id or appends a new <translation>.
    Changed .xtb files are rewritten with the XML declaration and DOCTYPE
    header prepended.

    Exits via sys.exit with a message when a base string has no name or
    value, a base string name is duplicated, an .xtb file has no language,
    the Transifex file for a language is missing, or a Transifex string is
    not among the base strings.
    """
    # load all strings and calculate their translation id (ignore memory
    # consumption at this point to speed up whole process)
    brave_strings = {}
    base_root = xml.etree.ElementTree.parse(base_strings_file).getroot()
    for string_tag in base_root.findall('string'):
        string_name = string_tag.get('name')
        string_value = string_tag.text
        if not string_name:
            sys.exit('String name is empty')
        if not string_value:
            sys.exit("String value is empty")
        # calculate translation id; the mask clears the high-order bit and is
        # written without the Python 2 'L' suffix (same value, valid on
        # Python 3)
        string_fp = FP.FingerPrint(string_value) & 0x7fffffffffffffff
        if string_name in brave_strings:
            sys.exit('String name "' + string_name + '" is duplicated')
        brave_strings[string_name] = string_fp

    # there are some differences in language codes, so correct them
    lang_overrides = {
        'ca': 'ca_ES',
        'bg': 'bg_BG',
        'iw': 'he',
        'cs': 'cs_CZ',
    }

    # go through all .xtb files in translations_folder
    replacingNumber = 1
    addingNumber = 1
    for (dirpath, dirnames, filenames) in walk(translations_folder):
        for filename in filenames:
            if not filename.endswith('.xtb'):
                continue
            translations_file_name = translations_folder + '/' + filename
            translations_tree = xml.etree.ElementTree.parse(
                translations_file_name)
            translations = translations_tree.getroot()

            # get language id; check for a missing attribute before calling
            # .replace() so the intended error message is what gets reported
            lang_attr = translations.get('lang')
            if not lang_attr:
                sys.exit('Language id not found for ' + filename)
            lang_id = lang_attr.replace('-', '_')
            lang_id = lang_overrides.get(lang_id, lang_id)
            print('Processing language "' + lang_id + '"...')

            # find appropriate xml file in transifex folder
            xml_file_name = (transifex_folder + '/stringsxml_' + lang_id +
                             '.xml')
            if not os.path.isfile(xml_file_name):
                sys.exit('Language xml file not found ' + xml_file_name)
            strings = xml.etree.ElementTree.parse(xml_file_name).getroot()

            # Index the translations by id once instead of rescanning the
            # whole file for every string. setdefault keeps the first tag
            # for a repeated id, matching a front-to-back search.
            translations_by_id = {}
            for translation_tag in translations.findall('translation'):
                translations_by_id.setdefault(translation_tag.get('id'),
                                              translation_tag)

            translations_file_was_changed = False
            for string_tag in strings.findall('string'):
                string_name = string_tag.get('name')
                string_value = string_tag.text
                if string_name not in brave_strings:
                    sys.exit('String name "' + string_name +
                             '" not found in base strings')

                translation_id = str(brave_strings[string_name])
                # Compare against None explicitly: an Element without
                # children is falsy.
                translation_tag = translations_by_id.get(translation_id)
                if translation_tag is None:
                    # could not find translation id, so append it to the end
                    print(str(addingNumber) + ' adding "' + string_name +
                          '" with "' + string_value + '"')
                    addingNumber += 1
                    new_translation_tag = xml.etree.ElementTree.Element(
                        'translation')
                    new_translation_tag.set('id', translation_id)
                    new_translation_tag.text = string_value
                    new_translation_tag.tail = '\n'
                    translations.append(new_translation_tag)
                    # Later strings with the same id must see the new tag.
                    translations_by_id[translation_id] = new_translation_tag
                    translations_file_was_changed = True
                elif not translation_tag.text == string_value:
                    # we found id, so replace it
                    print(str(replacingNumber) + ' replacing "' +
                          translation_tag.text + '" with "' + string_value +
                          '"')
                    replacingNumber += 1
                    translation_tag.text = string_value
                    translations_file_was_changed = True

            # write changes
            if translations_file_was_changed:
                translations_tree.write(translations_file_name,
                                        encoding="utf-8",
                                        xml_declaration=False)
                # we need to prepend headers; load all content into memory
                # to make it faster (size is less than 1Mb, so should not be
                # a problem). The with-block closes the file even on error.
                with open(translations_file_name, 'r+') as f:
                    content = f.read()
                    f.seek(0, 0)
                    f.write('<?xml version="1.0" ?>\n'
                            '<!DOCTYPE translationbundle>\n' + content)
        # only the top level of translations_folder is processed
        break
    print('Sync transifex to translations finished successfully')
def SyncTranslationsToTransifex():
    """Copies .xtb translation texts into the matching Transifex xml files.

    Reads every <string> of base_strings_file and fingerprints its text to
    obtain its translation id. Then, for each .xtb file directly inside
    translations_folder, loads transifex_folder/stringsxml_<lang>.xml and
    sets each of its strings to the text of the <translation> with the
    matching id. Changed Transifex files are written back in place.

    Exits via sys.exit with a message when a base string has no name or
    value, a base string name is duplicated, an .xtb file has no language,
    the Transifex file for a language is missing, a Transifex string is not
    among the base strings, or a string's translation id is absent from the
    .xtb file.
    """
    # load all strings and calculate their translation id (ignore memory
    # consumption at this point to speed up whole process)
    brave_strings = {}
    base_root = xml.etree.ElementTree.parse(base_strings_file).getroot()
    for string_tag in base_root.findall('string'):
        string_name = string_tag.get('name')
        string_value = string_tag.text
        if not string_name:
            sys.exit('String name is empty')
        if not string_value:
            sys.exit("String value is empty")
        # calculate translation id; the mask clears the high-order bit and is
        # written without the Python 2 'L' suffix (same value, valid on
        # Python 3)
        string_fp = FP.FingerPrint(string_value) & 0x7fffffffffffffff
        if string_name in brave_strings:
            sys.exit('String name "' + string_name + '" is duplicated')
        brave_strings[string_name] = string_fp

    # there are some differences in language codes, so correct them
    lang_overrides = {
        'ca': 'ca_ES',
        'bg': 'bg_BG',
        'iw': 'he',
        'cs': 'cs_CZ',
    }

    # go through all .xtb files in translations_folder
    replacingNumber = 1
    for (dirpath, dirnames, filenames) in walk(translations_folder):
        for filename in filenames:
            if not filename.endswith('.xtb'):
                continue
            translations = xml.etree.ElementTree.parse(
                translations_folder + '/' + filename).getroot()

            # get language id; check for a missing attribute before calling
            # .replace() so the intended error message is what gets reported
            lang_attr = translations.get('lang')
            if not lang_attr:
                sys.exit('Language id not found for ' + filename)
            lang_id = lang_attr.replace('-', '_')
            lang_id = lang_overrides.get(lang_id, lang_id)
            print('Processing language "' + lang_id + '"...')

            # find appropriate xml file in transifex folder
            xml_file_name = (transifex_folder + '/stringsxml_' + lang_id +
                             '.xml')
            if not os.path.isfile(xml_file_name):
                sys.exit('Language xml file not found ' + xml_file_name)
            strings_tree = xml.etree.ElementTree.parse(xml_file_name)
            strings = strings_tree.getroot()

            # Index the translations by id once instead of rescanning the
            # whole file for every string. setdefault keeps the first tag
            # for a repeated id, matching a front-to-back search.
            translations_by_id = {}
            for translation_tag in translations.findall('translation'):
                translations_by_id.setdefault(translation_tag.get('id'),
                                              translation_tag)

            strings_file_was_changed = False
            for string_tag in strings.findall('string'):
                string_name = string_tag.get('name')
                string_value = string_tag.text
                if string_name not in brave_strings:
                    sys.exit('String name "' + string_name +
                             '" not found in base strings')

                translation_id = str(brave_strings[string_name])
                # Compare against None explicitly: an Element without
                # children is falsy.
                translation_tag = translations_by_id.get(translation_id)
                if translation_tag is None:
                    # could not find translation id, so give up
                    sys.exit('Translation id "' + translation_id +
                             '" for "' + string_name + '" not found')

                translation_text = translation_tag.text
                if not translation_text == string_value:
                    # we found id, so take the .xtb text
                    print(str(replacingNumber) + ' replacing "' +
                          string_value + '" with "' + translation_text + '"')
                    replacingNumber += 1
                    string_tag.text = translation_text
                    strings_file_was_changed = True

            if strings_file_was_changed:
                strings_tree.write(xml_file_name, encoding="utf-8",
                                   xml_declaration=False)
        # only the top level of translations_folder is processed
        break
    print('Sync translations to transifex finished successfully')