def CollectTranslatedStrings():
    """Collects the translations of every string listed in kStringIds.

    The source (en-US) text of each string is read from
    google_chrome_strings.grd. Translations are read from every
    google_chrome_strings*.xtb file in the resources directory, where the
    translation id of a string is the fingerprint of its en-US text. When an
    .xtb file lacks a string, the en-US text is used for that language.

    Returns:
      A list of TranslationStruct(string_id + '_' + LANGUAGE, LANGUAGE, text)
      entries, one per (string id, language) pair plus the EN_US entries,
      sorted in place with list.sort().

    Raises:
      IndexError: if an id in kStringIds has no <message> node in the .grd
        file.
    """
    script_dir = path_utils.ScriptDir()
    kGeneratedResourcesPath = os.path.join(script_dir, '..', '..', '..',
                                           'app/google_chrome_strings.grd')
    kTranslationDirectory = os.path.join(script_dir, '..', '..', '..', 'app',
                                         'resources')
    kTranslationFiles = glob.glob(
        os.path.join(kTranslationDirectory, 'google_chrome_strings*.xtb'))

    # Get the strings out of google_chrome_strings.grd.  The <message> nodes
    # are indexed by name once instead of rescanning the whole document for
    # every string id; setdefault keeps the first node when a name repeats.
    dom = minidom.parse(kGeneratedResourcesPath)
    nodes_by_name = {}
    for node in dom.getElementsByTagName('message'):
        nodes_by_name.setdefault(node.getAttribute('name'), node)

    # message_nodes is kept in the same order as kStringIds so the two can be
    # associated by position.
    message_nodes = []
    for string_id in kStringIds:
        if string_id not in nodes_by_name:
            raise IndexError('No <message name="%s"> found in %s' %
                             (string_id, kGeneratedResourcesPath))
        message_nodes.append(nodes_by_name[string_id])
    message_texts = [
        node.firstChild.nodeValue.strip() for node in message_nodes
    ]

    # The fingerprint of the string is the message ID in the translation files
    # (xtb files).
    translation_ids = [str(FP.FingerPrint(text)) for text in message_texts]
    wanted_ids = set(translation_ids)

    # Manually put _EN_US in the list of translated strings because it doesn't
    # have a .xtb file.
    translated_strings = [
        TranslationStruct(string_id + '_EN_US', 'EN_US', message_text)
        for string_id, message_text in zip(kStringIds, message_texts)
    ]

    # Gather the translated strings from the .xtb files.  If an .xtb file
    # doesn't have the string we want, use the en-US string.
    for xtb_filename in kTranslationFiles:
        dom = minidom.parse(xtb_filename)
        # 'pt-BR' style language codes become 'PT_BR'.
        language = dom.documentElement.getAttribute('lang')
        language = language.replace('-', '_').upper()

        translations_by_id = {}
        for translation_node in dom.getElementsByTagName('translation'):
            translation_id = translation_node.getAttribute('id')
            if translation_id in wanted_ids:
                translations_by_id[translation_id] = (
                    translation_node.firstChild.nodeValue.strip())

        for string_id, translation_id, message_text in zip(
                kStringIds, translation_ids, message_texts):
            translated_strings.append(
                TranslationStruct(
                    string_id + '_' + language, language,
                    translations_by_id.get(translation_id, message_text)))

    translated_strings.sort()
    return translated_strings
Exemple #2
0
def get_fingerprint_for_xtb(message_tag):
  """Obtains the fingerprint meant for xtb files from a message tag.

  The hashed string is the tag's leading text followed, for every direct
  <ph> child in document order, by the upper-cased placeholder name and the
  text trailing that placeholder. The result is stripped, UTF-8 encoded and
  passed through clean_triple_quoted_string() before being fingerprinted.

  Args:
    message_tag: a message element; only its `text`, `findall('ph')` and the
      placeholders' `get('name')` / `tail` are used.

  Returns:
    FP.FingerPrint() of the prepared string with the high-order (sign) bit
    masked off.
  """
  # `text` is None when the message starts directly with a placeholder, and
  # `tail` is None when nothing follows a placeholder; treat both as empty
  # instead of failing on None + str.
  string_to_hash = message_tag.text or ''
  for string_ph in message_tag.findall('ph'):
    string_to_hash += string_ph.get('name').upper() + (string_ph.tail or '')
  string_to_hash = string_to_hash.strip().encode('utf-8')
  string_to_hash = clean_triple_quoted_string(string_to_hash)
  # No `L` suffix: the plain literal has the same value and is valid on both
  # Python 2 and Python 3.
  return FP.FingerPrint(string_to_hash) & 0x7fffffffffffffff
Exemple #3
0
def UpdateBraveIds(grd_file):
    """Records Chrome-to-Brave translation id pairs for the messages of a .grd.

    For every <message> under <release>/<messages> the message string is
    rebuilt (leading text plus, per <ph> child, the upper-cased placeholder
    name and its trailing text), stripped and UTF-8 encoded. If it contains
    `brave_company`, the Chrome variant is obtained by replacing it with
    `google_company`; otherwise, if it contains `brave_brand_string`, one
    Chrome variant is produced per entry of `chrome_brand_strings`. The
    fingerprints of the Brave string and of each Chrome variant are printed,
    and `brave_ids[chrome_fp] = brave_fp` is stored unless that Chrome id is
    already present.

    Args:
      grd_file: path (or file object) of the .grd file, as accepted by
        xml.etree.ElementTree.parse().

    TODO (carried over from the original code): the Chrome id computed for
    the company-name replacement is known to come out incorrect; it is
    currently replaced manually.
    """
    # Clears the high-order (sign) bit of a fingerprint.  No `L` suffix: the
    # plain literal has the same value and is valid on Python 2 and 3.
    id_mask = 0x7fffffffffffffff

    messages = xml.etree.ElementTree.parse(grd_file).getroot().find(
        'release').find('messages')
    for message_tag in messages.findall('message'):
        placeholders = message_tag.findall('ph')
        # Skip messages with no content at all.  This has to happen before
        # the concatenation below, which would otherwise fail on None.
        if message_tag.text is None and not placeholders:
            continue
        # `text` is None when the message starts with a placeholder and
        # `tail` is None when nothing follows one; treat both as empty.
        brave_string = message_tag.text or ''
        for placeholder in placeholders:
            brave_string += (placeholder.get('name').upper() +
                             (placeholder.tail or ''))
        brave_string = brave_string.strip().encode('utf-8')

        # Work out which Chrome strings correspond to this Brave string.
        if brave_company in brave_string:
            chrome_strings = [
                brave_string.replace(brave_company, google_company)
            ]
        elif brave_brand_string in brave_string:
            chrome_strings = [
                brave_string.replace(brave_brand_string, chrome_brand_string)
                for chrome_brand_string in chrome_brand_strings
            ]
        else:
            continue

        # Calculate Brave string id
        brave_string_fp = FP.FingerPrint(brave_string) & id_mask
        print(str(brave_string_fp) + ' - ' + brave_string)
        for chrome_string in chrome_strings:
            # Calculate Chrome string id
            chrome_string_fp = FP.FingerPrint(chrome_string) & id_mask
            print(str(chrome_string_fp) + ' - ' + chrome_string)
            # The first Brave id recorded for a Chrome id wins.
            if chrome_string_fp not in brave_ids:
                brave_ids[chrome_string_fp] = brave_string_fp
        print('\n')
Exemple #4
0
def get_fingerprint_for_xtb(message_tag):
    """Obtains the fingerprint meant for xtb files from a message tag.

    The hashed string is the tag's leading text followed, for every direct
    <ph> child in document order, by the upper-cased placeholder name and the
    text trailing that placeholder. The result is stripped, UTF-8 encoded,
    passed through clean_triple_quoted_string() and decoded again before
    being fingerprinted. When the tag has a non-empty `meaning` attribute,
    the fingerprint of the meaning is combined with the message fingerprint.

    Args:
        message_tag: a message element; only its `text`, `findall('ph')`,
            `get('meaning')` and the placeholders' `get('name')` / `tail`
            are used.

    Returns:
        The fingerprint with the high-order (sign) bit masked off, as a
        decimal string.
    """
    # `text` is None when the message starts directly with a placeholder, and
    # `tail` is None when nothing follows a placeholder; treat both as empty
    # so the concatenation below never sees None.
    string_to_hash = message_tag.text or ''
    for string_ph in message_tag.findall('ph'):
        string_to_hash += (string_ph.get('name').upper() +
                           (string_ph.tail or ''))
    string_to_hash = string_to_hash.strip().encode('utf-8')
    string_to_hash = clean_triple_quoted_string(string_to_hash)
    fp = FP.FingerPrint(string_to_hash.decode('utf-8'))

    # get() already returns None when the attribute is absent.
    meaning = message_tag.get('meaning')
    if meaning:
        # combine the fingerprints of message and meaning
        fp2 = FP.FingerPrint(meaning)
        if fp < 0:
            fp = fp2 + (fp << 1) + 1
        else:
            fp = fp2 + (fp << 1)
    # To avoid negative ids we strip the high-order bit.  No `L` suffix: the
    # plain literal has the same value and is valid on Python 2 and 3.
    return str(fp & 0x7fffffffffffffff)
# Computes translation ids for messages.
#
# Messages with placeholders should be transformed like this:
# original message = "I'll buy a <ph name="WAVELENGTH">%d<ex>200</ex></ph> nm laser at <ph name="STORE_NAME">%s<ex>the grocery store</ex></ph>."
# message to get id = "I'll buy a WAVELENGTH nm laser at STORE_NAME."
#
# Messages with line breaks should be transformed like this:
# original message = "She gathered
#wood, charcoal, and
#a sledge hammer."
# message to get id = "She gathered\nwood, charcoal, and\na sledge hammer."

import FP

# Writes the translation ids of two hard-coded strings to strings.txt, one
# "<id> - <string>" line each (old string first).
oldString = "old string"
newString = "new string"
fp = FP.FingerPrint(newString)
fp2 = FP.FingerPrint(oldString)
# The `with` block closes the file even if a write fails.  The mask strips
# the high-order (sign) bit; it carries no `L` suffix because the plain
# literal has the same value and is valid on both Python 2 and Python 3.
with open('strings.txt', 'w') as file_:
    file_.write(str(fp2 & 0x7fffffffffffffff) + ' - ' + oldString + '\r\n')
    file_.write(str(fp & 0x7fffffffffffffff) + ' - ' + newString + '\r\n')
def SyncTransifexToTranslations():
    """Copies the strings of the Transifex XML files into the .xtb files.

    The translation id of every <string> in `base_strings_file` is the
    fingerprint of its text with the high-order bit masked off. For each
    .xtb file directly inside `translations_folder`, the matching
    `transifex_folder`/stringsxml_<lang>.xml file is read and, for each of
    its strings, the <translation> with the corresponding id is updated when
    its text differs, or a new <translation> is appended when none exists.
    A changed .xtb file is written back with an XML declaration and a
    DOCTYPE line prepended.

    Exits through sys.exit() with a message when a base string has an empty
    name or value or a duplicated name, when an .xtb file has no language
    id, when the Transifex file for a language is missing, or when a
    Transifex string is not present in the base strings.
    """
    # Map every base string name to its translation id, kept as a string
    # because that is how ids appear in the .xtb `id` attributes.
    brave_strings = {}
    e = xml.etree.ElementTree.parse(base_strings_file).getroot()
    for string_tag in e.findall('string'):
        string_name = string_tag.get('name')
        string_value = string_tag.text
        if not string_name:
            sys.exit('String name is empty')
        if not string_value:
            sys.exit("String value is empty")
        if string_name in brave_strings:
            sys.exit('String name "' + string_name + '" is duplicated')
        # The mask strips the high-order (sign) bit.  No `L` suffix: the
        # plain literal has the same value and is valid on Python 2 and 3.
        brave_strings[string_name] = str(
            FP.FingerPrint(string_value) & 0x7fffffffffffffff)

    # Language codes that are named differently on the Transifex side.
    lang_fixes = {'ca': 'ca_ES', 'bg': 'bg_BG', 'iw': 'he', 'cs': 'cs_CZ'}

    # go through all .xtb files in translations_folder
    replacingNumber = 1
    addingNumber = 1
    for (_dirpath, _dirnames, filenames) in walk(translations_folder):
        for filename in filenames:
            if not filename.endswith('.xtb'):
                continue
            translations_file_name = translations_folder + '/' + filename
            translations_tree = xml.etree.ElementTree.parse(
                translations_file_name)
            translations = translations_tree.getroot()
            # get language id; a missing `lang` attribute is treated like an
            # empty one so that the check below reports it.
            lang_id = (translations.get('lang') or '').replace('-', '_')
            if not lang_id:
                sys.exit('Language id not found for ' + filename)
            lang_id = lang_fixes.get(lang_id, lang_id)
            print('Processing language "' + lang_id + '"...')

            # find appropriate xml file in transifex folder
            xml_file_name = (transifex_folder + '/stringsxml_' + lang_id +
                             '.xml')
            if not os.path.isfile(xml_file_name):
                sys.exit('Language xml file not found ' + xml_file_name)
            strings = xml.etree.ElementTree.parse(xml_file_name).getroot()

            # Index the existing translations by id once per file instead of
            # rescanning them for every string; setdefault keeps the first
            # tag when an id is repeated.
            tags_by_id = {}
            for translation_tag in translations.findall('translation'):
                tags_by_id.setdefault(translation_tag.get('id'),
                                      translation_tag)

            translations_file_was_changed = False
            for string_tag in strings.findall('string'):
                string_name = string_tag.get('name')
                string_value = string_tag.text
                if string_name not in brave_strings:
                    sys.exit('String name "' + string_name +
                             '" not found in base strings')
                translation_id = brave_strings[string_name]
                # Compare with None explicitly: an Element without children
                # is falsy.
                translation_tag = tags_by_id.get(translation_id)
                if translation_tag is None:
                    # could not find translation id, so append it to the end
                    print(str(addingNumber) + ' adding "' + string_name +
                          '" with "' + (string_value or '') + '"')
                    addingNumber += 1
                    new_translation_tag = xml.etree.ElementTree.Element(
                        'translation')
                    new_translation_tag.set('id', translation_id)
                    new_translation_tag.text = string_value
                    new_translation_tag.tail = '\n'
                    translations.append(new_translation_tag)
                    tags_by_id[translation_id] = new_translation_tag
                    translations_file_was_changed = True
                elif translation_tag.text != string_value:
                    # we found id, so replace its text.  Empty tags have a
                    # text of None, hence the `or ''` in the log message.
                    print(str(replacingNumber) + ' replacing "' +
                          (translation_tag.text or '') + '" with "' +
                          (string_value or '') + '"')
                    replacingNumber += 1
                    translation_tag.text = string_value
                    translations_file_was_changed = True

            # write changes
            if translations_file_was_changed:
                translations_tree.write(translations_file_name,
                                        encoding="utf-8",
                                        xml_declaration=False)
                # Prepend the headers.  The whole content is loaded into
                # memory (size is less than 1Mb, so should not be a problem).
                with open(translations_file_name, 'r+') as f:
                    content = f.read()
                    f.seek(0, 0)
                    f.write('<?xml version="1.0" ?>\n'
                            '<!DOCTYPE translationbundle>\n' + content)
        # Only the top level of translations_folder is processed.
        break
    print('Sync transifex to translations finished successfully')
def SyncTranslationsToTransifex():
    """Copies the translations of the .xtb files into the Transifex XML files.

    The translation id of every <string> in `base_strings_file` is the
    fingerprint of its text with the high-order bit masked off. For each
    .xtb file directly inside `translations_folder`, the matching
    `transifex_folder`/stringsxml_<lang>.xml file is read and each of its
    strings whose text differs from the <translation> with the corresponding
    id is overwritten with that translation. A changed Transifex file is
    written back in place.

    Exits through sys.exit() with a message when a base string has an empty
    name or value or a duplicated name, when an .xtb file has no language
    id, when the Transifex file for a language is missing, when a Transifex
    string is not present in the base strings, or when the .xtb file has no
    translation for a string's id.
    """
    # Map every base string name to its translation id, kept as a string
    # because that is how ids appear in the .xtb `id` attributes.
    brave_strings = {}
    e = xml.etree.ElementTree.parse(base_strings_file).getroot()
    for string_tag in e.findall('string'):
        string_name = string_tag.get('name')
        string_value = string_tag.text
        if not string_name:
            sys.exit('String name is empty')
        if not string_value:
            sys.exit("String value is empty")
        if string_name in brave_strings:
            sys.exit('String name "' + string_name + '" is duplicated')
        # The mask strips the high-order (sign) bit.  No `L` suffix: the
        # plain literal has the same value and is valid on Python 2 and 3.
        brave_strings[string_name] = str(
            FP.FingerPrint(string_value) & 0x7fffffffffffffff)

    # Language codes that are named differently on the Transifex side.
    lang_fixes = {'ca': 'ca_ES', 'bg': 'bg_BG', 'iw': 'he', 'cs': 'cs_CZ'}

    # go through all .xtb files in translations_folder
    replacingNumber = 1
    for (_dirpath, _dirnames, filenames) in walk(translations_folder):
        for filename in filenames:
            if not filename.endswith('.xtb'):
                continue
            translations = xml.etree.ElementTree.parse(
                translations_folder + '/' + filename).getroot()
            # get language id; a missing `lang` attribute is treated like an
            # empty one so that the check below reports it.
            lang_id = (translations.get('lang') or '').replace('-', '_')
            if not lang_id:
                sys.exit('Language id not found for ' + filename)
            lang_id = lang_fixes.get(lang_id, lang_id)
            print('Processing language "' + lang_id + '"...')

            # find appropriate xml file in transifex folder
            xml_file_name = (transifex_folder + '/stringsxml_' + lang_id +
                             '.xml')
            if not os.path.isfile(xml_file_name):
                sys.exit('Language xml file not found ' + xml_file_name)
            strings_tree = xml.etree.ElementTree.parse(xml_file_name)
            strings = strings_tree.getroot()

            # Index the translations by id once per file instead of
            # rescanning them for every string; setdefault keeps the first
            # tag when an id is repeated.
            tags_by_id = {}
            for translation_tag in translations.findall('translation'):
                tags_by_id.setdefault(translation_tag.get('id'),
                                      translation_tag)

            strings_file_was_changed = False
            for string_tag in strings.findall('string'):
                string_name = string_tag.get('name')
                string_value = string_tag.text
                if string_name not in brave_strings:
                    sys.exit('String name "' + string_name +
                             '" not found in base strings')
                translation_id = brave_strings[string_name]
                # Compare with None explicitly: an Element without children
                # is falsy.
                translation_tag = tags_by_id.get(translation_id)
                if translation_tag is None:
                    # could not find translation id, so give up
                    sys.exit('Translation id "' + translation_id +
                             '" for "' + string_name + '" not found')
                translation_text = translation_tag.text
                if translation_text != string_value:
                    # the texts differ, so take the one from the .xtb file.
                    # Empty tags have a text of None, hence the `or ''` in
                    # the log message.
                    print(str(replacingNumber) + ' replacing "' +
                          (string_value or '') + '" with "' +
                          (translation_text or '') + '"')
                    replacingNumber += 1
                    string_tag.text = translation_text
                    strings_file_was_changed = True

            if strings_file_was_changed:
                strings_tree.write(xml_file_name, encoding="utf-8",
                                   xml_declaration=False)
        # Only the top level of translations_folder is processed.
        break
    print('Sync translations to transifex finished successfully')