def po2xml(catalog, with_untranslated=False, resfilter=None, warnfunc=dummy_warn):
    """Convert the gettext catalog in ``catalog`` to a ``ResourceTree``
    instance (our in-memory representation of an Android XML resource).

    This currently relies entirely on the fact that we can use the context
    of each message to specify the Android resource name (which we need
    to do to handle duplicates, but this is a nice by-product). However
    that also means we cannot handle arbitrary catalogs.

    The latter would in theory be possible by using the original,
    untranslated XML to match up a message's id to a resource name, but
    right now we don't support this (and it's not clear it would be
    necessary, even).

    If ``with_untranslated`` is given, then strings in the catalog
    that have no translation are written out with the original id,
    whenever this is safely possible. This does not include string-arrays,
    which for technical reasons always must include all elements, and it
    does not include plurals, for which the same is true.

    ``resfilter``, if given, is called with each message object; messages
    for which it returns a true value are skipped.

    ``warnfunc`` is called as ``warnfunc(text, severity)`` for every
    problem found. All problems reported by this function use the
    severity ``'error'``; the affected message is skipped where it cannot
    be converted.

    Messages are interpreted by their context:

    * ``name:index`` - element ``index`` of the string-array ``name``.
      Missing indices are filled with ``None``.
    * any other context with a tuple as translation - a plurals resource.
    * any other context - a plain string resource.
    """
    # Validate that the plurals in the .po catalog match those that
    # we expect on the Android side per CLDR definition. However, we
    # only want to trouble the user with this if plurals are actually
    # used, so both the keyword lookup and the validation are lazy.
    plural_validation = {'done': False}

    def plural_keywords():
        return cldr_db.get_plural_keywords(catalog.language.code)

    def validate_plural_config():
        if plural_validation['done']:
            return
        expected = len(plural_keywords())
        if catalog.num_plurals != expected:
            warnfunc(('Catalog defines %d plurals, we expect %d for '
                      'this language. See the README for an '
                      'explanation. plurals have very likely been '
                      'incorrectly written.') % (
                          catalog.num_plurals, expected), 'error')
        plural_validation['done'] = True

    xml_tree = ResourceTree(getattr(catalog, 'language', None))
    for message in catalog:
        if not message.id:
            # This is the header
            continue

        if not message.context:
            # Wrap the id in a 1-tuple: for a plural message the id is
            # itself a tuple, which ``%`` would otherwise try to spread
            # over the format arguments and fail.
            warnfunc(('Ignoring message "%s": has no context; somebody other '
                      'than android2po seems to have added to this '
                      'catalog.') % (message.id,), 'error')
            continue

        if resfilter and resfilter(message):
            continue

        # Both string and id will contain a tuple if this is a plural
        value = message.string or message.id

        # A colon indicates a string array
        if ':' in message.context:
            # Collect all the strings of this array with their indices,
            # padding with None, so that the array ends up in the proper
            # order regardless of the order of messages in the catalog.
            #
            # Array contexts are written as "name:index", so the index is
            # whatever follows the *last* colon.
            name, _, raw_index = message.context.rpartition(':')
            try:
                index = int(raw_index)
            except ValueError:
                index = -1
            if index < 0:
                warnfunc(('Ignoring message with context "%s": it does '
                          'not end in a valid string-array index.')
                         % (message.context,), 'error')
                continue

            xml_tree.setdefault(name, StringArray())
            array = xml_tree[name]
            while index >= len(array):
                array.append(None)  # fill None for missing indices
            if array[index] is not None:
                warnfunc(('Duplicate index %s in array "%s"; ignoring '
                          'the message. The catalog has possibly been '
                          'corrupted.') % (index, name), 'error')
                # Actually ignore it, as announced: the first occurrence
                # of an index wins.
                continue
            array[index] = value

        # A plurals message
        elif isinstance(message.string, tuple):
            validate_plural_config()

            # Untranslated: Do not include those even when
            # with_untranslated is enabled - this is because even if we
            # could put the plural definition from the master resource
            # here, it wouldn't make sense in the context of another
            # language. Instead, let access to the untranslated master
            # version continue to work.
            if not any(message.string):
                continue

            # We need to work with ``message.string`` directly rather than
            # ``value``, since ``message.id`` will only be a 2-tuple made
            # up of the msgid and msgid_plural definitions.
            keywords = plural_keywords()
            xml_tree[message.context] = Plurals([(k, None) for k in keywords])
            plurals = xml_tree[message.context]
            # Assume each keyword matches one index. If the catalog has
            # fewer forms than keywords, zip() stops early and the
            # remaining quantities stay None; validate_plural_config()
            # has already raised a warning about the mismatch.
            for keyword, text in zip(keywords, message.string):
                plurals[keyword] = text

        # A standard string.
        else:
            if not message.string and not with_untranslated:
                # Untranslated.
                continue
            xml_tree[message.context] = value

    return xml_tree
def xml2po(resources, translations=None, resfilter=None, warnfunc=dummy_warn):
    """Return ``resources`` as a Babel .po ``Catalog`` instance.

    If given, ``translations`` will be used for the translated values.
    In this case, the returned value is a 2-tuple (catalog, unmatched),
    with the latter being a list of Android string resource names that
    are in the translated file, but not in the original.

    Both ``resources`` and ``translations`` must be ``ResourceTree``
    objects, as returned by ``read_xml()``.

    From the application perspective, it will call this function with
    a ``translations`` object when initializing a new .po file based on
    an existing resource file (the 'init' command). For 'export', this
    function is called without translations. It will thus generate what
    is essentially a POT file (an empty .po file), and this will be
    merged into the existing .po catalogs, as per how gettext usually
    works.

    ``resfilter``, if given, is called with each resource name; resources
    for which it returns a true value are left out of the catalog.

    ``warnfunc`` is called as ``warnfunc(text, severity)``; everything
    reported by this function uses the severity ``'warning'``.

    Note that ``translations`` is modified: every entry that is matched
    to a resource is popped from it. Resources skipped by ``resfilter``
    are not popped, so their names are part of ``unmatched``.
    """
    # A translations object always needs a language, since it is
    # dereferenced below whenever ``translations`` is not None. This
    # includes a translations object that evaluates as false (presumably
    # an empty tree), which a plain truthiness check would let through.
    assert translations is None or translations.language, \
        'translations must have a language'

    # Use a fixed creation date (the epoch, in UTC).
    creation_date = datetime.datetime(1970, 1, 1, tzinfo=UTC)
    catalog = Catalog(creation_date=creation_date)
    if translations is not None:
        catalog.locale = translations.language.locale
        # We cannot let Babel determine the plural expr for the locale by
        # itself. It will use a custom list of plural expressions rather
        # than generate them based on CLDR.
        # See http://babel.edgewall.org/ticket/290.
        set_catalog_plural_forms(catalog, translations.language)

    def flags_for(formatted):
        # If the string has formatting markers, indicate it in the
        # gettext output.
        return ['c-format'] if formatted else []

    for name, org_value in resources.items():
        if resfilter and resfilter(name):
            continue

        trans_value = None
        if translations:
            trans_value = translations.pop(name, trans_value)

        if isinstance(org_value, StringArray):
            # a string-array, write as "name:index"
            if len(org_value) == 0:
                warnfunc("Warning: string-array '%s' is empty" % name, 'warning')
                continue

            if not isinstance(trans_value, StringArray):
                if trans_value:
                    warnfunc(('"%s" is a string-array in the reference '
                              'file, but not in the translation.') %
                             name, 'warning')
                trans_value = StringArray()

            for index, item in enumerate(org_value):
                # Elements beyond the end of the translated array are
                # written as untranslated.
                if index < len(trans_value):
                    item_trans = trans_value[index].text
                else:
                    item_trans = ''

                ctx = "%s:%d" % (name, index)
                catalog.add(item.text, item_trans, auto_comments=item.comments,
                            flags=flags_for(item.formatted), context=ctx)

        elif isinstance(org_value, Plurals):
            # a plurals, convert to a gettext plurals
            if len(org_value) == 0:
                warnfunc("Warning: plurals '%s' is empty" % name, 'warning')
                continue

            if not isinstance(trans_value, Plurals):
                if trans_value:
                    warnfunc(('"%s" is a plurals in the reference '
                              'file, but not in the translation.') %
                             name, 'warning')
                trans_value = Plurals()

            # Taking the Translation objects for each quantity in
            # ``org_value``, we build a list of strings, which is how
            # plurals are represented in Babel.
            #
            # Since gettext only allows comments/flags on the whole
            # thing at once, we merge the comments/flags of all individual
            # plural strings into one.
            formatted = False
            comments = []
            for _quantity, translation in org_value.items():
                if translation.formatted:
                    formatted = True
                comments.extend(translation.comments)

            # For the message id, choose any two plural forms, but prefer
            # "one" and "other", assuming an English master resource.
            # Work on a copy, so forms can be popped as they are used.
            remaining = org_value.copy()
            if 'one' in remaining:
                singular = remaining.pop('one')
            elif 'other' in remaining:
                singular = remaining.pop('other')
            else:
                singular = remaining.pop(next(iter(remaining.keys())))
            if 'other' in remaining:
                plural = remaining.pop('other')
            elif remaining:
                plural = remaining[next(iter(remaining.keys()))]
            else:
                # Only a single form is defined; use it for both ids.
                plural = singular
            msgid = (singular.text, plural.text)

            # We pick the quantities supported by the language (the rest
            # would be ignored by Android as well). Without a translation
            # the msgstr stays empty.
            msgstr = []
            if trans_value:
                allowed_keywords = cldr_db.get_plural_keywords(
                    translations.language.code)
                msgstr = [''] * len(allowed_keywords)
                for quantity, translation in trans_value.items():
                    try:
                        index = allowed_keywords.index(quantity)
                    except ValueError:
                        warnfunc(
                            ('plurals "%s" uses quantity "%s", which '
                             'is not supported for this language. See '
                             'the README for an explanation. The '
                             'quantity has been ignored') %
                            (name, quantity), 'warning')
                    else:
                        msgstr[index] = translation.text

            catalog.add(msgid, tuple(msgstr), flags=flags_for(formatted),
                        auto_comments=comments, context=name)

        else:
            # a normal string
            catalog.add(org_value.text, trans_value.text if trans_value else '',
                        flags=flags_for(org_value.formatted),
                        auto_comments=org_value.comments, context=name)

    if translations is not None:
        # At this point, ``translations`` only contains those entries
        # that were not matched to an original resource above.
        return catalog, list(translations.keys())
    return catalog