Example #1
0
def po2xml(catalog, with_untranslated=False, resfilter=None, warnfunc=dummy_warn):
    """Convert the gettext catalog in ``catalog`` to a ``ResourceTree``
    instance (our in-memory representation of an Android XML resource).

    This currently relies entirely on the fact that we can use the context
    of each message to specify the Android resource name (which we need
    to do to handle duplicates, but this is a nice by-product). However
    that also means we cannot handle arbitrary catalogs.

    The latter would in theory be possible by using the original,
    untranslated XML to match up a message's id to a resource name, but
    right now we don't support this (and it's not clear it would be
    necessary, even).

    :param catalog: The catalog to convert. Iterating it must yield
        messages exposing ``id``, ``string`` and ``context``.
    :param with_untranslated: If true, plain strings in the catalog that
        have no translation are written out with the original id. This
        does not affect string-arrays, which always include all elements
        (falling back to the original id), and it does not affect
        plurals, which are skipped entirely when untranslated.
    :param resfilter: Optional callable invoked with each message; the
        message is skipped when it returns a true value.
    :param warnfunc: Callable invoked as ``warnfunc(text, severity)`` to
        report problems found in the catalog.
    :return: The populated ``ResourceTree``.
    :raises ValueError: If the index part of a string-array context
        (``name:index``) is not an integer.
    """
    # Validate that the plurals in the .po catalog match those that
    # we expect on the Android side per CLDR definition. However, we
    # only want to trouble the user with this if plurals are actually
    # used, so both the lookup and the validation are deferred until
    # the first plural message is encountered.
    plural_validation = {'done': False}

    def plural_keywords():
        return cldr_db.get_plural_keywords(catalog.language.code)

    def validate_plural_config():
        if plural_validation['done']:
            return
        expected = len(plural_keywords())
        if catalog.num_plurals != expected:
            warnfunc(('Catalog defines %d plurals, we expect %d for '
                      'this language. See the README for an '
                      'explanation. plurals have very likely been '
                      'incorrectly written.') % (
                catalog.num_plurals, expected), 'error')
        plural_validation['done'] = True

    xml_tree = ResourceTree(getattr(catalog, 'language', None))
    for message in catalog:
        if not message.id:
            # This is the header
            continue

        if not message.context:
            warnfunc(('Ignoring message "%s": has no context; somebody other '+
                      'than android2po seems to have added to this '+
                      'catalog.') % message.id, 'error')
            continue

        if resfilter and resfilter(message):
            continue

        # Both string and id will contain a tuple if this is a plural
        value = message.string or message.id

        # A colon indicates a string array
        if ':' in message.context:
            # Collect all the strings of this array at their indices, so
            # the proper array order is restored regardless of the order
            # in which the messages appear in the catalog.
            #
            # The index is always the last segment of the context, so
            # split from the right, and only once; this keeps the
            # two-name unpacking below valid even if the name part
            # happens to contain a colon itself.
            name, index = message.context.rsplit(':', 1)
            index = int(index)
            xml_tree.setdefault(name, StringArray())
            while index >= len(xml_tree[name]):
                xml_tree[name].append(None)  # fill None for missing indices
            if xml_tree[name][index] is not None:
                warnfunc(('Duplicate index %s in array "%s"; ignoring '+
                          'the message. The catalog has possibly been '+
                          'corrupted.') % (index, name), 'error')
                # Actually ignore the duplicate, as the warning promises,
                # instead of overwriting the entry we already have.
                continue
            xml_tree[name][index] = value

        # A plurals message
        elif isinstance(message.string, tuple):
            validate_plural_config()

            # Untranslated: Do not include those even if with_untranslated
            # is enabled - this is because even if we could put the plural
            # definition from the master resource here, it wouldn't make
            # sense in the context of another language. Instead, let access
            # to the untranslated master version continue to work.
            if not any(message.string):
                continue

            # We need to work with ``message.string`` directly rather than
            # ``value``, since ``message.id`` will only be a 2-tuple made
            # up of the msgid and msgid_plural definitions.
            keywords = plural_keywords()
            xml_tree[message.context] = Plurals([(k, None) for k in keywords])
            for index, keyword in enumerate(keywords):
                # Assume each keyword matches one index.
                try:
                    xml_tree[message.context][keyword] = message.string[index]
                except IndexError:
                    # Plurals are not matching up, validate_plural_config()
                    # has already raised a warning.
                    break

        # A standard string.
        else:
            if not message.string and not with_untranslated:
                # Untranslated.
                continue
            xml_tree[message.context] = value

    return xml_tree
Example #2
0
def xml2po(resources, translations=None, resfilter=None, warnfunc=dummy_warn):
    """Return ``resources`` as a Babel .po ``Catalog`` instance.

    If given, ``translations`` will be used for the translated values.
    In this case, the returned value is a 2-tuple (catalog, unmatched),
    with the latter being a list of Android string resource names that
    are in the translated file, but not in the original.

    Both ``resources`` and ``translations`` must be ``ResourceTree``
    objects, as returned by ``read_xml()``.

    From the application perspective, it will call this function with
    a ``translations`` object when initializing a new .po file based on
    an existing resource file (the 'init' command). For 'export', this
    function is called without translations. It will thus generate what
    is essentially a POT file (an empty .po file), and this will be
    merged into the existing .po catalogs, as per how gettext usually
    works.

    :param resources: The reference (original language) resources.
    :param translations: Optional translated resources; must have its
        ``language`` set. NOTE: this object is modified - every entry
        that is matched to a reference resource is popped from it.
    :param resfilter: Optional callable invoked with each resource name;
        the resource is skipped when it returns a true value. Skipped
        names are not removed from ``translations``, so they are
        reported as unmatched if present there.
    :param warnfunc: Callable invoked as ``warnfunc(text, severity)`` to
        report inconsistencies between the two resource trees.
    :return: The catalog, or ``(catalog, unmatched)`` if ``translations``
        was given.
    """
    # Check against ``None`` rather than truthiness, consistent with the
    # checks below: a translations object that is falsy and has no
    # language would otherwise pass here and fail with an opaque
    # AttributeError when its locale is read.
    assert translations is None or translations.language, \
        'translations must have a language set'

    def c_format_flags(formatted):
        # If a string has formatting markers, indicate it in the
        # gettext output.
        return ['c-format'] if formatted else []

    # A fixed creation date (the Unix epoch); spelled out directly since
    # ``datetime.utcfromtimestamp()`` is deprecated.
    creation_date = datetime.datetime(1970, 1, 1, tzinfo=UTC)
    catalog = Catalog(creation_date=creation_date)
    if translations is not None:
        catalog.locale = translations.language.locale
        # We cannot let Babel determine the plural expr for the locale by
        # itself. It will use a custom list of plural expressions rather
        # than generate them based on CLDR.
        # See http://babel.edgewall.org/ticket/290.
        set_catalog_plural_forms(catalog, translations.language)

    for name, org_value in resources.items():
        if resfilter and resfilter(name):
            continue

        trans_value = None
        if translations is not None:
            # Popping leaves only the unmatched names behind, which is
            # what we report back to the caller at the end.
            trans_value = translations.pop(name, None)

        if isinstance(org_value, StringArray):
            # a string-array, write as "name:index"
            if len(org_value) == 0:
                warnfunc("Warning: string-array '%s' is empty" % name, 'warning')
                continue

            if not isinstance(trans_value, StringArray):
                if trans_value:
                    warnfunc(('""%s" is a string-array in the reference '
                              'file, but not in the translation.') %
                                    name, 'warning')
                trans_value = StringArray()

            for index, item in enumerate(org_value):
                # Elements missing from the translated array are written
                # as untranslated.
                item_trans = trans_value[index].text if index < len(trans_value) else ''

                ctx = "%s:%d" % (name, index)
                catalog.add(item.text, item_trans, auto_comments=item.comments,
                            flags=c_format_flags(item.formatted), context=ctx)

        elif isinstance(org_value, Plurals):
            # a plurals, convert to a gettext plurals
            if len(org_value) == 0:
                warnfunc("Warning: plurals '%s' is empty" % name, 'warning')
                continue

            if not isinstance(trans_value, Plurals):
                if trans_value:
                    warnfunc(('""%s" is a plurals in the reference '
                              'file, but not in the translation.') %
                                    name, 'warning')
                trans_value = Plurals()

            # Taking the Translation objects for each quantity in ``org_value``,
            # we build a list of strings, which is how plurals are represented
            # in Babel.
            #
            # Since gettext only allows comments/flags on the whole
            # thing at once, we merge the comments/flags of all individual
            # plural strings into one.
            formatted = False
            comments = []
            for _, translation in org_value.items():
                if translation.formatted:
                    formatted = True
                comments.extend(translation.comments)

            # For the message id, choose any two plural forms, but prefer
            # "one" and "other", assuming an English master resource.
            # Work on a copy, since the chosen forms are popped off.
            temp = org_value.copy()
            if 'one' in temp:
                singular = temp.pop('one')
            elif 'other' in temp:
                singular = temp.pop('other')
            else:
                singular = temp.pop(list(temp.keys())[0])
            if 'other' in temp:
                plural = temp.pop('other')
            elif temp:
                plural = temp[list(temp.keys())[0]]
            else:
                # Only a single form was defined; use it for both.
                plural = singular
            msgid = (singular.text, plural.text)

            # We pick the quantities supported by the language (the rest
            # would be ignored by Android as well). Without a translation
            # the message is added with an empty tuple.
            msgstr = ()
            if trans_value:
                allowed_keywords = cldr_db.get_plural_keywords(translations.language.code)
                msgstr = [''] * len(allowed_keywords)
                for quantity, translation in trans_value.items():
                    try:
                        index = allowed_keywords.index(quantity)
                    except ValueError:
                        warnfunc(
                            ('"plurals "%s" uses quantity "%s", which '
                             'is not supported for this language. See '
                             'the README for an explanation. The '
                             'quantity has been ignored') %
                                    (name, quantity), 'warning')
                    else:
                        msgstr[index] = translation.text

            catalog.add(msgid, tuple(msgstr), flags=c_format_flags(formatted),
                        auto_comments=comments, context=name)

        else:
            # a normal string
            catalog.add(org_value.text, trans_value.text if trans_value else '',
                        flags=c_format_flags(org_value.formatted),
                        auto_comments=org_value.comments, context=name)

    if translations is not None:
        # At this point, ``translations`` only contains those resources
        # for which no original existed.
        return catalog, list(translations.keys())
    else:
        return catalog