def po2xml(catalog, with_untranslated=False, filter=None, warnfunc=dummy_warn):
    """Convert the gettext catalog in ``catalog`` to an XML DOM.

    This currently relies entirely on the fact that we can use the context
    of each message to specify the Android resource name (which we need
    to do to handle duplicates, but this is a nice by-product). However
    that also means we cannot handle arbitrary catalogs.

    The latter would in theory be possible by using the original,
    untranslated XML to match up a message's id to a resource name, but
    right now we don't support this (and it's not clear it would be
    necessary, even).

    If ``with_untranslated`` is given, then strings in the catalog
    that have no translation are written out with the original id.

    In the case of a string-array, if ``with_untranslated`` is NOT
    specified, then only strings that DO have a translation are written
    out, potentially causing the array to be incomplete.

    TODO: This should not be the case: Arrays should always contain
    all elements, whether translated or not (using an empty string
    instead). When writing tests for this, make sure we generally test
    the with_untranslated mode, i.e. also the behavior for normal strings.

    ``filter``, if given, is called with each message; messages for
    which it returns a true value are skipped.

    ``warnfunc`` is called with ``(text, 'error')`` for messages that
    have to be ignored (no context, duplicate array index).

    Returns the ``resources`` root element holding one ``string`` or
    ``string-array`` child per resource name, in the order in which the
    names were first seen in the catalog.

    A string-array index that is not an integer raises ``ValueError``
    when the array is sorted.
    """
    # First, process the catalog into a Python sort-of-tree structure.
    # We can't write directly to the XML output, since stuff like
    # string-array items are not guaranteed to appear in the correct
    # order in the catalog. The "xml tree" pulls these things together.
    # Each leaf is a ``(value, message)`` pair, so that the message a
    # value originated from is still known when the DOM is written.
    xml_tree = OrderedDict()
    for message in catalog:
        if not message.id:
            # This is the header
            continue

        if not message.string and not with_untranslated:
            # Untranslated.
            continue

        if not message.context:
            warnfunc(('Ignoring message "%s": has no context; somebody other '+
                      'than android2po seems to have added to this '+
                      'catalog.') % message.id, 'error')
            continue

        if filter and filter(message):
            continue

        value = message.string or message.id

        if ':' in message.context:
            # A colon indicates a string array; collect all the
            # strings of this array with their indices, so when
            # we're done processing the whole catalog, we can
            # sort by index and restore the proper array order.
            #
            # Split on the last colon only, so the unpacking below always
            # receives exactly two parts; the index is taken to be the
            # trailing component.
            name, index = message.context.rsplit(':', 1)
            array_items = xml_tree.setdefault(name, {})
            if index in array_items:
                warnfunc(('Duplicate index %s in array "%s"; ignoring '+
                          'the message. The catalog has possibly been '+
                          'corrupted.') % (index, name), 'error')
                # Actually ignore the duplicate, as the warning states;
                # the first message seen for an index wins.
                continue
            array_items[index] = (value, message)
        else:
            xml_tree[message.context] = (value, message)

    # Convert the xml tree we've built into an actual Android XML DOM.
    root_tags = []
    # Handed to every write_to_dom() call and then used as the nsmap of
    # the root element (presumably write_to_dom() records the namespaces
    # it encounters in here - confirm against its implementation).
    namespaces_used = {}
    for name, entry in xml_tree.items():
        if isinstance(entry, dict):
            # string-array - first, sort numerically by index
            array_el = etree.Element('string-array')
            array_el.attrib['name'] = name
            for index in sorted(entry, key=int):
                item_value, item_message = entry[index]
                item_el = write_to_dom(
                    'item', item_value, item_message, namespaces_used,
                    warnfunc)
                array_el.append(item_el)
            root_tags.append(array_el)
        else:
            # standard string
            string_value, string_message = entry
            string_el = write_to_dom(
                'string', string_value, string_message, namespaces_used,
                warnfunc)
            string_el.attrib['name'] = name
            root_tags.append(string_el)

    # Generate the root element, define the namespaces that have been
    # used across all of our child elements.
    root_el = etree.Element('resources', nsmap=namespaces_used)
    for child in root_tags:
        root_el.append(child)
    return root_el
def po2xml(catalog, with_untranslated=False, filter=None, warnfunc=dummy_warn):
    """Convert the gettext catalog in ``catalog`` to an XML DOM.

    This currently relies entirely on the fact that we can use the context
    of each message to specify the Android resource name (which we need
    to do to handle duplicates, but this is a nice by-product). However
    that also means we cannot handle arbitrary catalogs.

    The latter would in theory be possible by using the original,
    untranslated XML to match up a message's id to a resource name, but
    right now we don't support this (and it's not clear it would be
    necessary, even).

    If ``with_untranslated`` is given, then strings in the catalog
    that have no translation are written out with the original id.

    In the case of a string-array, if ``with_untranslated`` is NOT
    specified, then only strings that DO have a translation are written
    out, potentially causing the array to be incomplete.

    TODO: This should not be the case: Arrays should always contain
    all elements, whether translated or not (using an empty string
    instead). When writing tests for this, make sure we generally test
    the with_untranslated mode, i.e. also the behavior for normal strings.

    ``filter``, if given, is called with each message; messages for
    which it returns a true value are skipped.

    ``warnfunc`` is called with ``(text, 'error')`` for messages that
    have to be ignored (no context, duplicate array index).

    Returns the ``resources`` root element holding one ``string`` or
    ``string-array`` child per resource name, in the order in which the
    names were first seen in the catalog. Array items are ordered
    numerically by index; indices that are not integers are placed
    after all numeric ones, in plain string order.
    """

    def _index_sort_key(index):
        # Array indices are kept as strings (they are cut out of the
        # message context), so a plain sort would put "10" before "2".
        # Compare them as integers, and push anything that is not an
        # integer to the end instead of failing.
        try:
            return (0, int(index), index)
        except ValueError:
            return (1, 0, index)

    # First, process the catalog into a Python sort-of-tree structure.
    # We can't write directly to the XML output, since stuff like
    # string-array items are not guaranteed to appear in the correct
    # order in the catalog. The "xml tree" pulls these things together.
    # Each leaf is a ``(value, message)`` pair, so that the message a
    # value originated from is still known when the DOM is written.
    xml_tree = OrderedDict()
    for message in catalog:
        if not message.id:
            # This is the header
            continue

        if not message.string and not with_untranslated:
            # Untranslated.
            continue

        if not message.context:
            warnfunc(('Ignoring message "%s": has no context; somebody other '+
                      'than android2po seems to have added to this '+
                      'catalog.') % message.id, 'error')
            continue

        if filter and filter(message):
            continue

        value = message.string or message.id

        if ':' in message.context:
            # A colon indicates a string array; collect all the
            # strings of this array with their indices, so when
            # we're done processing the whole catalog, we can
            # sort by index and restore the proper array order.
            #
            # Split on the last colon only, so the unpacking below always
            # receives exactly two parts; the index is taken to be the
            # trailing component.
            name, index = message.context.rsplit(':', 1)
            array_items = xml_tree.setdefault(name, {})
            if index in array_items:
                warnfunc(('Duplicate index %s in array "%s"; ignoring '+
                          'the message. The catalog has possibly been '+
                          'corrupted.') % (index, name), 'error')
                # Actually ignore the duplicate, as the warning states;
                # the first message seen for an index wins.
                continue
            array_items[index] = (value, message)
        else:
            xml_tree[message.context] = (value, message)

    # Convert the xml tree we've built into an actual Android XML DOM.
    root_tags = []
    # Handed to every write_to_dom() call and then used as the nsmap of
    # the root element (presumably write_to_dom() records the namespaces
    # it encounters in here - confirm against its implementation).
    namespaces_used = {}
    for name, entry in xml_tree.items():
        if isinstance(entry, dict):
            # string-array - first, sort numerically by index
            array_el = etree.Element('string-array')
            array_el.attrib['name'] = name
            for index in sorted(entry, key=_index_sort_key):
                item_value, item_message = entry[index]
                item_el = write_to_dom(
                    'item', item_value, item_message, namespaces_used,
                    warnfunc)
                array_el.append(item_el)
            root_tags.append(array_el)
        else:
            # standard string
            string_value, string_message = entry
            string_el = write_to_dom(
                'string', string_value, string_message, namespaces_used,
                warnfunc)
            string_el.attrib['name'] = name
            root_tags.append(string_el)

    # Generate the root element, define the namespaces that have been
    # used across all of our child elements.
    root_el = etree.Element('resources', nsmap=namespaces_used)
    for child in root_tags:
        root_el.append(child)
    return root_el