def __iter__(self):
    """Iterate over the catalog, yielding one `Message` per entry.

    The first item yielded is a synthetic header message: its ID is the
    empty string and its string is built from ``self.mime_headers``, one
    ``Name: value`` line per header.  That header message carries the
    ``fuzzy`` flag when ``self.fuzzy`` is true.  The stored messages follow,
    in the iteration order of the internal message mapping.

    :rtype: ``iterator``
    """
    header_lines = ['%s: %s' % (name, value)
                    for name, value in self.mime_headers]

    header_flags = set()
    if self.fuzzy:
        header_flags.add('fuzzy')
    yield Message(u'', '\n'.join(header_lines), flags=header_flags)

    for key in self._messages:
        yield self._messages[key]
def _merge(message, oldkey, newkey):
    # Merge a template message with the existing catalog entry stored under
    # `oldkey` and store the result in the catalog.  `oldkey` differs from
    # `newkey` when the pairing is a fuzzy match rather than an exact one.
    merged = message.clone()
    is_fuzzy = oldkey != newkey

    if is_fuzzy:
        # Remember which old entry was consumed and keep its ID around as
        # the "previous" ID of the merged message.
        fuzzy_matches.add(oldkey)
        previous = messages.get(oldkey)
        if isinstance(previous.id, basestring):
            merged.previous_id = [previous.id]
        else:
            merged.previous_id = list(previous.id)
    else:
        previous = remaining.pop(oldkey, None)

    # Carry the old translation over, reshaping it when the singular/plural
    # form of the new ID does not match the form of the old translation.
    translation = previous.string
    id_is_plural = isinstance(merged.id, (list, tuple))
    string_is_plural = isinstance(translation, (list, tuple))
    if id_is_plural and not string_is_plural:
        is_fuzzy = True
        padding = [u''] * (len(merged.id) - 1)
        translation = tuple([translation] + padding)
    elif id_is_plural and len(translation) != self.num_plurals:
        is_fuzzy = True
        translation = tuple(translation[:len(previous.string)])
    elif string_is_plural and not id_is_plural:
        is_fuzzy = True
        translation = translation[0]
    merged.string = translation

    merged.flags |= previous.flags
    if is_fuzzy:
        merged.flags |= set([u'fuzzy'])
    self[merged.id] = merged
def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
             user_comments=(), previous_id=(), lineno=None):
    """Create the message object.

    :param id: the message ID, or a ``(singular, plural)`` tuple for
               pluralizable messages
    :param string: the translated message string, or a
                   ``(singular, plural)`` tuple for pluralizable messages
    :param locations: a sequence of ``(filename, lineno)`` tuples
    :param flags: a set or sequence of flags
    :param auto_comments: a sequence of automatic comments for the message
    :param user_comments: a sequence of user comments for the message
    :param previous_id: the previous message ID, or a
                        ``(singular, plural)`` tuple for pluralizable
                        messages
    :param lineno: the line number on which the msgid line was found in
                   the PO file, if any
    """
    # The ID is stored first: the `pluralizable` and `python_format`
    # attributes consulted below are read from the instance.
    self.id = id #: The message ID

    if not string and self.pluralizable:
        # An untranslated plural message gets an empty two-element string.
        string = (u'', u'')
    self.string = string #: The message translation

    self.locations = list(distinct(locations))
    self.auto_comments = list(distinct(auto_comments))
    self.user_comments = list(distinct(user_comments))

    # The 'python-format' flag always mirrors the message itself,
    # regardless of what the caller passed in `flags`.
    self.flags = set(flags)
    if id and self.python_format:
        self.flags.add('python-format')
    else:
        self.flags.discard('python-format')

    # A single previous ID is normalised to a one-element list.
    if isinstance(previous_id, basestring):
        previous_id = [previous_id]
    self.previous_id = list(previous_id)

    self.lineno = lineno
def _add_message():
    # Turn the msgid/msgstr fragments collected so far into a `Message`,
    # store it in the catalog (or in its obsolete mapping), then reset the
    # collection buffers for the next entry.
    translations.sort()

    if len(messages) > 1:
        msgid = tuple([denormalize(part) for part in messages])
    else:
        msgid = denormalize(messages[0])

    if isinstance(msgid, (list, tuple)):
        # Plural entry: take exactly `catalog.num_plurals` forms, filling
        # the ones that were not collected with empty strings.
        forms = []
        for idx in range(catalog.num_plurals):
            if idx < len(translations):
                forms.append(translations[idx])
            else:
                forms.append((idx, ''))
        string = tuple([denormalize(form[1]) for form in forms])
    else:
        string = denormalize(translations[0][1])

    message = Message(msgid, string, list(locations), set(flags),
                      auto_comments, user_comments, lineno=offset[0] + 1)

    if not obsolete[0]:
        catalog[msgid] = message
    elif not ignore_obsolete:
        catalog.obsolete[msgid] = message

    # Empty the shared buffers in place; the enclosing scope holds
    # references to these very list objects.
    for pending in (messages, translations, locations, flags,
                    auto_comments, user_comments):
        del pending[:]
    obsolete[0] = False
    counter[0] += 1
def _merge(message, oldkey, newkey):
    # Combine the template `message` with the catalog entry found under
    # `oldkey`, then store the result in the catalog.  A differing `newkey`
    # means the two were paired by fuzzy matching.
    merged = message.clone()
    is_fuzzy = oldkey != newkey

    if not is_fuzzy:
        previous = remaining.pop(oldkey, None)
    else:
        # Record the consumed entry and keep its ID as the previous ID.
        fuzzy_matches.add(oldkey)
        previous = messages.get(oldkey)
        if isinstance(previous.id, basestring):
            merged.previous_id = [previous.id]
        else:
            merged.previous_id = list(previous.id)

    # Reuse the old translation; reshape it (and mark the result fuzzy)
    # when its singular/plural form does not fit the new message ID.
    translation = previous.string
    id_is_plural = isinstance(merged.id, (list, tuple))
    string_is_plural = isinstance(translation, (list, tuple))
    if id_is_plural and not string_is_plural:
        is_fuzzy = True
        padding = [u''] * (len(merged.id) - 1)
        translation = tuple([translation] + padding)
    elif id_is_plural and len(translation) != self.num_plurals:
        is_fuzzy = True
        translation = tuple(translation[:len(previous.string)])
    elif string_is_plural and not id_is_plural:
        is_fuzzy = True
        translation = translation[0]
    merged.string = translation

    merged.flags |= previous.flags
    if is_fuzzy:
        merged.flags |= set([u'fuzzy'])
    self[merged.id] = merged
def __init__(self, rules):
    """Initialize the rule instance.

    The parsed rules are stored in ``self.abstract`` as a list of
    ``(tag, ast)`` pairs, in the order in which they were supplied.

    :param rules: a list of ``(tag, expr)`` tuples with the rules
                  conforming to UTS #35 or a dict with the tags as keys
                  and expressions as values.
    :raise ValueError: if a tag is not one of the known plural tags, or if
                       the same tag is given more than once
    :raise RuleError: if the expression is malformed (presumably raised by
                      the expression parser; not visible in this block)
    """
    if isinstance(rules, dict):
        rules = rules.items()

    seen_tags = set()
    self.abstract = []
    for tag, expression in rules:
        if tag not in _plural_tags:
            raise ValueError('unknown tag %r' % tag)
        if tag in seen_tags:
            raise ValueError('tag %r defined twice' % tag)
        seen_tags.add(tag)
        self.abstract.append((tag, _Parser(expression).ast))
def to_gettext(rule):
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 2) ? 1 : (n == 1) ? 0 : 2)'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :return: an equivalent gettext-style plural expression
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)

    # The fallback tag always takes part; a tag's index is its position
    # among the used tags, taken in the order of `_plural_tags`.
    used_tags = rule.tags | set([_fallback_tag])
    compiler = _GettextCompiler()
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]

    pieces = ['nplurals=%d; plural=(' % len(used_tags)]
    for tag, ast in rule.abstract:
        condition = compiler.compile(ast)
        pieces.append('%s ? %d : ' % (condition, ordered_tags.index(tag)))
    pieces.append('%d)' % ordered_tags.index(_fallback_tag))
    return ''.join(pieces)
def main():
    """Import locale data from a CLDR source tree named on the command line.

    Exactly one positional argument is expected: the path to the CLDR
    directory.  The function

    * reads ``supplemental/supplementalData.xml`` and
      ``supplemental/metazoneInfo.xml`` and pickles the resulting time zone
      tables (protocol 2) to ``global.dat`` in the ``babel`` directory next
      to the directory containing the running script;
    * builds a territory containment mapping from the supplemental data;
    * walks every ``*.xml`` file below ``main/`` (``root.xml`` first, the
      others ordered by file name length) and collects display names, week
      data, time zone names and Gregorian calendar data into a per-locale
      ``data`` dictionary.

    Progress and pattern parsing errors are reported on standard error.
    """
    parser = OptionParser(usage='%prog path/to/cldr')
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error('incorrect number of arguments')

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           '..', 'babel')

    def is_provisional(elem):
        # Draft and alternate ("alt") entries may fill a gap, but they must
        # never replace a value that has already been recorded.
        return 'draft' in elem.attrib or 'alt' in elem.attrib

    def zone_info(elem):
        # Collect the exemplar city and the long/short names of one
        # <zone> or <metazone> element.
        info = {}
        city = elem.findtext('exemplarCity')
        if city:
            info['city'] = unicode(city)
        for child in elem.findall('long/*'):
            info.setdefault('long', {})[child.tag] = unicode(child.text)
        for child in elem.findall('short/*'):
            info.setdefault('short', {})[child.tag] = unicode(child.text)
        return info

    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))

    # Import global data from the supplemental files
    global_data = {}

    territory_zones = global_data.setdefault('territory_zones', {})
    zone_aliases = global_data.setdefault('zone_aliases', {})
    zone_territories = global_data.setdefault('zone_territories', {})
    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
        tzid = elem.attrib['type']
        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
        zone_territories[tzid] = elem.attrib['territory']
        if 'aliases' in elem.attrib:
            for alias in elem.attrib['aliases'].split():
                zone_aliases[alias] = tzid

    # Import Metazone mapping
    meta_zones = global_data.setdefault('meta_zones', {})
    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml'))
    for elem in tzsup.findall('.//timezone'):
        for child in elem.findall('usesMetazone'):
            if 'to' not in child.attrib: # FIXME: support old mappings
                meta_zones[elem.attrib['type']] = child.attrib['mzone']

    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
    try:
        pickle.dump(global_data, outfile, 2)
    finally:
        outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall('.//territoryContainment/group'):
        regions[elem.attrib['type']] = elem.attrib['contains'].split()

    # Resolve territory containment: every territory maps to the set of all
    # groups that contain it, directly or through another group.
    territory_containment = {}
    region_items = regions.items()
    region_items.sort()
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # root.xml is processed first, the remaining files by name length.
    filenames = os.listdir(os.path.join(srcdir, 'main'))
    filenames.remove('root.xml')
    filenames.sort(lambda a,b: len(a)-len(b))
    filenames.insert(0, 'root.xml')

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != '.xml':
            continue

        print>>sys.stderr, 'Processing input file %r' % filename
        tree = parse(os.path.join(srcdir, 'main', filename))
        data = {}

        language = None
        elem = tree.find('.//identity/language')
        if elem is not None:
            language = elem.attrib['type']
        print>>sys.stderr, ' Language: %r' % language

        territory = None
        elem = tree.find('.//identity/territory')
        if elem is not None:
            territory = elem.attrib['type']
        else:
            territory = '001' # world
        print>>sys.stderr, ' Territory: %r' % territory
        regions = territory_containment.get(territory, [])
        print>>sys.stderr, ' Regions: %r' % regions

        # <localeDisplayNames>
        for section, xpath in (('territories', './/territories/territory'),
                               ('languages', './/languages/language'),
                               ('variants', './/variants/variant'),
                               ('scripts', './/scripts/script')):
            names = data.setdefault(section, {})
            for elem in tree.findall(xpath):
                if is_provisional(elem) and elem.attrib['type'] in names:
                    continue
                names[elem.attrib['type']] = _text(elem)

        # <dates>
        # A supplemental week data entry applies when it lists the locale's
        # territory or any region containing that territory.
        week_data = data.setdefault('week_data', {})
        supelem = sup.find('.//weekData')

        for elem in supelem.findall('minDays'):
            territories = elem.attrib['territories'].split()
            if territory in territories \
                    or any([r in territories for r in regions]):
                week_data['min_days'] = int(elem.attrib['count'])

        for tag, key in (('firstDay', 'first_day'),
                         ('weekendStart', 'weekend_start'),
                         ('weekendEnd', 'weekend_end')):
            for elem in supelem.findall(tag):
                territories = elem.attrib['territories'].split()
                if territory in territories \
                        or any([r in territories for r in regions]):
                    week_data[key] = weekdays[elem.attrib['day']]

        # Only the first non-draft, non-alternate format of each kind is
        # used; CLDR placeholders become Python format specifiers.
        zone_formats = data.setdefault('zone_formats', {})
        for elem in tree.findall('.//timeZoneNames/gmtFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/regionFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
                break
        for elem in tree.findall('.//timeZoneNames/fallbackFormat'):
            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
                zone_formats['fallback'] = unicode(elem.text) \
                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                break

        time_zones = data.setdefault('time_zones', {})
        for elem in tree.findall('.//timeZoneNames/zone'):
            time_zones[elem.attrib['type']] = zone_info(elem)

        meta_zones = data.setdefault('meta_zones', {})
        for elem in tree.findall('.//timeZoneNames/metazone'):
            info = zone_info(elem)
            info['common'] = elem.findtext('commonlyUsed') == 'true'
            meta_zones[elem.attrib['type']] = info

        for calendar in tree.findall('.//calendars/calendar'):
            if calendar.attrib['type'] != 'gregorian':
                # TODO: support other calendar types
                continue

            months = data.setdefault('months', {})
            for ctxt in calendar.findall('months/monthContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall('monthWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'month':
                            if is_provisional(elem) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib.get('type'))] = \
                                unicode(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['months', ctxt_type, width_type],
                                    elem.attrib['path'])
                            )

            days = data.setdefault('days', {})
            for ctxt in calendar.findall('days/dayContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall('dayWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'day':
                            dtype = weekdays[elem.attrib['type']]
                            # Same rule as for months, quarters and eras: a
                            # draft or alternate name must not overwrite a
                            # day name that is already present.
                            if is_provisional(elem) and dtype in widths:
                                continue
                            widths[dtype] = unicode(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['days', ctxt_type, width_type],
                                    elem.attrib['path'])
                            )

            quarters = data.setdefault('quarters', {})
            for ctxt in calendar.findall('quarters/quarterContext'):
                ctxt_type = ctxt.attrib['type']
                ctxts = quarters.setdefault(ctxt_type, {})
                for width in ctxt.findall('quarterWidth'):
                    width_type = width.attrib['type']
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == 'quarter':
                            if is_provisional(elem) \
                                    and int(elem.attrib['type']) in widths:
                                continue
                            widths[int(elem.attrib['type'])] = \
                                unicode(elem.text)
                        elif elem.tag == 'alias':
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ['quarters', ctxt_type, width_type],
                                    elem.attrib['path'])
                            )

            eras = data.setdefault('eras', {})
            for width in calendar.findall('eras/*'):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == 'era':
                        if is_provisional(elem) \
                                and int(elem.attrib['type']) in widths:
                            continue
                        widths[int(elem.attrib.get('type'))] = \
                            unicode(elem.text)
                    elif elem.tag == 'alias':
                        eras[width_type] = Alias(
                            _translate_alias(['eras', width_type],
                                             elem.attrib['path'])
                        )

            # AM/PM
            periods = data.setdefault('periods', {})
            for tag in ('am', 'pm'):
                for elem in calendar.findall(tag):
                    if is_provisional(elem) and elem.tag in periods:
                        continue
                    periods[elem.tag] = unicode(elem.text)

            date_formats = data.setdefault('date_formats', {})
            for formats_elem in calendar.findall('dateFormats'):
                for elem in formats_elem.getiterator():
                    if elem.tag == 'dateFormatLength':
                        # Alternate patterns are skipped here exactly as
                        # they are for the time formats below.
                        if is_provisional(elem) \
                                and elem.attrib.get('type') in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(unicode(
                                    elem.findtext('dateFormat/pattern')))
                        except ValueError:
                            # sys.exc_info() instead of a bound name keeps
                            # this clause valid on every interpreter version
                            e = sys.exc_info()[1]
                            print>>sys.stderr, 'ERROR: %s' % e
                    elif elem.tag == 'alias':
                        # NOTE(review): this rebinds the local name only;
                        # data['date_formats'] still refers to the dict
                        # created above -- confirm this is intended.
                        date_formats = Alias(_translate_alias(
                            ['date_formats'], elem.attrib['path'])
                        )

            time_formats = data.setdefault('time_formats', {})
            for formats_elem in calendar.findall('timeFormats'):
                for elem in formats_elem.getiterator():
                    if elem.tag == 'timeFormatLength':
                        if is_provisional(elem) \
                                and elem.attrib.get('type') in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get('type')] = \
                                dates.parse_pattern(unicode(
                                    elem.findtext('timeFormat/pattern')))
                        except ValueError:
                            e = sys.exc_info()[1]
                            print>>sys.stderr, 'ERROR: %s' % e
                    elif elem.tag == 'alias':
                        # NOTE(review): local rebinding only, see the date
                        # formats section above.
                        time_formats = Alias(_translate_alias(
                            ['time_formats'], elem.attrib['path'])
                        )
# # This software consists of voluntary contributions made by many # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. """Various routines that help with validation of translations. :since: version 0.9 """ from itertools import izip from babel.messages.catalog import TranslationError, PYTHON_FORMAT from babel.util import set #: list of format chars that are compatible to each other _string_format_compatibilities = [ set(['i', 'd', 'u']), set(['x', 'X']), set(['f', 'F', 'g', 'G']) ] def num_plurals(catalog, message): """Verify the number of plurals in the translation.""" if not message.pluralizable: if not isinstance(message.string, basestring): raise TranslationError("Found plural forms for non-pluralizable " "message") return # skip further tests if no catalog is provided. elif catalog is None:
def update(self, template, no_fuzzy_matching=False):
    """Update the catalog based on the given template catalog.

    >>> from babel.messages import Catalog
    >>> template = Catalog()
    >>> template.add('green', locations=[('main.py', 99)])
    >>> template.add('blue', locations=[('main.py', 100)])
    >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
    >>> catalog = Catalog(locale='de_DE')
    >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
    >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
    >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
    ...             locations=[('util.py', 38)])

    >>> catalog.update(template)
    >>> len(catalog)
    3

    >>> msg1 = catalog['green']
    >>> msg1.string
    >>> msg1.locations
    [('main.py', 99)]

    >>> msg2 = catalog['blue']
    >>> msg2.string
    u'blau'
    >>> msg2.locations
    [('main.py', 100)]

    >>> msg3 = catalog['salad']
    >>> msg3.string
    (u'Salat', u'Salate')
    >>> msg3.locations
    [('util.py', 42)]

    Messages that are in the catalog but not in the template are removed
    from the main collection, but can still be accessed via the `obsolete`
    member:

    >>> 'head' in catalog
    False
    >>> catalog.obsolete.values()
    [<Message 'head' (flags: [])>]

    :param template: the reference catalog, usually read from a POT file
    :param no_fuzzy_matching: set to a true value to disable fuzzy matching
                              of message IDs; by default template messages
                              without an exact counterpart are paired with
                              the closest translated message in the catalog
    """
    messages = self._messages
    # Entries still in `remaining` at the end had no exact counterpart in
    # the template.
    remaining = messages.copy()
    self._messages = odict()

    # Prepare for fuzzy matching: only translated messages are candidates
    fuzzy_candidates = []
    if not no_fuzzy_matching:
        fuzzy_candidates = [
            self._key_for(msgid) for msgid in messages
            if msgid and messages[msgid].string
        ]
    fuzzy_matches = set()

    def _merge(message, oldkey, newkey):
        # Combine the template `message` with the old entry stored under
        # `oldkey`; a differing `newkey` marks a fuzzy match.
        message = message.clone()
        fuzzy = False
        if oldkey != newkey:
            fuzzy = True
            fuzzy_matches.add(oldkey)
            oldmsg = messages.get(oldkey)
            if isinstance(oldmsg.id, basestring):
                message.previous_id = [oldmsg.id]
            else:
                message.previous_id = list(oldmsg.id)
        else:
            oldmsg = remaining.pop(oldkey, None)
        message.string = oldmsg.string

        # Reshape the old translation when its singular/plural form does
        # not fit the new message ID; any such change marks it fuzzy.
        if isinstance(message.id, (list, tuple)):
            if not isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = tuple(
                    [message.string] + ([u''] * (len(message.id) - 1))
                )
            elif len(message.string) != self.num_plurals:
                fuzzy = True
                message.string = tuple(message.string[:len(oldmsg.string)])
        elif isinstance(message.string, (list, tuple)):
            fuzzy = True
            message.string = message.string[0]

        message.flags |= oldmsg.flags
        if fuzzy:
            message.flags |= set([u'fuzzy'])
        self[message.id] = message

    for message in template:
        if message.id:
            key = self._key_for(message.id)
            if key in messages:
                _merge(message, key, key)
            else:
                # Tested by truth value, like every other use of the flag
                # in this method.
                if not no_fuzzy_matching:
                    # do some fuzzy matching with difflib
                    matches = get_close_matches(key.lower().strip(),
                                                fuzzy_candidates, 1)
                    if matches:
                        _merge(message, matches[0], key)
                        continue

                self[message.id] = message

    # Old entries consumed by a fuzzy match are not reported as obsolete.
    self.obsolete = odict()
    for msgid in remaining:
        if no_fuzzy_matching or msgid not in fuzzy_matches:
            self.obsolete[msgid] = remaining[msgid]

    # Make updated catalog's POT-Creation-Date equal to the template
    # used to update the catalog
    self.creation_date = template.creation_date
def main():
    """Import locale data from a CLDR source tree named on the command line.

    Exactly one positional argument is expected: the path to the CLDR
    directory.  The function

    * reads ``supplemental/supplementalData.xml`` and
      ``supplemental/metazoneInfo.xml`` and pickles the resulting time zone
      tables (protocol 2) to ``global.dat`` in the ``babel`` directory next
      to the directory containing the running script;
    * builds a territory containment mapping from the supplemental data;
    * reads the per-locale plural rules from ``supplemental/plurals.xml``;
    * walks every ``*.xml`` file below ``main/`` (``root.xml`` first, the
      others ordered by file name length) and collects the plural rule,
      display names, week data, time zone names and Gregorian calendar
      data into a per-locale ``data`` dictionary.

    Progress and pattern parsing errors are reported on standard error.
    """
    parser = OptionParser(usage="%prog path/to/cldr")
    options, args = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    srcdir = args[0]
    destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                           "..", "babel")

    def is_provisional(elem):
        # Draft and alternate ("alt") entries may fill a gap, but they must
        # never replace a value that has already been recorded.
        return "draft" in elem.attrib or "alt" in elem.attrib

    def zone_info(elem):
        # Collect the exemplar city and the long/short names of one
        # <zone> or <metazone> element.
        info = {}
        city = elem.findtext("exemplarCity")
        if city:
            info["city"] = unicode(city)
        for child in elem.findall("long/*"):
            info.setdefault("long", {})[child.tag] = unicode(child.text)
        for child in elem.findall("short/*"):
            info.setdefault("short", {})[child.tag] = unicode(child.text)
        return info

    sup = parse(os.path.join(srcdir, "supplemental", "supplementalData.xml"))

    # Import global data from the supplemental files
    global_data = {}

    territory_zones = global_data.setdefault("territory_zones", {})
    zone_aliases = global_data.setdefault("zone_aliases", {})
    zone_territories = global_data.setdefault("zone_territories", {})
    for elem in sup.findall(".//timezoneData/zoneFormatting/zoneItem"):
        tzid = elem.attrib["type"]
        territory_zones.setdefault(elem.attrib["territory"], []).append(tzid)
        zone_territories[tzid] = elem.attrib["territory"]
        if "aliases" in elem.attrib:
            for alias in elem.attrib["aliases"].split():
                zone_aliases[alias] = tzid

    # Import Metazone mapping
    meta_zones = global_data.setdefault("meta_zones", {})
    tzsup = parse(os.path.join(srcdir, "supplemental", "metazoneInfo.xml"))
    for elem in tzsup.findall(".//timezone"):
        for child in elem.findall("usesMetazone"):
            if "to" not in child.attrib:  # FIXME: support old mappings
                meta_zones[elem.attrib["type"]] = child.attrib["mzone"]

    outfile = open(os.path.join(destdir, "global.dat"), "wb")
    try:
        pickle.dump(global_data, outfile, 2)
    finally:
        outfile.close()

    # build a territory containment mapping for inheritance
    regions = {}
    for elem in sup.findall(".//territoryContainment/group"):
        regions[elem.attrib["type"]] = elem.attrib["contains"].split()

    # Resolve territory containment: every territory maps to the set of all
    # groups that contain it, directly or through another group.
    territory_containment = {}
    region_items = regions.items()
    region_items.sort()
    for group, territory_list in region_items:
        for territory in territory_list:
            containers = territory_containment.setdefault(territory, set([]))
            if group in territory_containment:
                containers |= territory_containment[group]
            containers.add(group)

    # prepare the per-locale plural rules definitions; all locales listed
    # on one <pluralRules> element share a single PluralRule object
    plural_rules = {}
    prsup = parse(os.path.join(srcdir, "supplemental", "plurals.xml"))
    for elem in prsup.findall(".//plurals/pluralRules"):
        rules = []
        for rule in elem.findall("pluralRule"):
            rules.append((rule.attrib["count"], unicode(rule.text)))
        pr = PluralRule(rules)
        for locale in elem.attrib["locales"].split():
            plural_rules[locale] = pr

    # root.xml is processed first, the remaining files by name length.
    filenames = os.listdir(os.path.join(srcdir, "main"))
    filenames.remove("root.xml")
    filenames.sort(lambda a, b: len(a) - len(b))
    filenames.insert(0, "root.xml")

    for filename in filenames:
        stem, ext = os.path.splitext(filename)
        if ext != ".xml":
            continue

        print >>sys.stderr, "Processing input file %r" % filename
        tree = parse(os.path.join(srcdir, "main", filename))
        data = {}

        language = None
        elem = tree.find(".//identity/language")
        if elem is not None:
            language = elem.attrib["type"]
        print >>sys.stderr, " Language: %r" % language

        territory = None
        elem = tree.find(".//identity/territory")
        if elem is not None:
            territory = elem.attrib["type"]
        else:
            territory = "001"  # world
        print >>sys.stderr, " Territory: %r" % territory
        regions = territory_containment.get(territory, [])
        print >>sys.stderr, " Regions: %r" % regions

        # plural rules: the lookup key is "language_TERRITORY", or just the
        # language when the territory is the world ("001")
        locale_id = "_".join(filter(None, [
            language,
            territory != "001" and territory or None
        ]))
        if locale_id in plural_rules:
            data["plural_form"] = plural_rules[locale_id]

        # <localeDisplayNames>
        for section, xpath in (("territories", ".//territories/territory"),
                               ("languages", ".//languages/language"),
                               ("variants", ".//variants/variant"),
                               ("scripts", ".//scripts/script")):
            names = data.setdefault(section, {})
            for elem in tree.findall(xpath):
                if is_provisional(elem) and elem.attrib["type"] in names:
                    continue
                names[elem.attrib["type"]] = _text(elem)

        # <dates>
        # A supplemental week data entry applies when it lists the locale's
        # territory or any region containing that territory.
        week_data = data.setdefault("week_data", {})
        supelem = sup.find(".//weekData")

        for elem in supelem.findall("minDays"):
            territories = elem.attrib["territories"].split()
            if territory in territories \
                    or any([r in territories for r in regions]):
                week_data["min_days"] = int(elem.attrib["count"])

        for tag, key in (("firstDay", "first_day"),
                         ("weekendStart", "weekend_start"),
                         ("weekendEnd", "weekend_end")):
            for elem in supelem.findall(tag):
                territories = elem.attrib["territories"].split()
                if territory in territories \
                        or any([r in territories for r in regions]):
                    week_data[key] = weekdays[elem.attrib["day"]]

        # Only the first non-draft, non-alternate format of each kind is
        # used; CLDR placeholders become Python format specifiers.
        zone_formats = data.setdefault("zone_formats", {})
        for elem in tree.findall(".//timeZoneNames/gmtFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["gmt"] = unicode(elem.text).replace("{0}", "%s")
                break
        for elem in tree.findall(".//timeZoneNames/regionFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["region"] = unicode(elem.text).replace("{0}", "%s")
                break
        for elem in tree.findall(".//timeZoneNames/fallbackFormat"):
            if "draft" not in elem.attrib and "alt" not in elem.attrib:
                zone_formats["fallback"] = unicode(elem.text) \
                    .replace("{0}", "%(0)s").replace("{1}", "%(1)s")
                break

        time_zones = data.setdefault("time_zones", {})
        for elem in tree.findall(".//timeZoneNames/zone"):
            time_zones[elem.attrib["type"]] = zone_info(elem)

        meta_zones = data.setdefault("meta_zones", {})
        for elem in tree.findall(".//timeZoneNames/metazone"):
            info = zone_info(elem)
            info["common"] = elem.findtext("commonlyUsed") == "true"
            meta_zones[elem.attrib["type"]] = info

        for calendar in tree.findall(".//calendars/calendar"):
            if calendar.attrib["type"] != "gregorian":
                # TODO: support other calendar types
                continue

            months = data.setdefault("months", {})
            for ctxt in calendar.findall("months/monthContext"):
                ctxt_type = ctxt.attrib["type"]
                ctxts = months.setdefault(ctxt_type, {})
                for width in ctxt.findall("monthWidth"):
                    width_type = width.attrib["type"]
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == "month":
                            if is_provisional(elem) \
                                    and int(elem.attrib["type"]) in widths:
                                continue
                            widths[int(elem.attrib.get("type"))] = \
                                unicode(elem.text)
                        elif elem.tag == "alias":
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ["months", ctxt_type, width_type],
                                    elem.attrib["path"])
                            )

            days = data.setdefault("days", {})
            for ctxt in calendar.findall("days/dayContext"):
                ctxt_type = ctxt.attrib["type"]
                ctxts = days.setdefault(ctxt_type, {})
                for width in ctxt.findall("dayWidth"):
                    width_type = width.attrib["type"]
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == "day":
                            dtype = weekdays[elem.attrib["type"]]
                            # Same rule as for months, quarters and eras: a
                            # draft or alternate name must not overwrite a
                            # day name that is already present.
                            if is_provisional(elem) and dtype in widths:
                                continue
                            widths[dtype] = unicode(elem.text)
                        elif elem.tag == "alias":
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ["days", ctxt_type, width_type],
                                    elem.attrib["path"])
                            )

            quarters = data.setdefault("quarters", {})
            for ctxt in calendar.findall("quarters/quarterContext"):
                ctxt_type = ctxt.attrib["type"]
                ctxts = quarters.setdefault(ctxt_type, {})
                for width in ctxt.findall("quarterWidth"):
                    width_type = width.attrib["type"]
                    widths = ctxts.setdefault(width_type, {})
                    for elem in width.getiterator():
                        if elem.tag == "quarter":
                            if is_provisional(elem) \
                                    and int(elem.attrib["type"]) in widths:
                                continue
                            widths[int(elem.attrib["type"])] = \
                                unicode(elem.text)
                        elif elem.tag == "alias":
                            ctxts[width_type] = Alias(
                                _translate_alias(
                                    ["quarters", ctxt_type, width_type],
                                    elem.attrib["path"])
                            )

            eras = data.setdefault("eras", {})
            for width in calendar.findall("eras/*"):
                width_type = NAME_MAP[width.tag]
                widths = eras.setdefault(width_type, {})
                for elem in width.getiterator():
                    if elem.tag == "era":
                        if is_provisional(elem) \
                                and int(elem.attrib["type"]) in widths:
                            continue
                        widths[int(elem.attrib.get("type"))] = \
                            unicode(elem.text)
                    elif elem.tag == "alias":
                        eras[width_type] = Alias(
                            _translate_alias(["eras", width_type],
                                             elem.attrib["path"])
                        )

            # AM/PM
            periods = data.setdefault("periods", {})
            for tag in ("am", "pm"):
                for elem in calendar.findall(tag):
                    if is_provisional(elem) and elem.tag in periods:
                        continue
                    periods[elem.tag] = unicode(elem.text)

            date_formats = data.setdefault("date_formats", {})
            for formats_elem in calendar.findall("dateFormats"):
                for elem in formats_elem.getiterator():
                    if elem.tag == "dateFormatLength":
                        # Alternate patterns are skipped here exactly as
                        # they are for the time formats below.
                        if is_provisional(elem) \
                                and elem.attrib.get("type") in date_formats:
                            continue
                        try:
                            date_formats[elem.attrib.get("type")] = \
                                dates.parse_pattern(unicode(
                                    elem.findtext("dateFormat/pattern")))
                        except ValueError:
                            # sys.exc_info() instead of a bound name keeps
                            # this clause valid on every interpreter version
                            e = sys.exc_info()[1]
                            print >>sys.stderr, "ERROR: %s" % e
                    elif elem.tag == "alias":
                        # NOTE(review): this rebinds the local name only;
                        # data["date_formats"] still refers to the dict
                        # created above -- confirm this is intended.
                        date_formats = Alias(_translate_alias(
                            ["date_formats"], elem.attrib["path"])
                        )

            time_formats = data.setdefault("time_formats", {})
            for formats_elem in calendar.findall("timeFormats"):
                for elem in formats_elem.getiterator():
                    if elem.tag == "timeFormatLength":
                        if is_provisional(elem) \
                                and elem.attrib.get("type") in time_formats:
                            continue
                        try:
                            time_formats[elem.attrib.get("type")] = \
                                dates.parse_pattern(unicode(
                                    elem.findtext("timeFormat/pattern")))
                        except ValueError:
                            e = sys.exc_info()[1]
                            print >>sys.stderr, "ERROR: %s" % e
                    elif elem.tag == "alias":
                        # NOTE(review): local rebinding only, see the date
                        # formats section above.
                        time_formats = Alias(_translate_alias(
                            ["time_formats"], elem.attrib["path"])
                        )
# This software consists of voluntary contributions made by many # individuals. For the exact contribution history, see the revision # history and logs, available at http://babel.edgewall.org/log/. """Various routines that help with validation of translations. :since: version 0.9 """ from itertools import izip from babel.messages.catalog import TranslationError, PYTHON_FORMAT from babel.util import set #: list of format chars that are compatible to each other _string_format_compatibilities = [ set(['i', 'd', 'u']), set(['x', 'X']), set(['f', 'F', 'g', 'G']) ] def num_plurals(catalog, message): """Verify the number of plurals in the translation.""" if not message.pluralizable: if not isinstance(message.string, basestring): raise TranslationError("Found plural forms for non-pluralizable " "message") return # skip further tests if no catalog is provided. elif catalog is None:
def main(): parser = OptionParser(usage='%prog path/to/cldr') options, args = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') srcdir = args[0] destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), '..', 'babel') sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml')) # Import global data from the supplemental files global_data = {} territory_zones = global_data.setdefault('territory_zones', {}) zone_aliases = global_data.setdefault('zone_aliases', {}) zone_territories = global_data.setdefault('zone_territories', {}) for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'): tzid = elem.attrib['type'] territory_zones.setdefault(elem.attrib['territory'], []).append(tzid) zone_territories[tzid] = elem.attrib['territory'] if 'aliases' in elem.attrib: for alias in elem.attrib['aliases'].split(): zone_aliases[alias] = tzid # Import Metazone mapping meta_zones = global_data.setdefault('meta_zones', {}) tzsup = parse(os.path.join(srcdir, 'supplemental', 'metazoneInfo.xml')) for elem in tzsup.findall('.//timezone'): for child in elem.findall('usesMetazone'): if 'to' not in child.attrib: # FIXME: support old mappings meta_zones[elem.attrib['type']] = child.attrib['mzone'] outfile = open(os.path.join(destdir, 'global.dat'), 'wb') try: pickle.dump(global_data, outfile, 2) finally: outfile.close() # build a territory containment mapping for inheritance regions = {} for elem in sup.findall('.//territoryContainment/group'): regions[elem.attrib['type']] = elem.attrib['contains'].split() # Resolve territory containment territory_containment = {} region_items = regions.items() region_items.sort() for group, territory_list in region_items: for territory in territory_list: containers = territory_containment.setdefault(territory, set([])) if group in territory_containment: containers |= territory_containment[group] containers.add(group) filenames = os.listdir(os.path.join(srcdir, 'main')) 
filenames.remove('root.xml') filenames.sort(lambda a, b: len(a) - len(b)) filenames.insert(0, 'root.xml') for filename in filenames: stem, ext = os.path.splitext(filename) if ext != '.xml': continue print >> sys.stderr, 'Processing input file %r' % filename tree = parse(os.path.join(srcdir, 'main', filename)) data = {} language = None elem = tree.find('.//identity/language') if elem is not None: language = elem.attrib['type'] print >> sys.stderr, ' Language: %r' % language territory = None elem = tree.find('.//identity/territory') if elem is not None: territory = elem.attrib['type'] else: territory = '001' # world print >> sys.stderr, ' Territory: %r' % territory regions = territory_containment.get(territory, []) print >> sys.stderr, ' Regions: %r' % regions # <localeDisplayNames> territories = data.setdefault('territories', {}) for elem in tree.findall('.//territories/territory'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in territories: continue territories[elem.attrib['type']] = _text(elem) languages = data.setdefault('languages', {}) for elem in tree.findall('.//languages/language'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in languages: continue languages[elem.attrib['type']] = _text(elem) variants = data.setdefault('variants', {}) for elem in tree.findall('.//variants/variant'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in variants: continue variants[elem.attrib['type']] = _text(elem) scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib['type'] in scripts: continue scripts[elem.attrib['type']] = _text(elem) # <dates> week_data = data.setdefault('week_data', {}) supelem = sup.find('.//weekData') for elem in supelem.findall('minDays'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in 
regions]): week_data['min_days'] = int(elem.attrib['count']) for elem in supelem.findall('firstDay'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['first_day'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendStart'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['weekend_start'] = weekdays[elem.attrib['day']] for elem in supelem.findall('weekendEnd'): territories = elem.attrib['territories'].split() if territory in territories or any( [r in territories for r in regions]): week_data['weekend_end'] = weekdays[elem.attrib['day']] zone_formats = data.setdefault('zone_formats', {}) for elem in tree.findall('.//timeZoneNames/gmtFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s') break for elem in tree.findall('.//timeZoneNames/regionFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['region'] = unicode(elem.text).replace( '{0}', '%s') break for elem in tree.findall('.//timeZoneNames/fallbackFormat'): if 'draft' not in elem.attrib and 'alt' not in elem.attrib: zone_formats['fallback'] = unicode(elem.text) \ .replace('{0}', '%(0)s').replace('{1}', '%(1)s') break time_zones = data.setdefault('time_zones', {}) for elem in tree.findall('.//timeZoneNames/zone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = unicode(city) for child in elem.findall('long/*'): info.setdefault('long', {})[child.tag] = unicode(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = unicode(child.text) time_zones[elem.attrib['type']] = info meta_zones = data.setdefault('meta_zones', {}) for elem in tree.findall('.//timeZoneNames/metazone'): info = {} city = elem.findtext('exemplarCity') if city: info['city'] = unicode(city) for child in 
elem.findall('long/*'): info.setdefault('long', {})[child.tag] = unicode(child.text) for child in elem.findall('short/*'): info.setdefault('short', {})[child.tag] = unicode(child.text) info['common'] = elem.findtext('commonlyUsed') == 'true' meta_zones[elem.attrib['type']] = info for calendar in tree.findall('.//calendars/calendar'): if calendar.attrib['type'] != 'gregorian': # TODO: support other calendar types continue months = data.setdefault('months', {}) for ctxt in calendar.findall('months/monthContext'): ctxt_type = ctxt.attrib['type'] ctxts = months.setdefault(ctxt_type, {}) for width in ctxt.findall('monthWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'month': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = unicode( elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['months', ctxt_type, width_type], elem.attrib['path'])) days = data.setdefault('days', {}) for ctxt in calendar.findall('days/dayContext'): ctxt_type = ctxt.attrib['type'] ctxts = days.setdefault(ctxt_type, {}) for width in ctxt.findall('dayWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'day': dtype = weekdays[elem.attrib['type']] if ('draft' in elem.attrib or 'alt' not in elem.attrib) \ and dtype in widths: continue widths[dtype] = unicode(elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['days', ctxt_type, width_type], elem.attrib['path'])) quarters = data.setdefault('quarters', {}) for ctxt in calendar.findall('quarters/quarterContext'): ctxt_type = ctxt.attrib['type'] ctxts = quarters.setdefault(ctxt.attrib['type'], {}) for width in ctxt.findall('quarterWidth'): width_type = width.attrib['type'] widths = ctxts.setdefault(width_type, {}) for elem in 
width.getiterator(): if elem.tag == 'quarter': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib['type'])] = unicode( elem.text) elif elem.tag == 'alias': ctxts[width_type] = Alias( _translate_alias( ['quarters', ctxt_type, width_type], elem.attrib['path'])) eras = data.setdefault('eras', {}) for width in calendar.findall('eras/*'): width_type = NAME_MAP[width.tag] widths = eras.setdefault(width_type, {}) for elem in width.getiterator(): if elem.tag == 'era': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and int(elem.attrib['type']) in widths: continue widths[int(elem.attrib.get('type'))] = unicode( elem.text) elif elem.tag == 'alias': eras[width_type] = Alias( _translate_alias(['eras', width_type], elem.attrib['path'])) # AM/PM periods = data.setdefault('periods', {}) for elem in calendar.findall('am'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.tag in periods: continue periods[elem.tag] = unicode(elem.text) for elem in calendar.findall('pm'): if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.tag in periods: continue periods[elem.tag] = unicode(elem.text) date_formats = data.setdefault('date_formats', {}) for format in calendar.findall('dateFormats'): for elem in format.getiterator(): if elem.tag == 'dateFormatLength': if 'draft' in elem.attrib and \ elem.attrib.get('type') in date_formats: continue try: date_formats[elem.attrib.get('type')] = \ dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern'))) except ValueError, e: print >> sys.stderr, 'ERROR: %s' % e elif elem.tag == 'alias': date_formats = Alias( _translate_alias(['date_formats'], elem.attrib['path'])) time_formats = data.setdefault('time_formats', {}) for format in calendar.findall('timeFormats'): for elem in format.getiterator(): if elem.tag == 'timeFormatLength': if ('draft' in elem.attrib or 'alt' in elem.attrib) \ and elem.attrib.get('type') in time_formats: continue try: 
time_formats[elem.attrib.get('type')] = \ dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern'))) except ValueError, e: print >> sys.stderr, 'ERROR: %s' % e elif elem.tag == 'alias': time_formats = Alias( _translate_alias(['time_formats'], elem.attrib['path']))
def update(self, template, no_fuzzy_matching=False):
    """Update the catalog based on the given template catalog.

    >>> from babel.messages import Catalog
    >>> template = Catalog()
    >>> template.add('green', locations=[('main.py', 99)])
    >>> template.add('blue', locations=[('main.py', 100)])
    >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
    >>> catalog = Catalog(locale='de_DE')
    >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
    >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
    >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
    ...             locations=[('util.py', 38)])

    >>> catalog.update(template)
    >>> len(catalog)
    3

    >>> msg1 = catalog['green']
    >>> msg1.string
    >>> msg1.locations
    [('main.py', 99)]

    >>> msg2 = catalog['blue']
    >>> msg2.string
    u'blau'
    >>> msg2.locations
    [('main.py', 100)]

    >>> msg3 = catalog['salad']
    >>> msg3.string
    (u'Salat', u'Salate')
    >>> msg3.locations
    [('util.py', 42)]

    Messages that are in the catalog but not in the template are removed
    from the main collection, but can still be accessed via the `obsolete`
    member:

    >>> 'head' in catalog
    False
    >>> catalog.obsolete.values()
    [<Message 'head' (flags: [])>]

    :param template: the reference catalog, usually read from a POT file
    :param no_fuzzy_matching: if true, fuzzy matching of message IDs is
                              disabled; template messages without an exact
                              match are then added untranslated
    """
    # ``messages`` is the previous content of the catalog, ``remaining``
    # tracks the old entries that have not been matched exactly so far.
    messages = self._messages
    remaining = messages.copy()
    self._messages = odict()

    # Prepare for fuzzy matching: map the key of every old message that has
    # both an ID and a translation to its context.
    fuzzy_candidates = {}
    if not no_fuzzy_matching:
        fuzzy_candidates = dict([
            (self._key_for(msgid), messages[msgid].context)
            for msgid in messages if msgid and messages[msgid].string
        ])
    # keys of old messages that were reused through a fuzzy match
    fuzzy_matches = set()

    def _merge(message, oldkey, newkey):
        # Store a copy of the template message in this catalog, carrying
        # over the translation and flags of the old message at `oldkey`.
        message = message.clone()
        fuzzy = False
        if oldkey != newkey:
            # Fuzzy match: remember the ID the translation came from.
            fuzzy = True
            fuzzy_matches.add(oldkey)
            oldmsg = messages.get(oldkey)
            if isinstance(oldmsg.id, basestring):
                message.previous_id = [oldmsg.id]
            else:
                message.previous_id = list(oldmsg.id)
        else:
            oldmsg = remaining.pop(oldkey, None)
        message.string = oldmsg.string
        if isinstance(message.id, (list, tuple)):
            # The new message is pluralizable.
            if not isinstance(message.string, (list, tuple)):
                # Old translation was singular only: pad it with empty
                # strings to the length of the new ID tuple.
                fuzzy = True
                message.string = tuple(
                    [message.string] + ([u''] * (len(message.id) - 1))
                )
            elif len(message.string) != self.num_plurals:
                fuzzy = True
                message.string = tuple(message.string[:len(oldmsg.string)])
        elif isinstance(message.string, (list, tuple)):
            # New message is not pluralizable but the old translation was:
            # keep only its first form.
            fuzzy = True
            message.string = message.string[0]
        message.flags |= oldmsg.flags
        if fuzzy:
            message.flags |= set([u'fuzzy'])
        self[message.id] = message

    for message in template:
        if message.id:
            key = self._key_for(message.id, message.context)
            if key in messages:
                _merge(message, key, key)
            else:
                # Truthiness test, consistent with the other uses of the
                # flag in this method.
                if not no_fuzzy_matching:
                    # do some fuzzy matching with difflib
                    if isinstance(key, tuple):
                        matchkey = key[0] # just the msgid, no context
                    else:
                        matchkey = key
                    matches = get_close_matches(matchkey.lower().strip(),
                                                fuzzy_candidates.keys(), 1)
                    if matches:
                        newkey = matches[0]
                        newctxt = fuzzy_candidates[newkey]
                        if newctxt is not None:
                            newkey = newkey, newctxt
                        _merge(message, newkey, key)
                        continue

                self[message.id] = message

    # Old entries that were neither matched exactly nor reused by a fuzzy
    # match become obsolete.
    self.obsolete = odict()
    for msgid in remaining:
        if no_fuzzy_matching or msgid not in fuzzy_matches:
            self.obsolete[msgid] = remaining[msgid]
    # Make updated catalog's POT-Creation-Date equal to the template
    # used to update the catalog
    self.creation_date = template.creation_date