Beispiel #1
0
def _create_lang_script_to_names(lang_script_data):
  """Generate a map from lang-script to English (and possibly native) names.

  Whether the script is included in the name depends on the number of used and
  unused scripts.  If there's one used script, that script is omitted.
  Else if there's no used script and one unused script, that script is omitted.
  Else the script is included.
  If there's no English name for the lang_script, it is excluded.
  """

  lang_to_names = {}
  for lang in lang_script_data:
    used, unused = lang_script_data[lang]
    # Use the next() builtin instead of the Python-2-only .next() method so
    # this also runs under Python 3.
    if len(used) == 1:
      exclude_script = next(iter(used))
    elif not used and len(unused) == 1:
      exclude_script = next(iter(unused))
    else:
      exclude_script = ''

    for script in (used | unused):
      lang_script = lang + '-' + script
      target = lang if script == exclude_script else lang_script
      # special case, not generally useful
      if target.startswith('und-'):
        en_name = cldr_data.get_english_script_name(target[4:]) + ' script'
      else:
        en_name = cldr_data.get_english_language_name(target)
      if not en_name:
        # Parenthesized form is valid as both py2 statement and py3 call.
        print('!No english name for %s' % lang_script)
        continue
      native_name = cldr_data.get_native_language_name(lang_script, exclude_script)
      if native_name == en_name:
        native_name = None
      lang_to_names[lang_script] = [en_name, native_name] if native_name else [en_name]

  return lang_to_names
def generate_table(filename):
    """Write an HTML table of scripts and their language samples to filename.

    Each script gets a header row followed by one row per
    (bcp, sample_type, sample_text) tuple.  The English name of each
    language is looked up from the bcp tag with its script stripped;
    lookup failures fall back to the raw tag.
    """
    with codecs.open(filename, "w", "utf-8") as f:
        script_to_samples = _get_script_to_samples()
        f.write(_HTML_HEADER)
        f.write("<table>\n")
        f.write("<tr><th>Script<br/>BCP<th>name<th>type<th>text\n")

        for script, samples in sorted(script_to_samples.items()):
            script_en = cldr_data.get_english_script_name(script)
            f.write("<tr><th colspan=4>%s\n" % script_en)
            for bcp, sample_type, sample_text in samples:
                # Pre-assign so bcp_no_script is bound even when
                # loc_tag_to_lsrv raises before the try block sets it
                # (previously a NameError at the cols.append below).
                bcp_no_script = bcp
                try:
                    lsrv = cldr_data.loc_tag_to_lsrv(bcp)
                    lsrv = (lsrv[0], None, lsrv[2], lsrv[3])
                    bcp_no_script = cldr_data.lsrv_to_loc_tag(lsrv)
                    bcp_en = cldr_data.get_english_language_name(bcp_no_script)
                    if not bcp_en:
                        bcp_en = "No name"
                    if bcp_en == "Unknown Language" and sample_type == "chars":
                        bcp_en = "(characters)"
                except Exception:
                    # Narrowed from a bare except so KeyboardInterrupt and
                    # SystemExit still propagate.
                    print("could not get english name for %s" % bcp)
                    bcp_en = bcp

                cols = ["<tr>"]
                cols.append(bcp_no_script)
                cols.append(bcp_en)
                cols.append(sample_type)
                cols.append(sample_text)
                f.write("<td>".join(cols) + "\n")
            f.write("<tr><td colspan=4>&nbsp;\n")
        f.write("</table>\n")
        f.write(_HTML_FOOTER + "\n")
Beispiel #3
0
def generate_table(filename):
    with codecs.open(filename, 'w', 'utf-8') as f:
        script_to_samples = _get_script_to_samples()
        print >> f, _HTML_HEADER
        print >> f, '<table>'
        print >> f, '<tr><th>Script<br/>BCP<th>name<th>type<th>text'

        for script, samples in sorted(script_to_samples.iteritems()):
            script_en = cldr_data.get_english_script_name(script)
            print >> f, '<tr><th colspan=4>%s' % script_en
            for bcp, sample_type, sample_text in samples:
                try:
                    lsrv = cldr_data.loc_tag_to_lsrv(bcp)
                    lsrv = (lsrv[0], None, lsrv[2], lsrv[3])
                    bcp_no_script = cldr_data.lsrv_to_loc_tag(lsrv)
                    bcp_en = cldr_data.get_english_language_name(bcp_no_script)
                    if not bcp_en:
                        bcp_en = 'No name'
                    if bcp_en == 'Unknown Language' and sample_type == 'chars':
                        bcp_en = '(characters)'
                except:
                    print 'could not get english name for %s' % bcp
                    bcp_en = bcp

                cols = ['<tr>']
                cols.append(bcp_no_script)
                cols.append(bcp_en)
                cols.append(sample_type)
                cols.append(sample_text)
                print >> f, '<td>'.join(cols)
            print >> f, '<tr><td colspan=4>&nbsp;'
        print >> f, '</table>'
        print >> f, _HTML_FOOTER
  def lang_key(lang_tag):
    name = cldr_data.get_english_language_name(lang_tag)
    if not name:
      print 'no name for %s' % lang_tag
      return None

    if name.endswith (' script)'):
      ix = name.rfind('(') - 1
      script_sfx = ' ' + name[ix + 2: len(name) - 8]
      name = name[:ix]
    else:
      script_sfx = ''

    key = name
    for prefix in ['Ancient', 'Central', 'Eastern', 'Lower', 'Middle', 'North',
                   'Northern', 'Old', 'Southern', 'Southwestern', 'Upper',
                   'West', 'Western']:
      if name.startswith(prefix + ' '):
        key = name[len(prefix) + 1:] + ' ' + name[:len(prefix)]
        break

    for cluster in ['Arabic', 'French', 'Chinese', 'English', 'German', 'Hindi',
                    'Malay', 'Nahuatl', 'Tamazight', 'Thai']:
      if name.find(cluster) != -1:
        key = cluster + '-' + name
        break

    return key + script_sfx
def generate_table(filename):
  with codecs.open(filename, 'w', 'utf-8') as f:
    script_to_samples = _get_script_to_samples()
    print >> f, _HTML_HEADER
    print >> f, '<table>'
    print >> f, '<tr><th>Script<br/>BCP<th>name<th>type<th>text'

    for script, samples in sorted(script_to_samples.iteritems()):
      script_en = cldr_data.get_english_script_name(script)
      print >> f, '<tr><th colspan=4>%s' % script_en
      for bcp, sample_type, sample_text in samples:
        try:
          lsrv = cldr_data.loc_tag_to_lsrv(bcp)
          lsrv = (lsrv[0], None, lsrv[2], lsrv[3])
          bcp_no_script = cldr_data.lsrv_to_loc_tag(lsrv)
          bcp_en = cldr_data.get_english_language_name(bcp_no_script)
          if not bcp_en:
            bcp_en = 'No name'
          if bcp_en == 'Unknown Language' and sample_type == 'chars':
            bcp_en = '(characters)'
        except:
          print 'could not get english name for %s' % bcp
          bcp_en = bcp

        cols = ['<tr>']
        cols.append(bcp_no_script)
        cols.append(bcp_en)
        cols.append(sample_type)
        cols.append(sample_text)
        print >> f, '<td>'.join(cols)
      print >> f, '<tr><td colspan=4>&nbsp;'
    print >> f, '</table>'
    print >> f, _HTML_FOOTER
    def lang_key(lang_tag):
        name = cldr_data.get_english_language_name(lang_tag)
        if not name:
            print 'no name for %s' % lang_tag
            return None

        if name.endswith(' script)'):
            ix = name.rfind('(') - 1
            script_sfx = ' ' + name[ix + 2:len(name) - 8]
            name = name[:ix]
        else:
            script_sfx = ''

        key = name
        for prefix in [
                'Ancient', 'Central', 'Eastern', 'Lower', 'Middle', 'North',
                'Northern', 'Old', 'Southern', 'Southwestern', 'Upper', 'West',
                'Western'
        ]:
            if name.startswith(prefix + ' '):
                key = name[len(prefix) + 1:] + ' ' + name[:len(prefix)]
                break

        for cluster in [
                'Arabic', 'French', 'Chinese', 'English', 'German', 'Hindi',
                'Malay', 'Nahuatl', 'Tamazight', 'Thai'
        ]:
            if name.find(cluster) != -1:
                key = cluster + '-' + name
                break

        return key + script_sfx
Beispiel #7
0
def _create_lang_script_to_names(lang_script_data):
    """Generate a map from lang-script to English (and possibly native) names.

  Whether the script is included in the name depends on the number of used
  and unused scripts.  If there's one used script, that script is omitted.
  Else if there's no used script and one unused script, that script is
  omitted.  Else the script is included.  If there's no English name for
  the lang_script, it is excluded.
  """

    lang_to_names = {}
    for lang in lang_script_data:
        used, unused = lang_script_data[lang]
        # Use the next() builtin instead of the Python-2-only .next()
        # method so this also runs under Python 3.
        if len(used) == 1:
            exclude_script = next(iter(used))
        elif not used and len(unused) == 1:
            exclude_script = next(iter(unused))
        else:
            exclude_script = ''

        for script in (used | unused):
            lang_script = lang + '-' + script
            target = lang if script == exclude_script else lang_script
            # special case, not generally useful
            if target.startswith('und-'):
                en_name = cldr_data.get_english_script_name(
                    target[4:]) + ' script'
            else:
                en_name = cldr_data.get_english_language_name(target)
            if not en_name:
                # Easier than patching the cldr_data, not sure I want to go there.
                if lang_script == 'tlh-Piqd':
                    en_name = u'Klingon'
                else:
                    _log('No english name for %s' % lang_script)
                    continue
            native_name = cldr_data.get_native_language_name(
                lang_script, exclude_script)
            if native_name == en_name:
                native_name = None
            lang_to_names[lang_script] = ([en_name, native_name]
                                          if native_name else [en_name])

    return lang_to_names
  def build_data_json(self, families, family_zip_info, universal_zip_info,
                      family_id_to_lang_tags, family_id_to_regions,
                      lang_tag_to_family_ids, region_to_family_ids):
    """Assemble and write the 'data' json: family, familyOrder, lang,
    region and pkgSize sections."""

    data = collections.OrderedDict()

    # Families: 'sans' and 'serif' pinned first, the rest ordered by
    # English display name.
    ordered_ids = ['sans', 'serif'] + sorted(
        (fid for fid in families if fid not in ('sans', 'serif')),
        key=lambda fid: families[fid].name)

    family_map = collections.OrderedDict()
    for fid in ordered_ids:
      fam = families[fid]
      _zip_name, hinted, unhinted = family_zip_info[fid]
      sizes = collections.OrderedDict()
      if hinted:
        sizes['hinted'] = hinted
      if unhinted:
        sizes['unhinted'] = unhinted
      family_map[fid] = {
          'name': fam.name,
          'pkgSize': sizes,
          'fonts': len(fam.hinted_members or fam.unhinted_members),
          'langs': len(family_id_to_lang_tags[fid]),
          'regions': len(family_id_to_regions[fid]),
      }
    data['family'] = family_map
    data['familyOrder'] = ordered_ids

    # Languages: 'und-' tags are default samples and not listed in the UI,
    # so skip them; sort the rest by English name.
    lang_map = collections.OrderedDict()
    visible_tags = [t for t in lang_tag_to_family_ids
                    if not t.startswith('und-')]
    for tag in sorted(visible_tags, key=cldr_data.get_english_language_name):
      entry = collections.OrderedDict()
      en_name = cldr_data.get_english_language_name(tag)
      entry['name'] = en_name
      entry['families'] = sorted(lang_tag_to_family_ids[tag])
      native = cldr_data.get_native_language_name(tag)
      if native and native != en_name:
        entry['keywords'] = [native]
      lang_map[tag] = entry
    data['lang'] = lang_map

    # Regions, ordered by English region name.
    region_map = collections.OrderedDict()
    for region in sorted(region_to_family_ids,
                         key=cldr_data.get_english_region_name):
      entry = collections.OrderedDict()
      entry['families'] = sorted(region_to_family_ids[region])
      entry['keywords'] = [cldr_data.get_english_region_name(region)]
      region_map[region] = entry
    data['region'] = region_map

    # Sizes of the universal (all-family) zips.
    universal = collections.OrderedDict()
    universal['hinted'] = universal_zip_info[1]
    universal['unhinted'] = universal_zip_info[2]
    data['pkgSize'] = universal

    self.write_json(data, 'data')
  def generate(self):
    """Run the full site-generation pipeline.

    Gathers fonts/families and their language/region coverage, drops
    languages without English names and samples a family cannot render,
    then emits zips, css, json data files and sample images.  Each output
    stage can be skipped via the corresponding self.no_* flag.
    """
    if self.clean:
      self.clean_target_dir()

    if not self.no_build:
      self.ensure_target_dirs_exist()

    # debug/print
    # ['families', 'script_to_family_ids', 'used_lang_data',
    #  'family_id_to_lang_tags', 'family_id_to_default_lang_tag']
    debug = frozenset([])

    fonts = get_noto_fonts()
    families = get_families(fonts)

    if 'families' in debug:
      print '\nfamilies'
      for family_id, family in sorted(families.iteritems()):
        print family_id, family.rep_member.script

    script_to_family_ids = get_script_to_family_ids(families)
    if 'script_to_family_ids' in debug:
      print '\nscript to family ids'
      for script, family_ids in sorted(script_to_family_ids.iteritems()):
        print script, family_ids

    supported_scripts = set(script_to_family_ids.keys())
    used_lang_data = get_used_lang_data(supported_scripts)
    if 'used_lang_data' in debug:
      print '\nused lang data'
      for lang, data in sorted(used_lang_data.iteritems()):
        used = ', '.join(data[0])
        unused = ', '.join(data[1])
        if unused:
          unused = '(' + unused + ')'
          if used:
            unused = ' ' + unused
        print '%s: %s%s' % (lang, used, unused)

    # Languages with no English name cannot be presented; drop them.
    langs_to_delete = []
    for lang in used_lang_data.keys():
      if not cldr_data.get_english_language_name(lang):
        langs_to_delete.append(lang)
    if langs_to_delete:
      print 'deleting languages with no english name: %s' % langs_to_delete
      for lang in langs_to_delete:
        del used_lang_data[lang]

    lang_tag_to_family_ids = get_lang_tag_to_family_ids(used_lang_data, script_to_family_ids)

    region_to_family_ids = get_region_to_family_ids(script_to_family_ids)

    family_id_to_lang_tags = get_family_id_to_lang_tags(lang_tag_to_family_ids, families)
    if 'family_id_to_lang_tags' in debug:
      print '\nfamily id to lang tags'
      for family_id, lang_tags in sorted(family_id_to_lang_tags.iteritems()):
        print '%s: %s' % (family_id, ','.join(sorted(lang_tags)))

    family_id_to_regions = get_family_id_to_regions(region_to_family_ids, families)

    family_id_to_default_lang_tag = get_family_id_to_default_lang_tag(
        family_id_to_lang_tags)
    if 'family_id_to_default_lang_tag' in debug:
      print '\nfamily id to default lang tag'
      for family_id, lang_tag in family_id_to_default_lang_tag.iteritems():
        print family_id, lang_tag

    used_lang_tags = get_used_lang_tags(
        lang_tag_to_family_ids.keys(), family_id_to_default_lang_tag.values())
    lang_tag_to_sample_data = get_lang_tag_to_sample_data(used_lang_tags)

    # find the samples that can't be displayed.
    # Each (sample_key, family_id) combination is tested only once; a
    # cached failure removes the language from that family as well.
    tested_keys = set()
    failed_keys = set()
    family_langs_to_remove = {}
    for lang_tag in sorted(lang_tag_to_sample_data):
      sample_info = lang_tag_to_sample_data[lang_tag]
      sample = sample_info[1]
      sample_key = sample_info[3]

      for family_id in sorted(lang_tag_to_family_ids[lang_tag]):
        full_key = sample_key + '-' + family_id
        if full_key in tested_keys:
          if full_key in failed_keys:
            print 'failed sample %s lang %s' % (full_key, lang_tag)
            if family_id not in family_langs_to_remove:
              family_langs_to_remove[family_id] = set()
            family_langs_to_remove[family_id].add(lang_tag)
          continue

        failed_cps = set()
        tested_keys.add(full_key)
        charset = families[family_id].charset
        for cp in sample:
          # Common punctuation (newline, parens, comma, hyphen, period,
          # semicolon, square brackets, U+2010) is exempt from coverage.
          if ord(cp) in [0xa, 0x28, 0x29, 0x2c, 0x2d, 0x2e, 0x3b, 0x5b, 0x5d, 0x2010]:
            continue
          if ord(cp) not in charset:
            failed_cps.add(ord(cp))
        if failed_cps:
          print 'sample %s cannot be displayed in %s (lang %s):\n  %s' % (
              sample_key, family_id, lang_tag,
              '\n  '.join('%04x (%s)' % (cp, unichr(cp)) for cp in sorted(failed_cps)))
          failed_keys.add(full_key)
          if family_id not in family_langs_to_remove:
            family_langs_to_remove[family_id] = set()
          family_langs_to_remove[family_id].add(lang_tag)

    # Remove undisplayable languages from both directions of the mapping.
    for family_id in sorted(family_langs_to_remove):
      langs_to_remove = family_langs_to_remove[family_id]
      print 'remove from %s: %s' % (family_id, ','.join(sorted(langs_to_remove)))

      family_id_to_lang_tags[family_id] -= langs_to_remove
      default_lang_tag = family_id_to_default_lang_tag[family_id]
      if default_lang_tag in langs_to_remove:
        print '!removing default lang tag %s for family %s' % (
            default_lang_tag, family_id)
      for lang in langs_to_remove:
        lang_tag_to_family_ids[lang] -= set([family_id])

    region_data = get_region_lat_lng_data(region_to_family_ids.keys())

    lang_tag_sort_order = get_lang_tag_sort_order(lang_tag_to_family_ids.keys())

    if self.no_build:
      print 'skipping build output'
      return

    # build outputs
    if self.no_zips:
      print 'skipping zip output'
    else:
      family_zip_info = self.build_zips(families)
      universal_zip_info = self.build_universal_zips(families)

      # build outputs not used by the json but linked to from the web page
      self.build_ttc_zips()

    if self.no_css:
      print 'skipping css output'
    else:
      family_css_info = self.build_css(families)

    if self.no_data or self.no_zips or self.no_css:
      # NOTE(review): message has no separator before reason
      # ('skipping data outputno zips'); also reason is empty exactly when
      # no_data is set, which looks inverted -- confirm intent.
      reason = '' if self.no_data else 'no zips' if self.no_zips else 'no css'
      print 'skipping data output%s' % reason
    else:
      self.build_data_json(families, family_zip_info, universal_zip_info,
                           family_id_to_lang_tags, family_id_to_regions,
                           lang_tag_to_family_ids, region_to_family_ids)

      self.build_families_json(families, family_id_to_lang_tags,
                               family_id_to_default_lang_tag,
                               family_id_to_regions, family_css_info,
                               lang_tag_sort_order)

      self.build_misc_json(lang_tag_to_sample_data, region_data)

    if self.no_images:
      print 'skipping image output'
    else:
      self.build_images(families, family_id_to_lang_tags,
                        family_id_to_default_lang_tag, lang_tag_to_sample_data)
    def generate(self):
        """Run the full site-generation pipeline.

        Gathers fonts/families and their language/region coverage, drops
        languages without English names and samples a family cannot
        render, then emits zips, css, json data files and sample images.
        Each output stage can be skipped via the matching self.no_* flag.
        """
        if self.clean:
            self.clean_target_dir()

        if not self.no_build:
            self.ensure_target_dirs_exist()

        # debug/print
        # ['families', 'script_to_family_ids', 'used_lang_data',
        #  'family_id_to_lang_tags', 'family_id_to_default_lang_tag']
        debug = frozenset([])

        fonts = noto_fonts.get_noto_fonts()
        families = noto_fonts.get_families(fonts)

        if 'families' in debug:
            print '\nfamilies'
            for family_id, family in sorted(families.iteritems()):
                print family_id, family.rep_member.script

        script_to_family_ids = get_script_to_family_ids(families)
        if 'script_to_family_ids' in debug:
            print '\nscript to family ids'
            for script, family_ids in sorted(script_to_family_ids.iteritems()):
                print script, family_ids

        supported_scripts = set(script_to_family_ids.keys())
        used_lang_data = get_used_lang_data(supported_scripts)
        if 'used_lang_data' in debug:
            print '\nused lang data'
            for lang, data in sorted(used_lang_data.iteritems()):
                used = ', '.join(data[0])
                unused = ', '.join(data[1])
                if unused:
                    unused = '(' + unused + ')'
                    if used:
                        unused = ' ' + unused
                print '%s: %s%s' % (lang, used, unused)

        # Languages with no English name cannot be presented; drop them.
        langs_to_delete = []
        for lang in used_lang_data.keys():
            if not cldr_data.get_english_language_name(lang):
                langs_to_delete.append(lang)
        if langs_to_delete:
            print 'deleting languages with no english name: %s' % langs_to_delete
            for lang in langs_to_delete:
                del used_lang_data[lang]

        lang_tag_to_family_ids = get_lang_tag_to_family_ids(
            used_lang_data, script_to_family_ids)

        region_to_family_ids = get_region_to_family_ids(script_to_family_ids)

        family_id_to_lang_tags = get_family_id_to_lang_tags(
            lang_tag_to_family_ids, families)
        if 'family_id_to_lang_tags' in debug:
            print '\nfamily id to lang tags'
            for family_id, lang_tags in sorted(
                    family_id_to_lang_tags.iteritems()):
                print '%s: %s' % (family_id, ','.join(sorted(lang_tags)))

        family_id_to_regions = get_family_id_to_regions(
            region_to_family_ids, families)

        family_id_to_default_lang_tag = get_family_id_to_default_lang_tag(
            family_id_to_lang_tags)
        if 'family_id_to_default_lang_tag' in debug:
            print '\nfamily id to default lang tag'
            for family_id, lang_tag in family_id_to_default_lang_tag.iteritems(
            ):
                print family_id, lang_tag

        used_lang_tags = get_used_lang_tags(
            lang_tag_to_family_ids.keys(),
            family_id_to_default_lang_tag.values())
        lang_tag_to_sample_data = get_lang_tag_to_sample_data(used_lang_tags)

        # find the samples that can't be displayed.
        # Each (sample_key, family_id) combination is tested only once; a
        # cached failure removes the language from that family as well.
        tested_keys = set()
        failed_keys = set()
        family_langs_to_remove = {}
        for lang_tag in sorted(lang_tag_to_sample_data):
            sample_info = lang_tag_to_sample_data[lang_tag]
            sample = sample_info[1]
            sample_key = sample_info[3]

            for family_id in sorted(lang_tag_to_family_ids[lang_tag]):
                full_key = sample_key + '-' + family_id
                if full_key in tested_keys:
                    if full_key in failed_keys:
                        print 'failed sample %s lang %s' % (full_key, lang_tag)
                        if family_id not in family_langs_to_remove:
                            family_langs_to_remove[family_id] = set()
                        family_langs_to_remove[family_id].add(lang_tag)
                    continue

                failed_cps = set()
                tested_keys.add(full_key)
                charset = families[family_id].charset
                for cp in sample:
                    # Common punctuation is exempt from coverage checks.
                    if ord(cp) in [
                            0xa, 0x28, 0x29, 0x2c, 0x2d, 0x2e, 0x3b, 0x5b,
                            0x5d, 0x2010
                    ]:
                        continue
                    if ord(cp) not in charset:
                        failed_cps.add(ord(cp))
                if failed_cps:
                    print 'sample %s cannot be displayed in %s (lang %s):\n  %s' % (
                        sample_key, family_id, lang_tag, '\n  '.join(
                            '%04x (%s)' % (cp, unichr(cp))
                            for cp in sorted(failed_cps)))
                    failed_keys.add(full_key)
                    if family_id not in family_langs_to_remove:
                        family_langs_to_remove[family_id] = set()
                    family_langs_to_remove[family_id].add(lang_tag)

        # Remove undisplayable languages from both directions of the mapping.
        for family_id in sorted(family_langs_to_remove):
            langs_to_remove = family_langs_to_remove[family_id]
            print 'remove from %s: %s' % (family_id, ','.join(
                sorted(langs_to_remove)))

            family_id_to_lang_tags[family_id] -= langs_to_remove
            default_lang_tag = family_id_to_default_lang_tag[family_id]
            if default_lang_tag in langs_to_remove:
                print '!removing default lang tag %s for family %s' % (
                    default_lang_tag, family_id)
            for lang in langs_to_remove:
                lang_tag_to_family_ids[lang] -= set([family_id])

        region_data = get_region_lat_lng_data(region_to_family_ids.keys())

        lang_tag_sort_order = get_lang_tag_sort_order(
            lang_tag_to_family_ids.keys())

        if self.no_build:
            print 'skipping build output'
            return

        # build outputs
        if self.no_zips:
            print 'skipping zip output'
        else:
            family_zip_info = self.build_zips(families)
            universal_zip_info = self.build_universal_zips(families)

            # build outputs not used by the json but linked to from the web page
            self.build_ttc_zips()

        if self.no_css:
            print 'skipping css output'
        else:
            family_css_info = self.build_css(families)

        if self.no_data or self.no_zips or self.no_css:
            # NOTE(review): message has no separator before reason
            # ('skipping data outputno zips'); also reason is empty exactly
            # when no_data is set, which looks inverted -- confirm intent.
            reason = '' if self.no_data else 'no zips' if self.no_zips else 'no css'
            print 'skipping data output%s' % reason
        else:
            self.build_data_json(families, family_zip_info, universal_zip_info,
                                 family_id_to_lang_tags, family_id_to_regions,
                                 lang_tag_to_family_ids, region_to_family_ids)

            self.build_families_json(families, family_id_to_lang_tags,
                                     family_id_to_default_lang_tag,
                                     family_id_to_regions, family_css_info,
                                     lang_tag_sort_order)

            self.build_misc_json(lang_tag_to_sample_data, region_data)

        if self.no_images:
            print 'skipping image output'
        else:
            self.build_images(families, family_id_to_lang_tags,
                              family_id_to_default_lang_tag,
                              lang_tag_to_sample_data)
    def build_data_json(self, families, family_zip_info, universal_zip_info,
                        family_id_to_lang_tags, family_id_to_regions,
                        lang_tag_to_family_ids, region_to_family_ids):
        """Build and write the 'data' json blob: family, familyOrder,
        lang, region and pkgSize sections."""

        out = collections.OrderedDict()

        # Families, with 'sans' and 'serif' pinned to the front and the
        # rest ordered by English display name.
        rest = sorted(
            (fid for fid in families if fid not in ('sans', 'serif')),
            key=lambda fid: families[fid].name)
        order = ['sans', 'serif'] + rest

        fam_section = collections.OrderedDict()
        for fid in order:
            fam = families[fid]
            _zip_name, hinted, unhinted = family_zip_info[fid]
            size_info = collections.OrderedDict()
            if hinted:
                size_info['hinted'] = hinted
            if unhinted:
                size_info['unhinted'] = unhinted
            fam_section[fid] = {
                'name': fam.name,
                'pkgSize': size_info,
                'fonts': len(fam.hinted_members or fam.unhinted_members),
                'langs': len(family_id_to_lang_tags[fid]),
                'regions': len(family_id_to_regions[fid]),
            }
        out['family'] = fam_section
        out['familyOrder'] = order

        # Languages: 'und-' tags are default samples, not shown in the UI,
        # so leave them out; sort the rest by English name.
        lang_section = collections.OrderedDict()
        visible = [t for t in lang_tag_to_family_ids
                   if not t.startswith('und-')]
        for tag in sorted(visible, key=cldr_data.get_english_language_name):
            info = collections.OrderedDict()
            en = cldr_data.get_english_language_name(tag)
            info['name'] = en
            info['families'] = sorted(lang_tag_to_family_ids[tag])
            native = cldr_data.get_native_language_name(tag)
            if native and native != en:
                info['keywords'] = [native]
            lang_section[tag] = info
        out['lang'] = lang_section

        # Regions ordered by English region name.
        region_section = collections.OrderedDict()
        for region in sorted(region_to_family_ids,
                             key=cldr_data.get_english_region_name):
            info = collections.OrderedDict()
            info['families'] = sorted(region_to_family_ids[region])
            info['keywords'] = [cldr_data.get_english_region_name(region)]
            region_section[region] = info
        out['region'] = region_section

        # Sizes of the universal (all-family) zips.
        universal = collections.OrderedDict()
        universal['hinted'] = universal_zip_info[1]
        universal['unhinted'] = universal_zip_info[2]
        out['pkgSize'] = universal

        self.write_json(out, 'data')