Exemple #1
0
def main():
    """Command-line entry point: build font cmap data and emit it as XML.

    Options:
      -o/--outfile [name]  write the data to a file; a bare ``-o`` uses
                           'font_cmaps_temp.xml'.  When the option is absent
                           (or names an empty string) the XML is printed to
                           stdout instead.
      -p/--paths path...   values handed unchanged to ``font_cmap_data``.
    """
    fallback_name = 'font_cmaps_temp.xml'

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-o', '--outfile',
        metavar='name',
        nargs='?',
        const=fallback_name,
        default=None,
        help='output file to write ("%s" if no name provided)' % fallback_name)
    arg_parser.add_argument(
        '-p', '--paths',
        metavar='path',
        nargs='*',
        default=None,
        help='list of directory paths to search for noto fonts '
        '(default is standard noto phase2 paths)')
    options = arg_parser.parse_args()

    data = font_cmap_data(options.paths)
    if not options.outfile:
        # No destination requested: decode the serialized bytes and print.
        xml_text = cmap_data.write_cmap_data(data, pretty=True)
        print(unicode(xml_text, "utf-8"))
        return
    cmap_data.write_cmap_data_file(data, options.outfile, pretty=True)
Exemple #2
0
def ttc_filenames(ttc, data):
  """Guess a file name for every font listed in ``ttc.fonts``.

  For each font, its first table tagged 'name' is decompiled from ``data`` and
  the first record with (nameID, platformID, platEncID, langID) equal to
  (6, 3, 1, 0x409) is decoded from UTF-16BE and used as the postscript name.
  The guess is that name, plus '-Regular' when it contains no '-', plus '.'
  and the font entry's ``fmt``.  When no usable name is found the entry is
  '<unknown x>' with ``fmt`` in place of 'x'.

  Returns a list with one string per font, in ``ttc.fonts`` order.
  """
  ps_key = (6, 3, 1, 0x409)
  result = []
  for entry in ttc.fonts:
    guess = None
    table = next(
        (ttc.tables[i] for i in entry.tables if ttc.tables[i].tag == 'name'),
        None)
    if table:
      start = table.offset
      parsed = NameTable()
      parsed.decompile(data[start:start + table.length], None)
      match = next(
          (rec for rec in parsed.names
           if (rec.nameID, rec.platformID, rec.platEncID, rec.langID)
           == ps_key),
          None)
      ps_name = unicode(match.string, 'UTF-16BE') if match is not None else None
      if ps_name:
        style_suffix = '' if '-' in ps_name else '-Regular'
        guess = ps_name + style_suffix + '.' + entry.fmt
    result.append(guess or ('<unknown %s>' % entry.fmt))

  return result
    def sub(esc_match):
        """Replace one matched hex escape with the character it encodes.

        Group 1 of ``esc_match`` is the escape letter, which fixes how many
        hex digits belong to the escape: 'x' -> 2, 'u' -> 4, 'U' -> 6.
        Group 2 is the digit run; any digits beyond that fixed width are
        returned unchanged after the decoded character.

        The character is built by decoding an 8-digit backslash-U escape from
        ASCII bytes.  That works on Python 2 (narrow builds included) and on
        Python 3, where ``unichr`` does not exist and ``str()`` refuses to
        decode an argument that is already text.

        Raises:
            ValueError: if the escape letter is not x/u/U, if group 2 holds
                fewer digits than the escape needs, or if the value is above
                U+10FFFF.
        """
        widths = {'x': 2, 'u': 4, 'U': 6}
        esc_val = esc_match.group(2)
        esc_len = widths.get(esc_match.group(1))
        if esc_len is None:
            raise ValueError('internal error')

        if len(esc_val) < esc_len:
            error = 'Unicode escape too short: "%s"' % (esc_match.group(0))
            raise ValueError(error)
        unival = int(esc_val[:esc_len], 16)
        if unival > 0x10ffff:
            error = 'Unicode escape value too large: "%X"' % unival
            raise ValueError(error)
        # bytes -> text via the unicode_escape codec; valid for every code
        # point up to U+10FFFF on both major Python versions.
        escape = '\\U%08X' % unival
        prefix = escape.encode('ascii').decode('unicode_escape')
        return prefix + esc_val[esc_len:]
Exemple #4
0
def _xml_to_dict(element):
    result = {}
    for child in list(element):
        if 'alt' in child.attrib:
            continue
        key = child.get('type')
        key = key.replace('_', '-')
        result[key] = unicode(child.text)
    return result
Exemple #5
0
def _xml_to_dict(element):
    result = {}
    for child in list(element):
        if "alt" in child.attrib:
            continue
        key = child.get("type")
        key = key.replace("_", "-")
        result[key] = unicode(child.text)
    return result
Exemple #6
0
def get_name_records(font):
    """Return the font's 'name' records as a {nameID: text} dictionary.

    Only records whose (platformID, platEncID, langID) equals (3, 1, 0x409)
    are kept; their ``string`` is decoded from UTF-16BE.  If a nameID occurs
    more than once, the last matching record wins.
    """
    wanted = (3, 1, 0x409)
    return {
        rec.nameID: unicode(rec.string, 'UTF-16BE')
        for rec in font['name'].names
        if (rec.platformID, rec.platEncID, rec.langID) == wanted
    }
Exemple #7
0
def get_scripts(text):
    """Classify the characters of ``text`` (decoded as UTF-8) by script.

    Returns a ``(scripts, zyyy_chars)`` tuple:
      scripts     set of script codes reported by ``unicode_data.script``,
                  leaving out 'Zyyy' and 'Zinh'.
      zyyy_chars  the characters reported as 'Zyyy'; each is stored either as
                  the character itself or as its integer code point, depending
                  on how it compares with the threshold literal below.

    A few code points (NUL, LF, CR, space, NBSP, U+FEFF) are skipped entirely.
    """
    # ignore these chars, we assume they are ok in any script
    skipped = {0x00, 0x0A, 0x0D, 0x20, 0xA0, 0xFEFF}
    found_scripts = set()
    common_chars = set()
    for ch in unicode(text, 'utf8'):
        code = ord(ch)
        if code in skipped:
            continue
        script = unicode_data.script(ch)
        if script == 'Zinh':  # inherited
            continue
        if script != 'Zyyy':
            found_scripts.add(script)
        # NOTE(review): on Python 2 without unicode_literals this literal is
        # not a single character -- confirm the intended threshold.
        elif ch < '\u00fe':  # common/undetermined
            common_chars.add(ch)
        else:
            common_chars.add(code)
    return found_scripts, common_chars
Exemple #8
0
def main():
  """Command-line entry point: compute lint cmap data and emit it as XML.

  The script list comes from --scripts (checked by ``_check_scripts``) or,
  when none are given, from the comma-separated ``_PHASE_TWO_SCRIPTS``.  With
  --outfile the XML is written to that file (a bare --outfile picks
  'lint_cmap_<unicode_version>.xml') and the file name is reported on stderr;
  otherwise the XML is printed to stdout.
  """
  default_version = 9.0

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--scripts', nargs='*', metavar='code',
      help='list of pseudo-script codes, empty for all phase 2 scripts')
  parser.add_argument(
      '--unicode_version', type=float, metavar='version',
      default=default_version,
      help='version of unicode to use (default %s)' % default_version)
  parser.add_argument(
      '--unicode_only', action='store_true',
      help='only use unicode data, not noto-specific data')
  parser.add_argument(
      '-p', '--phase', type=int, metavar='phase', default=3,
      help='noto phase (default 3)')
  parser.add_argument(
      '--outfile', nargs='?', const='-default-', metavar='fname',
      help='write to output file, otherwise to stdout')
  parser.add_argument(
      '--verbose', action='store_true',
      help='log to stderr as each script is complete')
  opts = parser.parse_args()

  if opts.scripts:
    scripts = _check_scripts(opts.scripts)
  else:
    scripts = {code.strip() for code in _PHASE_TWO_SCRIPTS.split(',')}

  cmapdata = get_cmap_data(
      scripts, opts.unicode_version, opts.phase, opts.unicode_only,
      opts.verbose)

  target = opts.outfile
  if not target:
    print(unicode(cmap_data.write_cmap_data(cmapdata, pretty=True), "utf-8"))
    return
  if target == '-default-':
    # Bare --outfile: derive the name from the requested unicode version.
    target = 'lint_cmap_%s.xml' % opts.unicode_version
  sys.stderr.write('writing %s\n' % target)
  cmap_data.write_cmap_data_file(cmapdata, target, pretty=True)
Exemple #9
0
def _get_language_name_from_file(language, cldr_file_path):
    cache_key = (language, cldr_file_path)
    try:
        return _LANGUAGE_NAME_FROM_FILE_CACHE[cache_key]
    except KeyError:
        pass

    data_file = path.join(CLDR_DIR, cldr_file_path)
    try:
        root = ElementTree.parse(data_file).getroot()
    except IOError:
        _LANGUAGE_NAME_FROM_FILE_CACHE[cache_key] = None
        return None

    parent = root.find('.//languages')
    if parent is None:
        return None
    for tag in parent:
        assert tag.tag == 'language'
        if tag.get('type').replace('_', '-') == language:
            _LANGUAGE_NAME_FROM_FILE_CACHE[cache_key] = unicode(tag.text)
            return _LANGUAGE_NAME_FROM_FILE_CACHE[cache_key]
    return None
Exemple #10
0
def main():
    """Merge per-script Noto Sans fonts into combined regional fonts.

    For each target group in the table, and for the Regular and Bold weights,
    the source font files that exist on disk are merged, with the '' entry
    (marked LGC in the table) placed first.  Sources without a GSUB table are
    replaced by the result of ``add_gsub_to_font``.  When the first source
    differs from the target, every 'name' record has the first source's font
    name replaced by the target's.  The merged font is saved under
    'combined/unhinted'.  A target/weight pair with fewer than two existing
    source files is skipped.
    """
    merge_table = {
        'Historic': [
            'Avestan',
            'Carian',
            'Egyptian Hieroglyphs',
            'Imperial Aramaic',
            'Pahlavi',  # Should be 'Inscriptional Pahlavi',
            'Parthian',  # Should be 'Inscriptional Parthian',
            'Linear B',
            'Lycian',
            'Lydian',
            'Mandaic',
            'Old Persian',
            'Old South Arabian',
            'Old Turkic',
            'Osmanya',
            'Phags-Pa',
            'Phoenician',
            'Samaritan',
            'Sumero-Akkadian Cuneiform',
            'Ugaritic',
        ],
        'South Asian': [
            'Devanagari',
            'Bengali',
            'Gurmukhi',
            'Gujarati',
            'Oriya',
            'Tamil',
            'Telugu',
            'Kannada',
            'Malayalam',
            'Sinhala',
            'Thaana',
            'Brahmi',
            'Kaithi',
            'Kharoshthi',  # Move to Historic?
            'Lepcha',
            'Limbu',
            'Meetei Mayek',
            'Ol Chiki',
            'Saurashtra',
            'Syloti Nagri',
        ],
        'Southeast Asian': [
            'Thai',
            'Lao',
            'Khmer',
            'Batak',
            'Buginese',
            'Buhid',
            'Cham',
            'Hanunoo',
            'Javanese',
            'Kayah Li',
            'New Tai Lue',
            'Rejang',
            'Sundanese',
            'Tagalog',
            'Tagbanwa',
            'Tai Le',
            'Tai Tham',
            'Tai Viet',
        ],
        '': [  # LGC,
            'Armenian',
            'Bamum',
            'Canadian Aboriginal',
            'Cherokee',
            'Coptic',
            'Cypriot Syllabary',
            'Deseret',
            'Ethiopic',
            'Georgian',
            'Glagolitic',
            'Gothic',
            'Hebrew',
            'Lisu',
            'NKo',
            'Ogham',
            'Old Italic',
            'Runic',
            'Shavian',
            'Tifinagh',
            'Vai',
        ],
    }

    for ui_target in ('South Asian', 'Southeast Asian'):
        add_ui_alternative(merge_table, ui_target)

    for merge_target in sorted(merge_table):
        for weight in ('Regular', 'Bold'):
            merger = merge.Merger()
            scripts = merge_table[merge_target]
            if '' not in scripts:
                scripts = [''] + scripts  # The LGC font

            # Name every candidate file first, then keep those on disk.
            paths = [make_font_file_name(name, weight) for name in scripts]
            paths = [p for p in paths if os.path.isfile(p)]
            if len(paths) < 2:
                continue

            print('Merging Noto Sans %s %s' % (merge_target, weight))

            paths = [p if has_gsub_table(p) else add_gsub_to_font(p)
                     for p in paths]
            font = merger.merge(paths)

            lead = scripts[0]
            if lead != merge_target:
                for rec in font['name'].names:
                    text = unicode(rec.string, 'UTF-16BE')
                    # Rewrite the spaced name, then the punctuation-free one.
                    for rename in (make_font_name, make_puncless_font_name):
                        text = text.replace(rename(lead), rename(merge_target))
                    rec.string = text.encode('UTF-16BE')

            out_path = make_font_file_name(
                merge_target,
                weight,
                directory='combined/unhinted')
            font.save(out_path)
Exemple #11
0
def cp_to_str(cp):
    """Return the text string holding the single code point ``cp``.

    The string is produced by formatting ``cp`` as an 8-digit backslash-U
    escape and decoding it from ASCII bytes with the 'unicode_escape' codec.
    This covers the whole range up to U+10FFFF on Python 2 (narrow builds
    yield a surrogate pair) and on Python 3, without relying on the
    Python-2-only ``unichr``/``unicode`` builtins or on ``str()`` decoding an
    argument that is already text.

    Args:
        cp: integer code point, 0 through 0x10FFFF.

    Returns:
        The corresponding text string.

    Raises:
        UnicodeDecodeError: if ``cp`` is outside the valid range.
    """
    return (r'\U%08X' % cp).encode('ascii').decode('unicode_escape')
def get_sample_from_sample_file(language, script):
    """Return the sample text stored for ``language``-``script``, or None.

    The text is read from '<language>-<script>.txt' inside
    ``SAMPLE_TEXT_DIR``, stripped of leading and trailing ASCII whitespace,
    and decoded as UTF-8.

    Args:
        language: language code forming the first part of the file name.
        script: script code forming the second part of the file name.

    Returns:
        The decoded sample text, or None when the file does not exist.

    Raises:
        UnicodeDecodeError: if the file content is not valid UTF-8.
    """
    filepath = path.join(SAMPLE_TEXT_DIR, language + '-' + script + '.txt')
    if not path.exists(filepath):
        return None
    # Read raw bytes and decode explicitly: this behaves the same on Python 2
    # and 3, and the context manager closes the handle even on error.
    with open(filepath, 'rb') as sample_file:
        return sample_file.read().strip().decode('UTF-8')