Ejemplo n.º 1
0
def csv_from_cmap_data(data, scripts, exclude_scripts):
  """Return the cmap data as CSV text with one column per script.

  Columns are ordered by lower-cased script name.  The first cell of each
  column is the quoted script name; the remaining cells are the script's code
  points, sorted and formatted as 'U+XXXX'.  Shorter columns are padded with
  empty cells.  Summary statistics are written to stderr.

  Args:
    data: cmap data whose 'table' is passed to
      cmap_data.create_map_from_table.
    scripts: if truthy, only scripts contained in it are emitted.
    exclude_scripts: if truthy, scripts contained in it are skipped.

  Returns:
    The CSV text, rows joined by newlines.
  """
  script_to_rowdata = cmap_data.create_map_from_table(data.table)
  cols = []
  max_lines = 0
  num_cells = 0
  for script in sorted(
      script_to_rowdata, key=lambda s: _script_to_name(s).lower()):
    if scripts and script not in scripts:
      continue
    # Guard against a None/empty exclusion collection instead of failing on
    # the membership test.
    if exclude_scripts and script in exclude_scripts:
      continue

    # Parse the ranges once and reuse the result for both the count and the
    # column contents.
    cps = tool_utils.parse_int_ranges(script_to_rowdata[script].ranges)
    num_cells += len(cps)
    col = ['"%s"' % _script_to_name(script)]
    col.extend('U+%04X' % cp for cp in sorted(cps))
    cols.append(col)
    max_lines = max(max_lines, len(col))

  num_cols = len(cols)
  num_cells += num_cols  # headers are not empty
  all_cells = num_cols * max_lines
  fmt = 'Columns: %d\nRows: %d\nNon-empty cells: %d\nCells: %d'
  # sys.stderr.write works on both Python 2 and Python 3.
  sys.stderr.write(fmt % (num_cols, max_lines, num_cells, all_cells) + '\n')
  cmap_lines = []
  cmap_lines.append(','.join(col[0] for col in cols))
  for i in range(1, max_lines):
    cmap_lines.append(','.join(col[i] if i < len(col) else '' for col in cols))
  return '\n'.join(cmap_lines)
Ejemplo n.º 2
0
def main():
  """Parse the command line and subset the cmaps of the given fonts.

  The include/exclude range arguments, when given, are joined with spaces and
  parsed by tool_utils.parse_int_ranges before being handed to
  subset_fonts_cmap together with the fonts, destination directory and
  bump_version flag.
  """

  def parse_bool(text):
    # argparse applies 'type' to the raw argument string, and bool('false')
    # is True, so type=bool could never switch the flag off with a readable
    # value.  The usual "false" spellings (and the empty string, which bool()
    # also maps to False) give False; everything else remains True.
    return text.lower() not in ('', '0', 'f', 'false', 'n', 'no', 'off')

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-i', '--include', help='ranges of characters to include',
      metavar='range', nargs='+')
  parser.add_argument(
      '-e', '--exclude', help='ranges of characters to exclude '
      '(applied after include)',
      metavar='range', nargs='+')
  parser.add_argument(
      '-d', '--dstdir', help='directory to write new files to',
      metavar='dir')
  parser.add_argument(
      '-b', '--bump_version', help='bump version (default true)',
      metavar='bool', type=parse_bool, default=True)
  parser.add_argument(
      'fonts', help='fonts to subset',
      metavar='font', nargs='+')
  args = parser.parse_args()

  if args.exclude:
    args.exclude = tool_utils.parse_int_ranges(' '.join(args.exclude))
  if args.include:
    args.include = tool_utils.parse_int_ranges(' '.join(args.include))
  subset_fonts_cmap(
      args.fonts, args.dstdir, exclude=args.exclude, include=args.include,
      bump_version=args.bump_version)
Ejemplo n.º 3
0
def report_compare(compare_result, detailed=True):
  """Print the code points added and removed per script.

  Args:
    compare_result: a (compare, base_cmap_data, target_cmap_data) tuple, where
      compare maps each script to an (added, removed) pair.
    detailed: if true, _print_detailed is also called for each non-empty
      added/removed collection.
  """
  compare, base_cmap_data, target_cmap_data = compare_result
  base_map = cmap_data.create_map_from_table(base_cmap_data.table)
  target_map = cmap_data.create_map_from_table(target_cmap_data.table)

  # Map each target code point to the set of scripts that contain it.
  inverted_target = collections.defaultdict(set)
  # items() and single-argument print(...) behave the same on Python 2 and 3.
  for script, row in target_map.items():
    for cp in tool_utils.parse_int_ranges(row.ranges):
      inverted_target[cp].add(script)

  base_title = title_from_metadata(base_cmap_data.meta)
  target_title = title_from_metadata(target_cmap_data.meta)

  print('base: %s' % base_title)
  print('target: %s' % target_title)
  for script in sorted(compare):
    added, removed = compare[script]
    if not (added or removed):
      continue
    name = base_map[script].name
    print('%s # %s' % (script, name))
    if added:
      print('  added (%d): %s' % (
          len(added), lint_config.write_int_ranges(added)))
      if detailed:
        # NOTE(review): unlike the 'removed' case, no inverted map is passed
        # here; kept as in the original.
        _print_detailed(added)
    if removed:
      print('  removed (%d): %s' % (
          len(removed), lint_config.write_int_ranges(removed)))
      if detailed:
        _print_detailed(removed, inverted_target)
Ejemplo n.º 4
0
def main():
  """Parse the command line and run block_coverage on a cmap data file."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      'cmap_file', metavar='file', help='cmap data file')
  arg_parser.add_argument(
      '-d', '--details', metavar='num', type=int, nargs='?',
      default=0, const=3,
      help='show details on N characters in each range'
      ' (3 if no value provided)')
  arg_parser.add_argument(
      '-s', '--summary', action='store_true',
      help='show summary of block usage only')
  arg_parser.add_argument(
      '-r', '--range', metavar='range', default='0-1ffff',
      help='range of characters to show (default 0-1ffff)')
  arg_parser.add_argument(
      '-sc', '--scripts', metavar='script', nargs='+', default=None,
      help='limit scripts to show')

  opts = arg_parser.parse_args()
  cps = tool_utils.parse_int_ranges(opts.range)
  first = min(cps)
  # Never go past the last Unicode code point.
  last = min(max(cps), 0x10ffff)

  # block_coverage receives an exclusive upper limit.
  script_filter = frozenset(opts.scripts) if opts.scripts else opts.scripts
  block_coverage(
      opts.cmap_file, first, last + 1, script_filter, opts.details,
      opts.summary)
def _create_flag_sets(data_dir):
    """Return a map from flag name to a (cp_set, boolean) pair.

    A flag gets added to a codepoint name when the boolean matches the
    result of 'cp in cp_set'.
    """
    # The sets below are hardcoded for now; they should become command-line
    # options (TODO).

    # Proposal: support some emoji in Noto even when no text variation
    # sequence is proposed for them; they can be removed for Android if
    # there is disagreement.
    all_emoji = unicode_data.get_emoji()
    proposed_variants = unicode_data.get_unicode_emoji_variants(
        'proposed_extra')
    emoji_only = all_emoji - proposed_variants

    # Code points currently in the Symbols2 font found in data_dir.
    font_path = path.join(data_dir, 'NotoSansSymbols2-Regular.ttf')
    current_sym2 = CodeList.fromfontcmap(font_path).codeset()

    # Code points the Symbols2 cmap text file in data_dir lists.
    cmap_path = path.join(data_dir, 'notosanssymbols2_cmap.txt')
    with open(cmap_path, 'r') as cmap_file:
        cmap_text = cmap_file.read()
    expect_sym2 = tool_utils.parse_int_ranges(cmap_text)

    # True: flag is set when cp is in the set; False: when it is not.
    return {
        'ref only': (expect_sym2, False),
        'emoji only': (emoji_only, True),
        'add': (expect_sym2 - current_sym2, True),
    }
Ejemplo n.º 6
0
def main():
  """Parse the command line and run block_coverage on a cmap data file."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      'cmap_file', metavar='file', help='cmap data file')
  arg_parser.add_argument(
      '-d', '--details', metavar='num', type=int, nargs='?',
      default=0, const=3,
      help='show details on N characters in each range'
      ' (3 if no value provided)')
  arg_parser.add_argument(
      '-s', '--summary', action='store_true',
      help='show summary of block usage only')
  arg_parser.add_argument(
      '-r', '--range', metavar='range', default='0-1ffff',
      help='range of characters to show (default 0-1ffff)')
  arg_parser.add_argument(
      '-sc', '--scripts', metavar='script', nargs='+', default=None,
      help='limit scripts to show')

  opts = arg_parser.parse_args()
  requested = tool_utils.parse_int_ranges(opts.range)
  lowest = min(requested)
  # Clamp the upper bound to the last Unicode code point.
  highest = min(max(requested), 0x10ffff)

  # The limit handed to block_coverage is one past the highest code point.
  only_scripts = frozenset(opts.scripts) if opts.scripts else opts.scripts
  block_coverage(
      opts.cmap_file, lowest, highest + 1, only_scripts, opts.details,
      opts.summary)
Ejemplo n.º 7
0
def generate_single(cmapdata, script, outfile):
  """Write the code points of the first row for 'script' to outfile.

  The row's ranges are parsed with tool_utils.parse_int_ranges and written
  with write_cp_list; a confirmation line goes to stderr.

  Raises:
    ValueError: if no row of cmapdata.table has the given script.
  """
  for row in cmapdata.table.rows:
    if script == row.script:
      break
  else:
    raise ValueError('no script "%s" in cmap data' % script)

  write_cp_list(tool_utils.parse_int_ranges(row.ranges), outfile)
  print('wrote %s to %s' % (script, outfile), file=sys.stderr)
Ejemplo n.º 8
0
def main():
    """Parse the command line and subset the cmaps of the given fonts.

    The include/exclude range arguments, when given, are joined with spaces
    and parsed by tool_utils.parse_int_ranges before being handed to
    subset_fonts_cmap together with the fonts, destination directory and
    bump_version flag.
    """

    def parse_bool(text):
        # argparse applies 'type' to the raw argument string, and
        # bool("false") is True, so type=bool could never switch the flag
        # off with a readable value.  The usual "false" spellings (and the
        # empty string, which bool() also maps to False) give False;
        # everything else remains True.
        return text.lower() not in ("", "0", "f", "false", "n", "no", "off")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--include",
        help="ranges of characters to include",
        metavar="range",
        nargs="+",
    )
    parser.add_argument(
        "-e",
        "--exclude",
        help="ranges of characters to exclude "
        "(applied after include)",
        metavar="range",
        nargs="+",
    )
    parser.add_argument("-d",
                        "--dstdir",
                        help="directory to write new files to",
                        metavar="dir")
    parser.add_argument(
        "-b",
        "--bump_version",
        help="bump version (default true)",
        metavar="bool",
        type=parse_bool,
        default=True,
    )
    parser.add_argument("fonts",
                        help="fonts to subset",
                        metavar="font",
                        nargs="+")
    args = parser.parse_args()

    if args.exclude:
        args.exclude = tool_utils.parse_int_ranges(" ".join(args.exclude))
    if args.include:
        args.include = tool_utils.parse_int_ranges(" ".join(args.include))
    subset_fonts_cmap(
        args.fonts,
        args.dstdir,
        exclude=args.exclude,
        include=args.include,
        bump_version=args.bump_version,
    )
Ejemplo n.º 9
0
def main():
    """Parse the command line and call modify_fonts on the given files."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-d", "--dstdir", metavar="dir", default="/tmp/vs",
        help="destination directory for modified files, default /tmp/vs")
    arg_parser.add_argument(
        "-p", "--presentation", choices=["emoji", "text"], default="emoji",
        help='presentation of glyphs in the font, default "emoji"')
    arg_parser.add_argument(
        "-s", "--suffix", metavar="suffix",
        help="suffix to add to file names for output, goes before extension")
    arg_parser.add_argument(
        "-o", "--output", metavar="filename",
        help="output file name, requires only one input file")
    arg_parser.add_argument(
        "-vs", "--vs_added", metavar="range", nargs="+",
        help="extra ranges to treat as having the requested presentation")
    arg_parser.add_argument(
        "files", metavar="file", nargs="+", help="files to modify")

    # argparse cannot handle values of named arguments that start with a
    # hyphen.  A workaround is to glue a short option and its value together,
    # e.g. '-s-foo'.  Neither parse_known_args nor putting '--' between the
    # option and its value helps.
    opts = arg_parser.parse_args()

    # Extra code points are only parsed when the option was given.
    extra_cps = (
        tool_utils.parse_int_ranges(" ".join(opts.vs_added))
        if opts.vs_added
        else None
    )

    modify_fonts(
        opts.files,
        presentation=opts.presentation,
        output=opts.output,
        suffix=opts.suffix,
        dst_dir=opts.dstdir,
        vs_added=extra_cps,
    )
Ejemplo n.º 10
0
def _covered_cps(cmap_file):
    """Return the union of the code points of all rows except 'EXCL' rows.

    The rows come from the table of the cmap data read from cmap_file.
    """
    covered = set()
    cmap_tree = cmap_data.read_cmap_data_file(cmap_file)
    included_rows = (
        row for row in cmap_tree.table.rows if not row.script == 'EXCL')
    for row in included_rows:
        covered |= tool_utils.parse_int_ranges(row.ranges)
    return covered
Ejemplo n.º 11
0
def _covered_cps(cmap_file):
  """Return the union of the code points of all rows except 'EXCL' rows.

  The rows come from the table of the cmap data read from cmap_file.
  """
  covered = set()
  cmap_tree = cmap_data.read_cmap_data_file(cmap_file)
  included_rows = (
      row for row in cmap_tree.table.rows if not row.script == 'EXCL')
  for row in included_rows:
    covered |= tool_utils.parse_int_ranges(row.ranges)
  return covered
Ejemplo n.º 12
0
def csv_from_cmap_data(data, scripts, exclude_scripts):
  """Return the cmap data as CSV text with one column per script.

  Columns are ordered by lower-cased script name.  Each column starts with
  the quoted script name (followed by '*' when the row has an 'xcount' other
  than -1) and continues with the sorted code points as four-or-more-digit
  hex; code points that come from the row's 'xranges' carry a trailing '*'.
  Summary statistics are written to stderr.

  Args:
    data: cmap data whose 'table' is passed to
      cmap_data.create_map_from_table.
    scripts: if truthy, only scripts contained in it are emitted.
    exclude_scripts: if truthy, scripts contained in it are skipped.

  Returns:
    The CSV text, rows joined by newlines.
  """
  rowdata_by_script = cmap_data.create_map_from_table(data.table)

  def is_wanted(script):
    if scripts and script not in scripts:
      return False
    return not (exclude_scripts and script in exclude_scripts)

  columns = []
  filled_cells = 0
  ordered_scripts = sorted(
      rowdata_by_script, key=lambda s: _script_to_name(s).lower())
  for script in ordered_scripts:
    if not is_wanted(script):
      continue

    rowdata = rowdata_by_script[script]
    has_xcount = int(getattr(rowdata, 'xcount', -1)) != -1
    header = '"%s%s"' % (_script_to_name(script), '*' if has_xcount else '')

    cps = tool_utils.parse_int_ranges(rowdata.ranges)
    xcps = frozenset()
    xranges = getattr(rowdata, 'xranges', None)
    if xranges is not None:
      xcps = frozenset(tool_utils.parse_int_ranges(xranges))
      cps |= xcps
    filled_cells += len(cps)

    cells = ['%04X%s' % (cp, '*' if cp in xcps else '') for cp in sorted(cps)]
    columns.append([header] + cells)

  num_cols = len(columns)
  max_lines = max([len(column) for column in columns] + [0])
  filled_cells += num_cols  # every header cell is non-empty
  stats = 'Columns: %d\nRows: %d\nNon-empty cells: %d\nCells: %d' % (
      num_cols, max_lines, filled_cells, num_cols * max_lines)
  sys.stderr.write(stats + '\n')

  # Row i takes cell i of every column, or an empty cell past a column's end.
  csv_rows = [
      ','.join(column[i] if i < len(column) else '' for column in columns)
      for i in range(max_lines)]
  return '\n'.join(csv_rows)
Ejemplo n.º 13
0
def csv_from_cmap_data(data, scripts, exclude_scripts):
  """Return the cmap data as CSV text with one column per script.

  Columns are ordered by lower-cased script name.  Each column starts with
  the quoted script name (followed by '*' when the row has an 'xcount' other
  than -1) and continues with the sorted code points in hex; code points
  that come from the row's 'xranges' carry a trailing '*'.  Summary
  statistics are written to stderr.

  Args:
    data: cmap data whose 'table' is passed to
      cmap_data.create_map_from_table.
    scripts: if truthy, only scripts contained in it are emitted.
    exclude_scripts: if truthy, scripts contained in it are skipped.

  Returns:
    The CSV text, rows joined by newlines.
  """
  script_to_rowdata = cmap_data.create_map_from_table(data.table)
  cols = []
  max_lines = 0
  num_cells = 0
  for script in sorted(
      script_to_rowdata, key=lambda s: _script_to_name(s).lower()):
    if scripts and script not in scripts:
      continue
    if exclude_scripts and script in exclude_scripts:
      continue

    rd = script_to_rowdata[script]
    star = int(getattr(rd, 'xcount', -1)) != -1
    col = [
        '"%s%s"' % (_script_to_name(script), '*' if star else '')
    ]
    cps = tool_utils.parse_int_ranges(rd.ranges)
    xranges = getattr(rd, 'xranges', None)
    # Identity test: None is a singleton, so 'is not' is the correct check.
    if xranges is not None:
      xcps = frozenset(tool_utils.parse_int_ranges(xranges))
      cps |= xcps
    else:
      xcps = frozenset()
    num_cells += len(cps)
    col.extend(
        '%04X%s' % (cp, '*' if cp in xcps else '')
        for cp in sorted(cps))
    cols.append(col)
    max_lines = max(max_lines, len(col))

  num_cols = len(cols)
  num_cells += num_cols  # headers are not empty
  all_cells = num_cols * max_lines
  fmt = 'Columns: %d\nRows: %d\nNon-empty cells: %d\nCells: %d'
  # sys.stderr.write works on both Python 2 and Python 3.
  sys.stderr.write(fmt % (num_cols, max_lines, num_cells, all_cells) + '\n')
  cmap_lines = []
  cmap_lines.append(','.join(col[0] for col in cols))
  for i in range(1, max_lines):
    cmap_lines.append(','.join(col[i] if i < len(col) else '' for col in cols))
  return '\n'.join(cmap_lines)
Ejemplo n.º 14
0
def main():
    """Parse the command line and subset the cmaps of the given fonts.

    The include/exclude range arguments, when given, are joined with spaces
    and parsed by tool_utils.parse_int_ranges before being handed to
    subset_fonts_cmap together with the fonts, destination directory and
    bump_version flag.
    """

    def parse_bool(text):
        # argparse applies 'type' to the raw argument string, and
        # bool('false') is True, so type=bool could never switch the flag
        # off with a readable value.  The usual "false" spellings (and the
        # empty string, which bool() also maps to False) give False;
        # everything else remains True.
        return text.lower() not in ('', '0', 'f', 'false', 'n', 'no', 'off')

    parser = argparse.ArgumentParser()
    parser.add_argument('-i',
                        '--include',
                        help='ranges of characters to include',
                        metavar='range',
                        nargs='+')
    parser.add_argument('-e',
                        '--exclude',
                        help='ranges of characters to exclude '
                        '(applied after include)',
                        metavar='range',
                        nargs='+')
    parser.add_argument('-d',
                        '--dstdir',
                        help='directory to write new files to',
                        metavar='dir')
    parser.add_argument('-b',
                        '--bump_version',
                        help='bump version (default true)',
                        metavar='bool',
                        type=parse_bool,
                        default=True)
    parser.add_argument('fonts',
                        help='fonts to subset',
                        metavar='font',
                        nargs='+')
    args = parser.parse_args()

    if args.exclude:
        args.exclude = tool_utils.parse_int_ranges(' '.join(args.exclude))
    if args.include:
        args.include = tool_utils.parse_int_ranges(' '.join(args.include))
    subset_fonts_cmap(args.fonts,
                      args.dstdir,
                      exclude=args.exclude,
                      include=args.include,
                      bump_version=args.bump_version)
Ejemplo n.º 15
0
def main():
    """Parse the command line and call modify_fonts on the given files."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-d', '--dstdir', metavar='dir', default='/tmp/vs',
        help='destination directory for modified files, default /tmp/vs')
    arg_parser.add_argument(
        '-p', '--presentation', choices=['emoji', 'text'], default='emoji',
        help='presentation of glyphs in the font, default "emoji"')
    arg_parser.add_argument(
        '-s', '--suffix', metavar='suffix',
        help='suffix to add to file names for output, goes before extension')
    arg_parser.add_argument(
        '-o', '--output', metavar='filename',
        help='output file name, requires only one input file')
    arg_parser.add_argument(
        '-vs', '--vs_added', metavar='range', nargs='+',
        help='extra ranges to treat as having the requested presentation')
    arg_parser.add_argument(
        'files', metavar='file', nargs='+', help='files to modify')

    # argparse cannot handle values of named arguments that start with a
    # hyphen.  A workaround is to glue a short option and its value together,
    # e.g. '-s-foo'.  Neither parse_known_args nor putting '--' between the
    # option and its value helps.
    opts = arg_parser.parse_args()

    # Extra code points are only parsed when the option was given.
    extra_cps = (
        tool_utils.parse_int_ranges(' '.join(opts.vs_added))
        if opts.vs_added
        else None)

    modify_fonts(opts.files,
                 presentation=opts.presentation,
                 output=opts.output,
                 suffix=opts.suffix,
                 dst_dir=opts.dstdir,
                 vs_added=extra_cps)
Ejemplo n.º 16
0
def main():
    """Parse the command line and run block_coverage on a cmap data file."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "cmap_file", metavar="file", help="cmap data file")
    arg_parser.add_argument(
        "-d", "--details", metavar="num", type=int, nargs="?",
        default=0, const=3,
        help="show details on N characters in each range"
        " (3 if no value provided)")
    arg_parser.add_argument(
        "-s", "--summary", action="store_true",
        help="show summary of block usage only")
    arg_parser.add_argument(
        "-r", "--range", metavar="range", default="0-1ffff",
        help="range of characters to show (default 0-1ffff)")
    arg_parser.add_argument(
        "-sc", "--scripts", metavar="script", nargs="+", default=None,
        help="limit scripts to show")

    opts = arg_parser.parse_args()
    requested = tool_utils.parse_int_ranges(opts.range)
    lowest = min(requested)
    # Clamp the upper bound to the last Unicode code point.
    highest = min(max(requested), 0x10FFFF)

    # The limit handed to block_coverage is one past the highest code point.
    only_scripts = frozenset(opts.scripts) if opts.scripts else opts.scripts
    block_coverage(
        opts.cmap_file, lowest, highest + 1, only_scripts, opts.details,
        opts.summary)
Ejemplo n.º 17
0
def get_unicode_emoji_variants(include_proposed='proposed'):
  """Returns the emoji characters that have both emoji and text presentations.

  Args:
    include_proposed: selects which set is returned.  A false value returns
      the base variants; 'proposed' also includes the ones proposed in
      2016/08; 'proposed_extra' additionally includes the emoji Noto proposes
      for text presentation treatment to align related characters.

  Returns:
    The selected set of emoji variant code points.

  Raises:
    ValueError: if include_proposed is true but is neither 'proposed' nor
      'proposed_extra'.
  """
  _load_unicode_emoji_variants()
  if not include_proposed:
    return _emoji_variants
  if include_proposed == 'proposed':
    return _emoji_variants_proposed
  if include_proposed == 'proposed_extra':
    extra = tool_utils.parse_int_ranges(
        '1f4b9 1f4c8-1f4ca 1f507 1f509-1f50a 1f44c')
    return _emoji_variants_proposed | extra
  # ValueError names the failure precisely; it subclasses Exception, so
  # callers that catch Exception keep working.
  raise ValueError(
      "include_proposed is %s which is not in ['proposed', 'proposed_extra']"
      % include_proposed)
Ejemplo n.º 18
0
def get_unicode_emoji_variants(include_proposed='proposed'):
  """Returns the emoji characters that have both emoji and text presentations.

  Args:
    include_proposed: selects which set is returned.  A false value returns
      the base variants; 'proposed' also includes the ones proposed in
      2016/08; 'proposed_extra' additionally includes the emoji Noto proposes
      for text presentation treatment to align related characters.

  Returns:
    The selected set of emoji variant code points.

  Raises:
    ValueError: if include_proposed is true but is neither 'proposed' nor
      'proposed_extra'.
  """
  _load_unicode_emoji_variants()
  if not include_proposed:
    return _emoji_variants
  if include_proposed == 'proposed':
    return _emoji_variants_proposed
  if include_proposed == 'proposed_extra':
    extra = tool_utils.parse_int_ranges(
        '1f4b9 1f4c8-1f4ca 1f507 1f509-1f50a 1f44c')
    return _emoji_variants_proposed | extra
  # ValueError names the failure precisely; it subclasses Exception, so
  # callers that catch Exception keep working.
  raise ValueError(
      "include_proposed is %s which is not in ['proposed', 'proposed_extra']"
      % include_proposed)
Ejemplo n.º 19
0
def main():
  """Parse the command line and call modify_fonts on the given files."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '-d', '--dstdir', metavar='dir', default='/tmp/vs',
      help='destination directory for modified files, default /tmp/vs')
  arg_parser.add_argument(
      '-p', '--presentation', choices=['emoji', 'text'], default='emoji',
      help='presentation of glyphs in the font, default "emoji"')
  arg_parser.add_argument(
      '-s', '--suffix', metavar='suffix',
      help='suffix to add to file names for output, goes before extension')
  arg_parser.add_argument(
      '-o', '--output', metavar='filename',
      help='output file name, requires only one input file')
  arg_parser.add_argument(
      '-vs', '--vs_added', metavar='range', nargs='+',
      help='extra ranges to treat as having the requested presentation')
  arg_parser.add_argument(
      'files', metavar='file', nargs='+', help='files to modify')

  # argparse cannot handle values of named arguments that start with a
  # hyphen.  A workaround is to glue a short option and its value together,
  # e.g. '-s-foo'.  Neither parse_known_args nor putting '--' between the
  # option and its value helps.
  opts = arg_parser.parse_args()

  # Extra code points are only parsed when the option was given.
  extra_cps = (
      tool_utils.parse_int_ranges(' '.join(opts.vs_added))
      if opts.vs_added
      else None)

  modify_fonts(
      opts.files,
      presentation=opts.presentation,
      output=opts.output,
      suffix=opts.suffix,
      dst_dir=opts.dstdir,
      vs_added=extra_cps)
Ejemplo n.º 20
0
def report_compare(compare_result, detailed=True):
  """Print a per-script comparison report of two cmap data sets.

  Args:
    compare_result: a (compare, base_cmap_data, target_cmap_data) tuple, where
      compare maps each script to an (added, removed, xadded, xremoved)
      tuple.
    detailed: passed through to report_cmap_compare.
  """
  compare, base_cmap_data, target_cmap_data = compare_result
  base_map = cmap_data.create_map_from_table(base_cmap_data.table)
  target_map = cmap_data.create_map_from_table(target_cmap_data.table)

  # Map each target code point to the set of scripts that contain it.
  inverted_target = collections.defaultdict(set)
  # items() and single-argument print(...) behave the same on Python 2 and 3.
  for script, row in target_map.items():
    for cp in tool_utils.parse_int_ranges(row.ranges):
      inverted_target[cp].add(script)

  base_title = title_from_metadata(base_cmap_data.meta)
  target_title = title_from_metadata(target_cmap_data.meta)

  print('base: %s' % base_title)
  print('target: %s' % target_title)
  for script in sorted(compare):
    added, removed, xadded, xremoved = compare[script]
    label = '%s # %s' % (script, base_map[script].name)
    report_cmap_compare(
        label, added, removed, xadded, xremoved, inverted_target, detailed)
Ejemplo n.º 21
0
def report_compare(compare_result, detailed=True):
    """Print a per-script comparison report of two cmap data sets.

    compare_result is a (compare, base_cmap_data, target_cmap_data) tuple;
    compare maps each script to an (added, removed, xadded, xremoved) tuple.
    The titles of both data sets are printed first, then
    report_cmap_compare is called for each script in sorted order.
    """
    compare, base_cmap_data, target_cmap_data = compare_result
    base_map = cmap_data.create_map_from_table(base_cmap_data.table)
    target_map = cmap_data.create_map_from_table(target_cmap_data.table)

    # Code point -> scripts of the target data that contain it.
    scripts_by_cp = collections.defaultdict(set)
    for target_script, target_row in target_map.items():
        for cp in tool_utils.parse_int_ranges(target_row.ranges):
            scripts_by_cp[cp].add(target_script)

    titles = [
        title_from_metadata(cmap.meta)
        for cmap in (base_cmap_data, target_cmap_data)
    ]
    print('base: %s' % titles[0])
    print('target: %s' % titles[1])

    for script in sorted(compare):
        added, removed, xadded, xremoved = compare[script]
        report_cmap_compare(
            '%s # %s' % (script, base_map[script].name),
            added, removed, xadded, xremoved, scripts_by_cp, detailed)
Ejemplo n.º 22
0
def _create_cp_to_scripts(data, only_scripts=None):
  """Return (cp_to_scripts, all_scripts) built from the rows of data.table.

  Rows are skipped when only_scripts is truthy and lacks the row's script, or
  when the script is one of 'Zinh', 'Zyyy', 'Zzzz'.  The CJK-related scripts
  are reported as 'CJK', and Latn/Grek/Cyrl as 'LGC'.

  Returns:
    A defaultdict mapping each code point to the set of (possibly merged)
    script names that contain it, and the set of all such script names.
  """
  ignored = frozenset(['Zinh', 'Zyyy', 'Zzzz'])
  # Scripts that are folded into a single group name.
  group_of = dict.fromkeys(
      'Bopo,Hang,Hani,Hans,Hant,Hira,Jpan,Kana,Kore'.split(','), 'CJK')
  group_of.update(dict.fromkeys('Latn,Grek,Cyrl'.split(','), 'LGC'))

  scripts_by_cp = collections.defaultdict(set)
  seen_scripts = set()
  for row in data.table.rows:
    script = row.script
    if (only_scripts and script not in only_scripts) or script in ignored:
      continue
    script = group_of.get(script, script)
    seen_scripts.add(script)
    for cp in tool_utils.parse_int_ranges(row.ranges):
      scripts_by_cp[cp].add(script)
  return scripts_by_cp, seen_scripts
Ejemplo n.º 23
0
def _create_cp_to_scripts(data, only_scripts=None):
  """Return (cp_to_scripts, all_scripts) built from the rows of data.table.

  Rows are skipped when only_scripts is truthy and lacks the row's script, or
  when the script is one of 'Zinh', 'Zyyy', 'Zzzz'.  The CJK-related scripts
  are reported as 'CJK', and Latn/Grek/Cyrl as 'LGC'.

  Returns:
    A defaultdict mapping each code point to the set of (possibly merged)
    script names that contain it, and the set of all such script names.
  """
  ignored = frozenset(['Zinh', 'Zyyy', 'Zzzz'])
  # Scripts that are folded into a single group name.
  group_of = dict.fromkeys(
      'Bopo,Hang,Hani,Hans,Hant,Hira,Jpan,Kana,Kore'.split(','), 'CJK')
  group_of.update(dict.fromkeys('Latn,Grek,Cyrl'.split(','), 'LGC'))

  scripts_by_cp = collections.defaultdict(set)
  seen_scripts = set()
  for row in data.table.rows:
    script = row.script
    if (only_scripts and script not in only_scripts) or script in ignored:
      continue
    script = group_of.get(script, script)
    seen_scripts.add(script)
    for cp in tool_utils.parse_int_ranges(row.ranges):
      scripts_by_cp[cp].add(script)
  return scripts_by_cp, seen_scripts
Ejemplo n.º 24
0
def _create_cp_to_scripts(data, only_scripts=None):
    """Return (cp_to_scripts, all_scripts) built from the rows of data.table.

    Rows are skipped when only_scripts is truthy and lacks the row's script,
    or when the script is one of "Zinh", "Zyyy", "Zzzz".  The CJK-related
    scripts are reported as "CJK", and Latn/Grek/Cyrl as "LGC".

    Returns:
        A defaultdict mapping each code point to the set of (possibly
        merged) script names that contain it, and the set of all such
        script names.
    """
    ignored = frozenset(["Zinh", "Zyyy", "Zzzz"])
    # Scripts that are folded into a single group name.
    group_of = dict.fromkeys(
        "Bopo,Hang,Hani,Hans,Hant,Hira,Jpan,Kana,Kore".split(","), "CJK")
    group_of.update(dict.fromkeys("Latn,Grek,Cyrl".split(","), "LGC"))

    scripts_by_cp = collections.defaultdict(set)
    seen_scripts = set()
    for row in data.table.rows:
        script = row.script
        if (only_scripts and script not in only_scripts) or script in ignored:
            continue
        script = group_of.get(script, script)
        seen_scripts.add(script)
        for cp in tool_utils.parse_int_ranges(row.ranges):
            scripts_by_cp[cp].add(script)
    return scripts_by_cp, seen_scripts
def get_block_data(defined_cps, coverages, no_empty=False):
    """Return per-block coverage data for every Unicode block.

    For each block name from unicode_data.block_names(), the block's
    characters are intersected with defined_cps; blocks left empty are
    skipped.  If no_empty is true, blocks that no coverage touches are
    skipped as well.

    Returns:
        A list with one entry per kept block: the block range extended with
        (block name, defined code points in the block, list of the code
        points each coverage has in the block).
    """
    per_coverage_cps = [
        tool_utils.parse_int_ranges(coverage.cmapdata.ranges)
        for coverage in coverages
    ]
    result = []
    for name in unicode_data.block_names():
        block_range = unicode_data.block_range(name)
        block_cps = unicode_data.block_chars(name)
        # NOTE(review): &= updates a mutable set in place, so this may modify
        # the object returned by unicode_data.block_chars -- confirm intended.
        block_cps &= defined_cps
        if not block_cps:
            continue
        cov_info = [cps & block_cps for cps in per_coverage_cps]
        if no_empty and not any(cov_info):
            continue
        result.append(block_range + (name, block_cps, cov_info))
    return result
Ejemplo n.º 26
0
def get_block_data(defined_cps, coverages, no_empty=False):
  """Return per-block coverage data for every Unicode block.

  For each block name from unicode_data.block_names(), the block's characters
  are intersected with defined_cps; blocks left empty are skipped.  If
  no_empty is true, blocks that no coverage touches are skipped as well.

  Returns:
    A list with one entry per kept block: the block range extended with
    (block name, defined code points in the block, list of the code points
    each coverage has in the block).
  """
  per_coverage_cps = [
      tool_utils.parse_int_ranges(coverage.cmapdata.ranges)
      for coverage in coverages]
  result = []
  for name in unicode_data.block_names():
    block_range = unicode_data.block_range(name)
    block_cps = unicode_data.block_chars(name)
    # NOTE(review): &= updates a mutable set in place, so this may modify the
    # object returned by unicode_data.block_chars -- confirm intended.
    block_cps &= defined_cps
    if not block_cps:
      continue
    cov_info = [cps & block_cps for cps in per_coverage_cps]
    if no_empty and not any(cov_info):
      continue
    result.append(block_range + (name, block_cps, cov_info))
  return result
Ejemplo n.º 27
0
 def fromlisttext(cplist):
   """Build a CodeList from range text, keeping order and duplicates.

   The text is parsed with tool_utils.parse_int_ranges as a list (not a set),
   with duplicates and compressed notation allowed.
   """
   return CodeList.fromlist(
       tool_utils.parse_int_ranges(
           cplist,
           allow_compressed=True,
           allow_duplicates=True,
           return_set=False))
Ejemplo n.º 28
0
 def fromrangetext(cpranges):
   """Build a CodeList from range text (compressed notation allowed)."""
   codeset = tool_utils.parse_int_ranges(cpranges, allow_compressed=True)
   return CodeList.fromset(codeset)
Ejemplo n.º 29
0
 def fromrangetext(cpranges):
     """Build a CodeList from the code points parsed out of range text."""
     codeset = tool_utils.parse_int_ranges(cpranges)
     return CodeList.fromset(codeset)
Ejemplo n.º 30
0
def get_cps_from_cmap_data_file(data_file):
  """Return the union of the code points of every row in a cmap data file."""
  rows = cmap_data.read_cmap_data_file(data_file).table.rows
  all_cps = set()
  for row_ranges in (row.ranges for row in rows):
    all_cps |= tool_utils.parse_int_ranges(row_ranges)
  return all_cps
Ejemplo n.º 31
0
 def fromrangetext(cpranges):
   """Build a CodeList from the code points parsed out of range text."""
   codeset = tool_utils.parse_int_ranges(cpranges)
   return CodeList.fromset(codeset)
def get_defined_cps(version=default_version, exclude_ranges=None):
    """Return the defined characters of a Unicode version.

    If exclude_ranges is truthy, the code points it describes (parsed with
    tool_utils.parse_int_ranges) are removed from the result.
    """
    cps = unicode_data.defined_characters(version)
    if not exclude_ranges:
        return cps
    # NOTE(review): -= updates a mutable set in place, so this may modify the
    # object returned by unicode_data.defined_characters -- confirm intended.
    cps -= tool_utils.parse_int_ranges(exclude_ranges)
    return cps
Ejemplo n.º 33
0
"""Display unicode coverage of a set of cmaps."""

import argparse
import collections

from nototools import cmap_data
from nototools import unicode_data
from nototools import tool_utils

# Placeholder script set that _get_scripts returns for a code point that has
# no scripts and is not in _OMITTED.
_MISSING_SCRIPTS = frozenset(['<MISSING>'])
# Placeholder script set that _get_scripts returns for a code point that has
# no scripts but is in _OMITTED.
_OMITTED_SCRIPTS = frozenset(['(omitted)'])
# Code points reported as '(omitted)' instead of '<MISSING>'.  The text is
# parsed by tool_utils.parse_int_ranges; the trailing '#' notes inside the
# string label each group of ranges.
_OMITTED = tool_utils.parse_int_ranges("""
    0001-000c 000e-001f  # C0 controls
    007f-009f  # del and C1 controls
    d800-dfff  # surrogates
    e000-f8ff  # pua
    fe00-fe0f  # variation selectors
    feff  # BOM
    e0000-e007f # tags
    fe000-fe4e4 fe4ef-fe82b fe82d fe838-ffffd  # plane 15 PUA - emoji
    100000-10ffff  # plane 16 pua""")
# NOTE(review): the use of this list is not visible in this excerpt; it names
# the merged 'LGC' group followed by its three member scripts.
_LGC_LIST = ['LGC', 'Latn', 'Grek', 'Cyrl']


def _get_scripts(cp, cp_to_scripts):
  """Return the scripts recorded for cp, or a placeholder set.

  When cp_to_scripts has no (or an empty) entry for cp, the result is
  _OMITTED_SCRIPTS if cp is in _OMITTED and _MISSING_SCRIPTS otherwise.
  """
  known = cp_to_scripts.get(cp)
  if known:
    return known
  if cp in _OMITTED:
    return _OMITTED_SCRIPTS
  return _MISSING_SCRIPTS


def _script_names(scripts):
Ejemplo n.º 34
0
# Code points in the Android emoji set; each entry is annotated with the
# character and its Unicode name.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}
# ASCII control characters (U+0000-U+001F) are deliberately not supported.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Sorted list of every code point in the three excluded sets.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)

# Absolute path of the directory that contains this file.
SCRIPT_PATH = pathlib.Path(__file__).parent.absolute()
# Scripts directory reported by sysconfig: on Windows ('nt') the default
# scheme is queried, on any other OS the '<os.name>_user' scheme.
PIP_USER = (
    sysconfig.get_path('scripts')
    if os.name == 'nt'
    else sysconfig.get_path('scripts', f'{os.name}_user')
)


def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Removes a set of characters from a font file's cmap table."""
    font = ttLib.TTFont(f'{SCRIPT_PATH}/{infile}')
Ejemplo n.º 35
0
import argparse
import collections

from nototools import cmap_data
from nototools import unicode_data
from nototools import tool_utils

# Placeholder script set that _get_scripts returns for a code point that has
# no scripts and is not in _OMITTED.
_MISSING_SCRIPTS = frozenset(['<MISSING>'])
# Placeholder script set that _get_scripts returns for a code point that has
# no scripts but is in _OMITTED.
_OMITTED_SCRIPTS = frozenset(['(omitted)'])
# Code points reported as '(omitted)' instead of '<MISSING>'.  The text is
# parsed by tool_utils.parse_int_ranges; the trailing '#' notes inside the
# string label each group of ranges.
# NOTE(review): the line inside the string that starts with '#' appears to be
# a disabled set of ranges -- confirm parse_int_ranges ignores it.
_OMITTED = tool_utils.parse_int_ranges("""
    0001-000c 000e-001f  # C0 controls
    007f-009f  # del and C1 controls
    d800-dfff  # surrogates
    e000-f8ff  # pua
    fe00-fe0f  # variation selectors
    feff  # BOM
    e0000-e007f # tags
    e0100-e01ff # supplementary variation selectors
    f0000-ffffd # supplementary PUA
    # fe000-fe4e4 fe4ef-fe82b fe82d fe838-ffffd  # plane 15 PUA - emoji
    100000-10ffff  # plane 16 pua""")
# NOTE(review): the use of this list is not visible in this excerpt; it names
# the merged 'LGC' group followed by its three member scripts.
_LGC_LIST = ['LGC', 'Latn', 'Grek', 'Cyrl']


def _get_scripts(cp, cp_to_scripts):
  """Return the scripts recorded for cp, or a placeholder set.

  When cp_to_scripts has no (or an empty) entry for cp, the result is
  _OMITTED_SCRIPTS if cp is in _OMITTED and _MISSING_SCRIPTS otherwise.
  """
  known = cp_to_scripts.get(cp)
  if known:
    return known
  if cp in _OMITTED:
    return _OMITTED_SCRIPTS
  return _MISSING_SCRIPTS

Ejemplo n.º 36
0
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}

# TV symbols, see https://github.com/googlei18n/noto-fonts/issues/557
TV_SYMBOLS_FOR_SUBSETTED = tool_utils.parse_int_ranges('1f19b-1f1ac 1f23b')

# Union of unicode_data.get_presentation_default_emoji() and ANDROID_EMOJI.
EMOJI = unicode_data.get_presentation_default_emoji() | ANDROID_EMOJI


def _format_set(char_set, name, filename):
    """Write char_set to filename as the source text of a set literal.

    The output starts with '<name> = {', has one line per code point in
    sorted order (hex value, then the character and its unicode_data name as
    a comment) and ends with '}'.  The file is written as UTF-8 and a
    confirmation line is printed.

    Args:
        char_set: iterable of integer code points.
        name: identifier used on the left-hand side of the assignment.
        filename: path of the file to write.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3, where unichr does not exist
    lines = ['%s = {' % name]
    for cp in sorted(char_set):
        # A separate local keeps the 'name' parameter from being shadowed.
        char_name = unicode_data.name(cp)
        lines.append('    0x%04X,  # %s %s' % (cp, to_char(cp), char_name))
    lines.append('}\n')
    with codecs.open(filename, 'w', 'UTF-8') as f:
        f.write('\n'.join(lines))
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('wrote %s' % filename)

Ejemplo n.º 37
0
def get_cps_from_cmap_data_file(data_file):
  """Return the union of the code points of every row in a cmap data file."""
  rows = cmap_data.read_cmap_data_file(data_file).table.rows
  all_cps = set()
  for row_ranges in (row.ranges for row in rows):
    all_cps |= tool_utils.parse_int_ranges(row_ranges)
  return all_cps
Ejemplo n.º 38
0
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}

# TV symbols, see https://github.com/googlei18n/noto-fonts/issues/557
TV_SYMBOLS_FOR_SUBSETTED = tool_utils.parse_int_ranges(
  '1f19b-1f1ac 1f23b')

# Union of unicode_data.get_presentation_default_emoji() and ANDROID_EMOJI.
EMOJI = unicode_data.get_presentation_default_emoji() | ANDROID_EMOJI

def _format_set(char_set, name, filename):
  """Write char_set to filename as the source text of a set literal.

  The output starts with '<name> = {', has one line per code point in sorted
  order (hex value, then the character and its unicode_data name as a
  comment) and ends with '}'.  The file is written as UTF-8 and a
  confirmation line is printed.

  Args:
    char_set: iterable of integer code points.
    name: identifier used on the left-hand side of the assignment.
    filename: path of the file to write.
  """
  try:
    to_char = unichr  # Python 2
  except NameError:
    to_char = chr  # Python 3, where unichr does not exist
  lines = ['%s = {' % name]
  for cp in sorted(char_set):
    # A separate local keeps the 'name' parameter from being shadowed.
    char_name = unicode_data.name(cp)
    lines.append('    0x%04X,  # %s %s' % (cp, to_char(cp), char_name))
  lines.append('}\n')
  with codecs.open(filename, 'w', 'UTF-8') as f:
    f.write('\n'.join(lines))
  # Single-argument print(...) gives the same output on Python 2 and 3.
  print('wrote %s' % filename)


def subset_symbols(srcdir, dstdir):
Ejemplo n.º 39
0
    0x2600, # ☀ BLACK SUN WITH RAYS
    0x2601, # ☁ CLOUD
    0X260E, # ☎ BLACK TELEPHONE
    0x261D, # ☝ WHITE UP POINTING INDEX
    0x263A, # ☺ WHITE SMILING FACE
    0x2660, # ♠ BLACK SPADE SUIT
    0x2663, # ♣ BLACK CLUB SUIT
    0x2665, # ♥ BLACK HEART SUIT
    0x2666, # ♦ BLACK DIAMOND SUIT
    0x270C, # ✌ VICTORY HAND
    0x2744, # ❄ SNOWFLAKE
    0x2764, # ❤ HEAVY BLACK HEART
}

# ASCII control characters (U+0000-U+001F) are deliberately not supported.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Sorted list of every code point in the three excluded sets.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)


def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Copy a font file, deleting the given characters from its cmap table.

    The font is loaded from infile, the code points in exclude are removed
    with font_data.delete_from_cmap, and the result is saved to outfile.
    """
    loaded_font = ttLib.TTFont(infile)
    font_data.delete_from_cmap(loaded_font, exclude)
    loaded_font.save(outfile)


# Directory passed to ttc_utils.ttcfile_extract when a TTC file is unpacked.
TEMP_DIR = 'subsetted'

def remove_codepoints_from_ttc(ttc_name):
    otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR)
Ejemplo n.º 40
0
def get_defined_cps(version=default_version, exclude_ranges=None):
  """Return the defined characters of a Unicode version.

  If exclude_ranges is truthy, the code points it describes (parsed with
  tool_utils.parse_int_ranges) are removed from the result.
  """
  cps = unicode_data.defined_characters(version)
  if not exclude_ranges:
    return cps
  # NOTE(review): -= updates a mutable set in place, so this may modify the
  # object returned by unicode_data.defined_characters -- confirm intended.
  cps -= tool_utils.parse_int_ranges(exclude_ranges)
  return cps