예제 #1
0
 def test_age(self):
     """Check unicode_data.age() against known code point/version pairs."""
     expected_ages = (
         (0xE000, "1.1"),
         (0xE0021, "3.1"),
         (0x20BD, "7.0"),
         (0x2B820, "8.0"),
         (0x104B0, "9.0"),
     )
     # Pairs are checked in order; the first mismatch fails the test.
     for code_point, version in expected_ages:
         self.assertEqual(unicode_data.age(code_point), version)
예제 #2
0
 def test_age(self):
     """Verify the version string age() reports for sample code points."""
     samples = [
         (0xE000, '1.1'),
         (0xE0021, '3.1'),
         (0x20BD, '7.0'),
         (0x2B820, '8.0'),
         (0x104B0, '9.0'),
     ]
     for cp, expected in samples:
         self.assertEqual(unicode_data.age(cp), expected)
예제 #3
0
 def test_age(self):
     """Check age() for code points with known ages and one without."""
     known = (
         (0xE000, '1.1'),
         (0xE0021, '3.1'),
         (0x20BD, '7.0'),
         (0x2B820, '8.0'),
     )
     for cp, expected in known:
         self.assertEqual(unicode_data.age(cp), expected)
     # below will fail once unicode 9 character age data updates
     self.assertIsNone(unicode_data.age(0x104B0))
예제 #4
0
 def test_age(self):
     """Exercise age(): four code points with an age, one expected to have none."""
     age = unicode_data.age
     for cp, expected in zip(
             (0xE000, 0xE0021, 0x20BD, 0x2B820),
             ('1.1', '3.1', '7.0', '8.0')):
         self.assertEqual(age(cp), expected)
     # below will fail once unicode 9 character age data updates
     self.assertIsNone(age(0x104B0))
예제 #5
0
def _defined_characters_in_range(range_str):
    """Return the set of code points in ``range_str`` that are defined.

    A code point from ``_range_string_to_set(range_str)`` is kept only when
    ``unicode_data.is_defined`` accepts it and ``unicode_data.age`` does not
    return ``None`` for it.
    """
    return {
        code_point
        for code_point in _range_string_to_set(range_str)
        if unicode_data.is_defined(code_point)
        and unicode_data.age(code_point) is not None
    }
def generate_text(outfile, title, fonts, targets, flag_sets, data_dir):
    """Write a plain-text font/target report to ``outfile``.

    The report starts with ``title`` and a 'Fonts:' section listing each
    (key, name, font path relative to ``data_dir``) entry of ``fonts``.  It is
    followed by one table per entry of ``targets``; each row shows a 1-based
    index, the code point in hex, one column per used font (the font key when
    one of its code lists contains the code point, dashes otherwise), the
    code point's age and its flagged name.
    """
    print >> outfile, title
    print >> outfile
    print >> outfile, 'Fonts:'
    for key, keyinfos in fonts:
        for font, font_name, _ in keyinfos:
            if font:
                rel_font = path.relpath(font, data_dir)
            else:
                rel_font = '(no font)'
            print >> outfile, '  %s: %s (%s)' % (key, font_name, rel_font)
    print >> outfile

    for target_name, codelist, used_fonts in targets:
        print >> outfile
        print >> outfile, target_name
        columns = ['idx  code']
        for used in used_fonts:
            columns.append(used[0])
        columns.append('age name')
        print >> outfile, ' '.join(columns)
        # Each trailing-comma print keeps the row on one line, space separated.
        for index, cp in enumerate(codelist.codes(), 1):
            print >> outfile, '%3d' % index,
            print >> outfile, '%5s' % ('%04x' % cp),
            for rkey, keyinfos in used_fonts:
                covered = any(
                    font_codes.contains(cp) for _, _, font_codes in keyinfos)
                print >> outfile, rkey if covered else ('-' * len(rkey)),
            print >> outfile, unicode_data.age(cp),
            flagged = _flagged_name(cp, flag_sets)
            print >> outfile, flagged
예제 #7
0
def _print_detailed(cps, inverted_target=None):
  last_block = None
  for cp in sorted(cps):
    block = unicode_data.block(cp)
    if block != last_block:
      print '    %s' % block
      last_block = block
    script = unicode_data.script(cp)
    extensions = unicode_data.script_extensions(cp) - set([script])
    if extensions:
      extensions = ' (%s)' % ','.join(sorted(extensions))
    else:
      extensions = ''
    if not inverted_target:
      extra = ''
    elif cp not in inverted_target:
      extra = ' !missing'
    else:
      scripts = sorted(inverted_target[cp])
      if len(scripts) > 3:
        script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1]
      else:
        script_text = ', '.join(scripts)
      extra = ' (in %s)' % script_text
    print '    %6s %4s %2s %3s %s%s%s' % (
        '%04x' % cp,
        script,
        unicode_data.category(cp),
        unicode_data.age(cp),
        unicode_data.name(cp, ''),
        extensions,
        extra)
예제 #8
0
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
  """Report code points in the sequences that are not valid emoji code points.

  This is a 'pre-check': every code point of every sequence must be an emoji
  code point or one of the specific code points used in forming emoji
  sequences (ZWJ, combining enclosing keycap, the emoji variation selector,
  the unknown-flag PUA value and the members of TAG_SET).  Offending code
  points are listed on stderr; nothing is returned or raised.

  Args:
    sorted_seq_to_filepath: mapping from a code point sequence to the file
      path associated with it.
    unicode_version: None to accept every emoji (including proposed ones), or
      the highest Unicode version whose emoji are accepted.  It is converted
      with float(), so it is assumed to be a number or a numeric string --
      TODO confirm against callers.
  """

  valid_cps = set(unicode_data.get_emoji())
  if unicode_version is None or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE:
    valid_cps |= unicode_data.proposed_emoji_cps()
  else:
    # age() appears to return a version string such as '7.0', or None when no
    # age is known.  Comparing that directly with the version limit is not a
    # numeric comparison (and raises TypeError on Python 3), so compare floats
    # and leave out code points whose age is unknown.
    max_age = float(unicode_version)
    ages = ((cp, unicode_data.age(cp)) for cp in valid_cps)
    valid_cps = set(
        cp for cp, age in ages if age is not None and float(age) <= max_age)
  valid_cps.add(0x200d)  # ZWJ
  valid_cps.add(0x20e3)  # combining enclosing keycap
  valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
  valid_cps.add(0xfe82b)  # PUA value for unknown flag
  valid_cps |= TAG_SET  # used in subregion tag sequences

  # Map each offending code point to the file paths of sequences using it.
  not_emoji = {}
  for seq, fp in sorted_seq_to_filepath.items():
    for cp in seq:
      if cp not in valid_cps:
        not_emoji.setdefault(cp, []).append(fp)

  if not_emoji:
    print(
        'check valid emoji cps: %d non-emoji cp found' % len(not_emoji),
        file=sys.stderr)
    for cp in sorted(not_emoji):
      fps = not_emoji[cp]
      print(
          'check valid emoji cps: %04x (in %d sequences)' % (cp, len(fps)),
          file=sys.stderr)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Report code points in the sequences that are not valid emoji code points.

    This is a 'pre-check': every code point of every sequence must be an
    emoji code point or one of the specific code points used in forming emoji
    sequences (ZWJ, combining enclosing keycap, the emoji variation selector,
    the unknown-flag PUA value and the members of TAG_SET).  Offending code
    points are listed on stderr; nothing is returned or raised.

    Args:
        sorted_seq_to_filepath: mapping from a code point sequence to the
            file path associated with it.
        unicode_version: None to accept every emoji (including proposed
            ones), or the highest Unicode version whose emoji are accepted.
            It is converted with float(), so it is assumed to be a number or
            a numeric string -- TODO confirm against callers.
    """

    def _not_newer_than(cp, max_age):
        # age() appears to return a version string such as '7.0', or None
        # when no age is known; unknown ages are treated as too new.
        age = unicode_data.age(cp)
        return age is not None and float(age) <= max_age

    valid_cps = set(unicode_data.get_emoji())
    if unicode_version is None or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE:
        valid_cps |= unicode_data.proposed_emoji_cps()
    else:
        # Compare numerically: comparing the age string with the version
        # limit directly is not a numeric comparison and raises TypeError on
        # Python 3.
        max_age = float(unicode_version)
        valid_cps = set(cp for cp in valid_cps
                        if _not_newer_than(cp, max_age))
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag
    valid_cps |= TAG_SET  # used in subregion tag sequences

    # Map each offending code point to the file paths of sequences using it.
    not_emoji = {}
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not_emoji:
        print('check valid emoji cps: %d non-emoji cp found' % len(not_emoji),
              file=sys.stderr)
        for cp in sorted(not_emoji):
            fps = not_emoji[cp]
            print('check valid emoji cps: %04x (in %d sequences)' %
                  (cp, len(fps)),
                  file=sys.stderr)
예제 #10
0
def _defined_characters_in_range(range_str):
    """Collect the defined Unicode code points covered by a range string.

    Only code points that ``unicode_data.is_defined`` accepts and for which
    ``unicode_data.age`` returns something other than ``None`` are included.
    """

    def _is_usable(code):
        if not unicode_data.is_defined(code):
            return False
        return unicode_data.age(code) is not None

    return set(filter(_is_usable, _range_string_to_set(range_str)))
예제 #11
0
def _print_detailed(cps, inverted_target=None):
    """Print a detail line per code point, grouped under block headings.

    Code points are visited in sorted order and a heading is printed whenever
    the block changes.  Consecutive code points whose script is "Zzzz" are
    collapsed into a single summary line.  Every other code point gets a line
    with its hex value, script, category, age and name, followed by any
    script extensions and, when ``inverted_target`` is given and non-empty,
    either " !missing" or the (abbreviated) sorted values stored for that
    code point in ``inverted_target``.

    Args:
        cps: iterable of code points.
        inverted_target: optional mapping from code point to a collection of
            names to show after "font".
    """
    last_block = None
    undefined_start = -1
    undefined_end = -1

    def show_undefined(start, end):
        """Print the pending run of "Zzzz" code points, if there is one."""
        if start >= 0:
            if end > start:
                # start and end both belong to the run, so it covers
                # end - start + 1 code points.
                print("      %04x-%04x Zzzz <%d undefined>" %
                      (start, end, end - start + 1))
            else:
                print("      %04x Zzzz <1 undefined>" % start)

    for cp in sorted(cps):
        block = unicode_data.block(cp)
        # Flush the pending run when the block changes or when this code
        # point does not directly follow the run.
        if block != last_block or (undefined_end > -1
                                   and cp > undefined_end + 1):
            show_undefined(undefined_start, undefined_end)
            undefined_start, undefined_end = -1, -1
            if block != last_block:
                print("    %s" % block)
                last_block = block
        script = unicode_data.script(cp)
        if script == "Zzzz":
            # Start a new run or extend the current one.
            if undefined_start >= 0:
                undefined_end = cp
            else:
                undefined_start, undefined_end = cp, cp
            continue

        show_undefined(undefined_start, undefined_end)
        undefined_start, undefined_end = -1, -1
        extensions = unicode_data.script_extensions(cp) - {script}
        if extensions:
            extensions = " (script %s)" % ", ".join(sorted(extensions))
        else:
            extensions = ""
        if not inverted_target:
            extra = ""
        elif cp not in inverted_target:
            extra = " !missing"
        else:
            scripts = sorted(inverted_target[cp])
            if len(scripts) > 3:
                # Show the first three and the last, eliding the middle.
                script_text = ", ".join(scripts[:3]) + "... " + scripts[-1]
            else:
                script_text = ", ".join(scripts)
            extra = " (font %s)" % script_text
        print("    %6s %4s %2s %3s %s%s%s" % (
            "%04x" % cp,
            script,
            unicode_data.category(cp),
            unicode_data.age(cp),
            unicode_data.name(cp, ""),
            extensions,
            extra,
        ))
    # Flush a run that reaches the end of the input.
    show_undefined(undefined_start, undefined_end)
예제 #12
0
def _print_detailed(cps, inverted_target=None):
  """Print a detail line per code point, grouped under block headings.

  Code points are visited in sorted order and a heading is printed whenever
  the block changes.  Consecutive code points whose script is 'Zzzz' are
  collapsed into a single summary line.  Every other code point gets a line
  with its hex value, script, category, age and name, followed by any script
  extensions and, when inverted_target is given and non-empty, either
  ' !missing' or the (abbreviated) sorted values stored for that code point
  in inverted_target.

  Each print below takes one parenthesized argument, which behaves the same
  as a print statement and as the print function.

  Args:
    cps: iterable of code points.
    inverted_target: optional mapping from code point to a collection of
      names to show after 'in'.
  """
  last_block = None
  undefined_start = -1
  undefined_end = -1

  def show_undefined(start, end):
    """Print the pending run of 'Zzzz' code points, if there is one."""
    if start >= 0:
      if end > start:
        # start and end both belong to the run, so it covers
        # end - start + 1 code points.
        print('      %04x-%04x Zzzz <%d undefined>' % (
            start, end, end - start + 1))
      else:
        print('      %04x Zzzz <1 undefined>' % start)

  for cp in sorted(cps):
    block = unicode_data.block(cp)
    # Flush the pending run when the block changes or when this code point
    # does not directly follow the run.
    if block != last_block or (undefined_end > -1 and cp > undefined_end + 1):
      show_undefined(undefined_start, undefined_end)
      undefined_start, undefined_end = -1, -1
      if block != last_block:
        print('    %s' % block)
        last_block = block
    script = unicode_data.script(cp)
    if script == 'Zzzz':
      # Start a new run or extend the current one.
      if undefined_start >= 0:
        undefined_end = cp
      else:
        undefined_start, undefined_end = cp, cp
      continue

    show_undefined(undefined_start, undefined_end)
    undefined_start, undefined_end = -1, -1
    extensions = unicode_data.script_extensions(cp) - set([script])
    if extensions:
      extensions = ' (%s)' % ','.join(sorted(extensions))
    else:
      extensions = ''
    if not inverted_target:
      extra = ''
    elif cp not in inverted_target:
      extra = ' !missing'
    else:
      scripts = sorted(inverted_target[cp])
      if len(scripts) > 3:
        # Show the first three and the last, eliding the middle.
        script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1]
      else:
        script_text = ', '.join(scripts)
      extra = ' (in %s)' % script_text
    print('    %6s %4s %2s %3s %s%s%s' % (
        '%04x' % cp,
        script,
        unicode_data.category(cp),
        unicode_data.age(cp),
        unicode_data.name(cp, ''),
        extensions,
        extra))
  # Flush a run that reaches the end of the input.
  show_undefined(undefined_start, undefined_end)
예제 #13
0
def _set_ompl():
    """Set up OMPL.

    OMPL is defined to be the list of mirrored pairs in Unicode 5.1:
    http://www.microsoft.com/typography/otspec/ttochap1.htm#ltrrtl

    The module-level OMPL is rebound to a dict mapping each character of the
    bidi mirroring glyph data whose age is at most 5.1 to its mirrored glyph.
    """

    global OMPL
    unicode_data.load_data()
    bmg_data = unicode_data._bidi_mirroring_glyph_data

    ompl = {}
    for char, bmg in bmg_data.items():
        age = unicode_data.age(char)
        # age() may return None when no age is known for a character;
        # float(None) would raise TypeError, and such a character cannot be
        # shown to belong to Unicode 5.1, so leave it out.
        if age is not None and float(age) <= 5.1:
            ompl[char] = bmg
    OMPL = ompl
예제 #14
0
def _set_ompl():
    """Set up OMPL.

    OMPL is defined to be the list of mirrored pairs in Unicode 5.1:
    http://www.microsoft.com/typography/otspec/ttochap1.htm#ltrrtl

    The module-level OMPL is rebound to a dict mapping each character of the
    bidi mirroring glyph data whose age is at most 5.1 to its mirrored glyph.
    """

    def _in_unicode_5_1(char):
        # age() may return None when no age is known for a character;
        # float(None) would raise TypeError, so treat that as "not in 5.1".
        age = unicode_data.age(char)
        return age is not None and float(age) <= 5.1

    global OMPL
    unicode_data.load_data()
    bmg_data = unicode_data._bidi_mirroring_glyph_data
    OMPL = {char: bmg for (char, bmg) in bmg_data.items()
            if _in_unicode_5_1(char)}
예제 #15
0
 def generate_text(self, metrics, flag_sets):
   """Return this target's code point table as plain text.

   The first line is self.name and the second a column header.  They are
   followed by one line per code point of self.codelist holding its 0-based
   index, its hex value, one column per entry of self.used_fonts (the font
   key when one of its code lists contains the code point, dashes otherwise),
   its age and its flagged name.

   Args:
     metrics: not used by this method.
     flag_sets: passed through to _flagged_name.

   Returns:
     The lines joined with newlines.
   """
   lines = [self.name]
   header = ['idx  code']
   header.extend(f[0] for f in self.used_fonts)
   header.append('age name')
   lines.append(' '.join(header))
   for index, cp in enumerate(self.codelist.codes()):
     line = ['%3d' % index, '%5s' % ('%04x' % cp)]
     for rkey, keyinfos in self.used_fonts:
       match = any(codelist.contains(cp) for _, _, codelist in keyinfos)
       line.append(rkey if match else ('-' * len(rkey)))
     # age() may return None when no age is known; format it so that the
     # join below does not raise TypeError on a non-string item.
     line.append('%s' % unicode_data.age(cp))
     line.append(_flagged_name(cp, flag_sets))
     lines.append(' '.join(line))
   return '\n'.join(lines)
def _generate_table(index, target, context, flag_sets):
    """Build the HTML heading and table for one target.

    ``target`` is a (name, codelist, used_fonts) triple.  The result starts
    with an <h3> whose id is 'target_<index>', followed by a table with one
    row per code point of the code list; the header row is repeated every 20
    rows.  Each row has the code point, one cell per used font, the age and
    the flagged name.  The pieces are joined with newlines.
    """
    name, codelist, used_fonts = target

    def context_string(codelist, cp):
        # Render the mapped character, wrapped in ``context`` when one is set.
        cps = unichr(codelist.mapped_code(cp))
        if context:
            return context % cps
        return cps

    header = _generate_header(used_fonts)
    lines = ['<h3 id="target_%d">%s</h3>' % (index, name), '<table>']
    for row_number, cp in enumerate(codelist.codes()):
        if row_number % 20 == 0:
            lines.append(header)

        cells = ['<tr>', '<td class="code">U+%04x' % cp]
        for rkey, keyinfos in used_fonts:
            css_class = None
            text = None
            # Use the first font entry whose code list contains cp.
            for position, (font, _, rcodelist) in enumerate(keyinfos):
                if not rcodelist.contains(cp):
                    continue
                if len(keyinfos) > 1:
                    css_class = '%s_%d' % (rkey, position)
                else:
                    css_class = rkey
                css_class = replace_nonalpha(css_class)
                if font:
                    text = context_string(rcodelist, cp)
                else:
                    text = ' * '
                    css_class += ' star'
                break
            if css_class:
                cells.append('<td class="%s">%s' % (css_class, text))
            else:
                cells.append('<td>&nbsp;')
        cells.append('<td class="age">%s' % unicode_data.age(cp))
        cells.append('<td class="name">%s' % _flagged_name(cp, flag_sets))
        lines.append(''.join(cells))
    lines.append('</table>')
    return '\n'.join(lines)
예제 #17
0
 def generate_text(self, metrics, flag_sets):
     """Return this target's code point table as plain text.

     The first line is self.name and the second a column header.  They are
     followed by one line per code point of self.codelist holding its
     0-based index, its hex value, one column per entry of self.used_fonts
     (the font key when one of its code lists contains the code point,
     dashes otherwise), its age and its flagged name.

     Args:
         metrics: not used by this method.
         flag_sets: passed through to _flagged_name.

     Returns:
         The lines joined with newlines.
     """
     lines = [self.name]
     header = ["idx  code"]
     header.extend(f[0] for f in self.used_fonts)
     header.append("age name")
     lines.append(" ".join(header))
     for index, cp in enumerate(self.codelist.codes()):
         line = ["%3d" % index, "%5s" % ("%04x" % cp)]
         for rkey, keyinfos in self.used_fonts:
             match = any(
                 codelist.contains(cp) for _, _, codelist in keyinfos)
             line.append(rkey if match else ("-" * len(rkey)))
         # age() may return None when no age is known; format it so that the
         # join below does not raise TypeError on a non-string item.
         line.append("%s" % unicode_data.age(cp))
         line.append(_flagged_name(cp, flag_sets))
         lines.append(" ".join(line))
     return "\n".join(lines)
예제 #18
0
def main():
    """Checks the coverage of all Roboto fonts.

    Reads res/char_requirements.tsv, builds the set of required characters
    from it, and prints, for every font returned by load_fonts(), the
    required characters that the font is missing.  Exits with status 1 when
    at least one font is missing a required character.
    """
    with open('res/char_requirements.tsv') as char_reqs_file:
        char_reqs_data = char_reqs_file.read()

    # The format of the data to be parsed is like the following:
    # General Punctuation\t2000..206F\t111\t35\t54\t0\tEverything except 2028..202E, 2060..2064, and 2066..206F
    # Currency Symbols\t20A0..20CF\t29\t5\t24\t1\t
    required_set = set()
    for line in char_reqs_data.split('\n'):
        if line.startswith('#'):  # Skip comment lines
            continue
        fields = line.split('\t')
        if not fields[0]:
            continue  # Skip the first line and empty lines
        block_range = fields[1]
        full_coverage_required = (fields[5] == '1')
        exceptions = fields[6]
        required_set.update(
            _find_required_chars(block_range, full_coverage_required,
                                 exceptions))

    def _at_most_unicode_7(ch):
        # age() may return None when no age is known for a character;
        # float(None) would raise TypeError, so skip such characters.
        age = unicode_data.age(ch)
        return age is not None and float(age) <= 7.0

    # Skip Unicode 8.0 characters
    required_set = {ch for ch in required_set if _at_most_unicode_7(ch)}

    # Skip ASCII and C1 controls.  Build the union from sets: adding two
    # range() results only works where range() returns a list.
    required_set -= set(range(0, 0x20)) | set(range(0x7F, 0xA0))

    # The single-argument parenthesized prints below behave the same as a
    # print statement and as the print function.
    missing_char_found = False
    for font in load_fonts():
        font_coverage = coverage.character_set(font)
        missing_chars = required_set - font_coverage
        if missing_chars:
            missing_char_found = True
            font_name = font_data.font_name(font)
            print('Characters missing from %s:' % font_name)
            for char in sorted(missing_chars):
                _print_char(char)
            print('')

    if missing_char_found:
        sys.exit(1)
예제 #19
0
def main():
    """Checks the coverage of all Roboto fonts.

    Reads res/char_requirements.tsv, builds the set of required characters
    from it, and prints, for every font returned by load_fonts(), the
    required characters that the font is missing.  Exits with status 1 when
    at least one font is missing a required character.
    """
    with open('res/char_requirements.tsv') as char_reqs_file:
        char_reqs_data = char_reqs_file.read()

    # The format of the data to be parsed is like the following:
    # General Punctuation\t2000..206F\t111\t35\t54\t0\tEverything except 2028..202E, 2060..2064, and 2066..206F
    # Currency Symbols\t20A0..20CF\t29\t5\t24\t1\t
    required_set = set()
    for line in char_reqs_data.split('\n'):
        if line.startswith('#'):  # Skip comment lines
            continue
        columns = line.split('\t')
        if not columns[0]:
            continue  # Skip the first line and empty lines
        block_range = columns[1]
        full_coverage_required = (columns[5] == '1')
        exceptions = columns[6]
        required_set.update(
            _find_required_chars(block_range,
                                 full_coverage_required,
                                 exceptions))

    # Skip Unicode 8.0 characters.  age() may return None when no age is
    # known for a character; float(None) would raise TypeError, so such
    # characters are skipped as well.
    ages = [(ch, unicode_data.age(ch)) for ch in required_set]
    required_set = {ch for ch, age in ages
                    if age is not None and float(age) <= 7.0}

    # Skip ASCII and C1 controls.  Build the union from sets: adding two
    # range() results only works where range() returns a list.
    required_set -= set(range(0, 0x20)) | set(range(0x7F, 0xA0))

    # The single-argument parenthesized prints below behave the same as a
    # print statement and as the print function.
    missing_char_found = False
    for font in load_fonts():
        font_coverage = coverage.character_set(font)
        missing_chars = required_set - font_coverage
        if missing_chars:
            missing_char_found = True
            font_name = font_data.font_name(font)
            print('Characters missing from %s:' % font_name)
            for char in sorted(missing_chars):
                _print_char(char)
            print('')

    if missing_char_found:
        sys.exit(1)
예제 #20
0
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Ensure all cps in the sequences are valid emoji cps.

    This is a 'pre-check': every code point of every sequence must be an
    emoji code point or one of the specific code points used in forming emoji
    sequences (ZWJ, combining enclosing keycap, the emoji variation selector,
    the unknown-flag PUA value and the members of TAG_SET).  Offending code
    points are listed on stderr and the process then exits with an error
    message.

    Args:
        sorted_seq_to_filepath: mapping from a code point sequence to the
            file path associated with it.
        unicode_version: None to accept every emoji (including proposed
            ones), or the highest Unicode version whose emoji are accepted.
            It is converted with float(), so it is assumed to be a number or
            a numeric string -- TODO confirm against callers.

    Raises:
        SystemExit: if any sequence contains a code point that is not valid.
    """

    valid_cps = set(unicode_data.get_emoji())
    if unicode_version is None or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE:
        valid_cps |= unicode_data.proposed_emoji_cps()
    else:
        # age() appears to return a version string such as '7.0', or None
        # when no age is known.  Comparing that directly with the version
        # limit raises TypeError for a numeric limit and compares
        # lexicographically for a string one, so compare floats and leave out
        # code points whose age is unknown.
        max_age = float(unicode_version)
        ages = ((cp, unicode_data.age(cp)) for cp in valid_cps)
        valid_cps = set(cp for cp, age in ages
                        if age is not None and float(age) <= max_age)
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag
    valid_cps |= TAG_SET  # used in subregion tag sequences

    # Map each offending code point to the file paths of sequences using it.
    not_emoji = {}
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not not_emoji:
        return

    print(f'check valid emoji cps: {len(not_emoji)} non-emoji cp found',
          file=sys.stderr)
    for cp in sorted(not_emoji):
        fps = not_emoji[cp]
        print(
            f'check the following cp: {cp} - {fps[0]} (in {len(fps)} sequences)',
            file=sys.stderr)

    # sys.exit rather than the exit() helper, which is only installed by the
    # site module and is intended for interactive use.
    sys.exit(
        "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
    )
예제 #21
0
 def test_age(self):
     """Check age() for three code points with an age and one without."""
     with_age = {0xE000: '1.1', 0xE0021: '3.1', 0x20BD: '7.0'}
     # Same order as the hard-coded assertions this table replaces.
     for cp in (0xE000, 0xE0021, 0x20BD):
         self.assertEqual(unicode_data.age(cp), with_age[cp])
     self.assertIsNone(unicode_data.age(0x2B820))
예제 #22
0
  def generate_html(self, tindex, context, metrics, flag_sets, cp_to_targets):
    """Return the HTML heading and table for this target as one string.

    The output is an <h3> with id 'target_<tindex>', an optional line from
    _character_string_html, and a <table> with one row per code point in
    self.codelist.  Each row holds the code point, one cell per entry of
    self.used_fonts, optional metrics cells, the age, the flagged name and
    links to the other targets listed for the code point.

    Args:
      tindex: index of this target; used for the heading id and to leave this
        target out of the per-row target links.
      context: if truthy, a format string applied (with %) to each rendered
        character; otherwise the bare character is used.
      metrics: None to omit the metrics columns; otherwise indexed by code
        point to get an (lsb, adv, cy) triple that overrides the font's
        values.
      flag_sets: passed through to _flagged_name.
      cp_to_targets: looked up with .get(cp); each value is iterated as
        (target index, target name) pairs.

    Returns:
      The generated lines joined with newlines.
    """
    # Debug switch: when set to True, metrics are also printed to stdout.
    dump_metrics = False

    if dump_metrics:
      print '$ %s' % self.name

    def context_string(codelist, cp):
      """Return the mapped character for cp, wrapped in context if set."""
      cps = unichr(codelist.mapped_code(cp))
      return (context % cps) if context else cps

    def _target_line(cp, tindex, tinfo):
      """Return links to the targets in tinfo other than tindex."""
      info = []
      for ix, name in tinfo:
        if ix == tindex:
          continue
        info.append('<a href="#target_%d">%s</a>' % (ix, name))
      if not info:
        return '(no group)'
      return '; '.join(info)

    def _generate_header():
      """Return the table header row for the used fonts (and metrics)."""
      header_parts = ['<tr class="head"><th>CP']
      for key, _ in self.used_fonts:
        header_parts.append('<th>' + key)
      if metrics != None:
        # NOTE(review): the labels read lsb/mid/rsb/wid/cy while the row
        # cells below are appended as lsb, wid, rsb, adv, cy -- confirm the
        # intended labels.
        header_parts.append('<th>lsb<th>mid<th>rsb<th>wid<th>cy')
      header_parts.append('<th>Age<th>Name')
      return ''.join(header_parts)

    if metrics != None:
      # the metrics apply to the rightmost font
      # (first element of the first keyinfo of the last used_fonts entry)
      fontname = self.used_fonts[-1][1][0][0]
      if fontname:
        metrics_font = _get_font(fontname)
      else:
        metrics_font = None
        print >> sys.stderr, 'no metrics font'

    lines = ['<h3 id="target_%d">%s</h3>' % (tindex, self.name)]
    char_line = _character_string_html(self.codelist, self.used_fonts[-1])
    if char_line:
      lines.append(char_line)
    lines.append('<table>')
    header = _generate_header()
    linecount = 0
    for cp in self.codelist.codes():
      # Repeat the header row every 20 rows.
      if linecount % 20 == 0:
        lines.append(header)
      linecount += 1
      line = ['<tr>']
      line.append('<td>U+%04x' % cp)
      for rkey, keyinfos in self.used_fonts:
        cell_class = None
        cell_text = None
        index = 0
        # Use the first font entry whose code list contains cp.
        for font, _, rcodelist in keyinfos:
          if rcodelist.contains(cp):
            if len(keyinfos) > 1:
              cell_class = '%s_%d' % (rkey, index)
            else:
              cell_class = rkey
            cell_class = replace_nonalpha(cell_class)
            if font:
              cell_text = context_string(rcodelist, cp)
            else:
              # No font for this entry: show a star instead of the character.
              cell_text = ' * '
              cell_class += ' star'
            break
          index += 1
        if cell_class:
          line.append('<td class="%s">%s' % (cell_class, cell_text))
        else:
          line.append('<td>&nbsp;')
      name = _flagged_name(cp, flag_sets)
      if metrics != None:
        cp_metrics = _get_cp_metrics(metrics_font, cp) if metrics_font else None
        if cp_metrics:
          lsb, rsb, wid, adv, cy = cp_metrics
          if dump_metrics:
            print '%04x # %4d, %4d, %4d, %s' % (cp, lsb, adv, cy, name)

          # Values from metrics override the font's own lsb/adv/cy.
          if cp in metrics:
            nlsb, nadv, ncy = metrics[cp]
          else:
            nlsb, nadv, ncy = lsb, adv, cy
          # The new rsb is derived from the new advance and new lsb.
          nrsb = nadv - wid - nlsb

          # Each cell shows the font value and, when the new value differs,
          # an arrow followed by the new value in bold.
          line.append('<td>%d%s' % (
              lsb, '&rarr;<b>%d</b>' % nlsb if lsb != nlsb else ''))
          line.append('<td>%d' % wid)
          line.append('<td>%d%s' % (
              rsb, '&rarr;<b>%d</b>' % nrsb if rsb != nrsb else ''))
          line.append('<td>%d%s' % (
              adv, '&rarr;<b>%d</b>' % nadv if adv != nadv else ''))
          line.append('<td>%d%s' % (
              cy, '&rarr;<b>%d</b>' % ncy if cy != ncy else ''))
        else:
          # No metrics available: emit five empty cells.
          line.append('<td><td><td><td><td>')
      line.append('<td>%s' % unicode_data.age(cp))
      line.append('<td>%s' % name)
      # NOTE(review): cp_to_targets.get(cp) yields None for an unknown cp,
      # which _target_line cannot iterate -- presumably every cp is present;
      # confirm.  This cell also has no matching header column.
      line.append('<td>%s' % _target_line(cp, tindex, cp_to_targets.get(cp)))
      lines.append(''.join(line))
    lines.append('</table>')
    return '\n'.join(lines)