def test_age(self):
    """Check unicode_data.age() against code points with known ages."""
    # (code point, expected age string) pairs, checked in order.
    expected_ages = (
        (0xE000, "1.1"),
        (0xE0021, "3.1"),
        (0x20BD, "7.0"),
        (0x2B820, "8.0"),
        (0x104B0, "9.0"),
    )
    for code_point, version in expected_ages:
        self.assertEqual(unicode_data.age(code_point), version)
def test_age(self):
    """Check unicode_data.age() against code points with known ages."""
    # Maps each probed code point to the age string it must report.
    known = [
        (0xE000, '1.1'),
        (0xE0021, '3.1'),
        (0x20BD, '7.0'),
        (0x2B820, '8.0'),
        (0x104B0, '9.0'),
    ]
    for cp, expected in known:
        self.assertEqual(unicode_data.age(cp), expected)
def test_age(self):
    """Check unicode_data.age() for known ages and for a missing age."""
    with_age = (
        (0xE000, '1.1'),
        (0xE0021, '3.1'),
        (0x20BD, '7.0'),
        (0x2B820, '8.0'),
    )
    for cp, expected in with_age:
        self.assertEqual(unicode_data.age(cp), expected)

    # below will fail once unicode 9 character age data updates
    self.assertIsNone(unicode_data.age(0x104B0))
def _defined_characters_in_range(range_str):
    """Return the set of defined code points covered by a range string.

    A code point is kept only when unicode_data.is_defined() accepts it and
    unicode_data.age() returns something other than None for it.
    """
    return set(
        code
        for code in _range_string_to_set(range_str)
        if unicode_data.is_defined(code)
        and unicode_data.age(code) is not None
    )
def generate_text(outfile, title, fonts, targets, flag_sets, data_dir):
    """Write a plain-text coverage report to outfile.

    The report starts with the title and a 'Fonts:' section (one line per
    entry of each key's keyinfos, with the font path shown relative to
    data_dir), followed by one table per target listing every code point,
    which font keys contain it, its Unicode age and its flagged name.

    Args:
      outfile: writable file-like object that receives the report.
      title: first line of the report.
      fonts: iterable of (key, keyinfos); each keyinfo is a
        (font, name, <ignored>) triple.
      targets: iterable of (name, codelist, used_fonts) triples.
      flag_sets: passed through to _flagged_name().
      data_dir: directory that font paths are made relative to.
    """
    print >> outfile, title
    print >> outfile
    print >> outfile, 'Fonts:'
    for key, keyinfos in fonts:
        for font, font_name, _ in keyinfos:
            if font:
                rel_font = path.relpath(font, data_dir)
            else:
                rel_font = '(no font)'
            print >> outfile, ' %s: %s (%s)' % (key, font_name, rel_font)
    print >> outfile

    for target_name, codelist, used_fonts in targets:
        print >> outfile
        print >> outfile, target_name

        header = ['idx code']
        header += [entry[0] for entry in used_fonts]
        header.append('age name')
        print >> outfile, ' '.join(header)

        # Rows are numbered from 1.  Each cell is emitted with a trailing
        # comma so the print statement itself inserts the separators.
        for index, cp in enumerate(codelist.codes(), 1):
            print >> outfile, '%3d' % index,
            print >> outfile, '%5s' % ('%04x' % cp),
            for rkey, keyinfos in used_fonts:
                covered = any(
                    rcodelist.contains(cp) for _, _, rcodelist in keyinfos)
                print >> outfile, rkey if covered else ('-' * len(rkey)),
            print >> outfile, unicode_data.age(cp),
            print >> outfile, _flagged_name(cp, flag_sets)
def _print_detailed(cps, inverted_target=None): last_block = None for cp in sorted(cps): block = unicode_data.block(cp) if block != last_block: print ' %s' % block last_block = block script = unicode_data.script(cp) extensions = unicode_data.script_extensions(cp) - set([script]) if extensions: extensions = ' (%s)' % ','.join(sorted(extensions)) else: extensions = '' if not inverted_target: extra = '' elif cp not in inverted_target: extra = ' !missing' else: scripts = sorted(inverted_target[cp]) if len(scripts) > 3: script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1] else: script_text = ', '.join(scripts) extra = ' (in %s)' % script_text print ' %6s %4s %2s %3s %s%s%s' % ( '%04x' % cp, script, unicode_data.category(cp), unicode_data.age(cp), unicode_data.name(cp, ''), extensions, extra)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Report code points in the sequences that are not valid emoji cps.

    Valid code points are the emoji known to unicode_data (restricted by
    age when unicode_version is given and below PROPOSED_EMOJI_AGE, or
    extended with the proposed emoji otherwise) plus the specific code
    points used to build emoji sequences.  This is a 'pre-check' that only
    reports this one kind of problem, writing its findings to stderr.

    Args:
      sorted_seq_to_filepath: mapping from a code point sequence to the
        file path it came from.
      unicode_version: version limit, or None for no limit.
    """
    allowed = set(unicode_data.get_emoji())
    include_proposed = (
        unicode_version is None
        or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE)
    if include_proposed:
        allowed |= unicode_data.proposed_emoji_cps()
    else:
        # NOTE(review): assumes age() and unicode_version are directly
        # comparable -- confirm the types agree.
        allowed = set(
            cp for cp in allowed if unicode_data.age(cp) <= unicode_version)

    allowed.update((
        0x200d,   # ZWJ
        0x20e3,   # combining enclosing keycap
        0xfe0f,   # variation selector (emoji presentation)
        0xfe82b,  # PUA value for unknown flag
    ))
    allowed |= TAG_SET  # used in subregion tag sequences

    # Offending code point -> file paths of the sequences that use it.
    offenders = {}
    for seq, fp in sorted_seq_to_filepath.iteritems():
        for cp in seq:
            if cp not in allowed:
                offenders.setdefault(cp, []).append(fp)

    if not offenders:
        return

    print(
        'check valid emoji cps: %d non-emoji cp found' % len(offenders),
        file=sys.stderr)
    for cp in sorted(offenders):
        print(
            'check valid emoji cps: %04x (in %d sequences)' % (
                cp, len(offenders[cp])),
            file=sys.stderr)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Report code points in the sequences that are not valid emoji cps.

    The valid set starts from unicode_data.get_emoji().  With no
    unicode_version, or one at or above PROPOSED_EMOJI_AGE, the proposed
    emoji are added; otherwise the set is filtered by age.  The code points
    used to build emoji sequences are always allowed.  This is a
    'pre-check' that only reports this one kind of problem, on stderr.

    Args:
      sorted_seq_to_filepath: mapping from a code point sequence to the
        file path it came from.
      unicode_version: version limit, or None for no limit.
    """
    # Code points that are legal inside sequences without being emoji.
    sequence_helpers = (
        0x200d,   # ZWJ
        0x20e3,   # combining enclosing keycap
        0xfe0f,   # variation selector (emoji presentation)
        0xfe82b,  # PUA value for unknown flag
    )

    emoji_cps = set(unicode_data.get_emoji())
    if (unicode_version is None
            or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE):
        emoji_cps |= unicode_data.proposed_emoji_cps()
    else:
        # NOTE(review): assumes age() and unicode_version are directly
        # comparable -- confirm the types agree.
        emoji_cps = set(
            cp for cp in emoji_cps
            if unicode_data.age(cp) <= unicode_version)
    for helper_cp in sequence_helpers:
        emoji_cps.add(helper_cp)
    emoji_cps |= TAG_SET  # used in subregion tag sequences

    # Rejected code point -> file paths of the sequences containing it.
    rejected = {}
    for seq, fp in sorted_seq_to_filepath.iteritems():
        for cp in seq:
            if cp in emoji_cps:
                continue
            if cp not in rejected:
                rejected[cp] = [fp]
            else:
                rejected[cp].append(fp)

    if rejected:
        print('check valid emoji cps: %d non-emoji cp found' % len(rejected),
              file=sys.stderr)
        for cp in sorted(rejected):
            count = len(rejected[cp])
            print('check valid emoji cps: %04x (in %d sequences)' % (
                cp, count), file=sys.stderr)
def _defined_characters_in_range(range_str):
    """Collect the defined code points covered by a range string.

    Code points are dropped when unicode_data.is_defined() rejects them or
    when unicode_data.age() returns None for them.
    """
    defined = set()
    for code in _range_string_to_set(range_str):
        if not unicode_data.is_defined(code):
            continue
        if unicode_data.age(code) is None:
            continue
        defined.add(code)
    return defined
def _print_detailed(cps, inverted_target=None):
    """Print one detail line per code point, grouped under block headings.

    Code points whose script is "Zzzz" are not listed individually;
    consecutive runs of them within a block are collapsed into a single
    summary line giving the run and how many code points it spans.  Every
    other code point gets a line with its hex value, script, general
    category, age and name, any additional script extensions, and -- when
    inverted_target is given and non-empty -- either " !missing" or the
    entries of inverted_target that contain it.

    Args:
      cps: iterable of code points; processed in sorted order.
      inverted_target: optional mapping from code point to a collection of
        strings (at most the first three plus the last are shown).
    """
    last_block = None
    # Inclusive bounds of the pending run of "Zzzz" code points; -1 = none.
    undefined_start = -1
    undefined_end = -1

    def show_undefined(start, end):
        """Print the summary line for the pending run, if there is one."""
        if start >= 0:
            if end > start:
                # The run is inclusive at both ends, hence the + 1.
                print(" %04x-%04x Zzzz <%d undefined>" %
                      (start, end, end - start + 1))
            else:
                print(" %04x Zzzz <1 undefined>" % start)

    for cp in sorted(cps):
        block = unicode_data.block(cp)
        # Flush the pending run when the block changes or when cp does not
        # directly follow the run, so every reported run is contiguous.
        if block != last_block or (
                undefined_end > -1 and cp > undefined_end + 1):
            show_undefined(undefined_start, undefined_end)
            undefined_start, undefined_end = -1, -1
            if block != last_block:
                print(" %s" % block)
                last_block = block

        script = unicode_data.script(cp)
        if script == "Zzzz":
            # Start or extend the pending run instead of printing a line.
            if undefined_start >= 0:
                undefined_end = cp
            else:
                undefined_start, undefined_end = cp, cp
            continue

        show_undefined(undefined_start, undefined_end)
        undefined_start, undefined_end = -1, -1

        extensions = unicode_data.script_extensions(cp) - {script}
        if extensions:
            extensions = " (script %s)" % ", ".join(sorted(extensions))
        else:
            extensions = ""

        if not inverted_target:
            extra = ""
        elif cp not in inverted_target:
            extra = " !missing"
        else:
            scripts = sorted(inverted_target[cp])
            if len(scripts) > 3:
                # Abbreviate long lists: first three, ellipsis, then last.
                script_text = ", ".join(scripts[:3]) + "... " + scripts[-1]
            else:
                script_text = ", ".join(scripts)
            extra = " (font %s)" % script_text

        print(" %6s %4s %2s %3s %s%s%s" % (
            "%04x" % cp,
            script,
            unicode_data.category(cp),
            unicode_data.age(cp),
            unicode_data.name(cp, ""),
            extensions,
            extra,
        ))

    # Flush a run that reaches the end of the input.
    show_undefined(undefined_start, undefined_end)
def _print_detailed(cps, inverted_target=None): last_block = None undefined_start = -1 undefined_end = -1 def show_undefined(start, end): if start >= 0: if end > start: print ' %04x-%04x Zzzz <%d undefined>' % ( start, end, end - start - 1) else: print ' %04x Zzzz <1 undefined>' % start for cp in sorted(cps): block = unicode_data.block(cp) if block != last_block or (undefined_end > -1 and cp > undefined_end + 1): show_undefined(undefined_start, undefined_end) undefined_start, undefined_end = -1, -1 if block != last_block: print ' %s' % block last_block = block script = unicode_data.script(cp) if script == 'Zzzz': if undefined_start >= 0: undefined_end = cp else: undefined_start, undefined_end = cp, cp continue show_undefined(undefined_start, undefined_end) undefined_start, undefined_end = -1, -1 extensions = unicode_data.script_extensions(cp) - set([script]) if extensions: extensions = ' (%s)' % ','.join(sorted(extensions)) else: extensions = '' if not inverted_target: extra = '' elif cp not in inverted_target: extra = ' !missing' else: scripts = sorted(inverted_target[cp]) if len(scripts) > 3: script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1] else: script_text = ', '.join(scripts) extra = ' (in %s)' % script_text print ' %6s %4s %2s %3s %s%s%s' % ( '%04x' % cp, script, unicode_data.category(cp), unicode_data.age(cp), unicode_data.name(cp, ''), extensions, extra) show_undefined(undefined_start, undefined_end)
def _set_ompl():
    """Populate the module-level OMPL mapping.

    OMPL is defined to be the list of mirrored pairs in Unicode 5.1:
    http://www.microsoft.com/typography/otspec/ttochap1.htm#ltrrtl

    It is built from unicode_data's bidi mirroring glyph data, keeping only
    the characters whose age, read as a float, is at most 5.1.
    """
    global OMPL

    unicode_data.load_data()
    mirrored_pairs = {}
    for char, bmg in unicode_data._bidi_mirroring_glyph_data.items():
        if float(unicode_data.age(char)) <= 5.1:
            mirrored_pairs[char] = bmg
    OMPL = mirrored_pairs
def _set_ompl():
    """Populate the module-level OMPL mapping.

    OMPL is defined to be the list of mirrored pairs in Unicode 5.1:
    http://www.microsoft.com/typography/otspec/ttochap1.htm#ltrrtl

    Entries come from unicode_data's bidi mirroring glyph data and are kept
    only when the character's age, read as a float, is at most 5.1.
    """
    global OMPL

    unicode_data.load_data()
    source_pairs = unicode_data._bidi_mirroring_glyph_data
    kept = {}
    for char, bmg in source_pairs.items():
        char_age = float(unicode_data.age(char))
        if char_age <= 5.1:
            kept[char] = bmg
    OMPL = kept
def generate_text(self, metrics, flag_sets):
    """Return this target's coverage table as plain text.

    The first line is self.name, the second a header naming each font key
    in self.used_fonts, and then one line follows per code point in
    self.codelist: index (from 0), hex code point, one cell per font key
    (the key itself when one of its codelists contains the code point,
    dashes of the same width otherwise), Unicode age and flagged name.

    Args:
      metrics: unused by this method.
      flag_sets: passed through to _flagged_name().

    Returns:
      The lines joined with newlines.
    """
    header = ['idx code']
    header.extend(f[0] for f in self.used_fonts)
    header.append('age name')
    lines = [self.name, ' '.join(header)]

    for index, cp in enumerate(self.codelist.codes()):
        line = ['%3d' % index, '%5s' % ('%04x' % cp)]
        for rkey, keyinfos in self.used_fonts:
            match = any(codelist.contains(cp) for _, _, codelist in keyinfos)
            line.append(rkey if match else ('-' * len(rkey)))
        # age() can return None; format it so that str.join() below does
        # not raise TypeError on a non-string item.
        line.append('%s' % (unicode_data.age(cp),))
        line.append(_flagged_name(cp, flag_sets))
        lines.append(' '.join(line))
    return '\n'.join(lines)
def _generate_table(index, target, context, flag_sets):
    """Return the HTML heading and coverage table for one target.

    Args:
      index: number used in the heading's "target_%d" id.
      target: (name, codelist, used_fonts) triple.
      context: optional %-format string the sample character is inserted
        into; when falsy the bare character is used.
      flag_sets: passed through to _flagged_name().

    Returns:
      The HTML lines joined with newlines.  The header row produced by
      _generate_header() is repeated every 20 data rows.
    """
    name, codelist, used_fonts = target

    def sample_text(rcodelist, cp):
        char = unichr(rcodelist.mapped_code(cp))
        if context:
            return context % char
        return char

    lines = ['<h3 id="target_%d">%s</h3>' % (index, name), '<table>']
    header = _generate_header(used_fonts)
    for row_number, cp in enumerate(codelist.codes()):
        if row_number % 20 == 0:
            lines.append(header)
        row = ['<tr>', '<td class="code">U+%04x' % cp]
        for rkey, keyinfos in used_fonts:
            cell_class = None
            cell_text = None
            # Only the first keyinfo whose codelist contains cp is shown.
            for font_index, (font, _, rcodelist) in enumerate(keyinfos):
                if not rcodelist.contains(cp):
                    continue
                if len(keyinfos) > 1:
                    cell_class = '%s_%d' % (rkey, font_index)
                else:
                    cell_class = rkey
                cell_class = replace_nonalpha(cell_class)
                if font:
                    cell_text = sample_text(rcodelist, cp)
                else:
                    cell_text = ' * '
                    cell_class += ' star'
                break
            if cell_class:
                row.append('<td class="%s">%s' % (cell_class, cell_text))
            else:
                row.append('<td> ')
        row.append('<td class="age">%s' % unicode_data.age(cp))
        row.append('<td class="name">%s' % _flagged_name(cp, flag_sets))
        lines.append(''.join(row))
    lines.append('</table>')
    return '\n'.join(lines)
def generate_text(self, metrics, flag_sets):
    """Return this target's coverage table as plain text.

    The first line is self.name, the second a header naming each font key
    in self.used_fonts, and then one line follows per code point in
    self.codelist: index (from 0), hex code point, one cell per font key
    (the key itself when one of its codelists contains the code point,
    dashes of the same width otherwise), Unicode age and flagged name.

    Args:
      metrics: unused by this method.
      flag_sets: passed through to _flagged_name().

    Returns:
      The lines joined with newlines.
    """
    header = ["idx code"]
    header.extend(f[0] for f in self.used_fonts)
    header.append("age name")
    lines = [self.name, " ".join(header)]

    for index, cp in enumerate(self.codelist.codes()):
        line = ["%3d" % index, "%5s" % ("%04x" % cp)]
        for rkey, keyinfos in self.used_fonts:
            match = any(
                codelist.contains(cp) for _, _, codelist in keyinfos)
            line.append(rkey if match else ("-" * len(rkey)))
        # age() can return None; format it so that str.join() below does
        # not raise TypeError on a non-string item.
        line.append("%s" % (unicode_data.age(cp),))
        line.append(_flagged_name(cp, flag_sets))
        lines.append(" ".join(line))
    return "\n".join(lines)
def main(): """Checkes the coverage of all Roboto fonts.""" with open('res/char_requirements.tsv') as char_reqs_file: char_reqs_data = char_reqs_file.read() # The format of the data to be parsed is like the following: # General Punctuation\t2000..206F\t111\t35\t54\t0\tEverything except 2028..202E, 2060..2064, and 2066..206F # Currency Symbols\t20A0..20CF\t29\t5\t24\t1\t required_set = set() for line in char_reqs_data.split('\n'): if line.startswith('#'): # Skip comment lines continue line = line.split('\t') if not line[0]: continue # Skip the first line and empty lines block_range = line[1] full_coverage_required = (line[5] == '1') exceptions = line[6] required_set.update( _find_required_chars(block_range, full_coverage_required, exceptions)) # Skip Unicode 8.0 characters required_set = { ch for ch in required_set if float(unicode_data.age(ch)) <= 7.0 } # Skip ASCII and C1 controls required_set -= set(range(0, 0x20) + range(0x7F, 0xA0)) missing_char_found = False for font in load_fonts(): font_coverage = coverage.character_set(font) missing_chars = required_set - font_coverage if missing_chars: missing_char_found = True font_name = font_data.font_name(font) print 'Characters missing from %s:' % font_name for char in sorted(missing_chars): _print_char(char) print if missing_char_found: sys.exit(1)
def main(): """Checkes the coverage of all Roboto fonts.""" with open('res/char_requirements.tsv') as char_reqs_file: char_reqs_data = char_reqs_file.read() # The format of the data to be parsed is like the following: # General Punctuation\t2000..206F\t111\t35\t54\t0\tEverything except 2028..202E, 2060..2064, and 2066..206F # Currency Symbols\t20A0..20CF\t29\t5\t24\t1\t required_set = set() for line in char_reqs_data.split('\n'): if line.startswith('#'): # Skip comment lines continue line = line.split('\t') if not line[0]: continue # Skip the first line and empty lines block_range = line[1] full_coverage_required = (line[5] == '1') exceptions = line[6] required_set.update( _find_required_chars(block_range, full_coverage_required, exceptions)) # Skip Unicode 8.0 characters required_set = {ch for ch in required_set if float(unicode_data.age(ch)) <= 7.0} # Skip ASCII and C1 controls required_set -= set(range(0, 0x20) + range(0x7F, 0xA0)) missing_char_found = False for font in load_fonts(): font_coverage = coverage.character_set(font) missing_chars = required_set - font_coverage if missing_chars: missing_char_found = True font_name = font_data.font_name(font) print 'Characters missing from %s:' % font_name for char in sorted(missing_chars): _print_char(char) print if missing_char_found: sys.exit(1)
def _check_valid_emoji_cps(sorted_seq_to_filepath, unicode_version):
    """Ensure every cp in the sequences is a valid emoji cp.

    Valid code points are the emoji known to unicode_data (restricted by
    age when unicode_version is given and below PROPOSED_EMOJI_AGE, or
    extended with the proposed emoji otherwise) plus the specific code
    points used in forming emoji sequences.  This is a 'pre-check' that
    reports this specific problem.

    Args:
      sorted_seq_to_filepath: mapping from a code point sequence to the
        file path it came from.
      unicode_version: version limit, or None for no limit.

    Raises:
      SystemExit: if any sequence contains a code point outside the valid
        set; the offenders are listed on stderr first.
    """
    valid_cps = set(unicode_data.get_emoji())
    if (unicode_version is None
            or unicode_version >= unicode_data.PROPOSED_EMOJI_AGE):
        valid_cps |= unicode_data.proposed_emoji_cps()
    else:
        # NOTE(review): assumes age() and unicode_version are directly
        # comparable -- in Python 3 a str/number mix raises TypeError here.
        valid_cps = set(
            cp for cp in valid_cps if unicode_data.age(cp) <= unicode_version)
    valid_cps.add(0x200d)  # ZWJ
    valid_cps.add(0x20e3)  # combining enclosing keycap
    valid_cps.add(0xfe0f)  # variation selector (emoji presentation)
    valid_cps.add(0xfe82b)  # PUA value for unknown flag
    valid_cps |= TAG_SET  # used in subregion tag sequences

    # Offending code point -> file paths of the sequences that use it.
    not_emoji = {}
    for seq, fp in sorted_seq_to_filepath.items():
        for cp in seq:
            if cp not in valid_cps:
                not_emoji.setdefault(cp, []).append(fp)

    if not not_emoji:
        return

    print(
        f'check valid emoji cps: {len(not_emoji)} non-emoji cp found',
        file=sys.stderr)
    for cp in sorted(not_emoji):
        fps = not_emoji[cp]
        print(
            f'check the following cp: {cp} - {fps[0]} (in {len(fps)} sequences)',
            file=sys.stderr)

    # sys.exit rather than the exit() builtin: the latter is installed by
    # the site module for interactive use and is not always available.
    sys.exit(
        "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
    )
def test_age(self):
    """Check unicode_data.age() for known ages and for a missing age."""
    with_age = (
        (0xE000, '1.1'),
        (0xE0021, '3.1'),
        (0x20BD, '7.0'),
    )
    for cp, expected in with_age:
        self.assertEqual(unicode_data.age(cp), expected)

    # This code point is expected to have no age data.
    self.assertIsNone(unicode_data.age(0x2B820))
def generate_html(self, tindex, context, metrics, flag_sets, cp_to_targets): dump_metrics = False if dump_metrics: print '$ %s' % self.name def context_string(codelist, cp): cps = unichr(codelist.mapped_code(cp)) return (context % cps) if context else cps def _target_line(cp, tindex, tinfo): info = [] for ix, name in tinfo: if ix == tindex: continue info.append('<a href="#target_%d">%s</a>' % (ix, name)) if not info: return '(no group)' return '; '.join(info) def _generate_header(): header_parts = ['<tr class="head"><th>CP'] for key, _ in self.used_fonts: header_parts.append('<th>' + key) if metrics != None: header_parts.append('<th>lsb<th>mid<th>rsb<th>wid<th>cy') header_parts.append('<th>Age<th>Name') return ''.join(header_parts) if metrics != None: # the metrics apply to the rightmost font fontname = self.used_fonts[-1][1][0][0] if fontname: metrics_font = _get_font(fontname) else: metrics_font = None print >> sys.stderr, 'no metrics font' lines = ['<h3 id="target_%d">%s</h3>' % (tindex, self.name)] char_line = _character_string_html(self.codelist, self.used_fonts[-1]) if char_line: lines.append(char_line) lines.append('<table>') header = _generate_header() linecount = 0 for cp in self.codelist.codes(): if linecount % 20 == 0: lines.append(header) linecount += 1 line = ['<tr>'] line.append('<td>U+%04x' % cp) for rkey, keyinfos in self.used_fonts: cell_class = None cell_text = None index = 0 for font, _, rcodelist in keyinfos: if rcodelist.contains(cp): if len(keyinfos) > 1: cell_class = '%s_%d' % (rkey, index) else: cell_class = rkey cell_class = replace_nonalpha(cell_class) if font: cell_text = context_string(rcodelist, cp) else: cell_text = ' * ' cell_class += ' star' break index += 1 if cell_class: line.append('<td class="%s">%s' % (cell_class, cell_text)) else: line.append('<td> ') name = _flagged_name(cp, flag_sets) if metrics != None: cp_metrics = _get_cp_metrics(metrics_font, cp) if metrics_font else None if cp_metrics: lsb, rsb, wid, adv, cy = cp_metrics 
if dump_metrics: print '%04x # %4d, %4d, %4d, %s' % (cp, lsb, adv, cy, name) if cp in metrics: nlsb, nadv, ncy = metrics[cp] else: nlsb, nadv, ncy = lsb, adv, cy nrsb = nadv - wid - nlsb line.append('<td>%d%s' % ( lsb, '→<b>%d</b>' % nlsb if lsb != nlsb else '')) line.append('<td>%d' % wid) line.append('<td>%d%s' % ( rsb, '→<b>%d</b>' % nrsb if rsb != nrsb else '')) line.append('<td>%d%s' % ( adv, '→<b>%d</b>' % nadv if adv != nadv else '')) line.append('<td>%d%s' % ( cy, '→<b>%d</b>' % ncy if cy != ncy else '')) else: line.append('<td><td><td><td><td>') line.append('<td>%s' % unicode_data.age(cp)) line.append('<td>%s' % name) line.append('<td>%s' % _target_line(cp, tindex, cp_to_targets.get(cp))) lines.append(''.join(line)) lines.append('</table>') return '\n'.join(lines)