def _check_no_alias_sources(sorted_seq_to_filepath):
    """Check that we don't have sequences that we expect to be aliased to
    some other sequence.

    Prints one 'check no alias sources: ...' line, showing the associated
    file path, for every key of sorted_seq_to_filepath that is also a key of
    the default emoji alias table. Nothing is returned.
    """
    aliases = add_aliases.read_default_emoji_aliases()
    # items() instead of the Python-2-only iteritems(): dict.iteritems() does
    # not exist on Python 3, while items() works on both.
    for seq, fp in sorted_seq_to_filepath.items():
        if seq in aliases:
            print('check no alias sources: aliased sequence %s' % fp)
def get_inv_aliases():
    """Return a mapping from target to list of sources for all alias targets
    in either the default alias table or the unknown_flag alias table.

    The result is a collections.defaultdict(list), so looking up a target
    that has no aliases yields (and inserts) an empty list.
    """
    inv_aliases = collections.defaultdict(list)

    # items() instead of the Python-2-only iteritems() so this also runs on
    # Python 3.
    standard_aliases = add_aliases.read_default_emoji_aliases()
    for source, target in standard_aliases.items():
        inv_aliases[target].append(source)

    unknown_flag_aliases = add_aliases.read_emoji_aliases(
        'unknown_flag_aliases.txt')
    for source, target in unknown_flag_aliases.items():
        inv_aliases[target].append(source)

    return inv_aliases
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Prints one 'coverage: ...' line per problem found; nothing is returned.

    Side effect: for every default alias whose target is present and whose
    source is not, an entry ``source -> 'alias:' + <target's file path>`` is
    added to seq_to_filepath before the remaining checks run.

    Args:
      seq_to_filepath: dict keyed by code point tuples, with file path
        strings as values. Mutated as described above.
      unicode_version: forwarded as ``age=`` to the unicode_data queries.
    """
    age = unicode_version

    # Map each of our keys, with the emoji variation selector stripped, back
    # to the key as it actually appears in seq_to_filepath.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' % (
                alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' % (
                alias_str, target_str, seq_name(v)))
            continue
        # The target is present either directly or under its
        # variation-selector form; register the alias with the same file.
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' % (
                cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)): on Python 3 range() is not a list, so concatenating
    # it to a list directly raises TypeError.
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' % (
                cp, unicode_data.name(cp)))

    # combining sequences
    # (items() instead of the Python-2-only iteritems() here and below)
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' % (
                    unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
def _get_canonical_aliases():
    """Return the default emoji alias table with canonicalized sequences.

    Both the key and the value of every entry returned by
    add_aliases.read_default_emoji_aliases() are passed through
    unicode_data.get_canonical_emoji_sequence(); when that returns a falsy
    result the original sequence is kept unchanged.
    """
    def canon(seq):
        # Fall back to the input when no canonical form is reported.
        return unicode_data.get_canonical_emoji_sequence(seq) or seq

    aliases = add_aliases.read_default_emoji_aliases()
    # items() instead of the Python-2-only iteritems() so this also runs on
    # Python 3.
    return {canon(k): canon(v) for k, v in aliases.items()}
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Each problem found is reported with a printed 'coverage: ...' line; the
    function returns nothing.

    Side effect: every default alias whose target is covered and whose source
    is not gets a new ``source -> 'alias:' + <target's file path>`` entry in
    seq_to_filepath, so later checks treat the alias source as covered.

    Args:
      seq_to_filepath: dict from code point tuple to file path string.
        Mutated as described above.
      unicode_version: forwarded as ``age=`` to the unicode_data queries.
    """
    age = unicode_version

    # Index our keys by their variation-selector-free form so that aliases
    # can be matched regardless of whether the selector is present.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        # Look the target up directly first, then under the key that
        # carries the variation selector.
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # range() must be wrapped in list() before concatenation: on Python 3 it
    # is a lazy sequence and `list + range` raises TypeError.
    special_cps = [ord('*'), ord('#'), ord(u'\u20e3')] + list(
        range(0x30, 0x3a))
    for cp in special_cps:
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    # (dict.items() replaces the Python-2-only iteritems() here and below)
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Prints one 'coverage: ...' line per problem found. If any problem was
    reported, the process is terminated via SystemExit carrying a message
    that names the BYPASS_SEQUENCE_CHECK make variable.

    Side effect: for every default alias whose target is present and whose
    source is not, an entry ``source -> 'alias:' + <target's file path>`` is
    added to seq_to_filepath.

    Args:
      seq_to_filepath: dict keyed by code point tuples, with file path
        strings as values. Mutated as described above.
      unicode_version: accepted for interface compatibility; this
        implementation does not read it.

    Raises:
      SystemExit: when at least one coverage problem was found.
    """
    # Local import: sys.exit is used instead of the builtin exit(), which is
    # only injected by the `site` module and is meant for interactive use.
    import sys

    coverage_pass = True

    # Map each of our keys, with the emoji variation selector stripped, back
    # to the key as it actually appears in seq_to_filepath.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(f'coverage: alias {alias_str} missing target {target_str}')
            coverage_pass = False
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(
                f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})'
            )
            coverage_pass = False
            continue
        # The target is present either directly or under its
        # variation-selector form; register the alias with the same file.
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji())
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing single {cp} ({unicode_data.name(cp)})')
            coverage_pass = False

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing special {cp} ({unicode_data.name(cp)})')
            coverage_pass = False

    # combining sequences
    # NOTE(review): this reads the private unicode_data._emoji_sequence_data
    # table directly -- confirm it is populated before this point.
    comb_seq_to_name = sorted(unicode_data._emoji_sequence_data.items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print(
                    f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})'
                )
                coverage_pass = False

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
        coverage_pass = False

    if not coverage_pass:
        sys.exit(
            "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
        )