def _get_desc(key_tuple, dir_infos, basepaths):
    """Return the opening HTML table cell that describes an emoji sequence.

    The description is the space-separated, zero-padded hex codepoints of
    key_tuple.  When the sequence still has more than one codepoint after
    unicode_data.strip_emoji_vs, a parenthesized breakdown is appended in
    which each codepoint (0xfe0f excluded) is rendered as '+' for ZWJ, as
    the ASCII form of a regional indicator or tag character, or as an
    <img> tag pointing at that codepoint's image file.  If any part cannot
    be rendered the breakdown is omitted entirely.

    Args:
      key_tuple: tuple of integer codepoints.
      dir_infos: sequence of objects exposing a 'filemap' mapping from
        codepoint tuples to file names.
      basepaths: base paths, parallel to dir_infos.

    Returns:
      A string starting with '<td>'; the cell is not closed here.
    """
    CELL_PREFIX = '<td>'

    def _get_filepath(cp):
        # First directory whose filemap knows this single codepoint wins.
        cp_key = (cp,)
        for info, basepath in zip(dir_infos, basepaths):
            if cp_key in info.filemap:
                return path.join(basepath, info.filemap[cp_key])
        return None

    def _get_part(cp):
        if cp == 0x200d:  # zwj, common so replace with '+'
            return '+'
        if unicode_data.is_regional_indicator(cp):
            return unicode_data.regional_indicator_to_ascii(cp)
        if unicode_data.is_tag(cp):
            return unicode_data.tag_character_to_ascii(cp)
        fname = _get_filepath(cp)
        if fname:
            return '<img src="%s">' % fname
        # Signals the caller to drop the whole breakdown.
        raise LookupError('no image for %04x' % cp)

    # The join also covers the single-codepoint and empty cases.
    desc = ' '.join('%04x' % cp for cp in key_tuple)
    if len(unicode_data.strip_emoji_vs(key_tuple)) > 1:
        try:
            desc += ' (%s)' % ''.join(
                _get_part(cp) for cp in key_tuple if cp != 0xfe0f)
        except Exception:
            # Best effort: the breakdown is optional decoration.  Unlike a
            # bare 'except:', this lets KeyboardInterrupt/SystemExit through.
            pass
    return CELL_PREFIX + desc
def seq_name(seq):
    """Return the name of an emoji sequence, or None if it is not known.

    Single-codepoint sequences are named via unicode_data.name (with None
    as the default).  Longer sequences are looked up in the combining,
    flag, modifier and zwj sequence tables, first as given and then, if
    the sequence contains EMOJI_VS, with the variation selectors stripped.

    The lookup tables are built on first use and cached in the
    module-level _namedata; their keys have the emoji variation selector
    stripped.

    Args:
      seq: tuple of integer codepoints.

    Returns:
      The name string, or None when no table contains the sequence.
    """
    global _namedata

    if not _namedata:
        def strip_vs_map(seq_map):
            # .items() works on both Python 2 and 3; .iteritems() is
            # Python 2 only.
            return {
                unicode_data.strip_emoji_vs(k): v
                for k, v in seq_map.items()}

        _namedata = [
            strip_vs_map(unicode_data.get_emoji_combining_sequences()),
            strip_vs_map(unicode_data.get_emoji_flag_sequences()),
            strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
            strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

    if len(seq) == 1:
        return unicode_data.name(seq[0], None)

    for data in _namedata:
        if seq in data:
            return data[seq]

    if EMOJI_VS in seq:
        non_vs_seq = unicode_data.strip_emoji_vs(seq)
        for data in _namedata:
            if non_vs_seq in data:
                return data[non_vs_seq]

    return None
def create_thumbnails_and_aliases(src_dir, dst_dir, dst_prefix):
    """Creates thumbnails in dst_dir based on sources in src_dir, using
    dst_prefix.  Assumes the source prefix is 'emoji_u' and the common suffix
    is '.png'.

    For every file in src_dir whose name parses as an emoji sequence, a
    thumbnail is written to dst_dir under the variation-selector-stripped
    sequence name, and that thumbnail is copied once for each alias
    sequence that get_inv_aliases() lists for it.  Files whose names fail
    to parse (ValueError) are logged and skipped.

    Args:
      src_dir: directory containing the source images.
      dst_dir: output directory, passed through
        tool_utils.ensure_dir_exists.
      dst_prefix: file name prefix for the generated thumbnails.

    Raises:
      ValueError: if src_dir is not a directory.
    """
    if not path.isdir(src_dir):
        # The message used to be raised with the '%s' left uninterpolated.
        raise ValueError('"%s" is not a directory' % src_dir)
    dst_dir = tool_utils.ensure_dir_exists(dst_dir)

    src_prefix = 'emoji_u'
    suffix = '.png'

    inv_aliases = get_inv_aliases()

    for src_file in os.listdir(src_dir):
        try:
            seq = unicode_data.strip_emoji_vs(
                filename_to_sequence(src_file, src_prefix, suffix))
        except ValueError as ve:
            logger.warning('Error (%s), skipping' % ve)
            continue

        src_path = path.join(src_dir, src_file)

        dst_file = sequence_to_filename(seq, dst_prefix, suffix)
        dst_path = path.join(dst_dir, dst_file)

        create_thumbnail(src_path, dst_path)
        logger.info('wrote thumbnail: %s' % dst_file)

        # Aliases reuse the thumbnail that was just written.
        for alias_seq in inv_aliases.get(seq, ()):
            alias_file = sequence_to_filename(alias_seq, dst_prefix, suffix)
            alias_path = path.join(dst_dir, alias_file)
            shutil.copy2(dst_path, alias_path)
            logger.info('wrote alias: %s' % alias_file)
def seq_name(seq):
    """Look up the name of an emoji sequence.

    A one-element sequence is named by unicode_data.name(seq[0], None).
    Anything longer is searched for in the cached sequence tables
    (combining, flag, modifier, zwj, in that order), first unchanged and
    then, when it contains EMOJI_VS, with variation selectors stripped.

    The tables are created lazily on the first call and stored in the
    module-level _namedata; every key is passed through
    unicode_data.strip_emoji_vs when the tables are built.

    Args:
      seq: tuple of integer codepoints.

    Returns:
      The name, or None if no table has an entry for the sequence.
    """
    global _namedata

    if not _namedata:
        def strip_vs_map(seq_map):
            # Use .items() rather than the Python 2-only .iteritems() so
            # the table build also runs on Python 3.
            return {
                unicode_data.strip_emoji_vs(k): v
                for k, v in seq_map.items()
            }

        _namedata = [
            strip_vs_map(unicode_data.get_emoji_combining_sequences()),
            strip_vs_map(unicode_data.get_emoji_flag_sequences()),
            strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
            strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

    if len(seq) == 1:
        return unicode_data.name(seq[0], None)

    for data in _namedata:
        if seq in data:
            return data[seq]

    if EMOJI_VS in seq:
        non_vs_seq = unicode_data.strip_emoji_vs(seq)
        for data in _namedata:
            if non_vs_seq in data:
                return data[non_vs_seq]

    return None
def _get_desc(key_tuple, aliases, dir_infos, basepaths):
    """Return the opening HTML table cell that describes an emoji sequence.

    The description is the space-separated, zero-padded hex codepoints of
    key_tuple.  When the sequence still has more than one codepoint after
    unicode_data.strip_emoji_vs, a parenthesized breakdown is appended in
    which each codepoint (0xfe0f excluded) is rendered as '+' for ZWJ, as
    the ASCII form of a regional indicator or tag character, or as an
    <img> tag pointing at that codepoint's image file.  If any part cannot
    be rendered the breakdown is omitted entirely.

    Image lookup for a part uses its canonical emoji sequence when
    unicode_data provides one, and falls back to the alias target from
    'aliases'.  Missing aliases and missing parts are reported with print.

    Args:
      key_tuple: tuple of integer codepoints.
      aliases: mapping from alias sequence to target sequence.
      dir_infos: sequence of objects exposing a 'filemap' mapping from
        codepoint tuples to file names.
      basepaths: base paths, parallel to dir_infos.

    Returns:
      A string starting with '<td>'; the cell is not closed here.
    """
    CELL_PREFIX = '<td>'

    def _get_filepath(cp):
        def get_key_filepath(key):
            # First directory whose filemap contains the key wins.
            for info, basepath in zip(dir_infos, basepaths):
                if key in info.filemap:
                    return path.join(basepath, info.filemap[key])
            return None

        cp_key = (cp,)
        cp_key = unicode_data.get_canonical_emoji_sequence(cp_key) or cp_key
        fp = get_key_filepath(cp_key)
        if not fp:
            if cp_key in aliases:
                fp = get_key_filepath(aliases[cp_key])
            else:
                print('no alias for %s' % unicode_data.seq_to_string(cp_key))
        if not fp:
            print('no part for %s in %s' % (
                unicode_data.seq_to_string(cp_key),
                unicode_data.seq_to_string(key_tuple)))
        return fp

    def _get_part(cp):
        if cp == 0x200d:  # zwj, common so replace with '+'
            return '+'
        if unicode_data.is_regional_indicator(cp):
            return unicode_data.regional_indicator_to_ascii(cp)
        if unicode_data.is_tag(cp):
            return unicode_data.tag_character_to_ascii(cp)
        fname = _get_filepath(cp)
        if fname:
            return '<img src="%s">' % fname
        # Signals the caller to drop the whole breakdown.
        raise LookupError('no image for %04x' % cp)

    # The join also covers the single-codepoint and empty cases.
    desc = ' '.join('%04x' % cp for cp in key_tuple)
    if len(unicode_data.strip_emoji_vs(key_tuple)) > 1:
        try:
            desc += ' (%s)' % ''.join(
                _get_part(cp) for cp in key_tuple if cp != 0xfe0f)
        except Exception:
            # Best effort: the breakdown is optional decoration.  Unlike a
            # bare 'except:', this lets KeyboardInterrupt/SystemExit through.
            pass
    return CELL_PREFIX + desc
def create_thumbnails_and_aliases(src_dir, dst_dir, crop, dst_prefix):
    """Creates thumbnails in dst_dir based on sources in src_dir, using
    dst_prefix.  Assumes the source prefix is 'emoji_u' and the common suffix
    is '.png'.

    Both directories are first passed through tool_utils.resolve_path.
    For every file in src_dir whose name parses as an emoji sequence, a
    thumbnail is written to dst_dir under the variation-selector-stripped
    sequence name, and that thumbnail is copied once for each alias
    sequence that get_inv_aliases() lists for it.  Files whose names fail
    to parse (ValueError) are logged and skipped.

    Args:
      src_dir: directory containing the source images.
      dst_dir: output directory, passed through
        tool_utils.ensure_dir_exists.
      crop: forwarded to create_thumbnail; when truthy the log line notes
        that the thumbnail was written with crop.
      dst_prefix: file name prefix for the generated thumbnails.

    Raises:
      ValueError: if the resolved src_dir is not a directory.
    """
    src_dir = tool_utils.resolve_path(src_dir)
    if not path.isdir(src_dir):
        # The message used to be raised with the '%s' left uninterpolated.
        raise ValueError('"%s" is not a directory' % src_dir)

    dst_dir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

    src_prefix = 'emoji_u'
    suffix = '.png'

    inv_aliases = get_inv_aliases()

    for src_file in os.listdir(src_dir):
        try:
            seq = unicode_data.strip_emoji_vs(
                filename_to_sequence(src_file, src_prefix, suffix))
        except ValueError as ve:
            logger.warning('Error (%s), skipping' % ve)
            continue

        src_path = path.join(src_dir, src_file)

        dst_file = sequence_to_filename(seq, dst_prefix, suffix)
        dst_path = path.join(dst_dir, dst_file)

        create_thumbnail(src_path, dst_path, crop)
        logger.info('wrote thumbnail%s: %s' % (
            ' with crop' if crop else '', dst_file))

        # Aliases reuse the thumbnail that was just written.
        for alias_seq in inv_aliases.get(seq, ()):
            alias_file = sequence_to_filename(alias_seq, dst_prefix, suffix)
            alias_path = path.join(dst_dir, alias_file)
            shutil.copy2(dst_path, alias_path)
            logger.info('wrote alias: %s' % alias_file)
def strip_vs_map(seq_map):
    """Return a copy of seq_map whose keys have had
    unicode_data.strip_emoji_vs applied.

    Values are carried over unchanged.  If two keys become equal after
    stripping, the one iterated last wins (ordinary dict semantics).
    """
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    return {
        unicode_data.strip_emoji_vs(k): v
        for k, v in seq_map.items()}
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Problems are reported by printing lines prefixed with 'coverage:'.  As a
    side effect, each alias whose target is present and which is not itself
    already present is added to seq_to_filepath with the value
    'alias:' + the target's file path.

    Args:
      seq_to_filepath: dict mapping codepoint tuples to file paths; mutated
        as described above.
      unicode_version: passed as the 'age' argument to the unicode_data
        queries.
    """
    age = unicode_version

    # Map each of our sequences, with the variation selector stripped, back
    # to the form we actually have a file for.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' % (
                alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' % (
                alias_str, target_str, seq_name(v)))
            continue
        filename = (
            seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]])
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print(
                'coverage: missing single %04x (%s)' % (
                    cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) is needed on Python 3, where range is not a list.
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' % (
                cp, unicode_data.name(cp)))

    # combining sequences
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' % (
                unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
def strip_vs_map(seq_map):
    """Build a new dict from seq_map with each key replaced by
    unicode_data.strip_emoji_vs(key); values are kept as they are.

    Keys that collapse to the same stripped sequence overwrite one another
    in iteration order.
    """
    # Iterate with .items(): unlike .iteritems() it exists on Python 3 too.
    return {
        unicode_data.strip_emoji_vs(k): v
        for k, v in seq_map.items()
    }
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Each problem found is printed on its own line with a 'coverage:' prefix.
    The function also mutates seq_to_filepath: for every alias that is not
    already present and whose target is present, an entry
    'alias:' + <target file path> is added under the alias sequence.

    Args:
      seq_to_filepath: dict mapping codepoint tuples to file paths; mutated
        as described above.
      unicode_version: passed as the 'age' argument to the unicode_data
        queries.
    """
    age = unicode_version

    # Our sequences with the variation selector stripped, mapped back to the
    # form we actually have a file for.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # On Python 3 range() is not a list, so it must be converted before
    # concatenation.
    special_cps = [ord('*'), ord('#'), ord(u'\u20e3')]
    special_cps += list(range(0x30, 0x3a))
    for cp in special_cps:
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
def _check_coverage(seq_to_filepath, unicode_version):
    """Ensure we have all and only the cps and sequences that we need for the
    font as of this version.

    Each problem found is printed on its own line with a 'coverage:' prefix.
    If any problem was found the process exits via SystemExit with a message
    explaining how to bypass the check.  The function also mutates
    seq_to_filepath: for every alias that is not already present and whose
    target is present, an entry 'alias:' + <target file path> is added under
    the alias sequence.

    Args:
      seq_to_filepath: dict mapping codepoint tuples to file paths; mutated
        as described above.
      unicode_version: NOTE(review): not used by any check in this body;
        confirm whether the unicode_data queries should be restricted to
        this version.

    Raises:
      SystemExit: if at least one coverage problem was reported.
    """
    # Local import so the block does not depend on a file-level 'import sys'.
    import sys

    coverage_pass = True

    # Our sequences with the variation selector stripped, mapped back to the
    # form we actually have a file for.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(f'coverage: alias {alias_str} missing target {target_str}')
            coverage_pass = False
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(
                f'coverage: alias {alias_str} already exists as '
                f'{target_str} ({seq_name(v)})')
            coverage_pass = False
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji())
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            # Report the codepoint in hex, and pass a default name so an
            # unnamed codepoint is reported rather than aborting the check.
            cp_name = unicode_data.name(cp, '<no name>')
            print(f'coverage: missing single {cp:04x} ({cp_name})')
            coverage_pass = False

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print(
                f'coverage: missing special {cp:04x} '
                f'({unicode_data.name(cp)})')
            coverage_pass = False

    # combining sequences
    comb_seq_to_name = sorted(unicode_data._emoji_sequence_data.items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print(
                    'coverage: missing combining sequence '
                    f'{unicode_data.seq_to_string(seq)} ({name})')
                coverage_pass = False

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things
    # that don't start with our prefix so 'unknown_flag' would be excluded by
    # default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
        coverage_pass = False

    if not coverage_pass:
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist (e.g. under -S).
        sys.exit(
            "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
        )