def _list_details(start_cp, limit_cp, defined_cps, defined_count, details):
  """Print up to `details` defined codepoints in [start_cp, limit_cp).

  Lists the first `details - 1` defined cps, then always shows the last
  defined cp in the range, with a '...' marker if any defined cps were
  skipped in between.  `defined_count` is the total number of defined cps
  and is used both as a limit and as the printed index of the final cp.
  """
  num = 0
  initial_cp = start_cp
  # Print the leading defined cps, leaving room for the final one.
  while num < details - 1 and num < defined_count:
    if initial_cp in defined_cps:
      print '%13d %04x %s' % (
          num + 1, initial_cp, unicode_data.name(initial_cp, '(unnamed)'))
      num += 1
    initial_cp += 1
  if num < defined_count:
    # Scan backwards from the end of the range for the last defined cp.
    final_cp = limit_cp - 1
    final_name = None
    while final_cp >= initial_cp:
      if final_cp in defined_cps:
        final_name = unicode_data.name(final_cp, '(unnamed)')
        num += 1
        break
      final_cp -= 1
    if final_name and num < defined_count:
      # There are defined cps we didn't print; show an ellipsis if any lie
      # strictly between the last printed cp and the final one.
      middle_cp = final_cp - 1
      while middle_cp >= initial_cp:
        if middle_cp in defined_cps:
          print '%13s' % '...'
          break
        middle_cp -= 1
    if final_name:
      print '%13d %04x %s' % (defined_count, final_cp, final_name)
def report_compare(compare_result, detailed=True):
  """Print a per-script added/removed report for a cmap comparison.

  `compare_result` is a (compare, base_cmap_data, target_cmap_data) tuple;
  `compare` maps script to (added, removed) codepoint collections.  When
  `detailed` is true, each added/removed cp is listed with its name.
  """
  compare, base_cmap_data, target_cmap_data = compare_result
  base_map = cmap_data.create_map_from_table(base_cmap_data.table)
  target_map = cmap_data.create_map_from_table(target_cmap_data.table)
  base_title = title_from_metadata(base_cmap_data.meta)
  target_title = title_from_metadata(target_cmap_data.meta)
  print 'base: %s' % base_title
  print 'target: %s' % target_title
  for script in sorted(compare):
    added, removed = compare[script]
    # Only report scripts that actually changed.
    if added or removed:
      name = base_map[script].name
      print '%s # %s' % (script, name)
      if added:
        print ' added (%d): %s' % (
            len(added), lint_config.write_int_ranges(added))
        if detailed:
          for cp in sorted(added):
            print ' %6s %s' % (
                '%04x' % cp, unicode_data.name(cp, ''))
      if removed:
        print ' removed (%d): %s' % (
            len(removed), lint_config.write_int_ranges(removed))
        if detailed:
          for cp in sorted(removed):
            print ' %6s %s' % (
                '%04x' % cp, unicode_data.name(cp, ''))
def _print_detailed(cps, inverted_target=None):
  """Print one line per cp grouped under Unicode block headers.

  Each line shows hex cp, script, category, age, name, script extensions,
  and (when `inverted_target`, a cp -> scripts map, is given) either
  '!missing' or the scripts the cp belongs to in the target.
  """
  last_block = None
  for cp in sorted(cps):
    block = unicode_data.block(cp)
    # Emit a header whenever we cross into a new block.
    if block != last_block:
      print ' %s' % block
      last_block = block
    script = unicode_data.script(cp)
    # Extensions beyond the primary script, if any.
    extensions = unicode_data.script_extensions(cp) - set([script])
    if extensions:
      extensions = ' (%s)' % ','.join(sorted(extensions))
    else:
      extensions = ''
    if not inverted_target:
      extra = ''
    elif cp not in inverted_target:
      extra = ' !missing'
    else:
      scripts = sorted(inverted_target[cp])
      # Abbreviate long script lists: first three, then the last.
      if len(scripts) > 3:
        script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1]
      else:
        script_text = ', '.join(scripts)
      extra = ' (in %s)' % script_text
    print ' %6s %4s %2s %3s %s%s%s' % (
        '%04x' % cp, script, unicode_data.category(cp),
        unicode_data.age(cp), unicode_data.name(cp, ''), extensions, extra)
def seq_name(seq):
  """Return the name of an emoji sequence, or None if unknown.

  Lazily builds `_namedata`, a list of sequence->name maps (combining,
  flag, modifier, zwj), each keyed by the sequence with emoji variation
  selectors stripped.  Single-cp sequences fall back to the cp name.
  """
  global _namedata
  if not _namedata:
    # Build the cache once; keys are stripped of emoji VS so lookups can
    # match regardless of presentation-selector differences.
    def strip_vs_map(seq_map):
      return {
          unicode_data.strip_emoji_vs(k): v
          for k, v in seq_map.iteritems()}
    _namedata = [
        strip_vs_map(unicode_data.get_emoji_combining_sequences()),
        strip_vs_map(unicode_data.get_emoji_flag_sequences()),
        strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
        strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
    ]
  if len(seq) == 1:
    return unicode_data.name(seq[0], None)
  for data in _namedata:
    if seq in data:
      return data[seq]
  # Retry with the variation selector stripped from the query sequence.
  if EMOJI_VS in seq:
    non_vs_seq = unicode_data.strip_emoji_vs(seq)
    for data in _namedata:
      if non_vs_seq in data:
        return data[non_vs_seq]
  return None
def _char_name(cp):
  """Return a display name for cp, or None for cps that need no label.

  Regional indicators get a 'RIS <letter>' label; Fitzpatrick modifiers,
  ZWJ (200d) and the emoji VS (fe0f) are unlabeled.
  """
  if _is_regional_indicator(cp):
    return 'RIS ' + _regional_indicator_letter(cp)
  # Skin-tone modifiers, ZWJ, and the emoji variation selector get no name.
  if _is_fitzpatrick(cp) or cp in (0x200d, 0xfe0f):
    return None
  return unicode_data.name(cp, '<?>')
def _report_info(title, cps_list): if not cps_list: return print '%s %d:' % (title, len(cps_list)) for cps in sorted(cps_list): print ' %s (%s)' % ( '_'.join('%04x' % cp for cp in cps), ','.join(unicode_data.name(cp, '') for cp in cps))
def _format_set(char_set, name, filename): lines = ['%s = {' % name] for cp in sorted(char_set): name = unicode_data.name(cp) lines.append(' 0x%04X, # %s %s' % (cp, unichr(cp), name)) lines.append('}\n') with codecs.open(filename, 'w', 'UTF-8') as f: f.write('\n'.join(lines)) print 'wrote', filename
def show_cps_by_block(cps): print '%d missing codepoints' % len(cps) block = None for cp in sorted(cps): new_block = unicode_data.block(cp) if new_block != block: print '# %s' % new_block block = new_block print '%5s %s' % ('%04x' % cp, unicode_data.name(cp))
def _print_detailed(cps, inverted_target=None):
  """Print one line per cp under block headers, collapsing undefined runs.

  Like the simple detailed printer, but cps whose script is 'Zzzz'
  (undefined) are accumulated into contiguous runs and reported as a
  single '<n undefined>' line instead of individually.
  """
  last_block = None
  # Inclusive bounds of the current run of undefined cps; -1 means no run.
  undefined_start = -1
  undefined_end = -1

  def show_undefined(start, end):
    # Flush a pending undefined run, if any.
    if start >= 0:
      if end > start:
        # NOTE(review): 'end - start - 1' looks like it under-counts an
        # inclusive run (expected end - start + 1) -- confirm intent.
        print ' %04x-%04x Zzzz <%d undefined>' % (
            start, end, end - start - 1)
      else:
        print ' %04x Zzzz <1 undefined>' % start

  for cp in sorted(cps):
    block = unicode_data.block(cp)
    # Flush the run when the block changes or the run is non-contiguous.
    if block != last_block or (undefined_end > -1 and cp > undefined_end + 1):
      show_undefined(undefined_start, undefined_end)
      undefined_start, undefined_end = -1, -1
    if block != last_block:
      print ' %s' % block
      last_block = block
    script = unicode_data.script(cp)
    if script == 'Zzzz':
      # Extend or start an undefined run; defer printing.
      if undefined_start >= 0:
        undefined_end = cp
      else:
        undefined_start, undefined_end = cp, cp
      continue
    # A defined cp terminates any pending undefined run.
    show_undefined(undefined_start, undefined_end)
    undefined_start, undefined_end = -1, -1
    extensions = unicode_data.script_extensions(cp) - set([script])
    if extensions:
      extensions = ' (%s)' % ','.join(sorted(extensions))
    else:
      extensions = ''
    if not inverted_target:
      extra = ''
    elif cp not in inverted_target:
      extra = ' !missing'
    else:
      scripts = sorted(inverted_target[cp])
      if len(scripts) > 3:
        script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1]
      else:
        script_text = ', '.join(scripts)
      extra = ' (in %s)' % script_text
    print ' %6s %4s %2s %3s %s%s%s' % (
        '%04x' % cp, script, unicode_data.category(cp),
        unicode_data.age(cp), unicode_data.name(cp, ''), extensions, extra)
  # Flush a trailing undefined run.
  show_undefined(undefined_start, undefined_end)
def _get_name(key_tuple):
  """Return the HTML name-cell markup for an emoji key tuple.

  Multi-cp sequences get an empty name; proposed emoji are flagged.
  """
  CELL_PREFIX = '<td class="name">'
  name = ''
  if len(key_tuple) == 1:
    cp = key_tuple[0]
    if cp in unicode_data.proposed_emoji_cps():
      name = '(proposed) ' + unicode_data.proposed_emoji_name(cp)
    else:
      name = unicode_data.name(cp, '(error)')
  return CELL_PREFIX + name
def _flagged_name(cp, flag_sets):
  """Prepend any flags to cp's unicode name, and return.

  Flag_sets is a map from flag name to a tuple of cp set and boolean.
  True means add flag if cp in set, False means add flag if it is not in
  the set."""
  name = unicode_data.name(cp)
  flags = [
      flag for flag, (cp_set, want_in) in sorted(flag_sets.iteritems())
      if (cp in cp_set) == want_in]
  if flags:
    name = '(%s) %s' % (', '.join(flags), name)
  return name
def _get_char_names(charset):
  """Return a map from display name to cp for each cp in charset.

  Names are '%04x' when the cp has no usable name (or is '<control>'),
  otherwise '%04x <lowercased name>'.
  """
  name_map = {}
  if charset:
    for cp in charset:
      try:
        name = unicode_data.name(cp)
      # Was a bare `except:`, which also swallowed SystemExit and
      # KeyboardInterrupt; keep the fallback but catch only real errors.
      except Exception:
        name = None
      if not name or name == '<control>':
        name = '%04x' % cp
      else:
        name = '%04x %s' % (cp, name.lower())
      name_map[name] = cp
  return name_map
def generate_names(srcdir, outfile, force):
  """Scan emoji_u*.png files in srcdir and write a JSON-like name list.

  Refuses to overwrite an existing outfile unless `force`; creates the
  output's parent directory if needed.  Skips sequences rejected by
  keep_sequence() and reports them at the end.
  """
  if not path.isdir(srcdir):
    print "%s is not a directory" % srcdir
    return
  if path.exists(outfile):
    if not force:
      print "%s already exists" % outfile
      return
    if not path.isfile(outfile):
      print "%s is not a file" % outfile
      return
  else:
    # Ensure the output directory exists before writing.
    parent = path.dirname(outfile)
    if parent and not os.path.exists(parent):
      os.makedirs(parent)
  output = {}
  skipped = []
  for f in glob.glob(path.join(srcdir, "emoji_u*.png")):
    fname = path.basename(f)
    # Filename encodes the cp sequence: emoji_uXXXX_YYYY....png
    parts = fname[7:-4].split("_")
    # Omit emoji presentation variation selector, it should not be necessary.
    cps = [int(part, 16) for part in parts if part != "fe0f"]
    if not keep_sequence(cps):
      skipped.append(cps)
      continue
    sequence = "".join("&#x%x;" % cp for cp in cps)
    if len(cps) == 1:
      name = unicode_name(cps[0])
    elif is_flag_sequence(cps):
      name = flag_sequence_name(cps)
    else:
      name = ""
    output[tuple(cps)] = (fname, sequence, name)
  with open(outfile, "w") as f:
    f.write("[\n")
    for k in sorted(output):
      f.write(' {"image":"%s", "sequence":"%s", "name":"%s"},\n' % output[k])
    f.write("]\n")
  print "wrote %s" % outfile
  if skipped:
    print "skipped %d images:" % len(skipped)
    for cps in sorted(skipped):
      print " %s (%s)" % (
          "_".join("%04x" % cp for cp in cps),
          ",".join(unicode_data.name(cp, "") for cp in cps))
# NOTE(review): this snippet begins mid-definition (the tail of a
# _FITZ_START-keyed dict started outside this view) and is truncated at the
# end inside _create_extra_sequence_names.
    _FITZ_START + 3: '5',
    _FITZ_START + 4: '6'
}


def _fitz_sequence_name(cps):
  # Name skin-tone sequences after the base emoji only; the commented-out
  # form also appended the Fitzpatrick type.
  # return '%s Type %s' % (_unicode_name(cps[0]), _FITZ_NAMES[cps[1]])
  return _unicode_name(cps[0])


def _is_keycap_sequence(cps):
  # A keycap sequence is a base cp followed by COMBINING ENCLOSING KEYCAP.
  return len(cps) == 2 and cps[1] == 0x20e3


# Digit keycap names with the leading 'DIGIT ' prefix (6 chars) stripped.
_KEYCAP_NAMES = {
    cp: unicode_data.name(cp)[6:] for cp in range(0x30, 0x30 + 10)
}


def _keycap_sequence_name(cps):
  name = _KEYCAP_NAMES.get(cps[0], unicode_data.name(cps[0]))
  return 'Keycap ' + name.title()


def _create_extra_sequence_names():
  # Truncated in this view; only the leading cp constants are visible.
  BOY = 0x1f466
  GIRL = 0x1f467
  MAN = 0x1f468
  WOMAN = 0x1f469
  HEART = 0x2764 # Heavy Black Heart
def _build_text(name_map, initial_text=""):
  """Interactively build a string of characters chosen by name substring.

  Commands: quit, help, names, dump, clear, write; any other input is
  matched as a substring against the keys of name_map (name -> cp).
  Returns the accumulated text.

  NOTE(review): this version uses py3-style input()/print() but also
  unichr(), which exists only in Python 2 -- confirm the intended runtime.
  """
  text = initial_text
  print("build text using map of length %d" % len(name_map))
  while True:
    line = input("> ")
    if not line:
      continue
    if line == "quit":
      break
    if line == "help":
      _help()
      continue
    if line == "names":
      print("names:\n " + "\n ".join(sorted(name_map.keys())))
      continue
    if line == "dump":
      print("dump: '%s'" % text)
      for cp in text:
        print("%06x %s" % (ord(cp), unicode_data.name(ord(cp))))
      continue
    if line == "clear":
      text = ""
      continue
    if line == "write":
      line = input("file name> ")
      if line:
        _write_text(line, text)
      continue
    # Substring match against all names.
    matches = []
    for name, cp in sorted(name_map.items()):
      if line in name:
        matches.append(name)
    if not matches:
      print('no match for "%s"' % line)
      continue
    if len(matches) == 1:
      print(matches[0])
      text += unichr(name_map[matches[0]])
      continue
    # if we match a full line, then use that
    if line in matches:
      print(line)
      text += unichr(name_map[line])
      continue
    new_matches = []
    for m in matches:
      if line in m.split(" "):
        new_matches.append(m)
    # if we match a full word, and only one line has this full word, use that
    if len(new_matches) == 1:
      print(new_matches[0])
      text += unichr(name_map[new_matches[0]])
      continue
    # Ambiguous: let the user pick by index, or 'q' to skip.
    select_multiple = True
    while select_multiple:
      print("multiple matches:\n " + "\n ".join(
          "[%2d] %s" % (i, n) for i, n in enumerate(matches)))
      while True:
        line = input("0-%d or q to skip> " % (len(matches) - 1))
        if line == "q":
          select_multiple = False
          break
        try:
          n = int(line)
          break
        except ValueError:
          continue
      if not select_multiple: # q
        break
      if n < 0 or n >= len(matches):
        print("%d out of range" % n)
        continue
      text += unichr(name_map[matches[n]])
      select_multiple = False
  print("done.")
  return text
def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for
  the font as of this version.

  Reports missing singles, special chars, combining/flag/modifier/zwj
  sequences, and the unknown-flag PUA.  Also resolves aliases, adding
  'alias:<path>' entries into seq_to_filepath (mutates its argument).
  """
  age = unicode_version
  # Map vs-stripped sequences back to their canonical keyed form.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k
  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename
  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))
  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
  # combining sequences
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).iteritems())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' % (
            unicode_data.seq_to_string(seq), name))
  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).iteritems())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences. normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)
  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')
def _character_name(code):
  """Returns the printable name of a character."""
  # NOTE(review): other call sites in this file pass an int codepoint to
  # unicode_data.name(); here the character unichr(code) is passed instead.
  # Confirm which argument type this snippet's name() helper expects.
  return unicode_data.name(unichr(code), '<Unassigned>')
def _unicode_name(cp):
  """Return cp's title-cased unicode name with display fixups applied."""
  fixed = unicode_data.name(cp).title()
  for old, new in _NAME_FIXES.iteritems():
    fixed = fixed.replace(old, new)
  return fixed
# NOTE(review): this snippet begins mid-definition (the tail of a
# _FITZ_START-keyed dict started outside this view) and is truncated at the
# end inside _create_extra_sequence_names.
    _FITZ_START+1: '3',
    _FITZ_START+2: '4',
    _FITZ_START+3: '5',
    _FITZ_START+4: '6'
}


def _fitz_sequence_name(cps):
  # Name skin-tone sequences after the base emoji only; the commented-out
  # form also appended the Fitzpatrick type.
  # return '%s Type %s' % (_unicode_name(cps[0]), _FITZ_NAMES[cps[1]])
  return _unicode_name(cps[0])


def _is_keycap_sequence(cps):
  # A keycap sequence is a base cp followed by COMBINING ENCLOSING KEYCAP.
  return len(cps) == 2 and cps[1] == 0x20e3


# Digit keycap names with the leading 'DIGIT ' prefix (6 chars) stripped.
_KEYCAP_NAMES = {cp: unicode_data.name(cp)[6:] for cp in range(0x30, 0x30+10)}


def _keycap_sequence_name(cps):
  return 'Keycap ' + _KEYCAP_NAMES.get(cps[0], unicode_data.name(cps[0]))


def _create_extra_sequence_names():
  # Truncated in this view; only the leading cp constants are visible.
  BOY = 0x1f466
  GIRL = 0x1f467
  MAN = 0x1f468
  WOMAN = 0x1f469
  HEART = 0x2764 # Heavy Black Heart
  KISS_MARK = 0x1f48b
  EYE = 0x1f441
  SPEECH = 0x1f5e8
def generate_names(
    src_dir, dst_dir, skip_limit=20, omit_groups=None, pretty_print=False,
    verbose=False):
  """Generate dst_dir/data.json naming the emoji images found in src_dir.

  Groups come from unicode_data; `omit_groups` names groups to exclude.
  Sequences without an image are skipped (up to `skip_limit`, after which
  an exception is raised; a negative limit disables the check).
  """
  srcdir = tool_utils.resolve_path(src_dir)
  if not path.isdir(srcdir):
    print('%s is not a directory' % src_dir, file=sys.stderr)
    return
  if omit_groups:
    # Validate group names before doing any work.
    unknown_groups = set(omit_groups) - set(unicode_data.get_emoji_groups())
    if unknown_groups:
      print('did not recognize %d group%s: %s' % (
          len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
          ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)),
          file=sys.stderr)
      print('valid groups are:\n %s' % (
          '\n '.join(g for g in unicode_data.get_emoji_groups())),
          file=sys.stderr)
      return
    print('omitting %d group%s: %s' % (
        len(omit_groups), '' if len(omit_groups) == 1 else 's',
        ', '.join('"%s"' % g for g in omit_groups)))
  else: # might be None
    print('keeping all groups')
    omit_groups = []
  # make sure the destination exists
  dstdir = tool_utils.ensure_dir_exists(
      tool_utils.resolve_path(dst_dir))
  # _get_image_data returns canonical cp sequences
  print('src dir:', srcdir)
  seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
  print('seq to file has %d sequences' % len(seq_to_file))
  # Aliases add non-gendered versions using gendered images for the most part.
  # But when we display the images, we don't distinguish genders in the
  # naming, we rely on the images-- so these look redundant. So we
  # intentionally don't generate images for these.
  # However, the alias file also includes the flag aliases, which we do want,
  # and it also fails to exclude the unknown flag pua (since it doesn't
  # map to anything), so we need to adjust for this.
  canonical_aliases = generate_emoji_html._get_canonical_aliases()
  aliases = set([
      cps for cps in canonical_aliases.keys()
      if not unicode_data.is_regional_indicator_seq(cps)])
  aliases.add((0xfe82b,)) # unknown flag PUA
  excluded = aliases | generate_emoji_html._get_canonical_excluded()
  # The flag aliases have distinct names, so we _do_ want to show them
  # multiple times.
  to_add = {}
  for seq in canonical_aliases:
    if unicode_data.is_regional_indicator_seq(seq):
      replace_seq = canonical_aliases[seq]
      if seq in seq_to_file:
        print('warning, alias %s has file %s' % (
            unicode_data.regional_indicator_seq_to_string(seq),
            seq_to_file[seq]))
        continue
      replace_file = seq_to_file.get(replace_seq)
      if replace_file:
        to_add[seq] = replace_file
  seq_to_file.update(to_add)
  data = []
  last_skipped_group = None
  skipcount = 0
  for group in unicode_data.get_emoji_groups():
    if group in omit_groups:
      continue
    name_data = []
    for seq in unicode_data.get_emoji_in_group(group):
      if seq in excluded:
        continue
      seq_file = seq_to_file.get(seq, None)
      if seq_file is None:
        skipcount += 1
        if verbose:
          if group != last_skipped_group:
            print('group %s' % group)
            last_skipped_group = group
          print(' %s (%s)' % (
              unicode_data.seq_to_string(seq),
              ', '.join(unicode_data.name(cp, 'x') for cp in seq)))
        if skip_limit >= 0 and skipcount > skip_limit:
          raise Exception('skipped too many items')
      else:
        name_data.append(_name_data(seq, seq_file))
    data.append({'category': group, 'emojis': name_data})
  outfile = path.join(dstdir, 'data.json')
  with open(outfile, 'w') as f:
    indent = 2 if pretty_print else None
    separators = None if pretty_print else (',', ':')
    json.dump(data, f, indent=indent, separators=separators)
  print('wrote %s' % outfile)
def _build_text(name_map, initial_text=''):
  """Interactively build a string of characters chosen by name substring.

  Commands: quit, help, names, dump, clear, write; any other input is
  matched as a substring against the keys of name_map (name -> cp).
  Returns the accumulated text.  (Python 2: raw_input/unichr.)
  """
  text = initial_text
  print 'build text using map of length %d' % len(name_map)
  while True:
    line = raw_input('> ')
    if not line:
      continue
    if line == 'quit':
      break
    if line == 'help':
      _help()
      continue
    if line == 'names':
      print 'names:\n ' + '\n '.join(sorted(name_map.keys()))
      continue
    if line == 'dump':
      print 'dump: \'%s\'' % text
      for cp in text:
        print '%06x %s' % (ord(cp), unicode_data.name(ord(cp)))
      continue
    if line == 'clear':
      text = ''
      continue
    if line == 'write':
      line = raw_input('file name> ')
      if line:
        _write_text(line, text)
      continue
    # Substring match against all names.
    matches = []
    for name, cp in sorted(name_map.iteritems()):
      if line in name:
        matches.append(name)
    if not matches:
      print 'no match for "%s"' % line
      continue
    if len(matches) == 1:
      print matches[0]
      text += unichr(name_map[matches[0]])
      continue
    # if we match a full line, then use that
    if line in matches:
      print line
      text += unichr(name_map[line])
      continue
    new_matches = []
    for m in matches:
      if line in m.split(' '):
        new_matches.append(m)
    # if we match a full word, and only one line has this full word, use that
    if len(new_matches) == 1:
      print new_matches[0]
      text += unichr(name_map[new_matches[0]])
      continue
    # Ambiguous: let the user pick by index, or 'q' to skip.
    select_multiple = True
    while select_multiple:
      print 'multiple matches:\n ' + '\n '.join(
          '[%2d] %s' % (i, n) for i, n in enumerate(matches))
      while True:
        line = raw_input('0-%d or q to skip> ' % (len(matches) - 1))
        if line == 'q':
          select_multiple = False
          break
        try:
          n = int(line)
          break
        except ValueError:
          continue
      if not select_multiple: # q
        break
      if n < 0 or n >= len(matches):
        print '%d out of range' % n
        continue
      text += unichr(name_map[matches[n]])
      select_multiple = False
  print 'done.'
  return text
def _check_coverage(seq_to_filepath, unicode_version):
  """Ensure we have all and only the cps and sequences that we need for
  the font as of this version.

  Reports missing singles, special chars, combining/flag/modifier/zwj
  sequences, and the unknown-flag PUA.  Also resolves aliases, adding
  'alias:<path>' entries into seq_to_filepath (mutates its argument).
  """
  age = unicode_version
  # Map vs-stripped sequences back to their canonical keyed form.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k
  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[
        non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename
  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print('coverage: missing single %04x (%s)' % (
          cp, unicode_data.name(cp, '<no name>')))
  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + range(0x30, 0x3a):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))
  # combining sequences
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).iteritems())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' % (
            unicode_data.seq_to_string(seq), name))
  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).iteritems())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).iteritems())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences. normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)
  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).iteritems()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))
  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')
def test_name(self):
  """Tests the name() method."""
  # 0x118FF is the last cp of the Warang Citi block; a stable spot check.
  self.assertEqual('WARANG CITI OM', unicode_data.name(0x118FF))
def _keycap_sequence_name(cps):
  """Return 'Keycap <Title-Cased Base Name>' for a keycap sequence."""
  base = cps[0]
  base_name = _KEYCAP_NAMES.get(base, unicode_data.name(base))
  return 'Keycap ' + base_name.title()
def generate_names(src_dir,
                   dst_dir,
                   skip_limit=20,
                   omit_groups=None,
                   pretty_print=False,
                   verbose=False):
  """Generate dst_dir/data.json naming the emoji images found in src_dir.

  Groups come from unicode_data; `omit_groups` names groups to exclude.
  Sequences without an image are skipped (up to `skip_limit`, after which
  an exception is raised; a negative limit disables the check).
  """
  srcdir = tool_utils.resolve_path(src_dir)
  if not path.isdir(srcdir):
    print('%s is not a directory' % src_dir, file=sys.stderr)
    return
  if omit_groups:
    # Validate group names before doing any work.
    unknown_groups = set(omit_groups) - set(
        unicode_data.get_emoji_groups())
    if unknown_groups:
      print(
          'did not recognize %d group%s: %s' %
          (len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
           ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)),
          file=sys.stderr)
      print('valid groups are:\n %s' %
            ('\n '.join(g for g in unicode_data.get_emoji_groups())),
            file=sys.stderr)
      return
    print('omitting %d group%s: %s' %
          (len(omit_groups), '' if len(omit_groups) == 1 else 's',
           ', '.join('"%s"' % g for g in omit_groups)))
  else: # might be None
    print('keeping all groups')
    omit_groups = []
  # make sure the destination exists
  dstdir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))
  # _get_image_data returns canonical cp sequences
  print('src dir:', srcdir)
  seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
  print('seq to file has %d sequences' % len(seq_to_file))
  # Aliases add non-gendered versions using gendered images for the most part.
  # But when we display the images, we don't distinguish genders in the
  # naming, we rely on the images-- so these look redundant. So we
  # intentionally don't generate images for these.
  # However, the alias file also includes the flag aliases, which we do want,
  # and it also fails to exclude the unknown flag pua (since it doesn't
  # map to anything), so we need to adjust for this.
  canonical_aliases = generate_emoji_html._get_canonical_aliases()
  aliases = set([
      cps for cps in canonical_aliases.keys()
      if not unicode_data.is_regional_indicator_seq(cps)
  ])
  aliases.add((0xfe82b, )) # unknown flag PUA
  excluded = aliases | generate_emoji_html._get_canonical_excluded()
  # The flag aliases have distinct names, so we _do_ want to show them
  # multiple times.
  to_add = {}
  for seq in canonical_aliases:
    if unicode_data.is_regional_indicator_seq(seq):
      replace_seq = canonical_aliases[seq]
      if seq in seq_to_file:
        print('warning, alias %s has file %s' %
              (unicode_data.regional_indicator_seq_to_string(seq),
               seq_to_file[seq]))
        continue
      replace_file = seq_to_file.get(replace_seq)
      if replace_file:
        to_add[seq] = replace_file
  seq_to_file.update(to_add)
  data = []
  last_skipped_group = None
  skipcount = 0
  for group in unicode_data.get_emoji_groups():
    if group in omit_groups:
      continue
    name_data = []
    for seq in unicode_data.get_emoji_in_group(group):
      if seq in excluded:
        continue
      seq_file = seq_to_file.get(seq, None)
      if seq_file is None:
        skipcount += 1
        if verbose:
          if group != last_skipped_group:
            print('group %s' % group)
            last_skipped_group = group
          print(' %s (%s)' % (unicode_data.seq_to_string(seq), ', '.join(
              unicode_data.name(cp, 'x') for cp in seq)))
        if skip_limit >= 0 and skipcount > skip_limit:
          raise Exception('skipped too many items')
      else:
        name_data.append(_name_data(seq, seq_file))
    data.append({'category': group, 'emojis': name_data})
  outfile = path.join(dstdir, 'data.json')
  with open(outfile, 'w') as f:
    indent = 2 if pretty_print else None
    separators = None if pretty_print else (',', ':')
    json.dump(data, f, indent=indent, separators=separators)
  print('wrote %s' % outfile)
def _keycap_sequence_name(cps):
  """Return 'Keycap ' plus the cached or computed name of the base cp."""
  keycap_name = _KEYCAP_NAMES.get(cps[0], unicode_data.name(cps[0]))
  return 'Keycap ' + keycap_name
def _build_text(name_map, initial_text=''):
  """Interactively build a string of characters chosen by name substring.

  Commands: quit, help, names, dump, clear, write; any other input is
  matched as a substring against the keys of name_map (name -> cp).
  Returns the accumulated text.  (Python 2: raw_input/unichr.)
  """
  text = initial_text
  print 'build text using map of length %d' % len(name_map)
  while True:
    line = raw_input('> ')
    if not line:
      continue
    if line == 'quit':
      break
    if line == 'help':
      _help()
      continue
    if line == 'names':
      print 'names:\n ' + '\n '.join(sorted(name_map.keys()))
      continue
    if line == 'dump':
      print 'dump: \'%s\'' % text
      for cp in text:
        print '%06x %s' % (ord(cp), unicode_data.name(ord(cp)))
      continue
    if line == 'clear':
      text = ''
      continue
    if line == 'write':
      line = raw_input('file name> ')
      if line:
        _write_text(line, text)
      continue
    # Substring match against all names.
    matches = []
    for name, cp in sorted(name_map.iteritems()):
      if line in name:
        matches.append(name)
    if not matches:
      print 'no match for "%s"'% line
      continue
    if len(matches) == 1:
      print matches[0]
      text += unichr(name_map[matches[0]])
      continue
    # if we match a full line, then use that
    if line in matches:
      print line
      text += unichr(name_map[line])
      continue
    new_matches = []
    for m in matches:
      if line in m.split(' '):
        new_matches.append(m)
    # if we match a full word, and only one line has this full word, use that
    if len(new_matches) == 1:
      print new_matches[0]
      text += unichr(name_map[new_matches[0]])
      continue
    # Ambiguous: let the user pick by index, or 'q' to skip.
    select_multiple = True
    while select_multiple:
      print 'multiple matches:\n ' + '\n '.join(
          '[%2d] %s' % (i, n) for i, n in enumerate(matches))
      while True:
        line = raw_input('0-%d or q to skip> ' % (len(matches) - 1))
        if line == 'q':
          select_multiple = False
          break
        try:
          n = int(line)
          break
        except ValueError:
          continue
      if not select_multiple: # q
        break
      if n < 0 or n >= len(matches):
        print '%d out of range' % n
        continue
      text += unichr(name_map[matches[n]])
      select_multiple = False
  print 'done.'
  return text