def generate_names(src_dir, dst_dir, skip_limit=20, omit_groups=None, pretty_print=False, verbose=False):
    """Generate a data.json file in dst_dir naming the emoji images in src_dir.

    Builds a JSON list of {'category': group, 'emojis': [...]} objects, one per
    emoji group reported by unicode_data, where each emoji entry is produced by
    _name_data from the sequence and its image file.

    Args:
      src_dir: directory containing emoji_u*.png image files.
      dst_dir: output directory; created if it does not already exist.
      skip_limit: raise once more than this many sequences have no image file;
        a negative value disables the limit.
      omit_groups: optional collection of emoji group names to exclude;
        None means keep all groups.
      pretty_print: if True, write indented JSON instead of compact JSON.
      verbose: if True, report each sequence that has no image file.

    Raises:
      Exception: if more than skip_limit sequences lack an image file.
    """
    # NOTE(review): an identical definition of generate_names appears later in
    # this file and shadows this one at import time — confirm which copy is
    # intended and remove the other.
    srcdir = tool_utils.resolve_path(src_dir)
    if not path.isdir(srcdir):
        print('%s is not a directory' % src_dir, file=sys.stderr)
        return

    if omit_groups:
        # Validate the requested omissions against the known group names.
        unknown_groups = set(omit_groups) - set(
            unicode_data.get_emoji_groups())
        if unknown_groups:
            print(
                'did not recognize %d group%s: %s' % (
                    len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
                    ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)),
                file=sys.stderr)
            print('valid groups are:\n %s' % (
                '\n '.join(g for g in unicode_data.get_emoji_groups())),
                file=sys.stderr)
            return
        print('omitting %d group%s: %s' % (
            len(omit_groups), '' if len(omit_groups) == 1 else 's',
            ', '.join('"%s"' % g for g in omit_groups)))
    else:  # might be None
        print('keeping all groups')
        omit_groups = []

    # make sure the destination exists
    dstdir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

    # _get_image_data returns canonical cp sequences
    print('src dir:', srcdir)
    seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
    print('seq to file has %d sequences' % len(seq_to_file))

    # Aliases add non-gendered versions using gendered images for the most part.
    # But when we display the images, we don't distinguish genders in the
    # naming, we rely on the images-- so these look redundant. So we
    # intentionally don't generate images for these.
    # However, the alias file also includes the flag aliases, which we do want,
    # and it also fails to exclude the unknown flag pua (since it doesn't
    # map to anything), so we need to adjust for this.
    canonical_aliases = generate_emoji_html._get_canonical_aliases()
    aliases = set([
        cps for cps in canonical_aliases.keys()
        if not unicode_data.is_regional_indicator_seq(cps)])
    aliases.add((0xfe82b,))  # unknown flag PUA
    excluded = aliases | generate_emoji_html._get_canonical_excluded()

    # The flag aliases have distinct names, so we _do_ want to show them
    # multiple times.
    to_add = {}
    for seq in canonical_aliases:
        if unicode_data.is_regional_indicator_seq(seq):
            replace_seq = canonical_aliases[seq]
            if seq in seq_to_file:
                # Alias unexpectedly has its own image; keep it, don't remap.
                print('warning, alias %s has file %s' % (
                    unicode_data.regional_indicator_seq_to_string(seq),
                    seq_to_file[seq]))
                continue
            replace_file = seq_to_file.get(replace_seq)
            if replace_file:
                to_add[seq] = replace_file
    seq_to_file.update(to_add)

    data = []
    last_skipped_group = None  # tracks group header already printed in verbose mode
    skipcount = 0
    for group in unicode_data.get_emoji_groups():
        if group in omit_groups:
            continue
        name_data = []
        for seq in unicode_data.get_emoji_in_group(group):
            if seq in excluded:
                continue
            seq_file = seq_to_file.get(seq, None)
            if seq_file is None:
                # No image for this sequence; count it and possibly bail out.
                skipcount += 1
                if verbose:
                    if group != last_skipped_group:
                        print('group %s' % group)
                        last_skipped_group = group
                    print(' %s (%s)' % (
                        unicode_data.seq_to_string(seq),
                        ', '.join(unicode_data.name(cp, 'x') for cp in seq)))
                if skip_limit >= 0 and skipcount > skip_limit:
                    raise Exception('skipped too many items')
            else:
                name_data.append(_name_data(seq, seq_file))
        data.append({'category': group, 'emojis': name_data})

    outfile = path.join(dstdir, 'data.json')
    with open(outfile, 'w') as f:
        # Compact output unless pretty_print was requested.
        indent = 2 if pretty_print else None
        separators = None if pretty_print else (',', ':')
        json.dump(data, f, indent=indent, separators=separators)
    print('wrote %s' % outfile)
def generate_names(src_dir, dst_dir, skip_limit=20, omit_groups=None, pretty_print=False, verbose=False):
    """Generate a data.json file in dst_dir naming the emoji images in src_dir.

    Builds a JSON list of {'category': group, 'emojis': [...]} objects, one per
    emoji group reported by unicode_data, where each emoji entry is produced by
    _name_data from the sequence and its image file.

    Args:
      src_dir: directory containing emoji_u*.png image files.
      dst_dir: output directory; created if it does not already exist.
      skip_limit: raise once more than this many sequences have no image file;
        a negative value disables the limit.
      omit_groups: optional collection of emoji group names to exclude;
        None means keep all groups.
      pretty_print: if True, write indented JSON instead of compact JSON.
      verbose: if True, report each sequence that has no image file.

    Raises:
      Exception: if more than skip_limit sequences lack an image file.
    """
    srcdir = tool_utils.resolve_path(src_dir)
    if not path.isdir(srcdir):
        print('%s is not a directory' % src_dir, file=sys.stderr)
        return

    if omit_groups:
        # Validate the requested omissions against the known group names.
        unknown_groups = set(omit_groups) - set(unicode_data.get_emoji_groups())
        if unknown_groups:
            print('did not recognize %d group%s: %s' % (
                len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
                ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)),
                file=sys.stderr)
            # join accepts the iterable directly; no generator wrapper needed.
            print('valid groups are:\n %s' % (
                '\n '.join(unicode_data.get_emoji_groups())), file=sys.stderr)
            return
        print('omitting %d group%s: %s' % (
            len(omit_groups), '' if len(omit_groups) == 1 else 's',
            ', '.join('"%s"' % g for g in omit_groups)))
    else:  # might be None
        print('keeping all groups')
        omit_groups = []

    # make sure the destination exists
    dstdir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

    # _get_image_data returns canonical cp sequences
    print('src dir:', srcdir)
    seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
    print('seq to file has %d sequences' % len(seq_to_file))

    # Aliases add non-gendered versions using gendered images for the most part.
    # But when we display the images, we don't distinguish genders in the
    # naming, we rely on the images-- so these look redundant. So we
    # intentionally don't generate images for these.
    # However, the alias file also includes the flag aliases, which we do want,
    # and it also fails to exclude the unknown flag pua (since it doesn't
    # map to anything), so we need to adjust for this.
    canonical_aliases = generate_emoji_html._get_canonical_aliases()
    # Set comprehension instead of set([listcomp]); iterating the dict yields
    # its keys, so .keys() is unnecessary.
    aliases = {
        cps for cps in canonical_aliases
        if not unicode_data.is_regional_indicator_seq(cps)}
    aliases.add((0xfe82b,))  # unknown flag PUA
    excluded = aliases | generate_emoji_html._get_canonical_excluded()

    # The flag aliases have distinct names, so we _do_ want to show them
    # multiple times.
    to_add = {}
    for seq in canonical_aliases:
        if unicode_data.is_regional_indicator_seq(seq):
            replace_seq = canonical_aliases[seq]
            if seq in seq_to_file:
                # Alias unexpectedly has its own image; keep it, don't remap.
                print('warning, alias %s has file %s' % (
                    unicode_data.regional_indicator_seq_to_string(seq),
                    seq_to_file[seq]))
                continue
            replace_file = seq_to_file.get(replace_seq)
            if replace_file:
                to_add[seq] = replace_file
    seq_to_file.update(to_add)

    data = []
    last_skipped_group = None  # tracks group header already printed in verbose mode
    skipcount = 0
    for group in unicode_data.get_emoji_groups():
        if group in omit_groups:
            continue
        name_data = []
        for seq in unicode_data.get_emoji_in_group(group):
            if seq in excluded:
                continue
            seq_file = seq_to_file.get(seq)  # .get default is already None
            if seq_file is None:
                # No image for this sequence; count it and possibly bail out.
                skipcount += 1
                if verbose:
                    if group != last_skipped_group:
                        print('group %s' % group)
                        last_skipped_group = group
                    print(' %s (%s)' % (
                        unicode_data.seq_to_string(seq),
                        ', '.join(unicode_data.name(cp, 'x') for cp in seq)))
                if skip_limit >= 0 and skipcount > skip_limit:
                    # NOTE: generic Exception kept so existing callers that
                    # catch it keep working; a dedicated error type would be
                    # clearer if callers can be updated.
                    raise Exception('skipped too many items')
            else:
                name_data.append(_name_data(seq, seq_file))
        data.append({'category': group, 'emojis': name_data})

    outfile = path.join(dstdir, 'data.json')
    with open(outfile, 'w') as f:
        # Compact output unless pretty_print was requested.
        indent = 2 if pretty_print else None
        separators = None if pretty_print else (',', ':')
        json.dump(data, f, indent=indent, separators=separators)
    print('wrote %s' % outfile)