def _standard_name(seq):
    """Return a display name for the emoji codepoint sequence ``seq``.

  Uses the standard emoji name, with some algorithmic modifications:

  We want to ignore skin-tone modifiers (but of course if the sequence _is_
  the skin-tone modifier itself we keep that).  So we strip these so we can
  start with the generic name ignoring skin tone.

  Non-emoji that are turned into emoji using the emoji VS have '(emoji) '
  prepended to them, so strip that.

  Regional indicator symbol names are a bit long, so shorten them.

  Regional sequences are assumed to be ok as-is in terms of capitalization and
  punctuation, so no modifications are applied to them.

  After title-casing we make some English articles/prepositions lower-case
  again.  We also replace '&' with 'and'; Unicode seems rather fond of
  ampersand.

  NOTE(review): this function is defined twice in this file; the later
  definition shadows this one at import time."""

    if not unicode_data.is_skintone_modifier(seq[0]):
        seq = tuple(
            [cp for cp in seq if not unicode_data.is_skintone_modifier(cp)])
    name = unicode_data.get_emoji_sequence_name(seq)

    # Strip the "(emoji) " prefix given to emoji-VS presentation names.
    if name.startswith('(emoji) '):
        name = name[8:]

    if len(seq) == 1 and unicode_data.is_regional_indicator(seq[0]):
        return 'Regional Symbol ' + unicode_data.regional_indicator_to_ascii(
            seq[0])

    # Regional (flag/tag) sequence names are returned unmodified.
    if (unicode_data.is_regional_indicator_seq(seq)
            or unicode_data.is_regional_tag_seq(seq)):
        return name

    name = name.title()
    # Require space delimiting just in case...
    name = re.sub(r'\s&\s', ' and ', name)
    name = re.sub(
        # not \b at start because we retain capital at start of phrase.
        # Fixed the '(:?' typo to '(?:' — the inner group is meant to be
        # non-capturing; the typo also allowed ':A' to match after a space.
        r'(\s(?:A|And|From|In|Of|With|For))\b',
        lambda s: s.group(1).lower(),
        name)

    return name
def _standard_name(seq):
  """Return a display name for the emoji codepoint sequence ``seq``.

  Uses the standard emoji name, with some algorithmic modifications:

  We want to ignore skin-tone modifiers (but of course if the sequence _is_
  the skin-tone modifier itself we keep that).  So we strip these so we can
  start with the generic name ignoring skin tone.

  Non-emoji that are turned into emoji using the emoji VS have '(emoji) '
  prepended to them, so strip that.

  Regional indicator symbol names are a bit long, so shorten them.

  Regional sequences are assumed to be ok as-is in terms of capitalization and
  punctuation, so no modifications are applied to them.

  After title-casing we make some English articles/prepositions lower-case
  again.  We also replace '&' with 'and'; Unicode seems rather fond of
  ampersand.

  NOTE(review): this function is defined twice in this file; this copy
  shadows the earlier one — consider deleting one of them."""

  if not unicode_data.is_skintone_modifier(seq[0]):
    seq = tuple([cp for cp in seq if not unicode_data.is_skintone_modifier(cp)])
  name = unicode_data.get_emoji_sequence_name(seq)

  # Strip the "(emoji) " prefix given to emoji-VS presentation names.
  if name.startswith('(emoji) '):
    name = name[8:]

  if len(seq) == 1 and unicode_data.is_regional_indicator(seq[0]):
    return 'Regional Symbol ' + unicode_data.regional_indicator_to_ascii(seq[0])

  # Regional (flag/tag) sequence names are returned unmodified.
  if (unicode_data.is_regional_indicator_seq(seq) or
      unicode_data.is_regional_tag_seq(seq)):
    return name

  name = name.title()
  # Require space delimiting just in case...
  name = re.sub(r'\s&\s', ' and ', name)
  name = re.sub(
      # not \b at start because we retain capital at start of phrase.
      # Fixed the '(:?' typo to '(?:' — the inner group is meant to be
      # non-capturing; the typo also allowed ':A' to match after a space.
      r'(\s(?:A|And|From|In|Of|With|For))\b', lambda s: s.group(1).lower(),
      name)

  return name
def generate_names(src_dir,
                   dst_dir,
                   skip_limit=20,
                   omit_groups=None,
                   pretty_print=False,
                   verbose=False):
    """Write a data.json naming the emoji images found under src_dir.

    Scans src_dir for 'emoji_u*.png' images, pairs each canonical emoji
    sequence with its image file and name data, groups entries by emoji
    group, and writes the result as JSON to dst_dir/data.json.

    Args:
      src_dir: directory containing the emoji png images.
      dst_dir: output directory; created if it does not exist.
      skip_limit: abort once more than this many sequences lack an image
        (a negative value disables the check).
      omit_groups: optional list of emoji group names to exclude; all
        names must be valid groups or nothing is written.
      pretty_print: if True, write indented JSON, else compact JSON.
      verbose: if True, log each sequence skipped for lack of an image.

    Raises:
      ValueError: if more than skip_limit sequences have no image file.

    NOTE(review): this function is defined twice in this file; the later
    definition shadows this one at import time.
    """
    srcdir = tool_utils.resolve_path(src_dir)
    if not path.isdir(srcdir):
        print('%s is not a directory' % src_dir, file=sys.stderr)
        return

    if omit_groups:
        # Reject the request outright if any requested group is unknown.
        unknown_groups = set(omit_groups) - set(
            unicode_data.get_emoji_groups())
        if unknown_groups:
            print(
                'did not recognize %d group%s: %s' %
                (len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
                 ', '.join('"%s"' % g
                           for g in omit_groups if g in unknown_groups)),
                file=sys.stderr)
            print('valid groups are:\n  %s' %
                  ('\n  '.join(unicode_data.get_emoji_groups())),
                  file=sys.stderr)
            return
        print('omitting %d group%s: %s' %
              (len(omit_groups), '' if len(omit_groups) == 1 else 's',
               ', '.join('"%s"' % g for g in omit_groups)))
    else:
        # might be None
        print('keeping all groups')
        omit_groups = []

    # make sure the destination exists
    dstdir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

    # _get_image_data returns canonical cp sequences
    print('src dir:', srcdir)
    seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
    print('seq to file has %d sequences' % len(seq_to_file))

    # Aliases add non-gendered versions using gendered images for the most part.
    # But when we display the images, we don't distinguish genders in the
    # naming, we rely on the images-- so these look redundant. So we
    # intentionally don't generate images for these.
    # However, the alias file also includes the flag aliases, which we do want,
    # and it also fails to exclude the unknown flag pua (since it doesn't
    # map to anything), so we need to adjust for this.
    canonical_aliases = generate_emoji_html._get_canonical_aliases()

    aliases = {
        cps for cps in canonical_aliases
        if not unicode_data.is_regional_indicator_seq(cps)
    }
    aliases.add((0xfe82b, ))  # unknown flag PUA
    excluded = aliases | generate_emoji_html._get_canonical_excluded()

    # The flag aliases have distinct names, so we _do_ want to show them
    # multiple times.  Map each flag alias to the image of its target.
    to_add = {}
    for seq in canonical_aliases:
        if unicode_data.is_regional_indicator_seq(seq):
            replace_seq = canonical_aliases[seq]
            if seq in seq_to_file:
                # An alias should not have its own image; keep the
                # existing file and just warn.
                print('warning, alias %s has file %s' %
                      (unicode_data.regional_indicator_seq_to_string(seq),
                       seq_to_file[seq]))
                continue
            replace_file = seq_to_file.get(replace_seq)
            if replace_file:
                to_add[seq] = replace_file
    seq_to_file.update(to_add)

    data = []
    last_skipped_group = None
    skipcount = 0
    for group in unicode_data.get_emoji_groups():
        if group in omit_groups:
            continue
        name_data = []
        for seq in unicode_data.get_emoji_in_group(group):
            if seq in excluded:
                continue
            seq_file = seq_to_file.get(seq)
            if seq_file is None:
                skipcount += 1
                if verbose:
                    if group != last_skipped_group:
                        print('group %s' % group)
                        last_skipped_group = group
                    print('  %s (%s)' %
                          (unicode_data.seq_to_string(seq), ', '.join(
                              unicode_data.name(cp, 'x') for cp in seq)))
                if skip_limit >= 0 and skipcount > skip_limit:
                    # ValueError is backward-compatible with callers that
                    # caught the bare Exception raised previously.
                    raise ValueError('skipped too many items')
            else:
                name_data.append(_name_data(seq, seq_file))
        data.append({'category': group, 'emojis': name_data})

    outfile = path.join(dstdir, 'data.json')
    with open(outfile, 'w') as f:
        indent = 2 if pretty_print else None
        separators = None if pretty_print else (',', ':')
        json.dump(data, f, indent=indent, separators=separators)
    print('wrote %s' % outfile)
def generate_names(
    src_dir, dst_dir, skip_limit=20, omit_groups=None, pretty_print=False,
    verbose=False):
  """Write a data.json naming the emoji images found under src_dir.

  Scans src_dir for 'emoji_u*.png' images, pairs each canonical emoji
  sequence with its image file and name data, groups entries by emoji
  group, and writes the result as JSON to dst_dir/data.json.

  Args:
    src_dir: directory containing the emoji png images.
    dst_dir: output directory; created if it does not exist.
    skip_limit: abort once more than this many sequences lack an image
      (a negative value disables the check).
    omit_groups: optional list of emoji group names to exclude; all names
      must be valid groups or nothing is written.
    pretty_print: if True, write indented JSON, else compact JSON.
    verbose: if True, log each sequence skipped for lack of an image.

  Raises:
    Exception: if more than skip_limit sequences have no image file.

  NOTE(review): this function is defined twice in this file; this copy
  shadows the earlier one — consider deleting one of them.
  """
  srcdir = tool_utils.resolve_path(src_dir)
  if not path.isdir(srcdir):
    print('%s is not a directory' % src_dir, file=sys.stderr)
    return

  if omit_groups:
    # Reject the request outright if any requested group is unknown.
    unknown_groups = set(omit_groups) - set(unicode_data.get_emoji_groups())
    if unknown_groups:
      print('did not recognize %d group%s: %s' % (
          len(unknown_groups), '' if len(unknown_groups) == 1 else 's',
          ', '.join('"%s"' % g for g in omit_groups if g in unknown_groups)), file=sys.stderr)
      print('valid groups are:\n  %s' % (
          '\n  '.join(g for g in unicode_data.get_emoji_groups())), file=sys.stderr)
      return
    print('omitting %d group%s: %s' % (
        len(omit_groups), '' if len(omit_groups) == 1 else 's',
        ', '.join('"%s"' % g for g in omit_groups)))
  else:
    # might be None
    print('keeping all groups')
    omit_groups = []

  # make sure the destination exists
  dstdir = tool_utils.ensure_dir_exists(
      tool_utils.resolve_path(dst_dir))

  # _get_image_data returns canonical cp sequences
  print('src dir:', srcdir)
  seq_to_file = generate_emoji_html._get_image_data(srcdir, 'png', 'emoji_u')
  print('seq to file has %d sequences' % len(seq_to_file))

  # Aliases add non-gendered versions using gendered images for the most part.
  # But when we display the images, we don't distinguish genders in the
  # naming, we rely on the images-- so these look redundant. So we
  # intentionally don't generate images for these.
  # However, the alias file also includes the flag aliases, which we do want,
  # and it also fails to exclude the unknown flag pua (since it doesn't
  # map to anything), so we need to adjust for this.
  canonical_aliases = generate_emoji_html._get_canonical_aliases()

  aliases = set([
      cps for cps in canonical_aliases.keys()
      if not unicode_data.is_regional_indicator_seq(cps)])
  aliases.add((0xfe82b,))  # unknown flag PUA
  excluded = aliases | generate_emoji_html._get_canonical_excluded()

  # The flag aliases have distinct names, so we _do_ want to show them
  # multiple times.  Map each flag alias to the image of its target.
  to_add = {}
  for seq in canonical_aliases:
    if unicode_data.is_regional_indicator_seq(seq):
      replace_seq = canonical_aliases[seq]
      if seq in seq_to_file:
        # An alias should not have its own image; keep the existing file
        # and just warn.
        print('warning, alias %s has file %s' % (
            unicode_data.regional_indicator_seq_to_string(seq),
            seq_to_file[seq]))
        continue
      replace_file = seq_to_file.get(replace_seq)
      if replace_file:
        to_add[seq] = replace_file
  seq_to_file.update(to_add)

  data = []
  last_skipped_group = None
  skipcount = 0
  for group in unicode_data.get_emoji_groups():
    if group in omit_groups:
      continue
    name_data = []
    for seq in unicode_data.get_emoji_in_group(group):
      if seq in excluded:
        continue
      seq_file = seq_to_file.get(seq, None)
      if seq_file is None:
        # No image for this sequence; count it and optionally log it.
        skipcount += 1
        if verbose:
          if group != last_skipped_group:
            print('group %s' % group)
            last_skipped_group = group
          print('  %s (%s)' % (
              unicode_data.seq_to_string(seq),
              ', '.join(unicode_data.name(cp, 'x') for cp in seq)))
        if skip_limit >= 0 and skipcount > skip_limit:
          raise Exception('skipped too many items')
      else:
        name_data.append(_name_data(seq, seq_file))
    data.append({'category': group, 'emojis': name_data})

  outfile = path.join(dstdir, 'data.json')
  with open(outfile, 'w') as f:
    # Compact separators when not pretty-printing keeps the file small.
    indent = 2 if pretty_print else None
    separators = None if pretty_print else (',', ':')
    json.dump(data, f, indent=indent, separators=separators)
  print('wrote %s' % outfile)