Python strip_emoji_vs 예제들, nototools.unicode_data.strip_emoji_vs Python 예제들

예제 #1

0

파일 보기

def _get_desc(key_tuple, dir_infos, basepaths):
    CELL_PREFIX = '<td>'

    def _get_filepath(cp):
        cp_key = tuple([cp])
        for i in range(len(dir_infos)):
            info = dir_infos[i]
            if cp_key in info.filemap:
                basepath = basepaths[i]
                return path.join(basepath, info.filemap[cp_key])
        return None

    def _get_part(cp):
        if cp == 0x200d:  # zwj, common so replace with '+'
            return '+'
        if unicode_data.is_regional_indicator(cp):
            return unicode_data.regional_indicator_to_ascii(cp)
        if unicode_data.is_tag(cp):
            return unicode_data.tag_character_to_ascii(cp)
        fname = _get_filepath(cp)
        if fname:
            return '<img src="%s">' % fname
        raise Exception()

    if len(key_tuple) == 1:
        desc = '%04x' % key_tuple
    else:
        desc = ' '.join('%04x' % cp for cp in key_tuple)
        if len(unicode_data.strip_emoji_vs(key_tuple)) > 1:
            try:
                desc += ' (%s)' % ''.join(
                    _get_part(cp) for cp in key_tuple if cp != 0xfe0f)
            except:
                pass
    return CELL_PREFIX + desc

예제 #2

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: dougfelt/noto-emoji

def seq_name(seq):
  """Return the name of an emoji code point sequence, or None if unknown.

  A single-element sequence is named with unicode_data.name (None when that
  lookup has no name).  Longer sequences are looked up in the emoji
  combining, flag, modifier and zwj sequence tables, in that order.  The
  tables are re-keyed with emoji variation selectors stripped, so a sequence
  containing EMOJI_VS is retried with the selectors removed.

  The tables are built on first use and cached in the module-level
  _namedata.

  Args:
    seq: tuple of integer code points.

  Returns:
    The name found for the sequence, or None.
  """
  global _namedata

  if not _namedata:
    def strip_vs_map(seq_map):
      # Re-key seq_map by its sequences with emoji variation selectors
      # removed.  items() (rather than the Python 2-only iteritems()) keeps
      # this working on both Python 2 and Python 3.
      return {
          unicode_data.strip_emoji_vs(k): v
          for k, v in seq_map.items()}
    _namedata = [
        strip_vs_map(unicode_data.get_emoji_combining_sequences()),
        strip_vs_map(unicode_data.get_emoji_flag_sequences()),
        strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
        strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

  if len(seq) == 1:
    return unicode_data.name(seq[0], None)

  for data in _namedata:
    if seq in data:
      return data[seq]
  if EMOJI_VS in seq:
    # The tables are keyed without variation selectors; retry without them.
    non_vs_seq = unicode_data.strip_emoji_vs(seq)
    for data in _namedata:
      if non_vs_seq in data:
        return data[non_vs_seq]

  return None

예제 #3

0

파일 보기

def create_thumbnails_and_aliases(src_dir, dst_dir, dst_prefix):
    """Create thumbnails, and copies for their aliases, in dst_dir.

    Every file in src_dir whose name parses as an emoji sequence (source
    prefix 'emoji_u', suffix '.png') is thumbnailed into dst_dir under a
    name built from dst_prefix and the sequence with emoji variation
    selectors stripped.  Files whose names do not parse are logged and
    skipped.  For each alias of the sequence reported by get_inv_aliases(),
    the new thumbnail is copied to the alias's file name.

    Args:
      src_dir: directory containing the source images.
      dst_dir: directory to write into; passed through
        tool_utils.ensure_dir_exists.
      dst_prefix: file name prefix for the generated thumbnails.

    Raises:
      ValueError: if src_dir is not a directory.
    """

    if not path.isdir(src_dir):
        # The original message never interpolated src_dir and so printed a
        # literal '%s'.
        raise ValueError('"%s" is not a directory' % src_dir)
    dst_dir = tool_utils.ensure_dir_exists(dst_dir)

    src_prefix = 'emoji_u'
    suffix = '.png'

    inv_aliases = get_inv_aliases()

    for src_file in os.listdir(src_dir):
        try:
            seq = unicode_data.strip_emoji_vs(
                filename_to_sequence(src_file, src_prefix, suffix))
        except ValueError as ve:
            logger.warning('Error (%s), skipping' % ve)
            continue

        src_path = path.join(src_dir, src_file)

        dst_file = sequence_to_filename(seq, dst_prefix, suffix)
        dst_path = path.join(dst_dir, dst_file)

        create_thumbnail(src_path, dst_path)
        logger.info('wrote thumbnail: %s' % dst_file)

        # Aliases share the image of their target, so copy the thumbnail
        # that was just written instead of regenerating it.
        for alias_seq in inv_aliases.get(seq, ()):
            alias_file = sequence_to_filename(alias_seq, dst_prefix, suffix)
            alias_path = path.join(dst_dir, alias_file)
            shutil.copy2(dst_path, alias_path)
            logger.info('wrote alias: %s' % alias_file)

예제 #4

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: danielfspencer/oxfordhack-2019

def seq_name(seq):
    """Return the name of an emoji code point sequence, or None if unknown.

    A single-element sequence is named with unicode_data.name (None when
    that lookup has no name).  Longer sequences are looked up in the emoji
    combining, flag, modifier and zwj sequence tables, in that order.  The
    tables are re-keyed with emoji variation selectors stripped, so a
    sequence containing EMOJI_VS is retried with the selectors removed.

    The tables are built on first use and cached in the module-level
    _namedata.

    Args:
      seq: tuple of integer code points.

    Returns:
      The name found for the sequence, or None.
    """
    global _namedata

    if not _namedata:

        def strip_vs_map(seq_map):
            # Re-key seq_map by its sequences with emoji variation selectors
            # removed.  items() (rather than the Python 2-only iteritems())
            # keeps this working on both Python 2 and Python 3.
            return {
                unicode_data.strip_emoji_vs(k): v
                for k, v in seq_map.items()
            }

        _namedata = [
            strip_vs_map(unicode_data.get_emoji_combining_sequences()),
            strip_vs_map(unicode_data.get_emoji_flag_sequences()),
            strip_vs_map(unicode_data.get_emoji_modifier_sequences()),
            strip_vs_map(unicode_data.get_emoji_zwj_sequences()),
        ]

    if len(seq) == 1:
        return unicode_data.name(seq[0], None)

    for data in _namedata:
        if seq in data:
            return data[seq]
    if EMOJI_VS in seq:
        # The tables are keyed without variation selectors; retry without
        # them.
        non_vs_seq = unicode_data.strip_emoji_vs(seq)
        for data in _namedata:
            if non_vs_seq in data:
                return data[non_vs_seq]

    return None

예제 #5

0

파일 보기

파일: generate_emoji_html.py 프로젝트: inbreaks/apple-emoji-linux

def _get_desc(key_tuple, aliases, dir_infos, basepaths):
    CELL_PREFIX = '<td>'

    def _get_filepath(cp):
        def get_key_filepath(key):
            for i in range(len(dir_infos)):
                info = dir_infos[i]
                if key in info.filemap:
                    basepath = basepaths[i]
                    return path.join(basepath, info.filemap[key])
            return None

        cp_key = tuple([cp])
        cp_key = unicode_data.get_canonical_emoji_sequence(cp_key) or cp_key
        fp = get_key_filepath(cp_key)
        if not fp:
            if cp_key in aliases:
                fp = get_key_filepath(aliases[cp_key])
            else:
                print('no alias for %s' % unicode_data.seq_to_string(cp_key))
        if not fp:
            print('no part for %s in %s' %
                  (unicode_data.seq_to_string(cp_key),
                   unicode_data.seq_to_string(key_tuple)))
        return fp

    def _get_part(cp):
        if cp == 0x200d:  # zwj, common so replace with '+'
            return '+'
        if unicode_data.is_regional_indicator(cp):
            return unicode_data.regional_indicator_to_ascii(cp)
        if unicode_data.is_tag(cp):
            return unicode_data.tag_character_to_ascii(cp)
        fname = _get_filepath(cp)
        if fname:
            return '<img src="%s">' % fname
        raise Exception()

    if len(key_tuple) == 1:
        desc = '%04x' % key_tuple
    else:
        desc = ' '.join('%04x' % cp for cp in key_tuple)
        if len(unicode_data.strip_emoji_vs(key_tuple)) > 1:
            try:
                desc += ' (%s)' % ''.join(
                    _get_part(cp) for cp in key_tuple if cp != 0xfe0f)
            except:
                pass
    return CELL_PREFIX + desc

예제 #6

0

파일 보기

파일: generate_emoji_thumbnails.py 프로젝트: dougfelt/noto-emoji

def create_thumbnails_and_aliases(src_dir, dst_dir, crop, dst_prefix):
  """Create thumbnails, and copies for their aliases, in dst_dir.

  Every file in src_dir whose name parses as an emoji sequence (source
  prefix 'emoji_u', suffix '.png') is thumbnailed into dst_dir under a name
  built from dst_prefix and the sequence with emoji variation selectors
  stripped.  Files whose names do not parse are logged and skipped.  For
  each alias of the sequence reported by get_inv_aliases(), the new
  thumbnail is copied to the alias's file name.

  Args:
    src_dir: directory containing the source images; passed through
      tool_utils.resolve_path.
    dst_dir: directory to write into; passed through
      tool_utils.resolve_path and tool_utils.ensure_dir_exists.
    crop: forwarded to create_thumbnail; a truthy value is also noted in the
      log message.
    dst_prefix: file name prefix for the generated thumbnails.

  Raises:
    ValueError: if the resolved src_dir is not a directory.
  """

  src_dir = tool_utils.resolve_path(src_dir)
  if not path.isdir(src_dir):
    # The original message never interpolated src_dir and so printed a
    # literal '%s'.
    raise ValueError('"%s" is not a directory' % src_dir)

  dst_dir = tool_utils.ensure_dir_exists(tool_utils.resolve_path(dst_dir))

  src_prefix = 'emoji_u'
  suffix = '.png'

  inv_aliases = get_inv_aliases()

  for src_file in os.listdir(src_dir):
    try:
      seq = unicode_data.strip_emoji_vs(
          filename_to_sequence(src_file, src_prefix, suffix))
    except ValueError as ve:
      logger.warning('Error (%s), skipping' % ve)
      continue

    src_path = path.join(src_dir, src_file)

    dst_file = sequence_to_filename(seq, dst_prefix, suffix)
    dst_path = path.join(dst_dir, dst_file)

    create_thumbnail(src_path, dst_path, crop)
    logger.info('wrote thumbnail%s: %s' % (
        ' with crop' if crop else '', dst_file))

    # Aliases share the image of their target, so copy the thumbnail that
    # was just written instead of regenerating it.
    for alias_seq in inv_aliases.get(seq, ()):
      alias_file = sequence_to_filename(alias_seq, dst_prefix, suffix)
      alias_path = path.join(dst_dir, alias_file)
      shutil.copy2(dst_path, alias_path)
      logger.info('wrote alias: %s' % alias_file)

예제 #7

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: dougfelt/noto-emoji

 def strip_vs_map(seq_map):
   """Return a copy of seq_map keyed by unicode_data.strip_emoji_vs(key).

   Uses items() rather than the Python 2-only iteritems() so the helper
   works on both Python 2 and Python 3.
   """
   return {
       unicode_data.strip_emoji_vs(k): v
       for k, v in seq_map.items()}

예제 #8

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: dougfelt/noto-emoji

def _check_coverage(seq_to_filepath, unicode_version):
  """Report emoji and emoji sequences that lack an image for this version.

  Ensures we have all and only the cps and sequences that we need for the
  font as of this version.  Every problem found is printed on its own line
  prefixed with 'coverage:'; nothing is returned or raised for them.

  Side effect: each alias from add_aliases.read_default_emoji_aliases()
  whose target is covered (and which is not itself already present) is added
  to seq_to_filepath with the value 'alias:' + <target file path>.

  Args:
    seq_to_filepath: dict mapping code point tuples to file paths; modified
      in place as described above.
    unicode_version: passed as the 'age' argument to the unicode_data
      queries.
  """

  age = unicode_version

  # Map each of our sequences, with emoji variation selectors removed, back
  # to the form we actually have.
  non_vs_to_canonical = {}
  for k in seq_to_filepath:
    if EMOJI_VS in k:
      non_vs = unicode_data.strip_emoji_vs(k)
      non_vs_to_canonical[non_vs] = k

  aliases = add_aliases.read_default_emoji_aliases()
  for k, v in sorted(aliases.items()):
    if v not in seq_to_filepath and v not in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s missing target %s' % (alias_str, target_str))
      continue
    if k in seq_to_filepath or k in non_vs_to_canonical:
      alias_str = unicode_data.seq_to_string(k)
      target_str = unicode_data.seq_to_string(v)
      print('coverage: alias %s already exists as %s (%s)' % (
          alias_str, target_str, seq_name(v)))
      continue
    filename = seq_to_filepath.get(v) or seq_to_filepath[non_vs_to_canonical[v]]
    seq_to_filepath[k] = 'alias:' + filename

  # check single emoji, this includes most of the special chars
  emoji = sorted(unicode_data.get_emoji(age=age))
  for cp in emoji:
    if tuple([cp]) not in seq_to_filepath:
      print(
          'coverage: missing single %04x (%s)' % (
              cp, unicode_data.name(cp, '<no name>')))

  # special characters
  # all but combining enclosing keycap are currently marked as emoji
  # list(range(...)) because on Python 3 a range cannot be added to a list.
  for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
    if cp not in emoji and tuple([cp]) not in seq_to_filepath:
      print('coverage: missing special %04x (%s)' % (cp, unicode_data.name(cp)))

  # combining sequences
  # (items() throughout instead of the Python 2-only iteritems().)
  comb_seq_to_name = sorted(
      unicode_data.get_emoji_combining_sequences(age=age).items())
  for seq, name in comb_seq_to_name:
    if seq not in seq_to_filepath:
      # strip vs and try again
      non_vs_seq = unicode_data.strip_emoji_vs(seq)
      if non_vs_seq not in seq_to_filepath:
        print('coverage: missing combining sequence %s (%s)' %
              (unicode_data.seq_to_string(seq), name))

  # flag sequences
  flag_seq_to_name = sorted(
      unicode_data.get_emoji_flag_sequences(age=age).items())
  for seq, name in flag_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing flag sequence %s (%s)' %
            (unicode_data.seq_to_string(seq), name))

  # skin tone modifier sequences
  mod_seq_to_name = sorted(
      unicode_data.get_emoji_modifier_sequences(age=age).items())
  for seq, name in mod_seq_to_name:
    if seq not in seq_to_filepath:
      print('coverage: missing modifier sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # zwj sequences
  # some of ours include the emoji presentation variation selector and some
  # don't, and the same is true for the canonical sequences.  normalize all
  # of them to omit it to test coverage, but report the canonical sequence.
  zwj_seq_without_vs = set()
  for seq in seq_to_filepath:
    if ZWJ not in seq:
      continue
    if EMOJI_VS in seq:
      seq = tuple(cp for cp in seq if cp != EMOJI_VS)
    zwj_seq_without_vs.add(seq)

  for seq, name in sorted(
      unicode_data.get_emoji_zwj_sequences(age=age).items()):
    if EMOJI_VS in seq:
      test_seq = tuple(s for s in seq if s != EMOJI_VS)
    else:
      test_seq = seq
    if test_seq not in zwj_seq_without_vs:
      print('coverage: missing (canonical) zwj sequence %s (%s)' % (
          unicode_data.seq_to_string(seq), name))

  # check for 'unknown flag'
  # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
  # don't start with our prefix so 'unknown_flag' would be excluded by default.
  if tuple([0xfe82b]) not in seq_to_filepath:
    print('coverage: missing unknown flag PUA fe82b')

예제 #9

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: danielfspencer/oxfordhack-2019

 def strip_vs_map(seq_map):
     """Return a copy of seq_map keyed by unicode_data.strip_emoji_vs(key).

     Uses items() rather than the Python 2-only iteritems() so the helper
     works on both Python 2 and Python 3.
     """
     return {
         unicode_data.strip_emoji_vs(k): v
         for k, v in seq_map.items()
     }

예제 #10

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: danielfspencer/oxfordhack-2019

def _check_coverage(seq_to_filepath, unicode_version):
    """Report emoji and emoji sequences that lack an image for this version.

    Ensures we have all and only the cps and sequences that we need for the
    font as of this version.  Every problem found is printed on its own line
    prefixed with 'coverage:'; nothing is returned or raised for them.

    Side effect: each alias from add_aliases.read_default_emoji_aliases()
    whose target is covered (and which is not itself already present) is
    added to seq_to_filepath with the value 'alias:' + <target file path>.

    Args:
      seq_to_filepath: dict mapping code point tuples to file paths;
        modified in place as described above.
      unicode_version: passed as the 'age' argument to the unicode_data
        queries.
    """

    age = unicode_version

    # Map each of our sequences, with emoji variation selectors removed,
    # back to the form we actually have.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s missing target %s' %
                  (alias_str, target_str))
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print('coverage: alias %s already exists as %s (%s)' %
                  (alias_str, target_str, seq_name(v)))
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji(age=age))
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print('coverage: missing single %04x (%s)' %
                  (cp, unicode_data.name(cp, '<no name>')))

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    # list(range(...)) because on Python 3 a range cannot be added to a list.
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print('coverage: missing special %04x (%s)' %
                  (cp, unicode_data.name(cp)))

    # combining sequences
    # (items() throughout instead of the Python 2-only iteritems().)
    comb_seq_to_name = sorted(
        unicode_data.get_emoji_combining_sequences(age=age).items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print('coverage: missing combining sequence %s (%s)' %
                      (unicode_data.seq_to_string(seq), name))

    # flag sequences
    flag_seq_to_name = sorted(
        unicode_data.get_emoji_flag_sequences(age=age).items())
    for seq, name in flag_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing flag sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # skin tone modifier sequences
    mod_seq_to_name = sorted(
        unicode_data.get_emoji_modifier_sequences(age=age).items())
    for seq, name in mod_seq_to_name:
        if seq not in seq_to_filepath:
            print('coverage: missing modifier sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # zwj sequences
    # some of ours include the emoji presentation variation selector and some
    # don't, and the same is true for the canonical sequences.  normalize all
    # of them to omit it to test coverage, but report the canonical sequence.
    zwj_seq_without_vs = set()
    for seq in seq_to_filepath:
        if ZWJ not in seq:
            continue
        if EMOJI_VS in seq:
            seq = tuple(cp for cp in seq if cp != EMOJI_VS)
        zwj_seq_without_vs.add(seq)

    for seq, name in sorted(
            unicode_data.get_emoji_zwj_sequences(age=age).items()):
        if EMOJI_VS in seq:
            test_seq = tuple(s for s in seq if s != EMOJI_VS)
        else:
            test_seq = seq
        if test_seq not in zwj_seq_without_vs:
            print('coverage: missing (canonical) zwj sequence %s (%s)' %
                  (unicode_data.seq_to_string(seq), name))

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')

예제 #11

0

파일 보기

파일: check_emoji_sequences.py 프로젝트: C1710/emoji_builder

def _check_coverage(seq_to_filepath, unicode_version):
    """Report missing emoji images and exit the program if any are missing.

    Ensures we have all and only the cps and sequences that we need for the
    font as of this version.  Every problem found is printed on its own line
    prefixed with 'coverage:'.  If at least one problem was found, the
    process is terminated through sys.exit with a message explaining how to
    bypass the check.

    Side effect: each alias from add_aliases.read_default_emoji_aliases()
    whose target is covered (and which is not itself already present) is
    added to seq_to_filepath with the value 'alias:' + <target file path>.

    Args:
      seq_to_filepath: dict mapping code point tuples to file paths;
        modified in place as described above.
      unicode_version: stored as 'age' but not otherwise used in this
        version of the check.

    Raises:
      SystemExit: if any coverage problem was reported.
    """
    # Function-scope import: sys.exit is used below instead of the exit()
    # helper injected by the site module, which is meant for the interactive
    # shell and is not guaranteed to exist (e.g. under 'python -S').
    import sys

    coverage_pass = True
    age = unicode_version

    # Map each of our sequences, with emoji variation selectors removed,
    # back to the form we actually have.
    non_vs_to_canonical = {}
    for k in seq_to_filepath:
        if EMOJI_VS in k:
            non_vs = unicode_data.strip_emoji_vs(k)
            non_vs_to_canonical[non_vs] = k

    aliases = add_aliases.read_default_emoji_aliases()
    for k, v in sorted(aliases.items()):
        if v not in seq_to_filepath and v not in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(f'coverage: alias {alias_str} missing target {target_str}')
            coverage_pass = False
            continue
        if k in seq_to_filepath or k in non_vs_to_canonical:
            alias_str = unicode_data.seq_to_string(k)
            target_str = unicode_data.seq_to_string(v)
            print(
                f'coverage: alias {alias_str} already exists as {target_str} ({seq_name(v)})'
            )
            coverage_pass = False
            continue
        filename = seq_to_filepath.get(v) or seq_to_filepath[
            non_vs_to_canonical[v]]
        seq_to_filepath[k] = 'alias:' + filename

    # check single emoji, this includes most of the special chars
    emoji = sorted(unicode_data.get_emoji())
    for cp in emoji:
        if tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing single {cp} ({unicode_data.name(cp)})')
            coverage_pass = False

    # special characters
    # all but combining enclosing keycap are currently marked as emoji
    for cp in [ord('*'), ord('#'), ord(u'\u20e3')] + list(range(0x30, 0x3a)):
        if cp not in emoji and tuple([cp]) not in seq_to_filepath:
            print(f'coverage: missing special {cp} ({unicode_data.name(cp)})')
            coverage_pass = False

    # combining sequences
    comb_seq_to_name = sorted(unicode_data._emoji_sequence_data.items())
    for seq, name in comb_seq_to_name:
        if seq not in seq_to_filepath:
            # strip vs and try again
            non_vs_seq = unicode_data.strip_emoji_vs(seq)
            if non_vs_seq not in seq_to_filepath:
                print(
                    f'coverage: missing combining sequence {unicode_data.seq_to_string(seq)} ({name})'
                )
                coverage_pass = False

    # check for 'unknown flag'
    # this is either emoji_ufe82b or 'unknown_flag', but we filter out things that
    # don't start with our prefix so 'unknown_flag' would be excluded by default.
    if tuple([0xfe82b]) not in seq_to_filepath:
        print('coverage: missing unknown flag PUA fe82b')
        coverage_pass = False

    if not coverage_pass:
        sys.exit(
            "Please fix the problems metioned above or run: make BYPASS_SEQUENCE_CHECK='True'"
        )