def _get_part(cp): if cp == 0x200d: # zwj, common so replace with '+' return '+' if unicode_data.is_regional_indicator(cp): return unicode_data.regional_indicator_to_ascii(cp) if unicode_data.is_tag(cp): return unicode_data.tag_character_to_ascii(cp) fname = _get_filepath(cp) if fname: return '<img src="%s">' % fname raise Exception()
def _standard_name(seq):
    """Use the standard emoji name, with some algorithmic modifications.

    We want to ignore skin-tone modifiers (but of course if the sequence
    _is_ the skin-tone modifier itself we keep that).  So we strip these so
    we can start with the generic name ignoring skin tone.

    Non-emoji that are turned into emoji using the emoji VS have '(emoji) '
    prepended to them, so strip that.

    Regional indicator symbol names are a bit long, so shorten them.

    Regional sequences are assumed to be ok as-is in terms of capitalization
    and punctuation, so no modifications are applied to them.

    After title-casing we make some English articles/prepositions lower-case
    again.  We also replace '&' with 'and'; Unicode seems rather fond of
    ampersand.

    Args:
      seq: an indexable sequence of code points; seq[0] is read, so it must
        not be empty.

    Returns:
      The adjusted name string.
    """
    if not unicode_data.is_skintone_modifier(seq[0]):
        seq = tuple(
            cp for cp in seq if not unicode_data.is_skintone_modifier(cp))
    name = unicode_data.get_emoji_sequence_name(seq)

    emoji_prefix = '(emoji) '
    if name.startswith(emoji_prefix):
        name = name[len(emoji_prefix):]

    if len(seq) == 1 and unicode_data.is_regional_indicator(seq[0]):
        return ('Regional Symbol ' +
                unicode_data.regional_indicator_to_ascii(seq[0]))

    if (unicode_data.is_regional_indicator_seq(seq) or
            unicode_data.is_regional_tag_seq(seq)):
        return name

    name = name.title()
    # Require space delimiting just in case...
    name = re.sub(r'\s&\s', ' and ', name)
    # Not \b at start because we retain capital at start of phrase.
    # The inner group is non-capturing, '(?:...)'.  It was previously
    # written '(:?...)', which instead matched an optional literal colon.
    name = re.sub(
        r'(\s(?:A|And|From|In|Of|With|For))\b',
        lambda m: m.group(1).lower(),
        name)
    return name
def _check_flags(sorted_seq_to_filepath): """Ensure regional indicators are only in sequences of one or two, and never mixed.""" for seq, fp in sorted_seq_to_filepath.items(): have_reg = None for cp in seq: is_reg = unicode_data.is_regional_indicator(cp) if have_reg == None: have_reg = is_reg elif have_reg != is_reg: print(f'check flags: mix of regional and non-regional in {fp}', file=sys.stderr) if have_reg and len(seq) > 2: # We provide dummy glyphs for regional indicators, so there are sequences # with single regional indicator symbols, the len check handles this. print( f'check flags: regional indicator sequence length != 2 in {fp}', file=sys.stderr)
def _check_flags(sorted_seq_to_filepath): """Ensure regional indicators are only in sequences of one or two, and never mixed.""" for seq, fp in sorted_seq_to_filepath.iteritems(): have_reg = None for cp in seq: is_reg = unicode_data.is_regional_indicator(cp) if have_reg == None: have_reg = is_reg elif have_reg != is_reg: print( 'check flags: mix of regional and non-regional in %s' % fp, file=sys.stderr) if have_reg and len(seq) > 2: # We provide dummy glyphs for regional indicators, so there are sequences # with single regional indicator symbols, the len check handles this. print( 'check flags: regional indicator sequence length != 2 in %s' % fp, file=sys.stderr)
def _standard_name(seq):
    """Use the standard emoji name, with some algorithmic modifications.

    We want to ignore skin-tone modifiers (but of course if the sequence
    _is_ the skin-tone modifier itself we keep that).  So we strip these so
    we can start with the generic name ignoring skin tone.

    Non-emoji that are turned into emoji using the emoji VS have '(emoji) '
    prepended to them, so strip that.

    Regional indicator symbol names are a bit long, so shorten them.

    Regional sequences are assumed to be ok as-is in terms of capitalization
    and punctuation, so no modifications are applied to them.

    After title-casing we make some English articles/prepositions lower-case
    again.  We also replace '&' with 'and'; Unicode seems rather fond of
    ampersand.

    Args:
      seq: an indexable sequence of code points; seq[0] is read, so it must
        not be empty.

    Returns:
      The adjusted name string.
    """
    if not unicode_data.is_skintone_modifier(seq[0]):
        seq = tuple(
            cp for cp in seq if not unicode_data.is_skintone_modifier(cp))
    name = unicode_data.get_emoji_sequence_name(seq)

    emoji_prefix = '(emoji) '
    if name.startswith(emoji_prefix):
        name = name[len(emoji_prefix):]

    if len(seq) == 1 and unicode_data.is_regional_indicator(seq[0]):
        return ('Regional Symbol ' +
                unicode_data.regional_indicator_to_ascii(seq[0]))

    if (unicode_data.is_regional_indicator_seq(seq) or
            unicode_data.is_regional_tag_seq(seq)):
        return name

    name = name.title()
    # Require space delimiting just in case...
    name = re.sub(r'\s&\s', ' and ', name)
    # Not \b at start because we retain capital at start of phrase.
    # The inner group is non-capturing, '(?:...)'.  It was previously
    # written '(:?...)', which instead matched an optional literal colon.
    name = re.sub(
        r'(\s(?:A|And|From|In|Of|With|For))\b',
        lambda m: m.group(1).lower(),
        name)
    return name