def _check_zwj(sorted_seq_to_filepath):
  """Ensure zwj is only between two appropriate emoji.  This is a 'pre-check'
  that reports this specific problem.

  Args:
    sorted_seq_to_filepath: mapping whose keys are sequences of integer code
      points and whose values identify where each sequence came from
      (presumably a file path, going by the name -- the value is only ever
      interpolated into the messages).

  Problems are reported as 'check zwj: ...' lines printed to stderr; nothing
  is returned.
  """

  # Use items(), which exists on both Python 2 and Python 3 mappings;
  # iteritems() is Python 2 only and raises AttributeError on a Python 3 dict.
  for seq, fp in sorted_seq_to_filepath.items():
    if ZWJ not in seq:
      continue
    if seq[0] == ZWJ:
      print('check zwj: zwj at head of sequence in %s' % fp, file=sys.stderr)
    if len(seq) == 1:
      # A lone ZWJ has already been reported as being at the head.
      continue
    if seq[-1] == ZWJ:
      print('check zwj: zwj at end of sequence in %s' % fp, file=sys.stderr)
    for i, cp in enumerate(seq):
      if cp != ZWJ:
        continue
      if i > 0:
        pcp = seq[i-1]
        # EMOJI_VS directly before the ZWJ is accepted without asking
        # unicode_data whether it is an emoji.
        if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp):
          print(
              'check zwj: non-emoji %04x preceeds ZWJ in %s' % (pcp, fp),
              file=sys.stderr)
      if i < len(seq) - 1:
        fcp = seq[i+1]
        if not unicode_data.is_emoji(fcp):
          print(
              'check zwj: non-emoji %04x follows ZWJ in %s' % (fcp, fp),
              file=sys.stderr)
def _check_zwj(sorted_seq_to_filepath):
    """Ensure zwj is only between two appropriate emoji.

    This is a 'pre-check' that reports this specific problem.

    Args:
        sorted_seq_to_filepath: mapping from a sequence of integer code
            points to the value shown in the messages for that sequence
            (presumably the file path it was read from -- confirm against
            the caller).

    Each problem found is printed to stderr as a 'check zwj: ...' line.
    The function returns nothing.
    """

    # items() works on Python 2 and Python 3 mappings alike, whereas
    # iteritems() was removed from dict in Python 3.
    for seq, fp in sorted_seq_to_filepath.items():
        if ZWJ not in seq:
            continue
        if seq[0] == ZWJ:
            print('check zwj: zwj at head of sequence in %s' % fp,
                  file=sys.stderr)
        if len(seq) == 1:
            # Single-element sequence: the head report above covers it.
            continue
        if seq[-1] == ZWJ:
            print('check zwj: zwj at end of sequence in %s' % fp,
                  file=sys.stderr)
        last_index = len(seq) - 1
        for i, cp in enumerate(seq):
            if cp != ZWJ:
                continue
            if i > 0:
                pcp = seq[i - 1]
                # EMOJI_VS is allowed before a ZWJ even though it is not
                # checked with unicode_data.is_emoji.
                if pcp != EMOJI_VS and not unicode_data.is_emoji(pcp):
                    print('check zwj: non-emoji %04x preceeds ZWJ in %s' %
                          (pcp, fp),
                          file=sys.stderr)
            if i < last_index:
                fcp = seq[i + 1]
                if not unicode_data.is_emoji(fcp):
                    print('check zwj: non-emoji %04x follows ZWJ in %s' %
                          (fcp, fp),
                          file=sys.stderr)
Exemple #3
0
def _check_zwj(sorted_seq_to_filepath):
    """Ensure zwj is only between two appropriate emoji.

    Args:
        sorted_seq_to_filepath: mapping from a sequence of integer code
            points to the value shown in the messages for that sequence
            (presumably its file path -- confirm against the caller).

    Each problem found is written to stderr as one line.  The function
    returns nothing.
    """
    ZWJ = 0x200D
    EMOJI_PRESENTATION_VS = 0xFE0F

    def report(message):
        # sys.stderr.write behaves the same on Python 2 and Python 3, unlike
        # the 'print >> sys.stderr' statement, which raises TypeError when
        # executed on Python 3.
        sys.stderr.write(message + '\n')

    # items() is available on both Python 2 and Python 3 mappings;
    # iteritems() is Python 2 only.
    for seq, fp in sorted_seq_to_filepath.items():
        if ZWJ not in seq:
            continue
        if seq[0] == ZWJ:
            report('zwj at head of sequence in %s' % fp)
        if len(seq) == 1:
            # A lone ZWJ has already been reported as being at the head.
            continue
        if seq[-1] == ZWJ:
            report('zwj at end of sequence in %s' % fp)
        for i, cp in enumerate(seq):
            if cp != ZWJ:
                continue
            if i > 0:
                pcp = seq[i - 1]
                # The emoji presentation selector is an accepted predecessor
                # and is not checked with unicode_data.is_emoji.
                if (pcp != EMOJI_PRESENTATION_VS
                        and not unicode_data.is_emoji(pcp)):
                    report('non-emoji %04x preceeds ZWJ in %s' % (pcp, fp))
            if i < len(seq) - 1:
                fcp = seq[i + 1]
                if not unicode_data.is_emoji(fcp):
                    report('non-emoji %04x follows ZWJ in %s' % (fcp, fp))
def _check_zwj(sorted_seqs):
  """Ensure zwj is only between two appropriate emoji.

  Args:
    sorted_seqs: iterable of sequences of integer code points.

  Each problem found is written to stderr as one line.  The function returns
  nothing.
  """
  ZWJ = 0x200D
  EMOJI_PRESENTATION_VS = 0xFE0F

  def report(message):
    # sys.stderr.write behaves the same on Python 2 and Python 3, unlike the
    # 'print >> sys.stderr' statement, which raises TypeError when executed
    # on Python 3.
    sys.stderr.write(message + '\n')

  for seq in sorted_seqs:
    if ZWJ not in seq:
      continue
    if seq[0] == ZWJ:
      report('zwj at head of sequence')
    if len(seq) == 1:
      # A lone ZWJ has already been reported as being at the head.
      continue
    if seq[-1] == ZWJ:
      report('zwj at end of sequence')
    for i, cp in enumerate(seq):
      if cp != ZWJ:
        continue
      # Only look backwards when there is a previous element: seq[-1] would
      # otherwise wrap around and test the last code point of the sequence.
      if i > 0:
        pcp = seq[i-1]
        if pcp != EMOJI_PRESENTATION_VS and not unicode_data.is_emoji(pcp):
          report('non-emoji %04X preceeds ZWJ' % pcp)
      # Only look forwards when there is a next element: a trailing ZWJ
      # would otherwise raise IndexError right after being reported above.
      if i < len(seq) - 1:
        fcp = seq[i+1]
        if not unicode_data.is_emoji(fcp):
          report('non-emoji %04X follows ZWJ' % fcp)
def _check_zwj(sorted_seqs):
    """Ensure zwj is only between two appropriate emoji.

    Args:
        sorted_seqs: iterable of sequences of integer code points.

    Each problem found is written to stderr as one line.  The function
    returns nothing.
    """
    ZWJ = 0x200D
    EMOJI_PRESENTATION_VS = 0xFE0F

    # Bound once; write() works identically on Python 2 and Python 3, whereas
    # the 'print >> sys.stderr' statement raises TypeError when executed on
    # Python 3.
    write_err = sys.stderr.write

    for seq in sorted_seqs:
        if ZWJ not in seq:
            continue
        if seq[0] == ZWJ:
            write_err('zwj at head of sequence\n')
        if len(seq) == 1:
            # Single-element sequence: the head report above covers it.
            continue
        if seq[-1] == ZWJ:
            write_err('zwj at end of sequence\n')
        last_index = len(seq) - 1
        for i, cp in enumerate(seq):
            if cp != ZWJ:
                continue
            # Guard the backward lookup: at index 0, seq[i - 1] would wrap
            # around to the final code point and could report a bogus
            # predecessor.
            if i > 0:
                pcp = seq[i - 1]
                if (pcp != EMOJI_PRESENTATION_VS
                        and not unicode_data.is_emoji(pcp)):
                    write_err('non-emoji %04X preceeds ZWJ\n' % pcp)
            # Guard the forward lookup: at the last index, seq[i + 1] would
            # raise IndexError.
            if i < last_index:
                fcp = seq[i + 1]
                if not unicode_data.is_emoji(fcp):
                    write_err('non-emoji %04X follows ZWJ\n' % fcp)