Exemple #1
0
def _print_detailed(cps, inverted_target=None):
  """Print one detail line per code point in cps, grouped under block names.

  Code points are visited in sorted order. Whenever the value returned by
  unicode_data.block() changes, the block name is printed as a heading.
  Each detail line shows the hex code point, script, category, age and
  name, followed by any script extensions other than the primary script.

  Args:
    cps: iterable of integer code points.
    inverted_target: optional mapping from code point to a collection of
      strings. When it is falsy nothing extra is printed; otherwise each
      line is suffixed with the (abbreviated) sorted strings for that code
      point, or ' !missing' if the code point is not a key.
  """
  current_block = None
  for cp in sorted(cps):
    cp_block = unicode_data.block(cp)
    if cp_block != current_block:
      print('    %s' % cp_block)
      current_block = cp_block

    script = unicode_data.script(cp)
    # Only list extensions that add something beyond the primary script.
    other_scripts = unicode_data.script_extensions(cp) - set([script])
    ext_text = (' (%s)' % ','.join(sorted(other_scripts))
                if other_scripts else '')

    if not inverted_target:
      target_text = ''
    elif cp in inverted_target:
      names = sorted(inverted_target[cp])
      if len(names) > 3:
        # Abbreviate long lists: first three entries, then the last one.
        listed = ', '.join(names[:3]) + '... ' + names[-1]
      else:
        listed = ', '.join(names)
      target_text = ' (in %s)' % listed
    else:
      target_text = ' !missing'

    fields = (
        '%04x' % cp,
        script,
        unicode_data.category(cp),
        unicode_data.age(cp),
        unicode_data.name(cp, ''),
        ext_text,
        target_text)
    print('    %6s %4s %2s %3s %s%s%s' % fields)
 def setUp(self):
     """Collect the characters to test from the first loaded font.

     Stores the font file list from ``self.loaded_fonts``, keeps every
     character of the first font whose category is 'Lm' or 'Sk' in
     ``self.marks_to_test``, and starts with an empty advance cache.
     """
     fonts, _ = self.loaded_fonts
     self.font_files = fonts
     wanted_categories = ['Lm', 'Sk']
     selected = []
     for char in coverage.character_set(fonts[0]):
         if unicode_data.category(char) in wanted_categories:
             selected.append(char)
     self.marks_to_test = selected
     self.advance_cache = {}
Exemple #3
0
def _print_detailed(cps, inverted_target=None):
    """Print a detailed listing of ``cps`` grouped by Unicode block.

    Code points are visited in sorted order. A heading is printed whenever
    the block reported by ``unicode_data.block`` changes. Code points whose
    script is "Zzzz" are not listed individually; consecutive ones within
    a block are collapsed into a single "<N undefined>" summary line.
    Every other code point gets a line with its hex value, script,
    category, age, name, additional script extensions, and target info.

    Args:
        cps: iterable of integer code points.
        inverted_target: optional mapping from code point to a collection
            of strings. When falsy, no target info is printed. Otherwise
            each line is suffixed with the (abbreviated) sorted strings for
            that code point, or " !missing" if the code point is not a key.
    """
    last_block = None
    # Inclusive bounds of the pending run of "Zzzz" code points; -1 = none.
    undefined_start = -1
    undefined_end = -1

    def show_undefined(start, end):
        """Print the summary line for the pending undefined run, if any."""
        if start >= 0:
            if end > start:
                # Both bounds are inclusive and the run is contiguous, so
                # it covers end - start + 1 code points.
                print("      %04x-%04x Zzzz <%d undefined>" %
                      (start, end, end - start + 1))
            else:
                print("      %04x Zzzz <1 undefined>" % start)

    for cp in sorted(cps):
        block = unicode_data.block(cp)
        # A pending run ends at a block boundary or at a gap in cps.
        if block != last_block or (undefined_end > -1
                                   and cp > undefined_end + 1):
            show_undefined(undefined_start, undefined_end)
            undefined_start, undefined_end = -1, -1
            if block != last_block:
                print("    %s" % block)
                last_block = block
        script = unicode_data.script(cp)
        if script == "Zzzz":
            # Start a new run or extend the current one; print nothing yet.
            if undefined_start >= 0:
                undefined_end = cp
            else:
                undefined_start, undefined_end = cp, cp
            continue

        # A code point with a real script terminates any pending run.
        show_undefined(undefined_start, undefined_end)
        undefined_start, undefined_end = -1, -1
        extensions = unicode_data.script_extensions(cp) - {script}
        if extensions:
            extensions = " (script %s)" % ", ".join(sorted(extensions))
        else:
            extensions = ""
        if not inverted_target:
            extra = ""
        elif cp not in inverted_target:
            extra = " !missing"
        else:
            scripts = sorted(inverted_target[cp])
            if len(scripts) > 3:
                # Abbreviate long lists: first three entries, then the last.
                script_text = ", ".join(scripts[:3]) + "... " + scripts[-1]
            else:
                script_text = ", ".join(scripts)
            extra = " (font %s)" % script_text
        print("    %6s %4s %2s %3s %s%s%s" % (
            "%04x" % cp,
            script,
            unicode_data.category(cp),
            unicode_data.age(cp),
            unicode_data.name(cp, ""),
            extensions,
            extra,
        ))
    # Flush a run that extends to the end of the input.
    show_undefined(undefined_start, undefined_end)
 def accept_cp(cp):
     """Return True if ``cp`` is an acceptable single character.

     ``cp`` must have length 1 and a category that starts with 'L' or
     equals 'Nd'. When ``no_latin`` (taken from the enclosing scope) is
     truthy, the characters 'd' and 'f' are rejected as well.
     """
     if len(cp) != 1:
         return False
     general_category = unicode_data.category(cp)
     letter_or_digit = (
         general_category[0] == 'L' or general_category == 'Nd')
     if not letter_or_digit:
         return False
     return not (no_latin and cp in 'df')
Exemple #5
0
 def accept_cp(cp):
     """Decide whether the string ``cp`` is kept.

     Only length-1 strings whose category starts with "L" or is exactly
     "Nd" can pass; "d" and "f" are additionally refused while the
     enclosing-scope flag ``no_latin`` is truthy.
     """
     if len(cp) != 1:
         return False
     category = unicode_data.category(cp)
     if category[0] == "L" or category == "Nd":
         return not (no_latin and cp in "df")
     return False
 def accept_cp(cp):
   """Tell whether cp passes the single-character filter.

   Accepts a length-1 cp whose category begins with 'L' or equals 'Nd',
   unless no_latin (from the enclosing scope) is truthy and cp is 'd' or
   'f'.
   """
   if len(cp) == 1:
     category = unicode_data.category(cp)
     if category[0] == 'L' or category == 'Nd':
       return not (no_latin and cp in 'df')
   return False
def keep_sequence(cps):
    """Return True if the code point sequence ``cps`` should be kept.

    Sequences longer than one code point are always kept. A single code
    point is dropped when it is private use, when its category does not
    start with 'L', 'P' or 'S', when ``is_regional_indicator`` or
    ``is_ascii_digit`` reports true for it, or when it is '#'.
    """
    if len(cps) > 1:
        return True
    cp = cps[0]
    if unicode_data.is_private_use(cp):
        return False
    if unicode_data.category(cp)[0] not in ['L', 'P', 'S']:
        return False
    if is_regional_indicator(cp) or is_ascii_digit(cp):
        return False
    return cp != ord('#')
Exemple #8
0
def _print_detailed(cps, inverted_target=None):
  """Print a detailed listing of cps grouped by Unicode block.

  Code points are visited in sorted order. A heading is printed whenever
  the block reported by unicode_data.block() changes. Code points whose
  script is 'Zzzz' are not listed individually; consecutive ones within a
  block are collapsed into a single '<N undefined>' summary line. Every
  other code point gets a line with its hex value, script, category, age,
  name, additional script extensions, and target info.

  Args:
    cps: iterable of integer code points.
    inverted_target: optional mapping from code point to a collection of
      strings. When falsy, no target info is printed. Otherwise each line
      is suffixed with the (abbreviated) sorted strings for that code
      point, or ' !missing' if the code point is not a key.
  """
  last_block = None
  # Inclusive bounds of the pending run of 'Zzzz' code points; -1 = none.
  undefined_start = -1
  undefined_end = -1

  def show_undefined(start, end):
    """Print the summary line for the pending undefined run, if any."""
    if start >= 0:
      if end > start:
        # Both bounds are inclusive and the run is contiguous, so it
        # covers end - start + 1 code points.
        print('      %04x-%04x Zzzz <%d undefined>' % (
            start, end, end - start + 1))
      else:
        print('      %04x Zzzz <1 undefined>' % start)

  for cp in sorted(cps):
    block = unicode_data.block(cp)
    # A pending run ends at a block boundary or at a gap in cps.
    if block != last_block or (undefined_end > -1 and cp > undefined_end + 1):
      show_undefined(undefined_start, undefined_end)
      undefined_start, undefined_end = -1, -1
      if block != last_block:
        print('    %s' % block)
        last_block = block
    script = unicode_data.script(cp)
    if script == 'Zzzz':
      # Start a new run or extend the current one; print nothing yet.
      if undefined_start >= 0:
        undefined_end = cp
      else:
        undefined_start, undefined_end = cp, cp
      continue

    # A code point with a real script terminates any pending run.
    show_undefined(undefined_start, undefined_end)
    undefined_start, undefined_end = -1, -1
    extensions = unicode_data.script_extensions(cp) - set([script])
    if extensions:
      extensions = ' (%s)' % ','.join(sorted(extensions))
    else:
      extensions = ''
    if not inverted_target:
      extra = ''
    elif cp not in inverted_target:
      extra = ' !missing'
    else:
      scripts = sorted(inverted_target[cp])
      if len(scripts) > 3:
        # Abbreviate long lists: first three entries, then the last one.
        script_text = ', '.join(scripts[:3]) + '... ' + scripts[-1]
      else:
        script_text = ', '.join(scripts)
      extra = ' (in %s)' % script_text
    print('    %6s %4s %2s %3s %s%s%s' % (
        '%04x' % cp,
        script,
        unicode_data.category(cp),
        unicode_data.age(cp),
        unicode_data.name(cp, ''),
        extensions,
        extra))
  # Flush a run that extends to the end of the input.
  show_undefined(undefined_start, undefined_end)
def _keep_sequence(cps):
  """Return True if the code point sequence cps should be kept.

  Any sequence of more than one code point is kept. A lone code point is
  rejected if it is private use, if its category does not start with 'L',
  'P' or 'S', if _is_regional_indicator or _is_ascii_digit reports true
  for it, or if it is '#' or '*'.
  """
  if len(cps) > 1:
    return True
  cp = cps[0]
  # Checks run in this order and stop at the first one that rejects.
  rejected = (
      unicode_data.is_private_use(cp)
      or unicode_data.category(cp)[0] not in ['L', 'P', 'S']
      or _is_regional_indicator(cp)
      or _is_ascii_digit(cp)
      or cp == ord('#')
      or cp == ord('*'))
  return not rejected
def keep_sequence(cps):
    """Decide whether the code point sequence ``cps`` is kept.

    Multi-code-point sequences are always kept. A single code point is
    kept only if it is not private use, its category starts with "L", "P"
    or "S", neither ``is_regional_indicator`` nor ``is_ascii_digit``
    reports true for it, and it is not "#".
    """
    if len(cps) > 1:
        return True
    cp = cps[0]
    if unicode_data.is_private_use(cp):
        return False
    major_class = unicode_data.category(cp)[0]
    if major_class != "L" and major_class != "P" and major_class != "S":
        return False
    return not (
        is_regional_indicator(cp) or is_ascii_digit(cp) or cp == ord("#")
    )
Exemple #11
0
 def accept(s):
     """Return True for multi-character ``s`` or an allowed category.

     A string longer than one character is always accepted. Otherwise the
     first letter of its category must be contained in ``cat``, which is
     taken from the enclosing scope.
     """
     if len(s) > 1:
         return True
     return unicode_data.category(s)[0] in cat
Exemple #12
0
 def accept_cp(cp):
     """Return True unless ``cp`` is in an excluded category.

     Categories starting with "M", "C" or "Z" are excluded, except for
     the exact category "Co", which is accepted.
     """
     category = unicode_data.category(cp)
     if category[0] not in ["M", "C", "Z"]:
         return True
     return category == "Co"
 def test_category(self):
     """Tests the category() method."""
     # (code point, expected category), checked in this order.
     expectations = (
         (0xF0001, 'Co'),
         (0xE01F0, 'Cn'),
     )
     for codepoint, expected in expectations:
         self.assertEqual(expected, unicode_data.category(codepoint))
def sample_text_from_exemplar(exemplar):
  """Build a space-separated sample string from exemplar entries.

  Keeps only entries whose first character has a category starting with
  'L', 'N', 'P' or 'S', truncates the result to EXEMPLAR_CUTOFF_SIZE
  entries, and joins them with single spaces.
  """
  kept = []
  for entry in exemplar:
    if unicode_data.category(entry[0])[0] in 'LNPS':
      kept.append(entry)
  return ' '.join(kept[:EXEMPLAR_CUTOFF_SIZE])
 def accept_cp(cp):
     """Return True if the category of ``cp`` starts with 'L' or is 'Nd'."""
     category = unicode_data.category(cp)
     if category[0] == 'L':
         return True
     return category == 'Nd'
Exemple #16
0
 def accept_cp(cp):
     """Accept ``cp`` unless its category is excluded.

     A category is excluded when it starts with 'M', 'C' or 'Z' and is
     not exactly 'Co'.
     """
     category = unicode_data.category(cp)
     excluded = category[0] in ['M', 'C', 'Z'] and category != 'Co'
     return not excluded
Exemple #17
0
def sample_text_from_exemplar(exemplar):
    """Return sample text made from the usable entries of ``exemplar``.

    An entry is usable when the category of its first character starts
    with 'L', 'N', 'P' or 'S'. At most ``EXEMPLAR_CUTOFF_SIZE`` usable
    entries are kept, in their original order, joined by single spaces.
    """
    usable = []
    for item in exemplar:
        leading_class = unicode_data.category(item[0])[0]
        if leading_class in 'LNPS':
            usable.append(item)
    truncated = usable[:EXEMPLAR_CUTOFF_SIZE]
    return ' '.join(truncated)
 def accept_cp(cp):
   """Return True when cp's category begins with 'L' or equals 'Nd'."""
   category = unicode_data.category(cp)
   is_letter = category[0] == 'L'
   return is_letter or category == 'Nd'
Exemple #19
0
 def setUp(self):
     """Prepare the list of characters to test and an empty cache.

     Takes the font files from ``self.loaded_fonts``, reads the character
     set of the first one, and keeps the characters whose category is
     'Lm' or 'Sk' in ``self.marks_to_test``.
     """
     font_files, _ = self.loaded_fonts
     self.font_files = font_files
     first_font_chars = coverage.character_set(font_files[0])
     self.marks_to_test = [
         char for char in first_font_chars
         if unicode_data.category(char) in ['Lm', 'Sk']
     ]
     self.advance_cache = {}
 def test_category(self):
     """Tests the category() method."""
     private_use_category = unicode_data.category(0xF0001)
     self.assertEqual('Co', private_use_category)
     unassigned_category = unicode_data.category(0xE01F0)
     self.assertEqual('Cn', unassigned_category)
Exemple #21
0
 def accept_cp(cp):
     """Return True if the category of ``cp`` starts with "L" or is "Nd"."""
     category = unicode_data.category(cp)
     if category[0] != "L" and category != "Nd":
         return False
     return True