def test_defined_characters(self):
    """Exercise unicode_data.defined_characters() with and without filters.

    Covers the unfiltered result, results restricted by version, results
    restricted by script, and a combined version + script restriction.
    """
    defined = unicode_data.defined_characters

    # No restrictions.
    self.assertIn(0x20BD, defined())
    self.assertNotIn(0xFDD0, defined())

    # Restricted by version (passed positionally).
    self.assertNotIn(0x20BD, defined(6.3))
    self.assertIn(0x20BD, defined(7.0))

    # Restricted by script, optionally combined with a version.
    self.assertIn(0x1CD1, defined(scr='Deva'))
    self.assertNotIn(0x1CD1, defined(version=5.1, scr='Deva'))
    self.assertIn(0x0964, defined(scr='Beng'))
    self.assertIn(0xA92E, defined(scr='Latn'))
    self.assertNotIn(0x0363, defined(scr='Arab'))
Exemple #2
0
def display_missing(cmap_file):
    """Report the expected code points that the given cmap data lacks.

    The expected set is unicode_data.defined_characters(version=9.0) minus
    cmap_block_coverage._OMITTED.  Whatever is left after removing the code
    points covered by ``cmap_file`` is handed to show_cps_by_block.

    Args:
        cmap_file: Path to the cmap data; resolved with
            tool_utils.resolve_path before being read.
    """
    print('Checking data in %s' % cmap_file)
    resolved = tool_utils.resolve_path(cmap_file)
    covered = _covered_cps(resolved)
    expected = (
        unicode_data.defined_characters(version=9.0)
        - cmap_block_coverage._OMITTED)
    show_cps_by_block(expected - covered)
Exemple #3
0
def _symbol_set():
  """Returns the characters that Noto Symbols should support.

  The set is built from noto_data.SYMBOL_RANGES_TXT, intersected with
  unicode_data.defined_characters(), and stored in the module-level
  _SYMBOL_SET.  While _SYMBOL_SET is truthy, later calls return that same
  object without rebuilding it.
  """
  global _SYMBOL_SET

  if _SYMBOL_SET:
    return _SYMBOL_SET

  symbol_ranges = unicode_data._parse_code_ranges(noto_data.SYMBOL_RANGES_TXT)
  candidates = code_range_to_set(symbol_ranges)
  _SYMBOL_SET = candidates & unicode_data.defined_characters()
  return _SYMBOL_SET
def display_missing(cmap_file):
  """Report the expected code points that the given cmap data lacks.

  The expected set is unicode_data.defined_characters(version=9.0) minus
  cmap_block_coverage._OMITTED.  Whatever is left after removing the code
  points covered by ``cmap_file`` is handed to show_cps_by_block.

  Args:
    cmap_file: Path to the cmap data; resolved with
        tool_utils.resolve_path before being read.
  """
  # Single-argument call form: prints the same text under both the
  # Python 2 print statement and the Python 3 print function.
  print('Checking data in %s' % cmap_file)
  filename = tool_utils.resolve_path(cmap_file)
  cps = _covered_cps(filename)
  defined_cps = unicode_data.defined_characters(version=9.0)
  omitted = cmap_block_coverage._OMITTED
  expected_cps = defined_cps - omitted
  missing_cps = expected_cps - cps
  show_cps_by_block(missing_cps)
 def test_defined_characters(self):
     """Checks defined_characters() unfiltered and with version/script filters."""
     chars_for = unicode_data.defined_characters

     # Unfiltered.
     self.assertIn(0x20BD, chars_for())
     self.assertNotIn(0xFDD0, chars_for())

     # Filtered by version, given as a positional argument.
     self.assertNotIn(0x20BD, chars_for(6.3))
     self.assertIn(0x20BD, chars_for(7.0))

     # Filtered by script; the second check also applies a version.
     self.assertIn(0x1CD1, chars_for(scr='Deva'))
     self.assertNotIn(0x1CD1, chars_for(version=5.1, scr='Deva'))
     self.assertIn(0x0964, chars_for(scr='Beng'))
     self.assertIn(0xA92E, chars_for(scr='Latn'))
     self.assertNotIn(0x0363, chars_for(scr='Arab'))
Exemple #6
0
def _get_script_required(script,
                         unicode_version,
                         noto_phase,
                         unicode_only=False,
                         verbose=False):
    """Return the code points required for ``script``.

    Args:
        script: Script code.  "Zsye", "Zmth", "Zsym", "LGC", "Aran" and
            "Hans"/"Hant"/"Jpan"/"Kore" are special-cased; any other value
            is passed to unicode_data.defined_characters as ``scr``.
        unicode_version: Passed to unicode_data.defined_characters as
            ``version``.  The result is always intersected with the
            characters defined for this version.
        noto_phase: Passed through to
            noto_data.get_extra_characters_needed and
            noto_data.get_characters_not_needed.
        unicode_only: When true, skip the Noto-specific sets, additions
            and removals.
        verbose: When true, write the script code and a newline to stderr.

    Returns:
        A newly built set (or frozenset) of code points; it never aliases
        an object returned by one of the helper functions.
    """
    # Only non-mutating set operators are used below.  _symbol_set() returns
    # its module-level cache object (the other helpers may do the same), so
    # an in-place |=, -= or &= on the result would silently corrupt that
    # cache for every later call.
    needed_chars = set()
    if script == "Zsye":  # Emoji
        # TODO: Check emoji coverage
        if not unicode_only:
            needed_chars = _emoji_pua_set()  # legacy PUA for android emoji
    elif script == "Zmth":  # Math
        if not unicode_only:
            needed_chars = _math_set()
    elif script == "Zsym":  # Symbols
        if not unicode_only:
            needed_chars = _symbol_set()
    elif script == "LGC":
        needed_chars = (unicode_data.defined_characters(
            scr="Latn", version=unicode_version)
                        | unicode_data.defined_characters(
                            scr="Grek", version=unicode_version)
                        | unicode_data.defined_characters(
                            scr="Cyrl", version=unicode_version))
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set() - _cjk_set()
    elif script == "Aran":
        if unicode_only:
            needed_chars = unicode_data.defined_characters(
                scr="Arab", version=unicode_version)
        else:
            needed_chars = noto_data.urdu_set()
    elif script in ["Hans", "Hant", "Jpan", "Kore"]:
        needed_chars = _cjk_set()
    else:
        needed_chars = unicode_data.defined_characters(scr=script,
                                                       version=unicode_version)
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()

    if not unicode_only:
        needed_chars = needed_chars | noto_data.get_extra_characters_needed(
            script, noto_phase)
        # Scripts without an entry simply have no special characters.
        try:
            special = opentype_data.SPECIAL_CHARACTERS_NEEDED[script]
        except KeyError:
            special = ()
        needed_chars = needed_chars | set(special)
        needed_chars = needed_chars - noto_data.get_characters_not_needed(
            script, noto_phase)
        # NUL, CR and space are added for every script.
        needed_chars = needed_chars | {0, 0xD, 0x20}

    if verbose:
        sys.stderr.write(script + "\n")

    return needed_chars & unicode_data.defined_characters(
        version=unicode_version)
Exemple #7
0
def _get_script_required(script,
                         unicode_version,
                         noto_phase,
                         unicode_only=False,
                         verbose=False):
    """Return the code points required for ``script``.

    Args:
        script: Script code.  'Zsye', 'Zmth', 'Zsym', 'LGC', 'Aran' and
            'Hans'/'Hant'/'Jpan'/'Kore' are special-cased; any other value
            is passed to unicode_data.defined_characters as ``scr``.
        unicode_version: Passed to unicode_data.defined_characters as
            ``version``.  The result is always intersected with the
            characters defined for this version.
        noto_phase: Passed through to
            noto_data.get_extra_characters_needed and
            noto_data.get_characters_not_needed.
        unicode_only: When true, skip the Noto-specific sets, additions
            and removals.
        verbose: When true, write the script code and a newline to stderr.

    Returns:
        A newly built set (or frozenset) of code points; it never aliases
        an object returned by one of the helper functions.
    """
    # Only non-mutating set operators are used below.  _symbol_set() returns
    # its module-level cache object (the other helpers may do the same), so
    # an in-place |=, -= or &= on the result would silently corrupt that
    # cache for every later call.
    needed_chars = set()
    if script == 'Zsye':  # Emoji
        # TODO: Check emoji coverage
        if not unicode_only:
            needed_chars = _emoji_pua_set()  # legacy PUA for android emoji
    elif script == 'Zmth':  # Math
        if not unicode_only:
            needed_chars = _math_set()
    elif script == 'Zsym':  # Symbols
        if not unicode_only:
            needed_chars = _symbol_set()
    elif script == 'LGC':
        needed_chars = (unicode_data.defined_characters(
            scr='Latn', version=unicode_version)
                        | unicode_data.defined_characters(
                            scr='Grek', version=unicode_version)
                        | unicode_data.defined_characters(
                            scr='Cyrl', version=unicode_version))
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set() - _cjk_set()
    elif script == "Aran":
        if unicode_only:
            needed_chars = unicode_data.defined_characters(
                scr='Arab', version=unicode_version)
        else:
            needed_chars = noto_data.urdu_set()
    elif script in ['Hans', 'Hant', 'Jpan', 'Kore']:
        needed_chars = _cjk_set()
    else:
        needed_chars = unicode_data.defined_characters(scr=script,
                                                       version=unicode_version)
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()

    if not unicode_only:
        needed_chars = needed_chars | noto_data.get_extra_characters_needed(
            script, noto_phase)
        # Scripts without an entry simply have no special characters.
        try:
            special = opentype_data.SPECIAL_CHARACTERS_NEEDED[script]
        except KeyError:
            special = ()
        needed_chars = needed_chars | set(special)
        needed_chars = needed_chars - noto_data.get_characters_not_needed(
            script, noto_phase)
        # NUL, CR and space are added for every script.
        needed_chars = needed_chars | {0, 0xd, 0x20}

    if verbose:
        sys.stderr.write(script + '\n')

    return needed_chars & unicode_data.defined_characters(
        version=unicode_version)
def block_coverage(
    cmap_file, start=0, limit=0x20000, only_scripts=None, details=0,
    summary=False):
  """Reads cmap data and reports coverage against the defined characters.

  Args:
    cmap_file: File passed to cmap_data.read_cmap_data_file.
    start: Passed through to the reporting helper.
    limit: Passed through to the reporting helper.
    only_scripts: Passed to _create_cp_to_scripts and, for the listing,
        to _list_blocks.
    details: Passed to _list_blocks; unused when summary is true.
    summary: If true, call _summarize_blocks; otherwise call _list_blocks.
  """
  cmap = cmap_data.read_cmap_data_file(cmap_file)
  cp_to_scripts, all_scripts = _create_cp_to_scripts(cmap, only_scripts)
  defined_cps = unicode_data.defined_characters(version=9.0)

  if not summary:
    _list_blocks(
        start, limit, defined_cps, cp_to_scripts, all_scripts, only_scripts,
        details)
    return

  _summarize_blocks(start, limit, defined_cps, cp_to_scripts, all_scripts)
def block_coverage(
    cmap_file, start=0, limit=0x20000, only_scripts=None, details=0,
    summary=False):
  """Loads cmap data from cmap_file and reports on it.

  The data is read with cmap_data.read_cmap_data_file and converted by
  _create_cp_to_scripts (which also receives only_scripts).  Together with
  unicode_data.defined_characters(version=9.0) the result goes either to
  _summarize_blocks (summary true) or to _list_blocks (summary false);
  only the latter also receives only_scripts and details.  start and limit
  are forwarded to whichever helper runs.
  """
  loaded = cmap_data.read_cmap_data_file(cmap_file)
  cp_to_scripts, all_scripts = _create_cp_to_scripts(loaded, only_scripts)
  defined_cps = unicode_data.defined_characters(version=9.0)

  shared_args = (start, limit, defined_cps, cp_to_scripts, all_scripts)
  if summary:
    _summarize_blocks(*shared_args)
  else:
    _list_blocks(*(shared_args + (only_scripts, details)))
Exemple #10
0
def _get_script_required(script, unicode_version, unicode_only, verbose=False):
  """Returns the code points required for the given script.

  Args:
    script: Script code.  "Qaae", "Zsym", "LGC", "Aran" and
        'Hans'/'Hant'/'Jpan'/'Kore' are special-cased; any other value is
        passed to unicode_data.defined_characters as scr.
    unicode_version: The per-script set is intersected with
        unicode_data.defined_characters(version=unicode_version) before
        any Noto-specific additions or removals are applied.
    unicode_only: When true, skip the Noto-specific sets, additions and
        removals.
    verbose: When true, write the script code followed by a space to
        stderr.

  Returns:
    A newly built set (or frozenset) of code points; it never aliases an
    object returned by one of the helper functions.
  """
  # Only non-mutating set operators are used below.  _symbol_set() returns
  # its module-level cache object (the other helpers may do the same), so an
  # in-place &=, |= or -= on the result would silently corrupt that cache
  # for every later call.
  needed_chars = set()
  if script == "Qaae":
    # TODO: Check emoji coverage
    if not unicode_only:
      needed_chars = _emoji_pua_set()  # legacy PUA for android emoji
  elif script == "Zsym":
    if not unicode_only:
      needed_chars = _symbol_set()
  elif script == "LGC":
    needed_chars = (
        unicode_data.defined_characters(scr="Latn")
        | unicode_data.defined_characters(scr="Grek")
        | unicode_data.defined_characters(scr="Cyrl"))
    if not unicode_only:
      needed_chars = needed_chars - _symbol_set() - _cjk_set()
  elif script == "Aran":
    if unicode_only:
      needed_chars = unicode_data.defined_characters(scr='Arab')
    else:
      needed_chars = noto_data.urdu_set()
  elif script in ['Hans', 'Hant', 'Jpan', 'Kore']:
    needed_chars = _cjk_set()
  else:
    needed_chars = unicode_data.defined_characters(scr=script)
    if not unicode_only:
      needed_chars = needed_chars - _symbol_set()

  needed_chars = needed_chars & unicode_data.defined_characters(
      version=unicode_version)

  if not unicode_only and script != 'Aran':
    # Each table is optional per script; a missing entry means "nothing".
    try:
      extra = noto_data.EXTRA_CHARACTERS_NEEDED[script]
    except KeyError:
      extra = ()
    needed_chars = needed_chars | set(extra)

    try:
      special = opentype_data.SPECIAL_CHARACTERS_NEEDED[script]
    except KeyError:
      special = ()
    needed_chars = needed_chars | set(special)

    try:
      not_needed = noto_data.CHARACTERS_NOT_NEEDED[script]
    except KeyError:
      not_needed = ()
    needed_chars = needed_chars - set(not_needed)

  if not unicode_only:
    # NUL, CR and space are added for every script.
    needed_chars = needed_chars | {0, 0xd, 0x20}

  if verbose:
    # Replaces the Python 2 only `print >> sys.stderr, script,`, which
    # raises TypeError on Python 3.  Codes stay on one line, separated by
    # spaces.
    sys.stderr.write('%s ' % script)

  return needed_chars
def get_defined_cps(version=default_version, exclude_ranges=None):
    """Return the defined characters for ``version``, minus excluded ranges.

    Args:
        version: Passed positionally to unicode_data.defined_characters.
        exclude_ranges: If truthy, parsed with tool_utils.parse_int_ranges
            and removed from the result.
    """
    defined_cps = unicode_data.defined_characters(version)
    if not exclude_ranges:
        return defined_cps

    # NOTE(review): if defined_characters() returns a shared mutable set,
    # this in-place subtraction also modifies it -- confirm the return type.
    defined_cps -= tool_utils.parse_int_ranges(exclude_ranges)
    return defined_cps