def test_defined_characters(self):
    """Checks membership in the sets returned by defined_characters().

    Exercises the unrestricted call, calls restricted by Unicode version,
    and calls restricted by script (alone and combined with a version).
    """
    defined = unicode_data.defined_characters

    self.assertIn(0x20BD, defined())
    self.assertNotIn(0xFDD0, defined())

    # Version-restricted
    self.assertNotIn(0x20BD, defined(6.3))
    self.assertIn(0x20BD, defined(7.0))

    # Script restricted
    self.assertIn(0x1CD1, defined(scr='Deva'))
    self.assertNotIn(0x1CD1, defined(version=5.1, scr='Deva'))
    self.assertIn(0x0964, defined(scr='Beng'))
    self.assertIn(0xA92E, defined(scr='Latn'))
    self.assertNotIn(0x0363, defined(scr='Arab'))
def display_missing(cmap_file):
    """Reports the expected code points that cmap_file does not cover.

    The expected set is every character defined as of Unicode 9.0 minus
    cmap_block_coverage._OMITTED.  Whatever part of it is absent from the
    code points covered by cmap_file is handed to show_cps_by_block().

    Args:
      cmap_file: path to the cmap data file; it is resolved with
        tool_utils.resolve_path() before being read.
    """
    print('Checking data in %s' % cmap_file)
    covered = _covered_cps(tool_utils.resolve_path(cmap_file))
    expected = (
        unicode_data.defined_characters(version=9.0)
        - cmap_block_coverage._OMITTED)
    show_cps_by_block(expected - covered)
def _symbol_set():
    """Returns the set of characters Noto Symbols should support.

    The set is the code ranges listed in noto_data.SYMBOL_RANGES_TXT,
    limited to defined characters.  It is computed on first use and kept
    in the module-level _SYMBOL_SET; later calls return that same object.
    """
    global _SYMBOL_SET
    if _SYMBOL_SET:
        return _SYMBOL_SET

    symbol_ranges = unicode_data._parse_code_ranges(
        noto_data.SYMBOL_RANGES_TXT)
    _SYMBOL_SET = (
        code_range_to_set(symbol_ranges) & unicode_data.defined_characters())
    return _SYMBOL_SET
def display_missing(cmap_file):
    """Reports the expected code points that cmap_file does not cover.

    The expected set is every character defined as of Unicode 9.0 minus
    cmap_block_coverage._OMITTED.  The part of it missing from the code
    points covered by cmap_file is passed to show_cps_by_block().

    Args:
      cmap_file: path to the cmap data file; it is resolved with
        tool_utils.resolve_path() before being read.
    """
    # Call form of print: a single parenthesized argument prints the same
    # text under Python 2 and is valid syntax under Python 3, unlike the
    # print statement.
    print('Checking data in %s' % cmap_file)
    filename = tool_utils.resolve_path(cmap_file)
    cps = _covered_cps(filename)
    defined_cps = unicode_data.defined_characters(version=9.0)
    omitted = cmap_block_coverage._OMITTED
    expected_cps = defined_cps - omitted
    missing_cps = expected_cps - cps
    show_cps_by_block(missing_cps)
def test_defined_characters(self):
    """Verifies which code points defined_characters() does and doesn't return.

    Each row below is (assertion, code point, positional args, keyword args);
    the code point is checked against the set returned by calling
    unicode_data.defined_characters() with those arguments.
    """
    checks = (
        (self.assertIn, 0x20BD, (), {}),
        (self.assertNotIn, 0xFDD0, (), {}),
        # Version-restricted
        (self.assertNotIn, 0x20BD, (6.3,), {}),
        (self.assertIn, 0x20BD, (7.0,), {}),
        # Script restricted
        (self.assertIn, 0x1CD1, (), {'scr': 'Deva'}),
        (self.assertNotIn, 0x1CD1, (), {'version': 5.1, 'scr': 'Deva'}),
        (self.assertIn, 0x0964, (), {'scr': 'Beng'}),
        (self.assertIn, 0xA92E, (), {'scr': 'Latn'}),
        (self.assertNotIn, 0x0363, (), {'scr': 'Arab'}),
    )
    for check, code_point, args, kwargs in checks:
        check(code_point, unicode_data.defined_characters(*args, **kwargs))
def _get_script_required(script, unicode_version, noto_phase,
                         unicode_only=False, verbose=False):
    """Returns the set of code points required for the given script.

    Args:
      script: script code.  "Zsye", "Zmth", "Zsym", "LGC", "Aran" and the
        CJK codes "Hans"/"Hant"/"Jpan"/"Kore" are special-cased; any other
        value is passed to unicode_data.defined_characters() as ``scr``.
      unicode_version: passed as ``version`` to every defined_characters()
        call; the final result is also intersected with the characters
        defined for this version.
      noto_phase: forwarded to noto_data.get_extra_characters_needed() and
        noto_data.get_characters_not_needed().
      unicode_only: when true, the Noto-specific sets (emoji PUA, math,
        symbols, Urdu), the symbol/CJK subtractions, the extra characters
        and the code points 0, 0xD and 0x20 are all skipped.
      verbose: when true, the script code is written to stderr.

    Returns:
      The required code points as a set.

    All set arithmetic below is deliberately non-mutating.  Several
    branches bind ``needed_chars`` directly to a helper's return value, and
    _symbol_set() returns its module-level cache object; in-place
    ``|=``/``-=``/``&=`` on such an alias would modify that cache if it is
    a mutable set.
    """

    def defined(scr):
        # Characters of one script, limited to the requested version.
        return unicode_data.defined_characters(
            scr=scr, version=unicode_version)

    needed_chars = set()
    if script == "Zsye":  # Emoji
        # TODO: Check emoji coverage
        if not unicode_only:
            needed_chars = _emoji_pua_set()  # legacy PUA for android emoji
    elif script == "Zmth":  # Math
        if not unicode_only:
            needed_chars = _math_set()
    elif script == "Zsym":  # Symbols
        if not unicode_only:
            needed_chars = _symbol_set()
    elif script == "LGC":
        needed_chars = defined("Latn") | defined("Grek") | defined("Cyrl")
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set() - _cjk_set()
    elif script == "Aran":
        if unicode_only:
            needed_chars = defined("Arab")
        else:
            needed_chars = noto_data.urdu_set()
    elif script in ["Hans", "Hant", "Jpan", "Kore"]:
        needed_chars = _cjk_set()
    else:
        needed_chars = defined(script)
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()

    if not unicode_only:
        needed_chars = needed_chars | noto_data.get_extra_characters_needed(
            script, noto_phase)

    try:
        needed_chars = needed_chars | set(
            opentype_data.SPECIAL_CHARACTERS_NEEDED[script])
    except KeyError:
        # No special characters are listed for this script.
        pass

    needed_chars = needed_chars - noto_data.get_characters_not_needed(
        script, noto_phase)

    if not unicode_only:
        needed_chars = needed_chars | {0, 0xD, 0x20}

    if verbose:
        sys.stderr.write(script + "\n")

    # Final filter: keep only what is defined in the requested version.
    return needed_chars & unicode_data.defined_characters(
        version=unicode_version)
def _get_script_required(script, unicode_version, noto_phase,
                         unicode_only=False, verbose=False):
    """Returns the set of code points required for the given script.

    Args:
      script: script code.  'Zsye', 'Zmth', 'Zsym', 'LGC', 'Aran' and the
        CJK codes 'Hans'/'Hant'/'Jpan'/'Kore' get dedicated handling; any
        other value is passed to unicode_data.defined_characters() as
        ``scr``.
      unicode_version: passed as ``version`` to every defined_characters()
        call; the result is finally intersected with the characters
        defined for this version.
      noto_phase: forwarded to noto_data.get_extra_characters_needed() and
        noto_data.get_characters_not_needed().
      unicode_only: when true, skips the Noto-specific sets (emoji PUA,
        math, symbols, Urdu), the symbol/CJK subtractions, the extra
        characters and the code points 0, 0xd and 0x20.
      verbose: when true, writes the script code to stderr.

    Returns:
      The required code points as a set.

    Set arithmetic here never updates in place.  ``needed_chars`` may be
    the very object a helper returned, and _symbol_set() returns its
    module-level cache; ``|=``/``-=``/``&=`` on that alias would alter the
    cache if it is a mutable set.
    """
    needed_chars = set()
    if script == 'Zsye':  # Emoji
        # TODO: Check emoji coverage
        if not unicode_only:
            needed_chars = _emoji_pua_set()  # legacy PUA for android emoji
    elif script == 'Zmth':  # Math
        if not unicode_only:
            needed_chars = _math_set()
    elif script == 'Zsym':  # Symbols
        if not unicode_only:
            needed_chars = _symbol_set()
    elif script == 'LGC':
        needed_chars = (
            unicode_data.defined_characters(
                scr='Latn', version=unicode_version)
            | unicode_data.defined_characters(
                scr='Grek', version=unicode_version)
            | unicode_data.defined_characters(
                scr='Cyrl', version=unicode_version))
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()
            needed_chars = needed_chars - _cjk_set()
    elif script == 'Aran':
        if unicode_only:
            needed_chars = unicode_data.defined_characters(
                scr='Arab', version=unicode_version)
        else:
            needed_chars = noto_data.urdu_set()
    elif script in ['Hans', 'Hant', 'Jpan', 'Kore']:
        needed_chars = _cjk_set()
    else:
        needed_chars = unicode_data.defined_characters(
            scr=script, version=unicode_version)
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()

    if not unicode_only:
        needed_chars = needed_chars | noto_data.get_extra_characters_needed(
            script, noto_phase)

    try:
        needed_chars = needed_chars | set(
            opentype_data.SPECIAL_CHARACTERS_NEEDED[script])
    except KeyError:
        # This script has no entry in SPECIAL_CHARACTERS_NEEDED.
        pass

    needed_chars = needed_chars - noto_data.get_characters_not_needed(
        script, noto_phase)

    if not unicode_only:
        needed_chars = needed_chars | {0, 0xd, 0x20}

    if verbose:
        sys.stderr.write(script + '\n')

    # Drop anything not defined in the requested Unicode version.
    needed_chars = needed_chars & unicode_data.defined_characters(
        version=unicode_version)
    return needed_chars
def block_coverage(cmap_file, start=0, limit=0x20000, only_scripts=None,
                   details=0, summary=False):
    """Reports block coverage for the cmap data stored in cmap_file.

    Args:
      cmap_file: file read with cmap_data.read_cmap_data_file().
      start: forwarded unchanged to the reporting helper.
      limit: forwarded unchanged to the reporting helper.
      only_scripts: passed to _create_cp_to_scripts() and, for the
        non-summary report, to _list_blocks().
      details: forwarded to _list_blocks(); unused for the summary.
      summary: when true, _summarize_blocks() produces the report instead
        of _list_blocks().
    """
    cmap = cmap_data.read_cmap_data_file(cmap_file)
    scripts_by_cp, scripts = _create_cp_to_scripts(cmap, only_scripts)
    # Coverage is measured against the characters defined in Unicode 9.0.
    defined = unicode_data.defined_characters(version=9.0)

    if summary:
        _summarize_blocks(start, limit, defined, scripts_by_cp, scripts)
        return

    _list_blocks(
        start, limit, defined, scripts_by_cp, scripts, only_scripts, details)
def _get_script_required(script, unicode_version, unicode_only, verbose=False):
    """Returns the set of code points required for the given script.

    Args:
      script: script code.  'Qaae', 'Zsym', 'LGC', 'Aran' and the CJK codes
        'Hans'/'Hant'/'Jpan'/'Kore' get dedicated handling; any other value
        is passed to unicode_data.defined_characters() as ``scr``.
      unicode_version: the per-script set is intersected with the
        characters defined for this version before extras are applied.
      unicode_only: when true, skips the Noto-specific sets (emoji PUA,
        symbols, Urdu), the symbol/CJK subtractions, the
        EXTRA_CHARACTERS_NEEDED additions and the code points 0, 0xd, 0x20.
      verbose: when true, writes the script code to stderr.

    Returns:
      The required code points as a set.

    Set arithmetic here never updates in place.  ``needed_chars`` may be
    the very object a helper returned, and _symbol_set() returns its
    module-level cache; ``|=``/``-=``/``&=`` on that alias would alter the
    cache if it is a mutable set.
    """
    needed_chars = set()
    if script == 'Qaae':
        # TODO: Check emoji coverage
        if not unicode_only:
            needed_chars = _emoji_pua_set()  # legacy PUA for android emoji
    elif script == 'Zsym':
        if not unicode_only:
            needed_chars = _symbol_set()
    elif script == 'LGC':
        needed_chars = (
            unicode_data.defined_characters(scr='Latn')
            | unicode_data.defined_characters(scr='Grek')
            | unicode_data.defined_characters(scr='Cyrl'))
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()
            needed_chars = needed_chars - _cjk_set()
    elif script == 'Aran':
        if unicode_only:
            needed_chars = unicode_data.defined_characters(scr='Arab')
        else:
            needed_chars = noto_data.urdu_set()
    elif script in ['Hans', 'Hant', 'Jpan', 'Kore']:
        needed_chars = _cjk_set()
    else:
        needed_chars = unicode_data.defined_characters(scr=script)
        if not unicode_only:
            needed_chars = needed_chars - _symbol_set()

    # Version filter comes before the extras, so the additions below are
    # not subject to it.
    needed_chars = needed_chars & unicode_data.defined_characters(
        version=unicode_version)

    if not unicode_only and script != 'Aran':
        try:
            needed_chars = needed_chars | set(
                noto_data.EXTRA_CHARACTERS_NEEDED[script])
        except KeyError:
            # No extra characters are listed for this script.
            pass

    try:
        needed_chars = needed_chars | set(
            opentype_data.SPECIAL_CHARACTERS_NEEDED[script])
    except KeyError:
        # No special characters are listed for this script.
        pass

    try:
        needed_chars = needed_chars - set(
            noto_data.CHARACTERS_NOT_NEEDED[script])
    except KeyError:
        # Nothing is excluded for this script.
        pass

    if not unicode_only:
        needed_chars = needed_chars | {0, 0xd, 0x20}

    if verbose:
        # Replaces the Python 2 only "print >> sys.stderr, script," form.
        # No newline is written, so successive script codes stay on one
        # line, separated by spaces.
        sys.stderr.write(script + ' ')

    return needed_chars
def get_defined_cps(version=default_version, exclude_ranges=None):
    """Returns the defined code points for version, minus excluded ranges.

    Args:
      version: passed positionally to unicode_data.defined_characters().
      exclude_ranges: optional range spec parsed with
        tool_utils.parse_int_ranges(); the parsed code points are removed
        from the result.  Nothing is removed when it is falsy.

    Returns:
      The set of defined code points after any exclusions.
    """
    cps = unicode_data.defined_characters(version)
    if not exclude_ranges:
        return cps

    # NOTE(review): "-=" updates cps in place when it is a mutable set;
    # confirm defined_characters() does not hand out a shared object.
    cps -= tool_utils.parse_int_ranges(exclude_ranges)
    return cps