Python character_set 예제들, coverage.character_set Python 예제들

예제 #1

0

파일 보기

파일: fix_khmer_and_lao_coverage.py 프로젝트: h2so5/Twemoji4Android

def main(argv):
    """Fix all the fonts given in the command line.

    If they are Lao fonts, make sure they have ZWSP and dotted circle. If they
    are Khmer fonts, make sure they have ZWSP, joiners, and dotted circle.

    A font whose path contains neither 'Khmer' nor 'Lao' is skipped with a
    warning on stderr instead of being processed with an undefined (or
    left-over) script tag.

    Args:
      argv: Command-line style argument list. argv[0] is ignored; every
          following item is treated as the path of a font file.
    """
    import sys  # Local import: the file's top-level imports are not visible.

    for font_name in argv[1:]:
        if 'Khmer' in font_name:
            script = 'Khmr'
        elif 'Lao' in font_name:
            script = 'Laoo'
        else:
            # Without this branch `script` would be unbound for the first
            # font, or silently reuse the previous font's script.
            sys.stderr.write(
                'Skipping %s: neither a Khmer nor a Lao font.\n' % font_name)
            continue
        needed_chars = set(opentype_data.SPECIAL_CHARACTERS_NEEDED[script])

        # The donor font is looked up in _UNHINTED_FONTS_DIR under the same
        # file name with the script name stripped out.
        base_name = os.path.basename(font_name)
        lgc_font_name = base_name.replace('Khmer', '').replace('Lao', '')
        lgc_font_name = os.path.join(_UNHINTED_FONTS_DIR, lgc_font_name)

        font_charset = coverage.character_set(font_name)
        missing_chars = needed_chars - font_charset
        if missing_chars:
            # os.path.join keeps the target relative ('new/<file>') when
            # font_name has no directory part; plain concatenation would
            # have produced the absolute path '/new/<file>'.
            merge_chars_from_bank(
                font_name,
                lgc_font_name,
                os.path.join(os.path.dirname(font_name), 'new', base_name),
                missing_chars)

예제 #2

0

파일 보기

파일: fix_khmer_and_lao_coverage.py 프로젝트: zer0ohm/Twemoji4Android

def main(argv):
    """Fix all the fonts given in the command line.

    If they are Lao fonts, make sure they have ZWSP and dotted circle. If they
    are Khmer fonts, make sure they have ZWSP, joiners, and dotted circle.

    Fonts that are neither Khmer nor Lao (judged by the substring in their
    path) are reported on stderr and left untouched.

    Args:
      argv: Argument list in sys.argv layout; items after the first are font
          file paths.
    """
    import sys  # Function-scope import; top-of-file imports are not shown.

    # Checked in order, so a path containing both markers counts as Khmer.
    script_markers = (('Khmer', 'Khmr'), ('Lao', 'Laoo'))

    for font_name in argv[1:]:
        script = next(
            (tag for marker, tag in script_markers if marker in font_name),
            None)
        if script is None:
            # Previously this case hit an unbound `script`, or reused the
            # value left over from the preceding font.
            sys.stderr.write(
                'Skipping %s: neither a Khmer nor a Lao font.\n' % font_name)
            continue

        needed_chars = set(opentype_data.SPECIAL_CHARACTERS_NEEDED[script])

        font_dir, font_file = os.path.split(font_name)
        # Donor font: same file name minus the script name, taken from
        # _UNHINTED_FONTS_DIR.
        lgc_font_name = os.path.join(
            _UNHINTED_FONTS_DIR,
            font_file.replace('Khmer', '').replace('Lao', ''))

        missing_chars = needed_chars - coverage.character_set(font_name)
        if not missing_chars:
            continue

        # Joining path parts (rather than concatenating '/new/') avoids
        # writing to the filesystem root when font_dir is empty.
        merge_chars_from_bank(
            font_name, lgc_font_name,
            os.path.join(font_dir, 'new', font_file), missing_chars)

예제 #3

0

파일 보기

파일: test_vertical_extents.py 프로젝트: mike-fabian/nototools

def test_rendering(data,
                   font_file_name,
                   min_allowed,
                   max_allowed,
                   language=None):
    """Check vertical extents of the parts of `data` the font can display.

    Only the substrings of `data` matched by the regular expression built
    from the font's character set (plus ASCII digits) are rendered; each
    match is placed on its own line of the text handed to the renderer.

    Args:
      data: Input text to filter and render.
      font_file_name: Path of the font file under test.
      min_allowed: Passed through to render.test_text_vertical_extents.
      max_allowed: Passed through to render.test_text_vertical_extents.
      language: Optional language, passed through unchanged.

    Returns:
      Whatever render.test_text_vertical_extents returns.
    """
    supported = coverage.character_set(font_file_name)
    # ASCII digits are always accepted, even when the font lacks them, so
    # that potential frequency info in the input survives the filtering.
    supported |= set(ord(digit) for digit in '0123456789')

    pattern = _regular_expression_from_set(supported)
    renderable_runs = [found.group(0) for found in pattern.finditer(data)]
    text_to_render = '\n'.join(renderable_runs)

    return render.test_text_vertical_extents(
        text_to_render, font_file_name, min_allowed, max_allowed, language)

예제 #4

0

파일 보기

    def test_sanity(self):
        """Check character_set on the unhinted Noto Sans Avestan font.

        The returned set is expected to contain the space character and
        code point 0x10B00, and not to contain the letter 'A'.
        """
        # The font lives in ../fonts/individual/unhinted relative to this file.
        fonts_dir = path.join(
            path.dirname(__file__), os.pardir, 'fonts', 'individual',
            'unhinted')
        avestan_font = path.join(fonts_dir, 'NotoSansAvestan-Regular.ttf')
        covered = coverage.character_set(avestan_font)

        space_code = ord(' ')
        latin_a_code = ord('A')
        self.assertTrue(space_code in covered)
        self.assertTrue(0x10B00 in covered)
        self.assertFalse(latin_a_code in covered)

예제 #5

0

파일 보기

파일: coverage_test.py 프로젝트: adrientetar/nototools

    def test_sanity(self):
        """Smoke-test coverage.character_set with the Avestan test font.

        Asserts that space and 0x10B00 are reported as covered while the
        letter 'A' is not.
        """
        path_parts = (
            path.dirname(__file__), os.pardir,
            'fonts', 'individual', 'unhinted', 'NotoSansAvestan-Regular.ttf')
        code_points = coverage.character_set(path.join(*path_parts))

        # Code points that must be present in the reported character set.
        for expected in (ord(' '), 0x10B00):
            self.assertTrue(expected in code_points)
        # The letter 'A' must be reported as not covered.
        self.assertFalse(ord('A') in code_points)

예제 #6

0

파일 보기

def subset_font(source_file,
                target_file,
                include=None,
                exclude=None,
                options=None):
    """Subsets a font file.

    Subsets a font file based on a specified character set. If only include is
    specified, only characters from that set would be included in the output
    font.  If only exclude is specified, all characters except those in that
    set will be included.  If neither is specified, the character set will
    remain the same, but inaccessible glyphs will be removed.

    Args:
      source_file: Input file name.
      target_file: Output file name
      include: The list of characters to include from the source font.
          Passed to the subsetter as `unicodes`, so presumably code points
          (TODO confirm against callers).
      exclude: The list of characters to exclude from the source font.
      options: A dictionary listing which options should be different from the
          default. Each key is set as an attribute on the subsetter options.

    Raises:
      NotImplementedError: Both include and exclude were specified.
    """
    # Reject the unsupported combination up front, before any subsetter
    # state is created.
    if include is not None and exclude is not None:
        raise NotImplementedError(
            'Subset cannot include and exclude a set at the same time.')

    opt = subset.Options()

    opt.name_IDs = ['*']
    opt.name_legacy = True
    opt.name_languages = ['*']
    opt.layout_features = ['*']
    opt.notdef_outline = True
    opt.recalc_bounds = True
    opt.recalc_timestamp = True
    opt.canonical_order = True

    if options is not None:
        # items() works on both Python 2 and Python 3 dicts; iteritems()
        # only exists on Python 2.
        for name, value in options.items():
            setattr(opt, name, value)

    if include is not None:
        target_charset = include
    else:
        # No explicit include list: start from everything the source font
        # covers and remove the excluded characters (if any).
        if exclude is None:
            exclude = []
        source_charset = coverage.character_set(source_file)
        target_charset = source_charset - set(exclude)

    font = subset.load_font(source_file, opt)
    subsetter = subset.Subsetter(options=opt)
    subsetter.populate(unicodes=target_charset)
    subsetter.subset(font)
    subset.save_font(font, target_file, opt)

예제 #7

0

파일 보기

파일: subset.py 프로젝트: dougfelt/nototools

def subset_font(source_file, target_file,
                include=None, exclude=None, options=None):
    """Subsets a font file.

    Subsets a font file based on a specified character set. If only include is
    specified, only characters from that set would be included in the output
    font.  If only exclude is specified, all characters except those in that
    set will be included.  If neither is specified, the character set will
    remain the same, but inaccessible glyphs will be removed.

    Args:
      source_file: Input file name.
      target_file: Output file name
      include: The list of characters to include from the source font.
          Handed to the subsetter as `unicodes`; presumably code points
          (TODO confirm against callers).
      exclude: The list of characters to exclude from the source font.
      options: A dictionary listing which options should be different from the
          default. Entries override the defaults set below, attribute by
          attribute.

    Raises:
      NotImplementedError: Both include and exclude were specified.
    """
    has_include = include is not None
    if has_include and exclude is not None:
        # Fail before building any subsetter objects; the combination is
        # not supported.
        raise NotImplementedError(
            'Subset cannot include and exclude a set at the same time.')

    opt = subset.Options()

    opt.name_IDs = ['*']
    opt.name_legacy = True
    opt.name_languages = ['*']
    opt.layout_features = ['*']
    opt.notdef_outline = True
    opt.recalc_bounds = True
    opt.recalc_timestamp = True
    opt.canonical_order = True
    opt.drop_tables = ['+TTFA']

    if options is not None:
        # dict.items() is available on Python 2 and 3 alike, unlike the
        # Python-2-only iteritems().
        for name, value in options.items():
            setattr(opt, name, value)

    if has_include:
        target_charset = include
    else:
        # Default to the full coverage of the source font, minus exclusions.
        excluded = set(exclude) if exclude is not None else set()
        target_charset = coverage.character_set(source_file) - excluded

    font = subset.load_font(source_file, opt)
    subsetter = subset.Subsetter(options=opt)
    subsetter.populate(unicodes=target_charset)
    subsetter.subset(font)
    subset.save_font(font, target_file, opt)

예제 #8

0

파일 보기

def test_all_combinations(
    max_len, font_file_name, min_allowed, max_allowed, language=None):
    """Tests the rendering of all combinations up to certain length.

    Every string of length 1..max_len over the font's characters (ASCII
    control codes 0x00-0x1F removed) is generated, joined one per line, and
    handed to test_rendering. The number of strings grows exponentially
    with max_len.

    Args:
      max_len: Maximum length of the generated strings.
      font_file_name: Path of the font file under test.
      min_allowed: Passed through to test_rendering.
      max_allowed: Passed through to test_rendering.
      language: Optional language, passed through to test_rendering.

    Returns:
      The result of test_rendering on the generated text.
    """
    try:
        to_char = unichr  # Python 2: code point -> unicode character
    except NameError:
        to_char = chr  # Python 3: chr() already returns a text character

    font_characters = coverage.character_set(font_file_name)
    font_characters -= set(range(0x00, 0x20))  # Remove ASCII controls
    font_characters = sorted(to_char(code) for code in font_characters)

    all_strings = []
    for length in range(1, max_len + 1):
        all_strings.extend(
            ''.join(comb)
            for comb in itertools.product(font_characters, repeat=length))

    test_data = '\n'.join(all_strings)
    return test_rendering(
        test_data, font_file_name, min_allowed, max_allowed, language)

예제 #9

0

파일 보기

파일: test_vertical_extents.py 프로젝트: adrientetar/nototools

def test_rendering(
    data, font_file_name, min_allowed, max_allowed, language=None):
    """Render the font-supported portions of `data` and check their extents.

    The input is reduced to the sequences matched by the regular expression
    derived from the font's character set (with ASCII digits added); the
    matches are joined with newlines and passed to the renderer.

    Args:
      data: Text to filter and render.
      font_file_name: Path of the font file under test.
      min_allowed: Forwarded to render.test_text_vertical_extents.
      max_allowed: Forwarded to render.test_text_vertical_extents.
      language: Optional language, forwarded unchanged.

    Returns:
      The value returned by render.test_text_vertical_extents.
    """
    ascii_digits = set(range(ord('0'), ord('9') + 1))

    charset = coverage.character_set(font_file_name)
    # Digits are force-added even when the font lacks them, so potential
    # frequency info in the input is kept intact.
    charset |= ascii_digits

    matcher = _regular_expression_from_set(charset)
    filtered_text = '\n'.join(
        [hit.group(0) for hit in matcher.finditer(data)])

    return render.test_text_vertical_extents(
        filtered_text, font_file_name, min_allowed, max_allowed, language)

예제 #10

0

파일 보기

def find_fonts():
    """Scan the font directories and record every recognized font.

    Lists FONT_DIR/hinted, FONT_DIR/unhinted and CJK_DIR, parses each file
    name into family / script / variant / weight / style / platform, and for
    each accepted file appends a Font record to the module-level `all_fonts`
    and adds its script(s) to the module-level `supported_scripts`.

    Arimo, Cousine and Tinos fonts and the unified/old CJK fonts are skipped.
    File names that do not parse must be one of a small set of known
    non-font files, otherwise an assertion fails.
    """
    # Raw strings with an escaped dot: the extension separator must be a
    # literal '.', not "any character".
    font_name_regexp = re.compile(
        r'(NotoSans|NotoSerif|NotoNaskh|NotoKufi|Arimo|Cousine|Tinos)'
        r'(.*?)'
        r'(UI|Eastern|Estrangela|Western)?'
        r'-'
        r'(|Black|Bold|DemiLight|Light|Medium|Regular|Thin)'
        r'(Italic)?'
        r'(-Windows)?'
        r'\.[ot]t[cf]')

    unicode_data.load_data()

    for directory in [
            path.join(FONT_DIR, 'hinted'),
            path.join(FONT_DIR, 'unhinted'), CJK_DIR
    ]:
        for filename in os.listdir(directory):
            match = font_name_regexp.match(filename)
            if not match:
                # Only these known non-font files may fail to parse.
                assert (filename == 'NotoSansCJK.ttc'  # All-comprehensive CJK
                        or filename.endswith('.ttx')
                        or filename.startswith('README.')
                        or filename in ['COPYING', 'LICENSE', 'NEWS'])
                continue
            family, script, variant, weight, style, platform = match.groups()

            if family in {'Arimo', 'Cousine', 'Tinos'}:
                continue  # Skip these three for the website

            if family.startswith('Noto'):
                family = family.replace('Noto', 'Noto ')

            if weight == '':
                weight = 'Regular'

            # '-Windows' builds are not expected in these directories.
            assert platform is None

            if script == '':  # LGC
                supported_scripts.update({'Latn', 'Grek', 'Cyrl'})
            elif script in {'JP', 'KR', 'SC', 'TC', 'CJK'}:
                continue  # Skip unified or old CJK fonts
            else:
                script = convert_to_four_letter(script)
                supported_scripts.add(script)

            file_path = path.join(directory, filename)
            if filename.endswith(('.ttf', '.otf')):
                charset = coverage.character_set(file_path)
            else:
                # Other matched extensions (.ttc / .otc): no character set
                # is read; NotImplemented is stored as a placeholder.
                charset = NotImplemented

            if directory == CJK_DIR:
                hint_status = 'hinted'
            else:
                hint_status = path.basename(directory)
            assert hint_status in ['hinted', 'unhinted']

            # Key: lower-cased family[-script][-variant]; the UI variant
            # shares the key of the plain font.
            key = family.replace(' ', '-')
            if script:
                key += '-' + script
            if variant not in {None, 'UI'}:
                key += '-' + variant
            key = key.lower()

            font = Font(file_path, hint_status, key, family, script, variant,
                        weight, style, platform, charset)
            all_fonts.append(font)

예제 #11

0

파일 보기

파일: generate_website_data.py 프로젝트: h2so5/Twemoji4Android

def find_fonts():
    """Collect Font records for the fonts found on disk.

    Walks the 'hinted' and 'unhinted' subdirectories of FONT_DIR plus
    CJK_DIR. Each file name is split by a regular expression into family,
    script, variant, weight, style and platform; accepted fonts are appended
    to the module-level `all_fonts` list and their scripts are added to the
    module-level `supported_scripts` set.

    Skipped: the Arimo, Cousine and Tinos families, and fonts whose script
    part is one of JP, KR, SC, TC or CJK. A file name that does not parse
    must be a known non-font file, otherwise an assertion fails.
    """
    # The pattern is written as raw strings and the extension dot is
    # escaped so it only matches a literal '.'.
    font_name_regexp = re.compile(
        r'(NotoSans|NotoSerif|NotoNaskh|NotoKufi|Arimo|Cousine|Tinos)'
        r'(.*?)'
        r'(UI|Eastern|Estrangela|Western)?'
        r'-'
        r'(|Black|Bold|DemiLight|Light|Medium|Regular|Thin)'
        r'(Italic)?'
        r'(-Windows)?'
        r'\.[ot]t[cf]')

    unicode_data.load_data()

    for directory in [path.join(FONT_DIR, 'hinted'),
                      path.join(FONT_DIR, 'unhinted'),
                      CJK_DIR]:
        for filename in os.listdir(directory):
            match = font_name_regexp.match(filename)
            if not match:
                # Anything unparseable has to be one of these known files.
                assert (
                    filename == 'NotoSansCJK.ttc' or  # All-comprehensive CJK
                    filename.endswith('.ttx') or
                    filename.startswith('README.') or
                    filename in ['COPYING', 'LICENSE', 'NEWS'])
                continue
            family, script, variant, weight, style, platform = match.groups()

            if family in {'Arimo', 'Cousine', 'Tinos'}:
                continue  # Skip these three for the website

            if family.startswith('Noto'):
                family = family.replace('Noto', 'Noto ')

            if weight == '':
                weight = 'Regular'

            # No '-Windows' platform builds are expected here.
            assert platform is None

            if script == '':  # LGC
                supported_scripts.update({'Latn', 'Grek', 'Cyrl'})
            elif script in {'JP', 'KR', 'SC', 'TC', 'CJK'}:
                continue  # Skip unified or old CJK fonts
            else:
                script = convert_to_four_letter(script)
                supported_scripts.add(script)

            file_path = path.join(directory, filename)
            if filename.endswith(('.ttf', '.otf')):
                charset = coverage.character_set(file_path)
            else:
                # Collection files (.ttc / .otc) are not read for coverage;
                # NotImplemented marks the character set as unavailable.
                charset = NotImplemented

            if directory == CJK_DIR:
                hint_status = 'hinted'
            else:
                hint_status = path.basename(directory)
            assert hint_status in ['hinted', 'unhinted']

            # Build the lower-case key family[-script][-variant]; 'UI' is
            # deliberately left out of the key.
            key = family.replace(' ', '-')
            if script:
                key += '-' + script
            if variant not in {None, 'UI'}:
                key += '-' + variant
            key = key.lower()

            font = Font(file_path, hint_status, key,
                        family, script, variant, weight, style, platform,
                        charset)
            all_fonts.append(font)