Example #1
import unicodedata

def decompositionBase(value):
    """Return the Unicode value of the decomposition base for *value*,
    or -1 if no single-letter base can be found."""
    letterCategories = ("Ll", "Lu", "Lt", "Lo")
    try:
        c = chr(value)
    # chr() raises ValueError for values outside the Unicode code point range
    except ValueError:
        return -1
    decomposition = unicodedata.decomposition(c)
    if decomposition.startswith("<"):
        return -1
    if " " not in decomposition:
        return -1
    parts = decomposition.split(" ")
    unichrs = [chr(int(i, 16)) for i in parts if i]
    letters = [
        ord(i) for i in unichrs if unicodedata.category(i) in letterCategories
    ]
    letterCount = len(letters)
    if letterCount != 1:
        return -1
    decomposedUniValue = letters[0]
    furtherDecomposedUniValue = decompositionBase(decomposedUniValue)
    if furtherDecomposedUniValue != -1:
        furtherFurtherDecomposedUniValue = decompositionBase(
            furtherDecomposedUniValue)
        if furtherFurtherDecomposedUniValue != -1:
            decomposedUniValue = furtherFurtherDecomposedUniValue
        else:
            decomposedUniValue = furtherDecomposedUniValue
    return decomposedUniValue
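
A minimal usage sketch, assuming the module-level import unicodedata above; the expected results follow from the canonical decompositions in the Unicode Character Database.

# Hypothetical calls; "é" (U+00E9) canonically decomposes to "e" + U+0301.
print(hex(decompositionBase(ord("é"))))  # 0x65, the base letter "e"
print(decompositionBase(ord("e")))       # -1, no canonical decomposition
print(decompositionBase(0x110000))       # -1, outside the Unicode range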
Example #2
def _is_non_mark_char(charcode):
    from fontTools import unicodedata
    category = unicodedata.category(chr(charcode))
    if category.startswith("C"):
        # skip control characters
        return None
    else:
        return not category.startswith("M")
Example #3
def _is_non_spacing_mark_char(charcode):
    from fontTools import unicodedata
    category = unicodedata.category(chr(charcode))
    if category.startswith("C"):
        # skip control characters
        return None
    else:
        # Non-spacing marks have one of these Unicode General_Category values:
        #   Mn, Nonspacing_Mark
        #   Me, Enclosing_Mark
        # Characters with the category Mc, Spacing_Mark should not be
        # considered non-spacing marks.
        return category in ("Mn", "Me")
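
A small usage sketch covering this helper and _is_non_mark_char from Example #2, assuming fontTools is installed; the expected values follow from the Unicode General_Category assignments.

print(_is_non_mark_char(ord("a")))        # True:  "a" is Ll, a letter
print(_is_non_mark_char(0x0301))          # False: U+0301 is Mn, a mark
print(_is_non_spacing_mark_char(0x0301))  # True:  Mn is a non-spacing mark
print(_is_non_spacing_mark_char(0x0007))  # None:  control characters are skipped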
Example #4
from collections import Counter

def glyph_metrics_stats(ttFont):
    """Return a dict stating whether the font seems_monospaced, along with
    the maximum glyph width (width_max) and the most common glyph width
    (most_common_width).

    If at least 80% of the ASCII characters have glyphs, the font is
    considered monospaced when at least 80% of those glyphs share the same
    advance width. Otherwise, the font is considered monospaced when every
    glyph of a printable character has one of at most two advance widths or
    is zero-width.
    """
    glyph_metrics = ttFont['hmtx'].metrics
    # NOTE: `range(a, b)` includes `a` and does not include `b`.
    #       Here we don't include 0-31 as well as 127
    #       because these are control characters.
    ascii_glyph_names = [
        ttFont.getBestCmap()[c] for c in range(32, 127)
        if c in ttFont.getBestCmap()
    ]

    if len(ascii_glyph_names) > 0.8 * (127 - 32):
        ascii_widths = [
            adv for name, (adv, lsb) in glyph_metrics.items()
            if name in ascii_glyph_names and adv != 0
        ]
        ascii_width_count = Counter(ascii_widths)
        # most_common(1) returns [(width, count)]; [0][1] is the number of
        # glyphs sharing the most frequent width, not the width itself.
        most_common_width_count = ascii_width_count.most_common(1)[0][1]
        seems_monospaced = most_common_width_count >= len(ascii_widths) * 0.8
    else:
        from fontTools import unicodedata
        # Collect relevant glyphs.
        relevant_glyph_names = set()
        # Add character glyphs that are in one of these categories:
        # Letter, Mark, Number, Punctuation, Symbol, Space_Separator.
        # This excludes Line_Separator, Paragraph_Separator and Control.
        for value, name in ttFont.getBestCmap().items():
            if unicodedata.category(chr(value)).startswith(
                ("L", "M", "N", "P", "S", "Zs")):
                relevant_glyph_names.add(name)
        # Remove character glyphs that are mark glyphs.
        gdef = ttFont.get("GDEF")
        if gdef and gdef.table.GlyphClassDef:
            marks = {
                name
                for name, c in gdef.table.GlyphClassDef.classDefs.items()
                if c == 3
            }
            relevant_glyph_names.difference_update(marks)

        widths = sorted({
            adv
            for name, (adv, lsb) in glyph_metrics.items()
            if name in relevant_glyph_names and adv != 0
        })
        seems_monospaced = len(widths) <= 2

    width_max = max(adv for adv, lsb in glyph_metrics.values())
    # Count advance widths only (ignoring left side bearings), so the result
    # is the most common width rather than the width of the most common
    # (advance, lsb) pair.
    most_common_width = Counter(
        adv for adv, lsb in glyph_metrics.values() if adv != 0
    ).most_common(1)[0][0]
    return {
        "seems_monospaced": seems_monospaced,
        "width_max": width_max,
        "most_common_width": most_common_width,
    }
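
A hedged usage sketch; the font path is made up, and only the imports shown are assumed in addition to the function above.

from fontTools.ttLib import TTFont

font = TTFont("SomeFont.ttf")  # hypothetical font file
stats = glyph_metrics_stats(font)
print(stats["seems_monospaced"], stats["width_max"], stats["most_common_width"])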
Example #5
def _construct_category(glyph_name, data):
    """Derive (sub)category of a glyph name."""
    # Glyphs creates glyphs that start with an underscore as "non-exportable" glyphs or
    # construction helpers without a category.
    if glyph_name.startswith("_"):
        return None, None

    # Glyph variants (e.g. "fi.alt") don't have their own entry, so we strip e.g. the
    # ".alt" and try a second lookup with just the base name. A variant is hopefully in
    # the same category as its base glyph.
    base_name = glyph_name.split(".", 1)[0]
    base_attribute = data.names.get(base_name) or {}
    if base_attribute:
        category = base_attribute.get("category")
        sub_category = base_attribute.get("subCategory")
        return category, sub_category

    # Detect ligatures.
    if "_" in base_name:
        base_names = base_name.split("_")
        # If the last name has a suffix, propagate it to all of the names.
        if "-" in base_names[-1]:
            _, s = base_names[-1].rsplit("-", 1)
            base_names = [
                (n if n.endswith(f"-{s}") else f"{n}-{s}") for n in base_names
            ]
        base_names_attributes = [_lookup_attributes(name, data) for name in base_names]
        first_attribute = base_names_attributes[0]

        # If the first part is a Mark, Glyphs 2.6 declares the entire glyph a Mark
        if first_attribute.get("category") == "Mark":
            category = first_attribute.get("category")
            sub_category = first_attribute.get("subCategory")
            return category, sub_category

        # If the first part is a Letter...
        if first_attribute.get("category") == "Letter":
            # ... and the rest are only marks or separators or don't exist, the
            # sub_category is that of the first part ...
            if all(
                a.get("category") in (None, "Mark", "Separator")
                for a in base_names_attributes[1:]
            ):
                category = first_attribute.get("category")
                sub_category = first_attribute.get("subCategory")
                return category, sub_category
            # ... otherwise, a ligature.
            category = first_attribute.get("category")
            sub_category = "Ligature"
            return category, sub_category

        # TODO: Cover more cases. E.g. "one_one" -> ("Number", "Ligature") but
        # "one_onee" -> ("Number", "Composition").

    # Still nothing? Maybe we're looking at something like "uni1234.alt", try
    # using fontTools' AGL module to convert the base name to something meaningful.
    # Corner case: when looking at ligatures, names that don't exist in the AGLFN
    # are skipped, so len("acutecomb_o") == 2 but len("dotaccentcomb_o") == 1.
    character = fontTools.agl.toUnicode(base_name)
    if character:
        category, sub_category = _translate_category(
            glyph_name, unicodedata.category(character[0])
        )
        return category, sub_category

    return None, None
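
The suffix propagation for ligature names can be tried in isolation; this is a minimal sketch of just that step with a made-up glyph name, not a glyphsLib API.

base_name = "f_f_i-cy"  # hypothetical ligature glyph name with a "-cy" suffix
base_names = base_name.split("_")
if "-" in base_names[-1]:
    # Propagate the suffix of the last part to every part, as above.
    _, s = base_names[-1].rsplit("-", 1)
    base_names = [n if n.endswith(f"-{s}") else f"{n}-{s}" for n in base_names]
print(base_names)  # ['f-cy', 'f-cy', 'i-cy']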
Example #6
import unicodedata

def category(value):
    """Return the Unicode General_Category for *value*."""
    c = chr(value)
    return unicodedata.category(c)
                written_units=list(graphemes(written_form)),
                position_key_borrowed=position_key_borrowed,
                conditions=conditions,
            )
            LETTER_NAME_TO_VARIANTS[letter_name].append(variant)

with open("./MongolianVariants.txt", "w") as f:
    for letter_name, code_point in LETTER_NAME_TO_CODE_POINT.items():
        character_name = unicodedata.name(chr(int(code_point, 16)))
        for variant in LETTER_NAME_TO_VARIANTS[letter_name]:
            fields = [code_point]
            fields.append(variant.position_key)
            field = " ".join(variant.written_units)
            width = 5
            for character in field:
                if unicodedata.category(character) == "Mn":
                    width += 1
            fields.append(field.ljust(width))
            if variant.position_key_borrowed:
                field = variant.position_key_borrowed
            else:
                field = "    "
            fields.append(field)
            for field_key, width in [
                ("context", 7),
                ("mvs", 4),
                ("fvs", 1),
            ]:
                value = variant.conditions[field_key]
                if value:
                    field = value