def generateFlat(path,
                 onlySupported=True,
                 scriptSeparator=None,
                 scriptAsPrefix=None,
                 status=0,
                 includeUnicodeCategory=False):
    """Write the Glyph Name Formatted Unicode List (GNFUL) to *path*.

    The file starts with a commented header, followed, per Unicode range,
    by a "# <range name>" comment and one "<glyphName> <hex unicode>" line
    for every code point that yields a name.

    path: destination of the text file; it is overwritten.
    onlySupported: when true, ranges whose module cannot be imported from
        glyphNameFormatter.rangeProcessors are skipped.
    scriptSeparator, scriptAsPrefix: handed to GlyphName unchanged and,
        when not None, recorded in the header.
    status: glyphs whose ``status`` is lower than this value are left out;
        None disables the filter.
    includeUnicodeCategory: append the entry from unicodeCategories
        ("-" when missing) as a third column.
    """
    data = [
        "# Glyph Name Formatted Unicode List - GNFUL",
        "# GlyphNameFormatter version %s" % _versionNumber,
        "# Unicode version: %s" % unicodeVersion,
        "# Source code: %s" % _githubLink,
        "# Generated on %s" % time.strftime("%Y %m %d %H:%M:%S"),
    ]
    if includeUnicodeCategory:
        data.append("# <glyphName> <hex unicode> <unicodeCategory>")
    else:
        data.append("# <glyphName> <hex unicode>")
    if scriptSeparator is not None:
        data.append("# Separator \"%s\"" % scriptSeparator)
    if scriptAsPrefix is not None:
        data.append("# Prefixed \"%s\"" % scriptAsPrefix)
    data.append("#")
    for rangeName in getAllRangeNames():
        if onlySupported:
            moduleName = rangeNameToModuleName(rangeName)
            try:
                # Only the import outcome matters; the module is not used.
                importlib.import_module(
                    'glyphNameFormatter.rangeProcessors.%s' % moduleName)
            except ImportError:
                # No processor for this range: leave it out of the list.
                continue
        data.append("# %s" % rangeName)
        start, end = getRangeByName(rangeName)
        # The range end is inclusive.
        for u in range(start, end + 1):
            g = GlyphName(uniNumber=u,
                          scriptSeparator=scriptSeparator,
                          scriptAsPrefix=scriptAsPrefix)
            g.compress()  # should auto compress
            if status is not None and g.status < status:
                # the glyph has a status that is less than what we're
                # looking for, so do not include it in the list.
                continue
            name = g.getName(extension=True)
            if name is None:
                continue
            if includeUnicodeCategory:
                data.append("%s %04X %s" %
                            (name, u, unicodeCategories.get(u, "-")))
            else:
                data.append("%s %04X" % (name, u))

    # The context manager closes the file even when the write fails.
    with open(path, "w") as f:
        f.write("\n".join(data))
 def testAllPrefixes():
     """Pretty-print a mapping of script prefix -> list of range names.

     Ranges that share a prefix end up in the same list, so the output
     shows where a prefix alone cannot tell ranges apart.
     """
     from pprint import pprint
     from glyphNameFormatter.unicodeRangeNames import getAllRangeNames

     # let's not just assume all prefixes that end up the same
     # will also be able to disambiguate names.
     rangesByPrefix = {}
     for rangeName in getAllRangeNames():
         rangesByPrefix.setdefault(scriptPrefixes[rangeName], []).append(rangeName)
     pprint(rangesByPrefix)
 def testAllPrefixes():
     """Pretty-print a mapping of script prefix -> list of range names.

     Ranges that share a prefix end up in the same list, so the output
     shows where a prefix alone cannot tell ranges apart.
     """
     from pprint import pprint
     from glyphNameFormatter.unicodeRangeNames import getAllRangeNames

     # let's not just assume all prefixes that end up the same
     # will also be able to disambiguate names.
     rangesByPrefix = {}
     for rangeName in getAllRangeNames():
         rangesByPrefix.setdefault(scriptPrefixes[rangeName], []).append(rangeName)
     pprint(rangesByPrefix)
def testAGDCoverage():
    """Print how well the available range processors cover the AGD names.

    Every unicode in name2unicode_AGD is assigned to its range name
    ("Private Use Area" is ignored). Ranges are then reported in three
    groups: those with an importable range processor, those without, and
    the remaining known ranges that AGD does not use at all.
    """
    neededRanges = []
    countPerRange = {}
    for glyphName in name2unicode_AGD:
        rangeName = getRangeName(name2unicode_AGD[glyphName])
        if rangeName == "Private Use Area":
            continue
        countPerRange[rangeName] = countPerRange.get(rangeName, 0) + 1
        if rangeName is not None and rangeName not in neededRanges:
            neededRanges.append(rangeName)

    withProcessor = []
    withoutProcessor = []
    for rangeName in neededRanges:
        if rangeName == "Private Use Area":
            continue
        moduleName = rangeNameToModuleName(rangeName)
        try:
            importlib.import_module('glyphNameFormatter.rangeProcessors.%s' % moduleName)
        except ImportError:
            withoutProcessor.append(rangeName)
        else:
            withProcessor.append(rangeName)

    unneeded = sorted(
        rangeName for rangeName in getAllRangeNames()
        if rangeName not in withProcessor and rangeName not in withoutProcessor)
    withProcessor.sort()
    withoutProcessor.sort()

    print("Available range processors for AGD:")
    for rangeName in withProcessor:
        print("\t%8d\t%s" % (countPerRange[rangeName], rangeName))
    supportedTotal = sum(countPerRange[rangeName] for rangeName in withProcessor)

    print("\nMissing range processors for AGD:")
    for rangeName in withoutProcessor:
        print("\t%8d\t%s" % (countPerRange[rangeName], rangeName))
    notSupportedTotal = sum(countPerRange[rangeName] for rangeName in withoutProcessor)

    agdTotal = len(name2unicode_AGD)
    print("Supported total", supportedTotal+notSupportedTotal)
    print("AGD supported total", supportedTotal)
    print("AGD total", agdTotal)
    print("Coverage complete: %3.1f%%" % (100.0*supportedTotal/agdTotal))

    print("\nRange processors not needed for AGD:")
    for rangeName in unneeded:
        print("\t", rangeName)
def generateFlat(path, onlySupported=True, scriptSeparator=None, scriptAsPrefix=None, status=0, includeUnicodeCategory=False):
    """Write the Glyph Name Formatted Unicode List (GNFUL) to *path*.

    The file starts with a commented header, followed, per Unicode range,
    by a "# <range name>" comment and one "<glyphName> <hex unicode>" line
    for every code point that yields a name.

    path: destination of the text file; it is overwritten.
    onlySupported: when true, ranges whose module cannot be imported from
        glyphNameFormatter.rangeProcessors are skipped.
    scriptSeparator, scriptAsPrefix: handed to GlyphName unchanged and,
        when not None, recorded in the header.
    status: glyphs whose ``status`` is lower than this value are left out;
        None disables the filter.
    includeUnicodeCategory: append the entry from unicodeCategories
        ("-" when missing) as a third column.
    """
    data = [
        "# Glyph Name Formatted Unicode List - GNFUL",
        "# GlyphNameFormatter version %s" % _versionNumber,
        "# Unicode version: %s" % unicodeVersion,
        "# Source code: %s" % _githubLink,
        "# Generated on %s" % time.strftime("%Y %m %d %H:%M:%S"),
    ]
    if includeUnicodeCategory:
        data.append("# <glyphName> <hex unicode> <unicodeCategory>")
    else:
        data.append("# <glyphName> <hex unicode>")
    if scriptSeparator is not None:
        data.append("# Separator \"%s\"" % scriptSeparator)
    if scriptAsPrefix is not None:
        data.append("# Prefixed \"%s\"" % scriptAsPrefix)
    data.append("#")
    for rangeName in getAllRangeNames():
        if onlySupported:
            moduleName = rangeNameToModuleName(rangeName)
            try:
                # Only the import outcome matters; the module is not used.
                importlib.import_module('glyphNameFormatter.rangeProcessors.%s' % moduleName)
            except ImportError:
                # No processor for this range: leave it out of the list.
                continue
        data.append("# %s" % rangeName)
        start, end = getRangeByName(rangeName)
        # The range end is inclusive.
        for u in range(start, end+1):
            g = GlyphName(uniNumber=u, scriptSeparator=scriptSeparator, scriptAsPrefix=scriptAsPrefix)
            g.compress()  # should auto compress
            if status is not None and g.status < status:
                # the glyph has a status that is less than what we're
                # looking for, so do not include it in the list.
                continue
            name = g.getName(extension=True)
            if name is None:
                continue
            if includeUnicodeCategory:
                data.append("%s %04X %s" % (name, u, unicodeCategories.get(u, "-")))
            else:
                data.append("%s %04X" % (name, u))

    # The context manager closes the file even when the write fails.
    with open(path, "w") as f:
        f.write("\n".join(data))
def generateFlat(path, onlySupported=True):
    """Write a flat "<glyphName> <hex unicode>" list to *path*.

    For every Unicode range a "# <range name>" comment is emitted,
    followed by one line per code point for which GlyphName produces a
    name.

    path: destination of the text file; it is overwritten.
    onlySupported: when true, ranges whose module cannot be imported from
        glyphNameFormatter.rangeProcessors are skipped.
    """
    data = [
        "# format",
        "# <glyphName> <hex unicode>"
    ]
    for rangeName in getAllRangeNames():
        if onlySupported:
            moduleName = rangeNameToModuleName(rangeName)
            try:
                # Only the import outcome matters; the module is not used.
                importlib.import_module('glyphNameFormatter.rangeProcessors.%s' % moduleName)
            except ImportError:
                # No processor for this range: leave it out of the list.
                continue
        data.append("# %s" % rangeName)
        start, end = getRangeByName(rangeName)
        # The range end is inclusive, so the last code point must be
        # visited too (range(start, end) would silently drop it).
        for u in range(start, end + 1):
            g = GlyphName(uniNumber=u)
            name = g.getName(extension=True)
            if name is None:
                continue
            data.append("%s %04X" % (name, u))

    # The context manager closes the file even when the write fails.
    with open(path, "w") as f:
        f.write("\n".join(data))
Example #7
0
# Copy the latest name list into the browser's data folder, if it exists.
srcNamePath = os.path.join(
    gnfRoot, "names", "glyphNamesToUnicodeAndCategories_experimental.txt")
if os.path.exists(srcNamePath):
    dstNamePth = os.path.join(browserRoot, "data", "glyphNamesToUnicode.txt")
    # print() calls instead of Python 2 print statements, matching the
    # call form already used further down in this script.
    print("srcNamePath", srcNamePath)
    print("dstNamePth", dstNamePth)
    shutil.copyfile(srcNamePath, dstNamePth)

# make a range name table
from glyphNameFormatter.unicodeRangeNames import getAllRangeNames, getRangeByName

# Maps (start, end) -> range name.
ranges = {}

for rangeName in getAllRangeNames():
    r = getRangeByName(rangeName)
    if r is None:
        print("unknown range name", rangeName)
        continue
    start, end = r
    ranges[(start, end)] = rangeName

# Source text of the generated module, one entry per line.
pyText = []
pyText.append(u"# -*- coding: UTF-8 -*-")
pyText.append(u"# Generated from glyphNameFormatter range names")
pyText.append(u"# Generated on %s" % time.strftime("%Y %m %d %H:%M:%S"))
pyText.append(u"unicodeRangeNames =" + pprint.pformat(ranges, indent=4))

pyPath = os.path.join(browserRoot, "unicodeRanges.py")
print("pyPath", pyPath)
Example #8
0
# Copy the joining types into the browser's data folder, if they exist.
srcJoiningTypesPath = os.path.join(gnfRoot, "data", "joiningTypes.txt")
if os.path.exists(srcJoiningTypesPath):
    dstJoiningTypesPath = os.path.join(browserRoot, "data", "joiningTypes.txt")
    print("srcJoiningTypesPath", srcJoiningTypesPath)
    print("dstJoiningTypesPath", dstJoiningTypesPath)
    # The copy belongs inside the guard: without a source file there is
    # nothing to copy and dstJoiningTypesPath is not even defined.
    shutil.copyfile(srcJoiningTypesPath, dstJoiningTypesPath)


# make a range name table
from glyphNameFormatter.unicodeRangeNames import getAllRangeNames, getRangeByName

# Maps (start, end) -> range name.
ranges = {}

for rangeName in getAllRangeNames():
    r = getRangeByName(rangeName)
    if r is None:
        print("unknown range name", rangeName)
        continue
    start, end = r
    ranges[(start,end)] = rangeName

# Source text of the generated module, one entry per line.
pyText = []
pyText.append(u"# -*- coding: UTF-8 -*-")
pyText.append(u"# Generated from glyphNameFormatter range names")
pyText.append(u"# Generated on %s" % time.strftime("%Y %m %d %H:%M:%S"))
pyText.append(u"unicodeRangeNames =" + pprint.pformat(ranges, indent=4))

pyPath = os.path.join(browserRoot, "unicodeRanges.py")
print("pyPath", pyPath)
Example #9
0
def findConflict(makeModule=True, makeReport=False, printReport=False):
    """Find glyph names that are produced for more than one unicode value.

    Every code point of every known range is named with conflicts
    ignored; names that occur more than once are treated as conflicts.

    makeModule: write the sorted conflicting names to
        ../data/scriptConflictNames.py (relative to this file).
    makeReport: write a text report to ../data/conflict.txt plus one
        report per range in ../data/conflicts/.
    printReport: also print the report lines while they are built.
    """
    names = {}
    # Names that are excluded from the per-unicode report lines.
    dontReport = [
        'tang:tangutideograph#',
        'cjk:cjkunifiedideograph#',
        'cjk:cjkcompatibilityideograph#',
        'nsh:nushucharacter#',
        'tangutideograph#',
        'cjkunifiedideograph#',
        'cjkcompatibilityideograph#',
        'nushucharacter#',
    ]
    lines = []
    for rangeName in getAllRangeNames():
        start, end = getRangeByName(rangeName)
        for uniNumber in range(start, end + 1):
            glyphName = glyphNameFormatter.GlyphName(uniNumber,
                                                     ignoreConflicts=True)
            if glyphName.hasName():
                name = glyphName.getName(extension=True)
                names.setdefault(name, []).append(glyphName)
    n = sorted(names)

    conflictNames = []
    conflictUniNumbers = []
    conflictsPerRange = {}
    if makeReport:
        line = "{0:>6s} | {1:<50}{2:<25}{3:<40}{4:<40}{5:<20}".format(
            "hex", "basic formatted name", "AGL name", "with extension",
            "range", "uni name")
        if printReport:
            print(line)
        lines.append(line)
        line = "{0:->6s} + {1:-<50}{2:-<25}{3:-<40}{4:-<40}{5:-<20}".format(
            "", "", "+", "+", "+", "+")
        if printReport:
            print(line)
        lines.append(line)
    for name in n:
        if len(names[name]) > 1:
            conflictNames.append(name)
            # Blank separator line between groups of conflicting names.
            line = ""
            if printReport:
                print()
            lines.append(line)
            for g in names[name]:
                rangeName = g.uniRangeName
                extendedName = g.getName(extension=True)
                AGLname = unicode2name_AGD.get(g.uniNumber, "-")
                nn = g.getName()
                if nn in dontReport:
                    continue
                conflictUniNumbers.append(g.uniNumber)
                if makeReport:
                    # The "with extension" column shows the extended name.
                    line = "{0:>6X} : {1:<50}{2:<25}{3:<40}{4:<40}{5:<20}".format(
                        g.uniNumber, nn, AGLname[:25], extendedName,
                        g.uniRangeName[:40], g.uniName)
                    if printReport:
                        print(line)
                    lines.append(line)

                conflictsPerRange.setdefault(rangeName, []).append(line)

    if makeReport:
        stats = "# %d names with conflicts, affecting %d unicodes" % (
            len(conflictNames), len(conflictUniNumbers))
        if printReport:
            print(stats)
        lines.append(stats)

    dirName = os.path.dirname(__file__)

    k = list(conflictsPerRange.keys())
    if makeReport:
        lines.append("")
        lines.append("")
        lines.append("Conflicts by rangename")
        k.sort()
        for rangeName in k:
            lines.append("\n%s" % rangeName)
            lines.extend(conflictsPerRange[rangeName])
        path = os.path.join(dirName, "..", "data", "conflict.txt")
        with open(path, 'w') as f:
            f.write("\n".join(lines))

        # separate report for conflicts per range name
        conflictsDir = os.path.join(dirName, "..", "data", "conflicts")
        if k and not os.path.exists(conflictsDir):
            # Only create the missing folder. Calling shutil.rmtree() on a
            # path that does not exist raises, so nothing is removed here
            # and an existing folder keeps its files (they are overwritten
            # per range below).
            os.makedirs(conflictsDir)
        for rangeName in k:
            path = os.path.join(
                conflictsDir,
                "conflict_%s.txt" % rangeName.replace(" ", "_").lower())
            with open(path, 'w') as f:
                f.write("\n".join(conflictsPerRange[rangeName]))

    if makeModule:
        path = os.path.join(dirName, "..", "data", "scriptConflictNames.py")

        pyText = [
            "# Table with conflicting names. Generated by /exporters/analyseConflicts.py",
            "\nscriptConflictNames = ["
        ]
        conflictNames.sort()
        for name in conflictNames:
            pyText.append("    \"%s\"," % name)
        pyText.append("]\n\n")
        with open(path, 'w') as f:
            f.write("\n".join(pyText))
def findConflict():
    """Report glyph names that are produced for more than one unicode value.

    Every code point of every known range is named without extension;
    names that occur more than once are treated as conflicts. The report
    is printed and written to ./../names/conflict.txt, and the sorted
    conflicting names are written as a Python list to
    ./../data/scriptConflictNames.py. Both paths are relative to the
    current working directory.
    """
    names = {}
    lines = []
    for rangeName in getAllRangeNames():
        start, end = getRangeByName(rangeName)
        for uniNumber in range(start, end+1):
            glyphName = glyphNameFormatter.GlyphName(uniNumber)
            if glyphName.hasName():
                name = glyphName.getName(extension=False)
                names.setdefault(name, []).append(glyphName)
    # sorted() works on Python 3 dict views, which have no .sort().
    n = sorted(names)

    conflictNames = []
    conflictUniNumbers = []
    conflictsPerRange = {}
    line = "{0:>6s} | {1:<50}{2:<25}{3:<40}{4:<40}".format("hex", "basic formatted name", "AGL name", "with extension", "range")
    print(line)
    lines.append(line)
    line = "{0:->6s} + {1:-<50}{2:-<25}{3:-<40}{4:<40}".format("", "", "+", "+", "+")
    print(line)
    lines.append(line)
    for name in n:
        if len(names[name]) > 1:
            conflictNames.append(name)
            # Blank separator line between groups of conflicting names.
            line = ""
            print()
            lines.append(line)
            for g in names[name]:
                rangeName = g.uniRangeName
                extendedName = g.getName(extension=True)
                conflictUniNumbers.append(g.uniNumber)
                AGLname = unicode2name_AGD.get(g.uniNumber, "-")
                # The "with extension" column shows the extended name.
                line = "{0:>6X} : {1:<50}{2:<25}{3:<40}{4:<40}".format(g.uniNumber, g.getName(), AGLname[:25], extendedName, g.uniRangeName[:40])
                print(line)
                lines.append(line)

                conflictsPerRange.setdefault(rangeName, []).append(line)
    stats = "# %d names with conflicts, affecting %d unicodes" % (len(conflictNames), len(conflictUniNumbers))
    print(stats)
    lines.append(stats)

    lines.append("")
    lines.append("")
    lines.append("Conflicts by rangename")
    for rangeName in sorted(conflictsPerRange):
        lines.append("\n%s" % rangeName)
        lines.extend(conflictsPerRange[rangeName])
    path = "./../names/conflict.txt"
    with open(path, 'w') as f:
        f.write("\n".join(lines))

    path = "./../data/scriptConflictNames.py"
    pyText = ["# Table with conflicting names. Generated by /test/testConflicts.py", "\nscriptConflictNames = ["]
    conflictNames.sort()
    for name in conflictNames:
        pyText.append("    \"%s\"," % name)
    pyText.append("]\n\n")
    with open(path, 'w') as f:
        f.write("\n".join(pyText))
def testCoverage():
    """Write a markdown coverage table to ../../../coverage.md.

    For every known range the code points that have a unicode name are
    counted, and it is recorded whether a module for the range can be
    imported from glyphNameFormatter.rangeProcessors. The table lists
    each range with its count, processor availability, start and end.
    The output path is relative to the current working directory.
    """
    # Ranges whose name contains one of these strings do not add to the
    # 'nameable' count.
    uncountables = [
        'Hangul Syllables',
        'CJK Unified Ideographs',
        'Private'
    ]
    uncounted = []
    glyphCount = {}
    for thisRange in getAllRangeNames():
        a, b = getRangeByName(thisRange)
        countThis = True
        for uc in uncountables:
            if uc in thisRange:
                uncounted.append(" * %s" % thisRange)
                print(thisRange, "uncountable")
                countThis = False

        moduleName = rangeNameToModuleName(thisRange)
        if thisRange not in glyphCount:
            # The range end is inclusive, hence the + 1.
            glyphCount[thisRange] = {'nameable': 0, 'uniNames': 0, 'total': b - a + 1, 'rangeProcessor': None}
        try:
            # Only the import outcome matters; the module is not used.
            importlib.import_module('glyphNameFormatter.rangeProcessors.%s' % moduleName)
            glyphCount[thisRange]['rangeProcessor'] = True
        except ImportError:
            pass
        # Visit the last code point of the range as well.
        for uniNumber in range(a, b + 1):
            g = GlyphName(uniNumber)
            if g.uniName is not None:
                glyphCount[thisRange]['uniNames'] += 1
                if countThis:
                    glyphCount[thisRange]['nameable'] += 1

    # These totals are only used by the disabled stats table below.
    totalGlyphs = 0     # the total of all glyph counts in all ranges
    totalCovered = 0    # the total of all glyphs that this package has rangeprocessors for
    totalPoints = 0     # the total of all ranges
    totalNameable = 0   # the total of all glyphs that can be named

    for items in glyphCount.values():
        totalGlyphs += items['uniNames']
        totalPoints += items['total']
        if items['rangeProcessor'] is not None:
            totalCovered += items['uniNames']
            totalNameable += items['nameable']

    text = []
    text.append("")
    text.append("## Version %s" % unicodeVersion)
    text.append("\n\n\n")
    text.append("This coverage page is has some issues.")
    text.append(" * The version of python used to build the table does not have the latest Unicode data.")
    text.append(" * Narrow build Python might also leave some names inaccessible.")
    text.append(" * Not all ranges need to count. Private Use ranges are ignored, perhaps others need to as well.")
    text.append("\n\n\n")

    # if uncounted:
    #     text.append("The following ranges are skipped:")
    #     for line in uncounted:
    #         text.append(line)

    # text.append("\n\n\n")
    # text.append("| Stats                                      | :)        |")
    # text.append("| ------------------------------------------ | --------: |")
    # text.append('| Total code points in the available ranges  |   `%d`    |'%totalPoints)
    # text.append('| Total glyphs in the available ranges       |   `%d`    |'%totalGlyphs)
    # text.append('| Total glyphs that can be named             |   `%d`    |'%totalNameable)
    # text.append('| Total names covered in GlyphNameFormatter  | `%d`      |'%totalCovered)
    # text.append('| Progress                                   | `%3.3f%%` |'%(100.0*totalCovered/totalGlyphs))

    text.append("\n\n\n")
    text.append("| Range name | # | has processor | Start | End |")
    text.append("| ----- | ----- |----- | ----- | ----- |")

    for thisRange in getAllRangeNames():
        if thisRange not in glyphCount:
            continue
        a, b = getRangeByName(thisRange)
        items = glyphCount[thisRange]
        if items['rangeProcessor'] is not None:
            has = "**Yes**"
            n = "**%s**" % items['uniNames']
        else:
            has = "No"
            n = items['uniNames']
        text.append("| %s | %s | %s | `%04X` | `%04X` |" % (thisRange, n, has, a, b))

    text.append("\n\n")
    path = "../../../coverage.md"
    # The context manager closes the file even when the write fails.
    with open(path, 'w') as f:
        f.write("\n".join(text))
def findConflict(makeModule=True, makeReport=False, printReport=False):
    """Find glyph names that are produced for more than one unicode value.

    Every code point of every known range is named without extension;
    names that occur more than once are treated as conflicts.

    makeModule: write the sorted conflicting names to
        ../data/scriptConflictNames.py (relative to this file).
    makeReport: write a text report to ../data/conflict.txt.
    printReport: also print the report lines while they are built.
    """
    names = {}
    lines = []
    for rangeName in getAllRangeNames():
        start, end = getRangeByName(rangeName)
        for uniNumber in range(start, end+1):
            glyphName = glyphNameFormatter.GlyphName(uniNumber)
            if glyphName.hasName():
                name = glyphName.getName(extension=False)
                names.setdefault(name, []).append(glyphName)
    # sorted() works on Python 3 dict views, which have no .sort().
    n = sorted(names)

    conflictNames = []
    conflictUniNumbers = []
    conflictsPerRange = {}
    if makeReport:
        line = "{0:>6s} | {1:<50}{2:<25}{3:<40}{4:<40}{5:<20}".format("hex", "basic formatted name", "AGL name", "with extension", "range", "uni name")
        if printReport:
            print(line)
        lines.append(line)
        line = "{0:->6s} + {1:-<50}{2:-<25}{3:-<40}{4:-<40}{5:-<20}".format("", "", "+", "+", "+", "+")
        if printReport:
            print(line)
        lines.append(line)
    for name in n:
        if len(names[name]) > 1:
            conflictNames.append(name)
            # Blank separator line between groups of conflicting names.
            line = ""
            if printReport:
                print()
            lines.append(line)
            for g in names[name]:
                rangeName = g.uniRangeName
                extendedName = g.getName(extension=True)
                conflictUniNumbers.append(g.uniNumber)
                AGLname = unicode2name_AGD.get(g.uniNumber, "-")
                if makeReport:
                    # The "with extension" column shows the extended name.
                    line = "{0:>6X} : {1:<50}{2:<25}{3:<40}{4:<40}{5:<20}".format(g.uniNumber, g.getName(), AGLname[:25], extendedName, g.uniRangeName[:40], g.uniName)
                    if printReport:
                        print(line)
                    lines.append(line)

                conflictsPerRange.setdefault(rangeName, []).append(line)

    if makeReport:
        stats = "# %d names with conflicts, affecting %d unicodes" % (len(conflictNames), len(conflictUniNumbers))
        if printReport:
            print(stats)
        lines.append(stats)

    dirName = os.path.dirname(__file__)

    if makeReport:
        lines.append("")
        lines.append("")
        lines.append("Conflicts by rangename")
        for rangeName in sorted(conflictsPerRange):
            lines.append("\n%s" % rangeName)
            lines.extend(conflictsPerRange[rangeName])
        path = os.path.join(dirName, "..", "data", "conflict.txt")
        with open(path, 'w') as f:
            f.write("\n".join(lines))

    if makeModule:
        path = os.path.join(dirName, "..", "data", "scriptConflictNames.py")

        pyText = ["# Table with conflicting names. Generated by /test/testConflicts.py", "\nscriptConflictNames = ["]
        conflictNames.sort()
        for name in conflictNames:
            pyText.append("    \"%s\"," % name)
        pyText.append("]\n\n")
        with open(path, 'w') as f:
            f.write("\n".join(pyText))
def findConflict(makeModule=True, makeReport=False, printReport=False):
    """Find glyph names that are produced for more than one unicode value.

    Every code point of every known range is named with conflicts
    ignored; names that occur more than once are treated as conflicts.

    makeModule: write the sorted conflicting names to
        ../data/scriptConflictNames.py (relative to this file).
    makeReport: write a text report to ../data/conflict.txt plus one
        report per range in ../data/conflicts/.
    printReport: also print the report lines while they are built.
    """
    names = {}
    # Names that are excluded from the per-unicode report lines.
    dontReport = [
        'tang:tangutideograph#',
        'cjk:cjkunifiedideograph#',
        'cjk:cjkcompatibilityideograph#',
        'nsh:nushucharacter#',
        'tangutideograph#',
        'cjkunifiedideograph#',
        'cjkcompatibilityideograph#',
        'nushucharacter#',
        ]
    lines = []
    for rangeName in getAllRangeNames():
        start, end = getRangeByName(rangeName)
        for uniNumber in range(start, end+1):
            glyphName = glyphNameFormatter.GlyphName(uniNumber, ignoreConflicts=True)
            if glyphName.hasName():
                name = glyphName.getName(extension=True)
                names.setdefault(name, []).append(glyphName)
    n = sorted(names)

    conflictNames = []
    conflictUniNumbers = []
    conflictsPerRange = {}
    if makeReport:
        line = "{0:>6s} | {1:<50}{2:<25}{3:<40}{4:<40}{5:<20}".format("hex", "basic formatted name", "AGL name", "with extension", "range", "uni name")
        if printReport:
            print(line)
        lines.append(line)
        line = "{0:->6s} + {1:-<50}{2:-<25}{3:-<40}{4:-<40}{5:-<20}".format("", "", "+", "+", "+", "+")
        if printReport:
            print(line)
        lines.append(line)
    for name in n:
        if len(names[name]) > 1:
            conflictNames.append(name)
            # Blank separator line between groups of conflicting names.
            line = ""
            if printReport:
                print()
            lines.append(line)
            for g in names[name]:
                rangeName = g.uniRangeName
                extendedName = g.getName(extension=True)
                AGLname = unicode2name_AGD.get(g.uniNumber, "-")
                nn = g.getName()
                if nn in dontReport:
                    continue
                conflictUniNumbers.append(g.uniNumber)
                if makeReport:
                    # The "with extension" column shows the extended name.
                    line = "{0:>6X} : {1:<50}{2:<25}{3:<40}{4:<40}{5:<20}".format(g.uniNumber, nn, AGLname[:25], extendedName, g.uniRangeName[:40], g.uniName)
                    if printReport:
                        print(line)
                    lines.append(line)

                conflictsPerRange.setdefault(rangeName, []).append(line)

    if makeReport:
        stats = "# %d names with conflicts, affecting %d unicodes" % (len(conflictNames), len(conflictUniNumbers))
        if printReport:
            print(stats)
        lines.append(stats)

    dirName = os.path.dirname(__file__)

    k = list(conflictsPerRange.keys())
    if makeReport:
        lines.append("")
        lines.append("")
        lines.append("Conflicts by rangename")
        k.sort()
        for rangeName in k:
            lines.append("\n%s" % rangeName)
            lines.extend(conflictsPerRange[rangeName])
        path = os.path.join(dirName, "..", "data", "conflict.txt")
        with open(path, 'w') as f:
            f.write("\n".join(lines))

        # separate report for conflicts per range name
        conflictsDir = os.path.join(dirName, "..", "data", "conflicts")
        if k and not os.path.exists(conflictsDir):
            # Only create the missing folder. Calling shutil.rmtree() on a
            # path that does not exist raises, so nothing is removed here
            # and an existing folder keeps its files (they are overwritten
            # per range below).
            os.makedirs(conflictsDir)
        for rangeName in k:
            path = os.path.join(conflictsDir, "conflict_%s.txt" % rangeName.replace(" ", "_").lower())
            with open(path, 'w') as f:
                f.write("\n".join(conflictsPerRange[rangeName]))

    if makeModule:
        path = os.path.join(dirName, "..", "data", "scriptConflictNames.py")

        pyText = ["# Table with conflicting names. Generated by /exporters/analyseConflicts.py", "\nscriptConflictNames = ["]
        conflictNames.sort()
        for name in conflictNames:
            pyText.append("    \"%s\"," % name)
        pyText.append("]\n\n")
        with open(path, 'w') as f:
            f.write("\n".join(pyText))
Example #14
0
def testCoverage():
    """Write a markdown coverage table to ../../../coverage.md.

    For every known range the code points that have a unicode name are
    counted, and it is recorded whether a module for the range can be
    imported from glyphNameFormatter.rangeProcessors. The table lists
    each range with its count, processor availability, start and end.
    The output path is relative to the current working directory.
    """
    # Ranges whose name contains one of these strings do not add to the
    # 'nameable' count.
    uncountables = ['Hangul Syllables', 'CJK Unified Ideographs', 'Private']
    uncounted = []
    glyphCount = {}
    for thisRange in getAllRangeNames():
        a, b = getRangeByName(thisRange)
        countThis = True
        for uc in uncountables:
            if uc in thisRange:
                uncounted.append(" * %s" % thisRange)
                print(thisRange, "uncountable")
                countThis = False

        moduleName = rangeNameToModuleName(thisRange)
        if thisRange not in glyphCount:
            glyphCount[thisRange] = {
                'nameable': 0,
                'uniNames': 0,
                # The range end is inclusive, hence the + 1.
                'total': b - a + 1,
                'rangeProcessor': None
            }
        try:
            # Only the import outcome matters; the module is not used.
            importlib.import_module(
                'glyphNameFormatter.rangeProcessors.%s' % moduleName)
            glyphCount[thisRange]['rangeProcessor'] = True
        except ImportError:
            pass
        # Visit the last code point of the range as well.
        for uniNumber in range(a, b + 1):
            g = GlyphName(uniNumber)
            if g.uniName is not None:
                glyphCount[thisRange]['uniNames'] += 1
                if countThis:
                    glyphCount[thisRange]['nameable'] += 1

    # These totals are only used by the disabled stats table below.
    totalGlyphs = 0  # the total of all glyph counts in all ranges
    totalCovered = 0  # the total of all glyphs that this package has rangeprocessors for
    totalPoints = 0  # the total of all ranges
    totalNameable = 0  # the total of all glyphs that can be named

    for items in glyphCount.values():
        totalGlyphs += items['uniNames']
        totalPoints += items['total']
        if items['rangeProcessor'] is not None:
            totalCovered += items['uniNames']
            totalNameable += items['nameable']

    text = []
    text.append("")
    text.append("## Version %s" % unicodeVersion)
    text.append("\n\n\n")
    text.append("### Note:\n")
    text.append("This coverage page is has some issues.\n")
    text.append(
        " * Most of the Unicode data is downloaded from Unicode.org. Only the bidirectional data still depends on the Python unicodedata module.."
    )
    text.append(
        " * Narrow build Python might also leave some names inaccessible.")
    text.append(
        " * Not all ranges need to count. Private Use ranges are ignored, perhaps others need to as well."
    )
    text.append("\n\n\n")

    # if uncounted:
    #     text.append("The following ranges are skipped:")
    #     for line in uncounted:
    #         text.append(line)

    # text.append("\n\n\n")
    # text.append("| Stats                                      | :)        |")
    # text.append("| ------------------------------------------ | --------: |")
    # text.append('| Total code points in the available ranges  |   `%d`    |'%totalPoints)
    # text.append('| Total glyphs in the available ranges       |   `%d`    |'%totalGlyphs)
    # text.append('| Total glyphs that can be named             |   `%d`    |'%totalNameable)
    # text.append('| Total names covered in GlyphNameFormatter  | `%d`      |'%totalCovered)
    # text.append('| Progress                                   | `%3.3f%%` |'%(100.0*totalCovered/totalGlyphs))

    text.append("\n\n\n")
    text.append("| Range name | # | has processor | Start | End |")
    text.append("| ----- | ----- |----- | ----- | ----- |")

    for thisRange in getAllRangeNames():
        if thisRange not in glyphCount:
            continue
        a, b = getRangeByName(thisRange)
        items = glyphCount[thisRange]
        if items['rangeProcessor'] is not None:
            has = "**Yes**"
            n = "**%s**" % items['uniNames']
        else:
            has = "No"
            n = items['uniNames']
        text.append("| %s | %s | %s | `%04X` | `%04X` |" %
                    (thisRange, n, has, a, b))

    text.append("\n\n")
    path = "../../../coverage.md"
    # The context manager closes the file even when the write fails.
    with open(path, 'w') as f:
        f.write("\n".join(text))