Ejemplo n.º 1
0
 def get_table(self, tag):
     """Return the raw contents of the font table called ``tag``.

     The table's offset and size are obtained from
     ``self.get_table_pos(tag)``.  A size of zero is reported through
     ``die``; otherwise the bytes are read from ``self.fh``.
     """
     offset, size = self.get_table_pos(tag)
     if size == 0:
         message = 'Truetype font (' + self.filename
         message = message + '): error reading table: ' + tag
         die(message)
     handle = self.fh
     handle.seek(offset)
     table_data = handle.read(size)
     return table_data
Ejemplo n.º 2
0
    def getCMAP12(self, unicode_cmap_offset, glyphToChar, charToGlyph):
        """Load a format 12 cmap subtable into the two supplied dicts.

        unicode_cmap_offset -- position of the subtable in the file
        glyphToChar         -- dict filled with glyph id -> list of codes
        charToGlyph         -- dict filled with code -> glyph id

        ``self.maxUniChar`` is reset and then raised to the largest mapped
        code below 0x30000.  A table whose declared length cannot hold all
        announced groups is reported through ``die``.
        """
        self.maxUniChar = 0
        # Step over the 2-byte format field (expected to be 12) ...
        self.seek(unicode_cmap_offset + 2)
        # ... and over the 2 reserved bytes.
        self.skip(2)
        table_length = self.read_ulong()
        # The 4-byte language field (expected to be 0) is ignored.
        self.skip(4)
        group_count = self.read_ulong()

        # 16 header bytes followed by three 4-byte values per group.
        header_size = 2 + 2 + 4 + 4 + 4
        if header_size + group_count * 3 * 4 > table_length:
            die("TTF format 12 cmap table too small")

        for _ in range(group_count):
            first_code = self.read_ulong()
            last_code = self.read_ulong()
            first_glyph = self.read_ulong()
            # Codes and glyph ids advance in lockstep inside a group.
            for step, code in enumerate(range(first_code, last_code + 1)):
                gid = first_glyph + step
                charToGlyph[code] = gid
                if code < 0x30000 and code > self.maxUniChar:
                    self.maxUniChar = code
                glyphToChar.setdefault(gid, []).append(code)
Ejemplo n.º 3
0
    def getCMAP12(self, unicode_cmap_offset, glyphToChar, charToGlyph):
        """Read the groups of a format 12 cmap subtable.

        Every code of every group is stored in ``charToGlyph`` (code ->
        glyph id) and appended to the list kept in ``glyphToChar`` for its
        glyph id.  ``self.maxUniChar`` ends up as the largest mapped code
        that is below 196608 (0x30000).  ``die`` is called when the
        declared table length is too small for the announced group count.
        """
        self.maxUniChar = 0

        # Header: format (2, skipped by the +2), reserved (2), length (4),
        # language (4, ignored), number of groups (4).
        self.seek(unicode_cmap_offset + 2)
        self.skip(2)
        declared_length = self.read_ulong()
        self.skip(4)
        n_groups = self.read_ulong()

        needed_length = 2 + 2 + 4 + 4 + 4 + n_groups * 3 * 4
        if needed_length > declared_length:
            die("TTF format 12 cmap table too small")

        remaining = n_groups
        while remaining > 0:
            remaining -= 1
            code = self.read_ulong()
            last = self.read_ulong()
            gid = self.read_ulong()
            # Walk the group; the glyph id grows together with the code.
            while code <= last:
                charToGlyph[code] = gid
                if code < 196608:
                    self.maxUniChar = max(code, self.maxUniChar)
                glyphToChar.setdefault(gid, []).append(code)
                code += 1
                gid += 1
Ejemplo n.º 4
0
 def getLOCA(self, indexToLocFormat, numGlyphs):
     """Read the 'loca' table into ``self.glyphPos``.

     indexToLocFormat -- 0 for 16-bit entries (stored value is the offset
                         divided by two), 1 for 32-bit entries holding
                         the offset itself
     numGlyphs        -- number of glyph offsets to keep

     ``self.glyphPos`` is replaced by a list with one byte offset per
     glyph.  Any other format value is reported through ``die``.
     """
     start = self.seek_table('loca')
     self.glyphPos = []
     if indexToLocFormat == 0:
         entry_size, entry_code, multiplier = 2, "H", 2
     elif indexToLocFormat == 1:
         entry_size, entry_code, multiplier = 4, "L", 1
     else:
         # str() is required: concatenating the integer itself raises
         # TypeError before the message can be reported.
         die('Unknown location table format ' + str(indexToLocFormat))
         return

     # The table carries one entry more than there are glyphs.
     data = self.get_chunk(start, (numGlyphs * entry_size) + entry_size)
     # Floor division keeps the repeat count an integer on Python 3 too.
     arr = unpack(">" + entry_code * (len(data) // entry_size), data)
     # NOTE(review): only the first numGlyphs entries are kept, so the
     # trailing end-of-table offset is dropped -- confirm with callers
     # that index glyph + 1.
     for n in range(numGlyphs):
         self.glyphPos.append(arr[n] * multiplier)
Ejemplo n.º 5
0
 def getLOCA(self, indexToLocFormat, numGlyphs):
     """Fill ``self.glyphPos`` with the glyph offsets of the 'loca' table.

     With ``indexToLocFormat == 0`` the table holds unsigned 16-bit
     values that are doubled to obtain byte offsets; with
     ``indexToLocFormat == 1`` it holds unsigned 32-bit byte offsets.
     ``numGlyphs`` offsets are stored.  An unknown format is reported
     through ``die``.
     """
     start = self.seek_table('loca')
     self.glyphPos = []
     if (indexToLocFormat == 0):
         data = self.get_chunk(start, (numGlyphs * 2) + 2)
         # '//' keeps the repeat count an int; '/' yields a float on
         # Python 3 and str * float raises TypeError.
         arr = unpack(">" + "H" * (len(data) // 2), data)
         for n in range(numGlyphs):
             self.glyphPos.append(arr[n] * 2)
     elif (indexToLocFormat == 1):
         data = self.get_chunk(start, (numGlyphs * 4) + 4)
         arr = unpack(">" + "L" * (len(data) // 4), data)
         for n in range(numGlyphs):
             self.glyphPos.append(arr[n])
     else:
         # The format is an integer, so it must be converted before it
         # can be appended to the message.
         die('Unknown location table format ' + str(indexToLocFormat))
     # NOTE(review): the table is read with numGlyphs + 1 entries but only
     # numGlyphs are kept -- confirm whether callers need the last one.
Ejemplo n.º 6
0
 def getMetrics(self, file):
     """Open the font at ``file`` and load its metrics into this object.

     The per-font state is reset, the 4-byte version tag is checked, and
     then ``readTableDirectory`` and ``extractInfo`` do the parsing.
     Unsupported containers (PostScript outlines, TrueType collections,
     unknown version tags) are reported through ``die``.

     The file handle is closed on every exit path, including when
     ``die`` or one of the parsing steps raises.
     """
     self.filename = file
     self.fh = open(file, 'rb')
     try:
         self._pos = 0
         self.charWidths = []
         self.glyphPos = {}
         self.charToGlyph = {}
         self.tables = {}
         self.otables = {}
         self.ascent = 0
         self.descent = 0
         self.TTCFonts = {}
         self.version = version = self.read_ulong()
         if version == 0x4F54544F:  # 'OTTO'
             die("Postscript outlines are not supported")
         if version == 0x74746366:  # 'ttcf'
             die("ERROR - TrueType Fonts Collections not supported")
         if version not in (0x00010000, 0x74727565):  # 1.0 or 'true'
             # version is an integer; convert it before concatenating.
             die("Not a TrueType font: version=" + str(version))
         self.readTableDirectory()
         self.extractInfo()
     finally:
         self.fh.close()
Ejemplo n.º 7
0
 def getMetrics(self, file):
     """Parse the font stored at ``file`` and record its metrics on self.

     Resets the parser state, validates the 4-byte version tag at the
     start of the file and delegates to ``readTableDirectory`` and
     ``extractInfo``.  ``die`` is called for PostScript-outline fonts,
     TrueType collections and unrecognised version tags.

     ``self.fh`` is always closed before this method returns or raises.
     """
     self.filename = file
     self.fh = open(file, 'rb')
     try:
         self._pos = 0
         self.charWidths = []
         self.glyphPos = {}
         self.charToGlyph = {}
         self.tables = {}
         self.otables = {}
         self.ascent = 0
         self.descent = 0
         self.TTCFonts = {}
         self.version = version = self.read_ulong()
         if (version == 0x4F54544F):
             die("Postscript outlines are not supported")
         if (version == 0x74746366):
             die("ERROR - TrueType Fonts Collections not supported")
         if (version not in (0x00010000, 0x74727565)):
             # str + int raises TypeError, so convert explicitly.
             die("Not a TrueType font: version=" + str(version))
         self.readTableDirectory()
         self.extractInfo()
     finally:
         # Release the handle even when validation or parsing fails.
         self.fh.close()
Ejemplo n.º 8
0
    def makeSubset(self, file, subset):
        """Build a subset of the TrueType font stored at ``file``.

        file   -- path of the source font
        subset -- iterable of character codes to keep

        The tables of the new font (name, cvt/fpgm/prep/gasp when present,
        post, cmap, glyf, hmtx, loca, head, hhea, maxp, OS/2) are
        registered through ``self.add`` and the value of
        ``self.endTTFile('')`` is returned.  ``self.charToGlyph``,
        ``self.codeToGlyph``, ``self.maxUni`` and ``self.glyphdata`` are
        updated along the way.

        Differences from the earlier implementation:
        * binary tables are accumulated as bytes and offsets are halved
          with floor division, so the code also runs on Python 3;
        * a composite glyph that references a glyph missing from the
          subset is redirected to glyph 0 with a warning (assigning 0 to
          the glyph data itself made the following concatenation fail);
        * only ``struct.error`` is caught when packing cmap values;
        * the file handle is closed even when an error is raised.
        """
        # Raised by pack() when a value does not fit the requested format.
        from struct import error as struct_error

        self.filename = file
        self.fh = open(file, 'rb')
        try:
            self._pos = 0
            self.charWidths = []
            self.glyphPos = {}
            self.charToGlyph = {}
            self.tables = {}
            self.otables = {}
            self.ascent = 0
            self.descent = 0
            self.skip(4)
            self.maxUni = 0
            self.readTableDirectory()

            #################/
            # head - Font header table
            #################/
            self.seek_table("head")
            self.skip(50)
            indexToLocFormat = self.read_ushort()
            self.read_ushort()  # glyphDataFormat, not needed here

            #################/
            # hhea - Horizontal header table
            #################/
            self.seek_table("hhea")
            self.skip(32)
            self.read_ushort()  # metricDataFormat, not needed here
            orignHmetrics = numberOfHMetrics = self.read_ushort()

            #################/
            # maxp - Maximum profile table
            #################/
            self.seek_table("maxp")
            self.skip(4)
            numGlyphs = self.read_ushort()

            #################/
            # cmap - Character to glyph index mapping table
            #################/
            cmap_offset = self.seek_table("cmap")
            self.skip(2)
            cmapTableCount = self.read_ushort()
            unicode_cmap_offset = 0
            unicode_cmap_offset12 = 0
            for i in range(cmapTableCount):
                platformID = self.read_ushort()
                encodingID = self.read_ushort()
                offset = self.read_ulong()
                save_pos = self._pos
                if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                    subtable_format = self.get_ushort(cmap_offset + offset)
                    if subtable_format == 12:
                        if not unicode_cmap_offset12:
                            unicode_cmap_offset12 = cmap_offset + offset
                        break
                if ((platformID == 3 and encodingID == 1)
                        or platformID == 0):  # Microsoft, Unicode
                    subtable_format = self.get_ushort(cmap_offset + offset)
                    if subtable_format == 4:
                        unicode_cmap_offset = cmap_offset + offset
                        break

                self.seek(save_pos)

            if not unicode_cmap_offset and not unicode_cmap_offset12:
                die('Font (' + self.filename +
                    ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)'
                    )

            glyphToChar = {}
            charToGlyph = {}
            if unicode_cmap_offset12:
                self.getCMAP12(unicode_cmap_offset12, glyphToChar,
                               charToGlyph)
            else:
                self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

            self.charToGlyph = charToGlyph

            #################/
            # hmtx - Horizontal metrics table
            #################/
            scale = 1  # not used
            self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)

            #################/
            # loca - Index to location
            #################/
            self.getLOCA(indexToLocFormat, numGlyphs)

            subsetglyphs = [(0, 0)]  # special "sorted dict"!
            subsetCharToGlyph = {}
            for code in subset:
                if code in self.charToGlyph:
                    entry = (self.charToGlyph[code], code)
                    if entry not in subsetglyphs:
                        subsetglyphs.append(entry)  # Old Glyph ID => Unicode
                    # Unicode to old GlyphID
                    subsetCharToGlyph[code] = self.charToGlyph[code]
                self.maxUni = max(self.maxUni, code)
            (glyf_start, dummy) = self.get_table_pos('glyf')

            subsetglyphs.sort()
            # old glyphID to new glyphID (position in subsetglyphs)
            glyphSet = {}
            for newGlyphIdx, (originalGlyphIdx, uni) in enumerate(
                    subsetglyphs):
                glyphSet[originalGlyphIdx] = newGlyphIdx

            codeToGlyph = {}
            for uni, originalGlyphIdx in sorted(subsetCharToGlyph.items()):
                codeToGlyph[uni] = glyphSet[originalGlyphIdx]

            self.codeToGlyph = codeToGlyph

            # subsetglyphs and glyphSet are handed to getGlyphs by
            # reference; entries it appends are visited by this same loop.
            for originalGlyphIdx, uni in subsetglyphs:
                nonlocals = {
                    'start': glyf_start,
                    'glyphSet': glyphSet,
                    'subsetglyphs': subsetglyphs
                }
                self.getGlyphs(originalGlyphIdx, nonlocals)

            numGlyphs = numberOfHMetrics = len(subsetglyphs)

            # tables copied from the original
            self.add('name', self.get_table('name'))
            for tag in ['cvt ', 'fpgm', 'prep', 'gasp']:
                if tag in self.tables:
                    self.add(tag, self.get_table(tag))

            # post - PostScript: new 4-byte version field, 12 bytes copied
            # from the original table, 16 zero bytes.
            opost = self.get_table('post')
            post = (b"\x00\x03\x00\x00" + substr(opost, 4, 12)
                    + b"\x00" * 16)
            self.add('post', post)

            # Sort CID2GID map into segments of contiguous codes
            if 0 in codeToGlyph:
                del codeToGlyph[0]
            rangeid = 0
            range_ = {}
            prevcid = -2
            prevglidx = -1
            # for each character
            for cid, glidx in sorted(codeToGlyph.items()):
                if cid == (prevcid + 1) and glidx == (prevglidx + 1):
                    range_[rangeid].append(glidx)
                else:
                    # new range
                    rangeid = cid
                    range_[rangeid] = [glidx]
                prevcid = cid
                prevglidx = glidx

            # cmap - Character to glyph mapping - Format 4 (MS / )
            # + 1: last segment has missing character 0xFFFF
            segCount = len(range_) + 1
            searchRange = 1
            entrySelector = 0
            while searchRange * 2 <= segCount:
                searchRange = searchRange * 2
                entrySelector = entrySelector + 1

            searchRange = searchRange * 2
            rangeShift = segCount * 2 - searchRange
            length = 16 + (8 * segCount) + (numGlyphs + 1)
            cmap = [
                0, 1,  # Index : version, number of encoding subtables
                3, 1,  # Encoding Subtable : platform (MS=3), encoding (Unicode)
                0, 12,  # Encoding Subtable : offset (hi,lo)
                4, length, 0,  # Format 4 Mapping subtable: format, length, language
                segCount * 2,
                searchRange,
                entrySelector,
                rangeShift
            ]

            range_ = sorted(range_.items())

            # endCode(s)
            for first_code, subrange in range_:
                cmap.append(first_code + (len(subrange) - 1))
            cmap.append(0xFFFF)  # endCode of last Segment
            cmap.append(0)  # reservedPad

            # startCode(s)
            for first_code, subrange in range_:
                cmap.append(first_code)
            cmap.append(0xFFFF)  # startCode of last Segment

            # idDelta(s)
            for first_code, subrange in range_:
                cmap.append(-(first_code - subrange[0]))
            cmap.append(1)  # idDelta of last Segment

            # idRangeOffset(s): offset in bytes to glyph indexArray, or 0
            for subrange in range_:
                cmap.append(0)
            cmap.append(0)  # idRangeOffset of last Segment

            for first_code, subrange in range_:
                cmap.extend(subrange)
            cmap.append(0)  # Mapping for last character

            cmapstr = b''
            for cm in cmap:
                if cm >= 0:
                    cmapstr += pack(">H", cm)
                else:
                    try:
                        cmapstr += pack(">h", cm)
                    except struct_error:
                        warnings.warn("cmap value too big/small: %s" % cm)
                        cmapstr += pack(">H", -cm)
            self.add('cmap', cmapstr)

            # glyf - Glyph data: small tables are read in one go, larger
            # ones glyph by glyph.
            (glyfOffset, glyfLength) = self.get_table_pos('glyf')
            if glyfLength < self.maxStrLenRead:
                glyphData = self.get_table('glyf')

            offsets = []
            glyf = b''
            pos = 0
            hmtxstr = b''
            self.glyphdata = {}

            for originalGlyphIdx, uni in subsetglyphs:
                # hmtx - Horizontal Metrics
                hmtxstr += self.getHMetric(orignHmetrics, originalGlyphIdx)

                offsets.append(pos)
                try:
                    glyphPos = self.glyphPos[originalGlyphIdx]
                    glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
                except IndexError:
                    warnings.warn("missing glyph %s" % (originalGlyphIdx))
                    # Keep both names bound so the empty glyph can still
                    # be sliced below.
                    glyphPos = 0
                    glyphLen = 0

                if glyfLength < self.maxStrLenRead:
                    data = substr(glyphData, glyphPos, glyphLen)
                elif glyphLen > 0:
                    data = self.get_chunk(glyfOffset + glyphPos, glyphLen)
                else:
                    data = b''

                if glyphLen > 0:
                    up = unpack(">H", substr(data, 0, 2))[0]
                # If number of contours <= -1 i.e. composite glyph
                if glyphLen > 2 and (up & (1 << 15)):
                    pos_in_glyph = 10
                    flags = GF_MORE
                    while flags & GF_MORE:
                        flags = unpack(">H",
                                       substr(data, pos_in_glyph, 2))[0]
                        glyphIdx = unpack(
                            ">H", substr(data, pos_in_glyph + 2, 2))[0]
                        self.glyphdata.setdefault(
                            originalGlyphIdx, {}).setdefault(
                                'compGlyphs', []).append(glyphIdx)
                        try:
                            newGlyphIdx = glyphSet[glyphIdx]
                        except KeyError:
                            warnings.warn(
                                "missing glyph data %s" % glyphIdx)
                            # Fall back to glyph 0 so the glyph data stays
                            # usable for the rest of the loop.
                            newGlyphIdx = 0
                        data = self._set_ushort(data, pos_in_glyph + 2,
                                                newGlyphIdx)
                        pos_in_glyph += 4
                        if flags & GF_WORDS:
                            pos_in_glyph += 4
                        else:
                            pos_in_glyph += 2
                        if flags & GF_SCALE:
                            pos_in_glyph += 2
                        elif flags & GF_XYSCALE:
                            pos_in_glyph += 4
                        elif flags & GF_TWOBYTWO:
                            pos_in_glyph += 8

                glyf += data
                pos += glyphLen
                # Pad every glyph to a multiple of four bytes.
                if pos % 4 != 0:
                    padding = 4 - (pos % 4)
                    glyf += str_repeat(b"\0", padding)
                    pos += padding

            offsets.append(pos)
            self.add('glyf', glyf)

            # hmtx - Horizontal Metrics
            self.add('hmtx', hmtxstr)

            # loca - Index to location
            locastr = b''
            if ((pos + 1) >> 1) > 0xFFFF:
                indexToLocFormat = 1  # long format
                for offset in offsets:
                    locastr += pack(">L", offset)
            else:
                indexToLocFormat = 0  # short format
                for offset in offsets:
                    # Floor division: pack() rejects floats.
                    locastr += pack(">H", offset // 2)

            self.add('loca', locastr)

            # head - Font header
            head = self.get_table('head')
            head = self._set_ushort(head, 50, indexToLocFormat)
            self.add('head', head)

            # hhea - Horizontal Header
            hhea = self.get_table('hhea')
            hhea = self._set_ushort(hhea, 34, numberOfHMetrics)
            self.add('hhea', hhea)

            # maxp - Maximum Profile
            maxp = self.get_table('maxp')
            maxp = self._set_ushort(maxp, 4, numGlyphs)
            self.add('maxp', maxp)

            # OS/2 - OS/2
            os2 = self.get_table('OS/2')
            self.add('OS/2', os2)
        finally:
            self.fh.close()

        # Put the TTF file together
        stm = self.endTTFile('')
        return stm
Ejemplo n.º 9
0
    def extractInfo(self):
        """Read names and metrics from the tables of the open font.

        Walks the name, head, hhea, OS/2 (optional), post, maxp and cmap
        tables and finally calls ``getHMTX``.  Attributes set on self:
        ``name``, ``familyName``, ``styleName``, ``fullName``,
        ``uniqueFontID``, ``unitsPerEm``, ``bbox``, ``ascent``,
        ``descent``, ``capHeight``, ``stemV``, ``italicAngle``,
        ``underlinePosition``, ``underlineThickness``, ``flags``,
        ``sFamilyClass`` and ``sFamilySubClass``.  Metric values are
        scaled by ``1000 / unitsPerEm``.

        Malformed or unsupported data is reported through ``die``.
        """
        #################/
        # name - Naming table
        #################/
        self.sFamilyClass = 0
        self.sFamilySubClass = 0

        name_offset = self.seek_table("name")
        name_format = self.read_ushort()
        if name_format != 0:
            die("Unknown name table format " + str(name_format))
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        # Name ids of interest: 1 family, 2 style, 3 unique id,
        # 4 full name, 6 PostScript name.
        names = {1: '', 2: '', 3: '', 4: '', 6: ''}
        nameCount = len(names)
        for i in range(numRecords):
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if nameId not in names:
                continue
            N = ''
            if (platformId == 3 and encodingId == 1
                    and languageId == 0x409):  # Microsoft, Unicode, US English
                opos = self._pos
                self.seek(string_data_offset + offset)
                if length % 2 != 0:
                    die("PostScript name is UTF-16BE string of odd length")
                # Floor division keeps the character count an integer.
                length //= 2
                N = ''.join(chr(self.read_ushort()) for _ in range(length))
                self._pos = opos
                self.seek(opos)

            elif (platformId == 1 and encodingId == 0
                  and languageId == 0):  # Macintosh, Roman, English
                opos = self._pos
                N = self.get_chunk(string_data_offset + offset, length)
                if not isinstance(N, str):
                    # Python 3 returns bytes here; decode so the name can
                    # be compared and substituted as text.
                    # NOTE(review): latin-1 never fails but is only an
                    # approximation of Mac Roman -- confirm.
                    N = N.decode('latin-1')
                self._pos = opos
                self.seek(opos)

            if N and names[nameId] == '':
                names[nameId] = N
                nameCount -= 1
                if nameCount == 0:
                    break

        if names[6]:
            psName = names[6]
        elif names[4]:
            psName = re.sub(' ', '-', names[4])
        elif names[1]:
            psName = re.sub(' ', '-', names[1])
        else:
            psName = ''
        if not psName:
            die("Could not find PostScript font name")
        self.name = psName
        self.familyName = names[1] if names[1] else psName
        self.styleName = names[2] if names[2] else 'Regular'
        self.fullName = names[4] if names[4] else psName
        self.uniqueFontID = names[3] if names[3] else psName
        # The PostScript name, when present, overrides the full name.
        if names[6]:
            self.fullName = names[6]

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(18)
        self.unitsPerEm = unitsPerEm = self.read_ushort()
        scale = 1000 / float(unitsPerEm)
        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [(xMin * scale), (yMin * scale), (xMax * scale),
                     (yMax * scale)]
        self.skip(3 * 2)
        self.read_ushort()  # indexToLocFormat, not needed here
        glyphDataFormat = self.read_ushort()
        if glyphDataFormat != 0:
            die('Unknown glyph data format ' + str(glyphDataFormat))

        #################/
        # hhea metrics table
        #################/
        # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
        if "hhea" in self.tables:
            self.seek_table("hhea")
            self.skip(4)
            hheaAscender = self.read_short()
            hheaDescender = self.read_short()
            self.ascent = (hheaAscender * scale)
            self.descent = (hheaDescender * scale)

        #################/
        # OS/2 - OS/2 and Windows metrics table
        #################/
        if "OS/2" in self.tables:
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if fsType == 0x0002 or (fsType & 0x0300) != 0:
                # Record the restriction before reporting it; the
                # assignment was unreachable when it followed die().
                self.restrictedUse = True
                die('ERROR - Font file ' + self.filename +
                    ' cannot be embedded due to copyright restrictions.')

            self.skip(20)
            sF = self.read_short()
            self.sFamilyClass = (sF >> 8)
            self.sFamilySubClass = (sF & 0xFF)
            self._pos += 10  # PANOSE = 10 byte length
            self.fh.read(10)  # consume the PANOSE bytes; value unused
            self.skip(26)
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            if not self.ascent:
                self.ascent = (sTypoAscender * scale)
            if not self.descent:
                self.descent = (sTypoDescender * scale)
            if version > 1:
                self.skip(16)
                sCapHeight = self.read_short()
                self.capHeight = (sCapHeight * scale)
            else:
                self.capHeight = self.ascent

        else:
            usWeightClass = 500
            if not self.ascent:
                self.ascent = (yMax * scale)
            if not self.descent:
                self.descent = (yMin * scale)
            self.capHeight = self.ascent

        self.stemV = 50 + int(pow((usWeightClass / 65.0), 2))

        #################/
        # post - PostScript table
        #################/
        self.seek_table("post")
        self.skip(4)
        # 16.16 fixed point: signed integer part plus unsigned fraction.
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short() * scale
        self.underlineThickness = self.read_short() * scale
        isFixedPitch = self.read_ulong()

        self.flags = 4

        if self.italicAngle != 0:
            self.flags = self.flags | 64
        if usWeightClass >= 600:
            self.flags = self.flags | 262144
        if isFixedPitch:
            self.flags = self.flags | 1

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32)
        metricDataFormat = self.read_ushort()
        if metricDataFormat != 0:
            # Was `'...'.metricDataFormat`, which raised AttributeError.
            die('Unknown horizontal metric data format ' +
                str(metricDataFormat))
        numberOfHMetrics = self.read_ushort()
        if numberOfHMetrics == 0:
            die('Number of horizontal metrics is 0')

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0

        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                subtable_format = self.get_ushort(cmap_offset + offset)
                if subtable_format == 12:
                    if not unicode_cmap_offset12:
                        unicode_cmap_offset12 = cmap_offset + offset
                    break
            if ((platformID == 3 and encodingID == 1)
                    or platformID == 0):  # Microsoft, Unicode
                subtable_format = self.get_ushort(cmap_offset + offset)
                if subtable_format == 4:
                    if not unicode_cmap_offset:
                        unicode_cmap_offset = cmap_offset + offset
                    break

            self.seek(save_pos)

        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename +
                ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)'
                )

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        #################/
        # hmtx - Horizontal metrics table
        #################/
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
Ejemplo n.º 10
0
    def makeSubset(self, file, subset):
        self.filename = file
        self.fh = open(file ,'rb')
        self._pos = 0
        self.charWidths = []
        self.glyphPos = {}
        self.charToGlyph = {}
        self.tables = {}
        self.otables = {}
        self.ascent = 0
        self.descent = 0
        self.skip(4)
        self.maxUni = 0
        self.readTableDirectory()

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(50) 
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32) 
        metricDataFormat = self.read_ushort()
        orignHmetrics = numberOfHMetrics = self.read_ushort()

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0
        for i in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                format = self.get_ushort(cmap_offset + offset)
                if (format == 12):
                    if not unicode_cmap_offset12:
                        unicode_cmap_offset12 = cmap_offset + offset
                    break
            if ((platformID == 3 and encodingID == 1) or platformID == 0):  # Microsoft, Unicode
                format = self.get_ushort(cmap_offset + offset)
                if (format == 4):
                    unicode_cmap_offset = cmap_offset + offset
                    break
                
            self.seek(save_pos )
        
        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:    
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        self.charToGlyph = charToGlyph

        #################/
        # hmtx - Horizontal metrics table
        #################/
        scale = 1    # not used
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)

        #################/
        # loca - Index to location
        #################/
        self.getLOCA(indexToLocFormat, numGlyphs)

        subsetglyphs = [(0, 0)]     # special "sorted dict"!
        subsetCharToGlyph = {}
        for code in subset: 
            if (code in self.charToGlyph):
                if (self.charToGlyph[code], code) not in subsetglyphs:
                    subsetglyphs.append((self.charToGlyph[code], code))   # Old Glyph ID => Unicode
                subsetCharToGlyph[code] = self.charToGlyph[code]    # Unicode to old GlyphID
            self.maxUni = max(self.maxUni, code)
        (start,dummy) = self.get_table_pos('glyf')

        subsetglyphs.sort()
        glyphSet = {}
        n = 0
        fsLastCharIndex = 0    # maximum Unicode index (character code) in this font, according to the cmap subtable for platform ID 3 and platform- specific encoding ID 0 or 1.
        for originalGlyphIdx, uni in subsetglyphs:
            fsLastCharIndex = max(fsLastCharIndex , uni)
            glyphSet[originalGlyphIdx] = n    # old glyphID to new glyphID
            n += 1

        codeToGlyph = {}
        for uni, originalGlyphIdx in sorted(subsetCharToGlyph.items()):
            codeToGlyph[uni] = glyphSet[originalGlyphIdx] 
        
        self.codeToGlyph = codeToGlyph
        
        for originalGlyphIdx, uni in subsetglyphs: 
            nonlocals = {'start': start, 'glyphSet': glyphSet, 
                         'subsetglyphs': subsetglyphs}
            self.getGlyphs(originalGlyphIdx, nonlocals)

        numGlyphs = numberOfHMetrics = len(subsetglyphs)

        #tables copied from the original
        tags = ['name']
        for tag in tags:  
            self.add(tag, self.get_table(tag)) 
        tags = ['cvt ', 'fpgm', 'prep', 'gasp']
        for tag in tags:
            if (tag in self.tables):  
                self.add(tag, self.get_table(tag))        

        # post - PostScript
        opost = self.get_table('post')
        post = "\x00\x03\x00\x00" + substr(opost,4,12) + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        self.add('post', post)

        # Sort CID2GID map into segments of contiguous codes
        if 0 in codeToGlyph:
            del codeToGlyph[0]
        #unset(codeToGlyph[65535])
        rangeid = 0
        range_ = {}
        prevcid = -2
        prevglidx = -1
        # for each character
        for cid, glidx in sorted(codeToGlyph.items()):
            if (cid == (prevcid + 1) and glidx == (prevglidx + 1)):
                range_[rangeid].append(glidx)
            else:
                # new range
                rangeid = cid
                range_[rangeid] = []
                range_[rangeid].append(glidx)
            prevcid = cid
            prevglidx = glidx

        # cmap - Character to glyph mapping - Format 4 (MS / )
        segCount = len(range_) + 1    # + 1 Last segment has missing character 0xFFFF
        searchRange = 1
        entrySelector = 0
        while (searchRange * 2 <= segCount ):
            searchRange = searchRange * 2
            entrySelector = entrySelector + 1
        
        searchRange = searchRange * 2
        rangeShift = segCount * 2 - searchRange
        length = 16 + (8*segCount ) + (numGlyphs+1)
        cmap = [0, 1,        # Index : version, number of encoding subtables
            3, 1,                # Encoding Subtable : platform (MS=3), encoding (Unicode)
            0, 12,            # Encoding Subtable : offset (hi,lo)
            4, length, 0,         # Format 4 Mapping subtable: format, length, language
            segCount*2,
            searchRange,
            entrySelector,
            rangeShift]

        range_ = sorted(range_.items())
        
        # endCode(s)
        for start, subrange in range_:
            endCode = start + (len(subrange)-1)
            cmap.append(endCode)    # endCode(s)
        
        cmap.append(0xFFFF)    # endCode of last Segment
        cmap.append(0)    # reservedPad

        # startCode(s)
        for start, subrange in range_: 
            cmap.append(start)    # startCode(s)
        
        cmap.append(0xFFFF)    # startCode of last Segment
        # idDelta(s) 
        for start, subrange in range_: 
            idDelta = -(start-subrange[0])
            n += count(subrange)
            cmap.append(idDelta)    # idDelta(s)
        
        cmap.append(1)    # idDelta of last Segment
        # idRangeOffset(s) 
        for subrange in range_: 
            cmap.append(0)    # idRangeOffset[segCount]      Offset in bytes to glyph indexArray, or 0
        
        cmap.append(0)    # idRangeOffset of last Segment
        for subrange, glidx in range_: 
            cmap.extend(glidx)
        
        cmap.append(0)    # Mapping for last character
        cmapstr = ''
        for cm in cmap:
            if cm >= 0:
                cmapstr += pack(">H", cm) 
            else:
                try:
                    cmapstr += pack(">h", cm) 
                except:
                    warnings.warn("cmap value too big/small: %s" % cm)
                    cmapstr += pack(">H", -cm) 
        self.add('cmap', cmapstr)

        # glyf - Glyph data
        (glyfOffset,glyfLength) = self.get_table_pos('glyf')
        if (glyfLength < self.maxStrLenRead):
            glyphData = self.get_table('glyf')

        offsets = []
        glyf = ''
        pos = 0

        hmtxstr = ''
        xMinT = 0
        yMinT = 0
        xMaxT = 0
        yMaxT = 0
        advanceWidthMax = 0
        minLeftSideBearing = 0
        minRightSideBearing = 0
        xMaxExtent = 0
        maxPoints = 0            # points in non-compound glyph
        maxContours = 0            # contours in non-compound glyph
        maxComponentPoints = 0    # points in compound glyph
        maxComponentContours = 0    # contours in compound glyph
        maxComponentElements = 0    # number of glyphs referenced at top level
        maxComponentDepth = 0        # levels of recursion, set to 0 if font has only simple glyphs
        self.glyphdata = {}

        for originalGlyphIdx, uni in subsetglyphs: 
            # hmtx - Horizontal Metrics
            hm = self.getHMetric(orignHmetrics, originalGlyphIdx)    
            hmtxstr += hm

            offsets.append(pos)
            try:
                glyphPos = self.glyphPos[originalGlyphIdx]
                glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos
            except IndexError:
                warnings.warn("missing glyph %s" % (originalGlyphIdx))
                glyphLen = 0

            if (glyfLength < self.maxStrLenRead):
                data = substr(glyphData,glyphPos,glyphLen)
            else:
                if (glyphLen > 0):
                    data = self.get_chunk(glyfOffset+glyphPos,glyphLen)
                else:
                    data = ''
            
            if (glyphLen > 0):
                up = unpack(">H", substr(data,0,2))[0]
            if (glyphLen > 2 and (up & (1 << 15)) ):     # If number of contours <= -1 i.e. composiste glyph
                pos_in_glyph = 10
                flags = GF_MORE
                nComponentElements = 0
                while (flags & GF_MORE):
                    nComponentElements += 1    # number of glyphs referenced at top level
                    up = unpack(">H", substr(data,pos_in_glyph,2))
                    flags = up[0]
                    up = unpack(">H", substr(data,pos_in_glyph+2,2))
                    glyphIdx = up[0]
                    self.glyphdata.setdefault(originalGlyphIdx, {}).setdefault('compGlyphs', []).append(glyphIdx)
                    try:
                        data = self._set_ushort(data, pos_in_glyph + 2, glyphSet[glyphIdx])
                    except KeyError:
                        data = 0
                        warnings.warn("missing glyph data %s" % glyphIdx)
                    pos_in_glyph += 4
                    if (flags & GF_WORDS): 
                        pos_in_glyph += 4 
                    else: 
                        pos_in_glyph += 2 
                    if (flags & GF_SCALE):
                        pos_in_glyph += 2 
                    elif (flags & GF_XYSCALE):
                        pos_in_glyph += 4 
                    elif (flags & GF_TWOBYTWO):
                        pos_in_glyph += 8 
                
                maxComponentElements = max(maxComponentElements, nComponentElements)
            
            glyf += data
            pos += glyphLen
            if (pos % 4 != 0): 
                padding = 4 - (pos % 4)
                glyf += str_repeat("\0",padding)
                pos += padding

        offsets.append(pos)
        self.add('glyf', glyf)

        # hmtx - Horizontal Metrics
        self.add('hmtx', hmtxstr)

        # loca - Index to location
        locastr = ''
        if (((pos + 1) >> 1) > 0xFFFF): 
            indexToLocFormat = 1        # long format
            for offset in offsets:
                locastr += pack(">L",offset) 
        else:
            indexToLocFormat = 0        # short format
            for offset in offsets:  
                locastr += pack(">H",(offset/2)) 
        
        self.add('loca', locastr)

        # head - Font header
        head = self.get_table('head')
        head = self._set_ushort(head, 50, indexToLocFormat)
        self.add('head', head)

        # hhea - Horizontal Header
        hhea = self.get_table('hhea')
        hhea = self._set_ushort(hhea, 34, numberOfHMetrics)
        self.add('hhea', hhea)

        # maxp - Maximum Profile
        maxp = self.get_table('maxp')
        maxp = self._set_ushort(maxp, 4, numGlyphs)
        self.add('maxp', maxp)

        # OS/2 - OS/2
        os2 = self.get_table('OS/2')
        self.add('OS/2', os2 )

        self.fh.close()

        # Put the TTF file together
        stm = self.endTTFile('')
        return stm 
Ejemplo n.º 11
0
    def extractInfo(self):
        """Read font-level names and metrics from the open TrueType file.

        Visits the ``name``, ``head``, ``hhea``, ``OS/2``, ``post``, ``maxp``
        and ``cmap`` tables in that order and stores the results on the
        instance: ``name``, ``familyName``, ``styleName``, ``fullName``,
        ``uniqueFontID``, ``unitsPerEm``, ``bbox``, ``ascent``, ``descent``,
        ``capHeight``, ``stemV``, ``italicAngle``, ``underlinePosition``,
        ``underlineThickness``, ``flags``, ``sFamilyClass`` and
        ``sFamilySubClass``.  Metric values are multiplied by
        ``1000 / unitsPerEm``.  The character map is then loaded through
        ``getCMAP12`` (preferred when a format 12 subtable was recorded) or
        ``getCMAP4``, and the horizontal metrics through ``getHMTX``.

        Unsupported table formats, a missing PostScript name, a missing
        Unicode cmap and embedding restrictions are all reported through
        ``die``.

        NOTE(review): ``self.ascent`` / ``self.descent`` are tested with
        ``not`` before this method necessarily assigns them, so they are
        assumed to be initialised by the caller -- confirm.
        """
        #################/
        # name - Naming table
        #################/
        self.sFamilyClass = 0
        self.sFamilySubClass = 0

        name_offset = self.seek_table("name")
        name_format = self.read_ushort()
        if (name_format != 0):
            # str() is required: the format is an int and cannot be
            # concatenated to the message directly.
            die("Unknown name table format " + str(name_format))
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        # Name IDs of interest: 1 family, 2 style, 3 unique ID, 4 full name,
        # 6 PostScript name (as used further down).
        names = {1: '', 2: '', 3: '', 4: '', 6: ''}
        nameCount = len(names)
        for _ in range(numRecords):
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if (nameId not in names):
                continue
            N = ''
            if (platformId == 3 and encodingId == 1 and languageId == 0x409):  # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                self.seek(string_data_offset + offset)
                if (length % 2 != 0):
                    die("PostScript name is UTF-16BE string of odd length")
                # Two bytes per character; floor division keeps the count an
                # int on Python 3 as well.
                length //= 2
                N = ''.join(chr(self.read_ushort()) for _ in range(length))
                # Return to the record list where we left off.
                self._pos = opos
                self.seek(opos)

            elif (platformId == 1 and encodingId == 0 and languageId == 0):  # Macintosh, Roman, English, PS Name
                opos = self._pos
                N = self.get_chunk(string_data_offset + offset, length)
                self._pos = opos
                self.seek(opos)

            # Only the first non-empty value found for each name ID is kept.
            if (N and names[nameId] == ''):
                names[nameId] = N
                nameCount -= 1
                if (nameCount == 0):
                    break

        # PostScript name: prefer ID 6, else derive it from the full or
        # family name with spaces replaced by hyphens.
        if (names[6]):
            psName = names[6]
        elif (names[4]):
            psName = re.sub(' ', '-', names[4])
        elif (names[1]):
            psName = re.sub(' ', '-', names[1])
        else:
            psName = ''
        if (not psName):
            die("Could not find PostScript font name")
        self.name = psName
        self.familyName = names[1] if names[1] else psName
        self.styleName = names[2] if names[2] else 'Regular'
        self.fullName = names[4] if names[4] else psName
        self.uniqueFontID = names[3] if names[3] else psName
        # A PostScript name, when present, overrides the full name.
        if (names[6]):
            self.fullName = names[6]

        #################/
        # head - Font header table
        #################/
        self.seek_table("head")
        self.skip(18)
        self.unitsPerEm = unitsPerEm = self.read_ushort()
        scale = 1000 / float(unitsPerEm)
        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [(xMin*scale), (yMin*scale), (xMax*scale), (yMax*scale)]
        self.skip(3*2)
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()
        if (glyphDataFormat != 0):
            die('Unknown glyph data format ' + str(glyphDataFormat))

        #################/
        # hhea metrics table
        #################/
        # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for compatibility
        if ("hhea" in self.tables):
            self.seek_table("hhea")
            self.skip(4)
            hheaAscender = self.read_short()
            hheaDescender = self.read_short()
            self.ascent = (hheaAscender * scale)
            self.descent = (hheaDescender * scale)

        #################/
        # OS/2 - OS/2 and Windows metrics table
        #################/
        if ("OS/2" in self.tables):
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if (fsType == 0x0002 or (fsType & 0x0300) != 0):
                # Record the restriction before reporting it, so the flag is
                # set even when die() does not return.
                self.restrictedUse = True
                die('ERROR - Font file ' + self.filename + ' cannot be embedded due to copyright restrictions.')

            self.skip(20)
            sF = self.read_short()
            self.sFamilyClass = (sF >> 8)
            self.sFamilySubClass = (sF & 0xFF)
            self._pos += 10  # PANOSE = 10 byte length
            # The PANOSE bytes are not used, but reading them advances the
            # file handle in step with self._pos.
            self.fh.read(10)
            self.skip(26)
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            # OS/2 typographic values only fill in what hhea did not provide.
            if (not self.ascent):
                self.ascent = (sTypoAscender * scale)
            if (not self.descent):
                self.descent = (sTypoDescender * scale)
            if (version > 1):
                self.skip(16)
                sCapHeight = self.read_short()
                self.capHeight = (sCapHeight * scale)
            else:
                self.capHeight = self.ascent

        else:
            # No OS/2 table: fall back to a medium weight and the bounding box.
            usWeightClass = 500
            if (not self.ascent):
                self.ascent = (yMax * scale)
            if (not self.descent):
                self.descent = (yMin * scale)
            self.capHeight = self.ascent

        self.stemV = 50 + int(pow((usWeightClass / 65.0), 2))

        #################/
        # post - PostScript table
        #################/
        self.seek_table("post")
        self.skip(4)
        # Integer part plus a 16-bit fraction.
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short() * scale
        self.underlineThickness = self.read_short() * scale
        isFixedPitch = self.read_ulong()

        # NOTE(review): bit values presumably follow the PDF font descriptor
        # /Flags entry (1 fixed pitch, 4 symbolic, 64 italic,
        # 262144 force bold) -- confirm against the consumer of self.flags.
        self.flags = 4

        if (self.italicAngle != 0):
            self.flags = self.flags | 64
        if (usWeightClass >= 600):
            self.flags = self.flags | 262144
        if (isFixedPitch):
            self.flags = self.flags | 1

        #################/
        # hhea - Horizontal header table
        #################/
        self.seek_table("hhea")
        self.skip(32)
        metricDataFormat = self.read_ushort()
        if (metricDataFormat != 0):
            die('Unknown horizontal metric data format ' + str(metricDataFormat))
        numberOfHMetrics = self.read_ushort()
        if (numberOfHMetrics == 0):
            die('Number of horizontal metrics is 0')

        #################/
        # maxp - Maximum profile table
        #################/
        self.seek_table("maxp")
        self.skip(4)
        numGlyphs = self.read_ushort()

        #################/
        # cmap - Character to glyph index mapping table
        #################/
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = 0
        unicode_cmap_offset12 = 0

        # The scan stops at the first usable subtable (format 12 for
        # platform 3 / encoding 10, or format 4 for platform 3 / encoding 1
        # or platform 0), whichever appears first in the directory.
        for _ in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            save_pos = self._pos
            if platformID == 3 and encodingID == 10:  # Microsoft, UCS-4
                cmap_format = self.get_ushort(cmap_offset + offset)
                if (cmap_format == 12):
                    if not unicode_cmap_offset12:
                        unicode_cmap_offset12 = cmap_offset + offset
                    break
            if ((platformID == 3 and encodingID == 1) or platformID == 0):  # Microsoft, Unicode
                cmap_format = self.get_ushort(cmap_offset + offset)
                if (cmap_format == 4):
                    if (not unicode_cmap_offset):
                        unicode_cmap_offset = cmap_offset + offset
                    break

            # Peeking at the subtable may have moved the read position.
            self.seek(save_pos)

        if not unicode_cmap_offset and not unicode_cmap_offset12:
            die('Font (' + self.filename + ') does not have cmap for Unicode (platform 3, encoding 1, format 4, or platform 3, encoding 10, format 12, or platform 0, any encoding, format 4)')

        glyphToChar = {}
        charToGlyph = {}
        if unicode_cmap_offset12:
            self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph)
        else:
            self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph)

        #################/
        # hmtx - Horizontal metrics table
        #################/
        self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale)
Ejemplo n.º 12
0
 def get_table(self, tag):
     """Return the raw contents of the table named *tag*.

     The table's position and length come from ``get_table_pos``; a
     zero-length result is reported through ``die``.  The data is read
     directly from ``self.fh``.
     """
     offset, size = self.get_table_pos(tag)
     if size == 0:
         message = 'Truetype font (' + self.filename + '): error reading table: ' + tag
         die(message)
     handle = self.fh
     handle.seek(offset)
     contents = handle.read(size)
     return contents