Example #1
0
def _escape_and_limit(s):
    """Escape every byte of *s* and fold the result into bounded lines.

    Each byte of ``asBytes(s)`` is replaced by its ``_ESCAPEDICT`` entry.
    Once 200 or more escaped characters have been emitted since the last
    break, a backslash followed by a newline is inserted and the counter
    starts again.  Returns the joined text.
    """
    pieces = []
    run_length = 0
    for byte in asBytes(s):
        escaped = _ESCAPEDICT[char2int(byte)]
        pieces.append(escaped)
        run_length += len(escaped)
        if run_length < 200:
            continue
        # line is long enough: emit a continuation and restart the count
        pieces.append('\\\n')
        run_length = 0
    return ''.join(pieces)
Example #2
0
def _escape_and_limit(s):
    """Return *s* escaped byte-by-byte, with continuation line breaks.

    The input is converted with ``asBytes``; every byte is mapped through
    ``_ESCAPEDICT``.  A backslash-newline pair is appended each time the
    number of escaped characters written since the previous break reaches
    200 or more.
    """
    out = []
    emit = out.append
    since_break = 0
    for raw in asBytes(s):
        token = _ESCAPEDICT[char2int(raw)]
        emit(token)
        since_break += len(token)
        if since_break >= 200:
            emit('\\\n')
            since_break = 0
    return ''.join(out)
Example #3
0
    def extractInfo(self, charInfo=1):
        """
        Extract typographic information from the loaded font file.

        The following attributes will be set::

            name         PostScript font name (bytes, spaces replaced by '-')
            familyName   name record 1, or the PostScript name
            styleName    name record 2, or 'Regular'
            fullName     name record 4, or the PostScript name
            uniqueFontID name record 3, or the PostScript name
            fontRevision pair of 16-bit values read from the head table
            flags        Font flags
            ascent       Typographic ascender in 1/1000ths of a point
            descent      Typographic descender in 1/1000ths of a point
            capHeight    Cap height in 1/1000ths of a point (the ascent is
                         used if not available)
            bbox         Glyph bounding box [xMin, yMin, xMax, yMax] in
                         1/1000ths of a point
            unitsPerEm   Glyph units per em
            italicAngle  Italic angle in degrees ccw
            underlinePosition, underlineThickness
                         values read from the post table (font units)
            stemV        stem weight in 1/1000ths of a point (approximate)

        If charInfo is true, the following will also be set::

            charToGlyph    dictionary mapping character codes to glyph indexes
            defaultWidth   default glyph width in 1/1000ths of a point
            charWidths     dictionary of character widths for every supported UCS character
                           code
            hmetrics       list of (advanceWidth, lsb) pairs in font units,
                           one per glyph
            glyphPos       list of glyph data offsets read from the loca table

        This will only work if the font has a Unicode cmap (platform 3,
        encoding 1, format 4 or platform 0 any encoding format 4).  Setting
        charInfo to false avoids this requirement; charToGlyph, defaultWidth
        and charWidths are then set to None.

        Raises TTFError when a table has an unknown version/format, when no
        usable PostScript name is found, when the font forbids embedding or
        when no suitable cmap subtable exists.
        """
        # name - Naming table
        name_offset = self.seek_table("name")
        name_format = self.read_ushort()
        if name_format != 0:
            raise TTFError("Unknown name table format (%d)" % name_format)
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        # only these name ids are of interest; stop as soon as all are found
        names = {1: None, 2: None, 3: None, 4: None, 6: None}
        nameCount = len(names)
        for i in range(numRecords):
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if nameId not in names: continue
            N = None
            if platformId == 3 and encodingId == 1 and languageId == 0x409:  # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                try:
                    if length % 2 != 0:
                        raise TTFError(
                            "PostScript name is UTF-16BE string of odd length")
                    # Decode the UTF-16BE record and store it as bytes so that
                    # it has the same type as the Macintosh records below; the
                    # code further down calls .replace(b" ", b"-") on it.
                    raw = self.get_chunk(string_data_offset + offset, length)
                    N = raw.decode('utf_16_be').encode('utf8')
                finally:
                    # never let the string lookup disturb the record cursor
                    self._pos = opos
            elif platformId == 1 and encodingId == 0 and languageId == 0:  # Macintosh, Roman, English, PS Name
                # According to OpenType spec, if PS name exists, it must exist
                # both in MS Unicode and Macintosh Roman formats.  Apparently,
                # you can find live TTF fonts which only have Macintosh format.
                N = self.get_chunk(string_data_offset + offset, length)
            if N and names[nameId] is None:
                names[nameId] = N
                nameCount -= 1
                if nameCount == 0: break

        # Prefer the PostScript name (6), then the full name (4) and, as one
        # last try before we bail, the family name (1).
        psName = None
        for candidate in (6, 4, 1):
            if names[candidate] is not None:
                #Dinu Gherman's fix for font names with spaces
                psName = names[candidate].replace(b" ", b"-")
                break

        # Don't just assume, check for None since some shoddy fonts cause crashes here...
        if not psName:
            raise TTFError("Could not find PostScript font name")
        for c in psName:
            if char2int(c) > 126 or c in b' [](){}<>/%':
                raise TTFError("psName=%r contains invalid character %s" %
                               (psName, ascii(c)))
        self.name = psName
        self.familyName = names[1] or psName
        self.styleName = names[2] or 'Regular'
        self.fullName = names[4] or psName
        self.uniqueFontID = names[3] or psName

        # head - Font header table
        self.seek_table("head")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj != 1:
            raise TTFError('Unknown head table version %d.%04x' %
                           (ver_maj, ver_min))
        self.fontRevision = self.read_ushort(), self.read_ushort()

        self.skip(4)
        magic = self.read_ulong()
        if magic != 0x5F0F3CF5:
            raise TTFError('Invalid head table magic %04x' % magic)
        self.skip(2)
        self.unitsPerEm = unitsPerEm = self.read_ushort()

        def scale(x):
            # convert font units to 1/1000ths of the em
            return x * 1000. / unitsPerEm

        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [scale(v) for v in (xMin, yMin, xMax, yMax)]
        self.skip(3 * 2)
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()

        # OS/2 - OS/2 and Windows metrics table
        # (needs data from head table)
        if "OS/2" in self.table:
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if fsType == 0x0002 or (fsType & 0x0300) != 0:
                raise TTFError(
                    'Font does not allow subsetting/embedding (%04X)' % fsType)
            self.skip(58)  #11*2 + 10 + 4*4 + 4 + 3*2
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            # XXX: for some reason it needs to be multiplied by 1.24--1.28
            self.ascent = scale(sTypoAscender)
            self.descent = scale(sTypoDescender)

            if version > 1:
                self.skip(16)  #3*2 + 2*4 + 2
                sCapHeight = self.read_short()
                self.capHeight = scale(sCapHeight)
            else:
                self.capHeight = self.ascent
        else:
            # Microsoft TTFs require an OS/2 table; Apple ones do not.  Try to
            # cope.  The data is not very important anyway.
            usWeightClass = 500
            self.ascent = scale(yMax)
            self.descent = scale(yMin)
            self.capHeight = self.ascent

        # There's no way to get stemV from a TTF file short of analyzing actual outline data
        # This fuzzy formula is taken from pdflib sources, but we could just use 0 here
        self.stemV = 50 + int((usWeightClass / 65.0)**2)

        # post - PostScript table
        # (needs data from OS/2 table)
        self.seek_table("post")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj not in (1, 2, 3, 4):
            # Adobe/MS documents 1, 2, 2.5, 3; Apple also has 4.
            # From Apple docs it seems that we do not need to care
            # about the exact version, so if you get this error, you can
            # try to remove this check altogether.
            raise TTFError('Unknown post table version %d.%04x' %
                           (ver_maj, ver_min))
        # 16.16 fixed point value: signed integer part + unsigned fraction
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short()
        self.underlineThickness = self.read_short()
        isFixedPitch = self.read_ulong()

        # All fonts that contain characters outside the original Adobe
        # character set are considered "symbolic".
        self.flags = FF_SYMBOLIC
        if self.italicAngle != 0:
            self.flags = self.flags | FF_ITALIC
        if usWeightClass >= 600:  # FW_REGULAR == 500, FW_SEMIBOLD == 600
            self.flags = self.flags | FF_FORCEBOLD
        if isFixedPitch:
            self.flags = self.flags | FF_FIXED
        # XXX: FF_SERIF?  FF_SCRIPT?  FF_ALLCAP?  FF_SMALLCAP?

        # hhea - Horizontal header table
        self.seek_table("hhea")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj != 1:
            raise TTFError('Unknown hhea table version %d.%04x' %
                           (ver_maj, ver_min))
        self.skip(28)
        metricDataFormat = self.read_ushort()
        if metricDataFormat != 0:
            raise TTFError('Unknown horizontal metric data format (%d)' %
                           metricDataFormat)
        numberOfHMetrics = self.read_ushort()
        if numberOfHMetrics == 0:
            raise TTFError('Number of horizontal metrics is 0')

        # maxp - Maximum profile table
        self.seek_table("maxp")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj != 1:
            raise TTFError('Unknown maxp table version %d.%04x' %
                           (ver_maj, ver_min))
        numGlyphs = self.read_ushort()

        if not charInfo:
            self.charToGlyph = None
            self.defaultWidth = None
            self.charWidths = None
            return

        if glyphDataFormat != 0:
            raise TTFError('Unknown glyph data format (%d)' % glyphDataFormat)

        # cmap - Character to glyph index mapping table
        cmap_offset = self.seek_table("cmap")
        self.skip(2)
        cmapTableCount = self.read_ushort()
        unicode_cmap_offset = None
        for n in range(cmapTableCount):
            platformID = self.read_ushort()
            encodingID = self.read_ushort()
            offset = self.read_ulong()
            # Microsoft/Unicode, or platform "Unicode" -- for the latter
            # assume all encodings are compatible.  Take the first format 4.
            if (platformID == 3 and encodingID == 1) or platformID == 0:
                subtable_format = self.get_ushort(cmap_offset + offset)
                if subtable_format == 4:
                    unicode_cmap_offset = cmap_offset + offset
                    break
        if unicode_cmap_offset is None:
            raise TTFError(
                'Font does not have cmap for Unicode (platform 3, encoding 1, format 4 or platform 0 any encoding format 4)'
            )
        self.seek(unicode_cmap_offset + 2)
        length = self.read_ushort()
        limit = unicode_cmap_offset + length
        self.skip(2)
        segCount = self.read_ushort() // 2  # the table stores segCount * 2
        self.skip(6)
        endCount = [self.read_ushort() for _ in range(segCount)]
        self.skip(2)
        startCount = [self.read_ushort() for _ in range(segCount)]
        idDelta = [self.read_short() for _ in range(segCount)]
        idRangeOffset_start = self._pos
        idRangeOffset = [self.read_ushort() for _ in range(segCount)]

        # Now it gets tricky.
        glyphToChar = {}
        charToGlyph = {}
        for n in range(segCount):
            for unichar in range(startCount[n], endCount[n] + 1):
                if idRangeOffset[n] == 0:
                    glyph = (unichar + idDelta[n]) & 0xFFFF
                else:
                    # idRangeOffset is relative to the position of its own
                    # entry inside the idRangeOffset array
                    offset = (unichar - startCount[n]) * 2 + idRangeOffset[n]
                    offset = idRangeOffset_start + 2 * n + offset
                    if offset >= limit:
                        # workaround for broken fonts (like Thryomanes)
                        glyph = 0
                    else:
                        glyph = self.get_ushort(offset)
                        if glyph != 0:
                            glyph = (glyph + idDelta[n]) & 0xFFFF
                charToGlyph[unichar] = glyph
                glyphToChar.setdefault(glyph, []).append(unichar)
        self.charToGlyph = charToGlyph

        # hmtx - Horizontal metrics table
        # (needs data from hhea, maxp, and cmap tables)
        self.seek_table("hmtx")
        raw_aw = aw = None
        self.charWidths = {}
        self.hmetrics = []
        for glyph in range(numberOfHMetrics):
            # advance width and left side bearing.  lsb is actually signed
            # short, but we don't need it anyway (except for subsetting)
            raw_aw, lsb = self.read_ushort(), self.read_ushort()
            self.hmetrics.append((raw_aw, lsb))
            aw = scale(raw_aw)
            if glyph == 0:
                self.defaultWidth = aw
            for char in glyphToChar.get(glyph, ()):
                self.charWidths[char] = aw
        for glyph in range(numberOfHMetrics, numGlyphs):
            # the rest of the table only lists left side bearings, so we
            # reuse the advance width of the last full entry.  hmetrics keeps
            # the unscaled width, exactly like the entries appended above.
            lsb = self.read_ushort()
            self.hmetrics.append((raw_aw, lsb))
            for char in glyphToChar.get(glyph, ()):
                self.charWidths[char] = aw

        # loca - Index to location
        self.seek_table('loca')
        if indexToLocFormat == 0:
            # short format stores offset / 2
            self.glyphPos = [self.read_ushort() << 1
                             for _ in range(numGlyphs + 1)]
        elif indexToLocFormat == 1:
            self.glyphPos = [self.read_ulong() for _ in range(numGlyphs + 1)]
        else:
            self.glyphPos = []
            raise TTFError('Unknown location table format (%d)' %
                           indexToLocFormat)
Example #4
0
 def _AsciiHexEncode(self, input):  # also based on piddlePDF
     """Helper function used by images.

     Converts *input* with ``asBytes`` and returns the text obtained by
     writing each byte as two lowercase hexadecimal digits.
     """
     buf = getStringIO()
     buf.write(''.join('%02x' % char2int(byte) for byte in asBytes(input)))
     return buf.getvalue()
Example #5
0
 def _AsciiHexEncode(self, input):  # also based on piddlePDF
     """Helper function used by images.

     Hex-encodes the bytes of *input* (after ``asBytes``): every byte
     becomes a two-digit lowercase hexadecimal pair in the returned text.
     """
     sink = getStringIO()
     emit = sink.write
     for octet in asBytes(input):
         code = char2int(octet)
         emit('%02x' % code)
     return sink.getvalue()
Example #6
0
    def extractInfo(self, charInfo=1):
        """
        Extract typographic information from the loaded font file.

        The following attributes will be set::

            name         PostScript font name (bytes, spaces replaced by '-')
            familyName   name record 1, or the PostScript name
            styleName    name record 2, or 'Regular'
            fullName     name record 4, or the PostScript name
            uniqueFontID name record 3, or the PostScript name
            fontRevision pair of 16-bit values read from the head table
            flags        Font flags
            ascent       Typographic ascender in 1/1000ths of a point
            descent      Typographic descender in 1/1000ths of a point
            capHeight    Cap height in 1/1000ths of a point (the ascent is
                         used if not available)
            bbox         Glyph bounding box [xMin, yMin, xMax, yMax] in
                         1/1000ths of a point
            unitsPerEm   Glyph units per em
            italicAngle  Italic angle in degrees ccw
            underlinePosition, underlineThickness
                         values read from the post table (font units)
            stemV        stem weight in 1/1000ths of a point (approximate)
            numGlyphs    number of glyphs according to the maxp table

        If charInfo is true, the following will also be set::

            charToGlyph    dictionary mapping character codes to glyph indexes
            defaultWidth   default glyph width in 1/1000ths of a point
            charWidths     dictionary of character widths for every supported
                           character code
            hmetrics       list of (advanceWidth, lsb) pairs in font units,
                           one per glyph
            glyphPos       list of glyph data offsets read from the loca table

        This requires a cmap subtable for platform 3, platform 1 (encoding 0
        or 1) or platform 0 (any encoding but 5) in format 0, 2, 4, 6 or 12.
        Setting charInfo to false avoids this requirement; charToGlyph,
        defaultWidth and charWidths are then set to None.

        Raises TTFError when a table has an unknown version/format, when no
        usable PostScript name is found, when the font forbids embedding or
        when no suitable cmap subtable exists, and ValueError when the chosen
        cmap subtable uses an unsupported format.
        """
        # name - Naming table
        name_offset = self.seek_table("name")
        name_format = self.read_ushort()
        if name_format != 0:
            raise TTFError("Unknown name table format (%d)" % name_format)
        numRecords = self.read_ushort()
        string_data_offset = name_offset + self.read_ushort()
        # only these name ids are of interest; stop as soon as all are found
        names = {1:None,2:None,3:None,4:None,6:None}
        nameCount = len(names)
        for i in xrange(numRecords):
            platformId = self.read_ushort()
            encodingId = self.read_ushort()
            languageId = self.read_ushort()
            nameId = self.read_ushort()
            length = self.read_ushort()
            offset = self.read_ushort()
            if nameId not in names: continue
            N = None
            if platformId == 3 and encodingId == 1 and languageId == 0x409: # Microsoft, Unicode, US English, PS Name
                opos = self._pos
                try:
                    if length % 2 != 0:
                        raise TTFError("PostScript name is UTF-16BE string of odd length")
                    # Decode the UTF-16BE record and store it as bytes so that
                    # it has the same type as the Macintosh records below; the
                    # code further down calls .replace(b" ", b"-") on it.
                    raw = self.get_chunk(string_data_offset + offset, length)
                    N = raw.decode('utf_16_be').encode('utf8')
                finally:
                    # never let the string lookup disturb the record cursor
                    self._pos = opos
            elif platformId == 1 and encodingId == 0 and languageId == 0: # Macintosh, Roman, English, PS Name
                # According to OpenType spec, if PS name exists, it must exist
                # both in MS Unicode and Macintosh Roman formats.  Apparently,
                # you can find live TTF fonts which only have Macintosh format.
                N = self.get_chunk(string_data_offset + offset, length)
            if N and names[nameId] is None:
                names[nameId] = N
                nameCount -= 1
                if nameCount==0: break

        # Prefer the PostScript name (6), then the full name (4) and, as one
        # last try before we bail, the family name (1).
        psName = None
        for candidate in (6, 4, 1):
            if names[candidate] is not None:
                #Dinu Gherman's fix for font names with spaces
                psName = names[candidate].replace(b" ", b"-")
                break

        # Don't just assume, check for None since some shoddy fonts cause crashes here...
        if not psName:
            raise TTFError("Could not find PostScript font name")
        for c in psName:
            if char2int(c)>126 or c in b' [](){}<>/%':
                raise TTFError("psName=%r contains invalid character %s" % (psName,ascii(c)))
        self.name = psName
        self.familyName = names[1] or psName
        self.styleName = names[2] or 'Regular'
        self.fullName = names[4] or psName
        self.uniqueFontID = names[3] or psName

        # head - Font header table
        self.seek_table("head")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj != 1:
            raise TTFError('Unknown head table version %d.%04x' % (ver_maj, ver_min))
        self.fontRevision = self.read_ushort(), self.read_ushort()

        self.skip(4)
        magic = self.read_ulong()
        if magic != 0x5F0F3CF5:
            raise TTFError('Invalid head table magic %04x' % magic)
        self.skip(2)
        self.unitsPerEm = unitsPerEm = self.read_ushort()

        def scale(x):
            # convert font units to 1/1000ths of the em
            return x * 1000. / unitsPerEm

        self.skip(16)
        xMin = self.read_short()
        yMin = self.read_short()
        xMax = self.read_short()
        yMax = self.read_short()
        self.bbox = [scale(v) for v in (xMin, yMin, xMax, yMax)]
        self.skip(3*2)
        indexToLocFormat = self.read_ushort()
        glyphDataFormat = self.read_ushort()

        # OS/2 - OS/2 and Windows metrics table
        # (needs data from head table)
        if "OS/2" in self.table:
            self.seek_table("OS/2")
            version = self.read_ushort()
            self.skip(2)
            usWeightClass = self.read_ushort()
            self.skip(2)
            fsType = self.read_ushort()
            if fsType == 0x0002 or (fsType & 0x0300) != 0:
                raise TTFError('Font does not allow subsetting/embedding (%04X)' % fsType)
            self.skip(58)   #11*2 + 10 + 4*4 + 4 + 3*2
            sTypoAscender = self.read_short()
            sTypoDescender = self.read_short()
            self.ascent = scale(sTypoAscender)      # XXX: for some reason it needs to be multiplied by 1.24--1.28
            self.descent = scale(sTypoDescender)

            if version > 1:
                self.skip(16)   #3*2 + 2*4 + 2
                sCapHeight = self.read_short()
                self.capHeight = scale(sCapHeight)
            else:
                self.capHeight = self.ascent
        else:
            # Microsoft TTFs require an OS/2 table; Apple ones do not.  Try to
            # cope.  The data is not very important anyway.
            usWeightClass = 500
            self.ascent = scale(yMax)
            self.descent = scale(yMin)
            self.capHeight = self.ascent

        # There's no way to get stemV from a TTF file short of analyzing actual outline data
        # This fuzzy formula is taken from pdflib sources, but we could just use 0 here
        self.stemV = 50 + int((usWeightClass / 65.0) ** 2)

        # post - PostScript table
        # (needs data from OS/2 table)
        self.seek_table("post")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj not in (1, 2, 3, 4):
            # Adobe/MS documents 1, 2, 2.5, 3; Apple also has 4.
            # From Apple docs it seems that we do not need to care
            # about the exact version, so if you get this error, you can
            # try to remove this check altogether.
            raise TTFError('Unknown post table version %d.%04x' % (ver_maj, ver_min))
        # 16.16 fixed point value: signed integer part + unsigned fraction
        self.italicAngle = self.read_short() + self.read_ushort() / 65536.0
        self.underlinePosition = self.read_short()
        self.underlineThickness = self.read_short()
        isFixedPitch = self.read_ulong()

        # All fonts that contain characters outside the original Adobe
        # character set are considered "symbolic".
        self.flags = FF_SYMBOLIC
        if self.italicAngle!= 0:
            self.flags = self.flags | FF_ITALIC
        if usWeightClass >= 600:        # FW_REGULAR == 500, FW_SEMIBOLD == 600
            self.flags = self.flags | FF_FORCEBOLD
        if isFixedPitch:
            self.flags = self.flags | FF_FIXED
        # XXX: FF_SERIF?  FF_SCRIPT?  FF_ALLCAP?  FF_SMALLCAP?

        # hhea - Horizontal header table
        self.seek_table("hhea")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj != 1:
            raise TTFError('Unknown hhea table version %d.%04x' % (ver_maj, ver_min))
        self.skip(28)
        metricDataFormat = self.read_ushort()
        if metricDataFormat != 0:
            raise TTFError('Unknown horizontal metric data format (%d)' % metricDataFormat)
        numberOfHMetrics = self.read_ushort()
        if numberOfHMetrics == 0:
            raise TTFError('Number of horizontal metrics is 0')

        # maxp - Maximum profile table
        self.seek_table("maxp")
        ver_maj, ver_min = self.read_ushort(), self.read_ushort()
        if ver_maj != 1:
            raise TTFError('Unknown maxp table version %d.%04x' % (ver_maj, ver_min))
        self.numGlyphs = numGlyphs = self.read_ushort()

        if not charInfo:
            self.charToGlyph = None
            self.defaultWidth = None
            self.charWidths = None
            return

        if glyphDataFormat != 0:
            raise TTFError('Unknown glyph data format (%d)' % glyphDataFormat)

        # cmap - Character to glyph index mapping table
        cmap_offset = self.seek_table("cmap")
        cmapVersion = self.read_ushort()
        cmapTableCount = self.read_ushort()
        if cmapTableCount==0 and cmapVersion!=0:
            # the two header fields look swapped; cope with it
            cmapTableCount, cmapVersion = cmapVersion, cmapTableCount
        # Scan every encoding record; a later acceptable record replaces an
        # earlier one, except that platform 1 / encoding 0 never replaces a
        # record that set enc to 1.
        encoffs = None
        enc = 0
        for n in xrange(cmapTableCount):
            platform = self.read_ushort()
            encoding = self.read_ushort()
            offset = self.read_ulong()
            if platform==3:
                enc = 1
                encoffs = offset
            elif platform==1 and encoding==0 and enc!=1:
                enc = 2
                encoffs = offset
            elif platform==1 and encoding==1:
                enc = 1
                encoffs = offset
            elif platform==0 and encoding!=5:
                enc = 1
                encoffs = offset
        if encoffs is None:
            raise TTFError('could not find a suitable cmap encoding')
        encoffs += cmap_offset      # from here on an absolute file position
        self.seek(encoffs)
        fmt = self.read_ushort()
        self.charToGlyph = charToGlyph = {}
        glyphToChar = {}
        if fmt not in (8, 10, 12):
            length = self.read_ushort()
            lang = self.read_ushort()
        else:
            # these formats use 32 bit length / language fields
            self.skip(2)    #padding
            length = self.read_ulong()
            lang = self.read_ulong()
        if fmt==0:
            # byte encoding table: one glyph index byte per character code
            T = [self.read_uint8() for i in xrange(length-6)]
            for unichar in xrange(min(256,self.numGlyphs,len(T))):
                glyph = T[unichar]
                charToGlyph[unichar] = glyph
                glyphToChar.setdefault(glyph,[]).append(unichar)
        elif fmt==4:
            limit = encoffs + length
            segCount = self.read_ushort() // 2     # the table stores segCount * 2
            self.skip(6)
            endCount = [self.read_ushort() for _ in xrange(segCount)]
            self.skip(2)
            startCount = [self.read_ushort() for _ in xrange(segCount)]
            idDelta = [self.read_short() for _ in xrange(segCount)]
            idRangeOffset_start = self._pos
            idRangeOffset = [self.read_ushort() for _ in xrange(segCount)]

            # Now it gets tricky.
            for n in xrange(segCount):
                for unichar in xrange(startCount[n], endCount[n] + 1):
                    if idRangeOffset[n] == 0:
                        glyph = (unichar + idDelta[n]) & 0xFFFF
                    else:
                        # idRangeOffset is relative to the position of its
                        # own entry inside the idRangeOffset array
                        offset = (unichar - startCount[n]) * 2 + idRangeOffset[n]
                        offset = idRangeOffset_start + 2 * n + offset
                        if offset >= limit:
                            # workaround for broken fonts (like Thryomanes)
                            glyph = 0
                        else:
                            glyph = self.get_ushort(offset)
                            if glyph != 0:
                                glyph = (glyph + idDelta[n]) & 0xFFFF
                    charToGlyph[unichar] = glyph
                    glyphToChar.setdefault(glyph,[]).append(unichar)
        elif fmt==6:
            # trimmed table: glyph ids for the consecutive character codes
            # first .. first+count-1
            first = self.read_ushort()
            count = self.read_ushort()
            for unichar in xrange(first,first+count):
                glyph = self.read_ushort()
                charToGlyph[unichar] = glyph
                glyphToChar.setdefault(glyph,[]).append(unichar)
        elif fmt==12:
            segCount = self.read_ulong()
            for n in xrange(segCount):
                start = self.read_ulong()
                end = self.read_ulong()
                # third field is the glyph of 'start'; keep the difference
                inc = self.read_ulong() - start
                for unichar in xrange(start,end+1):
                    glyph = unichar + inc
                    charToGlyph[unichar] = glyph
                    glyphToChar.setdefault(glyph,[]).append(unichar)
        elif fmt==2:
            T = [self.read_ushort() for i in xrange(256)]   #subheader keys
            maxSHK = max(T)
            SH = []
            for i in xrange(maxSHK+1):
                firstCode = self.read_ushort()
                entryCount = self.read_ushort()
                idDelta = self.read_ushort()
                idRangeOffset = (self.read_ushort()-(maxSHK-i)*8-2)>>1
                SH.append(CMapFmt2SubHeader(firstCode,entryCount,idDelta,idRangeOffset))
            #number of glyph indexes to read. it is the length of the entire subtable minus that bit we've read so far
            #(encoffs is already absolute, so cmap_offset must not be added again)
            entryCount = (length-(self._pos-encoffs))>>1
            glyphs = [self.read_short() for i in xrange(entryCount)]
            last = -1
            for unichar in xrange(256):
                if T[unichar]==0:
                    #Special case, single byte encoding entry, look unichar up in subhead
                    if last!=-1:
                        glyph = 0
                    elif (unichar<SH[0].firstCode or unichar>=SH[0].firstCode+SH[0].entryCount or
                            SH[0].idRangeOffset+(unichar-SH[0].firstCode)>=entryCount):
                        glyph = 0
                    else:
                        glyph = glyphs[SH[0].idRangeOffset+(unichar-SH[0].firstCode)]
                        if glyph!=0:
                            glyph += SH[0].idDelta
                    #assume the single byte codes are ascii
                    if glyph!=0 and glyph<self.numGlyphs:
                        charToGlyph[unichar] = glyph
                        glyphToChar.setdefault(glyph,[]).append(unichar)
                else:
                    # unichar is the high byte of a two byte code
                    k = T[unichar]
                    for j in xrange(SH[k].entryCount):
                        if SH[k].idRangeOffset+j>=entryCount:
                            glyph = 0
                        else:
                            glyph = glyphs[SH[k].idRangeOffset+j]
                            if glyph!= 0:
                                glyph += SH[k].idDelta
                        if glyph!=0 and glyph<self.numGlyphs:
                            enc = (unichar<<8)|(j+SH[k].firstCode)
                            charToGlyph[enc] = glyph
                            glyphToChar.setdefault(glyph,[]).append(enc)
                    if last==-1:
                        last = unichar
        else:
            raise ValueError('Unsupported cmap encoding format %d' % fmt)

        # hmtx - Horizontal metrics table
        # (needs data from hhea, maxp, and cmap tables)
        self.seek_table("hmtx")
        raw_aw = aw = None
        self.charWidths = {}
        self.hmetrics = []
        for glyph in xrange(numberOfHMetrics):
            # advance width and left side bearing.  lsb is actually signed
            # short, but we don't need it anyway (except for subsetting)
            raw_aw, lsb = self.read_ushort(), self.read_ushort()
            self.hmetrics.append((raw_aw, lsb))
            aw = scale(raw_aw)
            if glyph == 0:
                self.defaultWidth = aw
            for char in glyphToChar.get(glyph, ()):
                self.charWidths[char] = aw
        for glyph in xrange(numberOfHMetrics, numGlyphs):
            # the rest of the table only lists left side bearings, so we
            # reuse the advance width of the last full entry.  hmetrics keeps
            # the unscaled width, exactly like the entries appended above.
            lsb = self.read_ushort()
            self.hmetrics.append((raw_aw, lsb))
            for char in glyphToChar.get(glyph, ()):
                self.charWidths[char] = aw

        # loca - Index to location
        self.seek_table('loca')
        if indexToLocFormat == 0:
            # short format stores offset / 2
            self.glyphPos = [self.read_ushort() << 1 for _ in xrange(numGlyphs + 1)]
        elif indexToLocFormat == 1:
            self.glyphPos = [self.read_ulong() for _ in xrange(numGlyphs + 1)]
        else:
            self.glyphPos = []
            raise TTFError('Unknown location table format (%d)' % indexToLocFormat)