Beispiel #1
0
class PDFSimpleFont(PDFFont):
    """Font that converts character ids to text.

    Conversion goes through the ToUnicode CMap when the font spec
    provides one, and through the encoding table obtained from
    EncodingDB otherwise.
    """

    def __init__(self, descriptor, widths, spec):
        """Set up the encoding table and the optional ToUnicode CMap.

        descriptor and widths are handed unchanged to PDFFont.__init__;
        spec is the mapping searched for 'Encoding' and 'ToUnicode'.
        """
        # The encoding entry is either the name of a built-in encoding
        # or a dictionary that describes differences from a base one.
        encoding = LITERAL_STANDARD_ENCODING
        if 'Encoding' in spec:
            encoding = resolve1(spec['Encoding'])

        if not isinstance(encoding, dict):
            self.encoding = EncodingDB.get_encoding(literal_name(encoding))
        else:
            base_name = literal_name(
                encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
            differences = list_value(encoding.get('Differences', None))
            self.encoding = EncodingDB.get_encoding(base_name, differences)

        # Stays None unless the spec carries a ToUnicode stream.
        self.ucs2_cmap = None
        if 'ToUnicode' in spec:
            tounicode = stream_value(spec['ToUnicode'])
            cmap = CMap()
            self.ucs2_cmap = cmap
            parser = CMapParser(cmap, StringIO(tounicode.get_data()))
            parser.run()

        PDFFont.__init__(self, descriptor, widths)

    def to_unicode(self, cid):
        """Return the text for cid.

        Raises PDFUnicodeNotDefined when neither the ToUnicode CMap nor
        the encoding table has an entry for cid.
        """
        if self.ucs2_cmap:
            code = self.ucs2_cmap.tocode(cid)
            if not code:
                raise PDFUnicodeNotDefined(None, cid)
            # The code is decoded as a run of big-endian 16-bit units.
            units = unpack('>%dH' % (len(code) // 2), code)
            return ''.join(map(unichr, units))

        try:
            return self.encoding[cid]
        except KeyError:
            raise PDFUnicodeNotDefined(None, cid)
Beispiel #2
0
class PDFSimpleFont(PDFFont):
    """Simple font: maps character ids to text through either a
    ToUnicode CMap or an encoding table looked up in EncodingDB."""

    def __init__(self, descriptor, widths, spec):
        """Resolve the font encoding, parse any ToUnicode stream found
        in spec, then run the PDFFont initializer with descriptor and
        widths."""
        # Font encoding is specified either by the name of a built-in
        # encoding or by a dictionary that describes the differences.
        enc_spec = (
            resolve1(spec["Encoding"])
            if "Encoding" in spec
            else LITERAL_STANDARD_ENCODING
        )
        if isinstance(enc_spec, dict):
            base = enc_spec.get("BaseEncoding", LITERAL_STANDARD_ENCODING)
            enc_name = literal_name(base)
            enc_diff = list_value(enc_spec.get("Differences", None))
            table = EncodingDB.get_encoding(enc_name, enc_diff)
        else:
            table = EncodingDB.get_encoding(literal_name(enc_spec))
        self.encoding = table

        self.ucs2_cmap = None
        if "ToUnicode" in spec:
            strm = stream_value(spec["ToUnicode"])
            # Assign before parsing so the attribute already refers to
            # the CMap that the parser fills in.
            self.ucs2_cmap = CMap()
            raw = StringIO(strm.get_data())
            CMapParser(self.ucs2_cmap, raw).run()

        PDFFont.__init__(self, descriptor, widths)

    def to_unicode(self, cid):
        """Translate cid to text; raise PDFUnicodeNotDefined if no
        mapping exists for it."""
        cmap = self.ucs2_cmap
        if not cmap:
            # No ToUnicode map: fall back to the encoding table.
            try:
                return self.encoding[cid]
            except KeyError:
                raise PDFUnicodeNotDefined(None, cid)

        code = cmap.tocode(cid)
        if not code:
            raise PDFUnicodeNotDefined(None, cid)
        count = len(code) // 2
        pieces = [unichr(unit) for unit in unpack(">%dH" % count, code)]
        return "".join(pieces)
Beispiel #3
0
 def create_cmap(self):
     """Build a CMap from this font's 'cmap' table.

     Reads the table through self.fp at the offset recorded in
     self.tables['cmap'] and decodes every subtable of format 0, 2
     or 4 into a character-code -> glyph-id mapping. Subtables of any
     other format are skipped.

     Returns the result of CMap().update(char2gid, gid2char), where
     gid2char maps each glyph id to its character code packed as a
     big-endian unsigned 16-bit value.

     Raises TrueTypeFont.CMapNotFound if self.tables has no 'cmap'
     entry.
     """
     if 'cmap' not in self.tables:
         raise TrueTypeFont.CMapNotFound
     (base_offset, length) = self.tables['cmap']
     fp = self.fp
     fp.seek(base_offset)
     (version, nsubtables) = unpack('>HH', fp.read(4))
     # Each 8-byte record holds two 16-bit fields (ignored below) and
     # a 32-bit subtable offset relative to the start of the table.
     subtables = [unpack('>HHL', fp.read(8)) for _ in xrange(nsubtables)]
     char2gid = {}
     # Only supports subtable type 0, 2 and 4.
     for (_1, _2, st_offset) in subtables:
         fp.seek(base_offset + st_offset)
         (fmttype, fmtlen, fmtlang) = unpack('>HHH', fp.read(6))
         if fmttype == 0:
             # 256 one-byte glyph ids, indexed by character code.
             char2gid.update(enumerate(unpack('>256B', fp.read(256))))
         elif fmttype == 2:
             subheaderkeys = unpack('>256H', fp.read(512))
             # Each key is 8 * (subheader index); invert it so that a
             # subheader index yields the first byte that selects it.
             firstbytes = [0] * 8192
             for (i, k) in enumerate(subheaderkeys):
                 firstbytes[k // 8] = i
             nhdrs = max(subheaderkeys) // 8 + 1
             hdrs = []
             for i in xrange(nhdrs):
                 (firstcode, entcount, delta,
                  offset) = unpack('>HHhH', fp.read(8))
                 # The offset is counted from the position of the
                 # offset field itself, i.e. 2 bytes before tell().
                 hdrs.append((i, firstcode, entcount, delta,
                              fp.tell() - 2 + offset))
             for (i, firstcode, entcount, delta, pos) in hdrs:
                 if not entcount:
                     continue
                 first = firstcode + (firstbytes[i] << 8)
                 fp.seek(pos)
                 for c in xrange(entcount):
                     # unpack() always returns a tuple; take its single
                     # item so the zero test and the addition below
                     # operate on the integer glyph id.
                     gid = unpack('>H', fp.read(2))[0]
                     if gid:
                         gid += delta
                     char2gid[first + c] = gid
         elif fmttype == 4:
             (segcount, _1, _2, _3) = unpack('>HHHH', fp.read(8))
             # The header stores twice the segment count.
             segcount //= 2
             ecs = unpack('>%dH' % segcount, fp.read(2 * segcount))
             fp.read(2)  # skip the 2 bytes between the two arrays
             scs = unpack('>%dH' % segcount, fp.read(2 * segcount))
             idds = unpack('>%dh' % segcount, fp.read(2 * segcount))
             pos = fp.tell()
             idrs = unpack('>%dH' % segcount, fp.read(2 * segcount))
             for (ec, sc, idd, idr) in zip(ecs, scs, idds, idrs):
                 if idr:
                     # NOTE(review): every segment seeks relative to the
                     # start of the idrs array (pos), not to the
                     # segment's own entry -- confirm against the cmap
                     # format 4 specification.
                     fp.seek(pos + idr)
                     for c in xrange(sc, ec + 1):
                         char2gid[c] = (unpack('>H', fp.read(2))[0] +
                                        idd) & 0xffff
                 else:
                     for c in xrange(sc, ec + 1):
                         char2gid[c] = (c + idd) & 0xffff
     gid2char = dict(
         (gid, pack('>H', char)) for (char, gid) in char2gid.iteritems())
     return CMap().update(char2gid, gid2char)
Beispiel #4
0
 def __init__(self, descriptor, widths, spec):
     """Resolve the encoding table and optional ToUnicode CMap from
     spec, then delegate descriptor and widths to PDFFont.__init__."""
     # The font encoding is either the name of a built-in encoding
     # or a dictionary that describes the differences.
     encoding = (resolve1(spec['Encoding']) if 'Encoding' in spec
                 else LITERAL_STANDARD_ENCODING)
     if not isinstance(encoding, dict):
         self.encoding = EncodingDB.get_encoding(literal_name(encoding))
     else:
         base = encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING)
         enc_name = literal_name(base)
         enc_diff = list_value(encoding.get('Differences', None))
         self.encoding = EncodingDB.get_encoding(enc_name, enc_diff)

     # Remains None when the spec has no ToUnicode stream.
     self.ucs2_cmap = None
     if 'ToUnicode' in spec:
         tounicode = stream_value(spec['ToUnicode'])
         self.ucs2_cmap = CMap()
         source = StringIO(tounicode.get_data())
         CMapParser(self.ucs2_cmap, source).run()

     PDFFont.__init__(self, descriptor, widths)
Beispiel #5
0
class PDFCIDFont(PDFFont):
    """CID-keyed font initialized from a font spec dictionary."""

    def __init__(self, rsrc, spec):
        """Read the font name, CID system info, encoding CMap and
        Unicode mapping from spec.

        rsrc: object whose get_cmap(name, strict=...) looks up CMaps.
        spec: font dictionary; the entries 'BaseFont', 'CIDSystemInfo',
            'Encoding', 'FontDescriptor' and 'ToUnicode' are consulted.

        Raises PDFFontError when a CMap lookup fails with
        CMapDB.CMapNotFound, and -- only if STRICT is set -- when
        'BaseFont', 'Encoding' or 'FontDescriptor' is missing.

        NOTE(review): PDFFont.__init__ is not invoked in the code
        visible here -- confirm that base-class setup happens elsewhere.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        # "<Registry>-<Ordering>", with 'unknown' for any missing part.
        self.cidcoding = '%s-%s' % (self.cidsysteminfo.get(
            'Registry',
            'unknown'), self.cidsysteminfo.get('Ordering', 'unknown'))
        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            name = 'unknown'
        try:
            self.cmap = rsrc.get_cmap(name, strict=STRICT)
        # "except X as e" is accepted by Python 2.6+ and Python 3; the
        # older "except X, e" spelling is a syntax error on Python 3.
        except CMapDB.CMapNotFound as e:
            raise PDFFontError(e)
        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               StringIO(self.fontfile.get_data()))
        # Pick the Unicode mapping: an explicit ToUnicode stream first,
        # then (for Adobe-Identity) the CMap built from the embedded
        # font file, otherwise the predefined "<cidcoding>-UCS2" CMap.
        self.ucs2_cmap = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.ucs2_cmap = CMap()
            CMapParser(self.ucs2_cmap, StringIO(strm.get_data())).run()
        elif self.cidcoding == 'Adobe-Identity':
            if ttf:
                try:
                    self.ucs2_cmap = ttf.create_cmap()
                except TrueTypeFont.CMapNotFound:
                    # Deliberate best effort: ucs2_cmap stays None when
                    # the embedded font has no cmap table.
                    pass
        else:
            try:
                self.ucs2_cmap = rsrc.get_cmap('%s-UCS2' % self.cidcoding,
                                               strict=STRICT)
            except CMapDB.CMapNotFound as e:
                raise PDFFontError(e)
Beispiel #6
0
 def __init__(self, descriptor, widths, spec):
     """Initialize the font's encoding table and ToUnicode CMap.

     spec is checked for "Encoding" and "ToUnicode"; descriptor and
     widths are passed straight through to PDFFont.__init__.
     """
     # The encoding comes either as the name of a built-in encoding
     # or as a dictionary that describes the differences.
     enc_obj = LITERAL_STANDARD_ENCODING
     if "Encoding" in spec:
         enc_obj = resolve1(spec["Encoding"])

     if isinstance(enc_obj, dict):
         base_name = literal_name(
             enc_obj.get("BaseEncoding", LITERAL_STANDARD_ENCODING)
         )
         differences = list_value(enc_obj.get("Differences", None))
         lookup_args = (base_name, differences)
     else:
         lookup_args = (literal_name(enc_obj),)
     self.encoding = EncodingDB.get_encoding(*lookup_args)

     self.ucs2_cmap = None
     if "ToUnicode" in spec:
         strm = stream_value(spec["ToUnicode"])
         unicode_map = CMap()
         # Publish the map on self before the parser populates it.
         self.ucs2_cmap = unicode_map
         CMapParser(unicode_map, StringIO(strm.get_data())).run()

     PDFFont.__init__(self, descriptor, widths)