Beispiel #1
0
class PDFCIDFont(PDFFont):
    """Font object initialised from a CID-keyed font dictionary.

    The constructor records the base font name, the CID system info,
    the CMap named by 'Encoding', an optional embedded TrueType program
    ('FontFile2') and, when one can be found, a CID-to-Unicode map.
    """

    def __init__(self, rsrcmgr, spec):
        """Populate the font attributes from *spec*.

        rsrcmgr -- accepted for interface compatibility; it is not used
                   anywhere in this method.
        spec    -- font dictionary.  The keys read here are 'BaseFont',
                   'CIDSystemInfo', 'Encoding', 'FontDescriptor' and
                   'ToUnicode'.

        Raises PDFFontError when STRICT is set and 'BaseFont',
        'Encoding' or 'FontDescriptor' is missing, or when the CMap
        named by 'Encoding' cannot be found.  With STRICT unset, every
        one of these cases falls back to a placeholder value.

        NOTE(review): PDFFont.__init__ is not invoked in the visible
        part of this constructor -- confirm whether that is intended.
        """
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'
        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        # "<Registry>-<Ordering>", with 'unknown' for a missing part.
        self.cidcoding = '%s-%s' % (self.cidsysteminfo.get(
            'Registry',
            'unknown'), self.cidsysteminfo.get('Ordering', 'unknown'))
        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            name = 'unknown'
        try:
            self.cmap = CMapDB.get_cmap(name)
        except CMapDB.CMapNotFound as e:
            # "except ... as" works on Python 2.6+ as well as Python 3,
            # unlike the comma form.
            if STRICT:
                raise PDFFontError(e)
            self.cmap = CMap()
        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}
        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            ttf = TrueTypeFont(self.basefont,
                               StringIO(self.fontfile.get_data()))
        # Choose the Unicode map: an explicit ToUnicode stream wins,
        # then the embedded TrueType data for the two Adobe codings
        # listed below, then a lookup in CMapDB by cidcoding.
        self.unicode_map = None
        if 'ToUnicode' in spec:
            strm = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(strm.get_data())).run()
        elif self.cidcoding in ('Adobe-Identity', 'Adobe-UCS'):
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    # Best effort: leave unicode_map as None.
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                # Best effort: leave unicode_map as None.
                pass
Beispiel #2
0
    def getobj(self, objid):
        """Return the object registered under *objid*.

        An entry already present in self._cached_objs is returned as is.
        Otherwise every xref in self.xrefs is asked for the position of
        the object, and the first one that yields a parsable object
        wins.

        Raises PDFException when self.xrefs is empty and
        PDFObjectNotFound when no xref produces the object.
        """
        assert objid != 0
        if not self.xrefs:
            raise PDFException('PDFDocument is not initialized')
        if 2 <= self.debug:
            print >>sys.stderr, 'getobj: objid=%r' % (objid)
        if objid in self._cached_objs:
            (obj, genno) = self._cached_objs[objid]
        else:
            for xref in self.xrefs:
                try:
                    (strmid, index, genno) = xref.get_pos(objid)
                except KeyError:
                    # This xref has no entry for objid; try the next one.
                    continue
                try:
                    if strmid is not None:
                        # The entry names another object (strmid); that
                        # object is fetched recursively and handed to
                        # _getobj_objstm together with the index.
                        stream = stream_value(self.getobj(strmid))
                        obj = self._getobj_objstm(stream, index, objid)
                    else:
                        obj = self._getobj_parse(index, objid)
                        # Deciphering happens on this branch only.
                        if self.decipher:
                            obj = decipher_all(self.decipher, objid, genno, obj)

                    if isinstance(obj, PDFStream):
                        obj.set_objid(objid, genno)
                    break
                except (PSEOF, PDFSyntaxError):
                    # Parsing via this xref failed; fall through to the
                    # next xref.
                    continue
            else:
                # The loop ended without a break: nothing was found.
                raise PDFObjectNotFound(objid)
            if 2 <= self.debug:
                print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
            if self.caching:
                self._cached_objs[objid] = (obj, genno)
        return obj
Beispiel #3
0
 def do_Do(self, xobjid):
     """Handle the 'Do' operator by drawing the named external object.

     A form XObject that has a BBox is rendered by a duplicate of this
     interpreter, bracketed by begin_figure/end_figure on the device.
     An image XObject with both Width and Height is passed to the
     device's render_image inside a figure with bbox (0, 0, 1, 1).
     Anything else is ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped silently.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if self.debug >= 1:
         print >>stderr, 'Processing xobj: %r' % xobj
     kind = xobj.get('Subtype')
     if kind is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         box = list_value(xobj['BBox'])
         form_matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         self.device.begin_figure(xobjid, box, form_matrix)
         child.render_contents(
             dict_value(xobj.get('Resources')),
             [xobj],
             ctm=mult_matrix(form_matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if kind is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Every other XObject type is unsupported and left untouched.
     return
 def do_Do(self, xobjid):
     """Handle the 'Do' operator by drawing the named external object.

     A form XObject that has a BBox is rendered by a duplicate of this
     interpreter; when the form has no (or an empty) Resources entry a
     copy of self.resources is used instead.  An image XObject with
     both Width and Height is passed to the device's render_image.
     Anything else is ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped silently.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if self.debug >= 1:
         print >>sys.stderr, 'Processing xobj: %r' % xobj
     kind = xobj.get('Subtype')
     if kind is LITERAL_FORM and 'BBox' in xobj:
         child = self.dup()
         box = list_value(xobj['BBox'])
         form_matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         # PDF reference 1.7, section 4.9.1: XObjects in PDFs older
         # than v1.2 rely on the page's Resources entry rather than
         # carrying one of their own.
         resources = dict_value(xobj.get('Resources'))
         if not resources:
             resources = self.resources.copy()
         self.device.begin_figure(xobjid, box, form_matrix)
         child.render_contents(resources, [xobj],
                               ctm=mult_matrix(form_matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if kind is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Every other XObject type is unsupported and left untouched.
     return
Beispiel #5
0
 def do_Do(self, xobjid):
     """Handle the 'Do' operator by drawing the named external object.

     For a form XObject with a BBox, the two BBox corners are mapped
     through the form matrix combined with self.ctm, the resulting box
     opens a figure on the device, and a duplicate interpreter renders
     the form.  For an image XObject with Width and Height, the points
     (0, 0) and (1, 1) are mapped through self.ctm to obtain the figure
     box, and the image is handed to the device together with its
     (Width, Height) pair.  Anything else is ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     otherwise an unknown id is skipped silently.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if self.debug >= 1:
         print >> stderr, 'Processing xobj: %r' % xobj
     kind = xobj.dic.get('Subtype')
     if kind is LITERAL_FORM and 'BBox' in xobj.dic:
         child = self.dup()
         (left, bottom, right, top) = list_value(xobj.dic['BBox'])
         form_matrix = list_value(xobj.dic.get('Matrix', MATRIX_IDENTITY))
         ctm = mult_matrix(form_matrix, self.ctm)
         (left, bottom) = apply_matrix(ctm, (left, bottom))
         (right, top) = apply_matrix(ctm, (right, top))
         self.device.begin_figure(xobjid, (left, bottom, right, top))
         child.render_contents(
             dict_value(xobj.dic.get('Resources')), [xobj], ctm=ctm)
         self.device.end_figure(xobjid)
         return
     if (kind is LITERAL_IMAGE
             and 'Width' in xobj.dic and 'Height' in xobj.dic):
         (left, bottom) = apply_matrix(self.ctm, (0, 0))
         (right, top) = apply_matrix(self.ctm, (1, 1))
         self.device.begin_figure(xobjid, (left, bottom, right, top))
         size = (xobj.dic['Width'], xobj.dic['Height'])
         self.device.render_image(xobj, size, self.ctm)
         self.device.end_figure(xobjid)
     # Every other XObject type is unsupported and left untouched.
     return
Beispiel #6
0
 def do_Do(self, xobjid):
     """Draw the external object named by *xobjid* ('Do' operator).

     Form XObjects carrying a BBox are rendered through a duplicated
     interpreter between begin_figure and end_figure calls on the
     device.  Image XObjects carrying Width and Height are passed to
     the device's render_image inside a figure with bbox (0, 0, 1, 1).
     Other XObjects are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     with STRICT unset the call returns without doing anything.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if self.debug >= 1:
         print >>stderr, 'Processing xobj: %r' % xobj
     xobj_type = xobj.get('Subtype')
     if xobj_type is LITERAL_FORM and 'BBox' in xobj:
         sub_interpreter = self.dup()
         form_bbox = list_value(xobj['BBox'])
         form_matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         self.device.begin_figure(xobjid, form_bbox, form_matrix)
         sub_interpreter.render_contents(
             dict_value(xobj.get('Resources')),
             [xobj],
             ctm=mult_matrix(form_matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if xobj_type is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Unsupported XObject types fall through without any action.
     return
Beispiel #7
0
 def do_Do(self, xobjid):
     """Draw the external object named by *xobjid* ('Do' operator).

     Form XObjects carrying a BBox are rendered through a duplicated
     interpreter, using the form's own Resources or, when that entry
     is missing or empty, a copy of self.resources.  Image XObjects
     carrying Width and Height are passed to the device's render_image.
     Other XObjects are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     with STRICT unset the call returns without doing anything.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError('Undefined xobject id: %r' % xobjid)
     if self.debug >= 1:
         print >>sys.stderr, 'Processing xobj: %r' % xobj
     xobj_type = xobj.get('Subtype')
     if xobj_type is LITERAL_FORM and 'BBox' in xobj:
         sub_interpreter = self.dup()
         form_bbox = list_value(xobj['BBox'])
         form_matrix = list_value(xobj.get('Matrix', MATRIX_IDENTITY))
         # Per PDF reference 1.7 section 4.9.1, XObjects in PDFs prior
         # to v1.2 use the page's Resources entry instead of having
         # their own.
         resources = dict_value(xobj.get('Resources'))
         if not resources:
             resources = self.resources.copy()
         self.device.begin_figure(xobjid, form_bbox, form_matrix)
         sub_interpreter.render_contents(
             resources, [xobj], ctm=mult_matrix(form_matrix, self.ctm))
         self.device.end_figure(xobjid)
         return
     if xobj_type is LITERAL_IMAGE and 'Width' in xobj and 'Height' in xobj:
         self.device.begin_figure(xobjid, (0, 0, 1, 1), MATRIX_IDENTITY)
         self.device.render_image(xobjid, xobj)
         self.device.end_figure(xobjid)
     # Unsupported XObject types fall through without any action.
     return
Beispiel #8
0
 def do_Do(self, xobjid):
     """Draw the external object named by *xobjid* ('Do' operator).

     For a form XObject with a BBox, both BBox corners are transformed
     by the form matrix combined with self.ctm; the transformed box
     opens a figure on the device and a duplicated interpreter renders
     the form with that combined matrix.  For an image XObject with
     Width and Height, (0, 0) and (1, 1) are transformed by self.ctm to
     form the figure box and the image is passed to the device with its
     (Width, Height) pair.  Other XObjects are ignored.

     Raises PDFInterpreterError for an unknown id when STRICT is set;
     with STRICT unset the call returns without doing anything.
     """
     xobjid = literal_name(xobjid)
     try:
         xobj = stream_value(self.xobjmap[xobjid])
     except KeyError:
         if not STRICT:
             return
         raise PDFInterpreterError("Undefined xobject id: %r" % xobjid)
     if self.debug >= 1:
         print >> stderr, "Processing xobj: %r" % xobj
     xobj_type = xobj.dic.get("Subtype")
     if xobj_type is LITERAL_FORM and "BBox" in xobj.dic:
         sub_interpreter = self.dup()
         (llx, lly, urx, ury) = list_value(xobj.dic["BBox"])
         form_matrix = list_value(xobj.dic.get("Matrix", MATRIX_IDENTITY))
         ctm = mult_matrix(form_matrix, self.ctm)
         (llx, lly) = apply_matrix(ctm, (llx, lly))
         (urx, ury) = apply_matrix(ctm, (urx, ury))
         self.device.begin_figure(xobjid, (llx, lly, urx, ury))
         sub_interpreter.render_contents(
             dict_value(xobj.dic.get("Resources")), [xobj], ctm=ctm)
         self.device.end_figure(xobjid)
         return
     if (xobj_type is LITERAL_IMAGE
             and "Width" in xobj.dic and "Height" in xobj.dic):
         (llx, lly) = apply_matrix(self.ctm, (0, 0))
         (urx, ury) = apply_matrix(self.ctm, (1, 1))
         self.device.begin_figure(xobjid, (llx, lly, urx, ury))
         dimensions = (xobj.dic["Width"], xobj.dic["Height"])
         self.device.render_image(xobj, dimensions, self.ctm)
         self.device.end_figure(xobjid)
     # Unsupported XObject types fall through without any action.
     return
Beispiel #9
0
 def getobj(self, objid):
     """Return the object registered under *objid*.

     An entry already present in self._cached_objs is reused.
     Otherwise every xref in self.xrefs is asked for the position of
     the object, and the first one that yields a parsable object wins.
     When self.decipher is set, decipher_all is applied to the result
     on every call, including results taken from the cache.

     Raises PDFException when self.xrefs is empty and
     PDFObjectNotFound when no xref produces the object.
     """
     assert objid != 0
     if not self.xrefs:
         raise PDFException('PDFDocument is not initialized')
     if 2 <= self.debug:
         print >> sys.stderr, 'getobj: objid=%r' % (objid)
     if objid in self._cached_objs:
         (obj, genno) = self._cached_objs[objid]
     else:
         for xref in self.xrefs:
             try:
                 (strmid, index, genno) = xref.get_pos(objid)
             except KeyError:
                 # This xref has no entry for objid; try the next one.
                 continue
             try:
                 if strmid is not None:
                     # The entry names another object (strmid); that
                     # object is fetched recursively and handed to
                     # _getobj_objstm together with the index.
                     stream = stream_value(self.getobj(strmid))
                     obj = self._getobj_objstm(stream, index, objid)
                 else:
                     obj = self._getobj_parse(index, objid)
                 if isinstance(obj, PDFStream):
                     obj.set_objid(objid, genno)
                 break
             except (PSEOF, PDFSyntaxError):
                 # Parsing via this xref failed; fall through to the
                 # next xref.
                 continue
         else:
             # The loop ended without a break: nothing was found.
             raise PDFObjectNotFound(objid)
         if 2 <= self.debug:
             print >> sys.stderr, 'register: objid=%r: %r' % (objid, obj)
         if self.caching:
             # The cache stores the object as parsed, before the
             # decipher step below.
             self._cached_objs[objid] = (obj, genno)
     # NOTE(review): this runs for cached objects too, so the same
     # cached object is passed to decipher_all on each lookup --
     # confirm that decipher_all does not modify its argument in place.
     if self.decipher:
         obj = decipher_all(self.decipher, objid, genno, obj)
     return obj
Beispiel #10
0
 def fillfp(self):
     """Make sure self.fp holds a readable buffer.

     Nothing happens while self.fp is truthy.  Otherwise the stream at
     position self.istream in self.streams is opened, self.istream is
     advanced, and the stream data is wrapped in a StringIO.

     Raises PSEOF when no stream is left.
     """
     if self.fp:
         return
     if self.istream >= len(self.streams):
         raise PSEOF('Unexpected EOF, file truncated?')
     strm = stream_value(self.streams[self.istream])
     self.istream += 1
     self.fp = StringIO(strm.get_data())
Beispiel #11
0
 def fillfp(self):
     """Open the next content stream when no buffer is active.

     While self.fp is truthy the call is a no-op.  Otherwise the stream
     at index self.istream of self.streams is resolved, the index is
     incremented, and self.fp becomes a StringIO over the stream data.

     Raises PSEOF when self.istream is past the last stream.
     """
     if self.fp:
         return
     if self.istream >= len(self.streams):
         raise PSEOF('Unexpected EOF, file truncated?')
     next_stream = stream_value(self.streams[self.istream])
     self.istream += 1
     self.fp = StringIO(next_stream.get_data())
     return
Beispiel #12
0
 def get_colorspace(spec):
     """Map a colour space specification to a colour space object.

     *spec* is either a single name or a list whose first item is the
     name.  For a list of at least two items named "ICCBased" or
     "DeviceN", a new ColorSpace is built: its second argument is the
     "N" entry of the stream in spec[1], or the length of the list in
     spec[1], respectively.  Every other name is looked up in
     PREDEFINED_COLORSPACE, which raises KeyError for an unknown name.
     """
     is_array = isinstance(spec, list)
     name = literal_name(spec[0] if is_array else spec)
     if is_array and 2 <= len(spec):
         if name == "ICCBased":
             return ColorSpace(name, stream_value(spec[1]).dic["N"])
         if name == "DeviceN":
             return ColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE[name]
Beispiel #13
0
 def get_colorspace(spec):
     """Map a colour space specification to a colour space object.

     *spec* is either a single name or a list whose first item is the
     name.  For a list of at least two items named 'ICCBased' or
     'DeviceN', a new PDFColorSpace is built: its second argument is
     the 'N' entry of the stream in spec[1], or the length of the list
     in spec[1], respectively.  Every other name is looked up in
     PREDEFINED_COLORSPACE, which raises KeyError for an unknown name.
     """
     is_array = isinstance(spec, list)
     name = literal_name(spec[0] if is_array else spec)
     if is_array and 2 <= len(spec):
         if name == 'ICCBased':
             return PDFColorSpace(name, stream_value(spec[1])['N'])
         if name == 'DeviceN':
             return PDFColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE[name]
Beispiel #14
0
 def get_colorspace(spec):
     """Map a colour space specification to a colour space object.

     *spec* is either a single name or a list whose first item is the
     name.  For a list of at least two items named 'ICCBased' or
     'DeviceN', a new PDFColorSpace is built: its second argument is
     the 'N' entry of the stream in spec[1], or the length of the list
     in spec[1], respectively.  Every other name is looked up with
     PREDEFINED_COLORSPACE.get, so an unknown name yields None.
     """
     has_params = isinstance(spec, list)
     name = literal_name(spec[0] if has_params else spec)
     if has_params and 2 <= len(spec):
         if name == 'ICCBased':
             return PDFColorSpace(name, stream_value(spec[1])['N'])
         if name == 'DeviceN':
             return PDFColorSpace(name, len(list_value(spec[1])))
     return PREDEFINED_COLORSPACE.get(name)
Beispiel #15
0
 def fillfp(self):
     """Open the next content stream when no buffer is active.

     While self.fp is truthy the call is a no-op.  Otherwise the stream
     at index self.istream of self.streams is resolved and the index is
     incremented.  Stream data of type bytes is decoded as latin-1
     before being wrapped in a StringIO and stored in self.fp.

     Raises PSEOF when self.istream is past the last stream.
     """
     if self.fp:
         return
     if self.istream >= len(self.streams):
         raise PSEOF('Unexpected EOF, file truncated?')
     next_stream = stream_value(self.streams[self.istream])
     self.istream += 1
     payload = next_stream.get_data()
     if isinstance(payload, bytes):
         payload = payload.decode('latin-1')
     self.fp = StringIO(payload)
     return
Beispiel #16
0
 def load(self, parser, debug=0):
     """Build self.offsets by scanning the input line by line.

     Scanning starts at offset 0.  Every line matched by
     self.PDFOBJ_CUE records (None, pos, genno) under its object id.
     When the object found at that position is a PDFStream whose Type
     is LITERAL_OBJSTM, its content is parsed as well and each of the
     first n leading id entries is recorded as (objid, index, 0).  The
     scan ends at the first line starting with 'trailer' (after
     load_trailer has been called) or when the parser raises PSEOF.

     parser -- object providing seek, nextline and nextobject.
     debug  -- a value of 1 or more prints the trailer to sys.stderr.

     Raises PDFSyntaxError when STRICT is set and an object stream has
     no 'N' entry.
     """
     parser.seek(0)
     while True:
         try:
             (pos, line) = parser.nextline()
         except PSEOF:
             break
         if line.startswith('trailer'):
             parser.seek(pos)
             self.load_trailer(parser)
             if debug >= 1:
                 print >> sys.stderr, 'trailer: %r' % self.get_trailer()
             break
         cue = self.PDFOBJ_CUE.match(line)
         if not cue:
             continue
         (objid_text, genno_text) = cue.groups()
         objid = int(objid_text)
         genno = int(genno_text)
         self.offsets[objid] = (None, pos, genno)
         # Inspect the object itself: object streams get expanded.
         parser.seek(pos)
         (_, candidate) = parser.nextobject()
         if not isinstance(candidate, PDFStream):
             continue
         if candidate.get('Type') is not LITERAL_OBJSTM:
             continue
         stream = stream_value(candidate)
         try:
             declared = stream['N']
         except KeyError:
             if STRICT:
                 raise PDFSyntaxError('N is not defined: %r' % stream)
             declared = 0
         member_parser = PDFStreamParser(stream.get_data())
         members = []
         try:
             while True:
                 (_, item) = member_parser.nextobject()
                 members.append(item)
         except PSEOF:
             pass
         # Never trust N beyond what was actually parsed.
         count = min(declared, len(members) // 2)
         for index in xrange(count):
             member_id = members[index * 2]
             self.offsets[member_id] = (objid, index, 0)
     return
Beispiel #17
0
 def load(self, parser, debug=0):
     """Build self.offsets by scanning the input line by line.

     Scanning starts at offset 0.  Every line matched by
     self.PDFOBJ_CUE records (None, pos, genno) under its object id.
     When the object found at that position is a PDFStream whose Type
     is LITERAL_OBJSTM, its content is parsed as well and each of the
     first n leading id entries is recorded as (objid, index, 0).  The
     scan ends at the first line starting with 'trailer' (after
     load_trailer has been called) or when the parser raises PSEOF.

     parser -- object providing seek, nextline and nextobject.
     debug  -- a value of 1 or more prints the trailer to sys.stderr.

     Raises PDFSyntaxError when STRICT is set and an object stream has
     no 'N' entry.
     """
     parser.seek(0)
     while True:
         try:
             (pos, line) = parser.nextline()
         except PSEOF:
             break
         if line.startswith('trailer'):
             parser.seek(pos)
             self.load_trailer(parser)
             if debug >= 1:
                 print >>sys.stderr, 'trailer: %r' % self.get_trailer()
             break
         header = self.PDFOBJ_CUE.match(line)
         if not header:
             continue
         (objid_text, genno_text) = header.groups()
         objid = int(objid_text)
         genno = int(genno_text)
         self.offsets[objid] = (None, pos, genno)
         # Inspect the object itself: object streams get expanded.
         parser.seek(pos)
         (_, parsed) = parser.nextobject()
         if not isinstance(parsed, PDFStream):
             continue
         if parsed.get('Type') is not LITERAL_OBJSTM:
             continue
         stream = stream_value(parsed)
         try:
             declared = stream['N']
         except KeyError:
             if STRICT:
                 raise PDFSyntaxError('N is not defined: %r' % stream)
             declared = 0
         inner_parser = PDFStreamParser(stream.get_data())
         entries = []
         try:
             while True:
                 (_, entry) = inner_parser.nextobject()
                 entries.append(entry)
         except PSEOF:
             pass
         # Never trust N beyond what was actually parsed.
         count = min(declared, len(entries)//2)
         for index in xrange(count):
             inner_id = entries[index*2]
             self.offsets[inner_id] = (objid, index, 0)
     return
Beispiel #18
0
 def __init__(self, descriptor, widths, spec):
     """Set up the character code mapping, then initialise PDFFont.

     descriptor, widths -- passed through unchanged to PDFFont.__init__.
     spec -- font dictionary; 'Encoding' and 'ToUnicode' are read.

     self.cid2unicode comes from EncodingDB.  self.unicode_map stays
     None unless the font has a 'ToUnicode' stream, in which case that
     stream is parsed into a FileUnicodeMap.
     """
     # The encoding is either the name of a built-in encoding or a
     # dictionary describing differences from a base encoding.
     encoding = (resolve1(spec['Encoding']) if 'Encoding' in spec
                 else LITERAL_STANDARD_ENCODING)
     if isinstance(encoding, dict):
         base_name = literal_name(
             encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
         differences = list_value(encoding.get('Differences', None))
         cid2unicode = EncodingDB.get_encoding(base_name, differences)
     else:
         cid2unicode = EncodingDB.get_encoding(literal_name(encoding))
     self.cid2unicode = cid2unicode
     self.unicode_map = None
     if 'ToUnicode' in spec:
         tounicode = stream_value(spec['ToUnicode'])
         self.unicode_map = FileUnicodeMap()
         CMapParser(self.unicode_map, StringIO(tounicode.get_data())).run()
     PDFFont.__init__(self, descriptor, widths)
Beispiel #19
0
 def __init__(self, descriptor, widths, spec):
     """Set up the character code mapping, then initialise PDFFont.

     descriptor, widths -- passed through unchanged to PDFFont.__init__.
     spec -- font dictionary; 'Encoding' and 'ToUnicode' are read.

     self.cid2unicode comes from EncodingDB.  self.unicode_map stays
     None unless the font has a 'ToUnicode' stream, in which case that
     stream is parsed into a FileUnicodeMap.
     """
     # The encoding is either the name of a built-in encoding or a
     # dictionary describing differences from a base encoding.
     encoding = (resolve1(spec['Encoding']) if 'Encoding' in spec
                 else LITERAL_STANDARD_ENCODING)
     if isinstance(encoding, dict):
         base_name = literal_name(
             encoding.get('BaseEncoding', LITERAL_STANDARD_ENCODING))
         differences = list_value(encoding.get('Differences', None))
         table = EncodingDB.get_encoding(base_name, differences)
     else:
         table = EncodingDB.get_encoding(literal_name(encoding))
     self.cid2unicode = table
     self.unicode_map = None
     if 'ToUnicode' in spec:
         tounicode = stream_value(spec['ToUnicode'])
         self.unicode_map = FileUnicodeMap()
         CMapParser(self.unicode_map, StringIO(tounicode.get_data())).run()
     PDFFont.__init__(self, descriptor, widths)
     return
Beispiel #20
0
 def __init__(self, rsrcmgr, spec):
     """Initialise the font from its dictionary *spec*.

     rsrcmgr -- accepted but not used in this method.
     spec    -- font dictionary; 'BaseFont', 'FontDescriptor',
                'FirstChar', 'LastChar', 'Widths' and 'Encoding' are
                read here.

     Metrics come from FontMetricsDB when it knows the base font, and
     from the dictionary otherwise.  When the dictionary has no
     'Encoding' but the descriptor has a 'FontFile', the encoding is
     read from the header of that font file.

     A missing 'BaseFont' is reported through handle_error and the
     name falls back to 'unknown'.
     """
     try:
         basefont = literal_name(spec['BaseFont'])
     except KeyError:
         handle_error(PDFFontError, 'BaseFont is missing')
         basefont = 'unknown'
     self.basefont = basefont
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         firstchar = int_value(spec.get('FirstChar', 0))
         # 'LastChar' is read but not used further in this method.
         lastchar = int_value(spec.get('LastChar', 255))
         width_list = list_value(spec.get('Widths', [0] * 256))
         # Key each width by its character code, starting at firstchar.
         widths = {}
         for (offset, width) in enumerate(width_list):
             widths[offset + firstchar] = width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
     if 'Encoding' in spec or 'FontFile' not in descriptor:
         return
     # Try to recover the missing encoding info from the font file.
     fontfile = stream_value(descriptor.get('FontFile'))
     self.fontfile = fontfile
     length1 = int_value(fontfile['Length1'])
     header = fontfile.get_data()[:length1]
     self.cid2unicode = Type1FontHeaderParser(StringIO(header)).get_encoding()
Beispiel #21
0
 def __init__(self, rsrcmgr, spec):
     """Initialise the font from its dictionary *spec*.

     rsrcmgr -- accepted but not used in this method.
     spec    -- font dictionary; 'BaseFont', 'FontDescriptor',
                'FirstChar', 'LastChar', 'Widths' and 'Encoding' are
                read here.

     Metrics come from FontMetricsDB when it knows the base font, and
     from the dictionary otherwise.  When the dictionary has no
     'Encoding' but the descriptor has a 'FontFile', the encoding is
     read from the header of that font file.

     Raises PDFFontError when STRICT is set and 'BaseFont' is missing;
     otherwise the name falls back to 'unknown'.
     """
     try:
         basefont = literal_name(spec['BaseFont'])
     except KeyError:
         if STRICT:
             raise PDFFontError('BaseFont is missing')
         basefont = 'unknown'
     self.basefont = basefont
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get('FontDescriptor', {}))
         firstchar = int_value(spec.get('FirstChar', 0))
         # 'LastChar' is read but not used further in this method.
         lastchar = int_value(spec.get('LastChar', 255))
         width_list = list_value(spec.get('Widths', [0]*256))
         # Key each width by its character code, starting at firstchar.
         widths = {}
         for (offset, width) in enumerate(width_list):
             widths[offset + firstchar] = width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
     if 'Encoding' in spec or 'FontFile' not in descriptor:
         return
     # Try to recover the missing encoding info from the font file.
     fontfile = stream_value(descriptor.get('FontFile'))
     self.fontfile = fontfile
     length1 = int_value(fontfile['Length1'])
     header = fontfile.get_data()[:length1]
     self.cid2unicode = Type1FontHeaderParser(StringIO(header)).get_encoding()
     return
Beispiel #22
0
 def __init__(self, rsrcmgr, spec):
     """Initialise the font from its dictionary *spec*.

     rsrcmgr -- accepted but not used in this method.
     spec    -- font dictionary; "BaseFont", "FontDescriptor",
                "FirstChar", "LastChar", "Widths" and "Encoding" are
                read here.

     Metrics come from FontMetricsDB when it knows the base font, and
     from the dictionary otherwise.  When the dictionary has no
     "Encoding" but the descriptor has a "FontFile", the encoding is
     read from the header of that font file.

     Raises PDFFontError when STRICT is set and "BaseFont" is missing;
     otherwise the name falls back to "unknown".
     """
     try:
         font_name = literal_name(spec["BaseFont"])
     except KeyError:
         if STRICT:
             raise PDFFontError("BaseFont is missing")
         font_name = "unknown"
     self.basefont = font_name
     try:
         (descriptor, widths) = FontMetricsDB.get_metrics(self.basefont)
     except KeyError:
         descriptor = dict_value(spec.get("FontDescriptor", {}))
         firstchar = int_value(spec.get("FirstChar", 0))
         # "LastChar" is read but not used further in this method.
         lastchar = int_value(spec.get("LastChar", 255))
         raw_widths = list_value(spec.get("Widths", [0] * 256))
         # Key each width by its character code, starting at firstchar.
         widths = {}
         for (position, char_width) in enumerate(raw_widths):
             widths[position + firstchar] = char_width
     PDFSimpleFont.__init__(self, descriptor, widths, spec)
     if "Encoding" in spec or "FontFile" not in descriptor:
         return
     # Try to recover the missing encoding info from the font file.
     fontfile = stream_value(descriptor.get("FontFile"))
     self.fontfile = fontfile
     length1 = int_value(fontfile["Length1"])
     header = fontfile.get_data()[:length1]
     self.cid2unicode = Type1FontHeaderParser(StringIO(header)).get_encoding()
     return
Beispiel #23
0
 def getobj(self, objid):
     """Return the object registered under *objid*.

     An entry already present in self._cached_objs is reused.
     Otherwise the first xref in self.xrefs that knows the id supplies
     a (strmid, index) pair: a truthy strmid means the object is taken
     from the parsed content of that stream object, a falsy one means
     it is parsed from the main parser at offset index.  When
     self.decipher is set, decipher_all is applied to the result on
     every call, including results taken from the cache.

     Returns None when the id cannot be located, the index is out of
     range, or the parser hits PSEOF while reading the object (the
     first two only with STRICT unset).

     Raises PDFException when self.xrefs is empty, and PDFSyntaxError
     for an invalid object header or, with STRICT set, for the
     conditions listed above.
     """
     if not self.xrefs:
         raise PDFException('PDFDocument is not initialized')
     if 2 <= self.debug:
         print >>sys.stderr, 'getobj: objid=%r' % (objid)
     if objid in self._cached_objs:
         # The cache does not store a generation number; 0 is used.
         genno = 0
         obj = self._cached_objs[objid]
     else:
         for xref in self.xrefs:
             try:
                 (strmid, index) = xref.get_pos(objid)
                 break
             except KeyError:
                 pass
         else:
             if STRICT:
                 raise PDFSyntaxError('Cannot locate objid=%r' % objid)
             # return null for a nonexistent reference.
             return None
         # NOTE(review): this is a truthiness test, so a strmid of 0
         # takes the same path as None -- confirm that is intended.
         if strmid:
             stream = stream_value(self.getobj(strmid))
             if stream.get('Type') is not LITERAL_OBJSTM:
                 if STRICT:
                     raise PDFSyntaxError('Not a stream object: %r' % stream)
             try:
                 n = stream['N']
             except KeyError:
                 if STRICT:
                     raise PDFSyntaxError('N is not defined: %r' % stream)
                 n = 0
             # Reuse the parsed content of this stream when available.
             if strmid in self._parsed_objs:
                 objs = self._parsed_objs[strmid]
             else:
                 parser = PDFStreamParser(stream.get_data())
                 parser.set_document(self)
                 objs = []
                 try:
                     while 1:
                         (_,obj) = parser.nextobject()
                         objs.append(obj)
                 except PSEOF:
                     # End of the stream content: everything was read.
                     pass
                 if self.caching:
                     self._parsed_objs[strmid] = objs
             genno = 0
             # The first n*2 parsed items are skipped to reach the
             # object at position index.
             i = n*2+index
             try:
                 obj = objs[i]
             except IndexError:
                 if STRICT:
                     raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
                 # return None for an invalid object number
                 return None
             if isinstance(obj, PDFStream):
                 obj.set_objid(objid, 0)
         else:
             self._parser.seek(index)
             (_,objid1) = self._parser.nexttoken() # objid
             (_,genno) = self._parser.nexttoken() # genno
             (_,kwd) = self._parser.nexttoken()
             # #### hack around malformed pdf files
             #assert objid1 == objid, (objid, objid1)
             if objid1 != objid:
                 # Skip tokens until the 'obj' keyword, remembering
                 # every token read on the way (the keyword included).
                 x = []
                 while kwd is not self.KEYWORD_OBJ:
                     (_,kwd) = self._parser.nexttoken()
                     x.append(kwd)
                 # NOTE(review): x ends with the 'obj' keyword itself,
                 # so x[-1] is that keyword rather than a generation
                 # number, and x[-2] raises IndexError when only one
                 # token was collected -- confirm the intended indices.
                 if x:
                     objid1 = x[-2]
                     genno = x[-1]
             # #### end hack around malformed pdf files
             if kwd is not self.KEYWORD_OBJ:
                 raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
             try:
                 (_,obj) = self._parser.nextobject()
                 if isinstance(obj, PDFStream):
                     obj.set_objid(objid, genno)
             except PSEOF:
                 return None
         if 2 <= self.debug:
             print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
         if self.caching:
             # The cache stores the object as parsed, before the
             # decipher step below.
             self._cached_objs[objid] = obj
     if self.decipher:
         obj = decipher_all(self.decipher, objid, genno, obj)
     return obj
Beispiel #24
0
    def __init__(self, rsrcmgr, spec):
        """Initialise a CID-keyed font from its dictionary *spec*.

        rsrcmgr -- accepted but not used in this method.
        spec    -- font dictionary; "BaseFont", "CIDSystemInfo",
                   "Encoding", "FontDescriptor", "ToUnicode" and the
                   width entries ("W"/"DW" or "W2"/"DW2") are read.

        Sets basefont, cidsysteminfo, cidcoding, cmap, unicode_map,
        vertical, disps and default_disp, and fontfile when the
        descriptor has a "FontFile2"; finally calls PDFFont.__init__.

        Raises PDFFontError when STRICT is set and "BaseFont",
        "Encoding" or "FontDescriptor" is missing, or when the CMap
        named by "Encoding" cannot be found.
        """
        try:
            basefont = literal_name(spec["BaseFont"])
        except KeyError:
            if STRICT:
                raise PDFFontError("BaseFont is missing")
            basefont = "unknown"
        self.basefont = basefont

        self.cidsysteminfo = dict_value(spec.get("CIDSystemInfo", {}))
        registry = self.cidsysteminfo.get("Registry", "unknown")
        ordering = self.cidsysteminfo.get("Ordering", "unknown")
        self.cidcoding = "%s-%s" % (registry, ordering)

        try:
            name = literal_name(spec["Encoding"])
        except KeyError:
            if STRICT:
                raise PDFFontError("Encoding is unspecified")
            name = "unknown"
        try:
            cmap = CMapDB.get_cmap(name)
        except CMapDB.CMapNotFound as err:
            if STRICT:
                raise PDFFontError(err)
            cmap = CMap()
        self.cmap = cmap

        try:
            descriptor = dict_value(spec["FontDescriptor"])
        except KeyError:
            if STRICT:
                raise PDFFontError("FontDescriptor is missing")
            descriptor = {}

        ttf = None
        if "FontFile2" in descriptor:
            self.fontfile = stream_value(descriptor.get("FontFile2"))
            font_data = StringIO(self.fontfile.get_data())
            ttf = TrueTypeFont(self.basefont, font_data)

        # Unicode map: an explicit ToUnicode stream wins, then the
        # embedded TrueType data for the two Adobe codings below, then
        # a CMapDB lookup by cidcoding.
        self.unicode_map = None
        if "ToUnicode" in spec:
            tounicode = stream_value(spec["ToUnicode"])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(tounicode.get_data())).run()
        elif self.cidcoding in ("Adobe-Identity", "Adobe-UCS"):
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
            metrics = get_widths2(list_value(spec.get("W2", [])))
            disps = {}
            for (cid, (_, (vx, vy))) in metrics.iteritems():
                disps[cid] = (vx, vy)
            self.disps = disps
            (default_vy, default_width) = spec.get("DW2", [880, -1000])
            self.default_disp = (None, default_vy)
            widths = {}
            for (cid, (width, _)) in metrics.iteritems():
                widths[cid] = width
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get("W", [])))
            default_width = spec.get("DW", 1000)
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
        return
Beispiel #25
0
    def __init__(self, rsrcmgr, spec):
        """Initialise a CID-keyed font from its dictionary *spec*.

        rsrcmgr -- accepted but not used in this method.
        spec    -- font dictionary; 'BaseFont', 'CIDSystemInfo',
                   'Encoding', 'FontDescriptor', 'ToUnicode' and the
                   width entries ('W'/'DW' or 'W2'/'DW2') are read.

        Sets basefont, cidsysteminfo, cidcoding, cmap, unicode_map,
        vertical, disps and default_disp, and fontfile when the
        descriptor has a 'FontFile2'; finally calls PDFFont.__init__.

        Raises PDFFontError when STRICT is set and 'BaseFont',
        'Encoding' or 'FontDescriptor' is missing, or when the CMap
        named by 'Encoding' cannot be found.
        """
        try:
            basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            basefont = 'unknown'
        self.basefont = basefont

        self.cidsysteminfo = dict_value(spec.get('CIDSystemInfo', {}))
        registry = self.cidsysteminfo.get('Registry', 'unknown')
        ordering = self.cidsysteminfo.get('Ordering', 'unknown')
        self.cidcoding = '%s-%s' % (registry, ordering)

        try:
            name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            name = 'unknown'
        try:
            cmap = CMapDB.get_cmap(name)
        except CMapDB.CMapNotFound as err:
            if STRICT:
                raise PDFFontError(err)
            cmap = CMap()
        self.cmap = cmap

        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}

        ttf = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            font_data = StringIO(self.fontfile.get_data())
            ttf = TrueTypeFont(self.basefont, font_data)

        # Unicode map: an explicit ToUnicode stream wins, then the
        # embedded TrueType data for the Adobe-Identity coding, then a
        # CMapDB lookup by cidcoding.
        self.unicode_map = None
        if 'ToUnicode' in spec:
            tounicode = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            CMapParser(self.unicode_map, StringIO(tounicode.get_data())).run()
        elif self.cidcoding == 'Adobe-Identity':
            if ttf:
                try:
                    self.unicode_map = ttf.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                pass

        self.vertical = self.cmap.is_vertical()
        if self.vertical:
            # writing mode: vertical
            metrics = get_widths2(list_value(spec.get('W2', [])))
            disps = {}
            for (cid, (_, (vx, vy))) in metrics.iteritems():
                disps[cid] = (vx, vy)
            self.disps = disps
            (default_vy, default_width) = spec.get('DW2', [880, -1000])
            self.default_disp = (None, default_vy)
            widths = {}
            for (cid, (width, _)) in metrics.iteritems():
                widths[cid] = width
        else:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            default_width = spec.get('DW', 1000)
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
        return
Beispiel #26
0
 def getobj(self, objid):
   """Return the object registered under objid, loading it on first use.

   Objects already present in self.objs are returned from there.
   Otherwise each entry of self.xrefs is asked for the object's position
   via getpos(), which yields a (strmid, index) pair:

   * strmid is truthy: the object lives inside the object stream whose
     own object id is strmid. The stream is fetched (recursively through
     getobj), parsed once, and the parsed object list is cached in
     self.parsed_objs under strmid.
   * strmid is falsy: index is used as a seek position for self.parser,
     and the object is read from there ("<objid> <genno> obj ...").

   The loaded object is stored in self.objs. If self.decipher is set, the
   result is passed through decipher_all() before being returned.

   Raises:
     PDFException: the document is not ready (self.ready is false).
     PDFSyntaxError: the object cannot be located (STRICT only), the
       object stream is malformed (STRICT only), the index inside an
       object stream is out of range, or the 'obj' keyword is missing.

   Returns None when the object cannot be located and STRICT is off.
   """
   if not self.ready:
     raise PDFException('PDFDocument not initialized')
   if 2 <= self.debug:
     print >>stderr, 'getobj: objid=%r' % (objid)
   if objid in self.objs:
     # Already loaded; the generation number is not remembered, so 0 is
     # what gets handed to the decipher step below.
     genno = 0
     obj = self.objs[objid]
   else:
     # Ask each xref table in turn; the first one that knows objid wins.
     for xref in self.xrefs:
       try:
         (strmid, index) = xref.getpos(objid)
         break
       except KeyError:
         pass
     else:
       if STRICT:
         raise PDFSyntaxError('Cannot locate objid=%r' % objid)
       return None
     if strmid:
       # The object is stored inside an object stream.
       stream = stream_value(self.getobj(strmid))
       if stream.dic.get('Type') is not LITERAL_OBJSTM:
         if STRICT:
           raise PDFSyntaxError('Not a stream object: %r' % stream)
       try:
         n = stream.dic['N']
       except KeyError:
         if STRICT:
           raise PDFSyntaxError('N is not defined: %r' % stream)
         n = 0
       # The cache is keyed by the stream's object id. Testing for strmid
       # but storing under the stream object (as was done before) meant
       # the cache never hit and the stream was re-parsed on every call.
       if strmid in self.parsed_objs:
         objs = self.parsed_objs[strmid]
       else:
         parser = PDFObjStrmParser(self, stream.get_data())
         objs = []
         try:
           while 1:
             (_,obj) = parser.nextobject()
             objs.append(obj)
         except PSEOF:
           pass
         self.parsed_objs[strmid] = objs
       genno = 0
       # Skip the first n*2 parsed items; presumably these are the N
       # (object number, offset) integer pairs that head an object
       # stream -- confirm against the PDF specification.
       i = n*2+index
       try:
         obj = objs[i]
       except IndexError:
         raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
       if isinstance(obj, PDFStream):
         obj.set_objid(objid, 0)
     else:
       # The object is stored directly in the file at position `index`.
       self.parser.seek(index)
       (_,objid1) = self.parser.nexttoken() # objid
       (_,genno) = self.parser.nexttoken() # genno
       # NOTE: objid1 is deliberately not compared with objid.
       (_,kwd) = self.parser.nexttoken()
       if kwd is not self.KEYWORD_OBJ:
         raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
       (_,obj) = self.parser.nextobject()
       if isinstance(obj, PDFStream):
         obj.set_objid(objid, genno)
     if 2 <= self.debug:
       print >>stderr, 'register: objid=%r: %r' % (objid, obj)
     self.objs[objid] = obj
   if self.decipher:
     obj = decipher_all(self.decipher, objid, genno, obj)
   return obj
Beispiel #27
0
    def __init__(self, rsrcmgr, spec):
        """Build a CID font from the font dictionary ``spec``.

        Reads 'BaseFont', 'CIDSystemInfo', 'Encoding', 'FontDescriptor',
        'ToUnicode' and the width entries ('W'/'DW', or 'W2'/'DW2' when
        the CMap reports vertical writing) from ``spec``.

        With STRICT enabled, a missing 'BaseFont', 'Encoding' or
        'FontDescriptor' entry, or an unknown CMap name, raises
        PDFFontError; otherwise a placeholder value is substituted.
        ``rsrcmgr`` is accepted but not used here.
        """
        # --- base font name ---
        try:
            self.basefont = literal_name(spec['BaseFont'])
        except KeyError:
            if STRICT:
                raise PDFFontError('BaseFont is missing')
            self.basefont = 'unknown'

        # --- character collection: "<Registry>-<Ordering>" ---
        sysinfo = dict_value(spec.get('CIDSystemInfo', {}))
        self.cidsysteminfo = sysinfo
        registry = sysinfo.get('Registry', 'unknown')
        ordering = sysinfo.get('Ordering', 'unknown')
        self.cidcoding = '%s-%s' % (registry, ordering)

        # --- CMap selected by the Encoding entry ---
        try:
            encoding_name = literal_name(spec['Encoding'])
        except KeyError:
            if STRICT:
                raise PDFFontError('Encoding is unspecified')
            encoding_name = 'unknown'
        try:
            self.cmap = CMapDB.get_cmap(encoding_name)
        except CMapDB.CMapNotFound as err:
            if STRICT:
                raise PDFFontError(err)
            # Fall back to an empty CMap when running leniently.
            self.cmap = CMap()

        # --- font descriptor and optional embedded TrueType program ---
        try:
            descriptor = dict_value(spec['FontDescriptor'])
        except KeyError:
            if STRICT:
                raise PDFFontError('FontDescriptor is missing')
            descriptor = {}
        truetype = None
        if 'FontFile2' in descriptor:
            self.fontfile = stream_value(descriptor.get('FontFile2'))
            font_data = StringIO(self.fontfile.get_data())
            truetype = TrueTypeFont(self.basefont, font_data)

        # --- unicode map: ToUnicode stream first, then the embedded
        # TrueType font for Adobe-Identity, then the CMap database ---
        self.unicode_map = None
        if 'ToUnicode' in spec:
            tounicode = stream_value(spec['ToUnicode'])
            self.unicode_map = FileUnicodeMap()
            cmap_source = StringIO(tounicode.get_data())
            CMapParser(self.unicode_map, cmap_source).run()
        elif self.cidcoding == 'Adobe-Identity':
            if truetype:
                try:
                    self.unicode_map = truetype.create_unicode_map()
                except TrueTypeFont.CMapNotFound:
                    # Leave unicode_map as None.
                    pass
        else:
            try:
                self.unicode_map = CMapDB.get_unicode_map(
                    self.cidcoding, self.cmap.is_vertical())
            except CMapDB.CMapNotFound:
                # No predefined map for this collection; keep None.
                pass

        # --- glyph metrics ---
        self.vertical = self.cmap.is_vertical()
        if not self.vertical:
            # writing mode: horizontal
            self.disps = {}
            self.default_disp = 0
            widths = get_widths(list_value(spec.get('W', [])))
            default_width = spec.get('DW', 1000)
        else:
            # writing mode: vertical
            # Each value from get_widths2 is unpacked as (w, (vx, vy)).
            metrics = get_widths2(list_value(spec.get('W2', [])))
            displacements = {}
            advances = {}
            for (cid, (advance, (vx, vy))) in metrics.iteritems():
                displacements[cid] = (vx, vy)
                advances[cid] = advance
            self.disps = displacements
            (default_vy, default_advance) = spec.get('DW2', [880, -1000])
            self.default_disp = (None, default_vy)
            widths = advances
            default_width = default_advance
        PDFFont.__init__(self, descriptor, widths, default_width=default_width)
Beispiel #28
0
 def metadata(self):
     """Return the metadata dictionary parsed from the embedded XMP.

     When the catalog has no 'Metadata' entry an empty dictionary is
     returned; otherwise the data of that stream is handed to xmpparse()
     and its result is returned.
     """
     if 'Metadata' not in self.catalog:
         return {}
     xmp_stream = stream_value(self.catalog['Metadata'])
     return xmpparse(xmp_stream.get_data())