def getobj(self, objid):
    """Return the object registered under ``objid``.

    A hit in ``self._cached_objs`` supplies both the object and its
    generation number.  On a miss, each table in ``self.xrefs`` is asked for
    the object's position in turn; the first table whose entry can be loaded
    without a PSEOF / PDFSyntaxError wins.  When ``self.decipher`` is set the
    result is passed through ``decipher_all`` just before returning.

    Raises:
        AssertionError: if ``objid`` is 0.
        PDFException: if ``self.xrefs`` is empty.
        PDFObjectNotFound: if no xref table yields a loadable object.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation had been lost -- confirm against the original.
    NOTE(review): deciphering happens on every call, cache hits included,
    because the cache holds the object as it was loaded; confirm that
    ``decipher_all`` is safe to apply repeatedly to the same cached object.
    The debug traces use the Python 2 ``print >>`` statement form.
    """
    assert objid != 0
    if not self.xrefs:
        raise PDFException('PDFDocument is not initialized')
    if self.debug >= 2:
        print >> sys.stderr, 'getobj: objid=%r' % (objid)

    if objid not in self._cached_objs:
        for table in self.xrefs:
            try:
                (holder_id, pos, genno) = table.get_pos(objid)
            except KeyError:
                # This table has no entry for objid; try the next one.
                continue
            try:
                if holder_id is None:
                    # No containing stream is named: load via _getobj_parse.
                    obj = self._getobj_parse(pos, objid)
                else:
                    # The entry names another object (holder_id); fetch it
                    # as a stream and extract the requested object from it.
                    holder = stream_value(self.getobj(holder_id))
                    obj = self._getobj_objstm(holder, pos, objid)
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, genno)
            except (PSEOF, PDFSyntaxError):
                # This entry could not be loaded; fall back to the next table.
                continue
            break
        else:
            # Every table was tried and none produced the object.
            raise PDFObjectNotFound(objid)
        if self.debug >= 2:
            print >> sys.stderr, 'register: objid=%r: %r' % (objid, obj)
        if self.caching:
            self._cached_objs[objid] = (obj, genno)
    else:
        (obj, genno) = self._cached_objs[objid]

    if self.decipher:
        obj = decipher_all(self.decipher, objid, genno, obj)
    return obj
def getobj(self, objid):
    """Return the object registered under ``objid``.

    Cache hits are returned straight from ``self._cached_objs``.  On a miss,
    each table in ``self.xrefs`` is asked for the object's position in turn;
    the first table whose entry can be loaded without a PSEOF /
    PDFSyntaxError wins.  A freshly loaded object is stored in the cache (if
    ``self.caching`` is set) together with its generation number.

    Raises:
        AssertionError: if ``objid`` is 0.
        PDFException: if ``self.xrefs`` is empty.
        PDFObjectNotFound: if no xref table yields a loadable object.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation had been lost.  In particular the decipher
    step is placed inside the direct-parse branch (objects taken from a
    containing stream are not deciphered here); the flattened source does
    not disambiguate this from "decipher after either branch" -- confirm
    against the original file.
    The debug traces use the Python 2 ``print >>`` statement form.
    """
    assert objid != 0
    if not self.xrefs:
        raise PDFException('PDFDocument is not initialized')
    if self.debug >= 2:
        print >>sys.stderr, 'getobj: objid=%r' % (objid)

    if objid in self._cached_objs:
        # Cached entries are (object, generation number) pairs; only the
        # object is needed on this path.
        (cached, _genno) = self._cached_objs[objid]
        return cached

    for table in self.xrefs:
        try:
            (holder_id, pos, genno) = table.get_pos(objid)
        except KeyError:
            # This table has no entry for objid; try the next one.
            continue
        try:
            if holder_id is None:
                obj = self._getobj_parse(pos, objid)
                if self.decipher:
                    obj = decipher_all(self.decipher, objid, genno, obj)
            else:
                # The entry names another object (holder_id); fetch it as a
                # stream and extract the requested object from it.
                holder = stream_value(self.getobj(holder_id))
                obj = self._getobj_objstm(holder, pos, objid)
            if isinstance(obj, PDFStream):
                obj.set_objid(objid, genno)
        except (PSEOF, PDFSyntaxError):
            # This entry could not be loaded; fall back to the next table.
            continue
        break
    else:
        # Every table was tried and none produced the object.
        raise PDFObjectNotFound(objid)

    if self.debug >= 2:
        print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
    if self.caching:
        self._cached_objs[objid] = (obj, genno)
    return obj
def getobj(self, objid):
    """Return the object with id ``objid``, or None if it cannot be loaded.

    Lookup order:
      1. ``self._cached_objs`` (cache hit).
      2. The first table in ``self.xrefs`` whose ``get_pos(objid)`` does not
         raise KeyError, giving ``(strmid, index)``.
         * truthy ``strmid``: the object is taken from the parsed contents
           of stream ``strmid`` (parsed lists are memoised in
           ``self._parsed_objs`` when ``self.caching`` is set);
         * otherwise: ``self._parser`` seeks to ``index`` and reads an
           ``<objid> <genno> obj`` header followed by the object.

    When ``self.decipher`` is set the result is passed through
    ``decipher_all`` before being returned.

    Returns None (instead of raising) when, with ``STRICT`` off, no xref
    table knows the object or the index into the containing stream is out
    of range, and whenever the parser hits PSEOF while reading the object.

    Raises:
        PDFException: if ``self.xrefs`` is empty.
        PDFSyntaxError: for the ``STRICT`` failures described above, and
            always when no ``obj`` keyword follows the header.

    Fix relative to the previous revision: when the header at ``index`` does
    not start with the expected object id, tokens are skipped up to the
    ``obj`` keyword.  The old code then stored the ``obj`` keyword itself as
    the generation number (and raised IndexError if only one token had been
    skipped); the generation number is now the token immediately preceding
    ``obj``.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation had been lost -- confirm against the original.
    The debug traces use the Python 2 ``print >>`` statement form.
    """
    if not self.xrefs:
        raise PDFException('PDFDocument is not initialized')
    if 2 <= self.debug:
        print >>sys.stderr, 'getobj: objid=%r' % (objid)
    if objid in self._cached_objs:
        # NOTE(review): the cache stores no generation number, so cached
        # objects are deciphered below with genno 0 -- confirm that is
        # intended for objects whose generation number is non-zero.
        genno = 0
        obj = self._cached_objs[objid]
    else:
        for xref in self.xrefs:
            try:
                (strmid, index) = xref.get_pos(objid)
                break
            except KeyError:
                pass
        else:
            if STRICT:
                raise PDFSyntaxError('Cannot locate objid=%r' % objid)
            # return null for a nonexistent reference.
            return None
        if strmid:
            stream = stream_value(self.getobj(strmid))
            if stream.get('Type') is not LITERAL_OBJSTM:
                if STRICT:
                    raise PDFSyntaxError('Not a stream object: %r' % stream)
            try:
                n = stream['N']
            except KeyError:
                if STRICT:
                    raise PDFSyntaxError('N is not defined: %r' % stream)
                n = 0
            if strmid in self._parsed_objs:
                objs = self._parsed_objs[strmid]
            else:
                # Parse everything in the stream until the parser runs dry.
                parser = PDFStreamParser(stream.get_data())
                parser.set_document(self)
                objs = []
                try:
                    while True:
                        (_, obj) = parser.nextobject()
                        objs.append(obj)
                except PSEOF:
                    pass
                if self.caching:
                    self._parsed_objs[strmid] = objs
            genno = 0
            # The first n*2 parsed entries are skipped; presumably they are
            # the stream's (objid, offset) header pairs -- TODO confirm.
            i = n*2+index
            try:
                obj = objs[i]
            except IndexError:
                if STRICT:
                    raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
                # return None for an invalid object number
                return None
            if isinstance(obj, PDFStream):
                obj.set_objid(objid, 0)
        else:
            self._parser.seek(index)
            (_, objid1) = self._parser.nexttoken()  # objid
            (_, genno) = self._parser.nexttoken()  # genno
            (_, kwd) = self._parser.nexttoken()
            # #### hack around malformed pdf files
            # The header did not start with the expected id: skip forward to
            # the 'obj' keyword, remembering the token just before it as the
            # generation number.
            if objid1 != objid:
                while kwd is not self.KEYWORD_OBJ:
                    genno = kwd
                    (_, kwd) = self._parser.nexttoken()
            # #### end hack around malformed pdf files
            if kwd is not self.KEYWORD_OBJ:
                raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
            try:
                (_, obj) = self._parser.nextobject()
                if isinstance(obj, PDFStream):
                    obj.set_objid(objid, genno)
            except PSEOF:
                return None
        if 2 <= self.debug:
            print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
        if self.caching:
            self._cached_objs[objid] = obj
    if self.decipher:
        obj = decipher_all(self.decipher, objid, genno, obj)
    return obj
def getobj(self, objid):
    """Return the object with id ``objid``.

    Lookup order:
      1. ``self.objs`` (cache hit).
      2. The first table in ``self.xrefs`` whose ``getpos(objid)`` does not
         raise KeyError, giving ``(strmid, index)``.
         * truthy ``strmid``: the object is taken from the parsed contents
           of stream ``strmid``, which are memoised in ``self.parsed_objs``;
         * otherwise: ``self.parser`` seeks to ``index`` and reads an
           ``<objid> <genno> obj`` header followed by the object.

    A freshly loaded object is always stored in ``self.objs``.  When
    ``self.decipher`` is set the result is passed through ``decipher_all``
    before being returned.

    Returns None when no xref table knows the object and ``STRICT`` is off.

    Raises:
        PDFException: if ``self.ready`` is false.
        PDFSyntaxError: when the object cannot be located (``STRICT``), the
            containing stream is malformed (``STRICT``), the index into the
            containing stream is out of range, or no ``obj`` keyword follows
            the header.

    Fix relative to the previous revision: ``self.parsed_objs`` was tested
    with ``strmid`` but read and written with the stream object, so the
    membership test and the stored key never agreed.  The memo is now keyed
    by ``strmid`` throughout.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation had been lost -- confirm against the original.
    The debug traces use the Python 2 ``print >>`` statement form.
    """
    if not self.ready:
        raise PDFException('PDFDocument not initialized')
    if 2 <= self.debug:
        print >>stderr, 'getobj: objid=%r' % (objid)
    if objid in self.objs:
        # NOTE(review): the cache stores no generation number, so cached
        # objects are deciphered below with genno 0 -- confirm that is
        # intended for objects whose generation number is non-zero.
        genno = 0
        obj = self.objs[objid]
    else:
        for xref in self.xrefs:
            try:
                (strmid, index) = xref.getpos(objid)
                break
            except KeyError:
                pass
        else:
            if STRICT:
                raise PDFSyntaxError('Cannot locate objid=%r' % objid)
            return None
        if strmid:
            stream = stream_value(self.getobj(strmid))
            if stream.dic.get('Type') is not LITERAL_OBJSTM:
                if STRICT:
                    raise PDFSyntaxError('Not a stream object: %r' % stream)
            try:
                n = stream.dic['N']
            except KeyError:
                if STRICT:
                    raise PDFSyntaxError('N is not defined: %r' % stream)
                n = 0
            if strmid in self.parsed_objs:
                objs = self.parsed_objs[strmid]
            else:
                # Parse everything in the stream until the parser runs dry.
                parser = PDFObjStrmParser(self, stream.get_data())
                objs = []
                try:
                    while True:
                        (_, obj) = parser.nextobject()
                        objs.append(obj)
                except PSEOF:
                    pass
                self.parsed_objs[strmid] = objs
            genno = 0
            # The first n*2 parsed entries are skipped; presumably they are
            # the stream's (objid, offset) header pairs -- TODO confirm.
            i = n*2+index
            try:
                obj = objs[i]
            except IndexError:
                raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
            if isinstance(obj, PDFStream):
                obj.set_objid(objid, 0)
        else:
            self.parser.seek(index)
            self.parser.nexttoken()  # objid (consumed, value not checked)
            (_, genno) = self.parser.nexttoken()  # genno
            (_, kwd) = self.parser.nexttoken()
            if kwd is not self.KEYWORD_OBJ:
                raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
            (_, obj) = self.parser.nextobject()
            if isinstance(obj, PDFStream):
                obj.set_objid(objid, genno)
        if 2 <= self.debug:
            print >>stderr, 'register: objid=%r: %r' % (objid, obj)
        self.objs[objid] = obj
    if self.decipher:
        obj = decipher_all(self.decipher, objid, genno, obj)
    return obj