예제 #1
0
 def getobj(self, objid):
     """Return the object registered under ``objid``.

     A hit in ``self._cached_objs`` is used directly.  Otherwise every
     entry of ``self.xrefs`` is asked in turn for the object's position,
     and the first one whose object loads without ``PSEOF`` or
     ``PDFSyntaxError`` wins.  Whenever ``self.decipher`` is set, the
     result is passed through ``decipher_all`` before being returned
     (this happens for cache hits as well).

     Raises ``PDFException`` when ``self.xrefs`` is empty and
     ``PDFObjectNotFound`` when no xref yields a loadable object.
     """
     assert objid != 0
     if not self.xrefs:
         raise PDFException('PDFDocument is not initialized')
     if self.debug >= 2:
         print >> sys.stderr, 'getobj: objid=%r' % (objid)
     if objid not in self._cached_objs:
         for table in self.xrefs:
             try:
                 (container_id, pos, gen) = table.get_pos(objid)
             except KeyError:
                 # This xref does not list the object; ask the next one.
                 continue
             try:
                 if container_id is None:
                     found = self._getobj_parse(pos, objid)
                 else:
                     # The position refers into another object, which is
                     # fetched through a recursive getobj() call first.
                     container = stream_value(self.getobj(container_id))
                     found = self._getobj_objstm(container, pos, objid)
                 if isinstance(found, PDFStream):
                     found.set_objid(objid, gen)
             except (PSEOF, PDFSyntaxError):
                 # Unreadable at this position; fall through to the next xref.
                 continue
             else:
                 break
         else:
             raise PDFObjectNotFound(objid)
         if self.debug >= 2:
             print >> sys.stderr, 'register: objid=%r: %r' % (objid, found)
         if self.caching:
             # The object is cached together with its generation number.
             self._cached_objs[objid] = (found, gen)
     else:
         (found, gen) = self._cached_objs[objid]
     if self.decipher:
         found = decipher_all(self.decipher, objid, gen, found)
     return found
예제 #2
0
    def getobj(self, objid):
        """Look up ``objid`` and return the corresponding object.

        Cached entries in ``self._cached_objs`` are returned as stored.
        On a miss, each xref in ``self.xrefs`` is tried in order; the
        first position that loads without ``PSEOF`` or
        ``PDFSyntaxError`` supplies the object.  Only objects loaded via
        ``self._getobj_parse`` are run through ``decipher_all`` (when
        ``self.decipher`` is set); objects taken from another stream via
        ``self._getobj_objstm`` are not.

        Raises ``PDFException`` when ``self.xrefs`` is empty and
        ``PDFObjectNotFound`` when no xref yields a loadable object.
        """
        assert objid != 0
        if not self.xrefs:
            raise PDFException('PDFDocument is not initialized')
        if self.debug >= 2:
            print >>sys.stderr, 'getobj: objid=%r' % (objid)

        # Fast path: the object was loaded before.
        if objid in self._cached_objs:
            (cached, _gen) = self._cached_objs[objid]
            return cached

        for table in self.xrefs:
            try:
                (container_id, pos, gen) = table.get_pos(objid)
            except KeyError:
                # Not listed in this xref; ask the next one.
                continue
            try:
                if container_id is None:
                    loaded = self._getobj_parse(pos, objid)
                    if self.decipher:
                        loaded = decipher_all(self.decipher, objid, gen, loaded)
                else:
                    container = stream_value(self.getobj(container_id))
                    loaded = self._getobj_objstm(container, pos, objid)
                if isinstance(loaded, PDFStream):
                    loaded.set_objid(objid, gen)
            except (PSEOF, PDFSyntaxError):
                # Unreadable at this position; try the next xref.
                continue
            else:
                break
        else:
            raise PDFObjectNotFound(objid)

        if self.debug >= 2:
            print >>sys.stderr, 'register: objid=%r: %r' % (objid, loaded)
        if self.caching:
            # What is cached is the already-deciphered object.
            self._cached_objs[objid] = (loaded, gen)
        return loaded
예제 #3
0
파일: pdfparser.py 프로젝트: xarg/pdfminer
 def getobj(self, objid):
     """Return the object registered under ``objid``.

     Lookup order: the ``self._cached_objs`` cache, then the first entry
     of ``self.xrefs`` whose ``get_pos`` knows the id.  A truthy stream id
     means the object is picked out of the parsed contents of that stream
     (itself fetched with a recursive ``getobj`` call); otherwise the
     object is parsed from ``self._parser`` at the returned offset.  When
     ``self.decipher`` is set the result is passed through
     ``decipher_all`` before being returned.

     Returns None when ``STRICT`` is false and the id is unknown or the
     index into the stream is out of range, and whenever ``PSEOF`` is hit
     while parsing the object body.

     Raises ``PDFException`` if ``self.xrefs`` is empty.  Raises
     ``PDFSyntaxError`` for an invalid object header, and -- only when
     ``STRICT`` is true -- for an unknown id, a stream whose ``Type`` is
     not ``LITERAL_OBJSTM``, a missing ``N`` entry, or an out-of-range
     index.
     """
     if not self.xrefs:
         raise PDFException('PDFDocument is not initialized')
     if 2 <= self.debug:
         print >>sys.stderr, 'getobj: objid=%r' % (objid)
     if objid in self._cached_objs:
         # The cache stores bare objects, so no generation number is
         # available on a hit; 0 is used for deciphering.
         # NOTE(review): cached objects go through decipher_all again on
         # every hit -- confirm decipher_all tolerates repeated calls.
         genno = 0
         obj = self._cached_objs[objid]
     else:
         for xref in self.xrefs:
             try:
                 (strmid, index) = xref.get_pos(objid)
                 break
             except KeyError:
                 pass
         else:
             if STRICT:
                 raise PDFSyntaxError('Cannot locate objid=%r' % objid)
             # return null for a nonexistent reference.
             return None
         if strmid:
             # The object lives inside another stream object.
             stream = stream_value(self.getobj(strmid))
             if stream.get('Type') is not LITERAL_OBJSTM:
                 if STRICT:
                     raise PDFSyntaxError('Not a stream object: %r' % stream)
             try:
                 n = stream['N']
             except KeyError:
                 if STRICT:
                     raise PDFSyntaxError('N is not defined: %r' % stream)
                 n = 0
             if strmid in self._parsed_objs:
                 objs = self._parsed_objs[strmid]
             else:
                 # Parse everything in the stream once, until PSEOF.
                 parser = PDFStreamParser(stream.get_data())
                 parser.set_document(self)
                 objs = []
                 try:
                     while True:
                         (_,obj) = parser.nextobject()
                         objs.append(obj)
                 except PSEOF:
                     pass
                 if self.caching:
                     self._parsed_objs[strmid] = objs
             genno = 0
             # Skip the first n*2 parsed items (presumably the N
             # objid/offset header pairs -- confirm against the object
             # stream layout) to reach the index-th contained object.
             i = n*2+index
             try:
                 obj = objs[i]
             except IndexError:
                 if STRICT:
                     raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
                 # return None for an invalid object number
                 return None
             if isinstance(obj, PDFStream):
                 obj.set_objid(objid, 0)
         else:
             # Expect "<objid> <genno> obj" at the recorded offset.
             self._parser.seek(index)
             (_,objid1) = self._parser.nexttoken() # objid
             (_,genno) = self._parser.nexttoken() # genno
             (_,kwd) = self._parser.nexttoken()
             # Hack around malformed pdf files: when the first token is not
             # the requested id, scan forward to the `obj` keyword and take
             # the token immediately before it as the generation number.
             # (Previously the keyword itself ended up in genno, and a scan
             # of exactly one extra token raised IndexError.)
             if objid1 != objid:
                 while kwd is not self.KEYWORD_OBJ:
                     genno = kwd
                     (_,kwd) = self._parser.nexttoken()
             if kwd is not self.KEYWORD_OBJ:
                 raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
             try:
                 (_,obj) = self._parser.nextobject()
                 if isinstance(obj, PDFStream):
                     obj.set_objid(objid, genno)
             except PSEOF:
                 return None
         if 2 <= self.debug:
             print >>sys.stderr, 'register: objid=%r: %r' % (objid, obj)
         if self.caching:
             self._cached_objs[objid] = obj
     if self.decipher:
         obj = decipher_all(self.decipher, objid, genno, obj)
     return obj
예제 #4
0
파일: pdfparser.py 프로젝트: Big-Data/pypes
 def getobj(self, objid):
   """Return the object registered under ``objid``.

   Lookup order: the ``self.objs`` cache, then the first entry of
   ``self.xrefs`` whose ``getpos`` knows the id.  A truthy stream id means
   the object is picked out of the parsed contents of that stream (itself
   fetched with a recursive ``getobj`` call); otherwise the object is
   parsed from ``self.parser`` at the returned offset.  Newly loaded
   objects are stored in ``self.objs``.  When ``self.decipher`` is set the
   result is passed through ``decipher_all`` before being returned.

   Returns None when ``STRICT`` is false and the id is unknown.

   Raises ``PDFException`` if ``self.ready`` is false.  Raises
   ``PDFSyntaxError`` for an out-of-range index into a stream or an
   invalid object header, and -- only when ``STRICT`` is true -- for an
   unknown id, a stream whose ``Type`` is not ``LITERAL_OBJSTM``, or a
   missing ``N`` entry.
   """
   if not self.ready:
     raise PDFException('PDFDocument not initialized')
   if 2 <= self.debug:
     print >>stderr, 'getobj: objid=%r' % (objid)
   if objid in self.objs:
     # The cache stores bare objects, so no generation number is
     # available on a hit; 0 is used for deciphering.
     genno = 0
     obj = self.objs[objid]
   else:
     for xref in self.xrefs:
       try:
         (strmid, index) = xref.getpos(objid)
         break
       except KeyError:
         pass
     else:
       if STRICT:
         raise PDFSyntaxError('Cannot locate objid=%r' % objid)
       return None
     if strmid:
       # The object lives inside another stream object.
       stream = stream_value(self.getobj(strmid))
       if stream.dic.get('Type') is not LITERAL_OBJSTM:
         if STRICT:
           raise PDFSyntaxError('Not a stream object: %r' % stream)
       try:
         n = stream.dic['N']
       except KeyError:
         if STRICT:
           raise PDFSyntaxError('N is not defined: %r' % stream)
         n = 0
       # The parsed-contents cache is keyed by the stream's object id for
       # the membership test, the read and the write alike.  (Previously
       # the test used strmid while read/write used the stream object, so
       # the test never matched what had been stored.)
       if strmid in self.parsed_objs:
         objs = self.parsed_objs[strmid]
       else:
         # Parse everything in the stream once, until PSEOF.
         parser = PDFObjStrmParser(self, stream.get_data())
         objs = []
         try:
           while True:
             (_,obj) = parser.nextobject()
             objs.append(obj)
         except PSEOF:
           pass
         self.parsed_objs[strmid] = objs
       genno = 0
       # Skip the first n*2 parsed items (presumably the N objid/offset
       # header pairs -- confirm against the object stream layout) to
       # reach the index-th contained object.
       i = n*2+index
       try:
         obj = objs[i]
       except IndexError:
         raise PDFSyntaxError('Invalid object number: objid=%r' % (objid))
       if isinstance(obj, PDFStream):
         obj.set_objid(objid, 0)
     else:
       # Expect "<objid> <genno> obj" at the recorded offset.
       self.parser.seek(index)
       # objid1 is read only to advance the parser; it is deliberately not
       # checked against the requested objid.
       (_,objid1) = self.parser.nexttoken() # objid
       (_,genno) = self.parser.nexttoken() # genno
       (_,kwd) = self.parser.nexttoken()
       if kwd is not self.KEYWORD_OBJ:
         raise PDFSyntaxError('Invalid object spec: offset=%r' % index)
       (_,obj) = self.parser.nextobject()
       if isinstance(obj, PDFStream):
         obj.set_objid(objid, genno)
     if 2 <= self.debug:
       print >>stderr, 'register: objid=%r: %r' % (objid, obj)
     self.objs[objid] = obj
   if self.decipher:
     obj = decipher_all(self.decipher, objid, genno, obj)
   return obj