def readindirect(self, objnum, gennum):
        ''' Return the indirect object (objnum, gennum).

            The object's record is looked up in
            self.indirect_objects.  If the record already holds a
            value other than self.unresolved, that value is
            returned; otherwise the object is parsed from
            self.fdata starting at the offset kept in the record.
        '''
        def remember(value):
            # Put the parsed object into its record so later
            # lookups are answered from the cache
            slot[1] = value

        def plain(tokens, remember, value):
            # Fallback handler: the first token is itself the
            # object (nothing in self.special matched it)
            remember(value)
            return value

        fdata = self.fdata
        objnum = int(objnum)
        gennum = int(gennum)
        slot = self.indirect_objects[fdata, objnum, gennum]
        cached = slot[1]
        if cached is not self.unresolved:
            return cached

        # The object must begin with "<objnum> <gennum> obj"
        tokens = PdfTokens(fdata, slot[0])
        header = tokens.multiple(3)
        assert int(header[0]) == objnum, header
        assert int(header[1]) == gennum, header
        assert header[2] == 'obj', header

        # Dispatch on the first token: self.special supplies the
        # handlers for tokens that need dedicated parsing
        first = tokens.next()
        handler = self.special.get(first, plain)
        result = handler(tokens, remember, first)
        self.readstream(result, tokens)
        result.indirect = True
        return result
    def readindirect(self, objnum, gennum):
        ''' Resolve the indirect object numbered objnum with
            generation gennum.

            A record whose value is no longer self.unresolved is
            answered straight from self.indirect_objects; any
            other record is parsed from self.fdata at the offset
            it stores, and the parsed object is returned.
        '''
        fdata = self.fdata
        objnum = int(objnum)
        gennum = int(gennum)
        entry = self.indirect_objects[fdata, objnum, gennum]
        known = entry[1]
        if known is not self.unresolved:
            return known

        def cache(parsed):
            # Record the parsed object in the lookup entry
            entry[1] = parsed

        def passthrough(reader, cache, parsed):
            # Used when self.special has no handler for the
            # token: the token is the whole object
            cache(parsed)
            return parsed

        # Check the "<objnum> <gennum> obj" header found at the
        # recorded offset
        reader = PdfTokens(fdata, entry[0])
        ident = reader.multiple(3)
        assert int(ident[0]) == objnum, ident
        assert int(ident[1]) == gennum, ident
        assert ident[2] == 'obj', ident

        # Parse the body, then pick up an optional stream and
        # flag the result as indirect
        opener = reader.next()
        parse = self.special.get(opener, passthrough)
        parsed = parse(reader, cache, opener)
        self.readstream(parsed, reader)
        parsed.indirect = True
        return parsed
 def readxref(fdata):
     ''' Locate the last 'startxref' keyword in fdata and
         validate the three tokens found from there: the
         keyword itself, an all-digit offset, and '%%EOF'.

         Returns a tuple of the keyword's position and a
         PdfTokens created at the offset that follows it.
         fdata.rindex raises ValueError when the keyword is
         absent.
     '''
     marker_pos = fdata.rindex('startxref')
     trailer = list(PdfTokens(fdata, marker_pos, False))
     assert len(trailer) == 3, trailer
     assert trailer[0] == 'startxref', trailer[0]
     assert trailer[1].isdigit(), trailer[1]
     assert trailer[2].rstrip() == '%%EOF', repr(trailer[2])
     xref_offset = int(trailer[1])
     return marker_pos, PdfTokens(fdata, xref_offset)
    def readstream(obj, source):
        ''' Consume what follows a parsed object: either the
            'endobj' keyword, or a stream whose data is stored
            on obj._stream.
        '''
        keyword = source.next()
        if keyword == 'endobj':
            return  # No stream

        assert isinstance(obj, PdfDict)
        assert keyword == 'stream', keyword
        data = source.fdata

        # Find the end of the 'stream' keyword just consumed and
        # step over the line ending ('\n' or '\r\n') after it
        pos = data.rindex(keyword, 0, source.floc) + len(keyword)
        if data[pos] == '\r':
            pos += 1
        assert data[pos] == '\n'

        terminator = 'endstream endobj'.split()
        begin = pos + 1
        end = begin + int(obj.Length)
        obj._stream = data[begin:end]
        closing = PdfTokens(data, end).multiple(2)
        if closing != terminator:
            # /Length attribute is broken, try to read stream
            # anyway disregarding the specified value
            # TODO: issue warning here once we have some kind of
            # logging
            end = data.index('endstream', begin)
            # Leave one line ending before 'endstream' out of
            # the stream data
            if data[end - 2:end] == '\r\n':
                end -= 2
            elif data[end - 1] in ('\n', '\r'):
                end -= 1
            closing = PdfTokens(data, end).multiple(2)
            assert closing == terminator
            obj.Length = str(end - begin)
            obj._stream = data[begin:end]
    def readstream(obj, source):
        ''' Handle the tokens after an object body.  'endobj'
            means there is no stream; otherwise the token must
            be 'stream' and the stream data is sliced out of
            source.fdata into obj._stream.
        '''
        tok = source.next()
        if tok == 'endobj':
            return  # No stream

        assert isinstance(obj, PdfDict)
        assert tok == 'stream', tok

        buf = source.fdata
        cursor = buf.rindex(tok, 0, source.floc) + len(tok)
        # The keyword is followed by '\n', optionally preceded
        # by '\r'
        if buf[cursor] == '\r':
            cursor += 1
        assert buf[cursor] == '\n'
        first = cursor + 1

        # First attempt: trust the /Length entry
        last = first + int(obj.Length)
        obj._stream = buf[first:last]
        tail = PdfTokens(buf, last).multiple(2)
        length_is_wrong = tail != 'endstream endobj'.split()
        if not length_is_wrong:
            return

        # /Length attribute is broken, try to read stream
        # anyway disregarding the specified value
        # TODO: issue warning here once we have some kind of
        # logging
        last = buf.index('endstream', first)
        if buf[last - 2:last] == '\r\n':
            last -= 2
        elif buf[last - 1] in ('\n', '\r'):
            last -= 1
        tail = PdfTokens(buf, last).multiple(2)
        assert tail == 'endstream endobj'.split()
        obj.Length = str(last - first)
        obj._stream = buf[first:last]