Exemple #1
0
    def getNamedDestRoot(self):
        """
        Return the ``/Names`` array of the named-destination tree.

        Walks catalog -> ``/Names`` -> ``/Dests`` -> ``/Names`` and creates
        whichever level is missing (or is not a ``DictionaryObject``), so the
        returned array is always stored inside the ``/Dests`` dictionary.

        :return: the existing or newly created array found under
            ``/Dests`` -> ``/Names``.
        """
        catalog = self.getObject(self._root)

        # Level 1: the catalog's /Names dictionary.
        if '/Names' in catalog and isinstance(catalog['/Names'],
                                              DictionaryObject):
            name_dict = catalog['/Names']
            # Sanity check: the dictionary must be one of our own objects.
            name_ref = IndirectObject(
                self._objects.index(name_dict) + 1, 0, self)
            assert name_ref.getObject() == name_dict
        else:
            name_dict = DictionaryObject()
            catalog[NameObject('/Names')] = self._addObject(name_dict)

        # Level 2: the /Dests dictionary inside /Names.
        if '/Dests' in name_dict and isinstance(name_dict['/Dests'],
                                                DictionaryObject):
            dest_dict = name_dict['/Dests']
            dest_ref = IndirectObject(
                self._objects.index(dest_dict) + 1, 0, self)
            assert dest_ref.getObject() == dest_dict
            if '/Names' in dest_dict:
                # Everything already exists: hand back the stored array.
                return dest_dict['/Names']
        else:
            dest_dict = DictionaryObject()
            name_dict[NameObject('/Dests')] = self._addObject(dest_dict)

        # Level 3: no array yet, so attach a fresh one.
        dest_names = ArrayObject()
        dest_dict[NameObject('/Names')] = dest_names
        return dest_names
Exemple #2
0
 def getOutlineRoot(self):
     """
     Return the document's ``/Outlines`` tree, creating it when absent.

     When the catalog has no ``/Outlines`` entry, a new ``TreeObject`` is
     registered through ``_addObject`` and linked from the catalog.

     :return: the existing or newly created outline tree object.
     """
     catalog = self.getObject(self._root)

     if '/Outlines' not in catalog:
         tree = TreeObject()
         tree.update({})
         catalog[NameObject('/Outlines')] = self._addObject(tree)
         return tree

     tree = catalog['/Outlines']
     # Sanity check: the tree must be one of this writer's own objects.
     tree_ref = IndirectObject(self._objects.index(tree) + 1, 0, self)
     assert tree_ref.getObject() == tree
     return tree
Exemple #3
0
 def getOutlineRoot(self):
     """
     Fetch the outline tree stored under the catalog's ``/Outlines`` key.

     If the key is missing, an empty ``TreeObject`` is created, added to
     this writer's object list and referenced from the catalog.

     :return: the outline tree (pre-existing or freshly created).
     """
     catalog = self.getObject(self._root)
     already_present = '/Outlines' in catalog
     outline_tree = catalog['/Outlines'] if already_present else TreeObject()

     if already_present:
         # The existing tree has to be findable in our own object list.
         position = self._objects.index(outline_tree)
         reference = IndirectObject(position + 1, 0, self)
         assert reference.getObject() == outline_tree
     else:
         outline_tree.update({})
         reference = self._addObject(outline_tree)
         catalog[NameObject('/Outlines')] = reference

     return outline_tree
Exemple #4
0
    def getNamedDestRoot(self):
        """
        Locate (or build) the array holding the document's named
        destinations.

        The array lives at catalog ``/Names`` -> ``/Dests`` -> ``/Names``.
        Any level that is absent, or that is not a ``DictionaryObject``, is
        created and linked in before the array is returned.

        :return: the array stored under ``/Dests`` -> ``/Names``.
        """
        root = self.getObject(self._root)

        names_usable = ('/Names' in root
                        and isinstance(root['/Names'], DictionaryObject))
        if not names_usable:
            names = DictionaryObject()
            root[NameObject('/Names')] = self._addObject(names)
        else:
            names = root['/Names']
            names_pos = self._objects.index(names)
            # The existing dictionary must resolve back to itself.
            assert IndirectObject(names_pos + 1, 0, self).getObject() == names

        dests_usable = ('/Dests' in names
                        and isinstance(names['/Dests'], DictionaryObject))
        if not dests_usable:
            dests = DictionaryObject()
            names[NameObject('/Dests')] = self._addObject(dests)
        else:
            dests = names['/Dests']
            dests_pos = self._objects.index(dests)
            assert IndirectObject(dests_pos + 1, 0, self).getObject() == dests
            if '/Names' in dests:
                return dests['/Names']

        # Reached whenever /Dests had no /Names array yet.
        fresh_array = ArrayObject()
        dests[NameObject('/Names')] = fresh_array
        return fresh_array
Exemple #5
0
 def _sweepIndirectReferences(self, externMap, data):
     """
     Recursively rewrite ``data`` so it only refers to this writer's
     objects.

     Dictionaries and arrays are updated in place. Stream values found
     inside them are registered with ``_addObject`` and replaced by the
     returned reference. References whose ``pdf`` is another document are
     copied into ``self._objects`` and remembered in ``externMap``.

     :param externMap: nested mapping ``pdf -> generation -> idnum`` to the
         local ``IndirectObject`` that replaces that foreign reference.
     :param data: the object to sweep.
     :return: ``data`` itself, or the local reference replacing it.
     """
     if isinstance(data, DictionaryObject):
         for name, child in data.items():
             swept = self._sweepIndirectReferences(externMap, child)
             # a dictionary value is a stream.  streams must be indirect
             # objects, so we need to change this value.
             if isinstance(swept, StreamObject):
                 swept = self._addObject(swept)
             data[name] = swept
         return data

     if isinstance(data, ArrayObject):
         for position in range(len(data)):
             swept = self._sweepIndirectReferences(externMap, data[position])
             # an array value is a stream.  streams must be indirect
             # objects, so we need to change this value
             if isinstance(swept, StreamObject):
                 swept = self._addObject(swept)
             data[position] = swept
         return data

     if not isinstance(data, IndirectObject):
         return data

     if data.pdf == self:
         # Internal references stay as they are; their targets are swept
         # once, with ``self.stack`` guarding against reference cycles.
         if data.idnum not in self.stack:
             self.stack.append(data.idnum)
             target = self.getObject(data)
             self._sweepIndirectReferences(externMap, target)
             self.stack.pop()
         return data

     # Foreign reference: reuse the local copy if one was already made.
     per_generation = externMap.get(data.pdf, {})
     per_idnum = per_generation.get(data.generation, {})
     known_ref = per_idnum.get(data.idnum, None)
     if known_ref is not None:
         return known_ref

     foreign_obj = data.pdf.getObject(data)
     # Reserve the slot and publish the mapping BEFORE recursing, so that
     # references back to this object found during the sweep resolve to the
     # reserved slot instead of triggering another copy.
     self._objects.append(None)  # placeholder
     slot = len(self._objects)
     local_ref = IndirectObject(slot, 0, self)
     generation_map = externMap.setdefault(data.pdf, {})
     generation_map.setdefault(data.generation, {})
     externMap[data.pdf][data.generation][data.idnum] = local_ref
     swept_obj = self._sweepIndirectReferences(externMap, foreign_obj)
     self._objects[slot - 1] = swept_obj
     return local_ref
Exemple #6
0
 def _addObject(self, obj):
     """
     Append ``obj`` to this writer's object list.

     :param obj: the object to register.
     :return: an ``IndirectObject`` (generation 0) whose id is the 1-based
         position ``obj`` now occupies in ``self._objects``.
     """
     registry = self._objects
     registry.append(obj)
     new_id = len(registry)
     return IndirectObject(new_id, 0, self)
Exemple #7
0
 def getReference(self, obj):
     """
     Build the indirect reference that points at ``obj``.

     ``obj`` is looked up with ``self._objects.index``, so it must already
     be registered (presumably ``ValueError`` otherwise, if ``_objects`` is
     a plain list).

     :param obj: an object stored in ``self._objects``.
     :return: an ``IndirectObject`` (generation 0) with the 1-based
         position of ``obj``.
     """
     position = self._objects.index(obj)
     reference = IndirectObject(position + 1, 0, self)
     # The reference must resolve back to the object it was built from.
     assert reference.getObject() == obj
     return reference
Exemple #8
0
    def write(self, stream):
        """
        Serialize every object held by this writer to ``stream``.

        Output order: header, each object as ``N 0 obj ... endobj``, the
        xref table, the trailer dictionary, then the ``startxref`` offset
        and the end-of-file marker.

        :param stream: destination for the output; must provide ``write()``
            and ``tell()``.
        """
        externalReferenceMap = {}

        # PDF objects sometimes have circular references to their /Page objects
        # inside their object tree (for example, annotations).  Those will be
        # indirect references to objects that we've recreated in this PDF.  To
        # address this problem, PageObjects store their original object
        # reference number, and we add it to the external reference map before
        # we sweep for indirect references.  This forces self-page-referencing
        # trees to reference the correct new object location, rather than
        # copying in a new copy of the page object.
        #
        # ``enumerate`` replaces the former ``xrange(len(...))`` index loop:
        # ``xrange`` is not a builtin on Python 3.
        for objIndex, obj in enumerate(self._objects):
            if isinstance(obj, PageObject) and obj.indirectRef is not None:
                data = obj.indirectRef
                externalReferenceMap.setdefault(data.pdf, {})
                externalReferenceMap[data.pdf].setdefault(data.generation, {})
                externalReferenceMap[data.pdf][data.generation][data.idnum] = \
                    IndirectObject(objIndex + 1, 0, self)

        # ``self.stack`` only exists for the duration of the sweep; it
        # tracks the object ids currently being visited.
        self.stack = []
        self._sweepIndirectReferences(externalReferenceMap, self._root)
        del self.stack

        # Begin writing:
        object_positions = []
        stream.write(self._header + b_("\n"))
        for idnum, obj in enumerate(self._objects, 1):
            # Remember where each object starts for the xref table below.
            object_positions.append(stream.tell())
            stream.write(b_(str(idnum) + " 0 obj\n"))
            key = None
            # The /Encrypt dictionary itself is written without a key.
            if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
                # Per-object key: MD5 over the document key followed by the
                # low 3 bytes of the object number and 2 bytes of the
                # generation (always 0 here), truncated to at most 16 bytes.
                pack1 = struct.pack("<i", idnum)[:3]
                pack2 = struct.pack("<i", 0)[:2]
                key = self._encrypt_key + pack1 + pack2
                assert len(key) == (len(self._encrypt_key) + 5)
                md5_hash = md5(key).digest()
                key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
            # NOTE(review): a ``None`` entry gets an "obj" header but no
            # body and no "endobj" - behavior kept from the original.
            if obj is not None:
                obj.writeToStream(stream, key)
                stream.write(b_("\nendobj\n"))

        # xref table
        xref_location = stream.tell()
        stream.write(b_("xref\n"))
        stream.write(b_("0 %s\n" % (len(self._objects) + 1)))
        # Entry 0 is the free-list head; every real object is in use ("n").
        stream.write(b_("%010d %05d f \n" % (0, 65535)))
        for offset in object_positions:
            stream.write(b_("%010d %05d n \n" % (offset, 0)))

        # trailer
        stream.write(b_("trailer\n"))
        trailer = DictionaryObject()
        trailer.update({
            NameObject("/Size"):
            NumberObject(len(self._objects) + 1),
            NameObject("/Root"):
            self._root,
            NameObject("/Info"):
            self._info
        })
        if hasattr(self, "_ID"):
            trailer[NameObject("/ID")] = self._ID
        if hasattr(self, "_encrypt"):
            trailer[NameObject("/Encrypt")] = self._encrypt
        trailer.writeToStream(stream, None)

        # eof
        stream.write(b_("\nstartxref\n%s\n%%%%EOF\n" % (xref_location)))
Exemple #9
0
    def getObject(self, indirectReference):
        """
        Resolve ``indirectReference`` to the object it designates.

        Sources are tried in order: the ``resolvedObjects`` cache, the
        object stream listed in ``xref_objStm`` (generation 0 only), and
        finally the byte offset recorded in ``xref``.

        :param indirectReference: reference carrying ``idnum`` and
            ``generation`` attributes.
        :return: the resolved (and, for encrypted files, decrypted) object,
            or ``None`` after a ``PdfReadWarning`` when ``xref`` has no
            entry for the requested object number.
        :raises utils.PdfReadError: if the object header found at the xref
            offset names a different object number, except when
            ``xrefIndex`` is set and ``strict`` is off.
        :raises Exception: if the file is encrypted and ``_decryption_key``
            has not been set.
        """
        want_id = indirectReference.idnum
        want_gen = indirectReference.generation

        retval = self.resolvedObjects.get(want_gen, {}).get(want_id, None)
        if retval is not None:
            return retval

        if want_gen == 0 and want_id in self.xref_objStm:
            # indirect reference to object in object stream
            # read the entire object stream into memory
            stmnum, idx = self.xref_objStm[want_id]
            objStm = IndirectObject(stmnum, 0, self).getObject()
            assert objStm['/Type'] == '/ObjStm'
            assert idx < objStm['/N']
            streamData = StringIO(objStm.getData())
            for _ in range(objStm['/N']):
                # Each index entry is "<object number> <offset>".
                objnum = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                offset = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                # Jump to the object body, parse it, then come back to the
                # index so the next entry can be read.
                t = streamData.tell()
                streamData.seek(objStm['/First'] + offset, 0)
                obj = readObject(streamData, self)
                # setdefault: the cache read above tolerates a missing
                # generation bucket, so the write must not assume one.
                self.resolvedObjects.setdefault(0, {})[objnum] = obj
                streamData.seek(t, 0)
            return self.resolvedObjects[0][want_id]

        if want_id not in self.xref[want_gen]:
            warnings.warn("Object %d %d not defined." % (want_id, want_gen),
                          utils.PdfReadWarning)
            return None

        start = self.xref[want_gen][want_id]
        self.stream.seek(start, 0)
        idnum, generation = self.readObjectHeader(self.stream)
        # Explicit comparison instead of ``assert`` + ``except
        # AssertionError``: asserts are removed under ``python -O``, which
        # would silently disable this check.
        if idnum != want_id:
            if self.xrefIndex:
                # Xref table probably had bad indexes due to not
                # being zero-indexed
                if self.strict:
                    raise utils.PdfReadError(
                        "Expected object ID (%d %d) does "
                        "not match actual (%d %d); xref "
                        "table not zero-indexed." % (
                            want_id, want_gen, idnum, generation))
                # else: should not happen since the xref table is corrected
                # in non-strict mode
            else:  # some other problem
                raise utils.PdfReadError("Expected object ID (%d %d) does not "
                                         " match actual (%d %d)." % (
                                             want_id, want_gen,
                                             idnum, generation))
        assert generation == want_gen
        retval = readObject(self.stream, self)
        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self.isEncrypted:
            # if we don't have the encryption key:
            if not hasattr(self, '_decryption_key'):
                raise Exception("file has not been decrypted")
            # otherwise, decrypt here...
            # Per-object key: MD5 over the document key followed by the low
            # 3 bytes of the object number and 2 bytes of the generation,
            # truncated to at most 16 bytes.
            pack1 = struct.pack("<i", want_id)[:3]
            pack2 = struct.pack("<i", want_gen)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

        # Cached under the id/generation read from the object header.
        self.cacheIndirectObject(generation, idnum, retval)
        return retval
Exemple #10
0
    def getObject(self, indirectReference):
        """
        Return the object that ``indirectReference`` points to.

        Lookup order:

        1. the ``resolvedObjects`` cache;
        2. for generation 0, the object stream named in ``xref_objStm``
           (every object in that stream is parsed and cached);
        3. the offset stored in ``xref``, read from ``self.stream``.

        :param indirectReference: object exposing ``idnum`` and
            ``generation``.
        :return: the resolved object (decrypted when the file is
            encrypted), or ``None`` when ``xref`` does not list the object;
            a ``PdfReadWarning`` is issued in that case.
        :raises utils.PdfReadError: when the header at the xref offset
            carries another object number, unless ``xrefIndex`` is set and
            ``strict`` is off.
        :raises Exception: when the file is encrypted but no
            ``_decryption_key`` is available.
        """
        retval = self.resolvedObjects.get(indirectReference.generation,
                                          {}).get(indirectReference.idnum,
                                                  None)
        if retval is not None:
            return retval
        if indirectReference.generation == 0 \
                and indirectReference.idnum in self.xref_objStm:
            # indirect reference to object in object stream
            # read the entire object stream into memory
            stmnum, idx = self.xref_objStm[indirectReference.idnum]
            objStm = IndirectObject(stmnum, 0, self).getObject()
            assert objStm['/Type'] == '/ObjStm'
            assert idx < objStm['/N']
            streamData = StringIO(objStm.getData())
            for _ in range(objStm['/N']):
                # Index entry: object number, then offset of its body.
                objnum = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                offset = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                # Save the index position, parse the body, then return.
                t = streamData.tell()
                streamData.seek(objStm['/First'] + offset, 0)
                obj = readObject(streamData, self)
                # The generation-0 bucket is created on demand, mirroring
                # the tolerant ``.get(..., {})`` lookup at the top.
                self.resolvedObjects.setdefault(0, {})[objnum] = obj
                streamData.seek(t, 0)
            return self.resolvedObjects[0][indirectReference.idnum]
        if indirectReference.idnum \
                not in self.xref[indirectReference.generation]:
            warnings.warn(
                "Object %d %d not defined." %
                (indirectReference.idnum, indirectReference.generation),
                utils.PdfReadWarning)
            return None
        start = self.xref[indirectReference.generation][
            indirectReference.idnum]
        self.stream.seek(start, 0)
        idnum, generation = self.readObjectHeader(self.stream)
        # A plain ``if`` is used here because an ``assert`` wrapped in
        # ``except AssertionError`` stops checking anything once Python runs
        # with ``-O`` (assert statements are compiled away).
        if idnum != indirectReference.idnum:
            if not self.xrefIndex:
                # some other problem
                raise utils.PdfReadError(
                    "Expected object ID (%d %d) does not "
                    " match actual (%d %d)." %
                    (indirectReference.idnum, indirectReference.generation,
                     idnum, generation))
            # Xref table probably had bad indexes due to not
            # being zero-indexed
            if self.strict:
                raise utils.PdfReadError(
                    "Expected object ID (%d %d) does "
                    "not match actual (%d %d); xref "
                    "table not zero-indexed." %
                    (indirectReference.idnum, indirectReference.generation,
                     idnum, generation))
            # non-strict: should not happen since the xref table is
            # corrected in non-strict mode; carry on with what was read.
        assert generation == indirectReference.generation
        retval = readObject(self.stream, self)
        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self.isEncrypted:
            # if we don't have the encryption key:
            if not hasattr(self, '_decryption_key'):
                raise Exception("file has not been decrypted")
            # otherwise, decrypt here...
            # Key = MD5(document key + 3 low bytes of the object number +
            # 2 low bytes of the generation), cut to at most 16 bytes.
            pack1 = struct.pack("<i", indirectReference.idnum)[:3]
            pack2 = struct.pack("<i", indirectReference.generation)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

        # Stored under the id/generation taken from the object header.
        self.cacheIndirectObject(generation, idnum, retval)
        return retval
Exemple #11
0
 def getReference(self, obj):
     """
     Return an indirect reference to an object already held by this
     writer.

     The object is located through ``self._objects.index``; the reference
     uses generation 0 and the 1-based position found.

     :param obj: an object present in ``self._objects``.
     :return: the ``IndirectObject`` designating ``obj``.
     """
     one_based_id = self._objects.index(obj) + 1
     candidate = IndirectObject(one_based_id, 0, self)
     # Guard against the lookup landing on a different object.
     assert candidate.getObject() == obj
     return candidate