Beispiel #1
0
    def _chop_images(self):
        """Crop the empty borders of the image referenced by each page.

        For every entry of ``self._flattened_pages`` the content stream is
        tokenised with ``_pattern_space``, the XObject named by
        ``_get_image_name_from`` is looked up, and - when that XObject has
        ``/Subtype`` equal to ``/Image`` - it is handed to
        ``_im.chop_off_image_empty_edges``.  The image dictionary, the page
        content stream and elements 2 and 3 of the media box are then
        rewritten with the returned width and height.  XObjects that are not
        images are only logged.
        """
        total_pages = len(self._flattened_pages)
        for index, page in enumerate(self._flattened_pages):
            page_number = index + 1
            content_bytes: bytes = page[_k.CONTENT].get_data()
            tokens = _pattern_space.split(content_bytes)
            xobjects = page[_k.RESOURCES][b'/XObject']
            image = xobjects[_get_image_name_from(tokens)]
            _u.debug(tokens, len(xobjects), image)
            raw_image = image.get_data()

            is_image = b'/Subtype' in image and image[b'/Subtype'] == b'/Image'
            if not is_image:
                _u.debug(raw_image)
                continue

            cropped = _im.chop_off_image_empty_edges(image, raw_image, page_number)
            width, height, compressed_length, compressed_data = cropped
            image[NameObject(b'/Length')] = NumberObject(compressed_length)
            image[NameObject(b'/Width')] = NumberObject(width)
            image[NameObject(b'/Height')] = NumberObject(height)
            image._bytes_data = compressed_data

            # We might need to insert this matrix in the below line: 1 0 0 1 0 100 cm
            # tokens[-5] is presumably the image name operand of the original
            # ``Do`` operator - TODO confirm against _pattern_space.
            new_content = b''.join([
                b'q ',
                _u.s2b(str(width)),
                b' 0 0 ',
                _u.s2b(str(height)),
                b' 0 0 cm ',
                tokens[-5],
                b' Do Q',
            ])
            page[_k.CONTENT].set_data(new_content)

            # Only elements 2 and 3 of the media box are resized; elements 0
            # and 1 are left as they were.
            page[_k.MEDIA_BOX][2] = NumberObject(width)
            page[_k.MEDIA_BOX][3] = NumberObject(height)
            _u.debug(
                'Chopped empty edges for {:4}/{} image.'.format(page_number, total_pages),
                page[_k.MEDIA_BOX], page[_k.MEDIA_BOX][2:], tokens, width, height,
                page[_k.CONTENT].get_data(),
            )
Beispiel #2
0
    def __init__(self):
        """Create an empty document skeleton.

        Four indirect objects are registered through ``_add_object``, in this
        order: the page-tree root, the info dictionary, an empty outlines
        dictionary and the catalog that points at the page tree and outlines.
        """
        self._objects = []  # array of indirect objects
        self._id = None
        self._encrypt = None
        self._encrypt_key = None

        # Page tree root: starts with no kids and a count of zero.
        page_tree = DictObject()
        page_tree[NameObject(_k.TYPE)] = NameObject(_k.PAGES)
        page_tree[NameObject(_k.COUNT)] = NumberObject(0)
        page_tree[NameObject(_k.KIDS)] = ArrayObject()
        self._pages = self._add_object(page_tree)

        # Document information dictionary.
        document_info = DictObject()
        document_info[NameObject(b'/Producer')] = create_string_object(b'PyPDF - Refactored by QXF')
        self._info = self._add_object(document_info)

        # Outlines start as an empty dictionary and are registered before the
        # catalog that refers to them.
        self.__outlines = self._add_object(DictObject())

        # Catalog (document root).
        catalog = DictObject()
        catalog[NameObject(_k.TYPE)] = NameObject(b'/Catalog')
        catalog[NameObject(_k.PAGES)] = self._pages
        catalog[NameObject(_k.OUTLINES)] = self.__outlines
        self._root = self._add_object(catalog)
Beispiel #3
0
    def encrypt(self, user_pwd, owner_pwd=None, use_128bit=True):
        """Encrypt this PDF file with the PDF Standard encryption handler.

        Builds the ``/Encrypt`` dictionary, registers it as an indirect
        object, and stores the document ID and the encryption key on the
        instance (``self._id``, ``self._encrypt``, ``self._encrypt_key``).

        user_pwd - The "user password", which allows for opening and reading
                the PDF file with the restrictions provided.
        owner_pwd - The "owner password", which allows for opening the PDF
                files without any restrictions.  By default, the owner password is the
                same as the user password.
        use_128bit - Boolean argument as to whether to use 128bit
                encryption.  When false, 40bit encryption will be used.  By default, this
                flag is on.
        """
        if owner_pwd is None:
            owner_pwd = user_pwd

        # The key length is a whole number of bytes.  Floor division keeps it
        # an int; true division ("/") would produce 16.0 / 5.0 under Python 3
        # and leak a float into the key-derivation helpers and /Length.
        if use_128bit:
            v = 2
            rev = 3
            keylen = 128 // 8
        else:
            v = 1
            rev = 2
            keylen = 40 // 8

        # permit everything:
        p = -1
        o = ByteStringObject(_u.algorithm_33(owner_pwd, user_pwd, rev, keylen))

        # Two document identifiers derived from the current time and a random
        # number; id_1 also feeds the user-key computation below.
        id_1 = _md5(bytes(repr(time.time()), _u.ENCODING_UTF8)).digest()
        id_2 = _md5(bytes(repr(random.random()), _u.ENCODING_UTF8)).digest()
        self._id = ArrayObject((ByteStringObject(id_1), ByteStringObject(id_2)))

        if rev == 2:
            u, key = _u.algorithm_34(user_pwd, o, p, id_1)
        else:
            assert rev == 3
            u, key = _u.algorithm_35(user_pwd, rev, keylen, o, p, id_1, False)

        encrypt = DictObject()
        encrypt[NameObject(b'/Filter')] = NameObject(b'/Standard')
        encrypt[NameObject(b'/V')] = NumberObject(v)
        if v == 2:
            # /Length is expressed in bits and is only written for V == 2.
            encrypt[NameObject(b'/Length')] = NumberObject(keylen * 8)
        encrypt[NameObject(b'/R')] = NumberObject(rev)
        encrypt[NameObject(b'/O')] = ByteStringObject(o)
        encrypt[NameObject(b'/U')] = ByteStringObject(u)
        encrypt[NameObject(b'/P')] = NumberObject(p)
        self._encrypt = self._add_object(encrypt)
        self._encrypt_key = key
Beispiel #4
0
    def get_obj_of(self, reference: RefObject):
        """Return the object that *reference* points to.

        Lookup order:

        1. ``self._resolved_objects`` (keyed by generation, then id number).
        2. For generation 0 ids listed in ``self._xref_obj_stream``: the whole
           containing object stream is parsed, every object in it is stored in
           ``self._resolved_objects[0]``, and the requested one is returned.
        3. Otherwise the object is read from ``self._stream`` at the offset
           recorded in ``self._xref``, decrypted when the file is encrypted,
           cached through ``_cache_indirect_object`` and returned.

        Raises ``Exception`` when the file is encrypted and no decryption key
        has been set.
        """
        by_generation = self._resolved_objects.get(reference.generation, {})
        cached = by_generation.get(reference.idnum, None)
        if cached is not None:
            return cached

        if reference.generation == 0 and reference.idnum in self._xref_obj_stream:
            # Indirect reference to an object inside an object stream:
            # load and index the entire stream in one go.
            stream_id, position = self._xref_obj_stream[reference.idnum]
            container = RefObject(stream_id, 0, self).get_object()
            assert container[_k.TYPE] == b'/ObjStm'
            entry_count = container[b'/N']
            assert position < entry_count
            buffer = BytesIO(container.get_data())
            for _ in range(entry_count):
                # Each header entry is an "object-number offset" pair.
                member_id = NumberObject.read_from_stream(buffer)
                _u.seek_token(buffer)
                member_offset = NumberObject.read_from_stream(buffer)
                _u.seek_token(buffer)
                # Remember where the header continues, jump to the object
                # body, then come back for the next pair.
                header_pos = buffer.tell()
                buffer.seek(container[b'/First'] + member_offset, io.SEEK_SET)
                member = read_object(buffer, self)
                self._resolved_objects[0][member_id] = member
                buffer.seek(header_pos, io.SEEK_SET)
            return self._resolved_objects[0][reference.idnum]

        file_offset = self._xref[reference.generation][reference.idnum]
        self._stream.seek(file_offset, io.SEEK_SET)
        idnum, generation = _read_object_header(self._stream)
        assert idnum == reference.idnum
        assert generation == reference.generation
        resolved = read_object(self._stream, self)

        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self._is_encrypted:
            # Without a key there is nothing we can decrypt with.
            if self._decryption_key is None:
                raise Exception("file has not been decrypted")
            # The per-object key mixes in the low 3 bytes of the id number and
            # the low 2 bytes of the generation (little-endian).
            id_bytes = struct.pack("<i", reference.idnum)[:3]
            generation_bytes = struct.pack("<i", reference.generation)[:2]
            object_key = _u.encrypt(self._decryption_key, id_bytes, generation_bytes)
            resolved = self._decrypt_object(resolved, object_key)

        self._cache_indirect_object(generation, idnum, resolved)
        return resolved
Beispiel #5
0
 def _write_trailer_to(self, stream):
     """Write the ``trailer`` keyword followed by the trailer dictionary.

     The dictionary always carries the size, root and info entries; the ID
     and encrypt entries are added only when they have been set.
     """
     stream.write(b'trailer\n')

     entries = DictObject()
     # Size is one more than the number of registered objects - presumably to
     # account for the reserved object number 0.
     entries[NameObject(_k.SIZE)] = NumberObject(len(self._objects) + 1)
     entries[NameObject(_k.ROOT)] = self._root
     entries[NameObject(_k.INFO)] = self._info

     if self._id is not None:
         entries[NameObject(_k.ID)] = self._id
     if self._encrypt is not None:
         entries[NameObject(_k.ENCRYPT)] = self._encrypt

     entries.write_to_stream(stream)
Beispiel #6
0
    def _add_page(self, page, callback_add):
        """
        Shared implementation for appending or inserting a page.

        page - The page dictionary to add; its type entry must equal
               ``_k.PAGE``.
        callback_add - Callable invoked as ``callback_add(kids, page_ref)``
               that places the new page reference into the kids array of
               the page tree root.
        """
        assert page[_k.TYPE] == _k.PAGE

        # Point the page at the tree root before registering it.
        page[NameObject(b'/Parent')] = self._pages
        page_ref = self._add_object(page)

        tree_root = self._pages.get_object()
        kids = tree_root[_k.KIDS]
        callback_add(kids, page_ref)

        updated_count = tree_root[_k.COUNT] + 1
        tree_root[NameObject(_k.COUNT)] = NumberObject(updated_count)
Beispiel #7
0
 def add_bookmark(self, title: str, page_index: int, container_ref=None):
     """Append a bookmark to an outline container and return its reference.

     title - Text shown for the bookmark.
     page_index - Page the bookmark jumps to; ``page_index - 1`` is passed to
             ``get_page_ref``, so this is presumably a 1-based page number.
     container_ref - Reference to the outline dictionary that receives the
             bookmark.  Defaults to the document's top-level outlines.

     Returns the indirect reference of the newly registered bookmark.
     """
     container_ref = self.__outlines if container_ref is None else container_ref
     title_obj = TextStringObject(title)
     mark_mami = DictObject()
     target_page = self.get_page_ref(page_index - 1)
     mark_mami.update({
         NameObject(_k.TITLE): title_obj,
         NameObject(_k.PARENT): container_ref,
         NameObject(_k.DEST): ArrayObject([target_page, NameObject(b'/Fit')]),
     })
     container = container_ref.get_object()
     mark_ref = self._add_object(mark_mami)
     if _k.COUNT in container:
         # Re-wrap the incremented count: adding 1 to the stored number yields
         # a plain int, and every other count in this file is stored as a
         # NumberObject (see how the page count is maintained).
         container[NameObject(_k.COUNT)] = NumberObject(container[_k.COUNT] + 1)
         # Chain the new bookmark after the current last one.  Keys are
         # wrapped in NameObject like every other dictionary write here.
         previous_last = container[_k.LAST]
         container[NameObject(_k.LAST)] = mark_ref
         previous_last[NameObject(_k.NEXT)] = mark_ref
         # NOTE(review): no back-link from the new bookmark to the previous
         # one is written - confirm whether readers of the output need it.
     else:
         # First bookmark in this container: initialise its bookkeeping.
         container[NameObject(_k.TYPE)] = NameObject(_k.OUTLINES)
         container[NameObject(_k.COUNT)] = NumberObject(1)
         container[NameObject(_k.FIRST)] = mark_ref
         container[NameObject(_k.LAST)] = mark_ref
     return mark_ref
Beispiel #8
0
 def _rotate(self, angle):
     """Add *angle* to the stored rotation entry, treating a missing entry as 0."""
     new_angle = self.get(_k.ROTATE, 0) + angle
     self[NameObject(_k.ROTATE)] = NumberObject(new_angle)