def get_reference(self, object: AnyPDFType,
                      context: WriteTransformerContext) -> Reference:
        """
        This function builds a Reference for the input object
        References are re-used whenever possible (hashing is used to detect duplicate objects)

        Lookup order:
        1. context.indirect_objects_by_id, keyed on id(object)
        2. context.indirect_objects_by_hash, keyed on self._hash(object);
           candidates in the bucket are compared with ==
        3. otherwise a new Reference is created with an unused object number,
           set on the object, and the object is registered in both lookups

        :param object:  the object for which a Reference is needed
        :param context: the write context holding both lookup tables
        :return:        the (possibly shared) Reference for the object
        """
        # the very same instance was registered earlier
        obj_id = id(object)
        if obj_id in context.indirect_objects_by_id:
            cached_indirect_object: AnyPDFType = context.indirect_objects_by_id[
                obj_id]
            assert not isinstance(cached_indirect_object, Reference)
            return cached_indirect_object.get_reference(
            )  # type: ignore [union-attr]

        # look through existing indirect object hashes
        obj_hash: int = self._hash(object)
        for obj in context.indirect_objects_by_hash.get(obj_hash, []):
            if obj == object:
                ref = obj.get_reference()  # type: ignore [union-attr]
                assert ref is not None
                assert isinstance(ref, Reference)
                # an equal object was seen before: share its Reference
                object.set_reference(ref)  # type: ignore [union-attr]
                return ref

        # generate new object number
        existing_obj_numbers = {
            item.get_reference().object_number  # type: ignore [union-attr]
            for sublist in context.indirect_objects_by_hash.values()
            for item in sublist
        }
        # start just past the count of known numbers, then skip any that are taken
        obj_number = len(existing_obj_numbers) + 1
        while obj_number in existing_obj_numbers:
            obj_number += 1

        # build reference
        ref = Reference(object_number=obj_number)
        object.set_reference(ref)  # type: ignore [union-attr]

        # insert into context.indirect_objects_by_hash
        context.indirect_objects_by_hash.setdefault(obj_hash, []).append(object)

        # insert into context.indirect_objects_by_id
        context.indirect_objects_by_id[obj_id] = object

        # return
        return ref
    def _start_object(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext],
    ):
        """
        This function opens the definition of an object by writing
        "<object number> <generation number> obj" (e.g. "12 0 obj").
        The position of the destination just before that line is written
        is stored as byte_offset on the Reference of the object.
        """
        assert context is not None
        assert context.destination is not None

        # position at which this object begins
        start_position = context.destination.tell()

        # store the position on the reference
        reference = object_to_transform.get_reference()  # type: ignore [union-attr]
        assert reference is not None
        assert isinstance(reference, Reference)
        reference.byte_offset = start_position

        # emit the header line; a missing generation number is written as 0
        assert reference.object_number is not None
        header = "%d %d obj\n" % (
            reference.object_number,
            reference.generation_number or 0,
        )
        context.destination.write(header.encode("latin1"))
# Example #3
# 0
    def start_object(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext],
    ):
        """
        This function writes the line that opens an object definition:
        "<object number> <generation number> obj".
        Before writing, the current position of the destination is stored
        as byte_offset on the Reference of the object.
        """
        assert context is not None
        assert context.destination is not None

        # where does this object begin?
        position = context.destination.tell()

        # keep track of that position on the reference
        reference = object_to_transform.get_reference()  # type: ignore [union-attr]
        assert reference is not None
        assert isinstance(reference, Reference)
        reference.byte_offset = position

        # write the header; generation number falls back to 0 when unset
        assert reference.object_number is not None
        numbers = (reference.object_number, reference.generation_number or 0)
        context.destination.write(("%d %d obj\n" % numbers).encode("latin1"))
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method wraps an ET.Element (representing XMP meta information)
        in a Stream of Type Metadata / Subtype XML, and passes that Stream
        to the root transformer to be written
        """
        assert isinstance(object_to_transform, ET.Element)
        assert context is not None
        assert context.destination is not None
        assert context.destination

        # the Stream carrying the XML
        metadata_stream = Stream()
        metadata_stream[Name("Type")] = Name("Metadata")
        metadata_stream[Name("Subtype")] = Name("XML")

        # the same serialized XML is stored as both the decoded and the raw bytes
        xml_bytes = ET.tostring(object_to_transform)
        for key in ("DecodedBytes", "Bytes"):
            metadata_stream[Name(key)] = xml_bytes
        metadata_stream[Name("Length")] = pDecimal(len(xml_bytes))

        # the Stream takes over the reference of the element
        metadata_stream.set_reference(
            object_to_transform.get_reference())  # type: ignore [attr-defined]

        # an object is only started when it has a number but no offset yet
        reference = metadata_stream.get_reference()  # type: ignore [attr-defined]
        started_object = False
        if reference is not None:
            assert isinstance(reference, Reference)
            started_object = (reference.object_number is not None
                              and reference.byte_offset is None)
        if started_object:
            self.start_object(metadata_stream, context)

        # delegate the actual writing of the Stream
        self.get_root_transformer().transform(metadata_stream, context)

        # close the object if this method opened it
        if started_object:
            self.end_object(metadata_stream, context)
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes an Image to a byte stream

        The image is saved as JPEG and wrapped in a Stream of Type XObject /
        Subtype Image (Filter DCTDecode, 8 bits per component, DeviceRGB),
        which is then passed to the root transformer. When saving the image
        as-is fails, it is first passed through self._convert_png_to_jpg.

        :param object_to_transform: the PIL image to write
        :param context:             the write context (destination must be set)
        :raises AssertionError:     if the arguments are invalid, or the image
                                    could not be encoded as JPEG
        """
        assert context is not None
        assert context.destination is not None
        assert isinstance(object_to_transform, PILImage.Image)

        # get image bytes (first attempt: save the image unchanged)
        contents: Optional[bytes] = None
        encoding_error: Optional[Exception] = None
        try:
            with io.BytesIO() as output:
                object_to_transform.save(output, format="JPEG")
                contents = output.getvalue()
        except Exception as e:
            # best effort: keep the cause and fall back to converting first
            encoding_error = e

        if contents is None:
            try:
                # TODO : properly store PNG (instead of converting it)
                converted = self._convert_png_to_jpg(object_to_transform)
                assert isinstance(converted, PILImage.Image)
                with io.BytesIO() as output:
                    converted.save(output, format="JPEG")
                    contents = output.getvalue()
                # width/height below must describe the image that was encoded
                object_to_transform = converted
            except Exception as e:
                encoding_error = e

        # explicit raise (rather than assert) so the check survives `python -O`;
        # the last encoding failure is chained for diagnosis
        if contents is None:
            raise AssertionError(
                "unable to encode image as JPEG") from encoding_error

        # build corresponding Stream (XObject)
        out_value = Stream()
        out_value[Name("Type")] = Name("XObject")
        out_value[Name("Subtype")] = Name("Image")
        out_value[Name("Width")] = pDecimal(object_to_transform.width)
        out_value[Name("Height")] = pDecimal(object_to_transform.height)
        out_value[Name("Length")] = pDecimal(len(contents))
        out_value[Name("Filter")] = Name("DCTDecode")
        out_value[Name("BitsPerComponent")] = pDecimal(8)
        out_value[Name("ColorSpace")] = Name("DeviceRGB")
        out_value[Name("Bytes")] = contents

        # copy reference
        out_value.set_reference(
            object_to_transform.get_reference())  # type: ignore [attr-defined]

        # start object if needed
        started_object = False
        ref = out_value.get_reference()  # type: ignore [attr-defined]
        if ref is not None:
            assert isinstance(ref, Reference)
            if ref.object_number is not None and ref.byte_offset is None:
                started_object = True
                self._start_object(out_value, context)

        # write stream
        # NOTE(review): the level is presumably forced to a non-zero value so
        # the Filter entry of the already-encoded JPEG is kept by the Stream
        # writer - confirm against the transformer that handles Stream
        cl = context.compression_level
        context.compression_level = 9
        try:
            self.get_root_transformer().transform(out_value, context)
        finally:
            # always restore the caller's setting, even when writing fails
            context.compression_level = cl

        # end object if needed
        if started_object:
            self._end_object(out_value, context)
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes a Stream to a byte stream:
        its dictionary, followed by the bytes between "stream" and "endstream".
        Dictionary, List and Stream entries that can be referenced are replaced
        by a Reference, and are themselves written after this Stream.
        """
        assert context is not None
        assert context.destination is not None
        assert isinstance(object_to_transform, Stream)

        # a Stream whose reference was already resolved is not written again
        object_ref: typing.Optional[
            Reference] = object_to_transform.get_reference(
            )  # type: ignore [attr-defined]
        already_resolved = (object_ref is not None
                            and object_ref in context.resolved_references)
        if already_resolved:
            assert object_ref is not None
            assert object_ref.object_number is not None
            logger.debug(
                "skip writing object %d %d R (already resolved)" %
                (object_ref.object_number, object_ref.generation_number or 0))
            return

        # open the object when it has a number but no byte offset yet
        started_object = False
        if object_ref is not None:
            assert object_ref.object_number is not None
            started_object = (object_ref.object_number is not None
                              and object_ref.byte_offset is None)
            if started_object:
                self.start_object(object_to_transform, context)
            context.resolved_references.append(object_ref)

        # copy the entries (except the payload) into the dictionary to write,
        # swapping referenceable containers for their Reference
        stream_dictionary = Dictionary()
        queue: typing.List[AnyPDFType] = []
        for key, value in object_to_transform.items():
            if key in ("Bytes", "DecodedBytes"):
                continue
            referenceable = (
                isinstance(value, (Dictionary, List, Stream))
                and value.can_be_referenced()  # type: ignore [union-attr]
            )
            if not referenceable:
                stream_dictionary[key] = value
                continue
            stream_dictionary[key] = self.get_reference(value, context)
            queue.append(value)

        # without compression, the Filter entry is dropped
        if context.compression_level == 0 and Name(
                "Filter") in stream_dictionary:
            stream_dictionary.pop(Name("Filter"))

        # decoded bytes are (re)compressed here; raw bytes are written untouched
        if "DecodedBytes" in object_to_transform:
            bts = object_to_transform["DecodedBytes"]
            if context.compression_level != 0:
                bts = zlib.compress(bts, context.compression_level)
            stream_dictionary[Name("Length")] = pDecimal(len(bts))
        else:
            assert "Bytes" in object_to_transform
            bts = object_to_transform["Bytes"]

        # dictionary first
        self.get_root_transformer().transform(stream_dictionary, context)

        # then the payload, wrapped in its keywords
        context.destination.write(b"stream\n")
        context.destination.write(bts)
        context.destination.write(b"\nendstream\n")

        # close the object if this method opened it
        if started_object:
            self.end_object(object_to_transform, context)

        # finally write the objects that were replaced by a Reference
        for pending in queue:
            self.get_root_transformer().transform(pending, context)
# Example #7
# 0
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        This method writes a List to a byte stream, as "[" + items separated
        by a single space + "]".
        Dictionary, List, Stream and Image items that can be referenced are
        written as a Reference, and are themselves written after the List.
        Returns the List that was written (None when writing was skipped).
        """
        assert isinstance(object_to_transform, List)
        assert context is not None
        assert context.destination is not None
        assert context.destination

        # a List whose reference was already resolved is not written again
        object_ref: typing.Optional[
            Reference] = object_to_transform.get_reference(
            )  # type: ignore [attr-defined]
        already_resolved = (object_ref is not None
                            and object_ref in context.resolved_references)
        if already_resolved:
            assert object_ref is not None
            assert object_ref.object_number is not None
            logger.debug(
                "skip writing object %d %d R (already resolved)" %
                (object_ref.object_number, object_ref.generation_number or 0))
            return

        # build the List to write, swapping referenceable items for a Reference
        out_value = List()
        queue: typing.List[AnyPDFType] = []
        for item in object_to_transform:
            referenceable = (
                isinstance(item, (Dictionary, List, Stream, Image))
                and item.can_be_referenced()  # type: ignore [union-attr]
            )
            if not referenceable:
                out_value.append(item)
                continue
            out_value.append(self.get_reference(item, context))
            queue.append(item)

        # open the object when it has a number but no byte offset yet
        started_object = False
        if object_ref is not None:
            assert object_ref.object_number is not None
            started_object = (object_ref.object_number is not None
                              and object_ref.byte_offset is None)
            if started_object:
                self.start_object(object_to_transform, context)
            context.resolved_references.append(object_ref)

        # write the list at the current location
        context.destination.write(b"[")
        for index, element in enumerate(out_value):
            # a separator goes between items, never after the last one
            if index > 0:
                context.destination.write(b" ")
            self.get_root_transformer().transform(element, context)
        context.destination.write(b"]\n")

        # close the object if this method opened it
        if started_object:
            self.end_object(object_to_transform, context)

        # finally write the objects that were replaced by a Reference
        for pending in queue:
            self.get_root_transformer().transform(pending, context)

        # return
        return out_value