def get_reference(self, object: AnyPDFType, context: WriteTransformerContext) -> Reference: """ This function builds a Reference for the input object References are re-used whenever possible (hashing is used to detect duplicate objects) """ obj_id = id(object) if obj_id in context.indirect_objects_by_id: cached_indirect_object: AnyPDFType = context.indirect_objects_by_id[ obj_id] assert not isinstance(cached_indirect_object, Reference) return cached_indirect_object.get_reference( ) # type: ignore [union-attr] # look through existing indirect object hashes obj_hash: int = self._hash(object) if obj_hash in context.indirect_objects_by_hash: for obj in context.indirect_objects_by_hash[obj_hash]: if obj == object: ref = obj.get_reference() # type: ignore [union-attr] assert ref is not None assert isinstance(ref, Reference) object.set_reference(ref) # type: ignore [union-attr] return ref # generate new object number existing_obj_numbers = set([ item.get_reference().object_number # type: ignore [union-attr] for sublist in [v for k, v in context.indirect_objects_by_hash.items()] for item in sublist ]) obj_number = len(existing_obj_numbers) + 1 while obj_number in existing_obj_numbers: # type: ignore [union-attr] obj_number += 1 # build reference ref = Reference(object_number=obj_number) object.set_reference(ref) # type: ignore [union-attr] # insert into context.indirect_objects_by_hash if obj_hash in context.indirect_objects_by_hash: context.indirect_objects_by_hash[obj_hash].append(object) else: context.indirect_objects_by_hash[obj_hash] = [object] # insert into context.indirect_objects_by_id context.indirect_objects_by_id[obj_id] = object # return return ref
def _start_object( self, object_to_transform: AnyPDFType, context: Optional[WriteTransformerContext], ): """ This function starts a new direct object by writing its reference number followed by "obj" (e.g. "12 0 obj"). It also does some bookkeeping to ensure the byte offset is stored in the XREF """ # get offset position assert context is not None assert context.destination is not None byte_offset = context.destination.tell() # update offset ref = object_to_transform.get_reference() # type: ignore [union-attr] assert ref is not None assert isinstance(ref, Reference) ref.byte_offset = byte_offset # write <object number> <generation number> obj assert ref.object_number is not None context.destination.write( bytes( "%d %d obj\n" % (ref.object_number, ref.generation_number or 0), "latin1", ))
def start_object( self, object_to_transform: AnyPDFType, context: Optional[WriteTransformerContext], ): # get offset position assert context is not None assert context.destination is not None byte_offset = context.destination.tell() # update offset ref = object_to_transform.get_reference() # type: ignore [union-attr] assert ref is not None assert isinstance(ref, Reference) ref.byte_offset = byte_offset # write <object number> <generation number> obj assert ref.object_number is not None context.destination.write( bytes( "%d %d obj\n" % (ref.object_number, ref.generation_number or 0), "latin1", ))
def transform( self, object_to_transform: AnyPDFType, context: Optional[WriteTransformerContext] = None, ): """ This method writes an ET.Element (representing XMP meta information) to a byte stream """ assert isinstance(object_to_transform, ET.Element) assert context is not None assert context.destination is not None assert context.destination # build stream out_value = Stream() out_value[Name("Type")] = Name("Metadata") out_value[Name("Subtype")] = Name("XML") bts = ET.tostring(object_to_transform) out_value[Name("DecodedBytes")] = bts out_value[Name("Bytes")] = bts out_value[Name("Length")] = pDecimal(len(bts)) # copy reference out_value.set_reference( object_to_transform.get_reference()) # type: ignore [attr-defined] # start object if needed started_object = False ref = out_value.get_reference() # type: ignore [attr-defined] if ref is not None: assert isinstance(ref, Reference) if ref.object_number is not None and ref.byte_offset is None: started_object = True self.start_object(out_value, context) # pass stream along to other transformer self.get_root_transformer().transform(out_value, context) # end object if needed if started_object: self.end_object(out_value, context)
def transform( self, object_to_transform: AnyPDFType, context: Optional[WriteTransformerContext] = None, ): """ This method writes an Image to a byte stream """ assert context is not None assert context.destination is not None assert isinstance(object_to_transform, PILImage.Image) # get image bytes contents = None filter_name: Optional[Name] = None try: with io.BytesIO() as output: assert isinstance(object_to_transform, PILImage.Image) object_to_transform.save(output, format="JPEG") contents = output.getvalue() filter_name = Name("DCTDecode") except Exception as e: pass if contents is None: try: # TODO : properly store PNG (instead of converting it) with io.BytesIO() as output: object_to_transform = self._convert_png_to_jpg( object_to_transform) assert isinstance(object_to_transform, PILImage.Image) object_to_transform.save(output, format="JPEG") contents = output.getvalue() filter_name = Name("DCTDecode") except Exception as e: pass assert contents is not None # build corresponding Stream (XObject) out_value = Stream() out_value[Name("Type")] = Name("XObject") out_value[Name("Subtype")] = Name("Image") out_value[Name("Width")] = pDecimal(object_to_transform.width) out_value[Name("Height")] = pDecimal(object_to_transform.height) out_value[Name("Length")] = pDecimal(len(contents)) out_value[Name("Filter")] = filter_name out_value[Name("BitsPerComponent")] = pDecimal(8) out_value[Name("ColorSpace")] = Name("DeviceRGB") out_value[Name("Bytes")] = contents # copy reference out_value.set_reference( object_to_transform.get_reference()) # type: ignore [attr-defined] # start object if needed started_object = False ref = out_value.get_reference() # type: ignore [attr-defined] if ref is not None: assert isinstance(ref, Reference) if ref.object_number is not None and ref.byte_offset is None: started_object = True self._start_object(out_value, context) # write stream cl = context.compression_level context.compression_level = 9 self.get_root_transformer().transform(out_value, context) context.compression_level = cl # end object if needed if started_object: self._end_object(out_value, context)
def transform( self, object_to_transform: AnyPDFType, context: Optional[WriteTransformerContext] = None, ): """ This method writes a Stream to a byte stream """ assert context is not None assert context.destination is not None assert isinstance(object_to_transform, Stream) # avoid resolving objects twice object_ref: typing.Optional[ Reference] = object_to_transform.get_reference( ) # type: ignore [attr-defined] if object_ref is not None and object_ref in context.resolved_references: assert object_ref is not None assert object_ref.object_number is not None logger.debug( "skip writing object %d %d R (already resolved)" % (object_ref.object_number, object_ref.generation_number or 0)) return # start object if needed started_object = False if object_ref is not None: assert object_ref.object_number is not None if object_ref.object_number is not None and object_ref.byte_offset is None: started_object = True self.start_object(object_to_transform, context) context.resolved_references.append(object_ref) # build stream dictionary stream_dictionary = Dictionary() # objects to turn into reference queue: typing.List[AnyPDFType] = [] for k, v in object_to_transform.items(): if k in ["Bytes", "DecodedBytes"]: continue if (isinstance(v, Dictionary) or isinstance(v, List) or isinstance(v, Stream) ) and v.can_be_referenced(): # type: ignore [union-attr] stream_dictionary[k] = self.get_reference(v, context) queue.append(v) else: stream_dictionary[k] = v # if self.compression_level == 0, remove \Filter if context.compression_level == 0 and Name( "Filter") in stream_dictionary: stream_dictionary.pop(Name("Filter")) # handle compression if "DecodedBytes" in object_to_transform: if context.compression_level == 0: bts = object_to_transform["DecodedBytes"] else: bts = zlib.compress(object_to_transform["DecodedBytes"], context.compression_level) stream_dictionary[Name("Length")] = pDecimal(len(bts)) else: assert "Bytes" in object_to_transform bts = object_to_transform["Bytes"] # write stream dictionary self.get_root_transformer().transform(stream_dictionary, context) # write "stream" context.destination.write(bytes("stream\n", "latin1")) # write bytes context.destination.write(bts) # write "endstream" context.destination.write(bytes("\nendstream\n", "latin1")) # end object if needed if started_object: self.end_object(object_to_transform, context) for e in queue: self.get_root_transformer().transform(e, context)
def transform( self, object_to_transform: AnyPDFType, context: Optional[WriteTransformerContext] = None, ): """ This method writes a List to a byte stream """ assert isinstance(object_to_transform, List) assert context is not None assert context.destination is not None assert context.destination # avoid resolving objects twice object_ref: typing.Optional[ Reference] = object_to_transform.get_reference( ) # type: ignore [attr-defined] if object_ref is not None and object_ref in context.resolved_references: assert object_ref is not None assert object_ref.object_number is not None logger.debug( "skip writing object %d %d R (already resolved)" % (object_ref.object_number, object_ref.generation_number or 0)) return # output value out_value = List() # objects to turn into reference queue: typing.List[AnyPDFType] = [] for v in object_to_transform: if (isinstance(v, Dictionary) or isinstance(v, List) or isinstance(v, Stream) or isinstance(v, Image) ) and v.can_be_referenced(): # type: ignore [union-attr] out_value.append(self.get_reference(v, context)) queue.append(v) else: out_value.append(v) # start object if needed started_object = False if object_ref is not None: assert object_ref.object_number is not None if object_ref.object_number is not None and object_ref.byte_offset is None: started_object = True self.start_object(object_to_transform, context) context.resolved_references.append(object_ref) # write dictionary at current location context.destination.write(bytes("[", "latin1")) N = len(out_value) for i, v in enumerate(out_value): self.get_root_transformer().transform(v, context) if i != N - 1: context.destination.write(bytes(" ", "latin1")) context.destination.write(bytes("]\n", "latin1")) # end object if needed if started_object: self.end_object(object_to_transform, context) for e in queue: self.get_root_transformer().transform(e, context) # return return out_value