def _create_object(self, content, raw_stream=None):
    """Store a freshly numbered object in the PDF and return a reference to it.

    A free object id is requested from the PDF and an object with
    generation number 0 is created around `content`. When `raw_stream` is
    not None it is wrapped by EncodedObject.create() and attached as the
    object's stream. The object is then handed to replace_object().

    Returns PDFXRef(objid, 0) for the stored object.
    """
    new_objid = self._pdf.get_free_objid()
    new_object = PDFObject.create(objid=new_objid, gennum=0, content=content)
    has_stream = raw_stream is not None
    if has_stream:
        new_object.set_stream(EncodedObject.create(raw_stream))
    self._pdf.replace_object(new_object)
    return PDFXRef(new_objid, 0)
def _add_xmp_metadata(self):
    """Create the XMP metadata stream object and return its xref.

    The values are taken from the document's /Info dictionary (looked up
    through the trailer) and substituted into the "xmp_metadata.xml"
    template shipped in llpdf.resources. /ModDate and /CreationDate fall
    back to the current local time when they are missing from /Info.

    All string values are XML-escaped before substitution: Info strings
    are free text, and an unescaped "&", "<", ">" or '"' would make the
    resulting XMP packet malformed.

    Raises KeyError if the trailer has no /Info entry.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape as xml_escape

    info_node_xref = self._pdf.trailer[PDFName("/Info")]
    info_node = self._pdf.lookup(info_node_xref)

    metadata_date = Timestamp.localnow()

    def info_timestamp(key):
        # Use the date stored in /Info when present, otherwise "now".
        if key in info_node.content:
            return Timestamp.frompdf(info_node.content[key].decode("ascii"))
        return metadata_date

    modify_date = info_timestamp(PDFName("/ModDate"))
    create_date = info_timestamp(PDFName("/CreationDate"))

    xmp_metadata = {
        "creator_tool": self._pdf.get_info("Creator"),
        "producer": self._pdf.get_info("Producer"),
        "modify_date": modify_date.format_xml(),
        "create_date": create_date.format_xml(),
        "metadata_date": metadata_date.format_xml(),
        "description": self._pdf.get_info("Subject"),
        "title": self._pdf.get_info("Title"),
        "creator": self._pdf.get_info("Author"),
        "keywords": self._pdf.get_info("Keywords"),
        "document_uuid": str(uuid.uuid4()),
        "instance_uuid": str(uuid.uuid4()),
        "llpdf_version": "llpdf " + llpdf.VERSION,
    }

    # Escape only str values; anything else (e.g. a missing entry) is
    # formatted by the template exactly as before. The double quote is
    # escaped as well so the values are safe inside attribute values too.
    escaped_metadata = {
        key: xml_escape(value, {'"': "&quot;"}) if isinstance(value, str) else value
        for (key, value) in xmp_metadata.items()
    }

    xmp_metadata_template = pkgutil.get_data("llpdf.resources", "xmp_metadata.xml").decode("utf-8")
    stream = (xmp_metadata_template % escaped_metadata).encode("utf-8")
    content = {
        PDFName("/Type"): PDFName("/Metadata"),
        PDFName("/Subtype"): PDFName("/XML"),
    }
    objid = self._pdf.get_free_objid()
    # The stream is explicitly stored uncompressed (compress = False).
    pdf_object = PDFObject.create(objid, gennum = 0, content = content, stream = EncodedObject.create(stream, compress = False))
    self._pdf.replace_object(pdf_object)
    return pdf_object.xref
def get_fontfile_object(self, objid):
    """Build the font file object for this font under the given object id.

    The stream is the concatenation of the clear, cipher and trailer data
    (in that order), created with compress = True. /Length1, /Length2 and
    /Length3 carry the byte lengths of those three parts. The object is
    created with generation number 0 and returned; it is not stored
    anywhere by this method.
    """
    clear, cipher, trailer = self._cleardata, self._cipherdata, self._trailerdata
    segment_lengths = {
        PDFName("/Length1"): len(clear),
        PDFName("/Length2"): len(cipher),
        PDFName("/Length3"): len(trailer),
    }
    fontfile_stream = EncodedObject.create(clear + cipher + trailer, compress = True)
    return PDFObject.create(objid, 0, segment_lengths, fontfile_stream)
def serialize_xref_object(self, trailer_dict, objid):
    """Return the cross-reference table as an /XRef stream object.

    The object's dictionary is a copy of `trailer_dict` (the argument is
    not modified) with /Type, /Index, /Size and /W set; entries of the
    same name in `trailer_dict` are overwritten. /Index and /Size cover
    the ids 0 through self._max_objid, and /W announces field widths of
    1, the computed offset width, and 1. The stream holds the data
    produced by _serialize_xref_data() for that offset width.
    """
    offset_width = self._get_offset_width()
    entry_count = self._max_objid + 1

    xref_dict = dict(trailer_dict)
    xref_dict[PDFName("/Type")] = PDFName("/XRef")
    xref_dict[PDFName("/Index")] = [ 0, entry_count ]
    xref_dict[PDFName("/Size")] = entry_count
    xref_dict[PDFName("/W")] = [ 1, offset_width, 1 ]

    xref_stream = EncodedObject.create(self._serialize_xref_data(offset_width))
    return PDFObject.create(objid = objid, gennum = 0, content = xref_dict, stream = xref_stream)
def _add_color_intent(self, color_profile_xref):
    """Store an output intent object that points at the given color profile.

    The stored object's content is a one-element list holding the
    /OutputIntent dictionary (subtype /GTS_PDFA1, sRGB description
    strings, /DestOutputProfile set to `color_profile_xref`). The object
    receives a free object id, generation 0, and is put into the PDF.

    Returns the xref of the new object.
    """
    output_intent = {
        PDFName("/Type"): PDFName("/OutputIntent"),
        PDFName("/DestOutputProfile"): color_profile_xref,
        PDFName("/Info"): b"sRGB IEC61966-2.1",
        PDFName("/OutputCondition"): b"sRGB",
        PDFName("/OutputConditionIdentifier"): b"Custom",
        PDFName("/RegistryName"): b"",
        PDFName("/S"): PDFName("/GTS_PDFA1"),
    }
    intent_object = PDFObject.create(self._pdf.get_free_objid(), gennum=0, content=[output_intent])
    self._pdf.replace_object(intent_object)
    return intent_object.xref
def _add_color_profile(self):
    """Store the ICC color profile as a stream object and return its xref.

    The profile bytes are read from the file named by
    self._args.color_profile when that option is set; otherwise the
    "sRGB_IEC61966-2-1_black_scaled.icc" resource bundled in
    llpdf.resources is used. The object dictionary declares /N 3 and a
    /Range of [0, 1] for each of the three components.
    """
    custom_profile = self._args.color_profile
    if custom_profile is not None:
        with open(custom_profile, "rb") as profile_file:
            icc_data = profile_file.read()
    else:
        icc_data = pkgutil.get_data("llpdf.resources", "sRGB_IEC61966-2-1_black_scaled.icc")

    profile_dict = {
        PDFName("/N"): 3,
        PDFName("/Range"): [ 0, 1, 0, 1, 0, 1 ],
    }
    profile_object = PDFObject.create(
        self._pdf.get_free_objid(),
        gennum = 0,
        content = profile_dict,
        stream = EncodedObject.create(icc_data),
    )
    self._pdf.replace_object(profile_object)
    return profile_object.xref
def run(self):
    """Renumber objects according to self._old_to_new and fix up references.

    Works in three phases so that no object is lost while ids move around:
    first a relinked copy of every object is built, then every object
    whose xref is a key of self._old_to_new is deleted, and finally all
    relinked copies are inserted.
    """
    def relinked_copy(original):
        # Content is passed through _relink(); the object itself moves to
        # its mapped xref, or keeps its own xref if it is not in the map.
        new_content = self._relink(original.content)
        target_xref = self._old_to_new.get(original.xref, original.xref)
        return PDFObject.create(target_xref.objid, target_xref.gennum, new_content, original.stream)

    # Relink the content dictionaries
    replacements = [ relinked_copy(obj) for obj in self._pdf ]

    # Then delete all old objects
    for stale_xref in self._old_to_new:
        self._pdf.delete_object(stale_xref.objid, stale_xref.gennum)

    # And insert the relinked ones
    for replacement in replacements:
        self._pdf.replace_object(replacement)
def get_font_descriptor_object(self, objid, fontfile_xref):
    """Build the /FontDescriptor object for this font.

    `objid` is the id the new object is created under (generation 0) and
    `fontfile_xref` is stored as /FontFile. /Ascent and /CapHeight are
    both taken from element 3 of the font bounding box and /Descent from
    element 1. The object is returned, not stored.
    """
    bbox = self.get_font_bbox()
    upper_edge = bbox[3]
    lower_edge = bbox[1]
    descriptor = {
        PDFName("/Type"): PDFName("/FontDescriptor"),
        PDFName("/ItalicAngle"): 0,
        PDFName("/FontFile"): fontfile_xref,
        PDFName("/FontName"): self.get_font_name(),
        PDFName("/Flags"): int(FontDescriptorFlag.Symbolic),
        PDFName("/FontBBox"): bbox,
        PDFName("/Ascent"): upper_edge,
        PDFName("/CapHeight"): upper_edge,
        PDFName("/Descent"): lower_edge,
        PDFName("/CharSet"): self.charset_string,
        PDFName("/MissingWidth"): self.get_missing_width(),
        # TODO: /StemV is not emitted; it is still open whether it is needed.
    }
    return PDFObject.create(objid, 0, descriptor)
def serialize(self, serializer):
    """Pack the contained objects into a single /ObjStm stream object.

    The stream starts with one text line of space-separated
    "objid offset" pairs, followed by the serialized content of each
    contained object, each terminated by a newline. Offsets are relative
    to the first byte after that index line, whose length (including its
    newline) is written to /First. /N is self.objects_inside_count.
    """
    index_entries = [ ]
    body = bytearray()
    for inner in self._contained_objects:
        serialized = serializer.serialize(inner.content)
        # The offset is the body length *before* this object is appended.
        index_entries += [ inner.objid, len(body) ]
        body += serialized + b"\n"

    index_line = " ".join(map(str, index_entries)).encode("utf-8") + b"\n"
    stream_data = index_line + body
    objstm_dict = {
        PDFName("/Type"): PDFName("/ObjStm"),
        PDFName("/N"): self.objects_inside_count,
        PDFName("/First"): len(index_line),
    }
    return PDFObject.create(objid = self.objid, gennum = 0, content = objstm_dict, stream = EncodedObject.create(stream_data))
def get_font_object(self, objid, fontdescriptor_xref):
    """Build the /Font (subtype /Type1) object for this font.

    /FirstChar and /LastChar are the smallest and largest key of the
    widths dictionary; /Widths lists one width for every code in that
    closed range, using the missing-width value for codes without an
    entry. The encoding differences come from
    build_encoding_array("latin1"). The object is created under `objid`
    with generation 0 and returned, not stored.

    Raises ValueError (from min()) if the widths dictionary is empty.
    """
    widths = self.get_widths_dict()
    first_char = min(widths)
    last_char = max(widths)
    fallback_width = self.get_missing_width()

    widths_array = [ ]
    for charcode in range(first_char, last_char + 1):
        widths_array.append(widths.get(charcode, fallback_width))

    encoding = {
        PDFName("/Type"): PDFName("/Encoding"),
        PDFName("/Differences"): build_encoding_array("latin1"),
    }
    font_dict = {
        PDFName("/Type"): PDFName("/Font"),
        PDFName("/Subtype"): PDFName("/Type1"),
        PDFName("/FirstChar"): first_char,
        PDFName("/LastChar"): last_char,
        PDFName("/Widths"): widths_array,
        PDFName("/Encoding"): encoding,
        PDFName("/BaseFont"): self.get_font_name(),
        PDFName("/FontDescriptor"): fontdescriptor_xref,
    }
    return PDFObject.create(objid, 0, font_dict)
def run(self):
    """Embed the file named by self._args.embed_payload into the PDF.

    The file's bytes become the uncompressed stream of a new object
    (free object id, generation 0). Its dictionary records the original
    base filename, the file's modification time as a UTC timestamp of
    the form YYYY-MM-DDTHH:MM:SSZ, and the llpdf version.

    Raises OSError if the payload file cannot be opened or read.
    """
    with open(self._args.embed_payload, "rb") as f:
        payload = f.read()
        # Stat the open handle rather than the path again, so the
        # timestamp belongs to the very file whose bytes were read.
        mtime = os.fstat(f.fileno()).st_mtime

    objid = self._pdf.get_free_objid()
    self._log.debug(
        "Embedding %d bytes payload from file \"%s\" into PDF file as objid %d",
        len(payload), self._args.embed_payload, objid)

    # utcfromtimestamp() is deprecated since Python 3.12; the aware
    # equivalent formats to the identical string.
    mtime_utc = datetime.datetime.fromtimestamp(mtime, tz=datetime.timezone.utc)
    mtime_str = mtime_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    content = {
        PDFName("/PDFMinify.OriginalFilename"): os.path.basename(self._args.embed_payload).encode(),
        PDFName("/PDFMinify.MTime"): mtime_str.encode(),
        PDFName("/PDFMinify.Version"): llpdf.VERSION.encode(),
    }
    obj = PDFObject.create(objid=objid, gennum=0, content=content)
    obj.set_stream(EncodedObject.create(payload, compress=False))
    self._pdf.replace_object(obj)
def run(self):
    """Apply a series of in-place fixups to the loaded PDF.

    In order: set a random trailer /ID, switch off image interpolation,
    remove /Group entries from pages and Form XObjects, add a color
    profile, an output intent and XMP metadata and reference them from
    every catalog, force annotation flags to "printable", and finally
    complete font data (CIDToGIDMap for CIDFontType2 fonts, CharSet for
    Type1 font descriptors, CIDSet for CIDFontType2 font descriptors).

    NOTE(review): the steps look like PDF/A conformance fixups -- confirm
    against the enclosing class.
    """
    type_key = PDFName("/Type")
    subtype_key = PDFName("/Subtype")
    group_key = PDFName("/Group")

    # Put an ID into the PDF
    self._pdf.trailer[PDFName("/ID")] = [os.urandom(16), os.urandom(16)]

    # Do not interpolate any image objects
    for image in self._pdf.image_objects:
        image.content[PDFName("/Interpolate")] = False

    # No pages may be transparency groups
    for page in self._pdf.pages:
        if group_key in page.content:
            del page.content[group_key]

    # No transparency groups in Form XObjects
    for obj in self._pdf:
        is_form_xobject = (obj.getattr(type_key) == PDFName("/XObject")) and (obj.getattr(subtype_key) == PDFName("/Form"))
        if is_form_xobject and (obj.getattr(group_key) is not None):
            del obj.content[group_key]

    # Add color profile data, the color intent object that refers to it,
    # and the XMP metadata
    color_profile_xref = self._add_color_profile()
    color_intent_xref = self._add_color_intent(color_profile_xref)
    metadata_xref = self._add_xmp_metadata()

    # Set output intent and metadata reference for all catalogs
    for obj in self._pdf:
        if obj.getattr(type_key) == PDFName("/Catalog"):
            obj.content[PDFName("/OutputIntents")] = color_intent_xref
            obj.content[PDFName("/Metadata")] = metadata_xref

    # Set all annotations with annotation flag "printable" (4); any
    # previously stored flag value is overwritten
    for obj in self._pdf:
        if obj.getattr(type_key) == PDFName("/Annot"):
            obj.content[PDFName("/F")] = 4

    # Font descriptors may be shared between fonts; each is fixed once.
    fixed_descriptors = set()
    # Iterate over a snapshot because CIDSet objects are added on the way.
    for font_obj in list(self._pdf):
        is_font = font_obj.getattr(type_key) == PDFName("/Font")
        if not is_font:
            continue

        subtype = font_obj.getattr(subtype_key)
        is_cid_type2 = subtype == PDFName("/CIDFontType2")
        if is_cid_type2:
            # Type2 fonts need to have a CIDtoGIDMap
            font_obj.content[PDFName("/CIDToGIDMap")] = PDFName("/Identity")

        if PDFName("/FontDescriptor") not in font_obj.content:
            continue
        font_descriptor_xref = font_obj.content[PDFName("/FontDescriptor")]
        if font_descriptor_xref in fixed_descriptors:
            continue
        fixed_descriptors.add(font_descriptor_xref)
        font_descriptor_obj = self._pdf.lookup(font_descriptor_xref)

        if subtype == PDFName("/Type1"):
            # Update Type1 font descriptors with missing CharSet entries
            font_file_obj = self._pdf.lookup(font_descriptor_obj.content[PDFName("/FontFile")])
            t1_font = T1Font.from_fontfile_obj(font_file_obj)
            font_descriptor_obj.content[PDFName("/CharSet")] = t1_font.charset_string
        elif is_cid_type2:
            # Type2 font descriptors need to have a CIDSet
            glyph_count = self.type2_font_glyph_count(font_obj.content[PDFName("/W")])
            (full_bytes, set_bits) = divmod(glyph_count, 8)
            # The leftover glyphs occupy the most significant bits of the
            # final byte; with no leftover this is a trailing zero byte.
            last_byte = ((1 << set_bits) - 1) << (8 - set_bits)
            self._log.debug(
                "Assuming CIDSet for %d glyphs of %d full 0xff bytes and a final value of 0x%x.",
                glyph_count, full_bytes, last_byte)
            cidset_objid = self._pdf.get_free_objid()
            cidset_data = (b"\xff" * full_bytes) + bytes([last_byte])
            cidset_object = PDFObject.create(
                cidset_objid, gennum=0, content={},
                stream=EncodedObject.create(cidset_data))
            self._pdf.replace_object(cidset_object)
            font_descriptor_obj.content[PDFName("/CIDSet")] = cidset_object.xref