def test_array_arg(self):
    """Check parsing of ``TJ`` commands with a dictionary or array operand.

    The second case contains an escaped closing parenthesis inside a
    string operand; the expected value keeps the backslash verbatim.
    """
    self.assertEqual(
        GraphicsParser.parse("<</Foo /Bar>>TJ"),
        [GraphCommand("TJ", {PDFName("/Foo"): PDFName("/Bar")})])
    # Raw literals are used on both sides: "\)" is not a valid Python
    # escape sequence, so a non-raw literal triggers a SyntaxWarning while
    # producing the very same backslash + ")" characters.
    self.assertEqual(
        GraphicsParser.parse(r"[(some in-depth )3(things\).)]TJ"),
        [GraphCommand("TJ", ["(some in-depth )", 3, r"(things\).)"])])
def _callback_fill_rect(self):
    """Report a pattern fill to the draw callback.

    Looks up the pattern selected by the current non-stroking color
    (``self._gs["color_ns"]``) in the page's ``/Pattern`` resources. If
    that pattern references exactly one ``/XObject`` resource, the draw
    callback receives a ``_PatternDrawCallbackResult`` carrying the
    pattern, the referenced object and the extents of the pattern's
    ``/BBox`` under the pattern's ``/Matrix``.

    Returns without doing anything when no callback is installed, the
    page has no patterns, the current color is not a known pattern, or
    the pattern does not reference exactly one XObject.
    """
    if self._draw_callback is None:
        return

    page_patterns = self._page_resources.content.get(PDFName("/Pattern"))
    if page_patterns is None:
        return

    xref = page_patterns.get(self._gs["color_ns"])
    if xref is None:
        return

    pattern = self._pdf_lookup.lookup(xref)
    bbox = pattern.content[PDFName("/BBox")]
    matrix = TransformationMatrix(*pattern.content[PDFName("/Matrix")])
    xobjects = pattern.content[PDFName("/Resources")][PDFName("/XObject")]
    xobject_xrefs = list(xobjects.values())
    if len(xobject_xrefs) != 1:
        return

    image_resource = self._pdf_lookup.lookup(xobject_xrefs[0])
    native_extents = matrix.extents(bbox)
    self._log.debug(
        "Pattern draw object of %s found with CTM %s and pattern matrix %s; %s",
        image_resource, self._gs["CTM"], matrix, native_extents.format())

    result = self._PatternDrawCallbackResult(
        drawtype="pattern",
        pattern_obj=pattern,
        image_obj=image_resource,
        native_extents=native_extents)
    self._draw_callback(result)
def run(self):
    """Replace indirect ``/Length`` entries by the actual stream length.

    For every object whose content is a dictionary with a ``/Length``
    entry that is a ``PDFXRef`` and that has a stream, the entry is
    overwritten with ``len(obj.stream)``. All other objects are left
    untouched.
    """
    for obj in self._pdf:
        meta = obj.content
        if not isinstance(meta, dict):
            continue
        length_key = PDFName("/Length")
        if length_key not in meta:
            continue
        if not isinstance(meta[length_key], PDFXRef):
            continue
        if obj.stream is None:
            continue
        meta[length_key] = len(obj.stream)
def _draw_callback(self, draw_cmd):
    """Record a draw command if the drawn object is an image.

    Objects whose ``/Subtype`` is not ``/Image`` are ignored; everything
    else is appended to ``self._draw_cmds`` under the image's xref.
    """
    drawn = draw_cmd.image_obj
    if drawn.content.get(PDFName("/Subtype")) != PDFName("/Image"):
        # This might be a /Form that is plotted with the Do command or
        # similar. Ignore it.
        return

    xref = drawn.xref
    self._log.debug("Interpreter found %s image %s at %s",
                    draw_cmd.drawtype, xref,
                    draw_cmd.native_extents.format())
    self._draw_cmds[xref].append(draw_cmd)
def get_fontfile_object(self, objid):
    """Build a PDF object that holds this font's data as a stream.

    The stream is the concatenation of the clear, cipher and trailer
    segments (created with ``compress=True``); the lengths of the three
    segments are stored as ``/Length1``, ``/Length2`` and ``/Length3``.

    :param objid: object id passed to ``PDFObject.create`` (generation 0).
    :return: the created object.
    """
    clear = self._cleardata
    cipher = self._cipherdata
    trailer = self._trailerdata
    meta = {
        PDFName("/Length1"): len(clear),
        PDFName("/Length2"): len(cipher),
        PDFName("/Length3"): len(trailer),
    }
    payload = clear + cipher + trailer
    encoded = EncodedObject.create(payload, compress=True)
    return PDFObject.create(objid, 0, meta, encoded)
def run(self):
    """Print all font objects, then all signature objects, of the PDF.

    The document is walked twice so that every ``/Font`` object is
    printed before the first ``/Sig`` object.
    """
    def objects_of_type(type_name):
        # Lazily yield the objects whose /Type attribute equals type_name.
        for obj in self._pdf:
            if obj.getattr(PDFName("/Type")) == PDFName(type_name):
                yield obj

    for font_obj in objects_of_type("/Font"):
        self._print_font(font_obj)
    for sig_obj in objects_of_type("/Sig"):
        self._print_sig(sig_obj)
def from_fontfile_obj(cls, fontfile_object):
    """Create an instance from a font file object.

    The decoded stream is split into three consecutive segments using
    the ``/Length1`` and ``/Length2`` entries of the object's content;
    everything after the first two segments becomes the trailer.

    :param fontfile_object: object providing ``content`` and ``stream``.
    :return: ``cls(cleardata, cipherdata, trailerdata)``.
    """
    meta = fontfile_object.content
    clear_len = meta[PDFName("/Length1")]
    cipher_len = meta[PDFName("/Length2")]
    # /Length3 is not needed for slicing; it is still looked up so that an
    # object lacking the entry keeps raising KeyError.
    _trailer_len = meta[PDFName("/Length3")]

    raw = fontfile_object.stream.decode()
    cipher_end = clear_len + cipher_len
    return cls(raw[:clear_len], raw[clear_len:cipher_end], raw[cipher_end:])
def test_dict(self):
    """A ``BDC`` command with a name operand and an inline dictionary."""
    source = "/Artifact <</Attached [/Top]/Type/Pagination>> BDC"
    properties = {
        PDFName("/Attached"): [PDFName("/Top")],
        PDFName("/Type"): PDFName("/Pagination"),
    }
    expected = [GraphCommand("BDC", PDFName("/Artifact"), properties)]
    self.assertEqual(GraphicsParser.parse(source), expected)
def from_object(cls, obj):
    """Create an encoded-object wrapper from a PDF object's raw stream.

    The filter is taken from ``/Filter`` (a single name or a one-element
    list); without that entry the data is treated as uncompressed. A
    predictor is only used when ``/DecodeParms`` contains ``/Predictor``.

    :param obj: object providing ``content`` and ``raw_stream``.
    :return: ``cls`` instance for ``obj.raw_stream``.
    :raises Exception: if more than one filter is applied.
    :raises KeyError: if the filter is not in ``cls._REV_FILTER_MAP``.
    """
    meta = obj.content

    if PDFName("/Filter") in meta:
        pdf_filter = meta[PDFName("/Filter")]
        if isinstance(pdf_filter, list):
            if len(pdf_filter) != 1:
                raise Exception(
                    "Cannot create EncodedObject from object that has multiple filters applied: %s"
                    % (str(pdf_filter)))
            pdf_filter = pdf_filter[0]
        filtering = cls._REV_FILTER_MAP[pdf_filter]
    else:
        filtering = Filter.Uncompressed

    parms_key = PDFName("/DecodeParms")
    if (parms_key in meta) and (PDFName("/Predictor") in meta[parms_key]):
        parms = meta[parms_key]
        predictor = Predictor(parms[PDFName("/Predictor")])
        # /Columns is optional in the decode parameters and defaults to 1;
        # a mandatory lookup raised KeyError for such streams.
        columns = parms.get(PDFName("/Columns"), 1)
    else:
        predictor = Predictor.NoPredictor
        columns = 1

    return cls(encoded_data=obj.raw_stream,
               filtering=filtering,
               predictor=predictor,
               columns=columns)
def serialize_xref_object(self, trailer_dict, objid):
    """Create the ``/XRef`` stream object for this cross-reference table.

    The content is a copy of ``trailer_dict`` extended by ``/Type``,
    ``/Index``, ``/Size`` and ``/W``; the stream holds the serialized
    table entries.

    :param trailer_dict: trailer entries to copy into the object.
    :param objid: object id of the new object (generation 0).
    :return: the created object.
    """
    width = self._get_offset_width()
    object_count = self._max_objid + 1

    content = dict(trailer_dict)
    content[PDFName("/Type")] = PDFName("/XRef")
    content[PDFName("/Index")] = [0, object_count]
    content[PDFName("/Size")] = object_count
    content[PDFName("/W")] = [1, width, 1]

    table_data = self._serialize_xref_data(width)
    stream = EncodedObject.create(table_data)
    return PDFObject.create(objid=objid, gennum=0, content=content,
                            stream=stream)
def meta_dict(self):
    """Return the stream dictionary entries describing this object.

    Always contains ``/Length``. ``/Filter`` is added unless the data is
    uncompressed, and ``/DecodeParms`` (columns and predictor) is added
    when a predictor is in use.
    """
    entries = {PDFName("/Length"): len(self)}

    if self._filtering != Filter.Uncompressed:
        entries[PDFName("/Filter")] = self._FILTER_MAP[self._filtering]

    if self._predictor != Predictor.NoPredictor:
        decode_parms = {
            PDFName("/Columns"): self.columns,
            PDFName("/Predictor"): int(self.predictor),
        }
        entries[PDFName("/DecodeParms")] = decode_parms

    return entries
def _add_color_profile(self):
    """Add a color profile stream object to the PDF and return its xref.

    The profile is read from ``self._args.color_profile`` when given,
    otherwise the profile bundled in ``llpdf.resources`` is used.
    """
    custom_profile = self._args.color_profile
    if custom_profile is not None:
        with open(custom_profile, "rb") as f:
            profile_data = f.read()
    else:
        profile_data = pkgutil.get_data(
            "llpdf.resources", "sRGB_IEC61966-2-1_black_scaled.icc")

    content = {
        PDFName("/N"): 3,
        PDFName("/Range"): [0, 1, 0, 1, 0, 1],
    }
    objid = self._pdf.get_free_objid()
    stream = EncodedObject.create(profile_data)
    profile_object = PDFObject.create(objid, gennum=0, content=content,
                                      stream=stream)
    self._pdf.replace_object(profile_object)
    return profile_object.xref
def _generate_form(self):
    """Merge the seal and signature form templates into the PDF.

    After merging, the form's ``/BBox`` is set to the signature bounding
    box and the ``${WIDTH}``, ``${HEIGHT}`` and ``${TEXT}`` placeholders
    in the form's stream are substituted. The stream is then stored
    again with ``compress=True``.

    :return: xref of the signature form object.
    """
    font_xref = self._get_font_reference()

    seal = PDFTemplate(pkgutil.get_data("llpdf.resources", "seal.pdft"))
    seal_xref = seal.merge_into_pdf(self._pdf)["SealObject"]

    form_template = PDFTemplate(
        pkgutil.get_data("llpdf.resources", "sign_form.pdft"))
    form_template["FontXRef"] = font_xref
    form_template["SealFormXRef"] = seal_xref
    form_xref = form_template.merge_into_pdf(self._pdf)["SignFormObject"]

    form = self._pdf.lookup(form_xref)
    form.content[PDFName("/BBox")] = self._get_signature_bbox()
    stream_data = form.stream.decode()

    (_, _, width, height) = self._get_signature_bbox()
    substitutions = {
        "WIDTH": b"%.0f" % (width - 1),
        "HEIGHT": b"%.0f" % (height - 1),
        "TEXT": self._get_signing_text(),
    }
    for name in substitutions:
        placeholder = ("${" + name + "}").encode("ascii")
        stream_data = stream_data.replace(placeholder, substitutions[name])

    form.set_stream(EncodedObject.create(stream_data, compress=True))
    return form_xref
def get_font_name(self):
    """Return the font name found in the clear text segment.

    The clear text data is decoded as ASCII and searched with
    ``self._FONT_NAME_RE``; the ``name`` group of the match is returned
    as a ``PDFName``.

    :raises Exception: if the pattern does not match.
    """
    match = self._FONT_NAME_RE.search(self._cleardata.decode("ascii"))
    if match is None:
        raise Exception("/FontName not found in clear text data of T1 font.")
    return PDFName(match.groupdict()["name"])
def test_pdf_comment(self):
    """A ``%`` comment inside a dictionary must not end up in the result."""
    source = """<< /Length 213 0 R % Foobar >>"""
    expected = {
        PDFName("/Length"): PDFXRef(213, 0),
    }
    self.assertEqual(PDFParser.parse(source), expected)
def _add_xmp_metadata(self):
    """Add an XMP ``/Metadata`` stream object and return its xref.

    The bundled ``xmp_metadata.xml`` template is filled from the
    document's info entries. Modification and creation dates come from
    ``/ModDate`` and ``/CreationDate`` of the info node; when an entry is
    absent, the current local time is used instead. The stream is stored
    with ``compress=False``.
    """
    info_node = self._pdf.lookup(self._pdf.trailer[PDFName("/Info")])
    metadata_date = Timestamp.localnow()

    def info_timestamp(entry_name):
        # Timestamp of an info entry, falling back to the metadata date.
        key = PDFName(entry_name)
        if key in info_node.content:
            return Timestamp.frompdf(info_node.content[key].decode("ascii"))
        return metadata_date

    modify_date = info_timestamp("/ModDate")
    create_date = info_timestamp("/CreationDate")

    get_info = self._pdf.get_info
    xmp_metadata = {
        "creator_tool": get_info("Creator"),
        "producer": get_info("Producer"),
        "modify_date": modify_date.format_xml(),
        "create_date": create_date.format_xml(),
        "metadata_date": metadata_date.format_xml(),
        "description": get_info("Subject"),
        "title": get_info("Title"),
        "creator": get_info("Author"),
        "keywords": get_info("Keywords"),
        "document_uuid": str(uuid.uuid4()),
        "instance_uuid": str(uuid.uuid4()),
        "llpdf_version": "llpdf " + llpdf.VERSION,
    }

    template = pkgutil.get_data("llpdf.resources", "xmp_metadata.xml")
    rendered = template.decode("utf-8") % xmp_metadata
    stream = rendered.encode("utf-8")

    content = {
        PDFName("/Type"): PDFName("/Metadata"),
        PDFName("/Subtype"): PDFName("/XML"),
    }
    objid = self._pdf.get_free_objid()
    metadata_object = PDFObject.create(
        objid, gennum=0, content=content,
        stream=EncodedObject.create(stream, compress=False))
    self._pdf.replace_object(metadata_object)
    return metadata_object.xref
def _add_color_intent(self, color_profile_xref):
    """Add an output intent array object and return its xref.

    The new object's content is a one-element list holding an
    ``/OutputIntent`` dictionary that points at the given color profile.

    :param color_profile_xref: reference stored as ``/DestOutputProfile``.
    """
    output_intent = {
        PDFName("/Type"): PDFName("/OutputIntent"),
        PDFName("/DestOutputProfile"): color_profile_xref,
        PDFName("/Info"): b"sRGB IEC61966-2.1",
        PDFName("/OutputCondition"): b"sRGB",
        PDFName("/OutputConditionIdentifier"): b"Custom",
        PDFName("/RegistryName"): b"",
        PDFName("/S"): PDFName("/GTS_PDFA1"),
    }
    objid = self._pdf.get_free_objid()
    intent_object = PDFObject.create(objid, gennum=0,
                                     content=[output_intent])
    self._pdf.replace_object(intent_object)
    return intent_object.xref
def _replace_image(self, img_xref, resampled_image):
    """Replace an image object (and its alpha mask) by a resampled one.

    The new image object reuses the object/generation number of
    ``img_xref`` and keeps the ``/SMask`` reference of the old image. If
    the resampled image has an alpha channel, the object referenced by
    ``/SMask`` is replaced as well.
    """
    old_meta = self._pdf.lookup(img_xref).content
    mask_xref = old_meta.get(PDFName("/SMask"))

    replacement = PDFObject.create_image(
        img_xref.objid, img_xref.gennum, resampled_image,
        alpha_xref=mask_xref)
    self._pdf.replace_object(replacement)

    if not resampled_image.alpha:
        return
    mask_replacement = PDFObject.create_image(
        mask_xref.objid, mask_xref.gennum, resampled_image.alpha)
    self._pdf.replace_object(mask_replacement)
def run(self):
    """Flatten every image that has an ``/SMask`` onto a background color.

    Each such image is replaced in the PDF by the flattened version,
    keeping its object and generation number. The background color is
    taken from ``self._args.background_color``.
    """
    reformatter = ImageReformatter(lossless=True, scale_factor=1)
    for image_obj in self._pdf.image_objects:
        if PDFName("/SMask") not in image_obj.content:
            continue
        original = self._pdf.get_image(image_obj.xref)
        flattened = reformatter.flatten(
            original, background_color=self._args.background_color)
        replacement = PDFObject.create_image(
            image_obj.xref.objid, image_obj.xref.gennum, flattened)
        self._pdf.replace_object(replacement)
def serialize(self, serializer):
    """Serialize the contained objects into one ``/ObjStm`` object.

    The stream starts with a header line of space-separated
    ``objid offset`` pairs, followed by the serialized contents of the
    objects, each terminated by a newline. Offsets are relative to the
    start of the data part, whose position is stored in ``/First``.

    :param serializer: object whose ``serialize`` turns content to bytes.
    :return: the created object stream object (generation 0).
    """
    index = []
    body = bytearray()
    for contained in self._contained_objects:
        serialized = serializer.serialize(contained.content)
        index += [contained.objid, len(body)]
        body += serialized + b"\n"

    header = " ".join(map(str, index)).encode("utf-8") + b"\n"
    content = {
        PDFName("/Type"): PDFName("/ObjStm"),
        PDFName("/N"): self.objects_inside_count,
        PDFName("/First"): len(header),
    }
    stream = EncodedObject.create(header + body)
    return PDFObject.create(objid=self.objid, gennum=0, content=content,
                            stream=stream)
def __init__(self, objid, gennum, rawdata):
    """Initialize the object from its raw byte representation.

    ``rawdata`` is split at the ``stream``/``endstream`` tokens into a
    content part, which is decoded as latin1 and parsed, and a stream
    part. When the parsed content has a direct integer ``/Length``, the
    stream is truncated to that length. With ``rawdata`` set to ``None``
    both content and stream are ``None``.

    :param objid: object number (int).
    :param gennum: generation number (int).
    :param rawdata: raw object bytes or ``None``.
    """
    assert objid is not None
    assert gennum is not None
    assert isinstance(objid, int)
    assert isinstance(gennum, int)
    self._objid = objid
    self._gennum = gennum

    if rawdata is None:
        self._stream = None
        self._content = None
        return

    reader = StreamRepr(rawdata)
    before_stream = reader.read_until_token(b"stream")
    if before_stream is not None:
        payload = reader.read_until_token(b"endstream")
    else:
        # No stream in this object found, just content
        payload = None

    if payload is not None:
        raw_content = before_stream
        self._stream = payload
    else:
        # Either no stream at all or probably erroneous stream data
        # ("stream" maybe in dict, but no "endstream")
        raw_content = rawdata
        self._stream = None

    text = raw_content.decode("latin1")
    # Remove line continuations
    for continuation in ("\\\r\n", "\\\n", "\\\r"):
        text = text.replace(continuation, "")
    self._content = PDFParser.parse(text)

    if self._stream is not None:
        length_key = PDFName("/Length")
        if length_key in self._content:
            declared_length = self._content[length_key]
            if isinstance(declared_length, int):
                # When direct length field is given, then truncate the
                # stream according to it. For indirect streams, we don't
                # do this (yet)
                self._stream = self._stream[:declared_length]
def run(self):
    """Set the ``/CropBox`` of every page from the command line arguments.

    ``self._args.cropbox`` is read as (x, y, width, height) in
    ``self._args.unit``; the values are converted to native units and
    stored as [x, y, x + width, y + height].
    """
    converted = [
        Measurements.convert(value, self._args.unit, "native")
        for value in self._args.cropbox
    ]
    x0 = converted[0]
    y0 = converted[1]
    cropbox = [x0, y0, x0 + converted[2], y0 + converted[3]]

    cropbox_key = PDFName("/CropBox")
    for page in self._pdf.pages:
        page.content[cropbox_key] = cropbox
def _run_command(self, cmd):
    """Interpret a single graphics command.

    Tracks the graphics state stack (``q``/``Q``), the current
    transformation matrix (``cm``), the current path (``re`` and the
    path-finishing operators) and the non-stroking color (``scn``). A
    plain ``f`` fill of a path consisting of one rectangle triggers
    ``_callback_fill_rect``; ``Do`` reports the drawn object to the draw
    callback if one is installed. Unknown commands are ignored.
    """
    opcode = cmd.command

    if opcode == "q":
        # Save graphic state
        self._gss.append(dict(self._gs))
        return
    if opcode == "Q":
        # Restore graphic state
        self._gs = self._gss.pop()
        return
    if opcode == "cm":
        # Apply to current transformation matrix (CTM)
        self._gs["CTM"] *= TransformationMatrix(*cmd.args)
        return
    if opcode == "re":
        # Append a rectangle to the current path
        self._path.append(cmd)
        return
    if opcode in ("W", "gs"):
        # Clipping and graphic state loading are recognized but unused
        return
    if opcode in ("S", "s", "f", "F", "f*", "B", "B*", "b", "b*", "n"):
        # Any of these commands will finish a path
        single_rect_fill = (
            (opcode == "f")
            and (len(self._path) == 1)
            and (self._path[0].command == "re"))
        if single_rect_fill:
            self._callback_fill_rect()
        self._path = []
        return
    if opcode == "scn":
        # Set color for non-stroking
        self._gs["color_ns"] = cmd.args[0]
        return
    if opcode != "Do":
        return

    # Draw object
    if self._draw_callback is None:
        return
    handle = cmd.args[0]
    resources = self._page_resources
    if isinstance(resources, PDFObject):
        resources = resources.content
    image_xref = resources[PDFName("/XObject")][handle]
    image_obj = self._pdf_lookup.lookup(image_xref)
    native_extents = self._gs["CTM"].extents([0, 0, 1, 1])
    self._log.debug("Draw object of %s found with CTM %s; %s", image_obj,
                    self._gs["CTM"], native_extents.format())
    result = self._DirectDrawCallbackResult(
        drawtype="direct",
        image_obj=image_obj,
        native_extents=native_extents)
    self._draw_callback(result)
def build_encoding_array(codec):
    """Build a list of code points and glyph names for a single-byte codec.

    Every byte value 0..255 is decoded with ``codec`` and looked up in
    ``character_names``. For each byte with a known glyph name the name
    is appended as a ``PDFName``; the byte value itself is inserted in
    front of a name whenever it does not directly follow the previously
    emitted byte value, i.e. at the start of each consecutive run.

    Byte values that the codec cannot decode (for example the unassigned
    positions of ``cp1252``) are skipped instead of aborting the whole
    array with ``UnicodeDecodeError``.

    :param codec: name of the codec used to decode the byte values.
    :return: list mixing int code points and ``PDFName`` glyph names.
    """
    entries = []
    last_codepoint = None
    for codepoint in range(256):
        try:
            char = bytes([codepoint]).decode(codec)
        except UnicodeDecodeError:
            # Unassigned in this codec: there is no character to name.
            continue
        psname = character_names.get(char)
        if psname is None:
            continue
        if (last_codepoint is None) or (codepoint != last_codepoint + 1):
            entries.append(codepoint)
        entries.append(PDFName("/" + psname))
        last_codepoint = codepoint
    return entries
def _read_endfile(self, f, pdf):
    """Read the end-of-file section of a PDF and fill in ``pdf.trailer``.

    Lines are read from ``f`` until the ``%%EOF`` marker. Recognized
    tokens are ``xref``, ``trailer`` and ``startxref``; when no trailer
    has been read by the time ``startxref`` is seen, the object at the
    given offset is parsed and its content is used as trailer, with its
    stream fed to ``pdf.xref_table.parse_xref_object``.

    :param f: file object; must provide ``tell``, ``readline`` and
        ``tempseek``.
    :param pdf: object whose ``trailer`` is set and whose ``xref_table``
        is updated.
    :raises Exception: on any other non-empty line.
    """
    self._log.debug("Reading end-of-file data at 0x%x.", f.tell())
    while True:
        line = self._read_textline(f).strip("\r\n ")
        if line == "":
            # NOTE(review): if _read_textline returns "" at end of file, a
            # file lacking the %%EOF marker would loop here forever --
            # confirm how _read_textline signals EOF.
            continue
        elif line == "xref":
            # NOTE(review): the parsed table is bound to a local that is
            # not used again within this method -- confirm whether it
            # should be stored on ``pdf``.
            xref_table = XRefTable.read_xref_table_from_file(f)
        elif line == "trailer":
            trailer = self._read_trailer(f)
            pdf.trailer = trailer
        elif line == "startxref":
            # The line following "startxref" holds the xref offset.
            xref_offset = int(f.readline())
            if len(pdf.trailer) == 0:
                # Compressed XRef directory
                with f.tempseek(xref_offset) as marker:
                    self._log.trace(
                        "Will parse XRef stream at offset 0x%x referenced from 0x%x."
                        % (xref_offset, marker.prev_offset))
                    xref_object = PDFObject.parse(f)
                    if xref_object is None:
                        self._log.error(
                            "Could not parse a valid type /XRef object at 0x%x. Corrupt PDF?",
                            xref_offset)
                    else:
                        # The XRef stream's dictionary doubles as trailer.
                        trailer = xref_object.content
                        assert (
                            trailer[PDFName("/Type")] == PDFName("/XRef"))
                        pdf.trailer = trailer
                        # /Index is optional here (looked up with .get),
                        # /W is required.
                        pdf.xref_table.parse_xref_object(
                            xref_object.stream.decode(),
                            pdf.trailer.get(PDFName("/Index")),
                            pdf.trailer[PDFName("/W")])
        elif line == "%%EOF":
            self._log.debug("Hit EOF marker at 0x%x.", f.tell())
            break
        else:
            raise Exception("Unknown end file token '%s' at offset 0x%x." %
                            (line, f.tell()))
def run(self):
    """Embed the payload file into the PDF as a new stream object.

    The content of ``self._args.embed_payload`` is stored with
    ``compress=False`` in a new object whose dictionary records the
    file's base name, its modification time (UTC, formatted as
    ``%Y-%m-%dT%H:%M:%SZ``) and the llpdf version.
    """
    payload_path = self._args.embed_payload
    with open(payload_path, "rb") as f:
        payload = f.read()
    objid = self._pdf.get_free_objid()
    self._log.debug(
        "Embedding %d bytes payload from file \"%s\" into PDF file as objid %d",
        len(payload), payload_path, objid)

    mtime = os.stat(payload_path).st_mtime
    # datetime.utcfromtimestamp() is deprecated; the timezone-aware
    # conversion yields the same UTC wall-clock fields for strftime().
    mtime_utc = datetime.datetime.fromtimestamp(
        mtime, tz=datetime.timezone.utc)
    mtime_str = mtime_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    content = {
        PDFName("/PDFMinify.OriginalFilename"):
        os.path.basename(payload_path).encode(),
        PDFName("/PDFMinify.MTime"): mtime_str.encode(),
        PDFName("/PDFMinify.Version"): llpdf.VERSION.encode(),
    }
    obj = PDFObject.create(objid=objid, gennum=0, content=content)
    obj.set_stream(EncodedObject.create(payload, compress=False))
    self._pdf.replace_object(obj)
def create_raw_from_object(cls, xobj):
    """Create an image from an image XObject's dictionary and stream.

    Only ``/DeviceRGB`` and ``/DeviceGray`` images with at most one
    filter and a trivial ``/Decode`` array (``[0, 1]`` or the inverting
    ``[1, 0]``) are supported.

    :param xobj: object providing ``content`` and ``stream``.
    :return: ``cls`` instance built from the object's metadata.
    :raises UnsupportedImageException: if the image has no width, a
        non-trivial decode array, several filters, a list-valued color
        space or an unknown color space.
    :raises KeyError: if ``/Height``, ``/ColorSpace`` or
        ``/BitsPerComponent`` is missing.
    """
    meta = xobj.content
    if PDFName("/Width") not in meta:
        raise UnsupportedImageException(
            "Unsupported image without width: %s" % (xobj))
    width = meta[PDFName("/Width")]
    height = meta[PDFName("/Height")]
    colorspace_info = meta[PDFName("/ColorSpace")]
    bits_per_component = meta[PDFName("/BitsPerComponent")]
    # /Filter is optional: an unfiltered stream simply has no such entry.
    # It is only inspected to reject multi-filter chains, so its absence
    # must not raise KeyError.
    filter_info = meta.get(PDFName("/Filter"))

    decode = meta.get(PDFName("/Decode"))
    if (decode is None) or (decode == [0, 1]):
        inverted = False
    elif decode == [1, 0]:
        inverted = True
    else:
        raise UnsupportedImageException(
            "Cannot generate PDFImage object with non-trivial value decode array: %s"
            % (decode))

    if isinstance(filter_info, list) and (len(filter_info) != 1):
        raise UnsupportedImageException(
            "Multi-filter application is unsupported as of now: %s." %
            (filter_info))

    if isinstance(colorspace_info, list):
        raise UnsupportedImageException(
            "Indexed images are currently unsupported: %s" %
            (colorspace_info))
    colorspace = {
        PDFName("/DeviceRGB"): PDFImageColorSpace.DeviceRGB,
        PDFName("/DeviceGray"): PDFImageColorSpace.DeviceGray,
    }.get(colorspace_info)
    if colorspace is None:
        raise UnsupportedImageException(
            "Unsupported image color space '%s'." % (colorspace_info))

    return cls(width=width,
               height=height,
               colorspace=colorspace,
               bits_per_component=bits_per_component,
               imgdata=xobj.stream,
               inverted=inverted)
def test_dict(self):
    """Flat, nested and integer-valued dictionaries are parsed."""
    cases = [
        ("<< /Foo /Bar >>",
         {PDFName("/Foo"): PDFName("/Bar")}),
        ("<< /Foo << /Bar /Koo>> >>",
         {PDFName("/Foo"): {PDFName("/Bar"): PDFName("/Koo")}}),
        ("<< /Foobar13478 123 >>",
         {PDFName("/Foobar13478"): 123}),
    ]
    for (source, expected) in cases:
        self.assertEqual(PDFParser.parse(source), expected)
def run(self):
    """Sign the PDF and place a signature annotation on the chosen page.

    Loads the signing font (``self._args.sign_font`` or the bundled
    one), locates page ``self._args.sign_page`` (1-based), creates the
    signature, form, lock and annotation objects, registers the
    annotation in the page's ``/Annots`` and writes the ``/AcroForm``
    dictionary into the document catalog.

    :raises Exception: if the page does not exist or the catalog already
        has an ``/AcroForm`` entry.
    """
    if self._args.sign_font is not None:
        self._font = T1Font.from_pfb_file(self._args.sign_font)
    else:
        pfb_data = pkgutil.get_data("llpdf.resources", "bchr8a.pfb")
        self._font = T1Font.from_pfb_data(pfb_data)
    self._sign_datetime = Timestamp.localnow()
    self._log.debug("Signing document: Timestamp %s", self._sign_datetime)

    annotated_page_xref = None
    for (pageno, page) in enumerate(self._pdf.pages, 1):
        if pageno == self._args.sign_page:
            annotated_page_xref = page.xref
            break
    if annotated_page_xref is None:
        raise Exception(
            "Could not find page #%d in document on which to place the digital signature."
            % (self._args.sign_page))

    signature_xref = self._sign_pdf()
    form_xref = self._generate_form()
    lock_xref = self._generate_lock()
    annot_xref = self._generate_signature_annotation(
        signature_xref, form_xref, lock_xref, annotated_page_xref)

    page = self._pdf.lookup(annotated_page_xref)
    annots_key = PDFName("/Annots")
    if annots_key not in page.content:
        page.content[annots_key] = self._create_object([annot_xref])
    else:
        annots = page.content[annots_key]
        if not isinstance(annots, list):
            # /Annots may be stored as an indirect reference (this method
            # stores it that way itself); append to the referenced array
            # instead of calling .append() on the reference.
            annots = self._pdf.lookup(annots).content
        annots.append(annot_xref)

    root_xref = self._pdf.trailer[PDFName("/Root")]
    root_obj = self._pdf.lookup(root_xref)

    # Write the interactive form dictionary. The guard has to inspect the
    # catalog, which is the dictionary that is overwritten below; checking
    # the page dictionary never detected an existing form.
    if PDFName("/AcroForm") in root_obj.content:
        raise Exception(
            "We already have an /AcroForm set, unsure how to handle it. Bailing out instead of overwriting current contents."
        )
    root_obj.content[PDFName("/AcroForm")] = self._create_object({
        PDFName("/Fields"): [annot_xref],
        PDFName("/SigFlags"):
        SignatureFlag.SignaturesExist | SignatureFlag.AppendOnly,
    })
def test_array(self):
    """Arrays of numbers, names, references and nested dictionaries."""
    cases = [
        ("[ 1 2 /Foo 3 4 ]",
         [1, 2, PDFName("/Foo"), 3, 4]),
        ("[ /Foobar13478 /Barfoo999 ]",
         [PDFName("/Foobar13478"), PDFName("/Barfoo999")]),
        ("[ 12345 9999 48 489 8473 << /foo 3939 >>]",
         [12345, 9999, 48, 489, 8473, {PDFName("/foo"): 3939}]),
        ("[ 12345 9999 48 489 R 8473 3.43984 << /foo 3939 >>]",
         [12345, 9999, PDFXRef(48, 489), 8473, 3.43984,
          {PDFName("/foo"): 3939}]),
        ("[ 0.333679 0 0 0.333468 78.832642 172.074584 ]",
         [0.333679, 0, 0, 0.333468, 78.832642, 172.074584]),
        ("[ 1.2345 1.2345 ]",
         [1.2345, 1.2345]),
    ]
    for (source, expected) in cases:
        self.assertEqual(PDFParser.parse(source), expected)