def insert_page(self, page: Page, index: int = -1) -> "Document":  # type: ignore [name-defined]
    """
    Insert a page into the page tree of this Document.

    Any missing part of the XRef -> Trailer -> Root -> Pages skeleton is
    created first. The page is then added to /Kids and /Count is increased
    by one.

    :param page:    the Page to insert
    :param index:   position in /Kids at which the page is inserted. A
                    negative index counts from the end of /Kids, so the
                    default (-1) places the page after the last existing page.
    :return:        self
    """
    # build XRef
    if "XRef" not in self:
        self["XRef"] = PlainTextXREF()

    # build Trailer
    if "Trailer" not in self["XRef"]:
        self["XRef"]["Trailer"] = Dictionary()
        # NOTE(review): Size is stored on the XRef itself, not on the Trailer
        # dictionary - confirm this is intended.
        self["XRef"][Name("Size")] = Decimal(0)

    # build Root
    if "Root" not in self["XRef"]["Trailer"]:
        self["XRef"]["Trailer"][Name("Root")] = Dictionary()

    # build Pages
    if "Pages" not in self["XRef"]["Trailer"]["Root"]:
        self["XRef"]["Trailer"][Name("Root")][Name("Pages")] = Dictionary()
        self["XRef"]["Trailer"][Name("Root")][Name("Pages")][Name("Count")] = Decimal(0)
        self["XRef"]["Trailer"][Name("Root")][Name("Pages")][Name("Kids")] = List()

    pages = self["XRef"]["Trailer"]["Root"]["Pages"]

    # update /Kids
    kids = pages["Kids"]
    assert kids is not None
    assert isinstance(kids, List)
    # list.insert(-1, x) would place x *before* the last element; translate
    # negative indices so that -1 means "append", -2 "before the last", ...
    if index < 0:
        index = max(0, len(kids) + index + 1)
    kids.insert(index, page)

    # update /Count
    prev_count = pages["Count"]
    pages["Count"] = Decimal(prev_count + 1)

    # return
    return self
def test_hash_types(self):
    """
    Build a small Dictionary and a small List, then print the hash of the List.
    """
    # dictionary holding a reference and a boolean (built, but not hashed)
    sample_dictionary = Dictionary()
    sample_dictionary[Name("Root")] = Reference(object_number=10)
    sample_dictionary[Name("Marked")] = Boolean(True)

    # list holding a name and a number
    sample_list = List()
    for element in (Name("Red"), Decimal(0.5)):
        sample_list.append(element)

    print(hash(sample_list))
def test_document(self, file) -> bool:
    """
    Load a PDF, set the /Producer entry of its /Info dictionary to "pText"
    and write the result to the output directory.

    :param file:    path of the PDF to process; its stem is used to build the
                    name of the output file ("<stem>_out.pdf")
    :return:        False when the loaded document has no XRef or no Trailer,
                    True once the modified document has been written
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # read the document (the input file only needs to be opened once)
    with open(file, "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle)

    if "XRef" not in doc:
        return False
    if "Trailer" not in doc["XRef"]:
        return False

    if "Info" not in doc["XRef"]["Trailer"]:
        doc["XRef"]["Trailer"][Name("Info")] = Dictionary()

    # change producer
    doc["XRef"]["Trailer"]["Info"]["Producer"] = String("pText")

    # determine output location
    out_file = self.output_dir / (file.stem + "_out.pdf")

    # write to the open handle, not to the path
    with open(out_file, "wb") as pdf_file_handle:
        PDF.dumps(pdf_file_handle, doc)

    return True
def test_document(self, file) -> bool:
    """
    Load a PDF, set the "Author" entry of its Info dictionary and write the
    result to "<stem>_out.pdf" in the output directory.

    :param file:    path of the PDF to process
    :return:        False when the loaded document lacks an XRef or a Trailer,
                    True after the modified document has been written
    """
    # make sure there is a directory to write to
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # read the document
    with open(file, "rb") as in_handle:
        doc = PDF.loads(in_handle)

    # without a trailer there is nothing to modify
    if "XRef" not in doc or "Trailer" not in doc["XRef"]:
        return False

    trailer = doc["XRef"]["Trailer"]
    if "Info" not in trailer:
        trailer[Name("Info")] = Dictionary()

    # change author
    trailer["Info"]["Author"] = String("Joris Schellekens")

    # write the document next to the other outputs
    destination = target_dir / (file.stem + "_out.pdf")
    with open(destination, "wb") as out_handle:
        PDF.dumps(out_handle, doc)

    return True
def __init__(self):
    """
    Create a Page whose /MediaBox is [0 0 595 842] (A4 portrait).
    """
    super(Page, self).__init__()

    # size: A4 portrait, stored as a list that is flagged as non-referenceable
    self[Name("MediaBox")] = List().set_can_be_referenced(False)
    media_box = self["MediaBox"]
    for coordinate in (0, 0, 595, 842):
        media_box.append(pDecimal(coordinate))
def append_watermark_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    contents: str,
) -> "Page":
    """
    Add an annotation with /Subtype /Watermark to this Page.

    :param rectangle:   forwarded to the generic annotation builder
    :param contents:    forwarded to the generic annotation builder
    :return:            self
    """
    # generic annotation, specialised into a watermark
    watermark = self._create_annotation(rectangle=rectangle, contents=contents)
    watermark[Name("Subtype")] = Name("Watermark")

    # make sure the page has an /Annots list, then add the annotation to it
    if "Annots" not in self:
        self[Name("Annots")] = List()
    annotations = self["Annots"]
    assert isinstance(annotations, List)
    annotations.append(watermark)

    return self
def append_stamp_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    contents: Optional[str] = None,
    color: Optional[Color] = None,
    name: Optional[str] = None,
) -> "Page":
    """
    Add an annotation with /Subtype /Stamp to this Page.

    :param rectangle:   forwarded to the generic annotation builder
    :param contents:    forwarded to the generic annotation builder
    :param color:       forwarded to the generic annotation builder
    :param name:        optional value for /Name; when given it must be one
                        of the names accepted below (checked with an assert)
    :return:            self
    """
    # values accepted for /Name
    accepted_names = (
        "Approved",
        "Experimental",
        "NotApproved",
        "Asis",
        "Expired",
        "NotForPublicRelease",
        "Confidential",
        "Final",
        "Sold",
        "Departmental",
        "ForComment",
        "TopSecret",
        "Draft",
        "ForPublicRelease",
    )

    # generic annotation, specialised into a stamp
    stamp = self._create_annotation(
        rectangle=rectangle, contents=contents, color=color
    )
    stamp[Name("Subtype")] = Name("Stamp")
    if name is not None:
        assert name in accepted_names
        stamp[Name("Name")] = Name(name)

    # make sure the page has an /Annots list, then add the annotation to it
    if "Annots" not in self:
        self[Name("Annots")] = List()
    annotations = self["Annots"]
    assert isinstance(annotations, List)
    annotations.append(stamp)

    return self
def append_highlight_annotation(
    self,
    rectangle: Rectangle,
    color: Color = X11Color("Yellow"),
    contents: Optional[str] = None,
) -> "Page":
    """
    Add an annotation with /Subtype /Highlight covering a rectangle.

    Besides the generic entries, the annotation receives /QuadPoints (the
    four corners of the rectangle), a /Border of [0 0 1] and a /CA of 1.

    :param rectangle:   area to highlight; read through get_x(), get_y(),
                        get_width() and get_height()
    :param color:       forwarded to the generic annotation builder
    :param contents:    forwarded to the generic annotation builder
    :return:            self
    """
    highlight = self._create_annotation(
        rectangle=rectangle, color=color, contents=contents
    )
    highlight[Name("Subtype")] = Name("Highlight")

    # edges of the rectangle
    left = rectangle.get_x()
    bottom = rectangle.get_y()
    right = left + rectangle.get_width()
    top = bottom + rectangle.get_height()

    # /QuadPoints: one quadrilateral, written corner by corner in the order
    # (left, bottom), (left, top), (right, bottom), (right, top)
    highlight[Name("QuadPoints")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    quad_points = highlight["QuadPoints"]
    for coordinate in (left, bottom, left, top, right, bottom, right, top):
        quad_points.append(pDecimal(coordinate))

    # /Border
    highlight[Name("Border")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    border = highlight["Border"]
    for border_value in (0, 0, 1):
        border.append(pDecimal(border_value))

    # /CA
    highlight[Name("CA")] = pDecimal(1)

    # make sure the page has an /Annots list, then add the annotation to it
    if "Annots" not in self:
        self[Name("Annots")] = List()
    annotations = self["Annots"]
    assert isinstance(annotations, List)
    annotations.append(highlight)

    return self
def append_highlight_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    color: Color,
) -> "Page":
    """
    Build an annotation with /Subtype /Highlight and an empty /QuadPoints list.

    NOTE(review): the annotation is built but never added to /Annots, and its
    /QuadPoints list is left empty - this looks unfinished; confirm before
    relying on it.

    :param rectangle:   forwarded to the generic annotation builder
    :param color:       forwarded to the generic annotation builder
    :return:            self
    """
    # generic annotation, without its "Rect" entry
    highlight = self._create_annotation(rectangle=rectangle, color=color)
    highlight.pop("Rect")

    # annotation type
    highlight[Name("Subtype")] = Name("Highlight")

    # placeholder for the quadrilateral coordinates (x1 y1 x2 y2 x3 y3 x4 y4)
    empty_quad_points = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    highlight[Name("QuadPoints")] = empty_quad_points

    return self
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write an XML element as a Flate-compressed /Metadata stream.

    The element is serialized with ET.tostring, compressed with zlib at
    level 9 and wrapped in a Stream that inherits the reference of the
    element. The Stream is then handed to the root transformer, wrapped in
    start_object / end_object when its reference has an object number but no
    byte offset yet.

    :param object_to_transform: the ET.Element to write
    :param context:             the write context (must have a destination)
    """
    assert isinstance(object_to_transform, ET.Element)
    assert context is not None
    assert context.destination is not None
    assert context.destination

    # serialize and compress the XML
    xml_bytes = ET.tostring(object_to_transform)
    compressed_bytes = zlib.compress(xml_bytes, 9)

    # build stream
    metadata_stream = Stream()
    metadata_stream[Name("Type")] = Name("Metadata")
    metadata_stream[Name("Subtype")] = Name("XML")
    metadata_stream[Name("Filter")] = Name("FlateDecode")
    metadata_stream[Name("DecodedBytes")] = xml_bytes
    metadata_stream[Name("Bytes")] = compressed_bytes
    metadata_stream[Name("Length")] = Decimal(len(compressed_bytes))

    # copy reference
    metadata_stream.set_reference(object_to_transform.get_reference())  # type: ignore [attr-defined]

    # an object wrapper is needed when the reference has a number but no offset
    reference = metadata_stream.get_reference()  # type: ignore [attr-defined]
    needs_object_wrapper = False
    if reference is not None:
        assert isinstance(reference, Reference)
        needs_object_wrapper = (
            reference.object_number is not None and reference.byte_offset is None
        )

    if needs_object_wrapper:
        self.start_object(metadata_stream, context)

    # pass stream along to other transformer
    self.get_root_transformer().transform(metadata_stream, context)

    if needs_object_wrapper:
        self.end_object(metadata_stream, context)
def transform(
    self,
    object_to_transform: Any,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write the file header, refresh the trailer bookkeeping and write the XRef.

    Steps, in order: write the "%PDF-1.7" header plus a comment line of four
    bytes above 127, invalidate all references, make sure the Trailer has an
    Info dictionary, set or refresh ID, set CreationDate (only when absent),
    ModDate and Producer, and finally hand the XRef to the root transformer.

    :param object_to_transform: the document; indexed as ["XRef"]["Trailer"]
    :param context:             the write context (must have a destination)
    """
    assert context is not None
    assert context.destination is not None

    # write header
    for chunk in (b"%PDF-1.7\n", b"%", bytes([226, 227, 207, 211]), b"\n"):
        context.destination.write(chunk)

    # invalidate all references
    WritePDFTransformer._invalidate_all_references(object_to_transform)

    trailer = object_to_transform["XRef"]["Trailer"]

    # create Info dictionary if needed
    if "Info" not in trailer:
        trailer[Name("Info")] = Dictionary()

    # set /ID: a new list holding the id twice, or a refreshed second element
    random_id = HexadecimalString("%032x" % random.randrange(16**32))
    if "ID" in trailer:
        trailer["ID"][1] = random_id
    else:
        trailer[Name("ID")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for _ in range(2):
            trailer["ID"].append(random_id)

    # one timestamp is shared by CreationDate and ModDate
    timestamp = WritePDFTransformer._timestamp_to_str()
    info = trailer["Info"]

    # set CreationDate (kept when already present)
    if "CreationDate" not in info:
        info[Name("CreationDate")] = String(timestamp)

    # set ModDate
    info[Name("ModDate")] = String(timestamp)

    # set Producer
    info[Name("Producer")] = String("pText")

    # transform XREF
    self.get_root_transformer().transform(object_to_transform["XRef"], context)
def read_dictionary(self) -> Dictionary:
    """
    Read the next tokens as a dictionary and return it.

    :return:    the Dictionary that was read
    :raises PDFEOFError:     when the tokens run out before the dictionary ends
    :raises PDFSyntaxError:  when the first token does not start a dictionary,
                             when a key is not a name, or when no value can be
                             read for a key
    """
    opening_token = self.next_non_comment_token()
    if opening_token is None:
        raise PDFEOFError()
    if opening_token.token_type != TokenType.START_DICT:
        raise PDFSyntaxError(
            message="invalid dictionary", byte_offset=opening_token.byte_offset
        )

    result = Dictionary()
    while True:
        # the next token is either the end of the dictionary or a key
        key_token = self.next_non_comment_token()
        if key_token is None:
            raise PDFEOFError()
        if key_token.token_type == TokenType.END_DICT:
            return result
        if key_token.token_type != TokenType.NAME:
            raise PDFSyntaxError(
                message="dictionary key must be a name",
                byte_offset=key_token.byte_offset,
            )

        # the key is the token text without its first character
        key = Name(key_token.text[1:])

        # every key must be followed by a value
        value = self.read_object()
        if value is None:
            raise PDFSyntaxError(
                message="unexpected end of dictionary",
                byte_offset=key_token.byte_offset,
            )

        result[key] = value
def append_text_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    contents: str,
    open: Optional[bool] = None,
    color: Optional[Color] = None,
    name_of_icon: Optional[str] = None,
) -> "Page":
    """
    Add an annotation with /Subtype /Text (a "sticky note") to this Page.

    :param rectangle:       forwarded to the generic annotation builder
    :param contents:        forwarded to the generic annotation builder
    :param open:            optional value for /Open
    :param color:           forwarded to the generic annotation builder
    :param name_of_icon:    optional value for /Name; when given it must be
                            one of the icon names accepted below (checked
                            with an assert)
    :return:                self
    """
    # values accepted for /Name
    accepted_icons = (
        "Comment",
        "Key",
        "Note",
        "Help",
        "NewParagraph",
        "Paragraph",
        "Insert",
    )

    # generic annotation, specialised into a text annotation
    note = self._create_annotation(
        rectangle=rectangle, contents=contents, color=color
    )
    note[Name("Subtype")] = Name("Text")

    if open is not None:
        note[Name("Open")] = Boolean(open)

    if name_of_icon is not None:
        assert name_of_icon in accepted_icons
        note[Name("Name")] = Name(name_of_icon)

    # /State and /StateModel are not written

    # make sure the page has an /Annots list, then add the annotation to it
    if "Annots" not in self:
        self[Name("Annots")] = List()
    annotations = self["Annots"]
    assert isinstance(annotations, List)
    annotations.append(note)

    return self
def read_object(
    self, xref: Optional["XREF"] = None
) -> Optional[AnyPDFType]:  # type: ignore [name-defined]
    """
    Read the next object from the token stream.

    :param xref:    passed on to read_stream when a stream is attempted
    :return:        the object that was read, or None when there is no token,
                    the token text is empty, or the token matches none of the
                    handled kinds
    """
    token = self.next_non_comment_token()
    if token is None or len(token.text) == 0:
        return None

    kind = token.token_type

    # containers: rewind to the opening delimiter and delegate
    if kind == TokenType.START_DICT:
        self.seek(token.byte_offset)
        return self.read_dictionary()
    if kind == TokenType.START_ARRAY:
        self.seek(token.byte_offset)
        return self.read_array()

    if kind == TokenType.NUMBER:
        # a number may be the start of:
        #   <number> <number> "R"
        #   <number> <number> "obj" <<dictionary>> "stream" <bytes> "endstream"
        #   <number> <number> "obj"
        # try them in that order, rewinding to the number before each attempt
        attempts = (
            self.read_indirect_reference,
            lambda: self.read_stream(xref),
            self.read_indirect_object,
        )
        for attempt in attempts:
            self.seek(token.byte_offset)
            parsed = attempt()
            if parsed is not None:
                return parsed

        # a plain number: step over its text
        self.seek(self.tell() + len(token.text))
        return Decimal(Decimal(token.text))

    if kind == TokenType.OTHER:
        # boolean
        if token.text in ["true", "false"]:
            return Boolean(token.text == "true")
        # canvas operators
        if token.text in CanvasOperatorName.VALID_NAMES:
            return CanvasOperatorName(token.text)

    # names (without their first character)
    if kind == TokenType.NAME:
        return Name(token.text[1:])

    # literal strings and hex strings (without their delimiters)
    if kind == TokenType.STRING:
        return String(token.text[1:-1])
    if kind == TokenType.HEX_STRING:
        return HexadecimalString(token.text[1:-1])

    # default
    return None
def append_line_annotation(
    self,
    start_point: Tuple[Decimal, Decimal],
    end_point: Tuple[Decimal, Decimal],
    left_line_end_style: Optional[str] = None,
    right_line_end_style: Optional[str] = None,
    stroke_color: Color = X11Color("Black"),
) -> "Page":
    """
    Add an annotation with /Subtype /Line (a single straight line) to this Page.

    The generic annotation is created for the bounding box of the two
    points. On top of that the annotation receives /L (the four line
    coordinates), /LE (the two line ending styles) and /IC (derived from the
    stroke color).

    :param start_point:             (x, y) of the first end of the line
    :param end_point:               (x, y) of the second end of the line
    :param left_line_end_style:     optional style of the ending at
                                    start_point; "None" is written when absent
    :param right_line_end_style:    optional style of the ending at
                                    end_point; "None" is written when absent
    :param stroke_color:            forwarded to the generic annotation
                                    builder and also used for /IC
    :return:                        self
    """
    # values accepted for the entries of /LE
    line_end_styles = (
        "Square",
        "Circle",
        "Diamond",
        "OpenArrow",
        "ClosedArrow",
        "None",
        "Butt",
        "ROpenArrow",
        "RClosedArrow",
        "Slash",
    )

    # bounding box of the line
    x = min([start_point[0], end_point[0]])
    y = min([start_point[1], end_point[1]])
    w = max([start_point[0], end_point[0]]) - x
    h = max([start_point[1], end_point[1]]) - y

    # create generic annotation
    annot = self._create_annotation(
        rectangle=Rectangle(x, y, w, h), color=stroke_color
    )

    # (Required) The type of annotation that this dictionary describes
    annot[Name("Subtype")] = Name("Line")

    # (Required) [x1 y1 x2 y2]: start and end coordinates of the line.
    # The coordinates are wrapped in pDecimal, like every other number this
    # class stores in an annotation.
    annot[Name("L")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    for coordinate in (start_point[0], start_point[1], end_point[0], end_point[1]):
        annot["L"].append(pDecimal(coordinate))

    # (Optional) two names giving the line ending styles of the first and
    # second point; a missing style is written as /None
    annot[Name("LE")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    for style in (left_line_end_style, right_line_end_style):
        if style is not None:
            assert style in line_end_styles
            annot["LE"].append(Name(style))
        else:
            annot["LE"].append(Name("None"))

    # (Optional) interior color, three numbers derived from the RGB
    # components of the stroke color
    if stroke_color is not None:
        color_max = pDecimal(256)
        rgb = stroke_color.to_rgb()
        annot[Name("IC")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        annot["IC"].append(pDecimal(rgb.red / color_max))
        annot["IC"].append(pDecimal(rgb.green / color_max))
        annot["IC"].append(pDecimal(rgb.blue / color_max))

    # append to /Annots
    if "Annots" not in self:
        self[Name("Annots")] = List()
    assert isinstance(self["Annots"], List)
    self["Annots"].append(annot)

    # return
    return self
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write an Image as an image XObject stream.

    The stream is always declared as /Filter /DCTDecode with /ColorSpace
    /DeviceRGB and 8 bits per component, so the payload must be a
    three-component JPEG. Images in any other mode (RGBA, P, L, ...) are
    therefore converted to RGB before they are encoded.

    NOTE(review): transparency is discarded by the conversion; preserving it
    would need a separate soft mask, which this transformer does not write.

    :param object_to_transform: the Image to write
    :param context:             the write context (must have a destination)
    """
    assert context is not None
    assert context.destination is not None
    assert isinstance(object_to_transform, Image)

    # make the pixel data match the declared /DeviceRGB colour space
    rgb_image = object_to_transform
    if rgb_image.mode != "RGB":
        rgb_image = rgb_image.convert("RGB")

    # get image bytes (always JPEG, matching /DCTDecode below)
    with io.BytesIO() as output:
        rgb_image.save(output, format="JPEG")
        contents = output.getvalue()

    # build corresponding Stream (XObject)
    out_value = Stream()
    out_value[Name("Type")] = Name("XObject")
    out_value[Name("Subtype")] = Name("Image")
    out_value[Name("Width")] = Decimal(object_to_transform.width)
    out_value[Name("Height")] = Decimal(object_to_transform.height)
    out_value[Name("Length")] = Decimal(len(contents))
    out_value[Name("Filter")] = Name("DCTDecode")
    out_value[Name("BitsPerComponent")] = Decimal(8)
    out_value[Name("ColorSpace")] = Name("DeviceRGB")
    out_value[Name("Bytes")] = contents

    # copy reference (taken from the original object, not from the converted copy)
    out_value.set_reference(object_to_transform.get_reference())  # type: ignore [attr-defined]

    # start object if needed
    started_object = False
    ref = out_value.get_reference()  # type: ignore [attr-defined]
    if ref is not None:
        assert isinstance(ref, Reference)
        if ref.object_number is not None and ref.byte_offset is None:
            started_object = True
            self.start_object(out_value, context)

    # write stream
    self.get_root_transformer().transform(out_value, context)

    # end object if needed
    if started_object:
        self.end_object(out_value, context)
def append_redact_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    overlay_text: Optional[str] = None,
    repeat_overlay_text: Optional[bool] = None,
    interior_color: Optional[Color] = None,
) -> "Page":
    """
    Add an annotation with /Subtype /Redact to this Page.

    A redaction annotation marks content that is intended to be removed
    from the document; it only identifies the content, it does not remove it.

    :param rectangle:           forwarded to the generic annotation builder
    :param overlay_text:        optional value for /OverlayText
    :param repeat_overlay_text: optional value for /Repeat; requires
                                overlay_text to be given (checked with an assert)
    :param interior_color:      optional color used to fill /IC
    :return:                    self
    """
    # generic annotation, specialised into a redaction
    redaction = self._create_annotation(rectangle=rectangle)
    redaction[Name("Subtype")] = Name("Redact")

    # /IC: three numbers derived from the RGB components of the interior color
    if interior_color is not None:
        color_max = pDecimal(256)
        rgb = interior_color.to_rgb()
        redaction[Name("IC")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        interior = redaction["IC"]
        for component in (rgb.red, rgb.green, rgb.blue):
            interior.append(pDecimal(component / color_max))

    # /OverlayText: text drawn over the redacted region
    if overlay_text is not None:
        redaction[Name("OverlayText")] = String(overlay_text)

    # /Repeat: whether the overlay text is repeated to fill the region
    if repeat_overlay_text is not None:
        assert overlay_text is not None
        redaction[Name("Repeat")] = Boolean(repeat_overlay_text)

    # make sure the page has an /Annots list, then add the annotation to it
    if "Annots" not in self:
        self[Name("Annots")] = List()
    annotations = self["Annots"]
    assert isinstance(annotations, List)
    annotations.append(redaction)

    return self
def append_polyline_annotation(
    self,
    points: typing.List[Tuple[Decimal, Decimal]],
    stroke_color: Color,
    left_line_end_style: Optional[str] = None,
    right_line_end_style: Optional[str] = None,
    fill_color: Optional[Color] = None,
    contents: Optional[str] = None,
) -> "Page":
    """
    Add an annotation with /Subtype /PolyLine to this Page.

    The generic annotation is created for the bounding box of the points.
    On top of that the annotation receives /CA (1), /Vertices, /LE and,
    when a fill color is given, /IC.

    :param points:                  the vertices, at least 3 (checked with an assert)
    :param stroke_color:            forwarded to the generic annotation builder
    :param left_line_end_style:     optional first entry of /LE; "None" is
                                    written when absent
    :param right_line_end_style:    optional second entry of /LE; "None" is
                                    written when absent
    :param fill_color:              optional color used to fill /IC
    :param contents:                forwarded to the generic annotation builder
    :return:                        self
    """
    # values accepted for the entries of /LE
    line_end_styles = (
        "Square",
        "Circle",
        "Diamond",
        "OpenArrow",
        "ClosedArrow",
        "None",
        "Butt",
        "ROpenArrow",
        "RClosedArrow",
        "Slash",
    )

    # must be at least 3 points
    assert len(points) >= 3

    # bounding box
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    # generic annotation, specialised into a polyline
    polyline = self._create_annotation(
        rectangle=Rectangle(min_x, min_y, max_x - min_x, max_y - min_y),
        color=stroke_color,
        contents=contents,
    )
    polyline[Name("Subtype")] = Name("PolyLine")
    polyline[Name("CA")] = pDecimal(1)

    # /Vertices: x and y of every point, in order
    polyline[Name("Vertices")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    vertices = polyline["Vertices"]
    for point in points:
        vertices.append(pDecimal(point[0]))
        vertices.append(pDecimal(point[1]))

    # /LE: two names giving the line ending styles; a missing style is
    # written as /None
    polyline[Name("LE")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    line_endings = polyline["LE"]
    for style in (left_line_end_style, right_line_end_style):
        if style is None:
            line_endings.append(Name("None"))
        else:
            assert style in line_end_styles
            line_endings.append(Name(style))

    # /IC: three numbers derived from the RGB components of the fill color
    if fill_color is not None:
        color_max = pDecimal(256)
        rgb = fill_color.to_rgb()
        polyline[Name("IC")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        interior = polyline["IC"]
        for component in (rgb.red, rgb.green, rgb.blue):
            interior.append(pDecimal(component / color_max))

    # make sure the page has an /Annots list, then add the annotation to it
    if "Annots" not in self:
        self[Name("Annots")] = List()
    annotations = self["Annots"]
    assert isinstance(annotations, List)
    annotations.append(polyline)

    return self
def transform(
    self,
    object_to_transform: AnyPDFType,
    context: Optional[WriteTransformerContext] = None,
):
    """
    Write the objects referenced by the trailer, then the cross-reference
    table, the trailer dictionary, "startxref" and the end-of-file marker.

    Every cross-reference entry is written as exactly 20 bytes
    ("nnnnnnnnnn ggggg n" followed by a space and a line feed), which is the
    fixed entry size the PDF specification requires.

    :param object_to_transform: the XREF to write; must contain a Trailer
                                dictionary
    :param context:             the write context (must have a destination)
    """
    assert isinstance(object_to_transform, XREF)
    assert "Trailer" in object_to_transform
    assert isinstance(object_to_transform["Trailer"], Dictionary)
    assert context is not None
    assert context.destination is not None

    trailer = object_to_transform["Trailer"]

    # transform Trailer dictionary (replacing objects by references)
    trailer_out = Dictionary()

    # /Root
    trailer_out[Name("Root")] = self.get_reference(trailer["Root"], context)

    # /Info
    if "Info" in trailer:
        trailer_out[Name("Info")] = self.get_reference(trailer["Info"], context)

    # /Size (provisional; replaced once the cross-reference table is written)
    if "Size" in trailer:
        trailer_out[Name("Size")] = trailer["Size"]
    else:
        trailer_out[Name("Size")] = Decimal(0)

    # /ID
    if "ID" in trailer:
        trailer_out[Name("ID")] = self.get_reference(trailer["ID"], context)

    # write Root object
    self.get_root_transformer().transform(trailer["Root"], context)

    # write Info object
    if "Info" in trailer:
        self.get_root_transformer().transform(trailer["Info"], context)

    # write ID object
    if "ID" in trailer:
        self.get_root_transformer().transform(trailer["ID"], context)

    # write XREF
    start_of_xref = context.destination.tell()
    context.destination.write(bytes("xref\n", "latin1"))
    for section in self._section_xref(context):
        # subsection header: first object number and number of entries
        context.destination.write(
            bytes("%d %d\n" % (section[0].object_number, len(section)), "latin1")
        )
        for r in section:
            # 10 + 1 + 5 + 1 + 1 + 2 = 20 bytes per entry
            entry_type = "n" if r.is_in_use else "f"
            context.destination.write(
                bytes(
                    "{0:010d} 00000 {1} \n".format(r.byte_offset, entry_type),
                    "latin1",
                )
            )

    # update Size
    trailer_out[Name("Size")] = Decimal(
        sum(len(v) for v in context.indirect_objects.values())
    )

    # write Trailer
    context.destination.write(bytes("trailer\n", "latin1"))
    self.get_root_transformer().transform(trailer_out, context)
    context.destination.write(bytes("startxref\n", "latin1"))

    # write byte offset of last cross-reference section
    context.destination.write(bytes(str(start_of_xref) + "\n", "latin1"))

    # write EOF (the marker is literally two percent signs followed by EOF)
    context.destination.write(bytes("%%EOF", "latin1"))
def decode_stream(s: Stream) -> Stream:
    """
    Apply the filter(s) of a Stream to its "Bytes" and store the result
    under "DecodedBytes".

    Handled filters are FlateDecode (also as Fl), ASCII85Decode, LZWDecode
    and RunLengthDecode. "Type" is set to "Stream" when the Stream has no
    "Type" yet.

    :param s:   the Stream to decode; must contain "Bytes"
    :return:    the same Stream
    :raises PDFValueError:  when a filter other than the handled ones is found
    """
    assert isinstance(s, Stream)
    assert "Bytes" in s

    # determine filter(s) to apply: either a list of names or a single name
    filters: typing.List[str] = []
    if "Filter" in s:
        filters = s["Filter"] if isinstance(s["Filter"], List) else [s["Filter"]]

    # determine the parameters that go with the filter(s)
    decode_params: typing.List[Dictionary] = []
    if "DecodeParms" not in s:
        # no parameters given: one empty dictionary per filter
        decode_params = [Dictionary() for _ in filters]
    elif isinstance(s["DecodeParms"], List):
        decode_params = s["DecodeParms"]
    else:
        assert s["DecodeParms"] is not None
        assert isinstance(s["DecodeParms"], Dictionary)
        decode_params = [s["DecodeParms"]]

    # apply filter(s), each one working on the output of the previous one
    data = s["Bytes"]
    for position, filter_name in enumerate(filters):
        if filter_name in ("FlateDecode", "Fl"):
            # FLATE
            data = FlateDecode.decode(
                bytes_in=data,
                columns=int(decode_params[position].get("Columns", Decimal(1))),
                predictor=int(decode_params[position].get("Predictor", Decimal(1))),
                bits_per_component=int(
                    decode_params[position].get("BitsPerComponent", Decimal(8))
                ),
            )
        elif filter_name in ("ASCII85Decode",):
            # ASCII85
            data = ASCII85Decode.decode(data)
        elif filter_name in ("LZWDecode",):
            # LZW
            data = LZWDecode.decode(data)
        elif filter_name in ("RunLengthDecode",):
            # RunLengthDecode
            data = RunLengthDecode.decode(data)
        else:
            # unknown filter
            raise PDFValueError(
                expected_value_description="[/ASCII85Decode, /FlateDecode, /Fl, /LZWDecode, /RunLengthDecode]",
                received_value_description=str(filter_name),
            )

    # set DecodedBytes
    s[Name("DecodedBytes")] = data

    # set Type if not yet set
    if "Type" not in s:
        s[Name("Type")] = Name("Stream")

    return s
def read_stream(
    self, xref: Optional["XREF"] = None
) -> Optional[Stream]:  # type: ignore [name-defined]
    """
    Attempt to read a stream object: an indirect object holding a
    dictionary, the keyword "stream", an end-of-line marker, the stream
    bytes and the keyword "endstream".

    When the input at the current position is not a stream (no dictionary,
    or no "stream" keyword after it - including the case where the input
    ends right after the dictionary), the position is restored and None is
    returned.

    :param xref:    used to resolve "Length" when it is given as a Reference
    :return:        the Stream that was read, or None
    :raises PDFTypeError:   when the dictionary has no "Length"
    :raises RuntimeError:   when "Length" is a Reference and no xref is given
    :raises PDFSyntaxError: when the end-of-line marker after "stream" is
                            invalid, or when "endstream" is missing
    """
    end_of_line_message = "The keyword stream that follows the stream dictionary shall be followed by an end-of-line marker consisting of either a CARRIAGE RETURN and a LINE FEED or just a LINE FEED, and not by a CARRIAGE RETURN alone."

    byte_offset = self.tell()

    # attempt to read <number> <number> obj
    # followed by dictionary
    stream_dictionary = self.read_indirect_object()
    if not isinstance(stream_dictionary, dict):
        self.seek(byte_offset)
        return None

    # attempt to read keyword "stream"; running out of tokens here simply
    # means the object is not a stream
    stream_token = self.next_non_comment_token()
    if (
        stream_token is None
        or stream_token.token_type != TokenType.OTHER
        or stream_token.text != "stream"
    ):
        self.seek(byte_offset)
        return None

    # process \Length
    if "Length" not in stream_dictionary:
        raise PDFTypeError(received_type=None, expected_type=int)
    length_of_stream = stream_dictionary["Length"]
    if isinstance(length_of_stream, Reference):
        if xref is None:
            raise RuntimeError(
                "unable to process reference \\Length when no XREF is given"
            )
        # resolving the reference moves the read position; restore it afterwards
        pos_before = self.tell()
        length_of_stream = int(
            xref.get_object(length_of_stream, src=self.io_source, tok=self)
        )
        self.seek(pos_before)

    # process newline: either "\n" or "\r\n"
    ch = self._next_char()
    if ch not in ["\r", "\n"]:
        raise PDFSyntaxError(end_of_line_message, byte_offset=self.tell())
    if ch == "\r":
        ch = self._next_char()
        if ch != "\n":
            raise PDFSyntaxError(end_of_line_message, byte_offset=self.tell())

    stream_bytes = self.io_source.read(int(length_of_stream))

    # attempt to read token "endstream"
    end_of_stream_token = self.next_non_comment_token()
    if (
        end_of_stream_token is None
        or end_of_stream_token.token_type != TokenType.OTHER
        or end_of_stream_token.text != "endstream"
    ):
        raise PDFSyntaxError(
            "A stream shall consist of a dictionary followed by zero or more bytes bracketed between the keywords stream (followed by newline) and endstream",
            byte_offset=self.tell(),
        )

    # set Bytes
    stream_dictionary[Name("Bytes")] = stream_bytes

    # return
    return Stream(stream_dictionary)
def append_circle_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    color: Color,
    rectangle_difference: Optional[
        Tuple[Decimal, Decimal, Decimal, Decimal]
    ] = None,
    interior_color: Optional[Color] = None,
) -> "Page":
    """
    Append an annotation of subtype /Circle to this object's /Annots list.

    The annotation is built by `_create_annotation` from `rectangle` and
    `color`; this method then adds the circle-specific entries:

    - /IC (only when `interior_color` is given): the red, green and blue
      components of `interior_color.to_rgb()`, each divided by 256.
    - /RD (only when `rectangle_difference` is given): its four values, in
      the order supplied.

    The optional /BS (border style) and /BE (border effect) entries are not
    written. The /Annots list is created when it does not exist yet.

    Returns this object, so calls can be chained.
    """
    # generic part, shared by every annotation type
    circle = self._create_annotation(rectangle=rectangle, color=color)

    # required entry that makes this a circle annotation
    circle[Name("Subtype")] = Name("Circle")

    # optional interior (fill) colour, written as three RGB components
    if interior_color is not None:
        color_max = pDecimal(256)
        circle[Name("IC")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for channel in ("red", "green", "blue"):
            circle["IC"].append(
                pDecimal(getattr(interior_color.to_rgb(), channel) / color_max)
            )

    # optional differences between /Rect and the actual ellipse boundaries
    if rectangle_difference is not None:
        circle[Name("RD")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for position in range(4):
            circle["RD"].append(pDecimal(rectangle_difference[position]))

    # register the annotation, creating /Annots on first use
    if "Annots" not in self:
        self[Name("Annots")] = List()
    assert isinstance(self["Annots"], List)
    self["Annots"].append(circle)

    return self
def _create_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    contents: Optional[str] = None,
    color: Optional[Color] = None,
    border_horizontal_corner_radius: Optional[Decimal] = None,
    border_vertical_corner_radius: Optional[Decimal] = None,
    border_width: Optional[Decimal] = None,
):
    """
    Build the entries common to every annotation and return the dictionary.

    The result is NOT added to /Annots; callers add their subtype-specific
    entries and append it themselves.

    Entries always written: /Type, /Rect, /P, /NM, /M and /F.
    Entries written only when the matching argument is given: /Contents
    (`contents`), /Border (all three border arguments must be given) and
    /C (`color`).

    The optional /AP, /AS, /StructParent and /OC entries are not written.
    """
    entry = Dictionary()

    # /Type: marks the dictionary as an annotation dictionary
    entry[Name("Type")] = Name("Annot")

    # /Rect: the four values of `rectangle`, in the order supplied
    entry[Name("Rect")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    for corner in range(4):
        entry["Rect"].append(pDecimal(rectangle[corner]))

    # /Contents: text shown for (or describing) the annotation
    if contents is not None:
        entry[Name("Contents")] = String(contents)

    # /P: the object this annotation is being created on
    entry[Name("P")] = self

    # /NM: a name derived from the number of annotations already present
    if "Annots" in self:
        existing_count = len(self["Annots"])
    else:
        existing_count = 0
    entry[Name("NM")] = String("annotation-{0:03d}".format(existing_count))

    # /M: modification date, as produced by _timestamp_to_str
    entry[Name("M")] = String(self._timestamp_to_str())

    # /F: annotation flags
    entry[Name("F")] = pDecimal(4)

    # /Border: horizontal corner radius, vertical corner radius and width;
    # written only when all three were supplied
    border_parts = (
        border_horizontal_corner_radius,
        border_vertical_corner_radius,
        border_width,
    )
    if all(part is not None for part in border_parts):
        entry[Name("Border")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for part in border_parts:
            entry["Border"].append(pDecimal(part))

    # /C: the RGB components of `color`, each divided by 256
    if color is not None:
        color_max = pDecimal(256)
        entry[Name("C")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        for channel in ("red", "green", "blue"):
            entry["C"].append(
                pDecimal(getattr(color.to_rgb(), channel) / color_max)
            )

    return entry
def get_annotations(self) -> List:
    """
    Return the list stored under "Annots".

    When there is no such entry yet, an empty List is stored first, so the
    returned object is always the one held by this object.
    """
    annots_missing = "Annots" not in self
    if annots_missing:
        # create the entry on first access
        self[Name("Annots")] = List()
    return self["Annots"]
def append_link_annotation(
    self,
    rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
    page: Decimal,
    location_on_page: str,
    left: Optional[Decimal] = None,
    bottom: Optional[Decimal] = None,
    right: Optional[Decimal] = None,
    top: Optional[Decimal] = None,
    zoom: Optional[Decimal] = None,
    highlighting_mode: Optional[str] = None,
    color: Optional[Color] = None,
) -> "Page":
    """
    A link annotation represents either a hypertext link to a destination elsewhere in the document
    (see 12.3.2, "Destinations") or an action to be performed (12.6, "Actions").

    This method always writes a /Dest entry (never an /A action) and appends
    the annotation to /Annots, creating that list when needed.

    :param rectangle: annotation rectangle, passed to `_create_annotation`
    :param page: page value written as the first element of /Dest
    :param location_on_page: one of "XYZ", "Fit", "FitH", "FitV", "FitR",
                             "FitB", "FitBH", "FitBV"
    :param left, bottom, right, top, zoom: operands of the destination.
           Exactly the operands used by `location_on_page` must be given and
           all others must be None:
           XYZ -> left, top, zoom; FitH / FitBH -> top; FitV / FitBV -> left;
           FitR -> left, bottom, right, top; Fit -> none.
           For "FitB" nothing is written and nothing is checked.
    :param highlighting_mode: optional, one of "N", "I", "O", "P"
    :param color: optional colour, passed to `_create_annotation`
    :return: this object, so calls can be chained
    :raises AssertionError: unknown `location_on_page` or `highlighting_mode`,
                            or operands that do not match `location_on_page`
    """
    # create generic annotation
    annot = self._create_annotation(rectangle=rectangle, color=color)

    # specific for link annotations
    annot[Name("Subtype")] = Name("Link")

    # (Optional; PDF 1.1) An action that shall be performed when the link
    # annotation is activated (see 12.6, "Actions").
    # annot[Name("A")] = None

    # (Optional; not permitted if an A entry is present) A destination that shall
    # be displayed when the annotation is activated (see 12.3.2, "Destinations").
    # Each destination type maps to the operands it takes, in the order in
    # which they are written to the /Dest array.
    operands_by_location = {
        "XYZ": ("left", "top", "zoom"),
        "Fit": (),
        "FitH": ("top",),
        "FitV": ("left",),
        "FitR": ("left", "bottom", "right", "top"),
        "FitB": (),
        "FitBH": ("top",),
        "FitBV": ("left",),
    }
    assert location_on_page in operands_by_location

    # NOTE(review): for in-document destinations ISO 32000 describes the
    # first element as an indirect reference to the page object; this writes
    # the `page` value as a number - confirm against the document writer.
    destination = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
    destination.append(pDecimal(page))
    destination.append(Name(location_on_page))

    supplied = {
        "left": left,
        "bottom": bottom,
        "right": right,
        "top": top,
        "zoom": zoom,
    }
    expected = operands_by_location[location_on_page]

    # "FitB" has never validated its (unused) operands; keep accepting them
    # so existing callers are not broken.
    if location_on_page != "FitB":
        for operand_name, operand_value in supplied.items():
            assert (operand_value is not None) == (operand_name in expected)

    for operand_name in expected:
        destination.append(pDecimal(supplied[operand_name]))
    annot[Name("Dest")] = destination

    # (Optional; PDF 1.2) The annotation's highlighting mode, the visual effect
    # that shall be used when the mouse button is pressed or held down
    # inside its active area:
    # N (None) No highlighting.
    # I (Invert) Invert the contents of the annotation rectangle.
    # O (Outline) Invert the annotation's border.
    # P (Push) Display the annotation as if it were being pushed below the surface of the page.
    if highlighting_mode is not None:
        assert highlighting_mode in ["N", "I", "O", "P"]
        # /H is a name object, not a string (ISO 32000-1, Table 173)
        annot[Name("H")] = Name(highlighting_mode)

    # append to /Annots
    if "Annots" not in self:
        self[Name("Annots")] = List()
    assert isinstance(self["Annots"], List)
    self["Annots"].append(annot)

    # return
    return self