Example #1
0
 def insert_page(
         self,
         page: Page,
         index: int = -1) -> "Document":  # type: ignore [name-defined]
     """
     Insert a page into this Document's page tree and return the Document.

     Any missing part of the XRef / Trailer / Root / Pages hierarchy is
     created first, then the page is added to /Kids and /Count is
     incremented by one.

     :param page:    the page to add
     :param index:   position in /Kids at which to insert the page. The
                     default, -1, appends the page after all existing pages;
                     any other value is passed to ``insert`` unchanged.
     :return:        self
     """
     # build XRef
     if "XRef" not in self:
         self["XRef"] = PlainTextXREF()
     # build Trailer
     if "Trailer" not in self["XRef"]:
         self["XRef"]["Trailer"] = Dictionary()
         # NOTE(review): /Size is stored on the XRef itself rather than inside
         # the Trailer dictionary; kept as-is, confirm this is intended.
         self["XRef"][Name("Size")] = Decimal(0)
     # build Root
     if "Root" not in self["XRef"]["Trailer"]:
         self["XRef"]["Trailer"][Name("Root")] = Dictionary()
     # build Pages
     if "Pages" not in self["XRef"]["Trailer"]["Root"]:
         self["XRef"]["Trailer"][Name("Root")][Name("Pages")] = Dictionary()
         self["XRef"]["Trailer"][Name("Root")][Name("Pages")][Name(
             "Count")] = Decimal(0)
         self["XRef"]["Trailer"][Name("Root")][Name("Pages")][Name(
             "Kids")] = List()
     pages = self["XRef"]["Trailer"]["Root"]["Pages"]
     # update /Kids
     kids = pages["Kids"]
     assert kids is not None
     assert isinstance(kids, List)
     if index == -1:
         # insert(-1, page) would place the page *before* the current last
         # page (built-in list semantics, which List is assumed to follow),
         # so the "append" default has to be handled explicitly.
         kids.append(page)
     else:
         kids.insert(index, page)
     # update /Count
     prev_count = pages["Count"]
     pages["Count"] = Decimal(prev_count + 1)
     # return
     return self
Example #2
0
    def test_hash_types(self):
        # Builds a Dictionary and a List out of PDF primitives and prints the
        # hash of the List; the test passes as long as hashing does not raise.

        # dictionary holding a reference and a boolean
        sample_dictionary = Dictionary()
        sample_dictionary[Name("Root")] = Reference(object_number=10)
        sample_dictionary[Name("Marked")] = Boolean(True)

        # list holding a name and a number
        sample_list = List()
        for element in (Name("Red"), Decimal(0.5)):
            sample_list.append(element)

        print(hash(sample_list))
Example #3
0
    def test_document(self, file) -> bool:
        # Loads the PDF at `file`, sets /Info /Producer to "pText" and writes
        # the result to "<stem>_out.pdf" inside self.output_dir.
        # Returns False when the document has no XRef or no Trailer,
        # True once the modified document has been written.

        # create output directory if it does not exist yet, otherwise opening
        # the output file below fails
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # read the document (the input only needs to be opened once)
        with open(file, "rb") as pdf_file_handle:
            doc = PDF.loads(pdf_file_handle)

        if "XRef" not in doc:
            return False
        if "Trailer" not in doc["XRef"]:
            return False

        if "Info" not in doc["XRef"]["Trailer"]:
            doc["XRef"]["Trailer"][Name("Info")] = Dictionary()

        # change producer
        doc["XRef"]["Trailer"]["Info"]["Producer"] = String("pText")

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")
        with open(out_file, "wb") as pdf_file_handle:
            # the writer needs the open binary handle, not the path
            PDF.dumps(pdf_file_handle, doc)

        return True
    def test_document(self, file) -> bool:
        # Loads the PDF at `file`, sets /Info /Author and writes the result to
        # "<stem>_out.pdf" inside self.output_dir.
        # Returns False when the document has no XRef or no Trailer.

        # make sure there is a directory to write into
        target_dir = self.output_dir
        if not target_dir.exists():
            target_dir.mkdir()

        # parse the input PDF
        with open(file, "rb") as source_handle:
            document = PDF.loads(source_handle)

        # nothing to modify without a cross-reference table and trailer
        if "XRef" not in document or "Trailer" not in document["XRef"]:
            return False

        # make sure there is an /Info dictionary to write into
        trailer = document["XRef"]["Trailer"]
        if "Info" not in trailer:
            trailer[Name("Info")] = Dictionary()

        # change author
        trailer["Info"]["Author"] = String("Joris Schellekens")

        # write the modified document next to the other outputs
        destination = target_dir / (file.stem + "_out.pdf")
        with open(destination, "wb") as sink_handle:
            PDF.dumps(sink_handle, document)

        return True
Example #5
0
    def __init__(self):
        """
        Create a Page whose /MediaBox is preset to A4 portrait (0 0 595 842).
        """
        super(Page, self).__init__()

        # size: A4 portrait
        self[Name("MediaBox")] = List().set_can_be_referenced(False)
        for coordinate in (0, 0, 595, 842):
            self["MediaBox"].append(pDecimal(coordinate))
Example #6
0
    def append_watermark_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        contents: str,
    ) -> "Page":
        """
        Add an annotation with /Subtype /Watermark to this Page.

        :param rectangle:   forwarded to _create_annotation
        :param contents:    forwarded to _create_annotation
        :return:            self
        """
        # start from the generic annotation and mark it as a watermark
        watermark = self._create_annotation(rectangle=rectangle, contents=contents)
        watermark[Name("Subtype")] = Name("Watermark")

        # append to /Annots, creating the list on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(watermark)

        return self
Example #7
0
    def append_stamp_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        contents: Optional[str] = None,
        color: Optional[Color] = None,
        name: Optional[str] = None,
    ) -> "Page":
        """
        Add an annotation with /Subtype /Stamp to this Page.

        :param rectangle:   forwarded to _create_annotation
        :param contents:    forwarded to _create_annotation
        :param color:       forwarded to _create_annotation
        :param name:        optional stamp name, written to /Name; must be one
                            of the predefined names listed in the body
        :return:            self
        """
        # start from the generic annotation and mark it as a stamp
        stamp = self._create_annotation(
            rectangle=rectangle, contents=contents, color=color
        )
        stamp[Name("Subtype")] = Name("Stamp")

        # optional /Name: only the predefined stamp names are accepted
        if name is not None:
            allowed_names = [
                "Approved",
                "Experimental",
                "NotApproved",
                "Asis",
                "Expired",
                "NotForPublicRelease",
                "Confidential",
                "Final",
                "Sold",
                "Departmental",
                "ForComment",
                "TopSecret",
                "Draft",
                "ForPublicRelease",
            ]
            assert name in allowed_names
            stamp[Name("Name")] = Name(name)

        # append to /Annots, creating the list on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(stamp)

        return self
Example #8
0
    def append_highlight_annotation(
        self,
        rectangle: Rectangle,
        color: Color = X11Color("Yellow"),
        contents: Optional[str] = None,
    ) -> "Page":
        """
        Add an annotation with /Subtype /Highlight covering `rectangle`.

        Besides the generic annotation entries this writes /QuadPoints (the
        four corners of the rectangle), a /Border of [0 0 1] and /CA 1.

        :param rectangle:   area to highlight
        :param color:       forwarded to _create_annotation
        :param contents:    forwarded to _create_annotation
        :return:            self
        """
        # start from the generic annotation and mark it as a highlight
        highlight = self._create_annotation(rectangle=rectangle,
                                            color=color,
                                            contents=contents)
        highlight[Name("Subtype")] = Name("Highlight")

        # corner coordinates of the rectangle
        x0 = rectangle.get_x()
        y0 = rectangle.get_y()
        x1 = rectangle.get_x() + rectangle.get_width()
        y1 = rectangle.get_y() + rectangle.get_height()

        # /QuadPoints: 8 numbers for the single quadrilateral, written in the
        # order (x0, y0), (x0, y1), (x1, y0), (x1, y1)
        highlight[Name("QuadPoints")] = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        for coordinate in (x0, y0, x0, y1, x1, y0, x1, y1):
            highlight["QuadPoints"].append(pDecimal(coordinate))

        # border
        highlight[Name("Border")] = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        for border_value in (0, 0, 1):
            highlight["Border"].append(pDecimal(border_value))

        # CA
        highlight[Name("CA")] = pDecimal(1)

        # append to /Annots, creating the list on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(highlight)

        return self
Example #9
0
    def append_highlight_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        color: Color,
    ) -> "Page":
        """
        Build a /Highlight annotation dictionary for `rectangle` and return self.

        The generic annotation is created, its "Rect" entry is removed, and
        /Subtype and an empty /QuadPoints list are set.

        NOTE(review): the annotation is never added to this Page's /Annots and
        /QuadPoints is left empty, so calling this method currently has no
        visible effect on the Page. This looks unfinished; confirm the
        intended behaviour before relying on it.

        :param rectangle:   forwarded to _create_annotation
        :param color:       forwarded to _create_annotation
        :return:            self
        """
        # generic annotation, without its /Rect entry
        highlight = self._create_annotation(rectangle=rectangle, color=color)
        highlight.pop("Rect")

        # mark it as a highlight
        highlight[Name("Subtype")] = Name("Highlight")

        # placeholder for the 8 x n quadrilateral coordinates
        quad_points = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        highlight[Name("QuadPoints")] = quad_points

        return self
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write an XML element as a zlib-compressed /Metadata stream.

        The element is serialized with ET.tostring, compressed, wrapped in a
        Stream that carries the element's reference, and handed to the root
        transformer. When that reference has an object number but no byte
        offset, the stream is surrounded by start_object / end_object.

        :param object_to_transform: the ET.Element to write
        :param context:             write context; it and its destination
                                    must be set
        """
        assert isinstance(object_to_transform, ET.Element)
        assert context is not None
        assert context.destination is not None
        assert context.destination

        # serialize the XML and compress it at level 9
        xml_bytes = ET.tostring(object_to_transform)
        compressed_bytes = zlib.compress(xml_bytes, 9)

        # build stream
        metadata_stream = Stream()
        metadata_stream[Name("Type")] = Name("Metadata")
        metadata_stream[Name("Subtype")] = Name("XML")
        metadata_stream[Name("Filter")] = Name("FlateDecode")
        metadata_stream[Name("DecodedBytes")] = xml_bytes
        metadata_stream[Name("Bytes")] = compressed_bytes
        metadata_stream[Name("Length")] = Decimal(len(compressed_bytes))

        # copy reference
        metadata_stream.set_reference(
            object_to_transform.get_reference())  # type: ignore [attr-defined]

        # an object wrapper is needed when the reference is numbered but has
        # not been given a byte offset yet
        reference = metadata_stream.get_reference()  # type: ignore [attr-defined]
        wrap_in_object = False
        if reference is not None:
            assert isinstance(reference, Reference)
            wrap_in_object = (reference.object_number is not None
                              and reference.byte_offset is None)

        if wrap_in_object:
            self.start_object(metadata_stream, context)

        # pass stream along to other transformer
        self.get_root_transformer().transform(metadata_stream, context)

        if wrap_in_object:
            self.end_object(metadata_stream, context)
Example #11
0
    def transform(
        self,
        object_to_transform: Any,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write the PDF header, refresh the trailer bookkeeping and write the XRef.

        After the "%PDF-1.7" header and a binary comment line, all references
        are invalidated, /Info is created if missing, /ID gets a fresh random
        second element (or is created with the same value twice), and
        /CreationDate (only if absent), /ModDate and /Producer are set.
        Finally the "XRef" entry is handed to the root transformer.

        :param object_to_transform: the document; must hold ["XRef"]["Trailer"]
        :param context:             write context; it and its destination
                                    must be set
        """
        assert context is not None
        assert context.destination is not None

        # write header: version line followed by a comment line of 4 high bytes
        destination = context.destination
        for chunk in (b"%PDF-1.7\n", b"%", bytes([226, 227, 207, 211]), b"\n"):
            destination.write(chunk)

        # invalidate all references
        WritePDFTransformer._invalidate_all_references(object_to_transform)

        trailer = object_to_transform["XRef"]["Trailer"]

        # create Info dictionary if needed
        if "Info" not in trailer:
            trailer[Name("Info")] = Dictionary()

        # set /ID: a new document gets the same id twice, an existing /ID only
        # has its second element replaced
        random_id = HexadecimalString("%032x" % random.randrange(16**32))
        if "ID" in trailer:
            trailer["ID"][1] = random_id
        else:
            trailer[Name("ID")] = List().set_can_be_referenced(  # type: ignore [attr-defined]
                False)
            trailer["ID"].append(random_id)
            trailer["ID"].append(random_id)

        # one timestamp serves both date entries
        timestamp = WritePDFTransformer._timestamp_to_str()
        info = trailer["Info"]

        # set CreationDate (never overwritten once present)
        if "CreationDate" not in info:
            info[Name("CreationDate")] = String(timestamp)

        # set ModDate
        info[Name("ModDate")] = String(timestamp)

        # set Producer
        info[Name("Producer")] = String("pText")

        # transform XREF
        self.get_root_transformer().transform(object_to_transform["XRef"],
                                              context)
Example #12
0
    def read_dictionary(self) -> Dictionary:
        """
        Read a dictionary from the token stream and return it.

        The next non-comment token must open a dictionary. After that,
        name/value pairs are read until the closing token is found.

        Raises PDFEOFError when the tokens run out, and PDFSyntaxError when
        the opening token is missing, a key is not a name, or read_object
        yields None for a value.
        """
        opening_token = self.next_non_comment_token()
        if opening_token is None:
            raise PDFEOFError()
        if opening_token.token_type != TokenType.START_DICT:
            raise PDFSyntaxError(message="invalid dictionary",
                                 byte_offset=opening_token.byte_offset)

        result = Dictionary()
        while True:
            # the next token is either the end of the dictionary or a key
            key_token = self.next_non_comment_token()
            if key_token is None:
                raise PDFEOFError()

            token_kind = key_token.token_type
            if token_kind == TokenType.END_DICT:
                return result
            if token_kind != TokenType.NAME:
                raise PDFSyntaxError(
                    message="dictionary key must be a name",
                    byte_offset=key_token.byte_offset,
                )

            # the key is the token text without its first character
            key = Name(key_token.text[1:])

            # NOTE(review): read_object also returns None for tokens it does
            # not recognise, so such a value is reported as
            # "unexpected end of dictionary" too.
            value = self.read_object()
            if value is None:
                raise PDFSyntaxError(
                    message="unexpected end of dictionary",
                    byte_offset=key_token.byte_offset,
                )

            result[key] = value
Example #13
0
    def append_text_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        contents: str,
        open: Optional[bool] = None,
        color: Optional[Color] = None,
        name_of_icon: Optional[str] = None,
    ) -> "Page":
        """
        Add an annotation with /Subtype /Text (a "sticky note") to this Page.

        :param rectangle:       forwarded to _create_annotation
        :param contents:        forwarded to _create_annotation
        :param open:            when given, written to /Open
        :param color:           forwarded to _create_annotation
        :param name_of_icon:    when given, written to /Name; must be one of
                                the icon names listed in the body
        :return:                self
        """
        # start from the generic annotation and mark it as a text annotation
        note = self._create_annotation(
            rectangle=rectangle, contents=contents, color=color
        )
        note[Name("Subtype")] = Name("Text")

        # optional /Open flag
        if open is not None:
            note[Name("Open")] = Boolean(open)

        # optional /Name: only the predefined icon names are accepted
        if name_of_icon is not None:
            allowed_icons = [
                "Comment",
                "Key",
                "Note",
                "Help",
                "NewParagraph",
                "Paragraph",
                "Insert",
            ]
            assert name_of_icon in allowed_icons
            note[Name("Name")] = Name(name_of_icon)

        # /State and /StateModel are not written

        # append to /Annots, creating the list on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(note)

        return self
Example #14
0
    def read_object(
        self,
        xref: Optional["XREF"] = None
    ) -> Optional[AnyPDFType]:  # type: ignore [name-defined]
        """
        Read the next object from the token stream.

        The type of the first non-comment token decides what is read:
        dictionary, array, something starting with a number (indirect
        reference, stream, indirect object or plain number, tried in that
        order), boolean, canvas operator, name, or (hexadecimal) string.

        :param xref:    passed on to read_stream
        :return:        the object that was read, or None when there is no
                        token, the token text is empty, or the token is not
                        recognised
        """
        token = self.next_non_comment_token()
        if token is None or len(token.text) == 0:
            return None

        kind = token.token_type
        start = token.byte_offset

        # composite objects: rewind to the opening token and delegate
        if kind == TokenType.START_DICT:
            self.seek(start)
            return self.read_dictionary()
        if kind == TokenType.START_ARRAY:
            self.seek(start)
            return self.read_array()

        if kind == TokenType.NUMBER:
            # A number can start several constructs. Each attempt begins at
            # the start of the token and reports failure by returning None.

            # number number "R"
            self.seek(start)
            candidate = self.read_indirect_reference()
            if candidate is not None:
                return candidate

            # number number "obj", dictionary, "stream", bytes, "endstream"
            self.seek(start)
            candidate = self.read_stream(xref)
            if candidate is not None:
                return candidate

            # number number "obj"
            self.seek(start)
            candidate = self.read_indirect_object()
            if candidate is not None:
                return candidate

            # plain number: skip over its text, starting from wherever the
            # last attempt left the position
            self.seek(self.tell() + len(token.text))
            return Decimal(Decimal(token.text))

        if kind == TokenType.OTHER:
            # boolean
            if token.text in ["true", "false"]:
                return Boolean(token.text == "true")
            # canvas operators
            if token.text in CanvasOperatorName.VALID_NAMES:
                return CanvasOperatorName(token.text)

        # names: drop the first character of the token text
        if kind == TokenType.NAME:
            return Name(token.text[1:])

        # literal strings and hex strings: drop the delimiters
        if kind == TokenType.STRING:
            return String(token.text[1:-1])
        if kind == TokenType.HEX_STRING:
            return HexadecimalString(token.text[1:-1])

        # default
        return None
Example #15
0
    def append_line_annotation(
            self,
            start_point: Tuple[Decimal, Decimal],
            end_point: Tuple[Decimal, Decimal],
            left_line_end_style: Optional[str] = None,
            right_line_end_style: Optional[str] = None,
            stroke_color: Color = X11Color("Black"),
    ) -> "Page":
        """
        Add an annotation with /Subtype /Line running from start_point to end_point.

        The annotation rectangle is the bounding box of the two points. /L
        holds the four coordinates, /LE the two line-ending styles (/None
        where no style is given) and /IC the RGB components of stroke_color.

        :param start_point:             (x, y) of the first end point
        :param end_point:               (x, y) of the second end point
        :param left_line_end_style:     optional style for the first end point;
                                        must be one of the names listed in the body
        :param right_line_end_style:    optional style for the second end point;
                                        same restriction
        :param stroke_color:            forwarded to _create_annotation and
                                        also written to /IC
        :return:                        self
        """
        # bounding box of the two end points
        x = min([start_point[0], end_point[0]])
        y = min([start_point[1], end_point[1]])
        w = max([start_point[0], end_point[0]]) - x
        h = max([start_point[1], end_point[1]]) - y

        # create generic annotation
        annot = self._create_annotation(rectangle=Rectangle(x, y, w, h),
                                        color=stroke_color)

        # (Required) The type of annotation that this dictionary describes; shall be
        # Line for a line annotation.
        annot[Name("Subtype")] = Name("Line")

        # (Required) An array of four numbers, [ x1 y1 x2 y2 ], specifying the
        # starting and ending coordinates of the line in default user space.
        # The coordinates are wrapped in pDecimal, like every other number this
        # method stores in the annotation, instead of being stored as passed in.
        annot[Name("L")] = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        for coordinate in (start_point[0], start_point[1],
                           end_point[0], end_point[1]):
            annot["L"].append(pDecimal(coordinate))

        # (Optional; PDF 1.4) An array of two names specifying the line ending
        # styles for the first and second end point. A missing style is
        # written as /None.
        line_end_styles = [
            "Square",
            "Circle",
            "Diamond",
            "OpenArrow",
            "ClosedArrow",
            "None",
            "Butt",
            "ROpenArrow",
            "RClosedArrow",
            "Slash",
        ]
        annot[Name("LE")] = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        for style in (left_line_end_style, right_line_end_style):
            if style is None:
                annot["LE"].append(Name("None"))
            else:
                assert style in line_end_styles
                annot["LE"].append(Name(style))

        # (Optional; PDF 1.4) An array of numbers in the range 0.0 to 1.0
        # specifying the interior color of the annotation; the components of
        # the stroke color are each divided by 256.
        if stroke_color is not None:
            color_max = pDecimal(256)
            rgb = stroke_color.to_rgb()
            annot[Name("IC")] = List().set_can_be_referenced(
                False)  # type: ignore [attr-defined]
            for component in (rgb.red, rgb.green, rgb.blue):
                annot["IC"].append(pDecimal(component / color_max))

        # append to /Annots
        if "Annots" not in self:
            self[Name("Annots")] = List()
        assert isinstance(self["Annots"], List)
        self["Annots"].append(annot)

        # return
        return self
Example #16
0
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write an image as a JPEG-encoded image XObject stream.

        The stream is declared as /Filter /DCTDecode with /ColorSpace
        /DeviceRGB, so the image data must be an RGB JPEG. Images in any
        other mode (alpha channel, palette, greyscale, ...) are converted to
        RGB before encoding; transparency is therefore not preserved.

        When the stream's reference has an object number but no byte offset,
        the stream is surrounded by start_object / end_object.

        :param object_to_transform: the Image to write
        :param context:             write context; it and its destination
                                    must be set
        """
        assert context is not None
        assert context.destination is not None
        assert isinstance(object_to_transform, Image)

        # The bytes must match the /DCTDecode + /DeviceRGB declaration below.
        # Previously images with alpha were saved as PNG while still being
        # labelled /DCTDecode, which yields an undecodable stream.
        # NOTE(review): assumes Image follows the Pillow Image API (convert).
        image_to_encode = object_to_transform
        if image_to_encode.mode != "RGB":
            image_to_encode = image_to_encode.convert("RGB")

        # get image bytes
        with io.BytesIO() as output:
            image_to_encode.save(output, format="JPEG")
            contents = output.getvalue()

        # build corresponding Stream (XObject)
        out_value = Stream()
        out_value[Name("Type")] = Name("XObject")
        out_value[Name("Subtype")] = Name("Image")
        out_value[Name("Width")] = Decimal(object_to_transform.width)
        out_value[Name("Height")] = Decimal(object_to_transform.height)
        out_value[Name("Length")] = Decimal(len(contents))
        out_value[Name("Filter")] = Name("DCTDecode")
        out_value[Name("BitsPerComponent")] = Decimal(8)
        out_value[Name("ColorSpace")] = Name("DeviceRGB")
        out_value[Name("Bytes")] = contents

        # copy reference (taken from the original image, not the converted copy)
        out_value.set_reference(
            object_to_transform.get_reference())  # type: ignore [attr-defined]

        # start object if needed
        started_object = False
        ref = out_value.get_reference()  # type: ignore [attr-defined]
        if ref is not None:
            assert isinstance(ref, Reference)
            if ref.object_number is not None and ref.byte_offset is None:
                started_object = True
                self.start_object(out_value, context)

        # write stream
        self.get_root_transformer().transform(out_value, context)

        # end object if needed
        if started_object:
            self.end_object(out_value, context)
Example #17
0
    def append_redact_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        overlay_text: Optional[str] = None,
        repeat_overlay_text: Optional[bool] = None,
        interior_color: Optional[Color] = None,
    ) -> "Page":
        """
        Add an annotation with /Subtype /Redact to this Page.

        A redaction annotation only marks content for later removal; nothing
        is removed from the Page by this method.

        :param rectangle:           forwarded to _create_annotation
        :param overlay_text:        when given, written to /OverlayText
        :param repeat_overlay_text: when given, written to /Repeat; requires
                                    overlay_text to be given as well
        :param interior_color:      when given, its RGB components (each
                                    divided by 256) are written to /IC
        :return:                    self
        """
        # start from the generic annotation and mark it as a redaction
        redaction = self._create_annotation(rectangle=rectangle)
        redaction[Name("Subtype")] = Name("Redact")

        # (Optional) /IC: interior colour used to fill the redacted region
        if interior_color is not None:
            color_max = pDecimal(256)
            rgb = interior_color.to_rgb()
            redaction[Name("IC")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            for component in (rgb.red, rgb.green, rgb.blue):
                redaction["IC"].append(pDecimal(component / color_max))

        # (Optional) /OverlayText: text drawn over the redacted region
        if overlay_text is not None:
            redaction[Name("OverlayText")] = String(overlay_text)

        # (Optional) /Repeat: whether the overlay text is repeated to fill the
        # region; only meaningful together with an overlay text
        if repeat_overlay_text is not None:
            assert overlay_text is not None
            redaction[Name("Repeat")] = Boolean(repeat_overlay_text)

        # append to /Annots, creating the list on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(redaction)

        return self
Example #18
0
    def append_polyline_annotation(
        self,
        points: typing.List[Tuple[Decimal, Decimal]],
        stroke_color: Color,
        left_line_end_style: Optional[str] = None,
        right_line_end_style: Optional[str] = None,
        fill_color: Optional[Color] = None,
        contents: Optional[str] = None,
    ) -> "Page":
        """
        Add an annotation with /Subtype /PolyLine through the given points.

        The annotation rectangle is the bounding box of all points. /Vertices
        holds the coordinates, /LE the two line-ending styles (/None where no
        style is given), /CA is 1 and /IC is written when a fill color is given.

        :param points:                  at least 3 (x, y) pairs
        :param stroke_color:            forwarded to _create_annotation
        :param left_line_end_style:     optional first line-ending style; must
                                        be one of the names listed in the body
        :param right_line_end_style:    optional second line-ending style;
                                        same restriction
        :param fill_color:              when given, its RGB components (each
                                        divided by 256) are written to /IC
        :param contents:                forwarded to _create_annotation
        :return:                        self
        """
        # must be at least 3 points
        assert len(points) >= 3

        # bounding box
        lowest_x = min(point[0] for point in points)
        lowest_y = min(point[1] for point in points)
        highest_x = max(point[0] for point in points)
        highest_y = max(point[1] for point in points)

        # start from the generic annotation and mark it as a polyline
        polyline = self._create_annotation(
            rectangle=Rectangle(lowest_x, lowest_y,
                                highest_x - lowest_x, highest_y - lowest_y),
            color=stroke_color,
            contents=contents,
        )
        polyline[Name("Subtype")] = Name("PolyLine")
        polyline[Name("CA")] = pDecimal(1)

        # /Vertices: x and y of every point, in order
        polyline[Name("Vertices")] = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        for point in points:
            for coordinate in (point[0], point[1]):
                polyline["Vertices"].append(pDecimal(coordinate))

        # /LE: (Optional; PDF 1.4) two names specifying the line ending
        # styles; a missing style is written as /None
        line_end_styles = [
            "Square",
            "Circle",
            "Diamond",
            "OpenArrow",
            "ClosedArrow",
            "None",
            "Butt",
            "ROpenArrow",
            "RClosedArrow",
            "Slash",
        ]
        polyline[Name("LE")] = List().set_can_be_referenced(
            False)  # type: ignore [attr-defined]
        for style in (left_line_end_style, right_line_end_style):
            if style is None:
                polyline["LE"].append(Name("None"))
            else:
                assert style in line_end_styles
                polyline["LE"].append(Name(style))

        # /IC: interior colour, only when a fill color was requested
        if fill_color is not None:
            color_max = pDecimal(256)
            rgb = fill_color.to_rgb()
            polyline[Name("IC")] = List().set_can_be_referenced(
                False)  # type: ignore [attr-defined]
            for component in (rgb.red, rgb.green, rgb.blue):
                polyline["IC"].append(pDecimal(component / color_max))

        # append to /Annots, creating the list on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(polyline)

        return self
Example #19
0
    def transform(
        self,
        object_to_transform: AnyPDFType,
        context: Optional[WriteTransformerContext] = None,
    ):
        """
        Write the objects reachable from the trailer, then the cross-reference
        table, the trailer dictionary, "startxref" and the end-of-file marker.

        /Root, /Info and /ID are written as objects first (the latter two only
        when present). The trailer that is emitted refers to them through
        get_reference, and its /Size is computed from
        context.indirect_objects after everything has been written.

        :param object_to_transform: the XREF to write; must hold a "Trailer"
                                    Dictionary
        :param context:             write context; it and its destination
                                    must be set
        """
        assert isinstance(object_to_transform, XREF)
        assert "Trailer" in object_to_transform
        assert isinstance(object_to_transform["Trailer"], Dictionary)
        assert context is not None
        assert context.destination is not None

        trailer_in = object_to_transform["Trailer"]
        has_info = "Info" in trailer_in
        has_id = "ID" in trailer_in

        # transform Trailer dictionary (replacing objects by references)
        trailer_out = Dictionary()
        # /Root
        trailer_out[Name("Root")] = self.get_reference(trailer_in["Root"],
                                                       context)
        # /Info
        if has_info:
            trailer_out[Name("Info")] = self.get_reference(
                trailer_in["Info"], context)
        # /ID
        if has_id:
            trailer_out[Name("ID")] = self.get_reference(
                trailer_in["ID"], context)

        # write Root object
        self.get_root_transformer().transform(trailer_in["Root"], context)

        # write Info object
        if has_info:
            self.get_root_transformer().transform(trailer_in["Info"], context)

        # write ID object
        if has_id:
            self.get_root_transformer().transform(trailer_in["ID"], context)

        # write XREF
        start_of_xref = context.destination.tell()
        context.destination.write(bytes("xref\n", "latin1"))
        for section in self._section_xref(context):
            # subsection header: first object number and number of entries
            context.destination.write(
                bytes("%d %d\n" % (section[0].object_number, len(section)),
                      "latin1"))
            for r in section:
                # Each entry must be exactly 20 bytes long: 10-digit offset,
                # space, 5-digit generation, space, "n" or "f", and a
                # two-byte end-of-line, which is SPACE + LF when the line
                # ending is a single character.
                entry_type = "n" if r.is_in_use else "f"
                context.destination.write(
                    bytes(
                        "{0:010d} 00000 {1} \n".format(r.byte_offset,
                                                       entry_type),
                        "latin1"))

        # /Size: total number of entries over all groups of indirect objects.
        # It is computed only here, after all objects have been written; the
        # /Size of the incoming trailer is never used.
        trailer_out[Name("Size")] = Decimal(
            sum(len(group) for group in context.indirect_objects.values()))

        # write Trailer
        context.destination.write(bytes("trailer\n", "latin1"))
        self.get_root_transformer().transform(trailer_out, context)
        context.destination.write(bytes("startxref\n", "latin1"))

        # write byte offset of last cross-reference section
        context.destination.write(bytes(str(start_of_xref) + "\n", "latin1"))

        # write EOF
        context.destination.write(bytes("%%EOF", "latin1"))
Example #20
0
def decode_stream(s: Stream) -> Stream:
    """
    Decode the raw bytes of a stream by applying its /Filter chain.

    The filter(s) named by the /Filter entry (a single name or an array of
    names) are applied, in order, to s["Bytes"]. The result is stored in the
    stream under /DecodedBytes. If the stream has no /Type entry, it is set to
    /Stream. A stream without /Filter ends up with DecodedBytes equal to Bytes.

    /DecodeParms may be a single dictionary or an array. When it has no usable
    entry for a filter (the array is shorter than the filter array, or the
    entry is not a dictionary, e.g. a null placeholder), the filter is run
    with its default parameters instead of failing.

    :param s:   the Stream to decode; it must contain a "Bytes" entry
    :return:    the same Stream object, with "DecodedBytes" (and "Type") set
    :raises PDFValueError: when a filter name is not one of the supported ones
    """

    assert isinstance(s, Stream)
    assert "Bytes" in s

    # determine filter(s) to apply
    filters: typing.List[str] = []
    if "Filter" in s:
        if isinstance(s["Filter"], List):
            filters = s["Filter"]
        else:
            filters = [s["Filter"]]

    # determine the decode parameters that go with the filter(s)
    decode_params: typing.List[Dictionary] = []
    if "DecodeParms" in s:
        if isinstance(s["DecodeParms"], List):
            decode_params = s["DecodeParms"]
        else:
            assert s["DecodeParms"] is not None
            assert isinstance(s["DecodeParms"], Dictionary)
            decode_params = [s["DecodeParms"]]
    else:
        decode_params = [Dictionary() for _ in filters]

    # apply filter(s)
    transformed_bytes = s["Bytes"]
    for filter_index, filter_name in enumerate(filters):
        # FLATE
        if filter_name in ["FlateDecode", "Fl"]:
            # Look up the parameters for this filter; anything that cannot be
            # queried like a dictionary (missing entry, null placeholder) means
            # "use the defaults".
            params = (decode_params[filter_index]
                      if filter_index < len(decode_params) else None)
            if not hasattr(params, "get"):
                params = Dictionary()
            transformed_bytes = FlateDecode.decode(
                bytes_in=transformed_bytes,
                columns=int(params.get("Columns", Decimal(1))),
                predictor=int(params.get("Predictor", Decimal(1))),
                bits_per_component=int(
                    params.get("BitsPerComponent", Decimal(8))),
            )
            continue

        # ASCII85
        if filter_name in ["ASCII85Decode"]:
            transformed_bytes = ASCII85Decode.decode(transformed_bytes)
            continue

        # LZW
        if filter_name in ["LZWDecode"]:
            transformed_bytes = LZWDecode.decode(transformed_bytes)
            continue

        # RunLengthDecode
        if filter_name in ["RunLengthDecode"]:
            transformed_bytes = RunLengthDecode.decode(transformed_bytes)
            continue

        # unknown filter
        raise PDFValueError(
            expected_value_description=
            "[/ASCII85Decode, /FlateDecode, /Fl, /LZWDecode, /RunLengthDecode]",
            received_value_description=str(filter_name),
        )

    # set DecodedBytes
    s[Name("DecodedBytes")] = transformed_bytes

    # set Type if not yet set
    if "Type" not in s:
        s[Name("Type")] = Name("Stream")

    # return
    return s
Example #21
0
    def read_stream(
        self,
        xref: Optional["XREF"] = None
    ) -> Optional[Stream]:  # type: ignore [name-defined]
        """
        Try to read a stream object starting at the current position.

        A stream is an indirect object whose value is a dictionary, followed by
        the keyword "stream", an end-of-line marker (LF or CR LF), /Length raw
        bytes and the keyword "endstream".

        If the input at the current position is not an indirect dictionary
        object followed by "stream" (including the case where the input ends
        right after the dictionary), the read position is restored and None is
        returned.

        :param xref:    cross-reference table used to resolve /Length when it
                        is given as an indirect reference
        :return:        a Stream holding the dictionary entries plus the raw
                        data under "Bytes", or None when no stream starts here
        :raises PDFTypeError:   when the stream dictionary has no /Length
        :raises RuntimeError:   when /Length is a reference and xref is None
        :raises PDFSyntaxError: when "stream" is not followed by LF or CR LF,
                                or the data is not followed by "endstream"
        """

        byte_offset = self.tell()

        # attempt to read <number> <number> obj
        # followed by dictionary
        stream_dictionary = self.read_indirect_object()
        if stream_dictionary is None or not isinstance(stream_dictionary,
                                                       dict):
            self.seek(byte_offset)
            return None

        # attempt to read keyword "stream"; running out of tokens simply means
        # the object was a plain dictionary, not a stream
        stream_token = self.next_non_comment_token()
        if (stream_token is None
                or stream_token.token_type != TokenType.OTHER
                or stream_token.text != "stream"):
            self.seek(byte_offset)
            return None

        # process /Length
        if "Length" not in stream_dictionary:
            raise PDFTypeError(received_type=None, expected_type=int)
        length_of_stream = stream_dictionary["Length"]
        if isinstance(length_of_stream, Reference):
            if xref is None:
                raise RuntimeError(
                    "unable to process reference \\Length when no XREF is given"
                )
            # resolving the reference moves the read position, so remember it
            # and come back afterwards
            pos_before = self.tell()
            length_of_stream = int(
                xref.get_object(length_of_stream, src=self.io_source,
                                tok=self))
            self.seek(pos_before)

        # process newline: only LF or CR LF are accepted after "stream"
        eol_error_message = "The keyword stream that follows the stream dictionary shall be followed by an end-of-line marker consisting of either a CARRIAGE RETURN and a LINE FEED or just a LINE FEED, and not by a CARRIAGE RETURN alone."
        ch = self._next_char()
        if ch not in ["\r", "\n"]:
            raise PDFSyntaxError(
                eol_error_message,
                byte_offset=self.tell(),
            )
        if ch == "\r":
            ch = self._next_char()
            if ch != "\n":
                raise PDFSyntaxError(
                    eol_error_message,
                    byte_offset=self.tell(),
                )

        # read the raw stream data
        stream_bytes = self.io_source.read(int(length_of_stream))

        # attempt to read token "endstream"; a missing token is reported the
        # same way as a wrong one
        end_of_stream_token = self.next_non_comment_token()
        if (end_of_stream_token is None
                or end_of_stream_token.token_type != TokenType.OTHER
                or end_of_stream_token.text != "endstream"):
            raise PDFSyntaxError(
                "A stream shall consist of a dictionary followed by zero or more bytes bracketed between the keywords stream (followed by newline) and endstream",
                byte_offset=self.tell(),
            )

        # set Bytes
        stream_dictionary[Name("Bytes")] = stream_bytes

        # return
        return Stream(stream_dictionary)
Example #22
0
    def append_circle_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        color: Color,
        rectangle_difference: Optional[
            Tuple[Decimal, Decimal, Decimal, Decimal]
        ] = None,
        interior_color: Optional[Color] = None,
    ) -> "Page":
        """
        Append a circle annotation to this page's /Annots list.

        The annotation dictionary is the generic one produced by
        _create_annotation (for the given rectangle and color) with
        /Subtype set to /Circle, plus two optional entries:

        - /IC: the interior (fill) color, written as three numbers obtained by
          dividing the red, green and blue components of
          interior_color.to_rgb() by 256
        - /RD: the four rectangle differences, copied from
          rectangle_difference

        The optional /BS (border style) and /BE (border effect) entries are
        not written.

        :param rectangle:               the annotation rectangle
        :param color:                   the annotation color
        :param rectangle_difference:    optional four values for /RD
        :param interior_color:          optional fill color for /IC
        :return:                        self
        """

        # generic annotation, specialised into a circle
        circle = self._create_annotation(rectangle=rectangle, color=color)
        circle[Name("Subtype")] = Name("Circle")

        # /IC: interior color, one number per RGB channel
        if interior_color is not None:
            scale = pDecimal(256)
            circle[Name("IC")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            interior = circle["IC"]
            for channel in ("red", "green", "blue"):
                component = getattr(interior_color.to_rgb(), channel)
                interior.append(pDecimal(component / scale))

        # /RD: differences between /Rect and the actual circle boundaries
        if rectangle_difference is not None:
            circle[Name("RD")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            differences = circle["RD"]
            for position in range(4):
                differences.append(pDecimal(rectangle_difference[position]))

        # register the annotation on the page, creating /Annots on first use
        if "Annots" not in self:
            self[Name("Annots")] = List()
        page_annotations = self["Annots"]
        assert isinstance(page_annotations, List)
        page_annotations.append(circle)

        return self
Example #23
0
    def _create_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        contents: Optional[str] = None,
        color: Optional[Color] = None,
        border_horizontal_corner_radius: Optional[Decimal] = None,
        border_vertical_corner_radius: Optional[Decimal] = None,
        border_width: Optional[Decimal] = None,
    ):
        """
        Build the entries shared by every annotation dictionary of this page.

        Entries written, in this order:

        - /Type      always /Annot
        - /Rect      the four values of rectangle
        - /Contents  only when contents is given
        - /P         this page
        - /NM        "annotation-NNN", where NNN is the number of annotations
                     already in this page's /Annots (0 when there is none)
        - /M         the string returned by self._timestamp_to_str()
        - /F         the flags value 4
        - /Border    [horizontal radius, vertical radius, width]; only when
                     all three border arguments are given
        - /C         the red, green and blue components of color.to_rgb(),
                     each divided by 256; only when color is given

        The optional /AP, /AS, /StructParent and /OC entries are not written.
        The new dictionary is NOT added to /Annots; that is left to the caller.

        :return: the new annotation Dictionary
        """
        base = Dictionary()

        # /Type
        base[Name("Type")] = Name("Annot")

        # /Rect: location of the annotation on the page
        base[Name("Rect")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        rect = base["Rect"]
        for position in range(4):
            rect.append(pDecimal(rectangle[position]))

        # /Contents: text (or alternate description) of the annotation
        if contents is not None:
            base[Name("Contents")] = String(contents)

        # /P: the page this annotation belongs to
        base[Name("P")] = self

        # /NM: a name that is unique among the annotations on this page,
        # derived from how many annotations the page already holds
        if "Annots" in self:
            existing_count = len(self["Annots"])
        else:
            existing_count = 0
        base[Name("NM")] = String("annotation-{0:03d}".format(existing_count))

        # /M: modification date
        base[Name("M")] = String(self._timestamp_to_str())

        # /F: annotation flags
        base[Name("F")] = pDecimal(4)

        # /Border: written only when the caller specified it completely
        border_incomplete = (
            border_horizontal_corner_radius is None
            or border_vertical_corner_radius is None
            or border_width is None
        )
        if not border_incomplete:
            base[Name("Border")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            border = base["Border"]
            border.append(pDecimal(border_horizontal_corner_radius))
            border.append(pDecimal(border_vertical_corner_radius))
            border.append(pDecimal(border_width))

        # /C: annotation color, one number per RGB channel
        if color is not None:
            scale = pDecimal(256)
            base[Name("C")] = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
            components = base["C"]
            for channel in ("red", "green", "blue"):
                component = getattr(color.to_rgb(), channel)
                components.append(pDecimal(component / scale))

        return base
Example #24
0
 def get_annotations(self) -> List:
     """
     Return this object's /Annots list.

     When there is no "Annots" entry yet, an empty List is stored under
     /Annots first, so the returned list is always the one held by self.
     """
     if "Annots" in self:
         return self["Annots"]
     self[Name("Annots")] = List()
     return self["Annots"]
Example #25
0
    def append_link_annotation(
        self,
        rectangle: Tuple[Decimal, Decimal, Decimal, Decimal],
        page: Decimal,
        location_on_page: str,
        left: Optional[Decimal] = None,
        bottom: Optional[Decimal] = None,
        right: Optional[Decimal] = None,
        top: Optional[Decimal] = None,
        zoom: Optional[Decimal] = None,
        highlighting_mode: Optional[str] = None,
        color: Optional[Color] = None,
    ) -> "Page":
        """
        Append a link annotation (a link to a destination) to this page.

        The annotation dictionary is the generic one produced by
        _create_annotation (for the given rectangle and color) with
        /Subtype set to /Link and a /Dest array of the form
        [page, /<location_on_page>, <coordinates...>].

        Which coordinate arguments must be given depends on location_on_page;
        every coordinate argument that is not listed must be left as None:

        - "XYZ":    left, top, zoom
        - "Fit":    (none)
        - "FitH":   top
        - "FitV":   left
        - "FitR":   left, bottom, right, top
        - "FitB":   (none written; the coordinate arguments are not checked)
        - "FitBH":  top
        - "FitBV":  left

        :param rectangle:           the annotation rectangle
        :param page:                written as a number, as the first element
                                    of the destination array
        :param location_on_page:    one of the destination types listed above
        :param left:                left coordinate, where required
        :param bottom:              bottom coordinate, where required
        :param right:               right coordinate, where required
        :param top:                 top coordinate, where required
        :param zoom:                zoom factor, where required
        :param highlighting_mode:   optional; one of "N" (none), "I" (invert),
                                    "O" (outline), "P" (push)
        :param color:               optional annotation color
        :return:                    self
        :raises AssertionError:     on an unknown location_on_page or
                                    highlighting_mode, or when the coordinate
                                    arguments do not match the destination type
        """
        # create generic annotation
        annot = self._create_annotation(rectangle=rectangle, color=color)

        # specific for link annotations
        annot[Name("Subtype")] = Name("Link")

        # /Dest: the destination displayed when the annotation is activated.
        # (The alternative /A action entry is not written.)
        assert location_on_page in [
            "XYZ",
            "Fit",
            "FitH",
            "FitV",
            "FitR",
            "FitB",
            "FitBH",
            "FitBV",
        ]
        destination = List().set_can_be_referenced(False)  # type: ignore [attr-defined]
        destination.append(pDecimal(page))
        destination.append(Name(location_on_page))

        # Coordinates each destination type takes, in the order they are
        # written. "FitB" is deliberately absent: it takes no coordinates and
        # its arguments have never been validated.
        required_by_location = {
            "XYZ": ("left", "top", "zoom"),
            "Fit": (),
            "FitH": ("top",),
            "FitV": ("left",),
            "FitR": ("left", "bottom", "right", "top"),
            "FitBH": ("top",),
            "FitBV": ("left",),
        }
        supplied = {
            "left": left,
            "bottom": bottom,
            "right": right,
            "top": top,
            "zoom": zoom,
        }
        if location_on_page in required_by_location:
            required = required_by_location[location_on_page]
            # exactly the required coordinates may (and must) be given
            assert all(
                (value is not None) == (name in required)
                for name, value in supplied.items()
            )
            for name in required:
                destination.append(pDecimal(supplied[name]))
        annot[Name("Dest")] = destination

        # /H: the highlighting mode, the visual effect used when the mouse
        # button is pressed or held down inside the active area.
        # The PDF specification types this entry as a name, so it is written
        # as /N, /I, /O or /P rather than as a string.
        if highlighting_mode is not None:
            assert highlighting_mode in ["N", "I", "O", "P"]
            annot[Name("H")] = Name(highlighting_mode)

        # append to /Annots
        if "Annots" not in self:
            self[Name("Annots")] = List()
        assert isinstance(self["Annots"], List)
        self["Annots"].append(annot)

        # return
        return self