Esempio n. 1
0
    def test_document(self, file):
        """Copy a PDF through ``PDF.loads``/``PDF.dumps`` and check the copy's size.

        The document at ``file`` is read, written to
        ``<output_dir>/<stem>_out.pdf`` and the byte size of the copy is
        compared with the byte size of the original. Timings for both steps
        and the size figures are printed.

        :param file: path of the PDF to copy
        :return: True when the copy is at most 1.05 times the original size
        :raises Exception: when the copy is more than 1.05 times the original size
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")

        # attempt to read PDF
        started = time.time()
        with open(file, "rb") as in_file_handle:
            print("\treading (1) ..")
            doc = PDF.loads(in_file_handle)
        print("time elapsed : %d" % (time.time() - started))

        # attempt to store PDF
        started = time.time()
        with open(out_file, "wb") as out_file_handle:
            print("\twriting ..")
            PDF.dumps(out_file_handle, doc)
        print("time elapsed : %d" % (time.time() - started))

        # compare the sizes on disk
        size_of_original = Path(file).stat().st_size
        size_of_copy = Path(out_file).stat().st_size
        ratio = size_of_copy / size_of_original
        print("%s %d %d %f" %
              (file.stem, size_of_original, size_of_copy, ratio))

        # tolerate at most 5% growth
        if ratio > 1.05:
            raise Exception("Copied PDF is %f times larger than the original" %
                            ratio)

        return True
Esempio n. 2
0
    def _test_document(self, file):
        """Export a page image of a PDF to ``<output_dir>/<stem>.jpg``.

        A ``JPGExport`` listener is attached while the PDF is parsed; the image
        it stored under key 0 (presumably the first page) is saved.

        :param file: path of the PDF to export
        :return: True once the image has been saved
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            exporter = JPGExport()
            PDF.loads(pdf_file_handle, [exporter])

            # The image is saved by path. Previously the target was also opened
            # in "wb" mode, but nothing was ever written through that handle.
            output_file = self.output_dir / (file.stem + ".jpg")
            im = exporter.image_per_page.get(0)
            im.save(output_file)

        return True
Esempio n. 3
0
    def test_document(self, file):
        """Add a polygon annotation to page 0 of a PDF, store it and read it back.

        The annotated document is written to ``<output_dir>/<stem>_out.pdf``.

        :param file: path of the PDF to annotate
        :return: True when the annotated PDF was written and re-read without error
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")

        # attempt to read PDF
        with open(file, "rb") as in_file_handle:
            print("\treading (1) ..")
            doc = PDF.loads(in_file_handle)

        # add a three-point polygon annotation
        doc.get_page(0).append_polygon_annotation(
            points=[
                (Decimal(72), Decimal(390)),
                (Decimal(242), Decimal(500)),
                (Decimal(156), Decimal(390)),
            ],
            color=X11Color("Crimson"),
        )

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            print("\twriting ..")
            PDF.dumps(out_file_handle, doc)

        # attempt to re-open PDF; only a clean parse matters, the result is discarded
        with open(out_file, "rb") as in_file_handle:
            print("\treading (2) ..")
            PDF.loads(in_file_handle)

        return True
    def _test_document(self, file):
        """Add a text annotation to page 0 of a PDF, store it and read it back.

        The annotated document is written to ``<output_dir>/<stem>_out.pdf``.

        :param file: path of the PDF to annotate
        :return: True when the annotated PDF was written and re-read without error
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")

        # attempt to read PDF
        with open(file, "rb") as in_file_handle:
            print("\treading (1) ..")
            doc = PDF.loads(in_file_handle)

        # add annotation
        annotation_area = Rectangle(Decimal(128), Decimal(128), Decimal(64),
                                    Decimal(64))
        doc.get_page(0).append_text_annotation(
            contents="The quick brown fox ate the lazy mouse",
            rectangle=annotation_area,
            text_annotation_icon=TextAnnotationIconType.KEY,
            open=True,
            color=X11Color("Orange"),
        )

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            print("\twriting ..")
            PDF.dumps(out_file_handle, doc)

        # attempt to re-open PDF; only a clean parse matters, the result is discarded
        with open(out_file, "rb") as in_file_handle:
            print("\treading (2) ..")
            PDF.loads(in_file_handle)

        return True
Esempio n. 5
0
    def test_write_document(self):
        """Write a one-page PDF with a justified paragraph framed by a square annotation.

        The paragraph is laid out in a fixed rectangle and a red square
        annotation is added over the same rectangle. The result is stored as
        ``<output_dir>/output.pdf``.
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create document
        pdf = Document()

        # add page
        page = Page()
        pdf.append_page(page)

        # append paragraph
        Paragraph(
            "Once upon a midnight dreary, while I pondered weak and weary, over many a quaint and curious volume of forgotten lore",
            font_size=Decimal(20),
            text_alignment=Alignment.JUSTIFIED,
        ).layout(
            page,
            Rectangle(Decimal(20), Decimal(600), Decimal(500), Decimal(124)),
        )

        # add rectangle annotation covering the same area as the paragraph box
        page.append_square_annotation(
            stroke_color=X11Color("Red"),
            rectangle=Rectangle(Decimal(20), Decimal(600), Decimal(500),
                                Decimal(124)),
        )

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)
    def _test_document(self, file):
        """Dump a PDF's JSON-serializable form to ``<output_dir>/<stem>.json``.

        :param file: path of the PDF to export
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            doc = PDF.loads(pdf_file_handle)
            output_file = self.output_dir / (file.stem + ".json")

            # export to json; json.dump streams to the handle instead of
            # building the whole text in memory first
            with open(output_file, "w") as json_file_handle:
                json.dump(doc.to_json_serializable(doc), json_file_handle, indent=4)
    def test_extract_text_in_area(self):
        """Outline an area on page 0 of a fixed PDF and print the text found inside it.

        A copy of the input with the area outlined in red is written to
        ``<output_dir>/<stem>_bill_to_marked.pdf``. The input is then parsed
        again with a ``LocationFilter`` feeding a ``SimpleTextExtraction``
        listener, and the listener's text for index 0 is printed.
        """
        r = Rectangle(Decimal(50), Decimal(400), Decimal(200), Decimal(100))
        file: Path = Path("/home/joris/Code/pdf-corpus/0600.pdf")

        # make sure the output directory exists before writing into it
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            doc = PDF.loads(pdf_file_handle)

        # annotate before opening the output file so a failure here does not
        # leave an empty file behind
        doc.get_page(0).append_polygon_annotation(
            LineArtFactory.rectangle(r),
            stroke_color=X11Color("Red"),
        )
        output_file = self.output_dir / (file.stem + "_bill_to_marked.pdf")
        with open(output_file, "wb") as pdf_file_handle:
            PDF.dumps(pdf_file_handle, doc)

        # the filter receives the rectangle as two opposite corners
        l1 = SimpleTextExtraction()
        l2 = LocationFilter(
            r.get_x(), r.get_y(), r.get_x() + r.get_width(), r.get_y() + r.get_height()
        ).add_listener(l1)

        # second pass: parsing drives the listeners, the document itself is not needed
        with open(file, "rb") as pdf_file_handle:
            PDF.loads(pdf_file_handle, [l2])

        print(l1.get_text(0))
    def test_write_document(self):
        """Write a one-page PDF containing two right-aligned paragraphs.

        Both paragraphs are added to a ``SingleColumnLayout`` and the document
        is stored as ``<output_dir>/output.pdf``.
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create document
        pdf = Document()

        # add page
        page = Page()
        pdf.append_page(page)

        layout = SingleColumnLayout(page)

        # both paragraphs share the same formatting, only the text differs
        verses = (
            "Once upon a midnight dreary, while I pondered weak and weary, over many a quaint and curious volume of forgotten lore.",
            "While I nodded, nearly napping, suddenly there came a tapping. As of someone gently rapping, rapping at my chamberdoor.",
        )
        for verse in verses:
            layout.add(
                Paragraph(
                    verse,
                    font_size=Decimal(20),
                    text_alignment=Alignment.RIGHT,
                    horizontal_alignment=Alignment.RIGHT,
                ))

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)
Esempio n. 9
0
    def _test_document(self, file):
        """Frame every match of ``[sS]orbitol`` on page 0 with a square annotation.

        The PDF is parsed with a ``RegularExpressionTextExtraction`` listener,
        one annotation is added per matched event, and the result is written
        to ``<output_dir>/<stem>_out.pdf`` and read back.

        :param file: path of the PDF to search and annotate
        :return: True when the annotated PDF was written and re-read without error
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")

        # attempt to read PDF
        listener = RegularExpressionTextExtraction("[sS]orbitol")
        with open(file, "rb") as in_file_handle:
            print("\treading (1) ..")
            doc = PDF.loads(in_file_handle, [listener])

        # query the matches once; they are used for the count and for the loop
        matches = listener.get_matched_chunk_of_text_render_events_per_page(0)

        # add annotation
        print("\tAdding %d annotations" % len(matches))
        for match in matches:
            doc.get_page(0).append_square_annotation(
                match.get_bounding_box(),
                stroke_color=X11Color("Firebrick"),
            )

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            print("\twriting ..")
            PDF.dumps(out_file_handle, doc)

        # attempt to re-open PDF; only a clean parse matters, the result is discarded
        with open(out_file, "rb") as in_file_handle:
            print("\treading (2) ..")
            PDF.loads(in_file_handle)

        return True
Esempio n. 10
0
    def test_document(self, file):
        """Save every image found on page 0 of a PDF as ``<stem><index>.jpg``.

        Images are collected by a ``SimpleImageExtraction`` listener while the
        PDF is parsed and written into ``output_dir``.

        :param file: path of the PDF to extract images from
        :return: True once all images have been saved
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            extractor = SimpleImageExtraction()
            PDF.loads(pdf_file_handle, [extractor])

            # the running index keeps the file names of several images apart
            for i, img in enumerate(extractor.get_images_per_page(0)):
                output_file = self.output_dir / (file.stem + str(i) + ".jpg")
                with open(output_file, "wb") as image_file_handle:
                    img.save(image_file_handle)

        return True
    def _test_document(self, file):
        """Write the TF-IDF keywords of page 0 of a PDF to ``<output_dir>/<stem>.json``.

        Keywords come from ``get_keywords_per_page(0, 5)`` of a
        ``TFIDFKeywordExtraction`` listener (the second argument is presumably
        the number of keywords requested - confirm against the listener API).
        Each keyword object is serialized through its attribute dictionary.

        :param file: path of the PDF to analyse
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            extractor = TFIDFKeywordExtraction(ENGLISH_STOP_WORDS)
            PDF.loads(pdf_file_handle, [extractor])

            # export json
            output_file = self.output_dir / (file.stem + ".json")
            keywords = [vars(keyword) for keyword in extractor.get_keywords_per_page(0, 5)]
            with open(output_file, "w") as json_file_handle:
                json.dump(keywords, json_file_handle, indent=4)
    def _test_document(self, file):
        """Compare text extracted from a PDF with its ground-truth text file.

        The expected text is read from ``<input_dir>/<stem>.txt``. The PDF is
        parsed with a ``SimpleTextExtraction`` listener and the listener's
        ``get_text(0)`` result is passed, together with the expected text, to
        ``self._compare_text``.

        :param file: path of the PDF to check
        :return: True when the comparison call returned without raising
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # load the expected text
        txt_ground_truth_file = self.input_dir / (file.stem + ".txt")
        with open(txt_ground_truth_file, "r") as txt_ground_truth_file_handle:
            txt_ground_truth = txt_ground_truth_file_handle.read()

        # extract the actual text and compare
        with open(file, "rb") as pdf_file_handle:
            extractor = SimpleTextExtraction()
            PDF.loads(pdf_file_handle, [extractor])
            self._compare_text(file.stem, txt_ground_truth, extractor.get_text(0))

        return True
Esempio n. 13
0
 def _test_document(self, file) -> bool:
     """Print the XMP document information of the PDF at ``file``.

     Every field is printed on its own line behind a fixed-width label; an
     empty line closes the listing.

     :param file: path of the PDF whose XMP document info is printed
     :return: always True
     """
     # (line template, accessor name), listed in output order
     fields = (
         ("title                : %s", "get_title"),
         ("author               : %s", "get_author"),
         ("creator              : %s", "get_creator"),
         ("producer             : %s", "get_producer"),
         ("ids                  : %s", "get_ids"),
         ("language             : %s", "get_language"),
         ("document-ID          : %s", "get_document_id"),
         ("original document-ID : %s", "get_original_document_id"),
         ("creation date        : %s", "get_creation_date"),
         ("modification date    : %s", "get_modification_date"),
         ("metadata date        : %s", "get_metadata_date"),
     )

     with open(file, "rb") as handle:
         document = PDF.loads(handle)
         info = document.get_xmp_document_info()
         for template, accessor in fields:
             # accessor is resolved right before its line is printed
             print(template % getattr(info, accessor)())
         print("")

     return True
Esempio n. 14
0
    def _test_document(self, file):
        """Read a PDF and write it to ``<output_dir>/<stem>_out.pdf`` via ``PDFTransformer``.

        The document is written by calling ``PDFTransformer().transform`` with
        a ``TransformerWriteContext`` whose destination is the output file.

        :param file: path of the PDF to copy
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")

        # attempt to read PDF
        with open(file, "rb") as in_file_handle:
            doc = PDF.loads(in_file_handle)

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            wc = TransformerWriteContext(destination=out_file_handle, root_object=doc)
            PDFTransformer().transform(context=wc, object_to_transform=doc)
    def _test_document(self, file):
        """Print the text of the longest paragraph extracted from a PDF.

        Paragraphs are taken from ``get_paragraphs(0)`` of a
        ``SimpleParagraphExtraction`` listener. Among paragraphs of equal text
        length the first one wins. Nothing is printed when there are none.

        :param file: path of the PDF to inspect
        :return: always True
        """
        with open(file, "rb") as pdf_file_handle:

            # process document
            extractor = SimpleParagraphExtraction()
            doc = PDF.loads(pdf_file_handle, [extractor])

            # find longest paragraph: keep the current pick unless the
            # candidate's text is strictly longer
            longest = None
            for candidate in extractor.get_paragraphs(0):
                if longest is not None and len(longest.text) >= len(candidate.text):
                    continue
                longest = candidate

            # print
            if longest is not None:
                print(longest.text)
        return True
Esempio n. 16
0
    def test_extract_font_names(self):
        """Write the font names reported for page 0 of ``self.input_file`` as JSON.

        Names are collected by a ``FontExtraction`` listener, converted with
        ``str`` and written as a JSON list to ``self.output_file``.

        :return: True once the JSON file has been written
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_file.parent.mkdir(parents=True, exist_ok=True)

        # extract font names
        with open(self.input_file, "rb") as pdf_file_handle:
            extractor = FontExtraction()
            PDF.loads(pdf_file_handle, [extractor])
            font_names = [str(name) for name in extractor.get_font_names_per_page(0)]

        # write output
        with open(self.output_file, "w") as json_file_handle:
            json.dump(font_names, json_file_handle)

        return True
Esempio n. 17
0
    def _test_document(self, file):
        """Export every match of ``[hH]ealth`` on page 0 to ``<output_dir>/<stem>.json``.

        For each matched event the text and the integer-truncated end points
        (``x0``, ``y0``, ``x1``, ``y1``) of its baseline are recorded.

        :param file: path of the PDF to search
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            listener = RegularExpressionTextExtraction("[hH]ealth")
            PDF.loads(pdf_file_handle, [listener])

            # export matches
            records = []
            for event in listener.get_matched_text_render_info_events_per_page(0):
                # fetch the baseline once instead of once per coordinate
                baseline = event.get_baseline()
                records.append({
                    "text": event.get_text(),
                    "x0": int(baseline.x0),
                    "y0": int(baseline.y0),
                    "x1": int(baseline.x1),
                    "y1": int(baseline.y1),
                })

            output_file = self.output_dir / (file.stem + ".json")
            with open(output_file, "w") as json_file_handle:
                json.dump(records, json_file_handle, indent=4)
Esempio n. 18
0
    def _test_document(self, file):
        """Export every match of ``[sS]orbitol`` on page 0 to ``<output_dir>/<stem>.json``.

        For each matched event the text and the integer-truncated ``x``,
        ``y``, ``width`` and ``height`` of its baseline are recorded.

        :param file: path of the PDF to search
        :return: True once the JSON file has been written
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        with open(file, "rb") as pdf_file_handle:
            listener = RegularExpressionTextExtraction("[sS]orbitol")
            PDF.loads(pdf_file_handle, [listener])

            # export matches
            records = []
            for event in listener.get_matched_chunk_of_text_render_events_per_page(0):
                # fetch the baseline once instead of once per field
                baseline = event.get_baseline()
                records.append({
                    "text": event.text,
                    "x": int(baseline.x),
                    "y": int(baseline.y),
                    "width": int(baseline.width),
                    "height": int(baseline.height),
                })

            output_file = self.output_dir / (file.stem + ".json")
            with open(output_file, "w") as json_file_handle:
                json.dump(records, json_file_handle, indent=4)

        return True
Esempio n. 19
0
    def test_write_document(self):
        """Write a PDF with a table of glyphs rendered in the Pacifico TrueType font.

        The 10x4 table has a header row, one row per sample letter (lowercase,
        uppercase and, where listed, the acute variants; "-" otherwise) and a
        row of "..." placeholders before the final letter. The document is
        stored as ``<output_dir>/output.pdf`` and read back.
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create document
        pdf = Document()

        # add page
        page = Page()
        pdf.append_page(page)
        layout = SingleColumnLayout(page)

        t = Table(number_of_rows=10, number_of_columns=4)

        # header row
        for caption in ("lowercase", "uppercase", "lowercase acute",
                        "uppercase acute"):
            t.add(
                Paragraph(
                    caption,
                    font_color=X11Color("YellowGreen"),
                    horizontal_alignment=Alignment.CENTERED,
                ))

        # the font file is expected next to this test module
        font: Font = TrueTypeFont.true_type_font_from_file(
            Path(__file__).parent / "Pacifico.ttf")

        # one tuple per table row; None marks the "..." placeholder row
        glyph_rows = [
            ("a", "A", "á", "Á"),
            ("b", "B", "-", "-"),
            ("c", "C", "-", "-"),
            ("d", "D", "-", "-"),
            ("e", "E", "é", "É"),
            ("f", "F", "-", "-"),
            ("g", "G", "-", "-"),
            None,
            ("z", "Z", "-", "-"),
        ]
        for glyphs in glyph_rows:
            if glyphs is None:
                for _ in range(4):
                    t.add(
                        Paragraph(
                            "...",
                            font_color=X11Color("LightGray"),
                            horizontal_alignment=Alignment.CENTERED,
                        ))
                continue
            for glyph in glyphs:
                t.add(Paragraph(glyph, font=font))

        # build the Decimal from a string: Decimal(0.2) would carry the binary
        # float representation error of 0.2
        t.set_border_width_on_all_cells(Decimal("0.2"))
        t.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5),
                                   Decimal(5))

        layout.add(t)

        layout.add(
            Paragraph(
                text=
                "**These are the characters pText can currently render in a PDF",
                font_size=Decimal(8),
                font_color=X11Color("Gray"),
                horizontal_alignment=Alignment.RIGHT,
            ))

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)

        # attempt to re-open PDF
        with open(out_file, "rb") as in_file_handle:
            PDF.loads(in_file_handle)
    def test_write_document(self):
        """Write a one-page, multi-column PDF with an image, a title and a poem.

        The image (given by URL) goes into the first column; the title, the
        poem and the signature follow after switching to the next column. The
        document is stored as ``<output_dir>/output.pdf``.

        :return: True once the PDF has been written
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create empty document
        pdf: Document = Document()

        # create empty page
        page: Page = Page()

        # add page to document
        pdf.append_page(page)

        # set up the column layout
        layout = MultiColumnLayout(page)

        # add image
        layout.add(
            Image(
                "https://images.unsplash.com/photo-1550155864-3033f844da36?ixid=MXwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHw%3D&ixlib=rb-1.2.1&auto=format&fit=crop&w=634&q=80",
                width=Decimal(256),
            )
        )
        layout.switch_to_next_column()

        # add title
        layout.add(
            Paragraph(
                "Love you more",
                font_color=X11Color("Crimson"),
                font="Helvetica-Bold",
                font_size=Decimal(20),
            )
        )

        # add poem; the literal's line breaks are passed on as-is
        layout.add(
            Paragraph(
                """When I say I love you more,
                                I don't just mean I love you more
                                than you love me. I mean I love
                                you more than the bad days
                                ahead of us. I love you more
                                than any fight we will ever have.
                                I love you more than the distance between us.
                                I love you more than any obstacle that
                                could ever try and come
                                between us. I love you the most.
                                """,
                respect_newlines_in_text=True,
            )
        )

        # add signature
        layout.add(
            Paragraph(
                """yours, most sincerely
                                JS
                             """,
                font_color=X11Color("SlateGray"),
                font="Helvetica-Bold",
                font_size=Decimal(8),
                respect_newlines_in_text=True,
            )
        )

        # write
        file = self.output_dir / "output.pdf"
        with open(file, "wb") as pdf_file_handle:
            PDF.dumps(pdf_file_handle, pdf)

        return True
    def test_document(self, file):
        """Draw a pixel-art figure on page 0 of a PDF using link annotations.

        Every non-zero entry of the pixel matrix becomes one square link
        annotation whose color is looked up in the palette. The result is
        written to ``<output_dir>/<stem>_out.pdf`` and read back.

        :param file: path of the PDF to annotate
        :return: True when the annotated PDF was written and re-read without error
        """
        # palette indices per pixel, top row first; 0 means "no annotation"
        pixel_rows = [
            [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
            [0, 0, 0, 2, 2, 2, 3, 3, 2, 3, 0, 0, 0, 0],
            [0, 0, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 0, 0],
            [0, 0, 2, 3, 2, 2, 3, 3, 3, 2, 3, 3, 3, 0],
            [0, 0, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 0, 0],
            [0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0],
            [0, 0, 0, 1, 1, 4, 1, 1, 1, 1, 1, 0, 0, 0],
            [0, 0, 1, 1, 1, 4, 1, 1, 4, 1, 1, 1, 0, 0],
            [0, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 1, 1, 0],
            [0, 3, 3, 1, 4, 5, 4, 4, 5, 4, 1, 3, 3, 0],
            [0, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 0],
            [0, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 0],
            [0, 0, 0, 4, 4, 4, 0, 0, 4, 4, 4, 0, 0, 0],
            [0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0],
            [0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0],
        ]
        palette = [
            None,
            X11Color("Red"),
            X11Color("Black"),
            X11Color("Tan"),
            X11Color("Blue"),
            X11Color("White"),
        ]

        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / (file.stem + "_out.pdf")

        # attempt to read PDF
        with open(file, "rb") as in_file_handle:
            print("\treading (1) ..")
            doc = PDF.loads(in_file_handle)

        # add annotation
        pixel_size = 2
        row_count = len(pixel_rows)
        for row_index, row in enumerate(pixel_rows):
            for column_index, color_index in enumerate(row):
                if color_index == 0:
                    continue
                x = pixel_size * column_index
                # matrix rows run top to bottom, so the row index is flipped
                # to place the first row at the largest y
                y = pixel_size * (row_count - row_index)
                doc.get_page(0).append_link_annotation(
                    page=Decimal(0),
                    color=palette[color_index],
                    location_on_page="Fit",
                    rectangle=Rectangle(
                        Decimal(x),
                        Decimal(y),
                        Decimal(pixel_size),
                        Decimal(pixel_size),
                    ),
                )

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            print("\twriting ..")
            PDF.dumps(out_file_handle, doc)

        # attempt to re-open PDF; only a clean parse matters, the result is discarded
        with open(out_file, "rb") as in_file_handle:
            print("\treading (2) ..")
            PDF.loads(in_file_handle)

        return True
Esempio n. 22
0
def read_document():
    """Load the PDF at a hard-coded corpus path with ``PDF.loads``.

    NOTE(review): in the visible code the loaded document is only bound to
    the local ``doc`` and nothing is returned - confirm whether the function
    continues beyond this excerpt.
    """
    doc: typing.Optional[Document] = None
    # absolute, machine-specific path to the sample document
    with open(Path("/home/joris/Code/pdf-corpus/0063_page_0.pdf"),
              "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle)
    def test_write_document(self):
        """Write a "complete the picture" puzzle page to ``<output_dir>/output.pdf``.

        A logo is downloaded, converted with ``_convert_png_to_jpg`` and
        resized to 256x256. A second image of the same size is derived from
        it: alternating 64-pixel-wide bands (along the first pixel coordinate)
        are copied from the logo, the remaining bands stay white, and black
        lines mark the band boundaries and the outer frame. Both images are
        placed side by side in a borderless table.

        :return: True once the PDF has been written
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create empty document
        pdf: Document = Document()

        # create empty page
        page: Page = Page()
        pdf.append_page(page)
        layout = SingleColumnLayout(page)

        # add title
        layout.add(
            Paragraph(
                "Complete the picture",
                font_size=Decimal(20),
                font_color=X11Color("YellowGreen"),
            ))

        layout.add(
            Paragraph(
                """
                Can you complete the picture on the right by copying the completed picture on the left?
                """,
                respect_newlines_in_text=True,
                font_color=X11Color("SlateGray"),
                font_size=Decimal(8),
            ))

        # fetch and prepare the reference image
        # NOTE(review): the request has no timeout and the response is never
        # closed explicitly
        image_a = PILImage.open(
            requests.get(
                "https://www.mozilla.org/media/protocol/img/logos/firefox/browser/logo-lg-high-res.fbc7ffbb50fd.png",
                stream=True,
            ).raw)
        image_a = TestWriteCompleteThePictureHorizontallyPuzzle._convert_png_to_jpg(
            image_a)
        size = 256
        band = 64
        image_a = image_a.resize((size, size))
        image_b = PILImage.new(size=(size, size),
                               color=(255, 255, 255),
                               mode="RGB")

        # derive the puzzle image
        pixels_a = image_a.load()
        pixels_b = image_b.load()
        last = size - 1
        for i in range(size):
            # both flags depend on i only, so they are computed once per i;
            # i == 0 is already covered by the band-boundary test
            on_band_edge = i % band == 0 or i == last
            copy_band = (i // band) % 2 == 0
            for j in range(size):
                if on_band_edge or j == 0 or j == last:
                    pixels_b[(i, j)] = (0, 0, 0)
                elif copy_band:
                    pixels_b[(i, j)] = pixels_a[(i, j)]

        t: Table = Table(number_of_columns=2, number_of_rows=1)
        t.add(Image(image_a))
        t.add(Image(image_b))
        t.no_borders()
        t.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5),
                                   Decimal(5))

        layout.add(t)

        # write
        file = self.output_dir / "output.pdf"
        with open(file, "wb") as pdf_file_handle:
            PDF.dumps(pdf_file_handle, pdf)

        return True
Esempio n. 24
0
    def test_write_document(self):
        """Write a "reverse the words" puzzle page to ``<output_dir>/output.pdf``.

        Each sentence is encoded by reversing every word and following it with
        three spaces. The table holds, per sentence, a number cell spanning
        two rows, the encoded sentence, and an answer cell with only a bottom
        border.
        """
        sentences = [
            "THE BOAT WILL ARRIVE ON MONDAY",
            "SHE LIVES AT THE HOUSE WITH THE BLUE DOOR",
            "A FRIEND IN NEED IS A FRIEND INDEED",
            "AN APPLE A DAY KEEPS THE DOCTOR AWAY",
        ]

        pdf = Document()
        page = Page()
        pdf.append_page(page)

        # layout
        layout = SingleColumnLayout(page)

        # add title
        layout.add(
            Paragraph(
                "Reverse the words",
                font_size=Decimal(20),
                font_color=X11Color("YellowGreen"),
            ))

        # add text
        layout.add(
            Paragraph(
                """
                This is perhaps the simplest code to use and solve. 
                Simply read each word backwards.
                """,
                font_color=X11Color("SlateGray"),
                font_size=Decimal(8),
            ))

        # add grid: two table rows per sentence
        t = Table(
            number_of_rows=len(sentences) * 2,
            number_of_columns=2,
            column_widths=[Decimal(1), Decimal(9)],
        )
        # border flags shared by all three cells of a sentence
        no_side_or_top_border = {
            "border_top": False,
            "border_right": False,
            "border_left": False,
        }
        for number, sentence in enumerate(sentences, start=1):
            # code word: reverse each word, keep the word order
            coded_sentence = "".join(
                word[::-1] + "   " for word in sentence.split(" "))
            # running number, spanning the sentence row and the answer row
            t.add(
                TableCell(
                    Paragraph(str(number) + "."),
                    border_bottom=False,
                    row_span=2,
                    **no_side_or_top_border,
                ))
            # encoded sentence
            t.add(
                TableCell(
                    Paragraph(coded_sentence, respect_spaces_in_text=True),
                    border_bottom=False,
                    **no_side_or_top_border,
                ))
            # answer line: only the bottom border is drawn
            t.add(
                TableCell(
                    Paragraph(".."),
                    border_bottom=True,
                    **no_side_or_top_border,
                ))

        t.set_padding_on_all_cells(Decimal(15), Decimal(5), Decimal(5),
                                   Decimal(5))
        layout.add(t)

        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)
    def test_write_document(self):
        """Write a page of randomly placed stars with a two-tone footer.

        A borderless 10x25 table is filled cell by cell: a cell gets a star
        with a 4-in-10 chance (random color from ``self.COLORS``, random size
        and point count), otherwise a single space. Two filled rectangles
        form the footer. The document is stored as
        ``<output_dir>/output.pdf`` and read back.
        """
        # parents/exist_ok: works when intermediate directories are missing and
        # does not fail if the directory is created concurrently
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create document
        pdf = Document()

        # add page
        page = Page()
        pdf.append_page(page)

        # choice pools, built once instead of on every iteration
        star_odds = [x <= 3 for x in range(0, 10)]  # 4 True, 6 False
        star_sizes = [
            Decimal(16),
            Decimal(16),
            Decimal(16),
            Decimal(16),
            Decimal(8),
            Decimal(4),
        ]  # size 16 is weighted 4:1:1 against 8 and 4

        layout = SingleColumnLayout(page)
        number_of_columns, number_of_rows = 10, 25
        t = Table(number_of_columns=number_of_columns, number_of_rows=number_of_rows)
        for _ in range(number_of_columns * number_of_rows):
            if random.choice(star_odds):
                c: Color = random.choice(self.COLORS)
                s: Decimal = random.choice(star_sizes)
                t.add(
                    Shape(
                        LineArtFactory.n_pointed_star(
                            bounding_box=Rectangle(Decimal(0), Decimal(0), s, s),
                            n=random.choice([3, 5, 7, 12]),
                        ),
                        fill_color=c,
                        stroke_color=c,
                        line_width=Decimal(1),
                    )
                )
            else:
                t.add(Paragraph(" ", respect_spaces_in_text=True))
        t.no_borders()
        t.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5), Decimal(5))
        layout.add(t)

        # footer: a band over the bottom tenth of the page ...
        page_info = page.get_page_info()
        page_width = page_info.get_width()
        # Decimal("0.1") is exact; Decimal(0.1) would carry the float error
        footer_height = page_info.get_height() * Decimal("0.1")

        rectangle_box = Rectangle(
            Decimal(0),
            Decimal(0),
            page_width,
            footer_height,
        )
        Shape(
            LineArtFactory.rectangle(rectangle_box),
            fill_color=self.COLORS[0],
            stroke_color=self.COLORS[0],
            line_width=Decimal(1),
        ).layout(page, rectangle_box)

        # ... topped by a thin stripe in the second color
        rectangle_box = Rectangle(
            Decimal(0),
            footer_height,
            page_width,
            Decimal(2),
        )
        Shape(
            LineArtFactory.rectangle(rectangle_box),
            fill_color=self.COLORS[1],
            stroke_color=self.COLORS[1],
            line_width=Decimal(1),
        ).layout(page, rectangle_box)

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)

        # attempt to re-open PDF
        with open(out_file, "rb") as in_file_handle:
            PDF.loads(in_file_handle)
    def test_write_document(self):
        """
        Assemble a maze booklet and store it as "output.pdf" in self.output_dir.

        self._write_maze_page is called three times for every
        (image url, colour string) pair listed below; afterwards a closing
        page with two paragraphs and a QR code is appended.
        """

        # make sure the output directory is there
        if not self.output_dir.exists():
            self.output_dir.mkdir()

        pdf = Document()

        # fmt: off
        maze_sources = [
            ("https://i.pinimg.com/originals/1e/c2/a7/1ec2a73d0a45016c7d1b52ef9f11e740.png", "395E66"),
            ("https://i.pinimg.com/originals/f8/23/88/f823882e7c5fa42790e78f43ecf7e8bf.jpg", "387D7A"),
            ("https://i.pinimg.com/600x315/2d/94/33/2d94334b737efb5d3a5ef32aef9daefc.jpg", "32936F"),
            ("https://i.pinimg.com/originals/f1/c9/07/f1c907c09d65d5c86fba304fed1009ca.jpg", "26A96C"),
            ("https://cdn.pixabay.com/photo/2017/08/24/12/11/silhouette-2676573_960_720.png", "2BC016"),
            ("https://images-na.ssl-images-amazon.com/images/I/61bqYbAeUgL._AC_SL1500_.jpg", "395E66"),
            ("https://i.pinimg.com/originals/55/e8/91/55e891af7de086a8868e1a8e02fb4426.jpg","387D7A"),
            ("https://cdn.shopify.com/s/files/1/2123/8425/products/166422700-LRG_242a4c8b-cad5-476e-afd1-c8b882d48fc2_530x.jpg","32936F"),
            ("http://www.silhcdn.com/3/i/shapes/lg/7/6/d124067.jpg","26A96C"),
            ("https://cdn.pixabay.com/photo/2018/03/04/23/28/frog-3199601_1280.png","2BC016")
        ]
        # fmt: on

        # three maze pages per source image
        for image_url, colour in maze_sources:
            for _ in range(3):
                self._write_maze_page(pdf, image_url, colour)

        # closing (acknowledgement) page
        ack_page = Page()
        pdf.append_page(ack_page)
        ack_layout = SingleColumnLayout(ack_page)

        greeting = Paragraph(
            "Hi there,",
            font_color=HexColor("32936F"),
            font_size=Decimal(20),
        )
        ack_layout.add(greeting)

        blurb = Paragraph(
            "This PDF was made by pText. Check out the GitHub repository to find more fun examples of what you can do with PDF's.",
            font_color=X11Color("SlateGray"),
            font_size=Decimal(12),
        )
        ack_layout.add(blurb)

        repository_qr = Barcode(
            data="https://github.com/jorisschellekens/ptext-release",
            type=BarcodeType.QR,
            width=Decimal(64),
        )
        ack_layout.add(repository_qr)

        # store the PDF
        out_file = self.output_dir / "output.pdf"
        with open(out_file, "wb") as pdf_out:
            PDF.dumps(pdf_out, pdf)
Esempio n. 27
0
    def test_write_document(self):
        """
        Generate a "match up" puzzle page and check that it survives a round trip.

        Eleven icon URLs are picked at random from the list below; the first
        one is placed once (the odd one out) and each of the others twice, on
        distinct cells of a borderless 10 x 10 table. The document is written
        to "output.pdf" in self.output_dir and then read back.
        """

        # create output directory (and any missing parents) if needed
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create document
        pdf = Document()

        # add page
        page = Page()
        pdf.append_page(page)
        layout = SingleColumnLayout(page)

        # add title
        layout.add(
            Paragraph(
                "Match Up Puzzle",
                font_size=Decimal(20),
                font_color=X11Color("YellowGreen"),
            )
        )

        # add explanation
        layout.add(
            Paragraph(
                """
        These simple "match up" puzzles help children with observation skills. 
        They will also need to learn a way of marking or remembering which items they have matched, 
        so that they can identify the odd ones out. 
        If you would like to reuse puzzles you could place counters on each "pair" that your child finds, perhaps.""",
                font_color=X11Color("SlateGray"),
                font_size=Decimal(8),
            )
        )

        # candidate images
        imgs = [
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Orc-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/King-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Knight-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Medusa-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Monster-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Sorceress-Witch-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Centaur-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Elf-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Poison-Spider-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Unicorn-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Viking-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Villager-icon.png",
            "https://icons.iconarchive.com/icons/chanut/role-playing/128/Dragon-Egg-icon.png",
        ]

        grid_size = 10
        random.shuffle(imgs)
        selected = imgs[0 : (grid_size + 1)]

        # the first icon appears once, every other icon appears twice
        icons_to_place = selected[:1] + [
            url for url in selected[1:] for _ in range(2)
        ]

        # Draw distinct cell indices from the valid range 0 .. grid_size**2 - 1.
        # random.randint(0, grid_size ** 2) includes its upper bound and could
        # therefore pick a cell that is never rendered, silently dropping an icon.
        cells = random.sample(range(grid_size ** 2), len(icons_to_place))
        image_positions: typing.Dict[int, str] = dict(zip(cells, icons_to_place))

        # fill the table cell by cell: an icon where one was assigned, else a blank
        t = Table(number_of_rows=grid_size, number_of_columns=grid_size)
        for cell in range(grid_size ** 2):
            if cell in image_positions:
                t.add(
                    Image(image_positions[cell], width=Decimal(32), height=Decimal(32))
                )
            else:
                t.add(Paragraph(" ", respect_spaces_in_text=True))
        t.no_borders()
        t.set_padding_on_all_cells(Decimal(2), Decimal(2), Decimal(2), Decimal(2))
        layout.add(t)

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)

        # attempt to re-open PDF
        with open(out_file, "rb") as in_file_handle:
            PDF.loads(in_file_handle)
    def test_write_document(self):
        """
        Build a multi-column "100 stars" thank-you page and check that it can
        be written and read back.

        The page gets a background (self._write_background), a borderless
        4 x 25 table of avatar images downloaded from
        avatars.githubusercontent.com (one per id in self.FIRST_100_STARS,
        at most 100), a centred headline, and - after switching to the next
        column - the thank-you text and a QR code. Two footer bands are drawn
        directly on the page. The result is stored as "output.pdf" in
        self.output_dir and then read back. Requires network access.
        """

        # create output directory (and any missing parents) if needed
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # create document
        pdf = Document()

        # add page
        page = Page()
        pdf.append_page(page)

        layout = MultiColumnLayout(page)

        # background
        self._write_background(page)

        # table
        avatar_urls = [
            "https://avatars.githubusercontent.com/u/" + str(x)
            for x in self.FIRST_100_STARS
        ]
        t = Table(number_of_columns=4, number_of_rows=25)
        for avatar_url in avatar_urls[0 : (4 * 25)]:
            # A timeout keeps a stalled download from hanging the test run;
            # raise_for_status surfaces HTTP errors instead of handing an
            # error body to PIL.
            response = requests.get(avatar_url, stream=True, timeout=30)
            response.raise_for_status()
            im = PILImage.open(response.raw)
            t.add(Image(im, width=Decimal(20), height=Decimal(20)))
        t.set_padding_on_all_cells(Decimal(2), Decimal(2), Decimal(2), Decimal(2))
        t.no_borders()
        layout.add(t)

        layout.add(
            Paragraph(
                "100 stars!",
                font="Helvetica-Bold",
                font_size=Decimal(20),
                font_color=self.ACCENT_COLOR_1,
                horizontal_alignment=Alignment.CENTERED,
            )
        )

        # next column
        layout.switch_to_next_column()

        # paragraph
        layout.add(
            Paragraph(
                "Thank you,",
                font="Helvetica-Bold",
                font_size=Decimal(20),
                font_color=self.ACCENT_COLOR_1,
            )
        )
        layout.add(
            Paragraph(
                "Your support and encouragement have always been the driving factors in the development of pText. "
                "I want you to know that I value your appreciation immensely!"
            )
        )
        layout.add(
            Paragraph(
                "-- Joris Schellekens",
                font="Helvetica-Oblique",
                font_size=Decimal(8),
                font_color=self.ACCENT_COLOR_2,
            )
        )

        layout.add(
            Barcode(
                data="https://github.com/jorisschellekens/ptext-release/stargazers",
                type=BarcodeType.QR,
                width=Decimal(128),
                stroke_color=self.ACCENT_COLOR_1,
            )
        )

        # footer: a band one tenth of the page high ...
        rectangle_box = Rectangle(
            Decimal(0),
            Decimal(0),
            page.get_page_info().get_width(),
            page.get_page_info().get_height() * Decimal(0.1),
        )
        Shape(
            LineArtFactory.rectangle(rectangle_box),
            fill_color=self.ACCENT_COLOR_1,
            stroke_color=self.ACCENT_COLOR_1,
            line_width=Decimal(1),
        ).layout(page, rectangle_box)

        # ... topped by a 2-unit strip in the second accent colour
        rectangle_box = Rectangle(
            Decimal(0),
            page.get_page_info().get_height() * Decimal(0.1),
            page.get_page_info().get_width(),
            Decimal(2),
        )
        Shape(
            LineArtFactory.rectangle(rectangle_box),
            fill_color=self.ACCENT_COLOR_2,
            stroke_color=self.ACCENT_COLOR_2,
            line_width=Decimal(1),
        ).layout(page, rectangle_box)

        # determine output location
        out_file = self.output_dir / "output.pdf"

        # attempt to store PDF
        with open(out_file, "wb") as out_file_handle:
            PDF.dumps(out_file_handle, pdf)

        # attempt to re-open PDF
        with open(out_file, "rb") as in_file_handle:
            PDF.loads(in_file_handle)
Esempio n. 29
0
    def _test_document(self, file):
        """
        Compare the text-render measurands of a PDF against a stored ground truth.

        The PDF is parsed with a TextRenderInfoMeasurandListener attached; the
        collected measurands are dumped to "<stem>_text_rendering.json" in
        self.output_dir and compared, entry by entry, with the file of the
        same name in self.input_dir. Entries whose text starts with a
        character other than a letter or a space are skipped on both sides.
        The comparison stops as soon as either list is exhausted.

        :param file: path of the PDF to test (its ``stem`` names the JSON files)
        :return: False at the first entry whose text differs or whose x0, x1
                 or y value (truncated to int) deviates from the ground truth
                 by more than self.max_distance; True otherwise
        """

        # create output directory (and any missing parents) if needed
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # read document, collecting measurands along the way
        with open(file, "rb") as pdf_file_handle:
            listener = TextRenderInfoMeasurandListener()
            PDF.loads(pdf_file_handle, [listener])
        measurands = listener.measurands

        # export json
        output_file = self.output_dir / (file.stem + "_text_rendering.json")
        with open(output_file, "w") as txt_file_handle:
            txt_file_handle.write(json.dumps(measurands, indent=4))

        # load ground truth
        ground_truth_file = self.input_dir / (file.stem + "_text_rendering.json")
        with open(ground_truth_file, "r") as json_file_handle:
            ground_truth_results = json.loads(json_file_handle.read())

        # entries starting with a non-letter, non-space character are ignored
        skip_pattern = re.compile(r"[^a-zA-Z ]+")

        # compare
        pos_in_test_array = 0
        pos_in_gt_array = 0
        while True:
            while pos_in_test_array < len(measurands) and skip_pattern.match(
                measurands[pos_in_test_array]["text"]
            ):
                pos_in_test_array += 1
            while pos_in_gt_array < len(
                ground_truth_results
            ) and skip_pattern.match(ground_truth_results[pos_in_gt_array]["text"]):
                pos_in_gt_array += 1

            # Skipping may have run off the end of either list; stop here
            # instead of indexing past the last element.
            if pos_in_test_array >= len(measurands) or pos_in_gt_array >= len(
                ground_truth_results
            ):
                break

            test_entry = measurands[pos_in_test_array]
            gt_entry = ground_truth_results[pos_in_gt_array]

            # check text
            if test_entry["text"] != gt_entry["text"]:
                print("text inequality %d %d !!" % (pos_in_gt_array, pos_in_test_array))
                print("\tground truth: %s" % gt_entry["text"])
                print("\ttest        : %s" % test_entry["text"])
                return False

            # check coordinates; x1 is compared on its own values rather than
            # being a second copy of the y comparison
            for key in ("x0", "x1", "y"):
                delta = abs(int(test_entry[key]) - int(gt_entry[key]))
                if delta > self.max_distance:
                    print(
                        "%s inequality %d %d !!"
                        % (key, pos_in_gt_array, pos_in_test_array)
                    )
                    print("\tground truth: %f" % gt_entry[key])
                    print("\ttest        : %f" % test_entry[key])
                    return False

            print(
                "%s %d %d %d"
                % (
                    test_entry["text"],
                    test_entry["x0"],
                    test_entry["x1"],
                    test_entry["y"],
                )
            )
            pos_in_test_array += 1
            pos_in_gt_array += 1

        return True
    def test_write_document(self):
        """
        Write a one-page PDF containing a 5 x 2 table that pairs a caption
        with a barcode of each type (CODE 128, CODE 39, EAN 13, EAN 14, QR)
        and store it as "output.pdf" in self.output_dir.

        :return: True
        """

        # make sure the output directory is there
        if not self.output_dir.exists():
            self.output_dir.mkdir()

        # empty document with a single page
        pdf: Document = Document()
        page: Page = Page()
        pdf.append_page(page)

        # set layout
        layout = SingleColumnLayout(page)

        # caption, payload, barcode type and stroke colour of the plain rows
        row_specs = (
            ("CODE 128", "123456789128", BarcodeType.CODE_128, "#080708"),
            ("CODE 39", "123456789128", BarcodeType.CODE_39, "#3772FF"),
            ("EAN 13", "123456789128", BarcodeType.EAN_13, "#DF2935"),
            ("EAN 14", "1234567891280", BarcodeType.EAN_14, "#FDCA40"),
        )

        # each add() result is threaded through, as in a fluent call chain
        barcode_table = Table(number_of_rows=5, number_of_columns=2)
        for caption, payload, barcode_type, stroke_hex in row_specs:
            barcode_table = barcode_table.add(Paragraph(caption))
            barcode_table = barcode_table.add(
                Barcode(
                    data=payload,
                    type=barcode_type,
                    width=Decimal(128),
                    stroke_color=HexColor(stroke_hex),
                )
            )

        # the QR row additionally sets a fill colour
        barcode_table = barcode_table.add(Paragraph("QR"))
        barcode_table = barcode_table.add(
            Barcode(
                data="1234567891280",
                type=BarcodeType.QR,
                width=Decimal(128),
                stroke_color=HexColor("#E6E8E6"),
                fill_color=HexColor("#DF2935"),
            )
        )

        # add the padded table to the page
        layout.add(
            barcode_table.set_padding_on_all_cells(
                Decimal(10), Decimal(5), Decimal(5), Decimal(5)
            )
        )

        # write
        out_file = self.output_dir / "output.pdf"
        with open(out_file, "wb") as pdf_out:
            PDF.dumps(pdf_out, pdf)

        return True