def test_document(self, file):
    """Round-trip a PDF through ``PDF.loads``/``PDF.dumps`` and compare sizes.

    The copy is written to ``<output_dir>/<stem>_out.pdf``. The elapsed time
    of each phase is printed in whole seconds, followed by a summary line
    holding the stem, both file sizes and their ratio.

    :param file: path of the PDF to copy (its ``stem`` names the output)
    :return: ``True`` when the copy is at most 1.05 times the original size
    :raises Exception: when the copy is more than 1.05 times larger
    """
    # the copy needs a directory to land in
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()
    copy_path = target_dir / (file.stem + "_out.pdf")

    # phase 1: parse the input document
    started_at = time.time()
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)
    print("time elapsed : %d" % (time.time() - started_at))

    # phase 2: serialise it again
    started_at = time.time()
    with open(copy_path, "wb") as copy_handle:
        print("\twriting ..")
        PDF.dumps(copy_handle, document)
    print("time elapsed : %d" % (time.time() - started_at))

    # phase 3: the copy must not be noticeably bigger than the original
    original_bytes = Path(file).stat().st_size
    copy_bytes = Path(copy_path).stat().st_size
    growth = float(copy_bytes) / original_bytes
    print("%s %d %d %f" % (file.stem, original_bytes, copy_bytes, growth))
    if growth > 1.05:
        raise Exception("Copied PDF is %f times larger than the original" % growth)
    return True
def _test_document(self, file):
    """Export the first page of a PDF to ``<output_dir>/<stem>.jpg``.

    The document is parsed with a ``JPGExport`` listener attached; the image
    the listener stored for page 0 is then saved.

    :param file: path of the PDF to render (its ``stem`` names the output)
    :return: ``True`` once the image has been saved
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    exporter = JPGExport()
    with open(file, "rb") as pdf_file_handle:
        PDF.loads(pdf_file_handle, [exporter])

    output_file = self.output_dir / (file.stem + ".jpg")
    # Save straight to the path. The previous version also held an unused
    # "wb" handle on the same file while saving, so the file was opened twice.
    # NOTE(review): assumes image_per_page.get(0) yields an object with a
    # PIL-style save(path) method - confirm against JPGExport.
    im = exporter.image_per_page.get(0)
    im.save(output_file)
    return True
def test_document(self, file):
    """Add a crimson polygon annotation to page 0 and round-trip the PDF.

    The annotated document is written to ``<output_dir>/<stem>_out.pdf`` and
    then parsed once more to check that it can be read back.

    :param file: path of the PDF to annotate
    :return: ``True`` when all three phases finish without raising
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()
    annotated_path = target_dir / (file.stem + "_out.pdf")

    # read
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # annotate: a triangle given by its three corner points
    corners = [
        (Decimal(72), Decimal(390)),
        (Decimal(242), Decimal(500)),
        (Decimal(156), Decimal(390)),
    ]
    document.get_page(0).append_polygon_annotation(
        points=corners,
        color=X11Color("Crimson"),
    )

    # write
    with open(annotated_path, "wb") as target_handle:
        print("\twriting ..")
        PDF.dumps(target_handle, document)

    # read back what was just written
    with open(annotated_path, "rb") as check_handle:
        print("\treading (2) ..")
        document = PDF.loads(check_handle)

    return True
def _test_document(self, file):
    """Add an open, orange text annotation to page 0 and round-trip the PDF.

    The annotated document is written to ``<output_dir>/<stem>_out.pdf`` and
    then parsed once more to check that it can be read back.

    :param file: path of the PDF to annotate
    :return: ``True`` when all three phases finish without raising
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()
    annotated_path = target_dir / (file.stem + "_out.pdf")

    # read
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # annotate the first page
    first_page = document.get_page(0)
    note_box = Rectangle(Decimal(128), Decimal(128), Decimal(64), Decimal(64))
    first_page.append_text_annotation(
        contents="The quick brown fox ate the lazy mouse",
        rectangle=note_box,
        text_annotation_icon=TextAnnotationIconType.KEY,
        open=True,
        color=X11Color("Orange"),
    )

    # write
    with open(annotated_path, "wb") as target_handle:
        print("\twriting ..")
        PDF.dumps(target_handle, document)

    # read back what was just written
    with open(annotated_path, "rb") as check_handle:
        print("\treading (2) ..")
        document = PDF.loads(check_handle)

    return True
def test_write_document(self):
    """Write a one-page PDF with a justified paragraph framed by an annotation.

    The paragraph is laid out in a fixed rectangle and a red square
    annotation is added over a rectangle with the same coordinates. The
    result is stored as ``<output_dir>/output.pdf``.
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf = Document()
    page = Page()
    pdf.append_page(page)

    # lay the paragraph out in an explicit box
    paragraph = Paragraph(
        "Once upon a midnight dreary, while I pondered weak and weary, over many a quaint and curious volume of forgotten lore",
        font_size=Decimal(20),
        text_alignment=Alignment.JUSTIFIED,
    )
    paragraph.layout(
        page,
        Rectangle(Decimal(20), Decimal(600), Decimal(500), Decimal(124)),
    )

    # outline the same area with a square annotation
    page.append_square_annotation(
        stroke_color=X11Color("Red"),
        rectangle=Rectangle(Decimal(20), Decimal(600), Decimal(500), Decimal(124)),
    )

    # store the document
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)
def _test_document(self, file):
    """Dump the JSON-serialisable form of a PDF to ``<output_dir>/<stem>.json``.

    :param file: path of the PDF to convert
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # parse the document
    with open(file, "rb") as source_handle:
        document = PDF.loads(source_handle)

    # serialise with a 4-space indent and write it out
    json_path = target_dir / (file.stem + ".json")
    payload = json.dumps(document.to_json_serializable(document), indent=4)
    with open(json_path, "w") as target_handle:
        target_handle.write(payload)
def test_extract_text_in_area(self):
    """Mark a fixed rectangle on page 0 and print the text found inside it.

    A copy of the input with the rectangle outlined in red is written to
    ``<output_dir>/<stem>_bill_to_marked.pdf``. The input is then parsed
    again through a ``LocationFilter`` that forwards to a
    ``SimpleTextExtraction`` listener, and that listener's page-0 text is
    printed.

    NOTE(review): the input path is hard-coded to a location on one
    developer's machine; the test cannot run elsewhere without that file.
    """
    # create output directory if it does not exist yet (the sibling tests do
    # this too; without it the write below fails on a fresh checkout)
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    r = Rectangle(Decimal(50), Decimal(400), Decimal(200), Decimal(100))

    file: Path = Path("/home/joris/Code/pdf-corpus/0600.pdf")
    with open(file, "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle)

    # annotate before opening the output file, so that a failure here does
    # not leave an empty file behind
    doc.get_page(0).append_polygon_annotation(
        LineArtFactory.rectangle(r),
        stroke_color=X11Color("Red"),
    )
    output_file = self.output_dir / (file.stem + "_bill_to_marked.pdf")
    with open(output_file, "wb") as pdf_file_handle:
        PDF.dumps(pdf_file_handle, doc)

    # extract only the text located inside the rectangle
    l1 = SimpleTextExtraction()
    l2 = LocationFilter(
        r.get_x(), r.get_y(), r.get_x() + r.get_width(), r.get_y() + r.get_height()
    ).add_listener(l1)
    with open(file, "rb") as pdf_file_handle:
        doc = PDF.loads(pdf_file_handle, [l2])

    print(l1.get_text(0))
def test_write_document(self):
    """Write a one-page PDF holding two right-aligned paragraphs.

    Both paragraphs go through a ``SingleColumnLayout`` and the result is
    stored as ``<output_dir>/output.pdf``.
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf = Document()
    page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    # both paragraphs share the same styling, only the text differs
    verses = (
        "Once upon a midnight dreary, while I pondered weak and weary, over many a quaint and curious volume of forgotten lore.",
        "While I nodded, nearly napping, suddenly there came a tapping. As of someone gently rapping, rapping at my chamberdoor.",
    )
    for verse in verses:
        layout.add(
            Paragraph(
                verse,
                font_size=Decimal(20),
                text_alignment=Alignment.RIGHT,
                horizontal_alignment=Alignment.RIGHT,
            )
        )

    # store the document
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)
def _test_document(self, file):
    """Box every match of ``[sS]orbitol`` on page 0 and round-trip the PDF.

    The document is parsed with a ``RegularExpressionTextExtraction``
    listener. Each event it matched on page 0 gets a firebrick square
    annotation around its bounding box. The annotated document is written
    to ``<output_dir>/<stem>_out.pdf`` and parsed again.

    :param file: path of the PDF to search and annotate
    :return: ``True`` when all phases finish without raising
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()
    annotated_path = target_dir / (file.stem + "_out.pdf")

    # read, collecting regex matches along the way
    matcher = RegularExpressionTextExtraction("[sS]orbitol")
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle, [matcher])

    # one annotation per match on the first page
    match_count = len(matcher.get_matched_chunk_of_text_render_events_per_page(0))
    print("\tAdding %d annotations" % match_count)
    for event in matcher.get_matched_chunk_of_text_render_events_per_page(0):
        document.get_page(0).append_square_annotation(
            event.get_bounding_box(),
            stroke_color=X11Color("Firebrick"),
        )

    # write
    with open(annotated_path, "wb") as target_handle:
        print("\twriting ..")
        PDF.dumps(target_handle, document)

    # read back what was just written
    with open(annotated_path, "rb") as check_handle:
        print("\treading (2) ..")
        document = PDF.loads(check_handle)

    return True
def test_document(self, file):
    """Save every image found on page 0 as ``<output_dir>/<stem><i>.jpg``.

    :param file: path of the PDF to extract images from
    :return: ``True`` once all images have been saved
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # parse with an image-collecting listener attached
    extractor = SimpleImageExtraction()
    with open(file, "rb") as source_handle:
        PDF.loads(source_handle, [extractor])

    # the running index keeps the file names unique
    for index, image in enumerate(extractor.get_images_per_page(0)):
        image_path = target_dir / (file.stem + str(index) + ".jpg")
        with open(image_path, "wb") as target_handle:
            image.save(target_handle)

    return True
def _test_document(self, file):
    """Write the keywords requested for page 0 to ``<output_dir>/<stem>.json``.

    The document is parsed with a ``TFIDFKeywordExtraction`` listener built
    from ``ENGLISH_STOP_WORDS``. Each object returned by
    ``get_keywords_per_page(0, 5)`` is exported through its ``__dict__``.

    :param file: path of the PDF to analyse
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # parse with the keyword listener attached
    extractor = TFIDFKeywordExtraction(ENGLISH_STOP_WORDS)
    with open(file, "rb") as source_handle:
        PDF.loads(source_handle, [extractor])

    # export as json
    keyword_records = [
        keyword.__dict__ for keyword in extractor.get_keywords_per_page(0, 5)
    ]
    json_path = target_dir / (file.stem + ".json")
    with open(json_path, "w") as target_handle:
        target_handle.write(json.dumps(keyword_records, indent=4))
def _test_document(self, file):
    """Compare the extracted text of page 0 with a ground-truth text file.

    The expected text is read from ``<input_dir>/<stem>.txt``. The actual
    text comes from a ``SimpleTextExtraction`` listener. Both are handed to
    ``self._compare_text``.

    :param file: path of the PDF to extract text from
    :return: ``True`` when the comparison call returns normally
    """
    # the sibling tests create the output directory, so this one does too
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # expected text
    expected_path = self.input_dir / (file.stem + ".txt")
    with open(expected_path, "r") as expected_handle:
        expected_text = expected_handle.read()

    # actual text
    extractor = SimpleTextExtraction()
    with open(file, "rb") as source_handle:
        PDF.loads(source_handle, [extractor])

    self._compare_text(file.stem, expected_text, extractor.get_text(0))
    return True
def _test_document(self, file) -> bool:
    """Print the XMP document-info fields of a PDF, one per line.

    :param file: path of the PDF to inspect
    :return: always ``True``
    """
    with open(file, "rb") as source_handle:
        document = PDF.loads(source_handle)
    xmp_info = document.get_xmp_document_info()

    # (print template, accessor name) pairs, listed in output order; each
    # accessor is looked up and called right before its line is printed
    fields = (
        ("title : %s", "get_title"),
        ("author : %s", "get_author"),
        ("creator : %s", "get_creator"),
        ("producer : %s", "get_producer"),
        ("ids : %s", "get_ids"),
        ("language : %s", "get_language"),
        ("document-ID : %s", "get_document_id"),
        ("original document-ID : %s", "get_original_document_id"),
        ("creation date : %s", "get_creation_date"),
        ("modification date : %s", "get_modification_date"),
        ("metadata date : %s", "get_metadata_date"),
    )
    for template, accessor_name in fields:
        print(template % getattr(xmp_info, accessor_name)())
    print("")
    return True
def _test_document(self, file):
    """Re-write a PDF to ``<output_dir>/<stem>_out.pdf`` via ``PDFTransformer``.

    Unlike the tests that call ``PDF.dumps``, this one builds the
    ``TransformerWriteContext`` itself and invokes the transformer directly.

    :param file: path of the PDF to copy
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()
    copy_path = target_dir / (file.stem + "_out.pdf")

    # read
    with open(file, "rb") as source_handle:
        document = PDF.loads(source_handle)

    # write through an explicitly constructed context
    with open(copy_path, "wb") as target_handle:
        write_context = TransformerWriteContext(
            destination=target_handle, root_object=document
        )
        PDFTransformer().transform(
            context=write_context, object_to_transform=document
        )
def _test_document(self, file):
    """Print the text of the longest paragraph extracted from page 0.

    Length is measured as ``len(paragraph.text)``. On a tie the paragraph
    that comes first wins. Nothing is printed when page 0 yields no
    paragraphs.

    :param file: path of the PDF to analyse
    :return: always ``True``
    """
    # parse with the paragraph listener attached
    extractor = SimpleParagraphExtraction()
    with open(file, "rb") as source_handle:
        PDF.loads(source_handle, [extractor])

    # max() keeps the first of several equally long candidates
    longest: Optional[Paragraph] = max(
        extractor.get_paragraphs(0),
        key=lambda paragraph: len(paragraph.text),
        default=None,
    )
    if longest is not None:
        print(longest.text)
    return True
def test_extract_font_names(self):
    """Write the font names of page 0 of ``self.input_file`` as a JSON list.

    Each name is converted with ``str`` and the list is written to
    ``self.output_file``.

    :return: always ``True``
    """
    # make sure the directory of the output file is there
    target_dir = self.output_file.parent
    if not target_dir.exists():
        target_dir.mkdir()

    # parse with the font listener attached
    extractor = FontExtraction()
    with open(self.input_file, "rb") as source_handle:
        PDF.loads(source_handle, [extractor])
    font_names = [str(name) for name in extractor.get_font_names_per_page(0)]

    # write output
    with open(self.output_file, "w") as target_handle:
        target_handle.write(json.dumps(font_names))

    return True
def _test_document(self, file):
    """Export the page-0 matches of ``[hH]ealth`` to ``<output_dir>/<stem>.json``.

    Each match becomes a record holding its text and the integer-truncated
    ``x0``/``y0``/``x1``/``y1`` values of its baseline.

    :param file: path of the PDF to search
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # parse with the regex listener attached
    matcher = RegularExpressionTextExtraction("[hH]ealth")
    with open(file, "rb") as source_handle:
        PDF.loads(source_handle, [matcher])

    # one record per matched event
    records = []
    for event in matcher.get_matched_text_render_info_events_per_page(0):
        records.append(
            {
                "text": event.get_text(),
                "x0": int(event.get_baseline().x0),
                "y0": int(event.get_baseline().y0),
                "x1": int(event.get_baseline().x1),
                "y1": int(event.get_baseline().y1),
            }
        )

    # export matches
    json_path = target_dir / (file.stem + ".json")
    with open(json_path, "w") as target_handle:
        target_handle.write(json.dumps(records, indent=4))
def _test_document(self, file):
    """Export the page-0 matches of ``[sS]orbitol`` to ``<output_dir>/<stem>.json``.

    Each match becomes a record holding its text and the integer-truncated
    ``x``/``y``/``width``/``height`` values of its baseline.

    :param file: path of the PDF to search
    :return: always ``True``
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # parse with the regex listener attached
    matcher = RegularExpressionTextExtraction("[sS]orbitol")
    with open(file, "rb") as source_handle:
        PDF.loads(source_handle, [matcher])

    # one record per matched chunk of text
    records = []
    for event in matcher.get_matched_chunk_of_text_render_events_per_page(0):
        records.append(
            {
                "text": event.text,
                "x": int(event.get_baseline().x),
                "y": int(event.get_baseline().y),
                "width": int(event.get_baseline().width),
                "height": int(event.get_baseline().height),
            }
        )

    # export matches
    json_path = target_dir / (file.stem + ".json")
    with open(json_path, "w") as target_handle:
        target_handle.write(json.dumps(records, indent=4))

    return True
def test_write_document(self):
    """Write a PDF with a 10x4 table of glyphs set in a TrueType font.

    The table has a header row, seven letter rows (a..g), one row of
    ellipses and a final row for z. Cells are added left to right, top to
    bottom. The document is stored as ``<output_dir>/output.pdf`` and then
    parsed again to check that it can be re-opened.
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf = Document()
    page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    t = Table(number_of_rows=10, number_of_columns=4)

    # header row (default font)
    for heading in ("lowercase", "uppercase", "lowercase acute", "uppercase acute"):
        t.add(
            Paragraph(
                heading,
                font_color=X11Color("YellowGreen"),
                horizontal_alignment=Alignment.CENTERED,
            )
        )

    # the custom font sits next to this test file
    font: Font = TrueTypeFont.true_type_font_from_file(
        Path(__file__).parent / "Pacifico.ttf"
    )

    # letter rows; "-" marks letters without an acute variant
    letter_rows = (
        ("a", "A", "á", "Á"),
        ("b", "B", "-", "-"),
        ("c", "C", "-", "-"),
        ("d", "D", "-", "-"),
        ("e", "E", "é", "É"),
        ("f", "F", "-", "-"),
        ("g", "G", "-", "-"),
    )
    for letter_row in letter_rows:
        for glyph in letter_row:
            t.add(Paragraph(glyph, font=font))

    # one row of ellipses standing in for h..y
    for _ in range(4):
        t.add(
            Paragraph(
                "...",
                font_color=X11Color("LightGray"),
                horizontal_alignment=Alignment.CENTERED,
            )
        )

    # last row
    for glyph in ("z", "Z", "-", "-"):
        t.add(Paragraph(glyph, font=font))

    t.set_border_width_on_all_cells(Decimal(0.2))
    t.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5), Decimal(5))
    layout.add(t)

    # footnote under the table
    layout.add(
        Paragraph(
            text="**These are the characters pText can currently render in a PDF",
            font_size=Decimal(8),
            font_color=X11Color("Gray"),
            horizontal_alignment=Alignment.RIGHT,
        )
    )

    # store the document
    destination = target_dir / ("output.pdf")
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)

    # read back what was just written
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def test_write_document(self):
    """Write a two-column PDF: an image on the left, a short letter on the right.

    The image is referenced by URL. The letter consists of a title, a body
    and a signature. The document is stored as ``<output_dir>/output.pdf``.

    :return: always ``True``
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf: Document = Document()
    page: Page = Page()
    pdf.append_page(page)
    layout = MultiColumnLayout(page)

    # first column: the picture
    picture = Image(
        "https://images.unsplash.com/photo-1550155864-3033f844da36?ixid=MXwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHw%3D&ixlib=rb-1.2.1&auto=format&fit=crop&w=634&q=80",
        width=Decimal(256),
    )
    layout.add(picture)
    layout.switch_to_next_column()

    # second column: title
    title = Paragraph(
        "Love you more",
        font_color=X11Color("Crimson"),
        font="Helvetica-Bold",
        font_size=Decimal(20),
    )
    layout.add(title)

    # second column: body
    body = Paragraph(
        """When I say I love you more, I don't just mean I love you more than you love me. I mean I love you more than the bad days ahead of us. I love you more than any fight we will ever have. I love you more than the distance between us. I love you more than any obstacle that could ever try and come between us. I love you the most. """,
        respect_newlines_in_text=True,
    )
    layout.add(body)

    # second column: signature
    signature = Paragraph(
        """yours, most sincerely JS """,
        font_color=X11Color("SlateGray"),
        font="Helvetica-Bold",
        font_size=Decimal(8),
        respect_newlines_in_text=True,
    )
    layout.add(signature)

    # write
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)

    return True
def test_document(self, file):
    """Draw a small pixel-art sprite on page 0 using coloured link annotations.

    Every non-zero cell of the sprite grid becomes one link annotation of
    ``pixel_size`` x ``pixel_size`` units, coloured through the palette. The
    annotated document is written to ``<output_dir>/<stem>_out.pdf`` and
    then parsed again.

    :param file: path of the PDF to annotate
    :return: ``True`` when all phases finish without raising
    """
    # sprite grid: each value is an index into the palette, 0 = no pixel
    sprite = [
        [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [0, 0, 0, 2, 2, 2, 3, 3, 2, 3, 0, 0, 0, 0],
        [0, 0, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 0, 0],
        [0, 0, 2, 3, 2, 2, 3, 3, 3, 2, 3, 3, 3, 0],
        [0, 0, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 0, 0],
        [0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0],
        [0, 0, 0, 1, 1, 4, 1, 1, 1, 1, 1, 0, 0, 0],
        [0, 0, 1, 1, 1, 4, 1, 1, 4, 1, 1, 1, 0, 0],
        [0, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 1, 1, 0],
        [0, 3, 3, 1, 4, 5, 4, 4, 5, 4, 1, 3, 3, 0],
        [0, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 0],
        [0, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 0],
        [0, 0, 0, 4, 4, 4, 0, 0, 4, 4, 4, 0, 0, 0],
        [0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0],
        [0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0],
    ]
    palette = [
        None,
        X11Color("Red"),
        X11Color("Black"),
        X11Color("Tan"),
        X11Color("Blue"),
        X11Color("White"),
    ]

    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()
    annotated_path = target_dir / (file.stem + "_out.pdf")

    # read
    with open(file, "rb") as source_handle:
        print("\treading (1) ..")
        document = PDF.loads(source_handle)

    # one annotation per coloured pixel; grid row 0 is the top of the sprite,
    # so the y coordinate counts down from the number of rows
    pixel_size = 2
    row_count = len(sprite)
    for row_index, row in enumerate(sprite):
        for column_index, palette_index in enumerate(row):
            if palette_index == 0:
                continue
            x = pixel_size * column_index
            y = pixel_size * (row_count - row_index)
            document.get_page(0).append_link_annotation(
                page=Decimal(0),
                color=palette[palette_index],
                location_on_page="Fit",
                rectangle=Rectangle(
                    Decimal(x),
                    Decimal(y),
                    Decimal(pixel_size),
                    Decimal(pixel_size),
                ),
            )

    # write
    with open(annotated_path, "wb") as target_handle:
        print("\twriting ..")
        PDF.dumps(target_handle, document)

    # read back what was just written
    with open(annotated_path, "rb") as check_handle:
        print("\treading (2) ..")
        document = PDF.loads(check_handle)

    return True
def read_document():
    """Parse one fixed corpus PDF with ``PDF.loads``.

    The parsed document is only bound to a local; nothing is returned.
    """
    corpus_file = Path("/home/joris/Code/pdf-corpus/0063_page_0.pdf")
    with open(corpus_file, "rb") as source_handle:
        doc: typing.Optional[Document] = PDF.loads(source_handle)
def test_write_document(self):
    """Write a "complete the picture" puzzle page to ``<output_dir>/output.pdf``.

    A logo is downloaded, converted and resized to 256x256. A second
    256x256 image starts out white, gets black lines on the outer border
    and on every 64th column index, and receives the logo's pixels only in
    alternating 64-pixel bands. Both images are placed side by side in a
    borderless table.

    :return: always ``True``
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf: Document = Document()
    page: Page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    # title
    layout.add(
        Paragraph(
            "Complete the picture",
            font_size=Decimal(20),
            font_color=X11Color("YellowGreen"),
        )
    )

    # instructions
    layout.add(
        Paragraph(
            """ Can you complete the picture on the right by copying the completed picture on the left? """,
            respect_newlines_in_text=True,
            font_color=X11Color("SlateGray"),
            font_size=Decimal(8),
        )
    )

    # the complete picture
    response = requests.get(
        "https://www.mozilla.org/media/protocol/img/logos/firefox/browser/logo-lg-high-res.fbc7ffbb50fd.png",
        stream=True,
    )
    image_a = PILImage.open(response.raw)
    image_a = TestWriteCompleteThePictureHorizontallyPuzzle._convert_png_to_jpg(
        image_a
    )
    image_a = image_a.resize((256, 256))

    # the partial picture
    image_b = PILImage.new(size=(256, 256), color=(255, 255, 255), mode="RGB")
    pixels_a = image_a.load()
    pixels_b = image_b.load()
    for i in range(256):
        for j in range(256):
            on_grid_line = i in (0, 255) or j in (0, 255) or i % 64 == 0
            if on_grid_line:
                pixels_b[(i, j)] = (0, 0, 0)
            elif (i // 64) % 2 == 0:
                # even bands keep the original pixels, odd bands stay white
                pixels_b[(i, j)] = pixels_a[(i, j)]

    # both pictures side by side
    t: Table = Table(number_of_columns=2, number_of_rows=1)
    t.add(Image(image_a))
    t.add(Image(image_b))
    t.no_borders()
    t.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5), Decimal(5))
    layout.add(t)

    # write
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)

    return True
def test_write_document(self):
    """Write a "reverse the words" puzzle page to ``<output_dir>/output.pdf``.

    Each sentence is encoded by reversing every word in place. The table
    holds, per sentence, a numbered cell spanning two rows, the encoded
    sentence, and an answer cell whose only border is the bottom one.
    """
    sentences = [
        "THE BOAT WILL ARRIVE ON MONDAY",
        "SHE LIVES AT THE HOUSE WITH THE BLUE DOOR",
        "A FRIEND IN NEED IS A FRIEND INDEED",
        "AN APPLE A DAY KEEPS THE DOCTOR AWAY",
    ]

    # one document holding one page
    pdf = Document()
    page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    # title
    layout.add(
        Paragraph(
            "Reverse the words",
            font_size=Decimal(20),
            font_color=X11Color("YellowGreen"),
        )
    )

    # instructions
    layout.add(
        Paragraph(
            """ This is perhaps the simplest code to use and solve. Simply read each word backwards. """,
            font_color=X11Color("SlateGray"),
            font_size=Decimal(8),
        )
    )

    # two table rows per sentence: the puzzle and the answer line
    t = Table(
        number_of_rows=len(sentences) * 2,
        number_of_columns=2,
        column_widths=[Decimal(1), Decimal(9)],
    )
    for index, sentence in enumerate(sentences):
        # every word reversed and followed by a single space
        coded_sentence = "".join(word[::-1] + " " for word in sentence.split(" "))

        # sentence number, spanning both rows
        t.add(
            TableCell(
                Paragraph(str(index + 1) + "."),
                border_top=False,
                border_right=False,
                border_left=False,
                border_bottom=False,
                row_span=2,
            )
        )
        # the encoded sentence
        t.add(
            TableCell(
                Paragraph(coded_sentence, respect_spaces_in_text=True),
                border_top=False,
                border_right=False,
                border_left=False,
                border_bottom=False,
            )
        )
        # the answer line
        t.add(
            TableCell(
                Paragraph(".."),
                border_top=False,
                border_right=False,
                border_left=False,
                border_bottom=True,
            )
        )
    t.set_padding_on_all_cells(Decimal(15), Decimal(5), Decimal(5), Decimal(5))
    layout.add(t)

    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # store the document
    destination = target_dir / ("output.pdf")
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)
def test_write_document(self):
    """Write a page of randomly placed stars to ``<output_dir>/output.pdf``.

    A 25x10 table is filled cell by cell: 4 out of 10 candidate flags put a
    star in the cell (random colour from ``self.COLORS``, random size,
    random number of points), otherwise the cell gets a blank paragraph. A
    two-part footer is drawn at the bottom of the page. The document is then
    parsed again to check that it can be re-opened.
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf = Document()
    page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    t = Table(number_of_columns=10, number_of_rows=25)
    for _ in range(10 * 25):
        # four True flags out of ten
        put_star = random.choice([x <= 3 for x in range(0, 10)])
        if not put_star:
            t.add(Paragraph(" ", respect_spaces_in_text=True))
            continue

        # random draws happen in this order: colour, size, number of points
        c: Color = random.choice(self.COLORS)
        s: Decimal = random.choice(
            [
                Decimal(16),
                Decimal(16),
                Decimal(16),
                Decimal(16),
                Decimal(8),
                Decimal(4),
            ]
        )
        star_outline = LineArtFactory.n_pointed_star(
            bounding_box=Rectangle(Decimal(0), Decimal(0), s, s),
            n=random.choice([3, 5, 7, 12]),
        )
        t.add(
            Shape(
                star_outline,
                fill_color=c,
                stroke_color=c,
                line_width=Decimal(1),
            )
        )
    t.no_borders()
    t.set_padding_on_all_cells(Decimal(5), Decimal(5), Decimal(5), Decimal(5))
    layout.add(t)

    # footer: a filled band over the bottom tenth of the page
    footer_box = Rectangle(
        Decimal(0),
        Decimal(0),
        page.get_page_info().get_width(),
        page.get_page_info().get_height() * Decimal(0.1),
    )
    Shape(
        LineArtFactory.rectangle(footer_box),
        fill_color=self.COLORS[0],
        stroke_color=self.COLORS[0],
        line_width=Decimal(1),
    ).layout(page, footer_box)

    # footer: a thin accent stripe on top of the band
    stripe_box = Rectangle(
        Decimal(0),
        page.get_page_info().get_height() * Decimal(0.1),
        page.get_page_info().get_width(),
        Decimal(2),
    )
    Shape(
        LineArtFactory.rectangle(stripe_box),
        fill_color=self.COLORS[1],
        stroke_color=self.COLORS[1],
        line_width=Decimal(1),
    ).layout(page, stripe_box)

    # store the document
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)

    # read back what was just written
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def test_write_document(self):
    """Write a maze booklet followed by an acknowledgement page.

    ``self._write_maze_page`` is called three times for each
    (image URL, hex colour) pair. A closing page with two paragraphs and a
    QR code is appended and everything is stored as
    ``<output_dir>/output.pdf``.
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    pdf = Document()

    # (silhouette image URL, accent colour as hex string)
    # fmt: off
    mazes = [
        ("https://i.pinimg.com/originals/1e/c2/a7/1ec2a73d0a45016c7d1b52ef9f11e740.png", "395E66"),
        ("https://i.pinimg.com/originals/f8/23/88/f823882e7c5fa42790e78f43ecf7e8bf.jpg", "387D7A"),
        ("https://i.pinimg.com/600x315/2d/94/33/2d94334b737efb5d3a5ef32aef9daefc.jpg", "32936F"),
        ("https://i.pinimg.com/originals/f1/c9/07/f1c907c09d65d5c86fba304fed1009ca.jpg", "26A96C"),
        ("https://cdn.pixabay.com/photo/2017/08/24/12/11/silhouette-2676573_960_720.png", "2BC016"),
        ("https://images-na.ssl-images-amazon.com/images/I/61bqYbAeUgL._AC_SL1500_.jpg", "395E66"),
        ("https://i.pinimg.com/originals/55/e8/91/55e891af7de086a8868e1a8e02fb4426.jpg", "387D7A"),
        ("https://cdn.shopify.com/s/files/1/2123/8425/products/166422700-LRG_242a4c8b-cad5-476e-afd1-c8b882d48fc2_530x.jpg", "32936F"),
        ("http://www.silhcdn.com/3/i/shapes/lg/7/6/d124067.jpg", "26A96C"),
        ("https://cdn.pixabay.com/photo/2018/03/04/23/28/frog-3199601_1280.png", "2BC016"),
    ]
    # fmt: on

    # three pages per silhouette
    pages_per_maze = 3
    for maze_url, maze_color in mazes:
        for _ in range(pages_per_maze):
            self._write_maze_page(pdf, maze_url, maze_color)

    # acknowledgement page
    page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    greeting = Paragraph(
        "Hi there,",
        font_color=HexColor("32936F"),
        font_size=Decimal(20),
    )
    layout.add(greeting)

    blurb = Paragraph(
        "This PDF was made by pText. Check out the GitHub repository to find more fun examples of what you can do with PDF's.",
        font_color=X11Color("SlateGray"),
        font_size=Decimal(12),
    )
    layout.add(blurb)

    repository_code = Barcode(
        data="https://github.com/jorisschellekens/ptext-release",
        type=BarcodeType.QR,
        width=Decimal(64),
    )
    layout.add(repository_code)

    # store the document
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)
def test_write_document(self):
    """Write a "match up" puzzle page to ``<output_dir>/output.pdf``.

    ``N + 1`` icons are picked at random from a shuffled list and scattered
    over an ``N`` x ``N`` grid. The first icon is placed once (the odd one
    out), every other icon twice. The grid is rendered as a borderless
    table and the stored document is parsed again to check that it can be
    re-opened.
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # create document
    pdf = Document()

    # add page
    page = Page()
    pdf.append_page(page)

    layout = SingleColumnLayout(page)

    # add title
    layout.add(
        Paragraph(
            "Match Up Puzzle",
            font_size=Decimal(20),
            font_color=X11Color("YellowGreen"),
        )
    )

    # add explanation
    layout.add(
        Paragraph(
            """ These simple "match up" puzzles help children with observation skills. They will also need to learn a way of marking or remembering which items they have matched, so that they can identify the odd ones out. If you would like to reuse puzzles you could place counters on each "pair" that your child finds, perhaps.""",
            font_color=X11Color("SlateGray"),
            font_size=Decimal(8),
        )
    )

    # candidate icons
    imgs = [
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Orc-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/King-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Knight-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Medusa-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Monster-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Sorceress-Witch-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Centaur-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Elf-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Poison-Spider-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Unicorn-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Viking-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Villager-icon.png",
        "https://icons.iconarchive.com/icons/chanut/role-playing/128/Dragon-Egg-icon.png",
    ]

    N = 10
    random.shuffle(imgs)

    # grid cell index -> icon URL
    image_positions: typing.Dict[int, str] = {}

    def _free_cell() -> int:
        # random.randint is inclusive on both ends, so the upper bound has
        # to be N ** 2 - 1: the table below only renders cells 0 .. N ** 2 - 1
        # and an icon placed at N ** 2 would silently disappear
        position = random.randint(0, N ** 2 - 1)
        while position in image_positions:
            position = random.randint(0, N ** 2 - 1)
        return position

    for i, img_url in enumerate(imgs[0 : (N + 1)]):
        # place image 1
        image_positions[_free_cell()] = img_url
        # place image 2, except for the first icon, which stays unpaired
        if i != 0:
            image_positions[_free_cell()] = img_url

    # render the grid row by row
    t = Table(number_of_rows=N, number_of_columns=N)
    for i in range(0, N ** 2):
        if i in image_positions:
            t.add(Image(image_positions[i], width=Decimal(32), height=Decimal(32)))
        else:
            t.add(Paragraph(" ", respect_spaces_in_text=True))
    t.no_borders()
    t.set_padding_on_all_cells(Decimal(2), Decimal(2), Decimal(2), Decimal(2))
    layout.add(t)

    # determine output location
    out_file = self.output_dir / ("output.pdf")

    # attempt to store PDF
    with open(out_file, "wb") as out_file_handle:
        PDF.dumps(out_file_handle, pdf)

    # attempt to re-open PDF
    with open(out_file, "rb") as in_file_handle:
        PDF.loads(in_file_handle)
def test_write_document(self):
    """Write a "100 stars" thank-you page to ``<output_dir>/output.pdf``.

    The first column shows a 25x4 table of avatars, downloaded for the ids
    in ``self.FIRST_100_STARS``, followed by a headline. The second column
    holds the thank-you text, a signature and a QR code. A two-part footer
    is drawn at the bottom of the page. The stored document is parsed again
    to check that it can be re-opened.
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf = Document()
    page = Page()
    pdf.append_page(page)
    layout = MultiColumnLayout(page)

    # background
    self._write_background(page)

    # avatar table
    avatar_urls = [
        "https://avatars.githubusercontent.com/u/" + str(user_id)
        for user_id in self.FIRST_100_STARS
    ]
    t = Table(number_of_columns=4, number_of_rows=25)
    for avatar_url in avatar_urls[0 : (4 * 25)]:
        response = requests.get(avatar_url, stream=True)
        avatar = PILImage.open(response.raw)
        t.add(Image(avatar, width=Decimal(20), height=Decimal(20)))
    t.set_padding_on_all_cells(Decimal(2), Decimal(2), Decimal(2), Decimal(2))
    t.no_borders()
    layout.add(t)

    # headline under the table
    layout.add(
        Paragraph(
            "100 stars!",
            font="Helvetica-Bold",
            font_size=Decimal(20),
            font_color=self.ACCENT_COLOR_1,
            horizontal_alignment=Alignment.CENTERED,
        )
    )

    # second column
    layout.switch_to_next_column()

    # salutation
    layout.add(
        Paragraph(
            "Thank you,",
            font="Helvetica-Bold",
            font_size=Decimal(20),
            font_color=self.ACCENT_COLOR_1,
        )
    )
    # message
    layout.add(
        Paragraph(
            "Your support and encouragement have always been the driving factors in the development of pText. "
            "I want you to know that I value your appreciation immensely!"
        )
    )
    # signature
    layout.add(
        Paragraph(
            "-- Joris Schellekens",
            font="Helvetica-Oblique",
            font_size=Decimal(8),
            font_color=self.ACCENT_COLOR_2,
        )
    )
    # link to the stargazers page
    layout.add(
        Barcode(
            data="https://github.com/jorisschellekens/ptext-release/stargazers",
            type=BarcodeType.QR,
            width=Decimal(128),
            stroke_color=self.ACCENT_COLOR_1,
        )
    )

    # footer: a filled band over the bottom tenth of the page
    footer_box = Rectangle(
        Decimal(0),
        Decimal(0),
        page.get_page_info().get_width(),
        page.get_page_info().get_height() * Decimal(0.1),
    )
    Shape(
        LineArtFactory.rectangle(footer_box),
        fill_color=self.ACCENT_COLOR_1,
        stroke_color=self.ACCENT_COLOR_1,
        line_width=Decimal(1),
    ).layout(page, footer_box)

    # footer: a thin accent stripe on top of the band
    stripe_box = Rectangle(
        Decimal(0),
        page.get_page_info().get_height() * Decimal(0.1),
        page.get_page_info().get_width(),
        Decimal(2),
    )
    Shape(
        LineArtFactory.rectangle(stripe_box),
        fill_color=self.ACCENT_COLOR_2,
        stroke_color=self.ACCENT_COLOR_2,
        line_width=Decimal(1),
    ).layout(page, stripe_box)

    # store the document
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)

    # read back what was just written
    with open(destination, "rb") as check_handle:
        PDF.loads(check_handle)
def _test_document(self, file):
    """Compare text-rendering measurements of a PDF against a ground truth.

    The document is parsed with a ``TextRenderInfoMeasurandListener``. Its
    ``measurands`` are written to
    ``<output_dir>/<stem>_text_rendering.json`` and compared, entry by
    entry, with ``<input_dir>/<stem>_text_rendering.json``. Entries whose
    text starts with a character other than a letter or space are skipped
    on both sides. For the remaining pairs the text must be equal and the
    integer-truncated ``x0``, ``x1`` and ``y`` values may differ by at most
    ``self.max_distance``.

    :param file: path of the PDF to measure
    :return: ``False`` on the first mismatch (after printing it), ``True``
             when either list is exhausted without a mismatch
    """
    # create output directory if it does not exist yet
    if not self.output_dir.exists():
        self.output_dir.mkdir()

    # read document
    l = TextRenderInfoMeasurandListener()
    with open(file, "rb") as pdf_file_handle:
        PDF.loads(pdf_file_handle, [l])
    measurands = l.measurands

    # export json
    output_file = self.output_dir / (file.stem + "_text_rendering.json")
    with open(output_file, "w") as json_file_handle:
        json_file_handle.write(json.dumps(measurands, indent=4))

    # load ground truth
    with open(
        Path(self.input_dir / (file.stem + "_text_rendering.json")), "r"
    ) as json_file_handle:
        ground_truth_results = json.loads(json_file_handle.read())

    def _skip_non_text(entries, pos):
        # advance past entries whose text begins with a non-letter, non-space
        while pos < len(entries) and re.match("[^a-zA-Z ]+", entries[pos]["text"]):
            pos += 1
        return pos

    # compare
    pos_in_test_array = 0
    pos_in_gt_array = 0
    while pos_in_test_array < len(measurands) and pos_in_gt_array < len(
        ground_truth_results
    ):
        pos_in_test_array = _skip_non_text(measurands, pos_in_test_array)
        pos_in_gt_array = _skip_non_text(ground_truth_results, pos_in_gt_array)

        # skipping may have consumed the rest of either list; indexing
        # without this check raised IndexError
        if pos_in_test_array >= len(measurands) or pos_in_gt_array >= len(
            ground_truth_results
        ):
            break

        test_entry = measurands[pos_in_test_array]
        gt_entry = ground_truth_results[pos_in_gt_array]

        # check text
        if test_entry["text"] != gt_entry["text"]:
            print("text inequality %d %d !!" % (pos_in_gt_array, pos_in_test_array))
            print("\tground truth: %s" % gt_entry["text"])
            print("\ttest : %s" % test_entry["text"])
            return False

        # check coordinates; the x1 check previously compared the "y" values
        for key in ("x0", "x1", "y"):
            delta = abs(int(test_entry[key]) - int(gt_entry[key]))
            if delta > self.max_distance:
                print(
                    "%s inequality %d %d !!"
                    % (key, pos_in_gt_array, pos_in_test_array)
                )
                print("\tground truth: %f" % gt_entry[key])
                print("\ttest : %f" % test_entry[key])
                return False

        print(
            "%s %d %d %d"
            % (
                test_entry["text"],
                test_entry["x0"],
                test_entry["x1"],
                test_entry["y"],
            )
        )
        pos_in_test_array += 1
        pos_in_gt_array += 1

    return True
def test_write_document(self):
    """Write a table of five labelled barcodes to ``<output_dir>/output.pdf``.

    Each table row pairs a label paragraph with a barcode of the matching
    type. Only the QR code is given a fill colour.

    :return: always ``True``
    """
    # make sure the destination directory is there
    target_dir = self.output_dir
    if not target_dir.exists():
        target_dir.mkdir()

    # one document holding one page
    pdf: Document = Document()
    page: Page = Page()
    pdf.append_page(page)
    layout = SingleColumnLayout(page)

    # (label, payload, barcode type, stroke colour, optional fill colour)
    barcode_rows = (
        ("CODE 128", "123456789128", BarcodeType.CODE_128, "#080708", None),
        ("CODE 39", "123456789128", BarcodeType.CODE_39, "#3772FF", None),
        ("EAN 13", "123456789128", BarcodeType.EAN_13, "#DF2935", None),
        ("EAN 14", "1234567891280", BarcodeType.EAN_14, "#FDCA40", None),
        ("QR", "1234567891280", BarcodeType.QR, "#E6E8E6", "#DF2935"),
    )

    # the table is built through the return values of add(), exactly like a
    # single fluent call chain would
    table = Table(number_of_rows=5, number_of_columns=2)
    for label, payload, barcode_type, stroke_hex, fill_hex in barcode_rows:
        table = table.add(Paragraph(label))
        barcode_options = {
            "data": payload,
            "type": barcode_type,
            "width": Decimal(128),
            "stroke_color": HexColor(stroke_hex),
        }
        if fill_hex is not None:
            barcode_options["fill_color"] = HexColor(fill_hex)
        table = table.add(Barcode(**barcode_options))
    table = table.set_padding_on_all_cells(
        Decimal(10), Decimal(5), Decimal(5), Decimal(5)
    )
    layout.add(table)

    # write
    destination = target_dir / "output.pdf"
    with open(destination, "wb") as target_handle:
        PDF.dumps(target_handle, pdf)

    return True