def check_file_content(original, converted):
    """Check that a converted PDF kept its page count and has page content.

    Args:
        original: Path of the source PDF.
        converted: Path of the converted PDF.

    Returns:
        True when both documents report the same number of pages and the
        first page of the converted document has a content stream;
        False otherwise (including when the converted document has no pages).
    """
    # Both files are opened in context managers so no handle is leaked.
    with open(original, mode="rb") as original_data:
        original_page_num = PdfFileReader(original_data, strict=False).numPages

    with open(converted, mode="rb") as converted_data:
        converted_pdf = PdfFileReader(converted_data, strict=False)
        converted_page_num = converted_pdf.numPages
        if converted_page_num == 0 or original_page_num != converted_page_num:
            return False
        # Inspect a page parsed from the file by the reader, not a newly
        # constructed PageObject, so the check reflects the real document.
        return converted_pdf.getPage(0).getContents() is not None
def build_folio(pages, padding=None, starting_page_number=None):
    """Collate up to 12 pages into folio order and join them in pairs.

    The pages are padded to a length of 4, 8, or 12, reordered with the
    collation pattern for that length, and every consecutive pair of the
    reordered pages is combined with ``create_page``.

    Args:
        pages: Sequence of at most 12 page objects. Each page must support
            ``page['/MediaBox']`` lookups. The sequence is not modified.
        padding: Page used to fill the folio up to the next supported
            length. When falsy, a blank page of 8.5*72 by 11*72 units is
            created and used instead.
        starting_page_number: When truthy, the number of the first page in
            ``pages``; each joined sheet then receives the numbers of its
            left and right page. When falsy (including 0), ``create_page``
            is called without page numbers.

    Returns:
        A list with one joined page per pair of collated pages.

    Raises:
        AssertionError: If more than 12 pages are supplied.
    """
    collation_patterns = {
        4: [3, 0, 1, 2],
        8: [7, 0, 1, 6, 5, 2, 3, 4],
        12: [11, 0, 1, 10, 9, 2, 3, 8, 7, 4, 5, 6],
    }
    assert len(pages) <= 12, "a folio holds at most 12 pages"

    if not padding:
        # The padding must itself be a page: it is indexed with '/MediaBox'
        # below exactly like the real pages.
        padding = PageObject.createBlankPage(None, 8.5 * 72, 11 * 72)

    # Work on a copy so padding never leaks into the caller's list.
    pages = list(pages)
    # Pad out the folio until it is 4, 8, or 12 pages long.
    while len(pages) not in collation_patterns and len(pages) <= 12:
        pages.append(padding)

    pattern = collation_patterns[len(pages)]
    joined = []
    # Even positions of the pattern are left-hand pages, odd positions the
    # right-hand pages that share a sheet with them.
    for left_index, right_index in zip(pattern[0::2], pattern[1::2]):
        page1 = pages[left_index]
        # If the width is greater than the height, the page is in landscape
        # and will need to be rotated.
        rot1 = page1['/MediaBox'][2] > page1['/MediaBox'][3]
        page2 = pages[right_index]
        rot2 = page2['/MediaBox'][2] > page2['/MediaBox'][3]
        if starting_page_number:
            lpage = starting_page_number + left_index
            rpage = starting_page_number + right_index
            joined.append(create_page(page1, page2, rot1, rot2, lpage, rpage))
        else:
            joined.append(create_page(page1, page2, rot1, rot2))
    return joined
def merge_to_page(page1, page2):
    """Place two pages side by side on one sheet and return the sheet.

    Args:
        page1: Page merged at the left edge of the sheet (offset 0, 0).
        page2: Page merged at a horizontal offset of 8.5*72 units.

    Returns:
        A new page created at 17*72 by 11*72 units holding both pages, then
        scaled to 11*72 by 8.5*72 units and rotated 90 degrees
        counter-clockwise.
    """
    outs = PageObject.createBlankPage(None, 17 * 72, 11 * 72)
    # Use the function's own parameters; the names p1/p2 are not defined in
    # this function.
    outs.mergeScaledTranslatedPage(page1, 1, 0, 0)
    outs.mergeScaledTranslatedPage(page2, 1, 8.5 * 72, 0)
    outs.scaleTo(11 * 72, 8.5 * 72)
    outs.rotateCounterClockwise(90)
    return outs
def pypdf3():
    """Apply the watermark to every page and write the result to a new PDF.

    Takes no parameters: ``self``, ``document``, ``watermark`` and
    ``output_filename`` are read from the surrounding scope.

    When ``self.underneath`` is falsy the watermark is merged onto each
    document page. Otherwise a blank page is created, the watermark is merged
    onto it, and the document page is merged on top.

    Returns:
        ``output_filename``, after the merged pages have been written to it.

    NOTE(review): the previous docstring read "Much slower than PyPDF3
    method.", which looks wrong for a function named ``pypdf3`` -- confirm
    which method the comparison referred to.
    """
    # 5b. Get our files ready
    document_reader = PdfFileReader(document)
    output_file = PdfFileWriter()

    # Watermark page and its placement
    watermark_reader = PdfFileReader(watermark)
    wtrmrk_page = watermark_reader.getPage(0)
    offset_x = (wtrmrk_page.mediaBox.getWidth() / 2) + 0
    offset_y = (wtrmrk_page.mediaBox.getHeight() / 2) + 80
    if Info(watermark_reader).rotate is not None:
        wtrmrk_rotate = -int(Info(watermark_reader).rotate)
    else:
        wtrmrk_rotate = 0

    # 5c. Go through all the input file pages to add a watermark to them
    for page_number in range(document_reader.getNumPages()):
        place_under = self.underneath

        # Pick the page that receives the watermark
        if place_under:
            size = Info(document_reader).dimensions
            target_page = PageObject().createBlankPage(document_reader, size['w'], size['h'])
        else:
            target_page = document_reader.getPage(page_number)

        # A rotated watermark is placed at the offsets computed above;
        # an unrotated one is placed at the origin.
        if wtrmrk_rotate != 0:
            target_page.mergeRotatedTranslatedPage(wtrmrk_page, wtrmrk_rotate, offset_x, offset_y)
        else:
            target_page.mergeTranslatedPage(wtrmrk_page, 0, 0)

        # With the watermark underneath, the document page goes on top of it
        if place_under:
            target_page.mergePage(document_reader.getPage(page_number))

        # Add the finished page to the output document
        output_file.addPage(target_page)

    # 5d. finally, write "output" to PDF
    with open(output_filename, "wb") as output_stream:
        output_file.write(output_stream)
    return output_filename
def pypdf3(self):
    """Write a copy of ``self.file_name`` with every page placed on a new canvas.

    Each source page is merged onto a blank page of ``self.target_w`` by
    ``self.target_h``, scaled by ``self.scale`` and offset by
    ``self.margin_x`` / ``self.margin_y``.

    Returns:
        ``self.output``, the path the resulting PDF was written to.
    """
    source = PdfFileReader(self.file_name)
    result = PdfFileWriter()

    for index in range(source.getNumPages()):
        source_page = source.getPage(index)
        canvas = PageObject.createBlankPage(width=self.target_w, height=self.target_h)
        canvas.mergeScaledTranslatedPage(
            source_page, self.scale, self.margin_x, self.margin_y
        )
        result.addPage(canvas)

    with open(self.output, "wb") as handle:
        result.write(handle)
    return self.output
def create_page(p1, p2, rot1=False, rot2=False, lpage_num=None, rpage_num=None):
    """Join two pages side by side on one sheet and overlay a number page.

    Args:
        p1: Page placed on the left half of the sheet.
        p2: Page placed on the right half of the sheet.
        rot1: When true, ``p1`` is rotated by 90 degrees while being merged.
        rot2: When true, ``p2`` is rotated by 90 degrees while being merged.
        lpage_num: Left page number handed to ``NumberPDF``.
        rpage_num: Right page number handed to ``NumberPDF``.

    Returns:
        A new page created at 17*72 by 11*72 units containing both pages,
        scaled to 11*72 by 8.5*72 units, rotated 90 degrees
        counter-clockwise, with the first page of the generated
        ``NumberPDF`` document merged on top.
    """
    import io
    import os
    import tempfile

    outs = PageObject.createBlankPage(None, 17 * 72, 11 * 72)
    if rot1:
        outs.mergeRotatedScaledTranslatedPage(p1, 90, 1, 8.5 * 72, 0)
    else:
        outs.mergeScaledTranslatedPage(p1, 1, 0, 0)
    if rot2:
        outs.mergeRotatedScaledTranslatedPage(p2, 90, 1, 8.5 * 72 * 2, 0)
    else:
        outs.mergeScaledTranslatedPage(p2, 1, 8.5 * 72, 0)
    outs.scaleTo(11 * 72, 8.5 * 72)
    outs.rotateCounterClockwise(90)

    temp = NumberPDF(lpage_num, rpage_num)
    temp.add_page(orientation='L')

    # Render the number page through a uniquely named scratch file instead of
    # a fixed path, so concurrent calls cannot overwrite each other's file
    # and nothing is left behind afterwards.
    descriptor, number_path = tempfile.mkstemp(suffix='.pdf')
    os.close(descriptor)
    try:
        temp.output(number_path)
        with open(number_path, 'rb') as number_file:
            number_bytes = number_file.read()
    finally:
        os.remove(number_path)

    # The reader works on an in-memory copy, so the merged page stays usable
    # after the scratch file has been removed.
    num_page = PdfFileReader(io.BytesIO(number_bytes)).getPage(0)
    outs.mergePage(num_page)
    return outs
def upscale(file_name, scale=1.5, margin_x=0, margin_y=0, suffix='scaled', tempdir=None):
    """Upscale a PDF to a large size.

    Every page of ``file_name`` is merged onto a blank page whose width and
    height are the values from ``dimensions(file_name)`` multiplied by
    ``scale``.

    Args:
        file_name: Path of the PDF to upscale.
        scale: Factor applied to the page dimensions and to the page content.
        margin_x: Horizontal offset of the content on the new page.
        margin_y: Vertical offset of the content on the new page.
        suffix: Passed to ``add_suffix`` to name the output file, which is
            placed in the directory of ``file_name``. Only used when
            ``tempdir`` is falsy.
        tempdir: When truthy, the output is a new temporary ``.pdf`` file
            inside this directory.

    Returns:
        Path of the written PDF. When both ``tempdir`` and ``suffix`` are
        falsy this is a new file in the default temporary directory.
    """
    # Set output file name. Temporary files are created with delete=False and
    # closed right away: only the reserved name is needed, and the file must
    # not be removed later when the wrapper object is finalised.
    if tempdir:
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir, delete=False) as placeholder:
            output = placeholder.name
    elif suffix:
        output = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))
    else:
        with NamedTemporaryFile(suffix='.pdf', delete=False) as placeholder:
            output = placeholder.name

    reader = PdfFileReader(file_name)
    writer = PdfFileWriter()

    dims = dimensions(file_name)
    target_w = dims['w'] * scale
    target_h = dims['h'] * scale

    for page_number in range(reader.getNumPages()):
        source_page = reader.getPage(page_number)
        page = PageObject.createBlankPage(width=target_w, height=target_h)
        page.mergeScaledTranslatedPage(source_page, scale, margin_x, margin_y)
        writer.addPage(page)

    with open(output, "wb") as output_stream:
        writer.write(output_stream)
    return output