def a5(input_path, output_path='new_pdf_document'):
    """Impose the pages of ``input_path`` two-up and write two PDF files.

    Each output sheet is twice as wide as the trim box of the first input
    page and holds two input pages side by side. Sheets built from the
    first pairing go to ``<output_path>_A.pdf`` and sheets built from the
    second pairing go to ``<output_path>_B.pdf``.

    The page count is padded (virtually) to a multiple of four; any page
    index that falls outside the document is rendered as a blank page.

    :param input_path: path (or stream) accepted by ``PdfFileReader``.
    :param output_path: prefix of the two files that are written.
    :raises IndexError: if the input document has no pages.
    """
    pdf_reader = PdfFileReader(input_path)
    number_of_pages = pdf_reader.getNumPages()
    first_page = pdf_reader.getPage(0)
    dims_A5 = first_page.trimBox.getWidth(), first_page.trimBox.getHeight()
    dims_A4 = dims_A5[0] * 2, dims_A5[1]

    # Number of virtual blank pages needed to reach a multiple of four.
    offset = number_of_pages % 4
    if offset != 0:
        offset = 4 - offset

    def page_or_blank(index):
        """Return page ``index``, or a blank page when it does not exist."""
        try:
            return pdf_reader.getPage(index)
        except IndexError:
            return PageObject.createBlankPage(None, dims_A5[0], dims_A5[1])

    def impose(left, right):
        """Return a new double-width sheet with ``left`` and ``right`` on it."""
        sheet = PageObject.createBlankPage(None, dims_A4[0], dims_A4[1])
        sheet.mergeTranslatedPage(left, 0, 0)
        sheet.mergeTranslatedPage(right, dims_A5[0], 0)
        return sheet

    pdf_writer_A = PdfFileWriter()
    pdf_writer_B = PdfFileWriter()
    for i in range(0, math.ceil(number_of_pages / 2), 2):
        # Every lookup goes through page_or_blank so that very short
        # documents (e.g. a single page) no longer crash on page i + 1.
        pdf_writer_A.addPage(
            impose(page_or_blank(number_of_pages - 1 - i + offset),
                   page_or_blank(i)))
        pdf_writer_B.addPage(
            impose(page_or_blank(i + 1),
                   page_or_blank(number_of_pages - 2 - i + offset)))

    with open(output_path + '_A.pdf', 'wb') as output:
        pdf_writer_A.write(output)
    with open(output_path + '_B.pdf', 'wb') as output:
        pdf_writer_B.write(output)
def duplex_pages(p1, p2, orig_width=0, orig_height=0):
    """
    Place two portrait pages side by side on one new landscape page.

    ``orig_width`` / ``orig_height`` are the dimensions of the input pages
    (both pages are assumed to share them). A falsy value means "read it
    from the media box of ``p1``". Both input pages are scaled in place by
    a fixed factor before being merged into the returned page.
    """
    SCALE_FACTOR = .7

    src_w = int(orig_width if orig_width else p1.mediaBox.getWidth())
    src_h = int(orig_height if orig_height else p1.mediaBox.getHeight())

    # The target swaps width and height, i.e. it is the landscape sheet.
    target = PageObject.createBlankPage(None, src_h, src_w)

    for source in (p1, p2):
        source.scaleBy(SCALE_FACTOR)
    new_width, new_height = SCALE_FACTOR * src_w, SCALE_FACTOR * src_h

    # Each page is centred inside its own half of the landscape sheet.
    half_sheet = src_h / 2
    x_inset = (half_sheet - new_width) / 2
    y_inset = (src_w - new_height) / 2
    target.mergeTranslatedPage(p1, x_inset, y_inset)
    target.mergeTranslatedPage(p2, half_sheet + x_inset, y_inset)
    return target
def make_pdf_handout(pdfFileObj):
    """Build a handout PDF with up to three input pages per sheet.

    Each output sheet is an 850 x 1100 blank page. Every input page is
    merged at half scale in the left column, and the first page of
    ``templates/notesField.pdf`` is merged next to it.

    :param pdfFileObj: stream (or path) accepted by ``PdfFileReader``.
    :return: a ``BytesIO`` holding the written PDF; its position is left
        at the end of the data, as written.
    """
    pdf = BytesIO()
    pdfWriter = PdfFileWriter()

    # The notes template must stay open until the writer has serialised
    # the pages that reference it; the ``with`` closes it afterwards
    # (the handle used to be leaked).
    with open('templates/notesField.pdf', 'rb') as linesFileObj:
        pdfReader = PdfFileReader(pdfFileObj)
        pdfLines = PdfFileReader(linesFileObj)
        notesPage = pdfLines.getPage(0)
        numPages = pdfReader.numPages

        for pageNum in range(math.ceil(numPages / 3)):
            template_page = PageObject.createBlankPage(None, 850, 1100)
            for indx in range(3):
                pageIdx = pageNum * 3 + indx
                if pageIdx < numPages:
                    pageObj = pdfReader.getPage(pageIdx)
                    # Slots are stacked top to bottom, 330 units apart.
                    template_page.mergeScaledTranslatedPage(
                        pageObj, 0.5, 60, 730 - (330 * indx), False)
                    template_page.mergeScaledTranslatedPage(
                        notesPage, 1, 480, 760 - (330 * indx), False)
            pdfWriter.addPage(template_page)

        pdfWriter.write(pdf)
    return pdf
def resize_pdf_file(input_fp, output_fp, page_size=letter, margin=0.5 * inch, scale=1.0, overlap=0):
    """Re-paginate the pages of ``input_fp`` onto sheets of ``page_size``.

    Every input page is scaled by ``scale`` and then laid onto as many
    output sheets as its height requires, shifting it up by the usable
    height (minus ``overlap``) from one sheet to the next. The page from
    ``make_margin_page`` is merged on top of every sheet. The result is
    written to ``output_fp``.
    """
    margin_page = make_margin_page(page_size, margin)
    input_pages = get_pages(input_fp)
    usable_height = (page_size[1] - margin * 2)
    pdf = PdfFileWriter()

    for source_page in input_pages:
        source_page.scaleBy(scale)
        page = copy.copy(source_page)

        corners = [(float(c[0]), float(c[1]))
                   for c in [page.mediaBox.lowerLeft, page.mediaBox.upperRight]]
        (_, bottom), (_, top) = corners
        p_height = top - bottom

        # First estimate ignores the overlap; the second accounts for it.
        num_pages = int(p_height / usable_height + 1)
        num_pages = int((p_height + overlap * num_pages) / usable_height + 1)

        y_offset = margin + usable_height - p_height
        for _ in range(num_pages):
            sheet = PageObject.createBlankPage(width=page_size[0], height=page_size[1])
            sheet.mergeTranslatedPage(page, margin, y_offset)
            sheet.mergePage(margin_page)
            pdf.addPage(sheet)
            # Shift for the next sheet of the same input page.
            y_offset += usable_height - overlap

    with open(output_fp, 'wb') as fh:
        pdf.write(fh)
def merge_page_with_possible_rotation(page: PageObject, numbers_page: PageObject):
    """Merge ``numbers_page`` onto ``page``, rotating it for wide pages.

    When the upper-right x of ``page.artBox`` exceeds its upper-right y,
    the overlay is merged rotated by 270 degrees and translated by half of
    the overlay's media-box width on both axes
    (see https://stackoverflow.com/a/23633769). Otherwise it is merged
    unchanged. ``page`` is modified in place.
    """
    art_box = page.artBox
    is_wide = art_box.getUpperRight_x() > art_box.getUpperRight_y()

    if not is_wide:
        page.mergePage(numbers_page)
        return

    half_width = numbers_page.mediaBox.getWidth() / 2
    page.mergeRotatedTranslatedPage(numbers_page, 270, half_width, half_width)
def merge_pages(self, upper: PageObject, lower: PageObject) -> PageObject:
    """
    Merge two DIN A5 pages onto a single new DIN A4 page.

    Both pages are merged rotated by 90 degrees; their translations come
    from ``self.real_translation``.

    :param upper: Page to locate in the upper half of the DIN A4 sheet.
    :param lower: Page to locate in the lower half of the DIN A4 sheet.
    :return: The DIN A4 page with upper and lower pages merged.
    """
    sheet_w, sheet_h = mm2inch(210), mm2inch(297)  # DIN A4
    merged_page = PageObject.createBlankPage(width=sheet_w, height=sheet_h)

    # (page, y passed to real_translation); x is always half the width.
    placements = ((upper, sheet_h / 4), (lower, 0))
    for source, y in placements:
        tx, ty = self.real_translation(x=sheet_w / 2, y=y)
        merged_page.mergeRotatedTranslatedPage(source, rotation=90, tx=tx, ty=ty)

    return merged_page
def order_all_pages(order, reader):
    """Yield the pages of ``reader`` in the sequence given by ``order``.

    :param order: iterable of page indices; ``None`` requests a blank
        padding page.
    :param reader: PDF reader providing ``getPage``.

    The padding page is created from ``reader`` so that PyPDF2 can take
    its size from the document. Calling ``createBlankPage()`` with neither
    a document nor explicit dimensions raises ``PageSizeNotDefinedError``.
    """
    for n in order:
        if n is None:
            # Padding page, sized after the last page of the source document.
            yield PageObject.createBlankPage(reader)
        else:
            yield reader.getPage(n)
def get_page_from_nums(pdf, page_nums):
    """Return the pages of ``pdf`` selected by ``page_nums``.

    :param pdf: PDF reader providing ``getPage``.
    :param page_nums: iterable of page indices; ``-1`` requests a blank
        page created from ``pdf``.
    :return: list of page objects, in the order of ``page_nums``.
    :raises ValueError: for any negative index other than ``-1``.
        Previously such a value silently re-used the page from the
        previous iteration, or failed with ``UnboundLocalError`` when it
        came first.
    """
    the_pages = []
    for n in page_nums:
        if n >= 0:
            the_pages.append(pdf.getPage(n))
        elif n == -1:
            the_pages.append(PageObject.createBlankPage(pdf))
        else:
            raise ValueError(
                "invalid page number %r: expected >= 0, or -1 for a blank page" % (n,))
    return the_pages
def get_cached_content_stream(pdf, page_number: int) -> ContentStream:
    """Return the wrapped content stream of a page, computing it once.

    Results are stored in ``parsed_content_stream_data`` under the key
    ``(pdf.stream.name, page_number)``; all cache access happens while
    holding ``_lock``. On a miss the page's contents are passed through
    ``PageObject._pushPopGS`` and the result is cached.
    """
    cache_key = (pdf.stream.name, page_number)
    with _lock:
        if cache_key in parsed_content_stream_data:
            return parsed_content_stream_data[cache_key]

        contents = pdf.getPage(page_number).getContents()
        assert contents is not None
        parsed_content_stream_data[cache_key] = PageObject._pushPopGS(contents, pdf)
        return parsed_content_stream_data[cache_key]
def generate(self):
    """Write the pages listed by ``self.__page_list()`` to ``self.generate_path``.

    Page numbers are 1-based. A number larger than the page count of
    ``self.src_pdf`` produces a blank page created from ``self.src_pdf``.
    """
    source = self.src_pdf
    pnum = source.getNumPages()
    wanted = self.__page_list()

    gen_pdf = PdfFileWriter()
    for number in wanted:
        if number > pnum:
            next_page = PageObject.createBlankPage(source)
        else:
            next_page = source.getPage(number - 1)
        gen_pdf.addPage(next_page)

    with open(self.generate_path, "wb") as out:
        gen_pdf.write(out)
def test_reports_merge_pdf(self):
    """Check that ``_merge_pdf`` returns a truthy result for three PDFs.

    Three one-page 100 x 100 PDFs are written to temporary files and their
    paths are handed to ``self.py3o_report._merge_pdf``.
    """
    reports_path = []
    for _ in range(3):
        writer = PdfFileWriter()
        writer.addPage(PageObject.createBlankPage(width=100, height=100))
        # NamedTemporaryFile creates the file atomically. The deprecated
        # tempfile.mktemp only returns a name and is open to races.
        # delete=False keeps the file for _merge_pdf, as before.
        with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as fp:
            writer.write(fp)
        reports_path.append(fp.name)
    res = self.py3o_report._merge_pdf(reports_path)
    self.assertTrue(res)
def add_text_on_page(pdf_page: PageObject, texts: Iterable[TextOnPage]) -> PageObject:
    """Draw ``texts`` onto ``pdf_page`` and return the same page object.

    The texts are rendered with ReportLab into an in-memory A4 PDF whose
    first page is then merged onto ``pdf_page`` (modified in place).
    """
    # The font has to be registered so that Japanese text can be rendered.
    pdfmetrics.registerFont(UnicodeCIDFont(PDF_FONT))

    overlay_buffer = io.BytesIO()
    overlay_canvas = canvas.Canvas(overlay_buffer, pagesize=A4)

    # Add every given text item to the canvas.
    for item in texts:
        placement = item.position
        text_obj = overlay_canvas.beginText(placement.x, placement.y)
        text_obj.setFont(PDF_FONT, placement.font_size)
        text_obj.setCharSpace(placement.char_space)
        # All setXxx() calls must be finished before textLine() is called.
        text_obj.textLine(item.text)
        overlay_canvas.drawText(text_obj)
    overlay_canvas.save()

    overlay_buffer.seek(0)
    overlay_page = PdfFileReader(overlay_buffer).getPage(0)
    pdf_page.mergePage(overlay_page)
    return pdf_page
def a6(input_path, output_path='new_pdf_document.pdf'):
    """Impose the pages of ``input_path`` four-up and write one PDF.

    Each output sheet is twice as wide and twice as tall as the trim box
    of the first input page and holds four input pages; two of them are
    merged rotated by 180 degrees. The first sheet of every group is
    written first; the second sheets of all groups follow after them.

    The page count is padded (virtually) to a multiple of eight; any page
    index that falls outside the document is rendered as a blank page.

    :param input_path: path (or stream) accepted by ``PdfFileReader``.
    :param output_path: path of the file that is written.
    :raises IndexError: if the input document has no pages.
    """
    pdf_writer = PdfFileWriter()
    pdf_reader = PdfFileReader(input_path)
    number_of_pages = pdf_reader.getNumPages()
    first_page = pdf_reader.getPage(0)
    dims_A6 = first_page.trimBox.getWidth(), first_page.trimBox.getHeight()
    dims_A4 = dims_A6[0] * 2, dims_A6[1] * 2

    # Number of virtual blank pages needed to reach a multiple of eight.
    offset = number_of_pages % 8
    if offset != 0:
        offset = 8 - offset

    def page_or_blank(index):
        """Return page ``index``, or a blank page when it does not exist."""
        try:
            return pdf_reader.getPage(index)
        except IndexError:
            return PageObject.createBlankPage(None, dims_A6[0], dims_A6[1])

    def impose(p1, p2, p3, p4):
        """Return a new sheet holding the four given pages."""
        sheet = PageObject.createBlankPage(None, dims_A4[0], dims_A4[1])
        sheet.mergeTranslatedPage(p1, 0, dims_A6[1])
        sheet.mergeTranslatedPage(p2, dims_A6[0], dims_A6[1])
        sheet.mergeRotatedTranslatedPage(p3, 180, dims_A6[0] / 2, dims_A6[1] / 2)
        sheet.mergeRotatedTranslatedPage(p4, 180, dims_A6[0], dims_A6[1] / 2)
        return sheet

    page_list = []
    for i in range(0, math.ceil(number_of_pages / 2), 4):
        # Every lookup goes through page_or_blank so that documents with
        # fewer pages than a full group no longer crash on i+1 .. i+3.
        pdf_writer.addPage(impose(
            page_or_blank(number_of_pages - 1 - i + offset),
            page_or_blank(i),
            page_or_blank(number_of_pages - 4 - i + offset),
            page_or_blank(i + 3)))
        page_list.append(impose(
            page_or_blank(i + 1),
            page_or_blank(number_of_pages - 2 - i + offset),
            page_or_blank(i + 2),
            page_or_blank(number_of_pages - 3 - i + offset)))

    for sheet in page_list:
        pdf_writer.addPage(sheet)

    with open(output_path, 'wb') as output:
        pdf_writer.write(output)
def pdfmerge(basePath, outputPath, pdfRanges=None, rotate=0, progress=None):
    """Merge annotation pages over the matching pages of a base PDF.

    The annotation PDF is read from ``outputPath`` and the merged result
    is written back to the same path.

    :param basePath: a ``PdfFileReader`` or something accepted by its
        constructor.
    :param outputPath: path of the annotation PDF; overwritten with the
        merged document.
    :param pdfRanges: optional iterable of sized iterables of base page
        indices; when ``None`` the first ``min(base, annot)`` pages are
        paired one to one.
    :param rotate: if truthy, each merged page is rotated
        counter-clockwise by this amount.
    :param progress: passed through to ``_progress``.
    """
    if isinstance(basePath, PdfFileReader):
        baseReader = basePath
    else:
        baseReader = PdfFileReader(basePath, strict=False)
    annotReader = PdfFileReader(outputPath, strict=False)

    if pdfRanges is None:
        pageNum = min(baseReader.getNumPages(), annotReader.getNumPages())
        pdfRanges = range(pageNum)
    else:
        pageNum = sum(len(r) for r in pdfRanges)
        pdfRanges = chain(*pdfRanges)

    def box_size(pdf_page):
        """Return (width, height) of the page's crop box (or art box)."""
        s = pdf_page.cropBox or pdf_page.artBox
        return (s.upperRight[0] - s.upperLeft[0],
                s.upperLeft[1] - s.lowerLeft[1])

    writer = TolerantPdfWriter()
    _progress(progress, 0, pageNum + 1)

    for apage, page in enumerate(pdfRanges):
        bp = baseReader.getPage(page)
        ap = annotReader.getPage(apage)

        aw, ah = box_size(ap)
        w, h = box_size(bp)

        np = PageObject.createBlankPage(writer, aw, ah)
        if w <= h:
            ratio = min(aw / w, ah / h)
            tx = 0
            ty = ah - (h * ratio)
            rot = 0
        else:
            # Landscape base page: rotate it by 90 degrees to fit.
            w, h = h, w
            ratio = min(aw / w, ah / h)
            tx = w * ratio
            ty = ah - (h * ratio)
            rot = 90
        np.mergeRotatedScaledTranslatedPage(bp, rot, ratio, tx, ty)
        np.mergePage(ap)
        if rotate:
            np.rotateCounterClockwise(rotate)
        writer.addPage(np)

        # Report the running counter, not the base page number: with
        # explicit pdfRanges the page number is unrelated to the total.
        _progress(progress, apage, pageNum + 1)

    writer.removeLinks()  # until we implement transformations on annotations
    with open(outputPath, 'wb') as out:
        writer.write(out)
    _progress(progress, pageNum + 1, pageNum + 1)
def delivery_type(page: pdf.PageObject) -> dict:
    """Classify a page by its extracted text.

    :return: a dict with the keys ``"type"`` and ``"amount"``:
        ``"Label"`` with the number of ``Tracking`` occurrences when the
        text contains ``Tracking``; ``"Empty Page"`` with 0 when
        ``is_it_blank_page`` says so; otherwise ``"Shipping List"`` with 1.
    """
    page_text = page.extractText()

    if re.search("Tracking", page_text) is not None:
        occurrences = re.findall("Tracking", page_text)
        return {"type": "Label", "amount": len(occurrences)}

    if is_it_blank_page(page):
        return {"type": "Empty Page", "amount": 0}

    return {"type": "Shipping List", "amount": 1}
def label_merger(label_pages, print_size: LabelPrintSize):
    """Stack label pages onto print sheets and return the filled writer.

    Labels are placed top to bottom, ``print_size.get_num_labels_in_page()``
    per sheet, each scaled to the label print area and horizontally
    centred inside the sheet's print area. Nothing is written to disk;
    the ``PdfFileWriter`` holding the sheets is returned.
    """
    labels_per_page = print_size.get_num_labels_in_page()
    print_page_height, print_page_width = print_size.get_print_decimal_pts_height_width()
    label_page_height, label_page_width = print_size.get_label_decimal_pts_height_width()
    page_horizontal_print_area = print_size.get_print_decimal_pts_horizontal_print_area()
    label_vertical_print_area = print_size.get_label_decimal_pts_vertial_print_area()
    label_horizontal_print_area = print_size.get_label_decimal_pts_horizontal_print_area()
    margins = print_size.get_print_margins()
    pp_margin_top, pp_margin_right, pp_margin_bottom, pp_margin_left = margins

    x_offset = ((page_horizontal_print_area - label_page_width) / 2) + pp_margin_left
    y_offset = pp_margin_top

    writer = PdfFileWriter()
    sheet = None
    slot = 0  # index of the next free label position on the current sheet
    for label in label_pages:
        if sheet is None:
            sheet = PageObject.createBlankPage(None, print_page_width, print_page_height)

        scale_x = label_horizontal_print_area / label.mediaBox.getWidth()
        scale_y = label_vertical_print_area / label.mediaBox.getHeight()
        y = print_page_height - ((slot + 1) * label_page_height - y_offset)
        sheet.mergeTransformedPage(label, (scale_x, 0, 0, scale_y, x_offset, y))

        slot += 1
        if slot >= labels_per_page:
            # Sheet is full: flush it and start over.
            writer.addPage(sheet)
            sheet = None
            slot = 0

    # Flush a partially filled last sheet.
    if sheet is not None:
        writer.addPage(sheet)
    return writer
def convert_label_files(label_files_name, output_file_name, print_size=A4_PAGE, label_media_size=LABEL_MEDIA_PAGE):
    """Stack the label pages of several PDFs onto print sheets.

    :param label_files_name: iterable of paths of the label PDFs.
    :param output_file_name: path of the PDF that is written.
    :param print_size: ``(width, height)`` of the print sheet, converted
        with ``mm2pts``.
    :param label_media_size: ``(width, height)`` of one label, converted
        with ``mm2pts``.
    :raises IndexError: if the input files contain no pages at all.

    Labels are scaled to the label size (scale taken from the first label
    page), centred horizontally and stacked from the top of each sheet.
    """
    print_width, print_height = print_size
    label_width, label_height = label_media_size

    l_files = []
    try:
        # Opened one by one inside the try so that every handle is closed
        # even when a later open or any processing step fails.
        for name in label_files_name:
            l_files.append(open(name, 'rb'))

        l_pages = read_all_label_pages(l_files)
        label_per_page = int(print_height / label_height)
        writer = PdfFileWriter()

        scalex = decimal.Decimal(mm2pts(label_width)) / \
            l_pages[0].mediaBox.getWidth()
        scaley = decimal.Decimal(mm2pts(label_height)) / \
            l_pages[0].mediaBox.getHeight()

        pw = decimal.Decimal(mm2pts(print_width))  # Page Width
        ph = decimal.Decimal(mm2pts(print_height))  # Page Height
        lw = decimal.Decimal(mm2pts(label_width))  # Label Width
        lh = decimal.Decimal(mm2pts(label_height))  # Label Height

        page_buffer = None
        i = 0
        for label in l_pages:
            if page_buffer is None:
                page_buffer = PageObject.createBlankPage(None, pw, ph)
            x = (pw - lw) / 2
            y = ph - ((i + 1) * lh)
            page_buffer.mergeTransformedPage(label,
                                             (scalex, 0, 0, scaley, x, y))
            i = i + 1
            if i >= label_per_page:
                i = 0
                writer.addPage(page_buffer)
                page_buffer = None

        # Flush a partially filled last sheet.
        if page_buffer is not None:
            writer.addPage(page_buffer)

        with open(output_file_name, 'wb') as f:
            writer.write(f)
    finally:
        for f in l_files:
            f.close()
def merge3():
    """Overlay the first page of ``r1.pdf`` on the first page of ``w1.pdf``.

    A blank page the size of the ``w1.pdf`` page is created, that page is
    merged onto it shifted down by 400 units, the ``r1.pdf`` page is merged
    on top, and the result is written to ``pliki testowe/out.pdf``.
    """
    # Both inputs must stay open until the writer has serialised the
    # merged page; the ``with`` closes them afterwards.
    with open('pliki testowe/r1.pdf', 'rb') as invoice_file, \
            open('pliki testowe/w1.pdf', 'rb') as sup_file:
        invoice_page = PdfFileReader(invoice_file).getPage(0)
        sup_page = PdfFileReader(sup_file).getPage(0)  # first page

        translated_page = PageObject.createBlankPage(
            None, sup_page.mediaBox.getWidth(), sup_page.mediaBox.getHeight())
        # -400 is approximate mid-page
        translated_page.mergeScaledTranslatedPage(sup_page, 1, 0, -400)
        translated_page.mergePage(invoice_page)

        writer = PdfFileWriter()
        writer.addPage(translated_page)
        with open('pliki testowe/out.pdf', 'wb') as f:
            writer.write(f)
def createOutput(infileName, outfileName):
    """Tile the first page of ``infileName`` 2 x 5 onto one output page.

    The tile positions start at ``(MARGIN_X, MARGIN_Y)`` and advance by
    ``CARD_WIDTH`` / ``CARD_HEIGHT``. The single resulting page
    (``PAGE_WIDTH`` x ``PAGE_HEIGHT``) is written to ``outfileName``.
    """
    # Keep the input open until the output has been written; both files
    # are closed by the ``with`` blocks (previously neither was closed,
    # so the output was only flushed at garbage collection).
    with open(infileName, "rb") as infile:
        # Get the one and only page of our input.
        pdfInput = PdfFileReader(infile)
        card = pdfInput.pages[0]

        # Starting with a clean page, merge the input PDF at every tiled location
        tilePage = PageObject.createBlankPage(None, width=PAGE_WIDTH, height=PAGE_HEIGHT)
        for x in range(2):
            for y in range(5):
                print(f'tiling {x}, {y}')
                tilePage.mergeTranslatedPage(card,
                                             MARGIN_X + x * CARD_WIDTH,
                                             MARGIN_Y + y * CARD_HEIGHT)

        pdfOutput = PdfFileWriter()
        pdfOutput.addPage(tilePage)
        with open(outfileName, "wb") as outfile:
            pdfOutput.write(outfile)
def _parse_annot_pdf_page(page: PageObject, page_idx: int) -> List[Annotation]: """Fetch annotations on this pdf page and return them as a list.""" outputs = [] if not (page.cropBox[0] == page.cropBox[1] == 0): raise CannotReadPdf( f"cannot find positions of annotations, cropBox of page does not start with zeros (={page.cropBox})" ) page_height = page.cropBox[ 3] # assuming the mediabox has form [0,0,width,height] annots = page.get('/Annots', []) if not isinstance(annots, list): # something is strange if not isinstance(annots, IndirectObject): logger.warning(f"cannot read annotations from {input}") return [] annots = annots.getObject( ) # now let's hope to get a list; in some cases this helps if not isinstance(annots, list): logger.warning(f"cannot read annotations from {input}") return [] for ann in annots: current = ann.getObject() if "/Subj" in current: annot_type = current["/Subj"].lower() current_rec = AnnotationExtractor._create_annotations_bounding_box( current.get("/Rect"), page_height) text_content = current.get("/Contents") who_annotated = current.get("/T") if isinstance(text_content, ByteStringObject): text_content = text_content.decode('utf-8') if annot_type in ADMISSIBLE_ANNOTATION_TYPES: outputs.append( Annotation(page=page_idx, type=annot_type, box=current_rec, text_content=text_content, who_annotated=who_annotated)) else: logger.warning( f"foreign annotation found (type {annot_type}, src {input})" ) return outputs
def create_document(src, selection):
    """Puts selected pages of a PDF in portrait mode with every other page blank

    ``selection`` is an iterable of 1-based, inclusive ``(start, end)``
    pairs; when it is empty every page of ``src`` is used. The result is
    written next to the source as ``<prefix>_notes.pdf``, where the prefix
    is everything before the first ``.pdf`` in ``src``.
    """
    click.echo(f'Reading from file: {src}')
    outfile_name = f"{src.split('.pdf')[0]}_notes.pdf"

    pdf_reader = PdfFileReader(src)
    input_n_pages = pdf_reader.getNumPages()

    if selection:
        # Convert the 1-based inclusive ranges to unique 0-based indices.
        wanted = set()
        for page_start, page_end in selection:
            wanted.update(range(page_start - 1, page_end))
        selected_page_numbers = sorted(wanted)
    else:
        selected_page_numbers = list(range(0, input_n_pages))

    click.echo(
        f'Preparing pages from source file between '
        f'p.{selected_page_numbers[0] + 1}-{selected_page_numbers[-1] + 1}')

    pdf_writer = PdfFileWriter()
    for page_number in selected_page_numbers:
        source_page = pdf_reader.getPage(page_number)
        media_box = source_page.mediaBox
        height = float(media_box.getHeight())
        width = float(media_box.getWidth())
        hw_scale = height / width

        new_page = PageObject.createBlankPage(height=height, width=width)
        new_page.mergeRotatedScaledTranslatedPage(
            source_page, 90, 1.0 / hw_scale, width, 0, expand=False)
        new_page.rotateClockwise(90)
        pdf_writer.addPage(new_page)

    click.echo(f'Writing to file: {outfile_name}')
    with open(outfile_name, 'wb') as outfile:
        pdf_writer.write(outfile)
def merge_pages(pages: Iterable[PageObject]) -> Iterable[PageObject]:
    """Yield one new page for every consecutive pair of input pages.

    Both pages of a pair are merged rotated by 90 degrees at half scale;
    the second one is shifted by half the height of the first page's art
    box and merged with ``expand=True``.

    ``pages`` may be any iterable (it used to require an iterator). If the
    number of pages is odd, the unpaired last page is dropped.
    """
    page_iter = iter(pages)
    while True:
        try:
            left = next(page_iter)
            right = next(page_iter)
        except StopIteration:
            # No complete pair left.
            break

        height = left.artBox.upperRight[1]
        # The page starts empty and is grown by the expanding merge below.
        new_page = PageObject.createBlankPage(width=0, height=0)
        new_page.mergeRotatedScaledTranslatedPage(left, 90, 0.5, 0, 0)
        new_page.mergeRotatedScaledTranslatedPage(right, 90, 0.5, 0, height / 2, expand=True)
        yield new_page
def scale(query, pdfs):
    """Scale PDF files to a given page size.

    :param query: target size as ``"<width>x<height>"`` in inches; both
        values are multiplied by 72.
    :param pdfs: iterable of PDF paths. For each one a sibling file named
        ``"<name> (scaled).pdf"`` is written.

    Every page is scaled uniformly by the smaller of the two axis ratios
    and merged at the origin of a blank page of the target size.
    Encrypted or malformed inputs stop processing and trigger a
    notification.
    """
    try:
        for pdf in pdfs:
            reader = PdfFileReader(pdf, strict=False)

            if reader.isEncrypted:
                raise FileEncryptedError

            writer = PdfFileWriter()
            w, h = [float(i) * 72 for i in query.split('x')]

            # ``range`` instead of ``xrange``: the latter does not exist
            # on Python 3.
            for i in range(reader.numPages):
                inp_page = reader.getPage(i)
                inp_page_w = float(inp_page.mediaBox[2])
                inp_page_h = float(inp_page.mediaBox[3])

                scale_w = w / inp_page_w
                scale_h = h / inp_page_h
                scale = min(scale_w, scale_h)

                out_page = PageObject.createBlankPage(None, w, h)
                out_page.mergeScaledTranslatedPage(inp_page, scale, 0, 0)
                writer.addPage(out_page)

            noextpath = os.path.splitext(pdf)[0]
            out_file = '{} (scaled).pdf'.format(noextpath)

            with open(out_file, 'wb') as f:
                writer.write(f)

    except FileEncryptedError:
        notify.notify('Alfred PDF Tools',
                      'Scale action cannot handle an encrypted PDF file.')

    except PdfReadError:
        notify.notify('Alfred PDF Tools',
                      'Cannot scale a malformed PDF file.')
def analyze_webletter(self, pdf_letter):
    """
    Ensure the web letter has at least two pages.

    If the PDF has fewer than two pages, its first page is kept and one
    blank page of the same media-box size is appended.

    :param pdf_letter: the PDF document as bytes.
    :return: the original bytes, or the bytes of the padded document.
    """
    pdf = PdfFileReader(BytesIO(pdf_letter))
    if not pdf.numPages < 2:
        return pdf_letter

    first_page = pdf.getPage(0)
    final_pdf = PdfFileWriter()
    final_pdf.addPage(first_page)

    media_box = first_page.mediaBox
    blank = PageObject.createBlankPage(
        None, float(media_box.getWidth()), float(media_box.getHeight()))
    final_pdf.addPage(blank)

    output_stream = BytesIO()
    final_pdf.write(output_stream)
    padded_letter = output_stream.getvalue()
    output_stream.close()
    return padded_letter
def resize_pdf(filename, out_path):
    """Copy a PDF onto larger pages and write it as ``resized_<filename>``.

    The input is read from ``args.dir_path``. Every output page has the
    media-box size of the first input page plus ``args.margin_w`` /
    ``args.margin_h``; each input page is merged unscaled at the origin.

    :param filename: file name inside ``args.dir_path``.
    :param out_path: directory the resized file is written to.
    """
    path_file = args.dir_path + "/" + filename
    output_file = out_path + "/resized_" + filename
    print("Open pdf file: ", path_file)

    # The input must stay open until the output has been written; the
    # ``with`` closes it afterwards (the handle used to be leaked).
    with open(path_file, 'rb') as source:
        reader = PdfFileReader(source)
        number_of_pages = reader.getNumPages()

        # The first page defines the size of every output page.
        first_page = reader.getPage(0)
        new_width = first_page.mediaBox.getWidth() + args.margin_w
        new_height = first_page.mediaBox.getHeight() + args.margin_h

        writer = PdfFileWriter()
        for i in range(number_of_pages):
            blank_page = PageObject.createBlankPage(None, new_width, new_height)
            # page, scale, offset_x, offset_y
            blank_page.mergeScaledTranslatedPage(reader.getPage(i), 1, 0, 0)
            writer.addPage(blank_page)

        print("Save resized pdf in: ", output_file)
        with open(output_file, 'wb') as f:
            writer.write(f)
def shrink():
    """Overlay page one of ``Doc1.pdf`` on a shrunk page one of ``filename``.

    The first page of the file named by the module-level ``filename`` is
    merged at 98 % scale onto a blank page of its own size, the first page
    of ``Doc1.pdf`` is merged on top, and the result is written to
    ``out.pdf``. A status line is printed afterwards.

    :raises OSError: if ``out.pdf`` cannot be written (after printing the
        failure message).
    """
    # Inputs are only read, so plain 'rb' is enough. They stay open until
    # the writer has serialised the merged page.
    with open("Doc1.pdf", 'rb') as invoice_file, \
            open(filename, 'rb') as sup_file:
        invoice_page = PdfFileReader(invoice_file).getPage(0)
        sup_page = PdfFileReader(sup_file).getPage(0)  # first page

        translated_page = PageObject.createBlankPage(
            None, sup_page.mediaBox.getWidth(), sup_page.mediaBox.getHeight())
        translated_page.mergeScaledTranslatedPage(sup_page, 0.98, 0, 0)
        translated_page.mergePage(invoice_page)

        writer = PdfFileWriter()
        writer.addPage(translated_page)

        # File objects have no ``save`` method, so the old ``f.save()``
        # call always raised AttributeError. Leaving the ``with`` block
        # is what flushes and closes the file.
        try:
            with open('out.pdf', 'wb') as f:
                writer.write(f)
        except OSError:
            print("Failed for save file")
            raise

    print("Saved File Successfully")
def create_pdf_marks_layers(url_xml):
    """Create a one-page PDF with registration marks for a cutting layout.

    The XML file is searched with XPath for the job name, the overall
    size (``WIDTH`` / ``HEIGHT``), the number of labels (``NUMBERUP``), the
    size of one label (``HSIZE`` / ``VSIZE``) and the label positions
    (``POSITION`` with ``H`` / ``V`` children). Sizes are converted from
    millimetres to points and the mark PDF is merged onto a blank page of
    the overall size at positions derived from the first and last
    column / row. The page is written to ``<job name>_mark.pdf`` in
    ``args.outputFolder`` (or to a fixed test location when the module is
    run as a script).

    :param url_xml: path of the layout XML file.
    """
    tree = ET.parse(url_xml)
    root = tree.getroot()

    cut_name = root.find('.//JOBNAME').text
    size_cut = {
        'width': float(root.find('.//WIDTH').attrib['number']),
        'height': float(root.find('.//HEIGHT').attrib['number'])
    }
    number_up = root.find('.//NUMBERUP').text
    size_up = {
        'width': float(root.find('.//HSIZE').text),
        'height': float(root.find('.//VSIZE').text)
    }

    print('название штампа cut_name: ', cut_name[:-4])
    print('количество этикеток number_up: ', number_up)
    print('размер единички size_up: ', size_up)
    print('размер штампа с блидами size_cut: ', size_cut)

    # The positions are read once; the original re-queried the XML tree on
    # every use of the coordinate lists.
    positions = list(root.findall('.//POSITION'))
    label_coords = [[float(p.find('H').text), float(p.find('V').text)]
                    for p in positions]
    print('label_coord_list', label_coords)

    # Distinct, ascending column (H) and row (V) coordinates.
    columns = sorted({float(p.find('H').text) for p in positions})
    rows = sorted({float(p.find('V').text) for p in positions})
    print('================колонки===================')
    print('colom_coord_list', columns)
    print('=================строки====================')
    print('row_coord_list: ', rows)

    # Gap between the first two rows (0 when there is only one row).
    if len(rows) > 1:
        offset_row = (rows[1] - rows[0] - size_up['height'])
    else:
        offset_row = 0
    print('offset_row:', offset_row)

    # Gap between the first two columns; with a single column, half of
    # the width left over next to one label.
    if len(columns) > 1:
        offset_col = (columns[1] - columns[0]) - size_up['width']
    else:
        offset_col = (size_cut['width'] - size_up['width']) / 2
    print('offset_col:', offset_col)

    # ---------------------------- generate the PDF ----------------------------
    if __name__ == "__main__":
        bigpage = '\\\\esko\\ae_base\\TEMP-Shuttle-IN\\' + cut_name[:-4] + '_Mark_TEST.pdf'
    else:
        bigpage = args.outputFolder + "\\" + cut_name[:-4] + '_mark.pdf'

    mark_url = r'\\esko\bg_data_marks_v010\dat\krest_for_bottom layer_layout.pdf'

    # Constant for converting millimetres to PostScript points:
    # PyPDF2 works in points (1/72 inch), and 1 point = 25.4 / 72 mm.
    mm_to_pt = 25.4 / 72

    bpw = size_cut['width'] / mm_to_pt
    bph = size_cut['height'] / mm_to_pt
    scale_mark = 1

    # The mark file must stay open until the output has been written.
    with open(mark_url, 'rb') as mark_file:
        mark_read = PdfFileReader(mark_file)
        mark = mark_read.getPage(0)
        mark_size_pt = {
            'width': float(mark.mediaBox.getWidth()),
            'height': float(mark.mediaBox.getHeight())
        }
        print('mark', mark['/MediaBox'])
        print('mark_size', mark_size_pt)

        big_page = PageObject.createBlankPage(None, bpw, bph)

        def tx_after_first_column():
            """x of a mark centred in the gap right of the first column."""
            tx = ((columns[0] + size_up['width']) / mm_to_pt) - mark_size_pt['width'] / 2
            tx += (offset_col / 2) / mm_to_pt
            return tx

        def tx_before_last_column():
            """x of a mark centred in the gap left of the last column."""
            tx = ((columns[-1]) / mm_to_pt) - mark_size_pt['width'] / 2
            tx -= (offset_col / 2) / mm_to_pt
            return tx

        def ty_below_first_row():
            """Top-down y of a mark in the gap below the first row."""
            ty = (rows[0] + size_up['height']) / mm_to_pt + mark_size_pt['height'] / 2
            ty += (offset_row / 2) / mm_to_pt
            return ty

        def ty_at_first_row_top():
            """Top-down y of a mark at the top edge of the first row."""
            ty = (rows[0]) / mm_to_pt + mark_size_pt['height'] / 2
            ty += (offset_row / 2) / mm_to_pt
            return ty

        def ty_above_last_row():
            """Top-down y of a mark in the gap above the last row."""
            ty = (rows[-1]) / mm_to_pt + mark_size_pt['height'] / 2
            ty -= (offset_row / 2) / mm_to_pt
            return ty

        marks = []
        if offset_row < 1 and offset_col < 1 and len(rows) > 1:
            # Gaps below 1 mm in both directions: no marks are placed.
            pass
        elif len(columns) >= 1 and len(rows) > 1:
            marks = [
                # bottom-right corner of the top-left label
                (tx_after_first_column(), ty_below_first_row()),
                # bottom-left corner of the top-right label
                (tx_before_last_column(), ty_below_first_row()),
                # top-left corner of the bottom-right label
                (tx_before_last_column(), ty_above_last_row()),
                # top-right corner of the bottom-left label
                (tx_after_first_column(), ty_above_last_row()),
            ]
        elif len(columns) > 1 and len(rows) == 1:
            marks = [
                # bottom-right corner of the top-left label
                (tx_after_first_column(), ty_below_first_row()),
                # top-right corner of the top-left label
                (tx_after_first_column(), ty_at_first_row_top()),
                # bottom-left corner of the top-right label
                (tx_before_last_column(), ty_below_first_row()),
            ]

        for tx, ty in marks:
            # The XML rows are measured from the top, PDF y from the bottom.
            big_page.mergeScaledTranslatedPage(mark, scale_mark, tx=tx, ty=bph - ty)

        writer = PdfFileWriter()
        writer.addPage(big_page)
        with open(bigpage, 'wb') as f:
            writer.write(f)
def cutsheet(filename):
    """Build ``<product name> Cut Sheet.pdf`` from a product data sheet.

    Steps (intermediate files are written to the working directory):

    1. scale every page of ``filename`` to 527 x 682 -> ``scaleout.pdf``;
    2. merge each scaled page (scale 1.05, shifted by 30 / -5) and the
       header template onto a blank page the size of ``./src/blank.pdf``
       -> ``mergeout.pdf``;
    3. render the job code (name of the working directory) and the
       product name -> ``jobinfo.pdf``;
    4. merge ``jobinfo.pdf`` over every merged page -> final cut sheet.

    :param filename: path of the input PDF; its base name without
        ``.pdf`` is used as the product name.
    """
    print('\nCURRENTLY WORKING ON FILE: %s\n' % filename)  # file indication

    # ---- SCALE pages ------------------------------------------------------
    with open(filename, "rb") as cutsheetfile:
        input1 = PdfFileReader(cutsheetfile)
        scale_output = PdfFileWriter()
        for index in range(input1.getNumPages()):
            page = input1.getPage(index)
            page.scaleTo(width=527, height=682)  # default: 527 682
            scale_output.addPage(page)
            print("SCALING: Page %d is done" % (index + 1))
        print("Scaling complete\n")

        with open('scaleout.pdf', 'wb') as s:
            scale_output.write(s)

    # ---- MERGE pages with the header template -----------------------------
    with open("./src/cut_sheet_template_v2020v03.pdf", 'rb') as headerfile, \
            open("scaleout.pdf", 'rb') as scalefile, \
            open("./src/blank.pdf", 'rb') as blankfile:
        header_page = PdfFileReader(headerfile).getPage(0)
        scale_reader = PdfFileReader(scalefile)
        # blank.pdf is only used for its page size (8.5" x 11" template).
        blank_page = PdfFileReader(blankfile).getPage(0)

        writer = PdfFileWriter()
        for index in range(scale_reader.getNumPages()):
            scale_page = scale_reader.getPage(index)
            complete_page = PageObject.createBlankPage(
                None, blank_page.mediaBox.getWidth(),
                blank_page.mediaBox.getHeight())
            # merge, scale, and translate page -- default: 1.05 30 -5
            complete_page.mergeScaledTranslatedPage(scale_page, 1.05, 30, -5)
            complete_page.mergePage(header_page)
            writer.addPage(complete_page)
            print("MERGING: Page %d is done" % (index + 1))
        print("Merging complete\n")

        with open('mergeout.pdf', 'wb') as m:
            writer.write(m)

    # ---- Job code & product info ------------------------------------------
    jobcode = os.path.basename(os.getcwd())
    # Derived from the parameter; the original read the name ``item``,
    # which is not defined inside this function.
    productname = os.path.basename(filename).replace('.pdf', '')

    c = canvas.Canvas('jobinfo.pdf', pagesize=letter)
    width, height = letter
    c.setFontSize(18)
    c.drawCentredString(width / 2.0, 750, jobcode)  # centred at top of page
    c.setFontSize(14)
    c.drawCentredString(width / 2.0, 730, productname)
    c.save()

    # ---- WATERMARK job and product name onto the merged pages ---------------
    finalname = productname + ' Cut Sheet.pdf'
    with open('mergeout.pdf', 'rb') as basewfile, \
            open('jobinfo.pdf', 'rb') as watermarkfile:
        # No second positional argument: it is ``strict``, not a file mode.
        ipdf = PdfFileReader(basewfile)
        watermark = PdfFileReader(watermarkfile).getPage(0)

        output = PdfFileWriter()
        for index in range(ipdf.getNumPages()):
            page = ipdf.getPage(index)
            page.mergePage(watermark)
            output.addPage(page)
            print("WATERMARKING: Page %d is done" % (index + 1))
        print("Watermarking complete")

        with open(finalname, 'wb') as n:
            output.write(n)
def get_blank_page(width_in=8.5, height_in=11):
    """Return a blank PDF page of the given size (8.5 x 11 inches by default).

    Args:
        width_in: Page width in inches.
        height_in: Page height in inches.

    Returns:
        The page produced by ``PageObject.createBlankPage``.
    """
    points_per_inch = 72  # page dimensions are passed as inches * 72
    return PageObject.createBlankPage(
        width=width_in * points_per_inch,
        height=height_in * points_per_inch,
    )
# NOTE(review): the next four statements are the tail of a function whose
# `def` line (and presumably a loop binding `i` and `page`) lies above this
# excerpt; the indentation shown here is assumed -- confirm against the file.
        page.scale(scale)
        # Bit 1 of the index selects the x position, bit 0 the y position.
        page.x = x_increment if i & 2 else 1
        page.y = 0 if i & 1 else y_increment
    return srcpages.render()


# Step 1: run every page of 'document-output.pdf' through get4() and write
# the result to 'out.pdf'.
pages = PdfReader('document-output.pdf').pages
writer = PdfWriter('out.pdf')
# The step equals len(pages), so this loop runs at most once and the slice
# below is the whole page list.
# NOTE(review): with zero pages the step is 0 and range() raises ValueError.
for index in range(0, len(pages), len(pages)):
    print(len(pages))
    writer.addpage(get4(pages[index:index + len(pages)]))
writer.write()

# Step 2: stack the first page of 'out.pdf' on top of the first page of
# 'download.pdf' and save the result as 'output1.pdf'.
# NOTE(review): neither handle passed to PdfFileReader below is ever closed.
reader = PdfFileReader(open("out.pdf",'rb'))
invoice_page = reader.getPage(0)
sup_reader = PdfFileReader(open("download.pdf",'rb'))
sup_page = sup_reader.getPage(0)  # index 0 is the first page (an older comment said "second")
# Blank canvas with the same media box size as the 'download.pdf' page.
translated_page = PageObject.createBlankPage(None, sup_page.mediaBox.getWidth(), sup_page.mediaBox.getHeight())
# Arguments are (page, 1, 0, 0): presumably scale 1 with no x/y shift; an
# older comment mentioning -400 no longer matches these values.
translated_page.mergeScaledTranslatedPage(sup_page, 1, 0, 0)
translated_page.mergePage(invoice_page)  # merged after, i.e. on top of, sup_page
writer = PdfFileWriter()  # rebinds `writer`, replacing the PdfWriter used in step 1
writer.addPage(translated_page)
with open('output1.pdf', 'wb') as f:
    writer.write(f)
def is_it_blank_page(page: pdf.PageObject) -> bool:
    """Return True when *page* has no extractable text and no XObjects.

    A page counts as blank when ``page.extractText()`` returns an empty
    string and its ``/Resources`` dictionary has no ``/XObject`` entry.
    A text-less page with no ``/Resources`` entry at all is also reported
    as blank instead of raising ``KeyError``.

    Args:
        page: The page to inspect.

    Returns:
        True if the page is considered blank, False otherwise.
    """
    if page.extractText() != '':
        return False
    try:
        # Item access (rather than .get) keeps the container's own lookup
        # behaviour for the stored value.
        resources = page['/Resources']
    except KeyError:
        # No resource dictionary, hence no XObject entry either.
        return True
    return '/XObject' not in resources
# Place the first page of the "maket" PDF onto a new blank page and save it.
#
# 1 user space unit is 1/72 inch
# 1/72 inch ~ 0.352 millimeters

# Raw strings so the backslashes of the UNC paths are taken literally.
bigpage = r'\\Server-esko\ae_base\TEMP-Shuttle-IN\fon.pdf'  # not used below
maket = r'\\Server-esko\ae_base\TEMP-Shuttle-IN\maket.pdf'
outfile = r'\\Server-esko\ae_base\TEMP-Shuttle-IN\output.pdf'

# Offsets of the placed page on the big page.
# NOTE(review): multiplying by 0.352 turns a value in user-space units into
# millimetres, while the merge call below presumably expects user-space
# units -- confirm which unit the 100 is meant to be in.
tx = 100 * 0.352
ty = 100 * 0.352

# Width and height of the big page.
bpw = 700
bph = 500
scale = 1

# Keep the source file open until the output is written: the reader may
# still need to read page data from it at write time.
with open(maket, 'rb') as maket_file:
    inMaket = PdfFileReader(maket_file)
    min_page = inMaket.getPage(0)

    # The big page is meant to hold 4 pages (2x2); only one is placed here.
    big_page = PageObject.createBlankPage(None, bpw, bph)

    # mergeScaledTranslatedPage(page2, scale, tx, ty, expand=False)
    # https://pythonhosted.org/PyPDF2/PageObject.html
    big_page.mergeScaledTranslatedPage(min_page, scale, tx, ty)

    writer = PdfFileWriter()
    writer.addPage(big_page)
    with open(outfile, 'wb') as f:
        writer.write(f)
def add_text(self, page_no, font, text_items, debug=False):
    '''Add text, given as a {string: coords} mapping, to the given page.

    The page's ``/Contents`` entry is replaced by a new flate-encoded
    stream: the existing content (when the page has any), passed through
    ``PageObject._pushPopGS``, followed by the operations returned by
    ``self.text_to_operations``. The page is modified in place.

    Args:
        page_no: Index passed to ``self.getPage`` to select the page.
        font: Forwarded to ``self.text_to_operations``.
        text_items: The {string: coords} mapping of text to add.
        debug: Forwarded to ``self.text_to_operations``.
    '''
    page = self.getPage(page_no)
    existing = page.getContents()

    combined = ArrayObject()
    # A page without a /Contents entry yields None here; skip it rather
    # than handing None to _pushPopGS.
    if existing is not None:
        combined.append(PageObject._pushPopGS(existing, page.pdf))

    overlay = NovelContentStream(page.pdf)
    overlay.operations = self.text_to_operations(page, font, text_items, debug)
    combined.append(overlay)

    page[NameObject('/Contents')] = ContentStream(combined, page.pdf).flateEncode()