def diff_pdf_pages(pdf1_path, pdf2_path):
    """Return the indices of the pages on which two PDF documents differ.

    Both arguments are wrapped in ``io.BytesIO``, so despite their names
    they carry the raw bytes of a PDF rather than a filesystem path.

    :param pdf1_path: bytes of the first PDF; when falsy, every page index
        of the second PDF is returned.
    :param pdf2_path: bytes of the second PDF.
    :return: list of zero-based page indices, taken from the first PDF's
        page range. A page the second PDF does not have counts as different.
    """
    second = PdfFileReader(io.BytesIO(pdf2_path))
    second_count = second.getNumPages()
    if not pdf1_path:
        return list(range(0, second_count))

    first = PdfFileReader(io.BytesIO(pdf1_path))
    changed = []
    for index in range(first.getNumPages()):
        if index >= second_count:
            changed.append(index)
            continue

        # Serialise the page from each document into its own single-page
        # PDF and compare the resulting bytes.
        rendered = []
        for reader in (first, second):
            single = PdfFileWriter()
            single.addPage(reader.getPage(index))
            buffer = io.BytesIO()
            single.write(buffer)
            rendered.append(buffer.getvalue())

        if rendered[0] != rendered[1]:
            changed.append(index)
    return changed
def add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out):
    """Overlay the first page of a watermark PDF onto every page of a PDF.

    :param pdf_file_in: path of the PDF to watermark.
    :param pdf_file_mark: path of the PDF whose first page is the watermark.
    :param pdf_file_out: path the watermarked PDF is written to.
    :return: ``False`` when the input is encrypted and cannot be unlocked
        with an empty password; ``None`` otherwise.
    """
    pdf_output = PdfFileWriter()
    with open(pdf_file_in, 'rb') as input_stream:
        pdf_input = PdfFileReader(input_stream)

        # The PDF is encrypted: try to unlock it with an empty password.
        if pdf_input.getIsEncrypted():
            print('该PDF文件被加密了.')
            try:
                unlocked = pdf_input.decrypt('')
            except Exception:
                unlocked = 0
            # decrypt() reports a wrong password by returning 0, not by
            # raising, so the return value has to be checked as well.
            if not unlocked:
                print('尝试用空密码解密失败.')
                return False
            print('用空密码解密成功.')

        # Number of pages in the input PDF.
        page_num = pdf_input.getNumPages()

        # Read the watermark PDF. Both inputs stay open until the output
        # has been written.
        with open(pdf_file_mark, 'rb') as watermark_stream:
            watermark_page = PdfFileReader(watermark_stream).getPage(0)

            # Stamp every page.
            for i in range(page_num):
                page = pdf_input.getPage(i)
                page.mergePage(watermark_page)
                page.compressContentStreams()  # compress the page content
                pdf_output.addPage(page)

            with open(pdf_file_out, "wb") as output_stream:
                pdf_output.write(output_stream)
def imp_exp_pdf(inputfile, outputfile, size, margin, padding):
    """Import a PDF and export it again with its pages regrouped.

    Pages are collected in batches of ten and each batch is handed to
    ``output_one_page`` together with the layout arguments; a final,
    shorter batch is flushed at the end.

    :param inputfile: path of the PDF to read.
    :param outputfile: path of the PDF to write.
    :param size: passed through to ``output_one_page``.
    :param margin: passed through to ``output_one_page``.
    :param padding: passed through to ``output_one_page``.
    """
    output = PdfFileWriter()
    # The input stays open until the output is written because the writer
    # still refers to pages owned by the reader.
    with open(inputfile, 'rb') as source:
        reader = PdfFileReader(source, strict=False)
        totalPages = reader.getNumPages()

        batch = []
        for i in range(totalPages):
            batch.append(reader.getPage(i))
            if len(batch) == 10:
                output_one_page(batch, size, margin, padding, output)
                batch = []
                print("Printed {} of {} [{:.2f}%]".format(
                    i + 1, totalPages, (i + 1) / float(totalPages) * 100))

        # Flush whatever is left over (fewer than ten pages).
        if batch:
            output_one_page(batch, size, margin, padding, output)

        print('')
        print('Completed converting.')
        print('Saving...')

        with open(outputfile, "wb") as outputStream:
            output.write(outputStream)
    print('END OF PROGRAM')
def __call__(self, data, attachments=None, pages=None):
    """Render the configured fields, fill the form and assemble a PDF.

    :param data: input handed to ``self.template_args`` to build the
        keyword arguments for each field template.
    :param attachments: optional iterable of objects exposing ``pdf()``;
        each is merged onto a blank page appended to the output.
    :param pages: optional iterable of page indices to keep from the
        filled form; when falsy, every page is kept.
    :return: a ``PdfFileWriter`` holding the selected pages followed by
        the attachment pages.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if attachments is None:
        attachments = []

    self.rendered = {}
    for field, ctx in self.fields.items():
        if "template" not in ctx:
            continue

        self.context = ctx
        kwargs = self.template_args(data)
        template = self.context["template"]

        try:
            rendered_field = template.render(**kwargs)
        except Exception as err:
            # A failing template is logged and the field is left out.
            logger.error("%s: %s %s", field, template, err)
        else:
            # Skip the field if it is already rendered by filter
            if field not in self.rendered:
                if PY3:
                    field = field.decode('utf-8')
                self.rendered[field] = rendered_field

    filled = PdfFileReader(self.exec_pdftk(self.rendered))

    for pagenumber, watermark in self.watermarks:
        page = filled.getPage(pagenumber)
        page.mergePage(watermark)

    output = PdfFileWriter()
    pages = pages or xrange(filled.getNumPages())
    for p in pages:
        output.addPage(filled.getPage(p))

    for attachment in attachments:
        output.addBlankPage().mergePage(attachment.pdf())

    return output
def splitPdf(inputPath, splitLeftPath, splitRightPath, splitIndex):
    """Split a PDF into two files at a page index.

    Pages ``0 .. splitIndex - 1`` go to ``splitLeftPath``; the remaining
    pages go to ``splitRightPath``. Nothing is written when the index is
    negative or not smaller than the page count.

    :param inputPath: path of the PDF to split.
    :param splitLeftPath: output path for the pages before ``splitIndex``.
    :param splitRightPath: output path for the pages from ``splitIndex`` on.
    :param splitIndex: zero-based index of the first page of the right part.
    """
    # The input stays open until both outputs are written and is closed on
    # every exit path, including the early returns.
    with open(inputPath, "rb") as source:
        pdf = PdfFileReader(source)
        numOfPages = pdf.getNumPages()

        if splitIndex < 0:
            print("split index should be native number. task canceled!")
            return

        if numOfPages <= splitIndex:
            print("split index is out of page range. task canceled!")
            return

        leftWriter = PdfFileWriter()
        rightWriter = PdfFileWriter()
        for i in range(numOfPages):
            target = leftWriter if i < splitIndex else rightWriter
            target.addPage(pdf.getPage(i))

        for writer, path in ((leftWriter, splitLeftPath),
                             (rightWriter, splitRightPath)):
            with open(path, "wb") as stream:
                writer.write(stream)
def add_signature(request, registration_id):
    """Stamp the signature onto a registration's convention and archive it.

    Merges the first page of ``signature_only.pdf`` onto the third page of
    the convention PDF, writes the first three pages to
    ``convention_<key>_final.pdf``, points the registration at that file,
    sets its state to 3, saves it and sends the convocation mail.

    :param request: the incoming HTTP request (not otherwise used here).
    :param registration_id: primary key of the ``Registration``; a 404 is
        raised when it does not exist.
    :return: a redirect to ``registration-archive-list``.
    """
    registration = get_object_or_404(models.Registration, pk=registration_id)
    str_key = str(registration.key)

    relative_dir = 'registration_data/conventions/' + str_key + '/'
    relative_final = relative_dir + 'convention_' + str_key + '_final.pdf'
    path_convention = (settings.MEDIA_ROOT + '/' + relative_dir +
                       'convention_' + str_key + '.pdf')
    path_signature = settings.MEDIA_ROOT + '/signature/signature_only.pdf'
    path_final = settings.MEDIA_ROOT + '/' + relative_final

    output = PdfFileWriter()
    # Both inputs stay open until the output is written; all three files
    # are closed even if merging fails.
    with open(path_convention, "rb") as convention_file, \
            open(path_signature, "rb") as signature_file:
        input1 = PdfFileReader(convention_file)
        watermark = PdfFileReader(signature_file)

        input1.getPage(2).mergePage(watermark.getPage(0))
        for index in range(3):
            output.addPage(input1.getPage(index))

        with open(path_final, "wb") as outputStream:
            output.write(outputStream)

    registration.convention.name = relative_final
    registration.state = 3
    registration.save()
    mail.send_convocation(registration)
    return redirect('registration-archive-list')
def union(input_files, output_file):
    """Concatenate PDF and image files into a single PDF.

    Files whose name ends in ``.pdf`` are appended page by page. Any other
    file is opened as an image, converted to a one-off PDF in memory and
    appended.

    :param input_files: iterable of input file paths, in output order.
    :param output_file: path of the combined PDF to write.
    """
    import io  # local import: only needed for the in-memory image conversion

    output = PdfFileWriter()
    streams = []  # kept open until the writer has serialised every page
    try:
        for input_file in input_files:
            if input_file.endswith('.pdf'):
                stream = open(input_file, 'rb')
            else:
                # Not a PDF (e.g. jpeg, png): render it to PDF in memory.
                # This avoids deriving a sibling "<name>.pdf" path, which
                # mis-handled dotted paths and could overwrite and then
                # delete an unrelated file.
                stream = io.BytesIO()
                im = PIL.Image.open(input_file)
                im.save(stream, 'PDF', resolution=100.0)
                stream.seek(0)
            streams.append(stream)

            reader = PdfFileReader(stream)
            for i in range(reader.getNumPages()):
                output.addPage(reader.getPage(i))

        with open(output_file, 'wb') as outputStream:
            output.write(outputStream)
    finally:
        for stream in streams:
            stream.close()

    print('completed.')
    print('Union of some file is ' + output_file)
def generate_document(self, data):
    """Render one document and save it as a PDF in ``self.output_dir``.

    Every class in ``self.fields`` is instantiated with the canvas and the
    value at the same position in ``data`` and asked to render itself.
    The rendered page is merged onto the first page of
    ``self.template_file`` when one is configured; otherwise it is used
    on its own.

    :param data: sequence of values, one per entry of ``self.fields``; it
        is also passed to ``self.generate_filename``.
    :raises IndexError: when ``data`` has fewer items than ``self.fields``.
    """
    packet = StringIO()

    template_stream = None
    if self.template_file is not None:
        template_stream = open(self.template_file, 'rb')
    try:
        template = None
        if template_stream is not None:
            template = PdfFileReader(template_stream)

        c = canvas.Canvas(packet, pagesize=(self.width, self.height))
        # TODO: Catch exception if there is less columns than fields
        for i, field_cls in enumerate(self.fields):
            field = field_cls(self, c, data[i])
            field.render()

        # Save canvas
        c.save()
        packet.seek(0)
        text = PdfFileReader(packet)
        output = PdfFileWriter()

        if template is not None:
            # Merge text with base
            page = template.getPage(0)
            page.mergePage(text.getPage(0))
        else:
            page = text.getPage(0)
        output.addPage(page)

        # Save file
        filename = "%s/%s.pdf" % (self.output_dir, self.generate_filename(data))
        with open(filename, 'wb') as outputStream:
            output.write(outputStream)
    finally:
        # The template must stay open until the output is written.
        if template_stream is not None:
            template_stream.close()
def handle(self, *args, **options):
    """Generate one certificate PDF per child listed in the settings.

    Walks ``settings.CERT_CHILDREN`` (certificate type -> class -> list of
    children). For every child, the child's name, the event title, the
    date and the church name are drawn at the positions and fonts given
    in ``settings.CERT_COORD[cert_type]``, merged onto the first page of
    the certificate template and written to
    ``<CERT_PATH>/<ss_class>/<child>_<year>.pdf``.
    """
    # Taken once so every certificate of a run carries the same date.
    now = datetime.now()
    year = now.strftime('%Y')

    for cert_type, ss_class_children in settings.CERT_CHILDREN.items():
        self.stdout.write('Certificate Type: {}\n'.format(cert_type))
        for ss_class, children in ss_class_children.items():
            self.stdout.write('SS Class: {}\n'.format(ss_class))
            for child in children:
                self.stdout.write('Child: {}\n'.format(child))

                paf_path = os.path.join(settings.CERT_TEMPLATE_PATH,
                                        settings.CERT_FILE[cert_type])
                # Re-read the template for each child: mergePage() below
                # modifies the page it is called on.
                pdf = PdfFileReader(paf_path)
                page = pdf.getPage(0)

                s = StringIO.StringIO()
                c = canvas.Canvas(s, pagesize=letter)

                # (settings key, text) in drawing order.
                labels = (
                    ('child', child),
                    ('event', 'Sunday School Summer Festival {}'.format(year)),
                    ('date', '{}'.format(now.strftime('%B %Y'))),
                    ('church', 'St. Mark Coptic Orthodox Church'),
                )
                coords = settings.CERT_COORD[cert_type]
                for key, text in labels:
                    spec = coords[key]
                    c.setFont(spec['font']['name'], spec['font']['size'])
                    c.drawCentredString(spec['x'], spec['y'], text)

                c.save()

                pdf_with_custom_text = PdfFileReader(s)
                page.mergePage(pdf_with_custom_text.getPage(0))

                writer = PdfFileWriter()
                writer.addPage(page)

                output_file = '{}_{}.pdf'.format(child, year)
                output_dir = os.path.join(settings.CERT_PATH, ss_class)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                output_path = os.path.join(output_dir, output_file)

                with open(output_path, 'wb') as f:
                    writer.write(f)
def write_pdf(self, output):
    """Render this object to PDF and write the result to ``output``.

    The RML template is picked from three candidates (by subject type
    slug, by subject type, then the generic ``subject.rml``) and converted
    to PDF. When ``self.print_setup.background`` is set, each rendered
    page is merged onto the background page with the same index;
    rendered pages beyond the background's page count are used as they
    are.

    :param output: writable binary stream that receives the PDF.
    :return: the same ``output`` object.
    """
    # Plain PDF rendered from the RML template.
    candidates = [
        'leprikon/{}/{}.rml'.format(self.pdf_export, self.subject.subject_type.slug),
        'leprikon/{}/{}.rml'.format(self.pdf_export, self.subject.subject_type.subject_type),
        'leprikon/{}/subject.rml'.format(self.pdf_export),
    ]
    rml_content = select_template(candidates).render({
        'object': self,
        'site': LeprikonSite.objects.get_current(),
    })
    pdf_content = trml2pdf.parseString(rml_content.encode('utf-8'))

    # No background configured: the rendered PDF is the final result.
    if not self.print_setup.background:
        output.write(pdf_content)
        return output

    template_pdf = PdfFileReader(self.print_setup.background.file)
    registration_pdf = PdfFileReader(BytesIO(pdf_content))
    writer = PdfFileWriter()

    # Lay every rendered page over the matching background page, if any.
    for index in range(registration_pdf.getNumPages()):
        if index < template_pdf.getNumPages():
            merged = template_pdf.getPage(index)
            merged.mergePage(registration_pdf.getPage(index))
        else:
            merged = registration_pdf.getPage(index)
        writer.addPage(merged)

    writer.write(output)
    return output
def combine_for_print(folder_title):
    """Download the PDFs of a Drive folder and combine them two per page.

    The first page of every even-positioned file starts a new output
    page; the first page of the following file is merged onto it, shifted
    down by 5.3 inches. The result is saved under a name built from the
    current date/time and the number of files, and the temporary
    downloads are deleted afterwards.

    :param folder_title: title of the Drive folder handed to
        ``get_pdf_files``.
    """
    import os  # local import: used to delete the temporary downloads

    drive = get_drive()
    filenames = []
    # Download all pdf files from GDrive.
    for i, fil in enumerate(get_pdf_files(drive, folder_title), 1):
        print(fil['title'])
        filename = '__temp-{}.pdf'.format(i)
        fil.GetContentFile(filename)
        filenames.append(filename)

    if not filenames:
        print('No pdf files were downloaded')
        return

    # Compute output name by using date and number of files.
    output_filename = '{:%Y-%m-%d %H%M} ({}).pdf'.format(
        datetime.datetime.now(), len(filenames))
    print('Combining files into {}'.format(output_filename))

    writer = PdfFileWriter()
    streams = []  # inputs must stay open until the writer has finished
    try:
        for i, filename in enumerate(filenames):
            stream = open(filename, 'rb')
            streams.append(stream)
            reader = PdfFileReader(stream, strict=False)
            if (i % 2) == 0:
                # Even position: this file opens a new output page.
                page = reader.getPage(0)
                writer.addPage(page)
            else:
                # Odd position: merge onto the page added just before.
                page.mergeTranslatedPage(reader.getPage(0), 0, -5.3*inch)

        with open(output_filename, 'wb') as fp:
            writer.write(fp)
    finally:
        for stream in streams:
            stream.close()
        # Delete exactly the temp files created above, without a shell.
        for filename in filenames:
            try:
                os.remove(filename)
            except OSError as err:
                print('Could not remove {}: {}'.format(filename, err))
def split(paperpdf, splitpdf):
    """Cut every page of a PDF in two and write the halves as pages.

    For each page, the media box of one copy is given ``midpoint`` as its
    upper-right corner and the media box of a second copy is given the
    same point as its upper-left corner; both copies are appended to the
    output in that order.

    :param paperpdf: path of the PDF to split.
    :param splitpdf: path of the PDF to write.
    """
    output = PdfFileWriter()
    # The file is deliberately opened twice: each reader hands out its own
    # page object, so the two halves get independent media boxes.
    with open(paperpdf, "rb") as left_stream, open(paperpdf, "rb") as right_stream:
        left = PdfFileReader(left_stream)
        right = PdfFileReader(right_stream)

        pagecount = left.getNumPages()
        print("%s has %s pages to split." % (paperpdf, pagecount))

        for num in range(pagecount):
            left_page = left.getPage(num)
            right_page = right.getPage(num)

            # Half of the upper-right x coordinate, at the upper-right y.
            box = left_page.mediaBox
            midpoint = (box.getUpperRight_x() / 2, box.getUpperRight_y())

            left_page.mediaBox.upperRight = midpoint
            output.addPage(left_page)

            right_page.mediaBox.upperLeft = midpoint
            output.addPage(right_page)

        print("Writing %s pages to %s" % (output.getNumPages(), splitpdf))
        with open(splitpdf, "wb") as s:
            output.write(s)
def generate_pdf_letter(filename, template, formatdict):
    """Render a letter template to PDF on top of the letterhead.

    The template is rendered to a text-only PDF; every page of it is
    merged onto a fresh copy of the first page of the letterhead PDF. The
    result is saved in ``settings.PDF_ARCHIVE_PATH`` and also returned.

    :param filename: file name to use inside the archive directory.
    :param template: template passed to ``PDFTemplateResponse``.
    :param formatdict: template context.
    :return: ``io.BytesIO`` holding the finished PDF, or ``False`` when
        merging a page raises ``ValueError``.
    """
    # conjure up a fake request for PDFTemplateResponse
    request = RequestFactory().get('/')
    request.user = AnonymousUser()
    request.session = {}

    # produce text-only PDF from template
    pdfgenerator = PDFTemplateResponse(
        request=request,
        template=template,
        context=formatdict,
        cmd_options={
            'margin-top': 50,
            'margin-bottom': 50,
        },
    )
    textonlypdf = io.BytesIO()
    textonlypdf.write(pdfgenerator.rendered_content)

    # create a blank pdf to work with
    finalpdf = PdfFileWriter()

    # open the text-only pdf
    pdfreader = PdfFileReader(textonlypdf)

    # read the letterhead once; the file is closed straight away
    watermark_path = os.path.join(
        settings.STATICFILES_DIRS[0],
        'pdf',
        settings.PDF_LETTERHEAD_FILENAME
    )
    with open(watermark_path, 'rb') as fh:
        watermark_bytes = fh.read()

    # add the watermark to all pages
    for pagenum in range(pdfreader.getNumPages()):
        # mergePage() modifies the page it is called on, so each output
        # page needs its own letterhead page. Reusing a single page object
        # would stack the text of all pages onto every output page.
        page = PdfFileReader(io.BytesIO(watermark_bytes)).getPage(0)
        try:
            page.mergePage(pdfreader.getPage(pagenum))
        except ValueError:
            # watermark pdf might be broken?
            return False
        # add page to output
        finalpdf.addPage(page)

    # save the generated pdf to the archive
    fullpath = os.path.join(settings.PDF_ARCHIVE_PATH, filename)
    with open(fullpath, 'wb') as fh:
        finalpdf.write(fh)
    logger.info('Saved pdf to archive: %s', fullpath)

    returnfile = io.BytesIO()
    finalpdf.write(returnfile)
    return returnfile
def stampPages(self, listOfPageObjects, xPercentOffset=0.2, yPercentOffset=0.2):
    """Stamp "ISSUED FOR CONSTRUCTION" on pages and add them to ``output``.

    For every page a one-page overlay the size of the page's trim box is
    drawn with the stamp text in translucent red and merged onto the
    page. Pages with a non-zero ``/Rotate`` entry get the overlay rotated
    by 90 degrees and translated. Each stamped page is appended to the
    module-level ``output`` writer.

    :param listOfPageObjects: iterable of PDF page objects to stamp; the
        pages are modified in place.
    :param xPercentOffset: horizontal position of the text as a fraction
        of the computed page width.
    :param yPercentOffset: vertical position of the text as a fraction of
        the computed page height.
    """
    global output
    for page in listOfPageObjects:
        packet = StringIO.StringIO()

        # Trim-box extent converted to inches, then to millimetres.
        widthInches = page.trimBox[2]/72
        heightInches = page.trimBox[3]/72
        widthMill = widthInches*25.4
        heightMill = heightInches*25.4

        can = canvas.Canvas(packet, (widthInches*72, heightInches*72))
        can.setFillColorRGB(1, 0, 0, alpha=0.25)
        can.setFont("Helvetica-Bold", 25)
        # NOTE(review): the position is computed from millimetre values;
        # confirm this matches the unit the canvas expects.
        can.drawString(xPercentOffset*widthMill, yPercentOffset*heightMill,
                       "ISSUED FOR CONSTRUCTION")
        can.save()
        packet.seek(0)
        stamp = PdfFileReader(packet).getPage(0)

        rotationAngle = page['/Rotate'] if '/Rotate' in page else 0
        if rotationAngle == 0:
            page.mergePage(stamp)
        else:
            # Rotated page: turn the stamp and shift it by the same amount
            # along both axes.
            shift = (float(page.trimBox[3])/72)*25.4*sqrt(2)
            page.mergeRotatedTranslatedPage(stamp, rotation=90, tx=shift, ty=shift)
        output.addPage(page)
def get_claim_report_user(self, employee_id, **post):
    """Return one PDF with all vehicle-assignation documents of an employee.

    Only users in ``fleet.fleet_group_manager`` may call this. The
    attachments linked to the assignation logs of the employee's partners
    are collected in creation order; every page of each readable PDF gets
    a red header line naming the vehicle and the driving period, and all
    pages are concatenated. Attachments that cannot be parsed are skipped.

    :param employee_id: id of the ``hr.employee`` record.
    :param post: extra request parameters (unused).
    :return: a PDF response, or "not found" when the user lacks the group,
        the employee does not exist or no partner is linked to it.
    """
    env = request.env
    if not env.user.has_group('fleet.fleet_group_manager'):
        return request.not_found()

    employee = env['hr.employee'].search([('id', '=', employee_id)], limit=1)
    partner_ids = (employee.user_id.partner_id | employee.address_home_id).ids
    if not employee or not partner_ids:
        return request.not_found()

    car_assignation_logs = env['fleet.vehicle.assignation.log'].search(
        [('driver_id', 'in', partner_ids)])
    doc_list = env['ir.attachment'].search([
        ('res_model', '=', 'fleet.vehicle.assignation.log'),
        ('res_id', 'in', car_assignation_logs.ids)], order='create_date')

    writer = PdfFileWriter()
    font = "Helvetica"
    normal_font_size = 14

    for document in doc_list:
        car_line_doc = env['fleet.vehicle.assignation.log'].browse(document.res_id)
        try:
            raw_pdf = io.BytesIO(base64.b64decode(document.datas))
            reader = PdfFileReader(raw_pdf, strict=False, overwriteWarnings=False)
        except Exception:
            # Unreadable attachment: leave it out of the report.
            continue

        width = float(reader.getPage(0).mediaBox.getUpperRight_x())
        height = float(reader.getPage(0).mediaBox.getUpperRight_y())

        # Build a one-page overlay carrying the header text.
        header = io.BytesIO()
        can = canvas.Canvas(header)
        can.setFont(font, normal_font_size)
        can.setFillColorRGB(1, 0, 0)

        car_name = car_line_doc.vehicle_id.display_name
        date_start = car_line_doc.date_start
        date_end = car_line_doc.date_end or '...'

        text_to_print = _("%s (driven from: %s to %s)") % (car_name, date_start, date_end)
        can.drawCentredString(width / 2, height - normal_font_size, text_to_print)
        can.save()
        header_pdf = PdfFileReader(header, overwriteWarnings=False)

        for page_number in range(reader.getNumPages()):
            page = reader.getPage(page_number)
            page.mergePage(header_pdf.getPage(0))
            writer.addPage(page)

    _buffer = io.BytesIO()
    writer.write(_buffer)
    merged_pdf = _buffer.getvalue()
    _buffer.close()

    pdfhttpheaders = [
        ('Content-Type', 'application/pdf'),
        ('Content-Length', len(merged_pdf)),
    ]
    return request.make_response(merged_pdf, headers=pdfhttpheaders)
def merge_pdfs(f1, f2, output_f):
    """Merge the first page of ``f2`` onto the first page of ``f1``.

    :param f1: source accepted by ``PdfFileReader`` for the base page.
    :param f2: source accepted by ``PdfFileReader`` for the page laid on top.
    :param output_f: writable binary stream that receives the one-page PDF.
    """
    base_reader = PdfFileReader(f1)
    overlay_reader = PdfFileReader(f2)

    writer = PdfFileWriter()
    merged = base_reader.getPage(0)
    merged.mergePage(overlay_reader.getPage(0))
    writer.addPage(merged)
    writer.write(output_f)
def generate_course_info_page_pdf(class_data, num_students):
    """Fill in the course-information page of the HCP roster.

    Draws the class details onto a blank 792x612 overlay and merges that
    overlay onto the first page of ``pdf_templates/HCP_roster.pdf``
    (resolved against the current working directory).

    :param class_data: mapping with the keys read below: ``options``
        (``New``/``Renewal``/``Instructor``/``Provider`` flags),
        ``curr_instructor``, ``class_location``, ``class_date``,
        ``student_manikin_ratio`` and ``card_issue_date``. The two date
        values must support ``strftime``.
    :param num_students: number of students printed on the page.
    :return: a ``PdfFileWriter`` holding the single merged page.
    """
    # create page1 mask
    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=(792, 612))
    can.setFont("Helvetica", 10)

    # One tick mark per enabled option, at that option's row.
    options = class_data['options']
    for option, y in (('New', 428), ('Renewal', 416),
                      ('Instructor', 404), ('Provider', 392)):
        if options[option]:
            can.drawString(58, y, u"✗")

    instructor = class_data['curr_instructor']
    can.drawString(495, 426, instructor['instructor_name'])
    can.drawString(519, 403, instructor['instructor_renewal_date'])
    can.drawString(493, 389, instructor['training_center_id'])
    can.drawString(512, 378, instructor['training_center_name'])
    can.drawString(510, 366, '')  # training site name
    can.drawString(493, 354, class_data['class_location'])
    can.drawString(493, 341, '')  # address

    can.drawString(165, 283, class_data['class_date'].strftime("%m/%d/%y"))
    can.drawString(378, 283, class_data['class_date'].strftime("%m/%d/%y"))
    can.drawString(614, 283, "4")

    can.drawString(153, 259, str(num_students))
    can.drawString(381, 259, class_data['student_manikin_ratio'])
    can.drawString(581, 259, class_data['card_issue_date'].strftime("%m/%y"))
    can.save()

    packet.seek(0)
    mask = PdfFileReader(packet)

    # Load the template into memory so the file handle can be closed here
    # while the returned writer stays usable.
    roster_filename = os.path.join(os.path.realpath('.'),
                                   'pdf_templates', 'HCP_roster.pdf')
    with open(roster_filename, "rb") as roster_file:
        roster = PdfFileReader(StringIO.StringIO(roster_file.read()))

    # merge template with mask
    merged_roster = PdfFileWriter()
    page = roster.getPage(0)
    page.mergePage(mask.getPage(0))
    merged_roster.addPage(page)
    return merged_roster
def add_page_numbers(inputfile, outputfile, startno=None, endno=None,
                     fontname="Helvetica", fontsize=12, pagenoformat="- %i -",
                     pagesize=A4, posx=280, posy=800):
    """
    Adds page numbers to the input PDF file and stores the modified PDF in
    output. Optionally, the page range can be limited.

    :param inputfile: the input PDF
    :type inputfile: str
    :param outputfile: the output PDF
    :type outputfile: str
    :param startno: the first page to number, 1-based, use None to start
        from first page
    :type startno: int
    :param endno: the last page to number, 1-based, use None to end with
        last page
    :type endno: int
    :param fontname: the name of the font to use, eg 'Helvetica'
    :type fontname: str
    :param fontsize: the size of the font, eg 12
    :type fontsize: int
    :param pagenoformat: the format string for the page number, eg '- %i -'
    :type pagenoformat: str
    :param pagesize: the page size, eg A4
    :type pagesize: object
    :param posx: the X position for the page number
    :type posx: int
    :param posy: the Y position for the page number
    :type posy: int
    """
    # The input stays open until the output is written; both files are
    # closed (and the output flushed) when the blocks exit.
    with open(inputfile, "rb") as inputstream:
        inputpdf = PdfFileReader(inputstream)
        outputpdf = PdfFileWriter()
        total = inputpdf.getNumPages()
        if startno is None:
            startno = 1
        if endno is None:
            endno = total

        for i in range(total):
            page = i + 1
            current = inputpdf.getPage(i)
            # add page number?
            # taken from here: http://stackoverflow.com/a/17538003
            if startno <= page <= endno:
                # Draw the number on an overlay page and merge it in.
                packet = StringIO.StringIO()
                can = canvas.Canvas(packet, pagesize=pagesize)
                can.setFont(fontname, fontsize)
                can.drawString(posx, posy, pagenoformat % page)
                can.save()
                packet.seek(0)
                pagenopdf = PdfFileReader(packet)
                logger.info("Page %s added", page)
                current.mergePage(pagenopdf.getPage(0))
            else:
                logger.info("Page %s", page)
            outputpdf.addPage(current)

        with open(outputfile, "wb") as outputstream:
            outputpdf.write(outputstream)
def add_to_letterhead(self, data, letterhead):
    """Lay the first page of a generated PDF over the letterhead.

    :param data: source accepted by ``PdfFileReader`` holding the new PDF.
    :param letterhead: raw bytes of the letterhead PDF.
    :return: a ``PdfFileWriter`` holding the single merged page.
    """
    overlay = PdfFileReader(data)
    # The letterhead arrives as bytes and is wrapped in a stream.
    stationery = PdfFileReader(io.BytesIO(letterhead))

    writer = PdfFileWriter()
    # The letterhead page is the base; the new content is merged onto it.
    merged = stationery.getPage(0)
    merged.mergePage(overlay.getPage(0))
    writer.addPage(merged)
    return writer
def merge_pdf(infnList, outfn):
    """
    Merge chapter PDFs into one bookmarked PDF.

    Every entry contributes ``gen/<title>/<title>.pdf`` (relative to this
    module) followed by one PDF per child chapter from the same directory.
    A top-level bookmark is added per entry and a nested bookmark per
    child chapter. After writing, the whole ``gen`` directory is deleted.

    :param infnList: list of dicts with the keys ``title`` and
        ``child_chapters`` (an iterable of dicts with a ``title`` key)
    :param outfn: file name of the PDF to save
    :return: None
    """
    gen_root = os.path.join(os.path.dirname(__file__), 'gen')
    pagenum = 0
    pdf_output = PdfFileWriter()
    streams = []  # inputs must stay open until the output is written

    def append_pdf(path):
        """Append every page of the PDF at ``path``; return its page count."""
        stream = open(path, 'rb')
        streams.append(stream)
        pdf_input = PdfFileReader(stream)
        page_count = pdf_input.getNumPages()
        for i in range(page_count):
            pdf_output.addPage(pdf_input.getPage(i))
        return page_count

    try:
        for pdf in infnList:
            # Merge the first-level chapter first.
            first_level_title = pdf['title']
            dir_name = os.path.join(gen_root, first_level_title)
            page_count = append_pdf(
                os.path.join(dir_name, first_level_title + '.pdf'))

            # Bookmark pointing at the first page of this chapter.
            parent_bookmark = pdf_output.addBookmark(
                first_level_title, pagenum=pagenum)
            pagenum += page_count

            # Child chapters, if any.
            if pdf['child_chapters']:
                for child in pdf['child_chapters']:
                    second_level_title = child['title']
                    page_count = append_pdf(
                        os.path.join(dir_name, second_level_title + '.pdf'))
                    pdf_output.addBookmark(
                        second_level_title, pagenum=pagenum,
                        parent=parent_bookmark)
                    pagenum += page_count

        # Write the merged document.
        with open(outfn, 'wb') as output_stream:
            pdf_output.write(output_stream)
    finally:
        for stream in streams:
            stream.close()

    # Delete all chapter files.
    shutil.rmtree(gen_root)
def create_overlayed_page(entry_name):
    """Merge ``overlay_tmp.pdf`` onto the first page of an entry's PDF.

    Reads ``<entry_name>.original.pdf`` and ``overlay_tmp.pdf`` from the
    current directory and writes the merged first page to
    ``<entry_name>.with-ref.pdf``.

    :param entry_name: base name shared by the input and output files.
    """
    output = PdfFileWriter()
    # Inputs stay open until the output is written; every file is closed
    # (and the output flushed) when the blocks exit.
    with open("%s.original.pdf" % entry_name, "rb") as original_file, \
            open("overlay_tmp.pdf", "rb") as overlay_file:
        input1 = PdfFileReader(original_file)
        watermark = PdfFileReader(overlay_file)

        page1 = input1.getPage(0)
        page1.mergePage(watermark.getPage(0))
        output.addPage(page1)

        with open("%s.with-ref.pdf" % entry_name, "wb") as outputStream:
            output.write(outputStream)
def generate_course_info_page_pdf(course_info, num_students):
    """Fill in the course-information page of the HS roster.

    Draws the course details onto a blank 792x612 overlay and merges that
    overlay onto the first page of ``templates/HS_roster.pdf`` (resolved
    against the current working directory).

    :param course_info: mapping with the keys read below. ``child_cpr``,
        ``infant_cpr`` and ``written_test`` are ticked when equal to
        ``'yes'``; all other values are drawn as given.
    :param num_students: number of students printed on the page.
    :return: a ``PdfFileWriter`` holding the single merged page.
    """
    # create page1 mask
    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=(792, 612))
    can.setFont("Helvetica", 10)

    can.drawString(58, 428, u"✗")
    # Optional course components, ticked along one row.
    for key, x in (('child_cpr', 76), ('infant_cpr', 171),
                   ('written_test', 243)):
        if course_info[key] == 'yes':
            can.drawString(x, 415, u"✗")

    can.drawString(495, 426, course_info['instructor_name'])
    can.drawString(519, 403, u"✗")
    can.drawString(512, 389, course_info['instructor_renewal_date'])
    can.drawString(493, 378, course_info['training_center_name'])
    can.drawString(510, 366, course_info['training_center_id'])
    can.drawString(493, 341, course_info['course_location'])

    can.drawString(165, 268, course_info['course_date'])
    can.drawString(378, 268, course_info['course_date'])
    can.drawString(614, 268, "4")

    can.drawString(153, 244, str(num_students))
    can.drawString(381, 244, course_info['student_manikin_ratio'])
    can.drawString(581, 244, course_info['card_issue_date'])
    can.save()

    packet.seek(0)
    mask = PdfFileReader(packet)

    # Load the template into memory so the file handle can be closed here
    # while the returned writer stays usable.
    roster_filename = os.path.join(os.path.realpath('.'),
                                   'templates', 'HS_roster.pdf')
    with open(roster_filename, "rb") as roster_file:
        roster = PdfFileReader(StringIO.StringIO(roster_file.read()))

    # merge template with mask
    merged_roster = PdfFileWriter()
    page = roster.getPage(0)
    page.mergePage(mask.getPage(0))
    merged_roster.addPage(page)
    return merged_roster
def page_extract(start, end, SUBSECTION):
    """Render a page range of ``PDF_DIR`` to images and build cards.

    Each page in the 1-based, inclusive range ``start..end`` is written
    to a temporary single-page PDF in ``TMP_DIR``, handed to
    ``gs_pdf_to_png`` and removed again. The files then found in
    ``TMP_DIR`` are grouped in pairs and every pair is passed to
    ``make_cards`` as (front, back).

    :param start: first page to extract (1-based; converted with ``int``).
    :param end: last page to extract (1-based, inclusive).
    :param SUBSECTION: passed through to ``make_cards``.
    """
    # Loop-invariant pieces of the temporary file name.
    name, ext = os.path.splitext(os.path.basename(PDF_DIR))

    with open(PDF_DIR, 'rb') as pdf_file:
        PDF_IN = PdfFileReader(pdf_file)
        for i in range(int(start) - 1, int(end)):
            output = PdfFileWriter()
            output.addPage(PDF_IN.getPage(i))

            PDF_OUT = '{}{}'.format(TMP_DIR, '{}-{}{}'.format(name, str(i).zfill(6), ext))
            with open(PDF_OUT, 'wb') as outputStream:
                output.write(outputStream)

            gs_pdf_to_png(PDF_OUT)
            os.remove(PDF_OUT)

    # os.listdir() returns names in arbitrary order; sort them so that the
    # zero-padded page numbers pair consecutive pages as front and back.
    png_list = group(sorted(os.listdir(TMP_DIR)), 2)
    for tup in png_list:
        print(tup)
        card_front = os.path.join(TMP_DIR, tup[0])
        card_back = os.path.join(TMP_DIR, tup[1])
        make_cards(card_front, card_back, SUBSECTION)
def tearpage(filename, startpage=1):
    """
    Copy filename to a tempfile, write pages startpage..N to filename.

    Pages are counted from zero, so the default ``startpage=1`` drops the
    first page. If the copy cannot be parsed, ``_fixPdf`` is applied to
    it and parsing is retried once.

    :param filename: PDF filepath
    :param startpage: zero-based index of the page that becomes the new
        first page
    """
    # Copy the pdf to a tmp file; it is deleted when the block exits,
    # i.e. only after the output has been written.
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        # Read the copied pdf
        source = open(tmp.name, 'rb')
        try:
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                source.close()
                _fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Keep every page from startpage onwards
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages):
                output_file.addPage(input_file.getPage(i))

            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            source.close()
class PdfSplitter:
    """Copy selected pages of one PDF file into new PDF files.

    The wrapped file stays open for the lifetime of the object. Call
    ``close()`` when done, or use the instance as a context manager.
    """

    def __init__(self, path):
        """Open the PDF at ``path`` for splitting."""
        self._stream = open(path, "rb")
        try:
            self._input_pdf = PdfFileReader(self._stream)
        except Exception:
            # Do not leak the handle when the file cannot be parsed.
            self._stream.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def split(self, pages, filename):
        """
        Split pages from the wrapped pdf file (see constructor) into a new
        file called `filename`

        :param pages: Either a zero indexed page number or a list of pages
        :param filename: The name of the new file
        :return: None
        """
        if isinstance(pages, int):
            pages = [pages]

        output_writer = PdfFileWriter()
        for page in pages:
            output_writer.addPage(self._input_pdf.getPage(page))

        with open(filename, "wb") as output_stream:
            output_writer.write(output_stream)

    def close(self):
        """Close the wrapped PDF file."""
        self._stream.close()
def getPLBURL(journal, doi, count):
    """Download the PDF behind a DOI and save its first page.

    Resolves ``http://dx.doi.org/<doi>``, looks for a ``pdfurl="..."``
    attribute in the returned page, downloads that URL and writes the
    first page of the PDF to ``Single_<count>.pdf`` in the current
    directory.

    :param journal: unused.
    :param doi: DOI to resolve.
    :param count: number used in the output file name.
    :raises ValueError: when the landing page contains no ``pdfurl``.
    """
    import re

    cj = http.cookiejar.CookieJar()  # initialize the cookie jar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
    opener.addheaders = [('User-Agent', user_agent)]

    url = 'http://dx.doi.org/' + doi
    response = opener.open(url)
    try:
        landing_page = response.read()
    finally:
        response.close()

    # The body arrives as bytes and has to be decoded before searching.
    match = re.search('pdfurl="(.*?)"', landing_page.strip().decode('utf-8'))
    if match is None:
        raise ValueError('no pdfurl found on the landing page of ' + url)

    response = opener.open(match.group(1))
    try:
        pdf_bytes = response.read()
    finally:
        response.close()

    o = PdfFileReader(io.BytesIO(pdf_bytes))
    merged = PdfFileWriter()
    outName = "Single_" + str(count) + ".pdf"
    merged.addPage(o.getPage(0))
    with open(outName, 'wb') as pdf:
        merged.write(pdf)
def pdf_to_csv_with_PyPDF():
    """
    Iterate through all the PDFs stored in the ``_PDF_PATH`` folder and
    export their text to ``data.csv`` inside ``_DATA_PATH``.

    The CSV file has two columns: ``document_id`` (the file name) and
    ``document_text`` (the text of all pages, each followed by a space).
    Files that cannot be read are reported on stdout and skipped.
    """
    bar = progressbar.ProgressBar()
    csv_data_file = _DATA_PATH + "data.csv"
    with open(csv_data_file, "w", newline='') as csvfile:
        data_writer = csv.writer(csvfile)
        data_writer.writerow(["document_id", "document_text"])
        for fn in bar(os.listdir(_PDF_PATH)):
            file_path = os.path.join(_PDF_PATH, fn)
            if not file_path.endswith(".pdf"):
                continue
            try:
                # The PDF is closed as soon as its text has been extracted.
                with open(file_path, 'rb') as pdf_file:
                    input_file = PdfFileReader(pdf_file)
                    text = "".join(
                        input_file.getPage(p).extractText() + " "
                        for p in range(input_file.getNumPages())
                    )
            except utils.PdfReadError:
                print("Error al leer el PDF: {0}".format(fn))
            except Exception as e:
                print("Error desconocido en el PDF: {0}".format(fn))
                print("Error: {0}".format(e))
            else:
                # TODO: Check if text is not empty
                data_writer.writerow([fn, text])
def _merge_pdf(documents):
    '''Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf
    '''
    writer = PdfFileWriter()
    streams = []  # We have to close the streams *after* PdfFileWriter's call to write()
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport, overwriteWarnings=False)
            for page in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        # NOTE(review): the '.html' suffix is kept as-is although the
        # content is a PDF; confirm no caller relies on it before renaming.
        merged_file_fd, merged_file_path = tempfile.mkstemp(suffix='.html', prefix='report.merged.tmp.')
        # Binary mode: the writer emits bytes, which a text-mode file
        # rejects or mangles.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            try:
                stream.close()
            except Exception:
                # Best effort: a failing close must not hide the result
                # or the original error.
                pass

    return merged_file_path
def buildPDF(self, data, document_root):
    """Fill in the a125 form and save it as ``a125-gen.pdf``.

    ``data`` is parsed as JSON; the ``fields`` mapping of its first
    element supplies the values. Each value is drawn by the method of
    this object responsible for it, the drawing is merged onto the first
    page of ``<document_root>/a125.pdf`` and the result is written to
    ``<document_root>/a125-gen.pdf``. The writer is also kept in
    ``self.form``.

    :param data: JSON text of a list whose first element has a
        ``fields`` mapping with the keys used below.
    :param document_root: directory holding the form and the output.
    """
    data = json.loads(data)[0]['fields']

    content = StringIO.StringIO()
    parser = canvas.Canvas(content, pagesize=letter)

    # (drawing method, key in ``data``), in drawing order.
    drawers = (
        (self.employee_name, 'name'),
        (self.social_security, 'ssn'),
        (self.title, 'title'),
        (self.base_salary, 'base_salary'),
        (self.period, 'period'),
        (self.period_year, 'period_year'),
        (self.effective_date, 'effective_date'),
        (self.multi_campus, 'multi_campus'),
        (self.sponsored_accounts, 'sponsored_accounts'),
        (self.cost_sharing, 'cost_sharing'),
        (self.university_funds, 'university_funds'),
        (self.payments_paid, 'payments_paid'),
        (self.comments, 'comments'),
    )
    for draw, key in drawers:
        draw(parser, data[key])

    parser.save()
    content.seek(0)
    text = PdfFileReader(content)

    form = PdfFileReader(document_root+'/a125.pdf').getPage(0)
    output = PdfFileWriter()
    form.mergePage(text.getPage(0))
    output.addPage(form)

    # Closing the file guarantees the PDF is flushed to disk on return.
    with open(document_root+'/a125-gen.pdf', 'wb') as outputStream:
        output.write(outputStream)
    self.form = output
def get_images(pdf_file):
    """Yield the images embedded in the first page of a PDF.

    Only XObjects whose subtype is ``/Image`` and whose height is at
    least 100 are considered. ``/DeviceRGB`` images are decoded in mode
    ``"RGB"``, all others in mode ``"P"``.

    :param pdf_file: path of the PDF to inspect.
    :return: generator of images built with ``Image.frombytes``.
    :raises Exception: when a considered image's filter is not, or does
        not contain, ``/FlateDecode``.
    """
    with open(pdf_file, 'rb') as fp:
        first_page = PdfFileReader(fp).getPage(0)
        x_objects = first_page['/Resources']['/XObject'].getObject()

        for name in x_objects:
            candidate = x_objects[name]
            if candidate['/Subtype'] != '/Image':
                continue

            width, height = candidate['/Width'], candidate['/Height']
            # Ignore smaller images.
            if height < 100:
                continue

            data = candidate.getData()
            mode = "RGB" if candidate['/ColorSpace'] == '/DeviceRGB' else "P"

            encoding = candidate['/Filter']
            if not (encoding == '/FlateDecode' or '/FlateDecode' in encoding):
                raise Exception(
                    'Unexpected image encoding: {}'.format(encoding))
            yield Image.frombytes(mode, (width, height), data)
def generate(config, pdf_config, save_path):
    """Build the report PDF by stamping overlay pages onto a template.

    Two or three overlay ("watermark") PDFs are drawn with reportlab and
    saved to ``pdf_config['watermark1']`` .. ``pdf_config['watermark3']``
    (the third only when ``config['vessel_analysis']`` is truthy). Each
    overlay is then merged onto the matching page of one of the templates
    ``pdf_config['template1']`` .. ``pdf_config['template4']`` and the
    result is written to ``save_path``.

    :param config: mapping with the text values and image paths to draw.
    :param pdf_config: mapping with the overlay and template file paths.
    :param save_path: path of the final PDF.
    """
    # support chinese
    font_chinese = 'STSong-Light'  # from Adobe's Asian Language Packs
    pdfmetrics.registerFont(UnicodeCIDFont(font_chinese))

    # Page size used for every overlay canvas.
    w = 595.27
    h = 841.89

    has_dr = config['dr'] == u'有'

    # ---- page 1 ----
    c1 = canvas.Canvas(pdf_config['watermark1'], (w, h))
    c1.setFont(font_chinese, size=11)

    # get current time
    now = datetime.datetime.now()
    date_text = now.strftime("%Y-%m-%d")
    time_text = now.strftime('%H:%M')

    # Add content
    c1.drawString(137, h - 134, config['patient-id'])
    c1.drawString(160, h - 159, config['report-id'])
    c1.drawString(147, h - 184, date_text)
    c1.drawString(147, h - 209, time_text)
    c1.drawString(122, h - 234, config['eye'])

    # image quality content
    offset = 0
    if config['vessel_analysis']:
        c1.drawCentredString(304, h - 282, config['vessel-point'])
        c1.drawCentredString(334, h - 282, config['vessel-quality'])
        c1.drawCentredString(129, h - 299, config['distance'])
        c1.drawCentredString(432, h - 298, config['angle'])
        c1.drawCentredString(127, h - 316, config['standard'])
    else:
        offset -= 16
        c1.drawCentredString(377, h - 282, config['distance'])
        c1.drawCentredString(289, h - 299, config['angle'])
        c1.drawCentredString(346, h - 298, config['standard'])

    # diabetic retinopathy
    c1.drawCentredString(215, h - 339 - offset, config['dr'])
    if has_dr:
        c1.drawCentredString(262, h - 340 - offset, config['stage'])
        c1.drawCentredString(391, h - 340 - offset, config['bleed'])
        c1.drawCentredString(151, h - 357 - offset, config['1-bleed'])
        c1.drawCentredString(221, h - 356 - offset, config['exudation'])
        c1.drawCentredString(378, h - 357 - offset, config['1-exudation'])

    if config['vessel_analysis']:
        if has_dr:
            offset += 18
        c1.drawCentredString(250, h - 364 - offset, config['2-length'])
        c1.drawCentredString(409, h - 364 - offset,
                             config['2-length_compare'])
        c1.drawCentredString(124, h - 381 - offset, config['2-density'])
        c1.drawCentredString(260, h - 381 - offset,
                             config['2-density_compare'])
        c1.drawCentredString(378, h - 381 - offset, config['2-diameter'])
        c1.drawCentredString(151, h - 397 - offset,
                             config['2-diameter_compare'])

    c1.drawImage(config['patient-image'], 153, h - 658,
                 width=2880 // 10, height=2136 // 10)
    c1.save()

    # ---- page 2 ----
    c2 = canvas.Canvas(pdf_config['watermark2'], (w, h))
    c2.setFont(font_chinese, size=11)

    # Add content
    c2.drawImage(config['output_images']['macular_image'], 100, h - 245,
                 width=2880 // 18, height=2136 // 18)
    c2.drawCentredString(447, h - 141, config['DR_prob'])
    c2.drawCentredString(310, h - 176, config['stage'])
    c2.drawCentredString(447, h - 176, config['level1_prob'])
    c2.drawCentredString(447, h - 211, config['disc_diameter'])
    c2.drawCentredString(447, h - 241, config['macular_center_coordinate'])
    if has_dr:
        c2.drawImage(config['output_images']['bleed_image'], 113, h - 522,
                     width=2880 // 18, height=2136 // 18)
        c2.drawImage(config['output_images']['bleed_histogram'], 307,
                     h - 532, width=187, height=140)
        c2.drawImage(config['output_images']['exudation_image'], 113,
                     h - 703, width=2880 // 18, height=2136 // 18)
        c2.drawImage(config['output_images']['exudation_histogram'], 307,
                     h - 713, width=187, height=140)
    c2.save()

    # ---- page 3 (vessel analysis only) ----
    if config['vessel_analysis']:
        c3 = canvas.Canvas(pdf_config['watermark3'], (w, h))
        c3.setFont(font_chinese, size=11)

        # Add content
        c3.drawImage(config['output_images']['retinal_vessel_image'], 113,
                     h - 273, width=2880 // 18, height=2136 // 18)
        c3.drawImage(config['output_images']['quadrant_segmentation_image'],
                     320, h - 273, width=2880 // 18, height=2136 // 18)
        c3.drawCentredString(274, h - 447, config['a-density'])
        c3.drawCentredString(428, h - 447, config['a-density_compare'])
        c3.drawCentredString(274, h - 464, config['a-length'])
        c3.drawCentredString(428, h - 464, config['a-length_compare'])
        c3.drawImage(config['output_images']['a-patient_length_histogram'],
                     198, h - 588, width=150, height=116)
        c3.drawImage(config['a-normal_length_histogram'], 354, h - 588,
                     width=150, height=116)
        c3.drawCentredString(274, h - 604, config['a-diameter'])
        c3.drawCentredString(428, h - 604, config['a-diameter_compare'])
        c3.drawImage(config['output_images']['a-patient_diameter_histogram'],
                     198, h - 729, width=150, height=116)
        c3.drawImage(config['a-normal_diameter_histogram'], 354, h - 729,
                     width=150, height=116)
        c3.save()

    # ---- merge the overlays onto the template ----
    watermark_paths = [pdf_config['watermark1'], pdf_config['watermark2']]
    if config['vessel_analysis']:
        watermark_paths.append(pdf_config['watermark3'])
        template_key = 'template1' if has_dr else 'template4'
    else:
        template_key = 'template2' if has_dr else 'template3'
    # One overlay per output page: 3 with vessel analysis, otherwise 2.
    page_count = len(watermark_paths)

    # Every reader's file stays open until the output has been written,
    # then all of them are closed here.
    opened = []
    try:
        watermarks = []
        for path in watermark_paths:
            handle = open(path, "rb")
            opened.append(handle)
            watermarks.append(PdfFileReader(handle))

        template_handle = open(pdf_config[template_key], "rb")
        opened.append(template_handle)
        input_file = PdfFileReader(template_handle)

        output_file = PdfFileWriter()
        for page_number in range(page_count):
            print("Watermarking page {} of {}".format(page_number,
                                                      page_count))
            # merge the watermark with the page
            input_page = input_file.getPage(page_number)
            input_page.mergePage(watermarks[page_number].getPage(0))
            # add page from input file to output document
            output_file.addPage(input_page)

        # finally, write the merged pages to save_path
        with open(save_path, 'wb') as outputStream:
            output_file.write(outputStream)
    finally:
        for handle in opened:
            handle.close()
import PyPDF2
import io
import requests
from PyPDF2 import PdfFileReader

# Download every PDF in ``pdfs`` and append the text of each page to ``f``.
# NOTE(review): ``pdfs`` and ``f`` are not defined in this excerpt; they are
# presumably a list of URLs and an open text file - confirm against the rest
# of the file.
for url in pdfs:
    print("Scraping from" + url)
    r = requests.get(url)
    # PdfFileReader needs a seekable stream, so wrap the downloaded bytes.
    fi = io.BytesIO(r.content)
    reader = PdfFileReader(fi)
    number_of_pages = reader.getNumPages()
    for page_number in range(number_of_pages):
        page = reader.getPage(page_number)
        page_content = page.extractText()
        f.write(page_content)


def findHrefs(data):
    """Collect the "spotlights" links found in the given elements.

    :param data: iterable of elements exposing ``find_all('a')``.
    :returns: list of ``href`` values containing both ``"spotlights"`` and
        ``"www."``, in document order.

    Anchors without an ``href`` attribute are skipped instead of raising
    ``KeyError``.
    """
    links = []
    for element in data:
        for anchor in element.find_all('a'):
            href = anchor.get('href')
            if not href:
                continue
            if "spotlights" in href and "www." in href:
                links.append(href)
    return links


soup = scraping("https://www.larimer.org/health/communicable-disease/coronavirus-covid-19/covid-19-public-health-orders-and-press-releases")
data = soup.find_all("ul")
links = findHrefs(data)
engine.setProperty('rate', 120) engine.setProperty('voice', 'punjabi') engine.say(data) engine.runAndWait() name = raw_input( "Type the file name Excluding .pdf if that is in same folder else give abs path without .pdf -- " ) name = name + ".pdf" infile = PdfFileReader(name, 'rb') page = raw_input("Enter Page Number you wanna read -- ") page = int(page) num = page reader_temp = infile.getPage(page) data = reader_temp.extractText() # gtts sounds pretty better than pyttsx that's why I'm reading it through gtts # gtts takes nearly 30 sec to save that into a file and start reading but pyttsx do it within few seconds # So if you don't want to wait then you can put func2() here and delete lines which is below func1() for num in range(page + 1, infile.numPages): reader_temp = infile.getPage(num) data = reader_temp.extractText() name = str(num) + ".mp3" p1 = Process(target=func1) p1.start() p2 = Process(target=func3(name))
import os

from PyPDF2 import PdfFileReader, PdfFileWriter
import tkinter as tk
from tkinter import filedialog

# Hide the empty root window; only the file dialog should be shown.
root = tk.Tk()
root.withdraw()

file_path = filedialog.askopenfilename()  # ask the user for a PDF file
if not file_path:
    # The dialog was cancelled: stop cleanly instead of failing later while
    # trying to open an empty path.
    raise SystemExit("No file selected; nothing to encrypt.")

# The encrypted copy is written to the current directory under the original
# file name prefixed with "Encrypted ".
file_name = "Encrypted " + os.path.basename(file_path)

pdf = PdfFileReader(file_path)  # read the selected PDF
out_pdf = PdfFileWriter()  # writer that collects the pages to encrypt

pages = pdf.numPages  # number of pages in the source document
for i in range(pages):
    page_details = pdf.getPage(i)
    out_pdf.addPage(page_details)

password = input("Enter your password for encryption: ")
out_pdf.encrypt(password)

with open(file_name, 'wb') as filename:
    out_pdf.write(filename)

print(
    "\nYou can find your encrypted file under file name 'Encrypted filename(i.e. your original file name)' into your current directory!"
)
class Renderer:
    """Draw ticket layouts onto a canvas and merge them with a background.

    ``layout`` is an iterable of dicts; each one has a ``type`` of either
    ``"barcodearea"`` or ``"textarea"`` plus the position and style keys
    read by the matching ``_draw_*`` method.
    """

    def __init__(self, event, layout, background_file):
        """Store the layout and, if given, load the background PDF.

        :param event: passed to ``get_variables`` to build the lookup of
            available text variables.
        :param layout: iterable of layout element dicts.
        :param background_file: file-like object holding the background
            PDF, or a falsy value for no background.
        """
        self.layout = layout
        self.background_file = background_file
        self.variables = get_variables(event)
        if self.background_file:
            self.bg_pdf = PdfFileReader(BytesIO(self.background_file.read()))
        else:
            self.bg_pdf = None

    @classmethod
    def _register_fonts(cls):
        """Register the bundled Open Sans faces and every configured font.

        Variant faces are registered under the family name plus the suffix
        `` I``, `` B`` or `` B I``, which is the naming ``_draw_textarea``
        relies on when it builds a font name.
        """
        bundled = (
            ('Open Sans', 'fonts/OpenSans-Regular.ttf'),
            ('Open Sans I', 'fonts/OpenSans-Italic.ttf'),
            ('Open Sans B', 'fonts/OpenSans-Bold.ttf'),
            ('Open Sans B I', 'fonts/OpenSans-BoldItalic.ttf'),
        )
        for name, path in bundled:
            pdfmetrics.registerFont(TTFont(name, finders.find(path)))

        # (key in the font definition, suffix of the registered name)
        variants = (('italic', ' I'), ('bold', ' B'), ('bolditalic', ' B I'))
        for family, styles in get_fonts().items():
            pdfmetrics.registerFont(
                TTFont(family, finders.find(styles['regular']['truetype'])))
            for style_key, suffix in variants:
                if style_key in styles:
                    pdfmetrics.registerFont(
                        TTFont(family + suffix,
                               finders.find(styles[style_key]['truetype'])))

    def _draw_barcodearea(self, canvas: Canvas, op: OrderPosition, o: dict):
        """Draw a square QR code encoding ``op.secret``.

        ``o['size']``, ``o['left']`` and ``o['bottom']`` are given in mm.
        """
        reqs = float(o['size']) * mm
        qrw = QrCodeWidget(op.secret, barLevel='H', barHeight=reqs,
                           barWidth=reqs)
        d = Drawing(reqs, reqs)
        d.add(qrw)
        qr_x = float(o['left']) * mm
        qr_y = float(o['bottom']) * mm
        renderPDF.draw(d, canvas, qr_x, qr_y)

    def _get_text_content(self, op: OrderPosition, order: Order, o: dict):
        """Return the text to show for layout element ``o``.

        ``o['content']`` selects the source: ``'other'`` uses the literal
        ``o['text']``, ``'meta:<key>'`` reads the event meta data, and any
        other value is looked up in ``self.variables``. An empty content
        key or a failing variable evaluation yields ``'(error)'``; an
        unknown key yields ``''``.
        """
        ev = op.subevent or order.event
        if not o['content']:
            return '(error)'
        if o['content'] == 'other':
            return o['text'].replace("\n", "<br/>\n")
        elif o['content'].startswith('meta:'):
            return ev.meta_data.get(o['content'][5:]) or ''
        elif o['content'] in self.variables:
            try:
                return self.variables[o['content']]['evaluate'](op, order, ev)
            except Exception:
                # A broken variable must not abort rendering, but control
                # flow exceptions (SystemExit, KeyboardInterrupt) still
                # propagate.
                logger.exception('Failed to process variable.')
                return '(error)'
        return ''

    def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order,
                       o: dict):
        """Draw a paragraph of text as described by layout element ``o``."""
        # Build the registered font name, e.g. "Open Sans B I".
        font = o['fontfamily']
        if o['bold']:
            font += ' B'
        if o['italic']:
            font += ' I'

        align_map = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
        style = ParagraphStyle(
            name=uuid.uuid4().hex,
            fontName=font,
            fontSize=float(o['fontsize']),
            leading=float(o['fontsize']),
            autoLeading="max",
            textColor=Color(o['color'][0] / 255, o['color'][1] / 255,
                            o['color'][2] / 255),
            alignment=align_map[o['align']])

        # Strip all markup except line breaks, then normalise every <br>
        # variant to the "<br/>" form.
        text = re.sub(
            "<br[^>]*>", "<br/>",
            bleach.clean(self._get_text_content(op, order, o) or "",
                         tags=["br"],
                         attributes={},
                         styles=[],
                         strip=True))
        p = Paragraph(text, style=style)
        p.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
        # p_size = p.wrap(float(o['width']) * mm, 1000 * mm)
        ad = getAscentDescent(font, float(o['fontsize']))
        p.drawOn(canvas, float(o['left']) * mm,
                 float(o['bottom']) * mm - ad[1])

    def draw_page(self, canvas: Canvas, order: Order, op: OrderPosition):
        """Draw every layout element for ``op`` and finish the page."""
        for o in self.layout:
            if o['type'] == "barcodearea":
                self._draw_barcodearea(canvas, op, o)
            elif o['type'] == "textarea":
                self._draw_textarea(canvas, op, order, o)
        canvas.showPage()

    def render_background(self, buffer, title=_('Ticket')):
        """Merge each page in ``buffer`` onto a copy of the background.

        :param buffer: seekable stream containing the drawn PDF.
        :param title: value stored in the ``/Title`` metadata field.
        :returns: a ``BytesIO`` positioned at the start of the merged PDF.

        NOTE(review): this reads ``self.bg_pdf`` unconditionally, so it
        fails when the renderer was created without a background file.
        """
        from PyPDF2 import PdfFileWriter, PdfFileReader
        buffer.seek(0)
        new_pdf = PdfFileReader(buffer)
        output = PdfFileWriter()
        for page in new_pdf.pages:
            # mergePage mutates its target, so work on a copy of page 0.
            bg_page = copy.copy(self.bg_pdf.getPage(0))
            bg_page.mergePage(page)
            output.addPage(bg_page)
        output.addMetadata({
            '/Title': str(title),
            '/Creator': 'pretix',
        })
        outbuffer = BytesIO()
        output.write(outbuffer)
        outbuffer.seek(0)
        return outbuffer
print('usage: pcut --help') sys.exit(1) try: with open(args.input, 'rb') as pdf_file: file_reader = PdfFileReader(pdf_file) # Let's check if start and end arguments make sense. file_pages_number = file_reader.getNumPages() if args.start > file_pages_number: print('{}: this file only has {} pages, cannot start cutting at page {}'.format( sys.argv[0], file_pages_number, args.start)) sys.exit(1) elif args.end > file_pages_number: print('{}: this file only has {} pages, cannot end cutting at page {}'.format( sys.argv[0], file_pages_number, args.end)) sys.exit(1) output_pdf = PdfFileWriter() for i in range(args.start, args.end + 1): page = file_reader.getPage(i) output_pdf.addPage(page) output_file = open(args.output, 'wb') output_pdf.write(output_file) output_file.close() except FileNotFoundError: print('{}: file \"{}\" not found'.format(sys.argv[0], args.input)) except utils.PdfReadError: print('{}: that is not a PDF!'.format(sys.argv[0]))
# print(f'This is the start of the output for: {files}') headerset = 0 data_row = [] pdf_file = open(f'{pdf_output_path}/{files}', 'rb') pdfreader = PdfFileReader(pdf_file) num_pages = pdfreader.numPages if num_pages < 23: up_limit = num_pages else: up_limit = 23 dfset = 0 county_added_list = [] for pages in range(3, up_limit): datalines = 0 page = pdfreader.getPage(pages) if page.extractText().find('Coronavirus: PUI testing by county') > 0 or page.extractText().find( 'Coronavirus: All persons with tests reported') > 0: pdf_writer.addPage(page) page_text = page.extractText() col_num = 0 index_num = 0 for lines in page_text.split('\n'): line_n = len(page_text.split('\n')) write_line = 0 if not re.match(text_line_search, lines, flags=0) and not re.match(date_search, lines, flags=0) and datalines == 0 and lines not in county_list and headerset != 1: header.append(lines) if re.match(date_search, lines, flags=0): date = lines[0:12]
def extractPages(nameList):
    """Build one PDF per name, send them to the printer, then archive them.

    For every entry of ``nameList`` a name-stamped copy of the first page of
    ``Traveler.pdf`` is created. Page ``i`` of the tickets and tags documents
    is then combined with that stamped page into ``<name>.pdf``, which is
    printed through ``win32api.ShellExecute`` and finally moved into a dated
    output directory.

    Relies on module-level names that are not visible in this excerpt
    (``ticketsD``, ``tagsD``, ``final``, ``currentprinter``, ``original`` and
    the ``month_entry``/``day_entry``/``year_entry`` widgets).

    NOTE(review): ``nameList`` is modified in place while printing (a "$"
    marker is appended and later removed).
    NOTE(review): the combining loop indexes ``nameList`` by ticket page
    number, so it presumably expects one name per ticket page - confirm.
    """
    global original
    all_tickets = PdfFileReader(ticketsD)
    all_tag = PdfFileReader(tagsD)
    # Step 1: stamp each name onto a copy of Traveler.pdf -> "<name>WM.pdf".
    for i in range(len(nameList)):
        c = canvas.Canvas("Mergeable.pdf")
        c.drawString(100, 740, nameList[i])
        c.showPage()
        c.save()
        watermark = PdfFileReader("Mergeable.pdf")
        watermarkpage = watermark.getPage(0)
        pdf = PdfFileReader("Traveler.pdf")
        pdfwrite = PdfFileWriter()
        pdfpage = pdf.getPage(0)
        pdfpage.mergePage(watermarkpage)
        pdfwrite.addPage(pdfpage)
        with open(nameList[i] + "WM.pdf", 'wb') as fh:
            pdfwrite.write(fh)
    pgnum = all_tickets.getNumPages()
    # The first line of OutputLocation.txt is the base output directory.
    outloc = open('OutputLocation.txt', 'r')
    end = outloc.readline().strip()
    outloc.close()
    dat = open('date&default.txt', 'r')
    prevdate = dat.readline().strip()  # read but not used in this function
    dat.close()
    # Step 2: create the dated target directory. If "<m>_<d>_<y>" already
    # exists, the first free "<m>_<d>_<y>(n)" with n in 1..20 is used and
    # remembered in ``puthere``.
    doubleloc = False
    if not os.path.isdir(end + "/" + month_entry.get() + "_" +
                         day_entry.get() + "_" + year_entry.get()):
        dir = os.path.join(end + "/" + month_entry.get() + "_" +
                           day_entry.get() + "_" + year_entry.get())
        if not os.path.exists(dir):
            os.mkdir(dir)
    else:
        for i in range(20):
            if not os.path.isdir(end + "/" + month_entry.get() + "_" +
                                 day_entry.get() + "_" + year_entry.get() +
                                 "(" + str(i + 1) + ")"):
                dir = os.path.join(end + "/" + month_entry.get() + "_" +
                                   day_entry.get() + "_" + year_entry.get() +
                                   "(" + str(i + 1) + ")")
                if not os.path.exists(dir):
                    os.mkdir(dir)
                puthere = i + 1
                doubleloc = True
                break
    # Step 3: combine ticket page, tag page and stamped page into
    # "<name>.pdf".
    for i in range(pgnum):
        cons = PdfFileReader(nameList[i] + 'WM.pdf')
        curr_ticket = all_tickets.getPage(i)
        curr_tag = all_tag.getPage(i)
        constant = cons.getPage(0)
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(curr_ticket)
        pdf_writer.addPage(curr_tag)
        pdf_writer.addPage(constant)
        with Path(str(nameList[i]) + ".pdf").open(mode="wb") as output_file:
            pdf_writer.write(output_file)
    # Step 4: print in the order given by ``final``; a name whose first 8
    # characters occur in an entry of ``final`` is printed and tagged with a
    # trailing "$".
    for k in range(len(final)):
        for m in range(len(nameList)):
            if nameList[m][:8] in final[k]:
                win32api.ShellExecute(0, 'print',
                                      str(nameList[m]) + '.pdf',
                                      currentprinter, '.', 0)
                nameList[m] = nameList[m] + "$"
                print("Printed " + str(nameList[m]) + ".pdf")
    # Names without the "$" tag are printed now; tagged names get the tag
    # stripped again.
    for m in range(len(nameList)):
        if nameList[m][-1] != "$":
            pass
            win32api.ShellExecute(0, 'print',
                                  str(nameList[m]) + '.pdf', currentprinter,
                                  '.', 0)
        else:
            nameList[m] = nameList[m][:-1]
    # The stamped intermediate files are no longer needed.
    for i in range(len(nameList)):
        os.remove(str(nameList[i]) + "WM.pdf")
    # Block until the operator presses Enter.
    isdone = "null"
    while isdone == "null":
        isdone = input(
            "Press Enter if Printing is Done (PDFs are no longer open on PC): "
        )
    #time.sleep(30)
    # Step 5: move the generated PDFs into the dated directory, starting over
    # whenever a PermissionError is raised.
    while True:
        try:
            for i in range(pgnum):
                if doubleloc == False:
                    try:
                        shutil.move(
                            (original + "/" + str(nameList[i]) + ".pdf"),
                            (end + "/" + month_entry.get() + "_" +
                             day_entry.get() + "_" + year_entry.get()))
                    except shutil.Error:
                        pass
                else:
                    try:
                        shutil.move(
                            (original + "/" + str(nameList[i]) + ".pdf"),
                            (end + "/" + month_entry.get() + "_" +
                             day_entry.get() + "_" + year_entry.get() + "(" +
                             str(puthere) + ")"))
                    except shutil.Error:
                        pass
            break
        except PermissionError:
            print("Waiting for printing")
# Creating pdf from existing pdf
from PyPDF2 import PdfFileWriter, PdfFileReader

pdf_writer = PdfFileWriter()
pdf_reader = PdfFileReader('sample.pdf')

# Copy every page of the source document into the writer.
for page in range(pdf_reader.numPages):
    obj = pdf_reader.getPage(page)
    pdf_writer.addPage(obj)

# Use a context manager so the output is flushed and closed before the
# success message is printed.
with open('output.pdf', 'wb') as output_file:
    pdf_writer.write(output_file)

print('File created successfully')
from PyPDF2 import PdfFileWriter, PdfFileReader

# Split CERN.pdf into one file per page: page0.pdf, page1.pdf, ...
# The source file stays open for the whole loop and is closed afterwards.
with open("CERN.pdf", "rb") as source:
    inputpdf = PdfFileReader(source)

    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        with open("page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
def _remove_files_in(directory):
    """Delete every regular file directly inside ``directory`` (best effort)."""
    for entry in os.listdir(directory):
        f_path = os.path.join(directory, entry)
        try:
            if os.path.isfile(f_path):
                os.unlink(f_path)
        except OSError:
            # A file that cannot be removed must not fail the request.
            pass


def post_create(request):
    """Handle an uploaded PDF: split, crop and classify every page.

    For a valid form the uploaded PDF is split into single-page PDFs, each
    page is converted to a PNG and cropped, and the external labelling
    script is run on the cropped image. One row per page (image name,
    class, score, validity flag) is appended to ``main_csv``, which is then
    written to ``data.csv`` inside the instance's cropped folder.

    :param request: the Django request.
    :returns: a redirect to ``/after/`` on success, otherwise the rendered
        upload form.
    """
    form = PostForm(request.POST or None, request.FILES or None)
    if form.is_valid():
        instance = form.save(commit=False)
        instance.save()
        folder.append(str(instance))
        # NOTE(review): this creates the directory of the FIRST entry of the
        # module-level ``folder`` list, not necessarily the current
        # instance - confirm this is intended.
        try:
            os.makedirs(cropped_folder + folder[0])
        except OSError:
            # Typically "already exists"; directory creation stays best
            # effort as before.
            pass

        inputpdf = PdfFileReader(instance.pdf.open())

        for k in range(inputpdf.numPages):
            temp = []

            # Write page k to its own single-page PDF.
            single_page = PdfFileWriter()
            single_page.addPage(inputpdf.getPage(k))
            f_name = dest_pdf + str(instance) + '-page-' + str(k + 1) + '.pdf'
            with open(f_name, "wb") as outputStream:
                single_page.write(outputStream)

            # Convert that page to PNG and crop it.
            page = convert_from_path(f_name, dpi=100)
            fname = dest_png + str(instance) + '-page-' + str(k + 1) + '.png'
            page[0].save(fname, 'PNG')
            img = cv2.imread(fname)
            fn = crop(img, str(instance), k, cropped_folder + str(instance))

            # Apply the model. The argument list is passed directly (no
            # shell) because ``fn`` is derived from the uploaded file's
            # name and must not be interpreted by a shell.
            command = [
                "python3",
                src_label,
                "--graph={}".format(src_graph),
                "--image={}".format(fn),
            ]
            test = subprocess.Popen(command, stdout=subprocess.PIPE)
            raw_output, err = test.communicate()
            output = raw_output.decode("utf-8").split('\n')

            image_name = str(fn.split('/')[-1])
            img_list.append(image_name)
            temp.append(image_name)

            # The 4th output line carries the result: its first character is
            # the class and "(name=value)" holds the score.
            s = output[3]
            c = s[0]
            temp.append(c)
            score_part = s[s.find("(") + 1:s.find(")")]
            num = score_part.split('=')[1]
            temp.append(num)
            if float(num) > 0.9:
                temp.append("Valid")
            else:
                temp.append("ValidationRequired")
            main_csv.append(temp)
            classes.append(Encoder[c])

        # Remove the intermediate files.
        for directory in (pdfs_norm, dest_pdf, dest_png):
            _remove_files_in(directory)

        # newline='' is what the csv module expects for files it writes.
        with open(cropped_folder + str(instance) + '/data.csv', 'w',
                  newline='') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerows(main_csv)

        messages.success(request, "Success!")
        # redirect() to a page
        return redirect('/after/')

    context = {"form": form}
    return render(request, 'up_conv/base.html', context)
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger
import os

caminho_pdf = "pdf"

# Disabled alternative, kept for reference: merge every file found under
# ``caminho_pdf`` into a single PDF.
#
# novo_pdf = PyPDF2.PdfFileMerger()
#
# for root, dirs, files in os.walk(caminho_pdf):
#     for file in files:
#         camininho_completo = os.path.join(root, file)
#
#         arquivo_pdf = open(camininho_completo, "rb")
#         novo_pdf.append(arquivo_pdf)
#
#
# with open(f"{caminho_pdf}/novo_arquivo.pdf", "wb") as meu_novo_pdf:
#     novo_pdf.write(meu_novo_pdf)

# Split pdf/arquivo1.pdf into one file per page: novos_pdf/0.pdf, 1.pdf, ...
with open("pdf/arquivo1.pdf", "rb") as arquivo_pdf:
    leitor = PdfFileReader(arquivo_pdf)
    num_paginhas = leitor.getNumPages()

    for num_paginha in range(num_paginhas):
        # A fresh writer per page, so each output holds exactly one page.
        escritor = PdfFileWriter()
        escritor.addPage(leitor.getPage(num_paginha))

        destino = f"novos_pdf/{num_paginha}.pdf"
        with open(destino, "wb") as novo_pdf:
            escritor.write(novo_pdf)
# Generate one personalised offer letter per row of the candidates CSV.
# Column 0 holds the date and column 1 the candidate name.
with open('Offer Letters Candidates.csv', 'r') as csvFile:
    reader = csv.reader(csvFile)
    for row in reader:
        if len(row) < 2:
            # Blank or truncated line (e.g. a trailing newline): skip it
            # instead of failing with IndexError.
            continue
        name = row[1]
        date = row[0]

        # Draw the overlay in memory: white boxes hide the placeholder
        # text of the template, then the real values are written on top.
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=letter)
        can.setFillColorRGB(1, 1, 1)
        can.rect(95, 645, 120, 15, fill=1, stroke=0)
        can.rect(103, 689, 120, 20, fill=1, stroke=0)
        can.setFillColorRGB(0, 0, 0)
        can.setFont('Calibri', 12)
        can.drawString(97, 649, name)
        can.setFont('Calibri', 12)
        can.drawString(104, 689, date)
        can.save()

        packet.seek(0)
        new_pdf = PdfFileReader(packet)

        # The template is re-read for every letter because mergePage
        # modifies the page it is called on. The template file stays open
        # until the letter has been written and is then closed.
        with open("Offer Letter Template.pdf", "rb") as template_stream:
            existing_pdf = PdfFileReader(template_stream)
            output = PdfFileWriter()
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            with open("Offer Letter " + name + ".pdf", "wb") as outputStream:
                output.write(outputStream)
# Location of the practice file inside the user's home directory.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

pdf = PdfFileReader(str(pdf_path))

# Page count and document metadata.
print(pdf.getNumPages())
print(pdf.documentInfo)
print(pdf.documentInfo.title)

# ---------------------------
# Extracting Text From a Page
# ---------------------------

first_page = pdf.getPage(0)
print(type(first_page))
print(first_page.extractText())

# Print the text of every page in order.
for page in pdf.pages:
    print(page.extractText())

# -----------------------
# Putting It All Together
# -----------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader  # noqa
class pypdfProcessor(object): """ Create an instance of this class to open a PDF file, process the contents of each page and draw each one on demand using the Python pypdf package """ def __init__(self, parent, fileobj, showloadprogress): self.parent = parent self.showloadprogress = showloadprogress self.pdfdoc = PdfFileReader(fileobj) self.numpages = self.pdfdoc.getNumPages() page1 = self.pdfdoc.getPage(0) self.pagewidth = float(page1.mediaBox.getUpperRight_x()) self.pageheight = float(page1.mediaBox.getUpperRight_y()) self.pagedrawings = {} self.unimplemented = {} self.formdrawings = {} "These methods interpret the PDF contents as a set of drawing commands" def Progress(self, ptype, value): " This function is called at regular intervals during Drawfile" if ptype == 'start': msg = 'Reading pdf file' self.progbar = wx.ProgressDialog('Load file', msg, value, None, wx.PD_AUTO_HIDE| wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME) elif ptype == 'progress': self.progbar.Update(value) elif ptype == 'end': self.progbar.Destroy() def DrawFile(self, frompage, topage): """ Build set of drawing commands from PDF contents. Ideally these could be drawn straight into a PseudoDC and the visible section painted directly into scrolled window, but we need to be able to zoom and scale the output quickly without having to rebuild the drawing commands (slow). So roll our own command lists, one per page, into self.pagedrawings. 
""" t0 = time.time() numpages_generated = 0 rp = (self.showloadprogress and frompage == 0 and topage == self.numpages-1) if rp: self.Progress('start', self.numpages) for self.pageno in range(frompage, topage+1): self.gstate = pdfState() # state is reset with every new page self.saved_state = [] self.page = self.pdfdoc.getPage(self.pageno) numpages_generated += 1 pdf_fonts = self.FetchFonts(self.page) self.pagedrawings[self.pageno] = self.ProcessOperators( self.page.extractOperators(), pdf_fonts) if rp: self.Progress('progress', numpages_generated) ## print 'Pages %d to %d. %d pages created in %.2f seconds' % ( ## frompage, topage, numpages_generated,(time.time()-t0)) if rp: self.Progress('end', None) self.parent.GoPage(frompage) def RenderPage(self, gc, pageno, scale=None): """ Render the set of pagedrawings In a pdf file, bitmaps are treated as being of unit width and height and are scaled via a previous ConcatTransform containing the corresponding width and height as scale factors. wx.GraphicsContext/Cairo appear not to respond to this so scaling is removed from transform and width & height are added to the Drawbitmap call. 
""" drawdict = {'ConcatTransform': gc.ConcatTransform, 'PushState': gc.PushState, 'PopState': gc.PopState, 'SetFont': gc.SetFont, 'SetPen': gc.SetPen, 'SetBrush': gc.SetBrush, 'DrawText': gc.DrawText, 'DrawBitmap': gc.DrawBitmap, 'CreatePath': gc.CreatePath, 'DrawPath': gc.DrawPath } for drawcmd, args, kwargs in self.pagedrawings[pageno]: if drawcmd == 'ConcatTransform': cm = gc.CreateMatrix(*args, **kwargs) args = (cm,) if drawcmd == 'CreatePath': gp = drawdict[drawcmd](*args, **kwargs) continue elif drawcmd == 'DrawPath': args = (gp, args[1]) if drawcmd in drawdict: drawdict[drawcmd](*args, **kwargs) else: pathdict = {'MoveToPoint': gp.MoveToPoint, 'AddLineToPoint': gp.AddLineToPoint, 'AddCurveToPoint': gp.AddCurveToPoint, 'AddRectangle': gp.AddRectangle, 'CloseSubpath': gp.CloseSubpath } if drawcmd in pathdict: pathdict[drawcmd](*args, **kwargs) def FetchFonts(self, currentobject): " Return the standard fonts in current page or form" pdf_fonts = {} try: fonts = currentobject["/Resources"].getObject()['/Font'] for key in fonts: pdf_fonts[key] = fonts[key]['/BaseFont'][1:] # remove the leading '/' except KeyError: pass return pdf_fonts def ProcessOperators(self, opslist, pdf_fonts): " Interpret each operation in opslist and return in drawlist" drawlist = [] path = [] for operand, operator in opslist : g = self.gstate if operator == 'cm': # new transformation matrix # some operands need inverting because directions of y axis # in pdf and graphics context are opposite a, b, c, d, e, f = map(float, operand) drawlist.append(['ConcatTransform', (a, -b, -c, d, e, -f), {}]) elif operator == 'q': # save state self.saved_state.append(copy.deepcopy(g)) drawlist.append(['PushState', (), {}]) elif operator == 'Q': # restore state self.gstate = self.saved_state.pop() drawlist.append(['PopState', (), {}]) elif operator == 'RG': # Stroke RGB rs, gs, bs = [int(v*255) for v in map(float, operand)] g.strokeRGB = wx.Colour(rs, gs, bs) elif operator == 'rg': # Fill RGB rf, gf, bf = 
[int(v*255) for v in map(float, operand)] g.fillRGB = wx.Colour(rf, gf, bf) elif operator == 'K': # Stroke CMYK rs, gs, bs = self.ConvertCMYK(operand) g.strokeRGB = wx.Colour(rs, gs, bs) elif operator == 'k': # Fill CMYK rf, gf, bf = self.ConvertCMYK(operand) g.fillRGB = wx.Colour(rf, gf, bf) elif operator == 'w': # Line width g.lineWidth = float(operand[0]) elif operator == 'J': # Line cap ix = float(operand[0]) g.lineCapStyle = {0: wx.CAP_BUTT, 1: wx.CAP_ROUND, 2: wx.CAP_PROJECTING}[ix] elif operator == 'j': # Line join ix = float(operand[0]) g.lineJoinStyle = {0: wx.JOIN_MITER, 1: wx.JOIN_ROUND, 2: wx.JOIN_BEVEL}[ix] elif operator == 'd': # Line dash pattern g.lineDashArray = map(int, operand[0]) g.lineDashPhase = int(operand[1]) elif operator in ('m', 'c', 'l', 're', 'v', 'y', 'h'): # path defining ops path.append([map(float, operand), operator]) elif operator in ('b', 'B', 'b*', 'B*', 'f', 'F', 'f*', 's', 'S', 'n'): # path drawing ops drawlist.extend(self.DrawPath(path, operator)) path = [] elif operator == 'BT': # begin text object g.textMatrix = [1, 0, 0, 1, 0, 0] g.textLineMatrix = [1, 0, 0, 1, 0, 0] elif operator == 'ET': # end text object continue elif operator == 'Tm': # text matrix g.textMatrix = map(float, operand) g.textLineMatrix = map(float, operand) elif operator == 'TL': # text leading g.leading = float(operand[0]) #elif operator == 'Tc': # character spacing # g.charSpacing = float(operand[0]) elif operator == 'Tw': # word spacing g.wordSpacing = float(operand[0]) elif operator == 'Ts': # super/subscript g.textRise = float(operand[0]) elif operator == 'Td': # next line via offsets g.textLineMatrix[4] += float(operand[0]) g.textLineMatrix[5] += float(operand[1]) g.textMatrix = copy.copy(g.textLineMatrix) elif operator == 'T*': # next line via leading g.textLineMatrix[4] += 0 g.textLineMatrix[5] -= g.leading if g.leading is not None else 0 g.textMatrix = copy.copy(g.textLineMatrix) elif operator == 'Tf': # text font g.font = pdf_fonts[operand[0]] 
g.fontSize = float(operand[1]) elif operator == 'Tj': # show text drawlist.extend(self.DrawTextString(operand[0])) elif operator == 'Do': # invoke named XObject dlist = self.InsertXObject(operand[0]) if dlist: # may be unimplemented decode drawlist.extend(dlist) elif operator == 'INLINE IMAGE': # special pyPdf case + operand is a dict dlist = self.InlineImage(operand) if dlist: # may be unimplemented decode drawlist.extend(dlist) else: # report once if operator not in self.unimplemented: if VERBOSE: print 'PDF operator %s is not implemented' % operator self.unimplemented[operator] = 1 # Fix bitmap transform. Remove the scaling from any transform matrix that precedes # a DrawBitmap operation as the scaling is now done in that operation. for k in range(len(drawlist)-1): if drawlist[k][0] == 'ConcatTransform' and drawlist[k+1][0] == 'DrawBitmap': args = list(drawlist[k][1]) args[0] = 1.0 args[3] = 1.0 drawlist[k][1] = tuple(args) return drawlist def SetFont(self, pdfont, size): """ Returns wx.Font instance from supplied pdf font information """ self.knownfont = True pdfont = pdfont.lower() if pdfont.count('courier'): family = wx.FONTFAMILY_MODERN font = 'Courier New' elif pdfont.count('helvetica'): family = wx.FONTFAMILY_SWISS font = 'Arial' elif pdfont.count('times'): family = wx.FONTFAMILY_ROMAN font = 'Times New Roman' elif pdfont.count('symbol'): family = wx.FONTFAMILY_DEFAULT font = 'Symbol' elif pdfont.count('zapfdingbats'): family = wx.FONTFAMILY_DEFAULT font = 'Wingdings' else: if VERBOSE: print 'Unknown font %s' % pdfont self.knownfont = False family = wx.FONTFAMILY_SWISS font = 'Arial' weight = wx.FONTWEIGHT_NORMAL if pdfont.count('bold'): weight = wx.FONTWEIGHT_BOLD style = wx.FONTSTYLE_NORMAL if pdfont.count('oblique') or pdfont.count('italic'): style = wx.FONTSTYLE_ITALIC return wx.Font(max(1,size), family, style, weight, faceName=font) def DrawTextString(self, text): "word spacing only works for horizontal text (??)" dlist = [] g = self.gstate f = 
self.SetFont(g.font, g.fontSize*self.parent.font_scale) dlist.append(['SetFont', (f, g.fillRGB), {}]) if g.wordSpacing > 0: textlist = text.split(' ') else: textlist = [text,] for item in textlist: dlist.append(self.DrawTextItem(item, f)) return dlist def DrawTextItem(self, textitem, f): dc = wx.ClientDC(self.parent) # dummy dc for text extents g = self.gstate x = g.textMatrix[4] y = g.textMatrix[5] + g.textRise if g.wordSpacing > 0: textitem += ' ' wid, ht, descend, xlead = dc.GetFullTextExtent(textitem, f) if have_rlwidth and self.knownfont: # use ReportLab stringWidth if available width = stringWidth(textitem, g.font, g.fontSize) else: width = wid g.textMatrix[4] += (width + g.wordSpacing) # update current x position return ['DrawText', (textitem, x, -y-(ht-descend)), {}] def DrawPath(self, path, action): """ Stroke and/or fill the defined path depending on operator """ dlist = [] g = self.gstate acts = {'S': (1, 0, 0), 's': (1, 0, 0), 'f': (0, 1, wx.WINDING_RULE), 'F': (0, 1, wx.WINDING_RULE), 'f*': (0, 1, wx.ODDEVEN_RULE), 'B': (1, 1, wx.WINDING_RULE), 'B*': (1, 1, wx.ODDEVEN_RULE), 'b': (1, 1, wx.WINDING_RULE), 'b*': (1, 1, wx.ODDEVEN_RULE), 'n': (0, 0, 0) } stroke, fill, rule = acts[action] if action in ('s', 'b', 'b*'): path.append([[], 'h']) # close path if stroke: if g.lineDashArray: style = wx.USER_DASH else: style = wx.SOLID cpen = wx.Pen(g.strokeRGB, g.lineWidth, style) cpen.SetCap(g.lineCapStyle) cpen.SetJoin(g.lineJoinStyle) if g.lineDashArray: cpen.SetDashes(g.lineDashArray) dlist.append(['SetPen', (cpen,), {}]) else: dlist.append(['SetPen', (wx.TRANSPARENT_PEN,), {}]) if fill: dlist.append(['SetBrush', (wx.Brush(g.fillRGB),), {}]) else: dlist.append(['SetBrush', (wx.TRANSPARENT_BRUSH,), {}]) dlist.append(['CreatePath', (), {}]) for xylist, op in path: if op == 'm': # move (to) current point x0 = xc = xylist[0] y0 = yc = -xylist[1] dlist.append(['MoveToPoint', (x0, y0), {}]) elif op == 'l': # draw line x2 = xylist[0] y2 = -xylist[1] 
dlist.append(['AddLineToPoint', (x2, y2), {}]) xc = x2 yc = y2 elif op == 're': # draw rectangle (x,y at top left) x = xylist[0] y = -xylist[1] w = xylist[2] h = xylist[3] dlist.append(['AddRectangle', (x, y-h, w, h), {}]) elif op in ('c', 'v', 'y'): # draw Bezier curve args = [] if op == 'v': args.extend([xc, yc]) args.extend([xylist[0], -xylist[1], xylist[2], -xylist[3]]) if op == 'y': args.extend([xylist[2], -xylist[3]]) if op == 'c': args.extend([xylist[4], -xylist[5]]) dlist.append(['AddCurveToPoint', args, {}]) elif op == 'h': dlist.append(['CloseSubpath', (), {}]) dlist.append(['DrawPath', ('GraphicsPath', rule), {}]) return dlist def InsertXObject(self, name): " XObject can be an image or a 'form' (an arbitrary PDF sequence) " dlist = [] xobject = self.page["/Resources"].getObject()['/XObject'] stream = xobject[name] if stream.get('/Subtype') == '/Form': # insert contents into current page drawing if not name in self.formdrawings: # extract if not already done pdf_fonts = self.FetchFonts(stream) bbox = stream.get('/BBox') matrix = stream.get('/Matrix') form_ops = ContentStream(stream, self.pdfdoc).operations oplist = [([], 'q'), (matrix, 'cm')] # push state & apply matrix oplist.extend(form_ops) # add form contents oplist.append(([], 'Q')) # restore original state self.formdrawings[name] = self.ProcessOperators(oplist, pdf_fonts) dlist.extend(self.formdrawings[name]) elif stream.get('/Subtype') == '/Image': width = stream['/Width'] height = stream['/Height'] depth = stream['/BitsPerComponent'] filters = stream["/Filter"] item = self.AddBitmap(stream._data, width, height, filters) if item: # may be unimplemented dlist.append(item) return dlist def InlineImage(self, operand): " operand contains an image" dlist = [] data = operand.get('data') settings = operand.get('settings') width = settings['/W'] height = settings['/H'] depth = settings['/BPC'] filters = settings['/F'] item = self.AddBitmap(data, width, height, filters) if item: # may be unimplemented 
dlist.append(item) return dlist def AddBitmap(self, data, width, height, filters): "Add wx.Bitmap from data, processed by filters" if '/A85' in filters or '/ASCII85Decode' in filters: data = _AsciiBase85DecodePYTHON(data) if '/Fl' in filters or '/FlateDecode' in filters: data = FlateDecode.decode(data, None) if '/CCF' in filters or '/CCITTFaxDecode' in filters: if VERBOSE: print 'PDF operation /CCITTFaxDecode is not implemented' return [] if '/DCT' in filters or '/DCTDecode' in filters: stream = cStringIO.StringIO(data) image = wx.ImageFromStream(stream, wx.BITMAP_TYPE_JPEG) bitmap = wx.BitmapFromImage(image) else: bitmap = wx.BitmapFromBuffer(width, height, data) return ['DrawBitmap', (bitmap, 0, 0-height, width, height), {}] def ConvertCMYK(self, operand): "Convert CMYK values (0 to 1.0) in operand to nearest RGB" c, m, y, k = operand r = round((1-c)*(1-k)*255) b = round((1-y)*(1-k)*255) g = round((1-m)*(1-k)*255) return (r, g, b)
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 29 16:05:03 2019

@author: Administrator

Render the first page of a PDF file to a JPEG image.

The page is first copied into an in-memory single-page PDF with PyPDF2 and
that buffer is then handed to Wand for rasterisation.
"""
import io

from PyPDF2 import PdfFileReader, PdfFileWriter
from wand.image import Image

# Raw string: the previous literal contained the invalid escape sequence "\w",
# which newer Python versions warn about. The resulting path is unchanged.
pdfile = PdfFileReader(r'E:\YJZ\work\化工安全\M02模块送审\修改\904.pdf')
pageobj = pdfile.getPage(0)

# Copy the first page into its own in-memory PDF document.
dst_pdf = PdfFileWriter()
dst_pdf.addPage(pageobj)
pdf_bytes = io.BytesIO()
dst_pdf.write(pdf_bytes)
pdf_bytes.seek(0)

# The context manager releases the image resources even if saving fails
# (this replaces the explicit img.destroy() call).
with Image(file=pdf_bytes, resolution=500) as img:
    img.format = 'jpg'
    img.save(filename='2.jpg')
def send_mail(letter_bureau, client):
    """Email a dispute letter plus the client's id proof to a bureau.

    The id proof is downloaded from the local media endpoint, written to a
    temporary file next to this module (PDFs are re-written page by page,
    anything else is saved as an image), attached together with the letter
    PDF and sent through yagmail. The temporary file is removed afterwards,
    whether or not sending succeeded.

    Args:
        letter_bureau: object providing ``bureau_url`` and ``pdf_file``.
        client: object providing ``id_proof`` and ``full_name``.

    Returns:
        None. Problems with the id proof or the email template are printed
        and the function returns without sending.
    """
    if client.id_proof is None:
        print(FileNotFoundError('Cannot find id proof of client'))
        return

    module_dir = os.path.dirname(os.path.abspath(__file__))

    url = "http://127.0.0.1:8000" + reverse("grid_url:media_url",
                                            args=[client.id_proof])
    print(url)
    # Close the HTTP response as soon as the payload has been read.
    with urlopen(url) as page:
        content_type = page.headers.get("content-type")
        payload = BytesIO(page.read())
    if not content_type:
        print(FileNotFoundError('Cannot find id proof of client'))
        return
    extension = content_type.split("/")[-1]

    # Random suffix so concurrent calls do not overwrite each other's file.
    file_name = os.path.join(
        module_dir,
        "id_" + "".join(
            secrets.choice(string.ascii_uppercase + string.digits)
            for _ in range(15)),
    )
    file_name += f".{extension}"

    try:
        if content_type == "application/pdf":
            reader = PdfFileReader(payload)
            writer = PdfFileWriter()
            for page_num in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page_num))
            with open(file_name, "wb") as output_stream:
                writer.write(output_stream)
        else:
            image = Image.open(payload)
            try:
                image.save(file_name)
            except OSError:
                # Some modes cannot be written in the target format;
                # retry after converting to plain RGB.
                image.convert("RGB").save(file_name)

        try:
            with open(os.path.join(module_dir, "letter_email_template.html"),
                      "r", encoding="utf-8") as template:
                body = template.read()
        except OSError:
            print(FileNotFoundError('Cannot find email template'))
            return

        bureau_email = Bureau.objects.filter(_id=ObjectId(
            get_id_from_url(letter_bureau.bureau_url))).values("email")[0]["email"]
        letter_path = os.path.join(
            os.path.dirname(os.path.dirname(module_dir)),
            "media",
            str(letter_bureau.pdf_file),
        )

        yag = yagmail.SMTP("*****@*****.**")
        yag.send(
            bureau_email,
            f"Request to solve dispute of {client.full_name}",
            body,
            [
                file_name,
                letter_path,
            ],
        )
    finally:
        # Always clean up the temporary id-proof copy, including on the
        # early "template missing" return and on send failures.
        if os.path.exists(file_name):
            os.remove(file_name)
# Split a book PDF into one file per chapter. Chapter titles come from the
# PDF outline; chapter start pages come from the hand-maintained table below.
filename = r'F:\研一下\量化投资资料\量化教材\Hands-On_Machine_Learning_for_Algorithmic_Trading.pdf'

# NOTE(review): the +18 is presumably the offset between the book's printed
# page numbers and the PDF page indices - confirm against the document.
outlinepage = [i+18 for i in [8,33,65,88,119,147,175,224,260,284,312,351,389,418,441,458]]

# Keep the source open while pages are copied: PyPDF2 reads page content
# from the underlying stream when the writer serialises it.
with open(filename, mode='rb') as source:
    pdf_reader = PdfFileReader(source)
    pages = pdf_reader.getNumPages()
    outline = pdf_reader.getOutlines()

    outlinchapter = []
    for o in outline:
        res = re.findall(r"'/Title': '(.*?)', '/Page': IndirectObject\((.*?), 0\)",str(o),re.S)
        # Outline entries that do not match the pattern yield an empty list.
        if res and 'Chapter' in res[0][0]:
            outlinchapter.append(res[0][0])

    outlinedict = [{i[0]:i[1]} for i in zip(outlinchapter,outlinepage)]

    for index, chapter in enumerate(outlinedict):
        pdf_writer = PdfFileWriter()
        split_pdf_name = list(chapter.keys())[0].replace(':','') + '.pdf'
        start = list(chapter.values())[0]
        # A chapter ends where the next one starts; the last chapter runs to
        # the end of the document (previously this step raised IndexError and
        # the last chapter was never written).
        if index + 1 < len(outlinedict):
            end = list(outlinedict[index + 1].values())[0]
        else:
            end = pages
        print(split_pdf_name)
        for page_number in range(int(start), int(end)):
            pdf_writer.addPage(pdf_reader.getPage(page_number))
        with open(split_pdf_name,'wb') as out:
            pdf_writer.write(out)
from PyPDF2 import PdfFileReader, PdfFileWriter

# Explode the source document into one single-page PDF per page, numbering
# the output files from 1.
pdf_document = "source/Computer-Vision-Resources.pdf"
pdf = PdfFileReader(pdf_document)

for page in range(pdf.getNumPages()):
    outputFilename = "dist/Computer-Vision-Resources-page-{}.pdf".format(page + 1)

    # A fresh writer per page so every output holds exactly one page.
    pdf_writer = PdfFileWriter()
    pdf_writer.addPage(pdf.getPage(page))

    with open(outputFilename, "wb") as out:
        pdf_writer.write(out)

    print("created", outputFilename)
def _parse_timestamp(stamp):
    """Convert an ``S``, ``M:S`` or ``H:M:S`` string into whole seconds."""
    seconds = 0
    # Fields are weighted from the right: seconds, minutes, hours.
    for weight, field in zip((1, 60, 60 * 60), reversed(stamp.split(":"))):
        seconds += int(field) * weight
    return seconds


def main():
    """Where it all began.

    Parses the command line, splits the slide PDF into single pages, renders
    each slide as a looping video segment according to the transitions file,
    concatenates the segments and finally overlays the speaker video onto
    them with ffmpeg (skipped for ``--dry-run``).

    Exits with status 1 when the slides differ in size, when there are no
    slides or transitions, or when transition times do not increase.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("video",
                        type=argparse.FileType('rb'),
                        help="video of the speaker")
    parser.add_argument("slides",
                        type=argparse.FileType('rb'),
                        help="slides in pdf format")
    parser.add_argument("output",
                        type=argparse.FileType('wb'),
                        help="superimposed video output file")
    parser.add_argument("-n",
                        "--dry-run",
                        action='store_true',
                        help="don't run the final encoding pass")
    parser.add_argument("--height",
                        type=int,
                        help="height of output video (defaults to 1080)",
                        default=1080)
    parser.add_argument("--crop", help="box to crop video from (w:h:x:y)")
    parser.add_argument("--fraction",
                        type=float,
                        help="size of speaker box relative to video",
                        default=1 / 3.0)
    parser.add_argument(
        "-t",
        type=argparse.FileType('r', encoding='UTF-8'),
        help="path to the file that specifies the slide transitions",
        default='transitions.txt')
    parser.add_argument("--end", help="timestamp to end video at")
    parser.add_argument(
        'remaining',
        nargs=argparse.REMAINDER,
        help="additional arguments to pass to ffmpeg as output options")
    args = parser.parse_args()
    # Only the names of these two files are used below.
    args.video.close()
    args.output.close()

    slides = tempfile.TemporaryDirectory("slides")
    segments = tempfile.TemporaryDirectory("segments")
    segment_list = tempfile.NamedTemporaryFile('w')

    # get all the transitions (from the file given with -t, which used to be
    # ignored in favour of a hard-coded 'transitions.txt')
    transitions = []
    with args.t as t:
        for line in t:
            line = line.strip()
            if line.startswith('#'):
                continue
            fields = line.split()
            if len(fields) >= 2:
                transitions.append((fields[0], int(fields[1])))
    if not transitions:
        # transitions[-1] is needed below to build the closing entry.
        print("no transitions?")
        sys.exit(1)

    # split the pdf
    inputpdf = PdfFileReader(args.slides)
    size = None
    for i in range(inputpdf.numPages):
        page = inputpdf.getPage(i)
        if size is None:
            size = page.mediaBox
        elif size != page.mediaBox:
            print("pdf page sizes differ.")
            sys.exit(1)
        output = PdfFileWriter()
        output.addPage(page)
        with open("%s/%d.pdf" % (slides.name, i + 1), "wb") as outputStream:
            output.write(outputStream)
    if size is None:
        print("no slides?")
        sys.exit(1)

    # get info about the video
    end = int(
        float(
            subprocess.run([
                "ffprobe", "-v", "error", "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1", args.video.name
            ],
                           capture_output=True).stdout.strip()))
    if args.end:
        end = _parse_timestamp(args.end)

    fps = subprocess.run([
        "ffprobe", "-v", "error", "-select_streams", "V", "-show_entries",
        "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1",
        args.video.name
    ],
                         capture_output=True,
                         encoding="UTF-8").stdout.strip()

    # Artificial final entry: the last slide stays up until the end time.
    transitions.append((end, transitions[-1][1]))

    # h * 192 / 72 = 1080
    # h * r / 72 = target_h
    # r = 72 * target_h / h
    height = args.height
    # search for next pdf scale that produces divisible-by-two width and height
    while True:
        # The scale must follow the candidate height; deriving it from
        # args.height made every iteration identical, so the loop could
        # never terminate once the first width came out odd.
        pdf_scale = int(72.0 * height /
                        float(size.upperLeft[1] - size.lowerLeft[1]))
        width = float(size.upperRight[0] -
                      size.upperLeft[0]) * pdf_scale / 72.0
        # print(width, height, pdf_scale)
        if math.ceil(width) % 2 == 0:
            break
        height += 1

    print("pdf page size is", size.lowerRight[0], "by", size.upperRight[1],
          "and will be scaled with DPI", pdf_scale)
    print("output will be %dx%d, and %s long (at %s fps)" %
          (width, height, pretty_time_delta(end), fps))
    print("transitions:")
    print("\n".join([
        " - slide % 3d @ %s" % (slide, time)
        for (time, slide) in transitions[:-1]
    ]))
    print("---")

    print("==> producing slide video segments")
    i = 0
    since = 0
    show = None
    l_trim = None
    for (time, slide) in transitions:
        # Only the artificial final entry carries an int; entries read from
        # the transitions file are timestamp strings.
        if isinstance(time, int):
            s = time
        else:
            s = _parse_timestamp(time)

        # non-zero start time (maybe) implies we should trim the beginning
        if l_trim is None:
            l_trim = s
            # note that _in theory_ we should adjust all the slide transition
            # timestamps back by this amount, but in practice this isn't
            # necessary since we only really care about the _differences_
            # between them. the one exception to this is the very last
            # timestamp, since it is one we artificially set to the end time of
            # the video. if the video is trimmed at the beginning, the last
            # slide should also end correspondingly sooner
            transitions[-1] = (transitions[-1][0] - l_trim, transitions[-1][1])

        # NOTE(review): `since` starts at 0, so a first transition at time 0
        # also trips this check - confirm that is intended.
        if since >= s:
            print(
                "slide times do not monotonically increase (%s came after %s)"
                % (pretty_time_delta(s), pretty_time_delta(since)))
            sys.exit(1)

        if s > end:
            if since >= end:
                # no point in encoding things after this
                break
            # run this slide until the end time
            s = end

        # create a png for the slide we're supposed to show
        subprocess.run([
            "pdftoppm", "-singlefile", "-png", "-r",
            "%d" % pdf_scale,
            "%s/%d.pdf" % (slides.name, slide),
            "%s/%d" % (segments.name, i)
        ])

        if show is not None:
            # loop that png frame for the currently shown frame until this time
            print(" -> loop slide %d for %s" %
                  (i - 1, pretty_time_delta(s - since)))
            subprocess.run([
                "ffmpeg", "-loglevel", "error", "-f", "image2", "-loop", "1",
                "-framerate", "1", "-pattern_type", "none", "-i",
                "%s/%d.png" % (segments.name, i - 1), "-r", "1", "-t",
                "%d" % (s - since), "-vcodec", "png", "-an",
                "%s/%d.mov" % (segments.name, i - 1)
            ])
            segment_list.write("file '%s/%d.mov'\n" % (segments.name, i - 1))

        since = s
        show = slide
        i += 1

    filter_complex = []
    filter_complex.append("[1] fps=%s [slides]" % fps)
    if args.crop is None:
        filter_complex.append("[0] scale=-1:%d [pip]" %
                              (int(height * args.fraction)))
    else:
        filter_complex.append("[0] crop=%s,scale=-1:%d [pip]" %
                              (args.crop, int(height * args.fraction)))
    filter_complex.append(
        "[slides][pip] overlay=main_w-overlay_w-10:main_h-overlay_h-10")

    # ffmpeg reads the list by name, so the buffered lines must be on disk.
    segment_list.flush()
    print("==> producing slide video")
    subprocess.run([
        "ffmpeg", "-y", "-loglevel", "error", "-f", "concat", "-safe", "0",
        "-i", segment_list.name, "-c", "copy",
        "%s/all.mov" % segments.name
    ])

    if args.dry_run:
        # Keep a copy outside the temporary directory so the printed command
        # can still be run by hand later.
        copyfile("%s/all.mov" % segments.name, "all.mov")

    encoding_args = [
        "ffmpeg", "-ss",
        "%d" % l_trim, "-to",
        "%d" % end, "-i", args.video.name, "-i",
        "all.mov" if args.dry_run else "%s/all.mov" % segments.name,
        "-filter_complex", "; ".join(filter_complex), "-pix_fmt", "yuv420p",
        "-r",
        "%s" % fps, *args.remaining, args.output.name
    ]

    print("==> superimposing video onto slides")
    if args.dry_run:
        print(" -> would run:")
        print(encoding_args)
        print(" -> but skipping since this is a dry run.")
    else:
        subprocess.run(encoding_args)
from PyPDF2 import PdfFileReader

# Print the extracted text of one page of 'muzika.pdf' as UTF-8 bytes.
# The file is kept open while the page is read and is closed afterwards.
with open('muzika.pdf', 'rb') as pgs:
    read_pdf = PdfFileReader(pgs)
    number = read_pdf.getNumPages()
    # Index 6 is the seventh page; fail with a clear message if the
    # document is shorter than that.
    if number <= 6:
        raise SystemExit('muzika.pdf has only %i pages' % number)
    page = read_pdf.getPage(6)
    page_content = page.extractText()

print(page_content.encode('utf-8'))
def draw_pdf(self, blank_filename, filename):
    """Draw this bracket onto a blank bracket PDF and save the result.

    The picks and win probabilities are first drawn on an in-memory
    reportlab canvas, which is then merged as an overlay onto the first page
    of ``blank_filename``.

    Args:
        blank_filename: path of the blank bracket PDF to draw on.
        filename: path the filled-in PDF is written to.
    """
    tmp_pdf = io.BytesIO()
    can = canvas.Canvas(tmp_pdf)
    can.setFontSize(7)

    # Box first 4 winners
    xs = [154, 260, 452, 560]
    y = 490
    h = 79
    w = 10
    for game, x in zip(
            self.bracket_heap[ROUND_SECTIONS[1][0]:ROUND_SECTIONS[1][1]], xs):
        if game.winner.team_id in TOP_ROW_TEAM_IDS:
            can.rect(x, y, h, w)
        else:
            # Same box, drawn 14 units lower.
            can.rect(x, y - 14, 79, 10)

    bracket_pdf_points = {
        # left start point, right start point, dy val, switch value
        1: ((0, 463), (763, 463), 25, 16),
        2: ((127, 456), (607, 456), 29, 16),
        3: ((187, 442), (555, 442), 58, 8),
        4: ((237, 413), (507, 413), 116, 4),
        5: ((286, 357), (460, 357), 234, 2),
        6: ((290, 235), (442, 235), 0, 1)
    }

    # Put win prob on first round
    x, y = bracket_pdf_points[1][0]
    dy = 25
    # `switch` counts down the entries left in the current column; at zero
    # drawing restarts from the right start point.
    switch = bracket_pdf_points[1][3]
    for i in range(ROUND_SECTIONS[2][0], ROUND_SECTIONS[2][1], 1):
        if switch == 0:
            x, y = bracket_pdf_points[1][1]
            switch = bracket_pdf_points[1][3]
            dy = 25
        can.drawString(x, y, f"({self.bracket_heap[i].win_prob:.2%})")
        can.drawString(x, y - dy / 2,
                       f"({1 - self.bracket_heap[i].win_prob:.2%})")
        y -= dy
        if switch == bracket_pdf_points[1][3] / 2:
            # Halfway down the column: extra gap, then wider line spacing.
            y -= 10
            dy = 33
        switch -= 1

    # Fill in rounds
    for r in range(2, 7):
        x, y = bracket_pdf_points[r][0]
        switch = bracket_pdf_points[r][3]
        for i in range(ROUND_SECTIONS[r][0], ROUND_SECTIONS[r][1]):
            if switch == 0:
                x, y = bracket_pdf_points[r][1]
                switch = bracket_pdf_points[r][3]
            # The probability is stored on the heap parent, (i - 1) // 2;
            # it is flipped when this game's winner is the parent's teams[1].
            win_prob = self.bracket_heap[(i - 1) // 2].win_prob
            if self.bracket_heap[
                (i - 1) // 2].teams[1] == self.bracket_heap[i].winner:
                win_prob = 1 - win_prob
            can.drawString(
                x, y,
                str(self.bracket_heap[i].winner) + f" ({win_prob:.2%})")
            y -= bracket_pdf_points[r][2]
            switch -= 1

    # Fill in winner
    can.drawString(363, 235, str(self.bracket_heap[0].winner))
    can.save()

    watermark = PdfFileReader(tmp_pdf)
    out_pdf = PdfFileWriter()
    # Keep the blank PDF open until the merged page has been written, then
    # close it (the handle used to be leaked).
    with open(blank_filename, 'rb') as blank_stream:
        blank_pdf = PdfFileReader(blank_stream)
        blank_page = blank_pdf.getPage(0)
        blank_page.mergePage(watermark.getPage(0))
        out_pdf.addPage(blank_page)
        with open(filename, 'wb') as out_stream:
            out_pdf.write(out_stream)
# how to merge pdf files so that each file begins on an odd page number?
#
# http://unix.stackexchange.com/a/66455
#
# Usage: pass the input PDFs as arguments; the merged PDF goes to stdout.
import sys

from PyPDF2 import PdfFileWriter, PdfFileReader

alignment = 2  # to align on even pages

output = PdfFileWriter()
output_page_number = 0
# The inputs stay open until output.write() has run and are closed together
# afterwards, rather than being left for the interpreter to reclaim.
open_inputs = []

try:
    for filename in sys.argv[1:]:
        stream = open(filename, 'rb')
        open_inputs.append(stream)
        inpdf = PdfFileReader(stream)
        for i in range(inpdf.getNumPages()):
            output.addPage(inpdf.getPage(i))
            output_page_number += 1
        # blank pages until next alignment boundary
        while output_page_number % alignment != 0:
            output.addBlankPage()
            output_page_number += 1

    # A PDF is binary data. On Python 3 sys.stdout is a text stream, so write
    # to its underlying byte buffer; on Python 2 there is no .buffer and
    # sys.stdout itself is used, as before.
    output.write(getattr(sys.stdout, 'buffer', sys.stdout))
finally:
    for stream in open_inputs:
        stream.close()
from PyPDF2 import PdfFileReader as PdfReader, PdfFileWriter as PdfWriter

# Print the page count and the text of the first page. The with-block closes
# the file once both have been read.
with open('Ch13/meetingminutes.pdf', 'rb') as pdf_obj:
    pdf_reader = PdfReader(pdf_obj)
    print(pdf_reader.numPages)  # Output: 19

    page_obj = pdf_reader.getPage(0)
    print(page_obj.extractText())
def generate(self, op):
    """Render the PDF ticket for one order position.

    Text lines (event, order code, item, price, secret, attendee name) and a
    QR code of ``op.secret`` are drawn on a reportlab canvas at positions and
    sizes read from ``self.settings``. An element whose size setting is
    falsy is skipped. The drawn page is then merged onto the first page of
    the configured background PDF, or of the bundled default background.

    Args:
        op: the order position to render.

    Returns:
        A ``(filename, mimetype, content_bytes)`` tuple.
    """
    from reportlab.graphics.shapes import Drawing
    from reportlab.pdfgen import canvas
    from reportlab.lib import pagesizes, units
    from reportlab.graphics.barcode.qr import QrCodeWidget
    from reportlab.graphics import renderPDF
    from PyPDF2 import PdfFileWriter, PdfFileReader

    order = op.order

    # Resolve the page size by name, falling back to A4 for unknown names.
    pagesize = self.settings.get('pagesize', default='A4')
    if hasattr(pagesizes, pagesize):
        pagesize = getattr(pagesizes, pagesize)
    else:
        pagesize = pagesizes.A4
    # The orientation names a function in reportlab.lib.pagesizes that is
    # applied to the size.
    orientation = self.settings.get('orientation', default='portrait')
    if hasattr(pagesizes, orientation):
        pagesize = getattr(pagesizes, orientation)(pagesize)

    buffer = BytesIO()
    p = canvas.Canvas(buffer, pagesize=pagesize)

    event_s = self.settings.get('event_s', default=22, as_type=float)
    if event_s:
        p.setFont("Helvetica", event_s)
        event_x = self.settings.get('event_x', default=15, as_type=float)
        event_y = self.settings.get('event_y', default=235, as_type=float)
        p.drawString(event_x * units.mm, event_y * units.mm,
                     str(self.event.name))

    order_s = self.settings.get('order_s', default=17, as_type=float)
    if order_s:
        p.setFont("Helvetica", order_s)
        order_x = self.settings.get('order_x', default=15, as_type=float)
        order_y = self.settings.get('order_y', default=220, as_type=float)
        p.drawString(order_x * units.mm, order_y * units.mm,
                     _('Order code: {code}').format(code=order.code))

    name_s = self.settings.get('name_s', default=17, as_type=float)
    if name_s:
        p.setFont("Helvetica", name_s)
        name_x = self.settings.get('name_x', default=15, as_type=float)
        name_y = self.settings.get('name_y', default=210, as_type=float)
        item = str(op.item.name)
        if op.variation:
            item += " – " + str(op.variation)
        p.drawString(name_x * units.mm, name_y * units.mm, item)

    price_s = self.settings.get('price_s', default=17, as_type=float)
    if price_s:
        p.setFont("Helvetica", price_s)
        price_x = self.settings.get('price_x', default=15, as_type=float)
        price_y = self.settings.get('price_y', default=200, as_type=float)
        p.drawString(price_x * units.mm, price_y * units.mm,
                     "%s %s" % (str(op.price), self.event.currency))

    qr_s = self.settings.get('qr_s', default=80, as_type=float)
    if qr_s:
        reqs = qr_s * units.mm
        qrw = QrCodeWidget(op.secret, barLevel='H')
        b = qrw.getBounds()
        w = b[2] - b[0]
        h = b[3] - b[1]
        # Scale the widget's own bounds up to the requested edge length.
        d = Drawing(reqs, reqs, transform=[reqs / w, 0, 0, reqs / h, 0, 0])
        d.add(qrw)
        qr_x = self.settings.get('qr_x', default=10, as_type=float)
        qr_y = self.settings.get('qr_y', default=120, as_type=float)
        renderPDF.draw(d, p, qr_x * units.mm, qr_y * units.mm)

    code_s = self.settings.get('code_s', default=11, as_type=float)
    if code_s:
        p.setFont("Helvetica", code_s)
        code_x = self.settings.get('code_x', default=15, as_type=float)
        code_y = self.settings.get('code_y', default=120, as_type=float)
        p.drawString(code_x * units.mm, code_y * units.mm, op.secret)

    attendee_s = self.settings.get('attendee_s', default=0, as_type=float)
    # Gate on the attendee size, not the code size: with the default of 0
    # the old check still tried to draw the name with a zero font size.
    if attendee_s and op.attendee_name:
        p.setFont("Helvetica", attendee_s)
        attendee_x = self.settings.get('attendee_x', default=15, as_type=float)
        attendee_y = self.settings.get('attendee_y', default=90, as_type=float)
        p.drawString(attendee_x * units.mm, attendee_y * units.mm,
                     op.attendee_name)

    p.showPage()
    p.save()

    buffer.seek(0)
    new_pdf = PdfFileReader(buffer)
    output = PdfFileWriter()

    bg_file = self.settings.get('background', as_type=File)
    if isinstance(bg_file, File):
        bgf = default_storage.open(bg_file.name, "rb")
    else:
        bgf = open(finders.find('pretixpresale/pdf/ticket_default_a4.pdf'),
                   "rb")
    try:
        bg_pdf = PdfFileReader(bgf)
        for page in new_pdf.pages:
            # Copy the background page so each merge starts from a clean one.
            bg_page = copy.copy(bg_pdf.getPage(0))
            bg_page.mergePage(page)
            output.addPage(bg_page)
        outbuffer = BytesIO()
        output.write(outbuffer)
    finally:
        # The background is only needed until the output is serialised.
        bgf.close()

    outbuffer.seek(0)
    return 'order%s%s.pdf' % (
        self.event.slug, order.code), 'application/pdf', outbuffer.read()
def run_one(options, data, tokens, name, tree_key='binary_tree'):
    """Draw one parse tree with turtle and save it as a cropped PDF.

    The tree is drawn on the turtle canvas, dumped as PostScript, converted
    to ``<out_dir>/<name>-<example_id>.pdf`` with ``ps2pdf`` and written
    again as ``<out_dir>/<name>-<example_id>-cropped.pdf`` with the trim and
    crop boxes set to the drawing's bounding box.

    Args:
        options: object providing ``out_dir``, ``color`` and ``size``.
        data: mapping with ``'example_id'``, the tree under ``tree_key`` and
            an optional ``'style'``.
        tokens: passed through to ``TreeFig.draw_tree``.
        name: prefix for the output file names.
        tree_key: key in ``data`` that holds the tree to draw.

    Returns:
        The bounding box returned by ``TreeFig.draw_tree``.
    """
    import subprocess  # local: only needed for the ps2pdf call below

    example_id = data['example_id']
    parse = data[tree_key]
    style = data.get('style', None)

    # Only the unique name is taken from the temporary file; the PostScript
    # dump is written to that path further down.
    with tempfile.NamedTemporaryFile(mode='w') as f:
        path_ps = f.name
    path_pdf = os.path.join(options.out_dir,
                            '{}-{}.pdf'.format(name, example_id))

    turtle.speed('fastest')
    fig = TreeFig(color=options.color, size=options.size)

    # Setup
    scale = 1
    widthWindow = WIDTH * scale
    heightWindow = 500 * scale
    x0 = -widthWindow / 2 + 10
    fig.setup_turtle(widthWindow, heightWindow, scale, x0)

    # Init turtle.
    ts = turtle.getscreen()
    ts.tracer(
        0, 0
    )  # https://stackoverflow.com/questions/16119991/how-to-speed-up-pythons-turtle-function-and-stop-it-freezing-at-the-end

    # Draw settings.
    settings = {}
    settings['style'] = style

    # Draw.
    bounding_box = fig.draw_tree(parse, tokens, **settings)

    # Update Canvas.
    ts.update()
    try:
        ts.getcanvas().postscript(file=path_ps)
        print('writing to {}'.format(path_pdf))
        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through unchanged.
        subprocess.run(['ps2pdf', '-dEPSCrop', path_ps, path_pdf])
    finally:
        # The PostScript dump is an intermediate file; do not leave it behind.
        if os.path.exists(path_ps):
            os.remove(path_ps)

    # Crop the image.
    output_filename = os.path.join(
        options.out_dir, '{}-{}-cropped.pdf'.format(name, example_id))
    with open(path_pdf, "rb") as source:
        page = PdfFileReader(source).getPage(0)
        page.trimBox.lowerLeft = (bounding_box['x0'], bounding_box['y0'])
        page.trimBox.upperRight = (bounding_box['x1'], bounding_box['y1'])
        page.cropBox.lowerLeft = (bounding_box['x0'], bounding_box['y0'])
        page.cropBox.upperRight = (bounding_box['x1'], bounding_box['y1'])
        output = PdfFileWriter()
        output.addPage(page)
        print('writing to {}'.format(output_filename))
        with open(output_filename, "wb") as outputStream:
            output.write(outputStream)
    return bounding_box
class Renderer:
    """Draw ticket pages from a layout description onto a reportlab canvas.

    A layout is an iterable of dicts; each dict's ``'type'`` selects one of
    the ``_draw_*`` methods and the remaining keys carry its position, size
    and content settings. An optional background PDF is merged behind the
    drawn pages by ``render_background``.
    """

    def __init__(self, event, layout, background_file):
        """Store the layout and read the background PDF, if one is given."""
        self.layout = layout
        self.background_file = background_file
        self.variables = get_variables(event)
        self.images = get_images(event)
        self.event = event
        if self.background_file:
            # Keep the raw bytes too: the pdftk code path writes them to disk.
            self.bg_bytes = self.background_file.read()
            self.bg_pdf = PdfFileReader(BytesIO(self.bg_bytes), strict=False)
        else:
            self.bg_bytes = None
            self.bg_pdf = None

    @classmethod
    def _register_fonts(cls):
        """Register Open Sans and every font from ``get_fonts()`` with reportlab.

        Variants are registered under the family name plus the suffix
        ``' I'``, ``' B'`` or ``' B I'``.
        """
        pdfmetrics.registerFont(
            TTFont('Open Sans', finders.find('fonts/OpenSans-Regular.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans I', finders.find('fonts/OpenSans-Italic.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans B', finders.find('fonts/OpenSans-Bold.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans B I',
                   finders.find('fonts/OpenSans-BoldItalic.ttf')))

        for family, styles in get_fonts().items():
            pdfmetrics.registerFont(
                TTFont(family, finders.find(styles['regular']['truetype'])))
            if 'italic' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' I',
                           finders.find(styles['italic']['truetype'])))
            if 'bold' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' B',
                           finders.find(styles['bold']['truetype'])))
            if 'bolditalic' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' B I',
                           finders.find(styles['bolditalic']['truetype'])))

    def _draw_poweredby(self, canvas: Canvas, op: OrderPosition, o: dict):
        """Draw the "powered by pretix" logo in its dark or white variant."""
        content = o.get('content', 'dark')
        if content not in ('dark', 'white'):
            content = 'dark'
        img = finders.find(
            'pretixpresale/pdf/powered_by_pretix_{}.png'.format(content))

        ir = ThumbnailingImageReader(img)
        try:
            width, height = ir.resize(None, float(o['size']) * mm, 300)
        except Exception:
            logger.exception("Can not resize image")
            # Without the resized dimensions there is nothing sensible to
            # draw; carrying on used to fail with a NameError on width/height.
            return
        canvas.drawImage(ir,
                         float(o['left']) * mm,
                         float(o['bottom']) * mm,
                         width=width,
                         height=height,
                         preserveAspectRatio=True,
                         anchor='n',
                         mask='auto')

    def _draw_barcodearea(self, canvas: Canvas, op: OrderPosition, o: dict):
        """Draw a QR code of the position's secret or pseudonymization id."""
        content = o.get('content', 'secret')
        if content == 'secret':
            content = op.secret
        elif content == 'pseudonymization_id':
            content = op.pseudonymization_id

        # Longer content gets a lower error-correction level.
        level = 'H'
        if len(content) > 32:
            level = 'M'
        if len(content) > 128:
            level = 'L'
        reqs = float(o['size']) * mm
        qrw = QrCodeWidget(content,
                           barLevel=level,
                           barHeight=reqs,
                           barWidth=reqs)
        d = Drawing(reqs, reqs)
        d.add(qrw)
        qr_x = float(o['left']) * mm
        qr_y = float(o['bottom']) * mm
        renderPDF.draw(d, canvas, qr_x, qr_y)

    def _get_ev(self, op, order):
        """Return the position's subevent, or the order's event if unset."""
        return op.subevent or order.event

    def _get_text_content(self,
                          op: OrderPosition,
                          order: Order,
                          o: dict,
                          inner=False):
        """Resolve the text of a text area.

        When the layout element sets a locale, the lookup is repeated once
        inside that language context (``inner`` marks the nested call).
        Returns ``'(error)'`` for empty content or a failing variable and
        ``''`` for unknown content.
        """
        if o.get('locale', None) and not inner:
            with language(o['locale'], self.event.settings.region):
                return self._get_text_content(op, order, o, True)

        ev = self._get_ev(op, order)
        if not o['content']:
            return '(error)'
        if o['content'] == 'other':
            return o['text']
        elif o['content'].startswith('itemmeta:'):
            return op.item.meta_data.get(o['content'][9:]) or ''
        elif o['content'].startswith('meta:'):
            return ev.meta_data.get(o['content'][5:]) or ''
        elif o['content'] in self.variables:
            try:
                return self.variables[o['content']]['evaluate'](op, order, ev)
            except Exception:
                logger.exception('Failed to process variable.')
                return '(error)'
        return ''

    def _draw_imagearea(self, canvas: Canvas, op: OrderPosition, order: Order,
                        o: dict):
        """Draw a configured image, or a grey placeholder box without one."""
        ev = self._get_ev(op, order)
        if not o['content'] or o['content'] not in self.images:
            image_file = None
        else:
            try:
                image_file = self.images[o['content']]['evaluate'](op, order,
                                                                   ev)
            except Exception:
                logger.exception('Failed to process variable.')
                image_file = None

        if image_file:
            ir = ThumbnailingImageReader(image_file)
            try:
                ir.resize(float(o['width']) * mm, float(o['height']) * mm, 300)
            except Exception:
                # Best effort: the image is still drawn after a failed resize.
                logger.exception("Can not resize image")
            canvas.drawImage(
                image=ir,
                x=float(o['left']) * mm,
                y=float(o['bottom']) * mm,
                width=float(o['width']) * mm,
                height=float(o['height']) * mm,
                preserveAspectRatio=True,
                anchor='c',  # centered in frame
                mask='auto')
        else:
            canvas.saveState()
            canvas.setFillColorRGB(.8, .8, .8, alpha=1)
            canvas.rect(
                x=float(o['left']) * mm,
                y=float(o['bottom']) * mm,
                width=float(o['width']) * mm,
                height=float(o['height']) * mm,
                stroke=0,
                fill=1,
            )
            canvas.restoreState()

    def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order,
                       o: dict):
        """Draw a text area as a reportlab Paragraph, with optional rotation."""
        font = o['fontfamily']
        if o['bold']:
            font += ' B'
        if o['italic']:
            font += ' I'

        align_map = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
        style = ParagraphStyle(name=uuid.uuid4().hex,
                               fontName=font,
                               fontSize=float(o['fontsize']),
                               leading=float(o['fontsize']),
                               autoLeading="max",
                               textColor=Color(o['color'][0] / 255,
                                               o['color'][1] / 255,
                                               o['color'][2] / 255),
                               alignment=align_map[o['align']])
        text = conditional_escape(
            self._get_text_content(op, order, o) or "",
        ).replace("\n", "<br/>\n")

        # reportlab does not support RTL, ligature-heavy scripts like Arabic. Therefore, we use ArabicReshaper
        # to resolve all ligatures and python-bidi to switch RTL texts.
        configuration = {
            'delete_harakat': True,
            'support_ligatures': False,
        }
        reshaper = ArabicReshaper(configuration=configuration)
        try:
            text = "<br/>".join(
                get_display(reshaper.reshape(line))
                for line in text.split("<br/>"))
        except Exception:
            # Fall back to the unreshaped text.
            logger.exception('Reshaping/Bidi fixes failed on string {}'.format(
                repr(text)))

        p = Paragraph(text, style=style)
        w, h = p.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
        # p_size = p.wrap(float(o['width']) * mm, 1000 * mm)
        ad = getAscentDescent(font, float(o['fontsize']))
        canvas.saveState()
        # The ascent/descent offsets here are not really proven to be correct, they're just empirical values to get
        # reportlab render similarly to browser canvas.
        if o.get('downward', False):
            canvas.translate(float(o['left']) * mm, float(o['bottom']) * mm)
            canvas.rotate(o.get('rotation', 0) * -1)
            p.drawOn(canvas, 0, -h - ad[1] / 2)
        else:
            canvas.translate(
                float(o['left']) * mm,
                float(o['bottom']) * mm + h)
            canvas.rotate(o.get('rotation', 0) * -1)
            p.drawOn(canvas, 0, -h - ad[1])
        canvas.restoreState()

    def draw_page(self,
                  canvas: Canvas,
                  order: Order,
                  op: OrderPosition,
                  show_page=True):
        """Draw every layout element for one order position.

        When a background PDF is set, the canvas page size is set from its
        first page's media box. ``show_page`` controls whether the page is
        finished with ``canvas.showPage()``.
        """
        for o in self.layout:
            if o['type'] == "barcodearea":
                self._draw_barcodearea(canvas, op, o)
            elif o['type'] == "imagearea":
                self._draw_imagearea(canvas, op, order, o)
            elif o['type'] == "textarea":
                self._draw_textarea(canvas, op, order, o)
            elif o['type'] == "poweredby":
                self._draw_poweredby(canvas, op, o)
        if self.bg_pdf:
            media_box = self.bg_pdf.getPage(0).mediaBox
            canvas.setPageSize((media_box[2], media_box[3]))
        if show_page:
            canvas.showPage()

    def render_background(self, buffer, title=_('Ticket')):
        """Merge the drawn pages in ``buffer`` onto the background PDF.

        Uses the external pdftk binary when ``settings.PDFTK`` is set and
        PyPDF2 otherwise. Returns a new ``BytesIO`` with the merged document.

        NOTE(review): both paths use the background given to the
        constructor; a Renderer created without one looks unsupported here -
        confirm with callers.
        """
        if settings.PDFTK:
            buffer.seek(0)
            with tempfile.TemporaryDirectory() as d:
                with open(os.path.join(d, 'back.pdf'), 'wb') as f:
                    f.write(self.bg_bytes)
                with open(os.path.join(d, 'front.pdf'), 'wb') as f:
                    f.write(buffer.read())
                subprocess.run([
                    settings.PDFTK,
                    os.path.join(d, 'front.pdf'), 'background',
                    os.path.join(d, 'back.pdf'), 'output',
                    os.path.join(d, 'out.pdf'), 'compress'
                ],
                               check=True)
                with open(os.path.join(d, 'out.pdf'), 'rb') as f:
                    return BytesIO(f.read())
        else:
            from PyPDF2 import PdfFileReader, PdfFileWriter
            buffer.seek(0)
            new_pdf = PdfFileReader(buffer)
            output = PdfFileWriter()

            for page in new_pdf.pages:
                # Copy the background page so each merge starts from a
                # clean one.
                bg_page = copy.copy(self.bg_pdf.getPage(0))
                bg_page.mergePage(page)
                output.addPage(bg_page)

            output.addMetadata({
                '/Title': str(title),
                '/Creator': 'pretix',
            })
            outbuffer = BytesIO()
            output.write(outbuffer)
            outbuffer.seek(0)
            return outbuffer
parser.add_argument('-o',
                    '--output',
                    metavar='output',
                    help='set the output file name',
                    default='extracted')
parser.add_argument('-c',
                    '--copy',
                    help='copy the text to the clipboard',
                    action='store_true')
args = parser.parse_args()

pdf = PdfFileReader(args.file)

# Collect the text of every page, each followed by a newline. Joining once
# avoids rebuilding the string for every page.
text = ''.join(
    pdf.getPage(i).extractText() + '\n' for i in range(pdf.getNumPages()))

if args.copy:
    try:
        import pyperclip
    except ImportError:
        print('pyperclip module is required to use the copy argument')
    else:
        pyperclip.copy(text)

# Append mode: repeated runs add to an existing output file. The with-block
# closes the file even if the write fails.
with open(args.output + '.txt', 'a') as out:
    out.write(text)
newpage.rotateClockwise(int(angle)) elif clock == "ac": newpage.rotateCounterClockwise(int(angle)) pdfWriter.addPage(newpage) resultPdfFile = open('rotatedPage.pdf', 'wb') pdfWriter.write(resultPdfFile) resultPdfFile.close() resultPdfFile1 = open('rotatedPage.pdf', 'rb') pdfReader1 = PyPDF2.PdfFileReader(resultPdfFile1) pageObj = pdfReader1.getPage(0) pdfWriter.addPage(pageObj) pdfOutputFile = open('combinedfile.pdf', 'wb') pdfWriter.write(pdfOutputFile) pdfOutputFile.close() pdffinal = PdfFileReader('combinedfile.pdf', 'rb') output = PdfFileWriter() for i in range(pdffinal.getNumPages() - 2): p = pdffinal.getPage(i) output.addPage(p) with open('final.pdf', 'wb') as f: output.write(f) print("CONFIGURING...") for i in tqdm(range(5)): time.sleep(3) print("DONE!") resultPdfFile1.close() pdfget.close() os.remove("rotatedPage.pdf") os.remove("combinedfile.pdf")
def _overlay_printable_areas_with_white(src_pdf):
    """
    Overlay the printable areas of the source PDF with white, so the code can
    check for the presence of non-white content outside the printable areas.

    Four areas are drawn in white on the first page: logo, address, service
    address, and the body. Logo is the area above the address area. Service
    address runs from the top right, down the side of the letter to the right
    of the address area. Every subsequent page only gets the body area.

    This function subtracts/adds 1mm to make every boundary more generous. This
    is to solve pixel-hunting issues where letters fail validation because
    there's one pixel over the boundary, generally because of anti-aliasing
    some text. This doesn't affect the red overlays we draw when displaying to
    end users, so people should still lay out their PDFs based on the published
    constraints.

    :param BytesIO src_pdf: A file-like; rewound to position 0 before returning
    :return BytesIO: New file-like containing the overlaid pdf
    """
    pdf = PdfFileReader(src_pdf)

    # The first page is more varied because of the address blocks. Each entry
    # is a (left/top, right/bottom) pair, widened by 1 on every side.
    first_page_areas = [
        # Body
        (
            (BORDER_LEFT_FROM_LEFT_OF_PAGE - 1, BODY_TOP_FROM_TOP_OF_PAGE - 1),
            (BORDER_RIGHT_FROM_LEFT_OF_PAGE + 1, BORDER_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
        # Service address block - the writeable area on the right hand side
        # (up to the top right corner)
        (
            (SERVICE_ADDRESS_LEFT_FROM_LEFT_OF_PAGE - 1, SERVICE_ADDRESS_TOP_FROM_TOP_OF_PAGE - 1),
            (SERVICE_ADDRESS_RIGHT_FROM_LEFT_OF_PAGE + 1, SERVICE_ADDRESS_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
        # Service logo block - the writeable area above the address (only as
        # far across as the address extends)
        (
            (BORDER_LEFT_FROM_LEFT_OF_PAGE - 1, BORDER_TOP_FROM_TOP_OF_PAGE - 1),
            (LOGO_RIGHT_FROM_LEFT_OF_PAGE + 1, LOGO_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
        # Citizen address block - the address window
        (
            (ADDRESS_LEFT_FROM_LEFT_OF_PAGE - 1, ADDRESS_TOP_FROM_TOP_OF_PAGE - 1),
            (ADDRESS_RIGHT_FROM_LEFT_OF_PAGE + 1, ADDRESS_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
    ]
    pdf.getPage(0).mergePage(_white_overlay_page(first_page_areas))

    if pdf.numPages > 1:
        # Every subsequent page gets the same body-only overlay, so it is
        # built once and merged into each page instead of being regenerated
        # per page.
        body_only_overlay = _white_overlay_page([
            (
                (BORDER_LEFT_FROM_LEFT_OF_PAGE - 1, BORDER_TOP_FROM_TOP_OF_PAGE - 1),
                (BORDER_RIGHT_FROM_LEFT_OF_PAGE + 1, BORDER_BOTTOM_FROM_TOP_OF_PAGE + 1),
            ),
        ])
        for page_num in range(1, pdf.numPages):
            pdf.getPage(page_num).mergePage(body_only_overlay)

    out = bytesio_from_pdf(pdf)

    # it's a good habit to hand the source stream back rewound
    src_pdf.seek(0)
    return out


def _white_overlay_page(areas):
    """Return a one-page overlay with every ``(pt1, pt2)`` rectangle of ``areas`` drawn on a white canvas."""
    can = NotifyCanvas(white)
    for pt1, pt2 in areas:
        can.rect(pt1, pt2)
    # The canvas bytes are parsed back into a PDF so its page can be merged.
    return PdfFileReader(can.get_bytes()).getPage(0)