def pdf(self):
    """Return a ``PdfFileReader`` for ``self.data``, or ``None`` if unreadable.

    The raw data is parsed first.  If that fails, it is passed through
    ``self._fixPdf`` and parsed once more.  An encrypted document is decrypted
    with the empty password; a warning is logged when that does not work, but
    the reader is still returned.  When no reader could be built at all, the
    image previews of ``self.context`` are removed.
    """
    pdf = None
    try:
        pdf = PdfFileReader(StringIO(self.data))
    except Exception:
        # ``Logger.warn`` is a deprecated alias; ``warning`` works everywhere.
        logger.warning('Error opening pdf file, trying to fix it...')
        fixed_data = self._fixPdf(self.data)

        # try to reopen the pdf file again
        try:
            pdf = PdfFileReader(StringIO(fixed_data))
        except Exception:
            logger.warning('This pdf file cannot be fixed.')

    if pdf and pdf.isEncrypted:
        try:
            decrypt = pdf.decrypt('')
            # A result of 0 means the empty password was rejected.
            if decrypt == 0:
                logger.warning('This pdf is password protected.')
        except Exception:
            logger.warning('Errors while decrypting the pdf file.')

    if pdf is None:
        remove_image_previews(self.context)

    return pdf
def generate_document(self, data):
    """Render one record of *data* to a PDF file in ``self.output_dir``.

    Each class in ``self.fields`` is instantiated with this object, the
    canvas and the value at the same position in *data*, then rendered.  When
    ``self.template_file`` is set, the rendered text is merged onto the first
    page of the template; otherwise the rendered page is written on its own.
    The file name comes from ``self.generate_filename(data)``.
    """
    template_stream = None
    if self.template_file is not None:
        template_stream = open(self.template_file, 'rb')
    try:
        if template_stream is not None:
            template = PdfFileReader(template_stream)

        packet = StringIO()
        c = canvas.Canvas(packet, pagesize=(self.width, self.height))
        for i, field_cls in enumerate(self.fields):
            # TODO: Catch exception if there is less columns than fields
            field = field_cls(self, c, data[i])
            field.render()

        # Save canvas
        c.save()
        packet.seek(0)
        text = PdfFileReader(packet)

        output = PdfFileWriter()
        if template_stream is not None:
            # Merge text with base
            page = template.getPage(0)
            page.mergePage(text.getPage(0))
        else:
            page = text.getPage(0)
        output.addPage(page)

        # Save file
        filename = "%s/%s.pdf" % (self.output_dir, self.generate_filename(data))
        with open(filename, 'wb') as output_stream:
            output.write(output_stream)
    finally:
        # The template must stay open until the writer has finished, because
        # page content is read from it while writing.
        if template_stream is not None:
            template_stream.close()
def page_extract(start, end, SUBSECTION):
    """Turn pages *start*..*end* of ``PDF_DIR`` into PNGs and build cards.

    Each selected page is written to a single-page PDF in ``TMP_DIR``,
    converted with ``gs_pdf_to_png`` and the intermediate PDF is removed.
    The files then present in ``TMP_DIR`` are taken two at a time as the
    front and back of a card and passed to ``make_cards``.

    :param start: 1-based number of the first page (int or numeric string)
    :param end: 1-based number of the last page, inclusive
    :param SUBSECTION: passed through to ``make_cards``
    """
    _, name_ext = os.path.split(PDF_DIR)
    name, ext = os.path.splitext(name_ext)

    with open(PDF_DIR, 'rb') as pdf_stream:
        PDF_IN = PdfFileReader(pdf_stream)
        for i in range(int(start) - 1, int(end)):
            output = PdfFileWriter()
            output.addPage(PDF_IN.getPage(i))
            PDF_OUT = '{}{}'.format(TMP_DIR, '{}-{}{}'.format(name, str(i).zfill(6), ext))
            with open(PDF_OUT, 'wb') as outputStream:
                output.write(outputStream)
            gs_pdf_to_png(PDF_OUT)
            os.remove(PDF_OUT)

    # os.listdir() returns names in arbitrary order; sort them so that the
    # zero-padded page numbers pair fronts with backs deterministically.
    png_list = group(sorted(os.listdir(TMP_DIR)), 2)
    for tup in png_list:
        print(tup)
        card_front = os.path.join(TMP_DIR, tup[0])
        card_back = os.path.join(TMP_DIR, tup[1])
        make_cards(card_front, card_back, SUBSECTION)
def main():
    """Rename every ``.pdf`` in ``args.path`` to ``[author] title.pdf``.

    Author and title come from the PDF document information.  A missing or
    empty author becomes ``Unknown``; a missing or empty title falls back to
    the original file name without extension.  The target directory is
    ``args.dest``; with ``args.dryrun`` set, the rename is only printed.
    """
    args = do_cmd_args_line()
    for f in os.listdir(args.path):
        if not f.endswith('.pdf'):
            continue
        fname = os.path.join(args.path, f)

        # Read the metadata and close the file before renaming it: an open
        # handle blocks the rename on Windows.
        with open(fname, 'rb') as stream:
            info = PdfFileReader(stream).getDocumentInfo()
            # getDocumentInfo() returns None when the PDF has no info dict.
            author = info.author if info is not None else None
            title = info.title if info is not None else None

        if not author:
            author = 'Unknown'
        if not title:
            title = os.path.splitext(f)[0]

        tgtfname = '[{0}] {1}.pdf'.format(author, title)
        ftgtname = os.path.join(args.dest, tgtfname)
        print('renaming {0} -> {1}'.format(fname, ftgtname))
        if not args.dryrun:
            try:
                os.rename(fname, ftgtname)
            except Exception as e:
                # Best effort: report the failure and go on with the next file.
                print(e)
def getPLBURL(journal, doi, count):
    """Download the article for *doi* and save its first page.

    The DOI is resolved through ``http://dx.doi.org/``, the landing page is
    searched for a ``pdfurl="..."`` attribute, the PDF behind that URL is
    downloaded, and its first page is written to ``Single_<count>.pdf`` in the
    current directory.

    :param journal: unused; kept for interface compatibility
    :param doi: DOI string appended to the resolver URL
    :param count: number used in the output file name
    :raises ValueError: if the landing page contains no ``pdfurl`` attribute
    """
    import re

    cj = http.cookiejar.CookieJar()  # initialize the cookie jar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
    opener.addheaders = [('User-Agent', user_agent)]

    url = 'http://dx.doi.org/' + doi
    response = opener.open(url)
    try:
        # need to convert from bytes to string
        landing_page = response.read().strip().decode('utf-8')
    finally:
        response.close()

    m = re.search('pdfurl="(.*?)"', landing_page)
    if m is None:
        raise ValueError('No pdfurl attribute found on the page for ' + url)

    response = opener.open(m.group(1))
    try:
        out = response.read()
    finally:
        response.close()

    o = PdfFileReader(io.BytesIO(out))
    merged = PdfFileWriter()
    outName = "Single_" + str(count) + ".pdf"
    merged.addPage(o.getPage(0))
    with open(outName, 'wb') as pdf:
        merged.write(pdf)
def tearpage(filename, startpage=1):
    """
    Copy filename to a tempfile, write pages startpage..N to filename.

    If the copy cannot be parsed, ``_fixPdf(filename, tmp.name)`` is called
    and parsing is attempted once more.

    :param filename: PDF filepath
    :param startpage: page number for the new first page
    """
    # Copy the pdf to a tmp file; the context manager removes it even when
    # reading or writing fails.
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        source = open(tmp.name, 'rb')
        try:
            # Read the copied pdf
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                source.close()
                _fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write pages excepted the first one
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages):
                output_file.addPage(input_file.getPage(i))

            # The source must still be open here: page content is read from
            # it while the writer produces the output.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            source.close()
def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build an ``uploader.BookMeta`` for an uploaded PDF.

    When ``use_pdf_meta`` is set and the PDF carries document information,
    author, title and subject are taken from it.  A missing author becomes
    ``"Unknown"`` and a missing title falls back to *original_file_name*.
    Otherwise those defaults are used and the description is empty.  The
    cover comes from ``pdf_preview``.
    """
    author = "Unknown"
    title = original_file_name
    subject = ""

    if use_pdf_meta:
        with open(tmp_file_path, 'rb') as pdf_stream:
            doc_info = PdfFileReader(pdf_stream).getDocumentInfo()
            if doc_info is not None:
                # Read the values while the stream is open; they may be
                # resolved from the file on access.
                if doc_info.author is not None:
                    author = doc_info.author
                if doc_info.title is not None:
                    title = doc_info.title
                subject = doc_info.subject

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=pdf_preview(tmp_file_path, original_file_name),
        description=subject,
        tags="",
        series="",
        series_id="")
def get_images(pdf_file):
    """Yield a PIL image for each large enough image on the PDF's first page.

    Images whose height is below 100 are skipped.  ``/DeviceRGB`` images are
    decoded in mode ``"RGB"``, every other colour space in mode ``"P"``.

    :param pdf_file: path of the PDF to inspect
    :raises Exception: when an image's filter is not ``/FlateDecode``
    """
    with open(pdf_file, 'rb') as fp:
        first_page = PdfFileReader(fp).getPage(0)
        x_objects = first_page['/Resources']['/XObject'].getObject()

        for key in x_objects:
            entry = x_objects[key]
            if entry['/Subtype'] != '/Image':
                continue

            width, height = entry['/Width'], entry['/Height']
            # Ignore smaller images.
            if height < 100:
                continue

            data = entry.getData()
            mode = "RGB" if entry['/ColorSpace'] == '/DeviceRGB' else "P"

            # The filter may be a single name or a list of names.
            encoding = entry['/Filter']
            if not (encoding == '/FlateDecode' or '/FlateDecode' in encoding):
                raise Exception(
                    'Unexpected image encoding: {}'.format(encoding))
            yield Image.frombytes(mode, (width, height), data)
def buildPDF(self, data, document_root):
    """Fill in the ``a125.pdf`` form and write it to ``a125-gen.pdf``.

    *data* is a JSON string; the ``fields`` mapping of its first element
    supplies the values.  Each value is drawn on a letter-sized canvas by
    the matching method of this object, the result is merged onto the first
    page of ``<document_root>/a125.pdf`` and saved as
    ``<document_root>/a125-gen.pdf``.  The writer is kept in ``self.form``.
    """
    data = json.loads(data)[0]['fields']
    content = StringIO.StringIO()
    parser = canvas.Canvas(content, pagesize=letter)

    self.employee_name(parser, data['name'])
    self.social_security(parser, data['ssn'])
    self.title(parser, data['title'])
    self.base_salary(parser, data['base_salary'])
    self.period(parser, data['period'])
    self.period_year(parser, data['period_year'])
    self.effective_date(parser, data['effective_date'])
    self.multi_campus(parser, data['multi_campus'])
    self.sponsored_accounts(parser, data['sponsored_accounts'])
    self.cost_sharing(parser, data['cost_sharing'])
    self.university_funds(parser, data['university_funds'])
    self.payments_paid(parser, data['payments_paid'])
    self.comments(parser, data['comments'])

    parser.save()
    content.seek(0)
    text = PdfFileReader(content)

    form = PdfFileReader(document_root + '/a125.pdf').getPage(0)
    output = PdfFileWriter()
    form.mergePage(text.getPage(0))
    output.addPage(form)

    # Close the output so the generated file is flushed to disk on return.
    with open(document_root + '/a125-gen.pdf', 'wb') as outputStream:
        output.write(outputStream)
    self.form = output
def pdf_to_csv_with_PyPDF():
    """
    Iterate through all the PDFs stored in the ``_PDF_PATH`` folder and export
    their content to ``data.csv`` under ``_DATA_PATH``.

    The CSV file has two columns: ``document_id`` (the file name) and
    ``document_text`` (the text of all pages, each followed by a space).
    Files that cannot be read are reported on stdout and skipped.
    """
    bar = progressbar.ProgressBar()
    csv_data_file = _DATA_PATH + "data.csv"
    with open(csv_data_file, "w", newline='') as csvfile:
        data_writer = csv.writer(csvfile)
        data_writer.writerow(["document_id", "document_text"])
        for fn in bar(os.listdir(_PDF_PATH)):
            file_path = os.path.join(_PDF_PATH, fn)
            if not file_path.endswith(".pdf"):
                continue
            try:
                # Close each PDF as soon as its text has been extracted.
                with open(file_path, 'rb') as pdf_stream:
                    input_file = PdfFileReader(pdf_stream)
                    text = "".join(
                        input_file.getPage(p).extractText() + " "
                        for p in range(input_file.getNumPages())
                    )
            except utils.PdfReadError:
                print("Error al leer el PDF: {0}".format(fn))
            except Exception as e:
                print("Error desconocido en el PDF: {0}".format(fn))
                print("Error: {0}".format(e))
            else:
                # TODO: Check if text is not empty
                data_writer.writerow([fn, text])
def _merge_pdf(documents):
    '''Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf
    '''
    writer = PdfFileWriter()
    # We have to close the streams *after* PdfFileWriter's call to write()
    streams = []
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport, overwriteWarnings=False)
            for page in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        # NOTE(review): the '.html' suffix is kept as-is although the content
        # is a PDF - confirm whether callers rely on it.
        merged_file_fd, merged_file_path = tempfile.mkstemp(suffix='.html', prefix='report.merged.tmp.')
        # PDF output is binary: a text-mode handle fails on Python 3 and
        # corrupts line endings on Windows.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            try:
                stream.close()
            except Exception:
                pass

    return merged_file_path
def handle(self, *args, **options):
    """Generate one certificate PDF for every child in ``settings.CERT_CHILDREN``.

    For each certificate type, Sunday-school class and child, the template
    named by ``settings.CERT_FILE`` is loaded.  The child's name, the event,
    the date and the church name are drawn centred at the coordinates and
    fonts given in ``settings.CERT_COORD``.  The result is saved as
    ``<child>_<year>.pdf`` under ``settings.CERT_PATH/<class>``.
    """
    for cert_type, ss_class_children in settings.CERT_CHILDREN.items():
        self.stdout.write('Certificate Type: {}\n'.format(cert_type))

        for ss_class, children in ss_class_children.items():
            self.stdout.write('SS Class: {}\n'.format(ss_class))

            for child in children:
                self.stdout.write('Child: {}\n'.format(child))

                # The template is re-read per child because mergePage()
                # modifies the page in place.
                paf_path = os.path.join(settings.CERT_TEMPLATE_PATH, settings.CERT_FILE[cert_type])
                pdf = PdfFileReader(paf_path)
                page = pdf.getPage(0)

                s = StringIO.StringIO()
                c = canvas.Canvas(s, pagesize=letter)

                # One timestamp per certificate keeps event year, date line
                # and file name consistent with each other.
                now = datetime.now()
                coords = settings.CERT_COORD[cert_type]
                labels = (
                    ('child', child),
                    ('event', 'Sunday School Summer Festival {}'.format(now.strftime('%Y'))),
                    ('date', '{}'.format(now.strftime('%B %Y'))),
                    ('church', 'St. Mark Coptic Orthodox Church'),
                )
                for key, text in labels:
                    spec = coords[key]
                    c.setFont(spec['font']['name'], spec['font']['size'])
                    c.drawCentredString(spec['x'], spec['y'], text)

                c.save()

                pdf_with_custom_text = PdfFileReader(s)
                page.mergePage(pdf_with_custom_text.getPage(0))

                writer = PdfFileWriter()
                writer.addPage(page)

                output_file = '{}_{}.pdf'.format(child, now.strftime('%Y'))
                output_dir = os.path.join(settings.CERT_PATH, ss_class)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                output_path = os.path.join(output_dir, output_file)

                with open(output_path, 'wb') as f:
                    writer.write(f)
def createCoverPage(self, title, description):
    """Stamp *title* and *description* onto page 0 and append it to ``output``.

    Both strings are drawn in red 15-point Helvetica-Bold on a letter-sized
    overlay, *description* one line below *title*.  The overlay is merged
    into the page returned by ``self.getPage(0)`` and that page is added to
    the module-level ``output`` writer.
    """
    global output

    font_size = 15
    line_gap = 0.25 * font_size
    title_y = 700

    # Draw the overlay with Reportlab into an in-memory buffer.
    buffer = StringIO.StringIO()
    overlay = canvas.Canvas(buffer, pagesize=letter)
    cover_page = self.getPage(0)

    overlay.setFillColorRGB(1, 0, 0, alpha=1)
    overlay.setFont("Helvetica-Bold", font_size)
    overlay.drawString(50, title_y, title)
    overlay.drawString(50, title_y - font_size - line_gap, description)
    overlay.save()

    # Merge the overlay onto the existing page and queue it for output.
    stamped = PdfFileReader(buffer)
    cover_page.mergePage(stamped.getPage(0))
    output.addPage(cover_page)
def extract_text(link):
    """Return the text of every page of the PDF behind *link*.

    The PDF is looked up in ``default_storage`` under ``pdfs/`` plus *link*
    without its first 25 characters.  It is fetched with ``add_file`` when it
    is not stored yet.

    :param link: URL of the PDF
    :returns: the concatenated page texts, or ``''`` when the file cannot be
        fetched or parsed
    """
    amazon_file_name = "pdfs/" + link[25:]
    if not default_storage.exists(amazon_file_name):
        try:
            add_file(link)
        except Exception:
            return ''

    pdf = default_storage.open(amazon_file_name, 'rb')
    try:
        try:
            pdf_file = PdfFileReader(pdf)
        except Exception:
            print("BAD FILE-- %s " % (link))
            # Without a reader there is nothing to extract; going on would
            # fail on the unbound ``pdf_file`` name.
            return ''

        return ''.join(
            pdf_file.getPage(count).extractText()
            for count in range(pdf_file.getNumPages())
        )
    finally:
        pdf.close()
def test_cat(self):
    """Make sure files are properly concatenated."""
    check_call([STAPLER, 'cat', ONEPAGE_PDF, FIVEPAGE_PDF,
                self.outputfile])
    # ``assert_`` is a deprecated alias of ``assertTrue``.
    self.assertTrue(os.path.isfile(self.outputfile))
    with open(self.outputfile, 'rb') as merged:
        pdf = PdfFileReader(merged)
        self.assertEqual(pdf.getNumPages(), 6)
def _merge_pdf_images(self, docf, stream, outlines):
    """Write the PDF at ``docf.name`` to *stream* with images and bookmarks added.

    The document information of the input is copied to the output.  Every
    entry of ``self._pdf_images`` whose 1-based ``page`` matches a page is
    scaled to fit its ``width``/``height`` and merged at (``x``, ``y``).
    *outlines* is an iterable of ``(pageno, level, header)`` tuples from
    which bookmarks are created.
    """
    source = PdfFileReader(docf.name)
    result = PdfFileWriter()
    result._info.getObject().update(source.getDocumentInfo())

    # embed images into file
    for index, page in enumerate(source.pages):
        page_number = index + 1
        for img in self._pdf_images:
            if img.page == page_number:
                # Load image
                image_page = PdfFileReader(img.fname).getPage(0)
                box = image_page.mediaBox
                scale = min(img.width / box[2].as_numeric(),
                            img.height / box[3].as_numeric())
                page.mergeScaledTranslatedPage(image_page, scale, img.x, img.y)
        result.addPage(page)

    # create outlines
    ancestors = []
    for pageno, level, header in outlines:
        ancestors = ancestors[:level]
        # NOTE(review): the parent is always the first (top-level) ancestor,
        # not the nearest one - confirm this is intended for deep outlines.
        parent = ancestors[0] if ancestors else None
        bookmark = result.addBookmark(header.strip(), pageno - 1, parent)
        ancestors.append(bookmark)

    result.write(stream)
def preview_measurement(self):
    """Return a PNG preview of a measurement PDF, scaled down by 1.5.

    The request parameters ``code`` (town code) and ``filename`` select the
    PDF inside the directory stored for that town.  The preview is produced
    with ``/usr/bin/convert`` through a temporary file.

    :returns: a PNG ``Response``, or ``HTTPBadRequest`` when a parameter is
        missing or invalid, or when the town is unknown
    """
    towncode = self.request.params.get("code", None)
    filename = self.request.params.get("filename", None)

    # A missing or non-numeric code used to crash in int(); answer 400.
    try:
        town_code = int(towncode)
    except (TypeError, ValueError):
        return HTTPBadRequest("Invalid Town code")
    if filename is None:
        return HTTPBadRequest("Missing filename")

    cur_record = DBSession.query(LuxMeasurementDirectory).\
        filter(LuxMeasurementDirectory.town_code == town_code).first()
    if cur_record is None:
        return HTTPBadRequest("Invalid Town name")

    # NOTE(review): ``filename`` comes straight from the request and is not
    # checked for path traversal ("../") - confirm it is validated upstream.
    measurement_filepath = "%s/%s" % (cur_record.path, filename)

    factor = 1.5
    with open(measurement_filepath, 'rb') as pdf_stream:
        page0 = PdfFileReader(pdf_stream).getPage(0)
        width = int(int(page0.mediaBox[2]) / factor)
        height = int(int(page0.mediaBox[3]) / factor)

    (fd, tempfilename) = tempfile.mkstemp(".png")
    try:
        subprocess.call(["/usr/bin/convert", "-sample",
                         str(width) + "x" + str(height),
                         measurement_filepath, tempfilename])
        # PNG data is binary; text mode would try to decode it.
        with open(tempfilename, "rb") as tfile:
            data = tfile.read()
    finally:
        os.close(fd)
        os.remove(tempfilename)

    headers = {"Content-Type": "image/png"}
    return Response(data, headers=headers)
def add_update_pdf_metadata(filename, update_dictionary):
    """Add or replace metadata entries of the PDF at *filename*, in place.

    All pages are copied to a new document whose information dictionary is
    the existing one updated with *update_dictionary*.  The result is written
    to a temporary file which then replaces *filename*.

    :param filename: path of the PDF to modify
    :param update_dictionary: mapping of metadata names without the leading
        ``/`` (a slash is prepended to each key) to values, none of which
        may be None
    """
    # This seems to be the only way to modify the existing PDF metadata.
    #
    # pylint: disable=protected-access, no-member

    def add_prefix(value):
        return '/' + value

    full_update_dictionary = {add_prefix(k): v for k, v in update_dictionary.items()}

    with open(filename, 'rb') as input_file:
        pdf_input = PdfFileReader(input_file)
        pdf_output = PdfFileWriter()

        for page in range(pdf_input.getNumPages()):
            pdf_output.addPage(pdf_input.getPage(page))

        info_dict = pdf_output._info.getObject()

        # documentInfo is None for a PDF without an information dictionary.
        info = pdf_input.documentInfo or {}

        full_update_dictionary = dict(chain(info.items(), full_update_dictionary.items()))

        for key in full_update_dictionary:
            assert full_update_dictionary[key] is not None
            info_dict.update({NameObject(key): createStringObject(full_update_dictionary[key])})

        # NamedTemporaryFile owns the descriptor and closes it, unlike the
        # bare mkstemp() call whose descriptor was never closed.
        with tempfile.NamedTemporaryFile(prefix="email2pdf_add_update_pdf_metadata",
                                         suffix=".pdf", delete=False) as file_out:
            pdf_output.write(file_out)
            temp_file_name = file_out.name

    shutil.move(temp_file_name, filename)
def mergePDFList(self, pdf_data_list, start_on_recto=False):
    """Merge multiple PDFs in a new PDF.

    Input and output are raw PDF data: *pdf_data_list* is a list of strings
    and the merged PDF is returned as a string.  Empty entries are skipped.

    With *start_on_recto* set, a blank page is appended after every PDF that
    has an odd number of pages, so that the next one starts on a recto page.
    This is useful when the merged PDF is printed in recto/verso mode.
    """
    from StringIO import StringIO
    from PyPDF2 import PdfFileWriter, PdfFileReader

    merged = PdfFileWriter()
    for pdf_data in pdf_data_list:
        if not pdf_data:
            continue
        reader = PdfFileReader(StringIO(pdf_data))
        page_count = reader.getNumPages()
        for index in range(page_count):
            merged.addPage(reader.getPage(index))
        # Pad odd-length documents so that the next one starts on a recto.
        if start_on_recto and page_count % 2:
            merged.addBlankPage()

    result = StringIO()
    merged.write(result)
    return result.getvalue()
def get_png_image_frompdf( input_pdf_file, newWidth = None, verify = True ):
    """Convert a single-page PDF to a PNG image through the CloudConvert API.

    :param input_pdf_file: path of an existing one-page ``.pdf`` file
    :param newWidth: optional target width (an int above 10); the height is
        scaled to keep the page's aspect ratio
    :param verify: passed to ``requests.post`` as its ``verify`` argument
    :returns: the converted image as opened by ``Image.open``
    :raises ValueError: when the service does not answer with status 200
    """
    assert( os.path.basename( input_pdf_file ).endswith( '.pdf' ) )
    assert( os.path.isfile( input_pdf_file ) )

    # Read the page size and close the file again right away.
    with open( input_pdf_file, 'rb' ) as pdf_stream:
        ipdf = PdfFileReader( pdf_stream )
        assert( ipdf.getNumPages() == 1 )
        mbox = ipdf.getPage( 0 ).mediaBox
        width = int( mbox.getWidth( ) )
        height = int( mbox.getHeight( ) )

    apiKey = get_cloudconvert_api_key( )
    params = { 'apikey' : apiKey,
               'input' : 'upload',
               'inputformat' : 'pdf',
               'outputformat' : 'png',
    }
    if newWidth is not None:
        assert( isinstance( newWidth, int ) )
        assert( newWidth > 10 )
        newHeight = int( height * 1.0 * newWidth / width )
        params['converteroptions[resize]'] = '%dx%d' % ( newWidth, newHeight )

    # The upload handle only needs to live for the duration of the request.
    with open( input_pdf_file, 'rb' ) as upload:
        response = requests.post( "https://api.cloudconvert.com/convert", params = params,
                                  files = { 'file' : upload }, verify = verify )
    if response.status_code != 200:
        raise ValueError("Error, could not upload and convert PDF file %s." % input_pdf_file )
    img = Image.open( StringIO( response.content ) )
    return img
def toStringFormat(path):
    """Return the normalised text of every page of the PDF at *path*.

    Each page is converted with the external ``pdftotext`` tool.  The text is
    lower-cased, the accented vowels and ``ñ`` are replaced by their plain
    letters, and every character outside ``a``-``z`` is dropped.

    :param path: path of the PDF; the intermediate text file is *path* with
        ``.pdf`` replaced by ``.txt`` and is removed after each page
    :returns: list with one cleaned string per page, in page order
    """
    replacements = (('á', 'a'), ('é', 'e'), ('í', 'i'),
                    ('ó', 'o'), ('ú', 'u'), ('ñ', 'n'))
    lista = list()

    # PyPDF2 is only used to learn how many pages the PDF has.
    with open(path, "rb") as pdf_stream:
        numero_paginas = PdfFileReader(pdf_stream).getNumPages()

    # pdftotext writes next to the PDF, with .txt instead of .pdf.
    txt = path.replace(".pdf", ".txt")

    for i in range(numero_paginas):
        page = str(i + 1)
        # Convert page i of the PDF to text.  An argument list avoids the
        # shell, so paths with spaces or metacharacters are handled safely.
        subprocess.call(["pdftotext", "-f", page, "-l", page, path])

        # Read the text of page i and clean the string.
        with codecs.open(txt, encoding='ISO-8859-1') as page_file:
            contenido_pagina = page_file.read().lower()
        for accented, plain in replacements:
            contenido_pagina = contenido_pagina.replace(accented, plain)
        contenido_pagina = re.sub('[^a-z]', '', contenido_pagina)
        lista.append(contenido_pagina)

        subprocess.call(["rm", "-R", txt])
    return lista
def pdf_copy(input: str, output: str, pages: [int], yes_to_all=False):
    """
    Copy pages from the input file in a new output file.

    Pages are written in ascending order.  Nothing is done, apart from
    printing an error, when *input* does not exist.  An existing *output*
    is only overwritten after confirmation through ``overwrite_dlg``, unless
    *yes_to_all* is set.

    :param input: name of the input pdf file
    :param output: name of the output pdf file
    :param pages: list containing the page numbers to copy in the new file,
        or None to copy every page
    :param yes_to_all: overwrite an existing output file without asking
    """
    if not os.path.isfile(input):
        print("Error. The file '%s' does not exist." % input)
        return

    if os.path.isfile(output) and not yes_to_all and not overwrite_dlg(output):
        return

    with open(input, "rb") as inputfile:
        reader = PdfFileReader(inputfile)
        writer = PdfFileWriter()

        if pages is None:
            pages = range(len(reader.pages))
        else:
            pages = parse_rangearg(pages, len(reader.pages))

        for pagenr in sorted(pages):
            writer.addPage(reader.getPage(pagenr))

        # Open the output only once the page selection is known to be valid,
        # so that a bad range does not truncate an existing file.
        with open(output, "wb") as outputfile:
            writer.write(outputfile)
def __init__(self, file_abs_path):
    """
    __init__(self, file_abs_path):

    Store the path and base name of the PDF, print them, then read the
    encryption status and the document information from the file.  Errors
    whose message contains ``encode`` are ignored; any other error is
    re-raised.

    Arguments:
        - file_abs_path: (string) Absolute file path.
    """
    self.absolute_path = file_abs_path
    self.name = os.path.basename(self.absolute_path)
    application_messages.print_file_name(self.name)
    application_messages.print_document_info('Path', self.absolute_path)
    try:
        with open(self.absolute_path, 'rb') as pdf_stream:
            document = PdfFileReader(pdf_stream)
            self.__get_encrypted_status(document)
            document_info = document.getDocumentInfo()
            if document_info:
                self.__parse_document_info(document_info)
    except Exception as ex:
        if 'encode' not in str(ex):
            # Re-raise the original error so that its type and traceback
            # survive (it is still an ``Exception`` for existing callers).
            raise
def make_tile(page_number, n_tiles, row, column):
    """Cut one tile out of a page of the source score and save it as a PDF.

    The page is divided into ``n_tiles`` columns and ``n_tiles`` rows.  The
    crop, trim and media boxes of the page are set to the tile at
    (*row*, *column*), with offsets measured from the page's lower-left
    corner, and the page is written to the ``hoban_tiles`` directory.

    :param page_number: 0-based index of the page in the source PDF
    :param n_tiles: number of tiles per row and per column
    :param row: tile row index
    :param column: tile column index
    """
    path = "/pieces/diotima_quartet/arco_quartet.pdf"
    output_path = "/pieces/diotima_quartet/hoban_tiles/hoban%s_%s@%s.pdf" % (page_number, row, column)

    with open(path, "rb") as source_stream:
        source = PdfFileReader(source_stream)
        page = source.getPage(page_number)

        width = float(page.mediaBox.getWidth())
        height = float(page.mediaBox.getHeight())
        tile_width = width / n_tiles
        tile_height = height / n_tiles

        left = tile_width * column
        bottom = tile_height * row
        lower_left = (left, bottom)
        upper_right = (left + tile_width, bottom + tile_height)

        page.cropBox.lowerLeft = lower_left
        page.cropBox.upperRight = upper_right
        page.trimBox.lowerLeft = lower_left
        page.trimBox.upperRight = upper_right
        page.mediaBox.lowerLeft = lower_left
        page.mediaBox.upperRight = upper_right

        # output - written while the source is still open, because the page
        # content is read from it during the write.
        output = PdfFileWriter()
        output.addPage(page)
        with open(output_path, "wb") as outputStream:
            output.write(outputStream)
    return None
def add_files(category, filenames_, input_abs_dir):
    """
    Handle pdf files for *category* (str).
    Input pdf files are in *input_abs_dir* (str)
    *filenames* gives the list of filenames relative to *input_abs_dir*.

    Every file is appended to the module-level ``proceedings_pdf`` and its
    page count is added to ``cumulative_page_count``.  When the running
    count is odd, the pages of ``blank_page_pdf`` are appended and the count
    is incremented by one.
    """
    global proceedings_pdf
    global cumulative_page_count
    global blank_page_pdf

    mprint('(For {})'.format(category))
    for filename_ in filenames_:
        input_pdf_path = os.path.join(input_abs_dir, filename_)
        mprint('\t' + os.path.relpath(input_pdf_path, working_dir))
        # NOTE(review): the handle is left open on purpose, presumably because
        # proceedings_pdf still reads from it when it is written - confirm.
        input_pdf = PdfFileReader(open(input_pdf_path, 'rb'))
        input_number_of_pages = input_pdf.getNumPages()
        proceedings_pdf.appendPagesFromReader(input_pdf)
        cumulative_page_count += input_number_of_pages
        # check if blank page insertion is needed
        if cumulative_page_count % 2:  # if odd number
            cumulative_page_count += 1
            proceedings_pdf.appendPagesFromReader(blank_page_pdf)
def split(paperpdf, splitpdf):
    """Cut every page of *paperpdf* in half vertically and save to *splitpdf*.

    Each input page yields two output pages: the left half followed by the
    right half.

    :param paperpdf: path of the PDF to split
    :param splitpdf: path of the PDF to write
    """
    output = PdfFileWriter()

    # The same file is opened twice, presumably so that each reader hands out
    # its own page objects whose media boxes can be changed independently.
    with open(paperpdf, "rb") as left_stream, open(paperpdf, "rb") as right_stream:
        left = PdfFileReader(left_stream)
        right = PdfFileReader(right_stream)

        pagecount = left.getNumPages()
        print("%s has %s pages to split." % (paperpdf, pagecount))

        for num in range(0, pagecount):
            left_page = left.getPage(num)
            right_page = right.getPage(num)

            box = left_page.mediaBox
            midpoint = (box.getUpperRight_x() / 2, box.getUpperRight_y())

            left_page.mediaBox.upperRight = midpoint
            output.addPage(left_page)

            right_page.mediaBox.upperLeft = midpoint
            output.addPage(right_page)

        print("Writing %s pages to %s" % (output.getNumPages(), splitpdf))
        with open(splitpdf, "wb") as s:
            output.write(s)
def toStringFormatParalell(path, rank, size, comm):
    """Distribute the text extraction of the PDF at *path* over several ranks.

    Rank 0 sends a dict with ``inicio``, ``fin`` and ``path`` to every rank
    from 1 to ``size - 1`` (tag 1).  Every other rank receives its dict, runs
    ``pdftotext`` for each page index in ``range(inicio, fin)``, cleans the
    text the same way for every page (lower-case, accents replaced, only
    ``a``-``z`` kept) and sends the resulting list back to rank 0 (tag 2).
    Rank 0 concatenates the lists in rank order and returns them.

    NOTE(review): ``comm`` is presumably an mpi4py communicator - confirm.
    """
    # NOTE(review): this handle is never closed.
    pdf = PdfFileReader(open(path, "rb"))
    numero_paginas = pdf.getNumPages()
    print("******************************************",numero_paginas)
    # Pages per worker, and the pages left over by the integer division.
    intervalo = int(numero_paginas/size)
    resto = numero_paginas%size
    fin, inicio = 0, 0
    if(rank==0):
        for i in range(1, size):
            # This branch cannot be taken: rank is 0 here and i starts at 1,
            # so ``resto`` is never added to any interval.
            if(i == rank):
                fin += intervalo
                inicio = (fin - intervalo) + 1
                fin += resto
                data = {'inicio':inicio, 'fin': fin, 'path': path}
                comm.send(data, dest=i, tag=1)
            else:
                fin += intervalo
                inicio = (fin - intervalo) + 1
                data = {'inicio':inicio, 'fin': fin, 'path': path}
                comm.send(data, dest=i, tag=1)
    if(rank!=0):
        data = comm.recv(source=0, tag=1)
        contenido_pagina = ""
        lista = list()
        for i in range(data['inicio'], data['fin']):
            # NOTE(review): ``rank + ".txt"`` presumably raises TypeError when
            # rank is an int, and pdftotext is not told to write to this
            # rank-specific name - confirm against the caller.
            txt = data['path'].replace(".pdf", rank + ".txt")
            subprocess.call(
                "pdftotext -f " + str(i + 1) + " -l " + str(i + 1) + " " + data['path'], shell=True)
            contenido_pagina = open(txt).read().lower()
            contenido_pagina = contenido_pagina.replace('á', 'a')
            contenido_pagina = contenido_pagina.replace('é', 'e')
            contenido_pagina = contenido_pagina.replace('í', 'i')
            contenido_pagina = contenido_pagina.replace('ó', 'o')
            contenido_pagina = contenido_pagina.replace('ú', 'u')
            contenido_pagina = contenido_pagina.replace('ñ', 'n')
            contenido_pagina = re.sub('[^a-z]', '', contenido_pagina)
            lista.append(contenido_pagina)
            #subprocess.call("rm -R " + txt, shell=True)
        comm.send(lista, dest=0, tag=2)
    if(rank == 0):
        book = []
        for i in range(1,size):
            book += comm.recv(source=i, tag=2)
        return book
def stampContent(self, page, index):
    """Stamp this object's text onto *page* and return the page.

    Nothing is drawn unless the selected kind is the text kind.  The text is
    written half-transparent red in Helvetica-Bold at ``self.font`` size,
    150 points higher for each successive *index*, and merged using the
    page's own ``/Rotate`` value.

    :param page: page object to stamp (modified in place)
    :param index: 0-based position of this stamp, used for vertical offset
    :returns: *page*
    """
    if self.kind.get() != self.TYPES[self.TEXT_INDEX]:
        return page

    buffer = StringIO.StringIO()
    page_size = (float(page.mediaBox[2]), float(page.mediaBox[3]))

    stamp_canvas = canvas.Canvas(buffer, page_size)
    stamp_canvas.setFillColorRGB(1, 0, 0, alpha=0.5)
    stamp_canvas.setFont("Helvetica-Bold", self.font)
    stamp_canvas.drawString(50, 150 * (index + 1), self.get_content())
    stamp_canvas.save()

    overlay = PdfFileReader(buffer).getPage(0)
    # Both offsets are half the overlay *width*, exactly as before.
    page.mergeRotatedTranslatedPage(
        page2=overlay,
        rotation=page.get('/Rotate', 0),
        tx=overlay.mediaBox.getWidth() / 2,
        ty=overlay.mediaBox.getWidth() / 2
    )
    return page
def pdf_get_no_pages(self, input_file):
    """Return number of pages in a pdf using PyPDF2.

    :param input_file: path of the PDF
    :returns: the page count, or None when the file cannot be opened or
        parsed
    """
    try:
        with open(input_file, "rb") as pdf_stream:
            return PdfFileReader(pdf_stream).getNumPages()
    except Exception:
        # Best effort by design; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit through.
        return None
return field_list if __name__ == "__main__": try: input_path = Path(argv[1]) except IndexError: input_path = None try: output_path = Path(argv[2]) except IndexError: output_path = None output_path = check_io_path_pair(input_path, "Input file", ".pdf", output_path, "Output file", ".txt", "_field_values") reader = PdfFileReader(input_path.open(mode="rb")) field_list = get_pdf_field_list(reader) if field_list is None: print(str(input_path) + " does not contain fields.") exit() field_str = "\n".join(map(str, field_list)) header = "Fields in file " + str(input_path) + "\n\n" output_path.write_text(header + field_str)
txt = f""" Information about {pdf_path}: Author: {information.author} Creator: {information.creator} Producer: {information.producer} Subject: {information.subject} Title: {information.title} Number of pages: {number_of_pages} """ print(txt) return information, number_of_pages numero_pg = PdfFileReader(open('Desobediencia_civil.pdf', 'rb')).getNumPages() #numero_pg = 40 #print("Número de páginas del pdf: ") #print(numero_pg) # Nº folios: #print("Número de folios teórico: ") #print(numero_pg/4) ################################################################### # Redondeo forzado hacia arriba import math #math.ceil(1.1) # Número de folios reales: numero_folios_reales = math.ceil(numero_pg/4)
def pdftitle(fh):
    """Return the title stored in the PDF read from *fh*.

    :param fh: file object (or anything else ``PdfFileReader`` accepts)
    :returns: the title, or ``''`` when the document information or the
        title is missing or empty
    """
    info = PdfFileReader(fh).getDocumentInfo()
    if not info:
        return ''
    title = info.title
    return title if title else ''
import os, sys
from PyPDF2 import PdfFileReader, PdfFileWriter

# Split the PDF named on the command line into one file per page, written to
# the current directory as split1.pdf, split2.pdf, ...
pdf = PdfFileReader(sys.argv[1])
page_total = pdf.getNumPages()

page = 0
while page < page_total:
    pdf_writer = PdfFileWriter()
    pdf_writer.addPage(pdf.getPage(page))

    # Output names are numbered from 1.
    output_filename = '{}{}.pdf'.format('split', page + 1)
    with open(output_filename, 'wb') as out:
        pdf_writer.write(out)

    print('Created: {}'.format(output_filename))
    page += 1
# Otherwise, the printing order is intended for automatic double-sided printing: # Outward outside # Outward inside # Middle outside # Middle inside import sys from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger inputOneFileName = sys.argv[1] outputOneFileName = sys.argv[2] print "input 1: " + inputOneFileName print "output 1: " + outputOneFileName inputReaderOne = PdfFileReader(open(inputOneFileName, "rb")) outputWriterOne = PdfFileWriter() pageIndex = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] pageIndex[0] = 13 pageIndex[1] = 0 if sys.argv[3] == "manual": pageIndex[2] = 11 pageIndex[3] = 2 pageIndex[4] = 9 pageIndex[5] = 4 pageIndex[6] = 1 pageIndex[7] = 12 pageIndex[8] = 3
input_path = sys.argv[1] starting_page_number = int(sys.argv[2]) starting_page_number = starting_page_number - 1 if starting_page_number < 0: starting_page_number = 0 ending_page_number = int(sys.argv[3]) output_base_path = os.path.dirname(input_path) output_base_name = os.path.basename(input_path) output_base_name = os.path.splitext(output_base_name)[0] output_path = os.path.join( output_base_path, output_base_name + "--" + str(starting_page_number) + "-" + str(ending_page_number) + ".pdf") print(output_path) # 1.) Read In PDF to PyPDF2 Object input_pdf = PdfFileReader(open(input_path, "rb")) output_pdf = PdfFileWriter() print("Inut PDF's Total Pages === " + str(input_pdf.numPages)) # 2.) Build Map of "ID's" ? of "Objects" ? in PDF to Actual Page Numbers # https://github.com/giffen/pdf_bookmarks_to_html/blob/master/pdf_bookmarks_to_html.py#L29 def build_id_to_page_map(input_pdf, pages=None, _result=None, _num_pages=None): if _result is None: _result = {} if pages is None: _num_pages = [] pages = input_pdf.trailer["/Root"].getObject()["/Pages"].getObject() t = pages["/Type"] if t == "/Pages":
def getArchiveFilenameList(self):
    """Return one ``/NNNN.jpg`` name per page of the PDF at ``self.path``.

    The names are numbered from ``/0001.jpg`` in page order.
    """
    # Only the page count is needed, so the file is closed again at once.
    with open(self.path, 'rb') as pdf_stream:
        page_count = PdfFileReader(pdf_stream).getNumPages()
    return ["/%04d.jpg" % (page) for page in range(1, page_count + 1)]
def __init__(self, pdf_path):
    """Create a reader for *pdf_path* and start without any writer.

    :param pdf_path: path of the PDF, handed to ``PdfFileReader``
    """
    # A fresh reader instance for the given PDF.
    self.pdf_reader = PdfFileReader(pdf_path)

    # Neither writer exists yet.
    self.writer1 = self.writer2 = None
class Command(BaseCommand):
    """Import a transcription PDF and attach it, folio by folio, to images.

    The whole PDF is stored as a ``Document`` on a new ``TranscriptionPage``
    below the ``transcriptions`` page.  The PDF is then split at pages that
    contain a folio marker such as ``[f. 12r]``; each section's text (and,
    unless ``textonly`` is set, a PDF of its pages) is saved on the image of
    the catalogue entry whose reference ends with that folio number.
    """

    help = 'Imports Transcriptions E.g. add_transcription "MSS 9" file.pdf'

    # Set this to ONLY import the text without processing/uploading files
    textonly = False

    temp_dir = "/tmp"
    file_path = None
    reference = None
    input_file = None
    catalogue_entry = None

    def add_arguments(self, parser):
        """Register the ``reference`` and ``file_path`` arguments."""
        parser.add_argument('reference', type=str)
        parser.add_argument('file_path', type=str)

    def handle(self, *args, **options):
        """Run the import for ``options['reference']`` and ``options['file_path']``."""
        self.reference = options['reference']
        self.file_path = options['file_path']

        # Keep the source open for the whole import: the per-folio writers
        # read page content from it when they are written.
        with open(self.file_path, "rb") as source:
            self.input_file = PdfFileReader(source)
            self.catalogue_entry = CatalogueEntry.objects.get(title=self.reference)
            self._create_transcription_page()
            self._import_folios()

    def _create_transcription_page(self):
        """Store the full PDF as a Document on a new TranscriptionPage."""
        transcription_page = RichTextPage.objects.get(
            slug='transcriptions')

        trans_page = TranscriptionPage()
        trans_page.title = self.reference
        trans_page.page = self.catalogue_entry

        with open(self.file_path, 'rb') as f:
            document = Document()
            document.title = 'Transcript: {}'.format(self.reference)
            document.file.save('Transcript {}.pdf'.format(
                self.reference), File(f), save=True)
            document.save()
            trans_page.transcription_pdf = document

        transcription_page.add_child(instance=trans_page)
        trans_page.save()

    def _import_folios(self):
        """Split the PDF at folio markers and process every section."""
        num_of_pages = self.input_file.getNumPages()

        res_page_no = None
        res_outfile = None
        res_text = None

        for p in range(num_of_pages):
            page = self.input_file.getPage(p)
            page_text = page.extractText()

            m = re.findall(r"\[f\.\s?([0-9]+)([r,v]+)\]", page_text)

            if m:
                # A new folio starts here; flush the previous one first.
                if res_page_no is not None:
                    self._process(res_page_no, res_outfile, res_text)

                res_page_no = "{}{}".format(m[0][0].zfill(3), m[0][1])
                res_outfile = PdfFileWriter()
                res_outfile.addPage(page)
                res_text = page_text
            elif res_outfile is not None:
                # Add Page
                res_outfile.addPage(page)
                res_text = '{} {}'.format(res_text, page_text)
            else:
                # Pages before the first marker belong to no folio.
                print('Skipping page {}: no folio marker found yet'.format(p + 1))

        # Flush the last folio.  The previous in-loop test compared the page
        # index with the page count, which never matched.
        if res_page_no is not None:
            self._process(res_page_no, res_outfile, res_text)

    def _process(self, page_no, outfile, text):
        """Save *text* (and the PDF in *outfile*) on the image for *page_no*."""
        print('Processing: {} {}'.format(self.reference, page_no))
        image = Image.objects.descendant_of(
            self.catalogue_entry).get(
            reference__endswith=page_no)

        if not self.textonly:
            temp_filename = self._temp_filename()
            try:
                with open(temp_filename, 'wb') as out:
                    outfile.write(out)

                # Create a Document
                with open(temp_filename, 'rb') as f:
                    document = Document()
                    document.title = 'Transcript: {}'.format(image.reference)
                    document.file.save('Transcript {}.pdf'.format(
                        image.reference), File(f), save=True)
                    document.save()

                    image.transcription = text
                    image.transcription_pdf = document
                    image.save()
            finally:
                # Delete the temporary file, also when saving failed.
                if os.path.exists(temp_filename):
                    os.remove(temp_filename)
        else:
            image.transcription = text
            image.save()

    def _temp_filename(self):
        """Return a random 20-character file path inside ``temp_dir``."""
        return os.path.join(self.temp_dir,''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(20)))  # noqa
def gen_pdf(self):
    """Compose the chart images and links into ``YouTube_Report.pdf``.

    Draws the pre-rendered PNGs, the separators and the title on a single
    2000x2300 ReportLab page, adds five link paragraphs, writes the page to
    ``YouTube_Report.pdf`` and opens it with the platform's default viewer.
    """
    from collections import Counter

    print("Combining Images into PDF.....")
    path1 = image_dir + "week_heatmap.png"
    path2 = image_dir + "memory.png"
    path3 = image_dir + "word_cloud.png"
    path4 = image_dir + "bar.png"
    path5 = image_dir + "score.png"
    path6 = image_dir + "red.png"
    pdf = PdfFileWriter()

    # Using ReportLab Canvas to insert image into PDF
    img_temp = BytesIO()
    img_doc = canvas.Canvas(img_temp, pagesize=(2000, 2300))

    # heat map x, y - start position
    img_doc.drawImage(path1, -150, 1400, width=2600, height=650)
    # memory
    img_doc.drawImage(path2, 1070, 681, width=697, height=667)
    # word_cloud
    img_doc.drawImage(path3, -28, 585, width=1100, height=778)
    # score
    img_doc.drawImage(path5, 1128, -59, width=894, height=672)
    # bar
    img_doc.drawImage(path4, 0, -11, width=1286, height=620)
    # logo
    img_doc.drawImage(logo, 99, 2068, width=105, height=80)

    # red squares behind the four "PLAY" links
    for y_inches in (16.25, 14.69, 13.14, 11.60):
        img_doc.drawImage(path6, inch * 24.3, inch * y_inches,
                          width=91, height=45)

    # draw three lines, x,y,width,height
    for y_inches in (28.5, 18.9, 8.5):
        img_doc.rect(0.83 * inch, y_inches * inch, 26.0 * inch, 0.04 * inch,
                     fill=1)

    # title
    img_doc.setFont("Helvetica-Bold", 82)
    img_doc.drawString(212, 2078, "Personal YouTube Usage Report")

    # Counter is linear in len(urls); max(set(urls), key=urls.count)
    # rescanned the whole list once per distinct URL.
    most_watched = Counter(urls).most_common(1)[0][0]
    first_search = str(re.sub(r"[^\w\s]", "", str(search_raw[-1])))

    # (markup, frame x in inches, frame y in inches), in drawing order:
    # first watch, most watched, first like, first comment, first search.
    body_style = ParagraphStyle("Body", fontSize=31)
    link_frames = [
        ("<link href=" + urls[-1] + ">PLAY</link>", 24.3, 14.88),
        ("<link href=" + most_watched + ">PLAY</link>", 24.3, 13.34),
        ("<link href=" + like + ">PLAY</link>", 24.3, 11.79),
        ("<link href=" + link + ">PLAY</link>", 24.3, 10.25),
        # the search entry is plain text wrapped in a link tag without target
        ("<link href=>" + first_search + "</link>", 23.7, 8.73),
    ]
    for markup, x_inches, y_inches in link_frames:
        frame = Frame(inch * x_inches, inch * y_inches, inch * 12, inch * 2)
        frame.addFromList([Paragraph(markup, body_style)], img_doc)

    img_doc.save()
    pdf.addPage(PdfFileReader(BytesIO(img_temp.getvalue())).getPage(0))

    # Close the report before handing it to the viewer so that it is
    # completely flushed to disk.
    with open("YouTube_Report.pdf", "wb") as report_file:
        pdf.write(report_file)
    print(
        "Congratulations! You have successfully created your personal YouTube report!"
    )
    if sys.platform == "win32":
        os.startfile("YouTube_Report.pdf")
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.call([opener, "YouTube_Report.pdf"])
# Build one ID card: draw the record's fields on a blank page, then stamp
# that page onto the first page of the card template.
# `text`, `L` and `i` are expected to be set by the code preceding this
# fragment.
text2 = L[1] + ' ' + L[2]  # surname and first name
text3 = L[3]  # job title
text4 = L[4]  # birth
text5 = L[5]  # expire
image = L[6]  # link to image

# create a new PDF with Reportlab
packet = io.BytesIO()
can = canvas.Canvas(packet, pagesize=letter)
can.setFont('Avenir', 8)
can.setFillColorRGB(0, 0, 0)
can.drawString(154, 75, text)
can.drawString(157, 55, text4)
can.drawString(157, 35, text5)
# ReportLab colour components range from 0 to 1; 1, 1, 1 is white.
can.setFillColorRGB(1, 1, 1)
can.drawString(16, 50, text2)
can.setFont('Avenir', 6)
can.drawString(16, 40, text3)
can.drawImage(image, 16, 61, width=0.87 * inch, height=0.87 * inch)
barcode = code128.Code128(text)
barcode.drawOn(can, 0, 10)
can.save()

# move to the beginning of the BytesIO buffer
packet.seek(0)
new_pdf = PdfFileReader(packet)

# read your existing PDF; the reader is lazy, so the template stays open
# until the merged page has been written
with open(r"C:\Users\pc\Desktop\programmes pyth\A8.pdf", "rb") as template_file:
    existing_pdf = PdfFileReader(template_file)
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    # finally, write "output" to a real file
    with open(r"C:\Users\pc\Desktop\programmes pyth\Output_files\card{0}.pdf".format(i+1), "wb") as outputStream:
        output.write(outputStream)
def PDF_page_counter():
    """Store the page count of the PDF at ``root.fileName`` in ``root.pages``."""
    # The handle is closed as soon as the count has been read; the previous
    # version left it open.
    with open(root.fileName, 'rb') as pdf_file:
        root.pages = PdfFileReader(pdf_file).getNumPages()
folder=os.path.splitext(filename)[0] # joins srcfile = os.path.join(original_folder, filename) # Try file existe? try: filepath = os.stat(srcfile) except: upload=False errormsg= "file does not exist" error_log(filename,upload,errormsg) sys.exit() with open(srcfile, "rb") as f: pdf = PdfFileReader(f) #Try bookmarks without child try: bookmarks = pdf.getOutlines() except: upload=False errormsg= "this file contains bookmarks with child" error_log(filename,upload,errormsg) sys.exit() #Read Bookmarks if bookmarks: for b in bookmarks: invID = b['/Title'] if len(invID) < 22 and re.match('\w',invID): i = pdf.getDestinationPageNumber(b) #Search InvID in database
plt.tight_layout() #plt.show() cnt += 1 plt.rcParams.update({'font.size': 8}) pdf.savefig(fig) plt.subplot(cnt) fig = plt.figure(figsize=(8, 8)) plt.barh(x_pos, performance, align='center', alpha=0.5) plt.yticks(x_pos, objects) plt.xlabel('Number Of users') plt.title('Users Having Bad passwords In Diffrent Ways') cnt += 1 plt.rcParams.update({'font.size': 8}) pdf.savefig(fig) pdf.close() #plt.show() pdf_writer = PdfFileWriter() paths = glob.glob('User*.pdf') paths.sort() output_path = r'User_List_grapic.pdf' for path in paths: pdf_reader = PdfFileReader(path) for page in range(pdf_reader.getNumPages()): pdf_writer.addPage(pdf_reader.getPage(page)) with open(output_path, 'wb') as fh: pdf_writer.write(fh)
def add_payer_signature(self, file_path, industries_type, tx):
    """Stamp the payer's transaction details on the last page of a PDF.

    Args:
        file_path: path of the PDF to sign.
        industries_type: passed to ``self._icolor`` to pick the text colour.
        tx: transaction whose signer, block, hash, time and user are drawn.

    Returns:
        The path of the signed copy, or ``''`` when ``tx`` is None.
    """
    if tx is None:
        return ''

    has_name = tx.user.name is not None and len(tx.user.name) > 0
    tx_time = datetime.fromtimestamp(int(tx.block_time_stamp), tz=pytz.utc)
    to_tx_hash = tx.hash

    base = path.basename(file_path)
    filename, _ = path.splitext(base)
    pdf_path = path.abspath(path.join(__file__, '../..')) + '/files/pdf/'
    # time.time() is in seconds; the value only makes the names unique.
    millis = int(round(time.time()))
    water_pdf_name = 'watermark' + str(millis) + '.pdf'
    output_pdf_name = 'Email:' + filename + str(millis) + '_signed.pdf'
    watermark_path = pdf_path + '/' + water_pdf_name
    icolor = self._icolor(industries_type)

    # Create the watermark page
    c = canvas.Canvas(watermark_path)
    # move the origin up and to the left
    c.translate(inch, inch)
    # change color (ReportLab colour components range from 0 to 1)
    if icolor == 3:
        c.setFillColorRGB(0, 0, 0)
    else:
        c.setFillColorRGB(1, 1, 1)
    # define a large font
    c.setFont("Helvetica", 14)

    # draw tx: labels in the left column, values in the right one
    labels = [
        (1.7, 'By:'),
        (1.37, 'Block Number:'),
        (1.04, 'TxHash:'),
        (0.38, 'Block Time:'),
        (0.05, 'Name:' if has_name else 'Email:'),
    ]
    values = [
        (1.7, tx.user.wallet.address),
        (1.37, tx.block_number),
        # the hash is split over two lines of 32 characters
        (1.04, to_tx_hash[:32]),
        (0.71, to_tx_hash[32:]),
        (0.38, tx_time.strftime('%a %b %d %Y %H:%M:%S GMT%z (%Z)')),
        (0.05, tx.user.name if has_name else tx.user.email),
    ]
    for y_inches, label in labels:
        c.drawString(-10, y_inches * inch, label)
    for y_inches, value in values:
        c.drawString(1.4 * inch, y_inches * inch, value)
    c.showPage()
    c.save()

    # The readers are lazy, so both inputs stay open until the output has
    # been written; the context manager then closes them.
    with open(watermark_path, "rb") as watermark_stream, \
            open(file_path, "rb") as input_stream:
        watermark = PdfFileReader(watermark_stream)
        input_file = PdfFileReader(input_stream)
        output_file = PdfFileWriter()

        page_count = input_file.getNumPages()
        for page_number in range(page_count):
            input_page = input_file.getPage(page_number)
            # only the last page receives the signature block
            if page_number == page_count - 1:
                input_page.mergePage(watermark.getPage(0))
            output_file.addPage(input_page)

        with open(pdf_path + output_pdf_name, "wb") as outputStream:
            output_file.write(outputStream)

    return pdf_path + output_pdf_name
# ADDING WATERMARK to a PDF
from PyPDF2 import PdfFileReader, PdfFileWriter

pdf = PdfFileReader("/home/hidayat7z/PDFS/pdfone.pdf")
watermark = PdfFileReader("/home/hidayat7z/PDFS/watermark.pdf")

# the watermark is the first page of the watermark file; it is merged
# onto every page of the document
page_w = watermark.getPage(0)
new_pdf = PdfFileWriter()  # collects the watermarked pages
pages = pdf.getNumPages()
for i in range(pages):
    page = pdf.getPage(i)
    page.mergePage(page_w)  # draw the watermark on top of this page
    new_pdf.addPage(page)

# `with` closes the output file even when writing fails
with open("/home/hidayat7z/PDFS/RESULTANT.pdf", 'wb') as pdf_file:
    new_pdf.write(pdf_file)
def add_payee_signature(self, file_path, industries_type, tx):
    """Stamp the payee's transaction details and the final stamp on a PDF.

    The signature block and the ``final_stamp.png`` image are merged onto
    the last page only.

    Args:
        file_path: path of the PDF to sign.
        industries_type: passed to ``self._icolor`` to pick the text colour.
        tx: transaction whose signer, block, hash, time and user are drawn.

    Returns:
        The path of the signed copy, or ``''`` when ``tx`` is None.
    """
    if tx is None:
        return ''

    base = path.basename(file_path)
    filename, _ = path.splitext(base)
    pdf_path = path.abspath(path.join(__file__, '../..')) + '/files/pdf/'
    # time.time() is in seconds; the value only makes the names unique.
    millis = int(round(time.time()))
    water_pdf_name = 'watermark' + str(millis) + '.pdf'
    output_pdf_name = 'Email:' + filename + str(millis) + '_signed.pdf'
    watermark_path = pdf_path + '/' + water_pdf_name

    has_name = tx.user.name is not None and len(tx.user.name) > 0
    tx_time = datetime.fromtimestamp(int(tx.block_time_stamp), tz=pytz.utc)
    from_tx_hash = tx.hash

    # Create the watermark page
    c = canvas.Canvas(watermark_path, pagesize=GOV_LEGAL)
    c_width, c_height = GOV_LEGAL
    # move the origin up and to the left
    c.translate(inch, inch)
    icolor = self._icolor(industries_type)
    # change color (ReportLab colour components range from 0 to 1)
    if icolor == 3:
        c.setFillColorRGB(0, 0, 0)
    else:
        c.setFillColorRGB(1, 1, 1)
    # add line
    c.setFont("Courier-Bold", 20)
    c.drawString(-10, 4.5 * inch, '____')

    # draw tx: labels in the left column, values in the right one
    c.setFont("Helvetica", 14)
    labels = [
        (4.14, 'By:'),
        (3.81, 'Block Number:'),
        (3.48, 'TxHash:'),
        (2.82, 'Block Time:'),
        (2.49, 'Name:' if has_name else 'Email:'),
    ]
    values = [
        (4.14, tx.user.wallet.address),
        (3.81, tx.block_number),
        # the hash is split over two lines of 32 characters
        (3.48, from_tx_hash[:32]),
        (3.15, from_tx_hash[32:]),
        (2.82, tx_time.strftime('%a %b %d %Y %H:%M:%S GMT%z (%Z)')),
        (2.49, tx.user.name if has_name else tx.user.email),
    ]
    for y_inches, label in labels:
        c.drawString(-10, y_inches * inch, label)
    for y_inches, value in values:
        c.drawString(1.4 * inch, y_inches * inch, value)
    c.showPage()
    c.save()

    # create canvas for final stamp
    stamp_pdf_name = 'stamp' + str(millis) + '.pdf'
    stamp_path = pdf_path + '/' + stamp_pdf_name
    c_stamp = canvas.Canvas(stamp_path)
    c_stamp.drawImage(self.app.config['FILE_FOLDER'] + '/final_stamp.png',
                      c_width - 155, 3.0 * inch,
                      width=120, height=120, mask='auto', anchor='nw')
    c_stamp.showPage()
    c_stamp.save()

    # The readers are lazy, so all inputs stay open until the output has
    # been written; the context manager then closes them.
    with open(watermark_path, "rb") as watermark_stream, \
            open(stamp_path, "rb") as stamp_stream, \
            open(file_path, "rb") as input_stream:
        watermark = PdfFileReader(watermark_stream)
        stamp = PdfFileReader(stamp_stream)
        input_file = PdfFileReader(input_stream)
        output_file = PdfFileWriter()

        page_count = input_file.getNumPages()
        for page_number in range(page_count):
            input_page = input_file.getPage(page_number)
            # only the last page receives the signature block and stamp
            if page_number == page_count - 1:
                input_page.mergePage(watermark.getPage(0))
                input_page.mergePage(stamp.getPage(0))
            output_file.addPage(input_page)

        with open(pdf_path + output_pdf_name, "wb") as outputStream:
            output_file.write(outputStream)

    return pdf_path + output_pdf_name
def add_description(self, description, industries_type, file_path):
    """Render the description page and, for long text, a signature page.

    The first page shows the background image for the industry colour, the
    'The Handshake' heading and ``description``. When the description is
    longer than 160 characters a second, background-only page is appended.

    Args:
        description: text laid out as a paragraph on the first page.
        industries_type: passed to ``self._icolor`` to pick colour/background.
        file_path: only its base name is used, to build the output name.

    Returns:
        The path of the generated PDF.
    """
    base = path.basename(file_path)
    filename, _ = path.splitext(base)
    pdf_path = path.abspath(path.join(__file__, '../..')) + '/files/pdf/'
    # time.time() is in seconds; the value only makes the names unique.
    millis = int(round(time.time()))
    water_pdf_name = 'watermark' + str(millis) + '.pdf'
    signature_pdf_name = 'signature' + str(millis) + '.pdf'
    output_pdf_name = 'Email:' + filename + str(millis) + '_signed.pdf'
    watermark_path = pdf_path + '/' + water_pdf_name
    signature_path = pdf_path + '/' + signature_pdf_name

    # create canvas for content
    c = canvas.Canvas(watermark_path, pagesize=GOV_LEGAL, bottomup=0)
    c_width, c_height = GOV_LEGAL

    # add background
    icolor = self._icolor(industries_type)
    background = self.app.config['FILE_FOLDER'] + "/bg_{}.png".format(icolor)
    c.drawImage(background, 0, 0, width=c_width, height=c_height,
                mask='auto', anchor='nw')

    # move the origin up and to the left
    c.translate(inch, inch)
    # change color (ReportLab colour components range from 0 to 1)
    if icolor == 3:
        c.setFillColorRGB(0, 0, 0)
    else:
        c.setFillColorRGB(1, 1, 1)

    # add handshake title
    c.setFont("Helvetica", 23)
    c.drawString(-10, 0.5 * inch, 'The Handshake')

    # define a large font
    stylesheet = getSampleStyleSheet()
    style = stylesheet['Normal']
    style.fontName = 'Courier-Bold'
    style.fontSize = 34
    style.leading = style.fontSize * 1.2
    style.textColor = colors.black if icolor == 3 else colors.white

    p = Paragraph(description, style)
    f = KeepInFrame(c_width + 20 - 2 * inch, c_height, [p], vAlign='TOP')
    width, height = f.wrapOn(c, c_width + 40 - 2 * inch, c_height)
    f.drawOn(c, -10, -height - 20 + (2.8 * inch))
    c.showPage()
    c.save()

    # create signature page if any
    needs_signature_page = len(description) > 160
    if needs_signature_page:
        c_signature = canvas.Canvas(signature_path, pagesize=GOV_LEGAL,
                                    bottomup=0)
        # add background
        c_signature.drawImage(background, 0, 0, width=c_width,
                              height=c_height, mask='auto', anchor='nw')
        c_signature.showPage()
        c_signature.save()

    # The readers are lazy: keep every input open until the output has
    # been written, then close them all.
    open_streams = []
    try:
        watermark_stream = open(watermark_path, "rb")
        open_streams.append(watermark_stream)
        output_file = PdfFileWriter()
        output_file.addPage(PdfFileReader(watermark_stream).getPage(0))

        if needs_signature_page:
            signature_stream = open(signature_path, "rb")
            open_streams.append(signature_stream)
            output_file.addPage(PdfFileReader(signature_stream).getPage(0))

        with open(pdf_path + output_pdf_name, "wb") as outputStream:
            output_file.write(outputStream)
    finally:
        for stream in open_streams:
            stream.close()

    return pdf_path + output_pdf_name
from PyPDF2 import PdfFileWriter, PdfFileReader

# Split the certificate PDF into one file per page (document-page<N>.pdf,
# N counted from 0). The input stays open while pages are copied because
# the reader loads page content lazily; `with` closes it afterwards.
with open(
        "/home/ila/Documents/personal/certificates/247_ai_certificates/paperwork/10_12.pdf",
        "rb") as source_file:
    inputpdf = PdfFileReader(source_file)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        with open("document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
Created on Sat Apr 09 14:53:17 2016

@author: Kalyani
"""
# Python 2 script: extract the text of every PDF found in `mypath` into a
# .txt file of the same base name.
mypath = "D:\\NLP_SP\\myPythonCode\\books\\class11"
import os
filelist = []  # names of the PDFs found in mypath
txtfile = []  # matching output names (.pdf replaced by .txt)
from PyPDF2 import PdfFileWriter, PdfFileReader
for file in os.listdir(mypath):
    if file.endswith(".pdf"):
        filelist.append(file)
        writefile = file.replace(".pdf", ".txt")
        txtfile.append(writefile)
for i in xrange(0, len(txtfile)):
    # NOTE(review): the names come from os.listdir(mypath) without the
    # directory part, so both open() calls resolve against the current
    # working directory -- confirm the script is run from mypath.
    pdfFileObj = PdfFileReader(open(filelist[i], "rb"))
    f = open(txtfile[i], 'w')
    try:
        j = 0
        while j < pdfFileObj.numPages:
            pageObj = pdfFileObj.getPage(j)
            # Each page's text is written UTF-8 encoded, one page per line
            f.write((pageObj.extractText()).encode('utf-8') + '\n')
            print pageObj.extractText()
            j += 1
    except:
        # Any error stops the extraction of this file; only a blank line
        # is printed.
        print ""
    f.close()
pgread.mediaBox.lowerLeft = (win_r2, win_t2) # if page is any other size than the three above else: pass def dataFrame(): tabula.io.convert_into("target_file.pdf", "output_file.csv", output_format="csv", lattice=True, pages="all") # pdf file input pdf_input = PdfFileReader(str(pdf_path)) # get number of pages in pdf file pgnum = pdf_input.getNumPages() #prints the total number of pages in file print('there are', pgnum, 'pages in this file') pgread = pdf_input.getPage(1) croppedWindow(winr, wint) pdf_writer = PdfFileWriter() pdf_writer.addPage(pgread) with Path("target_croppedfile.pdf").open(mode="wb") as output_file: pdf_writer.write(output_file) # while loop to loop through all pages of the file num = 1 while num != pgnum: # read certain page of pdf file pgread = pdf_input.getPage(num)
def pdfPrint2():
    """Fill the exam slip template and open the result.

    Looks up the candidate (``VarshowID2``) and the test (``Varshowtest2``)
    in the ``data`` table. When both exist, the form values are drawn twice
    side by side on top of the first page of ``mypdf.pdf`` and the result is
    written to ``destination.pdf``, which is then opened. Otherwise a
    warning dialog is shown.
    """
    # NOTE(review): both lookups build SQL by string interpolation from
    # widget values, which allows SQL injection. They should use bound
    # parameters, but the placeholder style depends on the DB driver, which
    # is not visible here -- confirm the driver and convert.
    with conn:
        c.execute("""SELECT * FROM data WHERE cid LIKE '%s' """ %VarshowID2.get())
        search = c.fetchall()

    with conn:
        c.execute("""SELECT * FROM data WHERE idTest LIKE %s """ %Varshowtest2.get())
        search2 = c.fetchall()

    # Truthiness works whether the driver returns an empty tuple or an empty
    # list; comparing with () treated an empty list as "found".
    if search:
        with conn:
            c.execute(""" SELECT room FROM dataRoom""")
            searchRoom = c.fetchall()

        # Refresh the shared, sorted list of room names. Each row holds one
        # column, read directly instead of parsing the row's repr.
        search_Room.clear()
        for g in searchRoom:
            search_Room.append(str(g[0]))
        search_Room.sort()

    if not (search and search2):
        messagebox.showwarning('แจ้งเตือน','กรุณาบันทึกข้อมูลลงระบบก่อนสั่งพิมพ์',parent=tebmain)
        return

    packet = io.BytesIO()
    # font
    pdfmetrics.registerFont(TTFont('THSarabun', 'THSarabun.ttf'))
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont('THSarabun', 16)

    # 1-based position of the selected room in the sorted room list
    a = search_Room.index(str(SHroomtest2.get())) + 1

    # left-hand copy of the slip
    can.drawString(100, 645, SHschool2.get())
    can.drawString(80, 665, VarshowNameTH2.get())
    can.drawString(170, 626, Varshowtest2.get())
    can.drawString(100, 608, str(a))
    can.drawString(155, 608, Varshowroomtest2.get())
    can.drawString(230, 608, OHroomtest2.get())

    # right-hand copy of the slip
    can.drawString(390, 645, SHschool2.get())
    can.drawString(370, 665, VarshowNameTH2.get())
    can.drawString(460, 626, Varshowtest2.get())
    can.drawString(385, 608, str(a))
    can.drawString(443, 608, Varshowroomtest2.get())
    can.drawString(523, 608, OHroomtest2.get())

    # date with the year shifted by 543 (Buddhist calendar year)
    thai_year = datetime.now().year + 543
    dt = datetime.now().strftime('%d / %m /')
    can.setFont('THSarabun', 14)
    can.drawString(488, 432, dt)
    can.drawString(528, 432, str(thai_year))
    can.drawString(193, 432, dt)
    can.drawString(235, 432, str(thai_year))
    can.save()

    # move to the beginning of the BytesIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    written = False
    # read your existing PDF; the reader is lazy, so the template stays open
    # until the output has been written
    with open("mypdf.pdf", "rb") as template_file:
        existing_pdf = PdfFileReader(template_file)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        try:
            outputStream = open("destination.pdf", "wb")
        except OSError:
            # typically the previous destination.pdf is still open in a
            # viewer, which blocks writing on Windows
            pass
        else:
            with outputStream:
                output.write(outputStream)
            written = True

    if not written:
        messagebox.showinfo('แจ้งเตือน','กรุณาปิด PDF',parent=tebmain)
        return

    os.startfile("destination.pdf")
    messagebox.showinfo('แจ้งเตือน','พิมพ์ไฟล์แล้ว',parent=tebmain)
from PyPDF2 import PdfFileWriter, PdfFileReader

# Split files/Sander_Test.pdf into one file per page under output_files/
# (document-page<N>.pdf, N counted from 0). The input stays open while
# pages are copied because the reader loads page content lazily; `with`
# closes it afterwards.
with open("files/Sander_Test.pdf", "rb") as source_file:
    inputpdf = PdfFileReader(source_file)
    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        with open("output_files/document-page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
def get_filepath():
    """Ask the user for a file with a Tk dialog and return the chosen path."""
    import tkinter
    from tkinter import filedialog
    root = tkinter.Tk()
    # hide the empty main window; only the dialog should be visible
    root.withdraw()
    return (filedialog.askopenfilename())


original_file = get_filepath()

# pass a list of page numbers you want to eliminate (page numbering starts from 0)
pages_to_exclude = [14, 15, 16]

# Shows the number of pages in the file
with open(original_file, 'rb') as f:
    pdf = PdfFileReader(f)
    number_of_pages = pdf.getNumPages()

pages_to_keep = [
    i for i in range(number_of_pages) if i not in pages_to_exclude
]

# The second positional argument of PdfFileReader is `strict`, not a file
# mode; the stray 'rb' was only accepted because it is truthy, which matches
# the default.
infile = PdfFileReader(original_file)
output = PdfFileWriter()

for i in pages_to_keep:
    p = infile.getPage(i)
    output.addPage(p)

output_file = get_filepath()
# 指定填充颜色 c.setFillColorRGB(0.6, 0, 0) # 设置透明度,1为不透明 c.setFillAlpha(0.1) c.drawString(15, 15, student_info) if picture_path: c.translate(5 * cm, 2.5 * cm) c.rotate(45) c.drawImage(picture_path, 15, 15, 600, 120) # 生成临时的 pdf c.save() # 读取临时生成的 pdf watermark = PdfFileReader(open(pdf_name, "rb")) # 读取原始文件夹中的所有pdf for file in os.listdir(folder_path): if file.endswith(".pdf"): # 要输出的文件路径及名称 output_file_name = file.split( '.pdf')[0] + '_' + student_info + '_watermarked' + '.pdf' print('正在生成:' + output_file_name) print('请等待……') output_file = PdfFileWriter() input_file = PdfFileReader(open(folder_path + '/' + file, "rb")) page_count = input_file.getNumPages() for page_number in range(page_count): input_page = input_file.getPage(page_number)
def iter_pdf_page(self, f):
    """Yield the pages of the PDF read from ``f`` one by one, in order."""
    document = PdfFileReader(f)
    total = document.getNumPages()
    index = 0
    while index < total:
        yield document.getPage(index)
        index += 1
def providencia(documento):
    """Build the 'Providencia' PDF for ``documento`` as an HTTP download.

    For every template page except the last one, the taxpayer data, code,
    date, address and body text are drawn on a blank page and merged onto
    the template page. When the body text leaves no room for the closing
    block, that block is drawn on template page index 3 instead, appended
    right after the page it belongs to.

    Returns:
        An ``HttpResponse`` holding the generated PDF as an attachment.
    """
    mes_letras = {
        1: 'Enero',
        2: 'Febrero',
        3: 'Marzo',
        4: 'Abril',
        5: 'Mayo',
        6: 'Junio',
        7: 'Julio',
        8: 'Agosto',
        9: 'Septiembre',
        10: 'Octubre',
        11: 'Noviembre',
        12: 'Diciembre',
    }

    def text_to_bold(text):
        return u'''<b><font size=12>{}</font></b> <br/>'''.format(text)

    def print_text_bold(text, x, y, pdf):
        p = ParagraphStyle('test')
        p.textColor = 'black'
        p.alignment = TA_LEFT
        p.fontSize = 8
        p.leading = 9
        para = Paragraph(text_to_bold(unicode(text)), p)
        para.wrapOn(pdf, 300, 50)
        para.drawOn(pdf, x, y)

    def get_fecha():
        from datetime import date
        d = date.today()
        fecha = "Caracas, {dia_letra} ({dia}) de {mes} de {anyo}".format(
            dia_letra=NumToWord.get_month_words(d.day),
            dia=str(d.day),
            mes=mes_letras[d.month],
            anyo=str(d.year))
        return fecha

    template_path = os.path.join(settings.PDF_ROOT, 'verificacion',
                                 'PROVIDENCIA.pdf')
    # Every template handle is collected here and closed once the response
    # has been written: the readers load page content lazily.
    open_handles = []

    def open_template():
        handle = open(template_path, 'rb')
        open_handles.append(handle)
        return PdfFileReader(handle)

    domicilio, gerente, supervisor, funcionarios, apoyo = documento_info(
        documento)
    texto = text_providencia(supervisor, funcionarios, apoyo)

    p = ParagraphStyle('test')
    p.textColor = 'black'
    p.alignment = TA_JUSTIFY
    p.fontSize = 10
    p.leading = 12
    para = Paragraph(text_to_bold(unicode(domicilio)), p)
    para_texto = Paragraph(unicode(texto), p)

    output = PdfFileWriter()

    # create response object
    response = HttpResponse(content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename=Verificacion_Providencia.pdf'
    fecha = get_fecha()

    try:
        template = open_template()
        # get number of pages
        num_pages = template.getNumPages()

        for page in xrange(num_pages - 1):
            new_page = False
            overlay_buffer = StringIO()
            # create string buffer for PDF
            pdf = canvas.Canvas(overlay_buffer, pagesize=letter)

            print_text_bold(unicode(documento.pst.razon_social),
                            220, 768 + 2, pdf)
            print_text_bold(unicode(documento.pst.rif), 220, 750 + 2, pdf)
            if documento.pst.rtn is not None:
                rtn = unicode(documento.pst.rtn)
            else:
                rtn = u'S/RTN'
            print_text_bold(rtn, 220, 735 + 2, pdf)
            print_text_bold(unicode(documento.codigo), 80, 835, pdf)
            print_text_bold(fecha, 295, 835, pdf)

            para.wrapOn(pdf, 300, 50)
            para.drawOn(pdf, 220, 695)
            para_texto.wrapOn(pdf, 450, 300)
            text_bottom = 675 - para_texto.height
            para_texto.drawOn(pdf, 80, text_bottom)

            if text_bottom > 230:
                # enough room left below the text for the closing block
                gaceta_end = gaceta(pdf, text_bottom, gerente)
                notificacion(pdf, gaceta_end)
                label(pdf, gaceta_end, page)
            else:
                new_page = True
            pdf.save()

            # put on watermark from buffer
            watermark = PdfFileReader(overlay_buffer)
            tmp = template.getPage(page)
            tmp.mergePage(watermark.getPage(0))
            # add processed pdf page
            output.addPage(tmp)

            # NOTE(review): the flattened source does not show whether this
            # continuation step ran inside the page loop or once after it;
            # the per-iteration reset of new_page suggests inside. Confirm
            # against the original layout.
            if new_page:
                overlay_buffer = StringIO()
                pdf = canvas.Canvas(overlay_buffer, pagesize=letter)
                gaceta_end = gaceta(pdf, 800, gerente)
                notificacion(pdf, gaceta_end)
                label(pdf, gaceta_end, page)
                pdf.save()

                # put on watermark from buffer
                watermark = PdfFileReader(overlay_buffer)
                # mergePage mutates the page object, so a fresh reader
                # supplies an untouched copy of the continuation page; as
                # before, the following pages are read from it too.
                template = open_template()
                tmp = template.getPage(3)
                tmp.mergePage(watermark.getPage(0))
                # add processed pdf page
                output.addPage(tmp)

        output.write(response)
    finally:
        for handle in open_handles:
            handle.close()
    return response
def constancia(documento):
    """Build the 'Constancia' PDF for ``documento`` as an HTTP download.

    Taxpayer data, address, body text and the signature blocks are drawn on
    a blank page and merged onto the first page of ``CONSTANCIA.pdf``. When
    the signatures do not fit, the remaining ones go on a second page.

    Returns:
        An ``HttpResponse`` holding the generated PDF as an attachment.
    """
    new_page = False
    domicilio, gerente, supervisor, funcionarios, apoyo = documento_info(
        documento)
    texto = text_constancia(documento, supervisor, funcionarios)

    p = ParagraphStyle('test')
    p.textColor = 'black'
    p.alignment = TA_JUSTIFY
    p.fontSize = 10
    p.leading = 12
    if domicilio:
        para = Paragraph(unicode(domicilio), p)
    else:
        para = Paragraph(unicode("No tiene registro de domicilio"), p)
    para_texto = Paragraph(unicode(texto), p)

    output = PdfFileWriter()

    # create response object
    response = HttpResponse(content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename=Verificacion_Constancia.pdf'

    # Template handles are collected here and closed once the response has
    # been written: the readers load page content lazily.
    open_handles = []
    try:
        template_handle = open(
            os.path.join(settings.PDF_ROOT, 'verificacion',
                         'CONSTANCIA.pdf'), 'rb')
        open_handles.append(template_handle)
        template = PdfFileReader(template_handle)

        overlay_buffer = StringIO()
        # create string buffer for PDF
        pdf = canvas.Canvas(overlay_buffer, pagesize=letter)

        pdf.drawString(220, 793, unicode(documento.pst.razon_social))
        pdf.drawString(220, 779, unicode(documento.pst.rif))
        if documento.pst.rtn is not None:
            pdf.drawString(220, 766, unicode(documento.pst.rtn))
        else:
            pdf.drawString(220, 766, u'S/RTN')
        pdf.drawString(80, 850, unicode(documento.codigo))

        para.wrapOn(pdf, 300, 50)
        para.drawOn(pdf, 220, 762 - para.height)
        para_texto.wrapOn(pdf, 450, 300)
        text_bottom = 730 - para_texto.height
        para_texto.drawOn(pdf, 80, text_bottom)

        pasivo(pdf, text_bottom)
        supervisor_end = supervisor_firma(pdf, text_bottom, supervisor)

        last_index = len(funcionarios) - 1
        for funcionario in xrange(len(funcionarios)):
            supervisor_end = funcionario_firma(pdf, supervisor_end,
                                               funcionarios[funcionario])
            # Below y=114 the page is full: the remaining signatures move
            # to a continuation page.
            if supervisor_end <= 114 and funcionario != last_index:
                new_page = True
                start_funcionario = funcionario + 1
                break
        pdf.save()

        # put on watermark from buffer
        watermark = PdfFileReader(overlay_buffer)
        tmp = template.getPage(0)
        tmp.mergePage(watermark.getPage(0))
        # add processed pdf page
        output.addPage(tmp)

        if new_page:
            overlay_buffer = StringIO()
            # create string buffer for PDF
            pdf = canvas.Canvas(overlay_buffer, pagesize=letter)
            supervisor_end = 850
            for funcionario in xrange(start_funcionario, len(funcionarios)):
                supervisor_end = funcionario_firma(
                    pdf, supervisor_end, funcionarios[funcionario])
            pdf.save()

            # put on watermark from buffer
            watermark = PdfFileReader(overlay_buffer)
            # NOTE(review): the continuation page comes from page index 3
            # of PROVIDENCIA.pdf, not CONSTANCIA.pdf -- confirm this is
            # intentional.
            extra_handle = open(
                os.path.join(settings.PDF_ROOT, 'verificacion',
                             'PROVIDENCIA.pdf'), 'rb')
            open_handles.append(extra_handle)
            tmp = PdfFileReader(extra_handle).getPage(3)
            tmp.mergePage(watermark.getPage(0))
            # add processed pdf page
            output.addPage(tmp)

        output.write(response)
    finally:
        for handle in open_handles:
            handle.close()
    return response
i = 0 # always starting on first page, index for number of notes octave = [4, 5] # User setting for Set1 or Set2 set = 2 # change to Set1 or Set2 <----------------------------------------------- User can change that if set == 1: f_name = 'Set1.pdf' t_name = 'numpresses_1.txt' o_name = 'set1_more_info.csv' elif set == 2: f_name = 'Set3.pdf' t_name = 'numpresses_2.txt' o_name = 'set2_more_info.csv' # Open music sheet in PDF Reader pdf = PdfFileReader(open(f_name, 'rb')) # read pdf webbrowser.open_new(f_name) # open pdf on browser n = pdf.getNumPages() # display total number of pages print('Number of Pages:\t' + str(n)) keyboard = Controller() time.sleep(2) # Open txt / csv file for page turning information page_note = [] with open(t_name) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') for row in csv_reader: page_note.append(row) ######## Example ######## # [numnote_turn, numnote_tot , forward/backward, numpage_turn # # ['54', '66', 'f', '1'] #
def cedula_hallazgo(documento):
    """Build the 'Cedula de Hallazgos' PDF for ``documento`` as a download.

    The document is laid out with a ReportLab ``SimpleDocTemplate``: a header
    table, a findings table, the observations and the names of the acting
    official and supervisor. Only the first page of the built document is
    copied into the response.

    Returns:
        An ``HttpResponse`` holding the generated PDF as an attachment.
    """
    domicilio, gerente, supervisor, funcionarios, apoyo = documento_info(
        documento)
    output = PdfFileWriter()

    # create response object
    response = HttpResponse(content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename=Cedula_de_Hallazgo.pdf'

    buffer = StringIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter, rightMargin=72,
                            leftMargin=72, topMargin=50, bottomMargin=100)
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER, fontSize=8))
    styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY,
                              fontSize=8))
    Story = []

    # Logo scaled to 1.25 inch wide, keeping its aspect ratio
    I = Image(os.path.join(settings.BASE_DIR, 'static', 'img', 'logo.png'))
    I.drawHeight = 1.25 * inch * I.drawHeight / I.drawWidth
    I.drawWidth = 1.25 * inch

    # Header table: logo / title / code, then taxpayer and subject rows
    data = [[I, '', '', '', '', ''],
            ['SUJETO PASIVO:', '', '', '', '', ''],
            ['MATERIA:', '', '', '', '', '']]
    data[0][2] = Paragraph(
        u'''<b>CEDULA DE HALLAZGOS<br/> Contribución Especial del 1% por la Presentación de<br/> Servicios Turísticos</b>''',
        styles["Center"])
    data[0][4] = documento.codigo
    data[1][1] = documento.pst.nombre_o_razon()
    data[1][3] = 'RIF: ' + documento.pst.rif
    data[2][1] = documento.hallazgos_materia
    data[2][3] = 'PERIODO: ' + documento.fecha_notificacion.strftime(
        "%d/%m/%Y")
    w = [80, 30, 90, 90, 80, 80]
    Story.append(
        Table(data,
              colWidths=w,
              style=[('GRID', (0, 0), (-1, -1), 0.25, colors.black),
                     ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
                     ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
                     ('FONTSIZE', (0, 0), (-1, -1), 8),
                     ('SPAN', (0, 0), (1, 0)),
                     ('SPAN', (2, 0), (3, 0)),
                     ('SPAN', (4, 0), (5, 0)),
                     ('SPAN', (1, 1), (2, 1)),
                     ('SPAN', (1, 2), (2, 2)),
                     ('SPAN', (3, 1), (5, 1)),
                     ('SPAN', (3, 2), (5, 2))]))
    Story.append(Spacer(1, 12))

    # Findings table: header row, an empty spacer row, then the content row
    data = [['CONDICIÓN', 'CRITERIO', 'EFECTO', 'EVIDENCIA'],
            ['', '', '', ''],
            ['', '', '', '']]
    try:
        data[2][0] = Paragraph(documento.hallazgos_condicion,
                               styles["Justify"])
        data[2][1] = Paragraph(documento.hallazgos_criterio,
                               styles["Justify"])
        data[2][2] = Paragraph(documento.hallazgos_efecto, styles["Justify"])
        data[2][3] = Paragraph(documento.hallazgos_evidencia,
                               styles["Justify"])
    except Exception:
        # Best effort: cells that cannot be rendered stay empty. The
        # handler is narrowed from a bare except so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        pass
    Story.append(
        Table(data,
              colWidths=[95, 170, 81, 105],
              style=[
                  ('GRID', (0, 0), (-1, 0), 0.25, colors.black),
                  ('GRID', (0, 2), (-1, 2), 0.25, colors.black),
                  ('FONTSIZE', (0, 0), (-1, -1), 8),
                  ('ALIGN', (0, 0), (-1, 0), 'CENTER'),
                  ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                  ('VALIGN', (0, 2), (-1, 2), 'TOP'),
              ]))
    Story.append(Spacer(1, 12))

    ptext = 'Observaciones: <u>%s</u>' % documento.observaciones
    Story.append(Paragraph(ptext, styles['Normal']))
    Story.append(Spacer(1, 12))
    Story.append(
        Paragraph('Fiscal Actuante: %s' % gerente.get_full_name(),
                  styles['Normal']))
    Story.append(
        Paragraph('Supervisor: %s' % supervisor.get_full_name(),
                  styles['Normal']))
    doc.build(Story)

    # NOTE(review): only page 0 of the built document reaches the response;
    # content that flows onto further pages is dropped -- confirm intended.
    watermark = PdfFileReader(buffer)
    output.addPage(watermark.getPage(0))
    output.write(response)
    return response
def fill(self, fname, pagesize, events, topspace, bottomspace, margins):
    """Render the events on a half-width page and write it 2-up to ``fname``.

    One column (half of ``pagesize`` minus a 6pt gutter) is laid out with
    ReportLab into a temporary PDF; its first page is then merged with a
    translated copy of itself so the same content appears twice side by side.

    Args:
        fname: object whose ``name`` attribute is the output path.
        pagesize: (width, height) of the final sheet.
        events: items rendered through ``Event(e).render()``.
        topspace, bottomspace, margins: page margins for the column.
    """
    tf = tempfile.NamedTemporaryFile(delete=False)
    # Only the path is needed: ReportLab opens it again by name. Closing
    # the handle here avoids leaking it.
    tf.close()
    try:
        pagesize = (pagesize[0] / 2 - 6, pagesize[1])
        doc = BaseDocTemplate(tf.name, pagesize=pagesize,
                              leftMargin=margins, bottomMargin=bottomspace,
                              rightMargin=margins, topMargin=topspace)
        column = Frame(doc.leftMargin+6, doc.bottomMargin+0.5*inch,
                       doc.width-6, 3.3*inch)
        rsvp = Frame(doc.leftMargin+6, doc.bottomMargin, doc.width-6,
                     0.5*inch)
        doc.addPageTemplates(PageTemplate(frames=[rsvp, column]))

        # render one side: the RSVP line fills the first frame, the events
        # go into the column frame after the frame break
        story = []
        story.append(Paragraph("Please RSVP at map.berniesanders.com",
                               styles["default"]))
        story.append(FrameBreak())
        for e in events:
            story.append(Event(e).render())
        doc.build(story)

        # now duplicate for 2-up; the reader is lazy, so the rendered file
        # stays open until the output has been written
        with open(tf.name, "rb") as rendered:
            src = PdfFileReader(rendered)
            out = PdfFileWriter()
            lhs = src.getPage(0)
            lhs.mergeTranslatedPage(lhs, lhs.mediaBox.getUpperRight_x(), 0,
                                    True)
            out.addPage(lhs)
            with open(fname.name, "wb") as outfile:
                out.write(outfile)
    finally:
        # remove the temporary file even when rendering or writing fails
        os.remove(tf.name)