def merge(self, pdf_one, pdf_two, filename='my.pdf', output_dir='D:/pdf/'):
    """Interleave a front-side scan and a back-side scan into one PDF.

    ``pdf_one`` holds the scanned front sides and ``pdf_two`` the scanned
    back sides. Pages of ``pdf_one`` are taken in order; after each of them
    one page of ``pdf_two`` is added, starting from its last page and moving
    backwards, so the result follows the original page order.

    :param pdf_one: path of the PDF with the front sides.
    :param pdf_two: path of the PDF with the back sides.
    :param filename: name of the merged file.
    :param output_dir: prefix prepended verbatim to ``filename`` (no path
        separator is inserted between the two).
    :return: None
    """
    # Context managers guarantee both inputs and the output are closed even
    # when reading or writing a page raises.
    with open(pdf_one, 'rb') as input_one, open(pdf_two, 'rb') as input_two:
        pdf_input_one = PdfFileReader(input_one)
        pdf_input_two = PdfFileReader(input_two)
        num_one = pdf_input_one.getNumPages()
        num_two = pdf_input_two.getNumPages()
        print(num_one, num_two)

        pdf_output = PdfFileWriter()
        index_two = num_two - 1  # back sides are consumed last-to-first
        for index_one in range(num_one):
            print(index_one, index_two)
            pdf_output.addPage(pdf_input_one.getPage(index_one))
            pdf_output.addPage(pdf_input_two.getPage(index_two))
            index_two -= 1

        # The inputs must still be open here: the writer reads page data
        # from them while serialising.
        with open(output_dir + filename, 'wb') as output_stream:
            pdf_output.write(output_stream)
    print('Done!')
def generate_images(path, save_dir_name, is_train):
    """Render every page of a PDF to a PNG file under ``png_files/``.

    Images are written to
    ``png_files/train_images/<save_dir_name>_annotated_images`` when
    ``is_train`` is true, otherwise to
    ``png_files/test_images/<save_dir_name>_annotated_images``. Each file is
    named ``<pdf stem>_<n>.png`` with ``n`` counting from 1.

    :param path: path of the PDF file to convert.
    :param save_dir_name: name used to build the output directory.
    :param is_train: selects the ``train_images`` or ``test_images`` subtree.
    :return: None
    """
    split_dir = 'train_images/' if is_train else 'test_images/'
    save_directory_path = 'png_files/' + split_dir + save_dir_name + '_annotated_images'
    # Creates 'png_files/' and any other missing parent in one call.
    os.makedirs(save_directory_path, exist_ok=True)

    filename = path
    print("Converting " + filename + " from pdf to PNG...")

    # Only the page count is needed from PyPDF2, so the handle is closed
    # before the conversion starts.
    with open(filename, mode="rb") as pdf_file:
        reader = PdfFileReader(pdf_file)
        try:
            page_number = reader.getNumPages()
        except Exception:
            # Deliberate single retry kept from the original code, which
            # attributes a failing first call to a PyPDF2 bug.
            page_number = reader.getNumPages()

    stem = os.path.splitext(os.path.basename(filename))[0]
    with tempfile.TemporaryDirectory() as work_dir:
        images_from_path = convert_from_path(
            filename,
            dpi=72,
            output_folder=work_dir,
            last_page=page_number,
            first_page=0,
        )
        for page_index, page in enumerate(images_from_path, start=1):
            base_filename = stem + '_' + str(page_index) + '.png'
            page.save(os.path.join(save_directory_path, base_filename), 'PNG')
    print('PDF file successfully converted.')
def test_cat(self):
    """Check that ``cat`` joins the one-page and five-page PDFs into six pages."""
    arguments = ['cat', ONEPAGE_PDF, FIVEPAGE_PDF, self.outputfile]
    run_stapler(arguments)

    # The command must have produced the output file ...
    produced = os.path.isfile(self.outputfile)
    self.assertTrue(produced)

    # ... and that file must contain the pages of both inputs (1 + 5).
    with open(self.outputfile, 'rb') as merged:
        page_total = PdfFileReader(merged).getNumPages()
        self.assertEqual(page_total, 6)
def PdfPrettyPrint(inputname, outputname):
    """Reorder the pages of a PDF in groups of four and write the result.

    For every group of four consecutive source pages starting at index ``i``
    the output receives, in this order: page ``i``, page ``i + 2``, page
    ``i + 1`` rotated by 180 degrees and page ``i + 3`` rotated by 180
    degrees. Positions that fall past the end of the source are filled with
    blank pages.

    :param inputname: path of the PDF to read.
    :param outputname: path of the PDF to write.
    :return: True
    """
    # (offset inside the group, rotate by 180 degrees) in output order.
    layout = ((0, False), (2, False), (1, True), (3, True))

    # Context managers close both files even if PyPDF2 raises midway.
    with open(inputname, 'rb') as inputfile:
        ipt = PdfFileReader(inputfile)
        wrt = PdfFileWriter()
        pdfnums = ipt.getNumPages()

        for group_start in range(0, pdfnums, 4):
            for offset, upside_down in layout:
                index = group_start + offset
                if index < pdfnums:
                    page = ipt.getPage(index)
                    if upside_down:
                        page.rotateClockwise(180)
                    wrt.addPage(page)
                else:
                    wrt.addBlankPage()

        # Written while the input is still open, since the writer pulls page
        # data from the reader's stream.
        with open(outputname, "wb") as fl:
            wrt.write(fl)
    return True
def readPDFfile(infile):
    """Return the extracted text of every page of a PDF.

    The text of each page is followed by a newline, and pages appear in
    document order.

    :param infile: path of the PDF file to read.
    :return: the concatenated page texts as one string.
    """
    # NOTE(review): the original line was `PdfFileReader(infile, "rb"))`,
    # which does not parse; it is restored as `open(infile, "rb")` on the
    # assumption that `infile` is a path -- confirm against callers.
    with open(infile, "rb") as handle:
        pdf = PdfFileReader(handle)
        return "".join(
            pdf.getPage(i).extractText() + "\n"
            for i in range(pdf.getNumPages())
        )
def getDataUsingPyPdf2(filename):
    """Return the extracted text of every page of a PDF.

    The text of each page is followed by a newline, and pages appear in
    document order.

    :param filename: path of the PDF file to read.
    :return: the concatenated page texts as one string.
    """
    # The handle is closed on exit instead of being left to the garbage
    # collector; extraction happens while it is still open.
    with open(filename, "rb") as handle:
        pdf = PdfFileReader(handle)
        return "".join(
            pdf.getPage(i).extractText() + "\n"
            for i in range(pdf.getNumPages())
        )
def searchPDF(filename, search_term):
    """Return the numbers of the pages whose text contains a search term.

    Both the term and the page text are lower-cased before comparison.

    :param filename: path of the PDF file to search.
    :param search_term: text to look for.
    :return: list of matching page numbers, counted from 1, in page order.
    """
    search_term = search_term.lower()
    # The handle is closed on exit; all extraction happens inside the block.
    with open(filename, "rb") as handle:
        pdf = PdfFileReader(handle)
        return [
            i + 1
            for i in range(pdf.getNumPages())
            if search_term in pdf.getPage(i).extractText().lower()
        ]
def __init__(self, path):
    """Record a PDF's path and try to read its metadata and page count.

    ``meta_data`` and ``pages`` stay ``None`` when the file cannot be
    opened or parsed; the error is printed rather than raised.

    :param path: path of the PDF file.
    """
    self.path = path
    self.pages = None
    self.meta_data = None
    try:
        with open(self.path, 'rb') as fp:
            pdf = PdfFileReader(fp)
            self.meta_data = pdf.getDocumentInfo()
            self.pages = pdf.getNumPages()
    except (IOError, TypeError) as e:
        print(e)
    except Exception:
        # `Exception` rather than a bare `except` so KeyboardInterrupt and
        # SystemExit still propagate; every ordinary error is reported as
        # before.
        print("Unexpected error:", sys.exc_info()[0])
def clickOK():
    """Print the page count of the selected PDF and the text of one page.

    The file name is ``selectPDF.get()`` with ``.pdf`` appended and the
    requested zero-based page index comes from ``entry1.get()``; both are
    module-level objects (presumably Tk widgets or variables -- confirm).
    Nothing further is printed when the index is outside the document.
    """
    with open(selectPDF.get() + '.pdf', 'rb') as pdf_file:
        reader = PdfFileReader(pdf_file)
        page_count = reader.getNumPages()
        pprint.pprint(page_count)
        try:
            # The original compared the loop index with the raw `.get()`
            # value, which can never match when that value is a string.
            # Converting once makes the comparison numeric.
            wanted = int(entry1.get())
            if 0 <= wanted < page_count:
                pprint.pprint(reader.getPage(wanted).extractText())
        except Exception:
            # GUI callback boundary: report the failure the same way the
            # original did instead of propagating it to the caller.
            print("except")
def generate_images_for_lecture(self, lecture_instance, file_pdf):
    """Create one ``Image`` record per page of an uploaded lecture PDF.

    :param lecture_instance: object assigned to each record's ``lecture``.
    :param file_pdf: PDF file object; its ``path`` and ``name`` attributes
        are used to build the per-page names.
    """
    page_total = PdfFileReader(file_pdf).getNumPages()
    for page_num in range(page_total):
        page_label = str(page_num)

        # "<path>[<n>]" -- presumably the PythonMagick selector for a single
        # page of the PDF; confirm against the PythonMagick docs.
        page_file_name = ''.join((file_pdf.path, '[', page_label, ']'))
        print(page_file_name)
        rendered = PythonMagick.Image(page_file_name)

        image_lecture_page = Image()
        image_lecture_page.lecture = lecture_instance
        image_file_name = ''.join(
            (basename(file_pdf.name), "-page-", page_label, ".jpeg")
        )
        image_lecture_page.image.save(image_file_name, File(rendered))
        print("here")
        print(image_lecture_page)
def crop(pdf_in, pdf_out):
    """Crop every page of a PDF to the paper size reported for that page.

    Parameters
        pdf_in - absolute path of the source PDF
        pdf_out - absolute path of the PDF to write

    The second parameter is temporary: in production the function is meant
    to save the cropped result back into the same file.

    When ``analyze_papersize`` returns no information the work is delegated
    to the external ``pdfcrop.pl`` script instead.

    :return: status (always True)
    :raises ValueError: if no press in ``PrintingPress._registry`` has the
        machine name reported for a page.
    """
    status = True

    # Paper size for each page, keyed by 1-based page number,
    # like {1: ('Speedmaster', 900, 640), 2: ('Dominant', 640, 450)}
    papers = analyze_papersize(pdf_in)

    # TODO: rework this temporary fallback used when paper-size information
    # is missing.
    if papers == {}:
        # NOTE(review): both paths are interpolated into a shell command
        # unquoted, so paths with spaces or shell metacharacters will break
        # it; consider subprocess with an argument list.
        perl_crop = "perl pdfcrop.pl {} {}".format(pdf_in, pdf_out)
        os.system(perl_crop)
        return status

    # Context managers close both files even when a page fails to process.
    with open(pdf_in, "rb") as source:
        reader = PdfFileReader(source)
        writer = PdfFileWriter()

        # Number of pages
        pages_qty = reader.getNumPages()

        for index in range(pages_qty):
            paper = papers[index + 1]
            paper_machine = paper[0]
            paper_w = paper[1]
            paper_h = paper[2]

            # The last registered press with a matching name wins, as before.
            # Resetting to None per page stops a page from silently reusing
            # the previous page's press (or hitting an unbound name) when
            # nothing matches.
            machine = None
            for press in PrintingPress._registry:
                if paper_machine == press.name:
                    machine = press
            if machine is None:
                raise ValueError(
                    'No printing press named {!r} for page {}'.format(
                        paper_machine, index + 1))
            plate_w = machine.plate_w

            page = reader.getPage(index)

            # EXAMPLE
            # The resulting document has a trim box that is 200x200 points
            # and starts at 25,25 points inside the media box.
            # The crop box is 25 points inside the trim box.
            #   print mm(page.mediaBox.getUpperRight_x()), mm(page.mediaBox.getUpperRight_y())
            #   page.trimBox.lowerLeft = (25, 25)
            #   page.trimBox.upperRight = (225, 225)
            #   page.cropBox.lowerLeft = (50, 50)
            #   page.cropBox.upperRight = (200, 200)

            print('Crop page {} to paper {}x{}'.format(index+1, paper_w, paper_h))
            # left offset, bottom offset
            page.mediaBox.lowerLeft = ((pt(plate_w - paper_w)/2), pt(machine.klapan))
            # width + offset, height + offset
            page.mediaBox.upperRight = (pt(paper_w + (plate_w - paper_w)/2), pt(paper_h + machine.klapan))
            writer.addPage(page)

        # Written while the source is still open, since the writer reads
        # page data from it.
        with open(pdf_out, "wb") as outputstream:
            writer.write(outputstream)
    return status