def process(self, pdf_filename, pdf_resolution, imageformat, do_orientation):
    """Convert every page of a PDF to an image, OCR it, and return the text.

    Each page is converted to 8-bit grayscale, level-adjusted, written to
    ``buffer.png`` in the current working directory (overwritten on every
    page) and handed to ``self.image2txt_pyocr`` as an in-memory blob.
    Per-page and total timings are printed to stdout.

    NOTE(review): ``Image`` is presumably ``wand.image.Image`` (the fallback
    message below mentions Wand) -- confirm against the file's imports.

    Args:
        pdf_filename: Path given to ``Image(filename=...)``.
        pdf_resolution: Resolution used when reading the file; also assigned
            to each page image's ``density`` attribute.
        imageformat: Format passed to ``convert()`` and ``make_blob()``.
        do_orientation: Forwarded unchanged to ``self.image2txt_pyocr``.

    Returns:
        The text returned for each page, each followed by a newline,
        concatenated in page order ("" when there are no pages).
    """
    page_texts = []
    # Context managers release the native image resources deterministically,
    # including when a page fails half-way through.
    with Image(filename=pdf_filename, resolution=pdf_resolution) as image_pdf:
        with image_pdf.convert(imageformat) as image_page:
            process_start = time.time()
            for page, img in enumerate(image_page.sequence, start=1):
                with Image(image=img) as img_per_page:
                    img_per_page.type = 'grayscale'
                    img_per_page.depth = 8
                    img_per_page.density = pdf_resolution
                    try:
                        img_per_page.level(black=0.3, white=1.0, gamma=1.5,
                                           channel=None)
                    except AttributeError as e:
                        # level() is unavailable: continue without the
                        # level adjustment instead of aborting the run.
                        print("Update Wand library: %s" % e)
                    img_per_page.save(filename="buffer.png")

                    page_start = time.time()
                    txt = self.image2txt_pyocr(
                        img_per_page.make_blob(imageformat), do_orientation)
                    page_elaboration = time.time() - page_start
                    print("page %s - size %s - process %2d sec. - text %s" %
                          (page, img_per_page.size, page_elaboration, len(txt)))
                    page_texts.append("%s\n" % txt)
                # Free the source frame as soon as its page has been handled.
                img.destroy()
            process_end = time.time() - process_start
    print("Total elaboration time: %s" % process_end)
    return "".join(page_texts)
def pdf_run(self, image_file_name, filename, path):
    """Write every page of a document as a grayscale PNG under ``path``.

    The input is read at a resolution of 300 and converted to PNG. Each page
    is converted to 8-bit grayscale, level-adjusted and saved as
    ``<path>/saram_<filename><page>.png`` with pages numbered from 1.
    Per-page and total timings are printed to stdout.

    NOTE(review): ``Image`` is presumably ``wand.image.Image`` (the fallback
    message below mentions Wand) -- confirm against the file's imports.

    Args:
        image_file_name: Path given to ``Image(filename=...)``.
        filename: Base name embedded in every output file name.
        path: Existing output directory; its mode is set to 0o777 before
            each page is saved.

    Returns:
        None.
    """
    resolution = 300
    # Context managers release the native image resources deterministically,
    # including when a page fails half-way through.
    with Image(filename=image_file_name, resolution=resolution) as image_pdf:
        with image_pdf.convert("png") as image_page:
            process_start = time.time()
            for page, img in enumerate(image_page.sequence, start=1):
                # Start the clock before the page is processed so the
                # reported per-page time covers the conversion and the save.
                page_start = time.time()
                with Image(image=img) as img_per_page:
                    img_per_page.type = 'grayscale'
                    img_per_page.depth = 8
                    img_per_page.density = resolution
                    try:
                        img_per_page.level(black=0.3, white=1.0, gamma=1.5,
                                           channel=None)
                    except AttributeError as e:
                        # level() is unavailable: continue without the
                        # level adjustment instead of aborting the run.
                        print("Update Wand library: %s" % e)

                    img_buf = os.path.join(
                        path, "saram_" + filename + str(page) + ".png")
                    # NOTE(review): this makes the output directory
                    # world-writable; confirm that 0o777 is really required.
                    os.chmod(path, 0o777)
                    img_per_page.save(filename=img_buf)

                    page_elaboration = time.time() - page_start
                    print("page %s - size %s - process %2d sec." %
                          (page, img_per_page.size, page_elaboration))
                # Free the source frame as soon as its page has been written.
                img.destroy()
            process_end = time.time() - process_start
    print("Total elaboration time: %s" % process_end)
def process(self, pdf_filename, pdf_resolution, imageformat, do_orientation, png_filename):
    """Convert every page of a PDF to an image and pass it to the OCR step.

    Each page is converted to 8-bit grayscale, level-adjusted, saved to
    ``png_filename`` (the same file is overwritten on every page) and handed
    to ``self.image2txt_pyocr`` as an in-memory blob. The total processing
    time is printed to stdout.

    NOTE(review): ``Image`` is presumably ``wand.image.Image`` -- confirm
    against the file's imports.

    Args:
        pdf_filename: Path given to ``Image(filename=...)``.
        pdf_resolution: Resolution used when reading the file; also assigned
            to each page image's ``density`` attribute.
        imageformat: Format passed to ``convert()`` and ``make_blob()``.
        do_orientation: Forwarded unchanged to ``self.image2txt_pyocr``.
        png_filename: Path every page image is saved to.

    Returns:
        Always the empty string: the value returned by
        ``self.image2txt_pyocr`` is not collected.

    Raises:
        AttributeError: If the page image has no ``level`` method; unlike a
            guarded call, this is not caught here.
    """
    # Context managers release the native image resources deterministically,
    # including when a page fails half-way through.
    with Image(filename=pdf_filename, resolution=pdf_resolution) as image_pdf:
        with image_pdf.convert(imageformat) as image_page:
            process_start = time.time()
            for img in image_page.sequence:
                with Image(image=img) as img_per_page:
                    img_per_page.type = 'grayscale'
                    img_per_page.depth = 8
                    img_per_page.density = pdf_resolution
                    img_per_page.level(black=0.3, white=1.0, gamma=1.5,
                                       channel=None)
                    img_per_page.save(filename=png_filename)
                    # Called for its side effects only; the result is
                    # intentionally discarded.
                    self.image2txt_pyocr(
                        img_per_page.make_blob(imageformat), do_orientation)
            process_end = time.time() - process_start
    print("Total elaboration time: %s" % process_end)
    return ""