def image_file_to_string(filename, lang=_language, psm=_pagesegmode, cleanup=_cleanup_scratch_flag, graceful_errors=True): """Applies tesseract to filename; or, if image is incompatible and graceful_errors=True, converts to compatible format and then applies tesseract. Fetches resulting text. If cleanup=True, delete scratch files after operation. Parameter lang specifies used language. If lang is empty, English is used. Page segmentation mode parameter psm is available in Tesseract 3.01. psm values are: 0 = Orientation and script detection (OSD) only. 1 = Automatic page segmentation with OSD. 2 = Automatic page segmentation, but no OSD, or OCR 3 = Fully automatic page segmentation, but no OSD. (Default) 4 = Assume a single column of text of variable sizes. 5 = Assume a single uniform block of vertically aligned text. 6 = Assume a single uniform block of text. 7 = Treat the image as a single text line. 8 = Treat the image as a single word. 9 = Treat the image as a single word in a circle. 10 = Treat the image as a single character.""" try: try: call_tesseract(filename, scratch_text_name_root, lang, psm) result = util.retrieve_result(scratch_text_name_root) except errors.Tesser_General_Exception: if graceful_errors: im = Image.open(filename) result = image_to_string(im, cleanup) else: raise finally: if cleanup: util.perform_cleanup(scratch_image_name, scratch_text_name_root) return result
def image_file_to_string(filename, lang = _language, psm = _pagesegmode, cleanup = _cleanup_scratch_flag, graceful_errors=True): """Applies tesseract to filename; or, if image is incompatible and graceful_errors=True, converts to compatible format and then applies tesseract. Fetches resulting text. If cleanup=True, delete scratch files after operation. Parameter lang specifies used language. If lang is empty, English is used. Page segmentation mode parameter psm is available in Tesseract 3.01. psm values are: 0 = Orientation and script detection (OSD) only. 1 = Automatic page segmentation with OSD. 2 = Automatic page segmentation, but no OSD, or OCR 3 = Fully automatic page segmentation, but no OSD. (Default) 4 = Assume a single column of text of variable sizes. 5 = Assume a single uniform block of vertically aligned text. 6 = Assume a single uniform block of text. 7 = Treat the image as a single text line. 8 = Treat the image as a single word. 9 = Treat the image as a single word in a circle. 10 = Treat the image as a single character.""" try: try: call_tesseract(filename, scratch_text_name_root, lang, psm) result = util.retrieve_result(scratch_text_name_root) except errors.Tesser_General_Exception: if graceful_errors: im = Image.open(filename) result = image_to_string(im, cleanup) else: raise finally: if cleanup: util.perform_cleanup(scratch_image_name, scratch_text_name_root) return result
def image_to_string(im, cleanup=_cleanup_scratch_flag): """Converts im to file, applies tesseract, and fetches resulting text. If cleanup=True, delete scratch files after operation.""" try: util.image_to_scratch(im, scratch_image_name) call_tesseract(scratch_image_name, scratch_text_name_root) result = util.retrieve_result(scratch_text_name_root) finally: if cleanup: util.perform_cleanup(scratch_image_name, scratch_text_name_root) return result
def image_file_to_string(filename, cleanup=_cleanup_scratch_flag, graceful_errors=True): """Applies tesseract to filename; or, if image is incompatible and graceful_errors=True, converts to compatible format and then applies tesseract. Fetches resulting text. If cleanup=True, delete scratch files after operation.""" try: try: call_tesseract(filename, scratch_text_name_root) result = util.retrieve_result(scratch_text_name_root) except errors.Tesser_General_Exception: if graceful_errors: im = Image.open(filename) result = image_to_string(im, cleanup) else: raise finally: if cleanup: util.perform_cleanup(scratch_image_name, scratch_text_name_root) return result
def image_file_to_string(filename, cleanup=_cleanup_scratch_flag, graceful_errors=True): """Applies tesseract to filename; or, if image is incompatible and graceful_errors=True, converts to compatible format and then applies tesseract. Fetches resulting text. If cleanup=True, delete scratch files after operation.""" try: try: call_tesseract(filename, scratch_text_name_root) result = util.retrieve_result(scratch_text_name_root) except (errors.Tesser_General_Exception): if graceful_errors: im = Image.open(filename) result = image_to_string(im, cleanup) else: raise finally: if cleanup: util.perform_cleanup(scratch_image_name, scratch_text_name_root) return result