def extract_and_ocr(filename, region):
    """Crop ``region`` out of the image at ``filename`` and OCR it as text.

    The ABBYY credentials are read from the ``ABBYY_APPLICATION_ID`` and
    ``ABBYY_PASSWORD`` environment variables. The cropped area is encoded as
    JPEG in memory and submitted under the name ``temp.jpg`` with
    ``exportFormat='txt'``.

    Args:
        filename: Path (or file object) handed to ``Image.open``.
        region: Crop box handed unchanged to ``image.crop``
            (presumably a ``(left, upper, right, lower)`` tuple -- confirm
            against the imaging library in use).

    Returns:
        Whatever ``read()`` yields on the ``'txt'`` entry of the OCR result.

    Raises:
        KeyError: If either credential environment variable is not set.
    """
    # JPEG data is binary, so the in-memory buffer must be a bytes buffer.
    # A text StringIO rejects bytes on Python 3; BytesIO works on 2 and 3.
    from io import BytesIO

    application_id = os.environ['ABBYY_APPLICATION_ID']
    password = os.environ['ABBYY_PASSWORD']
    ocr_engine = CloudOCR(application_id=application_id, password=password)

    image = Image.open(filename)
    region_data = image.crop(region)

    stream = BytesIO()
    region_data.save(stream, 'JPEG')
    stream.seek(0)  # rewind so the upload starts at the first byte

    post_file = {'temp.jpg': stream}
    result = ocr_engine.process_and_download(post_file, exportFormat='txt')
    return result['txt'].read()
def extract_and_ocr(filename, region):
    """Run cloud OCR on a rectangular region of an image file.

    Steps: read ``ABBYY_APPLICATION_ID`` / ``ABBYY_PASSWORD`` from the
    environment, open ``filename``, crop it to ``region``, serialise the crop
    as an in-memory JPEG, upload it as ``temp.jpg`` requesting the ``txt``
    export format, and return the contents of the ``'txt'`` result.

    Args:
        filename: Image location passed straight to ``Image.open``.
        region: Box passed straight to ``image.crop``
            (NOTE(review): looks like a 4-tuple of pixel coordinates --
            verify against callers).

    Returns:
        The result of ``read()`` on the ``'txt'`` entry returned by
        ``process_and_download``.

    Raises:
        KeyError: When a required credential environment variable is missing.
    """
    # Local import keeps this block self-contained. BytesIO is required
    # because the image encoder writes bytes, which a text-mode StringIO
    # refuses on Python 3.
    import io

    ocr_engine = CloudOCR(
        application_id=os.environ['ABBYY_APPLICATION_ID'],
        password=os.environ['ABBYY_PASSWORD'],
    )

    cropped = Image.open(filename).crop(region)

    jpeg_buffer = io.BytesIO()
    cropped.save(jpeg_buffer, 'JPEG')
    jpeg_buffer.seek(0)  # the uploader reads from the current position

    result = ocr_engine.process_and_download(
        {'temp.jpg': jpeg_buffer}, exportFormat='txt')
    return result['txt'].read()
def mrz_scan(source_file, APPID, PWD):
    """OCR ``source_file`` and return the MRZ-looking lines of the result.

    The file is uploaded with ``exportFormat='txt'`` and
    ``language='English'``. Of the last three lines of the recognised text,
    every line containing ``'<'`` is kept: spaces are stripped, the line is
    truncated to 44 characters and terminated with a newline.

    Args:
        source_file: Path of the image to scan.
        APPID: Application id passed to ``CloudOCR``.
        PWD: Password passed to ``CloudOCR``.

    Returns:
        The concatenated, newline-terminated candidate lines (an empty
        string when none of the last three lines contains ``'<'``).

    Raises:
        Exception: With message ``"file error."`` when ``source_file`` is not
            an existing regular file.
    """
    if not os.path.isfile(source_file):
        raise Exception("file error.")

    ocr_engine = CloudOCR(APPID, PWD)

    # Open via a context manager so the handle is released even if the
    # OCR request raises.
    with open(source_file, 'rb') as input_file:
        post_file = {input_file.name: input_file}
        # NOTE(review): tiny pause kept from the original; its purpose is
        # not evident from this code.
        time.sleep(0.0001)
        result = ocr_engine.process_and_download(
            post_file, exportFormat='txt', language='English')

    text = result['txt'].read().decode("utf-8")

    # Only the trailing three lines are inspected. Spaces are removed
    # because the OCR may insert spurious ones into the scanned line.
    mrz_code = "".join(
        line.replace(' ', '')[:44] + '\n'
        for line in text.splitlines()[-3:]
        if '<' in line
    )

    print("ourcode :", mrz_code)
    return mrz_code
parser.add_argument('--inputFilename', help='', required=True)
args = parser.parse_args()

# Credentials: an environment variable, when set, wins over the value
# parsed from the command line.
if 'ABBYY_APPLICATION_ID' in os.environ:
    application_id = os.environ['ABBYY_APPLICATION_ID']
else:
    application_id = args.application_id

if 'ABBYY_PASSWORD' in os.environ:
    password = os.environ['ABBYY_PASSWORD']
else:
    password = args.password

ocr_engine = CloudOCR(application_id, password)

# Forward only the recognised API options that were actually supplied.
api_parameters = ['language', 'textType', 'exportFormat', 'pdfPassword']
parameters = dict(
    (key, value)
    for key, value in args._get_kwargs()
    if key in api_parameters and value is not None
)

# Close the input handle as soon as the request has completed instead of
# leaking it for the rest of the process.
with open(args.inputFilename, 'rb') as input_file:
    post_file = {input_file.name: input_file}
    result = ocr_engine.process_and_download(post_file, **parameters)

# Write each result next to the input, swapping only the final extension.
# os.path.splitext handles names without a dot and dots in directory names,
# which manual splitting on '.' gets wrong.
base_name = os.path.splitext(input_file.name)[0]

# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for export_format, content in result.items():
    output_filename = '{name}.{extension}'.format(
        name=base_name, extension=export_format)
    # The with-statement closes the file; no explicit close() is needed.
    with open(output_filename, 'wb') as output_file:
        output_file.write(content.read())
parser.add_argument('--language',
                    help='Specifies recognition language of the document.')
parser.add_argument('--textType',
                    help='Specifies the type of the text on a page.')
parser.add_argument('--exportFormat',
                    help='Specifies the export format.')
parser.add_argument('--pdfPassword',
                    help='Contains a password for accessing password-protected images in PDF format.')
parser.add_argument('--inputFilename', help='', required=True)
args = parser.parse_args()

# Environment variables take precedence over the command-line credentials.
# Membership is tested on os.environ directly; no key list needs building.
if 'ABBYY_APPLICATION_ID' in os.environ:
    application_id = os.environ['ABBYY_APPLICATION_ID']
else:
    application_id = args.application_id

if 'ABBYY_PASSWORD' in os.environ:
    password = os.environ['ABBYY_PASSWORD']
else:
    password = args.password

ocr_engine = CloudOCR(application_id, password)

# Only options named here, and actually given a value, reach the API call.
api_parameters = ['language', 'textType', 'exportFormat', 'pdfPassword']
parameters = {
    option: value
    for option, value in args._get_kwargs()
    if option in api_parameters and value is not None
}

# The context manager guarantees the input handle is closed, including
# when the OCR request raises.
with open(args.inputFilename, 'rb') as input_file:
    post_file = {input_file.name: input_file}
    result = ocr_engine.process_and_download(post_file, **parameters)

# Derive output names by replacing only the last extension of the input
# path. Splitting on '.' by hand produced '.<ext>' for dot-less names and
# cut paths short when a directory name contained a dot.
output_stem = os.path.splitext(input_file.name)[0]

for export_format, content in result.items():
    output_filename = '{name}.{extension}'.format(
        name=output_stem, extension=export_format)
    # Leaving the with-block closes the file; no explicit close() is needed.
    with open(output_filename, 'wb') as output_file:
        output_file.write(content.read())