# import the necessary packages
from pyimagesearch.alignment import align_images
from collections import namedtuple
import pytesseract
import argparse
import imutils
import cv2
import numpy as np

# load the input scan and the blank form template from disk
image = cv2.imread('Documents/ocr-document/scans/File_000.jpg')
template = cv2.imread('Documents/ocr-document/form_w4.png')

# align the scanned image to the template
aligned = align_images(image, template, debug=True)
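align_images comes from the local pyimagesearch.alignment module, which is not shown here. Below is a minimal sketch of the usual feature-based implementation (ORB keypoints, Hamming-distance matching, RANSAC homography); the maxFeatures and keepPercent parameters are assumptions, not necessarily the module's actual signature.

# a sketch of a feature-based align_images (assumed implementation)
import numpy as np
import cv2

def align_images(image, template, maxFeatures=500, keepPercent=0.2,
                 debug=False):
    # detect ORB keypoints and compute binary descriptors in both images
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(imageGray, None)
    (kpsB, descsB) = orb.detectAndCompute(templateGray, None)

    # match the descriptors by Hamming distance and keep the best ones
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(matcher.match(descsA, descsB), key=lambda m: m.distance)
    matches = matches[:int(len(matches) * keepPercent)]

    if debug:
        # visualize the surviving keypoint matches
        vis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
        cv2.imshow("Matched Keypoints", vis)
        cv2.waitKey(0)

    # build point correspondences and estimate a homography with RANSAC
    ptsA = np.float32([kpsA[m.queryIdx].pt for m in matches])
    ptsB = np.float32([kpsB[m.trainIdx].pt for m in matches])
    (H, _) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)

    # warp the input image so it lines up with the template
    (h, w) = template.shape[:2]
    return cv2.warpPerspective(image, H, (w, h))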
["employee", "signature", "form", "valid", "unless", "you", "sign"]), OCRLocation("step5_date", (1804, 2516, 504, 156), ["date"]), OCRLocation("employee_name_address", (265, 2706, 1224, 180), ["employer", "name", "address"]), OCRLocation("employee_ein", (1831, 2706, 448, 180), ["employer", "identification", "number", "ein"]), ] # load the input image and template from disk print("[INFO] loading images...") image = cv2.imread(args["image"]) template = cv2.imread(args["template"]) # align the images, on the template!! print("[INFO] aligning images...") aligned = align_images(image, template) if args["verbose"] == 'true': cv2.imshow("Aligned", imutils.resize(aligned, width=700)) # initialize a results list to store the document OCR parsing results print("[INFO] OCR'ing document...") parsingResults = [] winNum = 1 # loop over the locations of the document we are going to OCR for loc in OCR_LOCATIONS: # extract the OCR ROI from the aligned image (x, y, w, h) = loc.bbox roi = aligned[y:y + h, x:x + w] if args["verbose"] == 'true': cv2.imshow("ROI#{}".format(winNum), roi)
# the input is a PDF, so convert it to an image first
print('PDF Detected, Converting to .jpg...')
args['image'] = convert_pdf(args['image'])

image = cv2.imread(str(args["image"]))
print(type(image))
template = cv2.imread(args["template"])

# align the images
print("[INFO] aligning images...")
scale = 2
if args['threshold'] is None:
    # no threshold supplied: skip alignment and just upscale the input
    y, x, _ = image.shape
    aligned = cv2.resize(image, (scale * 2 * x, scale * 2 * y))
else:
    # binarize the input at the requested threshold before aligning
    threshold = float(args['threshold'])
    binary = cv2.threshold(image, threshold, 255, cv2.THRESH_BINARY)[1]
    aligned = align_images(binary, template, debug=False)
    # aligned = align_images(image, template, debug=False)
    y, x, _ = aligned.shape
    print(f"aligned shape before resize: {aligned.shape}")
    aligned = cv2.resize(aligned, (scale * 2 * x, scale * 2 * y))

# initialize a results list to store the document OCR parsing results
print("[INFO] OCR'ing document...")
parsingResults = []

# loop over the locations of the document we are going to OCR
for loc in OCR_LOCATIONS:
    # extract the OCR ROI from the aligned image, scaling the bounding
    # box coordinates to the resized image
    (x, y, w, h) = tuple(scale * t for t in loc.bbox)
    roi = aligned[y:y + h, x:x + w]
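convert_pdf is referenced but never defined in the snippet. A minimal sketch of such a helper built on the pdf2image package follows (the package choice, function name, and first-page-only behavior are assumptions); returning a Path is consistent with the str(args["image"]) wrapper above.

# a possible convert_pdf helper based on pdf2image (assumed; needs poppler)
from pathlib import Path
from pdf2image import convert_from_path

def convert_pdf(pdf_path, dpi=300):
    # render the first page of the PDF to a PIL image
    pages = convert_from_path(pdf_path, dpi=dpi)

    # save it as a JPEG next to the source file and return the new path
    out_path = Path(pdf_path).with_suffix(".jpg")
    pages[0].save(out_path, "JPEG")
    return out_path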