# Example #1
# import the necessary packages
from pyimagesearch.alignment import align_images
from collections import namedtuple
import pytesseract
import argparse
import imutils
import cv2
import numpy as np

# paths to the input scan and the blank form template
image_path = 'Documents/ocr-document/scans/File_000.jpg'
template_path = 'Documents/ocr-document/form_w4.png'

# load both images; cv2.imread returns None (it does NOT raise) when the
# file is missing or unreadable, so fail fast with a clear error instead
# of crashing later inside align_images
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"could not read input image: {image_path}")
template = cv2.imread(template_path)
if template is None:
    raise FileNotFoundError(f"could not read template image: {template_path}")

# align the scanned document to the template (debug=True visualizes the
# keypoint matches used to estimate the homography)
aligned = align_images(image, template, debug=True)

# Example #2
        ["employee", "signature", "form", "valid", "unless", "you", "sign"]),
    OCRLocation("step5_date", (1804, 2516, 504, 156), ["date"]),
    OCRLocation("employee_name_address", (265, 2706, 1224, 180),
                ["employer", "name", "address"]),
    OCRLocation("employee_ein", (1831, 2706, 448, 180),
                ["employer", "identification", "number", "ein"]),
]

# load the input image and template from disk; cv2.imread returns None
# (it does NOT raise) on failure, so check before using the arrays
print("[INFO] loading images...")
image = cv2.imread(args["image"])
if image is None:
    raise FileNotFoundError("could not read input image: {}".format(args["image"]))
template = cv2.imread(args["template"])
if template is None:
    raise FileNotFoundError("could not read template image: {}".format(args["template"]))

# align the input image to the template so the fixed OCR field locations
# line up with the scanned document
print("[INFO] aligning images...")
aligned = align_images(image, template)
if args["verbose"] == 'true':
    cv2.imshow("Aligned", imutils.resize(aligned, width=700))

# initialize a results list to store the document OCR parsing results
print("[INFO] OCR'ing document...")
parsingResults = []
winNum = 1  # counter used to title the per-ROI debug windows
# loop over the locations of the document we are going to OCR
for loc in OCR_LOCATIONS:
    # extract the OCR ROI from the aligned image
    (x, y, w, h) = loc.bbox
    roi = aligned[y:y + h, x:x + w]
    if args["verbose"] == 'true':
        # NOTE(review): winNum is never incremented in the visible lines,
        # so every window is titled "ROI#1" and overwrites the previous
        # one -- confirm whether the loop body continues past this point
        cv2.imshow("ROI#{}".format(winNum), roi)
# Example #3
    print('PDF Detected, Converting to .jpg...')
    args['image'] = convert_pdf(args['image'])
# load the input image (possibly converted from PDF above) and the
# template; cv2.imread returns None (it does NOT raise) on failure
image = cv2.imread(str(args["image"]))
print(type(image))
template = cv2.imread(args["template"])

# align the images
print("[INFO] aligning images...")
scale = 2
if args['threshold'] is None:
    # no threshold requested: skip alignment and just upscale the raw image
    y, x, _ = image.shape
    aligned = cv2.resize(image, (scale * 2 * x, scale * 2 * y))
else:
    # binarize before aligning; use the user-supplied threshold (the
    # previous code parsed it but then ignored it in favor of a
    # hard-coded 130)
    threshold = float(args['threshold'])
    aligned = align_images(cv2.threshold(image, threshold, 255,
                                         cv2.THRESH_BINARY)[1],
                           template,
                           debug=False)
    y, x, _ = aligned.shape
    print(f"aligned.shape: {aligned.shape}")
    aligned = cv2.resize(aligned, (scale * 2 * x, scale * 2 * y))

# initialize a results list to store the document OCR parsing results
print("[INFO] OCR'ing document...")
parsingResults = []

# loop over the locations of the document we are going to OCR
for loc in OCR_LOCATIONS:
    # extract the OCR ROI from the aligned image, scaling the template
    # bbox coordinates to the upscaled image
    # NOTE(review): the bbox is scaled by `scale` (2) but the image above
    # was resized by scale * 2 (4x) -- the ROI coordinates look
    # inconsistent with the resize factor; confirm which is intended
    (x, y, w, h) = tuple([scale * t for t in loc.bbox])
    roi = aligned[y:y + h, x:x + w]