def get_data_from_image(self, image):
    """Run tesseract OCR on *image*.

    Side effects: caches the per-character bounding boxes on
    ``self.bounding_boxes`` and triggers ``self.redraw(False)``.

    Returns the recognized text, or ``None`` when the tesseract binary
    is not found at the configured path (a red status message is shown
    instead of raising).
    """
    try:
        # NOTE(review): the documented attribute is
        # pytesseract.pytesseract.tesseract_cmd; this form only works if
        # the file imported the inner module as `pytesseract` — confirm
        # against the file's import section.
        pytesseract.tesseract_cmd = self.sys_cfg['tesseract_directory']
        self.bounding_boxes = pytesseract.image_to_boxes(image)
        self.redraw(False)
        return pytesseract.image_to_string(image)
    except pytesseract.TesseractNotFoundError:
        self.set_status('Did not find tesseract.exe at specified directory', 'red')
        return None  # explicit: original fell off the end of the except
def processImage(filename):
    """Return tesseract character-box data for the image at *filename*.

    The image is loaded with OpenCV, converted to grayscale, and passed
    to tesseract.

    Returns the box data as a dict (``Output.DICT`` layout: parallel
    lists of characters and box coordinates).

    Raises:
        IOError: if the file cannot be read as an image.
    """
    img = cv2.imread(filename)
    # cv2.imread returns None instead of raising on a bad path; fail
    # here with a clear message rather than an opaque cvtColor error.
    if img is None:
        raise IOError("could not read image: " + filename)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # (Removed dead commented-out alpha/beta contrast loop — it indexed a
    # channel axis that no longer exists after grayscale conversion.)
    return pt.image_to_boxes(gray, output_type=Output.DICT)
# Command-line interface: OCR an image and save its character boxes as a
# tesseract .boxes training file.
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")
# Bug fix: -t and -p had copy-pasted help text from -i.
ap.add_argument("-t", "--training-dir", required=False, default=TRAINING_BOXES_DIR,
                help="directory where the generated .boxes file is written")
ap.add_argument("-p", "--process", required=False, default=0,
                help="if truthy, binarize the image before running OCR")
args = vars(ap.parse_args())

image_path = args['image']
filename = image_path.split("/")[-1]
workdir = "/".join(image_path.split("/")[:-1]) + "/"

# Optionally pre-process (binarize) the image before OCR.
if args['process']:
    image = binarize_image(workdir, filename)
else:
    image = Image.open(image_path)

boxes = pytesseract.image_to_boxes(image, config=CONFIG_TESSERACT, lang=LANG)

# Bug fix: honor --training-dir. It was parsed but never read — the
# output always went to TRAINING_BOXES_DIR, silently ignoring the flag.
# (argparse stores "--training-dir" under the key 'training_dir'.)
boxes_path = args['training_dir'] + filename.split(".")[0] + ".boxes"
with open(boxes_path, "w+") as boxesfile:
    boxesfile.write(boxes)
    # redundant boxesfile.close() removed — the with-block closes it
# Read the hOCR output produced by a previous tesseract run.
# Bug fix: the file handle was opened inline and never closed; use a
# context manager so it is released deterministically.
with open("output.hocr", "r", encoding="utf-8") as hocr_file:
    hocr = hocr_file.read()

# Extract coordinate information from the hOCR markup.
soup = BeautifulSoup(hocr, "html.parser")
word_coordinates = get_coordinates(soup.find_all('span', class_='ocrx_word'))
line_coordinates = get_coordinates(soup.find_all('span', class_='ocr_line'))
paragraph_coordinates = get_coordinates(soup.find_all('p', class_='ocr_par'))

# Draw the bounding boxes onto the source image.
img = cv2.imread(filename)
boxes = pt.image_to_boxes(img)
h, w, _ = img.shape

def _draw_rects(image, coords, color):
    """Draw one 2px rectangle per (x1, y1, x2, y2) entry in *coords*."""
    for c in coords:
        image = cv2.rectangle(image, (int(c[0]), int(c[1])),
                              (int(c[2]), int(c[3])), color, 2)
    return image

img = _draw_rects(img, word_coordinates, (255, 0, 0))        # blue: words
img = _draw_rects(img, line_coordinates, (0, 0, 255))        # red: lines
img = _draw_rects(img, paragraph_coordinates, (255, 255, 0)) # cyan: paragraphs

# Character boxes from image_to_boxes use a bottom-left origin, so the
# y coordinates are flipped against the image height for OpenCV.
for b in boxes.splitlines():
    b = b.split(' ')
    img = cv2.rectangle(img, (int(b[1]), h - int(b[2])),
                        (int(b[3]), h - int(b[4])), (0, 255, 0), 2)  # green: chars
import cv2
from pytesseract import pytesseract

# Point pytesseract at the tesseract binary (Windows install path).
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

img = cv2.imread("ur_mom.png")
height, width, c = img.shape

# One line per character: "<char> <x1> <y1> <x2> <y2> <page>", with the
# origin at the *bottom-left* of the image.
letter_boxes = pytesseract.image_to_boxes(img)
print(letter_boxes)

for box in letter_boxes.splitlines():
    box = box.split()
    print(box)
    # Fix misleading names: the four values are two corner points
    # (x1, y1, x2, y2), not x/y/width/height as "x, y, w, h" implied.
    x1, y1, x2, y2 = int(box[1]), int(box[2]), int(box[3]), int(box[4])
    # Flip y against the image height: OpenCV uses a top-left origin.
    cv2.rectangle(img, (x1, height - y1), (x2, height - y2), (0, 0, 255), 3)
    cv2.putText(img, box[0], (x1, height - y2 + 32),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

cv2.imwrite('eeeeee.png', img)
cv2.imshow("window", img)
cv2.waitKey(0)
def waterFill(image, charMap, color=False, blur=False):
    """Classify a grayscale page image as "upright" or "inverted".

    Runs tesseract to locate character boxes, then for each detected
    non-symmetric character measures directional "water fill" capacities
    (via the project helper ``water_fill6`` — presumably cavity area open
    toward Up/Down/Left/Right; TODO confirm against its definition) and
    votes: down/right capacity suggests upright text, up/left suggests
    inverted.

    Args:
        image: 2-D grayscale array (``image.shape`` unpacks to (h, w),
            and GRAY2BGR is used below, so it must be single-channel).
        charMap: pandas-style table with at least "CharName" and
            "Symmetric" columns; symmetric characters are skipped since
            they carry no orientation signal.
        color: if True, the returned annotated image is converted to BGR
            so the box color is visible.
        blur: if True, apply a light Gaussian blur before OCR.

    Returns:
        (label, boxed_image, upright_score, inverted_score) where label
        is "upright" or "inverted" and boxed_image is a copy of the
        input with tesseract character boxes drawn on it.
    """
    h, w = image.shape
    if blur:
        image = cv2.GaussianBlur(image, ksize=(3, 3), sigmaX=2)
    # Character-level boxes; one line per char: "c x1 y1 x2 y2 page",
    # coordinates in a bottom-left origin.
    boxes = tesseract.image_to_boxes(image, lang="eng", config=r'--oem 3 --psm 3')
    # Per-pixel capacity thresholds below which a direction is ignored
    # as noise (empirically tuned, presumably — TODO confirm origin).
    up_thres = 0.032
    down_thres = 0.095
    left_thres = 0
    right_thres = 0.012
    boxed_image = image.copy()
    if color:
        boxed_image = cv2.cvtColor(boxed_image, cv2.COLOR_GRAY2BGR)
    upright = inverted = 0
    for b in boxes.splitlines():
        # Letter, then bottom-left and top-right corners of its box.
        bx = b.split(" ")
        # Skip characters flagged symmetric in charMap — they look the
        # same upside-down and would only add noise to the vote.
        if bx[0] in charMap.loc[:, "CharName"].values:
            rw = charMap[charMap["CharName"] == bx[0]].first_valid_index()
            if charMap.loc[rw, "Symmetric"] == True: # Symmetric column
                continue
        bx[1:] = map(int, bx[1:]) # convert coordinates to integers
        # Crop the character; y is flipped because tesseract boxes use a
        # bottom-left origin while numpy rows run top-down.
        cropped_image = image[h - bx[4]:h - bx[2] + 1, bx[1]:bx[3] + 1]
        # Binarize so any blur becomes pure black/white pixels.
        t = 143
        cropped_image_b = binarize_image(cropped_image.copy(), threshold=t)
        cropped_image_b = np.abs(cropped_image_b - 255) # invert the image
        # Calculate capacities for the character.
        cib_h, cib_w = cropped_image_b.shape
        cib_area = cib_h * cib_w # total capacity, not capacity/pixel
        U, D, L, R, _, _ = water_fill6(cropped_image_b)
        # Normalize to capacity per pixel; a degenerate (empty) crop
        # yields area 0, in which case all capacities are zeroed.
        try:
            U, D, L, R = U / cib_area, D / cib_area, L / cib_area, R / cib_area
        except ZeroDivisionError:
            U, D, L, R = 0, 0, 0, 0
        # Zero out sub-threshold directions before voting.
        if U < up_thres: U = 0
        if D < down_thres: D = 0
        if L < left_thres: L = 0
        if R < right_thres: R = 0
        upright += D + R
        inverted += U + L
        # Annotate the output image with the detected character box
        # (same bottom-left-origin y flip as the crop above).
        boxed_image = cv2.rectangle(boxed_image, (bx[1], h - bx[2]), (bx[3], h - bx[4]), (80, 176, 0), 0)
    # Biased decision: require a 2.5x margin before calling it upright —
    # presumably tuned to favor detecting inversion; TODO confirm.
    if upright >= 2.5 * inverted:
        return "upright", boxed_image, upright, inverted
    else:
        return "inverted", boxed_image, upright, inverted