def get_check_patch(im, box, text_len=6, flag=False):
    """Crop the patch of ``im`` that is located relative to ``box``.

    Two strategies are available:

    * ``flag`` false: the patch is placed geometrically. It starts at
      ``box[1]`` shifted 10 pixels along axis 0 and its size is derived from
      the distance ``box[2, 1] - box[1, 1]``.
    * ``flag`` true: the text inside ``box`` is OCR'd and the last of the
      first ``text_len`` recognised characters is used as the anchor.

    NOTE(review): assumes ``box`` is an array of (x, y) corner points and that
    columns 3 and 2 of ``boxes_to_array`` output are x/y offsets inside the
    patch - confirm against the callers and ``boxes_to_array``.

    :param im: source image array (indexed ``[row, column]``).
    :param box: array of points describing the text box.
    :param text_len: number of leading OCR characters considered.
    :param flag: select the OCR-anchored strategy instead of the geometric one.
    :return: ``(check_patch, point)`` where ``point`` is the ``(x, y)`` of the
        patch's top-left corner in ``im``.
    """

    def _ocr_boxes(with_morph):
        # One OCR pass over the box area; returns the raw box string.
        patch = get_patchs(im, box[np.newaxis, ...], with_morph=with_morph)[0]
        return pytesseract.image_to_boxes(patch, config=config[1])

    if not flag:
        # astype() returns a copy, so shifting the origin leaves ``box`` intact.
        origin = box[1].astype(np.int32)
        bias = 10
        origin[0] += bias
        height = np.round(box[2, 1] - box[1, 1]).astype(np.int32)
        width = (height * 1.5).astype(np.int32)
    else:
        try:
            check_boxes = _ocr_boxes(with_morph=True)
            if '~' in check_boxes:
                # A '~' marks a misread; retry without morphology.
                check_boxes = _ocr_boxes(with_morph=False)
                if Debug_mode:
                    print('LCH: the ~ detected')
                if '~' in check_boxes:
                    check_boxes = check_boxes.replace('~', '0')
            check_array = boxes_to_array(check_boxes)[:text_len, :].astype(np.int32)
        except Exception:
            # Best-effort fallback: any failure above triggers one plain OCR
            # pass. ``Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            check_boxes = _ocr_boxes(with_morph=False)
            check_array = boxes_to_array(check_boxes)[:text_len, :].astype(np.int32)

        origin = (check_array[-1, 3] + box[0][0], check_array[-1, 2] + box[0][1])
        height = (box[2, 1] - origin[1]).astype(np.int32)
        width = (box[2, 0] - origin[0] + 10).astype(np.int32)

    # The patch starts 10% of the height above the origin and ends 125% below.
    top = origin[1] - (height * 0.1).astype(np.int32)
    bottom = origin[1] + (height * 1.25).astype(np.int32)
    check_patch = im[top:bottom, origin[0]:origin[0] + width]
    point = (origin[0], top)
    return check_patch, point
def translit(image, box_sim_thresh=0.65):
    """Return the transliteration text recognised in IMAGE.

    The image is OCR'd once with the flagship language and once with every
    other language in ``charsets``. For each flagship character, candidates
    from the other languages whose bounding boxes overlap sufficiently are
    collected and ``choose_char`` picks the final character. The chosen
    characters are then regrouped into the words reported by
    ``image_to_data``.

    :param image: an image of a transliterated Semitic language
    :param box_sim_thresh: the minimum Jaccard similarity of the bounding
        boxes of two characters for them to be recognized as describing
        the same character
    :return: the result of ``data_to_string`` over the rebuilt words
    """

    def _char_boxes(ocr):
        # Pair every character with its (left, bottom, right, top) box.
        return list(zip(
            ocr['char'],
            zip(ocr['left'], ocr['bottom'], ocr['right'], ocr['top'])))

    flagship_language = 'ces'
    other_languages = [
        name for name in charsets.keys() if name != flagship_language
    ]

    data = pytesseract.image_to_data(
        image, lang=flagship_language, output_type=Output.DICT)
    flagship_boxes = pytesseract.image_to_boxes(
        image, lang=flagship_language, output_type=Output.DICT)
    other_boxes = {}
    for language in other_languages:
        other_boxes[language] = pytesseract.image_to_boxes(
            image, lang=language, output_type=Output.DICT)

    candidates = {
        language: _char_boxes(other_boxes[language])
        for language in other_languages
    }

    # Pick the best character for every flagship detection.
    chars = []
    for flag_char, flag_box in _char_boxes(flagship_boxes):
        matches = [(flagship_language, flag_char)]
        for language in other_languages:
            for cand_char, cand_box in candidates[language]:
                sim = box_sim(*flag_box, *cand_box)
                if cand_char not in charsets[language]:
                    continue
                if sim >= box_sim_thresh or (
                        sim > 0 and char_kin(flag_char, cand_char)):
                    matches.append((language, cand_char))
        chars.append((flag_char, flag_box, choose_char(matches)))

    # Rebuild each recognised word from the chosen characters.
    words = data['text']
    rows = zip(data['left'], data['top'], data['width'], data['height'],
               data['conf'], words)
    for idx, (left, top, width, height, conf, ocr_word) in enumerate(rows):
        if int(conf) < 0:
            continue
        right = left + width
        # Flip the vertical axis so the word box uses the same bottom-up
        # coordinates as the character boxes.
        bottom = image.height - (top + height)
        top = image.height - top
        words[idx] = ''.join(
            chosen
            for flag_char, char_box, chosen in chars
            if (flag_char in ocr_word)
            and box_sim(*char_box, left, bottom, right, top) > 0)

    return data_to_string(data['text'])
def locateNameBox():
    """Print the OCR boxes of the word "name" and dump all boxes to a file.

    Runs ``image_to_boxes`` on ``answerSheet_with_name.png``. For every place
    where four consecutive box lines start with ``n``, ``a``, ``m``, ``e``,
    prints ``true`` followed by those four lines. The complete box output is
    then written to ``image_to_string.txt``.
    """
    with Image.open('answerSheet_with_name.png') as image:
        # OCR once and reuse the result for both the search and the dump.
        raw_boxes = image_to_boxes(image)

    lines = raw_boxes.split('\n')
    # line[:1] is '' for an empty line, so blank lines cannot raise IndexError.
    initials = [line[:1] for line in lines]
    # Stop three lines early so the four-line window never runs past the end.
    for start in range(len(lines) - 3):
        if initials[start:start + 4] == ['n', 'a', 'm', 'e']:
            print("true")
            for line in lines[start:start + 4]:
                print(line)

    with open("image_to_string.txt", "w") as file:
        file.write(raw_boxes)
def findcoordinateOfName(path):
    """Locate the letter "e" of the first word "name" in the top third of an image.

    The top third of the image at ``path`` is written to ``./temp/temp.png``
    (the ``./temp`` directory must already exist), OCR'd with
    ``image_to_boxes``, and scanned for four consecutive boxes whose
    characters are ``n``, ``a``, ``m``, ``e``.

    :param path: path of the image file to analyse.
    :return: ``((left, y_top), (right, y_bottom))`` of the "e" box, with the
        y values converted to top-down coordinates of the cropped image.
    :raises IndexError: if no "name" sequence is found. The original code
        failed with IndexError in that case, so the type is kept for callers.
    """
    image = cv2.imread(path)
    height, width = image.shape[:2]
    crop_img = image[0:int(height / 3), 0:width]
    cv2.imwrite("./temp/temp.png", crop_img)

    with Image.open("./temp/temp.png") as image:
        lines = image_to_boxes(image).split('\n')
        width, height = image.size

    # line[:1] is '' for an empty line, so blank lines cannot raise IndexError.
    initials = [line[:1] for line in lines]
    for start in range(len(lines) - 3):
        if initials[start:start + 4] == ['n', 'a', 'm', 'e']:
            # Box line layout: char left bottom right top page.
            fields = lines[start + 3].split(" ")
            return ((int(fields[1]), height - int(fields[4])),
                    (int(fields[3]), height - int(fields[2])))

    raise IndexError('the word "name" was not found in the OCR output')
def submit_fun(self):
    """OCR the selected image, show its characters and display the boxes.

    Reads ``self.path``, runs character-level OCR, writes the recognised
    characters (space separated) into ``self.text`` and opens an OpenCV
    window with every character box drawn. Shows an error dialog when no
    file is selected or the file cannot be read.
    """
    if self.path == '':
        messagebox.showerror("Error", "You haven't select any image file yet.")
        return

    img = cv2.imread(self.path)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        messagebox.showerror("Error", "The selected image file could not be read.")
        return

    # Tesseract gets an RGB copy; drawing and display stay on the BGR image
    # because cv2.imshow interprets arrays as BGR.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    hImg = img.shape[0]

    chars = []
    for line in pytesseract.image_to_boxes(rgb).splitlines():
        # Box line layout: char left bottom right top page (origin bottom-left).
        b = line.split(' ')
        chars.append(str(b[0]))
        x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
        cv2.rectangle(img, (x, hImg - y), (w, hImg - h), (50, 50, 255), 2)
        cv2.putText(img, b[0], (x + 10, hImg - y + 55),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (50, 50, 255), 2)

    # Every character is followed by one space, including the last.
    text_got = ''.join(c + ' ' for c in chars)

    self.text.delete(1.0, END)
    # NOTE(review): the instance is never used; the call is kept in case the
    # constructor has side effects - confirm and remove if it does not.
    PSS()
    self.text.insert(INSERT, text_got)

    cv2.imshow('img', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def tesseract_box(image, color):
    """Draw a rectangle around every character Tesseract finds in ``image``.

    :param image: image array to OCR and draw on.
    :param color: rectangle colour passed to ``cv2.rectangle``.
    :return: the image with the rectangles drawn.
    """
    # Tesseract's y axis starts at the bottom of the image, OpenCV's at the
    # top. Use the real height rather than the former hard-coded 128 so that
    # images of any size are handled.
    height = image.shape[0]
    for line in pytesseract.image_to_boxes(image).splitlines():
        # Box line layout: char left bottom right top page.
        b = line.split(' ')
        image = cv2.rectangle(image,
                              (int(b[1]), height - int(b[2])),
                              (int(b[3]), height - int(b[4])),
                              color, 2)
    return image
def getTextCoordinate():
    """Collect the centre coordinates of every non-zero digit found by OCR.

    Reads the image named by the global ``filename`` and fills these globals:

    * ``numberOfCharactor`` - number of character boxes reported.
    * ``textCoordinate`` - one ``[digit, x_center, y_center, 0]`` row per
      non-zero digit, stored from index 0; remaining rows stay ``[0, 0, 0, 0]``.
    * ``numberOfDigitValue`` - number of rows actually filled.
    * ``ratio_X_Axis_Value`` / ``ratio_Y_Axis_Value`` - reset to 0.

    NOTE(review): ``height`` is read from module scope and is not set here;
    presumably it is the image height - confirm against the caller.
    """
    global textCoordinate, filename, ratio_Y_Axis_Value, ratio_X_Axis_Value, numberOfCharactor, numberOfDigitValue

    # Close the image file deterministically once OCR has finished.
    with Image.open(filename) as image:
        box_lines = pytesseract.image_to_boxes(image).splitlines()

    numberOfCharactor = len(box_lines)
    textCoordinate = [[0] * 4 for _ in range(numberOfCharactor)]
    ratio_X_Axis_Value = ratio_Y_Axis_Value = 0

    digit_count = 0
    for line in box_lines:
        # Box line layout: char x1 y1 x2 y2 page.
        b = line.split(' ')
        if b[0].isdigit() and int(b[0]) != 0:
            row = textCoordinate[digit_count]
            row[0] = int(b[0])
            row[1] = round((int(b[1]) + int(b[3])) / 2)
            # Flip y because Tesseract measures from the bottom edge.
            row[2] = round(((height - int(b[2])) + (height - int(b[4]))) / 2)
            digit_count += 1

    numberOfDigitValue = digit_count
def explain_tesseract(img, title, options_str):
    """Record Tesseract's reading of ``img`` and outline the characters it found.

    Two OCR results are appended to the module-level ``candidate_results``:
    one for the original image and one for a copy padded with 40 white rows
    above and below. The character boxes found on the original image are
    then drawn, with their labels, on the padded copy.

    :param img: 2-D (single channel) image array.
    :param title: label used as prefix of the recorded result names.
    :param options_str: Tesseract config string.
    """
    pad_rows = 40
    img_height, img_width = img.shape

    # White band stacked above and below the image.
    band = np.full((pad_rows, img_width), 255, dtype=np.uint8)
    padded = np.concatenate((band, img, band), axis=0)

    plain_text = pytesseract.image_to_string(img, config=options_str).strip()
    candidate_results.append([f'{title} (orig size)', plain_text])

    padded_text = pytesseract.image_to_string(padded, config=options_str).strip()
    candidate_results.append([f'{title} (extended)', padded_text])

    for entry in pytesseract.image_to_boxes(img, config=options_str).splitlines():
        fields = entry.split(' ')
        left, bottom, right, top = [int(fields[k]) for k in (1, 2, 3, 4)]
        # Flip y (Tesseract counts from the bottom) and shift by the top band.
        cv2.rectangle(padded,
                      (left, img_height - bottom + pad_rows),
                      (right, img_height - top + pad_rows),
                      (0, 255, 0), 2)
        cv2.putText(padded, fields[0],
                    (left, img_height - bottom + 25 + pad_rows),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
def _extract_text(self, frame, bbox, sharpen=False):
    """Return the text Tesseract detects inside ``bbox`` of ``frame``.

    The region ``frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]`` is inverted and,
    when ``sharpen`` is true, passed through a 3x3 sharpening kernel before
    character-level OCR. The first field of every non-empty box line is
    concatenated into the result.

    Returns:
        The detected characters joined into one string, or an empty string
        if nothing has been detected. A non-empty result is also logged.
    """
    region = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
    # Invert the image for better OCR results.
    img = cv2.bitwise_not(region)

    if sharpen:
        sharpen_kernel = np.array([[-1, -1, -1],
                                   [-1, 9, -1],
                                   [-1, -1, -1]])
        img = cv2.filter2D(img, -1, sharpen_kernel)

    box_fields = [line.split() for line in pytesseract.image_to_boxes(img).split('\n')]
    text = ''.join(fields[0] for fields in box_fields if fields)

    if text:
        logging.info('Detected VFX Shotcode: %s', text)
    return text
def find_letters(thresh_low, type): nonlocal thresholded nonlocal letters gray = cv2.cvtColor(np.array(letter_wheel), cv2.COLOR_RGB2GRAY) ret, thresholded = cv2.threshold(gray, thresh_low, 255, type) thresholded = cv2.resize(thresholded, None, fx=self.resize_scale, fy=self.resize_scale) thresholded = cv2.filter2D(thresholded, -1, self.smoothing_kernel) cv2.imwrite("thresholded.png", thresholded) letters_raw = pytesseract.image_to_string(thresholded, config=self.config) letters = [] for letter in letters_raw: if letter in allowable_chars(): letters.append(letter) if len(letters) < 3: raise IndexError() self.boxes = [ box.split() for box in pytesseract.image_to_boxes( thresholded, config=self.config).split("\n") ] for box in self.boxes: box[1] = int(int(box[1]) / self.resize_scale) box[2] = screen.height * self.crop_bottom_proportion - int( int(box[2]) / self.resize_scale) box[3] = int(int(box[3]) / self.resize_scale) box[4] = screen.height * self.crop_bottom_proportion - int( int(box[4]) / self.resize_scale)
def tesseract():
    """Print the output of the main pytesseract entry points for ``imgFile``.

    Demonstrates, in order: plain text, character boxes, verbose word data,
    orientation/script detection, text extraction straight from a file path,
    and PDF / hOCR generation. ``imgFile`` is read from module scope.
    """
    # If the tesseract executable is not in your PATH, set
    # pytesseract.pytesseract.tesseract_cmd to its full path, for example
    # r'C:\Program Files (x86)\Tesseract-OCR\tesseract'.

    # Open the image once and close it deterministically instead of opening
    # the same file four times and leaking every handle.
    with Image.open(imgFile) as image:
        # Simple image to string; encoded so the text prints on any console.
        print(pytesseract.image_to_string(image).encode('utf8'))

        # Bounding box estimates.
        print(pytesseract.image_to_boxes(image))

        # Verbose data including boxes, confidences, line and page numbers.
        print(pytesseract.image_to_data(image))

        # Orientation and script detection.
        print(pytesseract.image_to_osd(image))

    # Passing a path bypasses the internal image conversions.
    # NOTE: tesseract reports an error for unsupported image formats.
    print(pytesseract.image_to_string(imgFile))

    # Searchable PDF and hOCR output; both results are discarded here.
    pdf = pytesseract.image_to_pdf_or_hocr(imgFile, extension='pdf')
    hocr = pytesseract.image_to_pdf_or_hocr(imgFile, extension='hocr')
def processus(img_adress=None, img_file=None):
    """OCR an image, draw its character boxes and display the result.

    :param img_adress: path of the image to load; used when ``img_file`` is
        None.
    :param img_file: already loaded image array; takes precedence over
        ``img_adress``.
    :return: ``(img, boxes_splitted, boxes_stringed)`` - the RGB-converted
        image with boxes drawn, the list of box lines and the list of text
        lines recognised before drawing.
    """
    if img_file is not None:
        img = img_file
    else:
        img = cv2.imread(img_adress)

    # pytesseract expects RGB, OpenCV loads BGR.
    # NOTE(review): cv2.imshow below interprets the array as BGR, so the
    # preview shows red and blue swapped.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    hImg, _, _ = img.shape

    # Each box line is: character left bottom right top page.
    boxes_splitted = pytesseract.image_to_boxes(img).splitlines()
    boxes_stringed = pytesseract.image_to_string(img).splitlines()

    for entry in boxes_splitted:
        fields = entry.split(' ')
        x, y, w, h = [int(fields[k]) for k in (1, 2, 3, 4)]
        cv2.rectangle(img, (x, hImg - y), (w, hImg - h), (50, 50, 255), 2)
        # Write the character next to its box.
        cv2.putText(img, fields[0], (x, hImg - y + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 255, 50), 2)

    cv2.imshow('result', img)
    cv2.waitKey(0)
    return (img, boxes_splitted, boxes_stringed)
def get_img_ocr(imgfile, config_number=3, ratio=2):
    """OCR an image file after preprocessing and upscaling.

    :param imgfile: input passed to ``postprocess`` to obtain the image.
    :param config_number: Tesseract page segmentation mode (``--psm``).
        3 is fully automatic; 6 assumes a single uniform block of text;
        11 is sparse text; 12 is sparse text with OSD.
    :param ratio: scale factor applied to width and height before OCR.
    :return: ``(bbox, text, H, W)`` - the character box string
        (x1, y1, x2, y2 per character), the recognised text, and the height
        and width of the upscaled image.
    """
    custom_config = '--oem 3 --psm {!s}'.format(config_number)

    img = postprocess(imgfile)
    height, width = img.shape[:2]
    target_size = (int(ratio * width), int(ratio * height))
    img_1 = cv2.resize(img, target_size, interpolation=cv2.INTER_CUBIC)

    # The upscaled image must have three dimensions for this unpacking.
    H, W, _ = np.shape(img_1)

    text = pytesseract.image_to_string(img_1, config=custom_config)
    bbox = pytesseract.image_to_boxes(img_1, config=custom_config)
    return bbox, text, H, W
def get_img_ocr(image, box, config_number=3, ratio=2):
    """OCR the region ``box`` of ``image`` after binarising and upscaling it.

    The region is grown by 10 pixels on every side, converted to grayscale,
    binarised with Otsu's threshold and resized by ``ratio`` before OCR.

    :param image: three-channel BGR image array.
    :param box: ``(x1, y1, x2, y2)`` region of interest.
    :param config_number: Tesseract page segmentation mode (``--psm``);
        3 is fully automatic, 1, 6, 11 and 12 may also be useful.
    :param ratio: scale factor applied to width and height before OCR.
    :return: ``(bbox, text, H, W)`` - the character box string
        (x1, y1, x2, y2 per character), the recognised text, and the height
        and width of the upscaled region.
    """
    custom_config = r'--oem 3 --psm ' + str(config_number)

    pad = 10
    x1, y1, x2, y2 = box
    # Clamp the padded start to 0: a negative start index would wrap around
    # to the far edge and yield an empty or wrong crop for boxes near the
    # top/left border. An overshooting end index is clipped by slicing itself.
    top = max(y1 - pad, 0)
    left = max(x1 - pad, 0)
    region = image[top:y2 + pad, left:x2 + pad, :]

    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    height, width = binary.shape[:2]
    img_1 = cv2.resize(binary, (int(ratio * width), int(ratio * height)),
                       interpolation=cv2.INTER_CUBIC)
    H, W = img_1.shape[:2]

    text = pytesseract.image_to_string(img_1, config=custom_config)
    bbox = pytesseract.image_to_boxes(img_1, config=custom_config)
    return bbox, text, H, W
def tess_func(gray):
    """Run Tesseract on a frame, outline its characters and print its text.

    The frame height and the recognised text are printed. Rectangles are
    drawn on ``gray`` itself, while the text is read from a copy taken before
    drawing.

    :param gray: frame to analyse.
    :return: the frame with character rectangles drawn.
    """
    frame_height = gray.shape[0]
    print(frame_height)

    box_lines = tess.image_to_boxes(gray).splitlines()

    # Keep an unmarked copy so the rectangles do not disturb text recognition.
    untouched = gray.copy()

    for entry in box_lines:
        fields = entry.split(' ')
        # Flip y: Tesseract measures from the bottom edge.
        lower_left = (int(fields[1]), frame_height - int(fields[2]))
        upper_right = (int(fields[3]), frame_height - int(fields[4]))
        gray = cv2.rectangle(gray, lower_left, upper_right, (0, 0, 255), 1)

    text = tess.image_to_string(untouched)
    print("Tess text: " + text)
    return gray
def handleTitleScreen(self):
    """Wait for the title-screen prompt and click it.

    Repeatedly grabs the window, OCRs a fixed band of it (columns from 1/4
    to 67% of the width, rows from 3/4 to 7/8 of the height) and loops until
    a line reading exactly ``CLICK HERE TO PLAY!`` appears. Then moves the
    mouse to the band's top-left corner, offset by the window position plus
    (108, 31), and clicks.

    NOTE(review): the (108, 31) offset presumably targets the button inside
    the band - confirm against the game layout.
    """
    x = y = 0
    can_play_now = False
    while not can_play_now:
        shot = self.getWindowShot()
        gray = cv2.cvtColor(shot, cv2.COLOR_BGR2GRAY)

        startX = int(gray.shape[1] * (1 / 4))
        endX = int(gray.shape[1] * (67 / 100))
        startY = int(gray.shape[0] * (3 / 4))
        endY = int(gray.shape[0] * (7 / 8))
        band = gray[startY:endY, startX:endX]

        text = pytesseract.image_to_string(band)
        if "CLICK HERE TO PLAY!" in text.split("\n"):
            # The former extra image_to_boxes pass was dropped: its result
            # was never used and it cost a second full OCR run.
            can_play_now = True
            x, y = startX, startY

    region = self.getWindowDimensions()
    x += region[0]
    y += region[1]
    pyautogui.moveTo(x + 108, y + 31)
    pyautogui.click()
def main():
    """Draw Tesseract character boxes on an image and save the result.

    Command line: ``--filename PATH`` (required). Images 1200 pixels or
    taller are scaled down to a height of 1200, keeping the aspect ratio.
    The annotated image is written next to the input as ``res-<name>``.
    """
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--filename", required=True)
    args = parser.parse_args()
    filename = args.filename

    img = cv2.imread(filename)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        parser.error("could not read image: {}".format(filename))

    height, width, _ = img.shape
    if height >= 1200:
        resize_width = (width * 1200) // height
        height, width = 1200, resize_width
        img = cv2.resize(img, (width, height))

    boxes = pytesseract.image_to_boxes(img)
    for line in boxes.splitlines():
        # Box line layout: char left bottom right top page.
        box = line.split(' ')
        # A size/aspect-ratio filter (skip boxes larger than 400 px^2) used
        # to live here but was disabled; every box is drawn.
        img = cv2.rectangle(img,
                            (int(box[1]), height - int(box[2])),
                            (int(box[3]), height - int(box[4])),
                            (0, 255, 0), 1)

    # Prefix only the file name so that paths with a directory part do not
    # point into a non-existent "res-<dir>" directory.
    out_path = os.path.join(os.path.dirname(filename),
                            'res-' + os.path.basename(filename))
    cv2.imwrite(out_path, img)
    cv2.waitKey(0)
def boxes():
    """Show ``images/1.png`` with the Hebrew characters Tesseract detects outlined.

    Prints the keys of the box dictionary and one line per character with
    its coordinates and page, then displays the annotated image in a
    resizable window until a key is pressed.
    """
    # See https://stackoverflow.com/questions/20831612/getting-the-bounding-box-of-the-recognized-words-using-python-tesseract
    img = cv2.imread('images/1.png')
    height, width = img.shape[:2]

    cv2.namedWindow('jpg', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('jpg', width, height)

    d = pytesseract.image_to_boxes(img, lang='heb', output_type=Output.DICT)
    print(d.keys())

    records = zip(d['char'], d['left'], d['top'], d['right'], d['bottom'], d['page'])
    for text, x1, y2, x2, y1, page in records:
        print(
            text, ' - x1:{}, y2:{}, x2:{}, y1:{} - page:{}'.format(
                x1, y2, x2, y1, page))
        # Flip y: Tesseract measures from the bottom edge.
        cv2.rectangle(img, (x1, height - y1), (x2, height - y2), (0, 255, 0), 2)

    cv2.imshow('jpg', img)
    cv2.waitKey(0)
def charsearch(what=49, graphic_enable=graphic_on):
    """Find every occurrence of one character in the global ``gray`` image.

    Each match is outlined and labelled on the global ``img``; its position
    and size are stored in the globals ``cx``, ``cy``, ``ch``, ``cw`` (the
    last match wins) and printed. ``count`` receives the number of matches,
    ``barray`` the raw box string, and ``b`` is left holding the fields of
    the last box line processed.

    :param what: code point of the character to look for (49 is ``'1'``).
    :param graphic_enable: forwarded to ``showInMovedWindow``.
    """
    global ch, cw, cy, cx, b, img, count, barray

    cx = cy = ch = cw = 0
    barray = np.array([])
    count = 0

    boxes = pytesseract.image_to_boxes(gray, config='--psm 13')
    h, _, _ = img.shape
    barray = boxes

    wanted = chr(what)
    for entry in boxes.splitlines():
        b = entry.split(' ')
        if b[0] != wanted:
            continue

        left, bottom, right, top = [int(b[k]) for k in (1, 2, 3, 4)]
        img = cv2.rectangle(img, (left, h - bottom), (right, h - top),
                            (255, 0, 0), 2)
        cv2.putText(img, str(b[0]), (left, h - bottom + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)

        cx = left
        cy = h - bottom
        ch = top - bottom
        cw = right - left
        count = count + 1
        print("cx,cy,ch,cw:", cx, cy, ch, cw)

    showInMovedWindow('Char', img, 710, 50, graphic_enable)
    print(count)
def process(im):
    """OCR Korean text from ``im`` and run it through the seq2seq model.

    Character boxes are normalised to the image size, serialised one per
    line as ``char x_left y_top x_right y_bottom`` and fed to ``evaluate``
    with an encoder/decoder pair loaded from the ``75000DeepPosV2``
    checkpoint.

    :param im: three-channel image array.
    :return: ``(output_words, output_poses)`` as produced by ``evaluate``.
    """
    config = ('-l kor --psm 6 --oem 1 ')
    h, w, _ = im.shape

    rows = []
    for entry in pytesseract.image_to_boxes(im, config=config).splitlines():
        fields = entry.split(' ')
        # Normalise to [0, 1] and flip y so that 0 is the top of the image.
        values = (
            fields[0],
            int(fields[1]) / w,
            1 - int(fields[4]) / h,
            int(fields[3]) / w,
            1 - int(fields[2]) / h,
        )
        rows.append(" ".join(str(value) for value in values) + "\n")

    # Conversion with the trained model.
    hidden_size = 512
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                   dropout_p=0.1).to(device)
    loadModel(encoder1, attn_decoder1,
              checkpoint_PATH + "75000DeepPosV2.checkpoint")
    output_words, _attentions, output_poses = evaluate(
        encoder1, attn_decoder1, "".join(rows))
    return (output_words, output_poses)
def find_characters(image_to_process, should_debug=False):
    """Return Tesseract's character boxes as a tuple of 6-field tuples.

    :param image_to_process: image passed to ``pytesseract.image_to_boxes``.
        Note: takes an Image file, not a CV2 image.
    :param should_debug: print every intermediate stage when true.
    :return: a tuple with one entry per group of six whitespace-separated
        fields of the box output. A trailing group is shorter if the number
        of fields is not a multiple of six.
    """
    character_locations = pytesseract.image_to_boxes(image_to_process)
    if should_debug:
        print("All the characters and their locations in Unicode are: ")
        print(character_locations)

    # The text is split directly. The previous repr() round-trip wrapped the
    # text in quotes, leaving a stray quote on the first and last field.
    flattened = character_locations.replace("\n", " ")
    if should_debug:
        print("\nAll newlines should now be spaces: ")
        print(flattened)

    fields = tuple(flattened.split())
    if should_debug:
        print("\nUnsplit tuple with size: " + str(len(fields)))
        print(str(fields) + "\n")

    # Regroup the flat field list into chunks of six fields each.
    grouped = tuple(fields[start:start + 6]
                    for start in range(0, len(fields), 6))
    if should_debug:
        print("Split tuple with size: " + str(len(grouped)))
        print(grouped)

    return grouped
def webcam():
    """Show the webcam stream with Tesseract character boxes drawn live.

    Sets the Tesseract executable path, opens camera 0 through DirectShow at
    640x480 and annotates every frame. The loop ends when a frame cannot be
    read; the camera and the window are released on exit, including on
    Ctrl-C.
    """
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    def captureScreen(bbox=(300, 300, 1500, 1000)):
        # Alternative frame source: grab a screen region instead of the camera.
        capScr = np.array(ImageGrab.grab(bbox))
        capScr = cv2.cvtColor(capScr, cv2.COLOR_RGB2BGR)
        return capScr

    try:
        while True:
            ok, img = cap.read()
            # To annotate the screen instead, use: img = captureScreen()
            if not ok:
                # cap.read() signals failure with (False, None); stop instead
                # of crashing on img.shape.
                break

            hImg = img.shape[0]
            for line in pytesseract.image_to_boxes(img).splitlines():
                # Box line layout: char left bottom right top page.
                b = line.split(' ')
                x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
                cv2.rectangle(img, (x, hImg - y), (w, hImg - h), (50, 50, 255), 2)
                cv2.putText(img, b[0], (x, hImg - y + 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 255), 2)

            cv2.imshow("Result", img)
            cv2.waitKey(1)
    finally:
        cap.release()
        cv2.destroyAllWindows()
def printboxes(imageName, mode="words"):
    """Display an image from ``images/`` with OCR boxes drawn on it.

    :param imageName: file name inside the ``images/`` directory.
    :param mode: ``"words"`` outlines words whose confidence exceeds 60 (and
        prints the keys of the data dictionary); any other value outlines
        individual characters.
    """
    img = cv2.imread('images/' + imageName)
    green = (0, 255, 0)

    if mode != "words":
        # Character mode: Tesseract's y axis starts at the bottom edge.
        img_height, _, _ = img.shape
        for entry in pytesseract.image_to_boxes(img).splitlines():
            fields = entry.split(' ')
            img = cv2.rectangle(img,
                                (int(fields[1]), img_height - int(fields[2])),
                                (int(fields[3]), img_height - int(fields[4])),
                                green, 2)
    else:
        # Word mode: boxes come as top-left corner plus width and height.
        d = pytesseract.image_to_data(img, output_type=Output.DICT)
        print(d.keys())
        for idx in range(len(d['text'])):
            if int(d['conf'][idx]) <= 60:
                continue
            x, y = d['left'][idx], d['top'][idx]
            w, h = d['width'][idx], d['height'][idx]
            img = cv2.rectangle(img, (x, y), (x + w, y + h), green, 2)

    cv2.imshow('img', img)
    cv2.waitKey(0)
def parse_images(images):
    """
    Takes in a list of Python Image objects and returns all bank data

    Every image is OCR'd at character level; each character becomes a dict
    with keys ``contents``, ``x`` and ``y`` (the first three fields of its
    box line). The per-image lists are handed to ``parser.parse_tesseract``
    together with the width of the first image.

    Input Type: list of Python Image objects (must not be empty)
    Output Type: string that is csv compliant
    """
    contents = []
    width, _ = images[0].size

    for count, image in enumerate(images, start=1):
        if DEBUG:
            print('Running OCR on image {0}'.format(count))

        structured = []
        # Pytesseract output is one big string, one character per line.
        for line in pytesseract.image_to_boxes(image).split('\n'):
            if not line.strip():
                # A page without recognised text yields an empty line, which
                # used to raise IndexError on data[1].
                continue
            data = line.split(' ')
            structured.append({
                'contents': data[0],
                'x': int(data[1]),
                'y': int(data[2])
            })
        contents.append(structured)

    parsed = parser.parse_tesseract(contents, width)
    return parsed
def read_image(thresholdedimg, unit):
    '''Read text and character boxes from an image using pytesseract.

    Note: pytesseract reads PIL format images, so the cv2 image (numpy
    array) is converted for this step. The Tesseract config file is looked
    up in the directory of this module.

    :param numpy.ndarray thresholdedimg: input image to be read.
    :param string unit: "um" selects ``pytess_config_um.txt``; any other
        value selects ``pytess_config_nm.txt``.
    :return: tuple ``(greekengtextfromimage, text_boxes)`` - the text that
        was read and the character box output.
    '''
    imagetoread = Image.fromarray(thresholdedimg)

    # The config file restricts which characters are looked for.
    cfg = 'pytess_config_um.txt' if unit == "um" else "pytess_config_nm.txt"
    module_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(module_dir, cfg)

    # Read the image using the English alphabet.
    greekengtextfromimage = image_to_string(
        imagetoread, lang="eng", config=config_path)
    text_boxes = image_to_boxes(
        imagetoread, lang="eng", config=config_path)
    return greekengtextfromimage, text_boxes
def char_grid(input_img_path, output_dir):
    """Render a "character grid" image: one filled rectangle per OCR character.

    The output has the size of the input image and a black background. Every
    detected character box is filled with the colour ``CHAR_MAP`` assigns to
    the lower-cased character, white when the character is not in the map.
    The result is written to ``output_dir`` under the input's file name.

    :param input_img_path: path of the image to analyse.
    :param output_dir: directory receiving the rendered grid.
    """
    img = cv2.imread(input_img_path)
    height, width = img.shape[:2]
    blank_image = np.zeros((height, width, 3), np.uint8)

    tess_img = pytesseract.image_to_boxes(img, output_type=Output.DICT)
    # NOTE(review): pytesseract appears to return an empty dict when no text
    # is recognised, so a missing 'char' key is treated as "no boxes" -
    # confirm against the installed version.
    for i, text in enumerate(tess_img.get('char', [])):
        try:
            x1, y1, x2, y2 = (tess_img[key][i]
                              for key in ('left', 'bottom', 'right', 'top'))
        except (KeyError, IndexError):
            # Best effort as before: skip characters with incomplete box data,
            # but no longer hide unrelated errors behind a bare except.
            continue

        char_color = CHAR_MAP.get(text.strip().lower(), (255, 255, 255))
        # Flip y: Tesseract measures from the bottom edge.
        cv2.rectangle(blank_image, (x1, height - y1), (x2, height - y2),
                      char_color, cv2.FILLED)

    # basename handles the platform's path separators, unlike split('/').
    filename = os.path.basename(input_img_path)
    cv2.imwrite(os.path.join(output_dir, filename), blank_image)
def __extract_boxes(self):
    '''
    Runs character-level OCR on ``self.image`` using ``self.language`` and
    stores the dictionary-typed result in the private boxes attribute.
    '''
    char_boxes = pytesseract.image_to_boxes(
        self.image,
        lang=self.language,
        output_type="dict",
    )
    self.__boxes = char_boxes
def _image_to_data(self):
    """
    Utilizes pytesseract OCR to generate bounding box data for the image.

    Keeps asking for a new path on stdin while the file at
    ``self.input_image_path`` cannot be found. Once the image opens, the
    recognised text and image size are printed, ``self.width`` and
    ``self.height`` are set, and the word and character box data are handed
    to ``_normalize_word_boxes`` and ``_normalize_char_boxes``. Nothing is
    returned.
    """
    while True:
        try:
            print('Please wait.....')
            input_image = Image.open(self.input_image_path)
        except FileNotFoundError:
            print(f'\nThe file [{self.input_image_path}] was not found.')
            self.input_image_path = input('Please enter the path for the image you\'d like to search: ')
        else:
            break

    image_string = pytesseract.image_to_string(input_image)
    word_box_data = pytesseract.image_to_data(input_image)
    char_box_data = pytesseract.image_to_boxes(input_image)

    self.width, self.height = input_image.size
    print(f'Width: {self.width}, Height: {self.height}')

    # TODO: Raise an error if no text is recognized
    print(f'\nRecognized Text:\n {image_string}')

    self._normalize_word_boxes(word_box_data)
    self._normalize_char_boxes(char_box_data)
def search_word(img):
    """Search the OCR'd characters of ``img`` for the global ``word``.

    Characters are compared with the next expected letter of ``word``. A
    mismatch counts as an error; once the errors exceed a third of the word
    length the search state is reset.

    :param img: PIL image (``img.size`` is ``(width, height)``).
    :return: ``(success, wordCoords)``. On success ``wordCoords`` is
        ``[left, top, right, bottom]`` in top-down image coordinates: the
        left/top of the first letter and the right/bottom of the last one.
        On failure it holds whatever partial state the scan ended with.
    """
    global word
    _, imgH = img.size
    letter_count = 0
    error_count = 0
    wordCoords = []

    if not word:
        # Nothing to look for; word[0] below would raise IndexError.
        return False, wordCoords

    for line in pytesseract.image_to_boxes(img).splitlines():
        # Box line layout: char left bottom right top page.
        b = line.split(' ')
        if b[0] == word[letter_count]:
            letter_count += 1
            # Flip y: Tesseract measures from the bottom edge.
            left, bottom, right, top = (int(b[1]), imgH - int(b[2]),
                                        int(b[3]), imgH - int(b[4]))
            if letter_count == 1:
                wordCoords = [left, top]
            # Separate "if" (not "elif") so that a one-letter word, whose
            # first letter is also its last, completes instead of running
            # past the end of ``word``.
            if letter_count == len(word):
                wordCoords.extend([right, bottom])
                return True, wordCoords
        else:
            error_count += 1
            if error_count > len(word) / 3:
                wordCoords = []
                letter_count = 0
                error_count = 0

    return False, wordCoords
def tables_from_contours(img, cnt):
    """
    From the contours generated from preprocessing
    Identify the tables and stations regions in the image
    ===============================================
    Inputs:
        img (numpy array): image
        cnt (list): detected contours in image
    ===============================================
    Outputs:
        tables (list): crops of every contour whose area exceeds 100000,
            in reverse contour order
        stations_regions (list): for each table, the strip of up to 200
            rows directly above it, in the same order
    """
    tables = []
    stations_regions = []

    for contour in cnt:
        # Only large contours are treated as tables.
        if cv2.contourArea(contour) <= 100000:
            continue

        # Rectangle coordinates of the contour.
        x, y, w, h = cv2.boundingRect(contour)
        tables.append(img[y:y + h, x:x + w])

        # Strip above the table containing the stations. Clamp at row 0: a
        # negative start index would wrap around and give an empty region
        # for tables less than 200 rows from the top.
        strip_top = max(y - 200, 0)
        stations_regions.append(img[strip_top:y, x:x + w])

        # The former mask/masked image and the image_to_boxes pass were
        # removed: their results were never used.

    tables.reverse()
    stations_regions.reverse()
    return tables, stations_regions
from wand.image import Image as wand_Image
from PIL import Image as PIL_Image
from wand.color import Color
import os
import pytesseract


def build_images(force=False):
    """Render every page of ``./example.pdf`` to ``foo-<index>.png``.

    Pages are rasterised at 300 dpi, flattened onto a white background and
    saved as PNG. Rendering is skipped when ``./foo-0.png`` already exists,
    unless ``force`` is true.

    :param force: re-render even if the first page image already exists.
    """
    if not force and os.path.isfile('./foo-0.png'):
        return

    with wand_Image(filename='./example.pdf', resolution=300) as all_pages:
        for idx, page in enumerate(all_pages.sequence):
            # The class is imported as ``wand_Image``; the bare name
            # ``Image`` used here before is undefined and raised NameError.
            with wand_Image(page) as i:
                i.format = 'png'
                i.background_color = Color('white')
                i.alpha_channel = 'remove'
                i.save(filename='foo-%s.png' % idx)


build_images()

# Open the first page once and close it when all three OCR passes are done.
with PIL_Image.open('foo-0.png') as first_page:
    boxes = pytesseract.image_to_boxes(first_page, output_type=pytesseract.Output.DICT)
    data = pytesseract.image_to_data(first_page, output_type=pytesseract.Output.DICT)
    osd = pytesseract.image_to_osd(first_page, output_type=pytesseract.Output.DICT)
import sys
import cv2
import pytesseract
from tesseract import image_to_string

# Usage: pass the image path as the first command line argument.
filename = sys.argv[1]

# Load the image; the unpacking assumes a colour (three channel) image.
img = cv2.imread(filename)
h, w, _ = img.shape

# Character boxes, one per line; add any Tesseract config options here.
boxes = pytesseract.image_to_boxes(img)

# Outline every character. Tesseract's y axis starts at the bottom edge,
# so y values are flipped with the image height.
for b in boxes.splitlines():
    b = b.split(' ')
    lower_left = (int(b[1]), h - int(b[2]))
    upper_right = (int(b[3]), h - int(b[4]))
    img = cv2.rectangle(img, lower_left, upper_right, (0, 255, 0), 2)

# Show the annotated image until a key is pressed.
cv2.imshow(filename, img)
cv2.waitKey(0)