def get_check_patch(im, box, text_len=6, flag=False):
    # Crop a sub-image ("check patch") out of `im` at a position derived from
    # `box`, and return it together with `point`, the (column, row) at which
    # the crop starts.
    # NOTE(review): from the first `patch = get_patchs(...)` line onward the
    # indentation no longer matches any enclosing statement, so an `else:`
    # branch and an inner condition appear to be missing from this copy.
    # The code is left untouched.
    if not flag:
        origin = box[1].astype(np.int32)
        # width = 65
        bias = 10
        origin[0] += bias  ### attention
        # Patch size is derived from the vertical distance box[1] -> box[2].
        height = np.round((box[2, 1] - box[1, 1])).astype(np.int32)
        width = (height*1.5).astype(np.int32)
        # Rows span from 0.1*height above origin to 1.25*height below it.
        check_patch = im[origin[1]-(height*0.1).astype(np.int32):origin[1]+(height*1.25).astype(np.int32), origin[0]:origin[0]+width]
        point = (origin[0], origin[1]-(height*0.1).astype(np.int32))
            patch = get_patchs(im, box[np.newaxis, ...], with_morph=True)[0]
            check_boxes = pytesseract.image_to_boxes(patch, config=config[1])
            # A '~' in the OCR output triggers a retry without morphology;
            # if it is still present it is replaced by '0'.
            if '~' in check_boxes:
                patch = get_patchs(im, box[np.newaxis, ...], with_morph=False)[0]
                check_boxes = pytesseract.image_to_boxes(patch, config=config[1])
                if Debug_mode:
                    print('LCH: the ~ detected')
                if '~' in check_boxes:
                    check_boxes = check_boxes.replace('~', '0')
            check_array = boxes_to_array(check_boxes)[:text_len, :].astype(np.int32)
            patch = get_patchs(im, box[np.newaxis, ...], with_morph=False)[0]
            check_boxes = pytesseract.image_to_boxes(patch, config=config[1])
            check_array = boxes_to_array(check_boxes)[:text_len, :].astype(np.int32)

        # New origin: columns 3 and 2 of the last kept character row,
        # offset by box[0].
        origin = (check_array[-1, 3] + box[0][0], check_array[-1, 2] + box[0][1])
        # origin = (check_array[5, 3] + box[0][0], check_array[5, 2] + box[0][1])
        height = (box[2, 1] - origin[1]).astype(np.int32)
        width = (box[2, 0] - origin[0] + 10).astype(np.int32)
        check_patch = im[origin[1]-(height*0.1).astype(np.int32):origin[1]+(height*1.25).astype(np.int32), origin[0]:origin[0]+width]
        point = (origin[0], origin[1]-(height*0.1).astype(np.int32))
    return check_patch, point
# Example #2
def translit(image, box_sim_thresh=0.65):
    # NOTE(review): this copy of the function is incomplete. The docstring
    # below is never closed, and several call expressions (image_to_data,
    # image_to_boxes, the dict of other_boxes, both zip(...) headers) are cut
    # off before their closing parentheses. The code is left untouched.
    """Returns the string representation of the transliteration found in IMAGE.
  :param image: an image of a transliterated Semitic language
  :param box_sim_thresh: the minimum Jaccard similarity of the bounding boxes of
      two characters for them to be recognized as describing the same character
    # Variable declarations follow.
    # The following is a parameter of a rather queer sort
    flagship_language = 'ces'
    other_languages = [k for k in charsets.keys() if k != flagship_language]
    data = pytesseract.image_to_data(image,
    flagship_boxes = pytesseract.image_to_boxes(image,
    other_boxes = {
        language: pytesseract.image_to_boxes(image,
        for language in other_languages
    # Discovery of the correct characters follows.
    chars = list()
    for char0, box0 in zip(
            zip(flagship_boxes['left'], flagship_boxes['bottom'],
                flagship_boxes['right'], flagship_boxes['top'])):
        # Candidates start with the flagship reading; readings from the other
        # languages are added when their boxes are similar enough.
        matches = [(flagship_language, char0)]
        for other in other_languages:
            for char1, box1 in zip(
                sim = box_sim(*box0, *box1)
                if char1 in charsets[other] and (sim >= box_sim_thresh or
                                                 (sim > 0
                                                  and char_kin(char0, char1))):
                    matches.append((other, char1))
        chars.append((char0, box0, choose_char(matches)))
    # Matching of characters to words follows.
    for i, (left, top, width, height, conf, word0) in enumerate(
            zip(data['left'], data['top'], data['width'], data['height'],
                data['conf'], data['text'])):
        if int(conf) >= 0:
            right, bottom = left + width, top + height
            # Flip the word's y-coordinates against image.height before
            # comparing with the character boxes.
            top, bottom = image.height - top, image.height - bottom
            word = ''
            for char0, box, char in chars:
                if (char0 in word0) and box_sim(*box, left, bottom, right,
                                                top) > 0:
                    word += char
            data['text'][i] = word
    return data_to_string(data['text'])
# Example #3
def locateNameBox():
    # Scan the per-character box lines for four consecutive entries whose
    # first characters are 'n', 'a', 'm', 'e' and print those four lines.
    # NOTE(review): the `image =` line is truncated (the call that loaded the
    # file is missing). `file` is opened for writing but is neither used nor
    # closed in the visible code. The lookahead box[i + 1..3] and the [0]
    # index can raise IndexError near the end of the list or on an empty
    # line. The code is left untouched.
    image ='answerSheet_with_name.png')
    file = open("image_to_string.txt", "w")
    box = image_to_boxes(image).split('\n')

    for i in range(len(box)):
        if (box[i][0] == 'n' and box[i + 1][0] == 'a' and box[i + 2][0] == 'm'
                and box[i + 3][0] == 'e'):
            for j in range(0, 4):
                print(box[i + j])
# Example #4
def findcoordinateOfName(path):
    # Crop the top third of the image at `path`, write it to ./temp/temp.png,
    # look for four consecutive character boxes spelling "name", and return
    # two (x, y) points built from the fourth entry (the 'e') with the
    # y-values subtracted from the cropped image height.
    # NOTE(review): the second `image =` line is truncated, and the
    # `if (flag):` statement has no body in this copy. The code is left
    # untouched.
    image = cv2.imread(path)
    height, width = image.shape[:2]
    crop_img = image[0:int(height / 3), 0:width]
    cv2.imwrite("./temp/temp.png", crop_img)
    image ="./temp/temp.png")
    # file = open("image_to_string.txt", "w")
    box = image_to_boxes(image).split('\n')

    # print(image.size)
    # From here on width/height come from `image.size`, not from cv2's shape.
    width, height = image.size

    coordinate = []
    for i in range(len(box)):
        flag = False
        if (box[i][0] == 'n' and box[i + 1][0] == 'a' and box[i + 2][0] == 'm'
                and box[i + 3][0] == 'e'):
            # print("true")
            for j in range(0, 4):
                flag = True
                coordinate.append(box[i + j])
                # print(box[i + j])

            if (flag):
    # coordinate[3] is the fourth collected line; IndexError if none matched.
    coorE = coordinate[3].split(" ")
    # print()
    return ((int(coorE[1]), height - int(coorE[4])), (int(coorE[3]),
                                                      height - int(coorE[2])))
    def submit_fun(self):
        # When self.path is set: OCR the image, draw a rectangle and a label
        # for every detected character, put the space-separated characters
        # into self.text, and show the annotated image.
        # NOTE(review): the cv2.rectangle call is cut off before its closing
        # arguments, and the final string literal is the tail of a call whose
        # head (presumably an `else:` with a message box) is missing.
        # The code is left untouched.
        if self.path != '':
            img = cv2.imread(self.path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            text_got = ''
            hImg, wImg, _ = img.shape
            boxes = pytesseract.image_to_boxes(img)
            for b in boxes.splitlines():
                # print(b)
                b = b.split(' ')
                # print(b[0], end=" ")
                text_got = text_got + str(b[0]) + ' '
                # Here w and h hold the third and fourth coordinates of the
                # box line, not a width/height pair.
                x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])

                cv2.rectangle(img, (x, hImg - y), (w, hImg - h), (50, 50, 255),
                cv2.putText(img, b[0], (x + 10, hImg - y + 55),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (50, 50, 255), 2)
            # img = cv2.resize(img, (int(img.shape[1])*2, int(img.shape[0])*2)) # make image bigger
            self.text.delete(1.0, END)
            ps = PSS()
            self.text.insert(INSERT, text_got)
            cv2.imshow('img', img)
                                 "You haven't select any image file yet.")
# Example #6
def tesseract_box(image, color):
    """Outline every character Tesseract finds in ``image``.

    ``pytesseract.image_to_boxes`` reports each character as
    ``char left bottom right top page`` with the origin at the bottom-left
    corner of the image, whereas OpenCV draws with the origin at the
    top-left. The y-values are therefore flipped against the actual image
    height (previously a hard-coded 128, which was only correct for
    128-pixel-high images).

    :param image: image array accepted by both pytesseract and cv2.rectangle.
    :param color: colour passed straight to ``cv2.rectangle``.
    :return: the image with a 2-pixel rectangle drawn around each character.
    """
    height = image.shape[0]
    boxes = pytesseract.image_to_boxes(image)
    for line in boxes.splitlines():
        fields = line.split(' ')
        if len(fields) < 5:
            # Blank or malformed line: nothing to draw.
            continue
        left, bottom, right, top = (int(value) for value in fields[1:5])
        image = cv2.rectangle(image, (left, height - bottom),
                              (right, height - top), color, 2)
    return image
def getTextCoordinate():
    # Run Tesseract and store, for every non-zero single-digit box, the digit
    # and the centre of its box in the global `textCoordinate` table.
    # NOTE(review): the image_to_boxes call is truncated (its arguments and
    # closing parenthesis are missing). `height` is not defined in this
    # function and is presumably a module-level value — confirm.
    # The code is left untouched.
    global textCoordinate, filename, ratio_Y_Axis_Value, ratio_X_Axis_Value, numberOfCharactor, numberOfDigitValue

    # Both names are bound to the same array object.
    ratio_X_Axis_Value_Array = ratio_Y_Axis_Value_Array = np.arange(5)
    # ratio_Y_Axis_Value_Array =  np.arange(5)
    total_ratio_X_Axis_Value = total_ratio_Y_Axis_Value = ratio_Y_Axis_Index = ratio_X_Axis_Index = 0

    # run tesseract, returning the bounding boxes
    boxes = pytesseract.image_to_boxes(  # also include any config options you use
    numberOfCharactor = int(len(boxes.splitlines()))

    # One [char, x, y, unused] row per reported box; only the first
    # numberOfDigitValue rows are filled in below.
    textCoordinate = [[0] * 4 for i in range(numberOfCharactor)]

    # store values into textCoordinate array
    i = 0
    ratio_X_Axis_Value = ratio_Y_Axis_Value = 0
    numberOf_Tic_Mark_X_Axis = 0

    for b in boxes.splitlines():
        b = b.split(' ')
        # print("Charactor of  " + b[0] + "  =  "+b[1]+ ","+b[2]+ ","+b[3]+ ","+b[4])  # (x1, y1, X2, y2)
        if (b[0].isdigit() and (int(b[0]) != 0)):
            textCoordinate[i][0] = int(b[0])  # store Charactor
            textCoordinate[i][1] = round(
                (int(b[1]) + int(b[3])) / 2)  # store X Coordinate
            textCoordinate[i][2] = round(
                ((height - int(b[2])) +
                 (height - int(b[4]))) / 2)  # store Y Coordinate
            i = i + 1
            numberOfDigitValue = i
# Example #8
def explain_tesseract(img, title, options_str):
    # OCR `img` twice — as given and padded with 40 white rows above and
    # below — and append both results to `candidate_results`; then iterate
    # over the per-character boxes of the original image.
    # NOTE(review): the line below is docstring text whose quotes are missing,
    # and the box loop ends after parsing the coordinates (the drawing code
    # appears to have been cut off). The code is left untouched.
    explains the tesseract way of analysing this image by having boxes drawn around the characters
    r,c = img.shape
    nb_lines = 40
    additional_lines = np.full((nb_lines,c),255, dtype=np.uint8)
    # adding a blank rectangle above the image
    img2 = np.append(img,additional_lines,axis= 0)
    img2 = np.append(additional_lines,img2,axis= 0)

    # if interactive: cv2.imshow("img extended", img2)

    hImg,wImg = img.shape
    # print("pytesseract options", options_str)
    myres = pytesseract.image_to_string(img,config=options_str).strip()
    candidate_results.append([f'{title} (orig size)',myres])
    #if interactive: print("pytesseract (orig): ", myres)
    myres2 = pytesseract.image_to_string(img2,config=options_str).strip()
    candidate_results.append([f'{title} (extended)',myres2])
    #if interactive: print("pytesseract (extended): ", myres2)
    boxes = pytesseract.image_to_boxes(img,config=options_str)

    for b in boxes.splitlines():
        b = b.split(' ')
        # print(b)
        x,y,w,h = int(b[1]),int(b[2]),int(b[3]),int(b[4])
    def _extract_text(self, frame, bbox, sharpen=False):
        # NOTE(review): the docstring below is never closed, the body of
        # `if letter:` is missing (only commented-out lines follow it), and
        # the logging call under `if text:` is truncated. As written, `text`
        # stays an empty list until it is joined. The code is left untouched.
        # type: (Int, List[Int], Bool) -> String]
        """ Returns the string that has been detected within the bounding box of the given frame.

        Pytesseract is searching for a string within the given bounding box of
        the given frame. Sometimes better results can be obtained by preprocessing
        the frame (in our case by adding sharpness).

            String representing the text, that has been detected in the frames bounding box.
            Returning empty string if nothing has been detected.
        # bbox is indexed as [x0, y0, x1, y1]: rows use [1]:[3], columns [0]:[2].
        cropped_frame = frame[bbox[1]:bbox[3], bbox[0]:bbox[2]]
        img = cv2.bitwise_not(cropped_frame) # invert image for better results.
        if sharpen:
            # 3x3 kernel with centre 9 and -1 elsewhere.
            kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
            img = cv2.filter2D(img, -1, kernel)
        letters = pytesseract.image_to_boxes(img)
        letters = letters.split('\n')
        letters = [letter.split() for letter in letters]
        text = []
        for letter in letters:
            if letter:
        #         h, w = img.shape
        #         cv2.rectangle(img, (int(letter[1]), h - int(letter[2])), (int(letter[3]), h - int(letter[4])), (0,0,255), 1)
        # cv2.imshow("Image", img)
        # cv2.waitKey(0)
        text = ''.join(text)
        if text:
  'Detected VFX Shotcode: %s', text)
        return text
# Example #10
        def find_letters(thresh_low, type):
            # Threshold the letter wheel, OCR it, and store the per-character
            # boxes in self.boxes rescaled by self.resize_scale, with the
            # y-values subtracted from
            # screen.height * self.crop_bottom_proportion.
            # Raises IndexError when fewer than three letters are kept.
            # NOTE(review): the cv2.resize and image_to_string calls are
            # truncated, the body of `if letter in allowable_chars():` is
            # missing, and the self.boxes list comprehension is not closed.
            # The parameter `type` shadows the builtin. Code is left untouched.
            nonlocal thresholded
            nonlocal letters
            gray = cv2.cvtColor(np.array(letter_wheel), cv2.COLOR_RGB2GRAY)
            ret, thresholded = cv2.threshold(gray, thresh_low, 255, type)
            thresholded = cv2.resize(thresholded,
            thresholded = cv2.filter2D(thresholded, -1, self.smoothing_kernel)
            cv2.imwrite("thresholded.png", thresholded)

            letters_raw = pytesseract.image_to_string(thresholded,
            letters = []
            for letter in letters_raw:
                if letter in allowable_chars():
            if len(letters) < 3:
                raise IndexError()
            self.boxes = [
                box.split() for box in pytesseract.image_to_boxes(
                    thresholded, config=self.config).split("\n")
            for box in self.boxes:
                box[1] = int(int(box[1]) / self.resize_scale)
                box[2] = screen.height * self.crop_bottom_proportion - int(
                    int(box[2]) / self.resize_scale)
                box[3] = int(int(box[3]) / self.resize_scale)
                box[4] = screen.height * self.crop_bottom_proportion - int(
                    int(box[4]) / self.resize_scale)
def tesseract():
    """Render ``imgFile`` as a searchable PDF and as hOCR via pytesseract.

    Both renderings are bound to local names only; nothing is returned or
    written to disk by this function.

    Setup note: if the tesseract executable is not on PATH, set
    ``pytesseract.pytesseract.tesseract_cmd`` to its full path first, e.g.
    r'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'. Unsupported image
    formats make tesseract return an error.
    """
    # First the searchable PDF, then the hOCR output — same order as before.
    searchable_pdf, hocr_markup = (
        pytesseract.image_to_pdf_or_hocr(imgFile, extension=kind)
        for kind in ('pdf', 'hocr')
    )
# Example #12
def processus(img_adress=None, img_file=None):
    """Detect characters in an image and draw them onto it.

    The image is ``img_file`` when that is not None, otherwise it is loaded
    from ``img_adress`` with ``cv2.imread``. It is converted from BGR to RGB
    before being handed to pytesseract.

    Each box line has the form ``char x1 y1 x2 y2 ...``; the y-values are
    subtracted from the image height before drawing.

    :return: tuple ``(annotated image, list of box lines, list of text lines)``.
    """
    if img_file is not None:
        img = img_file
    else:
        img = cv2.imread(img_adress)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img_height, _img_width, _channels = img.shape

    char_lines = pytesseract.image_to_boxes(img).splitlines()
    text_lines = pytesseract.image_to_string(img).splitlines()

    box_colour = (50, 50, 255)
    label_colour = (50, 255, 50)
    for entry in char_lines:
        fields = entry.split(' ')
        left, bottom, right, top = (int(fields[1]), int(fields[2]),
                                    int(fields[3]), int(fields[4]))

        # Outline the character.
        cv2.rectangle(img, (left, img_height - bottom),
                      (right, img_height - top), box_colour, 2)

        # Write the recognised character next to its box.
        cv2.putText(img, fields[0], (left, img_height - bottom + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, label_colour, 2)

    cv2.imshow('result', img)

    return (img, char_lines, text_lines)
# Example #13
def get_img_ocr(imgfile, config_number=3, ratio=2):
    # Run `postprocess` on `imgfile`, scale the result by `ratio`, OCR it with
    # `--oem 3 --psm <config_number>`, and return
    # (box string, text, height, width) of the scaled image.
    # NOTE(review): the cv2.resize call is cut off after its size argument
    # (closing parenthesis / further arguments missing). Another function with
    # this same name appears later in the file; if both live in one module the
    # later definition replaces this one. The code is left untouched.
    #img =
    #custom_config = r'--oem 3 --psm 6'
    #####img = cv2.imread(imgfile)

    # default is 3: fully automatic
    # support 1-12
    # may use 11: Sparse text. Find as much text as possible in no particular order.
    # may use 12: Sparse text with OSD.
    # may use 6: Assume a single uniform block of text
    custom_config = r'--oem 3 --psm ' + str(config_number)

    img = postprocess(imgfile)

    height, width = img.shape[:2]

    img_1 = cv2.resize(img, (int(ratio * width), int(ratio * height)),
    img_array = np.array(img_1)
    # Unpacking three values requires a 3-dimensional array here.
    H, W, _ = img_array.shape
    text = pytesseract.image_to_string(img_1, config=custom_config)
    bbox = pytesseract.image_to_boxes(img_1,
                                      config=custom_config)  # (x1, y1, x2, y2)
    ######imgname = path + '/scanned.png'
    return bbox, text, H, W
def get_img_ocr(image, box, config_number=3, ratio=2):
    # Crop `box` (x1, y1, x2, y2) out of `image` with a 10-pixel margin,
    # binarise it with Otsu thresholding, scale it by `ratio`, OCR it with
    # `--oem 3 --psm <config_number>`, and return
    # (box string, text, height, width) of the scaled crop.
    # NOTE(review): the cv2.resize call is cut off after its size argument.
    # `y1 - 10` / `x1 - 10` become negative indices when the box is within
    # 10 pixels of the top/left edge, which slices from the far end instead.
    # The code is left untouched.
    #img =
    #custom_config = r'--oem 3 --psm 6'
    #####img = cv2.imread(imgfile)

    # default is 3: fully automatic
    # support 1-12
    # may use 1, 6, 11, 12
    custom_config = r'--oem 3 --psm ' + str(config_number)

    img = image.copy()
    x1, y1, x2, y2 = box
    img = img[y1 - 10:y2 + 10, x1 - 10:x2 + 10, :]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    height, width = img.shape[:2]

    img_1 = cv2.resize(img, (int(ratio * width), int(ratio * height)),
    img_array = np.array(img_1)
    # Add a trailing axis so the three-way shape unpack below works on the
    # single-channel thresholded image.
    img_array = img_array[:, :, np.newaxis]
    H, W, _ = img_array.shape
    text = pytesseract.image_to_string(img_1, config=custom_config)
    bbox = pytesseract.image_to_boxes(img_1,
                                      config=custom_config)  # (x1, y1, x2, y2)
    ######imgname = path + '/scanned.png'
    return bbox, text, H, W
# Example #15
def tess_func(gray):
    """Outline the characters tess finds in a frame and print its text.

    A copy of the frame is taken before any rectangle is drawn; the text is
    read from that clean copy while the rectangles go onto ``gray`` itself.

    :param gray: the frame to analyse; it is drawn on.
    :return: the frame with one rectangle per detected character.
    """
    # Box y-values are subtracted from the frame height before drawing.
    frame_height = gray.shape[0]

    char_boxes = tess.image_to_boxes(gray)

    # Keep an unmarked copy for the text recognition further down.
    untouched = gray.copy()

    outline_colour = (0, 0, 255)
    for entry in char_boxes.splitlines():
        fields = entry.split(' ')
        first_corner = (int(fields[1]), frame_height - int(fields[2]))
        second_corner = (int(fields[3]), frame_height - int(fields[4]))
        gray = cv2.rectangle(gray, first_corner, second_corner,
                             outline_colour, 1)

    recognised = tess.image_to_string(untouched)
    print("Tess text: " + recognised)

    return gray
# Example #16
 def handleTitleScreen(self):
     """Wait for the "CLICK HERE TO PLAY!" banner, then move the mouse to it.

     Window shots are taken in a loop. Each one is converted to grayscale and
     cropped to a fixed region (columns 1/4..67/100 of the width, rows
     3/4..7/8 of the height) which is then OCR'd. Once a line of the OCR
     text equals the banner text exactly, the pointer is moved to the crop's
     top-left corner, offset by the window position plus (108, 31).
     """
     banner_text = "CLICK HERE TO PLAY!"
     crop_x = 0
     crop_y = 0
     while True:
         shot = self.getWindowShot()
         grey = cv2.cvtColor(shot, cv2.COLOR_BGR2GRAY)
         right = int(grey.shape[1] * (67 / 100))
         left = int(grey.shape[1] * (1 / 4))
         top = int(grey.shape[0] * (3 / 4))
         bottom = int(grey.shape[0] * (7 / 8))
         banner = grey[top:bottom, left:right]

         banner_seen = False
         for ocr_line in pytesseract.image_to_string(banner).split("\n"):
             if ocr_line != banner_text:
                 continue
             banner_seen = True
             crop_x, crop_y = left, top
             # The boxes are requested on every match; the result is unused.
             pytesseract.image_to_boxes(banner)
         if banner_seen:
             break

     window = self.getWindowDimensions()
     pyautogui.moveTo(crop_x + window[0] + 108, crop_y + window[1] + 31)
# Example #17
def main():
    # Read the image named by --filename, shrink it to a height of 1200 when
    # it is at least that tall (keeping the aspect ratio via integer
    # division), draw a rectangle per detected character, and write the
    # result to 'res-' + filename.
    # NOTE(review): the cv2.rectangle call is cut off after the colour
    # argument. 'res-' + filename only yields a sensible path when filename
    # has no directory part. The code is left untouched.
    parser = argparse.ArgumentParser()
    parser.add_argument("--filename", required=True)
    args = parser.parse_args()
    filename = args.filename

    img = cv2.imread(filename)
    height, width, _ = img.shape

    if height >= 1200:
        resize_width = (width * 1200) // height
        height, width = 1200, resize_width

        img = cv2.resize(img, (width, height))

    boxes = pytesseract.image_to_boxes(img)

    for i in boxes.splitlines():
        box = i.split(' ')
        # Box height and width in pixels; only used by the disabled filter.
        h_d, w_d = abs(int(box[2]) -
                       int(box[4])), abs(int(box[1]) - int(box[3]))
        # Below: do not draw the box when its pixel area exceeds 400.
        #if h_d != 0 and w_d != 0 and h_d * w_d <= 400:
        #    if h_d / w_d <= 6 and w_d / h_d <= 5:
        img = cv2.rectangle(img, (int(box[1]), height - int(box[2])),
                            (int(box[3]), height - int(box[4])), (0, 255, 0),

    cv2.imwrite('res-' + filename, img)
    #cv2.imshow(filename, img)
# Example #18
def boxes():
    # OCR 'images/1.png' with lang='heb', requesting dictionary output, and
    # draw one rectangle per reported character before showing the image.
    # NOTE(review): the two indented lines after the tuple assignment are the
    # argument list of a call whose head is missing (presumably a print), and
    # the cv2.rectangle call is cut off after the colour argument.
    # The code is left untouched.
    # https: // / questions / 20831612 / getting - the - bounding - box - of - the - recognized - words - using - python - tesseract
    img = cv2.imread('images/1.png')
    height = img.shape[0]
    width = img.shape[1]

    h, w = img.shape[:2]
    cv2.namedWindow('jpg', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('jpg', w, h)
    # cv2.imshow('jpg', c)

    # d = pytesseract.image_to_boxes(img, lang='heb', output_type=Output.DICT)
    d = pytesseract.image_to_boxes(img, lang='heb', output_type=Output.DICT)
    n_boxes = len(d['char'])
    for i in range(n_boxes):
        # Note the naming: y2 is bound to 'top' and y1 to 'bottom'.
        (text, x1, y2, x2, y1) = (d['char'][i], d['left'][i], d['top'][i],
                                  d['right'][i], d['bottom'][i])
            text, ' - x1:{}, y2:{}, x2:{}, y1:{} - page:{}'.format(
                x1, y2, x2, y1, d['page'][i]))
        cv2.rectangle(img, (x1, height - y1), (x2, height - y2), (0, 255, 0),
    cv2.imshow('jpg', img)
def charsearch(what=49, graphic_enable=graphic_on):
    """Locate the character ``chr(what)`` in the global ``gray`` image.

    Tesseract is run with ``--psm 13``. Every box whose character equals
    ``chr(what)`` is outlined and labelled on the global ``img``. The globals
    ``cx``, ``cy``, ``ch``, ``cw`` receive the position and size of the last
    match (all zero when there is none), ``count`` the number of matches,
    ``barray`` the raw box string and ``b`` the last split box line.
    Finally ``img`` is shown through ``showInMovedWindow``.

    :param what: code point of the character to look for (default 49, '1').
    :param graphic_enable: passed through to ``showInMovedWindow``.
    """
    global ch, cw, cy, cx, b, img, count, barray
    #text = pytesseract.image_to_string(gray,config='--psm 13')
    cx = cy = ch = cw = 0
    barray = np.array([])
    count = 0
    raw_boxes = pytesseract.image_to_boxes(gray, config='--psm 13')
    img_h, _img_w, _img_c = img.shape
    barray = raw_boxes
    for entry in raw_boxes.splitlines():
        b = entry.split(' ')
        if b[0] != chr(what):
            continue
        left, bottom, right, top = (int(b[1]), int(b[2]),
                                    int(b[3]), int(b[4]))
        img = cv2.rectangle(img, (left, img_h - bottom),
                            (right, img_h - top), (255, 0, 0), 2)
        cv2.putText(img, str(b[0]), (left, img_h - bottom + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)
        cx, cy = left, img_h - bottom
        ch, cw = top - bottom, right - left
        count += 1
        print("cx,cy,ch,cw:", cx, cy, ch, cw)

    showInMovedWindow('Char', img, 710, 50, graphic_enable)
def process(im):
    # OCR `im` with the Korean language pack, convert every character box to
    # four values expressed as fractions of the image width/height (y-values
    # as 1 - y/h), then build an encoder/decoder pair, load a checkpoint and
    # return (output_words, output_poses) from `evaluate`.
    # NOTE(review): the `seq = [` list is never closed, and the
    # AttnDecoderRNN(...) and evaluate(...) calls are cut off, so how
    # `csvformat` reaches the model cannot be seen here.
    # The code is left untouched.
    config = ('-l kor --psm 6 --oem 1 ')

    h, w, _ = im.shape
    boxes = pytesseract.image_to_boxes(im, config=config)
    csvformat = []
    img = im
    for b in boxes.splitlines():
        b = b.split(' ')
        seq = [
            str(int(b[1]) / w),
            str(1 - int(b[4]) / h),
            str(int(b[3]) / w),
            str(1 - int(b[2]) / h)
        csvformat.append(" ".join(seq) + "\n")  #conversion

    hidden_size = 512
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size,

    loadModel(encoder1, attn_decoder1,
              checkpoint_PATH + "75000DeepPosV2.checkpoint")

    output_words, attentions, output_poses = evaluate(encoder1, attn_decoder1,
    return (output_words, output_poses)
def find_characters(image_to_process, should_debug=False):
    # Turn the image_to_boxes output into a tuple of 6-element tuples
    # (one group of six whitespace-separated tokens per entry).
    # NOTE(review): `repr(` has lost its argument, and two of the debug
    # print(...) calls are cut off after the `+`. Because repr() is applied
    # before splitting, the first and last tokens would carry the quote
    # characters that repr adds — confirm whether that is intended.
    # The code is left untouched.
    unicode_wall_of_character_locations = pytesseract.image_to_boxes(
        image_to_process)  # Note: Takes Image file, not CV2 image.
    if should_debug:
        print("All the characters and their locations in Unicode are: ")
    string_wall_of_character_locations = repr(
    )  # Converts the unicode file to a string
    if should_debug:
        print("\nThe characters in string format: ")
    # repr() renders newlines as the two characters backslash + n, hence the
    # escaped pattern below.
    string_wall_of_character_locations = string_wall_of_character_locations.replace(
        "\\n", " ")  # Replaces all newlines with a space character.
    if should_debug:
        print("\nAll newlines should now be spaces: ")
    string_tuple_of_character_locations = tuple(
        i for i in string_wall_of_character_locations.split()
    )  # Parses the data into a tuple (a static array).
    if should_debug:
        print("\nUnsplit tuple with size: " +
        print(str(string_tuple_of_character_locations) + "\n")
    split_tuple_of_character_locations = tuple(
        string_tuple_of_character_locations[a:(a + 6)]
        for a in range(0, len(string_tuple_of_character_locations),
                       6))  # Splits the tuple every 6 positions.
    if should_debug:
        print("Split tuple with size: " +
    return split_tuple_of_character_locations
# Example #22
def webcam():
    # Loop forever: take a frame, OCR it, draw a rectangle and a label for
    # every detected character, and show the result.
    # NOTE(review): the `_, img =` line has lost its right-hand side
    # (presumably a read from `cap`). The loop has no exit condition and no
    # key-wait call in this copy, and `fps` is computed but only used by a
    # commented-out line. The code is left untouched.
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    # Capture properties 3 and 4 are set to 640 and 480 (frame size).
    cap.set(3, 640)
    cap.set(4, 480)

    # Alternative frame source (screen region); only referenced from a
    # commented-out line below.
    def captureScreen(bbox=(300, 300, 1500, 1000)):
        capScr = np.array(ImageGrab.grab(bbox))
        capScr = cv2.cvtColor(capScr, cv2.COLOR_RGB2BGR)
        return capScr

    while True:
        timer = cv2.getTickCount()
        _, img =
        # img = captureScreen()
        hImg, wImg, _ = img.shape
        boxes = pytesseract.image_to_boxes(img)
        for b in boxes.splitlines():
            # print(b)
            b = b.split(' ')
            # print(b)
            x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
            cv2.rectangle(img, (x, hImg - y), (w, hImg - h), (50, 50, 255), 2)
            cv2.putText(img, b[0], (x, hImg - y + 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 255), 2)
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)
        # cv2.putText(img, str(int(fps)), (75, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (20,230,20), 2);
        cv2.imshow("Result", img)
# Example #23
def printboxes(imageName, mode="words"):
    # Load 'images/' + imageName and draw boxes on it: around words with a
    # confidence above 60 when mode == "words", then around characters.
    # NOTE(review): the tuple assignment and the cv2.rectangle call inside the
    # word branch are both cut off, and the `else:` that presumably introduced
    # the character branch is missing (its body is still indented).
    # The code is left untouched.

    img = cv2.imread('images/' + imageName)

    # detect words
    if mode == "words":
        d = pytesseract.image_to_data(img, output_type=Output.DICT)

        # generate boxes around words
        n_boxes = len(d['text'])
        for i in range(n_boxes):
            if int(d['conf'][i]) > 60:
                (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i],
                img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0),

    #detect characters
        h, w, c = img.shape
        boxes = pytesseract.image_to_boxes(img)
        for b in boxes.splitlines():
            b = b.split(' ')
            # Character y-values are subtracted from the image height.
            img = cv2.rectangle(img, (int(b[1]), h - int(b[2])),
                                (int(b[3]), h - int(b[4])), (0, 255, 0), 2)

    # #show boxes
    cv2.imshow('img', img)
def parse_images(images):
    # NOTE(review): the three lines below are docstring text whose quotes are
    # missing, and the `structured += [{` literal is never closed.
    # The inner loop reuses the name `i` of the outer loop. `width`/`height`
    # are taken from the first image only. A blank line in the OCR output
    # would make int(data[1]) raise IndexError. The code is left untouched.
    Takes in a list of Python Image objects and returns all bank data
    Input Type: list of Python Image objects
    Output Type: string that is csv compliant
    contents = []
    count = 1
    width, height = images[0].size
    for i in images:
        if DEBUG:
            print('Running OCR on image {0}'.format(count))
        content = pytesseract.image_to_boxes(i)
        content = content.split('\n')
        structured = []
        # Pytesseract output is a big string so we have to break and parse out
        for i in content:
            data = i.split(' ')  # More parsing out
            # Keep the character and the first two coordinates of its box.
            structured += [{
                'contents': data[0],
                'x': int(data[1]),
                'y': int(data[2])
        contents += [structured]
        count += 1
    parsed = parser.parse_tesseract(contents, width)
    return parsed
# Example #25
def read_image(thresholdedimg, unit):
    # NOTE(review): the docstring below is never closed, the `else:` between
    # the two `cfg = ...` assignments is missing (as written, cfg always ends
    # up as the "nm" file), and both the image_to_string and image_to_boxes
    # calls are cut off. The function returns a (text, boxes) pair although
    # the docstring only mentions the text. The code is left untouched.
    '''Read text from an image using pytesseract. Note: pytesseract reads PIL format images, 
    so we convert our cv2 (numpy array) for this step.

    :param numpy.ndarray thresholdedimg: input image to be read.
    :param string unit: "nm" or "um" depending on what we're looking for.
    :return string greekengtextfromimage: text that was read.

    # convert input to appropriate format.
    imagetoread = Image.fromarray(thresholdedimg)

    # Only look for these characters.
    if unit == "um":
        cfg = 'pytess_config_um.txt'
        cfg = "pytess_config_nm.txt"

    # Read image using English alphabet.
    # The config file is looked up next to this source file.
    greekengtextfromimage = image_to_string(
        config=os.path.join(os.path.dirname(os.path.abspath(__file__)), cfg))

    text_boxes = image_to_boxes(imagetoread,

    return greekengtextfromimage, text_boxes
# Example #26
def char_grid(input_img_path, output_dir):
    # Paint every detected character's box as a filled rectangle on a black
    # canvas of the input image's size, coloured via CHAR_MAP (white when the
    # character is not in the map), and write the canvas to `output_dir`
    # under the input file's name.
    # NOTE(review): the loop body has lost its `try:` / `except` lines and the
    # end of the tuple assignment, so `annot_a` is set to True and then
    # unconditionally to False as written. cv2.imwrite sits inside the loop
    # and runs once per annotated character. The code is left untouched.
    img = cv2.imread(input_img_path)
    height = img.shape[0]
    width = img.shape[1]
    blank_image = np.zeros((height, width, 3), np.uint8)
    tess_img = pytesseract.image_to_boxes(img, output_type=Output.DICT)
    n_boxes = len(tess_img['char'])
    for i in range(n_boxes):
            annot_a = True
            (text, x1, y2, x2, y1) = (tess_img['char'][i], tess_img['left'][i],
                                      tess_img['top'][i], tess_img['right'][i],
            annot_a = False

        if annot_a:
            char_color = CHAR_MAP.get(text.strip().lower(), (255, 255, 255))
            # x, y, w, h are computed but not used below.
            x, y, w, h = x1, y1,\
                         (x2 - x1), (y2 - y1)

            cv2.rectangle(blank_image, (x1, height - y1), (x2, height - y2),
                          char_color, cv2.FILLED)

            filename = input_img_path.split('/')[-1]
            cv2.imwrite(os.path.join(output_dir, filename), blank_image)
# Example #27
 def __extract_boxes(self):
     # NOTE(review): the next line is docstring text whose quotes are missing,
     # and the image_to_boxes call is cut off after its first argument.
     # The visible part stores the result for self.image in self.__boxes.
     # The code is left untouched.
     Extracts boxes data from image.
     self.__boxes = pytesseract.image_to_boxes(self.image,
    def _image_to_data(self):
        # NOTE(review): the two lines below are docstring text whose quotes
        # are missing, the `try:` that pairs with `except FileNotFoundError`
        # is gone, and the `input_image =` assignment has lost its right-hand
        # side. Although the docstring says the box data is returned, no
        # return statement is visible in this copy. The code is left untouched.
        Utilizes pytesseract OCR to generate bounding box data for the image.
        Returns the bounding box data.
        found_image = False
        # Keep asking for a path until the image can be opened.
        while not found_image:
                print('Please wait.....')
                input_image =
                found_image = True
            except FileNotFoundError:
                print(f'\nThe file [{self.input_image_path}] was not found.')
                self.input_image_path = input('Please enter the path for the image you\'d like to search: ')
        image_string = pytesseract.image_to_string(input_image)
        word_box_data = pytesseract.image_to_data(input_image)
        char_box_data = pytesseract.image_to_boxes(input_image)
        self.width, self.height = input_image.size
        print(f'Width: {self.width}, Height: {self.height}')
        # TODO: Raise an error if no text is recognized
        print(f'\nRecognized Text:\n {image_string}')

# Example #29
def search_word(img):
    # Walk through the detected characters looking for the letters of the
    # global `word` in order, and return (success, wordCoords) where
    # wordCoords is [left, top, right, bottom] on success.
    # NOTE(review): the `else:` that presumably introduced the error-counting
    # lines is missing, so as written they run on every matching letter, and
    # the final `if success:` has no body (presumably a `break`).
    # For a one-letter word the `letter_count == 1` branch wins and success
    # is never set. The code is left untouched.
    global word
    _, imgH = img.size
    letter_count = 0
    error_count = 0
    wordCoords = []
    success = False
    for b in pytesseract.image_to_boxes(img).splitlines():
        b = b.split(' ')
        letter = b[0]
        if letter == word[letter_count]: # or, if an error was added recently, look at letter_count-1
            letter_count += 1
            # y-values are subtracted from the image height.
            left, bottom, right, top = int(b[1]), imgH - int(b[2]), int(b[3]), imgH - int(b[4])
            if letter_count == 1:
                wordCoords = [left, top]
            elif letter_count == len(word):
                wordCoords.extend([right, bottom])
                success = True
            error_count += 1
            # Too many errors: start the search over.
            if error_count > len(word)/3:
                wordCoords = []
                letter_count = 0
                error_count = 0
        if success:
    return success, wordCoords
# Example #30
def tables_from_contours(img,cnt):
    # NOTE(review): the six lines below are docstring text whose quotes are
    # missing. In the visible code nothing is ever appended to `tables` or
    # `stations_regions`, so both are returned empty; the part that used
    # `crop`, `crop_left`, `masked_img` and `boxes` appears to be cut off.
    # `y - 200` becomes a negative index when the contour lies within 200
    # pixels of the top edge. The code is left untouched.
    From the contours generated from preprocessing
    Identify the tables and stations regions in the image
    img (numpy array): image
    cnt (list): detected contours in image
    tables (numpy array): Table arrays
    stations_regions (numpy array): array of station region
    tables = []
    stations_regions = []
    for i in range(len(cnt)):
        area = cv2.contourArea(cnt[i])
        # Extract the regions containing the images
        if area > 100000:
            # Filled mask of contour i.
            mask = np.zeros_like(img)
            cv2.drawContours(mask, cnt, i, 255, -1)
            # get rectangle coordinates from the contours
            x, y, w, h = cv2.boundingRect(cnt[i])
            # crop out the table region from the image
            crop = img[y:h + y, x:w + x]
            # Everything outside the contour is set to 255.
            masked_img = cv2.bitwise_and(img, img, mask=mask)
            masked_img[mask == 0] = 255
            # cut of the upper part of the image containing stations
            crop_left = img[y - 200:y, x:w + x]
            boxes = pytesseract.image_to_boxes(crop)  # also include any config options you use
    return tables, stations_regions
# Example #31
from wand.image import Image as wand_Image
from PIL import Image as PIL_Image
from wand.color import Color
import os
import pytesseract

def build_images(force=False):
    # Render each page of ./example.pdf at resolution 300 as a PNG on a white
    # background, unless `force` is False and the output already exists.
    # NOTE(review): the existence check tests './foo-0.png' twice; the second
    # test was presumably meant for another page. `Image` is used below, but
    # the visible imports only bind `wand_Image` and `PIL_Image`. The last
    # line is the tail of a truncated call (presumably the one that saves the
    # page). The code is left untouched.
    if not all([not force, os.path.isfile('./foo-0.png'), os.path.isfile('./foo-0.png')]):
        all_pages = wand_Image(filename='./example.pdf', resolution=300)
        for idx, page in enumerate(all_pages.sequence):
            with Image(page) as i:
                i.format = 'png'
                # Flatten transparency onto white.
                i.background_color = Color('white')
                i.alpha_channel = 'remove'
      'foo-%s.png' % idx)


# Character boxes, word-level data and orientation/script information for the
# first page, each requested as a dictionary.
# NOTE(review): each statement below has one more ")" than "(" — the call
# that wrapped the 'foo-0.png' argument appears to have been lost.
# The code is left untouched.
boxes = pytesseract.image_to_boxes('foo-0.png'), output_type=pytesseract.Output.DICT)
data = pytesseract.image_to_data('foo-0.png'), output_type=pytesseract.Output.DICT)
osd = pytesseract.image_to_osd('foo-0.png'), output_type=pytesseract.Output.DICT)
import sys
import cv2
import pytesseract
from tesseract import image_to_string

# Script: draw Tesseract's per-character bounding boxes on the image whose
# path is given as the first command-line argument and display the result.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} IMAGE')
filename = sys.argv[1]

# read the image and get the dimensions
img = cv2.imread(filename)
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # rather than raising, which would otherwise surface as an obscure
    # AttributeError on `img.shape`.
    sys.exit(f'could not read image: {filename}')
h, w, _ = img.shape  # cv2.imread's default mode yields a 3-channel image

# run tesseract, returning the bounding boxes
boxes = pytesseract.image_to_boxes(img)  # also include any config options you use

# draw the bounding boxes on the image
# Each line is "char left bottom right top page" with the origin at the
# bottom-left corner, so y-values are flipped against the image height.
for b in boxes.splitlines():
    b = b.split(' ')
    if len(b) < 5:
        # blank or malformed line: nothing to draw
        continue
    img = cv2.rectangle(img, (int(b[1]), h - int(b[2])),
                        (int(b[3]), h - int(b[4])), (0, 255, 0), 2)

# show annotated image and wait for keypress
cv2.imshow(filename, img)
# Without a waitKey call the HighGUI window is never serviced and the script
# exits immediately, so the annotated image would not be visible.
cv2.waitKey(0)