def get_check_patch(im, box, text_len=6, flag=False):
    """Crop the "check" region located relative to a detected text box.

    Args:
        im: Image array indexed as ``im[row, col]``.
        box: Array of corner points indexed as ``box[k] = (x, y)``; only
            ``box[0]``, ``box[1]`` and ``box[2]`` are read.
        text_len: When ``flag`` is true, number of leading OCR character
            boxes to keep; the last kept box anchors the crop.
        flag: When false the crop is derived from the box geometry alone.
            When true Tesseract character boxes are used to find the origin.

    Returns:
        ``(check_patch, point)`` where ``point`` is the ``(x, y)`` position
        of the crop's top-left corner in ``im`` coordinates.

    Note:
        A crop start above row 0 is not clamped, so the slice start becomes
        negative and NumPy interprets it relative to the end of the array.
    """
    if not flag:
        # astype() returns a copy, so shifting the origin leaves `box` intact.
        origin = box[1].astype(np.int32)
        bias = 10
        origin[0] += bias
        height = np.round((box[2, 1] - box[1, 1])).astype(np.int32)
        width = (height * 1.5).astype(np.int32)
    else:

        def _ocr_boxes(with_morph):
            # Run Tesseract on the patch cut out for this box.
            patch = get_patchs(im, box[np.newaxis, ...],
                               with_morph=with_morph)[0]
            return pytesseract.image_to_boxes(patch, config=config[1])

        try:
            check_boxes = _ocr_boxes(True)
            if '~' in check_boxes:
                # Retry without morphology; any remaining '~' is read as '0'.
                check_boxes = _ocr_boxes(False)
                if Debug_mode:
                    print('LCH: the ~ detected')
                check_boxes = check_boxes.replace('~', '0')
            check_array = boxes_to_array(
                check_boxes)[:text_len, :].astype(np.int32)
        except Exception:
            # Best-effort fallback: redo the OCR on the un-morphed patch.
            # Narrowed from a bare ``except`` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            check_array = boxes_to_array(
                _ocr_boxes(False))[:text_len, :].astype(np.int32)

        origin = (check_array[-1, 3] + box[0][0],
                  check_array[-1, 2] + box[0][1])
        height = (box[2, 1] - origin[1]).astype(np.int32)
        width = (box[2, 0] - origin[0] + 10).astype(np.int32)

    # Shared crop: start 10% of the height above the origin and extend to
    # 125% of the height below it.
    top = origin[1] - (height * 0.1).astype(np.int32)
    bottom = origin[1] + (height * 1.25).astype(np.int32)
    check_patch = im[top:bottom, origin[0]:origin[0] + width]
    point = (origin[0], top)
    return check_patch, point
Ejemplo n.º 2
0
def translit(image, box_sim_thresh=0.65):
    """Return the transliteration found in IMAGE as a string.

    The image is OCR'd once per language in ``charsets``. For every character
    of the flagship pass, candidate characters from the other passes are
    collected and ``choose_char`` picks the final one. The chosen characters
    are then regrouped into the words reported by ``image_to_data``.

    :param image: an image of a transliterated Semitic language
    :param box_sim_thresh: the minimum Jaccard similarity of the bounding
        boxes of two characters for them to be recognized as describing the
        same character
    """

    def char_boxes(result):
        # Pair each character with its (left, bottom, right, top) box.
        return zip(
            result['char'],
            zip(result['left'], result['bottom'], result['right'],
                result['top']))

    # The flagship language is the pass every other pass is compared against.
    flagship_language = 'ces'
    other_languages = [
        name for name in charsets.keys() if name != flagship_language
    ]

    data = pytesseract.image_to_data(image,
                                     lang=flagship_language,
                                     output_type=Output.DICT)
    flagship_boxes = pytesseract.image_to_boxes(image,
                                                lang=flagship_language,
                                                output_type=Output.DICT)
    other_boxes = {}
    for language in other_languages:
        other_boxes[language] = pytesseract.image_to_boxes(
            image, lang=language, output_type=Output.DICT)

    # Step 1: decide the correct character for each flagship box.
    chars = []
    for char0, box0 in char_boxes(flagship_boxes):
        matches = [(flagship_language, char0)]
        for other in other_languages:
            for char1, box1 in char_boxes(other_boxes[other]):
                sim = box_sim(*box0, *box1)
                if char1 not in charsets[other]:
                    continue
                if sim >= box_sim_thresh or (sim > 0
                                             and char_kin(char0, char1)):
                    matches.append((other, char1))
        chars.append((char0, box0, choose_char(matches)))

    # Step 2: rebuild every recognized word from the chosen characters.
    rows = zip(data['left'], data['top'], data['width'], data['height'],
               data['conf'], data['text'])
    for i, (left, top, width, height, conf, word0) in enumerate(rows):
        if int(conf) < 0:
            continue
        right = left + width
        # Flip the word box vertically so it can be compared with the
        # character boxes.
        bottom = image.height - (top + height)
        top = image.height - top
        data['text'][i] = ''.join(
            char for char0, box, char in chars
            if (char0 in word0)
            and box_sim(*box, left, bottom, right, top) > 0)
    return data_to_string(data['text'])
Ejemplo n.º 3
0
def locateNameBox():
    """Print the OCR box rows spelling "name" and dump all box rows to a file.

    Runs ``image_to_boxes`` on ``answerSheet_with_name.png``. Wherever four
    consecutive rows start with ``n``, ``a``, ``m``, ``e``, it prints
    ``true`` followed by those four rows. The complete OCR output is written
    to ``image_to_string.txt``.
    """
    image = Image.open('answerSheet_with_name.png')
    # OCR once and reuse the text for both the search and the dump.
    boxes_text = image_to_boxes(image)
    box = boxes_text.split('\n')

    target = 'name'
    # Stop early enough that a full 4-row window always exists.
    for i in range(len(box) - len(target) + 1):
        window = box[i:i + len(target)]
        # line[:1] is '' for a blank row, so empty rows never raise.
        if all(line[:1] == ch for line, ch in zip(window, target)):
            print("true")
            for line in window:
                print(line)

    with open("image_to_string.txt", "w") as file:
        file.write(boxes_text)
Ejemplo n.º 4
0
def findcoordinateOfName(path):
    """Locate the letter "e" of the first "name" label in the top third of an image.

    The top third of the image at ``path`` is written to ``./temp/temp.png``,
    OCR'd with ``image_to_boxes``, and scanned for four consecutive rows
    starting with ``n``, ``a``, ``m``, ``e``.

    Args:
        path: Path of the image file, read with ``cv2.imread``.

    Returns:
        ``((x1, y1), (x2, y2))`` built from the box row of the ``e``: the
        second and fourth row fields are used as x values, and the fifth and
        third fields, subtracted from the cropped image's height, as y values.

    Raises:
        IndexError: If no "name" sequence is found in the OCR output.
    """
    image = cv2.imread(path)
    height, width = image.shape[:2]
    crop_img = image[0:int(height / 3), 0:width]
    cv2.imwrite("./temp/temp.png", crop_img)
    image = Image.open("./temp/temp.png")
    box = image_to_boxes(image).split('\n')

    # From here on the height of the *cropped* image is what matters.
    width, height = image.size

    target = 'name'
    # Bounded so that a full 4-row window always exists.
    for i in range(len(box) - len(target) + 1):
        window = box[i:i + len(target)]
        # line[:1] tolerates blank rows.
        if all(line[:1] == ch for line, ch in zip(window, target)):
            coorE = window[-1].split(" ")
            return ((int(coorE[1]), height - int(coorE[4])),
                    (int(coorE[3]), height - int(coorE[2])))

    # Same exception type the unguarded list lookup used to raise, but with
    # an explanation.
    raise IndexError('no "name" label found in the OCR output of ' +
                     str(path))
    def submit_fun(self):
        """OCR the selected image, show the characters and preview the boxes.

        With no image selected (``self.path`` empty) an error dialog is shown.
        Otherwise every character reported by Tesseract is outlined and
        labelled on the image, the characters are written space-separated
        into ``self.text``, and the annotated image is displayed until a key
        is pressed.
        """
        if self.path == '':
            messagebox.showerror("Error",
                                 "You haven't select any image file yet.")
            return

        img = cv2.cvtColor(cv2.imread(self.path), cv2.COLOR_BGR2RGB)
        hImg, wImg, _ = img.shape
        box_color = (50, 50, 255)
        found = []
        for row in pytesseract.image_to_boxes(img).splitlines():
            fields = row.split(' ')
            found.append(str(fields[0]))
            x, y = int(fields[1]), int(fields[2])
            w, h = int(fields[3]), int(fields[4])

            # The y values are subtracted from the image height before
            # drawing.
            cv2.rectangle(img, (x, hImg - y), (w, hImg - h), box_color, 2)
            cv2.putText(img, fields[0], (x + 10, hImg - y + 55),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, box_color, 2)
        text_got = ''.join(char + ' ' for char in found)

        self.text.delete(1.0, END)
        ps = PSS()
        self.text.insert(INSERT, text_got)
        cv2.imshow('img', img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
Ejemplo n.º 6
0
def tesseract_box(image, color):
    """Draw Tesseract's per-character boxes onto ``image``.

    Args:
        image: Image array; its first dimension is used as the height.
        color: Colour passed to ``cv2.rectangle``.

    Returns:
        The image with one rectangle drawn per recognized character.
    """
    # Use the real image height instead of assuming 128-pixel-high input.
    height = image.shape[0]
    boxes = pytesseract.image_to_boxes(image)
    for row in boxes.splitlines():
        fields = row.split(' ')
        if len(fields) < 5:
            # Skip blank or malformed rows.
            continue
        x1, y1, x2, y2 = (int(value) for value in fields[1:5])
        # The y values are subtracted from the height before drawing.
        image = cv2.rectangle(image, (x1, height - y1), (x2, height - y2),
                              color, 2)
    return image
def getTextCoordinate():
    """Collect the centre points of the non-zero single-digit OCR results.

    Reads the image named by the global ``filename`` and fills these globals:

    * ``numberOfCharactor``: number of box rows Tesseract returned.
    * ``textCoordinate``: one ``[digit, x_centre, y_centre, 0]`` row per
      accepted character, with unused trailing rows left as zeros.
    * ``numberOfDigitValue``: number of rows filled; only assigned when at
      least one digit is accepted.
    * ``ratio_X_Axis_Value`` / ``ratio_Y_Axis_Value``: reset to 0.

    NOTE(review): ``height`` is read from module scope and is not assigned
    here. Presumably it is the image height; verify against the caller.
    """
    global textCoordinate, filename, ratio_Y_Axis_Value, ratio_X_Axis_Value, numberOfCharactor, numberOfDigitValue

    # Run tesseract, returning the bounding boxes. The image handle is
    # closed as soon as OCR is done.
    with Image.open(filename) as source_image:
        boxes = pytesseract.image_to_boxes(source_image)
    rows = boxes.splitlines()
    numberOfCharactor = int(len(rows))

    textCoordinate = [[0] * 4 for _ in range(numberOfCharactor)]
    ratio_X_Axis_Value = ratio_Y_Axis_Value = 0

    # `i` counts accepted digits only, so it can lag behind the row index.
    i = 0
    for row in rows:
        b = row.split(' ')
        if b[0].isdigit() and int(b[0]) != 0:
            textCoordinate[i][0] = int(b[0])  # the digit itself
            textCoordinate[i][1] = round(
                (int(b[1]) + int(b[3])) / 2)  # horizontal centre
            textCoordinate[i][2] = round(
                ((height - int(b[2])) +
                 (height - int(b[4]))) / 2)  # vertical centre
            i = i + 1
            numberOfDigitValue = i
Ejemplo n.º 8
0
def explain_tesseract(img, title, options_str):
    """
    Show how tesseract analyses this image by drawing a box and a label for
    every character it reports.

    Two OCR readings are appended to ``candidate_results``: one of the
    original image and one of a copy padded with white rows above and below.
    The character boxes of the original image are drawn on the padded copy.
    """
    hImg, wImg = img.shape
    nb_lines = 40

    # White band added both above and below the image.
    white_band = np.full((nb_lines, wImg), 255, dtype=np.uint8)
    img2 = np.concatenate((white_band, img, white_band), axis=0)

    reading_orig = pytesseract.image_to_string(img,
                                               config=options_str).strip()
    candidate_results.append([f'{title} (orig size)', reading_orig])
    reading_ext = pytesseract.image_to_string(img2,
                                              config=options_str).strip()
    candidate_results.append([f'{title} (extended)', reading_ext])

    green = (0, 255, 0)
    for row in pytesseract.image_to_boxes(img,
                                          config=options_str).splitlines():
        fields = row.split(' ')
        x = int(fields[1])
        y = int(fields[2])
        w = int(fields[3])
        h = int(fields[4])
        # Boxes come from the unpadded image, so shift them down by the
        # height of the top band.
        cv2.rectangle(img2, (x, hImg - y + nb_lines),
                      (w, hImg - h + nb_lines), green, 2)
        cv2.putText(img2, fields[0], (x, hImg - y + 25 + nb_lines),
                    cv2.FONT_HERSHEY_COMPLEX, 1, green, 2)
    def _extract_text(self, frame, bbox, sharpen=False):
        """Return the text Tesseract detects inside ``bbox`` of ``frame``.

        The frame is cropped to ``bbox`` (used as ``x1, y1, x2, y2``) and
        inverted before OCR. With ``sharpen`` set, a 3x3 sharpening kernel is
        applied as well, which sometimes gives better results.

        Returns:
            The first field of every non-empty box row, concatenated. An
            empty string if nothing was detected.
        """
        left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
        # Invert the crop for better results.
        img = cv2.bitwise_not(frame[top:bottom, left:right])
        if sharpen:
            sharpening_kernel = np.array([[-1, -1, -1],
                                          [-1, 9, -1],
                                          [-1, -1, -1]])
            img = cv2.filter2D(img, -1, sharpening_kernel)

        rows = (row.split()
                for row in pytesseract.image_to_boxes(img).split('\n'))
        # Each box row starts with the recognized character; blank rows
        # split to an empty list and are skipped.
        text = ''.join(fields[0] for fields in rows if fields)
        if text:
            logging.info('Detected VFX Shotcode: %s', text)
        return text
Ejemplo n.º 10
0
        def find_letters(thresh_low, type):
            nonlocal thresholded
            nonlocal letters
            gray = cv2.cvtColor(np.array(letter_wheel), cv2.COLOR_RGB2GRAY)
            ret, thresholded = cv2.threshold(gray, thresh_low, 255, type)
            thresholded = cv2.resize(thresholded,
                                     None,
                                     fx=self.resize_scale,
                                     fy=self.resize_scale)
            thresholded = cv2.filter2D(thresholded, -1, self.smoothing_kernel)
            cv2.imwrite("thresholded.png", thresholded)

            letters_raw = pytesseract.image_to_string(thresholded,
                                                      config=self.config)
            letters = []
            for letter in letters_raw:
                if letter in allowable_chars():
                    letters.append(letter)
            if len(letters) < 3:
                raise IndexError()
            self.boxes = [
                box.split() for box in pytesseract.image_to_boxes(
                    thresholded, config=self.config).split("\n")
            ]
            for box in self.boxes:
                box[1] = int(int(box[1]) / self.resize_scale)
                box[2] = screen.height * self.crop_bottom_proportion - int(
                    int(box[2]) / self.resize_scale)
                box[3] = int(int(box[3]) / self.resize_scale)
                box[4] = screen.height * self.crop_bottom_proportion - int(
                    int(box[4]) / self.resize_scale)
Ejemplo n.º 11
0
def tesseract():
    """Print every kind of pytesseract output for the image in ``imgFile``.

    If the tesseract executable is not on PATH, set
    ``pytesseract.pytesseract.tesseract_cmd`` to its full path first, e.g.
    r'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'.
    """
    # Plain text, encoded to UTF-8 bytes so printing never hits an
    # encoding error.
    print(pytesseract.image_to_string(Image.open(imgFile)).encode('utf8'))

    # In order: bounding box estimates, verbose data (boxes, confidences,
    # line and page numbers), then orientation and script detection.
    for ocr_call in (pytesseract.image_to_boxes, pytesseract.image_to_data,
                     pytesseract.image_to_osd):
        print(ocr_call(Image.open(imgFile)))

    # Passing the path itself bypasses the internal image conversion.
    # NOTE: tesseract returns an error for unsupported image formats.
    print(pytesseract.image_to_string(imgFile))

    # Searchable PDF.
    pdf = pytesseract.image_to_pdf_or_hocr(imgFile, extension='pdf')

    # HOCR output.
    hocr = pytesseract.image_to_pdf_or_hocr(imgFile, extension='hocr')
Ejemplo n.º 12
0
def processus(img_adress=None, img_file=None):
    """OCR an image, outline and label every character, and display it.

    Args:
        img_adress: Path read with ``cv2.imread`` when ``img_file`` is None.
        img_file: Already-loaded image; takes precedence over ``img_adress``.

    Returns:
        ``(img, boxes_splitted, boxes_stringed)``: the annotated image, the
        list of box rows, and the lines of the recognized text.
    """
    if img_file is not None:
        img = img_file
    else:
        img = cv2.imread(img_adress)
    # pytesseract expects RGB, so convert from OpenCV's BGR.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Image size.
    hImg, wImg, _ = img.shape

    # Both OCR passes run before anything is drawn on the image.
    boxes_splitted = pytesseract.image_to_boxes(img).splitlines()
    boxes_stringed = pytesseract.image_to_string(img).splitlines()

    for entry in boxes_splitted:
        fields = entry.split(' ')
        x = int(fields[1])
        y = int(fields[2])
        w = int(fields[3])
        h = int(fields[4])

        cv2.rectangle(img, (x, hImg - y), (w, hImg - h), (50, 50, 255), 2)

        # Write the character next to its box.
        cv2.putText(img, fields[0], (x, hImg - y + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 255, 50), 2)

    cv2.imshow('result', img)
    cv2.waitKey(0)

    return (img, boxes_splitted, boxes_stringed)
Ejemplo n.º 13
0
def get_img_ocr(imgfile, config_number=3, ratio=2):
    """OCR an upscaled, post-processed version of ``imgfile``.

    Args:
        imgfile: Input handed to ``postprocess``.
        config_number: Tesseract page segmentation mode (``--psm``). The
            default 3 is fully automatic, 6 assumes a single uniform block
            of text, 11 is sparse text, and 12 is sparse text with OSD.
        ratio: Scale factor applied to both dimensions before OCR.

    Returns:
        ``(bbox, text, H, W)``: the ``image_to_boxes`` output, the recognized
        text, and the height and width of the upscaled image.
    """
    custom_config = ' '.join((r'--oem 3', '--psm', str(config_number)))

    img = postprocess(imgfile)
    height, width = img.shape[:2]

    scaled_size = (int(ratio * width), int(ratio * height))
    img_1 = cv2.resize(img, scaled_size, interpolation=cv2.INTER_CUBIC)
    H, W, _ = np.array(img_1).shape

    text = pytesseract.image_to_string(img_1, config=custom_config)
    bbox = pytesseract.image_to_boxes(img_1, config=custom_config)
    return bbox, text, H, W
Ejemplo n.º 14
0
def get_img_ocr(image, box, config_number=3, ratio=2, padding=10):
    """OCR the region ``box`` of ``image`` after binarizing and upscaling it.

    Args:
        image: Three-dimensional BGR image array.
        box: ``(x1, y1, x2, y2)`` region to read.
        config_number: Tesseract page segmentation mode (``--psm``). The
            default 3 is fully automatic.
        ratio: Scale factor applied to both dimensions before OCR.
        padding: Extra pixels kept around ``box`` on every side.

    Returns:
        ``(bbox, text, H, W)``: the ``image_to_boxes`` output, the recognized
        text, and the height and width of the upscaled crop.
    """
    custom_config = r'--oem 3 --psm ' + str(config_number)

    x1, y1, x2, y2 = box
    # Clamp the padded start to 0. A negative slice start is interpreted
    # relative to the end of the array and yields a wrong or empty crop.
    # Slice ends past the image are already truncated by NumPy.
    top = max(y1 - padding, 0)
    left = max(x1 - padding, 0)
    img = image[top:y2 + padding, left:x2 + padding, :]

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Otsu's method selects the binarization threshold automatically.
    img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    height, width = img.shape[:2]

    img_1 = cv2.resize(img, (int(ratio * width), int(ratio * height)),
                       interpolation=cv2.INTER_CUBIC)
    H, W = img_1.shape[:2]
    text = pytesseract.image_to_string(img_1, config=custom_config)
    bbox = pytesseract.image_to_boxes(img_1,
                                      config=custom_config)  # (x1, y1, x2, y2)
    return bbox, text, H, W
Ejemplo n.º 15
0
def tess_func(gray):
    """Outline every character tess finds in a frame and print the text read.

    The frame's height and the recognized text are printed. The rectangles
    are drawn on ``gray`` itself, which is also the return value.
    """
    # Image height, needed to subtract the box y values from.
    h = gray.shape[0]
    print(h)

    boxes = tess.image_to_boxes(gray)

    # Keep an unmarked copy of the frame for the text pass below.
    img = gray.copy()

    # Draw the bounding boxes on the image.
    outline = (0, 0, 255)
    for line in boxes.splitlines():
        data = line.split(' ')
        corner_a = (int(data[1]), h - int(data[2]))
        corner_b = (int(data[3]), h - int(data[4]))
        gray = cv2.rectangle(gray, corner_a, corner_b, outline, 1)

    # Read the text from the unmarked copy.
    text = tess.image_to_string(img)
    print("Tess text: " + text)

    return gray
    
Ejemplo n.º 16
0
 def handleTitleScreen(self):
     """Wait for the "CLICK HERE TO PLAY!" prompt, then click it.

     Window shots are taken repeatedly. A fixed sub-region of each shot is
     OCR'd until one of its text lines equals the prompt. The pointer is then
     moved to a fixed offset from that region's top-left corner, translated
     by the window position, and clicked.
     """
     x = 0
     y = 0
     while True:
         img = self.getWindowShot()
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         # Region searched: columns 25%..67%, rows 75%..87.5% of the shot.
         startX = int(gray.shape[1] * (1 / 4))
         endX = int(gray.shape[1] * (67 / 100))
         startY = int(gray.shape[0] * (3 / 4))
         endY = int(gray.shape[0] * (7 / 8))
         gray = gray[startY:endY, startX:endX]
         text = pytesseract.image_to_string(gray)
         if "CLICK HERE TO PLAY!" in text.split("\n"):
             x, y = startX, startY
             # The character boxes are computed but not used afterwards.
             boxes = pytesseract.image_to_boxes(gray)
             break
     region = self.getWindowDimensions()
     x += region[0]
     y += region[1]
     pyautogui.moveTo(x + 108, y + 31)
     pyautogui.click()
Ejemplo n.º 17
0
def main():
    """Draw Tesseract's character boxes on an image and save the result.

    Command line:
        --filename PATH   image to process (required)

    Images at least 1200 pixels high are scaled down to a height of 1200
    before OCR. The annotated image is written next to the input file with
    ``res-`` prefixed to the file name.
    """
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--filename", required=True)
    args = parser.parse_args()
    filename = args.filename

    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        parser.error("could not read image: " + filename)
    height, width, _ = img.shape

    if height >= 1200:
        resize_width = (width * 1200) // height
        height, width = 1200, resize_width

        img = cv2.resize(img, (width, height))

    boxes = pytesseract.image_to_boxes(img)

    for row in boxes.splitlines():
        box = row.split(' ')
        # A filter that skipped boxes larger than 400 pixels in area used to
        # sit here. It is disabled, so every box is drawn.
        img = cv2.rectangle(img, (int(box[1]), height - int(box[2])),
                            (int(box[3]), height - int(box[4])), (0, 255, 0),
                            1)

    # Prefix only the file name. Prefixing the whole path would point into a
    # directory that does not exist whenever the path has a directory part.
    directory, basename = os.path.split(filename)
    cv2.imwrite(os.path.join(directory, 'res-' + basename), img)
    cv2.waitKey(0)
Ejemplo n.º 18
0
def boxes():
    """Show ``images/1.png`` with a box drawn around every Hebrew character.

    The keys of the OCR result and one line per character (coordinates and
    page) are printed along the way.
    """
    img = cv2.imread('images/1.png')
    height, width = img.shape[:2]

    cv2.namedWindow('jpg', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('jpg', width, height)

    d = pytesseract.image_to_boxes(img, lang='heb', output_type=Output.DICT)
    print(d.keys())

    rows = zip(d['char'], d['left'], d['top'], d['right'], d['bottom'],
               d['page'])
    for text, x1, y2, x2, y1, page in rows:
        print(
            text, ' - x1:{}, y2:{}, x2:{}, y1:{} - page:{}'.format(
                x1, y2, x2, y1, page))
        # The y values are subtracted from the image height before drawing.
        cv2.rectangle(img, (x1, height - y1), (x2, height - y2), (0, 255, 0),
                      2)
    cv2.imshow('jpg', img)
    cv2.waitKey(0)
def charsearch(what=49, graphic_enable=graphic_on):
    """Find every occurrence of one character in ``gray`` and mark it on ``img``.

    Args:
        what: Code point of the character to look for, compared via
            ``chr(what)``. The default 49 is ``'1'``.
        graphic_enable: Forwarded to ``showInMovedWindow``.

    Side effects (all through module globals):
        * ``cx``, ``cy``, ``cw``, ``ch``: x, flipped y, width and height
          taken from the last matching box. They stay 0 when nothing matches.
        * ``count``: number of matching boxes; also printed.
        * ``barray``: the raw ``image_to_boxes`` output string.
        * ``b``: declared global and used as the loop variable, so it is left
          holding the split fields of the last box row.
        * ``img``: rebound to the annotated image.

    NOTE(review): OCR runs on ``gray`` while drawing uses ``img``. Presumably
    both hold the same frame; verify against the caller.
    """
    global ch, cw, cy, cx, b, img, count, barray
    #text = pytesseract.image_to_string(gray,config='--psm 13')
    # Reset the results of any previous search.
    cx = 0
    cy = 0
    ch = 0
    cw = 0
    barray = np.array([])
    count = 0
    boxes = pytesseract.image_to_boxes(gray, config='--psm 13')
    h, w, c = img.shape
    # Overwrites the empty array assigned above with the raw OCR string.
    barray = boxes
    for b in boxes.splitlines():
        b = b.split(' ')
        if (b[0] == chr(what)):
            # The y fields are subtracted from the image height for drawing.
            img = cv2.rectangle(img, (int(b[1]), h - int(b[2])),
                                (int(b[3]), h - int(b[4])), (255, 0, 0), 2)
            cv2.putText(img, str(b[0]), (int(b[1]), h - int(b[2]) + 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)
            cx = int(b[1])
            cy = h - int(b[2])
            ch = int(b[4]) - int(b[2])
            cw = int(b[3]) - int(b[1])
            count = count + 1
            print("cx,cy,ch,cw:", cx, cy, ch, cw)

    showInMovedWindow('Char', img, 710, 50, graphic_enable)
    print(count)
def process(im):
    """OCR ``im`` and run the character boxes through the loaded seq2seq model.

    Every box row is rewritten as ``char x1 y1 x2 y2`` with the coordinates
    divided by the image width or height and the y values subtracted from 1.
    The joined rows are passed to ``evaluate`` together with an encoder and
    attention decoder restored from the ``75000DeepPosV2`` checkpoint.

    Returns:
        ``(output_words, output_poses)`` as produced by ``evaluate``.
    """
    config = ('-l kor --psm 6 --oem 1 ')

    h, w, _ = im.shape
    csvformat = []
    for row in pytesseract.image_to_boxes(im, config=config).splitlines():
        f = row.split(' ')
        normalized = (
            f[0],
            int(f[1]) / w,
            1 - int(f[4]) / h,
            int(f[3]) / w,
            1 - int(f[2]) / h,
        )
        csvformat.append(" ".join(str(value) for value in normalized) + "\n")

    hidden_size = 512
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size,
                                   output_lang.n_words,
                                   dropout_p=0.1).to(device)

    loadModel(encoder1, attn_decoder1,
              checkpoint_PATH + "75000DeepPosV2.checkpoint")

    model_input = "".join(csvformat)
    output_words, attentions, output_poses = evaluate(encoder1, attn_decoder1,
                                                      model_input)
    return (output_words, output_poses)
def find_characters(image_to_process, should_debug=False):
    """Return Tesseract's character boxes as a tuple of 6-field tuples.

    Args:
        image_to_process: Image handed to ``pytesseract.image_to_boxes``.
        should_debug: Print every intermediate representation when true.

    Returns:
        A tuple with one entry per six whitespace-separated tokens of the OCR
        output. All fields are strings. The last entry is shorter if the
        token count is not a multiple of six.

    The text is split directly. Going through ``repr()`` first would leave a
    quote character glued to the first and last tokens and would escape
    backslashes and quotes inside tokens.
    """
    character_locations = pytesseract.image_to_boxes(image_to_process)
    if should_debug:
        print("All the characters and their locations in Unicode are: ")
        print(character_locations)

    # str.split() with no argument splits on newlines and spaces alike.
    tokens = tuple(character_locations.split())
    if should_debug:
        print("\nUnsplit tuple with size: " + str(len(tokens)))
        print(str(tokens) + "\n")

    # Regroup the flat token stream, six tokens per entry.
    split_tuple_of_character_locations = tuple(
        tokens[a:(a + 6)] for a in range(0, len(tokens), 6))
    if should_debug:
        print("Split tuple with size: " +
              str(len(split_tuple_of_character_locations)))
        print(split_tuple_of_character_locations)
    return split_tuple_of_character_locations
Ejemplo n.º 22
0
def webcam():
    """Continuously OCR webcam frames and display them with character boxes.

    Points pytesseract at the Windows Tesseract install, opens camera 0 via
    DirectShow with capture properties 3 and 4 set to 640 and 480, and runs
    until a frame cannot be read or the loop is interrupted. The camera and
    all windows are released on the way out.
    """
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    cap.set(3, 640)
    cap.set(4, 480)

    def captureScreen(bbox=(300, 300, 1500, 1000)):
        # Alternative frame source: grab a screen region and convert it to
        # BGR. Not called by the loop below.
        capScr = np.array(ImageGrab.grab(bbox))
        capScr = cv2.cvtColor(capScr, cv2.COLOR_RGB2BGR)
        return capScr

    try:
        while True:
            grabbed, img = cap.read()
            if not grabbed:
                # No frame is available when the read fails; stop instead of
                # crashing on it.
                break
            # Detect the characters.
            hImg, wImg, _ = img.shape
            boxes = pytesseract.image_to_boxes(img)
            for b in boxes.splitlines():
                b = b.split(' ')
                x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
                cv2.rectangle(img, (x, hImg - y), (w, hImg - h),
                              (50, 50, 255), 2)
                cv2.putText(img, b[0], (x, hImg - y + 25),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 255), 2)
            cv2.imshow("Result", img)
            cv2.waitKey(1)
    finally:
        # Runs on normal exit and on Ctrl-C or any error.
        cap.release()
        cv2.destroyAllWindows()
Ejemplo n.º 23
0
def printboxes(imageName, mode="words"):
    """Display ``images/<imageName>`` with OCR boxes drawn on it.

    Args:
        imageName: File name inside the ``images/`` directory.
        mode: ``"words"`` draws a box around every word whose confidence,
            truncated to an integer, is above 60. Any other value draws a box
            around every character.
    """
    img = cv2.imread('images/' + imageName)
    green = (0, 255, 0)

    # Detect words.
    if mode == "words":
        d = pytesseract.image_to_data(img, output_type=Output.DICT)
        print(d.keys())

        # Generate boxes around words.
        word_rows = zip(d['conf'], d['left'], d['top'], d['width'],
                        d['height'])
        for conf, x, y, w, h in word_rows:
            # Go through float() first so that fractional confidence strings
            # such as '96.5' are accepted; plain int() rejects them.
            if int(float(conf)) > 60:
                img = cv2.rectangle(img, (x, y), (x + w, y + h), green, 2)

    # Detect characters.
    else:
        img_height = img.shape[0]
        for row in pytesseract.image_to_boxes(img).splitlines():
            b = row.split(' ')
            if len(b) < 5:
                # Skip blank or malformed rows.
                continue
            # The y fields are subtracted from the image height for drawing.
            img = cv2.rectangle(img, (int(b[1]), img_height - int(b[2])),
                                (int(b[3]), img_height - int(b[4])), green, 2)

    # Show boxes.
    cv2.imshow('img', img)
    cv2.waitKey(0)
Ejemplo n.º 24
0
def parse_images(images):
    """
    Takes in a list of Python Image objects and returns all bank data
    Input Type: list of Python Image objects
    Output Type: string that is csv compliant

    Every image is OCR'd into a list of ``{'contents', 'x', 'y'}`` dicts, one
    per character box. Those lists and the width of the first image are then
    handed to ``parser.parse_tesseract``.
    """
    contents = []
    # Only the width of the first image is forwarded to the parser.
    width = images[0].size[0]
    for count, image in enumerate(images, start=1):
        if DEBUG:
            print('Running OCR on image {0}'.format(count))
        structured = []
        # Pytesseract output is one big string: one box per line, fields
        # separated by spaces.
        for line in pytesseract.image_to_boxes(image).split('\n'):
            data = line.split(' ')
            if len(data) < 3:
                # Blank rows (e.g. after a trailing newline) carry no box.
                continue
            structured.append({
                'contents': data[0],
                'x': int(data[1]),
                'y': int(data[2]),
            })
        contents.append(structured)
    parsed = parser.parse_tesseract(contents, width)
    return parsed
Ejemplo n.º 25
0
def read_image(thresholdedimg, unit):
    '''Read text and character boxes from an image using pytesseract.

    The cv2 (numpy array) image is converted to a PIL image before OCR.

    :param numpy.ndarray thresholdedimg: input image to be read.
    :param string unit: "um" selects ``pytess_config_um.txt``; any other
        value selects ``pytess_config_nm.txt``.

    :return: tuple ``(greekengtextfromimage, text_boxes)`` holding the
        recognized text and the ``image_to_boxes`` output.
    '''
    # Convert input to the appropriate format.
    imagetoread = Image.fromarray(thresholdedimg)

    # The config file, stored next to this module, limits the characters
    # that are looked for.
    cfg = 'pytess_config_um.txt' if unit == "um" else "pytess_config_nm.txt"
    module_dir = os.path.dirname(os.path.abspath(__file__))
    ocr_options = {"lang": "eng", "config": os.path.join(module_dir, cfg)}

    # Both passes use the English alphabet and the same config.
    greekengtextfromimage = image_to_string(imagetoread, **ocr_options)
    text_boxes = image_to_boxes(imagetoread, **ocr_options)

    return greekengtextfromimage, text_boxes
Ejemplo n.º 26
0
def char_grid(input_img_path, output_dir):
    """Render a "character grid": one filled, colour-coded box per character.

    Args:
        input_img_path: Image to OCR, read with ``cv2.imread``.
        output_dir: Directory that receives the grid image. The file is named
            after the last ``/``-separated component of ``input_img_path``.

    A black canvas of the input's size gets a filled rectangle for every
    character Tesseract reports. The colour comes from ``CHAR_MAP``, keyed by
    the stripped, lower-cased character, with white as the fallback. Nothing
    is written when no character could be drawn.
    """
    img = cv2.imread(input_img_path)
    height, width = img.shape[0], img.shape[1]
    blank_image = np.zeros((height, width, 3), np.uint8)
    tess_img = pytesseract.image_to_boxes(img, output_type=Output.DICT)

    drew_any = False
    for i in range(len(tess_img['char'])):
        try:
            text = tess_img['char'][i]
            x1, y2 = tess_img['left'][i], tess_img['top'][i]
            x2, y1 = tess_img['right'][i], tess_img['bottom'][i]
        except (KeyError, IndexError):
            # Incomplete OCR record: skip this character. Only lookup
            # failures are ignored, not every possible exception.
            continue

        char_color = CHAR_MAP.get(text.strip().lower(), (255, 255, 255))
        # The y values are subtracted from the image height before drawing.
        cv2.rectangle(blank_image, (x1, height - y1), (x2, height - y2),
                      char_color, cv2.FILLED)
        drew_any = True

    if drew_any:
        # Write once at the end instead of once per character. The file on
        # disk is the same.
        filename = input_img_path.split('/')[-1]
        cv2.imwrite(os.path.join(output_dir, filename), blank_image)
Ejemplo n.º 27
0
 def __extract_boxes(self):
     '''
     Runs pytesseract's box extraction on self.image in self.language and
     keeps the dict-format result in the private boxes attribute.
     '''
     ocr_options = {"lang": self.language, "output_type": "dict"}
     self.__boxes = pytesseract.image_to_boxes(self.image, **ocr_options)
Ejemplo n.º 28
0
    def _image_to_data(self):
        """
        Runs pytesseract OCR on the input image and normalizes its word and
        character bounding boxes.

        Keeps prompting for a new path until the image can be opened. Stores
        the image size in ``self.width`` / ``self.height``, prints the size
        and the recognized text, then passes the word and character box data
        to the ``_normalize_*`` helpers. Nothing is returned.
        """
        while True:
            try:
                print('Please wait.....')
                input_image = Image.open(self.input_image_path)
            except FileNotFoundError:
                print(f'\nThe file [{self.input_image_path}] was not found.')
                self.input_image_path = input(
                    "Please enter the path for the image you'd like to search: ")
            else:
                break

        image_string = pytesseract.image_to_string(input_image)
        word_box_data = pytesseract.image_to_data(input_image)
        char_box_data = pytesseract.image_to_boxes(input_image)

        self.width, self.height = input_image.size
        print(f'Width: {self.width}, Height: {self.height}')
        # TODO: Raise an error if no text is recognized
        print(f'\nRecognized Text:\n {image_string}')

        self._normalize_word_boxes(word_box_data)
        self._normalize_char_boxes(char_box_data)
Ejemplo n.º 29
0
def search_word(img):
    """
    Look for the module-level ``word`` among the characters OCR'd from ``img``.

    Characters reported by ``pytesseract.image_to_boxes`` are compared, in
    order, with the successive letters of ``word``. A non-matching character
    counts as an error; once the errors exceed a third of the word length the
    partial match is discarded and matching restarts from the first letter.

    Args:
        img: image object exposing ``size`` as ``(width, height)`` and
            accepted by ``pytesseract.image_to_boxes``.

    Returns:
        ``(success, coords)``. On success ``coords`` is
        ``[left, top, right, bottom]`` with ``left``/``top`` taken from the
        first matched letter and ``right``/``bottom`` from the last, the
        vertical values being flipped against the image height. On failure
        ``coords`` is whatever partial state remained (``[]`` or
        ``[left, top]``).
    """
    global word
    # An empty search word can never match; avoid indexing word[0] below.
    if not word:
        return False, []

    _, img_height = img.size
    letter_count = 0
    error_count = 0
    word_coords = []
    for line in pytesseract.image_to_boxes(img).splitlines():
        fields = line.split(' ')
        # or, if an error was added recently, look at letter_count - 1
        if fields[0] == word[letter_count]:
            letter_count += 1
            left, right = int(fields[1]), int(fields[3])
            # Box y values are flipped against the image height.
            bottom = img_height - int(fields[2])
            top = img_height - int(fields[4])
            if letter_count == 1:
                word_coords = [left, top]
            # Deliberately a separate `if`, not `elif`: for a one-letter word
            # the first match is also the last one.
            if letter_count == len(word):
                word_coords.extend([right, bottom])
                return True, word_coords
        else:
            error_count += 1
            if error_count > len(word) / 3:
                word_coords = []
                letter_count = 0
                error_count = 0
    return False, word_coords
Ejemplo n.º 30
0
def tables_from_contours(img, cnt, min_area=100000, header_height=200):
    """
    From the contours generated from preprocessing
    Identify the tables and stations regions in the image
    ===============================================
    Inputs:
    img (numpy array): image
    cnt (list): detected contours in image
    min_area (number): a contour is kept only when its area is strictly
        greater than this value (default 100000)
    header_height (int): number of pixel rows directly above each table
        that are returned as its stations region (default 200)
    ===============================================
    Outputs:
    tables (list): crops of img, one per kept contour (bounding rectangle)
    stations_regions (list): crops of img directly above each table
    Both lists are in reverse contour order and are index-aligned.
    """
    tables = []
    stations_regions = []
    for contour in cnt:
        # Only large contours are treated as tables.
        if cv2.contourArea(contour) <= min_area:
            continue
        # get rectangle coordinates from the contour
        x, y, w, h = cv2.boundingRect(contour)
        # crop out the table region from the image
        tables.append(img[y:y + h, x:x + w])
        # Band above the table containing the stations. Clamp at row 0: a
        # negative start index would wrap around to the bottom of the image
        # and yield a wrong or empty slice for tables near the top edge.
        band_top = max(y - header_height, 0)
        stations_regions.append(img[band_top:y, x:x + w])
    tables.reverse()
    stations_regions.reverse()
    return tables, stations_regions
Ejemplo n.º 31
0
from wand.image import Image as wand_Image
from PIL import Image as PIL_Image
from wand.color import Color
import os
import pytesseract

def build_images(force=False):
    """
    Render each page of ``./example.pdf`` to ``foo-<index>.png``.

    The PDF is read at a resolution of 300 and every page is written as a
    PNG with a white background and the alpha channel removed.

    Args:
        force: when true, render even if ``./foo-0.png`` already exists.
            When false (default), rendering is skipped if that file exists.
    """
    if not force and os.path.isfile('./foo-0.png'):
        return

    # Context managers release the document and per-page image resources.
    with wand_Image(filename='./example.pdf', resolution=300) as all_pages:
        for idx, page in enumerate(all_pages.sequence):
            # wand_Image is the only wand image class imported here; the bare
            # name `Image` is not defined in this script.
            with wand_Image(page) as i:
                i.format = 'png'
                i.background_color = Color('white')
                i.alpha_channel = 'remove'
                i.save(filename='foo-%s.png' % idx)


build_images()

# Open the rendered first page once and reuse it for all three OCR calls;
# the context manager closes the file afterwards.
with PIL_Image.open('foo-0.png') as first_page:
    boxes = pytesseract.image_to_boxes(first_page, output_type=pytesseract.Output.DICT)
    data = pytesseract.image_to_data(first_page, output_type=pytesseract.Output.DICT)
    osd = pytesseract.image_to_osd(first_page, output_type=pytesseract.Output.DICT)
Ejemplo n.º 32
0
import sys
import cv2
import pytesseract
from tesseract import image_to_string

# The image path is the first command-line argument.
if len(sys.argv) < 2:
    sys.exit(f'usage: {sys.argv[0]} IMAGE')
filename = sys.argv[1]

# read the image and get the dimensions
img = cv2.imread(filename)
# cv2.imread reports an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
if img is None:
    sys.exit(f'could not read image: {filename}')
h, w, _ = img.shape # assumes color image

# run tesseract, returning the bounding boxes
boxes = pytesseract.image_to_boxes(img) # also include any config options you use

# draw the bounding boxes on the image
for b in boxes.splitlines():
    b = b.split(' ')
    # y values are flipped against the image height before drawing
    img = cv2.rectangle(img, (int(b[1]), h - int(b[2])), (int(b[3]), h - int(b[4])), (0, 255, 0), 2)

# show annotated image and wait for keypress
cv2.imshow(filename, img)
cv2.waitKey(0)