Example #1
import argparse
import shutil
import time

import cv2
import requests
from tesserocr import OEM, PSM, PyTessBaseAPI


def main():
	parser = argparse.ArgumentParser(usage='%(prog)s [options] imageurl', description='Cappy, the little captcha solver.')

	parser.version = "0.1"
	parser.add_argument("imageurl", metavar="imageurl", type=str, help="the url to POST the captcha to")
	parser.add_argument("-o", "--output", type=str, default="/tmp/captcha.png", help="where to save the temporary captcha file (default: /tmp/captcha.png)")
	parser.add_argument("-oe", "--outputedited", type=str, default="/tmp/captcha-edited.png", help="where to save the temporary captcha file (default: /tmp/captcha-edited.png)")
	parser.add_argument("-c", "--count", type=int, default=200, help="total number of captchas to try (default: 200)")
	parser.add_argument("-v", action="version")

	args = parser.parse_args()
	imageurl = args.imageurl
	output = args.output
	outputedited = args.outputedited
	totalcount = args.count

	with PyTessBaseAPI(psm=PSM.SINGLE_WORD, oem=OEM.TESSERACT_ONLY) as api:
		api.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz")
		
		res = requests.get(imageurl, stream=True).raw
		with open(output, "wb") as out_file:
			shutil.copyfileobj(res, out_file)
		del res

		image = None
		count = 0
		starttime = time.time()

		while count < totalcount:
			image = cv2.imread(output, cv2.IMREAD_GRAYSCALE)
			image = cv2.resize(image, None, fx=10, fy=10, interpolation=cv2.INTER_LINEAR)
			image = cv2.GaussianBlur(image, (5,5), 0)
			# image = cv2.bilateralFilter(image, 9, 75, 75)
			# image = cv2.blur(image, (5,5))
			# image = cv2.medianBlur(image, 9)
			ret, image = cv2.threshold(image, 185, 255, cv2.THRESH_BINARY)
			cv2.imwrite(outputedited, image)

			api.SetImageFile(outputedited)
			captcha = api.GetUTF8Text().replace(" ", "").rstrip().lower()

			count += 1

		elapsedtime = time.time() - starttime

	print("Finished")
	print("Time: " + str(round(elapsedtime)) + " seconds")
	print("Total solves per 30 seconds: " + str(round((count/elapsedtime) * 30)))
Example #2
def add_ocrinfo(tree, imgfile):
    imgpil = Image.open(imgfile)
    (orig_width, orig_height) = (imgpil.width, imgpil.height)

    #root_width = tree[min(tree)]['width']
    ratio = 1.0 * orig_width / config.width
    #imgpil = imgpil.convert("RGB").resize(
    #    (orig_width * OCR_RATIO, orig_height * OCR_RATIO))

    tesapi = PyTessBaseAPI(lang='eng')
    tesapi.SetImage(imgpil)
    tesapi.SetSourceResolution(config.ocr_resolution)

    for nodeid in tree:
        node = tree[nodeid]

        if node['children'] and node['text'] == '':
            node['ocr'] = ''
            continue

        x = max(node['x'] * ratio - 1, 0)
        y = max(node['y'] * ratio - 1, 0)
        x2 = min((node['x'] + node['width']) * ratio + 1, orig_width)
        y2 = min((node['y'] + node['height']) * ratio + 1, orig_height)
        width = int(x2 - x)
        height = int(y2 - y)

        if width > 3 and height > 3:
            #tesapi.SetRectangle(int(x * OCR_RATIO), int(y * OCR_RATIO),
            #                    int(width * OCR_RATIO), int(height * OCR_RATIO))
            #print(int(x), int(y), int(width), int(height), orig_width, orig_height)
            tesapi.SetRectangle(int(x), int(y), int(width), int(height))
            ocr_text = tesapi.GetUTF8Text().strip().replace('\n', ' ')
            if ocr_text.strip() == '':
                x = min(x + width * 0.05, orig_width)
                y = min(y + height * 0.05, orig_height)
                width *= 0.9
                height *= 0.9
                tesapi.SetRectangle(int(x), int(y), int(width), int(height))
                ocr_text = tesapi.GetUTF8Text().strip().replace('\n', ' ')

        else:
            ocr_text = ''

        node['ocr'] = ocr_text
Example #3
def get_blob_ocr_result(image: np.ndarray, rect: Rect, ppi: int = 0):
    text = ''
    confidence = 0.0
    with PyTessBaseAPI(psm=PSM.SINGLE_LINE) as api:
        # only read numbers (doesn't seem to work, known issue in v4.0)
        #api.SetVariable('tessedit_char_whitelist', '0123456789')
        api.SetImageBytes(*image)
        api.SetRectangle(*astuple(rect))
        if ppi != 0:
            api.SetSourceResolution(ppi)
        #api.Recognize()
        text = api.GetUTF8Text()
        confidence = api.AllWordConfidences()
        if not len(confidence):
            confidence = (0, )
    return [text, confidence]
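
Despite the np.ndarray annotation, the image argument is unpacked into api.SetImageBytes, which in tesserocr takes (imagedata, width, height, bytes_per_pixel, bytes_per_line). A sketch of building such a tuple from a grayscale uint8 array (the helper name is illustrative):

import numpy as np

def pack_gray(img: np.ndarray):
    # (imagedata, width, height, bytes_per_pixel, bytes_per_line)
    height, width = img.shape
    return (img.tobytes(), width, height, 1, width)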
Example #4
def tess(filename):
    # frk = german fraktur
    # psm=7 treats image as line of text, psm=10 for single char
    with PyTessBaseAPI(lang='frk', psm=10) as api:
        api.SetImageFile(filename)
        actualLabel = filename[filename.rfind('_') + 1:-4]
        predictLabel = api.GetUTF8Text().rstrip()
        print(filename)
        print('Actual:', actualLabel, 'Prediction:', predictLabel,
              'Confidence:', str(api.AllWordConfidences()))  # 0 worst, 100 best
        if actualLabel == predictLabel:
            print('CORRECT ~~~\n')
            return 1
        else:
            print('INCORRECT >:(\n')
            return 0
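
Because tess() returns 1 for a correct prediction and 0 otherwise, a small driver can measure accuracy over a directory of labelled glyphs. A sketch, assuming the name_label.png filename convention used above and a hypothetical glyphs/ directory:

import glob

files = glob.glob('glyphs/*.png')
correct = sum(tess(f) for f in files)
print('Accuracy: {:.1%}'.format(correct / len(files)))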
Example #5
def get_comp(input_name):
    image = Image.open(input_name)
    with PyTessBaseAPI(path=tesseract_path, lang='heb') as api:
        image = preprocess_image(image)
        api.SetImage(image)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        debug_log('Found {} textline image components.'.format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            debug_log(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
                      "confidence: {1}, text: {2}".format(
                          i, conf, ocrResult, **box))
Example #6
def get_away_team_name(away_image):
    away = away_image.crop(
        (210 * RESIZE_FACTOR, 0, 270 * RESIZE_FACTOR, 25 * RESIZE_FACTOR))
    away = pre_process.invert_if_neccessary(away)
    # away.show()

    with PyTessBaseAPI() as api:
        api.SetImage(away)
        text = api.GetUTF8Text().replace('\n', '').strip(' ')
        # confidence = api.AllWordConfidences()

        # Accept only names that are exactly 3 characters (letters or digits 1-9)
        if re.match("^[A-Za-z1-9]{3}$", text) is None:
            return None  # team name wasn't good

        return text
Example #7
    def watch(self):
        if not area_data_path.exists():
            raise NotImplementedError()
        with PyTessBaseAPI(lang="jpn") as api:
            for i in range(10000):
                time.sleep(1)
                igrab = RectImageGrab()
                image = igrab.grab()
                api.SetImage(image)
                txt = api.GetUTF8Text()
                txt = "".join(txt.split())
                self.ocr_result = txt
                self._logger.info(f"OCR:{txt}")
                if self.is_stopping:
                    break
        return 1
Example #8
def ocr_roi(rois):
    """
    Function to do the OCR
    Inputs: rois: A list of Images
    Returns: A List of strings. One for each incoming Image
    """
    # Initializing PyTessBaseAPI again and again adds unnecessary overhead,
    # so we initialize it just once.
    # PSM.SINGLE_LINE degrades performance
    extracted_text = []
    with PyTessBaseAPI(path=os.path.join(os.getcwd(), 'tessdata')) as api:
        for roi in rois:
            # Converting the OpenCV image to PIL image for tesseract
            api.SetImage(Image.fromarray(roi))
            extracted_text.append(api.GetUTF8Text())
    return extracted_text
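
One caveat with Image.fromarray(roi): it preserves OpenCV's BGR channel order for 3-channel ROIs. Grayscale ROIs pass through unchanged, but colour crops should be converted first; a sketch with an illustrative helper:

import cv2
from PIL import Image

def to_pil(roi):
    # swap BGR -> RGB for colour ROIs; grayscale needs no conversion
    if roi.ndim == 3:
        roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    return Image.fromarray(roi)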
Example #9
def rotate_to_upright(image):
    with PyTessBaseAPI(psm=PSM.OSD_ONLY) as api:
        api.SetImage(image)

        os = api.DetectOS()
        if os:
            if os['orientation'] == Orientation.PAGE_RIGHT:
                image = image.rotate(90, expand=True)

            if os['orientation'] == Orientation.PAGE_LEFT:
                image = image.rotate(270, expand=True)

            if os['orientation'] == Orientation.PAGE_DOWN:
                image = image.rotate(180, expand=True)

    return image
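
A minimal usage sketch (the file names are placeholders):

from PIL import Image

img = Image.open('scan.png')
img = rotate_to_upright(img)
img.save('scan_upright.png')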
Example #10
    def check_ocr(self):
        image = self._open_image()
        image_width = self.image_width
        image_height = self.image_height
        texts_id = self.TEXTSID
        with PyTessBaseAPI() as api:
            api.SetImage(image)
            boxes = api.GetComponentImages(RIL.TEXTLINE, True)

            boxes_out = np.empty([0, 4])
            class_out = []
            confi_out = []
            for i, (im, box, _, _) in enumerate(boxes):
                # im is a PIL image object
                # box is a dict with x, y, w and h keys
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])

                # This gets the actual recognized text; uncomment the prints below when needed
                ocrResult = api.GetUTF8Text()
                conf = api.MeanTextConf()

                ymin = float(box['y'] / image_height)
                xmin = float(box['x'] / image_width)
                ymax = float((box['y'] + box['h']) / image_height)
                xmax = float((box['x'] + box['w']) / image_width)
                bb = np.array([ymin, xmin, ymax, xmax])
                bb = bb.reshape((1, 4))

                # print("x:%s y:%s w:%s h:%s" % (box['x'], box['y'], box['w'], box['h']))
                # print("w:%s h:%s" % (image_width, image_height))
                # print("ymin:%s xmin:%s ymax:%s xmax:%s" % (ymin, xmin, ymax, xmax))
                boxes_out = np.append(boxes_out, bb, axis=0)
                class_out.append(texts_id)
                if conf == 0:
                    conf = 99
                conf = int(conf) / 100
                confi_out.append(conf)
                # print(ocrResult)
                # print(conf)
                # print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
                #       "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box))
            out = [boxes_out, class_out, confi_out]

            return out
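
The returned boxes are normalized [ymin, xmin, ymax, xmax] rows, so mapping one back to pixel coordinates only needs the image size. A sketch with an illustrative helper name:

def to_pixels(bb, image_width, image_height):
    ymin, xmin, ymax, xmax = bb
    return (int(xmin * image_width), int(ymin * image_height),
            int(xmax * image_width), int(ymax * image_height))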
Example #11
def execute2():
    driver.save_screenshot('driver.png')
    image_file = Image.open("driver.png") # open colour image
    image_file= image_file.convert('L') # convert image to monochrome - this works
    #image_file= image_file.convert('1') # convert image to black and white
    image_file.save('result.png')
    
    images = ['result.png' ]
    with PyTessBaseAPI() as api:
        for img in images:
            api.SetImageFile(img)
            pignore = api.GetUTF8Text()
            ignore = api.AllWordConfidences()
            pignore = str(pignore)
    matchObj1 = re.search( r'Please Enter. (.*)', pignore, re.M|re.I)
    if matchObj1:
        game = matchObj1.group(1)
        driver.find_element_by_id("adcopy_response").send_keys(game)
        frame1 = driver.find_element_by_id("free_play_form_button")
        driver.execute_script("$(arguments[0]).click();", frame1)
        print ("Waiting 60 minutes for next claim")
        time.sleep(3600)
        brutal()
    else:
        matchObj2 = re.search( r'Please Enter, (.*)', pignore, re.M|re.I)
        if matchObj2:
            game2 = matchObj2.group(1)
            driver.find_element_by_id("adcopy_response").send_keys(game2)
            frame2 = driver.find_element_by_id("free_play_form_button")
            driver.execute_script("$(arguments[0]).click();", frame2)
            ("Waiting 60 minutes for next claim")
            time.sleep(3600)
            brutal()
        else:
            matchObj3 = re.search( r'Ple ase Enter, (.*)', pignore, re.M|re.I)
            if matchObj3:
                game3 = matchObj3.group(1)
                driver.find_element_by_id("adcopy_response").send_keys(game3)
                frame3 = driver.find_element_by_id("free_play_form_button")
                driver.execute_script("$(arguments[0]).click();", frame3)
                ("Waiting 60 minutes for next claim")
                time.sleep(3600)
                brutal()
            else:
                driver.execute_script("javascript:ACPuzzle.reload('')")
                time.sleep(10)
                execute2()
Example #12
def resolve(path):
    captcha_image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    captcha_image = cv2.resize(captcha_image,
                               None,
                               fx=6,
                               fy=6,
                               interpolation=cv2.INTER_LINEAR)
    captcha_image = cv2.medianBlur(captcha_image, 9)
    th, captcha_image = cv2.threshold(captcha_image, 65, 250,
                                      cv2.THRESH_BINARY)
    cv2.imwrite('captcha.png', captcha_image)

    with PyTessBaseAPI() as api:
        api.SetVariable('tessedit_char_whitelist',
                        'abcdefghijklmnopqrstuvwxyz1234567890')
        api.SetImageFile('captcha.png')
        return api.GetUTF8Text().replace(' ', '').rstrip().lower()
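
A minimal usage sketch (the input path is a placeholder); note that resolve() writes its preprocessed copy to captcha.png in the working directory:

text = resolve('raw_captcha.png')
print(text)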
Example #13
def readText(filename):
    res = {}
    image = Image.open(filename)
    with PyTessBaseAPI() as api:
        api.SetImage(image)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        print(('Found {} textline image components.').format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            if ocrResult != '':
                # res[ocrResult] = (box['x'] + box['w']/2, box['y'] + box['h']/2)
                res[ocrResult] = (box['x'], box['y'])
    return res
Example #14
    def cyclethrough(self, index, filename, directory):

        directory = askdirectory()  # prompt user to select a folder

        for filename in os.listdir(directory):
            self.Stopper()
            if filename.endswith(".JPG"):

                img_read = cv2.imread(os.path.join(directory,filename))
                area_lower_bound = 200  # originally 300
                print(filename)  # note: this is the bare filename, not the full path
                grayscale = cv2.cvtColor(img_read,
                                         cv2.COLOR_BGR2GRAY)  # potential improvement: using multiple color channels and combining results

                block_size = 201
                offset = 24
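                # note: newer scikit-image versions replace threshold_adaptive with
                # threshold_local, which returns per-pixel thresholds rather than a
                # binary image (compare with: grayscale > threshold_local(...))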
                binar_adaptive = threshold_adaptive(grayscale, block_size=block_size, offset=offset)

                # next, do noise removal
                noisy = binar_adaptive.astype('uint8') * 255

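                # note: OpenCV 3.x returns (image, contours, hierarchy) here;
                # OpenCV 4.x returns only (contours, hierarchy)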
                im2, contours, hierarchy = cv2.findContours(noisy, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

                large_contours = []

                for cnt in contours:
                    if cv2.contourArea(cnt) > area_lower_bound:
                        large_contours.append(cnt)

                black_bg = np.zeros((img_read.shape[0], img_read.shape[1]), dtype='uint8')
                cv2.drawContours(black_bg, large_contours, -1, color=255, thickness=-1)
                # Image.fromarray(black_bg).show()  # black text on white background
                combined = np.logical_and(255 - black_bg, 255 - noisy)  # why are some tiny pixels left here?

                img_for_tess = Image.fromarray(combined.astype('uint8') * 255)

                with PyTessBaseAPI(psm=1) as api:
                    api.SetVariable("tessedit_char_whitelist",
                                    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0987654321-.:/()")
                    api.SetImage(img_for_tess)
                    self.DisplayImages(api.GetThresholdedImage(), os.path.join(directory, filename))
                    self.DisplayOCRText(api.GetUTF8Text())
                continue

            else:
                continue
Example #15
def findBoundingBoxesWord(fname):
    """ Use OCR to find the bounding boxes of each word in a document"""
    # This opens the converted pdf as an image file
    image = Image.open(fname)
    #This converts the original image to RGBA to allow for alpha channel
    #composites (this allows for transparency in PIL)
    img = image.convert("RGBA")
    #This creates a new transparent image to composite with the original
    tmp = Image.new('RGBA', img.size, (0, 0, 0, 0))
    #This creates the drawing object for the overlay
    draw = ImageDraw.Draw(tmp)

    with PyTessBaseAPI() as api:
        api.SetImage(image)

        # Iterate over lines using OCR
        #boxes = api.GetComponentImages(RIL.TEXTLINE, True)

        # Iterate over words using OCR
        boxes = api.GetComponentImages(RIL.WORD, True)
        #print 'Found {} textline image components.'.format(len(boxes))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()

            #This calls the google translate function to translate a line of text for inclusion
            #transText = g_translate(ocrResult)
            #print transText

            #This gets the OCR confidence
            conf = api.MeanTextConf()

            #scale the confidence to the opacity
            opacity = opacityConversion(conf)

            #draw = ImageDraw.Draw(image)
            draw.rectangle(((box['x'], box['y']), ((box['x'] + box['w']),
                                                   (box['y'] + box['h']))),
                           fill=(244, 167, 66, opacity))

    # This creates a composite image with the original image and the transparent overlay
    img = Image.alpha_composite(img, tmp)
    # This saves the new image
    img.save(fname)
Example #16
def textConfidence(fname):
    with PyTessBaseAPI() as api:
        #for image in images:
        api.SetImageFile(fname)
        text = api.GetUTF8Text()
        # print(api.AllWordConfidences())
        print(textstat.flesch_kincaid_grade(text))

        print(textstat.flesch_reading_ease(text))

        print("90-100 : Very Easy")
        print("80-89 : Easy")
        print("70-79 : Fairly Easy")
        print("60-69 : Standard")
        print("50-59 : Fairly Difficult")
        print("30-49 : Difficult")
        print("0-29 : Very Confusing")
Example #17
def process_image(img, page, refine_boxes):
    try:
        page.progress = ('Analysing layout', 0.0)
        with PyTessBaseAPI(lang='ge', psm=3) as api:
            api.SetVariable("hocr_char_boxes", "true")
            api.SetImage(img)
            api.Recognize()
            hocr = api.GetHOCRText(0)
        page.progress = ('Analysing layout', 1.0)
        page_json = process_hocr(hocr, img, page)
        if refine_boxes:
            page_json = refine(img, page_json, page)
        page_json_to_text(page_json, page)
        return page_json
    except Exception as e:
        page.progress = (f"Error processing page: {e}", -1)
        return {}
Example #18
    def perform_ocr(img):

        # First, preprocess the image:
        img = Image_Utils.preprocess_image(img)

        # Next, convert the image to a PIL image (the OCR API expects PIL input):
        img_pil = Image.fromarray(img)

        if not Ocr_Utils.OCR_API_HANDLE:
            Ocr_Utils.OCR_API_HANDLE = PyTessBaseAPI(psm=PSM.SINGLE_COLUMN)
            #Ocr_Utils.OCR_API_HANDLE = PyTessBaseAPI(psm=PSM.SINGLE_BLOCK)

        Ocr_Utils.OCR_API_HANDLE.SetImage(img_pil)
        text = Ocr_Utils.OCR_API_HANDLE.GetUTF8Text()

        # Return extracted text:
        return text
Example #19
def read_word(image, whitelist=None, chars=None, spaces=False):
    """ OCR a single word from an image. Useful for captchas.
        Image should be pre-processed to remove noise etc. """
    api = PyTessBaseAPI()
    api.SetPageSegMode(8)
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    guess = api.GetUTF8Text()

    if not spaces:
        guess = ''.join([c for c in guess if c != " "])
        guess = guess.strip()

    if chars is not None and len(guess) != chars:
        return guess, None

    return guess, api.MeanTextConf()
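
A usage sketch (path and whitelist are placeholders). Note the snippet never calls api.End(), so in longer-running code it is safer to wrap PyTessBaseAPI in a with-block:

from PIL import Image

img = Image.open('captcha.png')
guess, conf = read_word(img, whitelist='0123456789', chars=6)
print(guess, conf)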
Example #20
def run(is_invert_image=False, image_path=None, image_dir=None):
    with PyTessBaseAPI(path=TESSDATA_PATH, lang='chi_sim') as api:
        if image_dir is not None:
            for home, _, files in os.walk(image_dir):
                for f in files:
                    if f == '.DS_Store':
                        continue
                    img = os.path.join(home, f)
                    print(img)
                    pretreatment_imgs = pretreatment(is_invert_image, img)
                    ocr(api, img)
                    for pretreatment_img in pretreatment_imgs:
                        ocr(api, pretreatment_img)
        if image_path is not None:
            pretreatment_imgs = pretreatment(is_invert_image, image_path)
            ocr(api, image_path)
            for pretreatment_img in pretreatment_imgs:
                ocr(api, pretreatment_img)
Example #21
def get_word_data(img):
    image = Image.open(img, mode='r')
    pdf = pdfpage.PDFPage('folder location', 1)
    with PyTessBaseAPI() as api:
        api.SetImage(image)
        boxes = api.GetComponentImages(
            RIL.WORD,
            True)  # option for TEXTLINE or SYMBOL (character) as well
        for i, (im, box, _, _) in enumerate(boxes):
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            doc_word = word.Word(i, box['x'], box['y'], box['w'], box['h'],
                                 conf, ocrResult)
            # print ((u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, ""confidence: {1}, text: {2}").format(i, conf, ocrResult, **box))
            pdf.add_word(doc_word)
    pdf.sort_dictionaries()
    return pdf
Example #22
    def _get_word_block_list_from_image(
            self,
            image: Image,
            find_type: int,
            spec_box: dict = None) -> typing.List[WordBlock]:
        word_list = list()
        with PyTessBaseAPI(lang=self.lang) as api:
            api.SetImage(image)
            if spec_box:
                ocr_result = api.GetUTF8Text()
                return [WordBlock(box=spec_box, content=ocr_result)]

            boxes = api.GetComponentImages(find_type, True)
            for _, box, *_ in boxes:
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                ocr_result = api.GetUTF8Text()
                word_list.append(WordBlock(box=box, content=ocr_result))
        return word_list
Example #23
def tesseract(path, filename, conf_dir, text_dir):
    # OCR - use the Tesseract API through tesserocr (Cython bindings)
    with PyTessBaseAPI() as api:
        pathFilename = path + "/" + filename

        label_text = ""
        ri = None
        try:
            # Set the image
            api.SetImageFile(pathFilename)
            # Run and verify the recognition process
            label_text = api.GetUTF8Text()
            label_text = label_text[:-1]
            api.SetVariable("save_blob_choices", "T")
            api.Recognize()
            ri = api.GetIterator()
        except Exception:
            return

        conf_text = ""
        # Iterate over each of the symbols of the file
        level = RIL.SYMBOL
        for r in iterate_level(ri, level):
            try:
                symbol = r.GetUTF8Text(level)
                conf = 0.01 * r.Confidence(level)

                # We only save non-break symbols
                if (symbol not in ['\n', '\r', '\t', '\f']):
                    conf_text += symbol + "\t" + str(conf) + "\n"
            except Exception:
                continue

        if len(conf_text) > 0:
            basename = filename[:-4]
            # Write all the characters and their Confidence in the probabilities file
            conf_pathFilename = conf_dir + "/" + basename + ".prob"
            with open(conf_pathFilename, "w", encoding="utf-8") as f:
                f.write(conf_text)

            # Write the recognized text line in the text file
            text_pathFilename = text_dir + "/" + basename + ".txt"
            with open(text_pathFilename, "w", encoding="utf-8") as f:
                f.write(label_text)
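
A hypothetical invocation, assuming the input image and both output directories already exist:

tesseract('/data/lines', 'line_0001.png', '/data/conf', '/data/text')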
Example #24
def ORIG_ocr_content_confidences_images():

    fpath='/home/ub2/ARCHIVE/DOC_IMAGES/038c69d2-0e3e-43e4-8ea9-274b93d2144b.jpg'
    fpath_out='/home/ub2/ARCHIVE/DOC_IMAGES/038c69d2-0e3e-43e4-8ea9-274b93d2144b_opencv.jpg'
    fpath='/home/ub2/ARCHIVE/MDSCAN/completed/__ppm__-31.ppm'
    fout='/home/ub2/ARCHIVE/MDSCAN/completed/__ppm__-31_opencv.pdf'

    fpath='/home/ub2/ARCHIVE/DOC_IMAGES/1482885785_8855e56.jpg'

    images = [fpath]
    r={}
    with PyTessBaseAPI() as api:
        for img in images:
            api.SetImageFile(img)
            r['lines']=api.GetTextlines()
            #r['images']=api.GetComponentImages()
            #r['thresh']=api.GetThresholdedImage()
            r['_words']=api.GetWords()
            r['strips']=api.GetStrips()
            r['text']=api.GetUTF8Text()
            r['words']=api.AllWords()
            r['word_conf']=api.AllWordConfidences()
            word_conf_map=api.MapWordConfidences()
            #r['word_conf_map']=api.MapWordConfidences()
            #print api.GetUTF8Text()
            #print api.AllWordConfidences()
    # api is automatically finalized when used in a with-statement (context manager).
    # otherwise api.End() should be explicitly called when it's no longer needed.

    def img_display(pil_image):
        b = BytesIO()
        pil_image.save(b, format='png')
        data = b.getvalue()
        r = '<img src="data:image/png;base64,' + base64.encodebytes(data).decode('ascii') + '"/>'
        return BS(r).renderContents().replace('&lt;','<').replace('&gt;','>').replace('\\n','\n')
        #return r
        #ip_img = display.Image(data=data, format='png', embed=True)
        #return ip_img._repr_png_()

    df = pd.DataFrame([dict(zip(['word', 'conf'], s)) for s in word_conf_map])
    df['images'] = [r['_words'][i][0] for i in range(len(r['_words']))]
    df['image2'] = df.images.map(img_display)
    h = df.sort_values(['conf', 'word'], ascending=[True, True]).head().loc[:, ['conf', 'word', 'image2']].to_html(escape=True)
    HTML(BS(h).renderContents().replace('&lt;', '<').replace('&gt;', '>').replace('\\n', '\n'))
Example #25
def detector(image):
    # load the example image and convert it to grayscale
    inputImage = cv2.imread(image)
    gray = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
    # Threshold the image
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Write the grayscale to temp file
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)

    with PyTessBaseAPI() as api:
        # Send the new grayscale image into Tesseract
        api.SetImageFile(filename)

        # We will segment the grayscale by words
        boxes = api.GetComponentImages(RIL.WORD, True)

        # For each bounding box around a word
        for i, (im, box, _, _) in enumerate(boxes):
            # Grab the coordinates of the bounding box
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])

            # Turn the returned bounding box coordinates into an array of coordinates
            coord = list(box.values())

            # Load grayscale for cropping
            cropper = Image.open(filename)

            # Cropped image is saved into new variable
            crop_image = cropper.crop(
                (coord[0], coord[1], coord[0] + coord[2], coord[1] + coord[3]))

            # Convert the new image into a numpy array
            cropped = numpy.array(crop_image)

            # Create the new file name for the word
            word_file = "word_" + str(i) + ".png"

            # Have OpenCV save the cropped image into the new file
            cv2.imwrite(word_file, cropped)

    # Remove the grayscale image
    os.remove(filename)
Example #26
def orcTitle(path):
    # make first page into jpeg
    page = convert_from_path(path, first_page=0, last_page=1)[0]
    page.save(temp_file, 'JPEG')

    # use ocr to extract title
    # image = Image.open(temp_file)
    with PyTessBaseAPI() as api:
        api.SetImageFile(temp_file)
        api.Recognize()  # required to get result from the next line
        ri = api.GetIterator()

        # loop through and find largest text size
        level = RIL.TEXTLINE
        maxSize = 0
        for r in iterate_level(ri, level):
            # extract line of text
            text = r.GetUTF8Text(level)

            # get line's font size
            fontSize = r.WordFontAttributes()['pointsize']

            # check to see if current max
            # remove extra spaces/newlines/tabs (etc.) when testing min length req
            if len(''.join(text.split())) > 1 and fontSize > maxSize:
                maxSize = fontSize


        # loop through again and concatenate largest words
        ri = api.GetIterator()
        level = RIL.TEXTLINE
        title_list = []
        for r in iterate_level(ri, level):
            text = r.GetUTF8Text(level)
            fontSize = r.WordFontAttributes()['pointsize']
            if len(''.join(text.split())) > 1 and fontSize > maxSize - 15:
                # add title words to list
                title_list.extend(r.GetUTF8Text(level).split())

        # concatenate them back together
        title = ' '.join(title_list)

        os.remove(temp_file)
        return title
Example #27
def find_word_attribute(image, tessdata_3_path):

    #Reading image
    raw_img = Image.open(image)

    #Scaling image
    img = scale_image(raw_img)

    #Initializing parameters
    word_arr = []
    bold_arr = []

    #Using TessBaseAPI to read the font attributes
    with PyTessBaseAPI(path=tessdata_3_path) as api:
        api.SetImage(img)
        api.Recognize(0)

        #print(api.GetUTF8Text())
        ri = api.GetIterator()
        level = RIL.WORD
        for r in iterate_level(ri, level):
            bb = r.BoundingBox(level)
            if bb is not None:
                word = r.GetUTF8Text(level)
                #word_arr.append(word)

                font_name = r.WordFontAttributes()
                #attr.append(font_name)

                if word is not None and font_name is not None:
                    word_arr.append(word)
                    bold_arr.append(font_name)

                Lang_name = r.WordRecognitionLanguage()
                bool_value = r.WordIsFromDictionary()
                conf = r.Confidence(level)

        df1 = pd.DataFrame(word_arr)
        df2 = pd.DataFrame(bold_arr)
        df = pd.concat([df1, df2], axis=1)
        df.rename(columns={df.columns[0]: "Word"}, inplace=True)

    return (df)
Example #28
def show_frame(cap,imgLabel,imgText):
    ret, frame = cap.read()
    img = cv.cvtColor(frame, cv.COLOR_RGB2BGR)
    im = Image.fromarray(img)
    with PyTessBaseAPI() as api:
        api.SetImage(im)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        ocrResult = api.GetUTF8Text()
        # print('Found {} textline image components.'.format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object; box is a dict with x, y, w and h keys
            # api.SetRectangle(box['x'], box['y'], box['w'], box['h']); conf = api.MeanTextConf()
            cv.rectangle(img, (box['x'], box['y']), (box['x'] + box['w'], box['y'] + box['h']), (255, 0, 0), 1)
    im = Image.fromarray(img)
    imgtk = ImageTk.PhotoImage(image=im)
    imgLabel.imgtk = imgtk
    imgLabel.configure(image=imgtk)
    imgText.configure(text=ocrResult)
    imgLabel.after(10, lambda: show_frame(cap,imgLabel,imgText))
Example #29
def Spaces(image=None):
    PrintGood(
        'This returns the number of spaces in a specific image or images')
    if not isinstance(image, list):
        image = PromptList('Which image/images to Scan: ', image)
    Spaces = 0
    for img in image:
        img = Image.open(img)
        with PyTessBaseAPI() as api:
            api.SetImage(img)
            boxes = api.GetComponentImages(RIL.TEXTLINE, True)
            for i, (im, box, _, _) in enumerate(boxes):
                im.save('saving{}.jpg'.format(i))
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                ocrResult = api.GetUTF8Text()
                conf = api.MeanTextConf()
                text = str(ocrResult).replace('\n', '').split(' ')
                Spaces = len(text) + Spaces
    return int(Spaces)
Example #30
def symbolConfidenc(img):
    word = ''
    count = 0
    insertSpace = 'false'
    with PyTessBaseAPI() as api:
        api.SetImageFile(img)
        api.Recognize()

        ri = api.GetIterator()
        #levelTwo = RIL.TEXTLINE
        level = RIL.WORD
        for r in iterate_level(ri, level):
            # space = r.GetUTF8Text(levelTwo)  # gets the whole line, including everything, unlike RIL.SYMBOL
            symbol = r.GetUTF8Text(level)  # r == ri
            conf = r.Confidence(level)

            if conf > 50:
                word = word + ' ' + symbol
    return word