def main():
    parser = argparse.ArgumentParser(usage='%(prog)s [options] imageurl',
                                     description='Cappy, the little captcha solver.')
    parser.version = "0.1"
    parser.add_argument("imageurl", metavar="imageurl", type=str,
                        help="the url to fetch the captcha from")
    parser.add_argument("-o", "--output", type=str, default="/tmp/captcha.png",
                        help="where to save the temporary captcha file (default: /tmp/captcha.png)")
    parser.add_argument("-oe", "--outputedited", type=str, default="/tmp/captcha-edited.png",
                        help="where to save the edited temporary captcha file (default: /tmp/captcha-edited.png)")
    parser.add_argument("-c", "--count", type=int, default=200,
                        help="total number of captchas to try (default: 200)")
    parser.add_argument("-v", action="version")
    args = parser.parse_args()

    imageurl = args.imageurl
    output = args.output
    outputedited = args.outputedited
    totalcount = args.count

    with PyTessBaseAPI(psm=PSM.SINGLE_WORD, oem=OEM.TESSERACT_ONLY) as api:
        api.SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyz")
        res = requests.get(imageurl, stream=True).raw
        with open(output, "wb") as out_file:
            shutil.copyfileobj(res, out_file)
        del res

        count = 0
        starttime = time.time()
        while count < totalcount:
            image = cv2.imread(output, cv2.IMREAD_GRAYSCALE)
            image = cv2.resize(image, None, fx=10, fy=10, interpolation=cv2.INTER_LINEAR)
            image = cv2.GaussianBlur(image, (5, 5), 0)
            # image = cv2.bilateralFilter(image, 9, 75, 75)
            # image = cv2.blur(image, (5, 5))
            # image = cv2.medianBlur(image, 9)
            ret, image = cv2.threshold(image, 185, 255, cv2.THRESH_BINARY)
            cv2.imwrite(outputedited, image)
            api.SetImageFile(outputedited)
            captcha = api.GetUTF8Text().replace(" ", "").rstrip().lower()
            count += 1

        elapsedtime = time.time() - starttime
        print("Finished")
        print("Time: " + str(round(elapsedtime)) + " seconds")
        print("Total solves per 30 seconds: " + str(round((count / elapsedtime) * 30)))
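# Usage sketch for main() above (a hedged assumption: the original script's
# module-level imports are argparse, requests, shutil, time, cv2 and
# tesserocr's PyTessBaseAPI/PSM/OEM, and it is saved as e.g. cappy.py):
#
#     python cappy.py --count 50 http://example.com/captcha.png
#
if __name__ == '__main__':
    main()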
def add_ocrinfo(tree, imgfile):
    imgpil = Image.open(imgfile)
    (orig_width, orig_height) = (imgpil.width, imgpil.height)
    #root_width = tree[min(tree)]['width']
    ratio = 1.0 * orig_width / config.width
    #imgpil = imgpil.convert("RGB").resize(
    #    (orig_width * OCR_RATIO, orig_height * OCR_RATIO))
    tesapi = PyTessBaseAPI(lang='eng')
    tesapi.SetImage(imgpil)
    tesapi.SetSourceResolution(config.ocr_resolution)

    for nodeid in tree:
        node = tree[nodeid]
        if node['children'] and node['text'] == '':
            node['ocr'] = ''
            continue

        x = max(node['x'] * ratio - 1, 0)
        y = max(node['y'] * ratio - 1, 0)
        x2 = min((node['x'] + node['width']) * ratio + 1, orig_width)
        y2 = min((node['y'] + node['height']) * ratio + 1, orig_height)
        width = int(x2 - x)
        height = int(y2 - y)

        if width > 3 and height > 3:
            #tesapi.SetRectangle(int(x * OCR_RATIO), int(y * OCR_RATIO),
            #                    int(width * OCR_RATIO), int(height * OCR_RATIO))
            #print(int(x), int(y), int(width), int(height), orig_width, orig_height)
            tesapi.SetRectangle(int(x), int(y), int(width), int(height))
            ocr_text = tesapi.GetUTF8Text().strip().replace('\n', ' ')
            if ocr_text.strip() == '':
                # retry on a slightly shrunken rectangle if the first pass found nothing
                x = min(x + width * 0.05, orig_width)
                y = min(y + height * 0.05, orig_height)
                width *= 0.9
                height *= 0.9
                tesapi.SetRectangle(int(x), int(y), int(width), int(height))
                ocr_text = tesapi.GetUTF8Text().strip().replace('\n', ' ')
        else:
            ocr_text = ''

        node['ocr'] = ocr_text
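# Hedged sketch of the tree shape add_ocrinfo expects: a dict mapping node ids
# to node dicts with x/y/width/height (in config.width-relative units) plus
# 'children' and 'text' keys. The values and file name below are illustrative,
# not taken from the original code.
example_tree = {
    0: {'x': 10, 'y': 20, 'width': 120, 'height': 24,
        'children': [], 'text': 'Login'},
}
#add_ocrinfo(example_tree, 'screenshot.png')  # fills in example_tree[0]['ocr']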
def get_blob_ocr_result(image: np.ndarray, rect: Rect, ppi: int = 0):
    text = ''
    confidence = 0.0
    with PyTessBaseAPI(psm=PSM.SINGLE_LINE) as api:
        # only read numbers (doesn't seem to work, known issue in v4.0)
        #api.SetVariable('tessedit_char_whitelist', '0123456789')
        # note: `image` must unpack into SetImageBytes' five arguments
        # (imagedata, width, height, bytes_per_pixel, bytes_per_line)
        api.SetImageBytes(*image)
        api.SetRectangle(*astuple(rect))
        if ppi != 0:
            api.SetSourceResolution(ppi)
        #api.Recognize()
        text = api.GetUTF8Text()
        confidence = api.AllWordConfidences()
        if not len(confidence):
            confidence = (0,)
    return [text, confidence]
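# Hedged helper for get_blob_ocr_result: tesserocr's SetImageBytes takes
# (imagedata, width, height, bytes_per_pixel, bytes_per_line), so despite the
# np.ndarray annotation the `image` argument must unpack that way. One way to
# pack a grayscale array (helper name is illustrative, not from the original):
import numpy as np

def pack_gray_image(arr: np.ndarray):
    height, width = arr.shape
    # 1 byte per pixel, `width` bytes per line for a tightly packed gray image
    return (arr.tobytes(), width, height, 1, width)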
def tess(filename):
    # frk = german fraktur
    # psm=7 treats image as line of text, psm=10 for single char
    with PyTessBaseAPI(lang='frk', psm=10) as api:
        api.SetImageFile(filename)
        actualLabel = filename[filename.rfind('_') + 1:-4]
        predictLabel = api.GetUTF8Text().rstrip()
        print(filename)
        print('Actual:', actualLabel, 'Prediction:', predictLabel,
              'Confidence:', str(api.AllWordConfidences()))  # 0 worst, 100 best
        if actualLabel == predictLabel:
            print('CORRECT ~~~\n')
            return 1
        else:
            print('INCORRECT >:(\n')
            return 0
def get_comp(input_name):
    image = Image.open(input_name)
    with PyTessBaseAPI(path=tesseract_path, lang='heb') as api:
        image = preprocess_image(image)
        api.SetImage(image)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        debug_log('Found {} textline image components.'.format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            debug_log(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
                      "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box))
def get_away_team_name(away_image):
    away = away_image.crop(
        (210 * RESIZE_FACTOR, 0, 270 * RESIZE_FACTOR, 25 * RESIZE_FACTOR))
    away = pre_process.invert_if_neccessary(away)
    # away.show()
    with PyTessBaseAPI() as api:
        api.SetImage(away)
        text = api.GetUTF8Text().replace('\n', '').strip(' ')
        # confidence = api.AllWordConfidences()
    # Accept only names that are exactly 3 letters or digits (1-9)
    if re.match("^[A-Za-z1-9]{3}$", text) is None:
        return None  # team name wasn't good
    return text
def watch(self):
    if not area_data_path.exists():
        raise NotImplementedError()
    with PyTessBaseAPI(lang="jpn") as api:
        for i in range(10000):
            time.sleep(1)
            igrab = RectImageGrab()
            image = igrab.grab()
            api.SetImage(image)
            txt = api.GetUTF8Text()
            txt = "".join(txt.split())  # drop all whitespace
            self.ocr_result = txt
            self._logger.info(f"OCR:{txt}")
            if self.is_stopping:
                break
    return 1
def ocr_roi(rois):
    """
    Function to do the OCR
    Inputs:
        rois: A list of Images
    Returns:
        A List of strings. One for each incoming Image
    """
    # Initializing PyTessBaseAPI again and again adds unnecessary overhead,
    # so initialize it just once.
    # PSM.SINGLE_LINE degrades performance.
    extracted_text = []
    with PyTessBaseAPI(path=os.path.join(os.getcwd(), 'tessdata')) as api:
        for roi in rois:
            # Convert the OpenCV image to a PIL image for tesseract
            api.SetImage(Image.fromarray(roi))
            extracted_text.append(api.GetUTF8Text())
    return extracted_text
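# Usage sketch for ocr_roi, assuming OpenCV-style arrays as input and a local
# ./tessdata directory as the function above expects (the helper name and file
# paths are illustrative assumptions):
import cv2

def ocr_image_files(paths):
    # Load each file with OpenCV and OCR them in one batch
    return ocr_roi([cv2.imread(p) for p in paths])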
def rotate_to_upright(image):
    with PyTessBaseAPI(psm=PSM.OSD_ONLY) as api:
        api.SetImage(image)
        osd = api.DetectOS()  # orientation/script detection result
        if osd:
            if osd['orientation'] == Orientation.PAGE_RIGHT:
                image = image.rotate(90, expand=True)
            elif osd['orientation'] == Orientation.PAGE_LEFT:
                image = image.rotate(270, expand=True)
            elif osd['orientation'] == Orientation.PAGE_DOWN:
                image = image.rotate(180, expand=True)
    return image
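# Minimal usage sketch for rotate_to_upright, assuming the tesserocr imports
# used above (PyTessBaseAPI, PSM, Orientation) and a PIL input image; the
# helper name and file paths are illustrative:
from PIL import Image

def save_upright(src_path, dst_path):
    # Detect page orientation with OSD and save an upright copy
    rotate_to_upright(Image.open(src_path)).save(dst_path)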
def check_ocr(self):
    image = self._open_image()
    image_width = self.image_width
    image_height = self.image_height
    texts_id = self.TEXTSID
    with PyTessBaseAPI() as api:
        api.SetImage(image)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        boxes_out = np.empty([0, 4])
        class_out = []
        confi_out = []
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            # This call returns the concrete text; uncomment the prints below when needed.
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            ymin = float(box['y'] / image_height)
            xmin = float(box['x'] / image_width)
            ymax = float((box['y'] + box['h']) / image_height)
            xmax = float((box['x'] + box['w']) / image_width)
            bb = np.array([ymin, xmin, ymax, xmax])
            bb = bb.reshape((1, 4))
            # print("x:%s y:%s w:%s h:%s" % (box['x'], box['y'], box['w'], box['h']))
            # print("w:%s h:%s" % (image_width, image_height))
            # print("ymin:%s xmin:%s ymax:%s xmax:%s" % (ymin, xmin, ymax, xmax))
            boxes_out = np.append(boxes_out, bb, axis=0)
            class_out.append(texts_id)
            if conf == 0:
                conf = 99
            conf = int(conf) / 100
            confi_out.append(conf)
            # print(ocrResult)
            # print(conf)
            # print(u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
            #       "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box))
    out = []
    out.append(boxes_out)
    out.append(class_out)
    out.append(confi_out)
    return out
def execute2():
    driver.save_screenshot('driver.png')
    image_file = Image.open("driver.png")  # open colour image
    image_file = image_file.convert('L')   # convert image to monochrome - this works
    #image_file = image_file.convert('1')  # convert image to black and white
    image_file.save('result.png')
    images = ['result.png']
    with PyTessBaseAPI() as api:
        for img in images:
            api.SetImageFile(img)
            pignore = api.GetUTF8Text()
            ignore = api.AllWordConfidences()
            pignore = str(pignore)
            # The OCR output is noisy, so try several variants of the prompt text.
            patterns = [r'Please Enter. (.*)',
                        r'Please Enter, (.*)',
                        r'Ple ase Enter, (.*)']
            for pattern in patterns:
                matchObj = re.search(pattern, pignore, re.M | re.I)
                if matchObj:
                    captcha_text = matchObj.group(1)
                    driver.find_element_by_id("adcopy_response").send_keys(captcha_text)
                    button = driver.find_element_by_id("free_play_form_button")
                    driver.execute_script("$(arguments[0]).click();", button)
                    print("Waiting 60 minutes for next claim")
                    time.sleep(3600)
                    brutal()
                    break
            else:
                # No variant matched: reload the puzzle and retry
                driver.execute_script("javascript:ACPuzzle.reload('')")
                time.sleep(10)
                execute2()
def resolve(path):
    captcha_image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    captcha_image = cv2.resize(captcha_image, None, fx=6, fy=6,
                               interpolation=cv2.INTER_LINEAR)
    captcha_image = cv2.medianBlur(captcha_image, 9)
    th, captcha_image = cv2.threshold(captcha_image, 65, 250, cv2.THRESH_BINARY)
    cv2.imwrite('captcha.png', captcha_image)
    with PyTessBaseAPI() as api:
        api.SetVariable('tessedit_char_whitelist',
                        'abcdefghijklmnopqrstuvwxyz1234567890')
        api.SetImageFile('captcha.png')
        return api.GetUTF8Text().replace(' ', '').rstrip().lower()
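# Hedged wrapper for resolve: download a captcha and solve it in one call
# (`requests`, the helper name, and the default path are assumptions, not from
# the original code):
import requests

def resolve_from_url(url, path='/tmp/captcha.png'):
    with open(path, 'wb') as f:
        f.write(requests.get(url).content)
    return resolve(path)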
def readText(filename):
    res = {}
    image = Image.open(filename)
    with PyTessBaseAPI() as api:
        api.SetImage(image)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        print('Found {} textline image components.'.format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            if ocrResult != '':
                ## res[ocrResult] = (box['x'] + box['w']/2, box['y'] + box['h']/2)
                res[ocrResult] = (box['x'], box['y'])
    return res
def cyclethrough(self, index, filename, directory):
    directory = askdirectory()  # prompt user to select a folder
    for filename in os.listdir(directory):
        self.Stopper()
        if filename.endswith(".JPG"):
            img_read = cv2.imread(os.path.join(directory, filename))
            area_lower_bound = 200  # originally 300
            print(os.path.join(directory, filename))
            # potential improvement: using multiple color channels and combining results
            grayscale = cv2.cvtColor(img_read, cv2.COLOR_BGR2GRAY)
            block_size = 201
            offset = 24
            binar_adaptive = threshold_adaptive(grayscale, block_size=block_size, offset=offset)
            # next, do noise removal
            noisy = binar_adaptive.astype('uint8') * 255
            im2, contours, hierarchy = cv2.findContours(noisy, cv2.RETR_CCOMP,
                                                        cv2.CHAIN_APPROX_SIMPLE)
            large_contours = []
            for cnt in contours:
                if cv2.contourArea(cnt) > area_lower_bound:
                    large_contours.append(cnt)
            black_bg = np.zeros((img_read.shape[0], img_read.shape[1]), dtype='uint8')
            cv2.drawContours(black_bg, large_contours, -1, color=255, thickness=-1)
            # Image.fromarray(black_bg).show()
            # black text on white background
            combined = np.logical_and(255 - black_bg, 255 - noisy)
            # why are some tiny pixels left here?
            img_for_tess = Image.fromarray(combined.astype('uint8') * 255)
            with PyTessBaseAPI(psm=1) as api:
                api.SetVariable("tessedit_char_whitelist",
                                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0987654321-.:/()")
                api.SetImage(img_for_tess)
                self.DisplayImages(api.GetThresholdedImage(), os.path.join(directory, filename))
                self.DisplayOCRText(api.GetUTF8Text())
            continue
        else:
            continue
def findBoundingBoxesWord(fname):
    """Use OCR to find the bounding boxes of each word in a document"""
    # This opens the converted pdf as an image file
    image = Image.open(fname)
    # This converts the original image to RGBA to allow for alpha channel
    # composites (this allows for transparency in PIL)
    img = image.convert("RGBA")
    # This creates a new transparent image to composite with the original
    tmp = Image.new('RGBA', img.size, (0, 0, 0, 0))
    # This creates the drawing object for the overlay
    draw = ImageDraw.Draw(tmp)
    with PyTessBaseAPI() as api:
        api.SetImage(image)
        # Iterate over lines using OCR
        #boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        # Iterate over words using OCR
        boxes = api.GetComponentImages(RIL.WORD, True)
        #print('Found {} word image components.'.format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            # This calls the google translate function to translate a line of text for inclusion
            #transText = g_translate(ocrResult)
            #print(transText)
            # This gets the OCR confidence
            conf = api.MeanTextConf()
            # Scale the confidence to the opacity
            opacity = opacityConversion(conf)
            #draw = ImageDraw.Draw(image)
            draw.rectangle(((box['x'], box['y']),
                            (box['x'] + box['w'], box['y'] + box['h'])),
                           fill=(244, 167, 66, opacity))
    # This creates a composite image from the original image and the transparent overlay
    img = Image.alpha_composite(img, tmp)
    # This saves the new image
    img.save(fname)
def textConfidence(fname):
    with PyTessBaseAPI() as api:
        #for image in images:
        api.SetImageFile(fname)
        text = api.GetUTF8Text()
        #print(api.AllWordConfidences())
        print(textstat.flesch_kincaid_grade(text))
        print(textstat.flesch_reading_ease(text))
        print("90-100 : Very Easy")
        print("80-89  : Easy")
        print("70-79  : Fairly Easy")
        print("60-69  : Standard")
        print("50-59  : Fairly Difficult")
        print("30-49  : Difficult")
        print("0-29   : Very Confusing")
def process_image(img, page, refine_boxes):
    try:
        page.progress = ('Analysing layout', 0.0)
        with PyTessBaseAPI(lang='ge', psm=3) as api:
            api.SetVariable("hocr_char_boxes", "true")
            api.SetImage(img)
            api.Recognize()
            hocr = api.GetHOCRText(0)
        page.progress = ('Analysing layout', 1.0)
        page_json = process_hocr(hocr, img, page)
        if refine_boxes:
            page_json = refine(img, page_json, page)
        page_json_to_text(page_json, page)
        return page_json
    except Exception as e:
        page.progress = (f"Error processing page: {e}", -1)
        return {}
def perform_ocr(img):
    # First, preprocess the image:
    img = Image_Utils.preprocess_image(img)
    # Next, convert the image to PIL (the tesseract binding works with PIL images):
    img_pil = Image.fromarray(img)
    if not Ocr_Utils.OCR_API_HANDLE:
        # Reuse a single API handle across calls to avoid re-initialisation
        Ocr_Utils.OCR_API_HANDLE = PyTessBaseAPI(psm=PSM.SINGLE_COLUMN)
        #Ocr_Utils.OCR_API_HANDLE = PyTessBaseAPI(psm=PSM.SINGLE_BLOCK)
    Ocr_Utils.OCR_API_HANDLE.SetImage(img_pil)
    text = Ocr_Utils.OCR_API_HANDLE.GetUTF8Text()
    # Return extracted text:
    return text
def read_word(image, whitelist=None, chars=None, spaces=False):
    """
    OCR a single word from an image. Useful for captchas.
    Image should be pre-processed to remove noise etc.
    """
    api = PyTessBaseAPI()
    api.SetPageSegMode(8)  # 8 = PSM.SINGLE_WORD
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    guess = api.GetUTF8Text()
    if not spaces:
        guess = ''.join([c for c in guess if c != " "])
        guess = guess.strip()
    # Reject results with an unexpected character count
    if chars is not None and len(guess) != chars:
        return guess, None
    return guess, api.MeanTextConf()
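# Usage sketch for read_word, e.g. a 5-character lowercase captcha (the helper
# name, file path, and expected length are illustrative assumptions):
from PIL import Image

def solve_captcha(path, length=5):
    guess, conf = read_word(Image.open(path),
                            whitelist='abcdefghijklmnopqrstuvwxyz', chars=length)
    if conf is None:
        return None  # OCR result had the wrong length
    return guess, conf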
def run(is_invert_image=False, image_path=None, image_dir=None):
    with PyTessBaseAPI(path=TESSDATA_PATH, lang='chi_sim') as api:
        if image_dir is not None:
            for home, _, files in os.walk(image_dir):
                for f in files:
                    if f == '.DS_Store':
                        continue
                    img = os.path.join(home, f)
                    print(img)
                    pretreatment_imgs = pretreatment(is_invert_image, img)
                    ocr(api, img)
                    for pretreatment_img in pretreatment_imgs:
                        ocr(api, pretreatment_img)
        if image_path is not None:
            pretreatment_imgs = pretreatment(is_invert_image, image_path)
            ocr(api, image_path)
            for pretreatment_img in pretreatment_imgs:
                ocr(api, pretreatment_img)
def get_word_data(img):
    image = Image.open(img, mode='r')
    pdf = pdfpage.PDFPage('folder location', 1)
    with PyTessBaseAPI() as api:
        api.SetImage(image)
        boxes = api.GetComponentImages(
            RIL.WORD, True)  # option for TEXTLINE or SYMBOL (character) as well
        for i, (im, box, _, _) in enumerate(boxes):
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocrResult = api.GetUTF8Text()
            conf = api.MeanTextConf()
            doc_word = word.Word(i, box['x'], box['y'], box['w'], box['h'],
                                 conf, ocrResult)
            # print((u"Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
            #        "confidence: {1}, text: {2}").format(i, conf, ocrResult, **box))
            pdf.add_word(doc_word)
    pdf.sort_dictionaries()
    return pdf
def _get_word_block_list_from_image(
        self,
        image: Image,
        find_type: int,
        spec_box: dict = None) -> typing.List[WordBlock]:
    word_list = list()
    with PyTessBaseAPI(lang=self.lang) as api:
        api.SetImage(image)
        if spec_box:
            ocr_result = api.GetUTF8Text()
            return [WordBlock(box=spec_box, content=ocr_result)]
        boxes = api.GetComponentImages(find_type, True)
        for _, box, *_ in boxes:
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            ocr_result = api.GetUTF8Text()
            word_list.append(WordBlock(box=box, content=ocr_result))
    return word_list
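# Hedged usage sketch for _get_word_block_list_from_image: RIL comes from
# tesserocr, and `engine` stands in for whatever object hosts the method in
# the original class (the helper name is illustrative):
from tesserocr import RIL
from PIL import Image

def dump_word_blocks(engine, image_path):
    for block in engine._get_word_block_list_from_image(Image.open(image_path), RIL.WORD):
        print(block.box, block.content)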
def tesseract(path, filename, conf_dir, text_dir):
    # OCR - use the Tesseract API through Cython and PyTesseract
    with PyTessBaseAPI() as api:
        pathFilename = path + "/" + filename
        label_text = ""
        ri = None
        try:
            # Set the image
            api.SetImageFile(pathFilename)
            # Run and verify the recognition process
            label_text = api.GetUTF8Text()
            label_text = label_text[:-1]  # drop the trailing newline
            api.SetVariable("save_blob_choices", "T")
            api.Recognize()
            ri = api.GetIterator()
        except:
            return
        conf_text = ""
        # Iterate over each of the symbols of the file
        level = RIL.SYMBOL
        for r in iterate_level(ri, level):
            try:
                symbol = r.GetUTF8Text(level)
                conf = 0.01 * r.Confidence(level)
                # We only save non-break symbols
                if symbol not in ['\n', '\r', '\t', '\f']:
                    conf_text += symbol + "\t" + str(conf) + "\n"
            except:
                continue
        if len(conf_text) > 0:
            basename = filename[:-4]
            # Write all the characters and their confidence in the probabilities file
            conf_pathFilename = conf_dir + "/" + basename + ".prob"
            with open(conf_pathFilename, "w", encoding="utf-8") as f:
                f.write(conf_text)
            # Write the recognized text line in the text file
            text_pathFilename = text_dir + "/" + basename + ".txt"
            with open(text_pathFilename, "w", encoding="utf-8") as f:
                f.write(label_text)
def ORIG_ocr_content_confidences_images():
    #fpath = '/home/ub2/ARCHIVE/DOC_IMAGES/038c69d2-0e3e-43e4-8ea9-274b93d2144b.jpg'
    #fpath_out = '/home/ub2/ARCHIVE/DOC_IMAGES/038c69d2-0e3e-43e4-8ea9-274b93d2144b_opencv.jpg'
    #fpath = '/home/ub2/ARCHIVE/MDSCAN/completed/__ppm__-31.ppm'
    #fout = '/home/ub2/ARCHIVE/MDSCAN/completed/__ppm__-31_opencv.pdf'
    fpath = '/home/ub2/ARCHIVE/DOC_IMAGES/1482885785_8855e56.jpg'
    images = [fpath]
    r = {}
    with PyTessBaseAPI() as api:
        for img in images:
            api.SetImageFile(img)
            r['lines'] = api.GetTextlines()
            #r['images'] = api.GetComponentImages()
            #r['thresh'] = api.GetThresholdedImage()
            r['_words'] = api.GetWords()
            r['strips'] = api.GetStrips()
            r['text'] = api.GetUTF8Text()
            r['words'] = api.AllWords()
            r['word_conf'] = api.AllWordConfidences()
            word_conf_map = api.MapWordConfidences()
            #r['word_conf_map'] = api.MapWordConfidences()
            #print api.GetUTF8Text()
            #print api.AllWordConfidences()
    # api is automatically finalized when used in a with-statement (context manager);
    # otherwise api.End() should be explicitly called when it's no longer needed.

    def img_display(pil_image):
        b = BytesIO()
        pil_image.save(b, format='png')
        data = b.getvalue()
        tag = '<img src="data:image/png;base64,' + base64.encodestring(data) + '"/>'
        return BS(tag).renderContents().replace('&lt;', '<').replace('&gt;', '>').replace('\\n', '\n')
        #return tag
        #ip_img = display.Image(data=data, format='png', embed=True)
        #return ip_img._repr_png_()

    df = pd.DataFrame(map(lambda s: dict(zip(['word', 'conf'], s)), word_conf_map))
    df['images'] = map(lambda i: r['_words'][i][0], range(len(r['_words'])))
    df['image2'] = df.images.map(lambda img: img_display(img))
    h = df.sort_values(['conf', 'word'], ascending=[True, True]).head() \
          .ix[:, ['conf', 'word', 'image2']].to_html(escape=True)
    HTML(BS(h).renderContents().replace('&lt;', '<').replace('&gt;', '>').replace('\\n', '\n'))
def detector(image):
    # load the example image and convert it to grayscale
    inputImage = cv2.imread(image)
    gray = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
    # Threshold the image
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # Write the grayscale image to a temp file
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    with PyTessBaseAPI() as api:
        # Send the new grayscale image into Tesseract
        api.SetImageFile(filename)
        # We will segment the grayscale image by words
        boxes = api.GetComponentImages(RIL.WORD, True)
        # With each bounding box for the words
        for i, (im, box, _, _) in enumerate(boxes):
            # Grab the coordinates of the bounding box
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            # Turn the returned bounding box coordinates into an array of coordinates
            coord = list(box.values())
            # Load the grayscale image for cropping
            cropper = Image.open(filename)
            # Cropped image is saved into a new variable
            crop_image = cropper.crop(
                (coord[0], coord[1], coord[0] + coord[2], coord[1] + coord[3]))
            # Convert the new image into a numpy array
            cropped = numpy.array(crop_image)
            # Create the new file name for the word
            word_file = "word_" + str(i) + ".png"
            # Have OpenCV save the cropped image into the new file
            cv2.imwrite(word_file, cropped)
    # Remove the grayscale image
    os.remove(filename)
def orcTitle(path):
    # make first page into jpeg
    page = convert_from_path(path, first_page=0, last_page=1)[0]
    page.save(temp_file, 'JPEG')

    # use ocr to extract title
    # image = Image.open(temp_file)
    with PyTessBaseAPI() as api:
        api.SetImageFile(temp_file)
        api.Recognize()  # required to get results from GetIterator below

        # loop through and find the largest text size
        ri = api.GetIterator()
        level = RIL.TEXTLINE
        maxSize = 0
        for r in iterate_level(ri, level):
            # extract line of text
            text = r.GetUTF8Text(level)
            # get line's font size
            fontSize = r.WordFontAttributes()['pointsize']
            # check to see if current max;
            # remove extra spaces/newlines/tabs (etc.) when testing min length req
            if len(''.join(text.split())) > 1 and fontSize > maxSize:
                maxSize = fontSize

        # loop through again and concatenate the largest words
        ri = api.GetIterator()
        level = RIL.TEXTLINE
        title_list = []
        for r in iterate_level(ri, level):
            text = r.GetUTF8Text(level)
            fontSize = r.WordFontAttributes()['pointsize']
            if len(''.join(text.split())) > 1 and fontSize > maxSize - 15:
                # add title words to list
                title_list.extend(r.GetUTF8Text(level).split())

    # concatenate them back together
    title = ' '.join(title_list)
    os.remove(temp_file)
    return title
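# Hedged note on orcTitle: WordFontAttributes() only reports real font info
# (including 'pointsize') with the legacy Tesseract engine and v3-style
# traineddata; under LSTM-only models it can return None, which would make the
# ['pointsize'] lookup above raise TypeError. A defensive wrapper (the helper
# name is illustrative):
def pdf_title_or_none(pdf_path):
    try:
        return orcTitle(pdf_path)
    except TypeError:
        return None  # no font attributes available from this model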
def find_word_attribute(image, tessdata_3_path):
    # Read the image
    raw_img = Image.open(image)
    # Scale the image
    img = scale_image(raw_img)
    # Initialize parameters
    word_arr = []
    bold_arr = []
    # Use PyTessBaseAPI to read the font attributes
    with PyTessBaseAPI(path=tessdata_3_path) as api:
        api.SetImage(img)
        api.Recognize(0)
        #print(api.GetUTF8Text())
        ri = api.GetIterator()
        level = RIL.WORD
        for r in iterate_level(ri, level):
            bb = r.BoundingBox(level)
            if bb is not None:
                word = r.GetUTF8Text(level)
                #word_arr.append(word)
                font_name = r.WordFontAttributes()
                #attr.append(font_name)
                if word is not None and font_name is not None:
                    word_arr.append(word)
                    bold_arr.append(font_name)
                Lang_name = r.WordRecognitionLanguage()
                bool_value = r.WordIsFromDictionary()
                conf = r.Confidence(level)
    df1 = pd.DataFrame(word_arr)
    df2 = pd.DataFrame(bold_arr)
    df = pd.concat([df1, df2], axis=1)
    df.rename(columns={df.columns[0]: "Word"}, inplace=True)
    return df
def show_frame(cap, imgLabel, imgText):
    ret, frame = cap.read()
    img = cv.cvtColor(frame, cv.COLOR_RGB2BGR)
    im = Image.fromarray(img)
    with PyTessBaseAPI() as api:
        api.SetImage(im)
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        ocrResult = api.GetUTF8Text()
        #print('Found {} textline image components.'.format(len(boxes)))
        for i, (im, box, _, _) in enumerate(boxes):
            # im is a PIL image object
            # box is a dict with x, y, w and h keys
            #api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            #conf = api.MeanTextConf()
            cv.rectangle(img, (box['x'], box['y']),
                         (box['x'] + box['w'], box['y'] + box['h']), (255, 0, 0), 1)
    im = Image.fromarray(img)
    imgtk = ImageTk.PhotoImage(image=im)
    imgLabel.imgtk = imgtk
    imgLabel.configure(image=imgtk)
    imgText.configure(text=ocrResult)
    imgLabel.after(10, lambda: show_frame(cap, imgLabel, imgText))
def Spaces(image=None):
    PrintGood('This returns the number of spaces in a specific image or images')
    if not isinstance(image, list):
        image = PromptList('Which image/images to Scan: ', image)
    Spaces = 0  # accumulate across all images
    for image in image:
        image = Image.open(image)
        with PyTessBaseAPI() as api:
            api.SetImage(image)
            boxes = api.GetComponentImages(RIL.TEXTLINE, True)
            for i, (im, box, _, _) in enumerate(boxes):
                im.save('saving{}.jpg'.format(i))
                api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
                ocrResult = api.GetUTF8Text()
                conf = api.MeanTextConf()
                # approximate the count by the number of space-separated tokens
                text = str(ocrResult).replace('\n', '').split(' ')
                Spaces = len(text) + Spaces
    return int(Spaces)
def symbolConfidenc(img):
    word = ''
    with PyTessBaseAPI() as api:
        api.SetImageFile(img)
        api.Recognize()
        ri = api.GetIterator()
        #levelTwo = RIL.TEXTLINE
        level = RIL.WORD
        for r in iterate_level(ri, level):
            #space = r.GetUTF8Text(levelTwo)  # gets the whole line, unlike RIL.SYMBOL
            symbol = r.GetUTF8Text(level)  # r == ri
            conf = r.Confidence(level)
            # keep only words recognized with more than 50% confidence
            if conf > 50:
                word = word + ' ' + symbol
    return word
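# Hedged sketch of the RIL.SYMBOL variant hinted at in symbolConfidenc: iterate
# per character instead of per word and collect (symbol, confidence) pairs
# (imports assumed as above: PyTessBaseAPI, RIL, iterate_level; the helper name
# is illustrative):
def symbol_confidences(img):
    results = []
    with PyTessBaseAPI() as api:
        api.SetImageFile(img)
        api.Recognize()
        ri = api.GetIterator()
        for r in iterate_level(ri, RIL.SYMBOL):
            results.append((r.GetUTF8Text(RIL.SYMBOL), r.Confidence(RIL.SYMBOL)))
    return results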