def __init__(self, image_file, tessdata):
    api = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
    api.SetImageFile(image_file)
    api.SetVariable("textord_tablefind_recognize_tables", "T")
    api.SetVariable("textord_tabfind_find_tables", "T")
    api.Recognize()
    self.api = api
def init_api(**kwargs):
    api = PyTessBaseAPI(**kwargs)
    # outputbase digits
    api.SetVariable("tessedit_char_whitelist", "0123456789")
    api.SetVariable("classify_bln_numeric_mode", "1")
    api.SetVariable("tessedit_do_invert", "0")
    # api.SetVariable("dpi", "300")
    return api
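A minimal usage sketch for the digit-only helper above, assuming it is importable from the surrounding module; the PSM value and the input file name are placeholders, not part of the original code.

from PIL import Image

api = init_api(psm=7)  # single text line; PSM value is an assumption
api.SetImage(Image.open("meter_reading.png"))  # hypothetical input image
digits = api.GetUTF8Text().strip()
confidence = api.MeanTextConf()
api.End()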
def read_text_with_confidence(image,
                              lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4,
                              whitelist=''):
    height, width = image.shape[:2]
    if height <= 0 or width <= 0:
        return '', 0

    image_pil = Image.fromarray(image)
    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)
    # default values so the function still returns cleanly if Tesseract raises
    text, confidence = '', 0
    try:
        api.SetImage(image_pil)
        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)
        api.Recognize()
        text = api.GetUTF8Text()
        confidence = api.MeanTextConf()
    except Exception:
        print("[ERROR] Tesseract exception")
    finally:
        api.End()
    return text, confidence
class Ocr:
    def __init__(self):
        self.api = None

    def __enter__(self):
        self.api = PyTessBaseAPI().__enter__()
        self.api.SetVariable('tessedit_char_whitelist', OCR_CHARACTER_WHITELIST)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.api.__exit__(exc_type, exc_val, exc_tb)

    def get_characters(self, image):
        h, w = image.shape[:2]
        if h < 1 or w < 1:
            raise NoImageError()
        img_pil = Image.fromarray(image)
        self.api.SetImage(img_pil)
        cell_text = self.api.GetUTF8Text().strip()
        confidence = self.api.MeanTextConf()
        return cell_text, confidence
def getWords(pages, letters_cache):
    standard_words, split_words, letters = [], [], {'bid': [], 'letters': []}
    prev_word = None
    letter_detect = PyTessBaseAPI(psm=8, lang='eng')
    letter_detect.SetVariable('tessedit_char_whitelist', ascii_uppercase)
    bid = 0
    for pg_num in pages:
        page = doc[pg_num]
        # get initial block bounding boxes
        blocks = []
        for block in page.getText("blocks"):
            bbox = block[:4]
            text = block[4].strip()
            if len(text) != 1:  # not a single letter
                blocks.append({
                    'bid': bid,
                    'bbox': bbox,
                    'pg': page.number,
                    'text': text
                })
                bid += 1
            elif not letters_cache:
                # maps each bid to a corresponding dictionary letter
                # this provides a heuristic for our search
                sf, eps = 25 / 6, 1
                pix = page.getPixmap(matrix=fitz.Matrix(sf, sf))
                img = Image.open(io.BytesIO(pix.getPNGData()))
                bbox = resize(bbox, sf, eps)
                block_img = img.crop(bbox)
                letter_detect.SetImage(block_img)
                letter_detect.Recognize()
                letter = letter_detect.AllWords()[0]
                assert (len(letter) == 1)
                letters['bid'].append(bid)
                letters['letters'].append(letter.lower())
        standard, split, prev_word, insert_word = groupBlocks(
            blocks, prev_word, pg_num)
        # last block from previous page (no spillover)
        if insert_word:
            add_word(standard, insert_word)
        # clean up
        standard_words.extend(standard)
        split_words.extend(split)
    # add the last word
    if prev_word:
        add_word(standard, prev_word)
    # make sure all the blocks are properly formatted
    for word in chain(standard_words, split_words):
        test_word_format(word)
    return standard_words, split_words, letters
def read_char(image, whitelist=None):
    """ OCR a single character from an image. Useful for captchas."""
    api = PyTessBaseAPI()
    api.SetPageSegMode(10)
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    return api.GetUTF8Text().strip()
def read_word(image, whitelist=None, chars=None, spaces=False):
    """ OCR a single word from an image. Useful for captchas.
    Image should be pre-processed to remove noise etc. """
    api = PyTessBaseAPI()
    api.SetPageSegMode(8)
    if whitelist is not None:
        api.SetVariable("tessedit_char_whitelist", whitelist)
    api.SetImage(image)
    api.Recognize()
    guess = api.GetUTF8Text()
    if not spaces:
        guess = ''.join([c for c in guess if c != " "])
    guess = guess.strip()
    if chars is not None and len(guess) != chars:
        return guess, None
    return guess, api.MeanTextConf()
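A hedged example of calling read_word above on a pre-processed captcha crop; the file name, whitelist, and the 6-character length check are assumptions for illustration only.

from PIL import Image

captcha = Image.open("captcha_cleaned.png")  # hypothetical pre-processed crop
guess, conf = read_word(captcha, whitelist="ABCDEFGHJKLMNPQRSTUVWXYZ23456789", chars=6)
if conf is None:
    print("rejected: unexpected length", guess)
else:
    print("guess %s (confidence %d)" % (guess, conf))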
class OcrWrapper(BaseImageToString):
    _OPTIONS = ('tessedit_char_whitelist', '0123456789ABCDEF.-')

    def __init__(self):
        if sys.platform == 'win32':
            self._ocr = PyTessBaseAPI(
                path="C:\\Program Files\\Tesseract-OCR\\tessdata")
        else:
            self._ocr = PyTessBaseAPI()
        self._ocr.SetVariable(self._OPTIONS[0], self._OPTIONS[1])

    def image_to_string(self, image: Image) -> str:
        image.format = 'PNG'
        self._ocr.SetImage(image)
        raw_data = self._ocr.GetUTF8Text()
        return raw_data

    def end(self):
        self._ocr.End()
class OCR():
    ocr_api = None

    def __init__(self):
        self.reset()

    def __del__(self):
        del self.ocr_api

    def reset(self):
        self.ocr_api = PyTessBaseAPI(oem=OEM.TESSERACT_ONLY)

    def ocr_filter_img(self, im):
        if im is None:
            return im
        dat = im.getdata()
        f = []
        for d in dat:
            if d[0] >= 254 and d[1] >= 254 and d[2] >= 254:  # chp catk
                f.append((0, 0, 0))
            elif d[0] <= 28 and d[1] == 255 and d[2] <= 80:  # chp catk boost
                f.append((0, 0, 0))
            elif d[0] == 255 and d[1] <= 2 and d[2] <= 2:  # chp catk malus
                f.append((0, 0, 0))
            elif 164 <= d[0] <= 179 and 211 <= d[1] <= 230 and d[2] >= 233:  # smana
                f.append((0, 0, 0))
            elif 175 <= d[0] <= 205 and 190 <= d[1] <= 220 and 215 <= d[2] <= 235:  # mana
                f.append((0, 0, 0))
            elif d[0] == 245 and d[1] == 245 and d[2] == 250:  # hp
                f.append((0, 0, 0))
            elif d[0] == 246 and d[1] == 227 and d[2] == 227:  # card cost
                f.append((0, 0, 0))
            else:
                f.append((255, 255, 255))
        im.putdata(f)
        im = ImageOps.grayscale(im)
        # im = im.filter(ImageFilter.GaussianBlur(4))
        # im = ImageOps.invert(im)
        return im

    def filter_img(self, im):
        if im is None:
            return im
        # im = ImageOps.grayscale(im)
        # im = im.filter(ImageFilter.GaussianBlur(4))
        # im = ImageOps.invert(im)
        return im

    def ocr_txt(self, img):
        im = self.ocr_filter_img(img)
        self.ocr_api.SetPageSegMode(PSM.SINGLE_BLOCK)
        self.ocr_api.SetVariable('tessedit_char_whitelist', ascii_letters)
        self.ocr_api.SetVariable('tessedit_char_blacklist', digits)
        self.ocr_api.SetImage(im)
        text = self.ocr_api.GetUTF8Text().strip('\n')
        # logging.info("Btn text detected as %s", text)
        return text.lower()

    def ocr_number(self, img):
        im = self.ocr_filter_img(img)
        self.ocr_api.SetVariable('tessedit_char_whitelist', digits)
        self.ocr_api.SetVariable('tessedit_char_blacklist', ascii_letters)
        self.ocr_api.SetPageSegMode(PSM.SINGLE_WORD)
        self.ocr_api.SetImage(im)
        number = self.ocr_api.GetUTF8Text().strip('\n')
        try:
            number = int(number)
        except ValueError:
            number = -1
        logging.info("OCR number %i >", number)
        return int(number)
class TT2Predictor:
    """holds the several trainer predictor instances and common operations """

    def __init__(self, **kwargs):
        self.trainers_predictors_list = []
        self.text_predictors_list = [
            ("previous_level", (1212, 231, 1230, 280), "0123456789", "8"),
            ("main_level", (1203, 323, 1223, 399), "0123456789", "8"),
            ("next_level", (1212, 445, 1230, 493), "0123456789", "8"),
            ("sub_level", (1177, 625, 1203, 692), "0123456789/", "8"),
            ("gold", (1091, 283, 1126, 471),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "7"),
            ("current_dps_down_no_tab", (389, 562, 423, 709),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "8"),
            ("last_hero", (124, 109, 148, 430),
             "0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
             "7")
        ]
        self.api = PyTessBaseAPI()
        self.api.Init()
        print(tesserocr.tesseract_version())
        print(tesserocr.get_languages())
        self.global_image = None
        self.status = CurrentStatus()
        boss_trainer = TrainerPredictor(
            "boss_active_predictor",
            ["boss_active", "boss_inactive", "no_boss"],
            (1224, 555, 1248, 648), 12, 46, 255.0, [200, 30])
        egg_trainer = TrainerPredictor("egg_active_predictor",
                                       ["egg_active", "egg_inactive"],
                                       (741, 31, 761, 64), 10, 16, 255.0,
                                       [200, 30])
        gold_pet_trainer = TrainerPredictor(
            "gold_pet_predictor",
            ["goldpet", "nopet", "normalpet", "partial pet"],
            (624, 364, 734, 474), 40, 40, 255.0, [200, 30])
        tab_predictor = TrainerPredictor("tab_predictor", [
            "skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
            "relic_tab", "shop_tab", "no_tab"
        ], (51, 1, 59, 717), 2, 179, 255.0, [200, 30])
        self.trainers_predictors_list.append(boss_trainer)
        self.trainers_predictors_list.append(egg_trainer)
        self.trainers_predictors_list.append(gold_pet_trainer)
        self.trainers_predictors_list.append(tab_predictor)
        for trainer in self.trainers_predictors_list:
            pass
            # trainer.crop_images()
            # trainer.process_images()
            # trainer.read_and_pickle()
            # trainer.train_graph()
        saved_classes_file = (glo.DATA_FOLDER +
                              "/dataforclassifier/TrainerPredictor_list.pickle")
        save_pickle(saved_classes_file, self.trainers_predictors_list)

    def parse_raw_image(self):
        with open(glo.RAW_FULL_FILE, 'rb') as f:
            image = Image.frombytes('RGBA', (1280, 720), f.read())
        for class_predictor in self.trainers_predictors_list:
            class_predictor.predict_crop(image)
        self.global_image = image
        image.save(glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + "/fullcapture" +
                   time.strftime("%Y%m%d-%H%M%S-%f") +
                   ".png")  # save original capture copy

    def parse_image_text(self, predict_map):
        return_dict = {}
        for text_predictor in self.text_predictors_list:
            if text_predictor[0] in predict_map:
                img = self.global_image.crop(text_predictor[1])
                img = img.convert('L')
                img = img.rotate(90, expand=True)
                self.api.SetImage(img)
                self.api.SetVariable("tessedit_char_whitelist",
                                     text_predictor[2])
                self.api.SetVariable("tessedit_pageseg_mode",
                                     text_predictor[3])
                self.api.SetVariable("language_model_penalty_non_dict_word",
                                     "0")
                self.api.SetVariable("doc_dict_enable", "0")
                text_capture = self.api.GetUTF8Text().encode('utf-8').strip()
                return_dict[text_predictor[0]] = text_capture
                print("raw text capture ", text_predictor[0], ":",
                      text_capture)
                self.api.Clear()
        return return_dict

    def predict_parsed_all(self):
        pred_dict = {}
        for class_predictor in self.trainers_predictors_list:
            pred_dict[class_predictor.name] = class_predictor.predict_parsed()
        return pred_dict

    def predict_parsed(self, predict_map, predict_map_text, **kwargs):
        pred_dict = {"transition_level": False}
        # check if image is level transitioning. trivial prediction.
        if "empty_image" in kwargs and kwargs["empty_image"] is False:
            pass
        else:
            img = self.global_image.crop((0, 0, 100, 100))  # black corner
            extrema = img.convert("L").getextrema()
            if extrema[0] == extrema[1]:  # only one color
                print("warning level transitioning")
                pred_dict["transition_level"] = True
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name in predict_map:
                pred_dict[
                    class_predictor.name] = class_predictor.predict_parsed()
        pred_dict_text = self.parse_image_text(predict_map_text)
        pred_dict.update(pred_dict_text)
        self.status.update_status(pred_dict, self.trainers_predictors_list)
        return pred_dict

    def predict(self):
        self.parse_raw_image()
        return self.predict_parsed_all()

    def check_predict(self, pred_dict, predictor, classification):
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name == predictor:
                return int(
                    pred_dict[predictor]
                ) == class_predictor.pred_classes.index(classification)
def tesser_ocr(
    image: np.ndarray,
    expected_type: Optional[Callable[[str], T]] = None,
    whitelist: Optional[str] = None,
    invert: bool = False,
    scale: float = 1,
    blur: Optional[float] = None,
    engine: tesserocr.PyTessBaseAPI = tesseract_only,
    warn_on_fail: bool = False,
) -> Optional[T]:
    with lock:
        if image.shape[0] <= 1 or image.shape[1] <= 1:
            if not expected_type or expected_type is str:
                return ""
            else:
                return None

        if whitelist is None:
            if expected_type is int:
                whitelist = string.digits
            elif expected_type is float:
                whitelist = string.digits + "."
            else:
                whitelist = string.digits + string.ascii_letters + string.punctuation + " "

        # print('>', whitelist)
        engine.SetVariable("tessedit_char_whitelist", whitelist)
        if invert:
            image = 255 - image
        if scale != 1:
            image = cv2.resize(image, (0, 0), fx=scale, fy=scale,
                               interpolation=cv2.INTER_LINEAR)
        if blur:
            image = cv2.GaussianBlur(image, (0, 0), blur)

        # if debug:
        #     cv2.imshow('tesser_ocr', image)
        #     cv2.waitKey(0)

        if len(image.shape) == 2:
            height, width = image.shape
            channels = 1
        else:
            height, width, channels = image.shape
        engine.SetImageBytes(image.tobytes(), width, height, channels,
                             width * channels)
        text: str = engine.GetUTF8Text()
        if " " not in whitelist:
            text = text.replace(" ", "")
        if "\n" not in whitelist:
            text = text.replace("\n", "")
        if not any(c in whitelist for c in string.ascii_lowercase):
            text = text.upper()

        if expected_type:
            try:
                return expected_type(text)
            except Exception:
                try:
                    caller = inspect.stack()[1]
                    logger.log(
                        logging.WARNING if warn_on_fail else logging.DEBUG,
                        f"{os.path.basename(caller.filename)}:{caller.lineno} {caller.function} | "
                        f"Got exception interpreting {text!r} as {expected_type.__name__}",
                    )
                except:
                    logger.log(
                        logging.WARNING if warn_on_fail else logging.DEBUG,
                        f"Got exception interpreting {text!r} as {expected_type.__name__}",
                    )
                return None
        else:
            return text
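A small usage sketch for tesser_ocr above; the screenshot file name, the crop coordinates, and reuse of the module-level default engine are assumptions. The function returns None when the recognized text cannot be parsed as the expected type.

import cv2

frame = cv2.imread("scoreboard.png", cv2.IMREAD_GRAYSCALE)  # hypothetical screenshot
score_crop = frame[40:70, 1200:1280]  # hypothetical region containing digits
score = tesser_ocr(score_crop, expected_type=int, scale=2, warn_on_fail=True)
if score is not None:
    print("score:", score)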
def _get_tesseract():
    tesseract = PyTessBaseAPI()
    tesseract.SetVariable("tessedit_char_whitelist",
                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ' ")
    tesseract.SetPageSegMode(PSM.SINGLE_LINE)
    return tesseract
class ViewerWindow(Gtk.Window): def __init__(self, filenames, kind, show, ml): Gtk.Window.__init__(self) self.ptx = 0 self.pty = 0 self.focus_id = -1 self.file_idx = 0 self.kind = kind self.show_hidden = show self.ml = ml self.screen_hint = '' self.in_hint_screen = False self.colors = {} self.memory = {} self.elem_models = {} self.filenames = filenames self.tesapi = PyTessBaseAPI(lang='eng') self.tesapi.SetVariable("tessedit_char_whitelist", WHITELIST) self.init_ui() self.load() def init_ui(self): self.connect("delete-event", Gtk.main_quit) darea = Gtk.DrawingArea() darea.connect("draw", self.on_draw) darea.connect("motion-notify-event", self.move_over) darea.connect("button-release-event", self.click_evt) darea.connect("scroll-event", self.scroll_evt) darea.connect("key-release-event", self.key_evt) darea.set_events(Gdk.EventMask.POINTER_MOTION_MASK | Gdk.EventMask.BUTTON_RELEASE_MASK | Gdk.EventMask.BUTTON_PRESS_MASK | Gdk.EventMask.SCROLL_MASK | Gdk.EventMask.KEY_PRESS_MASK | Gdk.EventMask.KEY_RELEASE_MASK) darea.set_can_focus(True) self.add(darea) self.show_all() def load(self, prev=False): if self.file_idx == len(self.filenames): Gtk.main_quit() return if prev: self.file_idx -= 2 filename = self.filenames[self.file_idx] (self.app, self.scr) = util.get_aux_info(filename) if self.app not in self.memory: self.memory[self.app] = {} self.set_title(filename) self.file_idx += 1 print("Loading %s" % filename) self.pngfile = os.path.splitext(filename)[0] + '.png' self.descname = os.path.splitext(filename)[0] + '.%s.txt' % self.kind starttime = time.time() self.tree = analyze.load_tree(filename) hidden.find_hidden_ocr(self.tree) hidden.mark_children_hidden_ocr(self.tree) util.print_tree(self.tree, show_hidden=self.show_hidden) if self.ml: self.get_ml_rets() else: self.load_desc() endtime = time.time() print("Load time: %.3fs" % (endtime - starttime)) self.focus_id = -1 self.colors = {} self.ptx = self.pty = 0 self.img = cairo.ImageSurface.create_from_png(self.pngfile) print('Image:', self.img.get_width(), self.img.get_height()) root_item_id = min(self.tree) root_node = self.tree[root_item_id] print('Root node:', root_node['width'], root_node['height']) self.scale = 1.0 * self.img.get_width() / config.width #self.scale = analyze.find_closest(self.scale, analyze.SCALE_RATIOS) print('Scale:', '%.3f' % self.scale, '->', '%.3f' % self.scale) self.resize(self.img.get_width(), self.img.get_height()) self.mark_depth(self.tree) for item_id in self.tree: color_r = random.random() / 2 color_g = random.random() / 2 color_b = random.random() / 2 self.colors[item_id] = (color_r, color_g, color_b) imgocr = Image.open(self.pngfile) self.imgwidth = imgocr.width self.imgheight = imgocr.height #imgocr2 = imgocr.convert("RGB").resize( # (imgocr.width * OCR_RATIO, imgocr.height * OCR_RATIO)) self.tesapi.SetImage(imgocr) self.tesapi.SetSourceResolution(config.ocr_resolution) self.dump_memory() def remember(self, node, desc): nodeid = node['id'] if not node['id']: return if node['id'] in self.memory[self.app]: if desc != self.memory[self.app][nodeid]: # multiple! 
self.memory[self.app][nodeid] = 'MUL' else: self.memory[self.app][node['id']] = desc def forget(self, node): if node['id'] in self.memory[self.app]: del self.memory[self.app][node['id']] def get_elem_model(self, app): elem_clas = elements.getmodel("../model/", "../guis/", app, "../guis-extra/", config.extra_element_scrs) self.elem_models[app] = elem_clas def get_ml_rets(self): if self.app not in self.elem_models: self.get_elem_model(self.app) guess_descs = {} guess_items = {} # type: Dict[str, List[int]] guess_score = {} elem_clas = self.elem_models[self.app] elem_clas.set_imgfile(self.pngfile) treeinfo = analyze.collect_treeinfo(self.tree) for itemid in self.tree: (guess_element, score) = elem_clas.classify(self.scr, self.tree, itemid, None, treeinfo) if guess_element != 'NONE': if tags.single(guess_element, self.scr) and guess_element in guess_items: old_item = guess_items[guess_element][0] if guess_score[old_item] < score: guess_items[guess_element] = [itemid] guess_score[itemid] = score del guess_descs[old_item] guess_descs[itemid] = guess_element else: guess_descs[itemid] = guess_element guess_score[itemid] = score guess_items[guess_element] = ( guess_items.get(guess_element, []) + [itemid]) for nodeid in guess_descs: self.tree[nodeid]['label'] = guess_descs[nodeid] def load_desc(self): if os.path.exists(self.descname): with open(self.descname) as inf: for line in inf.read().split('\n'): if not line: continue (item_id, desc) = line.split(' ', 1) item_id = int(item_id) found = False for nodeid in self.tree: node = self.tree[nodeid] if item_id in node['raw']: if 'label' in node: node['label'] += ' ' + desc else: node['label'] = desc print(nodeid, '(', item_id, ')', '->', desc) self.remember(node, desc) found = True break if not found: print("WARNING: %s (%s) is missing!" 
% (item_id, desc)) def mark_depth(self, tree): for item_id in tree: node = tree[item_id] if 'depth' in node: continue self.mark_depth_node(tree, item_id, 0) def mark_depth_node(self, tree, node_id, depth): node = tree[node_id] node['depth'] = depth node['descs'] = [] for child in node['children']: descs = self.mark_depth_node(tree, child, depth + 1) node['descs'] += descs return node['descs'] + [node_id] def get_node_info(self, node): (x, y, width, height, depth) = (node['x'], node['y'], node['width'], node['height'], node['depth']) x *= self.scale y *= self.scale width *= self.scale height *= self.scale width = min(width, self.imgwidth) height = min(height, self.imgheight) if x < 0: width += x x = 0 if y < 0: height += y y = 0 return (x, y, width, height, depth) def find_containing_widget(self, px, py): max_depth = 0 max_id = -1 for item_id in self.tree: node = self.tree[item_id] if self.ignore_node(node): continue if self.inside(node, px, py): if node['depth'] > max_depth: max_depth = node['depth'] max_id = item_id return max_id def inside(self, node, px, py): (x, y, width, height, depth) = self.get_node_info(node) return x <= px and x + width >= px and y <= py and y + height >= py def ignore_node(self, node): if node['class'].upper() == 'OPTION': return True if node.get('visible', '') == 'hidden': return True return False def on_draw(self, wid, ctx): ctx.select_font_face("Arial", cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_BOLD) ctx.set_source_surface(self.img, 0, 0) ctx.paint() ctx.set_font_size(20) ctx.set_line_width(5) ctx.set_source_rgb(1.0, 0.0, 0.0) max_click_id = -1 max_click_depth = 0 max_id = self.find_containing_widget(self.ptx, self.pty) for item_id in self.tree: node = self.tree[item_id] depth = node['depth'] if max_id in node['descs'] and node['click']: if depth > max_click_depth: max_click_depth = depth max_click_id = item_id for item_id in self.tree: node = self.tree[item_id] if self.ignore_node(node): continue if item_id == max_id: region_mode = False else: region_mode = True (x, y, width, height, depth) = self.get_node_info(node) if not self.inside(node, self.ptx, self.pty): continue self.show_widget(ctx, item_id, not region_mode, not region_mode) if max_click_id != -1 and max_click_id != max_id: self.show_widget(ctx, max_click_id, False, True) if self.focus_id >= 0: self.show_widget(ctx, self.focus_id, True, True, (1, 0, 0)) for itemid in self.tree: node = self.tree[itemid] if 'label' in node: if itemid == self.focus_id: color = (0, 1, 0) else: color = (0, 0, 1) self.show_widget(ctx, itemid, True, False, (0, 0, 1)) self.show_desc(ctx, node, color) #s.write_to_png('test.png') #os.system("%s %s" % (config.picviewer_path, 'test.png')) #report_time(start_time, "displayed") def move_sibling(self, to_next): leaf_list = [] any_list = [] for itemid in self.tree: node = self.tree[itemid] if not self.inside(node, self.clickx, self.clicky): continue if len(node['children']) == 0: leaf_list.append(itemid) any_list.append(itemid) for i in range(len(leaf_list)): if leaf_list[i] == self.focus_id: if to_next: idx = (i + 1) % len(leaf_list) else: idx = (i - 1) % len(leaf_list) self.focus_id = leaf_list[idx] return if len(leaf_list) == 0: for i in range(len(any_list)): if any_list[i] == self.focus_id: if to_next: idx = (i + 1) % len(any_list) else: idx = (i - 1) % len(any_list) self.focus_id = any_list[idx] return self.focus_id = any_list[0] else: self.focus_id = leaf_list[0] def show_widget(self, ctx, item_id, fill, show_text, colors=None): node = self.tree[item_id] (x, y, width, 
height, depth) = self.get_node_info(node) if colors is None: color_r = self.colors[item_id][0] color_g = self.colors[item_id][1] color_b = self.colors[item_id][2] else: (color_r, color_g, color_b) = colors ctx.rectangle(x, y, width, height) if fill: ctx.set_source_rgba(color_r, color_g, color_b, 0.3) ctx.fill() else: ctx.set_source_rgba(color_r, color_g, color_b, 1) ctx.set_line_width(5) ctx.stroke() if show_text: max_char = int(width / ctx.text_extents("a")[2]) text = str(item_id) if node['click']: text = 'C' + text if node['text']: text = text + ':' + node['text'][:(max_char - 5)] elif node['id']: text += '#' + node['id'][:(max_char - 5)] self.show_text(ctx, x + width / 2, y + height / 2, text, color_r, color_g, color_b) def show_desc(self, ctx, node, color=(0, 0, 1)): desc = node['label'] (x, y, width, height, depth) = self.get_node_info(node) self.show_text(ctx, x + width / 2, y + height / 2, desc, color[0], color[1], color[2]) def show_text(self, ctx, x, y, text, color_r, color_g, color_b): x_bearing, y_bearing, text_width, text_height = ctx.text_extents( text)[:4] ctx.move_to(x - text_width / 2, y + text_height / 2) ctx.set_source_rgba(1, 1, 1, 1) ctx.set_line_width(5) ctx.text_path(text) ctx.stroke() ctx.move_to(x - text_width / 2, y + text_height / 2) ctx.set_source_rgba(color_r, color_g, color_b, 1) ctx.text_path(text) ctx.fill() def move_over(self, widget, evt): self.ptx = evt.x self.pty = evt.y self.queue_draw() def click_evt(self, widget, evt): if self.in_hint_screen: self.process_screen_hint_click(evt) return if evt.button == 3: self.focus_id = -1 else: self.clickx = evt.x self.clicky = evt.y self.focus_id = self.find_containing_widget(evt.x, evt.y) self.queue_draw() def scroll_evt(self, widget, evt): if self.focus_id == -1: return scroll_up = evt.direction == Gdk.ScrollDirection.UP if scroll_up: self.focus_id = self.find_parent_widget(self.focus_id) else: self.focus_id = self.find_child_widget(self.focus_id) self.queue_draw() def find_parent_widget(self, wid): for itemid in self.tree: node = self.tree[itemid] if self.ignore_node(node): continue if wid in node['children']: return itemid return wid def find_child_widget(self, wid): for itemid in self.tree[wid]['children']: node = self.tree[itemid] if self.ignore_node(node): continue if self.inside(node, self.clickx, self.clicky): return itemid return wid def mark_direct(self): enter = self.get_text('Please enter id_label', 'format: <id> <label>') if enter is None: return if ' ' in enter: nodeid, label = enter.split(' ') else: nodeid = enter label = '' nodeid = int(nodeid) if nodeid not in self.tree: print('missing node', nodeid) return node = self.tree[nodeid] self.mark_node(node, label) def mark_focused(self): if self.focus_id < 0: return node = self.tree[self.focus_id] label = self.get_text( 'Please enter label', 'label for %s: %s (%s) #%s' % (self.focus_id, node['text'], node['desc'], node['id'])) if label is None: return if self.ml: if label == '': if 'label' not in self.tree[self.focus_id]: return self.generate_negative_hint(self.tree[self.focus_id]['label']) del self.tree[self.focus_id]['label'] else: self.generate_hint_for_widget(self.focus_id, label) self.add_label(node, label) else: self.mark_node(node, label) def generate_hint_for_widget(self, nodeid, label): return self.generate_hint(label, locator.get_locator(self.tree, nodeid)) def generate_negative_hint(self, label): return self.generate_hint(label, 'notexist') def generate_hint(self, label, hint): print("@%s.%s %s" % (self.scr, label, hint)) def mark_node(self, 
node, label): if label == '': if 'label' in node: del node['label'] self.forget(node) else: self.add_label(node, label) self.remember(node, label) self.save_labels() def ocr_text(self): node = self.tree[self.focus_id] (x, y, width, height, _) = self.get_node_info(node) print(x, y, width, height) x = max(x - 1, 0) y = max(y - 1, 0) width = min(width + 2, self.imgwidth) height = min(height + 2, self.imgheight) #self.tesapi.SetRectangle(x * OCR_RATIO, y * OCR_RATIO, # width * OCR_RATIO, height * OCR_RATIO) self.tesapi.SetRectangle(x, y, width, height) print("OCR ret:", self.tesapi.GetUTF8Text()) x = min(x + width * 0.05, self.imgwidth) y = min(y + height * 0.05, self.imgheight) width *= 0.9 height *= 0.9 self.tesapi.SetRectangle(x, y, width, height) print("OCR ret:", self.tesapi.GetUTF8Text()) def save_region(self): if self.focus_id == -1: return node = self.tree[self.focus_id] (x, y, width, height, _) = self.get_node_info(node) x = max(x - 1, 0) y = max(y - 1, 0) width = min(width + 2, self.imgwidth) height = min(height + 2, self.imgheight) regimg = cairo.ImageSurface(cairo.FORMAT_RGB24, int(width), int(height)) ctx = cairo.Context(regimg) ctx.set_source_surface(self.img, -x, -y) ctx.paint() regimg.write_to_png("/tmp/region.png") def dump_memory(self): for _id in self.memory[self.app]: print('MEM %s -> %s' % (_id, self.memory[self.app][_id])) def add_label(self, node, desc): print('%s -> %s' % (util.describe_node(node, short=True), desc)) node['label'] = desc def auto_label(self): for nodeid in self.tree: node = self.tree[nodeid] if 'label' not in node and node['id'] in self.memory[self.app]: if self.memory[self.app][node['id']] != 'MUL': self.add_label(node, self.memory[self.app][node['id']]) else: print('skip MUL id: %s' % node['id']) self.save_labels() def remove_all(self): for nodeid in self.tree: node = self.tree[nodeid] if 'label' in node: del node['label'] def process_screen_hint_click(self, evt): click_id = self.find_containing_widget(evt.x, evt.y) if click_id == -1: print('Invalid widget selected') return hint = locator.get_locator(self.tree, click_id) if hint is None: print('Cannot generate hint for this widget') return hint = str(hint) if evt.button == 3: # negate hint = 'not ' + hint print('Widget hint: "%s"' % hint) self.add_screen_hint(hint) def add_screen_hint(self, hint): if self.screen_hint == '': self.screen_hint = hint else: self.screen_hint += ' && ' + hint def hint_screen(self): if not self.in_hint_screen: label = self.get_text('Please enter screen name', 'screen name like "signin"') if label is None: return self.screen_hint_label = label self.in_hint_screen = True self.screen_hint = '' else: self.in_hint_screen = False print("%%%s %s" % (self.screen_hint_label, self.screen_hint)) def key_evt(self, widget, evt): if evt.keyval == Gdk.KEY_space: self.mark_focused() elif evt.keyval == Gdk.KEY_Tab: self.load() elif evt.keyval == Gdk.KEY_Left: self.move_sibling(to_next=True) elif evt.keyval == Gdk.KEY_Right: self.move_sibling(to_next=False) elif evt.keyval == Gdk.KEY_v: self.ocr_text() elif evt.keyval == Gdk.KEY_a: self.auto_label() elif evt.keyval == Gdk.KEY_p: self.load(prev=True) elif evt.keyval == Gdk.KEY_l: self.mark_direct() elif evt.keyval == Gdk.KEY_r: self.remove_all() elif evt.keyval == Gdk.KEY_s: self.save_region() elif evt.keyval == Gdk.KEY_x: self.hint_screen() self.queue_draw() def save_labels(self): with open(self.descname, 'w') as outf: for itemid in sorted(self.tree): node = self.tree[itemid] if 'label' in node: outf.write("%s %s\n" % (itemid, node['label'])) 
def get_text(self, title, prompt): #base this on a message dialog dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.QUESTION, Gtk.ButtonsType.OK_CANCEL, title) dialog.format_secondary_text(prompt) #create the text input field entry = Gtk.Entry() #allow the user to press enter to do ok entry.connect("activate", lambda entry: dialog.response(Gtk.ResponseType.OK)) #create a horizontal box to pack the entry and a label hbox = Gtk.HBox() hbox.pack_start(Gtk.Label("Label:"), False, 5, 5) hbox.pack_end(entry, True, 0, 0) #add it and show it dialog.vbox.pack_end(hbox, True, True, 0) dialog.show_all() #go go go response = dialog.run() if response == Gtk.ResponseType.OK: text = entry.get_text() else: text = None dialog.destroy() return text
def ocr_on_bounding_boxes(img, components):
    blurbs = []
    for component in components:
        (aspect, vertical, horizontal) = segment_into_lines(img, component)
        # if len(vertical) < 2 and len(horizontal) < 2: continue
        # attempt to separately process furigana
        # (furigana, non_furigana) = estimate_furigana(vertical)
        '''
        from http://code.google.com/p/tesseract-ocr/wiki/ControlParams
        Useful parameters for Japanese and Chinese

        Some Japanese tesseract users found these parameters helpful for
        increasing tesseract-ocr (3.02) accuracy for Japanese:

        Name                           Suggested value  Description
        chop_enable                    T    Chop enable.
        use_new_state_cost             F    Use new state cost heuristics for segmentation state evaluation
        segment_segcost_rating         F    Incorporate segmentation cost in word rating?
        enable_new_segsearch           0    Enable new segmentation search path.
        language_model_ngram_on        0    Turn on/off the use of character ngram model.
        textord_force_make_prop_words  F    Force proportional word segmentation on all rows.
        '''
        # now run OCR on this bounding box
        api = PyTessBaseAPI(path='C:/Program Files/Tesseract-OCR/tessdata',
                            lang='jpn')
        # handle single column lines as "vertical align" and Auto segmentation otherwise
        if len(vertical) < 2:
            api.SetPageSegMode(PSM.SINGLE_BLOCK_VERT_TEXT)
        else:
            api.SetPageSegMode(PSM.AUTO)
        api.SetVariable('chop_enable', 'T')
        api.SetVariable('use_new_state_cost', 'F')
        api.SetVariable('segment_segcost_rating', 'F')
        api.SetVariable('enable_new_segsearch', '0')
        api.SetVariable('language_model_ngram_on', '0')
        api.SetVariable('textord_force_make_prop_words', 'F')
        api.SetVariable('tessedit_char_blacklist', '}><L')
        api.SetVariable('textord_debug_tabfind', '0')

        x = component[1].start
        y = component[0].start
        w = component[1].stop - x
        h = component[0].stop - y
        # crop the component region (assumes `from PIL import Image` is available)
        roi = img[y:y + h, x:x + w]
        api.SetImage(Image.fromarray(roi))
        txt = api.GetUTF8Text()
        conf = api.MeanTextConf()
        if conf > 0 and len(txt) > 0:
            blurb = Blurb(x, y, w, h, txt, confidence=conf)
            blurbs.append(blurb)
    return blurbs
from tesserocr import PyTessBaseAPI, RIL, iterate_level
from PIL import Image
from utils import *
import sys

ABCs = 'ABCDEFGHIJKLMNPQRSTUVWXYZ'
WHITELIST = "1234567890X()/\\" + ABCs

OCR_API = PyTessBaseAPI()
# OCR_API = PyTessBaseAPI(path='C:/Users/conor/Downloads/tessdata-master')
OCR_API.SetVariable('tessedit_pageseg_mode', "7")
OCR_API.SetVariable('tessedit_char_whitelist', WHITELIST)

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('usage: %s <input.png [...]>' % sys.argv[0])
        sys.exit(1)

    def num_from_name(x):
        nums = '1234567890'
        n = ''
        for y in x:
            if y in nums:
                n += y
        return int(n)

    with PyTessBaseAPI() as api:
        api.SetVariable('tessedit_pageseg_mode', "7")
        api.SetVariable('tessedit_char_whitelist', WHITELIST)
        for x in sorted(sys.argv[1:], key=lambda x: num_from_name(x)):
            im = load_image(x)
def run_tesseract(image_file):
    if tessdata:
        api = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
    else:
        api = PyTessBaseAPI(psm=PSM.AUTO_OSD)
    api.SetImageFile(image_file)
    api.SetVariable("textord_tablefind_recognize_tables", "T")
    api.SetVariable("textord_tabfind_find_tables", "T")
    api.Recognize()

    document = {}
    it = api.AnalyseLayout()
    if it is not None:
        orientation, direction, order, deskew_angle = it.Orientation()
        api.Recognize()
        ri = api.GetIterator()
        if ri is not None:
            document = {
                "orientation": orientation,
                "writing_direction": direction,
                "text_direction": order,
                "deskew_angle": deskew_angle,
                "blocks": []
            }
            while ri.IsAtBeginningOf(RIL.BLOCK):
                block = {
                    "block_type": ri.BlockType(),
                    "block_type_str": BlockType[ri.BlockType()],
                    "box": ri.BoundingBox(RIL.BLOCK),
                    "ocr_text": ri.GetUTF8Text(RIL.BLOCK),
                    "confidence": ri.Confidence(RIL.BLOCK),
                    "paragraphs": []
                }
                break_para = False
                while True:
                    if ri.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                        break_para = True
                    break_line = False
                    paragraph = {
                        "box": ri.BoundingBox(RIL.PARA),
                        "ocr_text": ri.GetUTF8Text(RIL.PARA),
                        "paragraph_info": list(ri.ParagraphInfo()),
                        "confidence": ri.Confidence(RIL.PARA),
                        "lines": []
                    }
                    while True:
                        if ri.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                            break_line = True
                        break_word = False
                        line = {
                            "box": ri.BoundingBox(RIL.TEXTLINE),
                            "ocr_text": ri.GetUTF8Text(RIL.TEXTLINE),
                            "confidence": ri.Confidence(RIL.TEXTLINE),
                            "words": []
                        }
                        while True:
                            word = {
                                "box": ri.BoundingBox(RIL.WORD),
                                "ocr_text": ri.GetUTF8Text(RIL.WORD),
                                "confidence": ri.Confidence(RIL.WORD),
                                "attributes": ri.WordFontAttributes()
                            }
                            if ri.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                                break_word = True
                            line["words"].append(word)
                            if break_word:
                                break
                            ri.Next(RIL.WORD)
                        paragraph["lines"].append(line)
                        if break_line:
                            break
                        ri.Next(RIL.TEXTLINE)
                    block["paragraphs"].append(paragraph)
                    if break_para:
                        break
                    ri.Next(RIL.PARA)
                document["blocks"].append(block)
                ri.Next(RIL.BLOCK)
    return document
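A brief sketch of walking the nested dictionary returned by run_tesseract above; the input file name is a placeholder.

doc = run_tesseract("scanned_page.png")  # hypothetical scanned page
for block in doc.get("blocks", []):
    print("block", block["box"], "conf %.1f" % block["confidence"])
    for para in block["paragraphs"]:
        for line in para["lines"]:
            words = [w["ocr_text"].strip() for w in line["words"]]
            print("  line:", " ".join(words))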
class OCREngine(): def __init__(self, extra_whitelist='', all_unicode=False, lang='eng'): """ Args: extra_whitelist: string of extra chars for Tesseract to consider only takes effect when all_unicode is False all_unicode: if True, Tess will consider all possible unicode characters lang: OCR language """ self.tess = PyTessBaseAPI(psm=PSM_MODE, lang=lang) self.is_closed = False if all_unicode: self.whitelist_chars = None else: self.whitelist_chars = ("abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "1234567890" r"~!@#$%^&*()_+-={}|[]\:;'<>?,./" '"' "©") + extra_whitelist self.tess.SetVariable('tessedit_char_whitelist', self.whitelist_chars) def check_engine(self): if self.is_closed: raise RuntimeError('OCREngine has been closed.') def recognize(self, image, min_text_size=MIN_TEXT_SIZE, max_text_size=MAX_TEXT_SIZE, uniformity_thresh=UNIFORMITY_THRESH, thin_line_thresh=THIN_LINE_THRESH, conf_thresh=CONF_THRESH, box_expand_factor=BOX_EXPAND_FACTOR, horizontal_pooling=HORIZONTAL_POOLING): """ Generator: Blob http://stackoverflow.com/questions/23506105/extracting-text-opencv Args: input_image: can be one of the following types: - string: image file path - ndarray: numpy image - PIL.Image.Image: PIL image min_text_size: min text height/width in pixels, below which will be ignored max_text_size: max text height/width in pixels, above which will be ignored uniformity_thresh (0.0 < _ < 1.0): remove all black or all white regions ignore a region if the number of pixels neither black nor white < [thresh] thin_line_thresh (must be odd int): remove all lines thinner than [thresh] pixels. can be used to remove the thin borders of web page textboxes. conf_thresh (0 < _ < 100): ignore regions with OCR confidence < thresh. box_expand_factor (0.0 < _ < 1.0): expand the bounding box outwards in case certain chars are cutoff. horizontal_pooling: result bounding boxes will be more connected with more pooling, but large pooling might lower accuracy. """ self.check_engine() # param sanity check assert max_text_size > min_text_size > 0 assert 0.0 <= uniformity_thresh < 1.0 assert thin_line_thresh % 2 == 1 assert 0 <= conf_thresh < 100 assert 0.0 <= box_expand_factor < 1.0 assert horizontal_pooling > 0 image = get_np_img(image) img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) img_bw = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 5) img = img_gray # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel) # cut off all gray pixels < 30. # `cv2.THRESH_BINARY | cv2.THRESH_OTSU` is also good, but might overlook certain light gray areas _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY) # connect horizontally oriented regions kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_pooling, 1)) img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel) # remove all thin textbox borders (e.g. 
web page textbox) if thin_line_thresh > 0: kernel = cv2.getStructuringElement( cv2.MORPH_RECT, (thin_line_thresh, thin_line_thresh)) img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel) # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) for contour in contours: x, y, w, h = box = Box(*cv2.boundingRect(contour)) # remove regions that are beyond size limits if (w < min_text_size or h < min_text_size or h > max_text_size): continue # remove regions that are almost uniformly white or black binary_region = crop(img_bw, box) uniformity = np.count_nonzero(binary_region) / float(w * h) if (uniformity > 1 - uniformity_thresh or uniformity < uniformity_thresh): continue # expand the borders a little bit to include cutoff chars expansion = int(min(h, w) * box_expand_factor) x = max(0, x - expansion) y = max(0, y - expansion) h, w = h + 2 * expansion, w + 2 * expansion if h > w: # further extend the long axis h += 2 * expansion elif w > h: w += 2 * expansion # image passed to Tess should be grayscale. # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11 box = Box(x, y, w, h) img_crop = crop(img_gray, box) # make sure that crops passed in tesseract have minimum x-height # http://github.com/tesseract-ocr/tesseract/wiki/FAQ#is-there-a-minimum-text-size-it-wont-read-screen-text img_crop = cv2.resize(img_crop, (int(img_crop.shape[1] * CROP_RESIZE_HEIGHT / img_crop.shape[0]), CROP_RESIZE_HEIGHT)) ocr_text, conf = self.run_tess(img_crop) if conf > conf_thresh: yield Blob(ocr_text, box, conf) def _experiment_segment(self, img, min_text_size=MIN_TEXT_SIZE, max_text_size=MAX_TEXT_SIZE, uniformity_thresh=UNIFORMITY_THRESH, horizontal_pooling=HORIZONTAL_POOLING): """ PRIVATE: experiment only """ img_init = img # preserve initial image img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) img_bw = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 5) img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, morph_kernel) disp(img) # morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) # img = cv2.dilate(img, morph_kernel) # OTSU thresholding # _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY) # img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV,9,2) disp(img) # connect horizontally oriented regions morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_pooling, 1)) img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, morph_kernel) disp(img) if 0: morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (horizontal_pooling, 3)) img = cv2.erode(img, morph_kernel, iterations=1) disp(img) morph_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (6, 6)) img = cv2.dilate(img, morph_kernel, iterations=1) elif 1: morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)) img = cv2.morphologyEx(img, cv2.MORPH_OPEN, morph_kernel) disp(img) # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html _, contours, hierarchy = cv2.findContours(img, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) img_copy = np.copy(img_init) for contour in contours: x, y, w, h = cv2.boundingRect(contour) 
draw_rect(img_copy, x, y, w, h) if (w < min_text_size or h < min_text_size or h > max_text_size): continue binary_region = img_bw[y:y + h, x:x + w] uniformity = np.count_nonzero(binary_region) / float(w * h) if (uniformity > 1 - uniformity_thresh or uniformity < uniformity_thresh): # ignore mostly white or black regions # print(w, h) # disp(binary_region) continue # the image must be grayscale, otherwise Tesseract will SegFault # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11 draw_rect(img_init, x, y, w, h) disp(img_copy) disp(img_init, 0) def run_tess(self, img): """ Tesseract python API source code: https://github.com/sirfz/tesserocr/blob/master/tesserocr.pyx Returns: (ocr_text, confidence) """ if isinstance(img, np.ndarray): img = np2PIL(img) self.tess.SetImage(img) ocr_text = self.tess.GetUTF8Text().strip() conf = self.tess.MeanTextConf() return ocr_text, conf def _deprec_run_tess(self, img): "GetComponentImages throws SegFault randomly. No way to fix. :(" if isinstance(img, np.ndarray): img = np2PIL(img) components = self.tess.GetComponentImages(RIL.TEXTLINE, True) for _, inner_box, block_id, paragraph_id in components: # box is a dict with x, y, w and h keys inner_box = Box(**inner_box) if inner_box.w < MIN_TEXT_SIZE or inner_box.h < MIN_TEXT_SIZE: continue self.tess.SetRectangle(*inner_box) ocr_text = self.tess.GetUTF8Text().strip() conf = self.tess.MeanTextConf() yield ocr_text, inner_box, conf def close(self): self.tess.End() self.is_closed = True def __enter__(self): return self def __exit__(self, type, value, traceback): self.close()
def _create_tesseract():
    tesseract = PyTessBaseAPI()
    tesseract.SetVariable("load_system_dawg", "F")
    tesseract.SetVariable("load_freq_dawg", "F")
    tesseract.SetVariable("load_punc_dawg", "F")
    tesseract.SetVariable("load_number_dawg", "F")
    tesseract.SetVariable("load_unambig_dawg", "F")
    tesseract.SetVariable("load_bigram_dawg", "F")
    tesseract.SetVariable("load_fixed_length_dawgs", "F")
    tesseract.SetVariable("classify_enable_learning", "F")
    tesseract.SetVariable("classify_enable_adaptive_matcher", "F")
    tesseract.SetVariable("segment_penalty_garbage", "F")
    tesseract.SetVariable("segment_penalty_dict_nonword", "F")
    tesseract.SetVariable("segment_penalty_dict_frequent_word", "F")
    tesseract.SetVariable("segment_penalty_dict_case_ok", "F")
    tesseract.SetVariable("segment_penalty_dict_case_bad", "F")
    tesseract.SetVariable("edges_use_new_outline_complexity", "T")
    tesseract.SetVariable("tessedit_char_whitelist",
                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    tesseract.SetPageSegMode(PSM.SINGLE_LINE)
    return tesseract
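A minimal sketch of using the dictionary-free, uppercase-only engine above on a single line of text; the input file name is a placeholder.

from PIL import Image

api = _create_tesseract()
api.SetImage(Image.open("name_plate.png"))  # hypothetical single-line crop
print(api.GetUTF8Text().strip(), api.MeanTextConf())
api.End()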
import tesserocr
from tesserocr import PyTessBaseAPI
import numpy as np
import cv2
import requests
from io import BytesIO
from PIL import Image

user_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36')
headers = {'User-Agent': user_agent}
url = 'http://www.bjsuperpass.com/captcha.svl?d=1503144107405'
rs = requests.get(url, headers=headers, timeout=10)
print('Fetched the captcha from the transit-card site', rs.status_code)
# TODO capture the cookies

print('Load the bytes into PIL Image, NumPy and OpenCV via BytesIO')
s1 = BytesIO(rs.content)
# img = Image.open(BytesIO(resp.read()))
img = Image.open(s1)
img = img.convert("RGB")
im = np.array(img)
cv2.imshow('src', im)
cv2.waitKey(0)
cv2.imwrite('captcha.jpg', im)

ocr = PyTessBaseAPI()
# ocr.Init(".", "eng", tesseract.OEM_DEFAULT)
ocr.SetVariable("tessedit_char_whitelist",
                "0123456789abcdefghijklmnopqrstuvwxyz")
# ocr.SetPageSegMode(tesseract.PSM_AUTO)
# ocr.SetImage(img)
print('The captcha text is', tesserocr.image_to_text(img))
# TODO send the cookies
# api.SetImageFile(img)
# print(api.GetUTF8Text())
# print(api.AllWordConfidences())
img = Image.open(
    glo.DATA_FOLDER + '/number_range_predictorcropped3.png'
)  # glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + '/fullcapture961 .png')
# img = img.convert('L')

from tesserocr import PyTessBaseAPI, RIL, iterate_level, PSM

# print(help(tesserocr))
api = PyTessBaseAPI()
api.Init()
api.SetImageFile(glo.DATA_FOLDER + '/number_range_predictorcropped3.png')
api.SetVariable("tessedit_pageseg_mode", "7")
api.SetVariable("language_model_penalty_non_dict_word", "0")
api.SetVariable("doc_dict_enable", "0")
print("recognized txt:", api.GetUTF8Text().encode('utf-8').strip())
# api.Recognize()
"""
ri = api.GetIterator()
level = RIL.SYMBOL
for r in iterate_level(ri, level):
    symbol = r.GetUTF8Text(level)  # r == ri
    conf = r.Confidence(level)
    print(u'symbol {}, conf: {}'.format(symbol, conf).encode('utf-8').strip())
    indent = False
    ci = r.GetChoiceIterator()
    for c in ci:
        if indent:
class OCREngine:
    def __init__(self, psm: int = 3, config: dict = {}):
        logging.info('Initializing OCR engine with PSM=%d and configs=%s' %
                     (psm, config))
        self.api = PyTessBaseAPI(psm=psm)
        for key in config.keys():
            self.api.SetVariable(key, config[key])
        logging.debug('OCR engine initialized')

    def build_graph(self, image_path: str, scheme: str = None) -> DocumentGraph:
        hocr = self._get_hocr(image_path)
        words = self._get_words(hocr, scheme)
        dg = DocumentGraph(words)
        return dg

    def _get_hocr(self, image_path: str) -> str:
        logging.info('Reading to hOCR from image: %s' % image_path)
        self.api.SetImageFile(image_path)
        hocr_text = self.api.GetHOCRText(0)
        logging.debug('Image read')
        return hocr_text

    def _get_words(self, hocr: str, scheme: str = None):
        logging.info('Extracting words from hOCR.')
        if scheme is None:
            logging.warning('No scheme specified. Assuming xyxy')
            scheme = 'xyxy'
        soup = BeautifulSoup(hocr, 'html.parser')
        word_tags = soup.select('.ocrx_word')
        word_nodes = [self._make_node(tag, scheme=scheme) for tag in word_tags]
        word_nodes = list(filter(lambda node: node is not None, word_nodes))
        return word_nodes

    def _make_node(self, tag: dict, scheme: str) -> WordNode:
        fields = tag['title'].split(';')
        if not len(fields) == 2:
            logging.warning('Malformed tag: %s. Skipping.' % tag)
            return None
        word = tag.text
        coordinates = tuple(map(int, fields[0].split()[1:]))
        conf = int(fields[1].split()[1])
        wn = WordNode(word, WordNode.convert_coords(coordinates, scheme), conf)
        logging.debug('Made word: %s' % wn.__repr__())
        return wn

    def close(self):
        self.api.End()
        logging.debug('OCR engine closed')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type:
            print("type: %s\nvalue: %s\ntrace: %s" %
                  (exc_type, exc_value, traceback))
        self.close()
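A short usage sketch for the hOCR-based engine above; the input file name and whitelist are placeholders, and DocumentGraph/WordNode are assumed to be provided by the surrounding project.

config = {'tessedit_char_whitelist': '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.,- '}
with OCREngine(psm=3, config=config) as engine:
    graph = engine.build_graph('invoice_scan.png', scheme='xyxy')  # hypothetical input
    # graph is a DocumentGraph built from the recognized word boxes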