def __init__(self, image_file, tessdata):
    """Open ``image_file`` with Tesseract and run recognition immediately.

    Args:
        image_file: Path of the image handed to ``SetImageFile``.
        tessdata: Tessdata directory passed as ``path`` to ``PyTessBaseAPI``.

    The API is created in ``PSM.AUTO_OSD`` mode with both table-related
    ``textord_*`` switches set to ``"T"``. The ready-to-query engine is
    stored on ``self.api`` only after ``Recognize()`` has completed.
    """
    table_switches = (
        "textord_tablefind_recognize_tables",
        "textord_tabfind_find_tables",
    )
    engine = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
    engine.SetImageFile(image_file)
    for switch in table_switches:
        engine.SetVariable(switch, "T")
    engine.Recognize()
    self.api = engine
Exemple #2
0
def init_api(**kwargs):
    """Create a ``PyTessBaseAPI`` tuned for digit-only recognition.

    Args:
        **kwargs: Forwarded unchanged to the ``PyTessBaseAPI`` constructor.

    Returns:
        The configured API object; the caller owns it.
    """
    # Equivalent of Tesseract's "outputbase digits" configuration.
    digit_settings = (
        ("tessedit_char_whitelist", "0123456789"),
        ("classify_bln_numeric_mode", "1"),
        ("tessedit_do_invert", "0"),
    )
    engine = PyTessBaseAPI(**kwargs)
    for option, value in digit_settings:
        engine.SetVariable(option, value)
    return engine
def read_text_with_confidence(image,
                              lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4,
                              whitelist=''):
    """Run Tesseract on an image array and return the text with its confidence.

    Args:
        image: Array-like object exposing ``.shape``; it is converted with
            ``Image.fromarray`` before being handed to Tesseract.
        lang: Tesseract language/model name.
        path: Directory containing the tessdata files.
        psm: Page segmentation mode passed to ``PyTessBaseAPI``.
        whitelist: Characters Tesseract may emit; an empty string leaves the
            whitelist untouched.

    Returns:
        A ``(text, confidence)`` tuple. ``('', 0)`` is returned for an image
        with a non-positive height or width, and also when recognition
        raises (the error is reported on stdout, as before).
    """
    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return '', 0

    # Fallback result: previously these names were unbound when Tesseract
    # raised, so the final ``return`` failed with UnboundLocalError.
    text, confidence = '', 0

    image_pil = Image.fromarray(image)

    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)

    try:
        api.SetImage(image_pil)

        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)

        api.Recognize()

        # Assigned together so a failure never leaves a half-updated result.
        text, confidence = api.GetUTF8Text(), api.MeanTextConf()
    except Exception:
        # Deliberately best-effort: report and fall back to the empty result.
        print("[ERROR] Tesseract exception")
    finally:
        api.End()

    return text, confidence
Exemple #4
0
class Ocr:
    """Context manager around a whitelisted ``PyTessBaseAPI`` instance.

    ``self.api`` is ``None`` until the object is entered with ``with``.
    """

    def __init__(self):
        self.api = None

    def __enter__(self):
        """Open the Tesseract API, apply the character whitelist, return self."""
        tesseract = PyTessBaseAPI()
        self.api = tesseract.__enter__()
        self.api.SetVariable('tessedit_char_whitelist',
                             OCR_CHARACTER_WHITELIST)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Delegate teardown to the underlying API's ``__exit__``."""
        self.api.__exit__(exc_type, exc_val, exc_tb)

    def get_characters(self, image):
        """Recognise the text in ``image``.

        Args:
            image: Array-like with ``.shape``, converted via
                ``Image.fromarray``.

        Returns:
            ``(text, confidence)`` where the text is whitespace-stripped and
            the confidence comes from ``MeanTextConf``.

        Raises:
            NoImageError: If either of the first two dimensions is below 1.
        """
        rows, cols = image.shape[:2]
        if min(rows, cols) < 1:
            raise NoImageError()

        self.api.SetImage(Image.fromarray(image))
        recognised = self.api.GetUTF8Text()
        return recognised.strip(), self.api.MeanTextConf()
Exemple #5
0
def getWords(pages, letters_cache):
    """Collect word blocks (and optionally section letters) from document pages.

    Reads pages from the module-level ``doc`` object. Blocks whose stripped
    text is not exactly one character become word candidates and receive a
    running block id (``bid``). Single-character blocks are treated as
    section letters: unless ``letters_cache`` is truthy, each one is OCR'd
    from a rendering of the page and recorded against the id of the next
    word block.

    Args:
        pages: Iterable of page indices into ``doc``.
        letters_cache: Truthy when letters are already known, which skips
            the OCR step entirely.

    Returns:
        ``(standard_words, split_words, letters)`` where ``letters`` is a
        dict with parallel ``'bid'`` and ``'letters'`` lists.
    """
    standard_words, split_words, letters = [], [], {'bid': [], 'letters': []}
    prev_word = None
    bid = 0
    # Scale factor and padding used when cropping a letter out of the page.
    sf, eps = 25 / 6, 1

    letter_detect = PyTessBaseAPI(psm=8, lang='eng')
    try:
        letter_detect.SetVariable('tessedit_char_whitelist', ascii_uppercase)
        for pg_num in pages:
            page = doc[pg_num]
            # Rendered lazily and at most once per page; it used to be
            # re-rendered for every single-letter block.
            page_img = None

            # get initial block bounding boxes
            blocks = []
            for block in page.getText("blocks"):
                bbox = block[:4]
                text = block[4].strip()
                if len(text) != 1:  # not a single letter
                    blocks.append({
                        'bid': bid,
                        'bbox': bbox,
                        'pg': page.number,
                        'text': text
                    })
                    bid += 1
                elif not letters_cache:
                    # maps each bid to a corresponding dictionary letter
                    # this provides a heuristic for our search
                    if page_img is None:
                        pix = page.getPixmap(matrix=fitz.Matrix(sf, sf))
                        page_img = Image.open(io.BytesIO(pix.getPNGData()))
                    block_img = page_img.crop(resize(bbox, sf, eps))
                    letter_detect.SetImage(block_img)
                    letter_detect.Recognize()
                    letter = letter_detect.AllWords()[0]
                    assert (len(letter) == 1)
                    letters['bid'].append(bid)
                    letters['letters'].append(letter.lower())

            standard, split, prev_word, insert_word = groupBlocks(
                blocks, prev_word, pg_num)

            # last block from previous page (no spillover)
            if insert_word:
                add_word(standard, insert_word)

            # clean up
            standard_words.extend(standard)
            split_words.extend(split)
    finally:
        # Release the native Tesseract handle even if a page fails.
        letter_detect.End()

    # add the last word -- to the accumulated result. The per-page
    # ``standard`` list has already been copied into ``standard_words``,
    # so appending to it would silently drop the word.
    if prev_word:
        add_word(standard_words, prev_word)

    # make sure all the blocks are properly formatted
    for word in chain(standard_words, split_words):
        test_word_format(word)

    return standard_words, split_words, letters
Exemple #6
0
def read_char(image, whitelist=None):
    """OCR a single character from an image. Useful for captchas.

    Args:
        image: Image object accepted by ``PyTessBaseAPI.SetImage``.
        whitelist: Optional string of characters Tesseract may return.

    Returns:
        The recognised text with surrounding whitespace stripped.
    """
    # The context manager releases the native Tesseract handle on exit;
    # previously the API object was never ended.
    with PyTessBaseAPI() as api:
        api.SetPageSegMode(10)  # 10 = single-character page segmentation
        if whitelist is not None:
            api.SetVariable("tessedit_char_whitelist", whitelist)
        api.SetImage(image)
        api.Recognize()
        return api.GetUTF8Text().strip()
Exemple #7
0
def read_word(image, whitelist=None, chars=None, spaces=False):
    """OCR a single word from an image. Useful for captchas.

    The image should be pre-processed to remove noise etc.

    Args:
        image: Image object accepted by ``PyTessBaseAPI.SetImage``.
        whitelist: Optional string of characters Tesseract may return.
        chars: Expected length of the result; when given and the guess has
            a different length, the confidence is reported as ``None``.
        spaces: When false, spaces are removed from the guess and it is
            stripped of surrounding whitespace.

    Returns:
        ``(guess, confidence)`` where ``confidence`` is ``MeanTextConf()``
        or ``None`` on a length mismatch.
    """
    # The context manager releases the native Tesseract handle on exit;
    # previously the API object was never ended.
    with PyTessBaseAPI() as api:
        api.SetPageSegMode(8)  # 8 = single-word page segmentation
        if whitelist is not None:
            api.SetVariable("tessedit_char_whitelist", whitelist)
        api.SetImage(image)
        api.Recognize()
        guess = api.GetUTF8Text()

        if not spaces:
            guess = guess.replace(" ", "").strip()

        if chars is not None and len(guess) != chars:
            return guess, None

        return guess, api.MeanTextConf()
Exemple #8
0
class OcrWrapper(BaseImageToString):
    """``BaseImageToString`` implementation backed by one ``PyTessBaseAPI``.

    Recognition is limited to the characters listed in ``_OPTIONS``.
    """

    # (Tesseract variable name, value) applied once at construction time.
    _OPTIONS = ('tessedit_char_whitelist', '0123456789ABCDEF.-')

    def __init__(self):
        """Create the Tesseract API, using a fixed tessdata path on Windows."""
        api_kwargs = {}
        if sys.platform == 'win32':
            api_kwargs['path'] = "C:\\Program Files\\Tesseract-OCR\\tessdata"
        self._ocr = PyTessBaseAPI(**api_kwargs)

        variable, value = self._OPTIONS[0], self._OPTIONS[1]
        self._ocr.SetVariable(variable, value)

    def image_to_string(self, image: Image) -> str:
        """Return the raw text Tesseract reads from ``image``.

        The image's ``format`` attribute is set to ``'PNG'`` before it is
        handed to Tesseract.
        """
        image.format = 'PNG'
        self._ocr.SetImage(image)
        return self._ocr.GetUTF8Text()

    def end(self):
        """Release the underlying Tesseract API."""
        self._ocr.End()
Exemple #9
0
class OCR():
    """Reads button text and numbers from screenshots using Tesseract.

    Images are first reduced to black-on-white by colour-keying the known
    text colours, then passed to a shared ``PyTessBaseAPI`` instance.
    """

    # Shared default; replaced by a per-instance API in ``reset``.
    ocr_api = None

    def __init__(self):
        self.reset()

    def __del__(self):
        # Drop the instance's reference to the API. ``pop`` is used rather
        # than ``del self.ocr_api`` so that an instance whose ``reset``
        # never completed does not raise AttributeError during finalisation.
        self.__dict__.pop('ocr_api', None)

    def reset(self):
        """Replace the Tesseract API with a fresh legacy-engine instance."""
        self.ocr_api = PyTessBaseAPI(oem=OEM.TESSERACT_ONLY)

    @staticmethod
    def _is_text_pixel(d):
        """Return True when pixel ``d`` matches one of the known text colours."""
        r, g, b = d[0], d[1], d[2]
        return (
            (r >= 254 and g >= 254 and b >= 254)  # chp catk
            or (r <= 28 and g == 255 and b <= 80)  # chp catk boost
            or (r == 255 and g <= 2 and b <= 2)  # chp catk malus
            or (164 <= r <= 179 and 211 <= g <= 230 and b >= 233)  # smana
            or (175 <= r <= 205 and 190 <= g <= 220
                and 215 <= b <= 235)  # mana
            or (r == 245 and g == 245 and b == 250)  # hp
            or (r == 246 and g == 227 and b == 227)  # card cost
        )

    def ocr_filter_img(self, im):
        """Turn text-coloured pixels black and everything else white.

        Args:
            im: PIL-style image (``getdata``/``putdata``) or ``None``.

        Returns:
            The grayscale version of the modified image, or ``None`` when
            ``im`` is ``None``. The input image is modified in place.
        """
        if im is None:
            return im
        black, white = (0, 0, 0), (255, 255, 255)
        im.putdata([
            black if self._is_text_pixel(px) else white
            for px in im.getdata()
        ])
        return ImageOps.grayscale(im)

    def filter_img(self, im):
        """Return ``im`` unchanged (all filtering steps are disabled)."""
        return im

    def ocr_txt(self, img):
        """Recognise a block of letters in ``img`` and return it lower-cased."""
        im = self.ocr_filter_img(img)
        self.ocr_api.SetPageSegMode(PSM.SINGLE_BLOCK)
        self.ocr_api.SetVariable('tessedit_char_whitelist', ascii_letters)
        self.ocr_api.SetVariable('tessedit_char_blacklist', digits)
        self.ocr_api.SetImage(im)
        text = self.ocr_api.GetUTF8Text().strip('\n')
        return text.lower()

    def ocr_number(self, img):
        """Recognise a single number in ``img``.

        Returns:
            The parsed integer, or ``-1`` when the recognised text is not a
            valid integer.
        """
        im = self.ocr_filter_img(img)
        self.ocr_api.SetVariable('tessedit_char_whitelist', digits)
        self.ocr_api.SetVariable('tessedit_char_blacklist', ascii_letters)
        self.ocr_api.SetPageSegMode(PSM.SINGLE_WORD)
        self.ocr_api.SetImage(im)
        raw = self.ocr_api.GetUTF8Text().strip('\n')
        try:
            number = int(raw)
        except ValueError:
            # Only a failed parse is expected here; anything else propagates.
            number = -1

        logging.info("OCR number %i >", number)
        return number
Exemple #10
0
class TT2Predictor:
    """Holds the trainer-predictor instances and their common operations.

    Two kinds of predictors are kept:

    * ``trainers_predictors_list`` -- ``TrainerPredictor`` objects that
      classify cropped regions of the captured frame.
    * ``text_predictors_list`` -- tuples of ``(name, crop box, character
      whitelist, page segmentation mode)`` read with Tesseract.
    """

    def __init__(self, **kwargs):
        """Build the predictors, open Tesseract and pickle the trainer list.

        ``kwargs`` is accepted for interface compatibility and not used.
        """
        self.trainers_predictors_list = []
        self.text_predictors_list = [
            ("previous_level", (1212, 231, 1230, 280), "0123456789", "8"),
            ("main_level", (1203, 323, 1223, 399), "0123456789", "8"),
            ("next_level", (1212, 445, 1230, 493), "0123456789", "8"),
            ("sub_level", (1177, 625, 1203, 692), "0123456789/", "8"),
            ("gold", (1091, 283, 1126, 471),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "7"),
            ("current_dps_down_no_tab", (389, 562, 423, 709),
             "0123456789.abcdefghijklmnopqrstuvwxyz", "8"),
            ("last_hero", (124, 109, 148, 430),
             "0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
             "7")
        ]
        self.api = PyTessBaseAPI()
        self.api.Init()
        print(tesserocr.tesseract_version())
        print(tesserocr.get_languages())
        self.global_image = None
        self.status = CurrentStatus()

        boss_trainer = TrainerPredictor(
            "boss_active_predictor",
            ["boss_active", "boss_inactive", "no_boss"],
            (1224, 555, 1248, 648), 12, 46, 255.0, [200, 30])
        egg_trainer = TrainerPredictor("egg_active_predictor",
                                       ["egg_active", "egg_inactive"],
                                       (741, 31, 761, 64), 10, 16, 255.0,
                                       [200, 30])
        gold_pet_trainer = TrainerPredictor(
            "gold_pet_predictor",
            ["goldpet", "nopet", "normalpet", "partial pet"],
            (624, 364, 734, 474), 40, 40, 255.0, [200, 30])
        tab_predictor = TrainerPredictor("tab_predictor", [
            "skills_tab", "heroes_tab", "equipment_tab", "pet_tab",
            "relic_tab", "shop_tab", "no_tab"
        ], (51, 1, 59, 717), 2, 179, 255.0, [200, 30])
        self.trainers_predictors_list.extend(
            [boss_trainer, egg_trainer, gold_pet_trainer, tab_predictor])
        # Training is currently disabled. To retrain, call on each trainer:
        #   trainer.crop_images()
        #   trainer.process_images()
        #   trainer.read_and_pickle()
        #   trainer.train_graph()
        saved_classes_file = glo.DATA_FOLDER + "/dataforclassifier/TrainerPredictor_list.pickle"
        save_pickle(saved_classes_file, self.trainers_predictors_list)

    def parse_raw_image(self):
        """Load the raw RGBA capture, feed it to every trainer and archive it.

        The frame is read from ``glo.RAW_FULL_FILE`` as a 1280x720 RGBA
        buffer, stored on ``self.global_image`` and saved as a timestamped
        PNG in ``glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER``.
        """
        # Local import: ``time.strftime`` does not support ``%f``, so the
        # microsecond part of the file name needs ``datetime``.
        from datetime import datetime

        with open(glo.RAW_FULL_FILE, 'rb') as f:
            image = Image.frombytes('RGBA', (1280, 720), f.read())
        for class_predictor in self.trainers_predictors_list:
            class_predictor.predict_crop(image)
        self.global_image = image
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S-%f")
        image.save(glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + "/fullcapture" +
                   stamp + ".png")  # save original capture copy

    def parse_image_text(self, predict_map):
        """OCR every text region whose name appears in ``predict_map``.

        Each region is cropped from ``self.global_image``, converted to
        grayscale and rotated by 90 degrees before recognition.

        Returns:
            Dict mapping region name to the UTF-8 encoded, stripped text.
        """
        return_dict = {}
        for name, box, whitelist, seg_mode in self.text_predictors_list:
            if name not in predict_map:
                continue
            img = self.global_image.crop(box)
            img = img.convert('L')
            img = img.rotate(90, expand=True)
            self.api.SetImage(img)
            self.api.SetVariable("tessedit_char_whitelist", whitelist)
            self.api.SetVariable("tessedit_pageseg_mode", seg_mode)
            self.api.SetVariable("language_model_penalty_non_dict_word", "0")
            self.api.SetVariable("doc_dict_enable", "0")
            text_capture = self.api.GetUTF8Text().encode('utf-8').strip()
            return_dict[name] = text_capture
            print("raw text capture ", name, ":", text_capture)
            self.api.Clear()
        return return_dict

    def predict_parsed_all(self):
        """Return ``{trainer name: prediction}`` for every trainer."""
        return {
            class_predictor.name: class_predictor.predict_parsed()
            for class_predictor in self.trainers_predictors_list
        }

    def predict_parsed(self, predict_map, predict_map_text, **kwargs):
        """Predict the selected classifiers and text regions on the frame.

        Args:
            predict_map: Names of trainer predictors to evaluate.
            predict_map_text: Names of text regions to OCR.
            **kwargs: ``empty_image=False`` skips the level-transition check.

        Returns:
            Dict with ``"transition_level"`` plus one entry per evaluated
            predictor and text region. ``self.status`` is updated with it.
        """
        pred_dict = {"transition_level": False}

        # Check if the image is level transitioning (trivial prediction).
        # ``kwargs`` is a dict, so the option must be looked up as a key;
        # the previous ``hasattr(kwargs, ...)`` test could never succeed.
        if kwargs.get("empty_image") is not False:
            img = self.global_image.crop((0, 0, 100, 100))  # black corner
            extrema = img.convert("L").getextrema()
            if extrema[0] == extrema[1]:  # only one color
                print("warning level transitioning")
                pred_dict["transition_level"] = True

        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name in predict_map:
                pred_dict[
                    class_predictor.name] = class_predictor.predict_parsed()
        pred_dict_text = self.parse_image_text(predict_map_text)
        pred_dict.update(pred_dict_text)
        self.status.update_status(pred_dict, self.trainers_predictors_list)
        return pred_dict

    def predict(self):
        """Parse a fresh raw capture and run every trainer predictor on it."""
        self.parse_raw_image()
        return self.predict_parsed_all()

    def check_predict(self, pred_dict, predictor, classification):
        """Tell whether ``predictor`` predicted ``classification``.

        Returns:
            ``True``/``False`` for a known predictor name, or ``None`` when
            no trainer is called ``predictor``.
        """
        for class_predictor in self.trainers_predictors_list:
            if class_predictor.name == predictor:
                return int(
                    pred_dict[predictor]
                ) == class_predictor.pred_classes.index(classification)
        return None
Exemple #11
0
def tesser_ocr(
    image: np.ndarray,
    expected_type: Optional[Callable[[str], T]] = None,
    whitelist: Optional[str] = None,
    invert: bool = False,
    scale: float = 1,
    blur: Optional[float] = None,
    engine: tesserocr.PyTessBaseAPI = tesseract_only,
    warn_on_fail: bool = False,
) -> Optional[T]:
    """Run Tesseract on an image array and optionally convert the result.

    The whole call runs under the module-level ``lock`` because the
    ``engine`` instance is shared.

    Args:
        image: 2-D (single channel) or 3-D (height, width, channels) array.
        expected_type: Callable applied to the recognised text; ``int`` and
            ``float`` also select a matching default whitelist.
        whitelist: Characters Tesseract may emit. Defaults depend on
            ``expected_type``.
        invert: Replace the image with ``255 - image`` before recognition.
        scale: Resize factor applied with linear interpolation when not 1.
        blur: Gaussian blur sigma; falsy disables blurring.
        engine: Tesseract API instance to use.
        warn_on_fail: Log conversion failures at WARNING instead of DEBUG.

    Returns:
        The recognised text (or ``expected_type(text)``). For images with a
        dimension of 1 or less: ``""`` when a string is expected, else
        ``None``. ``None`` is also returned when the conversion raises.
    """
    with lock:

        if image.shape[0] <= 1 or image.shape[1] <= 1:
            if not expected_type or expected_type is str:
                return ""
            return None

        if whitelist is None:
            if expected_type is int:
                whitelist = string.digits
            elif expected_type is float:
                whitelist = string.digits + "."
            else:
                whitelist = string.digits + string.ascii_letters + string.punctuation + " "

        engine.SetVariable("tessedit_char_whitelist", whitelist)
        if invert:
            image = 255 - image
        if scale != 1:
            image = cv2.resize(image, (0, 0),
                               fx=scale,
                               fy=scale,
                               interpolation=cv2.INTER_LINEAR)

        if blur:
            image = cv2.GaussianBlur(image, (0, 0), blur)

        if len(image.shape) == 2:
            height, width = image.shape
            channels = 1
        else:
            height, width, channels = image.shape
        engine.SetImageBytes(image.tobytes(), width, height, channels,
                             width * channels)
        text: str = engine.GetUTF8Text()
        # Drop separators that the whitelist does not allow.
        if " " not in whitelist:
            text = text.replace(" ", "")
        if "\n" not in whitelist:
            text = text.replace("\n", "")

        # A whitelist without lowercase letters means the result is
        # expected in upper case.
        if not any(c in whitelist for c in string.ascii_lowercase):
            text = text.upper()

        if not expected_type:
            return text

        try:
            return expected_type(text)
        except Exception:
            # Best-effort conversion: report the failure and return None.
            level = logging.WARNING if warn_on_fail else logging.DEBUG
            # Not every callable has ``__name__`` (e.g. functools.partial).
            type_name = getattr(expected_type, "__name__", repr(expected_type))
            message = f"Got exception interpreting {text!r} as {type_name}"
            try:
                caller = inspect.stack()[1]
                prefix = (
                    f"{os.path.basename(caller.filename)}:{caller.lineno} "
                    f"{caller.function} | "
                )
            except Exception:
                # Stack inspection is only decoration for the log line.
                prefix = ""
            logger.log(level, prefix + message)
            return None
Exemple #12
0
def _get_tesseract():
    """Return a ``PyTessBaseAPI`` set up for one line of upper-case text.

    The whitelist covers ``A``-``Z``, the apostrophe and the space, and the
    page segmentation mode is ``PSM.SINGLE_LINE``.
    """
    allowed_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ' "
    engine = PyTessBaseAPI()
    engine.SetVariable("tessedit_char_whitelist", allowed_chars)
    engine.SetPageSegMode(PSM.SINGLE_LINE)
    return engine
Exemple #13
0
class ViewerWindow(Gtk.Window):
    def __init__(self, filenames, kind, show, ml):
        """Set up viewer state, the OCR engine and the UI, then load a file.

        Args:
            filenames: Files to step through.
            kind: Stored as ``self.kind``.
            show: Stored as ``self.show_hidden``.
            ml: Stored as ``self.ml``.
        """
        Gtk.Window.__init__(self)

        # Constructor arguments.
        self.filenames = filenames
        self.kind = kind
        self.show_hidden = show
        self.ml = ml

        # Pointer position, focused node and position in ``filenames``.
        self.ptx = 0
        self.pty = 0
        self.focus_id = -1
        self.file_idx = 0

        # Screen-hint interaction state.
        self.screen_hint = ''
        self.in_hint_screen = False

        # Per-node colours, per-app label memory and per-app element models.
        self.colors = {}
        self.memory = {}
        self.elem_models = {}

        self.tesapi = PyTessBaseAPI(lang='eng')
        self.tesapi.SetVariable("tessedit_char_whitelist", WHITELIST)

        self.init_ui()
        self.load()

    def init_ui(self):
        """Create the drawing area, wire its event handlers and show the window."""
        self.connect("delete-event", Gtk.main_quit)

        canvas = Gtk.DrawingArea()
        signal_handlers = (
            ("draw", self.on_draw),
            ("motion-notify-event", self.move_over),
            ("button-release-event", self.click_evt),
            ("scroll-event", self.scroll_evt),
            ("key-release-event", self.key_evt),
        )
        for signal_name, handler in signal_handlers:
            canvas.connect(signal_name, handler)

        # Events the drawing area has to receive for the handlers above.
        wanted_events = (Gdk.EventMask.POINTER_MOTION_MASK
                         | Gdk.EventMask.BUTTON_RELEASE_MASK
                         | Gdk.EventMask.BUTTON_PRESS_MASK
                         | Gdk.EventMask.SCROLL_MASK
                         | Gdk.EventMask.KEY_PRESS_MASK
                         | Gdk.EventMask.KEY_RELEASE_MASK)
        canvas.set_events(wanted_events)
        canvas.set_can_focus(True)
        self.add(canvas)

        self.show_all()

    def load(self, prev=False):
        """Load the file at ``self.file_idx`` and prepare everything to draw it.

        Quits the GTK main loop when the index has moved past the last
        file. Otherwise loads the tree, its labels (from the ML model or
        the description file), the screenshot, per-node colours and the OCR
        image, then advances ``self.file_idx``.

        Args:
            prev: When true, step back two positions first. The index was
                already advanced by the previous load, so this selects the
                file before the one currently shown.
        """
        if self.file_idx == len(self.filenames):
            Gtk.main_quit()
            return
        if prev:
            # NOTE(review): on the first file this makes the index negative,
            # which Python resolves from the end of the list -- confirm
            # that the wrap-around is intended.
            self.file_idx -= 2
        filename = self.filenames[self.file_idx]
        (self.app, self.scr) = util.get_aux_info(filename)
        if self.app not in self.memory:
            self.memory[self.app] = {}
        self.set_title(filename)
        self.file_idx += 1
        print("Loading %s" % filename)
        # Companion files share the base name of the loaded file.
        self.pngfile = os.path.splitext(filename)[0] + '.png'
        self.descname = os.path.splitext(filename)[0] + '.%s.txt' % self.kind

        starttime = time.time()
        self.tree = analyze.load_tree(filename)
        hidden.find_hidden_ocr(self.tree)
        hidden.mark_children_hidden_ocr(self.tree)
        util.print_tree(self.tree, show_hidden=self.show_hidden)

        # Labels come either from the classifier or from the description file.
        if self.ml:
            self.get_ml_rets()
        else:
            self.load_desc()

        endtime = time.time()
        print("Load time: %.3fs" % (endtime - starttime))

        # Reset per-file interaction state.
        self.focus_id = -1
        self.colors = {}
        self.ptx = self.pty = 0

        self.img = cairo.ImageSurface.create_from_png(self.pngfile)
        print('Image:', self.img.get_width(), self.img.get_height())

        # The node with the smallest id is reported as the root node.
        root_item_id = min(self.tree)
        root_node = self.tree[root_item_id]
        print('Root node:', root_node['width'], root_node['height'])
        # Ratio between the screenshot width and the configured width.
        self.scale = 1.0 * self.img.get_width() / config.width
        #self.scale = analyze.find_closest(self.scale, analyze.SCALE_RATIOS)
        # Both printed values are identical while the snapping above is
        # commented out.
        print('Scale:', '%.3f' % self.scale, '->', '%.3f' % self.scale)

        self.resize(self.img.get_width(), self.img.get_height())

        self.mark_depth(self.tree)

        # Give every node a random colour with each channel in [0, 0.5).
        for item_id in self.tree:
            color_r = random.random() / 2
            color_g = random.random() / 2
            color_b = random.random() / 2

            self.colors[item_id] = (color_r, color_g, color_b)

        # Hand the screenshot to Tesseract and remember its pixel size.
        imgocr = Image.open(self.pngfile)
        self.imgwidth = imgocr.width
        self.imgheight = imgocr.height
        #imgocr2 = imgocr.convert("RGB").resize(
        #    (imgocr.width * OCR_RATIO, imgocr.height * OCR_RATIO))
        self.tesapi.SetImage(imgocr)
        self.tesapi.SetSourceResolution(config.ocr_resolution)

        self.dump_memory()

    def remember(self, node, desc):
        nodeid = node['id']
        if not node['id']:
            return

        if node['id'] in self.memory[self.app]:
            if desc != self.memory[self.app][nodeid]:
                # multiple!
                self.memory[self.app][nodeid] = 'MUL'
        else:
            self.memory[self.app][node['id']] = desc

    def forget(self, node):
        if node['id'] in self.memory[self.app]:
            del self.memory[self.app][node['id']]

    def get_elem_model(self, app):
        """Load the element model for ``app`` and cache it in ``self.elem_models``."""
        self.elem_models[app] = elements.getmodel(
            "../model/", "../guis/", app, "../guis-extra/",
            config.extra_element_scrs)

    def get_ml_rets(self):
        """Label tree nodes with the element classifier's guesses.

        Every node is classified. Guesses other than ``'NONE'`` become the
        node's ``'label'``. For elements that ``tags.single`` reports as
        single on this screen, only the highest-scoring node keeps the
        label.
        """
        if self.app not in self.elem_models:
            self.get_elem_model(self.app)

        guess_descs = {}
        guess_items = {}  # type: Dict[str, List[int]]
        guess_score = {}
        classifier = self.elem_models[self.app]
        classifier.set_imgfile(self.pngfile)
        treeinfo = analyze.collect_treeinfo(self.tree)

        for itemid in self.tree:
            guess_element, score = classifier.classify(
                self.scr, self.tree, itemid, None, treeinfo)
            if guess_element == 'NONE':
                continue

            if tags.single(guess_element,
                           self.scr) and guess_element in guess_items:
                # Only one node may carry this element: keep the better one.
                old_item = guess_items[guess_element][0]
                if guess_score[old_item] < score:
                    guess_items[guess_element] = [itemid]
                    guess_score[itemid] = score
                    del guess_descs[old_item]
                    guess_descs[itemid] = guess_element
                continue

            guess_descs[itemid] = guess_element
            guess_score[itemid] = score
            guess_items[guess_element] = (
                guess_items.get(guess_element, []) + [itemid])

        for nodeid, element in guess_descs.items():
            self.tree[nodeid]['label'] = element

    def load_desc(self):
        if os.path.exists(self.descname):
            with open(self.descname) as inf:
                for line in inf.read().split('\n'):
                    if not line:
                        continue
                    (item_id, desc) = line.split(' ', 1)
                    item_id = int(item_id)
                    found = False
                    for nodeid in self.tree:
                        node = self.tree[nodeid]
                        if item_id in node['raw']:
                            if 'label' in node:
                                node['label'] += ' ' + desc
                            else:
                                node['label'] = desc
                            print(nodeid, '(', item_id, ')', '->', desc)

                            self.remember(node, desc)

                            found = True
                            break
                    if not found:
                        print("WARNING: %s (%s) is missing!" % (item_id, desc))

    def mark_depth(self, tree):
        for item_id in tree:
            node = tree[item_id]
            if 'depth' in node:
                continue
            self.mark_depth_node(tree, item_id, 0)

    def mark_depth_node(self, tree, node_id, depth):
        node = tree[node_id]
        node['depth'] = depth
        node['descs'] = []
        for child in node['children']:
            descs = self.mark_depth_node(tree, child, depth + 1)
            node['descs'] += descs

        return node['descs'] + [node_id]

    def get_node_info(self, node):
        (x, y, width, height, depth) = (node['x'], node['y'], node['width'],
                                        node['height'], node['depth'])
        x *= self.scale
        y *= self.scale
        width *= self.scale
        height *= self.scale

        width = min(width, self.imgwidth)
        height = min(height, self.imgheight)

        if x < 0:
            width += x
            x = 0

        if y < 0:
            height += y
            y = 0

        return (x, y, width, height, depth)

    def find_containing_widget(self, px, py):
        max_depth = 0
        max_id = -1

        for item_id in self.tree:
            node = self.tree[item_id]
            if self.ignore_node(node):
                continue
            if self.inside(node, px, py):
                if node['depth'] > max_depth:
                    max_depth = node['depth']
                    max_id = item_id

        return max_id

    def inside(self, node, px, py):
        (x, y, width, height, depth) = self.get_node_info(node)
        return x <= px and x + width >= px and y <= py and y + height >= py

    def ignore_node(self, node):
        if node['class'].upper() == 'OPTION':
            return True
        if node.get('visible', '') == 'hidden':
            return True
        return False

    def on_draw(self, wid, ctx):
        """Paint the screenshot and overlay the nodes of interest.

        Drawn in this order:

        1. every non-ignored node under the pointer (the deepest one filled
           and captioned, the others outlined),
        2. the deepest clickable ancestor of the hovered node, when it is a
           different node,
        3. the focused node, in red,
        4. every labelled node together with its label.

        Args:
            wid: The widget being drawn (unused).
            ctx: Cairo context to draw on.
        """
        ctx.select_font_face("Arial", cairo.FONT_SLANT_NORMAL,
                             cairo.FONT_WEIGHT_BOLD)

        ctx.set_source_surface(self.img, 0, 0)
        ctx.paint()

        ctx.set_font_size(20)
        ctx.set_line_width(5)
        ctx.set_source_rgb(1.0, 0.0, 0.0)

        max_id = self.find_containing_widget(self.ptx, self.pty)

        # Deepest clickable node that has the hovered node as a descendant.
        max_click_id = -1
        max_click_depth = 0
        for item_id in self.tree:
            node = self.tree[item_id]
            depth = node['depth']
            if max_id in node['descs'] and node['click']:
                if depth > max_click_depth:
                    max_click_depth = depth
                    max_click_id = item_id

        for item_id in self.tree:
            node = self.tree[item_id]
            if self.ignore_node(node):
                continue
            # ``inside`` already computes the node rectangle; the separate
            # get_node_info call whose result was discarded is gone.
            if not self.inside(node, self.ptx, self.pty):
                continue

            is_hovered = item_id == max_id
            self.show_widget(ctx, item_id, is_hovered, is_hovered)

        if max_click_id != -1 and max_click_id != max_id:
            self.show_widget(ctx, max_click_id, False, True)

        if self.focus_id >= 0:
            self.show_widget(ctx, self.focus_id, True, True, (1, 0, 0))

        for itemid in self.tree:
            node = self.tree[itemid]
            if 'label' not in node:
                continue
            # The label text is green on the focused node, blue elsewhere;
            # the box itself is always blue.
            color = (0, 1, 0) if itemid == self.focus_id else (0, 0, 1)
            self.show_widget(ctx, itemid, True, False, (0, 0, 1))
            self.show_desc(ctx, node, color)

    def move_sibling(self, to_next):
        leaf_list = []
        any_list = []
        for itemid in self.tree:
            node = self.tree[itemid]
            if not self.inside(node, self.clickx, self.clicky):
                continue

            if len(node['children']) == 0:
                leaf_list.append(itemid)
            any_list.append(itemid)

        for i in range(len(leaf_list)):
            if leaf_list[i] == self.focus_id:
                if to_next:
                    idx = (i + 1) % len(leaf_list)
                else:
                    idx = (i - 1) % len(leaf_list)
                self.focus_id = leaf_list[idx]
                return

        if len(leaf_list) == 0:
            for i in range(len(any_list)):
                if any_list[i] == self.focus_id:
                    if to_next:
                        idx = (i + 1) % len(any_list)
                    else:
                        idx = (i - 1) % len(any_list)
                    self.focus_id = any_list[idx]
                    return
            self.focus_id = any_list[0]
        else:
            self.focus_id = leaf_list[0]

    def show_widget(self, ctx, item_id, fill, show_text, colors=None):
        node = self.tree[item_id]

        (x, y, width, height, depth) = self.get_node_info(node)

        if colors is None:
            color_r = self.colors[item_id][0]
            color_g = self.colors[item_id][1]
            color_b = self.colors[item_id][2]
        else:
            (color_r, color_g, color_b) = colors

        ctx.rectangle(x, y, width, height)
        if fill:
            ctx.set_source_rgba(color_r, color_g, color_b, 0.3)
            ctx.fill()
        else:
            ctx.set_source_rgba(color_r, color_g, color_b, 1)
            ctx.set_line_width(5)
            ctx.stroke()

        if show_text:
            max_char = int(width / ctx.text_extents("a")[2])
            text = str(item_id)
            if node['click']:
                text = 'C' + text
            if node['text']:
                text = text + ':' + node['text'][:(max_char - 5)]
            elif node['id']:
                text += '#' + node['id'][:(max_char - 5)]

            self.show_text(ctx, x + width / 2, y + height / 2, text, color_r,
                           color_g, color_b)

    def show_desc(self, ctx, node, color=(0, 0, 1)):
        desc = node['label']
        (x, y, width, height, depth) = self.get_node_info(node)
        self.show_text(ctx, x + width / 2, y + height / 2, desc, color[0],
                       color[1], color[2])

    def show_text(self, ctx, x, y, text, color_r, color_g, color_b):
        x_bearing, y_bearing, text_width, text_height = ctx.text_extents(
            text)[:4]

        ctx.move_to(x - text_width / 2, y + text_height / 2)
        ctx.set_source_rgba(1, 1, 1, 1)
        ctx.set_line_width(5)
        ctx.text_path(text)
        ctx.stroke()

        ctx.move_to(x - text_width / 2, y + text_height / 2)
        ctx.set_source_rgba(color_r, color_g, color_b, 1)
        ctx.text_path(text)
        ctx.fill()

    def move_over(self, widget, evt):
        self.ptx = evt.x
        self.pty = evt.y
        self.queue_draw()

    def click_evt(self, widget, evt):
        if self.in_hint_screen:
            self.process_screen_hint_click(evt)
            return

        if evt.button == 3:
            self.focus_id = -1
        else:
            self.clickx = evt.x
            self.clicky = evt.y
            self.focus_id = self.find_containing_widget(evt.x, evt.y)

        self.queue_draw()

    def scroll_evt(self, widget, evt):
        """Move the focus along the widget tree on a scroll event.

        Does nothing when no widget is focused (``focus_id == -1``).
        Scrolling up focuses the result of ``find_parent_widget``; any other
        direction focuses the result of ``find_child_widget``.  A redraw is
        requested afterwards.
        """
        if self.focus_id == -1:
            return

        if evt.direction == Gdk.ScrollDirection.UP:
            step = self.find_parent_widget
        else:
            step = self.find_child_widget
        self.focus_id = step(self.focus_id)

        self.queue_draw()

    def find_parent_widget(self, wid):
        """Return the id of the first non-ignored node listing ``wid`` as a child.

        Nodes are visited in the iteration order of ``self.tree``.  When no
        such node exists, ``wid`` itself is returned.
        """
        for candidate_id, candidate in self.tree.items():
            if not self.ignore_node(candidate) and wid in candidate['children']:
                return candidate_id
        return wid

    def find_child_widget(self, wid):
        """Return the first non-ignored child of ``wid`` under the last click.

        Children are tested in list order with ``self.inside`` against the
        stored click position (``clickx``, ``clicky``).  When none matches,
        ``wid`` itself is returned.
        """
        for child_id in self.tree[wid]['children']:
            child = self.tree[child_id]
            if not self.ignore_node(child) and self.inside(
                    child, self.clickx, self.clicky):
                return child_id
        return wid

    def mark_direct(self):
        """Prompt for ``<id> <label>`` and label the node with that id.

        The input is split on the first space only, so the label may itself
        contain spaces.  An id without a label passes an empty label to
        ``mark_node``.  Cancelling the dialog, entering a non-numeric id, or
        naming an id that is not in ``self.tree`` leaves everything
        unchanged; the latter two print a short message.
        """
        enter = self.get_text('Please enter id_label', 'format: <id> <label>')
        if enter is None:
            return

        # partition() never raises, unlike unpacking split(' '), which failed
        # on "12 sign in" or on doubled spaces.
        id_text, _, label = enter.strip().partition(' ')
        label = label.strip()
        try:
            nodeid = int(id_text)
        except ValueError:
            print('invalid node id', id_text)
            return

        if nodeid not in self.tree:
            print('missing node', nodeid)
            return

        self.mark_node(self.tree[nodeid], label)

    def mark_focused(self):
        """Prompt for a label for the focused node and apply it.

        Returns without doing anything when no node is focused or the dialog
        is cancelled.  Outside ML mode the label goes to ``mark_node``.  In
        ML mode a non-empty label emits a hint for the widget and labels the
        node; an empty label emits a negative hint for the node's existing
        label and removes it, or does nothing if the node has none.
        """
        if self.focus_id < 0:
            return
        node = self.tree[self.focus_id]
        prompt = 'label for %s: %s (%s) #%s' % (self.focus_id, node['text'],
                                                node['desc'], node['id'])
        label = self.get_text('Please enter label', prompt)
        if label is None:
            return

        if not self.ml:
            self.mark_node(node, label)
            return

        if label != '':
            self.generate_hint_for_widget(self.focus_id, label)
            self.add_label(node, label)
            return

        focused = self.tree[self.focus_id]
        if 'label' in focused:
            self.generate_negative_hint(focused['label'])
            del focused['label']

    def generate_hint_for_widget(self, nodeid, label):
        """Emit a hint for ``label`` using the locator of node ``nodeid``.

        Returns whatever ``generate_hint`` returns.
        """
        widget_locator = locator.get_locator(self.tree, nodeid)
        return self.generate_hint(label, widget_locator)

    def generate_negative_hint(self, label):
        """Emit a hint for ``label`` whose hint text is ``'notexist'``."""
        negative = 'notexist'
        return self.generate_hint(label, negative)

    def generate_hint(self, label, hint):
        """Print one hint line of the form ``@<scr>.<label> <hint>``."""
        line = "@%s.%s %s" % (self.scr, label, hint)
        print(line)

    def mark_node(self, node, label):
        """Apply or clear a node's label, then save all labels.

        A non-empty ``label`` is applied with ``add_label`` and recorded with
        ``remember``.  An empty ``label`` deletes an existing label and calls
        ``forget``.  ``save_labels`` runs in every case.
        """
        if label != '':
            self.add_label(node, label)
            self.remember(node, label)
        elif 'label' in node:
            del node['label']
            self.forget(node)

        self.save_labels()

    def ocr_text(self):
        """Run OCR on the focused node's rectangle and print the results.

        Two passes are printed: one over the node rectangle grown by one
        pixel on the top/left and two pixels in width/height (clamped to the
        image size), and one over a rectangle shifted by 5% and shrunk to
        90% of that size.  Does nothing when no node is focused.
        """
        # Same guard as mark_focused/save_region: without it a key press with
        # no focus would index self.tree with -1.
        if self.focus_id < 0:
            return

        node = self.tree[self.focus_id]
        (x, y, width, height, _) = self.get_node_info(node)
        print(x, y, width, height)
        x = max(x - 1, 0)
        y = max(y - 1, 0)
        width = min(width + 2, self.imgwidth)
        height = min(height + 2, self.imgheight)
        # NOTE(review): an earlier version scaled all four values by OCR_RATIO;
        # confirm whether the OCR image is at the same scale as the node
        # coordinates.
        self.tesapi.SetRectangle(int(x), int(y), int(width), int(height))
        print("OCR ret:", self.tesapi.GetUTF8Text())

        x = min(x + width * 0.05, self.imgwidth)
        y = min(y + height * 0.05, self.imgheight)
        width *= 0.9
        height *= 0.9
        # The arithmetic above yields floats; the rectangle is expressed in
        # whole pixels, so truncate explicitly instead of relying on the
        # binding to convert (or reject) them.
        self.tesapi.SetRectangle(int(x), int(y), int(width), int(height))
        print("OCR ret:", self.tesapi.GetUTF8Text())

    def save_region(self):
        """Write the focused node's region of ``self.img`` to /tmp/region.png.

        The node rectangle is moved one pixel up/left (not below 0) and
        grown by two pixels in each dimension (not beyond the image size)
        before being painted into a new RGB24 surface.  Does nothing when no
        node is focused.
        """
        if self.focus_id == -1:
            return

        left, top, region_w, region_h, _ = self.get_node_info(
            self.tree[self.focus_id])
        left = max(left - 1, 0)
        top = max(top - 1, 0)
        region_w = min(region_w + 2, self.imgwidth)
        region_h = min(region_h + 2, self.imgheight)

        surface = cairo.ImageSurface(cairo.FORMAT_RGB24, int(region_w),
                                     int(region_h))
        painter = cairo.Context(surface)
        # A negative offset places the region's top-left corner at the
        # surface origin.
        painter.set_source_surface(self.img, -left, -top)
        painter.paint()

        surface.write_to_png("/tmp/region.png")

    def dump_memory(self):
        """Print every remembered ``id -> label`` entry for the current app."""
        for remembered_id, remembered in self.memory[self.app].items():
            print('MEM %s -> %s' % (remembered_id, remembered))

    def add_label(self, node, desc):
        """Print ``<short node description> -> <desc>`` and set the label."""
        summary = util.describe_node(node, short=True)
        print('%s -> %s' % (summary, desc))
        node['label'] = desc

    def auto_label(self):
        """Label unlabelled nodes from the per-app memory, then save labels.

        A node is labelled when its ``'id'`` is a key of
        ``self.memory[self.app]``.  Ids whose remembered value is ``'MUL'``
        are skipped with a printed notice.
        """
        for node in self.tree.values():
            if 'label' in node:
                continue
            widget_id = node['id']
            remembered = self.memory[self.app]
            if widget_id not in remembered:
                continue
            if remembered[widget_id] == 'MUL':
                print('skip MUL id: %s' % widget_id)
            else:
                self.add_label(node, remembered[widget_id])
        self.save_labels()

    def remove_all(self):
        """Delete the ``'label'`` entry from every node that has one.

        Only the in-memory tree is changed; nothing is saved here.
        """
        for node in self.tree.values():
            node.pop('label', None)

    def process_screen_hint_click(self, evt):
        """Turn a click into a screen-hint term and append it.

        The widget under the click is resolved to a locator.  Button 3
        prefixes the term with ``'not '``.  A message is printed and nothing
        is added when no widget is found or no locator can be produced.
        """
        target_id = self.find_containing_widget(evt.x, evt.y)
        if target_id == -1:
            print('Invalid widget selected')
            return

        widget_locator = locator.get_locator(self.tree, target_id)
        if widget_locator is None:
            print('Cannot generate hint for this widget')
            return

        term = str(widget_locator)
        # Button 3 negates the term.
        term = 'not ' + term if evt.button == 3 else term

        print('Widget hint: "%s"' % term)
        self.add_screen_hint(term)

    def add_screen_hint(self, hint):
        """Append ``hint`` to ``screen_hint``, joining terms with ``' && '``."""
        current = self.screen_hint
        self.screen_hint = hint if current == '' else current + ' && ' + hint

    def hint_screen(self):
        """Toggle screen-hint recording.

        When recording is active, stop it and print
        ``%<screen name> <accumulated hint>``.  Otherwise prompt for a screen
        name; unless the prompt is cancelled, remember the name, start
        recording and reset the accumulated hint to the empty string.
        """
        if self.in_hint_screen:
            self.in_hint_screen = False
            print("%%%s %s" % (self.screen_hint_label, self.screen_hint))
            return

        screen_name = self.get_text('Please enter screen name',
                                    'screen name like "signin"')
        if screen_name is None:
            return

        self.screen_hint_label = screen_name
        self.in_hint_screen = True
        self.screen_hint = ''

    def key_evt(self, widget, evt):
        """Dispatch a key press to the matching action and request a redraw.

        Keys without an entry in the table are ignored; the redraw is
        requested either way.
        """
        # NOTE(review): Left maps to to_next=True and Right to to_next=False;
        # confirm against move_sibling that this direction is intended.
        actions = {
            Gdk.KEY_space: self.mark_focused,
            Gdk.KEY_Tab: self.load,
            Gdk.KEY_Left: lambda: self.move_sibling(to_next=True),
            Gdk.KEY_Right: lambda: self.move_sibling(to_next=False),
            Gdk.KEY_v: self.ocr_text,
            Gdk.KEY_a: self.auto_label,
            Gdk.KEY_p: lambda: self.load(prev=True),
            Gdk.KEY_l: self.mark_direct,
            Gdk.KEY_r: self.remove_all,
            Gdk.KEY_s: self.save_region,
            Gdk.KEY_x: self.hint_screen,
        }
        action = actions.get(evt.keyval)
        if action is not None:
            action()
        self.queue_draw()

    def save_labels(self):
        """Rewrite ``self.descname`` with one ``<id> <label>`` line per node.

        Only nodes that carry a ``'label'`` entry are written, in ascending
        id order.
        """
        with open(self.descname, 'w') as outf:
            outf.writelines(
                "%s %s\n" % (node_id, self.tree[node_id]['label'])
                for node_id in sorted(self.tree)
                if 'label' in self.tree[node_id])

    def get_text(self, title, prompt):
        """Ask the user for a line of text in a modal OK/Cancel dialog.

        Args:
            title: primary message of the dialog.
            prompt: secondary text shown under the title.

        Returns:
            The entry's text when the dialog is answered with OK (pressing
            Enter in the entry counts as OK), otherwise ``None``.
        """
        # Build on a message dialog, parented to this window.
        dialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.QUESTION,
                                   Gtk.ButtonsType.OK_CANCEL, title)
        dialog.format_secondary_text(prompt)
        # Create the text input field.
        entry = Gtk.Entry()
        # Let Enter in the entry answer the dialog with OK.
        entry.connect("activate",
                      lambda entry: dialog.response(Gtk.ResponseType.OK))
        # Horizontal box holding a caption and the entry.
        hbox = Gtk.HBox()
        hbox.pack_start(Gtk.Label("Label:"), False, 5, 5)
        hbox.pack_end(entry, True, 0, 0)
        # Add the box to the dialog and make everything visible.
        dialog.vbox.pack_end(hbox, True, True, 0)
        dialog.show_all()
        # Block until the user answers.
        response = dialog.run()
        if response == Gtk.ResponseType.OK:
            text = entry.get_text()
        else:
            text = None
        dialog.destroy()
        return text
Exemple #14
0
def ocr_on_bounding_boxes(img, components):
    """Run Japanese OCR on each component's bounding box and collect blurbs.

    For every entry of ``components`` the image is segmented into lines, a
    Tesseract API object is created and configured, the component's
    rectangle is copied out of ``img`` and recognised, and a ``Blurb`` is
    recorded when the mean confidence is positive and the text is non-empty.

    Args:
        img: source image; converted with ``cv2.cv.fromarray``.
        components: iterable of indexables where item 0 and item 1 expose
            ``.start``/``.stop`` (item 0 supplies y/height, item 1 x/width).

    Returns:
        List of ``Blurb`` objects, in component order.
    """

    blurbs = []
    for component in components:
        (aspect, vertical, horizontal) = segment_into_lines(img, component)
        #if len(vertical)<2 and len(horizontal)<2:continue

        #attempt to separately process furigana
        #(furigana, non_furigana) = estimate_furigana(vertical)
        '''
      from http://code.google.com/p/tesseract-ocr/wiki/ControlParams
      Useful parameters for Japanese and Chinese

      Some Japanese tesseract user found these parameters helpful for increasing tesseract-ocr (3.02) accuracy for Japanese :

      Name 	Suggested value 	Description
      chop_enable 	T 	Chop enable.
      use_new_state_cost 	F 	Use new state cost heuristics for segmentation state evaluation
      segment_segcost_rating 	F 	Incorporate segmentation cost in word rating?
      enable_new_segsearch 	0 	Enable new segmentation search path.
      language_model_ngram_on 	0 	Turn on/off the use of character ngram model.
      textord_force_make_prop_words 	F 	Force proportional word segmentation on all rows. 
    '''
        #now run OCR on this bounding box
        # NOTE(review): a fresh API object is created for every component and
        # is not released anywhere in this function; confirm whether it
        # should be reused or ended.
        api = PyTessBaseAPI(path='C:/Program Files/Tesseract-OCR/tessdata')
        api.Init(".", "jpn", tesseract.OEM_DEFAULT)
        #handle single column lines as "vertical align" and Auto segmentation otherwise
        if len(vertical) < 2:
            api.SetPageSegMode(
                5)  #tesseract.PSM_VERTICAL_ALIGN)#PSM_AUTO)#PSM_SINGLECHAR)#
        else:
            api.SetPageSegMode(tesseract.PSM_AUTO)  #PSM_SINGLECHAR)#
        # Parameters suggested for Japanese in the note quoted above, plus a
        # character blacklist and tab-finding debug output switched off.
        api.SetVariable('chop_enable', 'T')
        api.SetVariable('use_new_state_cost', 'F')
        api.SetVariable('segment_segcost_rating', 'F')
        api.SetVariable('enable_new_segsearch', '0')
        api.SetVariable('language_model_ngram_on', '0')
        api.SetVariable('textord_force_make_prop_words', 'F')
        api.SetVariable('tessedit_char_blacklist', '}><L')
        api.SetVariable('textord_debug_tabfind', '0')

        # Bounding box of the component: item 1 gives x/width, item 0 y/height.
        x = component[1].start
        y = component[0].start
        w = component[1].stop - x
        h = component[0].stop - y
        # Copy the rectangle into its own 8-bit single-channel image.
        roi = cv2.cv.CreateImage((w, h), 8, 1)
        sub = cv2.cv.GetSubRect(cv2.cv.fromarray(img), (x, y, w, h))
        cv2.cv.Copy(sub, roi)
        tesseract.SetCvImage(roi, api)
        txt = api.GetUTF8Text()
        conf = api.MeanTextConf()
        # Keep only results with positive confidence and some text.
        if conf > 0 and len(txt) > 0:
            blurb = Blurb(x, y, w, h, txt, confidence=conf)
            blurbs.append(blurb)
        '''
    for line in non_furigana:
      x=line[1].start
      y=line[0].start
      w=line[1].stop-x
      h=line[0].stop-y
      roi = cv2.cv.CreateImage((w,h), 8, 1)
      sub = cv2.cv.GetSubRect(cv2.cv.fromarray(img), (x, y, w, h))
      cv2.cv.Copy(sub, roi)
      tesseract.SetCvImage(roi, api)
      txt=api.GetUTF8Text()
      conf=api.MeanTextConf()
      if conf>0:
        blurb = Blurb(x, y, w, h, txt, confidence=conf)
        blurbs.append(blurb)
    '''
    return blurbs
Exemple #15
0
from tesserocr import PyTessBaseAPI, RIL, iterate_level
from PIL import Image
from utils import *
import sys

# Upper-case letters accepted by the OCR whitelist.
# NOTE(review): 'O' is absent from this string -- presumably to avoid
# confusion with the digit zero; confirm that this is intended.
ABCs = 'ABCDEFGHIJKLMNPQRSTUVWXYZ'
# Characters Tesseract may emit: digits, 'X', parentheses, both slashes and
# the letters above.
WHITELIST = "1234567890X()/\\" + ABCs
# Module-level engine configured with page-segmentation mode "7" and the
# whitelist above.
# NOTE(review): the script body below opens its own PyTessBaseAPI with the
# same settings; confirm this instance is used elsewhere.
OCR_API = PyTessBaseAPI()
#OCR_API = PyTessBaseAPI(path='C:/Users/conor/Downloads/tessdata-master')
OCR_API.SetVariable('tessedit_pageseg_mode', "7")
OCR_API.SetVariable('tessedit_char_whitelist', WHITELIST)

if __name__ == '__main__':
    # At least one input image is required.
    if not sys.argv[1:]:
        print('usage: %s <input.png [...]>' % sys.argv[0])
        sys.exit(1)

    def num_from_name(x):
        """Return the integer formed by all decimal digits of *x*, in order."""
        return int(''.join(ch for ch in x if ch in '1234567890'))

    with PyTessBaseAPI() as api:
        api.SetVariable('tessedit_pageseg_mode', "7")
        api.SetVariable('tessedit_char_whitelist', WHITELIST)

        # Process the inputs in ascending order of the number in their name.
        for x in sorted(sys.argv[1:], key=num_from_name):
            im = load_image(x)
Exemple #16
0
    def run_tesseract(image_file):
        """Recognise ``image_file`` and return its layout as nested dicts.

        Uses the enclosing scope's ``tessdata`` as the data path when it is
        truthy.  The result is an empty dict when layout analysis or the
        result iterator is unavailable; otherwise it holds the page
        orientation fields and a ``"blocks"`` list nested as
        blocks -> paragraphs -> lines -> words, each level carrying its
        bounding box, recognised text and confidence.
        """
        if tessdata:
            api = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
        else:
            api = PyTessBaseAPI(psm=PSM.AUTO_OSD)

        api.SetImageFile(image_file)
        # Enable table detection before recognition.
        api.SetVariable("textord_tablefind_recognize_tables", "T")
        api.SetVariable("textord_tabfind_find_tables", "T")
        api.Recognize()

        document = {}
        it = api.AnalyseLayout()
        if it is not None:
            orientation, direction, order, deskew_angle = it.Orientation()
            # NOTE(review): Recognize() is called a second time here, after
            # AnalyseLayout(); confirm whether the first call is needed.
            api.Recognize()
            ri = api.GetIterator()
            if ri is not None:
                document = {
                    "orientation": orientation,
                    "writing_direction": direction,
                    "text_direction": order,
                    "deskew_angle": deskew_angle,
                    "blocks": []
                }
                # One pass per block.  The inner loops leave the iterator on
                # the block's last word, so Next(BLOCK) moves to the next one.
                while ri.IsAtBeginningOf(RIL.BLOCK):
                    block = {
                        "block_type": ri.BlockType(),
                        "block_type_str": BlockType[ri.BlockType()],
                        "box": ri.BoundingBox(RIL.BLOCK),
                        "ocr_text": ri.GetUTF8Text(RIL.BLOCK),
                        "confidence": ri.Confidence(RIL.BLOCK),
                        "paragraphs": []
                    }
                    break_para = False
                    while True:
                        # The "last element" flags are sampled before the
                        # inner loops advance the iterator.
                        if ri.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                            break_para = True
                        break_line = False
                        paragraph = {
                            "box": ri.BoundingBox(RIL.PARA),
                            "ocr_text": ri.GetUTF8Text(RIL.PARA),
                            "paragraph_info": list(ri.ParagraphInfo()),
                            "confidence": ri.Confidence(RIL.PARA),
                            "lines": []
                        }
                        while True:
                            if ri.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                                break_line = True
                            break_word = False
                            line = {
                                "box": ri.BoundingBox(RIL.TEXTLINE),
                                "ocr_text": ri.GetUTF8Text(RIL.TEXTLINE),
                                "confidence": ri.Confidence(RIL.TEXTLINE),
                                "words": []
                            }
                            while True:
                                word = {
                                    "box": ri.BoundingBox(RIL.WORD),
                                    "ocr_text": ri.GetUTF8Text(RIL.WORD),
                                    "confidence": ri.Confidence(RIL.WORD),
                                    "attributes": ri.WordFontAttributes()
                                }
                                if ri.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                                    break_word = True
                                line["words"].append(word)
                                if break_word:
                                    break
                                ri.Next(RIL.WORD)
                            paragraph["lines"].append(line)
                            if break_line:
                                break
                            ri.Next(RIL.TEXTLINE)
                        block["paragraphs"].append(paragraph)
                        if break_para:
                            break
                        ri.Next(RIL.PARA)
                    document["blocks"].append(block)
                    ri.Next(RIL.BLOCK)
        return document
Exemple #17
0
class OCREngine:
    """Find text regions in an image with OpenCV and read them with Tesseract.

    Wraps a single ``PyTessBaseAPI`` instance.  Usable as a context manager:
    leaving the ``with`` block calls :meth:`close`.
    """

    def __init__(self, extra_whitelist='', all_unicode=False, lang='eng'):
        """
        Args:
          extra_whitelist: string of extra chars for Tesseract to consider
              only takes effect when all_unicode is False
          all_unicode: if True, Tess will consider all possible unicode characters
          lang: OCR language
        """
        self.tess = PyTessBaseAPI(psm=PSM_MODE, lang=lang)
        self.is_closed = False
        if all_unicode:
            self.whitelist_chars = None
        else:
            self.whitelist_chars = ("abcdefghijklmnopqrstuvwxyz"
                                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                    "1234567890"
                                    r"~!@#$%^&*()_+-={}|[]\:;'<>?,./"
                                    '"'
                                    "©") + extra_whitelist
            self.tess.SetVariable('tessedit_char_whitelist',
                                  self.whitelist_chars)

    def check_engine(self):
        """Raise ``RuntimeError`` if :meth:`close` has already been called."""
        if self.is_closed:
            raise RuntimeError('OCREngine has been closed.')

    def recognize(self,
                  image,
                  min_text_size=MIN_TEXT_SIZE,
                  max_text_size=MAX_TEXT_SIZE,
                  uniformity_thresh=UNIFORMITY_THRESH,
                  thin_line_thresh=THIN_LINE_THRESH,
                  conf_thresh=CONF_THRESH,
                  box_expand_factor=BOX_EXPAND_FACTOR,
                  horizontal_pooling=HORIZONTAL_POOLING):
        """
        Generator yielding one ``Blob`` per accepted text region.
        http://stackoverflow.com/questions/23506105/extracting-text-opencv

        Because this is a generator, the engine check and the parameter
        assertions below only run once iteration starts.

        Args:
          image: input accepted by ``get_np_img``; can be one of:
            - string: image file path
            - ndarray: numpy image
            - PIL.Image.Image: PIL image
          min_text_size:
            min text height/width in pixels, below which will be ignored
          max_text_size:
            max text height in pixels, above which will be ignored
          uniformity_thresh (0.0 <= _ < 1.0):
            remove all black or all white regions
            ignore a region if the number of pixels neither black nor white < [thresh]
          thin_line_thresh (must be odd int):
            remove all lines thinner than [thresh] pixels.
            can be used to remove the thin borders of web page textboxes.
          conf_thresh (0 <= _ < 100):
            ignore regions with OCR confidence <= thresh.
          box_expand_factor (0.0 <= _ < 1.0):
            expand the bounding box outwards in case certain chars are cutoff.
          horizontal_pooling:
            result bounding boxes will be more connected with more pooling,
            but large pooling might lower accuracy.

        Raises:
          RuntimeError: the engine has been closed.
          AssertionError: a parameter is outside its documented range.
        """
        self.check_engine()
        # param sanity check
        assert max_text_size > min_text_size > 0
        assert 0.0 <= uniformity_thresh < 1.0
        assert thin_line_thresh % 2 == 1
        assert 0 <= conf_thresh < 100
        assert 0.0 <= box_expand_factor < 1.0
        assert horizontal_pooling > 0

        image = get_np_img(image)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)
        img = img_gray
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
        # cut off all gray pixels < 30.
        # `cv2.THRESH_BINARY | cv2.THRESH_OTSU` is also good, but might overlook certain light gray areas
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        # connect horizontally oriented regions
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                           (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        # remove all thin textbox borders (e.g. web page textbox)
        if thin_line_thresh > 0:
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (thin_line_thresh, thin_line_thresh))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        # findContours returns (image, contours, hierarchy) on OpenCV 3 but
        # (contours, hierarchy) on OpenCV 2 and 4; the last two items are the
        # same everywhere.
        contours, _hierarchy = cv2.findContours(
            img, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        for contour in contours:
            x, y, w, h = box = Box(*cv2.boundingRect(contour))
            # remove regions that are beyond size limits
            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue
            # remove regions that are almost uniformly white or black
            binary_region = crop(img_bw, box)
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                continue
            # expand the borders a little bit to include cutoff chars
            expansion = int(min(h, w) * box_expand_factor)
            x = max(0, x - expansion)
            y = max(0, y - expansion)
            h, w = h + 2 * expansion, w + 2 * expansion
            if h > w:  # further extend the long axis
                h += 2 * expansion
            elif w > h:
                w += 2 * expansion
            # image passed to Tess should be grayscale.
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            box = Box(x, y, w, h)
            img_crop = crop(img_gray, box)
            # make sure that crops passed in tesseract have minimum x-height
            # http://github.com/tesseract-ocr/tesseract/wiki/FAQ#is-there-a-minimum-text-size-it-wont-read-screen-text
            img_crop = cv2.resize(img_crop,
                                  (int(img_crop.shape[1] * CROP_RESIZE_HEIGHT /
                                       img_crop.shape[0]), CROP_RESIZE_HEIGHT))
            ocr_text, conf = self.run_tess(img_crop)
            if conf > conf_thresh:
                yield Blob(ocr_text, box, conf)

    def _experiment_segment(self,
                            img,
                            min_text_size=MIN_TEXT_SIZE,
                            max_text_size=MAX_TEXT_SIZE,
                            uniformity_thresh=UNIFORMITY_THRESH,
                            horizontal_pooling=HORIZONTAL_POOLING):
        """
        PRIVATE: experiment only

        Displays each stage of the segmentation pipeline with ``disp``.
        Every contour box is drawn on a copy of the input; boxes that pass
        the size and uniformity filters are drawn on ``img`` itself, so the
        caller's array is modified.
        """
        img_init = img  # preserve initial image
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)

        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img_gray, cv2.MORPH_GRADIENT, morph_kernel)
        disp(img)
        # Fixed threshold at 30; OTSU and inverse adaptive thresholding were
        # tried here earlier.
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        disp(img)
        # connect horizontally oriented regions
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                 (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, morph_kernel)
        disp(img)

        # Remove thin structures with a 7x7 opening.  (A disabled
        # erode/dilate alternative used to live behind an `if 0`.)
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
        img = cv2.morphologyEx(img, cv2.MORPH_OPEN, morph_kernel)
        disp(img)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        # [-2:] keeps (contours, hierarchy) on OpenCV 2, 3 and 4 alike.
        contours, _hierarchy = cv2.findContours(
            img, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        img_copy = np.copy(img_init)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            draw_rect(img_copy, x, y, w, h)

            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue

            binary_region = img_bw[y:y + h, x:x + w]
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                # ignore mostly white or black regions
                continue
            # the image must be grayscale, otherwise Tesseract will SegFault
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            draw_rect(img_init, x, y, w, h)
        disp(img_copy)
        disp(img_init, 0)

    def run_tess(self, img):
        """
        Recognise a single image with the wrapped Tesseract API.

        Tesseract python API source code:
        https://github.com/sirfz/tesserocr/blob/master/tesserocr.pyx

        Args:
          img: numpy array (converted with ``np2PIL``) or an image object
              accepted by ``SetImage``.

        Returns:
          (ocr_text, confidence)

        Raises:
          RuntimeError: the engine has been closed.
        """
        # Fail with a clear error instead of touching an ended API.
        self.check_engine()
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        self.tess.SetImage(img)
        ocr_text = self.tess.GetUTF8Text().strip()
        conf = self.tess.MeanTextConf()
        return ocr_text, conf

    def _deprec_run_tess(self, img):
        "GetComponentImages throws SegFault randomly. No way to fix. :("
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        # The converted image was previously never handed to Tesseract, so
        # the components came from whatever image had been set before.
        self.tess.SetImage(img)

        components = self.tess.GetComponentImages(RIL.TEXTLINE, True)
        for _, inner_box, block_id, paragraph_id in components:
            # box is a dict with x, y, w and h keys
            inner_box = Box(**inner_box)
            if inner_box.w < MIN_TEXT_SIZE or inner_box.h < MIN_TEXT_SIZE:
                continue
            self.tess.SetRectangle(*inner_box)
            ocr_text = self.tess.GetUTF8Text().strip()
            conf = self.tess.MeanTextConf()
            yield ocr_text, inner_box, conf

    def close(self):
        """End the Tesseract API and mark the engine closed (idempotent)."""
        if not self.is_closed:
            self.tess.End()
            self.is_closed = True

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()
Exemple #18
0
def _create_tesseract():
    """Build a Tesseract API restricted to single lines of capital letters.

    Every variable in ``set_to_f`` is set to ``"F"``, in order; then
    ``edges_use_new_outline_complexity`` is set to ``"T"``, the whitelist is
    limited to A-Z and the page-segmentation mode is set to single line.
    """
    # NOTE(review): the segment_penalty_* names look like numeric parameters;
    # confirm that "F" is an accepted value for them.
    set_to_f = (
        "load_system_dawg",
        "load_freq_dawg",
        "load_punc_dawg",
        "load_number_dawg",
        "load_unambig_dawg",
        "load_bigram_dawg",
        "load_fixed_length_dawgs",
        "classify_enable_learning",
        "classify_enable_adaptive_matcher",
        "segment_penalty_garbage",
        "segment_penalty_dict_nonword",
        "segment_penalty_dict_frequent_word",
        "segment_penalty_dict_case_ok",
        "segment_penalty_dict_case_bad",
    )

    engine = PyTessBaseAPI()
    for variable in set_to_f:
        engine.SetVariable(variable, "F")

    engine.SetVariable("edges_use_new_outline_complexity", "T")
    engine.SetVariable("tessedit_char_whitelist",
                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
    engine.SetPageSegMode(PSM.SINGLE_LINE)

    return engine
Exemple #19
0
import tesserocr
import numpy as np
import cv2

# Browser-like User-Agent header sent with the captcha request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
headers = {'User-Agent': user_agent}

# Download the captcha image (10 second timeout) and print the HTTP status.
url = 'http://www.bjsuperpass.com/captcha.svl?d=1503144107405'
rs = requests.get(url, headers=headers, timeout=10)
print('获取公交一卡通网站的验证码', rs.status_code)
# TODO: capture the cookies

print('用BytesIO导入到Image,Numpy,Opencv')
s1 = BytesIO(rs.content)  # img = Image.open(BytesIO(resp.read()))
#
img = Image.open(s1)
img = img.convert("RGB")
im = np.array(img)
cv2.imshow('src', im)
cv2.waitKey(0)
cv2.imwrite('captcha.jpg', im)

# Configure an API instance that only recognises digits and lower-case
# letters.
# NOTE(review): `ocr` is never given an image; the text printed below comes
# from tesserocr.image_to_text(), which does not go through this instance --
# confirm whether the whitelist is meant to apply.
ocr = PyTessBaseAPI()
# ocr.Init(".", "eng", tesseract.OEM_DEFAULT)
ocr.SetVariable("tessedit_char_whitelist",
                "0123456789abcdefghijklmnopqrstuvwxyz")
# ocr.SetPageSegMode(tesseract.PSM_AUTO)
# ocr.SetImage(img)

# Print the recognised captcha text.
print('验证码是', tesserocr.image_to_text(img))
# TODO: send the cookies
Exemple #20
0
#        api.SetImageFile(img)
#        print( api.GetUTF8Text())
#        print( api.AllWordConfidences())

# Open the sample image with PIL.
# NOTE(review): `img` is not used by the visible statements below, which
# pass the file path to Tesseract instead.
img = Image.open(
    glo.DATA_FOLDER + '/number_range_predictorcropped3.png'
)  #glo.UNCLASSIFIED_GLOBAL_CAPTURES_FOLDER + '/fullcapture961 .png')
#img = img.convert('L')

from tesserocr import PyTessBaseAPI, RIL, iterate_level, PSM
#print(help(tesserocr))

# Recognise the same file: page-segmentation mode "7", non-dictionary word
# penalty set to 0 and the document dictionary disabled.
api = PyTessBaseAPI()
api.Init()
api.SetImageFile(glo.DATA_FOLDER + '/number_range_predictorcropped3.png')
api.SetVariable("tessedit_pageseg_mode", "7")
api.SetVariable("language_model_penalty_non_dict_word", "0")
api.SetVariable("doc_dict_enable", "0")
# The text is encoded to UTF-8 bytes and stripped before printing.
print("recognized txt:", api.GetUTF8Text().encode('utf-8').strip())
#api.Recognize()
"""
ri = api.GetIterator()
level = RIL.SYMBOL
for r in iterate_level(ri, level):
    symbol = r.GetUTF8Text(level)  # r == ri
    conf = r.Confidence(level)
    print(u'symbol {}, conf: {}'.format(symbol, conf).encode('utf-8').strip())
    indent = False
    ci = r.GetChoiceIterator()
    for c in ci:
        if indent:
Exemple #21
0
class OCREngine:
    """OCR helper that reads an image with Tesseract and builds a DocumentGraph.

    A single ``PyTessBaseAPI`` handle is created in ``__init__`` and released
    by :meth:`close`. The class is a context manager, so
    ``with OCREngine() as engine: ...`` closes the handle on exit.
    """

    def __init__(self, psm: int = 3, config: dict = None):
        """Create the Tesseract API and apply optional variables.

        Args:
            psm: Page segmentation mode passed to ``PyTessBaseAPI``.
            config: Mapping of Tesseract variable name to value; each entry is
                applied with ``SetVariable``. ``None`` (the default) applies
                no variables.
        """
        # Use a None sentinel rather than a shared mutable ``{}`` default.
        if config is None:
            config = {}
        logging.info('Initializing OCR engine with PSM=%d and configs=%s',
                     psm, config)
        self.api = PyTessBaseAPI(psm=psm)
        for key, value in config.items():
            # SetVariable reports failure through its return value; surface
            # it instead of silently running with an unapplied setting.
            if not self.api.SetVariable(key, value):
                logging.warning('Could not set Tesseract variable %s=%s',
                                key, value)
        logging.debug('OCR engine initialized')

    def build_graph(self,
                    image_path: str,
                    scheme: str = None) -> DocumentGraph:
        """Run OCR on ``image_path`` and wrap the words in a DocumentGraph.

        Args:
            image_path: Path of the image file to recognize.
            scheme: Coordinate scheme forwarded to
                ``WordNode.convert_coords``; ``None`` falls back to ``'xyxy'``.

        Returns:
            A ``DocumentGraph`` built from the recognized word nodes.
        """
        hocr = self._get_hocr(image_path)
        words = self._get_words(hocr, scheme)
        return DocumentGraph(words)

    def _get_hocr(self, image_path: str) -> str:
        """Return the hOCR text Tesseract produces for ``image_path``."""
        logging.info('Reading to hOCR from image: %s', image_path)
        self.api.SetImageFile(image_path)
        hocr_text = self.api.GetHOCRText(0)
        logging.debug('Image read')

        return hocr_text

    def _get_words(self, hocr: str, scheme: str = None):
        """Parse hOCR markup into a list of word nodes.

        Elements with class ``ocrx_word`` are converted with
        :meth:`_make_node`; tags it rejects (returns ``None`` for) are dropped.
        """
        logging.info('Extracting words from hOCR.')
        if scheme is None:
            logging.warning('No scheme specified. Assuming xyxy')
            scheme = 'xyxy'

        soup = BeautifulSoup(hocr, 'html.parser')
        candidates = (self._make_node(tag, scheme=scheme)
                      for tag in soup.select('.ocrx_word'))

        return [node for node in candidates if node is not None]

    def _make_node(self, tag: dict, scheme: str) -> WordNode:
        """Build a WordNode from one hOCR word tag.

        The tag's ``title`` attribute is split on ``;`` and must yield exactly
        two fields: the first carries the box coordinates, the second the
        confidence. In each field the leading token (the property name) is
        skipped and the remaining tokens are parsed as integers.

        Returns:
            The new ``WordNode``, or ``None`` when the title does not have
            exactly two fields.
        """
        fields = tag['title'].split(';')
        if len(fields) != 2:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning('Malformed tag: %s. Skipping.', tag)
            return None

        word = tag.text
        coordinates = tuple(int(part) for part in fields[0].split()[1:])
        conf = int(fields[1].split()[1])

        wn = WordNode(word, WordNode.convert_coords(coordinates, scheme), conf)
        logging.debug('Made word: %r', wn)

        return wn

    def close(self):
        """Release the underlying Tesseract API handle."""
        self.api.End()
        logging.debug('OCR engine closed')

    def __enter__(self):
        """Return the engine itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Report any in-flight exception, then close the engine.

        Returns ``None``, so the exception (if any) still propagates.
        """
        if exc_type:
            print("type: %s\nvalue: %s\ntrace: %s" %
                  (exc_type, exc_value, traceback))

        self.close()