Esempio n. 1
0
def read_text_with_confidence(image,
                              lang='fast_ind',
                              path='/usr/share/tesseract-ocr/5/tessdata',
                              psm=4,
                              whitelist=''):
    """Run Tesseract (LSTM engine only) on ``image``.

    Args:
        image: Array exposing ``.shape`` (height, width, ...) that
            ``Image.fromarray`` accepts.
        lang: Language / traineddata name handed to ``PyTessBaseAPI``.
        path: Directory containing the traineddata files.
        psm: Page segmentation mode handed to ``PyTessBaseAPI``.
        whitelist: Characters recognition is restricted to; the empty
            string leaves the character set unrestricted.

    Returns:
        ``(text, confidence)``. ``('', 0)`` is returned for an image with
        no pixels, and also when Tesseract raises during recognition.
    """
    height, width = image.shape[:2]

    if height <= 0 or width <= 0:
        return '', 0

    # Bind the results up front: the handler below swallows Tesseract
    # errors, and without these defaults the final ``return`` would raise
    # UnboundLocalError instead of reporting "nothing recognised".
    text, confidence = '', 0

    image_pil = Image.fromarray(image)

    api = PyTessBaseAPI(lang=lang, psm=psm, path=path, oem=OEM.LSTM_ONLY)

    try:
        api.SetImage(image_pil)

        if whitelist != '':
            api.SetVariable('tessedit_char_whitelist', whitelist)

        api.Recognize()

        text = api.GetUTF8Text()
        confidence = api.MeanTextConf()
    except Exception:
        print("[ERROR] Tesseract exception")
    finally:
        # Always release the native engine, even after a failure.
        api.End()

    return text, confidence
Esempio n. 2
0
def get_text_bounding_boxes(image, psm=12):
    """Return the text-line boxes Tesseract finds in ``image``.

    Args:
        image: Array exposing ``.shape`` (height, width, ...) that
            ``Image.fromarray`` accepts.
        psm: Page segmentation mode handed to ``PyTessBaseAPI``.

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per ``RIL.TEXTLINE``
        component; an empty list when the image has no pixels.
    """
    rows, cols = image.shape[:2]
    if rows <= 0 or cols <= 0:
        return []

    pil_image = Image.fromarray(image)  # numpy array -> PIL image
    engine = PyTessBaseAPI(psm=psm, oem=OEM.LSTM_ONLY)

    try:
        engine.SetImage(pil_image)
        engine.Recognize()
        components = engine.GetComponentImages(RIL.TEXTLINE, True)
        # Each component is a 4-tuple; only the rectangle dict is needed.
        found = [
            (rect['x'], rect['y'], rect['w'], rect['h'])
            for _, rect, _, _ in components
        ]
    finally:
        engine.End()

    return found
 def __init__(self, image_file, tessdata):
     """Open ``image_file`` with Tesseract and run recognition once.

     The engine is created with ``PSM.AUTO_OSD`` and traineddata from
     ``tessdata``, both table-finding variables are set to ``"T"``, and the
     recognised engine is stored on ``self.api``.
     """
     engine = PyTessBaseAPI(path=tessdata, psm=PSM.AUTO_OSD)
     engine.SetImageFile(image_file)
     for variable in ("textord_tablefind_recognize_tables",
                      "textord_tabfind_find_tables"):
         engine.SetVariable(variable, "T")
     engine.Recognize()
     self.api = engine
Esempio n. 4
0
def getWords(pages, letters_cache):
    """Collect word blocks from ``pages`` of the module-level ``doc``.

    Text blocks whose stripped text is not exactly one character are
    numbered with a running ``bid`` and grouped into words by
    ``groupBlocks``. Single-character blocks are OCR'd (only when
    ``letters_cache`` is falsy) and recorded together with the ``bid`` that
    the next regular block will receive.

    Args:
        pages: Iterable of page numbers used to index ``doc``.
        letters_cache: When truthy, single-letter blocks are skipped
            instead of being OCR'd.

    Returns:
        ``(standard_words, split_words, letters)`` where ``letters`` is a
        dict with parallel ``'bid'`` and ``'letters'`` lists.
    """
    standard_words, split_words = [], []
    letters = {'bid': [], 'letters': []}
    prev_word = None
    bid = 0
    # Render scale for the letter crops and the ``eps`` value handed to
    # ``resize`` (its exact meaning is defined by that helper).
    sf, eps = 25 / 6, 1

    letter_detect = PyTessBaseAPI(psm=8, lang='eng')
    try:
        letter_detect.SetVariable('tessedit_char_whitelist', ascii_uppercase)

        for pg_num in pages:
            page = doc[pg_num]
            # Rendered lazily and at most once per page: every
            # single-letter block on a page crops from the same picture.
            page_img = None

            # get initial block bounding boxes
            blocks = []
            for block in page.getText("blocks"):
                bbox = block[:4]
                text = block[4].strip()
                if len(text) != 1:  # not a single letter
                    blocks.append({
                        'bid': bid,
                        'bbox': bbox,
                        'pg': page.number,
                        'text': text
                    })
                    bid += 1
                elif not letters_cache:
                    # maps each bid to a corresponding dictionary letter
                    # this provides a heuristic for our search
                    if page_img is None:
                        pix = page.getPixmap(matrix=fitz.Matrix(sf, sf))
                        page_img = Image.open(io.BytesIO(pix.getPNGData()))
                    block_img = page_img.crop(resize(bbox, sf, eps))
                    letter_detect.SetImage(block_img)
                    letter_detect.Recognize()
                    letter = letter_detect.AllWords()[0]
                    assert (len(letter) == 1)
                    letters['bid'].append(bid)
                    letters['letters'].append(letter.lower())

            standard, split, prev_word, insert_word = groupBlocks(
                blocks, prev_word, pg_num)

            # last block from previous page (no spillover)
            if insert_word:
                add_word(standard, insert_word)

            # clean up
            standard_words.extend(standard)
            split_words.extend(split)
    finally:
        # Release the native engine even if a page fails to parse.
        letter_detect.End()

    # add the last word
    # NOTE(review): this previously targeted the last page's temporary
    # ``standard`` list, which had already been copied into
    # ``standard_words`` (and is unbound when ``pages`` is empty), so the
    # word never reached the result. Assumes ``add_word`` adds the word to
    # the list it is given - confirm against its definition.
    if prev_word:
        add_word(standard_words, prev_word)

    # make sure all the blocks are properly formatted
    for word in chain(standard_words, split_words):
        test_word_format(word)

    return standard_words, split_words, letters
Esempio n. 5
0
def read_char(image, whitelist=None):
    """ OCR a single character from an image. Useful for captchas.

    Args:
        image: Image object accepted by ``PyTessBaseAPI.SetImage``.
        whitelist: Optional string of characters recognition is
            restricted to; ``None`` leaves the character set unrestricted.

    Returns:
        The recognised text with surrounding whitespace stripped.
    """
    # The context manager ends the native engine on every exit path; the
    # previous version never released it.
    with PyTessBaseAPI() as api:
        api.SetPageSegMode(10)  # 10 = treat the image as a single character
        if whitelist is not None:
            api.SetVariable("tessedit_char_whitelist", whitelist)
        api.SetImage(image)
        api.Recognize()
        return api.GetUTF8Text().strip()
Esempio n. 6
0
def read_word(image, whitelist=None, chars=None, spaces=False):
    """ OCR a single word from an image. Useful for captchas.
        Image should be pre-processed to remove noise etc.

    Args:
        image: Image object accepted by ``PyTessBaseAPI.SetImage``.
        whitelist: Optional string of characters recognition is
            restricted to.
        chars: Expected length of the result; when given and the guess has
            a different length, the confidence is reported as ``None``.
        spaces: When false, every space is removed from the guess and the
            result is stripped.

    Returns:
        ``(guess, confidence)`` where ``confidence`` is the engine's mean
        text confidence, or ``None`` on a length mismatch.
    """
    # The context manager ends the native engine on every exit path; the
    # previous version never released it. Everything that needs the engine
    # is read before the block closes.
    with PyTessBaseAPI() as api:
        api.SetPageSegMode(8)  # 8 = treat the image as a single word
        if whitelist is not None:
            api.SetVariable("tessedit_char_whitelist", whitelist)
        api.SetImage(image)
        api.Recognize()
        guess = api.GetUTF8Text()
        confidence = api.MeanTextConf()

    if not spaces:
        guess = guess.replace(" ", "").strip()

    if chars is not None and len(guess) != chars:
        return guess, None

    return guess, confidence
Esempio n. 7
0
def get_boxes(image_filename: str) -> list:
    """Return word-level text boxes recognised in an image file.

    The image is recognised with the ``jpn_vert`` language in
    ``PSM.SPARSE_TEXT_OSD`` mode. Every word is printed for diagnostics and
    then kept unless ``should_ignore_box`` rejects it; that helper receives
    the word's confidence, its box, and half the image width and height.

    Args:
        image_filename: Path of the image to open.

    Returns:
        A list of dicts with the keys ``'text'``, ``'left'``, ``'top'``,
        ``'width'`` and ``'height'``.
    """
    level = RIL.WORD
    boxes = []

    # Both resources are released on every exit path (engine first, then
    # the image); previously an exception during recognition leaked them.
    with Image.open(image_filename) as image, \
            PyTessBaseAPI(lang="jpn_vert") as api:
        max_width = image.width // 2
        max_height = image.height // 2

        api.SetPageSegMode(PSM.SPARSE_TEXT_OSD)
        api.SetImage(image)
        api.Recognize(0)

        for r in iterate_level(api.GetIterator(), level):
            conf = r.Confidence(level)
            text = r.GetUTF8Text(level)
            left, top, right, bottom = r.BoundingBox(level)
            print("'%s' \tConf: %.2f \tCoords: %d,%d,%d,%d" %
                  (text, conf, left, top, right, bottom))
            box = {
                'text': text,
                'left': left,
                'top': top,
                'width': right - left,
                'height': bottom - top
            }
            if should_ignore_box(conf, box, max_width, max_height):
                continue
            boxes.append(box)

    return boxes
Esempio n. 8
0
class Analyzer(object):
    """Build a Page -> Block -> Paragraph -> TextLine -> Word -> Symbol tree.

    A single ``PyTessBaseAPI`` engine (``PSM.AUTO_OSD``) is kept for the
    lifetime of the instance; call ``close`` to release it.

    All ``__decode_*`` helpers advance the *same* result iterator. Each one
    stops as soon as the iterator reports the final element of the enclosing
    level, so the caller's own loop can step on to its next element.
    """

    TEXT_TYPES = {
        PT.FLOWING_TEXT,
        PT.HEADING_TEXT,
        PT.PULLOUT_TEXT,
        PT.VERTICAL_TEXT,
        PT.CAPTION_TEXT,
    }

    def __init__(self, lang=None):
        """Create the engine, passing ``lang`` only when it is given."""
        super(Analyzer, self).__init__()
        options = {} if lang is None else {'lang': lang}
        self.api = PyTessBaseAPI(psm=PSM.AUTO_OSD, **options)

    def analyze_image(self, image):
        """Recognise ``image`` and return the populated ``Page``."""
        result = Page()

        self.api.SetImage(image)
        self.api.Recognize()
        cursor = self.api.GetIterator()
        result.blocks = self.__decode_blocks(cursor, image)
        result.size = Size(*image.size)

        return result

    def close(self):
        """Release the underlying Tesseract engine."""
        self.api.End()

    def __decode_blocks(self, iterator, image):
        """Decode every block; blocks without text carry an image instead."""
        decoded = []
        for block_it in iterate_level(iterator, RIL.BLOCK):
            block = Block()
            block.bounding_box = BoundingBox.from_coordinates(
                *block_it.BoundingBox(RIL.BLOCK))
            if block_it.GetUTF8Text(RIL.BLOCK).strip():
                block.paragraphs = self.__decode_paragraphs(iterator)
            else:
                # No text in this block: keep its picture, skip paragraphs.
                block.image = block_it.GetImage(RIL.BLOCK, 0, image)
            decoded.append(block)
        return decoded

    def __decode_paragraphs(self, iterator):
        """Decode the paragraphs of the block the iterator is in."""
        decoded = []
        for para_it in iterate_level(iterator, RIL.PARA):
            paragraph = Paragraph()
            paragraph.bounding_box = BoundingBox.from_coordinates(
                *para_it.BoundingBox(RIL.PARA))
            paragraph.lines = self.__decode_lines(iterator)
            decoded.append(paragraph)
            if iterator.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                break
        return decoded

    def __decode_lines(self, iterator):
        """Decode the text lines of the paragraph the iterator is in."""
        decoded = []
        for line_it in iterate_level(iterator, RIL.TEXTLINE):
            text_line = TextLine()
            text_line.bounding_box = BoundingBox.from_coordinates(
                *line_it.BoundingBox(RIL.TEXTLINE))
            text_line.words = self.__decode_words(iterator)
            decoded.append(text_line)
            if iterator.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                break
        return decoded

    def __decode_words(self, iterator):
        """Decode the words of the current line, attaching one font each."""
        # (Font attribute, key in the WordFontAttributes dict)
        font_fields = (
            ('bold', 'bold'),
            ('italic', 'italic'),
            ('underline', 'underlined'),
            ('monospace', 'monospace'),
            ('serif', 'serif'),
            ('pointsize', 'pointsize'),
            ('id', 'font_id'),
        )
        decoded = []
        for word_it in iterate_level(iterator, RIL.WORD):
            # Read the font attributes before the symbol pass moves the
            # shared iterator through this word.
            attributes = word_it.WordFontAttributes()
            word = Word()
            word.bounding_box = BoundingBox.from_coordinates(
                *word_it.BoundingBox(RIL.WORD))
            word.confidence = float(word_it.Confidence(RIL.WORD)) / 100.0
            word.text = word_it.GetUTF8Text(RIL.WORD)
            word.symbols = self.__decode_symbols(iterator)
            font = Font()
            for field, key in font_fields:
                setattr(font, field, attributes[key])
            # Every symbol of a word shares the word's Font object.
            for symbol in word.symbols:
                symbol.font = font
            decoded.append(word)
            if iterator.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                break
        return decoded

    def __decode_symbols(self, iterator):
        """Decode the symbols of the word the iterator is in."""
        decoded = []
        for symbol_it in iterate_level(iterator, RIL.SYMBOL):
            symbol = Symbol()
            symbol.bounding_box = BoundingBox.from_coordinates(
                *symbol_it.BoundingBox(RIL.SYMBOL))
            symbol.confidence = float(
                symbol_it.Confidence(RIL.SYMBOL)) / 100.0
            symbol.text = symbol_it.GetUTF8Text(RIL.SYMBOL)
            binary = symbol_it.GetBinaryImage(RIL.SYMBOL)
            symbol.image = binary.convert('1', dither=Image.NONE)
            decoded.append(symbol)
            if iterator.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
                break
        return decoded
class TesseractOCR:
    """OCR helper that owns one ``PyTessBaseAPI`` handle for one image.

    On construction the image is analysed at word, block and text-line
    level and the results are cached as ``OCRTextWrapper`` lists. The other
    methods re-run recognition on sub-rectangles or validate cached results
    against font metrics. Call ``destroy`` to release the native engine.
    """

    # bpp - bits per pixel, represents the bit depth of the image, with 1
    # for binary bitmap, 8 for gray, and 24 for color RGB.
    BBP = 8
    DEFAULT_CONFIDENT_THRESHOLD = 60.0
    MINIMUM_DESKEW_THRESHOLD = 0.05

    def __init__(self, rgbaImage, dipCalculator, language):
        """Store the inputs and run the initial OCR passes.

        Args:
            rgbaImage: Image array (2-D or 3-D) accepted by
                ``Image.fromarray``.
            dipCalculator: Object providing ``pxToHeightDip``,
                ``dipToHeightPx`` and ``dipToWidthPx``.
            language: Currently unused; the engine is created with its
                default language.
        """
        self.mRgbaImage = rgbaImage
        self.mDipCalculator = dipCalculator
        self.mHandle = PyTessBaseAPI()

        self.mOcrTextWrappers = []
        self.mOcrBlockWrappers = []
        self.mOcrLineWrappers = []
        self.raWrappers = []

        self.mBufferedImageRgbaImage = Image.fromarray(self.mRgbaImage)
        self.initOCR()

    def _replaceHandle(self, psm):
        """Swap in a fresh engine using ``psm`` and end the previous one.

        The new engine is created first so a failed construction leaves the
        old, still usable handle in place.
        """
        previous = self.mHandle
        self.mHandle = PyTessBaseAPI(psm=psm)
        previous.End()

    @staticmethod
    def _applyFontAttributes(target, fontAttribute):
        """Copy a ``WordFontAttributes`` dict onto ``target`` (None: no-op)."""
        if fontAttribute is None:
            return
        target.fontName = fontAttribute['font_name']
        target.bold = fontAttribute['bold']
        target.italic = fontAttribute['italic']
        target.underlined = fontAttribute['underlined']
        target.monospace = fontAttribute['monospace']
        target.serif = fontAttribute['serif']
        target.smallcaps = fontAttribute['smallcaps']
        target.fontSize = fontAttribute['pointsize']
        target.fontId = fontAttribute['font_id']

    @staticmethod
    def _measureText(font, text):
        """Return ``(width, height)`` of ``text`` rendered with ``font``.

        ``getsize`` is used when the font object still offers it, so results
        on older Pillow releases are unchanged. Pillow 10 removed it; there
        the size is derived from ``getbbox``, keeping the top offset in the
        height the way ``getsize`` historically did.
        """
        if hasattr(font, 'getsize'):
            return font.getsize(text)
        left, _, right, bottom = font.getbbox(text)
        return right - left, bottom

    def baseInit(self, iteratorLevel):
        """Analyse the whole image at ``iteratorLevel``; return wrappers."""
        channels = 1
        if len(self.mRgbaImage.shape) == 2:
            height, width = self.mRgbaImage.shape
        else:
            height, width, channels = self.mRgbaImage.shape

        return self.baseInitIter(self.mRgbaImage, Rect(0, 0, width, height),
                                 channels, iteratorLevel)

    def baseInitIter(self, imageMat, rect, channels, iteratorLevel):
        """Return one ``OCRTextWrapper`` per component found in ``imageMat``.

        Args:
            imageMat: Image array handed to ``Image.fromarray``.
            rect: Its ``x``/``y`` are added to every component position.
            channels: Accepted for interface compatibility; unused.
            iteratorLevel: ``RIL`` level passed to ``GetComponentImages``.
        """
        listdata = []
        parentX = rect.x
        parentY = rect.y

        self.mHandle.SetImage(Image.fromarray(imageMat))
        boxes = self.mHandle.GetComponentImages(iteratorLevel, True)

        for _, box, _, _ in boxes:
            wrapper = OCRTextWrapper.OCRTextWrapper()
            self.mHandle.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            wrapper.text = self.mHandle.GetUTF8Text()
            wrapper.confidence = self.mHandle.MeanTextConf()
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            fontAttribute = iterator.WordFontAttributes()
            wrapper.x = box['x'] + parentX
            wrapper.y = box['y'] + parentY
            wrapper.width = box['w']
            wrapper.height = box['h']
            wrapper.rect = Rect(wrapper.x, wrapper.y, wrapper.width,
                                wrapper.height)
            self._applyFontAttributes(wrapper, fontAttribute)

            listdata.append(wrapper)

        return listdata

    def getBlockWithLocation(self, rect):
        """Return copies of cached block wrappers accepted by
        ``RectUtil.contains(rect, wrapper.rect)``."""
        wrappers = []
        for ocrTextWrapper in self.mOcrBlockWrappers:
            bound = ocrTextWrapper.rect
            if RectUtil.contains(rect, bound):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def getImage(self, rect):
        """Return the ``rect`` region of the source array as a PIL image."""
        x2 = rect.x + rect.width
        y2 = rect.y + rect.height
        mat = self.mRgbaImage[rect.y:y2, rect.x:x2]
        return Image.fromarray(mat)

    def getText(self, rect):
        """Recognise the text inside ``rect``; return ``""`` on any error."""
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            return self.mHandle.GetUTF8Text()
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getLineText(self, rect):
        """Recognise ``rect``, retrying as a single line when empty.

        The retry uses a ``PSM.SINGLE_LINE`` engine, first on the rectangle
        and then on the cropped image. The engine is always put back into
        ``PSM.AUTO`` afterwards, including when the retry raises. Returns
        ``""`` on any error.
        """
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            if TextUtils.isEmpty(text):
                self._replaceHandle(PSM.SINGLE_LINE)
                try:
                    self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                    self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                              rect.height)
                    text = self.mHandle.GetUTF8Text()
                    if TextUtils.isEmpty(text):
                        self.mHandle.SetImage(self.getImage(rect))
                        text = self.mHandle.GetUTF8Text()
                finally:
                    self._replaceHandle(PSM.AUTO)
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getRectWordForLowConfidence(self, ocr):
        """Re-recognise ``ocr`` as a single word and update it in place.

        ``ocr.text`` and ``ocr.confidence`` are overwritten; font attributes
        are copied only when the confidence ends up above
        ``Constants.TEXT_CONFIDENT_THRESHOLD``.

        Returns:
            ``True`` when the confidence is above the threshold, ``False``
            otherwise or on any error. The engine is restored to
            ``PSM.AUTO`` on every path (previously it stayed in single-word
            mode after a low-confidence result or an exception).
        """
        try:
            rect = ocr.bound()
            self._replaceHandle(PSM.SINGLE_WORD)
            try:
                self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                          rect.height)
                ocr.text = self.mHandle.GetUTF8Text()
                ocr.confidence = self.mHandle.MeanTextConf()
                if ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD:
                    # Second attempt on the cropped region alone.
                    self.mHandle.SetImage(self.getImage(rect))
                    ocr.text = self.mHandle.GetUTF8Text()
                    ocr.confidence = self.mHandle.MeanTextConf()
                if ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD:
                    return False
                self.mHandle.Recognize()
                iterator = self.mHandle.GetIterator()
                self._applyFontAttributes(ocr, iterator.WordFontAttributes())
                return True
            finally:
                self._replaceHandle(PSM.AUTO)
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return False

    def getWordsIn(self, rect):
        """Return copies of cached word wrappers accepted by
        ``RectUtil.contains(rect, wrapper.bound())``."""
        wrappers = []
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if RectUtil.contains(rect, bound):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def initOCR(self):
        """Populate the word, block and line caches (paragraphs skipped)."""
        self.initText()
        self.initBlock()
        self.initLine()

    def initBlock(self):
        """Cache block-level wrappers."""
        self.mOcrBlockWrappers = self.baseInit(RIL.BLOCK)

    def initLine(self):
        """Cache line-level wrappers, dropping lines that contain another."""
        self.mOcrLineWrappers = self.baseInit(RIL.TEXTLINE)
        invalidLineWrappers = []
        # a line cannot contain other lines
        for ocrLine in self.mOcrLineWrappers:
            for otherOcrLine in self.mOcrLineWrappers:
                if (ocrLine != otherOcrLine and RectUtil.contains(
                        ocrLine.bound(), otherOcrLine.bound())):
                    invalidLineWrappers.append(ocrLine)
        self.mOcrLineWrappers = [
            x for x in self.mOcrLineWrappers if x not in invalidLineWrappers
        ]

    def initPara(self):
        """Cache paragraph-level wrappers (not called by ``initOCR``)."""
        self.mOcrParaWrappers = self.baseInit(RIL.PARA)

    def initText(self):
        """Cache word-level wrappers."""
        self.mOcrTextWrappers = self.baseInit(RIL.WORD)

    def isOverlapText(self, rect, confident):
        """Return True when a cached word with confidence of at least
        ``confident`` satisfies ``RectUtil.intersects(rect, bound)``."""
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (ocrTextWrapper.getConfidence() >= confident
                    and RectUtil.intersects(rect, bound)):
                return True
        return False

    def reset(self):
        """Clear the word and line caches and re-run the OCR passes."""
        self.mOcrTextWrappers = []
        self.mOcrLineWrappers = []
        self.initOCR()

    def getPreferenceFontSize(self, ocrTextWrapper, parentHeight):
        """Search for the font size whose rendered height fits the box.

        The target height is the smaller of ``parentHeight`` and the
        wrapper height scaled by
        ``Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO``, converted with
        ``pxToHeightDip`` and truncated to an int. Starting from the
        wrapper's font size, the size is stepped up while the text renders
        shorter than the target, or down while it renders taller.
        """
        fontName = ocrTextWrapper.fontName
        fontSize = ocrTextWrapper.fontSize

        height = ocrTextWrapper.height * Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO
        textHeight = int(
            self.mDipCalculator.pxToHeightDip(min(parentHeight, height)))

        def renderedHeight(size):
            return self.getTextHeightUsingFontMetrics(ocrTextWrapper,
                                                      fontName, size)

        startHeight = renderedHeight(fontSize)
        if startHeight < textHeight:
            while renderedHeight(fontSize) < textHeight:
                fontSize = fontSize + 1
        elif startHeight > textHeight:
            # Stop at size 1: loading a font with size 0 or below fails, so
            # an unreachable target would otherwise end in an exception.
            while fontSize > 1 and renderedHeight(fontSize) > textHeight:
                fontSize = fontSize - 1

        return fontSize

    def getTextHeightUsingFontMetrics(self, ocrTextWrapper, fontName,
                                      fontSize):
        """Return the rendered height of the wrapper's text.

        The font is loaded from ``fonts//<fontName>.ttf`` at ``fontSize``.
        """
        fontPath = "fonts//" + fontName + ".ttf"
        font = ImageFont.truetype(fontPath, fontSize)
        return self._measureText(font, ocrTextWrapper.text)[1]

    def validCharacter(self, word):
        """Delegate to the engine's ``IsValidCharacter``."""
        return self.mHandle.IsValidCharacter(word)

    def getTextDimensions(self, text, fontName, fontSize):
        """Return ``(width, height)`` of ``text`` in the named font.

        Returns ``None`` (after printing the font path) when the font file
        cannot be loaded.
        """
        fontPath = "fonts//" + fontName + ".ttf"
        try:
            font = ImageFont.truetype(fontPath, fontSize)
            return self._measureText(font, text)
        except OSError:
            print(fontPath)
        return None

    def isValidTextUsingBoundaryCheck(self, ocrTextWrapper):
        """Compare the wrapper's box with the rendered size of its text.

        Both the aspect-ratio difference and the area ratio must pass their
        ``Constants`` thresholds. Empty text is accepted because nothing
        can be measured. When the font cannot be loaded, or a size is zero
        so a ratio cannot be formed, the text is rejected; those cases
        previously raised ``TypeError`` / ``ZeroDivisionError``.
        """
        if TextUtils.isEmpty(ocrTextWrapper.text):
            # We cannot calculate width of empty text
            return True

        dimensions = self.getTextDimensions(ocrTextWrapper.text,
                                            ocrTextWrapper.fontName,
                                            ocrTextWrapper.fontSize)
        if dimensions is None:
            return False
        width, height = dimensions
        if not width or not ocrTextWrapper.width:
            return False

        fontRatio = float(height / width)
        boundRatio = float(ocrTextWrapper.height / ocrTextWrapper.width)
        fontArea = self.mDipCalculator.dipToHeightPx(
            height) * self.mDipCalculator.dipToWidthPx(width)
        boundArea = float(ocrTextWrapper.width * ocrTextWrapper.height)

        largestRatio = max(boundRatio, fontRatio)
        largestArea = max(fontArea, boundArea)
        if not largestRatio or not largestArea:
            return False

        # The difference between the aspect ratios must stay within the
        # configured share of the larger ratio.
        dimensionCheck = (
            abs(boundRatio - fontRatio) / largestRatio
            <= Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD
        )

        areaCheck = (min(fontArea, boundArea) / largestArea
                     >= Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD)

        return dimensionCheck and areaCheck

    def destroy(self):
        """Release the native engine."""
        # The call parentheses were missing, so the engine was never ended.
        self.mHandle.End()
Esempio n. 10
0
    def run_tesseract(image_file):
        """Recognise ``image_file`` and return its layout as nested dicts.

        The engine runs in ``PSM.AUTO_OSD`` with both table-finding
        variables set to ``"T"``. ``tessdata`` comes from the enclosing
        scope and, when truthy, is used as the traineddata path.

        Returns:
            ``{}`` when layout analysis or the result iterator is
            unavailable. Otherwise a dict with the page orientation fields
            and a ``"blocks"`` list; each block nests ``"paragraphs"``,
            each paragraph ``"lines"`` and each line ``"words"``.
        """
        options = {"psm": PSM.AUTO_OSD}
        if tessdata:
            options["path"] = tessdata
        api = PyTessBaseAPI(**options)

        # The result is built purely from values copied out of the engine,
        # so the engine can (and now does) get released on every exit path.
        try:
            api.SetImageFile(image_file)
            api.SetVariable("textord_tablefind_recognize_tables", "T")
            api.SetVariable("textord_tabfind_find_tables", "T")
            api.Recognize()

            it = api.AnalyseLayout()
            if it is None:
                return {}

            orientation, direction, order, deskew_angle = it.Orientation()
            api.Recognize()
            ri = api.GetIterator()
            if ri is None:
                return {}

            document = {
                "orientation": orientation,
                "writing_direction": direction,
                "text_direction": order,
                "deskew_angle": deskew_angle,
                "blocks": []
            }
            while ri.IsAtBeginningOf(RIL.BLOCK):
                block = {
                    "block_type": ri.BlockType(),
                    "block_type_str": BlockType[ri.BlockType()],
                    "box": ri.BoundingBox(RIL.BLOCK),
                    "ocr_text": ri.GetUTF8Text(RIL.BLOCK),
                    "confidence": ri.Confidence(RIL.BLOCK),
                    "paragraphs": []
                }
                # Each "final element" flag is read before the iterator is
                # moved through the children of that element.
                break_para = False
                while True:
                    if ri.IsAtFinalElement(RIL.BLOCK, RIL.PARA):
                        break_para = True
                    break_line = False
                    paragraph = {
                        "box": ri.BoundingBox(RIL.PARA),
                        "ocr_text": ri.GetUTF8Text(RIL.PARA),
                        "paragraph_info": list(ri.ParagraphInfo()),
                        "confidence": ri.Confidence(RIL.PARA),
                        "lines": []
                    }
                    while True:
                        if ri.IsAtFinalElement(RIL.PARA, RIL.TEXTLINE):
                            break_line = True
                        break_word = False
                        line = {
                            "box": ri.BoundingBox(RIL.TEXTLINE),
                            "ocr_text": ri.GetUTF8Text(RIL.TEXTLINE),
                            "confidence": ri.Confidence(RIL.TEXTLINE),
                            "words": []
                        }
                        while True:
                            word = {
                                "box": ri.BoundingBox(RIL.WORD),
                                "ocr_text": ri.GetUTF8Text(RIL.WORD),
                                "confidence": ri.Confidence(RIL.WORD),
                                "attributes": ri.WordFontAttributes()
                            }
                            if ri.IsAtFinalElement(RIL.TEXTLINE, RIL.WORD):
                                break_word = True
                            line["words"].append(word)
                            if break_word:
                                break
                            ri.Next(RIL.WORD)
                        paragraph["lines"].append(line)
                        if break_line:
                            break
                        ri.Next(RIL.TEXTLINE)
                    block["paragraphs"].append(paragraph)
                    if break_para:
                        break
                    ri.Next(RIL.PARA)
                document["blocks"].append(block)
                ri.Next(RIL.BLOCK)
            return document
        finally:
            api.End()
Esempio n. 11
0
class GameAdaptor:
    """Capture a window's contents in a background thread and OCR regions.

    ``_work`` is the capture state flag: ``1`` while the capture loop
    should run, ``0`` when a stop was requested (also the initial value),
    and ``-1`` once the loop has finished.
    """

    def __init__(self, window_name):
        """Look up the window titled ``window_name`` and create the engine.

        Raises:
            Exception: When no window with that title is found.
        """
        self._window_name = window_name
        self._hwnd = win32gui.FindWindow(None, window_name)
        # Fail before allocating the Tesseract engine so that a missing
        # window does not leave an engine behind that nobody can release.
        if self._hwnd == 0:
            raise Exception('Window Handle Not Found! xD')
        _options = dict(psm=PSM.SINGLE_LINE, oem=OEM.LSTM_ONLY)
        self._api = PyTessBaseAPI('tessdata', 'eng', **_options)
        self._image = None
        self._lock = Lock()
        self._work = 0
        self._thread = None

    def _get_window_region(self):
        """Return ``(left, top, width, height, border_left, border_top)``.

        Width and height are the window rectangle minus fixed border sizes
        (12 px left/right, 31 px top, 20 px bottom).
        """
        bl, bt, br, bb = 12, 31, 12, 20
        l, t, r, b = win32gui.GetWindowRect(self._hwnd)
        w = r - l - br - bl
        h = b - t - bt - bb
        return l, t, w, h, bl, bt

    @contextmanager
    def _window_device_context(self):
        """Yield ``(window_dc, compatible_dc)`` and release both afterwards.

        Cleanup runs in ``finally`` so that an exception inside the
        ``with`` body no longer leaks the device contexts.
        """
        wdc = win32gui.GetWindowDC(self._hwnd)
        dc_obj = None
        c_dc = None
        try:
            dc_obj = win32ui.CreateDCFromHandle(wdc)
            c_dc = dc_obj.CreateCompatibleDC()
            yield dc_obj, c_dc
        finally:
            if dc_obj is not None:
                dc_obj.DeleteDC()
            if c_dc is not None:
                c_dc.DeleteDC()
            win32gui.ReleaseDC(self._hwnd, wdc)

    def _capture(self):
        """Copy the window interior into an array, dropping the 4th channel."""
        _, _, w, h, bx, by = self._get_window_region()
        with self._window_device_context() as (dc_obj, cdc):
            bmp = win32ui.CreateBitmap()
            bmp.CreateCompatibleBitmap(dc_obj, w, h)
            try:
                cdc.SelectObject(bmp)
                cdc.BitBlt((0, 0), (w, h), dc_obj, (bx, by),
                           win32con.SRCCOPY)
                bmp_info = bmp.GetInfo()
                img = np.frombuffer(bmp.GetBitmapBits(True), dtype=np.uint8)
            finally:
                # Free the bitmap even when the blit fails.
                win32gui.DeleteObject(bmp.GetHandle())
        return img.reshape(bmp_info['bmHeight'], bmp_info['bmWidth'],
                           4)[:, :, :-1]

    def _do_capture(self):
        """Capture-thread body: refresh ``_image`` until asked to stop."""
        try:
            while self._work == 1:
                temp_image = self._capture()
                with self._lock:
                    self._image = temp_image
                sleep(0.001)
        finally:
            # Mark the loop as finished even if a capture raised, so the
            # state flag never claims a dead loop is still running.
            self._work = -1

    def start_capture(self):
        """Start the capture thread and wait for the first frame.

        Raises:
            RuntimeError: When the capture thread ends before it produced
                a frame (previously this waited forever).
        """
        self._work = 1
        self._thread = Thread(target=self._do_capture)
        self._thread.start()
        while self._image is None:
            if not self._thread.is_alive() and self._image is None:
                raise RuntimeError(
                    'Capture thread stopped before producing an image')
            sleep(0.001)

    def stop_capture(self):
        """Stop the capture thread, wait for it, and drop the last frame.

        Safe to call when capture was never started or the thread has
        already died; both cases used to spin forever.
        """
        self._work = 0
        thread = getattr(self, '_thread', None)
        if thread is not None:
            thread.join()
            self._thread = None
        self._work = -1
        self._image = None

    def get_image(self):
        """Return the most recent frame (``None`` when there is none)."""
        with self._lock:
            return self._image

    def send_keys(self, *keys):
        """Post a ``WM_KEYDOWN`` message to the window for every key."""
        for k in keys:
            win32gui.PostMessage(self._hwnd, win32con.WM_KEYDOWN, k, 0)

    def get_text(self, region):
        """Generator that OCRs regions of one captured frame.

        The current frame is loaded once. The text of ``region``
        (``(x, y, w, h)``) is yielded, and the value passed to ``send()``
        becomes the next region; sending ``None`` ends the generator.
        """
        temp_pil_image = Image.fromarray(self.get_image())
        self._api.SetImage(temp_pil_image)
        while region is not None:
            x, y, w, h = region
            self._api.SetRectangle(x, y, w, h)
            self._api.Recognize(0)
            region = yield self._api.GetUTF8Text()

    def close(self):
        """Stop a running capture and release the Tesseract engine."""
        if self._work == 1:
            self.stop_capture()
        self._api.End()