Ejemplo n.º 1
0
def get_text_bounding_boxes(image, psm=12):
    """Return the bounding boxes of the text lines Tesseract finds in an image.

    Args:
        image: numpy array holding the image; ``image.shape[:2]`` is read as
            (height, width) before the array is converted to a PIL image.
        psm: page segmentation mode forwarded to ``PyTessBaseAPI``.

    Returns:
        A list of ``(x, y, w, h)`` tuples, one per text-line component, or an
        empty list when the image has no rows or no columns.
    """
    rows, cols = image.shape[:2]
    if rows <= 0 or cols <= 0:
        return []

    pil_image = Image.fromarray(image)  # Tesseract is handed a PIL image
    api = PyTessBaseAPI(psm=psm, oem=OEM.LSTM_ONLY)
    try:
        api.SetImage(pil_image)
        api.Recognize()
        components = api.GetComponentImages(RIL.TEXTLINE, True)
        return [(box['x'], box['y'], box['w'], box['h'])
                for _, box, _, _ in components]
    finally:
        # Release the Tesseract handle whether or not recognition succeeded.
        api.End()
Ejemplo n.º 2
0
def preprocess_title(filename):
    """Return the first sufficiently long OCR'd text line of an image file.

    Each text-line component is OCR'd in turn; the matches of ``alpha_re`` in
    its text are joined with single spaces, and the first result that is at
    least 5 characters long is taken as the title.

    Args:
        filename: path of the image file passed to ``SetImageFile``.

    Returns:
        The title string, or ``''`` when no line qualifies.
    """
    title = ''
    api = PyTessBaseAPI()
    try:
        api.SetImageFile(filename)
        for _, box, _, _ in api.GetComponentImages(RIL.TEXTLINE, True):
            api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            text = ' '.join(alpha_re.findall(api.GetUTF8Text().strip()))
            if len(text) >= 5:
                title = text
                break
    finally:
        # The handle was previously never ended; release it on every path.
        api.End()

    if title:
        logger.info("%s: %s", filename, title)
    return title
class TesseractOCR:

    #private static   TESSERACT_ENGINE_MODE = TessAPI1.TessOcrEngineMode.OEM_DEFAULT

    #
    # bpp - bits per pixel, represents the bit depth of the image, with 1 for
    # binary bitmap, 8 for gray, and 24 for color RGB.
    #
    BBP = 8
    DEFAULT_CONFIDENT_THRESHOLD = 60.0
    MINIMUM_DESKEW_THRESHOLD = 0.05

    def __init__(self, rgbaImage, dipCalculator, language):
        """Store the inputs, create the Tesseract handle and run the first OCR pass.

        Args:
            rgbaImage: image array; converted with ``Image.fromarray`` here
                and sliced directly by getImage().
            dipCalculator: object used by the font-size helpers for px/dip
                conversions.
            language: accepted but not used by this constructor.
        """
        self.mRgbaImage = rgbaImage
        self.mDipCalculator = dipCalculator

        # Wrapper lists start empty; initOCR() below fills the word, block
        # and line lists.
        self.mOcrTextWrappers = []
        self.mOcrBlockWrappers = []
        self.mOcrLineWrappers = []
        self.raWrappers = []

        self.mHandle = PyTessBaseAPI()
        self.mBufferedImageRgbaImage = Image.fromarray(self.mRgbaImage)
        self.initOCR()

    def baseInit(self, iteratorLevel):
        """Run baseInitIter() over the whole stored image.

        Args:
            iteratorLevel: component level forwarded to baseInitIter().

        Returns:
            Whatever baseInitIter() returns for a rectangle covering the
            full image.
        """
        shape = self.mRgbaImage.shape
        if len(shape) == 2:
            # Two-dimensional array: shape is (height, width), one channel.
            (height, width), channels = shape, 1
        else:
            height, width, channels = shape

        wholeImage = Rect(0, 0, width, height)
        return self.baseInitIter(self.mRgbaImage, wholeImage, channels,
                                 iteratorLevel)

    def baseInitIter(self, imageMat, rect, channels, iteratorLevel):
        """OCR every component of ``imageMat`` at the given level.

        Args:
            imageMat: image array handed to Tesseract via ``Image.fromarray``.
            rect: only ``rect.x`` / ``rect.y`` are read; they are added to
                each component position.
            channels: accepted but not used.
            iteratorLevel: level passed to ``GetComponentImages``.

        Returns:
            A list of OCRTextWrapper objects carrying text, confidence,
            position, size and (when available) font attributes.
        """
        # wrapper attribute -> key of the dict from WordFontAttributes()
        fontFields = (
            ('fontName', 'font_name'),
            ('bold', 'bold'),
            ('italic', 'italic'),
            ('underlined', 'underlined'),
            ('monospace', 'monospace'),
            ('serif', 'serif'),
            ('smallcaps', 'smallcaps'),
            ('fontSize', 'pointsize'),
            ('fontId', 'font_id'),
        )
        offsetX = rect.x
        offsetY = rect.y

        pilImage = Image.fromarray(imageMat)
        handle = self.mHandle
        handle.SetImage(pilImage)

        wrappers = []
        for _, box, _, _ in handle.GetComponentImages(iteratorLevel, True):
            wrapper = OCRTextWrapper.OCRTextWrapper()

            # Restrict recognition to this component, then read text,
            # confidence and font information in that order.
            handle.SetRectangle(box['x'], box['y'], box['w'], box['h'])
            wrapper.text = handle.GetUTF8Text()
            wrapper.confidence = handle.MeanTextConf()
            handle.Recognize()
            fontAttribute = handle.GetIterator().WordFontAttributes()

            wrapper.x = box['x'] + offsetX
            wrapper.y = box['y'] + offsetY
            wrapper.width = box['w']
            wrapper.height = box['h']
            wrapper.rect = Rect(wrapper.x, wrapper.y, wrapper.width,
                                wrapper.height)

            if fontAttribute is not None:
                for attrName, key in fontFields:
                    setattr(wrapper, attrName, fontAttribute[key])

            wrappers.append(wrapper)

        return wrappers

    def getBlockWithLocation(self, rect):
        wrappers = []
        for ocrTextWrapper in self.mOcrBlockWrappers:
            bound = ocrTextWrapper.rect
            if (RectUtil.contains(rect, bound)):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def getImage(self, rect):
        """Crop ``rect`` out of the stored image array and return it as a PIL image."""
        rows = slice(rect.y, rect.y + rect.height)
        cols = slice(rect.x, rect.x + rect.width)
        return Image.fromarray(self.mRgbaImage[rows, cols])

    def getText(self, rect):
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getLineText(self, rect):
        try:
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            text = self.mHandle.GetUTF8Text()
            if (TextUtils.isEmpty(text)):
                self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_LINE)
                self.mHandle.SetImage(self.mBufferedImageRgbaImage)
                self.mHandle.SetRectangle(rect.x, rect.y, rect.width,
                                          rect.height)
                text = self.mHandle.GetUTF8Text()
                if (TextUtils.isEmpty(text)):
                    self.mHandle.SetImage(self.getImage(rect))
                    text = self.mHandle.GetUTF8Text()

                self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
            return text
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return ""

    def getRectWordForLowConfidence(self, ocr):
        try:
            rect = ocr.bound()
            self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_WORD)
            self.mHandle.SetImage(self.mBufferedImageRgbaImage)
            self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
            ocr.text = self.mHandle.GetUTF8Text()
            ocr.confidence = self.mHandle.MeanTextConf()
            if (ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD):
                self.mHandle.SetImage(self.getImage(rect))
                ocr.text = self.mHandle.GetUTF8Text()
                ocr.confidence = self.mHandle.MeanTextConf()
            if (ocr.confidence <= Constants.TEXT_CONFIDENT_THRESHOLD):
                return False
            self.mHandle.Recognize()
            iterator = self.mHandle.GetIterator()
            fontAttribute = iterator.WordFontAttributes()
            if (fontAttribute != None):
                ocr.fontName = fontAttribute['font_name']
                ocr.bold = fontAttribute['bold']
                ocr.italic = fontAttribute['italic']
                ocr.underlined = fontAttribute['underlined']
                ocr.monospace = fontAttribute['monospace']
                ocr.serif = fontAttribute['serif']
                ocr.smallcaps = fontAttribute['smallcaps']
                ocr.fontSize = fontAttribute['pointsize']
                ocr.fontId = fontAttribute['font_id']
#                ocr.fontsize = self.getPreferenceFontSize(ocr)

            self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
            return True
        except Exception as error:
            print('Caught this error: ' + repr(error))

        return False

    def getWordsIn(self, rect):
        wrappers = []
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (RectUtil.contains(rect, bound)):
                wrappers.append(OCRTextWrapper.OCRTextWrapper(ocrTextWrapper))

        return wrappers

    def initOCR(self):
        """Fill the word, block and line wrapper lists, in that order.

        initPara() is not part of this sequence.
        """
        for initStep in (self.initText, self.initBlock, self.initLine):
            initStep()
#

    def initBlock(self):
        """Store the wrappers found at RIL.BLOCK level in mOcrBlockWrappers."""
        blockWrappers = self.baseInit(RIL.BLOCK)
        self.mOcrBlockWrappers = blockWrappers

    def initLine(self):
        """Store the RIL.TEXTLINE wrappers, dropping lines that contain another line.

        A line is discarded when ``RectUtil.contains`` reports that its
        bound contains the bound of some other line wrapper.
        """
        lines = self.baseInit(RIL.TEXTLINE)
        self.mOcrLineWrappers = lines

        # a line cannot contain other lines
        enclosing = [
            line for line in lines
            if any(line != other
                   and RectUtil.contains(line.bound(), other.bound())
                   for other in lines)
        ]
        self.mOcrLineWrappers = [
            line for line in lines if line not in enclosing
        ]

    def initPara(self):
        """Store the wrappers found at RIL.PARA level in mOcrParaWrappers."""
        paragraphWrappers = self.baseInit(RIL.PARA)
        self.mOcrParaWrappers = paragraphWrappers

    def initText(self):
        """Store the wrappers found at RIL.WORD level in mOcrTextWrappers."""
        wordWrappers = self.baseInit(RIL.WORD)
        self.mOcrTextWrappers = wordWrappers

    def isOverlapText(self, rect, confident):
        for ocrTextWrapper in self.mOcrTextWrappers:
            bound = ocrTextWrapper.bound()
            if (ocrTextWrapper.getConfidence() >= confident
                    and RectUtil.intersects(rect, bound)):
                return True
        return False

    def reset(self):
        """Empty the word and line wrapper lists, then run initOCR() again."""
        self.mOcrTextWrappers, self.mOcrLineWrappers = [], []
        self.initOCR()

#    def rotateImage(bi) :
#        iden = ImageDeskew(bi)
#        imageSkewAngle = iden.getSkewAngle() # determine skew angle
#        if imageSkewAngle > MINIMUM_DESKEW_THRESHOLD or imageSkewAngle < -MINIMUM_DESKEW_THRESHOLD :
#            bi = ImageHelper.rotateImage(bi, -imageSkewAngle) # deskew
#        return bi

    def getPreferenceFontSize(self, ocrTextWrapper, parentHeight):
        """Adjust the wrapper's font size until the rendered text fits the target height.

        The target is ``min(parentHeight, wrapper height *
        Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO)`` converted with
        ``pxToHeightDip`` and truncated to an int. Starting from the
        wrapper's own font size, the size is grown while the rendered height
        is below the target, or shrunk while it is above.

        Args:
            ocrTextWrapper: wrapper providing ``fontName``, ``fontSize``,
                ``height`` and the text that is measured.
            parentHeight: upper bound for the text box height.

        Returns:
            The adjusted font size. When shrinking, the size never goes
            below 1, because a size of 0 cannot be loaded as a font.
        """
        fontName = ocrTextWrapper.fontName
        fontSize = ocrTextWrapper.fontSize

        height = ocrTextWrapper.height * Constants.TEXT_BOX_AND_TEXT_HEIGHT_RATIO
        textHeight = int(
            self.mDipCalculator.pxToHeightDip(min(parentHeight, height)))

        def renderedHeight(size):
            return self.getTextHeightUsingFontMetrics(ocrTextWrapper, fontName,
                                                      size)

        startHeight = renderedHeight(fontSize)
        if startHeight < textHeight:
            while renderedHeight(fontSize) < textHeight:
                fontSize = fontSize + 1
        elif startHeight > textHeight:
            # Stop at 1: an unreachable target (e.g. 0) would otherwise drive
            # the size to 0 and make the font loader raise.
            while fontSize > 1 and renderedHeight(fontSize) > textHeight:
                fontSize = fontSize - 1

        return fontSize

    def getTextHeightUsingFontMetrics(self, ocrTextWrapper, fontName,
                                      fontSize):
        """Return the height of the wrapper's text rendered in the given font.

        The font is loaded from ``fonts//<fontName>.ttf`` at ``fontSize`` and
        the second element of ``font.getsize(text)`` is returned.
        """
        fontPath = "fonts//" + fontName + ".ttf"
        renderedSize = ImageFont.truetype(fontPath, fontSize).getsize(
            ocrTextWrapper.text)
        return renderedSize[1]

    def validCharacter(self, word):
        return self.mHandle.IsValidCharacter(word)

        #Don't have this method return TessAPI1.TessBaseAPIIsValidWord(mHandle, word) != 0
#        return True

#TODO
#    def getTextHeightUsingTextLayout(self,ocrTextWrapper, font) :
#        frc = self.mGraphics.getFontRenderContext()
#        loc = Point(0, 0)
#        layout = TextLayout(ocrTextWrapper.text, font, frc)
#        layout.draw(self.mGraphics, float(loc.x, loc.y))
#        bounds = layout.getBounds()
#        height = bounds.getHeight()
#        return height

#    def isValidTextUsingConfidentAndBoundaryCheck(self, ocrTextWrapper) :
#        if (ocrTextWrapper.getConfidence() > Constants.TEXT_CONFIDENT_THRESHOLD + Constants.TEXT_CONFIDENT_THRESHOLD_SECONDARY_RANGE) :
#            return True
#
#        elif (ocrTextWrapper.getConfidence() <= Constants.TEXT_CONFIDENT_THRESHOLD) :
#            return False
#
#        return self.isValidTextUsingBoundaryCheck(ocrTextWrapper)
#
#

    def getTextDimensions(self, text, fontName, fontSize):
        """Return ``font.getsize(text)`` for the font ``fonts//<fontName>.ttf``.

        Returns:
            The value of ``getsize``, or None when an OSError is raised
            while loading or measuring (the font path is printed).
        """
        fontPath = "fonts//" + fontName + ".ttf"
        try:
            return ImageFont.truetype(fontPath, fontSize).getsize(text)
        except OSError:
            print(fontPath)
            return None

    def isValidTextUsingBoundaryCheck(self, ocrTextWrapper):
        """Check the OCR bounding box against the size of the rendered text.

        The text is measured with getTextDimensions() using the wrapper's
        font name and size. Two conditions must both hold:

        * the relative difference between the box aspect ratio and the
          rendered aspect ratio (height / width) is at most
          ``Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD``;
        * the smaller of the two areas divided by the larger is at least
          ``Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD``.

        Returns:
            True when both checks pass. True is also returned when nothing
            can be measured (empty text, font not loadable, or a rendered
            size with a non-positive dimension), following the existing
            empty-text rule. False is returned for a bounding box with a
            non-positive width or height.
        """
        if TextUtils.isEmpty(ocrTextWrapper.text):
            # We cannot calculate width of empty text
            return True

        dimensions = self.getTextDimensions(ocrTextWrapper.text,
                                            ocrTextWrapper.fontName,
                                            ocrTextWrapper.fontSize)
        if dimensions is None:
            # getTextDimensions() returns None when the font cannot be
            # loaded; there is nothing to compare against.
            return True

        width, height = dimensions
        if width <= 0 or height <= 0:
            # Rendered text has no measurable extent; ratios are undefined.
            return True
        if ocrTextWrapper.width <= 0 or ocrTextWrapper.height <= 0:
            # A degenerate bounding box cannot match any rendered text.
            return False

        fontRatio = float(height / width)
        boundRatio = float(ocrTextWrapper.height / ocrTextWrapper.width)
        fontArea = self.mDipCalculator.dipToHeightPx(
            height) * self.mDipCalculator.dipToWidthPx(width)
        boundArea = float(ocrTextWrapper.width * ocrTextWrapper.height)

        # The difference between the aspect ratios must stay within the
        # configured fraction of the larger ratio.
        ratioDifference = abs(boundRatio - fontRatio) / max(boundRatio,
                                                            fontRatio)
        dimensionCheck = (
            ratioDifference <=
            Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD
        )

        # The two areas must be close enough to each other.
        areaRatio = min(fontArea, boundArea) / max(fontArea, boundArea)
        areaCheck = (
            areaRatio >= Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD)

        return dimensionCheck and areaCheck

    def destroy(self):
        self.mHandle.End
Ejemplo n.º 4
0
    #print(thres)
    #skimg = skimg >= thres
    #skimg = skimage.img_as_float(skimg)

    #skimage.io.imsave('page0x.png', skimg)

    image = Image.fromarray(np.uint8(skimg * 255))
    #image = Image.
    #image.save('page0y.png')
    #image = Image.open('page0.png')
    title = ''
    max_h = 0
    min_y = 10000
    api.SetImage(image)
    #api.SetImageFile(filename)
    boxes = api.GetComponentImages(RIL.TEXTLINE, True)
    #print('Found {} textline image components.'.format(len(boxes)))
    for i, (im, box, _, _) in enumerate(boxes):
        # im is a PIL image object
        # box is a dict with x, y, w and h keys
        api.SetRectangle(box['x'], box['y'], box['w'], box['h'])
        ocrResult = api.GetUTF8Text().replace('\n', ' ').strip()
        conf = api.MeanTextConf()
        print("Box[{0}]: x={x}, y={y}, w={w}, h={h}, "
              "confidence: {1}, text: {2}".format(i, conf, ocrResult, **box))

        text = ' '.join(alpha_re.findall(ocrResult.strip()))
        if len(text) < 5:
            continue

        if box['y'] <= 50:
Ejemplo n.º 5
0
class OCREngine():
    def __init__(self, extra_whitelist='', all_unicode=False, lang='eng'):
        """
        Create the Tesseract handle and, unless all_unicode is set, restrict
        recognition to a character whitelist.

        Args:
          extra_whitelist: string of extra chars for Tesseract to consider
              only takes effect when all_unicode is False
          all_unicode: if True, no whitelist is set on the handle
          lang: OCR language
        """
        self.tess = PyTessBaseAPI(psm=PSM_MODE, lang=lang)
        self.is_closed = False

        if all_unicode:
            self.whitelist_chars = None
            return

        default_chars = "".join((
            "abcdefghijklmnopqrstuvwxyz",
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
            "1234567890",
            r"~!@#$%^&*()_+-={}|[]\:;'<>?,./",
            '"',
            "©",
        ))
        self.whitelist_chars = default_chars + extra_whitelist
        self.tess.SetVariable('tessedit_char_whitelist', self.whitelist_chars)

    def check_engine(self):
        """Raise RuntimeError when the engine has been marked as closed."""
        if not self.is_closed:
            return
        raise RuntimeError('OCREngine has been closed.')

    def recognize(self,
                  image,
                  min_text_size=MIN_TEXT_SIZE,
                  max_text_size=MAX_TEXT_SIZE,
                  uniformity_thresh=UNIFORMITY_THRESH,
                  thin_line_thresh=THIN_LINE_THRESH,
                  conf_thresh=CONF_THRESH,
                  box_expand_factor=BOX_EXPAND_FACTOR,
                  horizontal_pooling=HORIZONTAL_POOLING):
        """
        Generator: yields a Blob(ocr_text, box, conf) for every candidate
        text region whose OCR confidence exceeds conf_thresh.
        http://stackoverflow.com/questions/23506105/extracting-text-opencv

        Args:
          image: input accepted by get_np_img; per the original notes one of:
            - string: image file path
            - ndarray: numpy image
            - PIL.Image.Image: PIL image
          min_text_size:
            min text height/width in pixels, below which will be ignored
          max_text_size:
            max text height in pixels, above which will be ignored
          uniformity_thresh (0.0 <= _ < 1.0):
            remove all black or all white regions
            ignore a region if its fraction of non-zero binarized pixels is
            below [thresh] or above 1 - [thresh]
          thin_line_thresh (must be odd int):
            remove all lines thinner than [thresh] pixels.
            can be used to remove the thin borders of web page textboxes.
          conf_thresh (0 <= _ < 100):
            ignore regions with OCR confidence <= thresh.
          box_expand_factor (0.0 <= _ < 1.0):
            expand the bounding box outwards in case certain chars are cutoff.
          horizontal_pooling:
            result bounding boxes will be more connected with more pooling,
            but large pooling might lower accuracy.

        Raises:
          RuntimeError: if the engine has been closed.
          AssertionError: if a parameter is outside its allowed range.
        """
        self.check_engine()
        # param sanity check
        assert max_text_size > min_text_size > 0
        assert 0.0 <= uniformity_thresh < 1.0
        assert thin_line_thresh % 2 == 1
        assert 0 <= conf_thresh < 100
        assert 0.0 <= box_expand_factor < 1.0
        assert horizontal_pooling > 0

        image = get_np_img(image)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)
        img = img_gray
        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
        # cut off all gray pixels < 30.
        # `cv2.THRESH_BINARY | cv2.THRESH_OTSU` is also good, but might overlook certain light gray areas
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        # connect horizontally oriented regions
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                           (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
        # remove all thin textbox borders (e.g. web page textbox)
        if thin_line_thresh > 0:
            kernel = cv2.getStructuringElement(
                cv2.MORPH_RECT, (thin_line_thresh, thin_line_thresh))
            img = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        # findContours returns (image, contours, hierarchy) in OpenCV 3 but
        # (contours, hierarchy) in other versions; the contours are always
        # the second-to-last element.
        contours = cv2.findContours(img, cv2.RETR_CCOMP,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]
        for contour in contours:
            x, y, w, h = box = Box(*cv2.boundingRect(contour))
            # remove regions that are beyond size limits
            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue
            # remove regions that are almost uniformly white or black
            binary_region = crop(img_bw, box)
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                continue
            # expand the borders a little bit to include cutoff chars
            expansion = int(min(h, w) * box_expand_factor)
            x = max(0, x - expansion)
            y = max(0, y - expansion)
            h, w = h + 2 * expansion, w + 2 * expansion
            if h > w:  # further extend the long axis
                h += 2 * expansion
            elif w > h:
                w += 2 * expansion
            # image passed to Tess should be grayscale.
            # http://stackoverflow.com/questions/15606379/python-tesseract-segmentation-fault-11
            box = Box(x, y, w, h)
            img_crop = crop(img_gray, box)
            # make sure that crops passed in tesseract have minimum x-height
            # http://github.com/tesseract-ocr/tesseract/wiki/FAQ#is-there-a-minimum-text-size-it-wont-read-screen-text
            img_crop = cv2.resize(img_crop,
                                  (int(img_crop.shape[1] * CROP_RESIZE_HEIGHT /
                                       img_crop.shape[0]), CROP_RESIZE_HEIGHT))
            ocr_text, conf = self.run_tess(img_crop)
            if conf > conf_thresh:
                yield Blob(ocr_text, box, conf)

    def _experiment_segment(self,
                            img,
                            min_text_size=MIN_TEXT_SIZE,
                            max_text_size=MAX_TEXT_SIZE,
                            uniformity_thresh=UNIFORMITY_THRESH,
                            horizontal_pooling=HORIZONTAL_POOLING):
        """
        PRIVATE: experiment only

        Runs the same segmentation steps as recognize() on a BGR image and
        shows every intermediate result with disp(). All contour boxes are
        drawn on a copy of the input; boxes that pass the size and
        uniformity filters are drawn on the input image itself.
        """
        img_init = img  # preserve initial image
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_bw = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 5)

        # http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        img = cv2.morphologyEx(img_gray, cv2.MORPH_GRADIENT, morph_kernel)
        disp(img)
        # fixed threshold: cut off all gradient pixels < 30
        _, img = cv2.threshold(img, 30, 255, cv2.THRESH_BINARY)
        disp(img)
        # connect horizontally oriented regions
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                 (horizontal_pooling, 1))
        img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, morph_kernel)
        disp(img)

        # remove thin structures with a 7x7 opening
        morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
        img = cv2.morphologyEx(img, cv2.MORPH_OPEN, morph_kernel)
        disp(img)

        # http://docs.opencv.org/trunk/d9/d8b/tutorial_py_contours_hierarchy.html
        # findContours returns (image, contours, hierarchy) in OpenCV 3 but
        # (contours, hierarchy) in other versions; the contours are always
        # the second-to-last element.
        contours = cv2.findContours(img, cv2.RETR_CCOMP,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]
        img_copy = np.copy(img_init)
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            draw_rect(img_copy, x, y, w, h)

            if (w < min_text_size or h < min_text_size or h > max_text_size):
                continue

            binary_region = img_bw[y:y + h, x:x + w]
            uniformity = np.count_nonzero(binary_region) / float(w * h)
            if (uniformity > 1 - uniformity_thresh
                    or uniformity < uniformity_thresh):
                # ignore mostly white or black regions
                continue
            draw_rect(img_init, x, y, w, h)
        disp(img_copy)
        disp(img_init, 0)

    def run_tess(self, img):
        """
        Run Tesseract on a single image.

        Tesseract python API source code:
        https://github.com/sirfz/tesserocr/blob/master/tesserocr.pyx

        Args:
          img: numpy array (converted with np2PIL) or an image that
              SetImage accepts directly.

        Returns:
          (ocr_text, confidence)
        """
        pil_img = np2PIL(img) if isinstance(img, np.ndarray) else img
        engine = self.tess
        engine.SetImage(pil_img)
        text = engine.GetUTF8Text().strip()
        return text, engine.MeanTextConf()

    def _deprec_run_tess(self, img):
        """Deprecated generator: OCR each text-line component of ``img``.

        Original note: "GetComponentImages throws SegFault randomly. No way
        to fix. :("

        Yields:
          (ocr_text, inner_box, conf) for every text-line component whose
          width and height are both at least MIN_TEXT_SIZE.
        """
        if isinstance(img, np.ndarray):
            img = np2PIL(img)
        # The image was converted but never handed to Tesseract, so the
        # components came from whatever image was set previously.
        self.tess.SetImage(img)

        components = self.tess.GetComponentImages(RIL.TEXTLINE, True)
        for _, inner_box, block_id, paragraph_id in components:
            # box is a dict with x, y, w and h keys
            inner_box = Box(**inner_box)
            if inner_box.w < MIN_TEXT_SIZE or inner_box.h < MIN_TEXT_SIZE:
                continue
            self.tess.SetRectangle(*inner_box)
            ocr_text = self.tess.GetUTF8Text().strip()
            conf = self.tess.MeanTextConf()
            yield ocr_text, inner_box, conf

    def close(self):
        """End the Tesseract handle and mark the engine as closed.

        After this call check_engine() raises RuntimeError.
        """
        self.tess.End()
        self.is_closed = True

    def __enter__(self):
        """Enter a ``with`` block; the engine itself is the context value."""
        return self

    def __exit__(self, type, value, traceback):
        """Leave a ``with`` block by calling close(); exceptions are not suppressed."""
        self.close()