def getBlockWithLocation(self, rect):
    """Collect the OCR block wrappers whose rectangle lies inside ``rect``.

    Every entry of ``self.mOcrBlockWrappers`` whose ``rect`` attribute
    satisfies ``RectUtil.contains(rect, <entry>.rect)`` is wrapped in a new
    ``OCRTextWrapper`` built from that entry.

    Args:
        rect: The enclosing rectangle handed to ``RectUtil.contains``.

    Returns:
        A new list of ``OCRTextWrapper`` objects, in the same order as the
        matching entries of ``self.mOcrBlockWrappers``.
    """
    return [
        OCRTextWrapper.OCRTextWrapper(candidate)
        for candidate in self.mOcrBlockWrappers
        if RectUtil.contains(rect, candidate.rect)
    ]
def getWordsIn(self, rect):
    """Collect the OCR word wrappers whose bound lies inside ``rect``.

    Every entry of ``self.mOcrTextWrappers`` whose ``bound()`` satisfies
    ``RectUtil.contains(rect, <entry>.bound())`` is wrapped in a new
    ``OCRTextWrapper`` built from that entry.

    Args:
        rect: The enclosing rectangle handed to ``RectUtil.contains``.

    Returns:
        A new list of ``OCRTextWrapper`` objects, in the same order as the
        matching entries of ``self.mOcrTextWrappers``.
    """
    return [
        OCRTextWrapper.OCRTextWrapper(candidate)
        for candidate in self.mOcrTextWrappers
        if RectUtil.contains(rect, candidate.bound())
    ]
def baseInitIter(self, imageMat, rect, channels, iteratorLevel):
    """Run OCR on ``imageMat`` and wrap every component found at ``iteratorLevel``.

    The image is handed to ``self.mHandle`` in full; it is not cropped to
    ``rect`` here. Each component box returned by ``GetComponentImages`` is
    recognised individually and turned into an ``OCRTextWrapper`` carrying
    its text, mean confidence, geometry and (when available) font
    attributes.

    Args:
        imageMat: Image array converted with ``Image.fromarray``.
        rect: Only ``rect.x`` and ``rect.y`` are read; they are added to each
            box origin. NOTE(review): this presumably expects ``imageMat`` to
            already be the sub-image located at ``rect`` - confirm with callers.
        channels: Unused; kept so existing callers keep working.
        iteratorLevel: Level passed through to ``GetComponentImages``.

    Returns:
        A list with one ``OCRTextWrapper`` per component box, in the order
        the boxes were reported.
    """
    listdata = []
    parentX = rect.x
    parentY = rect.y

    # Convert to a PIL image, which is what the OCR handle is given.
    self.mHandle.SetImage(Image.fromarray(imageMat))
    boxes = self.mHandle.GetComponentImages(iteratorLevel, True)

    for _, box, _, _ in boxes:
        wrapper = OCRTextWrapper.OCRTextWrapper()

        # Restrict recognition to the current component before reading text.
        self.mHandle.SetRectangle(box['x'], box['y'], box['w'], box['h'])
        wrapper.text = self.mHandle.GetUTF8Text()
        wrapper.confidence = self.mHandle.MeanTextConf()

        self.mHandle.Recognize()
        iterator = self.mHandle.GetIterator()
        # Both the iterator and the font attributes may be missing (None);
        # subscripting None would raise TypeError and abort the whole pass.
        fontAttribute = (
            iterator.WordFontAttributes() if iterator is not None else None
        )

        wrapper.x = box['x'] + parentX
        wrapper.y = box['y'] + parentY
        wrapper.width = box['w']
        wrapper.height = box['h']
        wrapper.rect = Rect(wrapper.x, wrapper.y, wrapper.width, wrapper.height)

        if fontAttribute is not None:
            wrapper.fontName = fontAttribute['font_name']
            wrapper.bold = fontAttribute['bold']
            wrapper.italic = fontAttribute['italic']
            wrapper.underlined = fontAttribute['underlined']
            wrapper.monospace = fontAttribute['monospace']
            wrapper.serif = fontAttribute['serif']
            wrapper.smallcaps = fontAttribute['smallcaps']
            wrapper.fontSize = fontAttribute['pointsize']
            wrapper.fontId = fontAttribute['font_id']
        # Otherwise the font fields keep whatever OCRTextWrapper() set up.

        listdata.append(wrapper)

    return listdata
def processText(self, color):
    """Filter OCR words, group them into lines and split lines into text blocks.

    Steps, in order:
      1. Run every word in ``self.mOcr.mOcrTextWrappers`` through the OCR
         rules of a ``FilterRuleManager``, then through its vision rules.
      2. Assign the surviving words to the OCR lines (sorted top to bottom)
         that contain them; words reported as not horizontally aligned are
         removed from the line and released for other lines.
      3. Promote words that ended up in no line to single-word lines, unless
         their confidence is below 90 and they fail the boundary check.
      4. Recompute each line's bound and text from its words.
      5. Split each line into blocks wherever the horizontal gap between
         consecutive words exceeds a height-based threshold.
      6. Build one ``OCRTextWrapper`` per non-empty block and keep those
         accepted by ``RuleAllSpace.containAllSpacesOrInvalidChars``.

    Args:
        color: Unused; kept so existing callers keep working.

    Returns:
        A ``TextInfo`` whose ``lines`` is the list of valid lines (sorted top
        to bottom) and whose ``blocksInALine`` is the list of kept blocks,
        also sorted top to bottom.
    """
    ocrTextWrappers = self.mOcr.mOcrTextWrappers

    # --- 1. rule-based filtering of individual words -----------------------
    ruleManager = FilterRuleManager(self.mDipCalculator, self.mOcr,
                                    self.mRgbaImage, ocrTextWrappers,
                                    self.mViews)
    acceptedOcrTextWrappers = []
    invalidTexts = {}
    for ocrTextWrapper in ocrTextWrappers:
        textValidator = ruleManager.acceptOCRRules(ocrTextWrapper)
        if textValidator is not None and not textValidator.valid:
            invalidTexts[ocrTextWrapper] = textValidator
        else:
            acceptedOcrTextWrappers.append(ocrTextWrapper)

    # Receives both collections; the filter below is evaluated afterwards so
    # it sees any changes this call makes to them.
    ruleManager.acceptVisionRules(invalidTexts, acceptedOcrTextWrappers)
    validTexts = [
        x for x in acceptedOcrTextWrappers if x not in invalidTexts
    ]

    # --- 2. assign words to lines (top to bottom) --------------------------
    copyLines = sorted(self.mOcr.mOcrLineWrappers,
                       key=cmp_to_key(RectUtil.getTopBottomComparator))

    validLines = []
    addedWords = []
    for ocrLineWrapper in copyLines:
        line = OCRTextWrapper.OCRTextWrapper(ocrLineWrapper)
        words = []
        for ocrWordWrapper in validTexts:
            if (ocrWordWrapper not in addedWords) and RectUtil.contains(
                    ocrLineWrapper.bound(), ocrWordWrapper.bound()):
                words.append(ocrWordWrapper)
                addedWords.append(ocrWordWrapper)

        if len(words) == 0:
            continue

        # Some lines contain words that are vertically stacked.
        notHorizontalAlignmentWords = self.getNotHorizontalAlignmentWords(
            words)
        if len(notHorizontalAlignmentWords) != 0:
            # Release the misaligned words so later lines can claim them,
            # and drop them from this line.
            addedWords = [
                x for x in addedWords
                if x not in notHorizontalAlignmentWords
            ]
            words = [
                x for x in words if x not in notHorizontalAlignmentWords
            ]

        validLines.append(line)
        words.sort(key=cmp_to_key(RectUtil.getLeftRightComparator))
        line.words = words

    # --- 3. leftover words become their own single-word lines --------------
    remainWords = [x for x in validTexts if x not in addedWords]
    for word in remainWords:
        if (word.confidence < 90
                and not self.mOcr.isValidTextUsingBoundaryCheck(word)):
            continue
        line = OCRTextWrapper.OCRTextWrapper(word)
        line.words = [word]
        validLines.append(line)

    validLines.sort(key=cmp_to_key(RectUtil.getTopBottomComparator))

    # --- 4. refresh each line's bound and text from its words --------------
    for ocrLineWrapper in validLines:
        rect = ocrLineWrapper.reCalculateBoundBaseOnWordList()
        # Identity check: does not depend on how the rect type defines ==.
        if rect is None:
            print(f"Error with line, there is no more text: {ocrLineWrapper.text}")
        else:
            ocrLineWrapper.text = self.mOcr.getText(rect)
            ocrLineWrapper.rect = rect

    # --- 5. split each line into blocks on wide horizontal gaps ------------
    # Words inside a line are already sorted left to right.
    for ocrLineWrapper in validLines:
        # The initial empty block is kept as in the original data shape; it
        # is skipped by the non-empty check when blocks are materialised.
        blocks = [[]]
        words = ocrLineWrapper.words
        currentBlock = []
        if len(words) > 0:
            currentBlock.append(words[0])
        for currentWord, nextWord in zip(words, words[1:]):
            xDistance = nextWord.x - (currentWord.x + currentWord.width)
            xDistanceThreshold = int(
                Constants.WORD_SPACE_THRESHOLD_BASE_ON_HEIGHT * float(
                    min(currentWord.bound().height,
                        nextWord.bound().height)))
            if xDistance <= xDistanceThreshold:
                currentBlock.append(nextWord)
            else:
                blocks.append(currentBlock)
                currentBlock = [nextWord]
        # Membership uses list equality, so a trailing block equal to one
        # already stored (including the empty one) is not added again.
        if currentBlock not in blocks:
            blocks.append(currentBlock)
        ocrLineWrapper.blocks = blocks

    # --- 6. materialise the blocks ----------------------------------------
    blocksInline = []
    for lineOCR in validLines:
        for listWord in lineOCR.blocks:
            if len(listWord) == 0:
                continue
            rect = RectUtil.findBoundRectangle(listWord)
            block = OCRTextWrapper.OCRTextWrapper(listWord[0])
            block.words = listWord
            block.width = rect.width
            block.height = rect.height
            block.rect = rect
            lineText = self.mOcr.getLineText(rect)
            block.text = lineText
            # NOTE(review): the original comment says blocks containing only
            # invisible characters are ignored, yet the block is kept when
            # this helper returns a truthy value. Confirm the helper's
            # return convention before changing the condition.
            if RuleAllSpace.containAllSpacesOrInvalidChars(lineText):
                blocksInline.append(block)

    textInfo = TextInfo()
    textInfo.lines = validLines
    textInfo.blocksInALine = blocksInline
    textInfo.blocksInALine.sort(
        key=cmp_to_key(RectUtil.getTopBottomComparator))
    return textInfo