def getDipValue(element, attributeName):
    """Return the numeric part of a dip-valued attribute as a float.

    Args:
        element: object exposing ``attrib`` (mapping) and ``get(name)``.
        attributeName: name of the attribute to read.

    Returns:
        The attribute value with the ``Constants.UNIT_DIP`` suffix removed,
        converted to ``float``; ``0`` when the attribute is absent, empty, or
        does not end with the dip unit.

    Raises:
        ValueError: if the text before the unit suffix is not a valid float.
    """
    if attributeName not in element.attrib:
        return 0

    value = element.get(attributeName)
    if TextUtils.isEmpty(value) or not value.endswith(Constants.UNIT_DIP):
        return 0

    # Strip exactly the suffix that was matched above instead of a hard-coded
    # three characters, so the slice stays correct whatever the unit's length.
    numberPart = value[:len(value) - len(Constants.UNIT_DIP)]
    return float(numberPart)
def containAllSpacesOrInvalidChars(text):
    """Classify ``text`` by its whitespace / printable-character content.

    Returns ``True`` when ``TextUtils.isEmpty(text)`` is true. Otherwise
    returns ``True`` only when NONE of the following hold:

    * every character is a space or a newline,
    * at least one character is not in ``string.printable``,
    * every character lies outside the code point range 33..126.

    NOTE(review): for non-empty input the result is the negation of what the
    function name suggests, while the empty case returns ``True``. Behaviour
    is kept as-is here; confirm against callers which polarity is intended.
    """
    if TextUtils.isEmpty(text):
        return True

    onlyOutsideVisibleAscii = all(
        ord(ch) < 33 or ord(ch) > 126 for ch in text)
    hasNonPrintable = any(ch not in printable for ch in text)
    onlyBlanks = all(ch == " " or ch == '\n' for ch in text)

    # De Morgan form of: not (onlyBlanks or hasNonPrintable or ...)
    return (not onlyBlanks
            and not hasNonPrintable
            and not onlyOutsideVisibleAscii)
def __init__(self, appName):
    """Set up the resource tree and register the application name.

    Builds a root element tagged ``Constants.ELEMENT_RESOURCE`` and adds one
    ``Constants.ELEMENT_STRING`` child named ``"app_name"`` whose text is the
    formatted ``appName``. Also starts with an empty ``mDataIndexMap``.

    NOTE(review): ``self.mId`` is not initialised here; presumably it is
    provided elsewhere (class attribute or base class) - confirm.
    """
    super().__init__()
    self.mAppName = appName

    root = Element(Constants.ELEMENT_RESOURCE)
    self.mRoot = root

    appNameNode = SubElement(root, Constants.ELEMENT_STRING)
    appNameNode.set(Constants.ATTRIBUTE_NAME, "app_name")
    appNameNode.text = TextUtils.formatText(self.mAppName)

    self.mDataIndexMap = {}
def getOriginalIdFromRectView(self, baseView):
    """Return the resource id backing ``baseView``, or ``None``.

    * Image views: the ``drawableId`` of the view's image info.
    * Text views: the text info's ``id`` when it is non-empty; otherwise the
      id returned by ``self.mWriter.addResource`` for the wrapped text.
    * Any other view type: ``None``.
    """
    viewType = baseView.mType

    if viewType == RectView.VIEW_TYPE_IMAGE:
        return baseView.getImageInfo().drawableId

    if viewType == RectView.VIEW_TYPE_TEXT:
        textInfo = baseView.getTextInfo()
        if TextUtils.isEmpty(textInfo.id):
            # No id assigned yet: register the text and use the new id.
            return self.mWriter.addResource(textInfo.textWrapper.text)
        return textInfo.id

    return None
def getLineText(self, rect):
    """Run OCR on ``rect`` and return the recognised text.

    First tries the current handle on the full image restricted to ``rect``.
    If that yields empty text, retries with a fresh handle in
    ``PSM.SINGLE_LINE`` mode, then once more on the image returned by
    ``self.getImage(rect)``; afterwards the handle is replaced by a fresh
    ``PSM.AUTO`` one.

    Any exception is printed and turned into an empty string.

    NOTE(review): the source had lost its indentation; the final handle reset
    is assumed to belong to the fallback path only - confirm.
    """
    try:
        self.mHandle.SetImage(self.mBufferedImageRgbaImage)
        self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
        recognized = self.mHandle.GetUTF8Text()
        if not TextUtils.isEmpty(recognized):
            return recognized

        # Fallback 1: same region, single-line page segmentation.
        self.mHandle = PyTessBaseAPI(psm=PSM.SINGLE_LINE)
        self.mHandle.SetImage(self.mBufferedImageRgbaImage)
        self.mHandle.SetRectangle(rect.x, rect.y, rect.width, rect.height)
        recognized = self.mHandle.GetUTF8Text()

        # Fallback 2: feed the image produced by getImage(rect) instead.
        if TextUtils.isEmpty(recognized):
            self.mHandle.SetImage(self.getImage(rect))
            recognized = self.mHandle.GetUTF8Text()

        # Go back to automatic page segmentation for subsequent calls.
        self.mHandle = PyTessBaseAPI(psm=PSM.AUTO)
        return recognized
    except Exception as error:
        print('Caught this error: ' + repr(error))
        return ""
def addResource(self, value):
    """Register a string resource and return its id (``"string_<n>"``).

    The text is normalised with ``TextUtils.formatText``. If the same
    formatted text was registered before, the existing id is returned and no
    new element is created. Otherwise a new ``Constants.ELEMENT_STRING``
    child is appended to ``self.mRoot`` and ``self.mId`` is incremented.

    Args:
        value: raw text to store.

    Returns:
        The resource id string for the (possibly pre-existing) entry.
    """
    formatText = TextUtils.formatText(value)

    if formatText in self.mDataIndexMap:
        return "string_" + str(self.mDataIndexMap[formatText])

    _id = "string_" + str(self.mId)
    # Store under the formatted text: that is the key the lookup above uses.
    # Keying by the raw value (as before) broke de-duplication whenever
    # formatting changed the string.
    self.mDataIndexMap[formatText] = self.mId
    self.mId = self.mId + 1

    element = SubElement(self.mRoot, Constants.ELEMENT_STRING)
    element.set(Constants.ATTRIBUTE_NAME, _id)
    element.text = formatText
    return _id
def hasDipValue(element, attributeName):
    """Tell whether the attribute holds a value ending in the dip unit.

    Returns ``True`` when ``element.get(attributeName)`` is non-empty (per
    ``TextUtils.isEmpty``) and ends with ``Constants.UNIT_DIP``; ``False``
    otherwise.
    """
    attrValue = element.get(attributeName)
    return (not TextUtils.isEmpty(attrValue)
            and attrValue.endswith(Constants.UNIT_DIP))
def isValidTextUsingBoundaryCheck(self, ocrTextWrapper):
    """Check that the OCR bounding box is plausible for the recognised text.

    The text is measured with ``self.getTextDimensions`` using the wrapper's
    font name and size, and compared with the OCR box in two ways:

    * aspect ratio (height / width): the relative difference must be at most
      ``Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD``;
    * area: smaller area / larger area must be at least
      ``Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD``. The measured
      size is converted with ``self.mDipCalculator`` before computing its
      area.

    Args:
        ocrTextWrapper: object exposing ``text``, ``fontName``, ``fontSize``,
            ``width`` and ``height``.

    Returns:
        ``True`` when the text is empty (nothing can be measured) or when
        both checks pass; ``False`` otherwise.

    Raises:
        ZeroDivisionError: if the measured width, the wrapper width, or the
            larger of the compared ratios/areas is zero.
    """
    if TextUtils.isEmpty(ocrTextWrapper.text):
        # We cannot calculate the width of empty text.
        return True

    width, height = self.getTextDimensions(
        ocrTextWrapper.text, ocrTextWrapper.fontName,
        ocrTextWrapper.fontSize)

    fontRatio = float(height / width)
    boundRatio = float(ocrTextWrapper.height / ocrTextWrapper.width)
    fontArea = (self.mDipCalculator.dipToHeightPx(height)
                * self.mDipCalculator.dipToWidthPx(width))
    boundArea = float(ocrTextWrapper.width * ocrTextWrapper.height)

    # Relative difference between the two aspect ratios, normalised by the
    # larger one. Computed once (previously evaluated twice, with the first
    # result stored in an unused local).
    ratioDifference = abs(boundRatio - fontRatio) / max(boundRatio, fontRatio)
    dimensionCheck = (
        ratioDifference
        <= Constants.TEXT_CONFIDENT_ACCEPTANCE_DIMENSION_RATIO_DIFFERENCE_THRESHOLD
    )

    # Overlap-style similarity of the two areas, in the range (0, 1].
    areaSimilarity = min(fontArea, boundArea) / max(fontArea, boundArea)
    areaCheck = (
        areaSimilarity >= Constants.TEXT_AREA_ACCEPTANCE_DIFFERENCE_THRESHOLD
    )

    return dimensionCheck and areaCheck
def pruneBasicInternal(self, parent, view):
    """Prune ``view`` and its subtree, then classify it as image or text.

    Steps, in order:

    1. If ``self.mDipCalculator.isViewToBeIgnore`` accepts the view's size,
       the view is removed from ``parent.mChildren`` (when a parent is given)
       and processing stops.
    2. Otherwise, ignorable children are dropped and the remaining children
       are pruned recursively.
    3. If ``self.isFullImage(view)`` is true, the view becomes an image view:
       its cropped image is registered as a drawable (or an existing drawable
       id is reused via ``self.interestedIcons``) and its children are
       cleared.
    4. Else, if ``view.hasTextRecusive()`` is true, each entry of
       ``view.mTextWithLocations`` is turned into a text child view, and the
       view's bound is recomputed from itself plus its children.

    NOTE(review): the source had lost its indentation. The nesting of the
    recursive loop (inside the "not all children too small" branch) and of
    the final bound update (inside the text branch) is reconstructed -
    confirm against the original file.
    """
    # TODO: if this view is too small and it has no children,so we don't
    # need them
    if self.mDipCalculator.isViewToBeIgnore(view.width, view.height):
        if (parent != None):
            parent.mChildren.remove(view)
        return

    allChildrenAreTooSmall = self.isAllChildrenTooSmall(view)
    if not allChildrenAreTooSmall and len(view.mChildren) != 0:
        # Collect the ignorable children first, then rebuild the list, so the
        # list is never mutated while it is being iterated.
        removedChildren = []
        for childView in view.mChildren:
            if self.mDipCalculator.isViewToBeIgnore(
                    childView.width, childView.height):
                removedChildren.append(childView)
        view.mChildren = [
            x for x in view.mChildren if x not in removedChildren
        ]
        for childView in view.mChildren:
            self.pruneBasicInternal(view, childView)

    # add this drawable if we did not want to show any children here
    isAImageView = self.isFullImage(view)
    if isAImageView:
        # Previously considered condition:
        # if not view.hasText() and allChildrenAreTooSmall and len(view.mChildren) == 0 :
        # NOTE(review): view.bound is called here but assigned a value in the
        # text branch below - confirm both usages are valid on the view type.
        currentMat = ImageUtil.getImageFromRect(self.mImage, view.bound())
        iconInfo = IconInfo(currentMat)
        drawableId = ""
        # Reuse the drawable id if an equal icon was already registered.
        if iconInfo in self.interestedIcons:
            drawableId = self.interestedIcons[iconInfo]
        viewsSameDrawable = None
        if (TextUtils.isEmpty(drawableId)):
            # First time this icon is seen: write it out and start a new
            # list of views sharing the drawable.
            drawableId = self.mDrawableWriter.addResourceDirectly(
                currentMat, view)
            self.interestedIcons[iconInfo] = drawableId
            viewsSameDrawable = []
            self.mDrawableMap[drawableId] = viewsSameDrawable
        else:
            viewsSameDrawable = self.mDrawableMap[drawableId]
        view.mType = RectViewTypes.VIEW_TYPE_IMAGE
        view.mImageInfo.iconInfo = iconInfo
        view.mImageInfo.drawableId = drawableId
        # An image view is rendered as a single drawable, so it keeps no
        # children.
        view.mChildren = []
        viewsSameDrawable.append(view)
    elif view.hasTextRecusive():
        # process text view
        textWithLocations = view.mTextWithLocations
        view.mColor = ColorUtil.findDominateColor(view, self.mImage)
        for textWrapper in textWithLocations:
            newHeight = TesseractOCR.increaseHeight(textWrapper.height)
            textView = textWrapper.boundRectView
            # Shift the top edge up by half of the added height so the
            # enlarged box stays vertically centred on the original one.
            newY = textView.y - (newHeight - textView.height) / 2.0
            textView.y = newY
            textView.x = textWrapper.x
            textView.width = textWrapper.width
            textView.height = newHeight
            textView.mType = RectViewTypes.VIEW_TYPE_TEXT
            textView.mTextInfo.textWrapper = textWrapper
            textView.rect = Rect(textView.x, textView.y, textWrapper.width,
                                 textView.height)
            # color[0] is stored as the view colour, color[1] as the text
            # colour.
            color = ColorUtil.findDominateColorForTextView(
                textView, self.mImage)
            textView.mColor = color[0]
            textView.textColor = color[1]
            # textView.mColor = ColorUtil.findDominateColor(textView,self.mImage)
            view.addChild(textView)
            # NOTE: a disabled variant used to register each text view as a
            # drawable here (same steps as the image branch above, applied to
            # textView); it is intentionally not active.

        # Update Bound of parent text view
        if len(view.mChildren) > 0:
            allViews = []
            allViews.extend(view.mChildren)
            allViews.append(view)
            view.bound = RectUtil.findBoundRectangle(allViews)
def getLayoutId(self):
    """Return ``self.layoutId``, or ``self.getVariableName()`` when the
    layout id is empty according to ``TextUtils.isEmpty``."""
    hasExplicitId = not TextUtils.isEmpty(self.layoutId)
    return self.layoutId if hasExplicitId else self.getVariableName()
def findInInvalidTextByList(self, ocrs):
    """Collect low-confidence single-character texts from large groups.

    ``ocrs`` is partitioned with ``GroupUtil.group`` using ``self.sameGroup``.
    Within every group of at least
    ``Constants.MIN_ACCEPTABLE_LIST_SIZE_FOR_INVALID_LIST_TEXT`` members, a
    wrapper is collected when its confidence is at most
    ``Constants.MIN_INVALID_LIST_TEXT_THRESHOLD`` and its text is non-empty
    with exactly one character.

    Returns:
        A list of the collected wrappers, in group order.
    """

    def isSuspiciousSingleChar(wrapper):
        # Same three conditions, evaluated in the same order as before.
        return (wrapper.confidence <= Constants.MIN_INVALID_LIST_TEXT_THRESHOLD
                and not TextUtils.isEmpty(wrapper.text)
                and len(wrapper.text) == 1)

    flagged = []
    for members in GroupUtil.group(ocrs, self.sameGroup):
        if len(members) < Constants.MIN_ACCEPTABLE_LIST_SIZE_FOR_INVALID_LIST_TEXT:
            continue
        flagged.extend(
            wrapper for wrapper in members if isSuspiciousSingleChar(wrapper))
    return flagged
def __init__(self, name):
    """Store the lower-cased ``name`` after passing it through
    ``TextUtils.removeInvalidProjectNameChars``, and start with an empty
    ``mPath``."""
    loweredName = name.lower()
    self.mName = TextUtils.removeInvalidProjectNameChars(loweredName)
    self.mPath = ""
def getBaseNameRemoveInvalidChars(fileName):
    """Return the base name of ``fileName`` after passing it through
    ``TextUtils.removeInvalidProjectNameChars``."""
    baseName = basename(fileName)
    return TextUtils.removeInvalidProjectNameChars(baseName)