def groupViewsForList(self, parentView, newChildren, viewTypeRect, minChidren):
    """Replace a run of sibling views with one new bounding container view.

    When ``newChildren`` holds at least ``minChidren`` views, a ``RectView``
    covering their bounding rectangle is created, tagged with
    ``viewTypeRect`` and stored in ``parentView.mChildren`` at the position
    of the first grouped child. The grouped views are then removed from
    ``parentView.mChildren``. Any remaining sibling that
    ``RectUtil.contain`` reports for the new view is moved so that it sits
    directly after the new view.

    NOTE(review): this method does not attach ``newChildren`` to the new
    view; presumably the caller does that -- confirm.

    Args:
        parentView: view whose ``mChildren`` list is rewritten.
        newChildren: views to be grouped; the first one decides where the
            new view is placed.
        viewTypeRect: value assigned to the new view's ``mType``.
        minChidren: minimum number of views required to form a group.

    Returns:
        The newly created ``RectView``, or ``None`` when ``newChildren`` is
        empty or has fewer than ``minChidren`` entries.
    """
    if not newChildren or len(newChildren) < minChidren:
        return None

    bound = RectUtil.findBoundRectangle(newChildren)
    newParent = RectView(bound, None)
    newParent.mType = viewTypeRect

    # Put the new view where the first grouped child used to be. list.index
    # raises ValueError (it never returns -1), so the "not found" fallback
    # has to be an exception handler. A first child at index 0 is replaced
    # in place like any other position.
    siblings = parentView.mChildren
    try:
        firstChildPos = siblings.index(newChildren[0])
    except ValueError:
        siblings.append(newParent)
    else:
        siblings[firstChildPos] = newParent

    # Drop the grouped views from the parent.
    parentView.mChildren = [
        view for view in siblings if view not in newChildren
    ]

    # Siblings reported by RectUtil.contain for the new view are pulled out
    # and re-inserted immediately after it, so none of them stays in front
    # of the new view in the child order.
    insideViews = RectUtil.contain(newParent, parentView.mChildren)
    parentView.mChildren = [
        view for view in parentView.mChildren if view not in insideViews
    ]
    insertAt = parentView.mChildren.index(newParent) + 1
    # Slice insertion works both at the end and in the middle of the list;
    # list.extend accepts a single iterable and cannot insert at an index.
    parentView.mChildren[insertAt:insertAt] = insideViews
    return newParent
def removeInvalidTextBaseOnNeighbours(self, acceptedOcrTextWrappers, invalidTexts):
    """Find further invalid texts by comparing each view with its group.

    The working set of valid texts is ``acceptedOcrTextWrappers`` minus the
    entries of ``invalidTexts``. ``self.mViews`` is grouped with
    ``GroupUtil.group(self.mViews, self.sameGroup)``. Only groups with at
    least ``Constants.TEXT_TO_BECOME_IMAGE_MIN_ACCEPTABLE_LIST_SIZE`` views
    and a known alignment type are examined, using two rules:

    1. Every view contains exactly one valid text, that text is a single
       character, all views carry the same character, and no view
       partially intersects a valid text. If any of those texts has a
       ``confidence`` below
       ``Constants.TEXT_TO_BECOME_IMAGE_IN_LIST_THRESHOLD`` or is rejected
       by ``self.mTesseractOCR.validWord``, all of them are reported.
    2. Otherwise, if no view contains two or more valid texts or partially
       intersects one, and exactly one view of the group contains a text
       while every other view contains none, that text is reported.

    Args:
        acceptedOcrTextWrappers: texts currently considered valid.
        invalidTexts: texts already known to be invalid; they are excluded
            from the valid set before the rules run.

    Returns:
        A list of additional texts judged invalid. The inputs are not
        modified.
    """
    # Copy the accepted texts and drop those already known to be invalid.
    validTexts = list(acceptedOcrTextWrappers)
    for invText in invalidTexts:
        if invText in validTexts:
            validTexts.remove(invText)

    moreInvalidText = []
    groups = GroupUtil.group(self.mViews, self.sameGroup)
    for group in groups:
        if len(group) < Constants.TEXT_TO_BECOME_IMAGE_MIN_ACCEPTABLE_LIST_SIZE:
            continue
        alignmentType = RectUtil.getAlignmentType(
            group, self.mDefaultAlignThreshold)
        if alignmentType == RectUtil.ALIGNMENT_UNKNOWN:
            continue

        # Rule 1: if all texts are exactly the same single character and
        # any of them is invalid, report them all.
        skipGroup = False
        sameText = ""
        inValidTextsInGroup = set()
        for i, rectView in enumerate(group):
            texts = RectUtil.contain(rectView, validTexts)
            # Only views holding exactly one text, and not partially
            # intersecting any other valid text, qualify.
            if (len(texts) != 1
                    or RectUtil.countIntersectNotIncludeRect(
                        rectView, validTexts) != 0):
                skipGroup = True
                break
            # The confidence may be acceptable while the boundary is wrong,
            # so only a single character is accepted here.
            text = texts[0].text
            # TODO: consider
            # `if self.mTesseractOCR.validWord(text) or len(text) > 1`
            if len(text) > 1:
                skipGroup = True
                break
            if i == 0:
                sameText = text
            elif sameText != text:
                skipGroup = True
                break
            inValidTextsInGroup.add(texts[0])

        if not skipGroup and len(inValidTextsInGroup) == len(group):
            stillInvalid = any(
                textWrapper.confidence
                < Constants.TEXT_TO_BECOME_IMAGE_IN_LIST_THRESHOLD
                or not self.mTesseractOCR.validWord(textWrapper.getText())
                for textWrapper in inValidTextsInGroup
            )
            if stillInvalid:
                moreInvalidText.extend(inValidTextsInGroup)
                continue

        # Rule 2: a single view with text among otherwise text-less views.
        # This uses its own flag. Previously the rule-1 flag was reused
        # without being reset, so it was always True here and this rule
        # could never run.
        mapCountTexts = {}
        ruleApplies = True
        for rectView in group:
            texts = RectUtil.contain(rectView, validTexts)
            # Give up on the group if any view holds several texts or
            # partially intersects a valid text.
            if (len(texts) >= 2
                    or RectUtil.countIntersectNotIncludeRect(
                        rectView, validTexts) > 0):
                ruleApplies = False
                break
            mapCountTexts[rectView] = texts
        if not ruleApplies:
            continue

        countEmptyView = 0
        uniqueInvalidRectView = None
        for rectView in group:
            if len(mapCountTexts[rectView]) == 0:
                countEmptyView += 1
            else:
                uniqueInvalidRectView = rectView
        if (len(group) == countEmptyView + 1
                and uniqueInvalidRectView is not None):
            moreInvalidText.extend(mapCountTexts[uniqueInvalidRectView])

    return moreInvalidText