Example #1
0
    def groupViewsForList(self, parentView, newChildren, viewTypeRect,
                          minChidren):
        """Wrap ``newChildren`` in a new ``RectView`` inside ``parentView``.

        When ``newChildren`` has at least ``minChidren`` entries, a new
        ``RectView`` is created from the bounding rectangle of the children
        (``RectUtil.findBoundRectangle``), tagged with ``viewTypeRect`` and
        placed in ``parentView.mChildren`` at the position of the first child.
        The grouped children are then removed from ``parentView.mChildren``,
        and every remaining view reported by ``RectUtil.contain`` for the new
        parent is moved so that it sits directly after the new parent.

        Args:
            parentView: view whose ``mChildren`` list is rewritten.
            newChildren: views to be replaced by the new parent view.
            viewTypeRect: value stored in the new view's ``mType``.
            minChidren: minimum number of children required to build a group.

        Returns:
            The newly created ``RectView``, or ``None`` when there are fewer
            than ``minChidren`` children (``parentView`` is left untouched).
        """
        if len(newChildren) < minChidren:
            return None

        bound = RectUtil.findBoundRectangle(newChildren)
        newParent = RectView(bound, None)
        newParent.mType = viewTypeRect

        # Put the new parent at the location of the first child. list.index
        # raises ValueError (it never returns -1) when the child is missing,
        # in which case the new parent is appended instead.
        children = parentView.mChildren
        try:
            firstChildIndex = children.index(newChildren[0])
        except ValueError:
            firstChildIndex = -1

        if firstChildIndex >= 0:
            children[firstChildIndex] = newParent
        else:
            children.append(newParent)

        # Now remove the grouped children themselves.
        remaining = [x for x in children if x not in newChildren]

        # Make sure no view stays hidden under the new parent: pull the
        # contained views out and re-insert them right after the new parent.
        insideViews = RectUtil.contain(newParent, remaining)
        remaining = [x for x in remaining if x not in insideViews]
        insertAt = remaining.index(newParent) + 1
        # Slice assignment inserts at any position, including the very end.
        remaining[insertAt:insertAt] = insideViews

        parentView.mChildren = remaining
        return newParent
Example #2
0
    def removeInvalidTextBaseOnNeighbours(self, acceptedOcrTextWrappers,
                                          invalidTexts):
        """Collect more invalid OCR texts by inspecting groups of views.

        The accepted texts minus ``invalidTexts`` are treated as valid. The
        views in ``self.mViews`` are grouped with ``GroupUtil.group`` and
        ``self.sameGroup``; only groups with at least
        ``Constants.TEXT_TO_BECOME_IMAGE_MIN_ACCEPTABLE_LIST_SIZE`` members
        and an alignment type other than ``RectUtil.ALIGNMENT_UNKNOWN`` are
        examined. Two rules are applied to each such group:

        1. Every view contains exactly one valid text, does not partially
           intersect any other valid text, and all those texts are identical
           and at most one character long. If any of them has a confidence
           below ``Constants.TEXT_TO_BECOME_IMAGE_IN_LIST_THRESHOLD`` or is
           rejected by ``self.mTesseractOCR.validWord``, all of them are
           reported as invalid.
        2. Otherwise, if no view contains two or more valid texts or
           partially intersects one, and exactly one view contains a text
           while all the others contain none, that view's texts are
           reported as invalid.

        Args:
            acceptedOcrTextWrappers: OCR text wrappers accepted so far.
            invalidTexts: wrappers already known to be invalid; they are
                excluded from the valid set before the rules are applied.

        Returns:
            A list with the additional text wrappers found to be invalid.
        """
        validTexts = list(acceptedOcrTextWrappers)
        for invText in invalidTexts:
            if invText in validTexts:
                validTexts.remove(invText)

        moreInvalidText = []
        minGroupSize = Constants.TEXT_TO_BECOME_IMAGE_MIN_ACCEPTABLE_LIST_SIZE

        for group in GroupUtil.group(self.mViews, self.sameGroup):
            if len(group) < minGroupSize:
                continue

            alignmentType = RectUtil.getAlignmentType(
                group, self.mDefaultAlignThreshold)
            if alignmentType == RectUtil.ALIGNMENT_UNKNOWN:
                continue

            # Rule 1: if all texts are exactly the same and some of them are
            # invalid, we kill them all.
            skipGroup = False
            sameText = ""
            inValidTextsInGroup = set()
            for i, rectView in enumerate(group):
                texts = RectUtil.contain(rectView, validTexts)
                # We only test views that have exactly one text and do not
                # "intersect but not include" any other valid text.
                if (len(texts) != 1
                        or RectUtil.countIntersectNotIncludeRect(
                            rectView, validTexts) != 0):
                    skipGroup = True
                    break

                # If the confidence is acceptable but the boundary is wrong,
                # only one character is accepted here.
                text = texts[0].text
                # TODO: original condition was
                # self.mTesseractOCR.validWord(text) or len(text) > 1
                if len(text) > 1:
                    skipGroup = True
                    break

                if i == 0:
                    sameText = text
                elif not sameText == text:
                    skipGroup = True
                    break

                inValidTextsInGroup.add(texts[0])

            if not skipGroup and len(inValidTextsInGroup) == len(group):
                threshold = Constants.TEXT_TO_BECOME_IMAGE_IN_LIST_THRESHOLD
                stillInvalid = any(
                    textWrapper.confidence < threshold
                    or not self.mTesseractOCR.validWord(textWrapper.getText())
                    for textWrapper in inValidTextsInGroup)
                if stillInvalid:
                    moreInvalidText.extend(inValidTextsInGroup)
                    continue

            # Rule 2: a single text among otherwise text-free views. This
            # rule gets its own flag; reusing the first rule's flag (which is
            # always set by this point) made the rule unreachable.
            mapCountTexts = {}
            ruleApplies = True
            for rectView in group:
                texts = RectUtil.contain(rectView, validTexts)
                # Only groups whose views hold at most one text and do not
                # "intersect but not include" a valid text qualify.
                if (len(texts) >= 2
                        or RectUtil.countIntersectNotIncludeRect(
                            rectView, validTexts) > 0):
                    ruleApplies = False
                    break
                mapCountTexts[rectView] = texts

            if not ruleApplies:
                continue

            countEmptyView = 0
            uniqueInvalidRectView = None
            for rectView in group:
                if len(mapCountTexts.get(rectView)) == 0:
                    countEmptyView = countEmptyView + 1
                else:
                    uniqueInvalidRectView = rectView

            if (len(group) == countEmptyView + 1
                    and uniqueInvalidRectView is not None):
                moreInvalidText.extend(
                    mapCountTexts.get(uniqueInvalidRectView))

        return moreInvalidText