Ejemplo n.º 1
0
    def _getNumberSelectionSplittedNumber(self, firstPartNumber,
                                          lastPartNumber):
        """Find the first-part chunk of a number that is split over two chunks.

        Candidates are the selections returned by the cutter for a pattern
        anchored at the start with ``lastPartNumber``. They are visited in
        ascending ``doc_top`` order. For each candidate, the chunks filtered
        with ``doc_top >= candidate.doc_top - 50`` and
        ``doc_bottom <= candidate.doc_top`` are searched with a pattern that
        anchors ``firstPartNumber`` at both ends.
        NOTE(review): the ``__gte``/``__lte`` semantics are assumed from the
        keyword names -- confirm against the cutter library.

        Args:
            firstPartNumber: Text of the leading part of the number.
            lastPartNumber: Text of the trailing part of the number.

        Returns:
            The filtered selection for ``firstPartNumber`` belonging to the
            first candidate that has exactly one such match, or ``None`` if
            no candidate qualifies.
        """
        # Both parts are escaped so they are matched literally (the last part
        # may contain a dot).
        firstPartPattern = '^{}$'.format(
            helper.escapeForRegex(firstPartNumber))
        lastPartPattern = '^{}'.format(helper.escapeForRegex(lastPartNumber))

        # Order the candidates by their position in the document.
        candidates = list(self.cutter.filter(auto_regex=lastPartPattern))
        candidates.sort(key=lambda candidate: candidate.doc_top)

        for candidate in candidates:
            # Chunks in the 50-unit band directly above the candidate.
            bandAbove = self.cutter.all().filter(
                doc_top__gte=candidate.doc_top - 50,
                doc_bottom__lte=candidate.doc_top,
            )
            firstPartMatches = bandAbove.filter(auto_regex=firstPartPattern)

            # Only an unambiguous (single) match is accepted; stop at once.
            if len(firstPartMatches) == 1:
                return firstPartMatches
        return None
Ejemplo n.º 2
0
 def _getNumberSelection(self, number):
     """Return the selection for chunks that start with ``number`` minus its last character.

     The last character of ``number`` is removed unconditionally before the
     pattern is built.
     NOTE(review): presumably callers always pass a trailing dot
     (e.g. "46.") -- confirm, since any other final character is dropped too.

     Args:
         number: Number text whose final character is discarded.

     Returns:
         Whatever ``self._getHighestSelection`` picks from the selections
         matching the start-anchored pattern.
     """
     # Escape the trimmed number so it is matched literally.
     startPattern = '^{}'.format(helper.escapeForRegex(number[:-1]))
     startsWithNumber = self.cutter.filter(auto_regex=startPattern)
     return self._getHighestSelection(startsWithNumber)
Ejemplo n.º 3
0
 def _getSubpartSelectionNonStrictBelowNumberSelection(
         self, subpart, numberSelection):
     """Return the subpart selection located at or below the number selection.

     The search is limited to chunks filtered with
     ``doc_top >= numberSelection.doc_top - 50``. Within those, the regex
     requires one character other than "G" directly before the escaped
     ``subpart``.

     Args:
         subpart: Subpart label text; escaped before use in the regex.
         numberSelection: Selection whose ``doc_top`` sets the upper bound.

     Returns:
         Whatever ``self._getHighestSelection`` picks from the matches.
     """
     # The leading [^G] keeps "(LFGB)" from matching subpart "b)" (TOP 18. b)).
     subpartPattern = "[^G]" + helper.escapeForRegex(subpart)
     fromNumberDown = self.cutter.all().filter(
         doc_top__gte=numberSelection.doc_top - 50)
     subpartMatches = fromNumberDown.filter(regex=subpartPattern)
     return self._getHighestSelection(subpartMatches)
Ejemplo n.º 4
0
 def _getNumberSelection(self, number):
     """Return the selection for chunks that start with ``number`` inside the page bounds.

     Matches of the start-anchored pattern are further filtered with
     ``left <= self.TOPRight`` and ``top >= self.page_heading``.

     Args:
         number: Number text; escaped before use in the pattern.

     Returns:
         Whatever ``self._getHighestSelection`` picks from the filtered
         matches.
     """
     startPattern = '^{}'.format(helper.escapeForRegex(number))
     startsWithNumber = self.cutter.filter(auto_regex=startPattern)
     # A whole line may be a single chunk, so bounding the right edge would
     # be too strict; the chunk only has to start before TOPRight.
     withinBounds = startsWithNumber.filter(
         left__lte=self.TOPRight,
         top__gte=self.page_heading,
     )
     return self._getHighestSelection(withinBounds)
Ejemplo n.º 5
0
 def _getNumberSelection(self, number):
     """Return the selection for chunks that start with the part of ``number`` before its first dot.

     Only the text before the first "." is used (e.g. "46." becomes "46").
     Matches are then restricted to ``left <= 160``.

     Args:
         number: Number text, possibly containing a dot.

     Returns:
         Whatever ``self._getHighestSelection`` picks from the filtered
         matches.
     """
     # Keep only the text before the first dot; the same trimming is also
     # relied on for subparts.
     beforeDot = number.partition(".")[0]
     startPattern = '^{}'.format(helper.escapeForRegex(beforeDot))
     startsWithNumber = self.cutter.filter(auto_regex=startPattern)
     # The left bound avoids matching e.g. "980 Sitzung" in the title when
     # looking for TOP 9 (seen in 989 TOP 9).
     nearLeftMargin = startsWithNumber.filter(left__lte=160)
     return self._getHighestSelection(nearLeftMargin)
Ejemplo n.º 6
0
    def _getSubpartSelectionNonStrictBelowNumberSelection(
            self, subpart, numberSelection):
        """Return the subpart selection located at or below the number selection.

        The search covers chunks filtered with
        ``doc_top >= numberSelection.doc_top - 50``, so the number chunk
        itself is included (the subpart may share a chunk with the number).
        Matches of the escaped ``subpart`` are then restricted to
        ``left <= self.TOPRight`` and ``top >= self.page_heading``.

        Args:
            subpart: Subpart label text, e.g. "b)"; escaped because of the
                regex bracket.
            numberSelection: Selection whose ``doc_top`` sets the upper bound.

        Returns:
            Whatever ``self._getHighestSelection`` picks from the filtered
            matches.
        """
        subpartPattern = helper.escapeForRegex(subpart)

        # Non-strict bound: for "1. a)" the subpart can sit in the number's
        # own chunk, for "34. a)" it does not.
        fromNumberDown = self.cutter.all().filter(
            doc_top__gte=numberSelection.doc_top - 50)

        subpartMatches = fromNumberDown.filter(auto_regex=subpartPattern)
        # A whole line may be a single chunk, so bounding the right edge
        # would be too strict; the chunk only has to start before TOPRight.
        withinBounds = subpartMatches.filter(
            left__lte=self.TOPRight,
            top__gte=self.page_heading,
        )
        # Using the number chunk as a strict upper bound would break the case
        # where subpart chunk == number chunk.
        return self._getHighestSelection(withinBounds)
Ejemplo n.º 7
0
 def _getPrefixStringSelection(self, s):
     """Return the selection for chunks matching the escaped text ``s``.

     The pattern is not anchored, unlike the number lookups that prefix "^".

     Args:
         s: Text to look for; escaped before use in the pattern.

     Returns:
         Whatever ``self._getHighestSelection`` picks from the matches.
     """
     containsPattern = '{}'.format(helper.escapeForRegex(s))
     matchingSelections = self.cutter.filter(auto_regex=containsPattern)
     return self._getHighestSelection(matchingSelections)