Example #1
0
    def computeSpecialEdges(self, lClassicPageNode, lSpecialPageNode,
                            bCutIsBeforeText):
        """
        Compute:
        - edges between each block and the cut line above/across/below the block
        - edges between cut lines
        return a list of edges
        """

        #augment the block with the coordinate of its baseline central point
        for blk in lClassicPageNode:
            try:
                x, y = BaselineCutAnnotator.getDomBaselineXY(blk.node)
                blk.x_bslne = x
                blk.y_bslne = y
            except IndexError:
                traceln("** WARNING: no Baseline in ", blk.domid)
                traceln("** Using x2 and y2 instead... :-/")
                blk.x_bslne = blk.x2
                blk.y_bslne = blk.y2

        for cutBlk in lSpecialPageNode:
            assert cutBlk.y1 == cutBlk.y2
            cutBlk.y1 = int(round(cutBlk.y1))  #DeltaFun make float
            cutBlk.y2 = cutBlk.y1

        #block to cut line edges
        lEdge = []
        for blk in lClassicPageNode:
            for cutBlk in lSpecialPageNode:
                if blk.y_bslne == cutBlk.y1:
                    edge = Edge_BL(blk, cutBlk)
                    edge.len = 0
                    edge._type = 0  # Cut line is crossing the block
                    lEdge.append(edge)
                elif abs(blk.y_bslne - cutBlk.y1) <= self.iBlockVisibility:
                    edge = Edge_BL(blk, cutBlk)
                    # experiments show that abs helps
                    # edge.len = (blk.y_bslne - cutBlk.y1) / self.iBlockVisibility
                    edge.len = abs(blk.y_bslne -
                                   cutBlk.y1) / self.iBlockVisibility
                    edge._type = -1 if blk.y_bslne > cutBlk.y1 else +1
                    lEdge.append(edge)

        #sort those edge from top to bottom
        lEdge.sort(key=lambda o: o.B.y1)  # o.B.y1 == o.B.y2 by construction

        #now filter those edges
        n0 = len(lEdge)
        if False:
            print("--- before filtering: %d edges" % len(lEdge))
            lSortedEdge = sorted(lEdge, key=lambda x: x.A.domid)
            for edge in lSortedEdge:
                print("Block domid=%s y1=%s y2=%s yg=%s" %
                      (edge.A.domid, edge.A.y1, edge.A.y2, edge.A.y_bslne) +
                      "  %s line %s " %
                      (["↑", "-", "↓"][1 + edge._type], edge.B.y1) +
                      "domid=%s y1=%s  " % (edge.B.domid, edge.B.y1) +
                      str(id(edge)))
        lEdge = self._filterBadEdge(lEdge, lSpecialPageNode, bCutIsBeforeText)
        traceln(" - filtering: removed %d edges due to obstruction." %
                (n0 - len(lEdge)))
        if False:
            print("--- After filtering: %d edges" % len(lEdge))
            lSortedEdge = sorted(lEdge, key=lambda x: x.A.domid)
            print(len(lSortedEdge))
            for edge in lSortedEdge:
                print("Block domid=%s y1=%s y2=%s yg=%s" %
                      (edge.A.domid, edge.A.y1, edge.A.y2, edge.A.y_bslne) +
                      "  %s line %s " %
                      (["↑", "-", "↓"][1 + edge._type], edge.B.y1) +
                      "domid=%s y1=%s  " % (edge.B.domid, edge.B.y1) +
                      str(id(edge)))

        if self.iLineVisibility > 0:
            # Cut line to Cut line edges
            lSpecialPageNode.sort(key=lambda o: o.y1)
            for i, A in enumerate(lSpecialPageNode):
                for B in lSpecialPageNode[i + 1:]:
                    if B.y1 - A.y1 <= self.iLineVisibility:
                        edge = Edge_LL(A, B)
                        edge.len = (B.y1 - A.y1) / self.iLineVisibility
                        assert edge.len >= 0
                        lEdge.append(edge)
                    else:
                        break

        return lEdge
Example #2
0
    def computeSpecialEdges(self, lClassicPageNode, lSpecialPageNode,
                            bCutIsBeforeText):
        """
        Compute:
        - edges between each block and the cut line above/across/below the block
        - edges between cut lines
        return a list of edges
        """
       
        #augment the block with the coordinate of its baseline central point
        for blk in lClassicPageNode:
            try:
                x,y = BaselineCutAnnotator.getDomBaselineXY(blk.node)
                blk.x_bslne = x
                blk.y_bslne = y
            except IndexError:
                traceln("** WARNING: no Baseline in ", blk.domid)
                traceln("** Using x2 and y2 instead... :-/")
                blk.x_bslne = int(round(blk.x2))
                blk.y_bslne = int(round(blk.y2))
                
                
        
        for cutBlk in lSpecialPageNode:
            assert cutBlk.y1 == cutBlk.y2, "Update code: horizontal cut only for now"
            cutBlk.y1 = int(round(cutBlk.y1))  #DeltaFun make float
            cutBlk.y2 = cutBlk.y1

        #block to cut line edges
        lEdge = []
        for blk in lClassicPageNode:
            for cutBlk in lSpecialPageNode:
                if blk.y1 <= cutBlk.y1 and cutBlk.y1 <= blk.y2:
                    #crossing
                    typ = 2 if blk.y_bslne == cutBlk.y1 else 0
                    edge = Edge_BL(blk, cutBlk, typ)
                    edge.len = (blk.y_bslne - cutBlk.y1) / abs(blk.y1 - blk.y2)
                    lEdge.append(edge)                    
                elif abs(blk.y_bslne - cutBlk.y1) <= self.iBlockVisibility:
                    typ = -1 if blk.y_bslne > cutBlk.y1 else +1
                    edge = Edge_BL(blk, cutBlk, typ)
                    edge.len = abs(blk.y_bslne - cutBlk.y1) / self.iBlockVisibility
                    lEdge.append(edge)                    
        
        #sort those edge from top to bottom
        lEdge.sort(key=lambda o: o.B.y1)  # o.B.y1 == o.B.y2 by construction
        
        #now filter those edges
        n0 = len(lEdge)
        if False:
            print("--- before filtering: %d edges" % len(lEdge))
            lSortedEdge = sorted(lEdge, key=lambda x: x.A.domid)
            for edge in lSortedEdge:
                print("Block domid=%s y1=%s y2=%s yg=%s"%(edge.A.domid, edge.A.y1, edge.A.y2, edge.A.y_bslne)
                        + "  %s line %s "%(["↑", "x", "↓", "-"][1+edge._type],                                       
                                       edge.B.y1)
                        + "domid=%s y1=%s  " %(edge.B.domid, edge.B.y1)
                        +str(id(edge))
                    )
        lEdge = self._filterBadEdge(lEdge, lSpecialPageNode, bCutIsBeforeText)
        traceln(" - filtering: removed %d edges due to obstruction." % (n0-len(lEdge)))
        if False:
            print("--- After filtering: %d edges" % len(lEdge))
            lSortedEdge = sorted(lEdge, key=lambda x: x.A.domid)
            print(len(lSortedEdge))
            for edge in lSortedEdge:
                print("Block domid=%s y1=%s y2=%s yg=%s"%(edge.A.domid, edge.A.y1, edge.A.y2, edge.A.y_bslne)
                        + "  %s line %s "%(["↑", "x", "↓", "-"][1+edge._type],                                       
                                       edge.B.y1)
                        + "domid=%s y1=%s  " %(edge.B.domid, edge.B.y1)
                        +str(id(edge))
                    )

#         # grid line to grid line edges
#         n = len(dGridLineByIndex)
#         for i in range(n):
#             A = dGridLineByIndex[i]
#             for j in range(i+1, min(n, i+cls.iGridVisibility+1)):
#                 edge = Edge_LL(A, dGridLineByIndex[j])
#                 edge.len = (j - i)
#                 lEdge.append(edge) 
        
        return lEdge