def addEdgeToDoc(self, Y=None):
        """
        To display the graph conveniently we add new Edge elements.

        One "Edge" PageXml node, carrying the edge class name in its DU_type
        attribute and a 2-point polyline, is appended to the page node of the
        first item yielded by self._iter_Page_DocNode(self.doc).

        Y is not used by this method.
        """
        # only the page node is needed; the other two items are ignored
        (_pnum, _page, ndPage) = next(self._iter_Page_DocNode(self.doc))
        w = int(ndPage.get("imageWidth"))

        # edges of any other type are anchored on points spread along the
        # line y=0, hence we need to know how many of them there are
        nOther = sum(1 for e in self.lEdge
                     if type(e) not in [HorizontalEdge, VerticalEdge, Edge_BL])
        nn = 1 + nOther
        ii = 0
        for edge in self.lEdge:
            # exact type match (not isinstance): subclasses go to the last branch
            edgeType = type(edge)
            if edgeType in [HorizontalEdge, VerticalEdge]:
                # centroid to centroid
                A = edge.A.shape.centroid
                B = edge.B.shape.centroid
            elif edgeType in [Edge_BL]:
                # centroid of A to the point of B's shape nearest to A's shape
                A = edge.A.shape.centroid
                _pt, B = shapely.ops.nearest_points(edge.A.shape, edge.B.shape)
            else:
                # each such edge gets its own reference point on y=0, and is
                # drawn between the points of both shapes nearest to it
                ii += 1
                x = 1 + ii * (w/nn)
                pt = geom.Point(x, 0)
                A, _ = shapely.ops.nearest_points(edge.A.shape, pt)
                B, _ = shapely.ops.nearest_points(edge.B.shape, pt)
            ndSep = MultiPageXml.createPageXmlNode("Edge")
            ndSep.set("DU_type", edgeType.__name__)
            ndPage.append(ndSep)
            MultiPageXml.setPoints(ndSep, [(A.x, A.y), (B.x, B.y)])
        return
def _atanDegrees(num, den):
    """
    Return math.degrees(math.atan(num / den)).

    A zero denominator yields +/-90 according to the sign of num (0.0 when
    num is zero as well) instead of raising ZeroDivisionError.
    """
    if den == 0:
        return math.copysign(90.0, num) if num else 0.0
    return math.degrees(math.atan(num / den))


def addSeparator(root, lCells):
    """
    Add separator that correspond to cell boundaries
    modify the XML DOM

    For each row and each column returned by getCellsSeparators(lCells), a
    SeparatorRegion node with a Coords child is appended to the first
    TableRegion returned by MultiPageXml.getChildByName(root, 'TableRegion').
    After the rows, and again after the columns, a comment with the angle
    statistics (in degrees) is appended and traced; it is skipped when there
    is no separator of that kind.

    Raise TableAnnotationException if no TableRegion is found.
    """
    dRow, dCol = getCellsSeparators(lCells)

    try:
        ndTR = MultiPageXml.getChildByName(root,'TableRegion')[0]
    except IndexError:
        raise TableAnnotationException("No TableRegion!!! ")

    def addSepNode(sOrient, sAttr, index, lPoint):
        # one SeparatorRegion, tagged with its row/col index, and its Coords
        ndSep = MultiPageXml.createPageXmlNode("SeparatorRegion")
        ndSep.set("orient", sOrient)
        ndSep.set(sAttr, "%d" % index)
        ndTR.append(ndSep)
        ndCoord = MultiPageXml.createPageXmlNode("Coords")
        MultiPageXml.setPoints(ndCoord, lPoint)
        ndSep.append(ndCoord)

    def addStat(sFormat, lAngle):
        # computed once, after the loop; nothing to report (and min/max
        # would fail) when there is no separator
        if not lAngle:
            return
        sStat = sFormat % (
            np.average(lAngle), np.std(lAngle), min(lAngle), max(lAngle)
            )
        ndTR.append(etree.Comment(sStat))
        traceln(sStat)

    lAngle = []
    for row in sorted(dRow):
        (x1, y1), (x2, y2) = dRow[row]
        # angle of the separator with the horizontal axis
        b = _atanDegrees(y2 - y1, x2 - x1)
        lAngle.append(b)
        addSepNode("horizontal angle=%.2f" % b, "row", row, [(x1, y1), (x2, y2)])
    addStat("\tHORIZONTAL: Average=%.1f°  stdev=%.2f°  min=%.1f° max=%.1f°", lAngle)

    lAngle = []
    for col in sorted(dCol):
        (x1, y1), (x2, y2) = dCol[col]
        # measured from the vertical axis, then reported as 90 minus that
        b = 90 - _atanDegrees(x2 - x1, y2 - y1)
        lAngle.append(b)
        addSepNode("vertical %.2f" % b, "col", col, [(x1, y1), (x2, y2)])
    addStat("\tVERTICAL  : Average=%.1f°  stdev=%.2f°  min=%.1f° max=%.1f°", lAngle)

    return
 def addPageXmlSeparator(cls, nd, sLabel, x1, y1, x2, y2, domid):
     """
     Append to nd a CutSeparator node for the segment (x1, y1)-(x2, y2)
     and return that node.

     sLabel, when not None, is stored in the "type" attribute.
     "orient" is "0" when the segment is wider than it is high, else "90".
     The node id is "s_<domid>".
     """
     ndCut = MultiPageXml.createPageXmlNode("CutSeparator")
     if sLabel is not None:
         # propagate the groundtruth info we have
         ndCut.set("type", sLabel)
     bWiderThanHigh = abs(x2 - x1) > abs(y2 - y1)
     ndCut.set("orient", "0" if bWiderThanHigh else "90")
     ndCut.set("id", "s_%d" % domid)
     nd.append(ndCut)

     # the geometry goes into a Coords child
     ndPoints = MultiPageXml.createPageXmlNode("Coords")
     MultiPageXml.setPoints(ndPoints, [(x1, y1), (x2, y2)])
     ndCut.append(ndPoints)
     return ndCut
 def addPageXmlSeparator(cls, ndPage, oCut, domid):
     """
     Append to ndPage a CutSeparator node describing oCut and return it.

     The node carries the _du_* information of oCut as DU_* attributes
     (the angle being converted to degrees), a constant "orient" of "0",
     the id "cs_<domid>", and a Coords child holding oCut.coords.
     """
     ndCut = MultiPageXml.createPageXmlNode("CutSeparator")
     # propagate the groundtruth info we have, in this attribute order
     for sName, sValue in [
             ("DU_type", oCut._du_label),
             ("orient", "0"),
             ("DU_angle", "%.1f" % math.degrees(oCut._du_angle)),
             ("DU_angle_freq", "%.3f" % oCut._du_angle_freq),
             ("DU_angle_cumul_freq", "%.3f" % oCut._du_angle_cumfreq),
             ("DU_set_support", "%s" % oCut._du_set_support),
             ("id", "cs_%d" % domid),
             ]:
         ndCut.set(sName, sValue)
     ndPage.append(ndCut)

     ndPoints = MultiPageXml.createPageXmlNode("Coords")
     MultiPageXml.setPoints(ndPoints, oCut.coords)
     ndCut.append(ndPoints)
     return ndCut
Example #5
0
 def addPageXmlSeparator(nd, i, lGTi, x1, y1, x2, y2, domid):
     """
     Append to nd a GridSeparator node for the segment (x1, y1)-(x2, y2)
     and return that node.

     When lGTi is not empty, the label given by self.getLabel(i, lGTi) is
     stored in the "type" attribute.
     "orient" is "0" when the segment is wider than it is high, else "90".
     The node id is "s_<domid>".
     """
     ndGrid = MultiPageXml.createPageXmlNode("GridSeparator")
     if lGTi:
         # propagate the groundtruth info we have
         # NOTE(review): self is not a parameter of this function; presumably
         # it is taken from an enclosing method's scope - confirm
         ndGrid.set("type", self.getLabel(i, lGTi))
     bWiderThanHigh = abs(x2 - x1) > abs(y2 - y1)
     ndGrid.set("orient", "0" if bWiderThanHigh else "90")
     ndGrid.set("id", "s_%d" % domid)
     nd.append(ndGrid)

     # the geometry goes into a Coords child
     ndPoints = MultiPageXml.createPageXmlNode("Coords")
     MultiPageXml.setPoints(ndPoints, [(x1, y1), (x2, y2)])
     ndGrid.append(ndPoints)
     return ndGrid
 def addEdgeToDOM(self):
     """
     To display the graph conveniently we add new Edge elements
     (this part is done by the parent class).
     Since we change the Baseline representation, we show the new one:
     the first Baseline found under each "row" node is overwritten with
     the coordinates of self.shaper_fun(<that Baseline node>).
     """
     super().addEdgeToDOM()

     for blk in self.lNode:
         sTypeName = blk.type.name
         # only these two node types are expected here
         assert sTypeName in ["row", "sepH"], sTypeName

         if sTypeName != "row":
             continue

         lndBaseline = blk.node.xpath(".//pc:Baseline", namespaces=self.dNS)
         ndBaseline = lndBaseline[0]
         oShape = self.shaper_fun(ndBaseline)
         MultiPageXml.setPoints(ndBaseline, list(oShape.coords))

     return
Example #7
0
def addSeparator(root, lCells):
    """
    Add separator that correspond to cell boundaries
    modify the XML DOM

    The top/bottom (resp. left/right) border segments of each cell are
    grouped by row (resp. column) separator index. A weighted straight line
    is fitted through the end points of each group, and one SeparatorRegion
    node with a Coords child is appended per line to the first TableRegion
    returned by MultiPageXml.getChildByName(root, 'TableRegion').
    After the rows, and again after the columns, a comment with the slope
    statistics (in percent) is appended and printed; it is skipped when there
    is no separator of that kind.

    lCells: cell nodes with "row", "col", "rowSpan", "colSpan" attributes
            and a PageXml Coords child.

    Raise IndexError if a cell has no Coords child or if no TableRegion is
    found.
    """
    # let's collect the segment forming the separators
    dRowSep_lSgmt = collections.defaultdict(list)
    dColSep_lSgmt = collections.defaultdict(list)
    dNS = {"a": MultiPageXml.NS_PAGE_XML}
    for cell in lCells:
        row, col, rowSpan, colSpan = [int(cell.get(sProp)) for sProp
                                      in ("row", "col", "rowSpan", "colSpan")]
        coord = cell.xpath("./a:Coords", namespaces=dNS)[0]
        plgn = Polygon.parsePoints(coord.get('points'))
        try:
            lTop, lRight, lBottom, lLeft = plgn.partitionSegmentTopRightBottomLeft()
        except ValueError:
            # best effort: a cell whose outline cannot be partitioned does
            # not contribute to any separator
            continue
        # the top segments contribute to row separator of index: row
        dRowSep_lSgmt[row].extend(lTop)
        # the bottom segments contribute to row separator of index: row+rowSpan
        dRowSep_lSgmt[row + rowSpan].extend(lBottom)
        # same for the columns, with the left and right segments
        dColSep_lSgmt[col].extend(lLeft)
        dColSep_lSgmt[col + colSpan].extend(lRight)

    ndTR = MultiPageXml.getChildByName(root, 'TableRegion')[0]

    def fitLine(lSegment, bVertical):
        """
        Weighted degree-1 fit through the end points of the segments.
        Return (a, b, tmin, tmax) such that the line is v = a + b * t for t
        in [tmin, tmax], where t is x and v is y (the other way round if
        bVertical).
        """
        lX = [x for x1, _y1, x2, _y2 in lSegment for x in (x1, x2)]
        lY = [y for _x1, y1, _x2, y2 in lSegment for y in (y1, y2)]
        # NOTE(review): the norm of this 2x2 list is the Frobenius norm of the
        # four coordinates, not the length of the segment; kept unchanged -
        # confirm which weighting is intended
        lfNorm = [
            np.linalg.norm([[x1, y1], [x2, y2]]) for x1, y1, x2, y2 in lSegment
        ]
        # duplicate each weight: one per end point
        W = [fN for fN in lfNorm for _ in (0, 1)]
        lT, lV = (lY, lX) if bVertical else (lX, lY)
        # polyfit returns the coefficients lowest degree first: a + b * t
        a, b = np.polynomial.polynomial.polyfit(lT, lV, 1, w=W)
        return a, b, min(lT), max(lT)

    def addSepNode(sOrient, lPoint):
        # one SeparatorRegion and its Coords
        ndSep = MultiPageXml.createPageXmlNode("SeparatorRegion")
        ndSep.set("orient", sOrient)
        ndTR.append(ndSep)
        ndCoord = MultiPageXml.createPageXmlNode("Coords")
        MultiPageXml.setPoints(ndCoord, lPoint)
        ndSep.append(ndCoord)

    def addStat(sFormat, lSlope):
        # nothing to report (and min/max would fail) without any separator
        if not lSlope:
            return
        sStat = sFormat % (
            np.average(lSlope), np.std(lSlope), min(lSlope), max(lSlope))
        ndTR.append(etree.Comment(sStat))
        print(sStat)

    # now make linear regression to draw relevant separators
    lSlope = []
    for lSegment in dRowSep_lSgmt.values():
        if not lSegment:
            # no segment at all for this index: nothing to fit
            continue
        a, b, xmin, xmax = fitLine(lSegment, False)
        lSlope.append(b * 100)
        addSepNode("horizontal %.1f %.3f" % (a, b),
                   [(xmin, a + b * xmin), (xmax, a + b * xmax)])
    addStat("\tHORIZONTAL: Average=%.1f%%  stdev=%.2f%%  min=%.1f%% max=%.1f%%", lSlope)

    lSlope = []
    for lSegment in dColSep_lSgmt.values():
        if not lSegment:
            continue
        # x as a function of y: x = a + b * y
        a, b, ymin, ymax = fitLine(lSegment, True)
        lSlope.append(b * 100)
        addSepNode("vertical %.1f %.3f" % (a, b),
                   [(a + b * ymin, ymin), (a + b * ymax, ymax)])
    addStat("\tVERTICAL  : Average=%.1f%%  stdev=%.2f%%  min=%.1f%% max=%.1f%%", lSlope)

    return