コード例 #1
0
def assignConCompToCells(lCC, lCells):
    """
        Group the elements of lCC by the cell shape they are best assigned to.

        Each shape yielded by ShapeLoader.contourObject(lCC) is skipped when it
        is empty; otherwise bestRegionsAssignment(shape, lCells) picks a cell.
        When that call returns a falsy value the element is left unassigned.

        NOTE(review): the index of each yielded shape is used to index lCC, so
        this assumes contourObject yields one shape per element of lCC, in the
        same order -- confirm against ShapeLoader.

        :param lCC: indexable collection of elements to dispatch
        :param lCells: candidate cells, forwarded to bestRegionsAssignment
        :return: dict mapping the WKT of a selected cell to the list of lCC
                 elements assigned to it; cells that receive nothing are absent
    """
    assigned = {}
    for idx, shape in enumerate(ShapeLoader.contourObject(lCC)):
        if shape.is_empty:
            continue
        target = bestRegionsAssignment(shape, lCells)
        if not target:
            continue
        # group under the cell's WKT string, creating the bucket on first use
        assigned.setdefault(target.wkt, []).append(lCC[idx])

    return assigned
コード例 #2
0
def transformMinima2envelop(pageNd, dClusterWithTL):
    """
        Compute one convex hull per entry of dClusterWithTL and draw it.

        For every key of dClusterWithTL, the hull of
        ShapeLoader.contourObject(dClusterWithTL[key]) is computed and collected.
        Each non-empty hull is additionally appended to pageNd as a 'LINE'
        element whose 'points' attribute holds the hull coordinates.

        :param pageNd: node receiving the 'LINE' children (mutated in place)
        :param dClusterWithTL: mapping from a key to the content of one cluster
        :return: list of all hulls, empty ones included, in iteration order
    """
    # NOTE: an earlier variant (envelope emitted as 'COL' elements) was
    # replaced by the convex hull used below.
    hulls = []
    for key in dClusterWithTL:
        hull = ShapeLoader.contourObject(dClusterWithTL[key]).convex_hull
        # the hull is always returned, even when it is empty
        hulls.append(hull)
        if hull.is_empty:
            continue
        lineNd = etree.Element('LINE')
        pageNd.append(lineNd)
        lineNd.set('points', ShapeLoader.getCoordsString(hull))

    return hulls
コード例 #3
0
def checkClusteringPerRow(pageNd, dCellCndPerRow, dTextInCells, dClusterWithTL,
                          dEdges):
    """
        Recompute the clusters of each row and cut rows that look merged.

        For every row, the text lines of each non-empty cell are clustered with
        connected_components(dEdges, ids, TH=0.5). When more than two cells of
        the row contain text, the mode of the per-cell cluster counts is taken
        as the number of sub-rows; a row whose mode is not 1 is cut:

        * the row id is recorded as to be deleted;
        * cells having exactly `mode` clusters contribute their clusters,
          ordered by the centroid y of each cluster's first text line, to the
          sub-row of the same rank;
        * clusters of the other cells are returned as text lines still to be
          placed;
        * each sub-row gets a rectangle (envelope of its text lines, scaled by
          2 along x) which is also appended to pageNd as a 'LINE' element.

        NOTE(review): if statistics.mode is the `mode` in scope, Python 3.8+
        no longer raises StatisticsError for multimodal data, so the fallback
        to 2 sub-rows may never trigger there -- confirm the intended import.

        :param pageNd: node receiving the 'LINE' children (mutated in place)
        :param dCellCndPerRow: mapping row id -> iterable of cells exposing .wkt
        :param dTextInCells: mapping cell WKT -> text lines exposing .get('id')
        :param dClusterWithTL: mapping row id -> text lines of that row
        :param dEdges: edge information forwarded to connected_components
        :return: tuple (row ids to delete, new rectangles, text lines of each
                 new sub-row, clusters of each new sub-row, text lines left to
                 be reassigned)
    """

    def _row_textlines(row_key, id_group):
        # text lines of the row whose id belongs to the given cluster of ids
        return [
            tl for tl in dClusterWithTL[row_key] if tl.get('id') in id_group
        ]

    def _first_centroid_y(tl_group):
        # only the first text line of a group is used to compare positions
        return ShapeLoader.node_to_Polygon(tl_group[0]).centroid.y

    rows_to_drop = []
    new_polys = []
    new_tl_clusters = []
    new_cc_clusters = []
    leftover_tls = []

    for rowid, rowCells in dCellCndPerRow.items():
        # one list of id-clusters per cell that holds text; empty cells skipped
        per_cell_clusters = []
        cluster_counts = []
        for cell in rowCells:
            cell_key = cell.wkt
            if cell_key not in dTextInCells:
                continue
            ids = [tl.get('id') for tl in dTextInCells[cell_key]]
            clusters = connected_components(dEdges, ids, TH=0.5)
            cluster_counts.append(len(clusters))
            per_cell_clusters.append(clusters)

        # too few populated cells to decide anything for this row
        if len(cluster_counts) <= 2:
            continue

        try:
            nbRows = mode(cluster_counts)
        except statistics.StatisticsError:
            nbRows = 2
        if nbRows == 1:
            continue

        print('WARNING CUT ', rowid, "mode", nbRows)
        rows_to_drop.append(rowid)

        # rank (top to bottom) -> list of text-line groups, one per agreeing cell
        stacked = defaultdict(list)
        for cell_clusters in per_cell_clusters:
            if len(cell_clusters) != nbRows:
                # cell disagrees with the mode: hand its text lines back
                for id_group in cell_clusters:
                    leftover_tls.extend(_row_textlines(rowid, id_group))
                continue
            # work on text lines instead of ids, ordered vertically
            tl_groups = [_row_textlines(rowid, grp) for grp in cell_clusters]
            tl_groups.sort(key=_first_centroid_y)
            for rank, tl_group in enumerate(tl_groups):
                stacked[rank].append(tl_group)

        for groups in stacked.values():
            members = [tl for grp in groups for tl in grp]
            rect = ShapeLoader.contourObject(members).envelope
            rect = scale(rect, xfact=2)
            new_polys.append(rect)
            new_cc_clusters.append(groups)
            new_tl_clusters.append(list(members))
            lineNd = etree.Element('LINE')
            lineNd.set('points', ShapeLoader.getCoordsString(rect))
            pageNd.append(lineNd)

    # updated cluster information for the caller
    return rows_to_drop, new_polys, new_tl_clusters, new_cc_clusters, leftover_tls