コード例 #1
0
 def compute_contour_centers(self):
     """
     Store one center point per contour in ``self.contour_centers``.

     The result is a float array with one row of two values per entry in
     ``self.contours``. Entries whose contour is ``None`` keep the
     placeholder ``(-1, -1)``; every other row is filled with the value
     returned by ``cv_algorithms.meanCenter()`` for the matching entry of
     ``self.contours_bbox``.
     """
     # Start with every row marked invalid (-1); valid contours overwrite theirs.
     centers = self.contour_centers = np.full((len(self.contours), 2), -1.)
     for idx, contour in enumerate(self.contours):
         if contour is not None:
             centers[idx] = cv_algorithms.meanCenter(self.contours_bbox[idx])
コード例 #2
0
 def compute_cell_hulls(self):
     """
     Compute an enclosing rectangle and a center point for every cell.

     Calls ``self.compute_cell_polygons()`` first, then, for each entry of
     ``self.cell_polygons``, looks up the referenced rows of
     ``self.cluster_coords`` and replaces them by the corner points of the
     minimum-area rectangle around them. For plain 4-point cells this is
     essentially the original outline; for cells with 5 or more points
     (mostly merged cells) it drops small irregularities and extra points.

     Results are stored in ``self.cell_hulls`` (list of corner-point arrays)
     and ``self.cell_centers`` (one row of two values per hull, as returned
     by ``cv_algorithms.meanCenter()``).
     """
     self.compute_cell_polygons()

     hulls = []
     for polygon in self.cell_polygons:
         # cv2 convexHull / minAreaRect only work with integer coordinates,
         # so round the cluster coordinates to the nearest integer first.
         int_points = np.rint(self.cluster_coords[polygon]).astype(int)
         enclosing_rect = cv2.minAreaRect(int_points)
         hulls.append(cv2.boxPoints(enclosing_rect))
     self.cell_hulls = hulls

     # One center per hull
     centers = self.cell_centers = np.zeros((len(hulls), 2))
     for idx, box in enumerate(hulls):
         centers[idx] = cv_algorithms.meanCenter(box)
コード例 #3
0
ファイル: TableRecognition.py プロジェクト: zwcdp/OTR
    def find_corner_clusters(self, distance_threshold=20.):
        """
        Group nearby bounding-box corners into clusters.

        Every corner of every valid (non-``None``) contour's bounding box is
        collected. The corners are then visited in order; each corner that has
        not been assigned yet seeds a new cluster consisting of all corners
        whose Euclidean distance to the seed is below ``distance_threshold``.

        The results are stored on the instance:

        - ``cluster_coords``: array with one entry per cluster, the value
          returned by ``cv_algorithms.meanCenter()`` for the cluster's corners
        - ``cluster_num_nodes``: array with the number of corners per cluster
        - ``cluster_coords_to_node_id``: dict mapping an ``(x, y)`` corner
          tuple to the ID (index) of its cluster

        If there are no valid contours, all three results are set to empty
        containers instead of failing inside the distance computation.

        :param distance_threshold: Maximum distance (exclusive) between a seed
            corner and another corner for them to share a cluster. Rather set
            this large; non-convexity can be corrected later.
        """
        # Find all bounding box corners
        corners = []
        for i, contour in enumerate(self.contours):
            if contour is None:
                continue
            for coord in self.contours_bbox[i]:
                corners.append((coord[0], coord[1]))

        if not corners:
            # cdist() requires 2-D input and would raise on an empty corner list
            self.cluster_coords = np.zeros((0, 2))
            self.cluster_num_nodes = np.zeros(0, dtype=int)
            self.cluster_coords_to_node_id = {}
            return

        # Simple algorithm, still very fast: compute all pairwise distances using cdist
        corners = np.asarray(corners)
        distmat = scipy.spatial.distance.cdist(corners, corners, 'euclidean')
        # Set to True once a node has been put into a cluster.
        # Builtin bool is used because the np.bool alias was removed in NumPy 1.24.
        ignore = np.zeros(corners.shape[0], dtype=bool)

        # Find clusters in the distance matrix, i.e. node groups which are close together
        cluster_coords = []  # For each cluster, a (x, y) coordinate pair
        cluster_num_nodes = []  # For each cluster, the number of nodes it consists of
        cluster_coords_to_node_id = {}  # (x, y) tuple => cluster ID
        for i in range(corners.shape[0]):
            if ignore[i]:
                continue
            # Which nodes are close to this node, including itself.
            # NOTE: nodes already claimed by an earlier cluster are not excluded
            # here, so such a node is counted again and re-mapped to this cluster.
            below_thresh = distmat[i, :] < distance_threshold
            allnodes = np.nonzero(below_thresh)[0]  # index list
            # The new cluster's ID is its position in the result list
            clusterid = len(cluster_coords)
            allcorners = corners[allnodes]
            cluster_coords.append(tuple(cv_algorithms.meanCenter(allcorners)))
            cluster_num_nodes.append(allnodes.size)
            # Also create a map from each position to the current cluster ID.
            # This works only because these coordinates are discrete integer pixel indices
            for coord in allcorners:
                cluster_coords_to_node_id[tuple(coord)] = clusterid
            # Ignore all nodes in the cluster (i.e. don't let them seed a new cluster)
            ignore[allnodes] = True

        # Now that the size is known, we can convert to numpy arrays
        self.cluster_coords = np.asarray(cluster_coords)
        self.cluster_num_nodes = np.asarray(cluster_num_nodes)
        self.cluster_coords_to_node_id = cluster_coords_to_node_id
コード例 #4
0
ファイル: TableRecognition.py プロジェクト: ulikoehler/OTR
    def find_corner_clusters(self, distance_threshold=20.):
        """
        Cluster the corners of all contour bounding boxes by proximity.

        The corners of ``self.contours_bbox[i]`` are gathered for every ``i``
        where ``self.contours[i]`` is not ``None``. Walking over the corners in
        order, each corner that is not yet part of a cluster starts a new one,
        which takes in every corner closer than ``distance_threshold``
        (Euclidean distance) to it.

        Sets the following attributes:

        - ``cluster_coords``: per cluster, the center as returned by
          ``cv_algorithms.meanCenter()`` for its corners
        - ``cluster_num_nodes``: per cluster, how many corners it contains
        - ``cluster_coords_to_node_id``: dict from ``(x, y)`` corner tuple to
          cluster ID

        When no valid contour exists, the three attributes are set to empty
        containers rather than raising from the distance computation.

        :param distance_threshold: Corners strictly closer than this to a
            cluster's seed corner join that cluster. Rather set this large;
            non-convexity can be corrected later.
        """
        # Find all bounding box corners
        corners = []
        for i, contour in enumerate(self.contours):
            if contour is None:
                continue
            bbox = self.contours_bbox[i]
            for coord in bbox:
                corners.append((coord[0], coord[1]))

        if not corners:
            # Nothing to cluster; cdist() would reject the empty 1-D array
            self.cluster_coords = np.zeros((0, 2))
            self.cluster_num_nodes = np.zeros(0, dtype=int)
            self.cluster_coords_to_node_id = {}
            return

        # Simple algorithm, still very fast: compute all pairwise distances using cdist
        corners = np.asarray(corners)
        distmat = scipy.spatial.distance.cdist(corners, corners, 'euclidean')
        # True for nodes that already belong to a cluster.
        # np.bool no longer exists in NumPy 1.24+, hence the builtin bool.
        ignore = np.zeros(corners.shape[0], dtype=bool)

        # Find clusters in the distance matrix, i.e. node groups which are close together
        cluster_coords = []  # For each cluster, a (x, y) coordinate pair
        cluster_num_nodes = []  # For each cluster, the number of nodes it consists of
        cluster_coords_to_node_id = {}  # (x, y) tuple => cluster ID
        for i in range(corners.shape[0]):
            if ignore[i]:
                continue
            # Which nodes are close to this node, including itself.
            # NOTE: the row is not masked with `ignore`, so a node within range
            # of several seeds is counted for each and mapped to the last one.
            below_thresh = distmat[i, :] < distance_threshold
            allnodes = np.nonzero(below_thresh)[0]  # index list
            # Get a new ID
            clusterid = len(cluster_coords)
            allcorners = corners[allnodes]
            cluster_coords.append(tuple(cv_algorithms.meanCenter(allcorners)))
            cluster_num_nodes.append(allnodes.size)
            # Also create a map from each position to the current cluster ID.
            # This works only because these coordinates are discrete integer pixel indices
            for coord in allcorners:
                cluster_coords_to_node_id[tuple(coord)] = clusterid
            # Ignore all nodes in the cluster (i.e. don't let them seed a new cluster)
            ignore[allnodes] = True

        # Now that the size is known, we can convert to numpy arrays
        self.cluster_coords = np.asarray(cluster_coords)
        self.cluster_num_nodes = np.asarray(cluster_num_nodes)
        self.cluster_coords_to_node_id = cluster_coords_to_node_id