def compute_contour_centers(self):
    """
    Compute the center of every valid contour's bounding box.

    The result is stored in self.contour_centers, a float array with one
    (2-element) row per entry of self.contours. Each valid row is filled
    with cv_algorithms.meanCenter() of the matching self.contours_bbox entry.
    Rows belonging to contours that are None keep the marker value -1.
    """
    num_contours = len(self.contours)
    # Start with every row flagged as invalid (-1); valid rows are overwritten
    centers = np.full((num_contours, 2), -1.0)
    self.contour_centers = centers
    for idx, contour in enumerate(self.contours):
        if contour is not None:
            bbox = self.contours_bbox[idx]
            centers[idx] = cv_algorithms.meanCenter(bbox)
def compute_cell_hulls(self):
    """
    Run compute_cell_polygons() and, for each cell polygon, compute the
    corner points of a minimum-area rectangle enclosing it
    (cv2.minAreaRect + cv2.boxPoints).

    Per the original author's note: for most (4-point) cells this is
    equivalent to the original path, but it removes small irregularities and
    extra points from larger, 5+-point cells (mostly merged cells).

    Results are stored in:
      - self.cell_hulls: list with one box-point array per cell polygon
      - self.cell_centers: array with one meanCenter() row per hull
    """
    self.compute_cell_polygons()

    hulls = []
    for polygon in self.cell_polygons:
        # Original author's note: cv2 convexHull / minAreaRect only work
        # with integer coordinates, hence the rounding + int conversion.
        polygon_points = np.rint(self.cluster_coords[polygon]).astype(int)
        enclosing_rect = cv2.minAreaRect(polygon_points)
        hulls.append(cv2.boxPoints(enclosing_rect))
    self.cell_hulls = hulls

    # Compute the center of every cell hull
    self.cell_centers = np.zeros((len(self.cell_hulls), 2))
    for idx, hull in enumerate(self.cell_hulls):
        self.cell_centers[idx] = cv_algorithms.meanCenter(hull)
def find_corner_clusters(self, distance_threshold=20.):
    """
    Group nearby bounding-box corners into clusters.

    All corner coordinates of self.contours_bbox are collected (entries whose
    contour in self.contours is None are skipped). Corners are then greedily
    clustered: each not-yet-assigned corner forms a cluster with every corner
    whose euclidean distance to it is strictly below distance_threshold.

    :param distance_threshold: Maximum (exclusive) euclidean distance between
        a cluster's seed corner and the other corners of that cluster.

    Results are stored in:
      - self.cluster_coords: array with one meanCenter() coordinate per cluster
      - self.cluster_num_nodes: array with the number of corners per cluster
      - self.cluster_coords_to_node_id: dict, (x, y) corner tuple => cluster ID

    If there is no valid contour at all, the arrays are empty
    (cluster_coords has shape (0, 2)) and the dict is empty.
    """
    # Find all bounding box corners
    corners = []
    for i, contour in enumerate(self.contours):
        if contour is None:
            continue
        for coord in self.contours_bbox[i]:
            corners.append((coord[0], coord[1]))

    # Without any corner, cdist() would reject the (non-2D) empty array,
    # so store empty results explicitly instead of failing.
    if not corners:
        self.cluster_coords = np.empty((0, 2))
        self.cluster_num_nodes = np.empty(0, dtype=int)
        self.cluster_coords_to_node_id = {}
        return

    # Simple algorithm: compute all pairwise distances using cdist
    corners = np.asarray(corners)
    distmat = scipy.spatial.distance.cdist(corners, corners, 'euclidean')
    # True once a node has been assigned to a cluster.
    # Builtin bool is used because the np.bool alias is not available in
    # every NumPy release.
    ignore = np.zeros(corners.shape[0], dtype=bool)

    # Find clusters in the distance matrix, i.e. node groups which are close together
    cluster_coords = []  # For each cluster, an (x, y) coordinate pair
    cluster_num_nodes = []  # For each cluster, the number of nodes it consists of
    cluster_coords_to_node_id = {}  # (x, y) tuple => cluster ID
    for i in range(corners.shape[0]):
        if ignore[i]:
            continue
        # Which nodes are close to this node, including itself.
        # Original author's note: rather set the threshold large,
        # non-convexity can be corrected later.
        # NOTE(review): nodes already assigned to an earlier cluster are not
        # masked out here, so they can be counted again and their map entry
        # is overwritten with the newer cluster ID.
        below_thresh = distmat[i, :] < distance_threshold
        allnodes = np.nonzero(below_thresh)[0]  # index list
        # Get a new ID
        clusterid = len(cluster_coords)
        allcorners = corners[allnodes]
        cluster_coords.append(tuple(cv_algorithms.meanCenter(allcorners)))
        cluster_num_nodes.append(allnodes.size)
        # Also create a map from each position to the current cluster ID.
        # Original author's note: this works only because these coordinates
        # are discrete integer pixel indices.
        for coord in allcorners:
            cluster_coords_to_node_id[tuple(coord)] = clusterid
        # Ignore all nodes in the cluster (i.e. don't start a new cluster from them)
        ignore[allnodes] = True

    # Now that the size is known, we can convert to numpy arrays
    self.cluster_coords = np.asarray(cluster_coords)
    self.cluster_num_nodes = np.asarray(cluster_num_nodes)
    self.cluster_coords_to_node_id = cluster_coords_to_node_id
def find_corner_clusters(self, distance_threshold=20.):
    """
    Group nearby bounding-box corners into clusters.

    All corner coordinates of self.contours_bbox are collected (entries whose
    contour in self.contours is None are skipped). Corners are then greedily
    clustered: each not-yet-assigned corner forms a cluster with every corner
    whose euclidean distance to it is strictly below distance_threshold.

    :param distance_threshold: Maximum (exclusive) euclidean distance between
        a cluster's seed corner and the other corners of that cluster.

    Results are stored in:
      - self.cluster_coords: array with one meanCenter() coordinate per cluster
      - self.cluster_num_nodes: array with the number of corners per cluster
      - self.cluster_coords_to_node_id: dict, (x, y) corner tuple => cluster ID

    If there is no valid contour at all, the arrays are empty
    (cluster_coords has shape (0, 2)) and the dict is empty.
    """
    # Find all bounding box corners
    corners = []
    for i, contour in enumerate(self.contours):
        if contour is None:
            continue
        for coord in self.contours_bbox[i]:
            corners.append((coord[0], coord[1]))

    # Without any corner, cdist() would reject the (non-2D) empty array,
    # so store empty results explicitly instead of failing.
    if not corners:
        self.cluster_coords = np.empty((0, 2))
        self.cluster_num_nodes = np.empty(0, dtype=int)
        self.cluster_coords_to_node_id = {}
        return

    # Simple algorithm: compute all pairwise distances using cdist
    corners = np.asarray(corners)
    distmat = scipy.spatial.distance.cdist(corners, corners, 'euclidean')
    # True once a node has been assigned to a cluster.
    # Builtin bool is used because the np.bool alias is not available in
    # every NumPy release.
    ignore = np.zeros(corners.shape[0], dtype=bool)

    # Find clusters in the distance matrix, i.e. node groups which are close together
    cluster_coords = []  # For each cluster, an (x, y) coordinate pair
    cluster_num_nodes = []  # For each cluster, the number of nodes it consists of
    cluster_coords_to_node_id = {}  # (x, y) tuple => cluster ID
    for i in range(corners.shape[0]):
        if ignore[i]:
            continue
        # Which nodes are close to this node, including itself.
        # Original author's note: rather set the threshold large,
        # non-convexity can be corrected later.
        # NOTE(review): nodes already assigned to an earlier cluster are not
        # masked out here, so they can be counted again and their map entry
        # is overwritten with the newer cluster ID.
        below_thresh = distmat[i, :] < distance_threshold
        allnodes = np.nonzero(below_thresh)[0]  # index list
        # Get a new ID
        clusterid = len(cluster_coords)
        allcorners = corners[allnodes]
        cluster_coords.append(tuple(cv_algorithms.meanCenter(allcorners)))
        cluster_num_nodes.append(allnodes.size)
        # Also create a map from each position to the current cluster ID.
        # Original author's note: this works only because these coordinates
        # are discrete integer pixel indices.
        for coord in allcorners:
            cluster_coords_to_node_id[tuple(coord)] = clusterid
        # Ignore all nodes in the cluster (i.e. don't start a new cluster from them)
        ignore[allnodes] = True

    # Now that the size is known, we can convert to numpy arrays
    self.cluster_coords = np.asarray(cluster_coords)
    self.cluster_num_nodes = np.asarray(cluster_num_nodes)
    self.cluster_coords_to_node_id = cluster_coords_to_node_id