def test_union_by_rank(self):
    """Check that Union attaches the lower-ranked tree under the higher-ranked root."""
    forest = UnionFind()
    one, two, three = (forest.MakeSet(value) for value in (1, 2, 3))

    # Equal ranks: the first argument is attached to the second, whose
    # rank is then expected to be 1.
    forest.Union(one, two)
    for node, expected_root in ((one, two), (two, two), (three, three)):
        self.assertEqual(forest.Find(node), expected_root)
    for node, expected_rank in ((one, 0), (two, 1), (three, 0)):
        self.assertEqual(node.rank, expected_rank)

    # Argument order alone would attach the first set to the second;
    # union by rank must instead add 3 to the existing (1, 2) tree.
    forest.Union(one, three)
    for node in (one, two, three):
        self.assertEqual(forest.Find(node), two)
    for node, expected_rank in ((one, 0), (two, 1), (three, 0)):
        self.assertEqual(node.rank, expected_rank)
    for node in (one, two, three):
        self.assertEqual(node.parent, two)
def test_find(self):
    """Check that Find reports the same root for both members of a merged pair."""
    forest = UnionFind()
    five = forest.MakeSet(5)
    seven = forest.MakeSet(7)

    forest.Union(five, seven)

    # Both elements are expected to resolve to `seven` after the union.
    for member in (five, seven):
        self.assertEqual(forest.Find(member), seven)
def test_path_compression(self):
    """Check that Find re-points the queried node directly at the root."""
    forest = UnionFind()
    one, two, three, four = (forest.MakeSet(value) for value in (1, 2, 3, 4))

    def check_parents(expected_links):
        # Each entry is (node, the parent it is expected to point at).
        for node, expected_parent in expected_links:
            self.assertEqual(node.parent, expected_parent)

    for first, second in ((one, two), (three, four), (two, three)):
        forest.Union(first, second)

    # Expected shape before compression:
    #       4
    #      / \
    #     2   3
    #    /
    #   1
    check_parents(((one, two), (two, four), (four, four), (three, four)))

    forest.Find(one)

    # Expected shape after Find(one):
    #       4
    #     / | \
    #    1  2  3
    check_parents(((one, four), (two, four), (four, four), (three, four)))
def connected_component_labelling(bool_input_image, connectivity_type=CONNECTIVITY_8):
    """
    Label the connected components of a binary image.

    Two-pass algorithm using a disjoint-set data structure (union-find) to
    keep a record of label equivalences.

    1st pass: give every foreground pixel a provisional label and record
              which provisional labels turn out to be equivalent.
    2nd pass: replace each provisional label with a final label, so that the
              components are numbered with consecutive integers starting
              from 1, in order of first appearance (top to bottom, left to
              right).

    Args:
        bool_input_image: binary image as a 2D boolean array (a sequence of
            rows). Truthy pixels are foreground, falsy pixels background.
        connectivity_type: 4 or 8; forwarded to ``neighbouring_labels`` to
            select which neighbours are examined.

    Returns:
        2D numpy array (dtype int32, indexed as array[y, x]) with the same
        height and width as the input; background pixels are 0. An empty
        input image yields an empty array.

    Raises:
        ValueError: if connectivity_type is neither 4 nor 8.
    """
    if connectivity_type not in (4, 8):
        raise ValueError("Invalid connectivity type (choose 4 or 8)")

    image_height = len(bool_input_image)
    # Guard the width lookup so an image with no rows does not raise IndexError.
    image_width = len(bool_input_image[0]) if image_height else 0

    # N.B. numpy matrix addressing syntax: array[y, x].
    # int32 rather than int16: a large or noisy image can need more than
    # 32767 provisional labels, which does not fit in a 16-bit integer.
    labelled_image = np.zeros((image_height, image_width), dtype=np.int32)
    uf = UnionFind()  # disjoint-set structure holding label equivalences
    current_label = 1  # next unused provisional label

    # 1st pass: label image and record label equivalences
    for y, row in enumerate(bool_input_image):
        for x, pixel in enumerate(row):
            if not pixel:
                # Background pixel - leave output pixel value as 0
                continue

            # Labels already given to this pixel's neighbours
            labels = neighbouring_labels(labelled_image, connectivity_type, x, y)

            if not labels:
                # No labelled neighbours: start a new provisional label
                labelled_image[y, x] = current_label
                uf.MakeSet(current_label)  # record label in disjoint set
                current_label += 1
                continue

            # Pixel joins an existing component: take the smallest
            # neighbouring label
            smallest_label = min(labels)
            labelled_image[y, x] = smallest_label

            if len(labels) > 1:
                # Several labels meet at this pixel, so they all name the
                # same component. Merging the smallest label with itself
                # would be redundant, so it is skipped.
                for label in labels:
                    if label != smallest_label:
                        uf.Union(uf.GetNode(smallest_label), uf.GetNode(label))

    # 2nd pass: map every provisional label to its final label.
    # np.nonzero returns indices in row-major order, so components are
    # numbered in order of first appearance, top to bottom, left to right.
    final_labels = {}  # root label -> consecutive final label
    for y, x in zip(*np.nonzero(labelled_image)):
        root_label = uf.Find(uf.GetNode(int(labelled_image[y, x]))).value
        if root_label not in final_labels:
            final_labels[root_label] = len(final_labels) + 1
        labelled_image[y, x] = final_labels[root_label]

    return labelled_image