def main():
    """Run the early recognizer stages on one training image and save each result.

    Clears the temp directory, fetches an image from ``dataset_manager``,
    then runs HSV noise removal, neighbour-based noise removal (wrapped in
    ``repeat(..., 2)``) and ``morph.skeletonize`` in sequence. Every stage is
    executed through ``time_func`` under its own label and its output is
    written to the temp directory via ``c.temp_path``.
    """
    c.clear_temp()
    source_image = dataset_manager.get_training_image()
    recognizer = captcha_recognizer.CaptchaRecognizer()
    mpimg.imsave(c.temp_path('00.origin.png'), source_image)

    # Stage 1: HSV-based noise removal on the raw image.
    def run_hsv():
        return recognizer.remove_noise_with_hsv(source_image)

    hsv_cleaned = time_func('remove_noise_with_hsv', run_hsv)
    hsv_path = c.temp_path('01.hsv.png')
    mpimg.imsave(hsv_path, hsv_cleaned, cmap=cm_greys)

    # Stage 2: neighbour-based noise removal on the stage-1 output.
    # The repeat(...) wrapper is built inside the timed callable, as before.
    def run_neighbors():
        denoise = repeat(recognizer.remove_noise_with_neighbors, 2)
        return denoise(hsv_cleaned)

    neighbor_cleaned = time_func('remove_noise_with_neighbors', run_neighbors)
    neighbor_path = c.temp_path('02.neighbor.png')
    mpimg.imsave(neighbor_path, neighbor_cleaned, cmap=cm_greys)

    # Stage 3a: skeletonize the stage-2 output.
    def run_skeletonize():
        return morph.skeletonize(neighbor_cleaned)

    skeleton = time_func('skeletonize', run_skeletonize)
    skeleton_path = c.temp_path('03a.skeleton.png')
    mpimg.imsave(skeleton_path, skeleton, cmap=cm_greys)
def partition(self, img, save_intermediate=False, verbose=False, force_partition=True):
    """Denoise a captcha image and split it into per-character images.

    Stages: HSV noise removal, neighbour-based noise removal (wrapped in
    ``repeat(..., 2)``), connected-component segmentation, ordering of the
    components by the start of their second-axis slice, an optional forced
    split, and a final count/size check.

    Args:
        img: Image handed to ``self.remove_noise_with_hsv``.
        save_intermediate: When true, write each intermediate image to the
            temp directory through ``c.temp_path``.
        verbose: When true, pass a stage label to ``time_func`` (``None``
            otherwise) and print diagnostics.
        force_partition: When true and exactly ``self.character_num - 1``
            components were found, hand them to ``self.force_partition``.

    Returns:
        A ``(char_images, weak_confidence)`` tuple. ``char_images`` is the
        list of character images when their count equals
        ``self.character_num`` and every height/width lies within the
        configured min/max bounds, otherwise ``None``. ``weak_confidence``
        is 1 when ``self.force_partition`` was applied and 0 otherwise.
    """
    weak_confidence = 0
    if save_intermediate:
        mpimg.imsave(c.temp_path("00.origin.png"), img)

    # step 1: remove noise with hsv
    img_01 = time_func(
        "remove_noise_with_hsv" if verbose else None,
        lambda: self.remove_noise_with_hsv(img),
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path("01.hsv.png"), img_01, cmap=cm_greys)

    # step 2: remove noise with neighbors
    img_02 = time_func(
        "remove_noise_with_neighbors" if verbose else None,
        lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01),
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path("02.neighbor.png"), img_02, cmap=cm_greys)

    # step 3: partition stage 1 (connected components)
    labels, object_slices = time_func(
        "segment_with_label" if verbose else None,
        lambda: self.segment_with_label(img_02),
    )
    if verbose:
        print("{} connected components found".format(len(object_slices)))
    if save_intermediate:
        mpimg.imsave(c.temp_path("03.00000.png"), labels)

    # step 4: arrange the segments from left to right and, if needed,
    # run partition stage 2
    xmin_arr = np.array([s[1].start for s in object_slices])
    char_images = []
    for i in xmin_arr.argsort():
        # Crop to the component's bounding slices *before* copying and
        # masking, so only the small region is duplicated instead of the
        # whole image once per component. As before, object_slices[i] is
        # taken to bound the component labelled i + 1.
        region = object_slices[i]
        char_image = img_02[region].copy()
        char_image[labels[region] != i + 1] = 0
        char_images.append(char_image)
    if force_partition and len(char_images) == self.character_num - 1:
        weak_confidence = 1
        char_images = self.force_partition(char_images)

    # step 5: check if segmentation was successful and get characters
    if len(char_images) == self.character_num:
        shapes = np.array(list(map(np.shape, char_images)))
        heights, widths = shapes[:, 0], shapes[:, 1]
        if verbose:
            print("Heights {}".format(heights))
            print("Widths {}".format(widths))
        # noinspection PyTypeChecker
        if (
            np.all(heights >= self.char_height_min)
            and np.all(heights <= self.char_height_max)
            and np.all(widths >= self.char_width_min)
            and np.all(widths <= self.char_width_max)
        ):
            if save_intermediate:
                for number, char_image in enumerate(char_images, start=1):
                    mpimg.imsave(
                        c.temp_path("03.char.{}.png".format(number)),
                        char_image,
                        cmap=cm_greys,
                    )
            return char_images, weak_confidence
    if verbose:
        print("Warning: partition failed!")
    return None, weak_confidence
def partition(self, img, save_intermediate=False, verbose=False, force_partition=True):
    """Denoise a captcha image and split it into per-character images.

    Stages: HSV noise removal, neighbour-based noise removal (wrapped in
    ``repeat(..., 2)``), connected-component segmentation, ordering of the
    components by the start of their second-axis slice, an optional forced
    split, and a final count/size check.

    Args:
        img: Image handed to ``self.remove_noise_with_hsv``.
        save_intermediate: When true, write each intermediate image to the
            temp directory through ``c.temp_path``.
        verbose: When true, pass a stage label to ``time_func`` (``None``
            otherwise) and print diagnostics.
        force_partition: When true and exactly ``self.character_num - 1``
            components were found, hand them to ``self.force_partition``.

    Returns:
        A ``(char_images, weak_confidence)`` tuple. ``char_images`` is the
        list of character images when their count equals
        ``self.character_num`` and every height/width lies within the
        configured min/max bounds, otherwise ``None``. ``weak_confidence``
        is 1 when ``self.force_partition`` was applied and 0 otherwise.
    """
    weak_confidence = 0
    if save_intermediate:
        mpimg.imsave(c.temp_path('00.origin.png'), img)

    # step 1: remove noise with hsv
    img_01 = time_func(
        'remove_noise_with_hsv' if verbose else None,
        lambda: self.remove_noise_with_hsv(img)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

    # step 2: remove noise with neighbors
    img_02 = time_func(
        'remove_noise_with_neighbors' if verbose else None,
        lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

    # step 3: partition stage 1 (connected components)
    labels, object_slices = time_func(
        'segment_with_label' if verbose else None,
        lambda: self.segment_with_label(img_02)
    )
    if verbose:
        print('{} connected components found'.format(len(object_slices)))
    if save_intermediate:
        mpimg.imsave(c.temp_path('03.00000.png'), labels)

    # step 4: arrange the segments from left to right and, if needed,
    # run partition stage 2
    xmin_arr = np.array([s[1].start for s in object_slices])
    char_images = []
    for i in xmin_arr.argsort():
        # Crop to the component's bounding slices *before* copying and
        # masking, so only the small region is duplicated instead of the
        # whole image once per component. As before, object_slices[i] is
        # taken to bound the component labelled i + 1.
        region = object_slices[i]
        char_image = img_02[region].copy()
        char_image[labels[region] != i + 1] = 0
        char_images.append(char_image)
    if force_partition and len(char_images) == self.character_num - 1:
        weak_confidence = 1
        char_images = self.force_partition(char_images)

    # step 5: check if segmentation was successful and get characters
    if len(char_images) == self.character_num:
        shapes = np.array(list(map(np.shape, char_images)))
        heights, widths = shapes[:, 0], shapes[:, 1]
        if verbose:
            print('Heights {}'.format(heights))
            print('Widths {}'.format(widths))
        # noinspection PyTypeChecker
        if (np.all(heights >= self.char_height_min) and
                np.all(heights <= self.char_height_max) and
                np.all(widths >= self.char_width_min) and
                np.all(widths <= self.char_width_max)):
            if save_intermediate:
                for number, char_image in enumerate(char_images, start=1):
                    mpimg.imsave(
                        c.temp_path('03.char.{}.png'.format(number)),
                        char_image, cmap=cm_greys)
            return char_images, weak_confidence
    if verbose:
        print('Warning: partition failed!')
    return None, weak_confidence
def partition(self, img, save_intermediate=False, verbose=False):
    """Denoise a captcha image and split it into per-character images.

    Stages: HSV noise removal, neighbour-based noise removal (wrapped in
    ``repeat(..., 2)``), connected-component segmentation, ordering of the
    components by the start of their second-axis slice, and a final
    count/size check.

    Args:
        img: Image handed to ``self.remove_noise_with_hsv``.
        save_intermediate: When true, write each intermediate image to the
            temp directory through ``c.temp_path``.
        verbose: When true, pass a stage label to ``time_func`` (``None``
            otherwise) and print diagnostics.

    Returns:
        The list of character images when their count equals
        ``self.character_num`` and every height/width lies within the
        configured min/max bounds; otherwise ``None``.
    """
    if save_intermediate:
        mpimg.imsave(c.temp_path('00.origin.png'), img)

    # 1: remove noise with hsv
    img_01 = time_func(
        'remove_noise_with_hsv' if verbose else None,
        lambda: self.remove_noise_with_hsv(img)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

    # 2: remove noise with neighbors
    img_02 = time_func(
        'remove_noise_with_neighbors' if verbose else None,
        lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

    # 3: connected-component segmentation
    labels, object_slices = time_func(
        'segment_with_label' if verbose else None,
        lambda: self.segment_with_label(img_02)
    )
    if verbose:
        print('{} connected components found'.format(len(object_slices)))
    if save_intermediate:
        mpimg.imsave(c.temp_path('03.00000.png'), labels)

    # Arrange the segments from left to right
    xmin_arr = np.array([s[1].start for s in object_slices])
    char_images = []
    # noinspection PyTypeChecker
    for i in xmin_arr.argsort():
        # Crop to the component's bounding slices *before* copying and
        # masking, so only the small region is duplicated instead of the
        # whole image once per component. As before, object_slices[i] is
        # taken to bound the component labelled i + 1.
        region = object_slices[i]
        char_image = img_02[region].copy()
        char_image[labels[region] != i + 1] = 0
        char_images.append(char_image)

    # Check if segmentation was successful
    if len(char_images) == self.character_num:
        shapes = np.array(list(map(np.shape, char_images)))
        heights, widths = shapes[:, 0], shapes[:, 1]
        if verbose:
            print('Heights {}'.format(heights))
            print('Widths {}'.format(widths))
        # noinspection PyTypeChecker
        if (np.all(heights >= self.char_height_min) and
                np.all(heights <= self.char_height_max) and
                np.all(widths >= self.char_width_min) and
                np.all(widths <= self.char_width_max)):
            if save_intermediate:
                for number, char_image in enumerate(char_images, start=1):
                    mpimg.imsave(
                        c.temp_path('03.char.{}.png'.format(number)),
                        char_image, cmap=cm_greys)
            return char_images
    if verbose:
        print('Warning: partition failed!')
    return None