def main():
    """Run the first preprocessing stages on one training image and save each result.

    Calls ``c.clear_temp()``, pulls an image from ``dataset_manager``, then
    applies HSV noise removal, two rounds of neighbor-based noise removal and
    skeletonization in sequence.  Each stage is executed through ``time_func``
    under its own label and its output is written to ``c.temp_path(...)``.
    """
    c.clear_temp()
    source_image = dataset_manager.get_training_image()
    recognizer = captcha_recognizer.CaptchaRecognizer()

    def save_grey(file_name, picture):
        # Every processed stage is saved with the grey colormap.
        mpimg.imsave(c.temp_path(file_name), picture, cmap=cm_greys)

    # The untouched input is saved without an explicit colormap.
    mpimg.imsave(c.temp_path('00.origin.png'), source_image)

    # 1
    hsv_cleaned = time_func(
        'remove_noise_with_hsv',
        lambda: recognizer.remove_noise_with_hsv(source_image)
    )
    save_grey('01.hsv.png', hsv_cleaned)

    # 2
    # NOTE(review): ``repeat(f, 2)`` presumably applies ``f`` twice in a row;
    # confirm against the helper's definition.
    neighbor_cleaned = time_func(
        'remove_noise_with_neighbors',
        lambda: repeat(recognizer.remove_noise_with_neighbors, 2)(hsv_cleaned)
    )
    save_grey('02.neighbor.png', neighbor_cleaned)

    skeleton = time_func(
        'skeletonize',
        lambda: morph.skeletonize(neighbor_cleaned)
    )
    save_grey('03a.skeleton.png', skeleton)
def recognize(self, img, save_intermediate=False, verbose=False, reconstruct=False, force_partition=True):
    """Partition a captcha image and predict one character per segment.

    Args:
        img: Captcha image; forwarded unchanged to ``self.partition``.
        save_intermediate: Forwarded to ``self.partition``.
        verbose: Forwarded to ``self.partition``; also selects the label
            (``"predict"`` or ``None``) handed to ``time_func`` for the
            prediction step.
        reconstruct: When true, ``captcha_learn.reconstruct_model()`` is
            called after partitioning and before any prediction.
        force_partition: Forwarded to ``self.partition``.

    Returns:
        A ``(success, seq, weak_confidence)`` tuple.  ``success`` is true only
        when partitioning produced exactly ``self.character_num`` character
        images.  On success ``seq`` is the per-character predictions joined
        into one string; on failure it is an empty list.  ``weak_confidence``
        is passed through from ``self.partition``.
    """
    char_images, weak_confidence = self.partition(img, save_intermediate, verbose, force_partition)
    if reconstruct:
        captcha_learn.reconstruct_model()

    if char_images is None or len(char_images) != self.character_num:
        # The empty list (not an empty string) is the historical failure
        # value; it is kept so existing callers see no change.
        return False, [], weak_confidence

    # time_func hands back the callable's result, so the predictions can be
    # returned directly instead of being appended to an outer list.
    predictions = time_func(
        "predict" if verbose else None,
        lambda: [captcha_learn.predict(char_image) for char_image in char_images],
    )
    return True, "".join(predictions), weak_confidence
def recognize(self, img, save_intermediate=False, verbose=False, reconstruct=False, force_partition=True):
    """Split a captcha into characters and classify each of them.

    Args:
        img: Captcha image; passed straight to ``self.partition``.
        save_intermediate: Passed straight to ``self.partition``.
        verbose: Passed straight to ``self.partition``; also decides whether
            the prediction step is run through ``time_func`` with the label
            ``'predict'`` or with ``None``.
        reconstruct: When true, ``captcha_learn.reconstruct_model()`` runs
            once partitioning has finished, before predicting.
        force_partition: Passed straight to ``self.partition``.

    Returns:
        ``(success, seq, weak_confidence)``.  ``success`` is true only if the
        partition step returned exactly ``self.character_num`` images.  When
        successful, ``seq`` is the string obtained by joining the individual
        predictions; otherwise it is an empty list.  ``weak_confidence`` is
        whatever ``self.partition`` reported.
    """
    char_images, weak_confidence = self.partition(img, save_intermediate, verbose, force_partition)
    if reconstruct:
        captcha_learn.reconstruct_model()

    partition_ok = char_images is not None and len(char_images) == self.character_num
    if not partition_ok:
        # Failure keeps returning an empty list, exactly as before, so that
        # callers comparing or unpacking the old value are unaffected.
        return False, [], weak_confidence

    # The timed callable returns its predictions; no shared mutable state or
    # ``nonlocal`` is needed.
    predicted_chars = time_func(
        'predict' if verbose else None,
        lambda: [captcha_learn.predict(char_image) for char_image in char_images]
    )
    return True, ''.join(predicted_chars), weak_confidence
def partition(self, img, save_intermediate=False, verbose=False, force_partition=True):
    """Denoise a captcha image and cut it into one image per character.

    Args:
        img: Input captcha image.
        save_intermediate: When true, the image produced by each step is
            written to ``c.temp_path(...)``.
        verbose: When true, each timed step is labelled for ``time_func`` and
            progress information is printed.
        force_partition: When true and exactly one component fewer than
            ``self.character_num`` was found, ``self.force_partition`` is
            applied to the segments and the result is flagged as weak.

    Returns:
        ``(char_images, weak_confidence)``.  ``char_images`` is the list of
        character images ordered left to right, or ``None`` when the number
        of segments or their sizes do not match the configured limits.
        ``weak_confidence`` is ``1`` if ``self.force_partition`` was used,
        otherwise ``0``.
    """
    weak_confidence = 0
    if save_intermediate:
        mpimg.imsave(c.temp_path("00.origin.png"), img)

    # step 1
    # remove noise with hsv
    img_01 = time_func("remove_noise_with_hsv" if verbose else None, lambda: self.remove_noise_with_hsv(img))
    if save_intermediate:
        mpimg.imsave(c.temp_path("01.hsv.png"), img_01, cmap=cm_greys)

    # step 2
    # remove noise with neighbors
    img_02 = time_func(
        "remove_noise_with_neighbors" if verbose else None,
        lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01),
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path("02.neighbor.png"), img_02, cmap=cm_greys)

    # step 3
    # partition stage 1
    labels, object_slices = time_func(
        "segment_with_label" if verbose else None, lambda: self.segment_with_label(img_02)
    )
    if verbose:
        print("{} connected components found".format(len(object_slices)))
    if save_intermediate:
        mpimg.imsave(c.temp_path("03.00000.png"), labels)

    # step 4
    # Arrange the segments from left to right and probably partition stage 2
    xmin_arr = np.array([s[1].start for s in object_slices])
    char_images = []
    for i in xmin_arr.argsort():
        region = object_slices[i]
        # Crop to the component's bounding box first, then copy and mask:
        # same pixels as masking a full-image copy, but without allocating
        # (and keeping alive) a whole image per component.
        char_image = img_02[region].copy()
        # Component ``i`` carries label ``i + 1``; clear everything else.
        char_image[labels[region] != i + 1] = 0
        char_images.append(char_image)
    if force_partition and len(char_images) == self.character_num - 1:
        weak_confidence = 1
        char_images = self.force_partition(char_images)

    # step 5
    # Check if segmentation was successful and get characters
    if len(char_images) == self.character_num:
        shapes = np.array([np.shape(char_image) for char_image in char_images])
        heights, widths = shapes[:, 0], shapes[:, 1]
        if verbose:
            print("Heights {}".format(heights))
            print("Widths {}".format(widths))
        # noinspection PyTypeChecker
        if (
            np.all(heights >= self.char_height_min)
            and np.all(heights <= self.char_height_max)
            and np.all(widths >= self.char_width_min)
            and np.all(widths <= self.char_width_max)
        ):
            if save_intermediate:
                # File names are numbered from 1.
                for number, char_image in enumerate(char_images, start=1):
                    mpimg.imsave(c.temp_path("03.char.{}.png".format(number)), char_image, cmap=cm_greys)
            return char_images, weak_confidence
    if verbose:
        print("Warning: partition failed!")
    return None, weak_confidence
def partition(self, img, save_intermediate=False, verbose=False, force_partition=True):
    """Clean up a captcha image and split it into per-character images.

    Args:
        img: Input captcha image.
        save_intermediate: When true, every intermediate image is saved under
            ``c.temp_path(...)``.
        verbose: When true, timed steps get a label for ``time_func`` and
            diagnostic lines are printed.
        force_partition: When true and the segmentation found exactly
            ``self.character_num - 1`` components, the segments are passed
            through ``self.force_partition`` and the result is marked weak.

    Returns:
        A ``(char_images, weak_confidence)`` tuple.  ``char_images`` holds the
        character images sorted from left to right, or is ``None`` if the
        segment count or any segment size is outside the configured bounds.
        ``weak_confidence`` is ``1`` when ``self.force_partition`` was
        applied and ``0`` otherwise.
    """
    weak_confidence = 0
    if save_intermediate:
        mpimg.imsave(c.temp_path('00.origin.png'), img)

    # step 1
    # remove noise with hsv
    img_01 = time_func(
        'remove_noise_with_hsv' if verbose else None,
        lambda: self.remove_noise_with_hsv(img)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

    # step 2
    # remove noise with neighbors
    img_02 = time_func(
        'remove_noise_with_neighbors' if verbose else None,
        lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

    # step 3
    # partition stage 1
    labels, object_slices = time_func(
        'segment_with_label' if verbose else None,
        lambda: self.segment_with_label(img_02)
    )
    if verbose:
        print('{} connected components found'.format(len(object_slices)))
    if save_intermediate:
        mpimg.imsave(c.temp_path('03.00000.png'), labels)

    # step 4
    # Arrange the segments from left to right and probably partition stage 2
    xmin_arr = np.array([s[1].start for s in object_slices])
    char_images = []
    for i in xmin_arr.argsort():
        bounding_box = object_slices[i]
        # Only the bounding box is copied; masking a copy of the whole image
        # and slicing afterwards yields the same pixels at a higher cost.
        char_image = img_02[bounding_box].copy()
        # Pixels not labelled ``i + 1`` belong to other components.
        char_image[labels[bounding_box] != i + 1] = 0
        char_images.append(char_image)
    if force_partition and len(char_images) == self.character_num - 1:
        weak_confidence = 1
        char_images = self.force_partition(char_images)

    # step 5
    # Check if segmentation was successful and get characters
    if len(char_images) == self.character_num:
        shapes = np.array([np.shape(char_image) for char_image in char_images])
        heights, widths = shapes[:, 0], shapes[:, 1]
        if verbose:
            print('Heights {}'.format(heights))
            print('Widths {}'.format(widths))
        # noinspection PyTypeChecker
        if (np.all(heights >= self.char_height_min) and
                np.all(heights <= self.char_height_max) and
                np.all(widths >= self.char_width_min) and
                np.all(widths <= self.char_width_max)):
            if save_intermediate:
                # Saved files are numbered starting at 1.
                for number, char_image in enumerate(char_images, start=1):
                    mpimg.imsave(
                        c.temp_path('03.char.{}.png'.format(number)),
                        char_image, cmap=cm_greys)
            return char_images, weak_confidence
    if verbose:
        print('Warning: partition failed!')
    return None, weak_confidence
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch captchas, recognize them, verify the answers and print statistics.

    Args:
        show_img: When true and ``num == 1``, the fetched image is displayed
            with ``show_image``.
        num: Number of captchas to process.  With ``num == 1`` the run is
            verbose: steps are labelled for ``time_func`` and intermediate
            images are saved by the recognizer.
        reconstruct: Forwarded to ``recognizer.recognize`` on the first
            iteration only; later iterations always pass ``False``.
        force_partition: Forwarded to ``recognizer.recognize``.

    Returns:
        None.  All results are printed.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    single = num == 1

    def rate(hits, total):
        # Every ratio reports 0 instead of dividing by zero.
        return hits / total if total > 0 else 0

    for i in range(num):
        image = time_func("fetch" if single else None, lambda: provider.fetch())
        if show_img and single:
            show_image(image)
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            # Only the first call may ask for a model reconstruction.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition,
        )
        if not success:
            fail += 1
            continue
        print(seq)
        result = time_func("verify" if single else None, lambda: provider.verify(seq))
        if single:
            print("Recognized seq is {}".format(result))
        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    # Previously unguarded: raised ZeroDivisionError for num == 0.
    print("Total success rate: ", rate(right_total, num))
    print("Success rate when confident: ", rate(right_total, num - fail))
    print("Success rate when strongly confident: ", rate(right_strong, right_strong + wrong_strong))
    print("Success rate when weakly confident: ", rate(right_weak, right_weak + wrong_weak))
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Run an online recognition benchmark and print a summary.

    Each iteration fetches a captcha from the provider, runs the recognizer
    on it and, if recognition succeeded, submits the answer to the provider
    for verification.

    Args:
        show_img: When true and ``num == 1``, ``show_image`` is called on the
            fetched image.
        num: How many captchas to process.  ``num == 1`` turns on the verbose
            mode (labelled timings, intermediate images saved).
        reconstruct: Passed to ``recognizer.recognize`` for the first captcha
            only; all following calls receive ``False``.
        force_partition: Passed to ``recognizer.recognize`` on every call.

    Returns:
        None.  The counters and success rates are printed.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    detailed = num == 1

    def ratio(part, whole):
        # Shared guard so that no printed rate can divide by zero.
        return part / whole if whole > 0 else 0

    for i in range(num):
        image = time_func('fetch' if detailed else None,
                          lambda: provider.fetch())
        if show_img and detailed:
            show_image(image)
        # One call replaces three near-identical branches: the flags differ
        # only by whether this is a single-image run and the first iteration.
        success, seq, weak_confidence = recognizer.recognize(
            image, save_intermediate=detailed, verbose=detailed,
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition)
        if not success:
            fail += 1
            continue
        print(seq)
        result = time_func('verify' if detailed else None,
                           lambda: provider.verify(seq))
        if detailed:
            print('Recognized seq is {}'.format(result))
        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    # This rate used to divide by ``num`` directly and failed for num == 0.
    print('Total success rate: ', ratio(right_total, num))
    print('Success rate when confident: ', ratio(right_total, num - fail))
    print('Success rate when strongly confident: ',
          ratio(right_strong, right_strong + wrong_strong))
    print('Success rate when weakly confident: ',
          ratio(right_weak, right_weak + wrong_weak))
    time_end = time.time()
    print('Time used to test recognize http is: ', time_end - time_start)
def partition(self, img, save_intermediate=False, verbose=False):
    """Denoise a captcha image and split it into one image per character.

    Args:
        img: Input captcha image.
        save_intermediate: When true, the output of every step is saved under
            ``c.temp_path(...)``.
        verbose: When true, timed steps are labelled for ``time_func`` and
            diagnostic lines are printed.

    Returns:
        The list of character images ordered left to right, or ``None`` when
        the number of connected components differs from
        ``self.character_num`` or a component's height or width is outside
        the configured limits.
    """
    if save_intermediate:
        mpimg.imsave(c.temp_path('00.origin.png'), img)

    # 1
    img_01 = time_func(
        'remove_noise_with_hsv' if verbose else None,
        lambda: self.remove_noise_with_hsv(img)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

    # 2
    img_02 = time_func(
        'remove_noise_with_neighbors' if verbose else None,
        lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
    )
    if save_intermediate:
        mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

    # 3
    labels, object_slices = time_func(
        'segment_with_label' if verbose else None,
        lambda: self.segment_with_label(img_02)
    )
    if verbose:
        print('{} connected components found'.format(len(object_slices)))
    if save_intermediate:
        mpimg.imsave(c.temp_path('03.00000.png'), labels)

    # Arrange the segments from left to right
    xmin_arr = np.array([s[1].start for s in object_slices])
    sort_index = xmin_arr.argsort()
    char_images = []
    # noinspection PyTypeChecker
    for i in sort_index:
        box = object_slices[i]
        # Crop before copying so that only the bounding box is duplicated
        # rather than the whole image for every component.
        char_image = img_02[box].copy()
        # Component ``i`` is labelled ``i + 1``; blank out foreign pixels.
        char_image[labels[box] != i + 1] = 0
        char_images.append(char_image)

    # Check if segmentation was successful
    if len(char_images) == self.character_num:
        shapes = np.array([np.shape(char_image) for char_image in char_images])
        heights, widths = shapes[:, 0], shapes[:, 1]
        if verbose:
            print('Heights {}'.format(heights))
            print('Widths {}'.format(widths))
        # noinspection PyTypeChecker
        if (np.all(heights >= self.char_height_min) and
                np.all(heights <= self.char_height_max) and
                np.all(widths >= self.char_width_min) and
                np.all(widths <= self.char_width_max)):
            if save_intermediate:
                # Output files are numbered from 1.
                for number, char_image in enumerate(char_images, start=1):
                    mpimg.imsave(
                        c.temp_path('03.char.{}.png'.format(number)),
                        char_image, cmap=cm_greys)
            return char_images
    if verbose:
        print('Warning: partition failed!')
    return None