def test_captcha_recognition():
    """Fetch one live CAPTCHA, recognize it and print the verification result.

    The temp directory is cleared first via ``c.clear_temp()``. The image is
    fetched from a ``BilibiliCaptchaProvider`` and the recognized text is
    verified against that same provider instance. Nothing is returned;
    results are printed.

    To debug a stored image instead of a live one, replace the fetch with
    ``dataset_manager.get_training_image(seq)`` or
    ``dataset_manager.get_test_image(seq)`` using one of the sequences below.

    Training images that are partitioned falsely:
        'YFF5M', 'W1PM4', 'W1R4R', 'YTM6X', 'W9WU4', 'EFTWY', '5WTGP',
        '113W2', 'UWFG1'
    Sticking together:
        'WMQPQ', '14FWX', '4TJ3R', '5PW9Y', '6ML6X', '48HXH', 'Y581K'
    Isolation:
        'QN4EL'
    Complicated:
        '2XML9', 'W9WU4'
    """
    c.clear_temp()

    # Use a single provider for both fetch and verify, as test_recognize_http
    # does; verifying through a fresh instance would not be tied to the
    # CAPTCHA that was just fetched.
    provider = BilibiliCaptchaProvider()
    image = provider.fetch()

    success, captcha = CaptchaRecognizer().recognize(
        image, save_intermediate=True, verbose=True, reoptimize=False)
    if success:
        print(captcha)
        print('Recognized captcha is ', provider.verify(captcha))
def __init__(self, captcha_provider=None, h_tol=6 / 360, s_tol=36 / 100, v_tol=40 / 100):
    """Set up the recognizer's noise-removal tolerances and partition limits.

    Args:
        captcha_provider: Object exposing ``seq_length``, the number of
            characters in one CAPTCHA. When ``None`` (the default) a new
            ``BilibiliCaptchaProvider`` is created.
        h_tol: Hue tolerance used by ``remove_noise_with_hsv``.
        s_tol: Saturation tolerance used by ``remove_noise_with_hsv``.
        v_tol: Value tolerance used by ``remove_noise_with_hsv``.
    """
    # Build the default provider lazily: a provider instance in the signature
    # would be constructed once at definition time and shared by every call.
    if captcha_provider is None:
        captcha_provider = BilibiliCaptchaProvider()

    # Three parameters to be used in remove_noise_with_hsv
    self.h_tolerance = h_tol
    self.s_tolerance = s_tol
    self.v_tolerance = v_tol

    self.character_num = captcha_provider.seq_length

    # Four parameters to be used in partition
    self.char_width_min = 5
    self.char_width_max = 30
    self.char_height_min = 10
    self.char_height_max = 30
# Load the fail-char file into a set, one entry per line.
with open(os.path.join(c.get('dataset'), c.get('fail_char.txt'))) as fail_char_file:
    _fail_char_set = set(fail_char_file.read().splitlines())

_PARTITION_JSON = os.path.join(_dataset_dir, 'partition.json')

# String constants; the '#'-prefixed ones are counter names, and _NUM_CHAR is
# a template that is filled in with str.format.
_NUM_TOTAL = '###total'
_NUM_FAIL = '##fail'
_NUM_SUCCESS = '##success'
_SUCCESS_RATE = '##success_rate'
_NUM_CHAR = '#{}'
_FAIL = 'fail'
_SUCCESS = 'success'

# the sequence that tells the program to skip an image when fetching
_SEQ_SKIP = '0'

_captcha_provider = BilibiliCaptchaProvider()


def _get_training_char_dir(char):
    """Return the directory under ``_training_char_dir`` for ``char``."""
    return os.path.join(_training_char_dir, char)


def _get_training_char_path(char, path):
    """Return ``path`` joined onto the training directory of ``char``."""
    char_dir = _get_training_char_dir(char)
    return os.path.join(char_dir, path)


# Make sure every character the provider can produce has its own directory.
for _char in _captcha_provider.chars:
    c.make_dirs(_get_training_char_dir(_char))


# Fetch some CAPTCHA images from a CAPTCHA source to a directory
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch ``num`` CAPTCHAs, recognize and verify each, then print statistics.

    Args:
        show_img: Show the fetched image; only honoured when ``num == 1``.
        num: Number of CAPTCHAs to fetch. With ``num == 1`` the recognizer
            runs with ``save_intermediate=True`` and ``verbose=True`` and the
            fetch/verify steps are given a label for ``time_func``; otherwise
            both flags are ``False`` and the label is ``None``.
        reconstruct: Forwarded to ``recognize`` for the first image only;
            every later image is recognized with ``reconstruct=False``.
        force_partition: Forwarded to ``recognize`` for every image.

    Nothing is returned. Counts, success rates and the elapsed time are
    printed. Rates whose denominator is not positive are reported as 0, so
    ``num=0`` no longer raises ``ZeroDivisionError``.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1

    def rate(hits, total):
        # Report 0 rather than dividing by zero for an empty bucket.
        return hits / total if total > 0 else 0

    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0

    for i in range(num):
        # NOTE(review): time_func presumably runs the callable and returns its
        # result; its return value is used as the image / verification result.
        image = time_func("fetch" if single else None, lambda: provider.fetch())
        if show_img and single:
            show_image(image)

        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            # Only the first image of a run may request reconstruction.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition,
        )
        if not success:
            fail += 1
            continue

        print(seq)
        result = time_func("verify" if single else None, lambda: provider.verify(seq))
        if single:
            print("Recognized seq is {}".format(result))

        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    print("Total success rate: ", rate(right_total, num))
    print("Success rate when confident: ", rate(right_total, num - fail))
    print(
        "Success rate when strongly confident: ",
        rate(right_strong, right_strong + wrong_strong),
    )
    print(
        "Success rate when weakly confident: ",
        rate(right_weak, right_weak + wrong_weak),
    )
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch ``num`` CAPTCHAs, recognize and verify each, then print statistics.

    Args:
        show_img: Show the fetched image; only honoured when ``num == 1``.
        num: Number of CAPTCHAs to fetch. With ``num == 1`` the recognizer
            runs with ``save_intermediate=True`` and ``verbose=True`` and the
            fetch/verify steps are given a label for ``time_func``; otherwise
            both flags are ``False`` and the label is ``None``.
        reconstruct: Forwarded to ``recognize`` for the first image only;
            every later image is recognized with ``reconstruct=False``.
        force_partition: Forwarded to ``recognize`` for every image.

    Nothing is returned. Counts, success rates and the elapsed time are
    printed. Rates whose denominator is not positive are reported as 0, so
    ``num=0`` no longer raises ``ZeroDivisionError``.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1

    def rate(hits, total):
        # Report 0 rather than dividing by zero for an empty bucket.
        return hits / total if total > 0 else 0

    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0

    for i in range(num):
        # NOTE(review): time_func presumably runs the callable and returns its
        # result; its return value is used as the image / verification result.
        image = time_func('fetch' if single else None, lambda: provider.fetch())
        if show_img and single:
            show_image(image)

        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            # Only the first image of a run may request reconstruction.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition)
        if not success:
            fail += 1
            continue

        print(seq)
        result = time_func('verify' if single else None, lambda: provider.verify(seq))
        if single:
            print('Recognized seq is {}'.format(result))

        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    print('Total success rate: ', rate(right_total, num))
    print('Success rate when confident: ', rate(right_total, num - fail))
    print(
        'Success rate when strongly confident: ',
        rate(right_strong, right_strong + wrong_strong))
    print(
        'Success rate when weakly confident: ',
        rate(right_weak, right_weak + wrong_weak))
    time_end = time.time()
    print('Time used to test recognize http is: ', time_end - time_start)