def test_recognize_training():
    """Recognize one labelled training image and print how it compares.

    The temporary directory is cleared first (``c.clear_temp()``), then the
    image for ``label`` is loaded from the dataset manager and passed to a
    fresh ``CaptchaRecognizer`` with verbose output, intermediate saving and
    forced partitioning enabled.  Nothing is printed when recognition fails.
    """
    c.clear_temp()

    # Label of the training image under test.  Set it to None to let the
    # dataset manager choose the image (it then returns a (label, image)
    # pair).  Other samples that were used for manual debugging, grouped the
    # way the original notes grouped them:
    #   K464J
    #   Sticking together: WMQPQ, 14FWX, 4TJ3R, 5PW9Y, 6ML6X, 48HXH, Y581K
    #   Isolation:         QN4EL
    #   Complicated:       2XML9, W9WU4
    label = 'JWP26'

    if not label:
        label, image = dataset_manager.get_training_image()
    else:
        image = dataset_manager.get_training_image(label)

    recognizer = CaptchaRecognizer()
    success, recognized, weak_confidence = recognizer.recognize(
        image, verbose=True, save_intermediate=True, force_partition=True)

    if not success:
        return

    if weak_confidence:
        print('Weak confidence')
    print('Recognized is', recognized)
    print('Actual is', label)
    print('Result: {}'.format(label == recognized))
def test_captcha_recognition():
    """Fetch a live captcha, recognize it and print the verification result.

    The temporary directory is cleared first (``c.clear_temp()``).  When the
    recognizer reports success, the recognized text is printed, followed by
    whatever ``verify`` returns for it.

    To debug against a stored image instead of a live one, replace the
    ``fetch`` call with ``dataset_manager.get_training_image(seq)`` or
    ``dataset_manager.get_test_image(seq)``.  Training samples noted as
    problematic in earlier debugging sessions:
      Partitioned falsely: YFF5M, W1PM4, W1R4R, YTM6X, W9WU4, EFTWY, 5WTGP,
                           113W2, UWFG1
      Sticking together:   WMQPQ, 14FWX, 4TJ3R, 5PW9Y, 6ML6X, 48HXH, Y581K
      Isolation:           QN4EL
      Complicated:         2XML9, W9WU4
    """
    c.clear_temp()

    provider = BilibiliCaptchaProvider()
    image = provider.fetch()

    # NOTE(review): other call sites in this file unpack three values from
    # recognize() and pass ``reconstruct=`` rather than ``reoptimize=``;
    # confirm this call still matches the recognizer's current signature.
    success, captcha = CaptchaRecognizer().recognize(
        image, save_intermediate=True, verbose=True, reoptimize=False)

    if success:
        print(captcha)
        # Verify through the same provider instance that fetched the image
        # (as test_recognize_http does) instead of a freshly constructed one;
        # presumably the answer is only valid for the fetching instance.
        print('Recognized captcha is ', provider.verify(captcha))
def tune_partition_parameter():
    """Grid-search three recognizer parameters by partition success rate.

    For every combination of ``h`` in ``arange(4, 8)``, ``s`` in
    ``arange(30, 50, 4)`` and ``v`` in ``arange(50, 70, 4)`` a
    ``CaptchaRecognizer`` is built with ``h / 360``, ``s / 100`` and
    ``v / 100`` and evaluated with ``partition_training_images_to_chars``
    (``force_update=True``, ``save=False``).  The names suggest these are
    hue/saturation/value tolerances -- confirm against the recognizer.

    The index of the best cell and its rate are printed, and the full grid
    is saved as ``gridsearch.npy`` inside ``_dataset_dir``.

    Returns:
        numpy.ndarray: success rates, indexed ``[h_index, s_index, v_index]``
        in the order of the three ranges above.
    """
    h_tol = np.arange(4, 8)
    s_tol = np.arange(30, 50, 4)
    v_tol = np.arange(50, 70, 4)
    rate = np.zeros((len(h_tol), len(s_tol), len(v_tol)))

    # Use enumerate() positions as array indices.  Deriving them from the
    # values with true division (e.g. (s - 30) / 4) yields floats, which
    # NumPy rejects as indices.
    for h_idx, h in enumerate(h_tol):
        for s_idx, s in enumerate(s_tol):
            for v_idx, v in enumerate(v_tol):
                recognizer = CaptchaRecognizer(
                    _captcha_provider, h / 360, s / 100, v / 100)
                rate[h_idx, s_idx, v_idx] = \
                    partition_training_images_to_chars(
                        recognizer, force_update=True, save=False)

    print(np.unravel_index(rate.argmax(), rate.shape))
    print(rate.max())
    np.save(os.path.join(_dataset_dir, 'gridsearch.npy'), rate)
    return rate
def partition_training_images_to_chars(
        captcha_recognizer=None, force_update=False, save=True):
    """Partition training captchas into characters and record the outcome.

    Bookkeeping is loaded from ``_PARTITION_JSON``; if that fails, or if
    ``force_update`` is set, it is rebuilt from scratch.  Only training
    sequences not already listed as failed or successful are processed.
    For each character, ``json_dict[_NUM_CHAR.format(char)]`` holds a pair
    whose second entry counts every processed occurrence and whose first
    entry counts occurrences in successfully partitioned sequences.

    Args:
        captcha_recognizer: object providing ``partition(img,
            force_partition=...)``.  When ``None`` (the default) a new
            ``CaptchaRecognizer()`` is created for this call.
        force_update: discard previous bookkeeping and process every
            training sequence again.
        save: when true, print per-sequence progress and a summary, and
            write each partitioned character image to disk.  When false,
            print the recognizer's ``h_tolerance``/``s_tolerance``/
            ``v_tolerance`` and the total success rate instead.

    Returns:
        float: successfully partitioned sequences divided by the number of
        training sequences, or ``0`` when there are none.
    """
    time_start = time.time()

    # Build the default per call; an instance in the signature would be
    # created once at definition time and shared by every call.
    if captcha_recognizer is None:
        recognizer = CaptchaRecognizer()
    else:
        recognizer = captcha_recognizer

    try:
        with open(_PARTITION_JSON) as json_file:
            json_dict = json.load(json_file)
    except (OSError, ValueError) as e:
        # Missing/unreadable file or malformed JSON: start over.
        print(e)
        print('Warning: failed to load {}. Reconstructing...'.format(
            _PARTITION_JSON))
        json_dict = {}
        force_update = True

    if force_update:
        json_dict[_FAIL] = []
        json_dict[_SUCCESS] = []
        for char in _captcha_provider.chars:
            json_dict[_NUM_CHAR.format(char)] = [0, 0]

    all_seqs = _list_basename(_training_set_dir)
    num_total = len(all_seqs)
    old_seq_set = set(json_dict[_FAIL] + json_dict[_SUCCESS])
    seqs = [s for s in all_seqs if s not in old_seq_set]
    num_update = len(seqs)
    num_update_success = 0

    for n, seq in enumerate(seqs):
        if save:
            print('{}/{}: {}'.format(n, num_update, seq))
        img = get_training_image(seq)
        char_images, _ = recognizer.partition(img, force_partition=False)
        partitioned = char_images is not None

        if partitioned:
            json_dict[_SUCCESS].append(seq)
            num_update_success += 1
        else:
            json_dict[_FAIL].append(seq)

        for i in range(_captcha_provider.seq_length):
            char = seq[i]
            counts = json_dict[_NUM_CHAR.format(char)]
            counts[1] += 1
            if partitioned:
                counts[0] += 1
                if save:
                    path = _get_training_char_path(
                        char, _add_suffix('{}.{}'.format(seq, i + 1)))
                    mpimg.imsave(path, char_images[i], cmap=_cm_greys)

    num_total_success = len(json_dict[_SUCCESS])
    json_dict[_NUM_TOTAL] = num_total
    json_dict[_NUM_FAIL] = num_total - num_total_success
    json_dict[_NUM_SUCCESS] = num_total_success
    total_success_rate = num_total_success / num_total if num_total else 0
    json_dict[_SUCCESS_RATE] = '{:.3%}'.format(total_success_rate)
    json_dict[_FAIL].sort()
    json_dict[_SUCCESS].sort()

    # NOTE(review): the bookkeeping file is rewritten even when ``save`` is
    # false (as in the original); confirm that is intended for tuning runs.
    with open(_PARTITION_JSON, 'w') as json_file:
        json.dump(json_dict, json_file, sort_keys=True, indent=2)

    if save:
        print('Update: {}'.format(num_update))
        print('Update success: {}'.format(num_update_success))
        if num_update:
            print('Update success rate is: {}'.format(
                num_update_success / num_update))
        print('Total: {}'.format(num_total))
        print('Total success: {}'.format(num_total_success))
        print('Total success rate is: {}'.format(total_success_rate))

    time_end = time.time()
    print('Elapsed time of partitioning training images: {}'.format(
        time_end - time_start))

    if not save:
        print('h_tol = {}'.format(recognizer.h_tolerance))
        print('s_tol = {}'.format(recognizer.s_tolerance))
        print('v_tol = {}'.format(recognizer.v_tolerance))
        print('Total success rate is: {}'.format(total_success_rate))

    return total_success_rate
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch ``num`` live captchas, recognize and verify each, print stats.

    NOTE(review): a second definition with this same name appears later in
    this file and replaces this one at import time; consider removing one.

    Args:
        show_img: display the fetched image; only honoured when ``num == 1``.
        num: number of captchas to fetch.  With ``num == 1`` the run is
            verbose: intermediate results are saved and the fetch/verify
            steps are labelled for ``time_func``.
        reconstruct: forwarded to ``recognize`` for the first captcha only;
            later captchas always pass ``False``.
        force_partition: forwarded to ``recognize`` for every captcha.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1

    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0

    for i in range(num):
        image = time_func("fetch" if single else None, lambda: provider.fetch())
        if show_img and single:
            show_image(image)

        # One call covers all three original branches: verbosity depends
        # only on ``single`` and ``reconstruct`` applies to the first image.
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition,
        )

        if not success:
            fail += 1
            continue

        print(seq)
        result = time_func("verify" if single else None, lambda: provider.verify(seq))
        if single:
            print("Recognized seq is {}".format(result))

        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    answered = num - fail
    strong_total = right_strong + wrong_strong
    weak_total = right_weak + wrong_weak

    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    # Guarded like the other rates so that num == 0 reports 0 instead of
    # raising ZeroDivisionError.
    print("Total success rate: ", right_total / num if num > 0 else 0)
    print("Success rate when confident: ", right_total / answered if answered > 0 else 0)
    print(
        "Success rate when strongly confident: ",
        right_strong / strong_total if strong_total > 0 else 0,
    )
    print(
        "Success rate when weakly confident: ",
        right_weak / weak_total if weak_total > 0 else 0,
    )
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
def test_recognize_http(show_img=False, num=1, reconstruct=False,
                        force_partition=True):
    """Fetch ``num`` live captchas, recognize and verify each, print stats.

    NOTE(review): an earlier definition with this same name exists in this
    file; being the later one, this definition is the one in effect.

    Args:
        show_img: display the fetched image; only honoured when ``num == 1``.
        num: number of captchas to fetch.  With ``num == 1`` the run is
            verbose: intermediate results are saved and the fetch/verify
            steps are labelled for ``time_func``.
        reconstruct: forwarded to ``recognize`` for the first captcha only;
            later captchas always pass ``False``.
        force_partition: forwarded to ``recognize`` for every captcha.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1

    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0

    for i in range(num):
        image = time_func('fetch' if single else None,
                          lambda: provider.fetch())
        if show_img and single:
            show_image(image)

        # One call covers all three original branches: verbosity depends
        # only on ``single`` and ``reconstruct`` applies to the first image.
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition)

        if not success:
            fail += 1
            continue

        print(seq)
        result = time_func('verify' if single else None,
                           lambda: provider.verify(seq))
        if single:
            print('Recognized seq is {}'.format(result))

        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    answered = num - fail
    strong_total = right_strong + wrong_strong
    weak_total = right_weak + wrong_weak

    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    # Guarded like the other rates so that num == 0 reports 0 instead of
    # raising ZeroDivisionError.
    print('Total success rate: ', right_total / num if num > 0 else 0)
    print('Success rate when confident: ',
          right_total / answered if answered > 0 else 0)
    print(
        'Success rate when strongly confident: ',
        right_strong / strong_total if strong_total > 0 else 0)
    print(
        'Success rate when weakly confident: ',
        right_weak / weak_total if weak_total > 0 else 0)
    time_end = time.time()
    print('Time used to test recognize http is: ', time_end - time_start)