예제 #1
0
def test_recognize_training():
    """Recognize one training captcha and print it next to its true label.

    Clears the temp directory, loads a training image, runs the recognizer
    with partitioning forced and intermediate images saved, and prints the
    recognized sequence, the actual sequence and whether they match.
    Nothing is printed when the recognizer reports failure.
    """
    c.clear_temp()

    # Training sample to recognize. Set to a falsy value to call
    # get_training_image() without an argument, which then supplies both
    # the label and the image.
    #
    # Other samples that have been useful while debugging:
    #   plain:             'K464J'
    #   sticking together: 'WMQPQ', '14FWX', '4TJ3R', '5PW9Y', '6ML6X',
    #                      '48HXH', 'Y581K'
    #   isolation:         'QN4EL'
    #   complicated:       '2XML9', 'W9WU4'
    label = 'JWP26'

    if label:
        image = dataset_manager.get_training_image(label)
    else:
        label, image = dataset_manager.get_training_image()

    recognizer = CaptchaRecognizer()
    success, recognized, weak_confidence = recognizer.recognize(
        image, verbose=True, save_intermediate=True, force_partition=True)

    # Guard clause: there is nothing to report without a recognition result.
    if not success:
        return

    if weak_confidence:
        print('Weak confidence')
    print('Recognized is', recognized)
    print('Actual is', label)
    print('Result: {}'.format(label == recognized))
예제 #2
0
def test_captcha_recognition():
    """Fetch a live captcha, recognize it and print the provider's verdict.

    Clears the temp directory, fetches one captcha image from a
    ``BilibiliCaptchaProvider``, runs ``CaptchaRecognizer.recognize`` on it
    (saving intermediate images, verbose, without re-optimizing) and, when
    recognition succeeds, prints the recognized text followed by the result
    of submitting it back to the provider.
    """
    c.clear_temp()

    # To debug against the stored dataset instead of a live captcha, replace
    # the fetch below with dataset_manager.get_training_image(seq) or
    # dataset_manager.get_test_image(seq), using one of these samples:
    #   falsely partitioned: 'YFF5M', 'W1PM4', 'W1R4R', 'YTM6X', 'W9WU4',
    #                        'EFTWY', '5WTGP', '113W2', 'UWFG1'
    #   sticking together:   'WMQPQ', '14FWX', '4TJ3R', '5PW9Y', '6ML6X',
    #                        '48HXH', 'Y581K'
    #   isolation:           'QN4EL'
    #   complicated:         '2XML9', 'W9WU4'
    provider = BilibiliCaptchaProvider()
    image = provider.fetch()

    success, captcha = CaptchaRecognizer().recognize(image,
                                                     save_intermediate=True,
                                                     verbose=True,
                                                     reoptimize=False)
    if success:
        print(captcha)
        # Verify through the same provider instance that fetched the image
        # (as test_recognize_http does). NOTE(review): presumably the answer
        # is only valid for the session that issued the captcha -- confirm
        # against BilibiliCaptchaProvider.
        print('Recognized captcha is ', provider.verify(captcha))
def tune_partition_parameter():
    """Grid-search the h/s/v tolerances of the recognizer's partitioning.

    For every combination of the candidate tolerances a fresh
    ``CaptchaRecognizer`` is built (h scaled by 1/360, s and v by 1/100) and
    ``partition_training_images_to_chars`` is run with ``force_update=True``
    and ``save=False``; its return value is stored as that combination's
    success rate. The grid position of the best rate and the best rate
    itself are printed, and the full grid is saved as ``gridsearch.npy`` in
    the dataset directory.

    Returns:
        numpy.ndarray: success rates with shape
        ``(len(h_tol), len(s_tol), len(v_tol))``, indexed by the position of
        each tolerance in its candidate list.
    """
    h_tol = np.arange(4, 8)
    s_tol = np.arange(30, 50, 4)
    v_tol = np.arange(50, 70, 4)
    rate = np.zeros((len(h_tol), len(s_tol), len(v_tol)))
    # Index the grid by enumeration position. Deriving the index from the
    # value with true division (e.g. (s - 30) / 4) yields floats, which
    # NumPy rejects as array indices.
    for h_idx, h in enumerate(h_tol):
        for s_idx, s in enumerate(s_tol):
            for v_idx, v in enumerate(v_tol):
                recognizer = CaptchaRecognizer(_captcha_provider, h / 360,
                                               s / 100, v / 100)
                rate[h_idx, s_idx, v_idx] = \
                    partition_training_images_to_chars(recognizer,
                                                       force_update=True,
                                                       save=False)
    print(np.unravel_index(rate.argmax(), rate.shape))
    print(rate.max())
    np.save(os.path.join(_dataset_dir, 'gridsearch.npy'), rate)
    return rate
def partition_training_images_to_chars(
        captcha_recognizer=None, force_update=False, save=True):
    """Partition training captchas into characters and record the statistics.

    The bookkeeping lives in the JSON file ``_PARTITION_JSON``: the lists of
    sequences that were partitioned successfully / unsuccessfully, a
    ``[success_count, total_count]`` pair per character, and overall totals.
    Only sequences not already listed in that file are processed, unless the
    file cannot be loaded or ``force_update`` is set, in which case the
    bookkeeping is reset and every training image is processed again.

    Args:
        captcha_recognizer: Recognizer whose ``partition`` method is used.
            When ``None`` (the default) a new ``CaptchaRecognizer()`` is
            created for this call.
        force_update: Reset the recorded results and re-partition all
            training images.
        save: When true, write each partitioned character image to disk and
            print progress plus a detailed summary. When false, no character
            images are written and only the recognizer's tolerances and the
            total success rate are printed. The JSON file is rewritten in
            both cases.

    Returns:
        float: fraction of all training sequences that were partitioned
        successfully (``0`` when there are no training sequences).
    """
    time_start = time.time()
    # Create the default recognizer per call: an instance in the signature
    # would be built once at definition time and shared by every call.
    recognizer = (CaptchaRecognizer()
                  if captcha_recognizer is None else captcha_recognizer)
    try:
        with open(_PARTITION_JSON) as json_file:
            json_dict = json.load(json_file)
    except (OSError, ValueError) as e:
        # Missing/unreadable file or malformed JSON: start from scratch.
        print(e)
        print('Warning: failed to load {}. Reconstructing...'.format(
            _PARTITION_JSON))
        json_dict = {}
        force_update = True
    if force_update:
        json_dict[_FAIL] = []
        json_dict[_SUCCESS] = []
        for char in _captcha_provider.chars:
            json_dict[_NUM_CHAR.format(char)] = [0, 0]
    seqs = _list_basename(_training_set_dir)
    num_total = len(seqs)
    # Skip every sequence that already has a recorded outcome.
    old_seq_set = set(json_dict[_FAIL] + json_dict[_SUCCESS])
    seqs = [s for s in seqs if s not in old_seq_set]
    num_update = len(seqs)
    num_update_success = 0

    for n, seq in enumerate(seqs):
        if save:
            print('{}/{}: {}'.format(n, num_update, seq))
        img = get_training_image(seq)
        char_images, _ = recognizer.partition(img, force_partition=False)
        # partition() signals failure by returning None for the images.
        partitioned = char_images is not None
        if partitioned:
            json_dict[_SUCCESS].append(seq)
            num_update_success += 1
        else:
            json_dict[_FAIL].append(seq)
        for i in range(_captcha_provider.seq_length):
            char = seq[i]
            counts = json_dict[_NUM_CHAR.format(char)]
            # counts[1] counts every occurrence of the character,
            # counts[0] only those from successfully partitioned images.
            counts[1] += 1
            if partitioned:
                counts[0] += 1
                if save:
                    path = _get_training_char_path(
                        char, _add_suffix('{}.{}'.format(seq, i + 1)))
                    mpimg.imsave(path, char_images[i], cmap=_cm_greys)

    num_total_success = len(json_dict[_SUCCESS])
    json_dict[_NUM_TOTAL] = num_total
    json_dict[_NUM_FAIL] = num_total - num_total_success
    json_dict[_NUM_SUCCESS] = num_total_success
    total_success_rate = num_total_success / num_total if num_total else 0
    json_dict[_SUCCESS_RATE] = '{:.3%}'.format(total_success_rate)
    json_dict[_FAIL].sort()
    json_dict[_SUCCESS].sort()
    # Close the file deterministically so the data is flushed to disk.
    with open(_PARTITION_JSON, 'w') as json_file:
        json.dump(json_dict, json_file, sort_keys=True, indent=2)
    if save:
        print('Update: {}'.format(num_update))
        print('Update success: {}'.format(num_update_success))
        if num_update:
            print('Update success rate is: {}'.format(num_update_success /
                                                      num_update))
        print('Total: {}'.format(num_total))
        print('Total success: {}'.format(num_total_success))
        print('Total success rate is: {}'.format(total_success_rate))
        time_end = time.time()
        print('Elapsed time of partitioning training images: {}'.format(
            time_end - time_start))
    else:
        print('h_tol = {}'.format(recognizer.h_tolerance))
        print('s_tol = {}'.format(recognizer.s_tolerance))
        print('v_tol = {}'.format(recognizer.v_tolerance))
        print('Total success rate is: {}'.format(total_success_rate))
    return total_success_rate
예제 #5
0
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch captchas over HTTP, recognize and verify them, and print statistics.

    Each iteration fetches an image from a ``BilibiliCaptchaProvider``, runs
    the recognizer on it and, when recognition succeeds, submits the answer
    to the same provider. Outcomes are tallied as right/wrong crossed with
    strong/weak confidence, plus recognition failures, and the tallies and
    derived success rates are printed together with the elapsed time.

    Args:
        show_img: Display the fetched image; only honored when ``num == 1``.
        num: Number of captchas to process. With ``num == 1`` the run is
            verbose: fetch/verify are timed under a label, intermediate
            images are saved and the verification result is printed.
        reconstruct: Forwarded to ``recognize`` for the first captcha only;
            later captchas always pass ``False``.
        force_partition: Forwarded to ``recognize`` for every captcha.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single_shot = num == 1
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func("fetch" if single_shot else None, lambda: provider.fetch())
        if show_img and single_shot:
            show_image(image)
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single_shot,
            verbose=single_shot,
            # Only the first image may request a reconstruct.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition,
        )
        if not success:
            fail += 1
            continue
        print(seq)
        result = time_func("verify" if single_shot else None, lambda: provider.verify(seq))
        if single_shot:
            print("Recognized seq is {}".format(result))
        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    # Guard against num == 0 like the other ratios do, instead of raising
    # ZeroDivisionError after the loop ran zero times.
    print("Total success rate: ", right_total / num if num > 0 else 0)
    print("Success rate when confident: ", right_total / (num - fail) if num - fail > 0 else 0)
    print(
        "Success rate when strongly confident: ",
        right_strong / (right_strong + wrong_strong) if right_strong + wrong_strong > 0 else 0,
    )
    print(
        "Success rate when weakly confident: ",
        right_weak / (right_weak + wrong_weak) if right_weak + wrong_weak > 0 else 0,
    )
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
예제 #6
0
def test_recognize_http(show_img=False,
                        num=1,
                        reconstruct=False,
                        force_partition=True):
    """Run the fetch -> recognize -> verify loop over HTTP and print results.

    A single ``BilibiliCaptchaProvider`` both fetches each captcha and
    verifies the recognized answer. Every captcha ends up in exactly one
    bucket: recognition failure, or right/wrong combined with strong/weak
    confidence. The bucket counts, the success rates derived from them and
    the total elapsed time are printed at the end.

    Args:
        show_img: Show the fetched image (ignored unless ``num == 1``).
        num: How many captchas to test. ``num == 1`` switches on the verbose
            mode: labelled timing of fetch/verify, saved intermediate images
            and a printout of the verification result.
        reconstruct: Passed to ``recognize`` only for the first captcha;
            all following captchas pass ``False``.
        force_partition: Passed to ``recognize`` unchanged for each captcha.
    """

    def ratio(hits, total):
        # Share of hits in total; 0 when nothing was counted, so an empty
        # run (including num == 0) never raises ZeroDivisionError.
        return hits / total if total > 0 else 0

    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    detailed = num == 1
    fetch_label = 'fetch' if detailed else None
    verify_label = 'verify' if detailed else None
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func(fetch_label, lambda: provider.fetch())
        if show_img and detailed:
            show_image(image)
        # reconstruct is honored for the first captcha only.
        first_image = i == 0
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=detailed,
            verbose=detailed,
            reconstruct=reconstruct if first_image else False,
            force_partition=force_partition)
        if success:
            print(seq)
            result = time_func(verify_label, lambda: provider.verify(seq))
            if detailed:
                print('Recognized seq is {}'.format(result))
            if result and weak_confidence:
                right_weak += 1
            elif result:
                right_strong += 1
            elif weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1
        else:
            fail += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    print('Total success rate: ', ratio(right_total, num))
    print('Success rate when confident: ', ratio(right_total, num - fail))
    print('Success rate when strongly confident: ',
          ratio(right_strong, right_strong + wrong_strong))
    print('Success rate when weakly confident: ',
          ratio(right_weak, right_weak + wrong_weak))
    time_end = time.time()
    print('Time used to test recognize http is: ', time_end - time_start)