Exemple #1
0
def test_captcha_recognition():
    """Fetch one CAPTCHA, run the recognizer on it and print the outcome.

    Calls ``c.clear_temp()`` first, fetches an image from a
    ``BilibiliCaptchaProvider``, and passes it to
    ``CaptchaRecognizer().recognize`` with intermediate saving and verbose
    output enabled. When recognition reports success, prints the recognized
    text followed by the value returned by the provider's ``verify``.
    """
    c.clear_temp()

    # Sequences kept for manual debugging. To reproduce one, assign it to
    # ``seq`` and load the image through ``dataset_manager`` (see the
    # commented-out calls further down) instead of fetching a new one.

    # Below are all the training images that are partitioned falsely
    # seq = 'YFF5M'
    # seq = 'W1PM4'
    # seq = 'W1R4R'
    # seq = 'YTM6X'
    # seq = 'W9WU4'
    # seq = 'EFTWY'
    # seq = '5WTGP'
    # seq = '113W2'
    # seq = 'UWFG1'

    # Sticking together
    # seq = 'WMQPQ'
    # seq = '14FWX'
    # seq = '4TJ3R'
    # seq = '5PW9Y'
    # seq = '6ML6X'
    # seq = '48HXH'
    # seq = 'Y581K'

    # Isolation
    # seq = 'QN4EL'

    # Complicated
    # seq = '2XML9'
    # seq = 'W9WU4'

    # image = dataset_manager.get_training_image(seq)
    # image = dataset_manager.get_test_image(seq)
    provider = BilibiliCaptchaProvider()
    image = provider.fetch()

    success, captcha = CaptchaRecognizer().recognize(image,
                                                     save_intermediate=True,
                                                     verbose=True,
                                                     reoptimize=False)
    if success:
        print(captcha)
        # Verify through the provider that fetched the image rather than a
        # fresh instance. NOTE(review): presumably verify() relies on state
        # set up by fetch() (e.g. a session) -- confirm against the provider.
        print('Recognized captcha is ', provider.verify(captcha))
Exemple #2
0
def test_captcha_recognition():
    """Fetch one CAPTCHA, run the recognizer on it and print the outcome.

    Calls ``c.clear_temp()`` first, fetches an image from a
    ``BilibiliCaptchaProvider``, and passes it to
    ``CaptchaRecognizer().recognize`` with intermediate saving and verbose
    output enabled. When recognition reports success, prints the recognized
    text followed by the value returned by the provider's ``verify``.
    """
    c.clear_temp()

    # Sequences kept for manual debugging. To reproduce one, assign it to
    # ``seq`` and load the image through ``dataset_manager`` (see the
    # commented-out calls further down) instead of fetching a new one.

    # Below are all the training images that are partitioned falsely
    # seq = 'YFF5M'
    # seq = 'W1PM4'
    # seq = 'W1R4R'
    # seq = 'YTM6X'
    # seq = 'W9WU4'
    # seq = 'EFTWY'
    # seq = '5WTGP'
    # seq = '113W2'
    # seq = 'UWFG1'

    # Sticking together
    # seq = 'WMQPQ'
    # seq = '14FWX'
    # seq = '4TJ3R'
    # seq = '5PW9Y'
    # seq = '6ML6X'
    # seq = '48HXH'
    # seq = 'Y581K'

    # Isolation
    # seq = 'QN4EL'

    # Complicated
    # seq = '2XML9'
    # seq = 'W9WU4'

    # image = dataset_manager.get_training_image(seq)
    # image = dataset_manager.get_test_image(seq)
    provider = BilibiliCaptchaProvider()
    image = provider.fetch()

    success, captcha = CaptchaRecognizer().recognize(image,
                                                     save_intermediate=True,
                                                     verbose=True,
                                                     reoptimize=False)
    if success:
        print(captcha)
        # Verify through the provider that fetched the image rather than a
        # fresh instance. NOTE(review): presumably verify() relies on state
        # set up by fetch() (e.g. a session) -- confirm against the provider.
        print('Recognized captcha is ', provider.verify(captcha))
    def __init__(self, captcha_provider=None,
                 h_tol=6 / 360,
                 s_tol=36 / 100,
                 v_tol=40 / 100):
        """Store the tolerance and partition settings for this instance.

        Args:
            captcha_provider: Object exposing ``seq_length``. When ``None``
                (the default) a new ``BilibiliCaptchaProvider`` is created
                for this call.
            h_tol: Stored as ``h_tolerance``.
            s_tol: Stored as ``s_tolerance``.
            v_tol: Stored as ``v_tolerance``.
        """
        # Build the default provider lazily: a provider instance written in
        # the signature would be constructed once, when the function is
        # defined, and then shared by every instance that relies on it.
        if captcha_provider is None:
            captcha_provider = BilibiliCaptchaProvider()

        # Three parameters to be used in remove_noise_with_hsv
        self.h_tolerance = h_tol
        self.s_tolerance = s_tol
        self.v_tolerance = v_tol

        self.character_num = captcha_provider.seq_length

        # Four parameters to be used in partition
        # NOTE(review): presumably pixel bounds for one character -- confirm.
        self.char_width_min = 5
        self.char_width_max = 30
        self.char_height_min = 10
        self.char_height_max = 30
# Read the file located by the two ``c.get`` lookups and keep its lines as a
# set (duplicate lines collapse). This runs when the module is imported.
with open(os.path.join(c.get('dataset'), c.get('fail_char.txt'))) as f:
    _fail_char_set = set(f.read().splitlines())

# Path of 'partition.json' inside the dataset directory.
_PARTITION_JSON = os.path.join(_dataset_dir, 'partition.json')
# NOTE(review): the strings below look like the keys/labels written to the
# partition JSON -- confirm against the code that reads and writes it.
_NUM_TOTAL = '###total'
_NUM_FAIL = '##fail'
_NUM_SUCCESS = '##success'
_SUCCESS_RATE = '##success_rate'
# Template with a single ``str.format`` placeholder.
_NUM_CHAR = '#{}'
_FAIL = 'fail'
_SUCCESS = 'success'

# The sequence that tells the program to skip an image when fetching
_SEQ_SKIP = '0'

# Module-level provider instance, created when the module is imported.
_captcha_provider = BilibiliCaptchaProvider()


def _get_training_char_dir(char):
    """Return the path obtained by joining ``char`` onto ``_training_char_dir``."""
    char_dir = os.path.join(_training_char_dir, char)
    return char_dir


# At import time, call ``c.make_dirs`` on the training directory of every
# character listed by the provider.
for _char in _captcha_provider.chars:
    c.make_dirs(_get_training_char_dir(_char))


def _get_training_char_path(char, path):
    """Return ``path`` joined onto the training directory of ``char``."""
    parent_dir = _get_training_char_dir(char)
    return os.path.join(parent_dir, path)


# Fetch some CAPTCHA images from a CAPTCHA source to a directory
Exemple #5
0
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch ``num`` CAPTCHAs, recognize and verify each, then print statistics.

    Each recognized sequence is checked with the same provider that fetched
    the image. Results are tallied by outcome (right/wrong) and by the
    ``weak_confidence`` flag returned by ``recognize``; images for which
    ``recognize`` reports no success are counted as failures.

    Args:
        show_img: When true and ``num == 1``, pass the fetched image to
            ``show_image``.
        num: Number of CAPTCHAs to fetch. With ``num == 1`` the run is
            verbose, saves intermediates and labels the timed calls.
        reconstruct: Forwarded to ``recognize`` for the first image only;
            every later image passes ``False``.
        force_partition: Forwarded unchanged to every ``recognize`` call.
    """

    def rate(hits, total):
        # Every ratio prints 0 when its denominator is empty; this also
        # covers num == 0, which used to raise ZeroDivisionError.
        return hits / total if total > 0 else 0

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    time_start = time.perf_counter()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func("fetch" if single else None, lambda: provider.fetch())
        if show_img and single:
            show_image(image)
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            # Only the first image may request reconstruction.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition,
        )
        if not success:
            fail += 1
            continue
        print(seq)
        result = time_func("verify" if single else None, lambda: provider.verify(seq))
        if single:
            print("Recognized seq is {}".format(result))
        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    print("Total success rate: ", rate(right_total, num))
    print("Success rate when confident: ", rate(right_total, num - fail))
    print(
        "Success rate when strongly confident: ",
        rate(right_strong, right_strong + wrong_strong),
    )
    print(
        "Success rate when weakly confident: ",
        rate(right_weak, right_weak + wrong_weak),
    )
    time_end = time.perf_counter()
    print("Time used to test recognize http is: ", time_end - time_start)
Exemple #6
0
def test_recognize_http(show_img=False,
                        num=1,
                        reconstruct=False,
                        force_partition=True):
    """Fetch ``num`` CAPTCHAs, recognize and verify each, then print statistics.

    Each recognized sequence is checked with the same provider that fetched
    the image. Results are tallied by outcome (right/wrong) and by the
    ``weak_confidence`` flag returned by ``recognize``; images for which
    ``recognize`` reports no success are counted as failures.

    Args:
        show_img: When true and ``num == 1``, pass the fetched image to
            ``show_image``.
        num: Number of CAPTCHAs to fetch. With ``num == 1`` the run is
            verbose, saves intermediates and labels the timed calls.
        reconstruct: Forwarded to ``recognize`` for the first image only;
            every later image passes ``False``.
        force_partition: Forwarded unchanged to every ``recognize`` call.
    """

    def rate(hits, total):
        # Every ratio prints 0 when its denominator is empty; this also
        # covers num == 0, which used to raise ZeroDivisionError.
        return hits / total if total > 0 else 0

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    time_start = time.perf_counter()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func('fetch' if single else None,
                          lambda: provider.fetch())
        if show_img and single:
            show_image(image)
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            # Only the first image may request reconstruction.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition)
        if not success:
            fail += 1
            continue
        print(seq)
        result = time_func('verify' if single else None,
                           lambda: provider.verify(seq))
        if single:
            print('Recognized seq is {}'.format(result))
        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    print('Total success rate: ', rate(right_total, num))
    print('Success rate when confident: ', rate(right_total, num - fail))
    print('Success rate when strongly confident: ',
          rate(right_strong, right_strong + wrong_strong))
    print('Success rate when weakly confident: ',
          rate(right_weak, right_weak + wrong_weak))
    time_end = time.perf_counter()
    print('Time used to test recognize http is: ', time_end - time_start)