def get_qualitative_results(denoise_func):
    sclite.clear()
    for i in tqdm(range(1, len(test_ds))):
        image, text = test_ds[i]
        resized_image = paragraph_segmentation_transform(image, image_size=form_size)
        paragraph_bb = paragraph_segmentation_net(resized_image.as_in_context(ctx))
        paragraph_bb = paragraph_bb[0].asnumpy()
        paragraph_bb = expand_bounding_box(paragraph_bb, expand_bb_scale_x=0.01,
                                               expand_bb_scale_y=0.01)
        paragraph_segmented_image = crop_handwriting_page(image, paragraph_bb, image_size=segmented_paragraph_size)
        word_bb = predict_bounding_boxes(word_segmentation_net, paragraph_segmented_image, min_c, overlap_thres, topk, ctx)        
        line_bbs = sort_bbs_line_by_line(word_bb, y_overlap=0.4)
        line_images = crop_line_images(paragraph_segmented_image, line_bbs)

        predicted_text = []
        for line_image in line_images:
            line_image = exposure.adjust_gamma(line_image, 1)  # gamma of 1 is effectively a no-op
            line_image = handwriting_recognition_transform(line_image, line_image_size)
            character_probabilities = handwriting_line_recognition_net(line_image.as_in_context(ctx))
            decoded_text = denoise_func(character_probabilities)
            predicted_text.append(decoded_text)

        actual_text = text[0].replace("&quot;", '"').replace("&apos;", "'").replace("&amp;", "&")
        actual_text = actual_text.split("\n")
        if len(predicted_text) > len(actual_text):
            predicted_text = predicted_text[:len(actual_text)]
        sclite.add_text(predicted_text, actual_text)
    
    cer, _ = sclite.get_cer()
    print("Mean CER = {}".format(cer))
    return cer
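# A minimal usage sketch, assuming the networks, test_ds and helpers above are loaded:
# get_qualitative_results accepts any decoder that maps per-character probabilities to
# text; get_beam_search (used in Code Example #3) has that interface.
cer_beam = get_qualitative_results(get_beam_search)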
Code Example #2
paragraph_segmentation_net.hybridize()
form_size = (1120, 800)

predicted_bbs = []

fig, axs = plt.subplots(int(len(images) / 2),
                        2,
                        figsize=(15, 9 * len(images) / 2))
for i, image in enumerate(images):
    s_y, s_x = int(i / 2), int(i % 2)
    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    bb_predicted = expand_bounding_box(bb_predicted,
                                       expand_bb_scale_x=0.03,
                                       expand_bb_scale_y=0.03)
    predicted_bbs.append(bb_predicted)

    axs[s_y, s_x].imshow(image, cmap='Greys_r')
    axs[s_y, s_x].set_title("{}".format(i))

    (x, y, w, h) = bb_predicted
    image_h, image_w = image.shape[-2:]
    (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)
    rect = patches.Rectangle((x, y), w, h, fill=False, color="r", ls="--")
    axs[s_y, s_x].add_patch(rect)
    axs[s_y, s_x].axis('off')

segmented_paragraph_size = (700, 700)
fig, axs = plt.subplots(int(len(images) / 2),
                        2,
                        figsize=(15, 9 * len(images) / 2))
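# Sketch of the cropping step that follows, assuming the same helpers as in the other
# examples: each predicted paragraph box is used to crop the form down to the
# handwritten region before word segmentation, and the crops are shown on the new axes.
paragraph_segmented_images = []
for i, image in enumerate(images):
    s_y, s_x = int(i / 2), int(i % 2)
    crop = crop_handwriting_page(image, predicted_bbs[i],
                                 image_size=segmented_paragraph_size)
    paragraph_segmented_images.append(crop)
    axs[s_y, s_x].imshow(crop, cmap='Greys_r')
    axs[s_y, s_x].axis('off')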
Code Example #3
def generate_op(img_n, img_dir, folder_path):
    image_name = img_n.split('.')[0]
    img_path = os.path.join(img_dir, img_n)
    image = _pre_process_image(img_path, 'form')

    form_size = (1120, 800)

    predicted_bbs = []

    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    bb_predicted = expand_bounding_box(bb_predicted,
                                       expand_bb_scale_x=0.03,
                                       expand_bb_scale_y=0.03)
    predicted_bbs.append(bb_predicted)

    (x, y, w, h) = bb_predicted
    image_h, image_w = image.shape[-2:]
    (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)

    segmented_paragraph_size = (700, 700)
    paragraph_segmented_images = []

    bb = predicted_bbs[0]
    image = crop_handwriting_page(image,
                                  bb,
                                  image_size=segmented_paragraph_size)
    paragraph_segmented_images.append(image)

    min_c = 0.1
    overlap_thres = 0.1
    topk = 600
    predicted_words_bbs_array = []

    for i, paragraph_segmented_image in enumerate(paragraph_segmented_images):
        predicted_bb = predict_bounding_boxes(word_segmentation_net,
                                              paragraph_segmented_image, min_c,
                                              overlap_thres, topk, ctx)

        predicted_words_bbs_array.append(predicted_bb)
        # convert each normalized word box (x, y, w, h) to pixel coordinates
        for j in range(predicted_bb.shape[0]):
            (x, y, w, h) = predicted_bb[j]
            image_h, image_w = paragraph_segmented_image.shape[-2:]
            (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)

    line_images_array = []

    for i, paragraph_segmented_image in enumerate(paragraph_segmented_images):
        predicted_bbs = predicted_words_bbs_array[i]
        line_bbs = sort_bbs_line_by_line(predicted_bbs, y_overlap=0.4)
        line_images = crop_line_images(paragraph_segmented_image, line_bbs)
        line_images_array.append(line_images)

        # convert each line box to pixel coordinates as well
        for line_bb in line_bbs:
            (x, y, w, h) = line_bb
            image_h, image_w = paragraph_segmented_image.shape[-2:]
            (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)

    line_image_size = (60, 800)
    character_probs = []
    for line_images in line_images_array:
        form_character_prob = []
        for i, line_image in enumerate(line_images):
            line_image = handwriting_recognition_transform(
                line_image, line_image_size)
            line_character_prob = handwriting_line_recognition_net(
                line_image.as_in_context(ctx))
            form_character_prob.append(line_character_prob)
        character_probs.append(form_character_prob)

    FEATURE_LEN = 150
    save_path = os.path.join(folder_path, image_name + '.txt')
    # decode every line with beam search and write the result to <image_name>.txt
    with open(save_path, 'w') as output_file:
        for form_character_probs in character_probs:
            for line_character_probs in form_character_probs:
                decoded_line_bs = get_beam_search(line_character_probs)
                print(decoded_line_bs)
                output_file.write(decoded_line_bs + ' ')
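# Example driver with hypothetical paths: run generate_op on every form image in a
# directory and collect the decoded text files under output_dir.
if __name__ == '__main__':
    input_dir = 'forms'      # directory of scanned form images (assumed)
    output_dir = 'decoded'   # where the per-form .txt files are written (assumed)
    os.makedirs(output_dir, exist_ok=True)
    for img_name in sorted(os.listdir(input_dir)):
        generate_op(img_name, input_dir, output_dir)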