def get_qualitative_results(denoise_func):
    """Score the recognition pipeline on ``test_ds`` and return the CER.

    For every sample the paragraph is segmented and cropped, words are
    detected and grouped into lines, each line image is passed through the
    line recognition network, and ``denoise_func`` turns the network output
    into text. Predictions and ground truth are accumulated in ``sclite``.

    Relies on module-level state: ``test_ds``, ``ctx``, the three networks,
    ``form_size``, ``segmented_paragraph_size``, ``line_image_size``,
    ``min_c``, ``overlap_thres`` and ``topk``.

    Args:
        denoise_func: Callable applied to the output of
            ``handwriting_line_recognition_net`` for one line; its return
            value is used as the predicted text of that line.

    Returns:
        The first element of ``sclite.get_cer()`` (also printed as
        "Mean CER").
    """
    sclite.clear()
    # NOTE(review): iteration starts at 1, so test_ds[0] is never scored.
    # Kept as-is; confirm whether skipping the first sample is intentional.
    for i in tqdm(range(1, len(test_ds))):
        image, text = test_ds[i]

        # Paragraph segmentation: predict one bounding box, pad it slightly,
        # then crop the page down to the paragraph.
        resized_image = paragraph_segmentation_transform(image, image_size=form_size)
        paragraph_bb = paragraph_segmentation_net(resized_image.as_in_context(ctx))
        paragraph_bb = paragraph_bb[0].asnumpy()
        paragraph_bb = expand_bounding_box(
            paragraph_bb, expand_bb_scale_x=0.01, expand_bb_scale_y=0.01)
        paragraph_segmented_image = crop_handwriting_page(
            image, paragraph_bb, image_size=segmented_paragraph_size)

        # Word detection, then grouping of word boxes into text lines.
        word_bb = predict_bounding_boxes(
            word_segmentation_net, paragraph_segmented_image,
            min_c, overlap_thres, topk, ctx)
        line_bbs = sort_bbs_line_by_line(word_bb, y_overlap=0.4)
        line_images = crop_line_images(paragraph_segmented_image, line_bbs)

        # Line recognition followed by the caller-supplied decoding step.
        predicted_text = []
        for line_image in line_images:
            line_image = exposure.adjust_gamma(line_image, 1)
            line_image = handwriting_recognition_transform(line_image, line_image_size)
            character_probabilities = handwriting_line_recognition_net(
                line_image.as_in_context(ctx))
            decoded_text = denoise_func(character_probabilities)
            predicted_text.append(decoded_text)

        # The ground truth carries XML-escaped characters. "&amp;" is decoded
        # last so that an escaped ampersand is not decoded a second time.
        actual_text = (
            text[0]
            .replace("&quot;", '"')
            .replace("&apos;", "'")
            .replace("&amp;", "&")
        )
        actual_text = actual_text.split("\n")

        # Drop surplus predicted lines so there are never more predictions
        # than reference lines.
        if len(predicted_text) > len(actual_text):
            predicted_text = predicted_text[:len(actual_text)]
        sclite.add_text(predicted_text, actual_text)

    cer, _ = sclite.get_cer()
    print("Mean CER = {}".format(cer))
    return cer
# Paragraph segmentation demo: predict one bounding box per image in
# `images` and draw it on top of the image in a two-column grid of subplots.
paragraph_segmentation_net.hybridize()
form_size = (1120, 800)
predicted_bbs = []
# One row per pair of images; assumes len(images) is even and >= 4 so that
# `axs` is a 2-D array — TODO confirm.
fig, axs = plt.subplots(int(len(images) / 2), 2, figsize=(15, 9 * len(images) / 2))
for i, image in enumerate(images):
    # Row / column of the subplot for image i.
    s_y, s_x = int(i / 2), int(i % 2)
    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    # Pad the predicted box by 3% in each direction before storing it.
    bb_predicted = expand_bounding_box(bb_predicted, expand_bb_scale_x=0.03, expand_bb_scale_y=0.03)
    predicted_bbs.append(bb_predicted)
    axs[s_y, s_x].imshow(image, cmap='Greys_r')
    axs[s_y, s_x].set_title("{}".format(i))
    (x, y, w, h) = bb_predicted
    image_h, image_w = image.shape[-2:]
    # Scale the box by the image width/height to get plotting coordinates
    # (presumably the network outputs fractions of the image size — verify).
    (x, y, w, h) = (x * image_w, y * image_h, w * image_w, h * image_h)
    rect = patches.Rectangle((x, y), w, h, fill=False, color="r", ls="--")
    axs[s_y, s_x].add_patch(rect)
    axs[s_y, s_x].axis('off')
segmented_paragraph_size = (700, 700)
# NOTE(review): the statement below is cut off in this view of the file (the
# argument list is never closed); it is reproduced unchanged.
fig, axs = plt.subplots(int(len(images) / 2),
def generate_op(img_n, img_dir, folder_path):
    """Recognize the handwriting in one form image and save it as text.

    The image is pre-processed, its paragraph is segmented and cropped,
    words are detected and grouped into lines, every line image is run
    through the line recognition network, and each result is decoded with
    ``get_beam_search``. Every decoded line is printed and written to
    ``<folder_path>/<image_name>.txt``, each followed by a single space.

    Relies on module-level state: ``ctx`` and the three networks.

    Args:
        img_n: File name of the image inside ``img_dir``.
        img_dir: Directory that contains the image.
        folder_path: Directory the ``.txt`` result is written to.

    Returns:
        None. The result is the text file plus the printed lines.
    """
    # NOTE: only the text before the first '.' is kept, so "a.b.png" is
    # saved as "a.txt". Left unchanged to keep existing output names stable.
    image_name = img_n.split('.')[0]
    img_path = os.path.join(img_dir, img_n)
    image = _pre_process_image(img_path, 'form')

    # Paragraph segmentation: predict the bounding box, pad it by 3%, then
    # crop the page down to the paragraph.
    form_size = (1120, 800)
    resized_image = paragraph_segmentation_transform(image, form_size)
    bb_predicted = paragraph_segmentation_net(resized_image.as_in_context(ctx))
    bb_predicted = bb_predicted[0].asnumpy()
    bb_predicted = expand_bounding_box(
        bb_predicted, expand_bb_scale_x=0.03, expand_bb_scale_y=0.03)

    segmented_paragraph_size = (700, 700)
    paragraph_image = crop_handwriting_page(
        image, bb_predicted, image_size=segmented_paragraph_size)

    # Word detection inside the cropped paragraph.
    min_c = 0.1
    overlap_thres = 0.1
    topk = 600
    word_bbs = predict_bounding_boxes(
        word_segmentation_net, paragraph_image, min_c, overlap_thres, topk, ctx)

    # Group the word boxes into lines and crop one image per line.
    line_bbs = sort_bbs_line_by_line(word_bbs, y_overlap=0.4)
    line_images = crop_line_images(paragraph_image, line_bbs)

    # Run the recognition network on every line before any file is touched,
    # matching the original order of operations.
    line_image_size = (60, 800)
    line_character_probs = [
        handwriting_line_recognition_net(
            handwriting_recognition_transform(
                line_image, line_image_size).as_in_context(ctx))
        for line_image in line_images
    ]

    save_path = os.path.join(folder_path, image_name + '.txt')
    # The context manager closes the file even if decoding raises.
    with open(save_path, 'w') as out_file:
        for probs in line_character_probs:
            decoded_line_bs = get_beam_search(probs)
            print(decoded_line_bs)
            out_file.write(decoded_line_bs + ' ')