def get_qualitative_results_lines(denoise_func):
    """Evaluate a denoising decoder over the IAM line-level test set.

    Parameters
    ----------
    denoise_func : callable
        Maps the recognition network's per-character probability output to a
        decoded text string (e.g. a beam-search or language-model denoiser).

    Returns
    -------
    float
        Mean character error rate (CER) as reported by sclite.
    """
    sclite.clear()
    test_ds_line = IAMDataset("line", train=False)
    # NOTE(review): iteration starts at 1, so sample 0 is skipped — confirm intentional.
    for i in tqdm(range(1, len(test_ds_line))):
        image, text = test_ds_line[i]
        line_image = exposure.adjust_gamma(image, 1)  # gamma=1 is a no-op; kept as a tuning hook
        line_image = handwriting_recognition_transform(line_image, line_image_size)
        character_probabilities = handwriting_line_recognition_net(line_image.as_in_context(ctx))
        decoded_text = denoise_func(character_probabilities)
        # Undo the XML escaping used in the IAM ground-truth transcriptions.
        # "&amp;" is replaced last so already-unescaped quotes are not mangled.
        actual_text = text[0].replace("&quot;", '"').replace("&apos;", "'").replace("&amp;", "&")
        sclite.add_text([decoded_text], [actual_text])
    cer, _ = sclite.get_cer()
    print("Mean CER = {}".format(cer))
    return cer
import ocr.utils.denoiser_utils import ocr.utils.beam_search importlib.reload(ocr.utils.denoiser_utils) from ocr.utils.denoiser_utils import SequenceGenerator importlib.reload(ocr.utils.beam_search) from ocr.utils.beam_search import ctcBeamSearch from ocr.paragraph_segmentation_dcnn import SegmentationNetwork, paragraph_segmentation_transform from ocr.word_and_line_segmentation import SSD as WordSegmentationNet, predict_bounding_boxes from ocr.handwriting_line_recognition import Network as HandwritingRecognitionNet, handwriting_recognition_transform from ocr.handwriting_line_recognition import decode as decoder_handwriting, alphabet_encoding ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu() test_ds = IAMDataset("form_original", train=False) test_ds = IAMDataset("form_original", train=False) figs_to_plot = 4 images = [] random.seed(1) n = 0 for i in range(0, figs_to_plot): n = int(random.random() * len(test_ds)) image, _ = test_ds[n] images.append(image) fig, axs = plt.subplots(int(len(images) / 2), 2, figsize=(15, 10 * len(images) / 2)) for i, image in enumerate(images):
0.03, 0.03) if detection_box == "line" else (0.005, 0.005)  # NOTE(review): statement truncated at chunk start — left-hand side not visible
random_remove_box = 0.1  # probability of dropping a box during augmentation — TODO confirm semantics against augment_transform
log_dir = "./logs/line_word_segmentation"
checkpoint_dir, checkpoint_name = "model_checkpoint", "ssd_" + detection_box + ".params"
# Logging / checkpoint cadence (in iterations or epochs — confirm against the training loop).
print_every_n = 5
send_image_every_n = 20
save_every_n = 50
#%%
#############################################################
# One CPU context per core; cpu_count is defined elsewhere in the file.
ctx = [mx.cpu(i) for i in range(cpu_count)]
##############################################################
# Bounding-box variants of the IAM forms, parsed per detection_box ("line" or "word").
train_ds = IAMDataset("form_bb", output_data="bb", output_parse_method=detection_box, train=True)
print("Number of training samples: {}".format(len(train_ds)))
test_ds = IAMDataset("form_bb", output_data="bb", output_parse_method=detection_box, train=False)
print("Number of testing samples: {}".format(len(test_ds)))
train_data = gluon.data.DataLoader(train_ds.transform(augment_transform), batch_size, shuffle=True, last_batch="rollover", num_workers=8)
# NOTE(review): call truncated at chunk end — remaining DataLoader arguments are outside this view.
test_data = gluon.data.DataLoader(test_ds.transform(transform),
from ocr.utils.iam_dataset import IAMDataset # # Paragraph Segmentation # This notebook investigates methods to identify segment passages from images that contains printed and handwritten text using the **MSER algorithm**. # # *Input*: a png from the IAM dataset with the "form" input and output type of "bb" and form. # # *Output*: a bounding boxes of the paragraphs. # # ## Dataset creation # In[3]: train_ds = IAMDataset("form", output_data="bb", output_parse_method="form", train=True) print("Number of training samples: {}".format(len(train_ds))) test_ds = IAMDataset("form", output_data="bb", output_parse_method="form", train=False) print("Number of testing samples: {}".format(len(test_ds))) # ## MSER based text region detection # # The MSER algorithm has been used in text regions detection [1]. Although this was mainly applied to printed text, we tested this technique to segment documents with both handwritten and printed text. # ### Detecting text region proposals
# Network hyper-parameters for the CNN-BiLSTM line recognizer.
num_downsamples = 2
resnet_layer_id = 4
lstm_hidden_states = 512
lstm_layers = 2

# Augmentation ranges (fractions of image size / scale factors — confirm against augment_transform).
random_y_translation, random_x_translation = 0.03, 0.03
random_y_scaling, random_x_scaling = 0.1, 0.1
random_shearing = 0.7

log_dir = "./logs/handwriting_recognition"
checkpoint_dir = "model_checkpoint"
checkpoint_name = "handwriting.params"

# In[18]:

# Line images paired with their transcriptions.
train_ds = IAMDataset("line", output_data="text", train=True)
print("Number of training samples: {}".format(len(train_ds)))
test_ds = IAMDataset("line", output_data="text", train=False)
print("Number of testing samples: {}".format(len(test_ds)))

# In[ ]:

train_data = gluon.data.DataLoader(train_ds.transform(augment_transform), batch_size, shuffle=True, last_batch="rollover", num_workers=4)
# NOTE(review): call truncated at chunk end — remaining DataLoader arguments are outside this view.
test_data = gluon.data.DataLoader( test_ds.transform(transform), batch_size,
# Hyper-parameters for the CNN-BiLSTM handwriting line recognizer.
resnet_layer_id = 4
lstm_hidden_states = 512
lstm_layers = 2

# Data-augmentation ranges.
random_y_translation = 0.03
random_x_translation = 0.03
random_y_scaling = 0.1
random_x_scaling = 0.1
random_shearing = 0.7

# Logging and checkpoint locations.
log_dir = "./logs/handwriting_recognition"
checkpoint_dir = "model_checkpoint"
checkpoint_name = "handwriting.params"

#%%
# IAM line images with text transcriptions.
train_ds = IAMDataset("line", output_data="text", train=True)
print("Number of training samples: {}".format(len(train_ds)))
test_ds = IAMDataset("line", output_data="text", train=False)
print("Number of testing samples: {}".format(len(test_ds)))

#%%
train_data = gluon.data.DataLoader(
    train_ds.transform(augment_transform),
    batch_size,
    shuffle=True,
    last_batch="rollover",
    num_workers=4,
)
test_data = gluon.data.DataLoader(
    test_ds.transform(transform),
    batch_size,
    shuffle=True,
    last_batch="keep",
    num_workers=4,
)

#%%
# Build and hybridize the recognition network.
net = CNNBiLSTM(
    num_downsamples=num_downsamples,
    resnet_layer_id=resnet_layer_id,
    rnn_hidden_states=lstm_hidden_states,
    rnn_layers=lstm_layers,
    max_seq_len=max_seq_len,
    ctx=ctx,
)
net.hybridize()
import os

# Force CPU; restore the commented expression to prefer a GPU when present.
ctx = mx.cpu(0)  # if mx.context.num_gpus() > 0 else mx.cpu()

# ## Dataset creation
# Obtain the original forms from the IAM dataset and plot the results.
# Randomly select 4 images for analysis.

# In[3]:

# NOTE(review): im11 maps to 'test6.png' while im12..im15 map to test2..test5 — confirm intended.
im11 = np.asarray(cv2.imread('test6.png', cv2.IMREAD_GRAYSCALE))
im12 = np.asarray(cv2.imread('test2.png', cv2.IMREAD_GRAYSCALE))
im13 = np.asarray(cv2.imread('test3.png', cv2.IMREAD_GRAYSCALE))
im14 = np.asarray(cv2.imread('test4.png', cv2.IMREAD_GRAYSCALE))
im15 = np.asarray(cv2.imread('test5.png', cv2.IMREAD_GRAYSCALE))

# SECURITY: credentials were hard-coded in source here; read them from the
# environment instead so secrets never live in version control.
test_ds = IAMDataset(
    "form_original",
    credentials=(os.environ.get("IAM_USERNAME", ""), os.environ.get("IAM_PASSWORD", "")),
    train=False,
)

# In[4]:

random.seed(1)  # fixed seed so the sampled forms are reproducible

# In[5]:

figs_to_plot = 4
images = []
n = 0