def get_qualitative_results_lines(denoise_func):
    """Score a line-level denoiser over the IAM line test set.

    Runs each test line image through the handwriting line recognition
    network, decodes the character probabilities with ``denoise_func``, and
    accumulates hypothesis/reference pairs in the module-level ``sclite``
    scorer.

    Parameters
    ----------
    denoise_func : callable
        Maps the network's character-probability output to a decoded string.

    Returns
    -------
    float
        Mean character error rate (CER) over the processed lines.
    """
    sclite.clear()
    test_ds_line = IAMDataset("line", train=False)
    # NOTE(review): iteration starts at index 1, silently skipping sample 0 —
    # confirm this is intentional.
    for i in tqdm(range(1, len(test_ds_line))):
        image, text = test_ds_line[i]
        line_image = exposure.adjust_gamma(image, 1)
        line_image = handwriting_recognition_transform(line_image, line_image_size)
        character_probabilities = handwriting_line_recognition_net(line_image.as_in_context(ctx))
        decoded_text = denoise_func(character_probabilities)
        # Ground-truth transcriptions carry XML-escaped entities; restore the
        # literal characters before scoring.  (The scraped source had these
        # entity strings HTML-decoded into no-op replacements.)
        actual_text = text[0].replace("&quot;", '"').replace("&apos;", "'").replace("&amp;", "&")
        sclite.add_text([decoded_text], [actual_text])

    cer, _ = sclite.get_cer()  # second value (sentence error rate?) unused — confirm
    print("Mean CER = {}".format(cer))
    return cer
# Project-local OCR utilities.  The two modules below are explicitly reloaded
# so that edits made during an interactive session take effect without
# restarting the interpreter.
import ocr.utils.denoiser_utils
import ocr.utils.beam_search

importlib.reload(ocr.utils.denoiser_utils)
from ocr.utils.denoiser_utils import SequenceGenerator

importlib.reload(ocr.utils.beam_search)
from ocr.utils.beam_search import ctcBeamSearch
# Networks and transforms for each pipeline stage:
# paragraph segmentation -> word/line detection -> line recognition/decoding.
from ocr.paragraph_segmentation_dcnn import SegmentationNetwork, paragraph_segmentation_transform
from ocr.word_and_line_segmentation import SSD as WordSegmentationNet, predict_bounding_boxes
from ocr.handwriting_line_recognition import Network as HandwritingRecognitionNet, handwriting_recognition_transform
from ocr.handwriting_line_recognition import decode as decoder_handwriting, alphabet_encoding

# Prefer GPU when one is available, otherwise fall back to CPU.
ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()

# Original IAM form images for qualitative inspection.
# (The scraped source constructed this dataset twice; once is enough.)
test_ds = IAMDataset("form_original", train=False)

# Randomly sample a handful of forms to plot.
figs_to_plot = 4
images = []
random.seed(1)  # fixed seed so the same forms are chosen on every run
n = 0
for i in range(0, figs_to_plot):
    n = int(random.random() * len(test_ds))
    image, _ = test_ds[n]
    images.append(image)

# Two columns; figure height scales with the number of sampled images.
fig, axs = plt.subplots(int(len(images) / 2),
                        2,
                        figsize=(15, 10 * len(images) / 2))
for i, image in enumerate(images):
# --- snippet boundary ("Ejemplo n.º 3") left over from the scraped source;
# the body of the preceding `for` loop was lost in extraction ---
    0.03, 0.03) if detection_box == "line" else (0.005, 0.005)
random_remove_box = 0.1

log_dir = "./logs/line_word_segmentation"
checkpoint_dir, checkpoint_name = "model_checkpoint", "ssd_" + detection_box + ".params"

print_every_n = 5
send_image_every_n = 20
save_every_n = 50

#%%
#############################################################
# One MXNet context per CPU core for multi-device training.
# NOTE(review): `cpu_count` is assumed to be defined earlier in the file
# (e.g. multiprocessing.cpu_count()) — confirm.
ctx = [mx.cpu(i) for i in range(cpu_count)]
##############################################################
# IAM forms with bounding-box labels, parsed at `detection_box` granularity.
train_ds = IAMDataset("form_bb",
                      output_data="bb",
                      output_parse_method=detection_box,
                      train=True)
print("Number of training samples: {}".format(len(train_ds)))

test_ds = IAMDataset("form_bb",
                     output_data="bb",
                     output_parse_method=detection_box,
                     train=False)
print("Number of testing samples: {}".format(len(test_ds)))

# Augmented training loader; "rollover" carries leftover samples into the
# next epoch so every batch stays full.
train_data = gluon.data.DataLoader(train_ds.transform(augment_transform),
                                   batch_size,
                                   shuffle=True,
                                   last_batch="rollover",
                                   num_workers=8)
test_data = gluon.data.DataLoader(test_ds.transform(transform),
# --- snippet boundary ("Ejemplo n.º 4") left over from the scraped source;
# the preceding test DataLoader call was truncated in extraction ---
from ocr.utils.iam_dataset import IAMDataset

# # Paragraph Segmentation
# This notebook investigates methods to identify and segment passages from images that contain printed and handwritten text using the **MSER algorithm**.
#
# *Input*: a PNG from the IAM dataset, using the "form" input type with output type "bb" and parse method "form".
#
# *Output*: the bounding boxes of the paragraphs.
#
# ## Dataset creation

# In[3]:

# Paragraph-segmentation data: IAM forms labelled with paragraph ("form")
# bounding boxes.
train_ds = IAMDataset(
    "form", output_data="bb", output_parse_method="form", train=True)
print("Number of training samples: {}".format(len(train_ds)))

test_ds = IAMDataset(
    "form", output_data="bb", output_parse_method="form", train=False)
print("Number of testing samples: {}".format(len(test_ds)))

# ## MSER based text region detection
#
# The MSER algorithm has been used for text region detection [1]. Although it was mainly applied to printed text, we tested this technique to segment documents containing both handwritten and printed text.

# ### Detecting text region proposals
# --- Network architecture ---
num_downsamples = 2        # number of CNN downsampling stages
resnet_layer_id = 4        # backbone layer index — TODO confirm exact meaning
lstm_hidden_states = 512   # LSTM hidden state size
lstm_layers = 2            # stacked LSTM layers

# --- Data-augmentation ranges ---
random_y_translation = 0.03
random_x_translation = 0.03
random_y_scaling = 0.1
random_x_scaling = 0.1
random_shearing = 0.7

# --- Logging / checkpointing ---
log_dir = "./logs/handwriting_recognition"
checkpoint_dir = "model_checkpoint"
checkpoint_name = "handwriting.params"

# In[18]:

# Line-recognition data: IAM line images paired with text transcriptions.
train_ds = IAMDataset("line",
                      output_data="text",
                      train=True)
print("Number of training samples: {}".format(len(train_ds)))

test_ds = IAMDataset("line",
                     output_data="text",
                     train=False)
print("Number of testing samples: {}".format(len(test_ds)))

# In[ ]:

# Augmented training loader; "rollover" keeps every batch full by carrying
# leftover samples into the next epoch.
train_data = gluon.data.DataLoader(
    train_ds.transform(augment_transform), batch_size, shuffle=True,
    last_batch="rollover", num_workers=4)
test_data = gluon.data.DataLoader(
    test_ds.transform(transform),
    batch_size,
# --- snippet boundary ("Ejemplo n.º 6") left over from the scraped source;
# the preceding test DataLoader call was truncated in extraction ---
# --- Network architecture ---
resnet_layer_id = 4        # backbone layer index — TODO confirm exact meaning
lstm_hidden_states = 512   # LSTM hidden state size
lstm_layers = 2            # stacked LSTM layers

# --- Data-augmentation ranges ---
random_y_translation = 0.03
random_x_translation = 0.03
random_y_scaling = 0.1
random_x_scaling = 0.1
random_shearing = 0.7

# --- Logging / checkpointing ---
log_dir = "./logs/handwriting_recognition"
checkpoint_dir = "model_checkpoint"
checkpoint_name = "handwriting.params"

#%%

# Line-recognition data: IAM line images paired with text transcriptions.
train_ds = IAMDataset("line", output_data="text", train=True)
print("Number of training samples: {}".format(len(train_ds)))

test_ds = IAMDataset("line", output_data="text", train=False)
print("Number of testing samples: {}".format(len(test_ds)))

#%%

# Training loader: augmented, "rollover" keeps batches full across epochs.
# Test loader: plain transform, "keep" retains the final partial batch.
train_data = gluon.data.DataLoader(train_ds.transform(augment_transform),
                                   batch_size,
                                   shuffle=True,
                                   last_batch="rollover",
                                   num_workers=4)
test_data = gluon.data.DataLoader(test_ds.transform(transform),
                                  batch_size,
                                  shuffle=True,
                                  last_batch="keep",
                                  num_workers=4)

#%%


# Line-recognition network: CNN backbone feeding a (bi)LSTM.
# NOTE(review): `ctx` is reassigned to CPU *below*, AFTER the network is built
# with `ctx=ctx`, so the network uses whatever context was defined earlier in
# the file — confirm this ordering is intentional.
net = CNNBiLSTM(num_downsamples=num_downsamples, resnet_layer_id=resnet_layer_id , rnn_hidden_states=lstm_hidden_states, rnn_layers=lstm_layers, max_seq_len=max_seq_len, ctx=ctx)
net.hybridize()

ctx = mx.cpu(0) #if mx.context.num_gpus() > 0 else mx.cpu()


# ## Dataset creation
# Obtain the original forms from the IAM dataset and plot the results. Randomly select 4 images for analysis.

# In[3]:

# Hand-picked test pages loaded as grayscale arrays for qualitative checks.
# NOTE(review): these files must exist in the working directory.
im11 = np.asarray(cv2.imread('test6.png', cv2.IMREAD_GRAYSCALE))
im12 = np.asarray(cv2.imread('test2.png', cv2.IMREAD_GRAYSCALE))
im13 = np.asarray(cv2.imread('test3.png', cv2.IMREAD_GRAYSCALE))
im14 = np.asarray(cv2.imread('test4.png', cv2.IMREAD_GRAYSCALE))
im15 = np.asarray(cv2.imread('test5.png', cv2.IMREAD_GRAYSCALE))

# SECURITY/FIXME: credentials were hard-coded in source ("sapand"/"saurabh12345").
# They are kept only as a fallback so existing runs keep working, but they are
# now leaked and must be rotated; prefer setting IAM_USERNAME / IAM_PASSWORD.
import os
_iam_credentials = (os.environ.get("IAM_USERNAME", "sapand"),
                    os.environ.get("IAM_PASSWORD", "saurabh12345"))
test_ds = IAMDataset("form_original", credentials=_iam_credentials, train=False)


# In[4]:


random.seed(1)  # deterministic form sampling below


# In[5]:


figs_to_plot = 4
images = []

n = 0