def __init__(self, image_dir="images/", debug=False):
    """Set up font metrics, labels and the ordered list of image paths.

    Args:
        image_dir: Directory holding the ``<number>.png`` images.
        debug: Stored on the instance as ``self.debug``.
    """
    self.kjv = KJVTextDataset()

    # Font/geometry settings -- see scripts/generate_images.py
    self.font_size_in = 0.25
    self.font_size_pt = int(self.font_size_in * 72.0)
    self.font_path = "utils/Andale-Mono.ttf"  # Specific to Mac OS -- change if needed
    self.font = ImageFont.truetype(self.font_path, self.font_size_pt)
    # NOTE(review): Pillow documents getsize() as returning (width, height),
    # so these two names look swapped. Other code may rely on the current
    # assignment, so it is deliberately left as is -- confirm before renaming.
    self.char_height, self.char_width = self.font.getsize("A")[0:2]
    self.chars_per_line = 32
    self.lines_per_img = 32
    self.image_dims_px = (self.char_height * self.chars_per_line,
                          (self.font_size_pt + 3) * self.lines_per_img)
    self.char_image_size = (self.char_height, (self.char_width + 3))

    self.labels = self.kjv.image_label_mat(self.chars_per_line,
                                           self.lines_per_img)

    # Sort NUMERICALLY, not LEXICOGRAPHICALLY ("10.png" must follow "9.png").
    # splitext() removes exactly the extension; str.rstrip(".png") would
    # strip any trailing run of the characters '.', 'p', 'n', 'g' instead.
    png_names = [
        name for name in os.listdir(image_dir) if name.endswith(".png")
    ]
    png_names.sort(key=lambda name: int(os.path.splitext(name)[0]))
    self.image_paths = [os.path.join(image_dir, name) for name in png_names]

    # Dynamically load these later
    self._all_data = None
    self._training_data = None
    self._val_data = None

    self.debug = debug
# Baseline experiment: belief-propagation error correction applied to
# ground-truth one-hot vectors that have been degraded by noise.
import math
import sys

sys.path.append("./")

import numpy as np
from utils.belief_prop import bp_error_correction
from utils.kjv_text import KJVTextDataset
from utils.metrics import char_err_rate, word_err_rate
import random

print("Running belief prop with one-hot vectors degraded by Gaussian noise...")

kjv = KJVTextDataset()

# The "predictions" are simply the ground-truth one-hot vectors; this is
# only a baseline, no real model is involved.
predictions = kjv.one_hot()

# Normalization/rectification of the noise is not handled here; the error
# correction step is expected to take care of it later.
print("Generating Gaussian noise...")
mean = 0.0
std_dev = 0.1  # NOTE(review): mean/std_dev are not used in this part of the script

# Start from the identity matrix and smear every row with a box filter of
# random width (1, 2 or 3 with probabilities .85/.1/.05), then rescale the
# row so that it sums to one.
num_letters = predictions.shape[1]
noise_per_letter = np.identity(num_letters)
for i in range(num_letters):
    rand = np.random.choice(range(1, 4), 1, p=[.85, .1, .05])
    vector = np.ones(rand)
    smeared = np.convolve(noise_per_letter[i], vector, "same")
    noise_per_letter[i] = smeared / smeared.sum()
# Driver script: configure and train the convolutional OCR model.
import numpy as np
from models.dnn import OCRCNN
from utils.belief_prop import bp_error_correction
from utils.viterbi import viterbi_error_correction
from utils.kjv_text import KJVTextDataset
from utils.metrics import char_err_rate, word_err_rate, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

kjv = KJVTextDataset()

# Predict characters with convolutional neural net.
# All architecture lists are empty here.
kernel_sizes = []
unit_counts = []
strides = []
maxpool_sizes = []

print("Using kernels %s" % str(kernel_sizes))
print("Using unit counts %s" % str(unit_counts))
print("Using strides %s" % str(strides))
print("Using max-pool sizes %s" % str(maxpool_sizes))

# An earlier, commented-out variant of this call additionally passed
# debug=True.
cnn_kwargs = dict(
    kernel_sizes=kernel_sizes,
    unit_counts=unit_counts,
    strides=strides,
    maxpool_sizes=maxpool_sizes,
)
model = OCRCNN(**cnn_kwargs)

print("Training CNN...")
model.train()
class OCRModel(object):
    """Load page images and slice them into per-character samples.

    Every image is cut into a grid of ``chars_per_line`` x ``lines_per_img``
    cells. Each cell is flattened into one feature row and paired with the
    matching entry of ``KJVTextDataset.image_label_mat``. The three datasets
    (all / training / validation) are built lazily on first access and then
    cached on the instance.
    """

    def __init__(self, image_dir="images/", debug=False):
        """Set up font metrics, labels and the ordered list of image paths.

        Args:
            image_dir: Directory holding the ``<number>.png`` images.
            debug: When true, the data accessors use only the first ten
                images (nine for training, one for validation).
        """
        self.kjv = KJVTextDataset()

        # Font/geometry settings -- see scripts/generate_images.py
        self.font_size_in = 0.25
        self.font_size_pt = int(self.font_size_in * 72.0)
        self.font_path = "utils/Andale-Mono.ttf"  # Specific to Mac OS -- change if needed
        self.font = ImageFont.truetype(self.font_path, self.font_size_pt)
        # NOTE(review): Pillow documents getsize() as returning
        # (width, height), so these two names look swapped. The slicing in
        # _build_samples() relies on the current assignment, so it is
        # deliberately left as is -- confirm before renaming.
        self.char_height, self.char_width = self.font.getsize("A")[0:2]
        self.chars_per_line = 32
        self.lines_per_img = 32
        self.image_dims_px = (self.char_height * self.chars_per_line,
                              (self.font_size_pt + 3) * self.lines_per_img)
        self.char_image_size = (self.char_height, (self.char_width + 3))

        self.labels = self.kjv.image_label_mat(self.chars_per_line,
                                               self.lines_per_img)

        # Sort NUMERICALLY, not LEXICOGRAPHICALLY ("10.png" must follow
        # "9.png"). splitext() removes exactly the extension, whereas
        # str.rstrip(".png") strips any trailing run of '.', 'p', 'n', 'g'.
        png_names = [
            name for name in os.listdir(image_dir) if name.endswith(".png")
        ]
        png_names.sort(key=lambda name: int(os.path.splitext(name)[0]))
        self.image_paths = [
            os.path.join(image_dir, name) for name in png_names
        ]

        # Dynamically load these later
        self._all_data = None
        self._training_data = None
        self._val_data = None

        self.debug = debug

    def _build_samples(self, indices, add_noise=False):
        """Slice the images at ``indices`` into flattened character samples.

        Args:
            indices: Sequence of image indices (positions in
                ``self.image_paths`` and rows of ``self.labels``).
            add_noise: When true, each image has a 50% chance of receiving
                additive normal noise (mean 0, std 0.6) before slicing.

        Returns:
            A ``(features, labels)`` tuple. ``features`` is a float array
            with one flattened character cell per row; ``labels`` is an int
            array with the corresponding label for each row.
        """
        flattened_size = self.char_image_size[0] * self.char_image_size[1]
        chars_per_image = self.chars_per_line * self.lines_per_img
        # Cell extent along image axis 0 and axis 1 respectively.
        cell_rows = self.char_width + 3
        cell_cols = self.char_height

        feats = np.empty((len(indices) * chars_per_image, flattened_size),
                         dtype=float)
        labels = np.zeros(len(indices) * chars_per_image, dtype=int)

        for i, img_idx in enumerate(indices):
            # NOTE(review): newer scikit-image releases spell this keyword
            # ``as_gray`` -- confirm against the pinned version.
            img = io.imread(self.image_paths[img_idx], as_grey=True)
            if add_noise and random.random() < .5:
                img += np.random.normal(0, .6, img.shape)

            for x in range(self.chars_per_line):
                for y in range(self.lines_per_img):
                    cell = img[y * cell_rows:(y + 1) * cell_rows,
                               x * cell_cols:(x + 1) * cell_cols]
                    # Samples are stored column-by-column within an image,
                    # while the label matrix is indexed line-by-line.
                    feat_idx = ((i * chars_per_image) +
                                (x * self.lines_per_img) + y)
                    feats[feat_idx, :] = cell.reshape(-1)
                    # NOTE(review): the line stride uses lines_per_img; this
                    # equals chars_per_line only because both are 32.
                    labels[feat_idx] = self.labels[
                        img_idx, (y * self.lines_per_img) + x]

        return feats, labels

    def all_data(self):
        """Return ``(features, labels)`` for every image, with random noise.

        Built on first call and cached. Roughly half of the images get
        additive normal noise (see ``_build_samples``).
        """
        if self._all_data is None:
            print("Preparing all data...")
            if self.debug:  # Quick prototyping
                all_indices = list(range(10))
            else:
                num_train = len(
                    self.kjv.dataset_indices("train", self.chars_per_line,
                                             self.lines_per_img))
                num_val = len(
                    self.kjv.dataset_indices("val", self.chars_per_line,
                                             self.lines_per_img))
                all_indices = range(num_train + num_val)
            self._all_data = self._build_samples(all_indices, add_noise=True)
            print("Prepared all data.")
        return self._all_data

    def training_data(self):
        """Return ``(features, labels)`` for the training split (cached)."""
        if self._training_data is None:
            print("Preparing training data...")
            if self.debug:  # Quick prototyping
                training_indices = list(range(9))
            else:
                training_indices = self.kjv.dataset_indices(
                    "train", self.chars_per_line, self.lines_per_img)
            self._training_data = self._build_samples(training_indices)
            print("Prepared training data.")
        return self._training_data

    def val_data(self):
        """Return ``(features, labels)`` for the validation split (cached)."""
        if self._val_data is None:
            print("Preparing val data...")
            if self.debug:  # Quick prototyping
                val_indices = list(range(9, 10))
            else:
                # Must be the "val" split: asking for "train" here would
                # make validation run on the training images.
                val_indices = self.kjv.dataset_indices(
                    "val", self.chars_per_line, self.lines_per_img)
            self._val_data = self._build_samples(val_indices)
            print("Prepared val data.")
        return self._val_data
import math import sys sys.path.append("./") import numpy as np from PIL import Image, ImageFont, ImageDraw from utils.kjv_text import KJVTextDataset kjv = KJVTextDataset() # Derived from code at # https://nicholastsmith.wordpress.com/2017/10/14/deep-learning-ocr-using-tensorflow-and-python/ def makeImage(txt, font, filename, sz): img = Image.new('RGB', sz, "white") draw = ImageDraw.Draw(img) draw.text((0, 0), txt, (0, 0, 0), font=font) img.save(filename) font_size_in = 0.25 font_size_pt = int(font_size_in * 72.0) font_path = "utils/Andale-Mono.ttf" # Specific to Mac OS -- change if needed font = ImageFont.truetype(font_path, font_size_pt) char_height, char_width = font.getsize("A")[0:2] chars_per_line = 32 lines_per_img = 32 image_dims_px = (char_height * chars_per_line,