Esempio n. 1
0
    def __init__(self, image_dir="images/", debug=False):
        """Set up font metrics, label matrix and the ordered list of image paths.

        Args:
            image_dir: Directory scanned for ``.png`` files. Each file name,
                minus its extension, must parse as an integer; the files are
                ordered by that integer.
            debug: Flag stored unchanged as ``self.debug``.

        Raises:
            ValueError: If a ``.png`` file in ``image_dir`` has a non-integer
                base name.
        """
        self.kjv = KJVTextDataset()

        # See scripts/generate_images.py
        self.font_size_in = 0.25
        self.font_size_pt = int(self.font_size_in * 72.0)
        self.font_path = "utils/Andale-Mono.ttf"  # Specific to Mac OS -- change if needed
        self.font = ImageFont.truetype(self.font_path, self.font_size_pt)
        # NOTE(review): Pillow documents getsize() as returning
        # (width, height), so these two names look swapped. Left untouched
        # because other code may rely on the current assignment -- confirm.
        self.char_height, self.char_width = self.font.getsize("A")[0:2]
        self.chars_per_line = 32
        self.lines_per_img = 32
        self.image_dims_px = (self.char_height * self.chars_per_line,
                              (self.font_size_pt + 3) * self.lines_per_img)
        self.char_image_size = (self.char_height, (self.char_width + 3))

        self.labels = self.kjv.image_label_mat(self.chars_per_line,
                                               self.lines_per_img)

        # Sort NUMERICALLY, not LEXICOGRAPHICALLY ("10.png" must follow
        # "9.png"). os.path.splitext removes exactly the extension, whereas
        # str.rstrip(".png") strips any trailing '.', 'p', 'n' or 'g'
        # characters and is not a suffix operation.
        png_names = [
            name for name in os.listdir(image_dir) if name.endswith(".png")
        ]
        png_names.sort(key=lambda name: int(os.path.splitext(name)[0]))
        self.image_paths = [
            os.path.join(image_dir, name) for name in png_names
        ]

        # Dynamically load these later
        self._all_data = None
        self._training_data = None
        self._val_data = None

        self.debug = debug
Esempio n. 2
0
import math
import sys
sys.path.append("./")

import numpy as np

from utils.belief_prop import bp_error_correction
from utils.kjv_text import KJVTextDataset
from utils.metrics import char_err_rate, word_err_rate

import random

print("Running belief prop with one-hot vectors degraded by Gaussian noise...")

kjv = KJVTextDataset()

# Simply use ground truth one-hot vectors as predictions
# Just a baseline model -- not much accomplished here in general
predictions = kjv.one_hot()

# Generate Gaussian noise (don't worry about normalization/rectification,
# the error correction will do this automatically later)
print("Generating Gaussian noise...")
mean = 0.0
std_dev = 0.1

# Start from an identity matrix with one row per column of `predictions`,
# then smear each row with a box kernel whose width is drawn from {1, 2, 3}
# with probabilities .85/.1/.05, and rescale the row so it sums to 1.
num_letters = predictions.shape[1]
noise_per_letter = np.identity(num_letters)
for i in range(num_letters):
    rand = np.random.choice(range(1, 4), 1, p=[.85, .1, .05])
    vector = np.ones(rand)
    smeared = np.convolve(noise_per_letter[i], vector, "same")
    noise_per_letter[i] = smeared / smeared.sum()
Esempio n. 3
0
import numpy as np

from models.dnn import OCRCNN

from utils.belief_prop import bp_error_correction
from utils.viterbi import viterbi_error_correction
from utils.kjv_text import KJVTextDataset
from utils.metrics import char_err_rate, word_err_rate, confusion_matrix


import matplotlib.pyplot as plt
import seaborn as sns; sns.set()


kjv = KJVTextDataset()

# Predict characters with convolutional neural net.
# All four hyper-parameter lists start out empty (four distinct list objects).
kernel_sizes, unit_counts, strides, maxpool_sizes = [], [], [], []

# Echo the configuration before building the model.
for template, value in (
        ("Using kernels %s", kernel_sizes),
        ("Using unit counts %s", unit_counts),
        ("Using strides %s", strides),
        ("Using max-pool sizes %s", maxpool_sizes),
):
    print(template % str(value))

cnn_config = dict(kernel_sizes=kernel_sizes,
                  unit_counts=unit_counts,
                  strides=strides,
                  maxpool_sizes=maxpool_sizes)
model = OCRCNN(**cnn_config)
# Debug variant kept for reference:
# model = OCRCNN(debug=True, **cnn_config)

print("Training CNN...")
model.train()
Esempio n. 4
0
class OCRModel(object):
    """Load rendered text images and slice them into per-character samples.

    Each image is treated as a grid of ``chars_per_line`` x ``lines_per_img``
    character cells. Every cell is flattened into one feature row and paired
    with an integer label read from ``self.labels``. The three public
    accessors build their arrays on first use and cache the result.
    """

    def __init__(self, image_dir="images/", debug=False):
        """Set up font metrics, label matrix and the ordered list of image paths.

        Args:
            image_dir: Directory scanned for ``.png`` files. Each file name,
                minus its extension, must parse as an integer; the files are
                ordered by that integer.
            debug: When true, the data accessors use a fixed set of the first
                ten images (nine for training, one for validation) instead of
                the dataset splits.

        Raises:
            ValueError: If a ``.png`` file in ``image_dir`` has a non-integer
                base name.
        """
        self.kjv = KJVTextDataset()

        # See scripts/generate_images.py
        self.font_size_in = 0.25
        self.font_size_pt = int(self.font_size_in * 72.0)
        self.font_path = "utils/Andale-Mono.ttf"  # Specific to Mac OS -- change if needed
        self.font = ImageFont.truetype(self.font_path, self.font_size_pt)
        # NOTE(review): Pillow documents getsize() as returning
        # (width, height), so these two names look swapped. The slicing in
        # _build_samples is written against the current assignment, so it is
        # left untouched -- confirm before renaming.
        self.char_height, self.char_width = self.font.getsize("A")[0:2]
        self.chars_per_line = 32
        self.lines_per_img = 32
        self.image_dims_px = (self.char_height * self.chars_per_line,
                              (self.font_size_pt + 3) * self.lines_per_img)
        self.char_image_size = (self.char_height, (self.char_width + 3))

        self.labels = self.kjv.image_label_mat(self.chars_per_line,
                                               self.lines_per_img)

        # Sort NUMERICALLY, not LEXICOGRAPHICALLY ("10.png" must follow
        # "9.png"). os.path.splitext removes exactly the extension, whereas
        # str.rstrip(".png") strips any trailing '.', 'p', 'n' or 'g'
        # characters and is not a suffix operation.
        png_names = [
            name for name in os.listdir(image_dir) if name.endswith(".png")
        ]
        png_names.sort(key=lambda name: int(os.path.splitext(name)[0]))
        self.image_paths = [
            os.path.join(image_dir, name) for name in png_names
        ]

        # Dynamically load these later
        self._all_data = None
        self._training_data = None
        self._val_data = None

        self.debug = debug

    def _split_indices(self, split):
        """Return the dataset's image indices for the named split."""
        return self.kjv.dataset_indices(split, self.chars_per_line,
                                        self.lines_per_img)

    def _build_samples(self, indices, add_noise=False):
        """Slice the images at ``indices`` into flattened character samples.

        Args:
            indices: Sequence of positions into ``self.image_paths`` (also
                used as the row index into ``self.labels``).
            add_noise: When true, each image independently has a 50% chance
                of getting zero-mean normal noise (std 0.6) added before it
                is sliced.

        Returns:
            ``(feats, labels)``: a float array with one flattened character
            cell per row and an int array with the matching labels.
        """
        # Samples are flattened individual character images
        flattened_size = self.char_image_size[0] * self.char_image_size[1]
        chars_per_image = self.chars_per_line * self.lines_per_img
        cell_rows = self.char_width + 3
        cell_cols = self.char_height

        feats = np.empty((len(indices) * chars_per_image, flattened_size),
                         dtype=float)
        labels = np.zeros(len(indices) * chars_per_image, dtype=int)

        for i, img_idx in enumerate(indices):
            img = io.imread(self.image_paths[img_idx], as_grey=True)
            if add_noise and random.random() < .5:
                img += np.random.normal(0, .6, img.shape)
            for x in range(self.chars_per_line):
                for y in range(self.lines_per_img):
                    cell = img[y * cell_rows:(y + 1) * cell_rows,
                               x * cell_cols:(x + 1) * cell_cols]
                    feat_idx = (i * chars_per_image) + (
                        x * self.lines_per_img) + y
                    feats[feat_idx, :] = cell.reshape((-1))
                    # NOTE(review): the label offset uses lines_per_img as
                    # the per-line stride; this matches chars_per_line only
                    # because both are 32 -- confirm against image_label_mat.
                    labels[feat_idx] = self.labels[
                        img_idx, (y * self.lines_per_img) + x]

        return feats, labels

    def all_data(self):
        """Return cached ``(feats, labels)`` covering train + val images.

        Images are randomly degraded with noise (see ``_build_samples``).
        """
        if self._all_data is None:
            print("Preparing all data...")

            if self.debug:
                # Quick prototyping
                all_indices = list(range(10))
            else:
                all_indices = range(
                    len(self._split_indices("train")) +
                    len(self._split_indices("val")))

            self._all_data = self._build_samples(all_indices, add_noise=True)

            print("Prepared all data.")

        return self._all_data

    def training_data(self):
        """Return cached ``(feats, labels)`` for the training split."""
        if self._training_data is None:
            print("Preparing training data...")

            if self.debug:
                # Quick prototyping
                training_indices = list(range(9))
            else:
                training_indices = self._split_indices("train")

            self._training_data = self._build_samples(training_indices)

            print("Prepared training data.")

        return self._training_data

    def val_data(self):
        """Return cached ``(feats, labels)`` for the validation split."""
        if self._val_data is None:
            print("Preparing val data...")

            if self.debug:
                # Quick prototyping
                val_indices = list(range(9, 10))
            else:
                # Must be the "val" split; requesting "train" here would make
                # validation metrics a re-run over the training data.
                val_indices = self._split_indices("val")

            self._val_data = self._build_samples(val_indices)

            print("Prepared val data.")

        return self._val_data
Esempio n. 5
0
import math
import sys

sys.path.append("./")

import numpy as np
from PIL import Image, ImageFont, ImageDraw

from utils.kjv_text import KJVTextDataset

# Dataset object from utils.kjv_text, built with default arguments.
kjv = KJVTextDataset()


# Derived from code at
# https://nicholastsmith.wordpress.com/2017/10/14/deep-learning-ocr-using-tensorflow-and-python/
def makeImage(txt, font, filename, sz):
    """Draw ``txt`` in black on a white RGB canvas and save it.

    Args:
        txt: Text to draw, anchored at the top-left corner (0, 0).
        font: Font object passed through to the draw call.
        filename: Destination passed to the image's ``save``.
        sz: Canvas size passed to ``Image.new``.
    """
    canvas = Image.new('RGB', sz, "white")
    ImageDraw.Draw(canvas).text((0, 0), txt, fill=(0, 0, 0), font=font)
    canvas.save(filename)


# Font size: 0.25 in, converted to whole points at 72 pt per inch.
font_size_in = 0.25
font_size_pt = int(font_size_in * 72.0)
font_path = "utils/Andale-Mono.ttf"  # Specific to Mac OS -- change if needed
font = ImageFont.truetype(font_path, font_size_pt)
# NOTE(review): Pillow documents getsize() as returning (width, height), so
# these two names look swapped -- confirm before relying on them.
char_height, char_width = font.getsize("A")[0:2]

# Presumably the text grid rendered into each image (characters per line and
# lines per image) -- verify against the code that consumes these values.
chars_per_line = 32
lines_per_img = 32
image_dims_px = (char_height * chars_per_line,