import numpy as np
from sklearn.metrics import euclidean_distances

from hmms.segment import find_letters, split_on, show_segments, find_words
from hmms.data.load import load_text_images
from hmms import vdhmms
from hmms.utils import normalise
from hmms.analyzer import LETTER_MAP

print(LETTER_MAP['a'])

# Hard-coded because we already know these dimensions for this dataset... FIXME
h, w = 98, 22
voc = np.load('vocabulary.npy')

text = load_text_images()
image, _ = next(text)
image, _ = next(text)   # skip the first image; work with the second one

segments = find_words(image)

el = split_on(image, segments, clean=True)
im = el[0]

# load all the probability matrices we need
transition = np.load('transition.npy')
first_letter = np.load('first_letter.npy')
last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')
# Adding this by hand...
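
# The arrays above are the standard HMM ingredients that `vdhmms` presumably
# consumes directly. Purely as an illustration of how they fit together (an
# assumption, not the project's actual API), the sketch below runs a log-space
# Viterbi decode, assuming emission[i, k] = P(symbol k | letter i),
# transition[i, j] = P(letter j follows letter i), and first_letter /
# last_letter are the start and end distributions over the 26 letters.
def viterbi_letters(obs, first_letter, transition, emission, last_letter):
    """Most likely letter sequence for a list of observation symbol indices."""
    n_states = transition.shape[0]
    T = len(obs)
    delta = np.zeros((T, n_states))           # best log-score ending in each state
    psi = np.zeros((T, n_states), dtype=int)  # best predecessor, for backtracking

    delta[0] = np.log(first_letter + 1e-12) + np.log(emission[:, obs[0]] + 1e-12)
    for t in range(1, T):
        scores = delta[t - 1][:, None] + np.log(transition + 1e-12)
        psi[t] = scores.argmax(axis=0)
        delta[t] = scores.max(axis=0) + np.log(emission[:, obs[t]] + 1e-12)
    delta[-1] += np.log(last_letter + 1e-12)

    # Backtrack from the best final state.
    path = [int(delta[-1].argmax())]
    for t in range(T - 1, 0, -1):
        path.append(int(psi[t][path[-1]]))
    return path[::-1]
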
Example #2
    # (fragment -- continues inside a loop that collects, for letter index i,
    # the segmented pieces of each sample of that letter)
    bits = split_on(letter, segments)
    letters_list[i].append(bits)

# For each letter, compute the average width (in pixels) taken by one sample
# of the letter and the average number of segments per sample.
ave_letter = []
for letter in range(26):
    widths = [piece.shape[1] for sample in letters_list[letter] for piece in sample]
    counts = [len(sample) for sample in letters_list[letter]]
    ave_letter.append([
        sum(widths) / len(letters_list[letter]),
        sum(counts) / len(letters_list[letter])
    ])
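
# Illustrative use of ave_letter (an assumption, not necessarily what the
# project does with it): guess how many letters a word image contains by
# dividing its width by the mean letter width.
def estimate_letter_count(word_image, ave_letter):
    mean_width = sum(entry[0] for entry in ave_letter) / len(ave_letter)
    return max(1, int(round(word_image.shape[1] / mean_width)))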

# OK, now we have all the letters initialized
num = 0
texts = load_text_images()
for image, text in texts:
    words = text.split()
    segments = find_words(image)
    bits = split_on(image, segments, clean=True)
    # Just to check that we have segmented properly!
    show_segments(image,
                  segments,
                  title=('./text_seg/%s' % text.replace(' ', '_')),
                  save=True)
    if len(bits) != len(words):
        print "problem with image %s" % text
        continue
    for im, word in zip(bits, words):
        h, w = im.shape
        seg = find_letters(im)
        el = split_on(im, seg)
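
# The example is truncated above. Given the euclidean_distances import and the
# vocabulary templates loaded earlier, one plausible matching step for each
# letter segment is sketched below; it is an assumption, not the project's
# actual code, and it assumes voc has shape (n_templates, h * w) with one
# flattened template per row.
def nearest_template(piece, voc, h=98, w=22):
    """Index of the vocabulary template closest to a single letter segment."""
    patch = np.zeros((h, w))
    ph, pw = min(piece.shape[0], h), min(piece.shape[1], w)
    patch[:ph, :pw] = piece[:ph, :pw]      # crop/pad the segment to (h, w)
    dist = euclidean_distances(patch.reshape(1, -1), voc)
    return int(dist.argmin())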