# Setup stage of the script: bring the helpers into scope, load the
# vocabulary, segment one text image with find_words/split_on and load the
# probability matrices stored as .npy files.
import numpy as np
from sklearn.metrics import euclidean_distances

from hmms.segment import find_letters, split_on, show_segments, find_words
from hmms.data.load import load_text_images
from hmms import vdhmms
from hmms.utils import normalise
from hmms.analyzer import LETTER_MAP

# Parenthesised single-argument form: prints the same thing on Python 2 and
# also parses on Python 3.
print(LETTER_MAP['a'])

# Because we know it... FIXME: the height and width are hard-coded.
h, w = 98, 22

voc = np.load('vocabulary.npy')

text = load_text_images()
# Two items are drawn and the first one is overwritten, so `image` ends up
# being the second item produced by the loader.  The builtin next() is used
# instead of the Python-2-only `.next()` method.
image, _ = next(text)
image, _ = next(text)

segments = find_words(image)
el = split_on(image, segments, clean=True)
im = el[0]

# load all the probability matrices we need
transition = np.load('transition.npy')
first_letter = np.load('first_letter.npy')
last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')

# Adding this by hand...
# NOTE(review): `letter`, `segments`, `i` and `letters_list` are defined
# outside the visible part of the script; the two statements below presumably
# close the body of a loop that starts earlier - confirm their indentation
# against the complete file.
bits = split_on(letter, segments)
letters_list[i].append(bits)

# Computes the average space taken per letter.  For each of the 26 entries of
# letters_list the result holds two numbers, both divided by the number of
# samples stored for that letter: the summed shape[1] of every piece, and the
# summed number of pieces.
ave_letter = []
for letter in range(26):
    samples = letters_list[letter]
    n_samples = len(samples)

    piece_cols = []
    for pieces in samples:
        for piece in pieces:
            piece_cols.append(piece.shape[1])
    piece_counts = [len(pieces) for pieces in samples]

    ave_letter.append([sum(piece_cols) / n_samples,
                       sum(piece_counts) / n_samples])

# OK, now we have all the letters initialized
num = 0
texts = load_text_images()
for image, text in texts:
    words = text.split()
    segments = find_words(image)
    bits = split_on(image, segments, clean=True)

    # Just to check if we have segmented properly !
    seg_title = './text_seg/%s' % text.replace(' ', '_')
    show_segments(image, segments, title=seg_title, save=True)

    # The number of segments has to match the number of words in the text;
    # otherwise the image is reported and skipped.
    if len(bits) != len(words):
        print("problem with image %s" % text)
        continue

    # NOTE(review): the body of this loop may continue beyond the visible
    # part of the script.
    for im, word in zip(bits, words):
        h, w = im.shape
        seg = find_letters(im)
# NOTE(review): `i`, `bits` and `letters_list` are defined outside the
# visible part of the script; this statement presumably closes the body of a
# loop that starts earlier - confirm its indentation against the complete
# file.
letters_list[i].append(bits)

# Computes the average space taken per letter.  Each of the 26 entries of
# letters_list yields a pair, both members divided by the number of samples
# stored for that letter: the summed shape[1] of every piece, and the summed
# number of pieces.
ave_letter = []
for letter in range(26):
    samples = letters_list[letter]
    n_samples = len(samples)

    piece_cols = []
    for pieces in samples:
        for piece in pieces:
            piece_cols.append(piece.shape[1])
    piece_counts = [len(pieces) for pieces in samples]

    ave_letter.append([sum(piece_cols) / n_samples,
                       sum(piece_counts) / n_samples])

# OK, now we have all the letters initialized
num = 0
texts = load_text_images()
for image, text in texts:
    words = text.split()
    segments = find_words(image)
    bits = split_on(image, segments, clean=True)

    # Just to check if we have segmented properly !
    seg_title = './text_seg/%s' % text.replace(' ', '_')
    show_segments(image, segments, title=seg_title, save=True)

    # The number of segments has to match the number of words in the text;
    # otherwise the image is reported and skipped.
    if len(bits) != len(words):
        print("problem with image %s" % text)
        continue

    # NOTE(review): the body of this loop may continue beyond the visible
    # part of the script.
    for im, word in zip(bits, words):
        h, w = im.shape
        seg = find_letters(im)
        el = split_on(im, seg)