# Decode one word image with the variable-duration HMM pipeline:
# segment -> describe -> label against vocabulary -> forward (alpha) pass.
# NOTE(review): this chunk appears truncated -- the trailing
# vdhmms.alpha(...) call is cut off mid-argument-list in the visible source.

# `image`, `segments`, `voc`, `h`, `w`, `imresize`, `normalise`,
# `euclidean_distances` and `vdhmms` are assumed to be defined/imported
# earlier in the file -- TODO confirm.
el = split_on(image, segments, clean=True)
im = el[0]  # first cleaned segment: the word image we decode below

# Load all the probability matrices we need (precomputed .npy files):
# letter transitions, first/last-letter priors, emission probabilities
# and the "occurances" (sic) statistics used by the duration model.
transition = np.load('transition.npy')
first_letter = np.load('first_letter.npy')
last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')

# Adding this by hand... manual tweak of one statistics cell
# (presumably smoothing a zero count -- verify against training code).
occurances[3, 0] += 0.5
# Zero out NaNs so they cannot poison downstream sums/products.
emission[np.isnan(emission)] = 0

#im = imread('./data/and.png').mean(axis=2)

# Re-segment the word image into per-letter patches.
segments = find_letters(im)
patches = split_on(im, segments)

# One descriptor per patch: resize to (h, w), normalise, flatten,
# matching the layout of the vocabulary descriptors in `voc`.
descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()

# OK, these are the labels of our observation !
# (index of the nearest vocabulary descriptor, by Euclidean distance)
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Now, we can compute the emission probability matrices
# (one row of per-state emission probabilities per observed label).
emission_obs = emission[:, labels].T

# NOTE(review): statement truncated here in the visible source --
# the remaining arguments of vdhmms.alpha(...) are missing.
alphas = vdhmms.alpha(transition,
database = []


def make_gen(bits):
    """Yield each element of *bits*, one at a time."""
    yield from bits


letters = load_letters()

# One empty bucket of segmented samples per alphabet letter (0-25).
letters_list = {}
for i in range(26):
    letters_list[i] = []

# Segment every training letter image and file its pieces under its index.
for i, letter in enumerate(letters):
    segments = find_letters(letter)
    bits = split_on(letter, segments)
    letters_list[i].append(bits)

# Computes the average space taken per letter:
# for each letter index, [mean total piece width, mean piece count]
# over the samples collected above.
ave_letter = []
for letter in range(26):
    samples = letters_list[letter]
    m = [j.shape[1] for l in samples for j in l]
    d = [len(l) for l in samples]
    n_samples = len(samples)
    ave_letter.append([sum(m) / n_samples, sum(d) / n_samples])

# OK, now we have all the letters initialized
num = 0
# Decode one word image with the variable-duration HMM pipeline
# (near-duplicate of an earlier chunk in this file).
# NOTE(review): this chunk is also truncated -- the trailing
# vdhmms.alpha(...) call stops mid-argument-list in the visible source.

# `image`, `segments`, `voc`, `h`, `w`, `imresize`, `normalise`,
# `euclidean_distances` and `vdhmms` are assumed to be defined/imported
# earlier in the file -- TODO confirm.
el = split_on(image, segments, clean=True)
im = el[0]  # first cleaned segment: the word image we decode below

# Load all the probability matrices we need (precomputed .npy files).
transition = np.load('transition.npy')
first_letter = np.load('first_letter.npy')
last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')

# Adding this by hand... manual tweak of one statistics cell
# (presumably smoothing a zero count -- verify against training code).
occurances[3, 0] += 0.5
# Zero out NaNs so they cannot poison downstream sums/products.
emission[np.isnan(emission)] = 0

#im = imread('./data/and.png').mean(axis=2)

# Re-segment the word image into per-letter patches.
segments = find_letters(im)
patches = split_on(im, segments)

# One descriptor per patch: resize to (h, w), normalise, flatten,
# matching the layout of the vocabulary descriptors in `voc`.
descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()

# OK, these are the labels of our observation !
# (index of the nearest vocabulary descriptor, by Euclidean distance)
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Now, we can compute the emission probability matrices
# (one row of per-state emission probabilities per observed label).
emission_obs = emission[:, labels].T

# NOTE(review): statement truncated here in the visible source --
# the remaining arguments of vdhmms.alpha(...) are missing.
alphas = vdhmms.alpha(transition, emission_obs, occurances,
from hmms.segment import find_letters, split_on, find_words, show_segments
from hmms.analyzer import LETTER_MAP

database = []


def make_gen(bits):
    """Yield every element of *bits* in turn."""
    yield from bits


# `load_letters` is assumed to be defined/imported elsewhere -- TODO confirm.
letters = load_letters()

# One empty bucket of segmented samples per alphabet letter (0-25).
letters_list = {}
for i in range(26):
    letters_list[i] = []

# Segment every training letter image and file its pieces under its index.
for i, letter in enumerate(letters):
    segments = find_letters(letter)
    bits = split_on(letter, segments)
    letters_list[i].append(bits)

# Computes the average space taken per letter:
# for each letter index, [mean total piece width, mean piece count]
# over the samples collected above.
ave_letter = []
for letter in range(26):
    samples = letters_list[letter]
    m = [j.shape[1] for l in samples for j in l]
    d = [len(l) for l in samples]
    count = len(samples)
    ave_letter.append([sum(m) / count, sum(d) / count])

# OK, now we have all the letters initialized