import numpy as np
from scipy.misc import imread, imresize   # imread/imresize live in scipy.misc in older SciPy releases
from sklearn.metrics.pairwise import euclidean_distances

import vdhmms
# find_letters, split_on, normalise, show_segments, voc, transition,
# first_letter, h, w and im are assumed to be defined earlier in the project.

last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')

# Adding this by hand...
occurances[3, 0] += 0.5
# Letters never seen in training leave NaN rows in the emission matrix.
emission[np.isnan(emission)] = 0

#im = imread('./data/and.png').mean(axis=2)
segments = find_letters(im)
patches = split_on(im, segments)

# One descriptor per segmented patch, resized and normalised to the
# vocabulary's patch size.
descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()

# OK, these are the labels of our observations!
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Now we can compute the emission probability matrix of the observations
emission_obs = emission[:, labels].T

# Forward and backward passes of the variable-duration HMM.
alphas = vdhmms.alpha(transition, emission_obs, occurances,
                      p_init=first_letter.reshape((len(first_letter), )))
betas = vdhmms.beta(transition, emission_obs, occurances,
                    p_init=last_letter.reshape((len(last_letter), )))

show_segments(im, segments)
# Unnormalised posterior of each state given all the observations.
g = alphas * betas
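# A minimal decoding sketch, not part of the original script: assuming g has
# one row per letter (a-z, in the order used by LETTER_MAP) and one column per
# observed segment, the most likely letter for each segment can be read off by
# normalising every column and taking its arg-max. The orientation of g and
# the alphabetical state ordering are assumptions, not facts from the source.
import string

def decode_posterior(g):
    # Normalise each column so it sums to one, then pick the arg-max letter.
    posterior = g / g.sum(axis=0)
    letters = np.array(list(string.ascii_lowercase))
    return ''.join(letters[posterior.argmax(axis=0)])

# e.g. decoded = decode_posterior(alphas * betas)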
        occurances[LETTER_MAP[previous_label], len(letter) / 2] += 1
        occurances[LETTER_MAP[previous_label],
                   len(letter) - len(letter) / 2] += 1
    else:
        occurances[LETTER_MAP[previous_label], len(letter)] += 1

pickle.dump(database, open('database.pck', 'w'))

# OK, let's create the descriptors array, to run kmeans.
# We'll have to resize a bunch of images, in order to have descriptors of
# all the same size.
desc = np.zeros((len(database), max_width * max_height))
for i, (_, element) in enumerate(database):
    el = imresize(element, (max_height, max_width))
    el = normalise(el)
    desc[i] = el.flatten()

k = min(200, len(desc))
km = MiniBatchKMeans(k=k)   # old scikit-learn API; newer releases use n_clusters
km.fit(desc)
voc = km.cluster_centers_
labels = km.labels_
voc.dump('vocabulary.npy')

# OK. We have voc + labels. Let's compute the emission probability matrix.
emission = np.zeros((26, len(voc)))
for el, label in zip(database, labels):
    emission[LETTER_MAP[el[0]], label] += 1
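# A possible follow-up step, sketched here rather than taken from the source:
# the loop above only accumulates counts, so before `emission` can serve as
# P(codeword | letter) each row would be divided by its sum. Letters absent
# from the database give 0 / 0 = NaN rows, which is presumably why the
# decoding script zeroes out NaNs right after loading 'emission.npy'.
# Dumping the result mirrors what is done for the vocabulary above, but the
# variable name and file name here are assumptions.
emission_probs = emission / emission.sum(axis=1)[:, np.newaxis]
emission_probs.dump('emission.npy')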