Example #1
import numpy as np
from scipy.misc import imresize  # imresize was removed from newer SciPy; this code targets the older API
from sklearn.metrics.pairwise import euclidean_distances

# Assumed to come from this project: the vdhmms module (variable-duration HMM
# routines) and the find_letters / split_on / normalise / show_segments helpers.
# first_letter, transition, voc, h and w are expected to be defined elsewhere.

last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')
# Adding this by hand...
occurances[3, 0] += 0.5

emission[np.isnan(emission)] = 0

# im is expected to already hold the grayscale input image, e.g. via:
#im = imread('./data/and.png').mean(axis=2)
segments = find_letters(im)
patches = split_on(im, segments)
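# find_letters presumably locates the individual letter segments in the word
# image, and split_on cuts out one patch per segment; both are project-specific
# helpers.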

descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()
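# h and w are presumably the (max_height, max_width) patch size used when the
# vocabulary was built (see the training snippet further down), so that each
# flattened patch has exactly voc.shape[1] entries.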

# OK, these are the labels of our observations: each descriptor is assigned
# the index of its nearest vocabulary word.
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Now we can build the emission probabilities for the observed sequence of labels
emission_obs = emission[:, labels].T
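# emission_obs has one row per segment: emission_obs[i, s] is the probability
# that letter s emits the vocabulary word observed for segment i (emission is
# a (26, n_words) array, as built in the training snippet further down).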

alphas = vdhmms.alpha(transition, emission_obs, occurances,
                      p_init=first_letter.reshape((len(first_letter), )))
betas = vdhmms.beta(transition, emission_obs, occurances,
                    p_init=last_letter.reshape((len(last_letter), )))
show_segments(im, segments)

g = alphas * betas
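# A minimal decoding sketch (not part of the original example): assuming alphas
# and betas are both (26, n_segments) arrays with one row per letter, their
# product is proportional to the posterior P(letter | segment), so normalising
# each column and taking the argmax recovers the most likely letter per segment.
posterior = g / g.sum(axis=0)
best = posterior.argmax(axis=0)
decoded = ''.join(chr(ord('a') + s) for s in best)  # assumes letters are indexed 0..25 = 'a'..'z'
print(decoded)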
Example #2
last_letter = np.load('last_letter.npy')
emission = np.load('emission.npy')
occurances = np.load('occurances.npy')
# Adding this by hand...
occurances[3, 0] += 0.5

emission[np.isnan(emission)] = 0

# im is expected to already hold the grayscale input image, e.g. via:
#im = imread('./data/and.png').mean(axis=2)
segments = find_letters(im)
patches = split_on(im, segments)

descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()

# OK, these are the labels of our observations: each descriptor is assigned
# the index of its nearest vocabulary word.
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Now we can build the emission probabilities for the observed sequence of labels
emission_obs = emission[:, labels].T

alphas = vdhmms.alpha(transition,
                      emission_obs,
                      occurances,
                      p_init=first_letter.reshape((len(first_letter), )))
betas = vdhmms.beta(transition,
                    emission_obs,
                    occurances,
                    p_init=last_letter.reshape((len(last_letter), )))
Example #3
                    len(letter) // 2] += 1
        occurances[LETTER_MAP[previous_label],
                    len(letter) - len(letter) // 2] += 1
    else:
        occurances[LETTER_MAP[previous_label],
                    len(letter)] += 1
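# occurances[s, d] appears to count how many times letter s spans d of the
# detected segments; these counts act as the duration statistics consumed by
# the variable-duration HMM routines (vdhmms.alpha / vdhmms.beta above).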

with open('database.pck', 'wb') as f:  # binary mode so the pickle round-trips on Python 3
    pickle.dump(database, f)

# OK, let's create the descriptors array to run k-means.
# We have to resize every image so that all descriptors have the same size.
desc = np.zeros((len(database), max_width * max_height))
for i, (_, element) in enumerate(database):
    el = imresize(element, (max_height, max_width))
    el = normalise(el)
    desc[i] = el.flatten()

k = min(200, len(desc))
km = MiniBatchKMeans(n_clusters=k)  # older scikit-learn releases spelled this parameter `k`
km.fit(desc)

voc = km.cluster_centers_
labels = km.labels_
voc.dump('vocabulary.npy')
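# Note that ndarray.dump pickles the array, so recent NumPy versions need
# np.load('vocabulary.npy', allow_pickle=True) to read it back.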

# OK. We have voc + labels. Let's compute the emission probability matrix.
emission = np.zeros((26, len(voc)))
for el, label in zip(database, labels):
    emission[LETTER_MAP[el[0]], label] += 1
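
# A possible follow-up (a sketch, not part of the original snippet): row-normalise
# the counts into P(word | letter). Letters that never occur in the database give
# 0 / 0 = NaN rows, which is exactly what the emission[np.isnan(emission)] = 0
# step in the decoding examples above cleans up.
emission = emission / emission.sum(axis=1)[:, np.newaxis]
emission[np.isnan(emission)] = 0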