Example #1
from scipy.misc import imresize
from sklearn.metrics.pairwise import euclidean_distances

import vdhmms

# Build one flattened, normalised descriptor per patch
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()
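
# `normalise` is defined elsewhere in the project; a common choice for
# patch descriptors is zero mean / unit variance, e.g. (an assumed
# sketch, not the project's actual helper):
#
#     def normalise(patch):
#         patch = patch.astype(float)
#         return (patch - patch.mean()) / (patch.std() + 1e-8)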

# OK, these are the labels of our observations: the index of the
# nearest visual word in the vocabulary for each patch descriptor
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Now we can compute the per-observation emission probability matrix
emission_obs = emission[:, labels].T
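
# Fancy indexing picks one emission column per observed visual word,
# giving one row per patch. A toy illustration (values made up, not
# taken from this project):
#
#     emission = [[0.6, 0.3, 0.1],   # state 0 over 3 visual words
#                 [0.2, 0.2, 0.6]]   # state 1
#     labels = [2, 0, 2]             # visual word of each patch
#     emission[:, labels].T          # -> [[0.1, 0.6],
#                                    #     [0.6, 0.2],
#                                    #     [0.1, 0.6]]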

# Forward pass, seeded with the first-letter prior
alphas = vdhmms.alpha(transition,
                      emission_obs,
                      occurances,
                      p_init=first_letter.reshape((len(first_letter), )))
# Backward pass, seeded with the last-letter prior (same length as
# first_letter, hence the reshape on len(first_letter))
betas = vdhmms.beta(transition,
                    emission_obs,
                    occurances,
                    p_init=last_letter.reshape((len(first_letter), )))
show_segments(im, segments)

g = alphas * betas         # state posterior, up to normalisation
chain = g.argmax(axis=1)   # most likely letter state per observation
prob = g.max(axis=1)
# Careful! Some chain labels correspond to null probabilities; mark
# those positions with '_'
letters = 'abcdefghijklmnopqrstuvwxyz'
labels = []
for p, l in zip(prob, chain):
    if p == 0:
        labels.append('_')
    else:
        labels.append(letters[l])
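
The `vdhmms.alpha` and `vdhmms.beta` calls above are the forward and
backward passes of the HMM, and `g = alphas * betas` is the state
posterior (up to normalisation) from which `argmax` picks the most
likely letter per observation. Below is a minimal sketch of the same
decoding on a plain fixed-duration HMM; the `forward`/`backward`
helpers are illustrative stand-ins, not the vdhmms API, which
additionally models state durations via `occurances`:

import numpy as np

def forward(transition, emission_obs, p_init):
    # alphas[t, i] = P(o_1..o_t, state_t = i)
    T, n = emission_obs.shape
    alphas = np.zeros((T, n))
    alphas[0] = p_init * emission_obs[0]
    for t in range(1, T):
        alphas[t] = alphas[t - 1].dot(transition) * emission_obs[t]
    return alphas

def backward(transition, emission_obs):
    # betas[t, i] = P(o_{t+1}..o_T | state_t = i)
    T, n = emission_obs.shape
    betas = np.ones((T, n))
    for t in range(T - 2, -1, -1):
        betas[t] = transition.dot(emission_obs[t + 1] * betas[t + 1])
    return betas

transition = np.array([[0.7, 0.3],
                       [0.4, 0.6]])       # P(state_{t+1} | state_t)
emission_obs = np.array([[0.9, 0.2],
                         [0.1, 0.8],
                         [0.5, 0.5]])     # P(obs_t | state), one row per step
alphas = forward(transition, emission_obs, np.array([0.5, 0.5]))
betas = backward(transition, emission_obs)
g = alphas * betas                        # posterior up to a constant
print(g.argmax(axis=1))                   # most likely state per step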
Example #2
    d = [len(l) for l in letters_list[letter]]
    ave_letter.append([
        sum(m) / len(letters_list[letter]),
        sum(d) / len(letters_list[letter])
    ])

# OK, now we have all the letters initialized
num = 0
texts = load_text_images()
for image, text in texts:
    words = text.split()
    segments = find_words(image)
    bits = split_on(image, segments, clean=True)
    # Just to check that we have segmented properly!
    show_segments(image,
                  segments,
                  title=('./text_seg/%s' % text.replace(' ', '_')),
                  save=True)
    if len(bits) != len(words):
        print "problem with image %s" % text
        continue
    for im, word in zip(bits, words):
        h, w = im.shape
        seg = find_letters(im)     # candidate letter boundaries
        el = split_on(im, seg)     # one sub-image per letter
        database.append([word, el])
        show_segments(im,
                      seg,
                      title=('./word_seg/' + str(num) + '_' + word),
                      save=True)

        labels = []
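
The segmentation helpers used above (`find_words`, `find_letters`,
`split_on`) are defined elsewhere in the project. A common way to
implement this kind of word/letter segmentation is a vertical
projection profile: sum the ink in each pixel column and cut where a
long enough run of blank columns occurs. A minimal sketch under that
assumption (binary image with ink > 0; `find_gaps`/`crop_segments` are
illustrative names, and the real helpers, including `split_on`'s
`clean` flag, are not shown here):

import numpy as np

def find_gaps(im, min_gap=3):
    """Return (start, end) column ranges separated by blank runs."""
    ink = im.sum(axis=0) > 0            # True where a column contains ink
    segments, start, gap = [], None, 0
    for x, has_ink in enumerate(ink):
        if has_ink:
            if start is None:
                start = x               # a new segment begins
            gap = 0
        elif start is not None:
            gap += 1
            if gap >= min_gap:          # blank run wide enough: close it
                segments.append((start, x - gap + 1))
                start = None
    if start is not None:               # segment touching the right edge
        segments.append((start, len(ink)))
    return segments

def crop_segments(im, segments):
    """Crop the image into one sub-image per (start, end) range."""
    return [im[:, s:e] for s, e in segments]

With a small min_gap this splits a word image into letters; with a
larger one, a text line into words.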