Example #1
0
# NOTE(review): `image`, `segments`, `np`, `split_on`, `find_letters`, `voc`,
# `h`, `w`, `imresize`, `normalise`, `euclidean_distances` and `vdhmms` are
# all defined/imported outside this snippet — cannot verify them here.
el = split_on(image, segments, clean=True)
im = el[0]  # work on the first cleaned patch only

# Load all the precomputed probability matrices needed by the HMM decoder.
transition = np.load('transition.npy')      # state-to-state transition probabilities
first_letter = np.load('first_letter.npy')  # initial-letter distribution
last_letter = np.load('last_letter.npy')    # final-letter distribution
emission = np.load('emission.npy')          # per-state emission probabilities
occurances = np.load('occurances.npy')      # (sic: "occurrences") occurrence counts
# Manual correction applied by hand — presumably smoothing a zero or
# undersampled count; TODO confirm why entry [3, 0] and why 0.5.
occurances[3, 0] += 0.5

# Zero out NaN emission entries so they contribute no probability mass.
emission[np.isnan(emission)] = 0

#im = imread('./data/and.png').mean(axis=2)
# Segment the working image into individual letter patches.
segments = find_letters(im)
patches = split_on(im, segments)

# Build one descriptor row per patch: resize to (h, w), normalise, flatten.
descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()

# Label each observation with the index of its nearest vocabulary entry
# (minimum Euclidean distance to the rows of `voc`).
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Emission probabilities restricted to the observed labels:
# one row per observation, one column per hidden state.
emission_obs = emission[:, labels].T

# NOTE(review): the call below is truncated in this snippet — remaining
# arguments are not visible here.
alphas = vdhmms.alpha(transition,
Example #2
0
# Accumulator for segmented results — NOTE(review): never populated in the
# visible span; presumably filled further down in the original file.
database = []


def make_gen(bits):
    """Wrap *bits* in a generator, yielding its items one at a time."""
    for item in bits:
        yield item


# NOTE(review): `load_letters`, `find_letters` and `split_on` are defined
# outside this snippet.
letters = load_letters()
# One bucket of segmented samples per letter of the alphabet (26 classes).
letters_list = {}
for i in range(26):
    letters_list[i] = []

for i, letter in enumerate(letters):
    # assumes load_letters() yields at most 26 items (one per class) —
    # otherwise letters_list[i] raises KeyError; TODO confirm
    segments = find_letters(letter)
    bits = split_on(letter, segments)
    letters_list[i].append(bits)

# Computes the average space taken per letter:
# [average total segment width per sample, average segment count per sample].
ave_letter = []
for letter in range(26):
    # Widths (shape[1], i.e. columns) of every segment across all samples.
    m = [j.shape[1] for l in letters_list[letter] for j in l]
    # Number of segments in each individual sample of this letter.
    d = [len(l) for l in letters_list[letter]]
    # NOTE(review): under Python 2 (this code uses `imresize`-era SciPy
    # elsewhere) `/` is integer division here — verify whether float
    # averages were intended.
    ave_letter.append([
        sum(m) / len(letters_list[letter]),
        sum(d) / len(letters_list[letter])
    ])

# OK, now we have all the letters initialized
num = 0
Example #3
0
# NOTE(review): `image`, `segments`, `np`, `split_on`, `find_letters`, `voc`,
# `h`, `w`, `imresize`, `normalise`, `euclidean_distances` and `vdhmms` are
# all defined/imported outside this snippet — cannot verify them here.
el = split_on(image, segments, clean=True)
im = el[0]  # work on the first cleaned patch only

# Load all the precomputed probability matrices needed by the HMM decoder.
transition = np.load('transition.npy')      # state-to-state transition probabilities
first_letter = np.load('first_letter.npy')  # initial-letter distribution
last_letter = np.load('last_letter.npy')    # final-letter distribution
emission = np.load('emission.npy')          # per-state emission probabilities
occurances = np.load('occurances.npy')      # (sic: "occurrences") occurrence counts
# Manual correction applied by hand — presumably smoothing a zero or
# undersampled count; TODO confirm why entry [3, 0] and why 0.5.
occurances[3, 0] += 0.5

# Zero out NaN emission entries so they contribute no probability mass.
emission[np.isnan(emission)] = 0

#im = imread('./data/and.png').mean(axis=2)
# Segment the working image into individual letter patches.
segments = find_letters(im)
patches = split_on(im, segments)

# Build one descriptor row per patch: resize to (h, w), normalise, flatten.
descs = np.zeros((len(patches), voc.shape[1]))
for i, patch in enumerate(patches):
    patch = imresize(patch, (h, w))
    patch = normalise(patch)
    descs[i] = patch.flatten()

# Label each observation with the index of its nearest vocabulary entry
# (minimum Euclidean distance to the rows of `voc`).
labels = euclidean_distances(descs, voc).argmin(axis=1)

# Emission probabilities restricted to the observed labels:
# one row per observation, one column per hidden state.
emission_obs = emission[:, labels].T

# NOTE(review): the call below is truncated in this snippet — remaining
# arguments are not visible here.
alphas = vdhmms.alpha(transition, emission_obs, occurances,
Example #4
0
File: segment_data.py  Project: NelleV/HMMs
from hmms.segment import find_letters, split_on, find_words, show_segments
from hmms.analyzer import LETTER_MAP

# Accumulator for segmented results — NOTE(review): never populated in the
# visible span; presumably filled further down in the original file.
database = []

def make_gen(bits):
    """Wrap *bits* in a generator, yielding its items one at a time."""
    for item in bits:
        yield item

# NOTE(review): `load_letters` is not among the imports shown above —
# presumably imported earlier in the original file; verify.
letters = load_letters()
# One bucket of segmented samples per letter of the alphabet (26 classes).
letters_list = {}
for i in range(26):
    letters_list[i] = []

for i, letter in enumerate(letters):
    # assumes load_letters() yields at most 26 items (one per class) —
    # otherwise letters_list[i] raises KeyError; TODO confirm
    segments = find_letters(letter)
    bits = split_on(letter, segments)
    letters_list[i].append(bits)

# Computes the average space taken per letter:
# [average total segment width per sample, average segment count per sample].
ave_letter = []
for letter in range(26):
    # Widths (shape[1], i.e. columns) of every segment across all samples.
    m = [j.shape[1] for l in letters_list[letter]
            for j in l]
    # Number of segments in each individual sample of this letter.
    d = [len(l) for l in letters_list[letter]]
    # NOTE(review): under Python 2 `/` is integer division here — verify
    # whether float averages were intended.
    ave_letter.append(
        [sum(m) / len(letters_list[letter]),
         sum(d) / len(letters_list[letter])])


# OK, now we have all the letters initialized