Exemple #1
0
    emb_mat = np.zeros((len(forms), emb.dim))
    for w in forms:
        emb_mat[forms[w]] = emb[w]
    emb_mat[forms['-OOV-']] = emb[emb.unk].reshape(1, emb.dim)
    emb_mat[forms['-PAD-']] = np.zeros((1, emb.dim))

print("constructing character list ...")
# Collect every distinct character occurring in `words` and wrap the result
# in a Label.
# The characters are sorted before being handed over: iterating a plain set
# of strings follows hash order, which differs between interpreter runs
# unless PYTHONHASHSEED is pinned, so an unsorted list would give the
# characters a different order on every run.
# NOTE(review): the character label reuses the name 'FORM' -- confirm that
# this is intended rather than a dedicated character-level name.
chars = Label('FORM', sorted({c for w in words for c in w}))

# UPOS inventory: the union of the tags recorded for the training and the
# validation corpus (the test corpus is not consulted here).
upos_tags = training_corpus.tagset['UPOS'] | validation_corpus.tagset['UPOS']
# Sort before building the Label so the tag order is identical on every run;
# a bare list(...) over a set of strings follows hash order, which is not
# stable across interpreter sessions.
# Assumes the tags are mutually comparable (e.g. all strings) -- TODO confirm.
upos = Label('UPOS', sorted(upos_tags), discrete=True)

# NOTE(review): batch_size is not referenced anywhere in the visible part of
# this script -- confirm whether it is consumed elsewhere.
batch_size = 10
tags = [upos]

# One data generator per corpus split, all sharing the same form, character
# and tag inventories. They are built in training, validation, test order.
training_generator, validation_generator, test_generator = (
    UDDataGenerator(split, forms, chars, tags)
    for split in (training_corpus, validation_corpus, test_corpus)
)

# Build the tagger from the three data generators and the word-embedding
# matrix, then train it for ten epochs.
tagger = Tagger(
    training_generator,
    validation_generator,
    test_generator,
    word_emb_mat=emb_mat,
    num_epochs=10,
)
tagger.fit()
# Saving the trained model is currently disabled:
##tagger.save_model()

# Evaluate on the held-out test split; the element at index 1 of the result
# is what gets reported as accuracy.
score = tagger.evaluate(test_generator)
print(
    "Accuracy on the test data: {0:1.4f}".format(score[1])
)