コード例 #1
0
def test_usage():
    """Exercise the tagging pipeline end to end on a short Polish sample.

    Loads the dictionary and word vectors (both calls pass ``limit=5000``),
    builds the tagger from them and loads its disambiguation model, then
    tokenizes a two-sentence text and checks the second token of the first
    chunk before and after tagging.

    Requires the data files under ``data/`` to be present on disk.
    """
    # Named `dictionary` rather than `dict` so the builtin is not shadowed.
    with timing('Loading dictionary entries'):
        dictionary = load_dict('data/dict/polimorf-20190818.tab', limit=5000)

    # limit=5000 presumably caps how many vectors are read to keep the test
    # fast -- confirm against the KeyedVectors implementation in use.
    with timing('Loading word vectors'):
        word_vectors = KeyedVectors.load_word2vec_format(
            'data/nkjp+wiki-forms-all-300-skipg-ns.txt', limit=5000)

    with timing('Initializing POS tagger'):
        pos_tagger = Lemmatizer.create(dictionary, word_vectors)
        pos_tagger.load_model('data/disambiguation.h5')

    # NOTE(review): 'kuchnii' looks like a misspelling of 'kuchni'; left
    # untouched because it is test input -- confirm whether it is intentional.
    text = '5 kilogramów pomidorów trafiło do kuchnii. Zostały ugotowane na miękko.'
    chunks = tokenize(text)
    # Tokenization alone must already expose the surface form.
    assert chunks[0].tokens[1].orth == 'kilogramów'

    print(chunks)

    # The return value of tag() is ignored; the assertions below read the
    # disambiguation results from the same `chunks` objects.
    pos_tagger.tag(chunks)
    assert chunks[0].tokens[1].disamb_lemma == 'kilogram'
    assert chunks[0].tokens[1].disamb_tag.startswith('noun')

    print(chunks)