def test_batch_score():
    """Check that a repeatedly parsed sentence outscores a scramble of it.

    A model configured with PARSE='batch' and CHUNK_THRESHOLD=2 parses the
    same sentence five times; the sentence must then score strictly higher
    than a reordering of the same words.
    """
    seen_sentence = 'the dog ate the steak'
    scrambled_sentence = 'the ate dog steak the'

    model = Numila(PARSE='batch', CHUNK_THRESHOLD=2)
    # Presumably each parse call updates the model's state -- confirm
    # against Numila.parse.
    for _ in range(5):
        model.parse(seen_sentence)

    seen_score = model.score(seen_sentence)
    scrambled_score = model.score(scrambled_sentence)
    assert seen_score > scrambled_score
Exemple #2
0
def main():
    """Fit a default model and hand word dissimilarities to ``mds``.

    Takes the first 4000 items of the ('English', 'word') corpus, fits a
    default ``Numila`` model on them, looks up the graph nodes of the 20 most
    frequent tokens, and passes the pairwise ``1 - similarity`` matrix together
    with the token labels to ``mds``.
    """
    lang = 'English'
    kind = 'word'
    train_len = 4000
    N = 20

    model = Numila()
    corpus = utils.get_corpus(lang, kind)
    # Alternative corpus source, left disabled by the original author:
    #import pcfg
    #corpus = (s.split(' ') for s in pcfg.toy2())

    # Pull exactly train_len items; a shorter corpus raises StopIteration.
    train_corpus = []
    for _ in range(train_len):
        train_corpus.append(next(corpus))
    model.fit(train_corpus)

    # most_common yields (token, count) pairs; keep only the tokens.
    token_counts = Counter(utils.flatten(train_corpus))
    top_words, _ = zip(*token_counts.most_common(N))

    nodes = [model.graph[word] for word in top_words]

    # Square matrix: row i, column j holds 1 - similarity(node_i, node_j).
    data = []
    for row_node in nodes:
        data.append([1 - row_node.similarity(col_node) for col_node in nodes])

    # NOTE(review): mds is presumably a multidimensional-scaling plot helper
    # defined elsewhere -- confirm.
    mds(data, top_words)
Exemple #3
0
def foo():
    """Scratch routine: fit a graph model and print sample outputs.

    Fits a ``Numila`` model (GRAPH='graph', CHUNK_THRESHOLD=0.05) on the
    'train' split returned by ``get_corpora('English', 'syl', 5000)``, then
    prints one parse and three ``speak`` results for successive 'bleu_test'
    items, followed by the model's 'speak_chunks' debug entry.
    """
    # NOTE(review): this first model is replaced on the very next line and is
    # never used; it looks like a leftover configuration toggle -- confirm
    # before removing, in case the constructor has side effects.
    model = Numila(GRAPH='vector')
    model = Numila(GRAPH='graph', CHUNK_THRESHOLD=0.05)

    splits = get_corpora('English', 'syl', 5000)
    model.fit(splits['train'])

    # Consume the test split one item at a time: first a parse, then three
    # speak calls, each on a fresh item.
    test_items = iter(splits['bleu_test'])
    print(model.parse(next(test_items)))
    for _ in range(3):
        print(model.speak(next(test_items)))

    print(model._debug['speak_chunks'])
Exemple #4
0
def get_model(train_len=1000, lang='english', kind='word', **params):
    """Build a ``Numila`` model fitted on a corpus prefix (testing helper).

    Args:
        train_len: Number of items to pull from the corpus for fitting.
        lang: Language argument forwarded to ``corpora.get_corpus``.
        kind: Kind argument forwarded to ``corpora.get_corpus``.
        **params: Keyword arguments forwarded to the ``Numila`` constructor.

    Returns:
        Whatever ``model.fit`` returns for the collected training items.

    Raises:
        StopIteration: If the corpus yields fewer than ``train_len`` items.
    """
    model = Numila(**params)
    stream = corpora.get_corpus(lang, kind)

    training_items = []
    for _ in range(train_len):
        training_items.append(next(stream))

    fitted = model.fit(training_items)
    return fitted