def test_batch_score():
    """Check that a batch-parsing model prefers a sentence it has seen.

    A model built with ``PARSE='batch'`` and ``CHUNK_THRESHOLD=2`` parses
    the same sentence five times; afterwards that sentence must receive a
    strictly higher score than a reordering of the same words.
    """
    seen_sentence = 'the dog ate the steak'
    shuffled_sentence = 'the ate dog steak the'
    exposures = 5

    model = Numila(PARSE='batch', CHUNK_THRESHOLD=2)
    for _ in range(exposures):
        model.parse(seen_sentence)

    # Score the seen sentence first, then the shuffled one, as the
    # original comparison did.
    seen_score = model.score(seen_sentence)
    shuffled_score = model.score(shuffled_sentence)
    assert seen_score > shuffled_score
def main():
    """Fit a default model and pass word dissimilarities to ``mds``.

    Takes the first 4000 items of the English word corpus, fits a default
    ``Numila`` on them, looks up the graph nodes of the 20 most frequent
    tokens and builds a square matrix of ``1 - similarity`` between every
    pair of those nodes. The matrix and the matching word labels are then
    handed to ``mds``.
    """
    lang, kind = 'English', 'word'
    train_len = 4000
    N = 20

    model = Numila()
    corpus = utils.get_corpus(lang, kind)
    # Alternative toy corpus, left disabled as in the original:
    #   import pcfg
    #   corpus = (s.split(' ') for s in pcfg.toy2())

    # Pull exactly train_len items off the corpus iterator.
    train_corpus = []
    for _ in range(train_len):
        train_corpus.append(next(corpus))
    model.fit(train_corpus)

    # Keep only the words of the N most common (word, count) pairs.
    frequencies = Counter(utils.flatten(train_corpus))
    top_words, _ = zip(*frequencies.most_common(N))

    nodes = [model.graph[w] for w in top_words]

    # Row i, column j holds 1 - similarity(node_i, node_j).
    data = []
    for n1 in nodes:
        row = [1 - n1.similarity(n2) for n2 in nodes]
        data.append(row)

    mds(data, top_words)
def foo():
    """Scratch routine: fit a graph-based model and print sample output.

    Fits a ``Numila`` with ``GRAPH='graph'`` and ``CHUNK_THRESHOLD=0.05``
    on the ``'train'`` entry of the English syllable corpora, then prints
    one parse and three ``speak`` results for successive items of the
    ``'bleu_test'`` entry, followed by the model's ``speak_chunks`` debug
    record.
    """
    # NOTE(review): this vector-graph instance is replaced on the very
    # next line and never used; kept so construction side effects, if
    # any, are unchanged.
    model = Numila(GRAPH='vector')
    model = Numila(GRAPH='graph', CHUNK_THRESHOLD=0.05)

    corp = get_corpora('English', 'syl', 5000)
    model.fit(corp['train'])

    test = iter(corp['bleu_test'])
    print(model.parse(next(test)))
    # Three further test items go through speak(), one per print.
    for _ in range(3):
        print(model.speak(next(test)))
    print(model._debug['speak_chunks'])
def get_model(train_len=1000, lang='english', kind='word', **params):
    """Build a ``Numila`` from ``params`` and fit it on a corpus prefix.

    Intended for testing.

    Args:
        train_len: Number of items to take from the front of the corpus.
        lang: Language name passed to ``corpora.get_corpus``.
        kind: Corpus kind passed to ``corpora.get_corpus``.
        **params: Keyword arguments forwarded unchanged to ``Numila``.

    Returns:
        Whatever ``model.fit`` returns (presumably the fitted model --
        confirm against ``Numila.fit``).
    """
    # NOTE(review): the default here is lowercase 'english' while other
    # functions in this file pass 'English' -- confirm the corpus loader
    # treats them the same.
    model = Numila(**params)
    corpus = corpora.get_corpus(lang, kind)

    # Pull exactly train_len items off the corpus iterator.
    train_corpus = []
    for _ in range(train_len):
        train_corpus.append(next(corpus))

    return model.fit(train_corpus)