def test_score():
    """Print the mean model score of test items, grouped by item length.

    Fits ``NGramModel(2)`` on the training corpus returned by
    ``main.corpora(4000, 100)``, scores every test item with the fitted
    model's ``map_score``, buckets the scores by ``len(item)``, and prints
    the resulting ``{length: mean score}`` dict.
    """
    from collections import defaultdict
    import main

    train_corpus, test_corpus = main.corpora(4000, 100)
    # ``fit`` must return the fitted model, since its result is used as one.
    model = NGramModel(2).fit(train_corpus)

    # Materialize the test corpus: it is consumed twice below
    # (once for scoring, once for the lengths).
    test_items = list(test_corpus)
    item_scores = model.map_score(test_items)

    # Bucket each score under the length of the item it belongs to.
    by_length = defaultdict(list)
    for item, item_score in zip(test_items, item_scores):
        by_length[len(item)].append(item_score)

    averages = {}
    for length, bucket in by_length.items():
        averages[length] = np.mean(bucket)
    print(averages)
def main():
    """Score the foil-augmented test set with every model and print samples.

    Builds the models with ``get_ngrams`` from the training corpus returned
    by ``main.corpora(4000, 10)``, loads the test corpus from
    ``'pickles/test100'``, passes it through ``main.comprehension.add_foils``,
    and scores the second field of every resulting entry with each model's
    ``map_score``. The first ten scores of the ``'bigram'``, ``'kndiscount'``,
    ``'ndiscount'`` and ``'addsmooth'`` models are printed, in that order.

    Raises:
        KeyError: if ``get_ngrams`` does not return a model under one of the
            four names printed below.
    """
    import main

    # Only the training corpus from this call is used; the test corpus is
    # loaded from disk below instead.
    train_corpus, _ = main.corpora(4000, 10)
    models = get_ngrams(train_corpus)

    # NOTE(review): presumably this is the joblib library re-exported by
    # ``main``; joblib.load is pickle-based and can execute arbitrary code,
    # so only load files from a trusted source.
    test_corpus = main.joblib.load('pickles/test100')
    full_test = main.comprehension.add_foils(test_corpus)

    # Every entry of full_test is unpacked as a 3-tuple; only the middle
    # field is scored.
    _, targets, _ = zip(*full_test)

    scores = {name: model.map_score(targets) for name, model in models.items()}
    for name in ('bigram', 'kndiscount', 'ndiscount', 'addsmooth'):
        print(scores[name][:10])