Example #1
0
def evaluate(ckp_dir):
    """Print before/after guess accuracy for a pickled checkpoint.

    The checkpoint is a dict keyed by question number. Each value holds
    ``guesses_before`` and ``guesses_after`` mappings from guess to score.
    Five rates are printed, one per line: top-1 accuracy before and after,
    how often the top guess is unchanged, and top-5 accuracy before and
    after. Rates are averaged over the entries present in the checkpoint.

    Args:
        ckp_dir: Path of the pickle file to read (despite the name it is
            opened as a file, not a directory).

    Raises:
        KeyError: If a checkpoint key is not a question number of the
            "guessdev" fold.
    """
    db = QuizBowlDataset(guesser_train=True, buzzer_train=True)
    questions = {q.qnum: q for q in db.questions_in_folds(["guessdev"])}

    # NOTE: unpickling can execute arbitrary code; only load checkpoint
    # files that were produced by a trusted run.
    with open(ckp_dir, "rb") as f:
        checkpoint = pickle.load(f)

    descriptions = [
        "accuracy before",
        "accuracy after",
        "before after match",
        "top 5 accuracy before",
        "top 5 accuracy after",
    ]
    scores = [0] * len(descriptions)

    def ranked(guesses):
        # Ascending sort followed by a reversal (rather than reverse=True)
        # keeps the original ordering among guesses with equal scores.
        ordered = sorted(guesses.items(), key=lambda kv: kv[1])[::-1]
        return [guess for guess, _ in ordered]

    for qnum, entry in checkpoint.items():
        page = questions[qnum].page
        before = ranked(entry["guesses_before"])
        after = ranked(entry["guesses_after"])
        # One-element slices instead of [0] so that an empty guess mapping
        # counts as a miss instead of raising IndexError.  Two empty guess
        # lists compare equal and therefore count as a match.
        top_before, top_after = before[:1], after[:1]
        scores[0] += top_before == [page]  # accuracy before
        scores[1] += top_after == [page]  # accuracy after
        scores[2] += top_before == top_after  # top 1 match before / after
        scores[3] += page in before[:5]  # top 5 accuracy before
        scores[4] += page in after[:5]  # top 5 accuracy after

    # Normalise by the number of entries actually scored; an empty
    # checkpoint reports 0.0 for every rate instead of dividing by zero.
    n_scored = len(checkpoint)
    scores = [s / n_scored if n_scored else 0.0 for s in scores]
    for s, d in zip(scores, descriptions):
        print(d, s)
Example #2
0
def main():
    """Print the guesser's output for the first question of the guessdev fold.

    NOTE(review): this snippet appears to be truncated after the print call.
    `guesser` is not defined in the visible code and is presumably a
    module-level object -- confirm against the full file.
    """
    fold = 'guessdev'
    # The meaning of the positional ``1`` is not visible here -- TODO confirm.
    db = QuizBowlDataset(1, guesser_train=True, buzzer_train=True)
    questions = db.questions_in_folds([fold])
    # NOTE(review): `first_n` is not used in the visible part of this function.
    first_n = lambda x: len(x)

    # Join the values of the first question's `text` mapping with spaces and
    # print whatever guess_single returns for that string.
    print(guesser.guess_single(' '.join(questions[0].text.values())))
    '''
Example #3
0
def main():
    """Print the guesser's output for the first question of the guessdev fold.

    NOTE(review): this snippet appears to be truncated after the print call.
    `guesser` is not defined in the visible code and is presumably a
    module-level object -- confirm against the full file.
    """
    fold = 'guessdev'
    # The meaning of the positional ``1`` is not visible here -- TODO confirm.
    db = QuizBowlDataset(1, guesser_train=True, buzzer_train=True)
    questions = db.questions_in_folds([fold])
    # NOTE(review): `first_n` is not used in the visible part of this function.
    first_n = lambda x: len(x)

    # Join the values of the first question's `text` mapping with spaces and
    # print whatever guess_single returns for that string.
    print(guesser.guess_single(' '.join(questions[0].text.values())))
    
    '''
Example #4
0
File: stats.py Project: xxlatgh/qb
def compute_question_stats(question_db_path: str):
    """Pickle the mean and standard deviation of question word counts.

    Every question in the 'train' and 'dev' folds is flattened to text and
    split on whitespace; the ``(mean, std)`` tuple of the resulting lengths
    is written to ``SENTENCE_STATS``.

    Args:
        question_db_path: Passed to ``QuizBowlDataset`` as ``qb_question_db``.
    """
    dataset = QuizBowlDataset(5, qb_question_db=question_db_path)

    word_counts = []
    for question in dataset.questions_in_folds(('train', 'dev')):
        word_counts.append(len(question.flatten_text().split()))

    # Both statistics are computed before the output file is opened.
    stats = (np.mean(word_counts), np.std(word_counts))

    with safe_open(SENTENCE_STATS, 'wb') as out:
        pickle.dump(stats, out)
Example #5
0
def main(questions, n_keep, ckp_dir):
    """Run greedy removal on the guessdev fold, checkpoint it, and evaluate.

    For every guessdev question the flattened text is guessed on, passed
    through ``greedy_remove`` and the before/after texts and guesses plus
    the removed items are stored under the question number.  The resulting
    dict is pickled to ``safe_path(ckp_dir)`` and then handed to
    ``evaluate``.

    Args:
        questions: Ignored; it is replaced by the guessdev fold below.
        n_keep: Forwarded unchanged to ``greedy_remove``.
        ckp_dir: Path of the checkpoint pickle to write and evaluate.
    """
    dataset = QuizBowlDataset(guesser_train=True, buzzer_train=True)
    questions = {
        q.qnum: q for q in dataset.questions_in_folds(['guessdev'])
    }

    checkpoint = {}
    for qnum, question in questions.items():
        original_text = question.flatten_text()
        original_guesses = guesser.guess_single(original_text)
        reduced_text, reduced_guesses, removed = greedy_remove(
            original_text, original_guesses, n_keep)
        # Key order mirrors the order in which the fields are produced.
        checkpoint[qnum] = {
            'text_before': original_text,
            'text_after': reduced_text,
            'guesses_before': original_guesses,
            'guesses_after': reduced_guesses,
            'removed': removed,
        }

    with open(safe_path(ckp_dir), 'wb') as out:
        pickle.dump(checkpoint, out)

    evaluate(ckp_dir)
Example #6
0
def main(questions, n_keep, ckp_dir):
    """Build, save, and evaluate a greedy-removal checkpoint for guessdev.

    Each guessdev question is flattened, guessed on, and reduced with
    ``greedy_remove``.  The texts and guesses from before and after the
    reduction, together with what was removed, are recorded per question
    number.  The record dict is pickled to ``safe_path(ckp_dir)`` and
    ``evaluate`` is then run on ``ckp_dir``.

    Args:
        questions: Ignored; overwritten with the guessdev questions.
        n_keep: Passed through to ``greedy_remove``.
        ckp_dir: Location of the checkpoint pickle.
    """
    dataset = QuizBowlDataset(guesser_train=True, buzzer_train=True)
    fold_questions = dataset.questions_in_folds(["guessdev"])
    questions = {item.qnum: item for item in fold_questions}

    records = {}
    for qnum, question in questions.items():
        full_text = question.flatten_text()
        full_guesses = guesser.guess_single(full_text)
        outcome = greedy_remove(full_text, full_guesses, n_keep)
        kept_text, kept_guesses, removed = outcome
        records[qnum] = {
            "text_before": full_text,
            "text_after": kept_text,
            "guesses_before": full_guesses,
            "guesses_after": kept_guesses,
            "removed": removed,
        }

    checkpoint = records
    with open(safe_path(ckp_dir), "wb") as handle:
        pickle.dump(checkpoint, handle)

    evaluate(ckp_dir)
Example #7
0
def evaluate(ckp_dir):
    """Print before/after guess accuracy for a pickled checkpoint.

    The checkpoint is a dict keyed by question number. Each value holds
    ``guesses_before`` and ``guesses_after`` mappings from guess to score.
    Five rates are printed, one per line: top-1 accuracy before and after,
    how often the top guess is unchanged, and top-5 accuracy before and
    after. Rates are averaged over the entries present in the checkpoint.

    Args:
        ckp_dir: Path of the pickle file to read (despite the name it is
            opened as a file, not a directory).

    Raises:
        KeyError: If a checkpoint key is not a question number of the
            'guessdev' fold.
    """
    db = QuizBowlDataset(guesser_train=True, buzzer_train=True)
    questions = {q.qnum: q for q in db.questions_in_folds(['guessdev'])}

    # NOTE: unpickling can execute arbitrary code; only load checkpoint
    # files that were produced by a trusted run.
    with open(ckp_dir, 'rb') as f:
        checkpoint = pickle.load(f)

    descriptions = ['accuracy before', 'accuracy after', 'before after match',
                    'top 5 accuracy before', 'top 5 accuracy after']
    scores = [0] * len(descriptions)

    def ranked(guesses):
        # Ascending sort followed by a reversal (rather than reverse=True)
        # keeps the original ordering among guesses with equal scores.
        ordered = sorted(guesses.items(), key=lambda kv: kv[1])[::-1]
        return [guess for guess, _ in ordered]

    for qnum, entry in checkpoint.items():
        page = questions[qnum].page
        before = ranked(entry['guesses_before'])
        after = ranked(entry['guesses_after'])
        # One-element slices instead of [0] so that an empty guess mapping
        # counts as a miss instead of raising IndexError.  Two empty guess
        # lists compare equal and therefore count as a match.
        top_before, top_after = before[:1], after[:1]
        scores[0] += top_before == [page]  # accuracy before
        scores[1] += top_after == [page]  # accuracy after
        scores[2] += top_before == top_after  # top 1 match before / after
        scores[3] += page in before[:5]  # top 5 accuracy before
        scores[4] += page in after[:5]  # top 5 accuracy after

    # Normalise by the number of entries actually scored; an empty
    # checkpoint reports 0.0 for every rate instead of dividing by zero.
    n_scored = len(checkpoint)
    scores = [s / n_scored if n_scored else 0.0 for s in scores]
    for s, d in zip(scores, descriptions):
        print(d, s)