예제 #1
0
def protobowl(model, fold=BUZZER_DEV_FOLD):
    """Run ``simulate_game`` on every question of ``fold`` and pickle the results.

    Args:
        model: Object handed to ``get_buzzes``; its ``model_dir`` attribute
            is the directory the result file is written to.
        fold: Fold used to select the buzzes, the guess file and the
            questions.

    The collected values are stored as a DataFrame with the columns
    ``Possibility`` and ``Outcome`` in ``<model_dir>/<fold>_protobowl.pkl``.
    """
    buzzes = get_buzzes(model, fold)

    # eval
    guesser_dir = AbstractGuesser.output_path(
        'qanta.guesser.rnn', 'RnnGuesser', 0, '')
    guess_file = AbstractGuesser.guess_path(guesser_dir, fold, 'char')
    # NOTE: pickle.load can execute arbitrary code; only read files that
    # were produced by this pipeline.
    with open(guess_file, 'rb') as f:
        all_guesses = pickle.load(f)
    guesses_by_question = all_guesses.groupby('qanta_id')

    fold_questions = QuizBowlDataset(
        buzzer_train=True).questions_by_fold()[fold]

    records_by_qid = load_protobowl().groupby('qid')

    # Each call yields two sequences that are concatenated across questions.
    possibility, outcome = [], []
    for question in tqdm(fold_questions):
        pos, out = simulate_game(
            guesses_by_question, buzzes, records_by_qid, question)
        possibility.extend(pos)
        outcome.extend(out)

    result_df = pd.DataFrame(
        {'Possibility': possibility, 'Outcome': outcome})

    result_file = os.path.join(
        model.model_dir, '{}_protobowl.pkl'.format(fold))
    with open(result_file, 'wb') as f:
        pickle.dump(result_df, f)
예제 #2
0
def protobowl(model, fold=BUZZER_DEV_FOLD):
    """Run ``simulate_game`` on every question of ``fold`` and pickle the results.

    Args:
        model: Object handed to ``get_buzzes``; its ``model_dir`` attribute
            is the directory the result file is written to.
        fold: Fold used to select the buzzes, the guess file and the
            questions.

    The collected values are stored as a DataFrame with the columns
    ``Possibility`` and ``Outcome`` in ``<model_dir>/<fold>_protobowl.pkl``.
    """
    buzzes = get_buzzes(model, fold)

    # eval
    guesser_dir = AbstractGuesser.output_path("qanta.guesser.rnn", "RnnGuesser", 0, "")
    guess_file = AbstractGuesser.guess_path(guesser_dir, fold, "char")
    # NOTE: pickle.load can execute arbitrary code; only read files that
    # were produced by this pipeline.
    with open(guess_file, "rb") as f:
        all_guesses = pickle.load(f)
    guesses_by_question = all_guesses.groupby("qanta_id")

    fold_questions = QuizBowlDataset(buzzer_train=True).questions_by_fold()[fold]

    records_by_qid = load_protobowl().groupby("qid")

    # Each call yields two sequences that are concatenated across questions.
    possibility, outcome = [], []
    for question in tqdm(fold_questions):
        pos, out = simulate_game(guesses_by_question, buzzes, records_by_qid, question)
        possibility.extend(pos)
        outcome.extend(out)

    result_df = pd.DataFrame({"Possibility": possibility, "Outcome": outcome})

    result_file = os.path.join(model.model_dir, "{}_protobowl.pkl".format(fold))
    with open(result_file, "wb") as f:
        pickle.dump(result_df, f)
예제 #3
0
    def fit_curve(self):
        """Fit a degree-3 polynomial from relative position to weight and plot it.

        The weight at a position is ``1 - correct_before / total`` where
        ``correct_before`` is the running count of records whose ``result``
        is exactly ``True`` and ``total`` is the number of all records.
        Positions are bucketed to a resolution of 1/1000 before fitting.

        Side effects: prints the fitted regression coefficients, saves the
        plot to ``output/reporting/curve_score.pdf`` and draws it.

        Returns:
            The fitted ``Pipeline`` (polynomial features followed by a
            linear regression); call ``predict`` on it to get a weight.
        """
        df = load_protobowl()
        # convert prompt to false: anything that is not exactly True counts
        # as incorrect
        df.result = df.result.apply(lambda x: x is True)

        records = sorted(
            zip(df.relative_position.tolist(), df.result.tolist()),
            key=lambda record: record[0],
        )

        # For every 1/1000 bucket keep the running number of correct records
        # seen before the last record that falls into that bucket (the
        # count is stored before it is incremented, and later records in
        # the same bucket overwrite earlier ones).
        correct_before = dict()
        n_correct = 0
        for position, is_correct in records:
            correct_before[int(position * 1000)] = n_correct
            n_correct += is_correct
        buckets = sorted(correct_before.items(), key=lambda item: item[0])

        # The curve is normalised by the number of all records, not by the
        # number of correct ones.
        n_records = len(records)
        curve = [(bucket / 1000, 1 - count / n_records)
                 for bucket, count in buckets]
        X, y = list(map(list, zip(*curve)))

        X = np.asarray(X)
        y = np.asarray(y)
        degree = 3
        polynomial_features = PolynomialFeatures(degree=degree,
                                                 include_bias=False)
        linear_regression = LinearRegression()
        pipeline = Pipeline([
            ("polynomial_features", polynomial_features),
            ("linear_regression", linear_regression),
        ])
        # scikit-learn expects a 2-D feature matrix, hence the extra axis.
        pipeline.fit(X[:, np.newaxis], y)
        print(pipeline.steps[1][1].coef_)

        def get_weight(x):
            return pipeline.predict(np.asarray([[x]]))[0]

        ddf = pd.DataFrame({"x": X, "y": y})
        p0 = (ggplot(ddf, aes(x="x", y="y")) +
              geom_point(size=0.3, color="blue", alpha=0.5, shape="+") +
              stat_function(fun=get_weight, color="red", size=2, alpha=0.5) +
              labs(x="Position", y="Weight"))
        p0.save("output/reporting/curve_score.pdf")
        p0.draw()

        return pipeline
예제 #4
0
파일: curve_score.py 프로젝트: Pinafore/qb
    def fit_curve(self):
        """Fit a degree-3 polynomial from relative position to weight and plot it.

        The weight at a position is ``1 - correct_before / total`` where
        ``correct_before`` is the running count of records whose ``result``
        is exactly ``True`` and ``total`` is the number of all records.
        Positions are bucketed to a resolution of 1/1000 before fitting.

        Side effects: prints the fitted regression coefficients, saves the
        plot to ``output/reporting/curve_score.pdf`` and draws it.

        Returns:
            The fitted ``Pipeline`` (polynomial features followed by a
            linear regression); call ``predict`` on it to get a weight.
        """
        # The second value returned by load_protobowl is not needed here.
        df, _ = load_protobowl()
        # convert prompt to false: anything that is not exactly True counts
        # as incorrect
        df.result = df.result.apply(lambda x: x is True)

        records = sorted(
            zip(df.relative_position.tolist(), df.result.tolist()),
            key=lambda record: record[0])

        # For every 1/1000 bucket keep the running number of correct records
        # seen before the last record that falls into that bucket (the
        # count is stored before it is incremented, and later records in
        # the same bucket overwrite earlier ones).
        correct_before = dict()
        n_correct = 0
        for position, is_correct in records:
            correct_before[int(position * 1000)] = n_correct
            n_correct += is_correct
        buckets = sorted(correct_before.items(), key=lambda item: item[0])

        # The curve is normalised by the number of all records, not by the
        # number of correct ones.
        n_records = len(records)
        curve = [(bucket / 1000, 1 - count / n_records)
                 for bucket, count in buckets]
        X, y = list(map(list, zip(*curve)))

        X = np.asarray(X)
        y = np.asarray(y)
        degree = 3
        polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
        linear_regression = LinearRegression()
        pipeline = Pipeline([('polynomial_features', polynomial_features),
                             ('linear_regression', linear_regression)])
        # scikit-learn expects a 2-D feature matrix, hence the extra axis.
        pipeline.fit(X[:, np.newaxis], y)
        print(pipeline.steps[1][1].coef_)

        def get_weight(x):
            return pipeline.predict(np.asarray([[x]]))[0]

        ddf = pd.DataFrame({'x': X, 'y': y})
        p0 = ggplot(ddf, aes(x='x', y='y')) \
            + geom_point(size=0.3, color='blue', alpha=0.5, shape='+') \
            + stat_function(fun=get_weight, color='red', size=2, alpha=0.5) \
            + labs(x='Position', y='Weight')
        p0.save('output/reporting/curve_score.pdf')
        p0.draw()

        return pipeline
예제 #5
0
def main():
    """Run all metrics on the ``BUZZER_DEV_FOLD`` questions and print their means.

    Loads the fold's questions, guesser outputs, pickled buzzer outputs and
    the protobowl records, runs ``run_all_metrics`` on every question in a
    pool of 8 worker processes, and prints one list of mean scores for
    ``_protobowl_scores`` followed by one for ``_curve_scores``.
    """
    fold = BUZZER_DEV_FOLD

    # load questions
    print('loading questions')
    questions = QuizBowlDataset(buzzer_train=True).questions_by_fold()[fold]

    # load guesser outputs, keyed by the first element of each entry
    print('loading guesser outputs')
    guesses = {x[0]: x for x in read_data(fold)}

    # load buzzer outputs
    print('loading buzzer outputs')
    # NOTE: pickle.load can execute arbitrary code; only read files that
    # were produced by this pipeline.
    with open(buzzes_dir.format(fold), 'rb') as f:
        buzzes = pickle.load(f)

    # load protobowl records
    print('loading protobowl records')
    df, _ = load_protobowl()
    record_groups = df.groupby('qid')

    metrics = [_protobowl_scores, _curve_scores]
    worker = partial(run_all_metrics, guesses, buzzes, record_groups, metrics)
    # The context manager shuts the worker processes down once the blocking
    # map call has returned, instead of leaking them.
    with Pool(8) as pool:
        scores = pool.map(worker, questions)

    def column_means(per_question_scores):
        # Transpose question-major scores into one column per score, drop
        # the missing (None) entries and average what is left.
        return [np.mean([x for x in column if x is not None])
                for column in zip(*per_question_scores)]

    # Transpose so that all_scores[i] holds the per-question results of
    # metrics[i].
    all_scores = list(map(list, zip(*scores)))

    print(column_means(all_scores[0]))
    print(column_means(all_scores[1]))
예제 #6
0
파일: end_to_end.py 프로젝트: Pinafore/qb
def main():
    """Run all metrics on the ``BUZZER_DEV_FOLD`` questions and print their means.

    Loads the fold's questions, guesser outputs, pickled buzzer outputs and
    the protobowl records, runs ``run_all_metrics`` on every question in a
    pool of 8 worker processes, and prints one list of mean scores for
    ``_protobowl_scores`` followed by one for ``_curve_scores``.
    """
    fold = BUZZER_DEV_FOLD

    # load questions
    print('loading questions')
    questions = QuizBowlDataset(buzzer_train=True).questions_by_fold()[fold]

    # load guesser outputs, keyed by the first element of each entry
    print('loading guesser outputs')
    guesses = {x[0]: x for x in read_data(fold)}

    # load buzzer outputs
    print('loading buzzer outputs')
    # NOTE: pickle.load can execute arbitrary code; only read files that
    # were produced by this pipeline.
    with open(buzzes_dir.format(fold), 'rb') as f:
        buzzes = pickle.load(f)

    # load protobowl records
    print('loading protobowl records')
    df, _ = load_protobowl()
    record_groups = df.groupby('qid')

    metrics = [_protobowl_scores, _curve_scores]
    worker = partial(run_all_metrics, guesses, buzzes, record_groups, metrics)
    # The context manager shuts the worker processes down once the blocking
    # map call has returned, instead of leaking them.
    with Pool(8) as pool:
        scores = pool.map(worker, questions)

    def column_means(per_question_scores):
        # Transpose question-major scores into one column per score, drop
        # the missing (None) entries and average what is left.
        return [np.mean([x for x in column if x is not None])
                for column in zip(*per_question_scores)]

    # Transpose so that all_scores[i] holds the per-question results of
    # metrics[i].
    all_scores = list(map(list, zip(*scores)))

    print(column_means(all_scores[0]))
    print(column_means(all_scores[1]))
예제 #7
-2
파일: eval.py 프로젝트: Pinafore/qb
def protobowl(model, fold=BUZZER_DEV_FOLD):
    """Run ``simulate_game`` on every question of ``fold`` and pickle the results.

    Args:
        model: Object handed to ``get_buzzes``; its ``model_dir`` attribute
            is the directory the result file is written to.
        fold: Fold used to select the buzzes, the guess file and the
            questions.

    The collected values are stored as a DataFrame with the columns
    ``Possibility`` and ``Outcome`` in ``<model_dir>/<fold>_protobowl.pkl``.
    """
    buzzes = get_buzzes(model, fold)

    # eval
    guesser_dir = AbstractGuesser.output_path(
        'qanta.guesser.rnn', 'RnnGuesser', 0, '')
    guess_file = AbstractGuesser.guess_path(guesser_dir, fold, 'char')
    # NOTE: pickle.load can execute arbitrary code; only read files that
    # were produced by this pipeline.
    with open(guess_file, 'rb') as f:
        all_guesses = pickle.load(f)
    guesses_by_question = all_guesses.groupby('qanta_id')

    fold_questions = QuizBowlDataset(
        buzzer_train=True).questions_by_fold()[fold]

    records_by_qid = load_protobowl().groupby('qid')

    # Each call yields two sequences that are concatenated across questions.
    possibility, outcome = [], []
    for question in tqdm(fold_questions):
        pos, out = simulate_game(
            guesses_by_question, buzzes, records_by_qid, question)
        possibility.extend(pos)
        outcome.extend(out)

    result_df = pd.DataFrame(
        {'Possibility': possibility, 'Outcome': outcome})

    result_file = os.path.join(
        model.model_dir, '{}_protobowl.pkl'.format(fold))
    with open(result_file, 'wb') as f:
        pickle.dump(result_df, f)