Example #1
def save_problem_ci_results(ci_error, db, error, eval_data, gist, problem_ci,
                            results, should_merge):
    if not should_merge:
        # If problem_ci fails, don't save to aggregate bot scores collection
        if ci_error:
            log.error('Problem CI failed, not saving to bots '
                      'official scores as this is likely an issue '
                      'with the new version of the problem.')
            problem_ci.status = PROBLEM_CI_STATUS_FAILED
            problem_ci.error = ci_error
            update_pr_status_problem_ci(ci_error, problem_ci, eval_data)
        else:
            log.info('Problem CI not yet finished')

    else:
        # Aggregate data from bot evals now that they're done
        gists = BoxList()
        for bot_eval_key in problem_ci.bot_eval_keys:
            bot_eval = db.get(get_eval_db_key(bot_eval_key))
            save_to_bot_scores(
                bot_eval, bot_eval.eval_key,
                Box(score=bot_eval.results.score, eval_key=bot_eval.eval_key))
            gists.append(bot_eval.gist)
        problem_ci.gists = gists
        update_pr_status_problem_ci(error, problem_ci, eval_data)
        problem_ci.status = PROBLEM_CI_STATUS_PASSED
    db.set(problem_ci.id, problem_ci)
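All of these examples resolve database records through get_eval_db_key, whose definition is not included in the listing. It only needs to derive a stable DB key from the secret eval_key; a minimal sketch under that assumption (the 'eval_data_' prefix is made up, not the project's actual scheme):

def get_eval_db_key(eval_key: str) -> str:
    """Hypothetical sketch; the project's real implementation is not shown."""
    # Namespace the secret eval_key so it cannot collide with other
    # collections. The prefix here is an assumption.
    return f'eval_data_{eval_key}'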
Example #2
def reduce():
    result = dbox(problem_ci)
    # Refetch all bots in case scores came in after the initial request
    for bot_eval_key in problem_ci.bot_eval_keys:
        bot_eval = db.get(get_eval_db_key(bot_eval_key))
        past_bot_scores = get_past_bot_scores(bot_eval)
        # Redact the secret eval_key before logging any bot details
        bot_eval_no_eval_key = deepcopy(bot_eval)
        del bot_eval_no_eval_key['eval_key']
        log.info(f'Checking confidence interval for bot_eval '
                 f'{box2json(bot_eval_no_eval_key)}\n'
                 f'past scores: {box2json(past_bot_scores)}')
        if bot_eval.results.errors:
            result.error = str(bot_eval.results.errors)
            log.error(f'{result.error}: bot details '
                      f'{box2json(bot_eval_no_eval_key)}')
            return result
        in_interval, interval_info = score_within_confidence_interval(
            bot_eval, past_bot_scores)
        if not in_interval:
            result.error = (
                f'Score {bot_eval.results.score} for bot not within '
                f'confidence interval {interval_info.low} to '
                f'{interval_info.high} (mean: {interval_info.mean}), '
                f'problem CI failed')
            log.error(f'{result.error}: bot details '
                      f'{box2json(bot_eval_no_eval_key)}')
            return result
    # Every bot passed, so the problem CI run succeeds
    log.success('Scores for all bots within confidence interval, '
                'problem CI successful!')
    return result
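Example #2 relies on score_within_confidence_interval, which is not shown in this listing. A minimal sketch of such a gate, assuming past_bot_scores exposes a plain scores list (an assumed field name) and using a mean plus or minus two standard deviations; the project's actual statistics may differ:

from statistics import mean, stdev

from box import Box


def score_within_confidence_interval(bot_eval, past_bot_scores):
    """Hypothetical sketch: gate a new score against past score history."""
    scores = list(past_bot_scores.scores)  # assumed field name
    if len(scores) < 2:
        # Not enough history to form an interval; let the score through
        return True, Box(low=None, high=None, mean=None)
    avg = mean(scores)
    spread = 2 * stdev(scores)  # roughly a 95% interval under normality
    info = Box(low=avg - spread, high=avg + spread, mean=avg)
    return info.low <= bot_eval.results.score <= info.high, info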
Example #3
def get_eval_data(eval_key, db: DB) -> Box:
    db_key = get_eval_db_key(eval_key)
    # eval_key is secret, do not make public anywhere!
    eval_data = Box(db.get(db_key))
    if eval_data and eval_data.eval_key != eval_key:
        raise RuntimeError(INVALID_DB_KEY_STATE_MESSAGE)
    return eval_data
Example #4
def test_results_handler_already_complete():
    payload = Mockable.read_test_box('results_success.json')
    db = get_liaison_db_store()
    db_key = get_eval_db_key(payload.eval_key)
    eval_data = get_test_eval_data()
    db.set(db_key, eval_data)
    # Re-submitting results for an eval that has already finished should
    # be rejected
    error, results, eval_data, gist, _ = process_results(payload, db)
    assert error
    assert error.http_status_code == 400
    assert 'finished' in results
Example #5
def bots_done():
    for bot_eval_key in bot_eval_keys:
        bot = db.get(get_eval_db_key(bot_eval_key))
        log.info(f'Checking if bot is done... bot: {box2json(bot)}')
        if bot.status != constants.EVAL_STATUS_COMPLETE:
            log.info('Bot not done')
            return False
    # Reached only when every bot completed without an early return
    log.info('All bots done!')
    return True
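A completion predicate like bots_done is typically driven by a polling loop. A hypothetical caller, with illustrative timeout and interval values that are not from the project:

import time


def wait_for_bots(timeout_seconds=3600, poll_interval_seconds=10):
    """Hypothetical sketch: poll bots_done() until all bots finish."""
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        if bots_done():
            return True
        time.sleep(poll_interval_seconds)
    raise TimeoutError('Bots did not finish before the deadline')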
Example #6
def test_confirm_handler():
    payload = Mockable.read_test_box('request.json')
    db = get_liaison_db_store()
    db_key = get_eval_db_key(payload.eval_key)
    eval_data = get_test_eval_data()
    db.set(db_key, eval_data)
    error, resp = process_confirm(payload, db)
    eval_data = get_eval_data(payload.eval_key, db)
    assert not error
    assert resp.confirmed
    assert eval_data.status == constants.EVAL_STATUS_CONFIRMED
Example #7
def test_db_invalid_key_handler():
    payload = Mockable.read_test_box('request.json')
    db = get_liaison_db_store()
    db_key = get_eval_db_key(payload.eval_key)
    eval_data = get_test_eval_data()
    db.set(db_key, eval_data)
    try:
        # payload.eval_key from request.json does not match the eval_key
        # stored via get_test_eval_data(), putting the DB record in an
        # invalid state that process_results should detect
        error, results, eval_data, gist, _ = process_results(payload, db)
    except RuntimeError as e:
        assert INVALID_DB_KEY_STATE_MESSAGE == str(e)
    else:
        raise RuntimeError('Expected exception')
Example #8
def test_results_handler():
    payload = Mockable.read_test_box('results_success.json')
    db = get_liaison_db_store()
    db_key = get_eval_db_key(payload.eval_key)
    eval_data = get_test_eval_data()
    db.set(db_key, eval_data)
    error, results, eval_data, gist, _ = process_results(payload, db)
    assert not error
    assert 'finished' in results
    assert 'started' in results
    assert results.started < results.finished
    assert results.username == 'crizcraig'
    assert results.botname == 'forward-agent'
    assert results.problem == 'deepdrive/domain_randomization'
Example #9
def collect_bot_scores(
        docker_tag='deepdriveio/deepdrive:bot_domain_randomization'):
    """
    Catches up bot scores using deepdrive_jobs. This is a violation of
    data boundaries across deepdrive and botleague, and won't be possible
    for future independent problem providers. We are now storing results
    in the bot_eval data as well, to avoid such problems in the future.
    Alternatively, we could have just downloaded all results from
    gist/botleague-results, which is a source of truth, but this was easier.
    """
    job_db = get_db('deepdrive_jobs')
    ldb = get_liaison_db_store()
    for job in job_db.where('eval_spec.docker_tag', '==', docker_tag):
        eval_key = job.eval_spec.eval_key
        eval_data = ldb.get(get_eval_db_key(eval_key))
        score = Box(score=job.results.score, eval_key=eval_key)
        save_to_bot_scores(eval_data, eval_key, score)
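save_to_bot_scores appears in Examples #1 and #9 with the signature save_to_bot_scores(eval_data, eval_key, score), but its body is not included in this listing. A sketch of appending to a per-bot score history; the key scheme, the username/botname fields, and the assumption that db.get returns a falsey value for missing keys are all guesses:

from box import Box, BoxList


def save_to_bot_scores(eval_data: Box, eval_key: str, score: Box):
    """Hypothetical sketch: append a score record to a per-bot history."""
    db = get_liaison_db_store()
    # Assumed key scheme; the project's actual scheme is not shown
    scores_key = f'bot_scores_{eval_data.username}_{eval_data.botname}'
    bot_scores = db.get(scores_key) or Box(scores=BoxList())
    # Skip duplicates in case the same results are processed more than once
    if not any(s.eval_key == eval_key for s in bot_scores.scores):
        bot_scores.scores.append(score)
        db.set(scores_key, bot_scores)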
Example #10
def trigger_single_eval(self,
                        bot_def,
                        problem_def,
                        problem_id,
                        problem_ci_replace_sim_url=None,
                        container_postfix=None) -> PrResponse:
    endpoint = problem_def.endpoint
    if problem_ci_replace_sim_url:
        problem_def.problem_ci_replace_sim_url = problem_ci_replace_sim_url
    if container_postfix:
        problem_def.container_postfix = container_postfix
    eval_key = generate_rand_alphanumeric(25)
    eval_id = generate_rand_alphanumeric(25)
    eval_data = self.get_eval_data(eval_id, eval_key, problem_id, bot_def,
                                   problem_def)
    db = get_liaison_db_store()
    db_key = get_eval_db_key(eval_data.eval_key)
    db.set(db_key, eval_data)
    eval_data = db.get(db_key)  # Re-fetch so the server timestamp is resolved
    resp = self.request_eval(endpoint, eval_data)
    return resp
Example #11
def save_eval_data(eval_data: Box, db: DB):
    db_key = get_eval_db_key(eval_data.eval_key)
    # eval_key is secret, do not make public anywhere!
    db.set(db_key, eval_data)
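save_eval_data is the counterpart of get_eval_data from Example #3: both derive the DB key from the secret eval_key. A small illustrative round trip; the key value and fields below are made up:

from box import Box

db = get_liaison_db_store()
eval_data = Box(eval_key='k3yf0rillustr4tion0nly', status='started')
save_eval_data(eval_data, db)

# get_eval_data re-derives the same DB key and verifies that the stored
# record's eval_key matches, raising INVALID_DB_KEY_STATE_MESSAGE otherwise
fetched = get_eval_data('k3yf0rillustr4tion0nly', db)
assert fetched.eval_key == eval_data.eval_key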