Esempio n. 1
0
def test_util_find_on_s3_functions():
    """Check the S3 file-finding helpers of ``emmaa.util`` on the test bucket.

    Covers listing with and without an extension filter, listing with a
    longer key prefix, latest / n-th latest lookups, and file counting.
    """
    # Local imports are recommended when using moto
    from emmaa.util import (sort_s3_files_by_date_str, find_latest_s3_file,
                            find_nth_latest_s3_file,
                            find_number_of_files_on_s3)
    folder = 'results/test/'
    results_prefix = 'results/test/results_'
    # Bucket has mm (pkl) and results (json) files, both in results folder
    setup_bucket(add_mm=True, add_results=True)

    # No filter: both files are returned
    everything = sort_s3_files_by_date_str(TEST_BUCKET_NAME, folder)
    assert len(everything) == 2
    # Filter by extension
    json_only = sort_s3_files_by_date_str(TEST_BUCKET_NAME, folder, '.json')
    assert len(json_only) == 1
    # Filter by a longer prefix
    results_only = sort_s3_files_by_date_str(TEST_BUCKET_NAME, results_prefix)
    assert len(results_only) == 1

    assert find_latest_s3_file(TEST_BUCKET_NAME, results_prefix)
    # With n=1 only the folder-wide prefix yields a file; the longer prefix
    # matches a single file, so nothing is found there
    assert not find_nth_latest_s3_file(1, TEST_BUCKET_NAME, results_prefix)
    assert find_nth_latest_s3_file(1, TEST_BUCKET_NAME, folder)

    # Counts have to agree with the listings above
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, folder) == 2
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, results_prefix) == 1
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, folder, '.json') == 1
Esempio n. 2
0
def test_save_load_update_model_manager():
    """Check saving, loading and updating a ModelManager on the test bucket.

    The number of ``model_manager_*.pkl`` files under ``results/test/`` is
    expected to go 0 -> 1 -> 2 across save and update.
    """
    # Local imports are recommended when using moto
    from emmaa.model_tests import (ModelManager, save_model_manager_to_s3,
                                   load_model_manager_from_s3,
                                   update_model_manager_on_s3)
    from emmaa.util import find_number_of_files_on_s3

    def count_mm_files():
        # Number of pickled model managers currently stored for 'test'
        return find_number_of_files_on_s3(
            TEST_BUCKET_NAME, 'results/test/model_manager_', '.pkl')

    setup_bucket(add_model=True)

    # Nothing stored yet, so loading should give None
    assert count_mm_files() == 0
    missing_mm = load_model_manager_from_s3(model_name='test',
                                            bucket=TEST_BUCKET_NAME)
    assert missing_mm is None

    # Save a model manager and load it back
    manager = ModelManager(create_model())
    save_model_manager_to_s3('test', manager, bucket=TEST_BUCKET_NAME)
    restored_mm = load_model_manager_from_s3(model_name='test',
                                             bucket=TEST_BUCKET_NAME)
    assert restored_mm
    assert count_mm_files() == 1

    # Update should create a new file if there's at least one second
    # difference
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    assert count_mm_files() == 2
Esempio n. 3
0
def test_generate_stats_on_s3():
    """Check model and test stats generation on the test bucket.

    Stats are generated twice: once with a single set of results (no
    previous round expected) and once after a new model manager and a new
    results file were added (previous round expected).
    """
    # Local imports are recommended when using moto
    from emmaa.analyze_tests_results import generate_stats_on_s3
    from emmaa.util import find_number_of_files_on_s3, make_date_str
    from emmaa.model_tests import update_model_manager_on_s3

    model_stats_prefix = 'model_stats/test/model_stats_'
    test_stats_prefix = 'stats/test/test_stats_'
    s3_kwargs = dict(upload_stats=True, bucket=TEST_BUCKET_NAME)

    # Try with only one set of results first (as for new model/test)
    client = setup_bucket(add_results=True, add_mm=True, add_model=True)

    model_gen = generate_stats_on_s3('test', 'model', **s3_kwargs)
    assert model_gen.latest_round
    assert not model_gen.previous_round
    assert not model_gen.previous_json_stats
    assert find_number_of_files_on_s3(
        TEST_BUCKET_NAME, model_stats_prefix) == 1

    tests_gen = generate_stats_on_s3('test', 'tests', 'simple_tests',
                                     **s3_kwargs)
    assert tests_gen.latest_round
    assert not tests_gen.previous_round
    assert not tests_gen.previous_json_stats
    assert find_number_of_files_on_s3(
        TEST_BUCKET_NAME, test_stats_prefix) == 1

    # Now add new results and new mm; sleep first so the new files get a
    # date string that differs from the existing ones
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    payload = json.dumps(previous_results, indent=1)
    new_key = f'results/test/results_simple_tests_{make_date_str()}.json'
    client.put_object(Body=payload, Bucket=TEST_BUCKET_NAME, Key=new_key)

    # This time a previous round and previous stats should be found
    model_gen = generate_stats_on_s3('test', 'model', **s3_kwargs)
    assert model_gen.latest_round
    assert model_gen.previous_round
    assert model_gen.previous_json_stats
    assert find_number_of_files_on_s3(
        TEST_BUCKET_NAME, model_stats_prefix) == 2

    tests_gen = generate_stats_on_s3('test', 'tests', 'simple_tests',
                                     **s3_kwargs)
    assert tests_gen.latest_round
    assert tests_gen.previous_round
    assert tests_gen.previous_json_stats
    assert find_number_of_files_on_s3(
        TEST_BUCKET_NAME, test_stats_prefix) == 2
Esempio n. 4
0
def get_model_tests_page(model):
    """Render the detailed results page for one test of a model.

    The following values are read from the request query string:

    - ``model_type``: has to be one of ``ALL_MODEL_TYPES``.
    - ``test_hash``: key of the test in the ``all_test_results`` of the
      loaded test stats.
    - ``test_corpus``: required; passed on as ``tests`` when loading stats.
    - ``date``: passed on to ``get_model_stats``.

    Parameters
    ----------
    model :
        Model identifier, used to load the stats and to build the
        ``stats/{model}/test_stats_{test_corpus}_`` S3 prefix.

    Returns
    -------
    The rendered ``tests_template.html`` template.

    The request is aborted with a 404 response if the test corpus is
    missing, the model type is unknown, no stats were found, the test hash
    is not in the stats, or the test has no result for the requested model
    type.
    """
    model_type = request.args.get('model_type')
    test_hash = request.args.get('test_hash')
    test_corpus = request.args.get('test_corpus')
    if not test_corpus:
        abort(Response('Test corpus has to be provided', 404))
    date = request.args.get('date')
    if model_type not in ALL_MODEL_TYPES:
        abort(Response(f'Model type {model_type} does not exist', 404))
    test_stats, file_key = get_model_stats(model,
                                           'test',
                                           tests=test_corpus,
                                           date=date)
    if not test_stats:
        abort(Response(f'Data for {model} for {date} was not found', 404))
    try:
        current_test = \
            test_stats['test_round_summary']['all_test_results'][test_hash]
    except KeyError:
        abort(Response(f'Result for this test does not exist for {date}', 404))
    # Only offer the model types that are present in this round's summary
    current_model_types = [
        mt for mt in ALL_MODEL_TYPES if mt in test_stats['test_round_summary']
    ]
    test = current_test["test"]
    # A model type can be valid globally but absent from this test's
    # results; answer with a 404 like the other missing-data cases instead
    # of letting the KeyError surface as a server error
    try:
        test_status, path_list = current_test[model_type]
    except KeyError:
        abort(Response(f'Result for model type {model_type} does not exist '
                       f'for this test', 404))
    correct, incorrect = _label_curations()
    # path_list is only annotated when it is a list of path dicts; any other
    # value is passed to the template unchanged
    if isinstance(path_list, list):
        for path in path_list:
            for edge in path['edge_list']:
                for stmt in edge['stmts']:
                    cur = ''
                    # The first item of a statement entry is its URL
                    url = stmt[0]
                    if 'stmt_hash' in url:
                        stmt_hashes = parse.parse_qs(
                            parse.urlparse(url).query)['stmt_hash']
                        cur = _set_curation(stmt_hashes, correct, incorrect)
                    # The curation label (possibly empty) is appended in
                    # place to every statement entry
                    stmt.append(cur)
    latest_date = get_latest_available_date(model, test_corpus)
    prefix = f'stats/{model}/test_stats_{test_corpus}_'
    cur_ix = find_index_of_s3_file(file_key, EMMAA_BUCKET_NAME, prefix)
    # NOTE(review): prev uses n=cur_ix+1 and next uses n=cur_ix-1, so index 0
    # is presumably the most recent stats file - confirm against
    # find_index_of_s3_file / last_updated_date
    added_hashes = test_stats['tests_delta']['applied_hashes_delta']['added']
    prev_date = None
    # No previous date for a test listed as added in this round's delta, or
    # when there is no stats file after the current index. The S3 count is
    # only requested when the first condition does not already decide it.
    if test_hash not in added_hashes and \
            (cur_ix + 1) < find_number_of_files_on_s3(EMMAA_BUCKET_NAME,
                                                      prefix, '.json'):
        prev_date = last_updated_date(model,
                                      'test_stats',
                                      'date',
                                      tests=test_corpus,
                                      extension='.json',
                                      n=(cur_ix + 1),
                                      bucket=EMMAA_BUCKET_NAME)
    next_date = None
    if cur_ix > 0:
        next_date = last_updated_date(model,
                                      'test_stats',
                                      'date',
                                      tests=test_corpus,
                                      extension='.json',
                                      n=(cur_ix - 1),
                                      bucket=EMMAA_BUCKET_NAME)
    return render_template('tests_template.html',
                           link_list=link_list,
                           model=model,
                           model_type=model_type,
                           all_model_types=current_model_types,
                           test_hash=test_hash,
                           test=test,
                           test_status=test_status,
                           path_list=path_list,
                           formatted_names=FORMATTED_TYPE_NAMES,
                           date=date,
                           latest_date=latest_date,
                           prev=prev_date,
                           next=next_date)