def test_util_find_on_s3_functions():
    """Check the S3 listing helpers from emmaa.util against a test bucket.

    The same two files are queried by folder, by file extension and by a
    longer key prefix, through each of the lookup helpers.
    """
    # Local imports are recommended when using moto
    from emmaa.util import (
        sort_s3_files_by_date_str,
        find_latest_s3_file,
        find_nth_latest_s3_file,
        find_number_of_files_on_s3,
    )
    # Bucket has mm (pkl) and results (json) files, both in results folder
    setup_bucket(add_mm=True, add_results=True)
    folder = 'results/test/'
    results_prefix = 'results/test/results_'

    # Whole folder: both files are listed
    everything = sort_s3_files_by_date_str(TEST_BUCKET_NAME, folder)
    assert len(everything) == 2
    # Restricting by extension leaves only the json file
    json_only = sort_s3_files_by_date_str(TEST_BUCKET_NAME, folder, '.json')
    assert len(json_only) == 1
    # A longer prefix narrows the listing as well
    by_prefix = sort_s3_files_by_date_str(TEST_BUCKET_NAME, results_prefix)
    assert len(by_prefix) == 1

    assert find_latest_s3_file(TEST_BUCKET_NAME, results_prefix)
    # Only one key matches the longer prefix, so nothing is found at n=1,
    # while the folder with two files does yield an entry there
    assert not find_nth_latest_s3_file(1, TEST_BUCKET_NAME, results_prefix)
    assert find_nth_latest_s3_file(1, TEST_BUCKET_NAME, folder)

    # Counts have to agree with the listings above
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, folder) == 2
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, results_prefix) == 1
    assert find_number_of_files_on_s3(TEST_BUCKET_NAME, folder, '.json') == 1
def test_save_load_update_model_manager():
    """Save, load and update a ModelManager on S3, counting stored pickles."""
    # Local imports are recommended when using moto
    from emmaa.model_tests import (
        ModelManager,
        save_model_manager_to_s3,
        load_model_manager_from_s3,
        update_model_manager_on_s3,
    )
    from emmaa.util import find_number_of_files_on_s3
    setup_bucket(add_model=True)

    def stored_managers():
        # Number of model manager pickles currently in the test bucket
        return find_number_of_files_on_s3(
            TEST_BUCKET_NAME, 'results/test/model_manager_', '.pkl')

    # Nothing is stored yet, so loading has to give None
    assert stored_managers() == 0
    missing = load_model_manager_from_s3(
        model_name='test', bucket=TEST_BUCKET_NAME)
    assert missing is None

    # Store a freshly built manager and read it back
    manager = ModelManager(create_model())
    save_model_manager_to_s3('test', manager, bucket=TEST_BUCKET_NAME)
    restored = load_model_manager_from_s3(
        model_name='test', bucket=TEST_BUCKET_NAME)
    assert restored
    assert stored_managers() == 1

    # Update should create a new file if there's at least one second
    # difference
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    assert stored_managers() == 2
def test_generate_stats_on_s3():
    """Generate model and test stats on S3 for a first and a second round.

    With a single set of results no previous round is expected; after a new
    model manager and new results are uploaded, the previous round and
    previous stats must be picked up and a second stats file stored.
    """
    # Local imports are recommended when using moto
    from emmaa.analyze_tests_results import generate_stats_on_s3
    from emmaa.util import find_number_of_files_on_s3, make_date_str
    from emmaa.model_tests import update_model_manager_on_s3

    def run_model_stats():
        return generate_stats_on_s3(
            'test', 'model', upload_stats=True, bucket=TEST_BUCKET_NAME)

    def run_test_stats():
        return generate_stats_on_s3(
            'test', 'tests', 'simple_tests', upload_stats=True,
            bucket=TEST_BUCKET_NAME)

    def model_stats_files():
        return find_number_of_files_on_s3(
            TEST_BUCKET_NAME, 'model_stats/test/model_stats_')

    def test_stats_files():
        return find_number_of_files_on_s3(
            TEST_BUCKET_NAME, 'stats/test/test_stats_')

    def check_rounds(generator, has_previous):
        # The latest round is always expected; the previous round and the
        # previous stats only once a second set of data exists
        assert generator.latest_round
        if has_previous:
            assert generator.previous_round
            assert generator.previous_json_stats
        else:
            assert not generator.previous_round
            assert not generator.previous_json_stats

    # Try with only one set of results first (as for new model/test)
    client = setup_bucket(add_results=True, add_mm=True, add_model=True)
    msg = run_model_stats()
    check_rounds(msg, has_previous=False)
    assert model_stats_files() == 1
    tsg = run_test_stats()
    check_rounds(tsg, has_previous=False)
    assert test_stats_files() == 1

    # Now add new results and new mm
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    client.put_object(
        Body=json.dumps(previous_results, indent=1),
        Bucket=TEST_BUCKET_NAME,
        Key=f'results/test/results_simple_tests_{make_date_str()}.json')
    msg = run_model_stats()
    check_rounds(msg, has_previous=True)
    assert model_stats_files() == 2
    tsg = run_test_stats()
    check_rounds(tsg, has_previous=True)
    assert test_stats_files() == 2
def get_model_tests_page(model):
    """Render the page showing the result of a single test for a model.

    The test to display is selected through the request query string:
    ``model_type``, ``test_hash``, ``test_corpus`` (required) and ``date``.

    Parameters
    ----------
    model : str
        Model identifier; it is passed to the stats loaders and interpolated
        into the S3 key prefix of the test stats files.

    Returns
    -------
    The rendered ``tests_template.html`` page.

    Notes
    -----
    The request is aborted with a 404 response when the test corpus is
    missing, the model type is unknown, no stats are found, the test hash
    has no result, or the test has no result for the requested model type.
    """
    model_type = request.args.get('model_type')
    test_hash = request.args.get('test_hash')
    test_corpus = request.args.get('test_corpus')
    if not test_corpus:
        abort(Response('Test corpus has to be provided', 404))
    date = request.args.get('date')
    if model_type not in ALL_MODEL_TYPES:
        abort(Response(f'Model type {model_type} does not exist', 404))
    test_stats, file_key = get_model_stats(
        model, 'test', tests=test_corpus, date=date)
    if not test_stats:
        abort(Response(f'Data for {model} for {date} was not found', 404))
    try:
        current_test = \
            test_stats['test_round_summary']['all_test_results'][test_hash]
    except KeyError:
        abort(Response(f'Result for this test does not exist for {date}', 404))
    # Only offer the model types that are present in this round's summary
    current_model_types = [
        mt for mt in ALL_MODEL_TYPES
        if mt in test_stats['test_round_summary']
    ]
    test = current_test["test"]
    # A globally valid model type may still have no result for this test;
    # answer with 404 like the other missing-data cases instead of letting
    # the KeyError surface as a server error
    try:
        test_status, path_list = current_test[model_type]
    except KeyError:
        abort(Response(
            f'Result for model type {model_type} does not exist '
            f'for this test', 404))
    correct, incorrect = _label_curations()
    if isinstance(path_list, list):
        for path in path_list:
            for edge in path['edge_list']:
                for stmt in edge['stmts']:
                    cur = ''
                    url = stmt[0]
                    if 'stmt_hash' in url:
                        stmt_hashes = parse.parse_qs(
                            parse.urlparse(url).query)['stmt_hash']
                        cur = _set_curation(stmt_hashes, correct, incorrect)
                    # The curation label (empty when the URL carries no
                    # statement hash) is appended to the statement entry
                    stmt.append(cur)
    latest_date = get_latest_available_date(model, test_corpus)
    prefix = f'stats/{model}/test_stats_{test_corpus}_'
    cur_ix = find_index_of_s3_file(file_key, EMMAA_BUCKET_NAME, prefix)
    # NOTE(review): index cur_ix + 1 is used for the previous date and
    # cur_ix - 1 for the next one, so lower indices presumably refer to more
    # recent files - confirm against last_updated_date.
    # A test listed as added in this round's delta gets no previous date,
    # and neither does the last indexed stats file.
    newly_added = (
        test_hash in test_stats['tests_delta']['applied_hashes_delta']['added'])
    prev_date = None
    if not newly_added and (cur_ix + 1) < find_number_of_files_on_s3(
            EMMAA_BUCKET_NAME, prefix, '.json'):
        prev_date = last_updated_date(
            model, 'test_stats', 'date', tests=test_corpus,
            extension='.json', n=(cur_ix + 1), bucket=EMMAA_BUCKET_NAME)
    next_date = None
    if cur_ix > 0:
        next_date = last_updated_date(
            model, 'test_stats', 'date', tests=test_corpus,
            extension='.json', n=(cur_ix - 1), bucket=EMMAA_BUCKET_NAME)
    return render_template('tests_template.html',
                           link_list=link_list,
                           model=model,
                           model_type=model_type,
                           all_model_types=current_model_types,
                           test_hash=test_hash,
                           test=test,
                           test_status=test_status,
                           path_list=path_list,
                           formatted_names=FORMATTED_TYPE_NAMES,
                           date=date,
                           latest_date=latest_date,
                           prev=prev_date,
                           next=next_date)