def test_save_load_update_model_manager():
    """Round-trip a ModelManager through the test bucket.

    Three stages are checked against ``TEST_BUCKET_NAME``: no model manager
    file exists (and loading returns None) before anything is saved, saving
    produces exactly one file that can be loaded back, and updating produces
    a second file.
    """
    # Local imports are recommended when using moto
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        load_model_manager_from_s3, update_model_manager_on_s3
    from emmaa.util import find_number_of_files_on_s3

    def count_model_manager_files():
        # Number of model manager pickles stored for the 'test' model
        return find_number_of_files_on_s3(
            TEST_BUCKET_NAME, 'results/test/model_manager_', '.pkl')

    # Only the bucket contents matter here; the returned client is not needed
    setup_bucket(add_model=True)

    # Should be None if no model manager
    assert count_model_manager_files() == 0
    loaded_mm = load_model_manager_from_s3(
        model_name='test', bucket=TEST_BUCKET_NAME)
    assert loaded_mm is None

    # Save a model manager and load it back
    model = create_model()
    mm = ModelManager(model)
    save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    loaded_mm = load_model_manager_from_s3(
        model_name='test', bucket=TEST_BUCKET_NAME)
    assert loaded_mm
    assert count_model_manager_files() == 1

    # Update should create a new file if there's at least one second
    # difference
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    assert count_model_manager_files() == 2
def test_run_tests():
    """Run a single applicable test and check each checker stored a result."""
    emmaa_model = create_model()
    braf = Agent('BRAF', db_refs={'HGNC': '1097'})
    mapk1 = Agent('MAPK1', db_refs={'UP': 'P28482'})
    stmt_tests = [StatementCheckingTest(Activation(braf, mapk1))]
    manager = ModelManager(emmaa_model)
    test_manager = TestManager([manager], stmt_tests)
    test_manager.make_tests(ScopeTestConnector())
    test_manager.run_tests()

    # The one test we passed in must have been kept as applicable
    assert len(manager.applicable_tests) == 1
    assert isinstance(manager.applicable_tests[0], StatementCheckingTest)

    # Every model checker type holds exactly one result
    for mc_type in ('pysb', 'pybel', 'signed_graph', 'unsigned_graph'):
        assert len(manager.mc_types[mc_type]['test_results']) == 1
    assert isinstance(
        manager.mc_types['pysb']['test_results'][0], PathResult)
def test_model_manager_structure():
    """Check the attributes of a freshly built ModelManager."""
    manager = ModelManager(create_model())
    assert isinstance(manager, ModelManager)
    assert isinstance(manager.model, EmmaaModel)
    assert manager.model.name == 'test'

    n_mc_types = len(manager.mc_types)
    assert n_mc_types == 4, n_mc_types
    assert len(manager.mc_types['pysb']) == 3

    # Each model checker type must be backed by the matching checker class
    expected_checkers = (
        ('pysb', PysbModelChecker),
        ('pybel', PybelModelChecker),
        ('signed_graph', SignedGraphModelChecker),
        ('unsigned_graph', UnsignedGraphModelChecker),
    )
    for mc_type, checker_cls in expected_checkers:
        checker = manager.mc_types[mc_type]['model_checker']
        assert isinstance(checker, checker_cls)

    assert isinstance(manager.entities[0], Agent)
    assert isinstance(manager.date_str, str)
def test_applicability():
    """Compare which tests each test connector accepts for the model."""
    emmaa_model = create_model()

    def make_braf():
        # A separate BRAF agent per statement, as the tests do not share one
        return Agent('BRAF', db_refs={'HGNC': '1097'})

    mapk1_test = StatementCheckingTest(
        Activation(make_braf(), Agent('MAPK1', db_refs={'UP': 'P28482'})))
    erk_test = StatementCheckingTest(
        Activation(make_braf(), Agent('ERK', db_refs={'FPLX': 'ERK'})))

    manager = ModelManager(emmaa_model)
    test_manager = TestManager([manager], [mapk1_test, erk_test])

    # Only first test is applicable with ScopeTestConnector
    test_manager.make_tests(ScopeTestConnector())
    assert len(manager.applicable_tests) == 1
    assert manager.applicable_tests[0] == mapk1_test

    # Both tests are applicable with RefinementTestConnector
    manager.applicable_tests = []
    test_manager.make_tests(RefinementTestConnector())
    assert len(manager.applicable_tests) == 2
def test_direct_path_tests():
    """Check path lengths found with and without ``allow_direct``.

    The tested statement itself is appended to the model's statements, so
    the expected path has one edge when direct paths are allowed and two
    edges when they are not.
    """
    model = create_model()
    stmt = Activation(Agent('BRAF', db_refs={'HGNC': '1097'}),
                      Agent('MAPK1', db_refs={'UP': 'P28482'}))
    model.stmts.append(
        EmmaaStatement(stmt, datetime.datetime.now(), [],
                       {'internal': True, 'curated': False}))
    tests = [StatementCheckingTest(stmt)]
    mm = ModelManager(model)
    tm = TestManager([mm], tests)
    tm.make_tests(ScopeTestConnector())

    # Model checker types inspected by both runs
    checked_mc_types = ('pysb', 'signed_graph', 'unsigned_graph')

    tm.run_tests(allow_direct=True)
    for mc_type in checked_mc_types:
        res = mm.mc_types[mc_type]['test_results'][0]
        assert len(res.paths[0]) == 2, (mc_type, res.paths[0])  # 1 edge

    tm.run_tests(allow_direct=False)
    for mc_type in checked_mc_types:
        # Look at the second test result here
        # NOTE(review): presumably the second run appends its result after
        # the first one - confirm against ModelManager.
        res = mm.mc_types[mc_type]['test_results'][1]
        assert len(res.paths[0]) == 3, (mc_type, res.paths[0])  # 2 edges
def setup_bucket(add_model=False, add_mm=False, add_tests=False,
                 add_results=False, add_model_stats=False,
                 add_test_stats=False):
    """Create a fresh test bucket and optionally fill it with fixtures.

    This function creates a new (local) bucket mocking S3 bucket at each
    call. Then all calls to S3 are calling this bucket instead of real S3
    bucket. Depending on the test we might or might not need the bucket to
    contain different files. For faster computation, only required files for
    the test are generated and stored in the bucket. Files can be added by
    setting corresponding arguments to True when calling this function.

    Parameters
    ----------
    add_model : bool
        Save a config and the model returned by ``create_model()`` for the
        'test' model.
    add_mm : bool
        Build a ModelManager (reusing the model created for ``add_model``
        when there is one), then save its assembled statements and the
        manager itself.
    add_tests : bool
        Upload a pickled test dictionary to 'tests/simple_tests.pkl'.
    add_results : bool
        Upload ``previous_results`` as a dated JSON results file.
    add_model_stats : bool
        Upload ``previous_model_stats`` as a dated JSON model stats file.
    add_test_stats : bool
        Upload ``previous_test_stats`` as a dated JSON test stats file.

    Returns
    -------
    client
        The S3 client that was used to create and fill the bucket.
    """
    # Local imports are recommended when using moto
    from emmaa.util import get_s3_client
    from emmaa.model import save_config_to_s3
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        StatementCheckingTest

    # Create a mock s3 bucket
    client = get_s3_client()
    client.create_bucket(Bucket=TEST_BUCKET_NAME, ACL='public-read')
    # One date string is shared by every dated key written below
    date_str = make_date_str()

    def put_json(key, payload):
        # Upload payload as indented JSON under key in the test bucket
        client.put_object(Body=json.dumps(payload, indent=1),
                          Bucket=TEST_BUCKET_NAME, Key=key)

    emmaa_model = None
    if add_model:
        # Put config and model files into empty bucket
        config_dict = {
            'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
            'search_terms': [{
                'db_refs': {'HGNC': '20974'},
                'name': 'MAPK1',
                'search_term': 'MAPK1',
                'type': 'gene',
            }],
            'test': {
                'test_corpus': 'simple_tests',
                'default_test_corpus': 'simple_tests',
            },
            'human_readable_name': 'Test Model',
            'assembly': [
                {'function': 'filter_no_hypothesis'},
                {'function': 'map_grounding'},
                {'function': 'filter_grounded_only'},
                {'function': 'filter_human_only'},
                {'function': 'map_sequence'},
                {'function': 'run_preassembly',
                 'kwargs': {'return_toplevel': False}},
                {'function': 'filter_top_level'},
            ],
        }
        save_config_to_s3('test', config_dict, bucket=TEST_BUCKET_NAME)
        emmaa_model = create_model()
        emmaa_model.save_to_s3(bucket=TEST_BUCKET_NAME)
    if add_mm:
        # Add a ModelManager to bucket
        if not emmaa_model:
            emmaa_model = create_model()
        mm = ModelManager(emmaa_model)
        mm.date_str = date_str
        mm.save_assembled_statements(upload_to_db=False,
                                     bucket=TEST_BUCKET_NAME)
        save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    if add_tests:
        tests = [
            StatementCheckingTest(Activation(Agent('BRAF'), Agent('MAPK1')))
        ]
        test_dict = {
            'test_data': {'description': 'Tests for functionality testing'},
            'tests': tests,
        }
        client.put_object(Body=pickle.dumps(test_dict),
                          Bucket=TEST_BUCKET_NAME,
                          Key='tests/simple_tests.pkl')
    if add_results:
        put_json(f'results/test/results_simple_tests_{date_str}.json',
                 previous_results)
    if add_model_stats:
        put_json(f'model_stats/test/model_stats_{date_str}.json',
                 previous_model_stats)
    if add_test_stats:
        put_json(f'stats/test/test_stats_simple_tests_{date_str}.json',
                 previous_test_stats)
    return client
'MAP2K1 → MAPK1', 'stmts': [[ '/evidence?stmt_hash=-34603994586320440&source=' 'model_statement&model=test&date=2020-01-01', 'Active MAP2K1 activates MAPK1.', '' ]] }] } } query_not_appl = {'2413475507': 'Query is not applicable for this model'} fail_response = { '521653329': 'No path found that satisfies the test statement' } # Create a new EmmaaModel and ModelManager for tests instead of depending # on S3 version test_model = create_model() test_mm = ModelManager(test_model) test_mm.date_str = '2020-01-01-00-00-00' test_email = '*****@*****.**' def test_format_results(): date = datetime.now() results = [ ('test', query_object, 'pysb', test_response, {'3801854542'}, date), ('test', query_object, 'signed_graph', fail_response, {}, date), ('test', query_object, 'unsigned_graph', test_response, {}, date) ] formatted_results = format_results(results) assert len(formatted_results) == 1 qh = query_object.get_hash_with_model('test')
def test_results_json():
    """Check the JSON and JSONL output of ModelManager.results_to_json.

    Extra statements between the same subject and object are added to the
    assembled model so that grouping of statements on one edge can be
    checked for each graph type.
    """
    emmaa_model = create_model()
    emmaa_model.run_assembly()
    # Add statements with similar subject and object to test grouping
    map2k1 = emmaa_model.assembled_stmts[1].subj
    mapk1 = emmaa_model.assembled_stmts[1].obj
    emmaa_model.assembled_stmts += [
        Phosphorylation(map2k1, mapk1),
        Phosphorylation(map2k1, mapk1, 'T', '185'),
        Phosphorylation(map2k1, mapk1, 'Y', '187'),
        IncreaseAmount(map2k1, mapk1),
        Inhibition(map2k1, mapk1),
    ]
    manager = ModelManager(emmaa_model)
    stmt_tests = [
        StatementCheckingTest(
            Activation(Agent('BRAF', db_refs={'HGNC': '1097'}),
                       Agent('MAPK1', db_refs={'UP': 'P28482'}))),
        StatementCheckingTest(
            Phosphorylation(
                Agent('MEK', db_refs={'TEXT': 'MEK', 'FPLX': 'MEK'}),
                Agent('ERK', db_refs={'TEXT': 'ERK', 'FPLX': 'ERK'}))),
    ]
    test_manager = TestManager([manager], stmt_tests)
    test_manager.make_tests(RefinementTestConnector())
    test_manager.run_tests()
    result_json, json_lines = manager.results_to_json()
    assert len(result_json) == 3

    # Looking at the first result
    first_result = result_json[1]
    assert len(first_result) == 6, len(first_result)
    expected_path = 'BRAF → MAP2K1 → MAPK1'

    # The second edge will be supported differently in different model types
    pysb_path = first_result['pysb']['path_json'][0]
    assert pysb_path['path'] == expected_path
    edge_stmts = pysb_path['edge_list'][1]['stmts']
    # Only Activation statement will be in the edge in PySB
    assert len(edge_stmts) == 1
    assert edge_stmts[0][1] == 'Active MAP2K1 activates MAPK1.'
    assert edge_stmts[0][0].count('stmt_hash') == 1

    # Positive (Activation and IncreaseAmount) in SignedGraph edge
    signed_path = first_result['signed_graph']['path_json'][0]
    assert signed_path['path'] == expected_path
    edge_stmts = signed_path['edge_list'][1]['stmts']
    assert len(edge_stmts) == 2
    assert edge_stmts[0][1] == 'MAP2K1 activates MAPK1.'
    assert edge_stmts[0][0].count('stmt_hash') == 1
    assert edge_stmts[1][1] == 'MAP2K1 increases the amount of MAPK1.'
    assert edge_stmts[1][0].count('stmt_hash') == 1

    # All statement types support unsigned graph edge, but different
    # statements of the same type are grouped together
    unsigned_path = first_result['unsigned_graph']['path_json'][0]
    assert unsigned_path['path'] == expected_path
    edge_stmts = unsigned_path['edge_list'][1]['stmts']
    assert len(edge_stmts) == 4
    # Map each sentence to the number of statement hashes in its link;
    # entries are indexed by position because they may hold extra fields
    sentence_counts = {}
    for entry in edge_stmts:
        sentence_counts[entry[1]] = entry[0].count('stmt_hash')
    expected_counts = {
        'MAP2K1 activates MAPK1.': 1,
        'MAP2K1 phosphorylates MAPK1.': 3,
        'MAP2K1 inhibits MAPK1.': 1,
        'MAP2K1 increases the amount of MAPK1.': 1,
    }
    for sentence, count in expected_counts.items():
        assert sentence in sentence_counts
        assert sentence_counts[sentence] == count

    # Test JSONL representation
    assert len(json_lines) == 6, len(json_lines)
    # Expected number of hashes on the statement edge, per graph type;
    # graph types that are not listed get no graph-specific checks
    first_test_hashes = {'pysb': 1, 'signed_graph': 2, 'unsigned_graph': 6}
    second_test_hashes = {'pysb': 1, 'pybel': 1, 'unsigned_graph': 6}
    for path_dict in json_lines:
        if path_dict['test'] == 13165736649758742:
            # First test
            edges = path_dict['edges']
            assert len(edges) == 2
            assert edges[0]['type'] == 'statements'
            assert len(edges[0]['hashes']) == 1
            n_hashes = first_test_hashes.get(path_dict['graph_type'])
            if n_hashes is not None:
                assert edges[1]['type'] == 'statements'
                assert len(edges[1]['hashes']) == n_hashes
        else:
            # Second test
            n_hashes = second_test_hashes.get(path_dict['graph_type'])
            if n_hashes is None:
                continue
            edges = path_dict['edges']
            assert len(edges) == 3
            assert edges[0]['type'] == 'RefEdge'
            assert 'hashes' not in edges[0]
            assert edges[1]['type'] == 'statements'
            assert len(edges[1]['hashes']) == n_hashes
            assert edges[2]['type'] == 'RefEdge'
            assert 'hashes' not in edges[2]