def test_api_load_from_s3():
    """Exercise the service-level S3 helpers against a mocked bucket."""
    from emmaa_service.api import is_available, get_latest_available_date, \
        _get_test_corpora, _get_model_meta_data, get_model_config
    from emmaa.model import last_updated_date
    client = setup_bucket(add_model=True, add_model_stats=True,
                          add_test_stats=True)
    current_date = make_date_str()[:10]
    stale_date = '2020-01-01'
    # Availability depends on both the test corpus and the date
    assert is_available('test', 'simple_tests', current_date,
                        TEST_BUCKET_NAME)
    assert not is_available('test', 'large_corpus_tests', current_date,
                            TEST_BUCKET_NAME)
    assert not is_available('test', 'simple_tests', stale_date,
                            TEST_BUCKET_NAME)
    assert get_latest_available_date(
        'test', 'simple_tests', bucket=TEST_BUCKET_NAME) == current_date
    config = get_model_config('test', TEST_BUCKET_NAME)
    assert config
    assert _get_test_corpora('test', TEST_BUCKET_NAME) == {'simple_tests'}
    # Metadata should contain exactly one (name, config) pair for 'test'
    metadata = _get_model_meta_data(bucket=TEST_BUCKET_NAME)
    assert len(metadata) == 1
    assert len(metadata[0]) == 2
    model_name, model_config = metadata[0]
    assert model_name == 'test'
    assert model_config == config
def test_handler():
    """Test the lambda handler locally."""
    dts = make_date_str()
    key = f'models/test/test_model_{dts}.pkl'
    # Mimic the S3 event payload AWS would deliver on a model upload
    event = {'Records': [{'s3': {'object': {'key': key}}}]}
    context = None
    res = lambda_handler(event, context)
    print(res)
    assert res['statusCode'] == 200, res
    assert res['result'] == 'SUCCESS', res
    assert res['job_id'], res
    job_id = res['job_id']
    results = {}
    # Block until the submitted Batch job finishes, collecting outcomes
    # into `results` (keys include 'succeeded' and 'failed')
    wait_for_complete(QUEUE, job_list=[{'jobId': job_id}],
                      result_record=results)
    print(results)
    assert job_id in [job_def['jobId'] for job_def in results['succeeded']], \
        results['failed']
    s3 = get_s3_client()
    # The job should have written results under today's date prefix
    s3_res = s3.list_objects(Bucket='emmaa', Prefix='results/test/' + dts[:10])
    print(s3_res.keys())
    assert s3_res, s3_res
def answer_dynamic_query(self, query, bucket=EMMAA_BUCKET_NAME):
    """Answer user query by simulating a PySB model.

    Parameters
    ----------
    query : emmaa query object
        Dynamic query providing the temporal pattern to check.
    bucket : Optional[str]
        Name of the S3 bucket to upload the result figure to.
        Default: EMMAA_BUCKET_NAME.

    Returns
    -------
    list of tuple
        A single-element list of ('pysb', response hash, response json).
    """
    # The configured num_sim is superseded by the value returned from
    # check_property below before it is ever read, so discard it here.
    pysb_model, use_kappa, time_limit, num_times, _ = \
        self._get_dynamic_components()
    tra = TRA(use_kappa=use_kappa)
    tp = query.get_temporal_pattern(time_limit)
    try:
        sat_rate, num_sim, kpat, pat_obj, fig_path = tra.check_property(
            pysb_model, tp, num_times=num_times)
        if self.mode == 's3':
            # Replace the local figure path with a public S3 URL
            fig_name, ext = os.path.splitext(os.path.basename(fig_path))
            date_str = make_date_str()
            s3_key = (f'query_images/{self.model.name}/{fig_name}_'
                      f'{date_str}{ext}')
            s3_path = f'https://{bucket}.s3.amazonaws.com/{s3_key}'
            client = get_s3_client(unsigned=False)
            logger.info(f'Uploading image to {s3_path}')
            client.upload_file(fig_path, Bucket=bucket, Key=s3_key)
            fig_path = s3_path
        resp_json = {
            'sat_rate': sat_rate,
            'num_sim': num_sim,
            'kpat': kpat,
            'fig_path': fig_path
        }
    except (MissingMonomerError, MissingMonomerSiteError):
        # Model lacks the queried monomer/site: report as not applicable
        resp_json = RESULT_CODES['QUERY_NOT_APPLICABLE']
    return [('pysb', self.hash_response_list(resp_json), resp_json)]
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    make_tests_cfg = config.get('make_tests')
    # When configured, restrict statements with the metadata filter
    if isinstance(make_tests_cfg, dict):
        filter_cfg = make_tests_cfg['filter']
        stmts = filter_indra_stmts_by_metadata(
            stmts, filter_cfg['conditions'], filter_cfg['evid_policy'])
    date_str = make_date_str()
    # Only statements where every agent slot is filled can be checked
    tests = []
    for stmt in stmts:
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))
    test_description = (
        f'These tests were generated from the '
        f'{config.get("human_readable_name")} on {date_str[:10]}')
    test_name = f'{config.get("human_readable_name")} model test corpus'
    test_dict = {
        'test_data': {'description': test_description, 'name': test_name},
        'tests': tests}
    if upload:
        save_tests_to_s3(test_dict, bucket,
                         f'tests/{model_name}_tests_{date_str}.pkl', 'pkl')
    return test_dict
def answer_intervention_query(self, query, bucket=EMMAA_BUCKET_NAME):
    """Answer user intervention query by simulating a PySB model.

    Parameters
    ----------
    query : emmaa query object
        Intervention query with condition/target entities and a direction.
    bucket : Optional[str]
        Name of the S3 bucket to upload the result figure to.
        Default: EMMAA_BUCKET_NAME.

    Returns
    -------
    list of tuple
        A single-element list of ('pysb', response hash, response json).
    """
    pysb_model, use_kappa, time_limit, num_times, num_sim, _ = \
        self._get_dynamic_components('intervention')
    tra = TRA(use_kappa=use_kappa)
    try:
        res, fig_path = tra.compare_conditions(pysb_model,
                                               query.condition_entity,
                                               query.target_entity,
                                               query.direction,
                                               time_limit, num_times)
        if self.mode == 's3':
            # Replace the local figure path with a public S3 URL
            fig_name, ext = os.path.splitext(os.path.basename(fig_path))
            date_str = make_date_str()
            s3_key = (f'query_images/{self.model.name}/{fig_name}_'
                      f'{date_str}{ext}')
            s3_path = f'https://{bucket}.s3.amazonaws.com/{s3_key}'
            client = get_s3_client(unsigned=False)
            logger.info(f'Uploading image to {s3_path}')
            client.upload_file(fig_path, Bucket=bucket, Key=s3_key)
            fig_path = s3_path
        resp_json = {'result': res, 'fig_path': fig_path}
        return [('pysb', self.hash_response_list(resp_json), resp_json)]
    except (MissingMonomerError, MissingMonomerSiteError):
        # Model lacks the queried monomer/site: hash the not-applicable
        # code but return it wrapped in a 'fail_reason' dict.
        # NOTE(review): the sibling answer_dynamic_query returns resp_json
        # directly here — confirm the 'fail_reason' wrapper is intentional.
        resp_json = RESULT_CODES['QUERY_NOT_APPLICABLE']
        return [('pysb', self.hash_response_list(resp_json), {
            'fail_reason': RESULT_CODES['QUERY_NOT_APPLICABLE']
        })]
def __init__(self, model):
    """Set up per-model-type assemblers, model checkers and result stores."""
    self.model = model
    # For each supported model type: (assembly function, checker class,
    # path-to-statements converter).
    self.mc_mapping = {
        'pysb': (self.model.assemble_pysb, PysbModelChecker,
                 stmts_from_pysb_path),
        'pybel': (self.model.assemble_pybel, PybelModelChecker,
                  stmts_from_pybel_path),
        'signed_graph': (self.model.assemble_signed_graph,
                         SignedGraphModelChecker,
                         stmts_from_indranet_path),
        'unsigned_graph': (self.model.assemble_unsigned_graph,
                           UnsignedGraphModelChecker,
                           stmts_from_indranet_path)
    }
    self.mc_types = {}
    # Assemble each configured model type and pair it with its checker
    for mc_type in model.test_config.get('mc_types', ['pysb']):
        assemble, checker_cls, _ = self.mc_mapping[mc_type]
        assembled = assemble()
        self.mc_types[mc_type] = {'model': assembled,
                                  'model_checker': checker_cls(assembled),
                                  'test_results': []}
    self.entities = self.model.get_assembled_entities()
    self.applicable_tests = []
    self.date_str = make_date_str()
    self.path_stmt_counts = defaultdict(int)
def get_dates(self):
    """Return the previously recorded stats dates extended with today."""
    if self.previous_json_stats:
        dates = self.previous_json_stats['changes_over_time']['dates']
    else:
        dates = []
    dates.append(make_date_str())
    return dates
def save_to_s3(self, bucket=EMMAA_BUCKET_NAME):
    """Dump the model state to S3."""
    base_key = f'models/{self.name}/model_{make_date_str()}'
    # Persist the statements both as a pickle and as JSON
    save_pickle_to_s3(self.stmts, bucket, key=base_key + '.pkl')
    save_json_to_s3(self.stmts, bucket, key=base_key + '.json')
def format_results(results, query_type='path_property'):
    """Format db output to a standard json structure.

    Each row in `results` is indexed as: 0 model name, 1 query object,
    2 model-checker type, 3 response json, 4 date. Rows are grouped by
    query hash, with one entry per model-checker type.
    """
    model_types = ['pysb', 'pybel', 'signed_graph', 'unsigned_graph']
    formatted_results = {}
    for result in results:
        model = result[0]
        query = result[1]
        query_hash = query.get_hash_with_model(model)
        if query_hash not in formatted_results:
            formatted_results[query_hash] = {
                'query': query.to_english(),
                'model': model,
                'date': make_date_str(result[4])
            }
        mc_type = result[2]
        response_json = result[3]
        response = []
        # A string value in the response is a status/failure message and
        # replaces the whole response; dict values are path results.
        for v in response_json.values():
            if isinstance(v, str):
                response = v
            elif isinstance(v, dict):
                response.append(v)
        if query_type == 'path_property':
            if mc_type == '' and \
                    response == 'Query is not applicable for this model':
                # Not applicable to any checker: mark all model types n_a
                for mt in model_types:
                    formatted_results[query_hash][mt] = ['n_a', response]
            elif isinstance(response, str) and \
                    response == 'Statement type not handled':
                formatted_results[query_hash][mc_type] = ['n_a', response]
            elif isinstance(response, str) and \
                    not response == 'Path found but exceeds search depth':
                # Any other status string is treated as a failure
                formatted_results[query_hash][mc_type] = ['Fail', response]
            else:
                # Paths were found (or only the search depth was exceeded)
                formatted_results[query_hash][mc_type] = ['Pass', response]
        elif query_type == 'dynamic_property':
            if response == 'Query is not applicable for this model':
                formatted_results[query_hash]['result'] = ['n_a', response]
            else:
                # Pass if the property held in more than half the simulations
                res = int(response[0]['sat_rate'] * 100)
                expl = (f'Satisfaction rate is {res}% after '
                        f'{response[0]["num_sim"]} simulations.')
                if res > 50:
                    formatted_results[query_hash]['result'] = ['Pass', expl]
                else:
                    formatted_results[query_hash]['result'] = ['Fail', expl]
                formatted_results[query_hash]['image'] = response[0][
                    'fig_path']
    if query_type == 'path_property':
        # Loop through the results again to make sure all model types are there
        for qh in formatted_results:
            for mt in model_types:
                if mt not in formatted_results[qh]:
                    formatted_results[qh][mt] = [
                        'n_a', 'Model type not supported'
                    ]
    return formatted_results
def save_to_s3(self, bucket=EMMAA_BUCKET_NAME):
    """Upload the generated round statistics to S3 as JSON.

    Parameters
    ----------
    bucket : Optional[str]
        Name of the S3 bucket to upload to. Default: EMMAA_BUCKET_NAME.
        (Previously hard-coded to 'emmaa'; the default keeps behavior.)
    """
    json_stats_str = json.dumps(self.json_stats, indent=1)
    client = get_s3_client(unsigned=False)
    date_str = make_date_str()
    stats_key = f'stats/{self.model_name}/stats_{date_str}.json'
    logger.info(f'Uploading test round statistics to {stats_key}')
    client.put_object(Bucket=bucket, Key=stats_key,
                      Body=json_stats_str.encode('utf8'))
def save_to_s3(self, bucket=EMMAA_BUCKET_NAME):
    """Dump the model state to S3 as both a pickle and a JSON file.

    Parameters
    ----------
    bucket : Optional[str]
        Name of the S3 bucket to upload to. Default: EMMAA_BUCKET_NAME.
        (Previously hard-coded to 'emmaa'; the default keeps behavior.)
    """
    date_str = make_date_str()
    fname = f'models/{self.name}/model_{date_str}'
    client = get_s3_client(unsigned=False)
    # Dump as pickle
    client.put_object(Body=pickle.dumps(self.stmts), Bucket=bucket,
                      Key=fname + '.pkl')
    # Dump as json (str.encode(s, encoding=...) simplified to s.encode)
    client.put_object(Body=json.dumps(self.to_json()).encode('utf8'),
                      Bucket=bucket, Key=fname + '.json')
def format_results(results):
    """Format db output to a standard json structure.

    Each row is indexed as: 0 model, 1 query, 2 mc_type, 3 response json,
    4 date; one formatted dict is produced per row.
    """
    return [
        {'model': result[0],
         'query': _make_query_simple_dict(result[1]),
         'mc_type': result[2],
         'response': _process_result_to_html(result[3]),
         'date': make_date_str(result[4])}
        for result in results
    ]
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    statements, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    date_str = make_date_str()
    # Only statements where every agent slot is filled can be checked
    checking_tests = []
    for stmt in statements:
        if all(stmt.agent_list()):
            checking_tests.append(StatementCheckingTest(stmt))
    test_dict = {
        'test_data': {
            'description': (
                f'These tests were generated from the '
                f'{config.get("human_readable_name")} on {date_str[:10]}'),
            'name': f'{config.get("human_readable_name")} model test corpus'
        },
        'tests': checking_tests
    }
    if upload:
        key = f'tests/{model_name}_tests_{date_str}.pkl'
        save_tests_to_s3(test_dict, bucket, key, 'pkl')
    return test_dict
def __init__(self, name, config, paper_ids=None):
    """Initialize an empty model, then populate settings from `config`."""
    self.name = name
    self.stmts = []
    self.assembly_config = {}
    self.test_config = {}
    self.reading_config = {}
    self.query_config = {}
    self.search_terms = []
    self.ndex_network = None
    self.human_readable_name = None
    self.export_formats = []
    # _load_config fills in the config-derived attributes declared above
    self._load_config(config)
    self.assembled_stmts = []
    self.paper_ids = set(paper_ids) if paper_ids else set()
    self.date_str = make_date_str()
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Load a model from S3, assemble it, and turn its statements
    into a test corpus, optionally uploading the corpus to S3."""
    em = EmmaaModel.load_from_s3(model_name, bucket=bucket)
    em.run_assembly()
    date_str = make_date_str()
    # Only statements where every agent slot is filled can be checked
    tests = [StatementCheckingTest(st) for st in em.assembled_stmts
             if all(st.agent_list())]
    description = (
        f'These tests were generated from the {em.human_readable_name} '
        f'on {date_str[:10]}')
    test_dict = {'test_data': {'description': description}, 'tests': tests}
    if upload:
        save_pickle_to_s3(test_dict, bucket,
                          f'tests/{model_name}_tests_{date_str}.pkl')
    return test_dict
def setup_bucket(add_model=False, add_mm=False, add_tests=False,
                 add_results=False, add_model_stats=False,
                 add_test_stats=False):
    """
    This function creates a new (local) bucket mocking S3 bucket at each call.
    Then all calls to S3 are calling this bucket instead of real S3 bucket.
    Depending on the test we might or might not need the bucket to contain
    different files. For faster computation, only required files for the test
    are generated and stored in the bucket. Files can be added by setting
    corresponding arguments to True when calling this function.

    Returns the mock S3 client connected to the populated test bucket.
    """
    # Local imports are recommended when using moto
    from emmaa.util import get_s3_client
    from emmaa.model import save_config_to_s3
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        StatementCheckingTest
    # Create a mock s3 bucket
    client = get_s3_client()
    bucket = client.create_bucket(Bucket=TEST_BUCKET_NAME, ACL='public-read')
    date_str = make_date_str()
    emmaa_model = None
    if add_model:
        # Put config and model files into empty bucket
        config_dict = {
            'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
            'search_terms': [{'db_refs': {'HGNC': '20974'},
                              'name': 'MAPK1',
                              'search_term': 'MAPK1',
                              'type': 'gene'}],
            'test': {'test_corpus': 'simple_tests',
                     'default_test_corpus': 'simple_tests'},
            'human_readable_name': 'Test Model',
            'assembly': [
                {'function': 'filter_no_hypothesis'},
                {'function': 'map_grounding'},
                {'function': 'filter_grounded_only'},
                {'function': 'filter_human_only'},
                {'function': 'map_sequence'},
                {'function': 'run_preassembly',
                 'kwargs': {'return_toplevel': False}},
                {'function': 'filter_top_level'}]}
        save_config_to_s3('test', config_dict, bucket=TEST_BUCKET_NAME)
        emmaa_model = create_model()
        emmaa_model.save_to_s3(bucket=TEST_BUCKET_NAME)
    if add_mm:
        # Add a ModelManager to bucket
        if not emmaa_model:
            emmaa_model = create_model()
        mm = ModelManager(emmaa_model)
        # Pin the date so test keys are predictable within this call
        mm.date_str = date_str
        mm.save_assembled_statements(upload_to_db=False,
                                     bucket=TEST_BUCKET_NAME)
        save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    if add_tests:
        # Minimal single-statement test corpus
        tests = [StatementCheckingTest(
            Activation(Agent('BRAF'), Agent('MAPK1')))]
        test_dict = {'test_data': {'description':
                                   'Tests for functionality testing'},
                     'tests': tests}
        client.put_object(Body=pickle.dumps(test_dict),
                          Bucket=TEST_BUCKET_NAME,
                          Key=f'tests/simple_tests.pkl')
    if add_results:
        # Pre-generated fixture data stored under today's date key
        client.put_object(
            Body=json.dumps(previous_results, indent=1),
            Bucket=TEST_BUCKET_NAME,
            Key=f'results/test/results_simple_tests_{date_str}.json')
    if add_model_stats:
        client.put_object(Body=json.dumps(previous_model_stats, indent=1),
                          Bucket=TEST_BUCKET_NAME,
                          Key=f'model_stats/test/model_stats_{date_str}.json')
    if add_test_stats:
        client.put_object(
            Body=json.dumps(previous_test_stats, indent=1),
            Bucket=TEST_BUCKET_NAME,
            Key=f'stats/test/test_stats_simple_tests_{date_str}.json')
    return client
def format_results(results, query_type='path_property'):
    """Format db output to a standard json structure.

    Each row in `results` is indexed as: 0 model name, 1 query object,
    2 model-checker type, 3 response json, 4 delta (keys of new results),
    5 date. Rows are grouped by query hash, one entry per checker type.
    """
    model_types = ['pysb', 'pybel', 'signed_graph', 'unsigned_graph']
    formatted_results = {}
    for result in results:
        model = result[0]
        query = result[1]
        query_hash = query.get_hash_with_model(model)
        if query_hash not in formatted_results:
            formatted_results[query_hash] = {
                'query': query.to_english(),
                'model': model,
                'date': make_date_str(result[5])
            }
        mc_type = result[2]
        response_json = result[3]
        delta = result[4]
        response = []
        # A string value in the response is a status/failure message and
        # replaces the whole response; dict values are path results. Paths
        # whose key appears in `delta` are tagged as 'new'.
        for k, v in response_json.items():
            if isinstance(v, str):
                response = v
            elif isinstance(v, dict):
                if k in delta:
                    new_v = deepcopy(v)
                    new_v['path'] = ('new', new_v['path'])
                    response.append(new_v)
                else:
                    response.append(v)
        if query_type in ['path_property', 'open_search_query']:
            if mc_type == '' and \
                    response == 'Query is not applicable for this model':
                # Not applicable to any checker: mark all model types n_a
                for mt in model_types:
                    formatted_results[query_hash][mt] = ['n_a', response]
            elif isinstance(response, str) and \
                    response == 'Statement type not handled':
                formatted_results[query_hash][mc_type] = ['n_a', response]
            elif isinstance(response, str) and \
                    not response == 'Path found but exceeds search depth':
                # Any other status string is treated as a failure
                formatted_results[query_hash][mc_type] = ['Fail', response]
            else:
                # Paths were found (or only the search depth was exceeded)
                formatted_results[query_hash][mc_type] = ['Pass', response]
        elif query_type == 'simple_intervention_property':
            if response == 'Query is not applicable for this model':
                formatted_results[query_hash]['result'] = ['n_a', response]
            else:
                # result strings look like 'no_change', '[no_]...increase',
                # '[no_]...decrease'; a 'no' prefix means the query failed.
                # NOTE(review): `action` is unbound if res matches none of
                # these patterns — confirm result values are limited to them.
                res = response[0]['result']
                if res == 'no_change':
                    action = 'did not change'
                elif res.endswith('increase'):
                    action = 'increased'
                elif res.endswith('decrease'):
                    action = 'decreased'
                if res.startswith('no'):
                    expl = f'No, the amount of target entity {action}.'
                    formatted_results[query_hash]['result'] = ['Fail', expl]
                else:
                    expl = f'Yes, the amount of target entity {action}.'
                    formatted_results[query_hash]['result'] = ['Pass', expl]
                formatted_results[query_hash]['image'] = (
                    response[0]['fig_path'])
        elif query_type == 'dynamic_property':
            if response == 'Query is not applicable for this model':
                formatted_results[query_hash]['result'] = ['n_a', response]
            else:
                # Pass if the property held in more than half the simulations
                res = int(response[0]['sat_rate'] * 100)
                expl = (f'Satisfaction rate is {res}% after '
                        f'{response[0]["num_sim"]} simulations.')
                if res > 50:
                    formatted_results[query_hash]['result'] = ['Pass', expl]
                else:
                    formatted_results[query_hash]['result'] = ['Fail', expl]
                formatted_results[query_hash]['image'] = (
                    response[0]['fig_path'])
    if query_type in ['path_property', 'open_search_query']:
        # Loop through the results again to make sure all model types are there
        for qh in formatted_results:
            for mt in model_types:
                if mt not in formatted_results[qh]:
                    formatted_results[qh][mt] = [
                        'n_a', 'Model type not supported'
                    ]
    return formatted_results
def run_model_tests_from_s3(model_name, upload_mm=True, upload_results=True,
                            upload_stats=True, registered_queries=True,
                            db=None):
    """Run a given set of tests on a given model, both loaded from S3.

    After loading both the model and the set of tests, model/test overlap
    is determined using a ScopeTestConnector and tests are run.

    Parameters
    ----------
    model_name : str
        Name of EmmaaModel to load from S3.
    upload_mm : Optional[bool]
        Whether to upload a model manager instance to S3 as a pickle file.
        Default: True
    upload_results : Optional[bool]
        Whether to upload test results to S3 in JSON format. Can be set
        to False when running tests. Default: True
    upload_stats : Optional[bool]
        Whether to upload latest statistics about model and a test.
        Default: True
    registered_queries : Optional[bool]
        If True, registered queries are fetched from the database and
        executed, the results are then saved to the database.
        Default: True
    db : Optional[emmaa.db.manager.EmmaaDatabaseManager]
        If given over-rides the default primary database.

    Returns
    -------
    emmaa.model_tests.ModelManager
        Instance of ModelManager containing the model data, list of
        applied tests and the test results.
    emmaa.analyze_test_results.StatsGenerator
        Instance of StatsGenerator containing statistics about model
        and test.
    """
    model = EmmaaModel.load_from_s3(model_name)
    # Fall back to the large corpus if the model config names no corpus
    test_corpus = model.test_config.get('test_corpus',
                                        'large_corpus_tests.pkl')
    tests = load_tests_from_s3(test_corpus)
    mm = ModelManager(model)
    if upload_mm:
        save_model_manager_to_s3(model_name, mm)
    # Determine model/test overlap, then run all applicable tests
    tm = TestManager([mm], tests)
    tm.make_tests(ScopeTestConnector())
    tm.run_tests()
    results_json_dict = mm.results_to_json()
    results_json_str = json.dumps(results_json_dict, indent=1)
    # Optionally upload test results to S3
    if upload_results:
        client = get_s3_client(unsigned=False)
        date_str = make_date_str()
        result_key = f'results/{model_name}/results_{date_str}.json'
        logger.info(f'Uploading test results to {result_key}')
        client.put_object(Bucket='emmaa', Key=result_key,
                          Body=results_json_str.encode('utf8'))
    # Derive round statistics from this run's results
    tr = TestRound(results_json_dict)
    sg = StatsGenerator(model_name, latest_round=tr)
    sg.make_stats()
    # Optionally upload statistics to S3
    if upload_stats:
        sg.save_to_s3()
    if registered_queries:
        # Answer user-registered queries and persist results in the db
        qm = QueryManager(db=db, model_managers=[mm])
        qm.answer_registered_queries(model_name)
    return (mm, sg)