def generate_model(model_name):
    """Build a small EMMAA model from natural language for end-to-end tests.

    Two sentences are processed with TRIPS; every extracted INDRA Statement
    is wrapped in an EmmaaStatement and added to a new EmmaaModel.

    Parameters
    ----------
    model_name : str
        Name given to the EmmaaModel.

    Returns
    -------
    tuple
        The EmmaaModel and the config dictionary it was created with.
    """
    processor = trips.process_text('BRAF activates MAP2K1. '
                                   'Active MAP2K1 activates MAPK1.')
    extracted = processor.statements

    wrapped = []
    for indra_stmt in extracted:
        wrapped.append(
            EmmaaStatement(indra_stmt, datetime.datetime.now(), 'MAPK1'))

    # The NDEx network UUID is fixed here; no network is assembled or
    # uploaded by this helper.
    ndex_entry = {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'}
    mapk1_term = {
        'db_refs': {'HGNC': '20974'},
        'name': 'MAPK1',
        'search_term': 'MAPK1',
        'type': 'gene',
    }
    config_dict = {'ndex': ndex_entry, 'search_terms': [mapk1_term]}

    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(wrapped)
    return emmaa_model, config_dict
def test_model_json():
    """Test the json structure and content of EmmaaModel.to_json() output.

    A model is built from two hand-made Activation statements and the
    resulting JSON is checked for its top-level keys, the search-term config
    and the serialized statements.
    """
    indra_stmts = [
        Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                   Agent('MAP2K1'),
                   evidence=[Evidence(text='BRAF activates MAP2K1.')]),
        Activation(Agent('MAP2K1',
                         activity=ActivityCondition('activity', True)),
                   Agent('MAPK1'),
                   evidence=[Evidence(
                       text='Active MAP2K1 activates MAPK1.')]),
    ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(stmt, datetime.datetime.now(), [st])
        for stmt in indra_stmts
    ]
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    emmaa_model_json = emmaa_model.to_json()

    # Test json structure
    assert emmaa_model_json['name'] == 'test'
    assert isinstance(emmaa_model_json['stmts'], list)
    assert emmaa_model_json['ndex_network'] == \
        'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'

    # Test config
    assert emmaa_model_json['search_terms'][0]['type'] == 'gene'
    assert emmaa_model_json['search_terms'][0]['db_refs'] == \
        {'HGNC': '20974'}

    # Test json statements (each evidence text is checked exactly once)
    assert 'BRAF activates MAP2K1.' == \
        emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['text']
    assert 'Active MAP2K1 activates MAPK1.' == \
        emmaa_model_json['stmts'][1]['stmt']['evidence'][0]['text']
    assert emmaa_model_json['stmts'][0]['stmt']['subj']['name'] == 'BRAF'
    assert emmaa_model_json['stmts'][1]['stmt']['subj']['name'] == 'MAP2K1'
    assert emmaa_model_json['stmts'][1]['stmt']['obj']['name'] == 'MAPK1'

    # Need hashes to be strings so that javascript can read them
    assert isinstance(
        emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['source_hash'],
        str)
def upload_prior(ctype, config, gene_names):
    """Load the pickled prior statements of a model and update it on NDEx.

    Parameters
    ----------
    ctype : str
        Model name; also the directory under ``../models`` that holds
        ``prior_stmts.pkl``.
    config : dict
        Configuration passed to the EmmaaModel constructor.
    gene_names : list
        Forwarded to ``get_emmaa_statements`` together with the statements.
    """
    prior_path = f'../models/{ctype}/prior_stmts.pkl'
    with open(prior_path, 'rb') as pkl_file:
        prior_stmts = pickle.load(pkl_file)

    emmaa_stmts = get_emmaa_statements(prior_stmts, gene_names)

    emmaa_model = EmmaaModel(ctype, config)
    emmaa_model.add_statements(emmaa_stmts)
    emmaa_model.update_to_ndex()
def upload_prior(ctype, config):
    """Load the pickled prior statements of a model and upload it to NDEx.

    Parameters
    ----------
    ctype : str
        Model name; also the directory under ``models`` that holds
        ``prior_stmts.pkl``.
    config : dict
        Configuration passed to the EmmaaModel constructor.
    """
    prior_path = f'models/{ctype}/prior_stmts.pkl'
    with open(prior_path, 'rb') as pkl_file:
        prior_stmts = pickle.load(pkl_file)

    # Each prior statement is wrapped with the current time and an empty
    # search-term list.
    emmaa_stmts = []
    for prior_stmt in prior_stmts:
        emmaa_stmts.append(
            EmmaaStatement(prior_stmt, datetime.datetime.now(), []))

    emmaa_model = EmmaaModel(ctype, config)
    emmaa_model.add_statements(emmaa_stmts)
    emmaa_model.upload_to_ndex()
def test_model_extend():
    """Check that EmmaaModel.extend_unique only adds new statements.

    Three Phosphorylation statements are built, the first and third carrying
    evidence with identical fields. After adding the first and extending
    with the other two, the model is expected to hold two statements; a
    statement on a different agent then brings the count to three.
    """
    evidences = [
        Evidence(pmid='1234', text='abcd', source_api='x'),
        Evidence(pmid='1234', text='abcde', source_api='x'),
        Evidence(pmid='1234', text='abcd', source_api='x'),
    ]
    phosphorylations = []
    for evidence in evidences:
        phosphorylations.append(
            Phosphorylation(None, Agent('a'), evidence=evidence))
    wrapped = []
    for phos in phosphorylations:
        wrapped.append(EmmaaStatement(phos, datetime.datetime.now(), []))

    model = EmmaaModel('x', {'search_terms': [], 'ndex': {'network': None}})
    first, *remaining = wrapped
    model.add_statements([first])
    model.extend_unique(remaining)
    assert len(model.stmts) == 2

    # Same evidence as the first statement, but on a different agent.
    other_agent_stmt = EmmaaStatement(
        Phosphorylation(None, Agent('b'), evidence=evidences[0]),
        datetime.datetime.now(), [])
    model.extend_unique([other_agent_stmt])
    assert len(model.stmts) == 3
def create_model(relevance=None, paper_ids=None):
    """Create a two-statement EmmaaModel named 'test' for use in tests.

    Parameters
    ----------
    relevance : Optional[str]
        If truthy, a ``filter_relevance`` step using this value as its
        ``policy`` is appended to the assembly pipeline in the config.
    paper_ids : Optional
        Passed through as the third argument of the EmmaaModel constructor.

    Returns
    -------
    EmmaaModel
        The model with both statements added.
    """
    braf_activates_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '1097'}),
        Agent('MAP2K1', db_refs={'HGNC': '6840'}),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion',
                           text_refs={'TRID': '1234'})])
    map2k1_activates_mapk1 = Activation(
        Agent('MAP2K1', db_refs={'HGNC': '6840'},
              activity=ActivityCondition('activity', True)),
        Agent('MAPK1', db_refs={'HGNC': '6871'}),
        evidence=[Evidence(text='Active MAP2K1 activates MAPK1.',
                           source_api='assertion',
                           text_refs={'TRID': '2345'})])

    term = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    first_estmt = EmmaaStatement(
        braf_activates_map2k1, datetime.datetime.now(), [term],
        {'internal': True, 'curated': False})
    second_estmt = EmmaaStatement(
        map2k1_activates_mapk1, datetime.datetime.now(), [term],
        {'internal': True, 'curated': True})

    # Assembly pipeline; the relevance filter is optional and goes last.
    assembly_steps = [
        {'function': 'filter_no_hypothesis'},
        {'function': 'map_grounding'},
        {'function': 'filter_grounded_only'},
        {'function': 'filter_human_only'},
        {'function': 'map_sequence'},
        {'function': 'run_preassembly',
         'kwargs': {'return_toplevel': False}},
    ]
    if relevance:
        assembly_steps.append(
            {'function': 'filter_relevance',
             'kwargs': {'policy': relevance}})

    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{'db_refs': {'HGNC': '20974'},
                          'name': 'MAPK1',
                          'search_term': 'MAPK1',
                          'type': 'gene'}],
        'human_readable_name': 'Test Model',
        'test': {
            'statement_checking': {'max_path_length': 5, 'max_paths': 1},
            'test_corpus': 'simple_tests',
            'mc_types': ['pysb', 'pybel', 'signed_graph',
                         'unsigned_graph'],
        },
        'assembly': assembly_steps,
    }

    model = EmmaaModel('test', config_dict, paper_ids)
    model.add_statements([first_estmt, second_estmt])
    return model
def create_upload_model(model_name, full_name, indra_stmts, ndex_id=None):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    Parameters
    ----------
    model_name : str
        Short name of the model to use on S3.
    full_name : str
        Human-readable model name to use in EMMAA dashboard. Note that this
        function does not currently use the value.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    ndex_id : str
        UUID of the network corresponding to the model on NDex. If provided,
        the NDex network will be updated with the latest model content.
        If None (default), a new network will be created and the UUID stored
        in the model config files on S3.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [])

    # Get updated CX content for the INDRA Statements
    cxa = CxAssembler(indra_stmts)
    cx_str = cxa.make_model()

    if ndex_id is None:
        # No NDEx ID given: create a new network and remember its UUID
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
    else:
        # NDEx ID given: update the existing network in place
        ndex_client.update_network(cx_str, ndex_id)

    # Create the config dictionary
    config_dict = {'ndex': {'network': ndex_id},
                   'search_terms': []}

    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    # Upload model to S3, then the config as JSON (uploaded once)
    emmaa_model.save_to_s3()
    s3_client = boto3.client('s3')
    config_json = json.dumps(config_dict)
    s3_client.put_object(Body=config_json.encode('utf8'),
                         Key='models/%s/config.json' % model_name,
                         Bucket='emmaa')
def update_cancer(cancer_type):
    """Update the model for the given cancer.

    A JSON config file must be present for the given cancer type, located in
    the models/<cancer_type>/config.json. The prior statements are read from
    models/<cancer_type>/prior_stmts.pkl.

    Parameters
    ----------
    cancer_type : str
        A short string which is the name of the cancer, and corresponds to a
        directory in the models directory, as described above.
    """
    print(cancer_type)
    with open(f'models/{cancer_type}/prior_stmts.pkl', 'rb') as fh:
        stmts = pickle.load(fh)
    # Use a context manager so the config file handle is closed promptly
    with open(f'models/{cancer_type}/config.json', 'r') as fh:
        config = json.load(fh)
    em = EmmaaModel(cancer_type, config)
    ess = [EmmaaStatement(st, datetime.datetime.now(), []) for st in stmts]
    em.add_statements(ess)
    em.save_to_s3()
def create_upload_model(model_name, indra_stmts, config_file):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    If the config has no ``ndex`` entry, a new NDEx network is created from
    the statements, its ID is added to the config, and the updated config is
    also written locally next to the original as ``<config_file>.updated``.

    Parameters
    ----------
    model_name : str
        Name of the model to use on S3.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    config_file : str
        Path to the local config.json file.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [],
                                 {'internal': True})
    # Load config information
    with open(config_file, 'rt') as f:
        config_json = json.load(f)
    # If there is no ndex entry in the config, create a new network and update
    # the config file with the NDex network ID
    if 'ndex' not in config_json:
        cxa = CxAssembler(indra_stmts)
        # The assembled model is kept on the assembler; the return value
        # is not needed here.
        cxa.make_model()
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
        config_json['ndex'] = {'network': ndex_id}
        updated_config_file = f'{config_file}.updated'
        with open(updated_config_file, 'wt') as f:
            json.dump(config_json, f, indent=2)
    # If the NDEx ID is provided we don't need to update the existing network
    # because this will occur as part of the model assembly/update procedure
    # on EMMAA itself.

    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_json)
    emmaa_model.add_statements(emmaa_stmts)

    # Upload model to S3
    emmaa_model.save_to_s3()
    # Upload config JSON
    save_config_to_s3(model_name, config_json)
def make_model(self, estmts, upload_to_s3=False):
    """Build the initial EMMAA Model and optionally push it to S3.

    Parameters
    ----------
    estmts : list of emmaa.statement.EmmaaStatement
        Prior EMMAA Statements the model is initialized with.
    upload_to_s3 : Optional[bool]
        Forwarded to ``make_config``; when True the model is additionally
        saved to S3 before being returned. Default: False

    Returns
    -------
    emmaa.model.EmmaaModel
        The model created from the generated config with the given
        statements added.
    """
    from emmaa.model import EmmaaModel

    generated_config = self.make_config(upload_to_s3=upload_to_s3)
    emmaa_model = EmmaaModel(name=self.name, config=generated_config)
    emmaa_model.add_statements(estmts)
    if not upload_to_s3:
        return emmaa_model
    emmaa_model.save_to_s3()
    return emmaa_model