def make_config(search_terms, human_readable_name, description, short_name,
                ndex_network=None, save_to_s3=False):
    """Make a config file for WorldModelers models and optionally save to S3."""
    config = {
        # Empty string when no NDEx network is associated yet
        'ndex': {'network': ndex_network or ''},
        'human_readable_name': human_readable_name,
        'search_terms': [term.to_json() for term in search_terms],
        # World Modelers models are checked against a dedicated test corpus
        'test': {
            'statement_checking': {'max_path_length': 5, 'max_paths': 1},
            'test_corpus': 'world_modelers_tests.pkl',
        },
        # WM-specific assembly: no grounding map / human filter / sequence map
        'assembly': {
            'skip_map_grounding': True,
            'skip_filter_human': True,
            'skip_map_sequence': True,
            'belief_cutoff': 0.8,
            'filter_ungrounded': True,
            'score_threshold': 0.7,
            'filter_relevance': 'prior_one',
            'standardize_names': True,
            'preassembly_mode': 'wm',
        },
        'reading': {'literature_source': 'elsevier',
                    'reader': 'elsevier_eidos'},
        'description': description,
    }
    if save_to_s3:
        save_config_to_s3(short_name, config)
    return config
def test_save_load_config():
    """Round-trip a config dict through the mocked S3 bucket."""
    # Local imports are recommended when using moto
    from emmaa.model import save_config_to_s3, load_config_from_s3
    setup_bucket()
    original = {'test': 'This is test config'}
    save_config_to_s3('test', original, bucket=TEST_BUCKET_NAME)
    loaded = load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
    assert loaded == original
def make_model(self):
    """Make an EmmaaModel and upload it along with the config to S3."""
    config = self.make_config()
    model = EmmaaModel(self.name, config)
    model.stmts = self.stmts
    # Upload to NDEx first so the resulting network UUID can be recorded
    # in the config before the config itself is saved.
    network_id = model.upload_to_ndex()
    config['ndex'] = {'network': network_id}
    save_config_to_s3(self.name, config)
    model.save_to_s3()
def make_config(self, upload_to_s3=False):
    """Return a config dict for the model, optionally uploading it to S3.

    Parameters
    ----------
    upload_to_s3 : Optional[bool]
        If True, the config is uploaded to S3 in the EMMAA bucket.
        Default: False

    Returns
    -------
    dict
        A config data structure.
    """
    # Test configuration: check against the large corpus by default,
    # with signed/unsigned graph model checkers in addition to the default.
    test_config = {
        'statement_checking': {'max_path_length': 10, 'max_paths': 1},
        'mc_types': ['signed_graph', 'unsigned_graph'],
        'make_links': True,
        'test_corpus': ['large_corpus_tests'],
        'default_test_corpus': 'large_corpus_tests',
        'filters': {'large_corpus_tests': 'filter_chem_mesh_go'},
    }
    config = {
        # Provided by the user upon initialization
        'name': self.name,
        'human_readable_name': self.human_readable_name,
        'description': self.description,
        # We don't make tests by default
        'make_tests': False,
        # Daily updates run by default
        'run_daily_update': True,
        # New models are first shown only on dev
        'dev_only': True,
        # Search terms constructed upon initialization
        'search_terms': [term.to_json() for term in self.search_terms],
        # Adopted from the template specified upon initialization
        'assembly': self.assembly_config,
        'test': test_config,
    }
    if upload_to_s3:
        from emmaa.model import save_config_to_s3
        save_config_to_s3(self.name, config)
    return config
def create_upload_model(model_name, indra_stmts, config_file):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    Parameters
    ----------
    model_name : str
        Name of the model to use on S3.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    config_file : str
        Path to the local config.json file.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [],
                                 {'internal': True})
    # Load config information
    with open(config_file, 'rt') as f:
        config_json = json.load(f)
    # If there is no ndex entry in the config, create a new network and update
    # the config file with the NDex network ID
    if 'ndex' not in config_json:
        cxa = CxAssembler(indra_stmts)
        # make_model() generates the CX content that upload_model() pushes;
        # the returned string itself is not needed here.
        cxa.make_model()
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
        config_json['ndex'] = {'network': ndex_id}
        # Persist the updated config next to the original so the new
        # network ID is not lost.
        updated_config_file = f'{config_file}.updated'
        with open(updated_config_file, 'wt') as f:
            json.dump(config_json, f, indent=2)
    # If the NDEx ID is provided we don't need to update the existing network
    # because this will occur as part of the model assembly/update procedure
    # on EMMAA itself.
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_json)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3
    emmaa_model.save_to_s3()
    # Upload config JSON
    save_config_to_s3(model_name, config_json)
def test_model_to_tests():
    # Local imports are recommended when using moto
    from emmaa.model_tests import model_to_tests, load_tests_from_s3, \
        StatementCheckingTest
    from emmaa.model import save_config_to_s3, load_config_from_s3

    def get_tests(test_dict):
        # Every generated test dict shares this structure
        assert isinstance(test_dict, dict)
        assert 'test_data' in test_dict
        assert 'tests' in test_dict
        return test_dict['tests']

    setup_bucket(add_model=True, add_mm=True)
    # With default config get tests for each statement
    tests = get_tests(model_to_tests('test', bucket=TEST_BUCKET_NAME))
    assert len(tests) == 2
    # Modify config to filter
    config = load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
    config['make_tests'] = {
        'filter': {
            'conditions': {'curated': False},
            'evid_policy': 'any'
        }
    }
    save_config_to_s3('test', config, bucket=TEST_BUCKET_NAME)
    # With modified config statements are filtered
    tests = get_tests(model_to_tests('test', bucket=TEST_BUCKET_NAME))
    assert len(tests) == 1
    assert isinstance(tests[0], StatementCheckingTest)
    # The generated tests were persisted and can be loaded back
    loaded_tests, _ = load_tests_from_s3('test_tests', bucket=TEST_BUCKET_NAME)
    assert loaded_tests
    assert isinstance(loaded_tests, dict)
    assert 'test_data' in loaded_tests
    assert 'tests' in loaded_tests
def create_upload_model(model_name, full_name, indra_stmts, ndex_id=None):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    Parameters
    ----------
    model_name : str
        Short name of the model to use on S3.
    full_name : str
        Human-readable model name to use in EMMAA dashboard.
        NOTE(review): currently unused in this function; kept for
        backward compatibility of the call signature.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    ndex_id : str
        UUID of the network corresponding to the model on NDex. If provided,
        the NDex network will be updated with the latest model content.
        If None (default), a new network will be created and the UUID stored
        in the model config files on S3.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [])
    # Get updated CX content for the INDRA Statements
    cxa = CxAssembler(indra_stmts)
    cx_str = cxa.make_model()
    # If we don't have an NDex ID, create network and upload to Ndex
    if ndex_id is None:
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
    # If the NDEx ID is provided, update the existing network
    else:
        ndex_client.update_network(cx_str, ndex_id)
    # Create the config dictionary
    config_dict = {'ndex': {'network': ndex_id}, 'search_terms': []}
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3 along with the config JSON
    emmaa_model.save_to_s3()
    save_config_to_s3(model_name, config_dict)
def setup_bucket(add_model=False, add_mm=False, add_tests=False,
                 add_results=False, add_model_stats=False,
                 add_test_stats=False):
    """
    This function creates a new (local) bucket mocking S3 bucket at each call.
    Then all calls to S3 are calling this bucket instead of real S3 bucket.
    Depending on the test we might or might not need the bucket to contain
    different files. For faster computation, only required files for the test
    are generated and stored in the bucket. Files can be added by setting
    corresponding arguments to True when calling this function.
    """
    # Local imports are recommended when using moto
    from emmaa.util import get_s3_client
    from emmaa.model import save_config_to_s3
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        StatementCheckingTest
    # Create a mock s3 bucket
    client = get_s3_client()
    bucket = client.create_bucket(Bucket=TEST_BUCKET_NAME, ACL='public-read')
    # Single timestamp shared by all dated keys written below
    date_str = make_date_str()
    emmaa_model = None
    if add_model:
        # Put config and model files into empty bucket.
        # NOTE(review): other tests appear to depend on the exact contents of
        # this config (e.g. two statements -> two tests); do not change values
        # without checking dependent tests.
        config_dict = {
            'ndex': {
                'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
            },
            'search_terms': [{
                'db_refs': {
                    'HGNC': '20974'
                },
                'name': 'MAPK1',
                'search_term': 'MAPK1',
                'type': 'gene'
            }],
            'test': {
                'test_corpus': 'simple_tests',
                'default_test_corpus': 'simple_tests'
            },
            'human_readable_name': 'Test Model',
            'assembly': [{
                'function': 'filter_no_hypothesis'
            }, {
                'function': 'map_grounding'
            }, {
                'function': 'filter_grounded_only'
            }, {
                'function': 'filter_human_only'
            }, {
                'function': 'map_sequence'
            }, {
                'function': 'run_preassembly',
                'kwargs': {
                    'return_toplevel': False
                }
            }, {
                'function': 'filter_top_level'
            }]
        }
        save_config_to_s3('test', config_dict, bucket=TEST_BUCKET_NAME)
        emmaa_model = create_model()
        emmaa_model.save_to_s3(bucket=TEST_BUCKET_NAME)
    if add_mm:
        # Add a ModelManager to bucket; reuse the model created above if any
        if not emmaa_model:
            emmaa_model = create_model()
        mm = ModelManager(emmaa_model)
        mm.date_str = date_str
        mm.save_assembled_statements(upload_to_db=False,
                                     bucket=TEST_BUCKET_NAME)
        save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    if add_tests:
        # A single pickled StatementCheckingTest under the simple_tests corpus
        tests = [
            StatementCheckingTest(Activation(Agent('BRAF'), Agent('MAPK1')))
        ]
        test_dict = {
            'test_data': {
                'description': 'Tests for functionality testing'
            },
            'tests': tests
        }
        client.put_object(Body=pickle.dumps(test_dict),
                          Bucket=TEST_BUCKET_NAME,
                          Key=f'tests/simple_tests.pkl')
    if add_results:
        # Previously generated fixtures (defined elsewhere in this module)
        client.put_object(
            Body=json.dumps(previous_results, indent=1),
            Bucket=TEST_BUCKET_NAME,
            Key=f'results/test/results_simple_tests_{date_str}.json')
    if add_model_stats:
        client.put_object(Body=json.dumps(previous_model_stats, indent=1),
                          Bucket=TEST_BUCKET_NAME,
                          Key=f'model_stats/test/model_stats_{date_str}.json')
    if add_test_stats:
        client.put_object(
            Body=json.dumps(previous_test_stats, indent=1),
            Bucket=TEST_BUCKET_NAME,
            Key=f'stats/test/test_stats_simple_tests_{date_str}.json')
    # Return the mock client so tests can inspect/add objects directly
    return client
def save_config(ctype, terms):
    """Replace the search terms in the stored config and save it back to S3."""
    serialized_terms = [t.to_json() for t in terms]
    config = load_config_from_s3(ctype)
    config['search_terms'] = serialized_terms
    save_config_to_s3(ctype, config)