Ejemplo n.º 1
0
def make_config(search_terms, human_readable_name, description,
                short_name, ndex_network=None, save_to_s3=False):
    """Build a config for a WorldModelers model, optionally saving it to S3.

    Parameters
    ----------
    search_terms : iterable
        Objects exposing a ``to_json()`` method; the result of each call
        becomes one entry of the ``search_terms`` list in the config.
    human_readable_name : str
        Stored under the ``human_readable_name`` key.
    description : str
        Stored under the ``description`` key.
    short_name : str
        Name handed to ``save_config_to_s3`` when ``save_to_s3`` is True.
    ndex_network : Optional[str]
        Value for ``config['ndex']['network']``. Any falsy value is
        replaced by an empty string. Default: None
    save_to_s3 : Optional[bool]
        If True, the config is passed to ``save_config_to_s3``.
        Default: False

    Returns
    -------
    dict
        The assembled config.
    """
    test_section = {
        'statement_checking': {'max_path_length': 5, 'max_paths': 1},
        'test_corpus': 'world_modelers_tests.pkl',
    }
    assembly_section = {
        'skip_map_grounding': True,
        'skip_filter_human': True,
        'skip_map_sequence': True,
        'belief_cutoff': 0.8,
        'filter_ungrounded': True,
        'score_threshold': 0.7,
        'filter_relevance': 'prior_one',
        'standardize_names': True,
        'preassembly_mode': 'wm',
    }
    reading_section = {
        'literature_source': 'elsevier',
        'reader': 'elsevier_eidos',
    }
    # Key order is kept stable so that serialized configs do not change.
    config = {
        'ndex': {'network': ndex_network or ''},
        'human_readable_name': human_readable_name,
        'search_terms': [term.to_json() for term in search_terms],
        'test': test_section,
        'assembly': assembly_section,
        'reading': reading_section,
        'description': description,
    }
    if save_to_s3:
        save_config_to_s3(short_name, config)
    return config
Ejemplo n.º 2
0
def test_save_load_config():
    """A config saved to the test bucket is read back unchanged."""
    # Local imports are recommended when using moto
    from emmaa.model import save_config_to_s3, load_config_from_s3
    # Only the bucket setup is needed here, not the returned client.
    setup_bucket()
    original = {'test': 'This is test config'}
    save_config_to_s3('test', original, bucket=TEST_BUCKET_NAME)
    assert original == load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
Ejemplo n.º 3
0
 def make_model(self):
     """Build an EmmaaModel and store it, together with its config, on S3.

     The config comes from ``self.make_config()``. The model is given
     ``self.stmts``, ``upload_to_ndex()`` is called on it, and the value
     it returns is recorded in the config before the config and the
     model are saved.
     """
     model_config = self.make_config()
     model = EmmaaModel(self.name, model_config)
     model.stmts = self.stmts
     # The network ID is only available once the upload has returned,
     # so it is written into the config afterwards.
     model_config['ndex'] = {'network': model.upload_to_ndex()}
     save_config_to_s3(self.name, model_config)
     model.save_to_s3()
Ejemplo n.º 4
0
    def make_config(self, upload_to_s3=False):
        """Return a config dict for the model, optionally upload to S3.

        Parameters
        ----------
        upload_to_s3 : Optional[bool]
            If True, the config is passed to
            ``emmaa.model.save_config_to_s3`` under the model's name.
            Default: False

        Returns
        -------
        dict
            A config data structure.
        """
        # The large corpus tests are configured by default
        test_section = {
            'statement_checking': {'max_path_length': 10, 'max_paths': 1},
            'mc_types': ['signed_graph', 'unsigned_graph'],
            'make_links': True,
            'test_corpus': ['large_corpus_tests'],
            'default_test_corpus': 'large_corpus_tests',
            'filters': {'large_corpus_tests': 'filter_chem_mesh_go'},
        }
        config = {
            # User-provided values set upon initialization
            'name': self.name,
            'human_readable_name': self.human_readable_name,
            'description': self.description,
            # Defaults: no tests are made, daily updates are run and
            # the model is first shown on dev only
            'make_tests': False,
            'run_daily_update': True,
            'dev_only': True,
            # Search terms constructed upon initialization
            'search_terms': [term.to_json() for term in self.search_terms],
            # Adopted from the template specified upon initialization
            'assembly': self.assembly_config,
            'test': test_section,
        }
        if not upload_to_s3:
            return config
        from emmaa.model import save_config_to_s3
        save_config_to_s3(self.name, config)
        return config
Ejemplo n.º 5
0
def create_upload_model(model_name, indra_stmts, config_file):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    The config is read from ``config_file``. If it has no ``ndex`` entry,
    a new NDEx network is created from the statements, its ID is added to
    the config, and the amended config is written next to the original as
    ``<config_file>.updated`` (the original file is left untouched). The
    model and the config are then saved to S3.

    Parameters
    ----------
    model_name : str
        Name of the model to use on S3.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    config_file : str
        Path to the local config.json file.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [],
                                 {'internal': True})
    # Load config information. JSON is read as UTF-8 explicitly instead of
    # relying on the platform's locale encoding.
    with open(config_file, 'rt', encoding='utf-8') as f:
        config_json = json.load(f)
    # If there is no ndex entry in the config, create a new network and
    # write an updated copy of the config containing the NDEx network ID
    if 'ndex' not in config_json:
        cxa = CxAssembler(indra_stmts)
        # The returned CX string is not needed here.
        # NOTE(review): presumably upload_model relies on make_model
        # having been run first - confirm against CxAssembler.
        cxa.make_model()
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
        config_json['ndex'] = {'network': ndex_id}
        updated_config_file = f'{config_file}.updated'
        with open(updated_config_file, 'wt', encoding='utf-8') as f:
            json.dump(config_json, f, indent=2)
    # If the NDEx ID is provided we don't need to update the existing network
    # because this will occur as part of the model assembly/update procedure
    # on EMMAA itself.
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_json)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3
    emmaa_model.save_to_s3()
    # Upload config JSON
    save_config_to_s3(model_name, config_json)
Ejemplo n.º 6
0
def test_model_to_tests():
    """model_to_tests output follows the ``make_tests`` filter in the config."""
    # Local imports are recommended when using moto
    from emmaa.model_tests import model_to_tests, load_tests_from_s3, \
        StatementCheckingTest
    from emmaa.model import save_config_to_s3, load_config_from_s3

    def check_layout(candidate):
        # Every test dictionary must be a dict with these two entries.
        assert isinstance(candidate, dict)
        assert 'test_data' in candidate
        assert 'tests' in candidate

    setup_bucket(add_model=True, add_mm=True)

    # With default config get tests for each statement
    unfiltered = model_to_tests('test', bucket=TEST_BUCKET_NAME)
    check_layout(unfiltered)
    assert len(unfiltered['tests']) == 2

    # Modify config to filter
    config = load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
    config['make_tests'] = {
        'filter': {
            'conditions': {'curated': False},
            'evid_policy': 'any',
        },
    }
    save_config_to_s3('test', config, bucket=TEST_BUCKET_NAME)

    # With modified config statements are filtered
    filtered = model_to_tests('test', bucket=TEST_BUCKET_NAME)
    check_layout(filtered)
    remaining = filtered['tests']
    assert len(remaining) == 1
    assert isinstance(remaining[0], StatementCheckingTest)

    # The generated tests must also be retrievable from the bucket
    loaded_tests, _ = load_tests_from_s3('test_tests', bucket=TEST_BUCKET_NAME)
    assert loaded_tests
    check_layout(loaded_tests)
Ejemplo n.º 7
0
def create_upload_model(model_name, full_name, indra_stmts, ndex_id=None):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    Parameters
    ----------
    model_name : str
        Short name of the model to use on S3.
    full_name : str
        Human-readable model name to use in EMMAA dashboard. It is stored
        in the config under the ``human_readable_name`` key.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    ndex_id : Optional[str]
        UUID of the network corresponding to the model on NDex. If provided,
        the NDex network will be updated with the latest model content.
        If None (default), a new network will be created and the UUID stored
        in the model config on S3.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [])
    # Get updated CX content for the INDRA Statements
    cxa = CxAssembler(indra_stmts)
    cx_str = cxa.make_model()
    # If we don't have an NDex ID, create network and upload to Ndex
    if ndex_id is None:
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
    # If the NDEx ID is provided, update the existing network
    else:
        ndex_client.update_network(cx_str, ndex_id)
    # Create the config dictionary. The human-readable name was previously
    # accepted but never stored anywhere; it is recorded here.
    config_dict = {'ndex': {'network': ndex_id},
                   'search_terms': [],
                   'human_readable_name': full_name}
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model and config to S3
    emmaa_model.save_to_s3()
    save_config_to_s3(model_name, config_dict)
Ejemplo n.º 8
0
def setup_bucket(add_model=False,
                 add_mm=False,
                 add_tests=False,
                 add_results=False,
                 add_model_stats=False,
                 add_test_stats=False):
    """Create a new (local) bucket mocking the S3 bucket and populate it.

    A new bucket is created at each call, so that all calls to S3 go to
    this bucket instead of the real S3 bucket. Different tests need
    different files in the bucket; for faster computation only the files
    requested through the arguments are generated and stored.

    Parameters
    ----------
    add_model : Optional[bool]
        Store a test config and a model in the bucket. Default: False
    add_mm : Optional[bool]
        Store a ModelManager and its assembled statements. Default: False
    add_tests : Optional[bool]
        Store a pickled test dictionary under ``tests/simple_tests.pkl``.
        Default: False
    add_results : Optional[bool]
        Store ``previous_results`` as a JSON results file. Default: False
    add_model_stats : Optional[bool]
        Store ``previous_model_stats`` as a JSON file. Default: False
    add_test_stats : Optional[bool]
        Store ``previous_test_stats`` as a JSON file. Default: False

    Returns
    -------
    The S3 client that was used to create and fill the bucket.
    """
    # Local imports are recommended when using moto
    from emmaa.util import get_s3_client
    from emmaa.model import save_config_to_s3
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        StatementCheckingTest

    # Create a mock s3 bucket
    s3 = get_s3_client()
    s3.create_bucket(Bucket=TEST_BUCKET_NAME, ACL='public-read')
    stamp = make_date_str()

    def put_json(payload, key):
        # All JSON fixtures are uploaded with the same formatting.
        s3.put_object(Body=json.dumps(payload, indent=1),
                      Bucket=TEST_BUCKET_NAME,
                      Key=key)

    model = None
    if add_model:
        # Put config and model files into empty bucket
        assembly_steps = [
            {'function': 'filter_no_hypothesis'},
            {'function': 'map_grounding'},
            {'function': 'filter_grounded_only'},
            {'function': 'filter_human_only'},
            {'function': 'map_sequence'},
            {'function': 'run_preassembly',
             'kwargs': {'return_toplevel': False}},
            {'function': 'filter_top_level'},
        ]
        mapk1_term = {
            'db_refs': {'HGNC': '20974'},
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene',
        }
        model_config = {
            'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
            'search_terms': [mapk1_term],
            'test': {
                'test_corpus': 'simple_tests',
                'default_test_corpus': 'simple_tests',
            },
            'human_readable_name': 'Test Model',
            'assembly': assembly_steps,
        }
        save_config_to_s3('test', model_config, bucket=TEST_BUCKET_NAME)
        model = create_model()
        model.save_to_s3(bucket=TEST_BUCKET_NAME)

    if add_mm:
        # Add a ModelManager to bucket, reusing the model made above
        model = model or create_model()
        manager = ModelManager(model)
        manager.date_str = stamp
        manager.save_assembled_statements(upload_to_db=False,
                                          bucket=TEST_BUCKET_NAME)
        save_model_manager_to_s3('test', manager, bucket=TEST_BUCKET_NAME)

    if add_tests:
        braf_activates_mapk1 = Activation(Agent('BRAF'), Agent('MAPK1'))
        test_dict = {
            'test_data': {'description': 'Tests for functionality testing'},
            'tests': [StatementCheckingTest(braf_activates_mapk1)],
        }
        s3.put_object(Body=pickle.dumps(test_dict),
                      Bucket=TEST_BUCKET_NAME,
                      Key='tests/simple_tests.pkl')

    if add_results:
        put_json(previous_results,
                 f'results/test/results_simple_tests_{stamp}.json')
    if add_model_stats:
        put_json(previous_model_stats,
                 f'model_stats/test/model_stats_{stamp}.json')
    if add_test_stats:
        put_json(previous_test_stats,
                 f'stats/test/test_stats_simple_tests_{stamp}.json')
    return s3
Ejemplo n.º 9
0
def save_config(ctype, terms):
    """Replace the search terms in the stored config of ``ctype``.

    The config is loaded with ``load_config_from_s3(ctype)``, its
    ``search_terms`` entry is overwritten with the ``to_json()`` form of
    each item in ``terms``, and the result is written back with
    ``save_config_to_s3``.
    """
    stored = load_config_from_s3(ctype)
    serialized_terms = [item.to_json() for item in terms]
    stored['search_terms'] = serialized_terms
    save_config_to_s3(ctype, stored)