Beispiel #1
0
def generate_model(model_name):
    """Build a tiny EmmaaModel from two natural-language sentences.

    The sentences are processed with ``trips.process_text`` and every
    resulting INDRA Statement is wrapped into an EmmaaStatement before being
    added to the model.

    Parameters
    ----------
    model_name : str
        Name given to the created EmmaaModel.

    Returns
    -------
    tuple
        The populated EmmaaModel and the config dict it was created with.
    """
    processor = trips.process_text('BRAF activates MAP2K1. '
                                   'Active MAP2K1 activates MAPK1.')
    emmaa_stmts = []
    for indra_stmt in processor.statements:
        emmaa_stmts.append(
            EmmaaStatement(indra_stmt, datetime.datetime.now(), 'MAPK1'))
    # The NDEx network id is hard-coded here; no CX model is assembled or
    # uploaded by this function.
    mapk1_term = {
        'db_refs': {'HGNC': '20974'},
        'name': 'MAPK1',
        'search_term': 'MAPK1',
        'type': 'gene',
    }
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [mapk1_term],
    }
    model = EmmaaModel(model_name, config_dict)
    model.add_statements(emmaa_stmts)
    return model, config_dict
Beispiel #2
0
def test_model_json():
    """Test the json structure and content of EmmaaModel.to_json() output.

    A two-statement model (BRAF activates MAP2K1, active MAP2K1 activates
    MAPK1) is built, serialized with ``to_json`` and then the top-level
    keys, the search term config and the serialized statements are checked.
    """
    indra_stmts = \
        [Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                    Agent('MAP2K1'),
                    evidence=[Evidence(text='BRAF activates MAP2K1.')]),
         Activation(Agent('MAP2K1',
                          activity=ActivityCondition('activity', True)),
                    Agent('MAPK1'),
                    evidence=[Evidence(text='Active MAP2K1 activates MAPK1.')])
         ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(stmt, datetime.datetime.now(), [st])
        for stmt in indra_stmts
    ]
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    emmaa_model_json = emmaa_model.to_json()

    # Test json structure
    assert emmaa_model_json['name'] == 'test'
    assert isinstance(emmaa_model_json['stmts'], list)
    assert emmaa_model_json['ndex_network'] == \
        'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'

    # Test config
    assert emmaa_model_json['search_terms'][0]['type'] == 'gene'
    assert emmaa_model_json['search_terms'][0]['db_refs'] == {'HGNC': '20974'}

    # Test json statements: evidence text first, then agent names
    assert 'BRAF activates MAP2K1.' == \
           emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['text']
    assert 'Active MAP2K1 activates MAPK1.' == \
           emmaa_model_json['stmts'][1]['stmt']['evidence'][0]['text']
    assert emmaa_model_json['stmts'][0]['stmt']['subj']['name'] == 'BRAF'
    assert emmaa_model_json['stmts'][1]['stmt']['subj']['name'] == 'MAP2K1'
    assert emmaa_model_json['stmts'][1]['stmt']['obj']['name'] == 'MAPK1'

    # Need hashes to be strings so that javascript can read them
    assert isinstance(
        emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['source_hash'],
        str)
Beispiel #3
0
def create_model(relevance=None, paper_ids=None):
    """Create a two-statement test EmmaaModel with an assembly pipeline.

    Parameters
    ----------
    relevance : Optional[str]
        If truthy, a ``filter_relevance`` step using this value as its
        policy is appended to the assembly configuration.
    paper_ids : Optional
        Passed through unchanged as the third argument of EmmaaModel.

    Returns
    -------
    EmmaaModel
        Model named 'test' holding the two EmmaaStatements.
    """
    braf_activates_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '1097'}),
        Agent('MAP2K1', db_refs={'HGNC': '6840'}),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion',
                           text_refs={'TRID': '1234'})])
    map2k1_activates_mapk1 = Activation(
        Agent('MAP2K1', db_refs={'HGNC': '6840'},
              activity=ActivityCondition('activity', True)),
        Agent('MAPK1', db_refs={'HGNC': '6871'}),
        evidence=[Evidence(text='Active MAP2K1 activates MAPK1.',
                           source_api='assertion',
                           text_refs={'TRID': '2345'})])
    search_term = SearchTerm('gene', 'MAP2K1', db_refs={},
                             search_term='MAP2K1')
    # Both statements are internal; only the second is flagged as curated.
    emmaa_stmts = [
        EmmaaStatement(braf_activates_map2k1, datetime.datetime.now(),
                       [search_term], {'internal': True, 'curated': False}),
        EmmaaStatement(map2k1_activates_mapk1, datetime.datetime.now(),
                       [search_term], {'internal': True, 'curated': True}),
    ]
    assembly_steps = [
        {'function': 'filter_no_hypothesis'},
        {'function': 'map_grounding'},
        {'function': 'filter_grounded_only'},
        {'function': 'filter_human_only'},
        {'function': 'map_sequence'},
        {'function': 'run_preassembly',
         'kwargs': {'return_toplevel': False}},
    ]
    if relevance:
        assembly_steps.append(
            {'function': 'filter_relevance', 'kwargs': {'policy': relevance}})
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{'db_refs': {'HGNC': '20974'}, 'name': 'MAPK1',
                          'search_term': 'MAPK1', 'type': 'gene'}],
        'human_readable_name': 'Test Model',
        'test': {
            'statement_checking': {'max_path_length': 5, 'max_paths': 1},
            'test_corpus': 'simple_tests',
            'mc_types': ['pysb', 'pybel', 'signed_graph', 'unsigned_graph'],
        },
        'assembly': assembly_steps,
    }
    model = EmmaaModel('test', config_dict, paper_ids)
    model.add_statements(emmaa_stmts)
    return model
Beispiel #4
0
def test_load_model():
    """Check that the 'test' model can be loaded from the test bucket."""
    # Importing locally is the recommended pattern when using moto
    from emmaa.model import EmmaaModel
    client = setup_bucket(add_model=True)
    loaded = EmmaaModel.load_from_s3('test', bucket='test_bucket')
    assert isinstance(loaded, EmmaaModel)
    n_stmts = len(loaded.stmts)
    assert n_stmts == 2, n_stmts
    assert loaded.name == 'test'
Beispiel #5
0
 def load_from_statements(cls, model_name, mode='local', date=None,
                          bucket=EMMAA_BUCKET_NAME):
     """Create an instance of ``cls`` from already assembled statements.

     The model config, the latest matching paper ids file (if any) and the
     assembled statements are looked up for ``model_name`` in ``bucket``;
     an EmmaaModel is built from them and wrapped with ``cls(model,
     mode=mode)``. When ``date`` is given it narrows the paper ids prefix
     and is forwarded to ``get_assembled_statements``.
     """
     config = load_config_from_s3(model_name, bucket=bucket)
     date_suffix = date if date else ''
     paper_key = find_latest_s3_file(
         bucket, f'papers/{model_name}/paper_ids_{date_suffix}', 'json')
     paper_ids = load_json_from_s3(bucket, paper_key) if paper_key else None
     emmaa_model = EmmaaModel(model_name, config, paper_ids)
     # Reuse the stored assembled statements so assembly is not rerun
     assembled, fname = get_assembled_statements(model_name, date, bucket)
     emmaa_model.assembled_stmts = assembled
     emmaa_model.date_str = strip_out_date(fname, 'datetime')
     return cls(emmaa_model, mode=mode)
Beispiel #6
0
def upload_prior(ctype, config, gene_names):
    """Load pickled prior statements for ``ctype`` and push them to NDEx.

    The statements are read from ``../models/<ctype>/prior_stmts.pkl``,
    converted with ``get_emmaa_statements`` using ``gene_names``, added to a
    new EmmaaModel and the model's ``update_to_ndex`` is called.
    """
    with open(f'../models/{ctype}/prior_stmts.pkl', 'rb') as prior_file:
        prior_stmts = pickle.load(prior_file)
    emmaa_stmts = get_emmaa_statements(prior_stmts, gene_names)
    prior_model = EmmaaModel(ctype, config)
    prior_model.add_statements(emmaa_stmts)
    prior_model.update_to_ndex()
Beispiel #7
0
def test_model_extend():
    """Check that extend_unique only adds statements not already present."""
    # The first and third evidences are built from identical arguments
    evidences = [Evidence(pmid='1234', text=text, source_api='x')
                 for text in ('abcd', 'abcde', 'abcd')]
    emmaa_sts = []
    for ev in evidences:
        indra_st = Phosphorylation(None, Agent('a'), evidence=ev)
        emmaa_sts.append(
            EmmaaStatement(indra_st, datetime.datetime.now(), []))
    model = EmmaaModel('x', {'search_terms': [], 'ndex': {'network': None}})
    model.add_statements(emmaa_sts[:1])
    model.extend_unique(emmaa_sts[1:])
    assert len(model.stmts) == 2
    # Same evidence but a different substrate counts as a new statement
    other_substrate = EmmaaStatement(
        Phosphorylation(None, Agent('b'), evidence=evidences[0]),
        datetime.datetime.now(), [])
    model.extend_unique([other_substrate])
    assert len(model.stmts) == 3
Beispiel #8
0
 def make_model(self):
     """Build an EmmaaModel, push it to NDEx and store model + config on S3.

     The config comes from ``self.make_config()``; the UUID returned by the
     NDEx upload is written into the config before the config is saved.
     """
     model_config = self.make_config()
     model = EmmaaModel(self.name, model_config)
     model.stmts = self.stmts
     # The NDEx upload has to happen first so its UUID ends up in the config
     model_config['ndex'] = {'network': model.upload_to_ndex()}
     save_config_to_s3(self.name, model_config)
     model.save_to_s3()
Beispiel #9
0
def upload_prior(ctype, config):
    """Load pickled prior statements for ``ctype`` and upload them to NDEx.

    The statements are read from ``models/<ctype>/prior_stmts.pkl``, each is
    wrapped into an EmmaaStatement with an empty search term list, added to
    a new EmmaaModel and the model's ``upload_to_ndex`` is called.
    """
    with open(f'models/{ctype}/prior_stmts.pkl', 'rb') as prior_file:
        prior_stmts = pickle.load(prior_file)
    emmaa_stmts = []
    for prior_stmt in prior_stmts:
        emmaa_stmts.append(
            EmmaaStatement(prior_stmt, datetime.datetime.now(), []))
    prior_model = EmmaaModel(ctype, config)
    prior_model.add_statements(emmaa_stmts)
    prior_model.upload_to_ndex()
Beispiel #10
0
    def get_statements(self, mode='all', batch_size=100):
        """Return EMMAA Statements for this prior's literature set.

        If ``self.stmts`` is already populated it is returned as is.
        Otherwise PubMed is searched for this prior's search terms, raw
        statements are fetched for the resulting PMIDs, and each one is
        wrapped into an EmmaaStatement that is appended to ``self.stmts``.

        Parameters
        ----------
        mode : 'all' or 'distilled'
            The 'distilled' mode makes sure that the "best", non-redundant
            set of raw statements are found across potentially redundant text
            contents and reader versions. The 'all' mode doesn't do such
            distillation but is significantly faster.
        batch_size : Optional[int]
            Determines how many PMIDs to fetch statements for in each
            iteration. Default: 100.

        Returns
        -------
        list of EmmaaStatement
            A list of EMMAA Statements corresponding to extractions from
            the subset of literature defined by this prior's search terms.
        """
        # Statements that were collected earlier are reused
        if self.stmts:
            return self.stmts
        pmids_by_term = EmmaaModel.search_pubmed(
            search_terms=self.search_terms, date_limit=None)
        # Invert the mapping: for every PMID, the terms that found it
        terms_by_pmid = {}
        for search_term, found_pmids in pmids_by_term.items():
            for found_pmid in found_pmids:
                terms_by_pmid.setdefault(found_pmid, []).append(search_term)
        raw_by_pmid = get_raw_statements_for_pmids(
            set(terms_by_pmid.keys()), mode=mode, batch_size=batch_size)
        # One shared timestamp for everything collected in this call
        collected_at = datetime.datetime.now()
        for pmid, raw_stmts in raw_by_pmid.items():
            for raw_stmt in raw_stmts:
                emmaa_stmt = EmmaaStatement(
                    raw_stmt, collected_at, terms_by_pmid[pmid],
                    {'internal': True})
                self.stmts.append(emmaa_stmt)
        return self.stmts
Beispiel #11
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Turn a model's assembled statements into a dict of tests.

    The model is loaded from ``bucket`` and assembled; a
    StatementCheckingTest is created for every assembled statement whose
    agent list has no falsy entry. With ``upload`` set, the resulting dict
    is pickled to ``tests/<model_name>_tests_<date>.pkl`` in ``bucket``.

    Returns
    -------
    dict
        Dict with a 'test_data' description and the list of 'tests'.
    """
    model = EmmaaModel.load_from_s3(model_name, bucket=bucket)
    model.run_assembly()
    tests = []
    for assembled_stmt in model.assembled_stmts:
        # Skip statements that have a missing agent
        if all(assembled_stmt.agent_list()):
            tests.append(StatementCheckingTest(assembled_stmt))
    date_str = make_date_str()
    # Only the first 10 characters of the date string go in the description
    description = (
        f'These tests were generated from the {model.human_readable_name} '
        f'on {date_str[:10]}')
    test_dict = {'test_data': {'description': description}, 'tests': tests}
    if upload:
        test_key = f'tests/{model_name}_tests_{date_str}.pkl'
        save_pickle_to_s3(test_dict, bucket, test_key)
    return test_dict
Beispiel #12
0
def create_upload_model(model_name, full_name, indra_stmts, ndex_id=None):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    Parameters
    ----------
    model_name : str
        Short name of the model to use on S3.
    full_name : str
        Human-readable model name to use in EMMAA dashboard. Currently not
        used by this function.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    ndex_id : str
        UUID of the network corresponding to the model on NDex. If provided,
        the NDex network will be updated with the latest model content.
        If None (default), a new network will be created and the UUID stored
        in the model config files on S3.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [])
    # Get updated CX content for the INDRA Statements
    cxa = CxAssembler(indra_stmts)
    cx_str = cxa.make_model()
    # If we don't have an NDex ID, create network and upload to Ndex
    if ndex_id is None:
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
    # If the NDEx ID is provided, update the existing network
    else:
        ndex_client.update_network(cx_str, ndex_id)
    # Create the config dictionary
    config_dict = {'ndex': {'network': ndex_id}, 'search_terms': []}
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3
    emmaa_model.save_to_s3()
    # Upload the config as JSON; a single put_object is enough since a
    # repeated upload of the same body to the same key changes nothing.
    s3_client = boto3.client('s3')
    config_json = json.dumps(config_dict)
    s3_client.put_object(Body=config_json.encode('utf8'),
                         Key='models/%s/config.json' % model_name,
                         Bucket='emmaa')
Beispiel #13
0
def update_cancer(cancer_type):
    """Update the model for the given cancer.

    Both a pickle of prior statements and a JSON config file must be present
    for the given cancer type, located in
    models/<cancer_type>/prior_stmts.pkl and models/<cancer_type>/config.json
    respectively.

    Parameters
    ----------
    cancer_type : str
        A short string which is the name of the cancer, and corresponds to a
        directory in the models directory, as described above.
    """
    print(cancer_type)
    with open(f'models/{cancer_type}/prior_stmts.pkl', 'rb') as fh:
        stmts = pickle.load(fh)
    # Use a context manager so the config file handle is closed right away
    with open(f'models/{cancer_type}/config.json', 'r') as fh:
        config = json.load(fh)
    em = EmmaaModel(cancer_type, config)
    ess = [EmmaaStatement(st, datetime.datetime.now(), []) for st in stmts]
    em.add_statements(ess)
    em.save_to_s3()
Beispiel #14
0
def create_upload_model(model_name, indra_stmts, config_file):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    If the config has no 'ndex' entry, a new NDEx network is created from
    the statements, its ID is added to the config, and the updated config
    is also written locally to ``<config_file>.updated``.

    Parameters
    ----------
    model_name : str
        Name of the model to use on S3.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    config_file : str
        Path to the local config.json file.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [],
                                 {'internal': True})
    # Load config information
    with open(config_file, 'rt') as f:
        config_json = json.load(f)
    # If there is no ndex entry in the config, create a new network and update
    # the config file with the NDex network ID
    if 'ndex' not in config_json:
        cxa = CxAssembler(indra_stmts)
        # The model is assembled before upload; the returned CX string
        # itself is not needed here.
        cxa.make_model()
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
        config_json['ndex'] = {'network': ndex_id}
        updated_config_file = f'{config_file}.updated'
        with open(updated_config_file, 'wt') as f:
            json.dump(config_json, f, indent=2)
    # If the NDEx ID is provided we don't need to update the existing network
    # because this will occur as part of the model assembly/update procedure
    # on EMMAA itself.
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_json)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3
    emmaa_model.save_to_s3()
    # Upload config JSON
    save_config_to_s3(model_name, config_json)
Beispiel #15
0
    def make_model(self, estmts, upload_to_s3=False):
        """Build an initial EMMAA Model and optionally save it to S3.

        Parameters
        ----------
        estmts : list of emmaa.statement.EmmaaStatement
            Prior EMMAA Statements the new model is populated with.
        upload_to_s3 : Optional[bool]
            Forwarded to ``make_config``; if True, the model is additionally
            saved to S3 before being returned. Default: False

        Returns
        -------
        emmaa.model.EmmaaModel
            The model created from the generated config and the given
            EMMAA Statements.
        """
        from emmaa.model import EmmaaModel
        generated_config = self.make_config(upload_to_s3=upload_to_s3)
        initial_model = EmmaaModel(name=self.name, config=generated_config)
        initial_model.add_statements(estmts)
        if not upload_to_s3:
            return initial_model
        initial_model.save_to_s3()
        return initial_model
Beispiel #16
0
      '6840@HGNC&type=Activation&format=html'),
     ('Active MAP2K1 activates MAPK1.',
      'https://db.indra.bio/statements/from_agents?subject=6840@HGNC&object='
      '6871@HGNC&type=Activation&format=html')]
}
# Expected HTML anchor for the 'BRAF activates MAP2K1.' statement link
processed_link = (
    '<a href="https://db.indra.bio/statements/from_agents?'
    'subject=1097@HGNC&object=6840@HGNC&type=Activation&format=html" '
    'target="_blank" class="status-link">'
    'BRAF activates MAP2K1.</a>'
)
# Expected response, keyed by an integer hash, when a query does not apply
query_not_appl = {
    2413475507: [
        ('Query is not applicable for this model',
         'https://emmaa.readthedocs.io/en/latest/dashboard/'
         'response_codes.html'),
    ],
}
# Create a new ModelManager for tests instead of depending on S3 version.
# NOTE: both lines run at import time; the 'test' EmmaaModel itself is still
# obtained through EmmaaModel.load_from_s3, only the ModelManager is built
# locally.
test_model = EmmaaModel.load_from_s3('test')
test_mm = ModelManager(test_model)


def test_load_model_manager_from_s3():
    """The object loaded for the 'test' model must be a ModelManager."""
    loaded_manager = load_model_manager_from_s3('test')
    assert isinstance(loaded_manager, ModelManager)


def test_format_results():
    """Check model, query and mc_type of a single formatted result."""
    raw_result = ('test', query_object, 'pysb', test_response,
                  datetime.now())
    formatted_results = format_results([raw_result])
    assert len(formatted_results) == 1
    assert formatted_results[0]['model'] == 'test'
    assert formatted_results[0]['query'] == simple_query
    assert formatted_results[0]['mc_type'] == 'pysb'
Beispiel #17
0
def update_model_manager_on_s3(model_name, bucket=EMMAA_BUCKET_NAME):
    """Rebuild the ModelManager of a model and save it back to S3.

    The EmmaaModel is loaded from ``bucket``, wrapped in a new ModelManager,
    and that manager is saved to the same bucket and returned.
    """
    manager = ModelManager(
        EmmaaModel.load_from_s3(model_name, bucket=bucket))
    save_model_manager_to_s3(model_name, manager, bucket=bucket)
    return manager
Beispiel #18
0
def test_filter_relevance():
    """Check assembly output without a filter, with prior_one and prior_all."""
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{
            'db_refs': {'HGNC': '20974'},
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene',
        }],
    }
    braf_activates_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '20974'}),
        Agent('MAP2K1'),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion')])
    map2k1_activates_mapk1 = Activation(
        Agent('MAP2K1', activity=ActivityCondition('activity', True)),
        Agent('MAPK1'),
        evidence=[Evidence(text='Active MAP2K1 activates '
                                'MAPK1.',
                           source_api='assertion')])
    search_term = SearchTerm('gene', 'MAP2K1', db_refs={},
                             search_term='MAP2K1')
    emmaa_stmts = []
    for indra_stmt in (braf_activates_map2k1, map2k1_activates_mapk1):
        emmaa_stmts.append(
            EmmaaStatement(indra_stmt, datetime.datetime.now(),
                           [search_term]))

    def assemble_with_current_config():
        # A fresh model is built each time from whatever config_dict holds
        model = EmmaaModel('test', config_dict)
        model.extend_unique(emmaa_stmts)
        model.run_assembly()
        return model.assembled_stmts

    # Try no filter first
    assembled = assemble_with_current_config()
    assert len(assembled) == 2, assembled

    # Next do a prior_one filter
    config_dict['assembly'] = {'filter_relevance': 'prior_one'}
    assembled = assemble_with_current_config()
    assert len(assembled) == 1, assembled
    assert assembled[0].obj.name == 'MAPK1'

    # Next do a prior_all filter
    config_dict['assembly'] = {'filter_relevance': 'prior_all'}
    assembled = assemble_with_current_config()
    assert len(assembled) == 0
Beispiel #19
0
import argparse
from emmaa.model import EmmaaModel
from emmaa.model_tests import ModelManager, save_model_manager_to_s3


if __name__ == '__main__':
    # Command line entry point: -m/--model names the model to refresh
    arg_parser = argparse.ArgumentParser(
        description='Script to update ModelManager stored on Amazon S3.')
    arg_parser.add_argument('-m', '--model', help='Model name',
                            required=True)
    cli_args = arg_parser.parse_args()

    # Load the model and wrap it in a ModelManager created in 's3' mode
    manager = ModelManager(EmmaaModel.load_from_s3(cli_args.model),
                           mode='s3')
    # Update NDEx and save assembled statements before saving the manager
    manager.model.update_to_ndex()
    manager.save_assembled_statements()
    save_model_manager_to_s3(cli_args.model, manager)
Beispiel #20
0
def run_model_tests_from_s3(model_name,
                            upload_mm=True,
                            upload_results=True,
                            upload_stats=True,
                            registered_queries=True,
                            db=None):
    """Load a model and its test corpus from S3 and run the tests.

    Which tests apply to the model is determined with a ScopeTestConnector.
    The test results are turned into statistics and, depending on the
    flags below, intermediate and final products are uploaded.

    Parameters
    ----------
    model_name : str
        Name of EmmaaModel to load from S3.
    upload_mm : Optional[bool]
        Whether to upload a model manager instance to S3 as a pickle file.
        Default: True
    upload_results : Optional[bool]
        Whether to upload test results to S3 in JSON format. Can be set
        to False when running tests. Default: True
    upload_stats : Optional[bool]
        Whether to upload latest statistics about model and a test.
        Default: True
    registered_queries : Optional[bool]
        If True, registered queries are fetched from the database and
        executed, the results are then saved to the database. Default: True
    db : Optional[emmaa.db.manager.EmmaaDatabaseManager]
        If given over-rides the default primary database.

    Returns
    -------
    emmaa.model_tests.ModelManager
        Instance of ModelManager containing the model data, list of applied
        tests and the test results.
    emmaa.analyze_test_results.StatsGenerator
        Instance of StatsGenerator containing statistics about model and test.
    """
    emmaa_model = EmmaaModel.load_from_s3(model_name)
    # Use the large corpus when the model's test config names no corpus
    corpus_name = emmaa_model.test_config.get('test_corpus',
                                              'large_corpus_tests.pkl')
    corpus_tests = load_tests_from_s3(corpus_name)
    model_manager = ModelManager(emmaa_model)
    if upload_mm:
        save_model_manager_to_s3(model_name, model_manager)
    test_manager = TestManager([model_manager], corpus_tests)
    test_manager.make_tests(ScopeTestConnector())
    test_manager.run_tests()
    results_json_dict = model_manager.results_to_json()
    results_json_str = json.dumps(results_json_dict, indent=1)
    # Optionally upload test results to S3
    if upload_results:
        s3_client = get_s3_client(unsigned=False)
        result_key = f'results/{model_name}/results_{make_date_str()}.json'
        logger.info(f'Uploading test results to {result_key}')
        s3_client.put_object(Bucket='emmaa',
                             Key=result_key,
                             Body=results_json_str.encode('utf8'))
    stats_generator = StatsGenerator(
        model_name, latest_round=TestRound(results_json_dict))
    stats_generator.make_stats()

    # Optionally upload statistics to S3
    if upload_stats:
        stats_generator.save_to_s3()
    # Optionally answer the queries registered for this model
    if registered_queries:
        query_manager = QueryManager(db=db, model_managers=[model_manager])
        query_manager.answer_registered_queries(model_name)
    return model_manager, stats_generator
Beispiel #21
0
def get_indirect_stmts(corpus):
    """Return preassembled indirect statements from a BEL corpus.

    The file ``<corpus>_corpus.bel`` is looked up in the ``data`` directory
    next to the indra package, processed as a BEL script, and statements
    whose first evidence has a truthy 'direct' epistemics entry are dropped
    before ``run_preassembly`` is applied with ``return_toplevel=False``.
    """
    corpus_file = os.path.join(indra.__path__[0], os.pardir, 'data',
                               f'{corpus}_corpus.bel')
    processor = bel.process_belscript(corpus_file)
    indirect = []
    for stmt in processor.statements:
        # Only the first evidence decides whether a statement is direct
        if stmt.evidence[0].epistemics.get('direct'):
            continue
        indirect.append(stmt)
    return ac.run_preassembly(indirect, return_toplevel=False)


if __name__ == '__main__':
    # Command line entry point: --corpus picks the BEL corpus, --mode
    # picks between dumping the tests ('dump') and running them ('run')
    arg_parser = argparse.ArgumentParser(description='')
    arg_parser.add_argument('--corpus', default='large')
    arg_parser.add_argument('--mode', default='dump')
    cli_args = arg_parser.parse_args()

    tests = [StatementCheckingTest(stmt)
             for stmt in get_indirect_stmts(cli_args.corpus)]
    if cli_args.mode == 'dump':
        # Pickle the tests into the current working directory
        with open(f'{cli_args.corpus}_corpus_tests.pkl', 'wb') as out_file:
            pickle.dump(tests, out_file)
    elif cli_args.mode == 'run':
        models = [EmmaaModel(model_name) for model_name in ['rasmodel']]
        test_manager = TestManager(models, tests)
        test_manager.make_tests(ScopeTestConnector())
        test_manager.run_tests()
        print(test_manager.test_results)