Esempio n. 1
0
 def search_terms_from_nodes(node_list):
     """Build a sorted list of Pubmed search terms from prior nodes.

     Parameters
     ----------
     node_list : list of str
         Node identifiers of the form ``<namespace>:<id>`` as returned by
         make_prior. Only nodes starting with ``HGNC:`` or ``MESH:`` are
         converted.

     Returns
     -------
     list of SearchTerm
         The search terms created from the recognized nodes, sorted by
         name. Nodes that cannot be converted are logged with a warning
         and skipped.
     """
     terms = []
     for node in node_list:
         if node.startswith('HGNC:'):
             hgnc_id = node.split(':')[1]
             hgnc_name = get_hgnc_name(hgnc_id)
             if hgnc_name is None:
                 # Logger.log() takes the level as its first argument, so
                 # calling it with only a message raises TypeError; emit a
                 # warning like the fallback branch below does.
                 logger.warning(f'{node} is not a valid HGNC ID')
                 continue
             terms.append(SearchTerm(type='gene', name=hgnc_name,
                                     search_term=f'"{hgnc_name}"',
                                     db_refs={'HGNC': hgnc_id}))
         elif node.startswith('MESH:'):
             mesh_id = node.split(':')[1]
             # TODO: get actual process name here
             terms.append(SearchTerm(type='bioprocess', name=mesh_id,
                                     search_term=f'{mesh_id}[MeSH Terms]',
                                     db_refs={'MESH': mesh_id}))
         # TODO: handle GO here
         else:
             logger.warning(f'Could not create search term from {node}')
     return sorted(terms, key=lambda x: x.name)
Esempio n. 2
0
def make_search_terms(terms, ontology_file):
    """Create ontology-standardized SearchTerm objects for the given terms.

    Parameters
    ----------
    terms : list[str]
        Terms that are matched against the suffixes of ontology entries.
    ontology_file : str
        Path to the file containing the ontology.

    Returns
    -------
    search_terms : set
        SearchTerm objects of type 'concept', built for each ontology entry
        and term where the entry ends with the term. The name is the term
        with underscores replaced by spaces and capitalized.
    """
    with open(ontology_file, 'r') as fh:
        raw_lines = fh.readlines()

    # Only the part of a line before the first '> <' separator is used; the
    # entry is the slice starting at the first occurrence of 'UN' in it.
    # NOTE: str.find() returns -1 when 'UN' is absent, in which case the
    # slice is just the last character of that part.
    entries = []
    for raw in raw_lines:
        head = raw.split('> <')[0]
        entries.append(head[head.find('UN'):])

    search_terms = set()
    for entry in entries:
        for term in terms:
            if not entry.endswith(term):
                continue
            phrase = term.replace('_', ' ')
            search_terms.add(
                SearchTerm(type='concept', name=phrase.capitalize(),
                           db_refs={'UN': entry},
                           search_term='"%s"' % phrase))
    return search_terms
Esempio n. 3
0
def test_model_json():
    """Test the json structure and content of EmmaaModel.to_json() output."""
    indra_stmts = [
        Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                   Agent('MAP2K1'),
                   evidence=[Evidence(text='BRAF activates MAP2K1.')]),
        Activation(Agent('MAP2K1',
                         activity=ActivityCondition('activity', True)),
                   Agent('MAPK1'),
                   evidence=[Evidence(text='Active MAP2K1 activates MAPK1.')]),
    ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(stmt, datetime.datetime.now(), [st])
        for stmt in indra_stmts
    ]
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    emmaa_model_json = emmaa_model.to_json()

    # Test json structure
    assert emmaa_model_json['name'] == 'test'
    assert isinstance(emmaa_model_json['stmts'], list)
    assert emmaa_model_json['ndex_network'] == \
        'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'

    # Test config
    first_term_json = emmaa_model_json['search_terms'][0]
    assert first_term_json['type'] == 'gene'
    assert first_term_json['db_refs'] == {'HGNC': '20974'}

    # Test json statements. Each evidence text is checked once; the first
    # check used to be repeated verbatim, which added no coverage.
    first_stmt_json = emmaa_model_json['stmts'][0]['stmt']
    second_stmt_json = emmaa_model_json['stmts'][1]['stmt']
    assert first_stmt_json['evidence'][0]['text'] == \
        'BRAF activates MAP2K1.'
    assert second_stmt_json['evidence'][0]['text'] == \
        'Active MAP2K1 activates MAPK1.'
    assert first_stmt_json['subj']['name'] == 'BRAF'
    assert second_stmt_json['subj']['name'] == 'MAP2K1'
    assert second_stmt_json['obj']['name'] == 'MAPK1'

    # Need hashes to be strings so that javascript can read them
    assert isinstance(first_stmt_json['evidence'][0]['source_hash'], str)
Esempio n. 4
0
def test_filter_relevance():
    """Check how the 'filter_relevance' assembly setting prunes statements.

    The same two statements are assembled three times: with no relevance
    filter, with 'prior_one' and with 'prior_all'.
    """
    prior_term_json = {
        'db_refs': {'HGNC': '20974'},
        'name': 'MAPK1',
        'search_term': 'MAPK1',
        'type': 'gene',
    }
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [prior_term_json],
    }

    braf_activates_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '20974'}),
        Agent('MAP2K1'),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion')])
    map2k1_activates_mapk1 = Activation(
        Agent('MAP2K1', activity=ActivityCondition('activity', True)),
        Agent('MAPK1'),
        evidence=[Evidence(text='Active MAP2K1 activates '
                                'MAPK1.',
                           source_api='assertion')])
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = []
    for indra_stmt in (braf_activates_map2k1, map2k1_activates_mapk1):
        emmaa_stmts.append(
            EmmaaStatement(indra_stmt, datetime.datetime.now(), [st]))

    def assemble(config):
        # Build a fresh model from the current config and run assembly.
        model = EmmaaModel('test', config)
        model.extend_unique(emmaa_stmts)
        model.run_assembly()
        return model.assembled_stmts

    # Without a relevance filter both statements are kept
    assembled = assemble(config_dict)
    assert len(assembled) == 2, assembled

    # With prior_one only one statement remains
    config_dict['assembly'] = {'filter_relevance': 'prior_one'}
    assembled = assemble(config_dict)
    assert len(assembled) == 1, assembled
    assert assembled[0].obj.name == 'MAPK1'

    # With prior_all nothing remains
    config_dict['assembly'] = {'filter_relevance': 'prior_all'}
    assembled = assemble(config_dict)
    assert len(assembled) == 0
Esempio n. 5
0
 def _load_config(self, config):
     """Copy settings from a configuration dict onto this instance.

     'search_terms' is required and each entry is deserialized with
     SearchTerm.from_json. `ndex_network` is taken from
     config['ndex']['network'] or set to None when there is no 'ndex'
     section. The 'reading', 'assembly' and 'test' sections are stored as
     `reading_config`, `assembly_config` and `test_config` only when
     present; otherwise those attributes are not assigned here.
     """
     self.search_terms = [
         SearchTerm.from_json(term_json)
         for term_json in config['search_terms']
     ]
     self.ndex_network = (
         config['ndex']['network'] if 'ndex' in config else None)
     # Optional sections: config key -> attribute name
     optional_sections = (
         ('reading', 'reading_config'),
         ('assembly', 'assembly_config'),
         ('test', 'test_config'),
     )
     for section, attr_name in optional_sections:
         if section in config:
             setattr(self, attr_name, config[section])
Esempio n. 6
0
def test_read_db_pmid_search_terms():
    """Check read_db_pmid_search_terms() function with different inputs."""
    search_terms = [
        SearchTerm('gene', 'AKT2', {
            'HGNC': '392',
            'UP': 'P31751'
        }, 'AKT2'),
        SearchTerm('gene', 'ACOX2', {
            'HGNC': '120',
            'UP': 'Q99424'
        }, 'ACOX2')
    ]
    # Check for empty input.
    assert len(read_db_pmid_search_terms({})) == 0
    # Check for PMIDs that do not have any statements.
    nostmts_pmid = "22178463"
    assert len(read_db_pmid_search_terms({nostmts_pmid: search_terms})) == 0
    # Check for PMIDs that have statements.
    stmts_pmid = "23431386"
    estmts = read_db_pmid_search_terms({stmts_pmid: search_terms})
    assert len(estmts) > 0
    assert isinstance(estmts[0], EmmaaStatement)
    # This comparison used to be a bare expression whose result was
    # discarded; it has to be asserted to actually check anything.
    assert estmts[0].search_terms == search_terms
Esempio n. 7
0
def create_model(relevance=None, paper_ids=None):
    """Build an EmmaaModel named 'test' holding two Activation statements.

    Parameters
    ----------
    relevance : optional
        When truthy, a 'filter_relevance' step using this value as its
        policy is appended to the assembly steps.
    paper_ids : optional
        Passed through unchanged as the third argument of EmmaaModel.

    Returns
    -------
    emmaa_model : EmmaaModel
        The model after the two statements were added with add_statements.
    """
    braf_activates_map2k1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '1097'}),
        Agent('MAP2K1', db_refs={'HGNC': '6840'}),
        evidence=[Evidence(text='BRAF activates MAP2K1.',
                           source_api='assertion',
                           text_refs={'TRID': '1234'})])
    map2k1_activates_mapk1 = Activation(
        Agent('MAP2K1', db_refs={'HGNC': '6840'},
              activity=ActivityCondition('activity', True)),
        Agent('MAPK1', db_refs={'HGNC': '6871'}),
        evidence=[Evidence(text='Active MAP2K1 activates MAPK1.',
                           source_api='assertion',
                           text_refs={'TRID': '2345'})])
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')

    # Both statements are internal; only the second is marked as curated.
    emmaa_stmts = [
        EmmaaStatement(indra_stmt, datetime.datetime.now(), [st],
                       {'internal': True, 'curated': is_curated})
        for indra_stmt, is_curated in (
            (braf_activates_map2k1, False),
            (map2k1_activates_mapk1, True))
    ]

    assembly_steps = [
        {'function': step_name}
        for step_name in ('filter_no_hypothesis', 'map_grounding',
                          'filter_grounded_only', 'filter_human_only',
                          'map_sequence')
    ]
    assembly_steps.append(
        {'function': 'run_preassembly', 'kwargs': {
            'return_toplevel': False}})
    if relevance:
        assembly_steps.append(
            {'function': 'filter_relevance', 'kwargs': {'policy': relevance}})

    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{'db_refs': {'HGNC': '20974'}, 'name': 'MAPK1',
                          'search_term': 'MAPK1', 'type': 'gene'}],
        'human_readable_name': 'Test Model',
        'test': {
            'statement_checking': {'max_path_length': 5, 'max_paths': 1},
            'test_corpus': 'simple_tests',
            'mc_types': ['pysb', 'pybel', 'signed_graph', 'unsigned_graph']},
        'assembly': assembly_steps,
    }
    emmaa_model = EmmaaModel('test', config_dict, paper_ids)
    emmaa_model.add_statements(emmaa_stmts)
    return emmaa_model
Esempio n. 8
0
 def _load_config(self, config):
     """Copy settings from a configuration dict onto this instance.

     'search_terms' is required and each entry is deserialized with
     SearchTerm.from_json. `ndex_network` is taken from
     config['ndex']['network'] or set to None when there is no 'ndex'
     section. `export_formats` defaults to an empty list. The remaining
     sections ('reading', 'assembly', 'test', 'query',
     'human_readable_name') are stored only when present; otherwise the
     matching attributes are not assigned here.
     """
     self.search_terms = [
         SearchTerm.from_json(term_json)
         for term_json in config['search_terms']
     ]
     self.ndex_network = (
         config['ndex']['network'] if 'ndex' in config else None)
     # Optional sections: config key -> attribute name
     optional_sections = (
         ('reading', 'reading_config'),
         ('assembly', 'assembly_config'),
         ('test', 'test_config'),
         ('query', 'query_config'),
         ('human_readable_name', 'human_readable_name'),
     )
     for section, attr_name in optional_sections:
         if section in config:
             setattr(self, attr_name, config[section])
     self.export_formats = config.get('export_formats', [])
Esempio n. 9
0
def test_find_drugs_for_genes():
    """Check that find_drugs_for_genes returns drug search terms for SRC."""
    src_term = SearchTerm(type='gene', name='SRC', search_term='"SRC"',
                          db_refs={'HGNC': '11283'})
    # Look up drugs for the single SRC gene term
    drug_terms = find_drugs_for_genes([src_term])

    # The result must be non-empty
    assert drug_terms

    # Every returned element must be a SearchTerm
    for drug_term in drug_terms:
        assert isinstance(drug_term, SearchTerm)

    # More than 10 drugs are expected; fewer means something is wrong
    assert len(drug_terms) > 10

    # A few example drugs must be among the returned names
    drug_names = {drug_term.name for drug_term in drug_terms}
    assert {'Dasatinib', 'Tozasertib', 'Ponatinib'}.issubset(drug_names)
Esempio n. 10
0
    def search_biorxiv(collection_id, date_limit):
        """Collect the DOIs of a bioRxiv collection from the last few days.

        Parameters
        ----------
        collection_id : str
            ID of the bioRxiv collection to search.
        date_limit : int
            The number of days to search back from the current UTC time.

        Returns
        -------
        terms_to_dois : dict
            A dict with a single entry: a SearchTerm of type 'other' named
            after the collection ID, mapped to the DOIs returned by
            biorxiv_client.get_collection_dois.
        """
        lookback = datetime.timedelta(days=date_limit)
        start_date = datetime.datetime.utcnow() - lookback
        dois = biorxiv_client.get_collection_dois(collection_id, start_date)
        logger.info(f'{len(dois)} DOIs found')
        collection_term = SearchTerm(
            'other', f'biorxiv: {collection_id}', {}, None)
        return {collection_term: dois}
Esempio n. 11
0
def test_find_drugs_for_genes():
    """Check find_drugs_for_genes against two explicit SRC inhibitions."""
    src_term = SearchTerm(type='gene', name='SRC', search_term='"SRC"',
                          db_refs={'HGNC': '11283'})
    # Statements in which a drug inhibits SRC
    dasatinib_inhibits_src = Inhibition(
        Agent('Dasatinib', db_refs={'CHEBI': 'CHEBI:49375'}),
        Agent('SRC', db_refs={'HGNC': '11283'}))
    ponatinib_inhibits_src = Inhibition(
        Agent('Ponatinib', db_refs={'CHEBI': 'CHEBI:78543'}),
        Agent('SRC', db_refs={'HGNC': '11283'}))
    drug_terms = find_drugs_for_genes(
        [src_term], [dasatinib_inhibits_src, ponatinib_inhibits_src])

    # The result must be non-empty
    assert drug_terms

    # Every returned element must be a SearchTerm
    for drug_term in drug_terms:
        assert isinstance(drug_term, SearchTerm)

    # Exactly the two drugs from the statements must be returned
    drug_names = {drug_term.name for drug_term in drug_terms}
    assert drug_names == {'Dasatinib', 'Ponatinib'}
Esempio n. 12
0
    filter_emmaa_stmts_by_metadata, filter_indra_stmts_by_metadata
from emmaa.priors import SearchTerm
from indra.statements import Activation, Agent, Evidence

# Module-level fixtures: one Activation statement between two grounded
# agents, a timestamp, and one gene search term per agent.
braf = Agent('BRAF', db_refs={'HGNC': '1097'})
map2k1 = Agent('MAP2K1', db_refs={'HGNC': '6840'})
stmt = Activation(
    braf, map2k1,
    evidence=[Evidence(text='BRAF activates MAP2K1.',
                       source_api='assertion',
                       text_refs={'TRID': '1234'})])
date = datetime.datetime.now()
search_terms = [
    SearchTerm('gene', agent.name, agent.db_refs, query)
    for agent, query in ((braf, '"BRAF"'), (map2k1, '"MAP2K1"'))
]


def test_to_emmaa_stmts():
    """Check that to_emmaa_stmts wraps an INDRA statement as EmmaaStatement.

    Uses the module-level `stmt`, `date` and `search_terms` fixtures.
    """
    estmts = to_emmaa_stmts([stmt],
                            date=date,
                            search_terms=search_terms,
                            metadata={'internal': True})
    # A non-empty result is expected for a single input statement
    assert estmts
    estmt = estmts[0]
    assert isinstance(estmt, EmmaaStatement)
    # The wrapped statement and the metadata must match what was passed in
    assert estmt.stmt == stmt
    assert estmt.metadata == {'internal': True}
    # NOTE(review): the 'emmaa' annotations are fetched but never checked in
    # the visible code - the test may be truncated here; confirm.
    emmaa_anns = estmt.stmt.evidence[0].annotations.get('emmaa')
Esempio n. 13
0
def make_prior_from_genes(gene_list):
    """Return reactome prior based on a list of genes

    Parameters
    ----------
    gene_list : list of str
        List of HGNC symbols for genes

    Returns
    -------
    res : list of :py:class:`emmaa.priors.SearchTerm`
        List of search terms corresponding to all genes found in any reactome
        pathway containing one of the genes in the input gene list, sorted
        by name. Genes for which an ID lookup fails are logged with a
        warning and skipped.
    """
    # Step 1: map each input gene symbol to its Reactome IDs via UniProt
    all_reactome_ids = set()
    for gene_name in gene_list:
        hgnc_id = get_hgnc_id(gene_name)
        uniprot_id = get_uniprot_id(hgnc_id)
        if not uniprot_id:
            logger.warning('Could not get Uniprot ID for HGNC symbol'
                           f' {gene_name}')
            continue
        reactome_ids = rx_id_from_up_id(uniprot_id)
        if not reactome_ids:
            logger.warning('Could not get Reactome ID for Uniprot ID'
                           f' {uniprot_id} with corresponding HGNC symbol'
                           f' {gene_name}')
            continue
        all_reactome_ids.update(reactome_ids)

    # Step 2: collect the pathways containing any of those Reactome IDs
    all_pathways = set()
    for reactome_id in all_reactome_ids:
        if not re.match(r'^R-HSA-[0-9]', reactome_id):
            # skip non-human genes
            continue
        additional_pathways = get_pathways_containing_gene(reactome_id)
        if additional_pathways is not None:
            all_pathways.update(additional_pathways)

    # Step 3: collect every gene contained in any of those pathways
    all_genes = set()
    for pathway in all_pathways:
        additional_genes = get_genes_contained_in_pathway(pathway)
        if additional_genes is not None:
            all_genes.update(additional_genes)

    # Step 4: turn each gene (a UniProt ID here) into a gene search term
    gene_terms = []
    for uniprot_id in all_genes:
        hgnc_name = get_gene_name(uniprot_id)
        if hgnc_name is None:
            logger.warning('Could not get HGNC name for UniProt ID'
                           f' {uniprot_id}')
            continue
        hgnc_id = get_hgnc_id(hgnc_name)
        if not hgnc_id:
            logger.warning('Could not find HGNC ID for HGNC symbol'
                           f' {hgnc_name} with corresponding Uniprot ID'
                           f' {uniprot_id}')
            continue
        term = SearchTerm(type='gene',
                          name=hgnc_name,
                          search_term=f'"{hgnc_name}"',
                          db_refs={
                              'HGNC': hgnc_id,
                              'UP': uniprot_id
                          })
        gene_terms.append(term)
    # Sorting by name makes the output independent of set iteration order
    return sorted(gene_terms, key=lambda x: x.name)