Exemple #1
0
def test_standardize_up_isoform():
    """Check that a UniProt ID and its isoform form get the same xrefs."""
    # Cross-references expected in addition to the UP entry itself
    shared_refs = {'HGNC': '16921', 'EGID': '116986', 'MESH': 'C485997'}
    for up_id in ('Q99490', 'Q99490-123'):
        refs = standardize_db_refs({'UP': up_id})
        expected = dict(shared_refs, UP=up_id)
        assert refs == expected, refs
Exemple #2
0
def test_standardize_hgnc_fplx_mesh_bug():
    """Check that gene and family groundings are not mixed up."""
    # Starting from a gene: a UniProt ID is added but no FamPlex entry
    gene_refs = standardize_db_refs({'HGNC': '1514'})
    assert gene_refs['UP'] == 'P41180'
    assert 'FPLX' not in gene_refs

    # Starting from a family: an HGNC group is added but no HGNC gene
    family_refs = standardize_db_refs({'FPLX': 'Calcium_sensing_receptors'})
    assert family_refs['HGNC_GROUP'] == '279'
    assert 'HGNC' not in family_refs
def get_chemical_agent(name, mesh_id, cas_id):
    """Return an Agent grounded to MeSH and, if given, CAS.

    Parameters
    ----------
    name : str
        The name passed on to the Agent.
    mesh_id : str
        The ID stored under the MESH key.
    cas_id : str or None
        The ID stored under the CAS key; skipped when falsy.

    Returns
    -------
    Agent
        An Agent whose db_refs went through standardize_db_refs and were
        checked with assert_valid_db_refs.
    """
    if cas_id:
        raw_refs = {'MESH': mesh_id, 'CAS': cas_id}
    else:
        raw_refs = {'MESH': mesh_id}
    standardized = standardize_db_refs(raw_refs)
    assert_valid_db_refs(standardized)
    return Agent(name, db_refs=standardized)
Exemple #4
0
def test_obo_replacements():
    """Check that an obsolete GO ID is replaced during standardization."""
    obsolete_id, current_id = 'GO:0036442', 'GO:0008553'

    is_obsolete = bio_ontology.get_node_property('GO', obsolete_id,
                                                 'obsolete')
    assert is_obsolete is True

    replacement = bio_ontology.get_replacement('GO', obsolete_id)
    assert replacement == ('GO', current_id)

    refs = standardize_db_refs({'GO': obsolete_id})
    assert refs.get('GO') == current_id
Exemple #5
0
def test_uniprot_replacements():
    """Check that an obsolete UniProt ID is replaced when standardizing."""
    obsolete_id, current_id = 'A0A059MHB0', 'C7U1M6'

    is_obsolete = bio_ontology.get_node_property('UP', obsolete_id,
                                                 'obsolete')
    assert is_obsolete is True

    replacement = bio_ontology.get_replacement('UP', obsolete_id)
    assert replacement == ('UP', current_id)

    refs = standardize_db_refs({'UP': obsolete_id})
    assert refs.get('UP') == current_id
Exemple #6
0
def parse_context_entry(entry, grounder, sentence=None):
    """Return a dict of context type and object processed from an entry.

    Parameters
    ----------
    entry : str
        A string of the form "<context type>: <context text>".
    grounder : callable
        Called as ``grounder(context_txt, context=sentence)``; the first
        element of its result (if any) provides the grounding.
    sentence : str or None
        Passed on to the grounder as context.

    Returns
    -------
    dict or None
        A single-entry dict mapping the allowed_contexts value for the
        context type to a RefContext, or None if the entry doesn't have
        the expected form or its context type is not in allowed_contexts.
    """
    parsed = re.match(r'(.*): (.*)', entry)
    if parsed is None:
        return None
    context_type, context_txt = parsed.groups()
    if context_type not in allowed_contexts:
        logger.warning('Unknown context type %s' % context_type)
        return None

    terms = grounder(context_txt, context=sentence)
    if terms:
        # Only the first term returned by the grounder is used
        top_term = terms[0].term
        db_refs = standardize_db_refs({top_term.db: top_term.id})
        standard_name = bio_ontology.get_name(top_term.db, top_term.id)
    else:
        logger.warning('Could not ground %s context: %s'
                       % (context_type, context_txt))
        db_refs = {}
        standard_name = None
    db_refs['TEXT'] = context_txt

    # Fall back on the raw text if there is no standard name
    context = RefContext(name=standard_name or context_txt, db_refs=db_refs)
    return {allowed_contexts[context_type]: context}
Exemple #7
0
def test_mesh_replacements():
    """Check that a replaced MeSH ID is updated during standardization."""
    old_id, new_id = 'C000657245', 'D000086382'

    assert bio_ontology.get_name('MESH', new_id) == 'COVID-19'

    has_replacement = bio_ontology.isrel('MESH', old_id, 'MESH', new_id,
                                         {'replaced_by'})
    assert has_replacement

    replacement = bio_ontology.get_replacement('MESH', old_id)
    assert replacement == ('MESH', new_id)

    refs = standardize_db_refs({'MESH': old_id})
    assert refs.get('MESH') == new_id
Exemple #8
0
def test_standardize_db_refs_efo_hp_doid():
    """Check mappings between MeSH and the EFO, HP and DOID namespaces."""
    # Mappings that are expected to hold in both directions
    two_way = (
        ('EFO', '0009502', 'D000007'),
        ('HP', 'HP:0031801', 'D064706'),
    )
    for db_ns, db_id, mesh_id in two_way:
        refs = standardize_db_refs({db_ns: db_id})
        assert refs.get('MESH') == mesh_id, refs
        refs = standardize_db_refs({'MESH': mesh_id})
        assert refs.get(db_ns) == db_id, refs

    # Currently there is no one-to-many mapping in the direction towards
    # MeSH (there used to be); if there is one again, it should be tested
    # here
    #refs = standardize_db_refs({'DOID': 'DOID:0060695'})
    #assert 'MESH' not in refs

    # One-to-many mappings away from MESH
    from_mesh = standardize_db_refs({'MESH': 'D000071017'})
    assert 'DOID' not in from_mesh

    from_doid = standardize_db_refs({'DOID': 'DOID:0060495'})
    assert from_doid.get('MESH') == 'D000067208'

    # This is an xrefs-based mapping that isn't in Gilda's resource file
    from_efo = standardize_db_refs({'EFO': '0000694'})
    assert from_efo.get('MESH') == 'D045169'
def indra_db_refs_from_minerva_refs(refs):
    """Return standardized INDRA db_refs for a list of Minerva references.

    Parameters
    ----------
    refs : iterable of tuple
        Pairs of (namespace, ID) as used by Minerva.

    Returns
    -------
    dict
        The db_refs dict obtained by mapping each namespace through
        minerva_to_indra_map (unknown namespaces are kept as they are),
        passing each pair through fix_id_standards and finally calling
        standardize_db_refs on the result.
    """
    db_refs = {}
    for db_ns, db_id in refs:
        db_ns = minerva_to_indra_map.get(db_ns, db_ns)
        # Both returned values have to be used: the namespace was
        # previously assigned to a misspelled, unused variable, so the
        # entry was stored under the unfixed namespace
        db_ns, db_id = fix_id_standards(db_ns, db_id)
        db_refs[db_ns] = db_id
    return standardize_db_refs(db_refs)
Exemple #10
0
def test_name_standardize_mesh_other_db():
    """Check name standardization for agents that start with MeSH only."""
    chemical = Agent('x', db_refs={'MESH': 'D001194'})
    standardize_agent_name(chemical, True)
    assert chemical.db_refs['CHEBI'] == 'CHEBI:46661'
    assert chemical.name == 'asbestos', chemical.name

    # Here the db_refs are standardized first, then the name separately
    gene_refs = standardize_db_refs({'MESH': 'D000067777'})
    for db_ns, expected_id in (('HGNC', '3313'), ('UP', 'Q12926')):
        assert gene_refs.get(db_ns) == expected_id, gene_refs
    gene = Agent('x', db_refs=gene_refs)
    standardize_agent_name(gene)
    assert gene.name == 'ELAVL2'
Exemple #11
0
def align_identifiers_urls(indra_groundings, dm_urls):
    """Match identifiers.org URLs against a collection of INDRA groundings.

    Parameters
    ----------
    indra_groundings : collection of tuple
        (namespace, ID) pairs; each standardized grounding derived from a
        URL is tested for membership in this collection.
    dm_urls : iterable of str
        URLs that are assumed to consist of the https://identifiers.org/
        prefix followed by "<namespace>:<id>".

    Returns
    -------
    list of tuple
        One (dm_url, matched_url) pair for each URL that isn't a
        literature reference. The matched_url is the identifiers.org URL
        of the first derived grounding found in indra_groundings, or None
        if there is no such grounding.
    """
    matches = []
    identifiers_prefix = 'https://identifiers.org/'
    for dm_url in dm_urls:
        # We do it this way instead of splitting because of DOIs which have
        # extra slashes
        entity = dm_url[len(identifiers_prefix):]
        db_ns, db_id = entity.split(':', maxsplit=1)
        # Skip literature references that aren't entities
        if db_ns in {'doi', 'pubmed'}:
            continue

        # First collect the raw groundings implied by the URL; this is
        # always a list so that all namespaces are handled uniformly below
        if db_ns == 'CHEBI':
            raw_refs = [{'CHEBI': '%s:%s' % (db_ns, db_id)}]
        elif db_ns == 'hgnc':
            raw_refs = [{'HGNC': db_id}]
        elif db_ns == 'hgnc.symbol':
            hgnc_id = hgnc_client.get_current_hgnc_id(db_id)
            # Guarded like the ncbigene case so that a missing ID is never
            # passed on for standardization
            raw_refs = [{'HGNC': hgnc_id}] if hgnc_id else []
        elif db_ns == 'pubchem.compound':
            raw_refs = [{'PUBCHEM': db_id}]
        elif db_ns == 'uniprot':
            raw_refs = [{'UP': db_id}]
        elif db_ns == 'bigg.metabolite':
            chebi_ids = bigg_to_chebi.get(db_id)
            raw_refs = [{'CHEBI': chebi_id} for chebi_id in chebi_ids or []]
        elif db_ns == 'ncbigene':
            hgnc_id = hgnc_client.get_hgnc_from_entrez(db_id)
            raw_refs = [{'HGNC': hgnc_id}] if hgnc_id else []
        else:
            print('Unhandled namespace %s' % db_ns)
            raw_refs = []
        db_refs = [standardize_db_refs(raw_ref) for raw_ref in raw_refs]

        # Take the first grounding that is among the INDRA groundings; the
        # search stops there so a later candidate can't override the match
        matched = next(
            ((k, v) for db_ref in db_refs for k, v in db_ref.items()
             if (k, v) in indra_groundings),
            None)

        matches.append(
            (dm_url, get_identifiers_url(*matched) if matched else None))
    return matches
Exemple #12
0
def indra_db_refs_from_minerva_refs(refs):
    """Return standardized INDRA db_refs for a list of Minerva references.

    Parameters
    ----------
    refs : iterable of tuple
        Pairs of (namespace, ID) as used by Minerva.

    Returns
    -------
    dict
        The db_refs dict produced by standardize_db_refs after namespace
        mapping, ID fixing and removal of ECCODE next to HGNC.
    """
    raw_refs = {}
    for minerva_ns, minerva_id in refs:
        # Namespaces missing from the map are kept as they are
        mapped_ns = minerva_to_indra_map.get(minerva_ns, minerva_ns)
        fixed_ns, fixed_id = fix_id_standards(mapped_ns, minerva_id)
        raw_refs[fixed_ns] = fixed_id

    # Special handling for issues in the curated maps: next to a specific
    # gene grounding, an ECCODE grounding is dropped since it can
    # incorrectly result in a family interpretation
    if 'HGNC' in raw_refs and 'ECCODE' in raw_refs:
        del raw_refs['ECCODE']
    return standardize_db_refs(raw_refs)
Exemple #13
0
    def standardize_db_refs(db_refs):
        """Return a standardized db refs dict for a given db refs dict.

        This is a thin wrapper: the argument is handed over unchanged to a
        call of ``standardize_db_refs`` and the result of that call is
        returned.

        Parameters
        ----------
        db_refs : dict
            A dict of db refs that may not be standardized, i.e., may be
            missing an available UP ID corresponding to an existing HGNC ID.

        Returns
        -------
        dict
            The db_refs dict with standardized entries.
        """
        # NOTE(review): presumably this def sits in a class body whose header
        # is outside this excerpt, in which case the name below resolves to
        # a module-level standardize_db_refs rather than to this function
        # itself -- confirm against the full file.
        return standardize_db_refs(db_refs)
Exemple #14
0
 def _get_db_refs(entity_term):
     """Return standardized db_refs assembled from an entity term's xrefs.

     Each entry of ``entity_term['xrefs']`` is read for its ``namespace``
     and ``id``. Recognized namespaces are stored under the corresponding
     db_refs key, ``entity_term['text']`` is stored under TEXT, and the
     result is passed through standardize_db_refs before being returned.
     """
     # Namespaces whose ID is stored as is under a fixed db_refs key;
     # "be" is handled for compatibility with older versions
     direct_keys = {
         'uniprot': 'UP',
         'hgnc': 'HGNC',
         'chebi': 'CHEBI',
         'pubchem': 'PUBCHEM',
         'mesh': 'MESH',
         'hmdb': 'HMDB',
         'fplx': 'FPLX',
         'be': 'FPLX',
     }
     # Namespaces that are additionally looked up in famplex_map
     family_keys = {'pfam': 'PF', 'interpro': 'IP'}

     refs = {}
     for xref in entity_term['xrefs']:
         namespace = xref['namespace']
         if namespace in direct_keys:
             refs[direct_keys[namespace]] = xref['id']
         elif namespace in family_keys:
             key = family_keys[namespace]
             family = famplex_map.get((key, xref['id']))
             if family:
                 refs['FPLX'] = family
             refs[key] = xref['id']
         elif namespace == 'go':
             # Handle secondary to primary mapping if necessary
             given_id = xref['id']
             primary_id = go_client.get_primary_id(given_id)
             refs['GO'] = primary_id if primary_id else given_id
         elif namespace == 'simple_chemical':
             # Only IDs starting with HMDB are kept for this namespace
             if xref['id'].startswith('HMDB'):
                 refs['HMDB'] = xref['id']
         elif namespace != 'uaz':
             # The "uaz" namespace is ignored, anything else is reported
             logger.warning('Unhandled xref namespace: %s' % namespace)
     refs['TEXT'] = entity_term['text']
     return standardize_db_refs(refs)
Exemple #15
0
 def _add_node(self, agent, uuid=None):
     """Return the ID of the node for an agent, creating it if needed.

     Nodes are keyed by the agent's name. If a node with that name already
     exists, the given uuid is appended to its uuid_list (unless already
     there) and the existing ID is returned. Otherwise a new node is
     built, appended to self._nodes, and its new ID is returned.
     """
     key = agent.name
     existing_id = self._existing_nodes.get(key)
     if existing_id is not None:
         # The node isn't added again but the uuid still has to be
         # recorded on it
         matching = [node for node in self._nodes
                     if node['data']['id'] == existing_id]
         known_uuids = matching[0]['data']['uuid_list']
         if uuid not in known_uuids:
             known_uuids.append(uuid)
         return existing_id

     db_refs = _get_db_refs(agent)
     new_id = self._get_new_id()
     self._existing_nodes[key] = new_id
     display_name = agent.name.replace('_', ' ')

     # Children are only looked up when there is an FPLX grounding, and
     # the lookup is filtered to the HGNC namespace
     if 'FPLX' not in db_refs:
         family_members = []
     else:
         family_members = bio_ontology.get_children(
             *agent.get_grounding(), ns_filter={'HGNC'})

     members = {}
     for member in family_members:
         member_refs = standardize_db_refs({member[0]: member[1]})
         gene_name = bio_ontology.get_name(*member)
         # Keep only the groundings for which a URL is available
         member_urls = {}
         for ref_ns, ref_id in member_refs.items():
             url = get_identifiers_url(ref_ns, ref_id)
             if url:
                 member_urls[ref_ns] = url
         members[gene_name] = {'db_refs': member_urls}

     node_data = {
         'id': new_id,
         'name': display_name,
         'db_refs': db_refs,
         'parent': '',
         'members': members,
         'uuid_list': [uuid]
     }
     self._nodes.append({'data': node_data})
     return new_id
Exemple #16
0
    def _get_db_refs(entity_term, organism_priority=None):
        """Return standardized db_refs assembled from an entity term.

        Each entry of ``entity_term['xrefs']`` is read for its
        ``namespace`` and ``id`` and recognized namespaces are stored
        under the corresponding db_refs key, with ``entity_term['text']``
        stored under TEXT. If there is a UP grounding and a truthy
        organism_priority is given, the UP ID may be replaced using the
        term's alt-xrefs. A UP ID containing "#" is moved to the UPPRO
        key. The result is passed through standardize_db_refs.
        """
        # Namespaces whose ID is stored as is under a fixed db_refs key.
        # Note: both full protein and protein chain IDs end up under UP
        # here so that organism prioritization can be applied in a uniform
        # way; they are separated out later. "be" is handled for
        # compatibility with older versions.
        direct_keys = {
            'uniprot': 'UP',
            'hgnc': 'HGNC',
            'chebi': 'CHEBI',
            'pubchem': 'PUBCHEM',
            'mesh': 'MESH',
            'hmdb': 'HMDB',
            'fplx': 'FPLX',
            'be': 'FPLX',
            'proonto': 'PR',
        }
        # Namespaces that are additionally looked up in famplex_map
        family_keys = {'pfam': 'PF', 'interpro': 'IP'}

        refs = {}
        for xref in entity_term['xrefs']:
            namespace = xref['namespace']
            if namespace in direct_keys:
                refs[direct_keys[namespace]] = xref['id']
            elif namespace in family_keys:
                key = family_keys[namespace]
                family = famplex_map.get((key, xref['id']))
                if family:
                    refs['FPLX'] = family
                refs[key] = xref['id']
            elif namespace == 'go':
                # Handle secondary to primary mapping if necessary
                given_id = xref['id']
                primary_id = go_client.get_primary_id(given_id)
                refs['GO'] = primary_id if primary_id else given_id
            elif namespace == 'simple_chemical':
                # Only IDs starting with HMDB are kept for this namespace
                if xref['id'].startswith('HMDB'):
                    refs['HMDB'] = xref['id']
            elif namespace != 'uaz':
                # The "uaz" namespace is ignored, anything else is reported
                logger.warning('Unhandled xref namespace: %s' % namespace)
        refs['TEXT'] = entity_term['text']

        # Without a UniProt grounding there is nothing left to adjust
        if not refs.get('UP'):
            return standardize_db_refs(refs)

        if organism_priority:
            # The alt-xrefs list repeats the same match once per organism
            # synonym, so it is reduced to its unique groundings first
            alt_groundings = {
                (alt['namespace'], alt['id'])
                for alt in entity_term.get('alt-xrefs', [])
            }
            # This is either a single prioritized UniProt ID or None, in
            # which case the ID from the primary xref is kept
            preferred_id = prioritize_organism_grounding(
                refs['UP'], alt_groundings, organism_priority)
            if preferred_id:
                refs['UP'] = preferred_id

        # Protein chain groundings are separated out at this point: the
        # part after the "#" is stored in the UPPRO namespace and the UP
        # key is removed
        if '#' in refs['UP']:
            chain_ref = refs.pop('UP', None)
            refs['UPPRO'] = chain_ref.split('#')[1]

        return standardize_db_refs(refs)
Exemple #17
0
def test_pubchem_mesh():
    """Check that a PubChem ID gets the expected MeSH cross-reference."""
    standardized = standardize_db_refs({'PUBCHEM': '56649450'})
    mesh_id = standardized.get('MESH')
    assert mesh_id == 'C585539'
Exemple #18
0
def test_standardize_up_isoform():
    """Check that a UniProt ID and its isoform form map to the same gene."""
    for up_id in ('Q99490', 'Q99490-123'):
        expected = {'UP': up_id, 'HGNC': '16921'}
        assert standardize_db_refs({'UP': up_id}) == expected
Exemple #19
0
def test_standardize_chembl():
    """Check that a DrugBank ID gets the expected ChEMBL reference."""
    standardized = standardize_db_refs({'DRUGBANK': 'DB00305'})
    assert 'CHEMBL' in standardized, standardized
    chembl_id = standardized['CHEMBL']
    assert chembl_id == 'CHEMBL105', standardized