def test_update_pos_labels():
    """Check that positive labels of a loaded model can be updated.

    Two copies of the 'IR' model are loaded. Re-applying the first copy's
    labels to the second is expected to leave the classifier stats equal,
    and applying an extended label list is expected to be reflected in
    ``pos_labels``.
    """
    reference = load_disambiguator('IR', path=TEST_MODEL_PATH)
    updated = load_disambiguator('IR', path=TEST_MODEL_PATH)

    # Updating with the labels the model already has.
    updated.update_pos_labels(reference.pos_labels)
    assert reference.classifier.stats == updated.classifier.stats

    # Updating with one additional label appended.
    extended_labels = reference.pos_labels + ['MESH:D007333']
    updated.update_pos_labels(extended_labels)
    expected_labels = {'HGNC:6091', 'MESH:D011839', 'MESH:D007333'}
    assert set(updated.pos_labels) == expected_labels
def generate_adeft_terms():
    """Build Term entries for the groundings of all available Adeft models.

    Every shortform listed by Adeft has its disambiguator loaded, and each
    namespaced grounding in the disambiguator's ``names`` is turned into one
    term. Groundings equal to 'ungrounded', groundings without a ':' and
    groundings in an unrecognized namespace are skipped (the latter with a
    warning).

    Returns
    -------
    list
        Term objects built from the de-duplicated entries, sorted by the
        normalized shortform.
    """
    from adeft import available_shortforms
    from adeft.disambiguate import load_disambiguator

    # One standard-name lookup per supported namespace. Lambdas keep the
    # client attribute access lazy, so only the lookup that is actually
    # needed for a grounding gets resolved.
    name_lookups = {
        'HGNC': lambda identifier: hgnc_client.get_hgnc_name(identifier),
        'GO': lambda identifier: go_client.get_go_label(identifier),
        'MESH': lambda identifier: mesh_client.get_mesh_name(identifier),
        'CHEBI': lambda identifier:
            chebi_client.get_chebi_name_from_id(identifier),
        # FamPlex identifiers double as their own standard name.
        'FPLX': lambda identifier: identifier,
        'UP': lambda identifier: uniprot_client.get_gene_name(identifier),
    }

    collected = set()
    for shortform in available_shortforms:
        disambiguator = load_disambiguator(shortform)
        for grounding in disambiguator.names.keys():
            if grounding == 'ungrounded' or ':' not in grounding:
                continue
            db_ns, db_id = grounding.split(':', maxsplit=1)
            lookup = name_lookups.get(db_ns)
            if lookup is None:
                logger.warning('Unknown grounding namespace from Adeft: %s' %
                               db_ns)
                continue
            standard_name = lookup(db_id)
            entry = (normalize(shortform), shortform, db_ns, db_id,
                     standard_name, 'synonym', 'adeft')
            collected.add(entry)

    # Sort on the normalized text only (first tuple element).
    ordered = sorted(collected, key=lambda entry: entry[0])
    return [Term(*entry) for entry in ordered]
def test_dump_disambiguator():
    """Check that a dumped model reloads with the same content.

    The 'IR' test model is dumped under a random name into SCRATCH_PATH,
    a model is loaded back from SCRATCH_PATH, and grounding dict, names,
    positive labels, logit coefficients and ``info()`` output are compared.

    The temporary folder is removed in a ``finally`` clause so that it is
    cleaned up even when one of the assertions fails; a failure to remove
    it is only logged.
    """
    ad1 = load_disambiguator('IR', path=TEST_MODEL_PATH)
    tempname = uuid.uuid4().hex
    dump_dir = os.path.join(SCRATCH_PATH, tempname)
    try:
        ad1.dump(tempname, path=SCRATCH_PATH)
        # NOTE(review): the model is reloaded under the name 'IR' rather
        # than `tempname` -- confirm this matches how dump() lays out files.
        ad2 = load_disambiguator('IR', path=SCRATCH_PATH)

        assert ad1.grounding_dict == ad2.grounding_dict
        assert ad1.names == ad2.names
        assert ad1.pos_labels == ad2.pos_labels
        assert (array_equal(ad1.classifier.estimator.named_steps['logit'].coef_,
                            ad2.classifier.estimator.named_steps['logit'].coef_))
        assert ad1.info() == ad2.info(), (ad1.info(), ad2.info())
    finally:
        # Best-effort cleanup: never let a cleanup problem mask the actual
        # test outcome.
        try:
            shutil.rmtree(dump_dir)
        except Exception:
            logger.warning('Could not clean up temporary folder %s',
                           dump_dir)
def test_modify_groundings():
    """Test updating groundings of existing model.

    Remaps 'HGNC:6091' to 'UP:P06213' with a new name and checks that the
    new identifier shows up in the model's labels, classifier, names and
    grounding dict.
    """
    old_grounding = 'HGNC:6091'
    new_grounding = 'UP:P06213'
    new_name = 'Insulin Receptor'

    model = load_disambiguator('IR', path=TEST_MODEL_PATH)
    model.modify_groundings(
        new_groundings={old_grounding: new_grounding},
        new_names={old_grounding: new_name},
    )

    # The new identifier must be visible at every level of the model.
    assert new_grounding in model.pos_labels
    assert new_grounding in model.classifier.pos_labels
    assert new_grounding in model.classifier.estimator.classes_
    assert new_grounding in model.names
    assert new_grounding in model.grounding_dict['IR'].values()
    assert model.names[new_grounding] == new_name
import logging from indra.ontology.standardize \ import standardize_agent_name logger = logging.getLogger(__name__) # If the adeft disambiguator is installed, load adeft models to # disambiguate acronyms and shortforms try: from adeft import available_shortforms as available_adeft_models from adeft.disambiguate import load_disambiguator adeft_disambiguators = {} for shortform in available_adeft_models: adeft_disambiguators[shortform] = load_disambiguator(shortform) except Exception: logger.info('Adeft will not be available for grounding disambiguation.') adeft_disambiguators = {} def run_adeft_disambiguation(stmt, agent, idx): """Run Adeft disambiguation on an Agent in a given Statement. This function looks at the evidence of the given Statement and attempts to look up the full paper or the abstract for the evidence. If both of those fail, the evidence sentence itself is used for disambiguation. The disambiguation model corresponding to the Agent text is then called, and the highest scoring returned grounding is set as the Agent's new grounding. The Statement's annotations as well as the Agent are modified in place and no value is returned.
def test_load_disambiguator():
    """Check that the loaded 'IR' model has the expected basic attributes."""
    model = load_disambiguator('IR', path=TEST_MODEL_PATH)
    assert model.shortforms == ['IR']
    for attribute in ('classifier', 'recognizers'):
        assert hasattr(model, attribute)
def test_modify_groundings_error():
    """Call ``modify_groundings`` mapping 'MESH:D011839' onto 'HGNC:6091'.

    NOTE(review): the test name suggests an error is expected, but nothing
    in this function asserts one -- presumably an expected-exception
    decorator or harness outside this view handles that; confirm.
    """
    model = load_disambiguator('IR', path=TEST_MODEL_PATH)
    remapping = {'MESH:D011839': 'HGNC:6091'}
    model.modify_groundings(new_groundings=remapping)
from itertools import groupby, chain from indra.statements import Agent from indra.databases import uniprot_client, hgnc_client from indra.util import read_unicode_csv, write_unicode_csv logger = logging.getLogger(__name__) # If the adeft disambiguator is installed, load adeft models to # disambiguate acronyms and shortforms try: from adeft import available_shortforms as available_adeft_models from adeft.disambiguate import load_disambiguator adeft_disambiguators = {} for shortform in available_adeft_models: adeft_disambiguators[shortform] = load_disambiguator(shortform) except Exception: logger.info('DEFT will not be available for grounding disambiguation.') adeft_disambiguators = {} class GroundingMapper(object): """Maps grounding of INDRA Agents based on a given grounding map. Parameters ---------- gm : dict The grounding map, a dictionary mapping strings (entity names) to a dictionary of database identifiers. agent_map : Optional[dict] A dictionary mapping strings to grounded INDRA Agents with given state.
def load_adeft_models():
    """Load one Adeft disambiguator per available shortform.

    Returns
    -------
    dict
        Maps each entry of ``available_adeft_models`` to the object returned
        by ``load_disambiguator`` for it.
    """
    return {
        shortform: load_disambiguator(shortform)
        for shortform in available_adeft_models
    }
from indra.databases.hgnc_client import get_uniprot_id
from indra_db_lite import get_entrez_pmids_for_hgnc
from indra_db_lite import get_entrez_pmids_for_uniprot
from indra_db_lite import get_mesh_terms_for_grounding
from indra_db_lite import get_plaintexts_for_text_ref_ids
from indra_db_lite import get_pmids_for_mesh_term
from indra_db_lite import get_text_ref_ids_for_agent_text
from indra_db_lite import get_text_ref_ids_for_pmids
from opaque.nlp.featurize import BaselineTfidfVectorizer

logger = logging.getLogger(__file__)


# Disambiguators keyed by model name; loaded once at import time.
models = {
    name: load_disambiguator(key)
    for key, name in available_shortforms.items()
}

# Inverse view of ``available_shortforms``: model name -> shortform.
reverse_model_map = {
    name: key for key, name in available_shortforms.items()
}


def get_groundings_for_disambiguator(disamb):
    """Return the distinct groundings referenced by a disambiguator.

    Parameters
    ----------
    disamb
        Object whose ``grounding_dict`` attribute is a mapping of mappings;
        the values of the inner mappings are collected.

    Returns
    -------
    list
        The unique inner values, in no particular order.
    """
    unique_curies = {
        curie
        for grounding_map in disamb.grounding_dict.values()
        for curie in grounding_map.values()
    }
    return list(unique_curies)