def _resolve_geo(hume_loc_entity):
    """Build a RefContext for a Hume location entity.

    The name is the entity's 'canonicalName' entry, falling back to its
    'text' entry. A GEOID reference is attached only when the entity has a
    'geoname_id' that is not None.
    """
    name = hume_loc_entity.get('canonicalName', hume_loc_entity.get('text'))
    geoname_id = hume_loc_entity.get('geoname_id', None)
    if geoname_id is None:
        return RefContext(name)
    return RefContext(name=name, db_refs={"GEOID": geoname_id})
def _make_context(self, entity): """Get place and time info from the json for this entity.""" loc_context = None time_context = None # Look for time and place contexts. for argument in entity["arguments"]: if argument["type"] == "place": entity_id = argument["value"]["@id"] loc_entity = self.concept_dict[entity_id] place = loc_entity["canonicalName"] geo_id = loc_entity.get('geoname_id') loc_context = RefContext(name=place, db_refs={"GEOID": geo_id}) if argument["type"] == "time": entity_id = argument["value"]["@id"] temporal_entity = self.concept_dict[entity_id] text = temporal_entity['mentions'][0]['text'] if len(temporal_entity.get("timeInterval", [])) < 1: time_context = TimeContext(text=text) continue time = temporal_entity["timeInterval"][0] start = datetime.strptime(time['start'], '%Y-%m-%dT%H:%M') end = datetime.strptime(time['end'], '%Y-%m-%dT%H:%M') duration = int(time['duration']) time_context = TimeContext(text=text, start=start, end=end, duration=duration) # Put context together context = None if loc_context or time_context: context = WorldContext(time=time_context, geo_location=loc_context) return context
def get_event(event_entry):
    """Assemble an Event from a single Sofia event entry.

    The 'Relation' entry names the concept and 'Event_Type', when set, is
    stored as its SOFIA grounding. 'Time' and 'Location' fill the context,
    'Agent' and 'Patient' are kept as evidence annotations, and 'Polarity'
    becomes the polarity of the event's QualitativeDelta.
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    event_type = event_entry['Event_Type']
    if event_type:
        concept.db_refs['SOFIA'] = event_type

    context = WorldContext()
    time_text = event_entry.get('Time')
    if time_text:
        context.time = TimeContext(text=time_text.strip())
    location = event_entry.get('Location')
    if location:
        context.geo_location = RefContext(name=location)

    # Keep only the participant roles that are actually filled in
    annotations = {}
    for role, key in (('agent', 'Agent'), ('patient', 'Patient')):
        participant = event_entry.get(key)
        if participant:
            annotations[role] = participant

    evidence = Evidence(source_api='sofia',
                        pmid=event_entry.get('Source'),
                        text=event_entry.get('Text'),
                        annotations=annotations,
                        source_id=event_entry['Event Index'])
    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def get_event_compositional(self, event_entry: Dict[str, str]) -> Event:
    """Get an Event with compositional grounding

    Parameters
    ----------
    event_entry :
        The event to process

    Returns
    -------
    event :
        An Event statement
    """
    # Try the compositional grounding first
    comp_name, comp_grnd = self.get_compositional_grounding(event_entry)
    has_compositional = (
        comp_name is not None
        and comp_grnd[0] is not None
        and comp_grnd[0][0] is not None
    )
    if has_compositional:
        concept = Concept(comp_name,
                          db_refs={'TEXT': comp_name, 'WM': [comp_grnd]})
    else:
        # Otherwise fall back to the old style Sofia grounding
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        if event_entry['Event_Type']:
            concept.db_refs['SOFIA'] = event_entry['Event_Type']

    context = WorldContext()
    time_text = event_entry.get('Time')
    if time_text:
        context.time = TimeContext(text=time_text.strip())
    location = event_entry.get('Location')
    if location:
        context.geo_location = RefContext(name=location)

    # Keep only the participant roles that are actually filled in
    annotations = {}
    for role, key in (('agent', 'Agent'), ('patient', 'Patient')):
        participant = event_entry.get(key)
        if participant:
            annotations[role] = participant

    evidence = Evidence(source_api='sofia',
                        text_refs={'DART': event_entry.get('Source')},
                        text=event_entry.get('Text'),
                        annotations=annotations,
                        source_id=event_entry['Event Index'])
    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def parse_context_entry(entry, grounder, sentence=None):
    """Return a dict of context type and object processed from an entry.

    The entry is expected to look like "<type>: <text>". None is returned
    if it does not have that form or if the type is not a key of
    allowed_contexts. The text is grounded with the given grounder, using
    the sentence as context; when no grounding is found the context is
    named after the raw text and only carries a TEXT reference.
    """
    parsed = re.match(r'(.*): (.*)', entry)
    if not parsed:
        return None
    context_type, context_txt = parsed.groups()
    if context_type not in allowed_contexts:
        logger.warning('Unknown context type %s' % context_type)
        return None

    terms = grounder(context_txt, context=sentence)
    if terms:
        # Only the first term returned by the grounder is used
        top_term = terms[0].term
        db_refs = standardize_db_refs({top_term.db: top_term.id})
        db_refs['TEXT'] = context_txt
        standard_name = bio_ontology.get_name(top_term.db, top_term.id)
    else:
        logger.warning('Could not ground %s context: %s'
                       % (context_type, context_txt))
        db_refs = {'TEXT': context_txt}
        standard_name = None

    name = standard_name if standard_name else context_txt
    return {allowed_contexts[context_type]:
            RefContext(name=name, db_refs=db_refs)}
def get_event(event_entry):
    """Assemble an Event from a single Sofia event entry.

    The 'Relation' entry names the concept and 'Event_Type', when set, is
    stored as its SOFIA grounding. 'Time' and 'Location' fill the context,
    and 'Polarity' goes into the event's delta dict together with an empty
    list of adjectives.
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    event_type = event_entry['Event_Type']
    if event_type:
        concept.db_refs['SOFIA'] = event_type

    context = WorldContext()
    time_text = event_entry.get('Time')
    if time_text:
        context.time = TimeContext(text=time_text.strip())
    location = event_entry.get('Location')
    if location:
        context.geo_location = RefContext(name=location)

    evidence = Evidence(source_api='sofia',
                        pmid=event_entry.get('Source'),
                        text=event_entry.get('Text'))
    delta = {'polarity': event_entry.get('Polarity'), 'adjectives': []}
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def test_event_assemble_location():
    """Events differing only in GEOID merge unless matched on location."""
    concept = Concept('rainfall')
    events = [
        Event(concept,
              context=WorldContext(
                  geo_location=RefContext(name='x',
                                          db_refs={'GEOID': geoid})))
        for geoid in ('1', '2')
    ]
    # Without a matches function the two events are duplicates; with
    # location_matches they are kept apart.
    for matches_fun, expected_count in ((None, 1), (location_matches, 2)):
        pa = Preassembler(ontology=world_ontology, stmts=list(events),
                          matches_fun=matches_fun)
        unique_stmts = pa.combine_duplicates()
        assert len(unique_stmts) == expected_count
def get_cell_line(ekb):
    """Return a RefContext for the first cell line term found in the EKB.

    Parameters
    ----------
    ekb :
        An element supporting ``find`` with the path used below
        (presumably an ElementTree element -- confirm against the caller).

    Returns
    -------
    RefContext or None
        A context named after the term's text with hyphens removed, or
        None if there is no cell line term or its text is empty.
    """
    # Look for a term representing a cell line
    cl_tag = ekb.find("TERM/[type='ONT::CELL-LINE']/text")
    if cl_tag is None or cl_tag.text is None:
        return None
    # str.replace returns a new string, so the result has to be kept;
    # calling it without using the return value leaves the name unchanged.
    cell_line = cl_tag.text.replace('-', '')
    # TODO: add grounding here if available
    return RefContext(cell_line)
def get_statements(self):
    """Return the statements extracted from the stored relations.

    Only relations of type 'PHOSPHORYLATION' that have a substrate produce
    a statement; any other relation type is logged as unhandled.
    """
    statements = []
    for rel_info in self._relations.values():
        # Map each argument role to the id of the entity filling it.
        args = {arg['role']: arg['entity_duid']
                for arg in rel_info['argument']}
        # The trigger and the site are not agents, so they are kept apart.
        trigger_id = args['TRIGGER']
        site_id = args.get('SITE')
        entities = {
            role: self._get_agent(eid)
            for role, eid in args.items()
            if role not in ('TRIGGER', 'SITE')
        }

        rel_type = rel_info['relationType']
        if rel_type != 'PHOSPHORYLATION':
            logger.warning("Unhandled statement type: %s" % rel_type)
            continue

        # Get the agents.
        enz, enz_coords = entities.get('KINASE', (None, None))
        sub, sub_coords = entities.get('SUBSTRATE', (None, None))
        if sub is None:
            continue

        # Get the site
        residue, position, site_coords = self._get_site(site_id)

        # Get the evidence
        ev = self._get_evidence(trigger_id, args,
                                [enz_coords, sub_coords], site_coords)

        # Turn taxonomy into context. The substrate is visited last, so
        # its TAX takes precedence; TAX is removed from both agents.
        tax = None
        for agent in (enz, sub):
            if agent and 'TAX' in agent.db_refs:
                tax = agent.db_refs.pop('TAX')
        if tax is not None:
            ev.context = BioContext(
                species=RefContext(tax, {'TAXONOMY': tax}))

        statements.append(
            Phosphorylation(enz, sub, residue=residue, position=position,
                            evidence=[ev]))
    return statements
def test_influence_event_hash_reference():
    """An Influence's subject JSON carries the same hash as the event's.

    Also checks that two events differing only in GEOID hash the same by
    default and differently under location_matches.
    """
    rainfall = Concept('rainfall')
    subj_event, obj_event = (
        Event(rainfall,
              context=WorldContext(
                  geo_location=RefContext(name='x',
                                          db_refs={'GEOID': geoid})))
        for geoid in ('1', '2')
    )
    influence = Influence(subj_event, obj_event)

    # Default hashes first, then location-aware ones, each refreshed
    subj_hash = subj_event.get_hash(refresh=True)
    obj_hash = obj_event.get_hash(refresh=True)
    subj_loc_hash = subj_event.get_hash(refresh=True,
                                        matches_fun=location_matches)
    obj_loc_hash = obj_event.get_hash(refresh=True,
                                      matches_fun=location_matches)
    assert subj_hash == obj_hash, (subj_hash, obj_hash)
    assert subj_loc_hash != obj_loc_hash, (subj_loc_hash, obj_loc_hash)

    influence_json = influence.to_json(matches_fun=location_matches)
    subj_json = subj_event.to_json(matches_fun=location_matches)
    assert subj_json['matches_hash'] == \
        influence_json['subj']['matches_hash'], \
        (print(json.dumps(subj_json, indent=1)),
         print(json.dumps(influence_json, indent=1)))
def test_pybel_neighborhood_query():
    """Process the TP63 neighborhood of the small corpus and check context.

    Every statement is expected to carry the 'MCF 10A' cell line, and the
    first statement's evidence context is compared against a fully
    specified BioContext. The annotation manager is also checked to map
    species 9606 to its name.
    """
    corpus = path_this + '/../../data/small_corpus.bel'
    bp = bel.process_pybel_neighborhood(['TP63'], corpus)
    assert bp.statements
    assert_pmids(bp.statements)
    unicode_strs(bp.statements)
    assert all(s.evidence[0].context.cell_line.name == 'MCF 10A'
               for s in bp.statements)

    first_context = bp.statements[0].evidence[0].context
    assert first_context.__repr__() == first_context.__str__()
    assert first_context == \
        BioContext(location=RefContext(name="Cytoplasm",
                                       db_refs={'MESH': 'D003593'}),
                   cell_line=RefContext(name="MCF 10A",
                                        db_refs={'EFO': '0001200'}),
                   cell_type=RefContext(name="keratinocyte",
                                        db_refs={'CL': '0000312'}),
                   organ=RefContext(name="colon",
                                    db_refs={'UBERON': '0001155'}),
                   disease=RefContext(name="cancer",
                                      db_refs={'DOID': '162'}),
                   species=RefContext(name="Rattus norvegicus",
                                      db_refs={'TAXONOMY': '10116'}))
    # Test annotation manager. Taxonomy ID 9606 is Homo sapiens; the
    # expected name had been garbled into a string that cannot match.
    assert bp.annot_manager.get_mapping('Species', '9606') == \
        'Homo sapiens'
def get_cell_line(ekb):
    """Return a RefContext for the first cell line term found in the EKB.

    Parameters
    ----------
    ekb :
        An element supporting ``find`` with the path used below
        (presumably an ElementTree element -- confirm against the caller).

    Returns
    -------
    RefContext or None
        A context named after the term's name with 'CELLS', 'CELL' and
        hyphens removed, or None if there is no cell line term, it has no
        name element, or the name element is empty.
    """
    # Look for a term representing a cell line
    cl_tag = ekb.find("TERM/[type='ONT::CELL-LINE']")
    if cl_tag is None:
        return None
    name_tag = cl_tag.find('name')
    # An empty name element has no text (None), which would otherwise
    # fail on .replace below; treat it like a missing name.
    if name_tag is None or name_tag.text is None:
        return None
    name = name_tag.text
    # 'CELLS' has to be removed before 'CELL' so no stray 'S' is left over
    for token in ('CELLS', 'CELL', '-'):
        name = name.replace(token, '')
    # TODO: add grounding here if available
    return RefContext(name)
def get_event_flat(self, event_entry: Dict[str, str]) -> Event:
    """Get an Event with flattened grounding

    Parameters
    ----------
    event_entry :
        The event to process

    Returns
    -------
    event :
        An Event statement
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    event_type = event_entry['Event_Type']
    if event_type:
        concept.db_refs['SOFIA'] = event_type

    context = WorldContext()
    time_text = event_entry.get('Time')
    if time_text:
        context.time = TimeContext(text=time_text.strip())
    location = event_entry.get('Location')
    if location:
        context.geo_location = RefContext(name=location)

    # Keep only the participant roles that are actually filled in
    annotations = {}
    for role, key in (('agent', 'Agent'), ('patient', 'Patient')):
        participant = event_entry.get(key)
        if participant:
            annotations[role] = participant

    evidence = Evidence(source_api='sofia',
                        text_refs={'DART': event_entry.get('Source')},
                        text=event_entry.get('Text'),
                        annotations=annotations,
                        source_id=event_entry['Event Index'])
    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def _get_evidence(record: Mapping[str, Any]) -> Evidence:
    """Build an Evidence with source API "creeds" from a record.

    The record's "cell_type" and "geo_id" entries are kept as annotations
    (under "cell_type" and "geo"), and its "organism" entry becomes the
    species of a BioContext whose TAXONOMY reference is looked up in
    ORGANISMS.
    """
    # TODO how to use the following metadata?
    geo_id = record["geo_id"]
    cell_type = record["cell_type"]
    organism = record["organism"]

    species = RefContext(
        name=organism,
        db_refs={"TAXONOMY": ORGANISMS[organism]},
    )
    # TODO use Gilda for grounding and put in BioContext?
    annotations = {"cell_type": cell_type, "geo": geo_id}
    return Evidence(
        source_api="creeds",
        annotations=annotations,
        context=BioContext(species=species),
    )
def test_matches_key_fun():
    """Preassemble events with location-aware matching and refinement.

    Three events share a concept; one is at GEOID 1 and two at GEOID 2, so
    two unique statements are expected, both from the Preassembler
    directly and from run_preassembly.
    """
    from indra.statements import WorldContext, RefContext

    def has_location(stmt):
        """Return True if the statement's context has a GEOID set."""
        context = stmt.context
        return bool(context and context.geo_location
                    and context.geo_location.db_refs.get('GEOID'))

    def event_location_matches(stmt):
        """Key Events on concept and GEOID, other statements as usual."""
        if not isinstance(stmt, Event):
            return stmt.matches_key()
        geoid = None
        if has_location(stmt):
            geoid = stmt.context.geo_location.db_refs['GEOID']
        return str((stmt.concept.matches_key(), geoid))

    def event_location_refinement(st1, st2, ontology, entities_refined):
        """Refine Events only if the concept refines and locations agree."""
        if not (isinstance(st1, Event) and isinstance(st2, Event)):
            return None
        if not st1.refinement_of(st2, ontology):
            return False
        # Anything refines an event that has no location
        if not has_location(st2):
            return True
        # From here on st2 has a location, so st1 needs a matching one
        if not has_location(st1):
            return False
        return st1.context.geo_location.db_refs['GEOID'] == \
            st2.context.geo_location.db_refs['GEOID']

    context1 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '1'}))
    context2 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '2'}))

    health = 'wm/concept/causal_factor/health_and_life'

    def make_event(context, text):
        """Return a health Event with the given context and evidence text."""
        return Event(Concept('health', db_refs={'WM': [(health, 1.0)]}),
                     context=context,
                     evidence=Evidence(text=text, source_api='eidos'))

    e1 = make_event(context1, '1')
    e2 = make_event(context2, '2')
    e3 = make_event(context2, '3')

    pa = Preassembler(world_ontology, [e1, e2, e3],
                      matches_fun=event_location_matches,
                      refinement_fun=event_location_refinement)
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2, unique_stmts

    from indra.tools.assemble_corpus import run_preassembly
    stmts = run_preassembly([e1, e2, e3],
                            matches_fun=event_location_matches,
                            refinement_fun=event_location_refinement)
    assert len(stmts) == 2, stmts
def ref_context_from_geoloc(geoloc):
    """Return a RefContext object given a geoloc entry.

    The context is named after the entry's 'text' and its GEOID reference
    is the entry's 'geoID'; either is None when missing from the entry.
    """
    return RefContext(name=geoloc.get('text'),
                      db_refs={'GEOID': geoloc.get('geoID')})
def _build_stmts(self, rel_dict): stmt_list = [] cause_entries = rel_dict.get('Cause Index') effect_entries = rel_dict.get('Effect Index') # FIXME: Handle cases in which there is a missing cause/effect if not cause_entries or not effect_entries: return [] causes = [c.strip() for c in cause_entries.split(',')] effects = [e.strip() for e in effect_entries.split(',')] rel = rel_dict.get('Relation') if _in_rels(rel, pos_rels): pol = 1 elif _in_rels(rel, neg_rels): pol = -1 elif _in_rels(rel, neu_rels): pol = None # If we don't recognize this relation, we don't get any # statements else: return [] text = rel_dict.get('Sentence') annot_keys = ['Relation'] annots = {k: rel_dict.get(k) for k in annot_keys} ref = rel_dict.get('Source_File') for cause_idx, effect_idx in itertools.product(causes, effects): cause_name = self._events[cause_idx]['Relation'] cause_grounding = self._events[cause_idx]['Event_Type'] effect_name = self._events[effect_idx]['Relation'] effect_grounding = self._events[effect_idx]['Event_Type'] cause_concept = Concept(cause_name, db_refs={'TEXT': cause_name}) if cause_grounding: cause_concept.db_refs['SOFIA'] = cause_grounding effect_concept = Concept(effect_name, db_refs={'TEXT': effect_name}) if effect_grounding: effect_concept.db_refs['SOFIA'] = effect_grounding # NOTE: Extract context. 
The basic issue is that # time/location # here is given at the event level, not at the relation # level, and so we need to choose which event's context # we will associate with the relation def choose_context(context_type): locs = [ self._events[cause_idx].get(context_type), self._events[effect_idx].get(context_type) ] if locs[0]: return locs[0].strip() elif locs[1]: return locs[1].strip() else: return None context = WorldContext() location = choose_context('Location') if location: context.location = RefContext(name=location) time = choose_context('Time') if time: context.time = TimeContext(text=time) # Overwrite blank context if not context: context = None ev = Evidence(source_api='sofia', pmid=ref, annotations=annots, text=text, context=context) stmt = Influence(cause_concept, effect_concept, evidence=[ev]) # Assume unknown polarity on the subject, put the overall # polarity in the sign of the object stmt.subj_delta['polarity'] = None stmt.obj_delta['polarity'] = pol stmt_list.append(stmt) return stmt_list