def get_event_compositional(self, event_entry: Dict[str, str]) -> Event:
    """Get an Event with compositional grounding

    The concept is grounded compositionally when
    ``self.get_compositional_grounding`` yields a name together with a
    grounding whose first element, and that element's first item, are not
    None. Otherwise the entry's ``Relation`` is used as the concept name
    and its ``Event_Type`` (when non-empty) as the ``SOFIA`` grounding.

    Parameters
    ----------
    event_entry :
        The event to process

    Returns
    -------
    event :
        An Event statement
    """
    grounded_name, grounding = self.get_compositional_grounding(event_entry)
    is_compositional = (
        grounded_name is not None
        and grounding[0] is not None
        and grounding[0][0] is not None
    )
    if is_compositional:
        concept = Concept(
            grounded_name,
            db_refs={'TEXT': grounded_name, 'WM': [grounding]},
        )
    else:
        # No usable compositional grounding: fall back to the old style
        # Sofia grounding
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

    # Time and location are optional; only set what the entry provides
    context = WorldContext()
    raw_time = event_entry.get('Time')
    if raw_time:
        context.time = TimeContext(text=raw_time.strip())
    raw_location = event_entry.get('Location')
    if raw_location:
        context.geo_location = RefContext(name=raw_location)

    sentence = event_entry.get('Text')
    source = event_entry.get('Source')

    # Keep agent/patient as evidence annotations when they are non-empty
    annotations = {}
    for key, field in (('agent', 'Agent'), ('patient', 'Patient')):
        value = event_entry.get(field)
        if value:
            annotations[key] = value

    evidence = Evidence(
        source_api='sofia',
        text_refs={'DART': source},
        text=sentence,
        annotations=annotations,
        source_id=event_entry['Event Index'],
    )
    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def get_event(event_entry):
    """Build an Event from a single event entry.

    Parameters
    ----------
    event_entry : dict
        Entry providing ``Relation``, ``Event_Type`` and ``Event Index``
        (required keys) and optionally ``Time``, ``Location``, ``Text``,
        ``Source``, ``Agent``, ``Patient`` and ``Polarity``.

    Returns
    -------
    Event
        Event whose concept is named after ``Relation``, carrying one
        Evidence with ``source_api='sofia'``.
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    sofia_type = event_entry['Event_Type']
    if sofia_type:
        concept.db_refs['SOFIA'] = sofia_type

    # Time and location are optional; only set what the entry provides
    context = WorldContext()
    raw_time = event_entry.get('Time')
    if raw_time:
        context.time = TimeContext(text=raw_time.strip())
    raw_location = event_entry.get('Location')
    if raw_location:
        context.geo_location = RefContext(name=raw_location)

    sentence = event_entry.get('Text')
    source = event_entry.get('Source')

    # Keep agent/patient as evidence annotations when they are non-empty
    annotations = {}
    for key, field in (('agent', 'Agent'), ('patient', 'Patient')):
        value = event_entry.get(field)
        if value:
            annotations[key] = value

    evidence = Evidence(
        source_api='sofia',
        pmid=source,
        text=sentence,
        annotations=annotations,
        source_id=event_entry['Event Index'],
    )
    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def get_event(event_entry):
    """Build an Event from a single event entry.

    Parameters
    ----------
    event_entry : dict
        Entry providing ``Relation`` and ``Event_Type`` (required keys) and
        optionally ``Time``, ``Location``, ``Text``, ``Source`` and
        ``Polarity``.

    Returns
    -------
    Event
        Event whose delta is a plain dict holding the entry's polarity and
        an empty list of adjectives.
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    sofia_type = event_entry['Event_Type']
    if sofia_type:
        concept.db_refs['SOFIA'] = sofia_type

    # Time and location are optional; only set what the entry provides
    context = WorldContext()
    raw_time = event_entry.get('Time')
    if raw_time:
        context.time = TimeContext(text=raw_time.strip())
    raw_location = event_entry.get('Location')
    if raw_location:
        context.geo_location = RefContext(name=raw_location)

    sentence = event_entry.get('Text')
    source = event_entry.get('Source')
    evidence = Evidence(source_api='sofia', pmid=source, text=sentence)

    delta = {
        'polarity': event_entry.get('Polarity'),
        'adjectives': [],
    }
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def get_causal_relations(self):
    """Extract causal relations as Statements.

    Every extraction labeled both ``DirectedRelation`` and ``Causal`` that
    has a source and a destination argument is turned into an Influence,
    which is appended to ``self.statements``.
    """
    def is_directed_causal(extraction):
        return ('DirectedRelation' in extraction['labels']
                and 'Causal' in extraction['labels'])

    # Select the matching events up front, before any of them is processed
    causal_events = list(filter(is_directed_causal, self.extractions))

    for causal_event in causal_events:
        # For now, just take the first source and first destination.
        # Later, might deal with hypergraph representation.
        source_id = self.find_arg(causal_event, 'source')
        target_id = self.find_arg(causal_event, 'destination')
        if source_id is None or target_id is None:
            continue

        # Resolve coreferences by ID, then look up the actual entities
        source_id = self.coreferences.get(source_id, source_id)
        target_id = self.coreferences.get(target_id, target_id)
        source_entity = self.entities[source_id]
        target_entity = self.entities[target_id]

        source_delta = self.extract_entity_states(
            source_entity.get('states', []))
        target_delta = self.extract_entity_states(
            target_entity.get('states', []))

        evidence = self.get_evidence(causal_event)
        annotations = evidence.annotations

        # Time constraints and locations for concepts are currently better
        # stored as annotations at the Evidence level, so they are removed
        # from the deltas and moved over there.
        source_time = source_delta.pop('time_context', None)
        target_time = target_delta.pop('time_context', None)
        source_geo = source_delta.pop('geo_context', None)
        target_geo = target_delta.pop('geo_context', None)
        if source_time or source_geo:
            annotations['subj_context'] = WorldContext(
                time=source_time, geo_location=source_geo).to_json()
        if target_time or target_geo:
            annotations['obj_context'] = WorldContext(
                time=target_time, geo_location=target_geo).to_json()

        # For the time being the adjectives and polarities are also copied
        # into annotations since they could otherwise get squashed upon
        # preassembly
        annotations['subj_adjectives'] = source_delta['adjectives']
        annotations['obj_adjectives'] = target_delta['adjectives']
        annotations['subj_polarity'] = source_delta['polarity']
        annotations['obj_polarity'] = target_delta['polarity']

        influence = Influence(
            self.get_concept(source_entity),
            self.get_concept(target_entity),
            source_delta,
            target_delta,
            evidence=[evidence],
        )
        self.statements.append(influence)
def _make_context(self, entity): """Get place and time info from the json for this entity.""" loc_context = None time_context = None # Look for time and place contexts. for argument in entity["arguments"]: if argument["type"] == "place": entity_id = argument["value"]["@id"] loc_entity = self.concept_dict[entity_id] place = loc_entity["canonicalName"] geo_id = loc_entity.get('geoname_id') loc_context = RefContext(name=place, db_refs={"GEOID": geo_id}) if argument["type"] == "time": entity_id = argument["value"]["@id"] temporal_entity = self.concept_dict[entity_id] text = temporal_entity['mentions'][0]['text'] if len(temporal_entity.get("timeInterval", [])) < 1: time_context = TimeContext(text=text) continue time = temporal_entity["timeInterval"][0] start = datetime.strptime(time['start'], '%Y-%m-%dT%H:%M') end = datetime.strptime(time['end'], '%Y-%m-%dT%H:%M') duration = int(time['duration']) time_context = TimeContext(text=text, start=start, end=end, duration=duration) # Put context together context = None if loc_context or time_context: context = WorldContext(time=time_context, geo_location=loc_context) return context
def _make_world_context(self, entity): """Get place and time info from the json for this entity.""" loc_context = None time_context = None # Look for time and place contexts. for argument in entity["arguments"]: if argument["type"] in { "has_location", "has_origin_location", "has_destination_location", "has_intermediate_location" }: entity_id = argument["value"]["@id"] loc_entity = self.concept_dict[entity_id] loc_context = _resolve_geo(loc_entity) if argument["type"] in { "has_time", "has_start_time", "has_end_time" }: entity_id = argument["value"]["@id"] temporal_entity = self.concept_dict[entity_id] time_context = _resolve_time(temporal_entity) # Put context together context = None if loc_context or time_context: context = WorldContext(time=time_context, geo_location=loc_context) return context
def get_event_flat(self, event_entry: Dict[str, str]) -> Event:
    """Get an Event with flattened grounding

    The entry's ``Relation`` is used as the concept name and its
    ``Event_Type`` (when non-empty) as the ``SOFIA`` grounding.

    Parameters
    ----------
    event_entry :
        The event to process

    Returns
    -------
    event :
        An Event statement
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    sofia_type = event_entry['Event_Type']
    if sofia_type:
        concept.db_refs['SOFIA'] = sofia_type

    # Time and location are optional; only set what the entry provides
    context = WorldContext()
    raw_time = event_entry.get('Time')
    if raw_time:
        context.time = TimeContext(text=raw_time.strip())
    raw_location = event_entry.get('Location')
    if raw_location:
        context.geo_location = RefContext(name=raw_location)

    sentence = event_entry.get('Text')
    source = event_entry.get('Source')

    # Keep agent/patient as evidence annotations when they are non-empty
    annotations = {}
    for key, field in (('agent', 'Agent'), ('patient', 'Patient')):
        value = event_entry.get(field)
        if value:
            annotations[key] = value

    evidence = Evidence(
        source_api='sofia',
        text_refs={'DART': source},
        text=sentence,
        annotations=annotations,
        source_id=event_entry['Event Index'],
    )
    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def test_event_assemble_location():
    """Check that Events differing only in GEOID merge by default but stay
    separate when ``location_matches`` is used as the matches function."""
    concept = Concept('rainfall')
    here = RefContext(name='x', db_refs={'GEOID': '1'})
    there = RefContext(name='x', db_refs={'GEOID': '2'})
    event_here = Event(concept, context=WorldContext(geo_location=here))
    event_there = Event(concept, context=WorldContext(geo_location=there))

    # (matches function, expected number of unique statements)
    cases = (
        (None, 1),
        (location_matches, 2),
    )
    for matcher, expected_count in cases:
        pa = Preassembler(ontology=world_ontology,
                          stmts=[event_here, event_there],
                          matches_fun=matcher)
        unique_stmts = pa.combine_duplicates()
        assert len(unique_stmts) == expected_count
def test_influence_event_hash_reference():
    """Check that an Event's ``matches_hash`` in its own JSON agrees with
    the one reported for it inside an Influence's JSON when a custom
    matches function is used."""
    concept = Concept('rainfall')
    context_a = WorldContext(
        geo_location=RefContext(name='x', db_refs={'GEOID': '1'}))
    context_b = WorldContext(
        geo_location=RefContext(name='x', db_refs={'GEOID': '2'}))
    ev1 = Event(concept, context=context_a)
    ev2 = Event(concept, context=context_b)
    infl = Influence(ev1, ev2)

    # Default hashes first, then location-aware ones; each call refreshes
    # the hash, so the order of these calls is kept as is.
    default_hashes = (ev1.get_hash(refresh=True),
                      ev2.get_hash(refresh=True))
    location_hashes = (
        ev1.get_hash(refresh=True, matches_fun=location_matches),
        ev2.get_hash(refresh=True, matches_fun=location_matches),
    )
    assert default_hashes[0] == default_hashes[1], default_hashes
    assert location_hashes[0] != location_hashes[1], location_hashes

    influence_json = infl.to_json(matches_fun=location_matches)
    event_json = ev1.to_json(matches_fun=location_matches)
    # On failure, both JSON documents are printed while the assertion
    # message is being built.
    assert event_json['matches_hash'] == \
        influence_json['subj']['matches_hash'], \
        (print(json.dumps(event_json, indent=1)),
         print(json.dumps(influence_json, indent=1)))
def get_event(event_entry):
    """Build an Event from a single event entry.

    Parameters
    ----------
    event_entry : dict
        Entry providing ``Relation`` and ``Event_Type`` (required keys) and
        optionally ``Time``, ``Location``, ``Text``, ``Source`` and
        ``Polarity``.

    Returns
    -------
    Event
        Event whose delta is a QualitativeDelta with the entry's polarity
        and no adjectives.
    """
    relation = event_entry['Relation']
    concept = Concept(relation, db_refs={'TEXT': relation})
    sofia_type = event_entry['Event_Type']
    if sofia_type:
        concept.db_refs['SOFIA'] = sofia_type

    # Time and location are optional; only set what the entry provides
    context = WorldContext()
    raw_time = event_entry.get('Time')
    if raw_time:
        context.time = TimeContext(text=raw_time.strip())
    raw_location = event_entry.get('Location')
    if raw_location:
        context.geo_location = RefContext(name=raw_location)

    sentence = event_entry.get('Text')
    source = event_entry.get('Source')
    evidence = Evidence(source_api='sofia', pmid=source, text=sentence)

    delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                             adjectives=None)
    return Event(concept, context=context, evidence=[evidence], delta=delta)
def get_event(self, event):
    """Return an Event statement built from an event entry.

    The concept comes from ``self.get_concept`` and the polarity,
    adjectives, time and geo context from ``self.extract_entity_states``
    applied to the entry's ``states`` (an empty list when absent).
    """
    concept = self.get_concept(event)
    state_info = self.extract_entity_states(event.get('states', []))

    delta = QualitativeDelta(polarity=state_info.get('polarity'),
                             adjectives=state_info.get('adjectives'))

    time_ctx = state_info.get('time_context', None)
    geo_ctx = state_info.get('geo_context', None)
    # Only attach a context when at least one of its parts is available
    if time_ctx or geo_ctx:
        context = WorldContext(time=time_ctx, geo_location=geo_ctx)
    else:
        context = None

    return Event(concept, delta=delta, context=context)
def _make_world_context(self, entity): """Get place and time info from the json for this entity.""" loc_context = None time_context = None # Look for time and place contexts. for argument in entity["arguments"]: if argument["type"] == "place": entity_id = argument["value"]["@id"] loc_entity = self.concept_dict[entity_id] loc_context = _resolve_geo(loc_entity) if argument["type"] == "time": entity_id = argument["value"]["@id"] temporal_entity = self.concept_dict[entity_id] time_context = _resolve_time(temporal_entity) # Put context together context = None if loc_context or time_context: context = WorldContext(time=time_context, geo_location=loc_context) return context
def test_matches_key_fun():
    """Check that custom matches and refinement functions keyed on an
    Event's GEOID are honored by Preassembler and run_preassembly."""
    from indra.statements import WorldContext, RefContext

    def has_location(stmt):
        # True only if the statement has a context with a geo location
        # that carries a truthy GEOID
        context = stmt.context
        return bool(context
                    and context.geo_location
                    and context.geo_location.db_refs.get('GEOID'))

    def event_location_matches(stmt):
        if not isinstance(stmt, Event):
            return stmt.matches_key()
        if has_location(stmt):
            geo_key = stmt.context.geo_location.db_refs['GEOID']
        else:
            geo_key = None
        return str((stmt.concept.matches_key(), geo_key))

    def event_location_refinement(st1, st2, ontology, entities_refined):
        if not (isinstance(st1, Event) and isinstance(st2, Event)):
            return None
        if not st1.refinement_of(st2, ontology):
            return False
        if not has_location(st2):
            return True
        # From here on st2 is known to have a location
        if not has_location(st1):
            return False
        return st1.context.geo_location.db_refs['GEOID'] == \
            st2.context.geo_location.db_refs['GEOID']

    context1 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '1'}))
    context2 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '2'}))

    health = 'wm/concept/causal_factor/health_and_life'

    def make_event(context, text):
        # Each event gets its own Concept and Evidence instance
        return Event(Concept('health', db_refs={'WM': [(health, 1.0)]}),
                     context=context,
                     evidence=Evidence(text=text, source_api='eidos'))

    e1 = make_event(context1, '1')
    e2 = make_event(context2, '2')
    e3 = make_event(context2, '3')

    pa = Preassembler(world_ontology, [e1, e2, e3],
                      matches_fun=event_location_matches,
                      refinement_fun=event_location_refinement)
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2, unique_stmts

    from indra.tools.assemble_corpus import run_preassembly
    stmts = run_preassembly([e1, e2, e3],
                            matches_fun=event_location_matches,
                            refinement_fun=event_location_refinement)
    assert len(stmts) == 2, stmts
def get_evidence(self, event):
    """Return the Evidence object for the INDRA Statement.

    Parameters
    ----------
    event : dict
        The event entry. Its ``provenance``, ``rule`` and ``text`` fields
        are read here; it is also passed to ``self.get_negation`` and
        ``self.get_hedging``.

    Returns
    -------
    Evidence
        Evidence with ``source_api='eidos'``. Its text is the sanitized
        sentence found through the first provenance entry, or the sanitized
        event text when no sentence could be looked up.

    Notes
    -----
    When a document title is found in ``self.documents``, it is written
    into ``provenance[0]['document']['title']``, i.e. the provenance held
    by ``event`` is modified in place.
    """
    provenance = event.get('provenance')

    # First try looking up the full sentence through provenance
    text = None
    context = None
    if provenance:
        first_prov = provenance[0]

        sentence = None
        sentence_tag = first_prov.get('sentence')
        if sentence_tag and '@id' in sentence_tag:
            sentence = self.sentences.get(sentence_tag['@id'])

        # Everything derived from the sentence is read only if the lookup
        # succeeded, so an unknown sentence ID cannot raise here.
        if sentence is not None:
            text = _sanitize(sentence['text'])
            # Get temporal constraints if available
            timexes = sentence.get('timexes', [])
            if timexes:
                # We currently handle just one timex per statement
                tc = self.time_context_from_timex(timexes[0])
                context = WorldContext(time=tc)
            # Get geolocation if available (only the first one is used)
            geolocs = sentence.get('geolocs', [])
            if geolocs:
                rc = self.ref_context_from_geoloc(geolocs[0])
                if context:
                    context.geo_location = rc
                else:
                    context = WorldContext(geo_location=rc)

        # Here we try to get the title of the document and set it
        # in the provenance
        doc_id = first_prov.get('document', {}).get('@id')
        if doc_id:
            title = self.documents.get(doc_id, {}).get('title')
            if title:
                first_prov['document']['title'] = title

    annotations = {'found_by': event.get('rule'),
                   'provenance': provenance}
    if self.dct is not None:
        annotations['document_creation_time'] = self.dct.to_json()

    epistemics = {}
    negations = self.get_negation(event)
    hedgings = self.get_hedging(event)
    if hedgings:
        epistemics['hedgings'] = hedgings
    if negations:
        # This is the INDRA standard to show negation
        epistemics['negated'] = True
        # But we can also save the texts associated with the negation
        # under annotations, just in case it's needed
        annotations['negated_texts'] = negations

    # If that fails, we can still get the text of the event
    if text is None:
        text = _sanitize(event.get('text'))

    return Evidence(source_api='eidos', text=text, annotations=annotations,
                    context=context, epistemics=epistemics)
def _build_stmts(self, rel_dict): stmt_list = [] cause_entries = rel_dict.get('Cause Index') effect_entries = rel_dict.get('Effect Index') # FIXME: Handle cases in which there is a missing cause/effect if not cause_entries or not effect_entries: return [] causes = [c.strip() for c in cause_entries.split(',')] effects = [e.strip() for e in effect_entries.split(',')] rel = rel_dict.get('Relation') if _in_rels(rel, pos_rels): pol = 1 elif _in_rels(rel, neg_rels): pol = -1 elif _in_rels(rel, neu_rels): pol = None # If we don't recognize this relation, we don't get any # statements else: return [] text = rel_dict.get('Sentence') annot_keys = ['Relation'] annots = {k: rel_dict.get(k) for k in annot_keys} ref = rel_dict.get('Source_File') for cause_idx, effect_idx in itertools.product(causes, effects): cause_name = self._events[cause_idx]['Relation'] cause_grounding = self._events[cause_idx]['Event_Type'] effect_name = self._events[effect_idx]['Relation'] effect_grounding = self._events[effect_idx]['Event_Type'] cause_concept = Concept(cause_name, db_refs={'TEXT': cause_name}) if cause_grounding: cause_concept.db_refs['SOFIA'] = cause_grounding effect_concept = Concept(effect_name, db_refs={'TEXT': effect_name}) if effect_grounding: effect_concept.db_refs['SOFIA'] = effect_grounding # NOTE: Extract context. 
The basic issue is that # time/location # here is given at the event level, not at the relation # level, and so we need to choose which event's context # we will associate with the relation def choose_context(context_type): locs = [ self._events[cause_idx].get(context_type), self._events[effect_idx].get(context_type) ] if locs[0]: return locs[0].strip() elif locs[1]: return locs[1].strip() else: return None context = WorldContext() location = choose_context('Location') if location: context.location = RefContext(name=location) time = choose_context('Time') if time: context.time = TimeContext(text=time) # Overwrite blank context if not context: context = None ev = Evidence(source_api='sofia', pmid=ref, annotations=annots, text=text, context=context) stmt = Influence(cause_concept, effect_concept, evidence=[ev]) # Assume unknown polarity on the subject, put the overall # polarity in the sign of the object stmt.subj_delta['polarity'] = None stmt.obj_delta['polarity'] = pol stmt_list.append(stmt) return stmt_list