Beispiel #1
0
    def get_event_compositional(self, event_entry: Dict[str, str]) -> Event:
        """Build an Event whose concept carries a compositional grounding.

        When no usable compositional grounding is available for the entry,
        the concept falls back to the old-style Sofia grounding taken from
        the entry's ``Event_Type`` field.

        Parameters
        ----------
        event_entry :
            The event to process

        Returns
        -------
        event :
            An Event statement
        """
        # Try the compositional grounding first
        comp_name, comp_grnd = self.get_compositional_grounding(event_entry)
        has_comp_grounding = (comp_name is not None
                              and comp_grnd[0] is not None
                              and comp_grnd[0][0] is not None)
        if has_comp_grounding:
            comp_refs = {'TEXT': comp_name, 'WM': [comp_grnd]}
            concept = Concept(comp_name, db_refs=comp_refs)
        else:
            # Otherwise fall back to the old style Sofia grounding
            relation = event_entry['Relation']
            concept = Concept(relation, db_refs={'TEXT': relation})
            sofia_grounding = event_entry['Event_Type']
            if sofia_grounding:
                concept.db_refs['SOFIA'] = sofia_grounding

        # Time and location, when given, are attached to the event context
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        # Only non-empty agent/patient values are kept as annotations
        annotations = {}
        for key, field in (('agent', 'Agent'), ('patient', 'Patient')):
            value = event_entry.get(field)
            if value:
                annotations[key] = value

        evidence = Evidence(source_api='sofia',
                            text_refs={'DART': event_entry.get('Source')},
                            text=event_entry.get('Text'),
                            annotations=annotations,
                            source_id=event_entry['Event Index'])
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept,
                     context=context,
                     evidence=[evidence],
                     delta=delta)
Beispiel #2
0
    def get_event(event_entry):
        """Build an Event statement from a single Sofia event entry.

        Parameters
        ----------
        event_entry :
            Mapping of Sofia event fields. 'Relation', 'Event_Type' and
            'Event Index' are accessed directly; 'Time', 'Location', 'Text',
            'Source', 'Agent', 'Patient' and 'Polarity' are optional.

        Returns
        -------
        event :
            An Event statement
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location, when given, are attached to the event context
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        # Only non-empty agent/patient values are kept as annotations
        annotations = {}
        for key, field in (('agent', 'Agent'), ('patient', 'Patient')):
            value = event_entry.get(field)
            if value:
                annotations[key] = value

        evidence = Evidence(source_api='sofia',
                            pmid=event_entry.get('Source'),
                            text=event_entry.get('Text'),
                            annotations=annotations,
                            source_id=event_entry['Event Index'])
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept, context=context, evidence=[evidence],
                     delta=delta)
Beispiel #3
0
    def get_event(event_entry):
        """Build an Event statement from a single Sofia event entry.

        The event delta is passed as a plain dict holding the entry's
        polarity and an empty list of adjectives.

        Parameters
        ----------
        event_entry :
            Mapping of Sofia event fields. 'Relation' and 'Event_Type' are
            accessed directly; 'Time', 'Location', 'Text', 'Source' and
            'Polarity' are optional.

        Returns
        -------
        event :
            An Event statement
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location, when given, are attached to the event context
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        evidence = Evidence(source_api='sofia',
                            pmid=event_entry.get('Source'),
                            text=event_entry.get('Text'))
        delta = {'polarity': event_entry.get('Polarity'), 'adjectives': []}
        return Event(concept,
                     context=context,
                     evidence=[evidence],
                     delta=delta)
Beispiel #4
0
    def get_causal_relations(self):
        """Extract causal relations as Influence Statements.

        Every extraction labeled both 'DirectedRelation' and 'Causal' that
        has a source and a destination argument is turned into an Influence
        which is appended to ``self.statements``.
        """
        required_labels = ('DirectedRelation', 'Causal')
        causal_events = [
            extraction for extraction in self.extractions
            if all(label in extraction['labels'] for label in required_labels)
        ]
        for event in causal_events:
            # For now, just take the first source and first destination.
            # Later, might deal with hypergraph representation.
            subj_id = self.find_arg(event, 'source')
            obj_id = self.find_arg(event, 'destination')
            if subj_id is None or obj_id is None:
                continue

            # Resolve coreferences by ID and look up the actual entities
            subj = self.entities[self.coreferences.get(subj_id, subj_id)]
            obj = self.entities[self.coreferences.get(obj_id, obj_id)]

            subj_delta = self.extract_entity_states(subj.get('states', []))
            obj_delta = self.extract_entity_states(obj.get('states', []))

            evidence = self.get_evidence(event)
            annotations = evidence.annotations

            # Time constraints and locations for concepts are currently
            # better stored as annotations at the Evidence level, so they
            # are removed from the deltas and moved over there.
            subj_timex = subj_delta.pop('time_context', None)
            obj_timex = obj_delta.pop('time_context', None)
            subj_geo = subj_delta.pop('geo_context', None)
            obj_geo = obj_delta.pop('geo_context', None)
            role_contexts = (('subj_context', subj_timex, subj_geo),
                             ('obj_context', obj_timex, obj_geo))
            for key, timex, geo in role_contexts:
                if timex or geo:
                    world_context = WorldContext(time=timex, geo_location=geo)
                    annotations[key] = world_context.to_json()

            # In addition, for the time being the adjectives and polarities
            # are also put into annotations since they could otherwise get
            # squashed upon preassembly
            for field in ('adjectives', 'polarity'):
                annotations['subj_' + field] = subj_delta[field]
                annotations['obj_' + field] = obj_delta[field]

            influence = Influence(self.get_concept(subj),
                                  self.get_concept(obj),
                                  subj_delta,
                                  obj_delta,
                                  evidence=[evidence])
            self.statements.append(influence)
Beispiel #5
0
    def _make_context(self, entity):
        """Collect place and time info from the JSON of the given entity.

        Returns a WorldContext when a place or a time argument produced a
        truthy context object, otherwise None. If several arguments of the
        same type are present, the last one wins.
        """
        geo = None
        when = None
        time_format = '%Y-%m-%dT%H:%M'

        # Scan the arguments for place and time contexts.
        for arg in entity["arguments"]:
            if arg["type"] == "place":
                place_entity = self.concept_dict[arg["value"]["@id"]]
                geo = RefContext(
                    name=place_entity["canonicalName"],
                    db_refs={"GEOID": place_entity.get('geoname_id')})
            if arg["type"] == "time":
                time_entity = self.concept_dict[arg["value"]["@id"]]
                mention_text = time_entity['mentions'][0]['text']
                intervals = time_entity.get("timeInterval", [])
                if len(intervals) < 1:
                    # Without an interval only the raw text is available
                    when = TimeContext(text=mention_text)
                else:
                    # Only the first interval is used
                    interval = intervals[0]
                    start = datetime.strptime(interval['start'], time_format)
                    end = datetime.strptime(interval['end'], time_format)
                    duration = int(interval['duration'])
                    when = TimeContext(text=mention_text,
                                       start=start,
                                       end=end,
                                       duration=duration)

        # Assemble the overall context, if there is anything to report
        if not (geo or when):
            return None
        return WorldContext(time=when, geo_location=geo)
Beispiel #6
0
    def _make_world_context(self, entity):
        """Collect place and time info from the JSON of the given entity.

        Returns a WorldContext when a location or a time argument resolved
        to a truthy context object, otherwise None. If several arguments of
        the same kind are present, the last one wins.
        """
        location_types = {
            "has_location", "has_origin_location",
            "has_destination_location", "has_intermediate_location"
        }
        time_types = {"has_time", "has_start_time", "has_end_time"}
        geo = None
        when = None

        # Scan the arguments for location and time contexts.
        for arg in entity["arguments"]:
            if arg["type"] in location_types:
                geo = _resolve_geo(self.concept_dict[arg["value"]["@id"]])
            if arg["type"] in time_types:
                when = _resolve_time(self.concept_dict[arg["value"]["@id"]])

        # Assemble the overall context, if there is anything to report
        if not (geo or when):
            return None
        return WorldContext(time=when, geo_location=geo)
Beispiel #7
0
    def get_event_flat(self, event_entry: Dict[str, str]) -> Event:
        """Build an Event whose concept carries a flattened grounding.

        The concept is named after the entry's 'Relation' and, when the
        entry's 'Event_Type' is non-empty, grounded to it under 'SOFIA'.

        Parameters
        ----------
        event_entry :
            The event to process

        Returns
        -------
        event :
            An Event statement
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location, when given, are attached to the event context
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        # Only non-empty agent/patient values are kept as annotations
        annotations = {}
        for key, field in (('agent', 'Agent'), ('patient', 'Patient')):
            value = event_entry.get(field)
            if value:
                annotations[key] = value

        evidence = Evidence(source_api='sofia',
                            text_refs={'DART': event_entry.get('Source')},
                            text=event_entry.get('Text'),
                            annotations=annotations,
                            source_id=event_entry['Event Index'])
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept,
                     context=context,
                     evidence=[evidence],
                     delta=delta)
Beispiel #8
0
def test_event_assemble_location():
    """Check that location only separates Events under location_matches.

    Two Events on the same concept with different GEOIDs are expected to
    collapse into one statement with the default matching and to stay
    separate when location_matches is used.
    """
    rainfall = Concept('rainfall')
    locations = [RefContext(name='x', db_refs={'GEOID': geoid})
                 for geoid in ('1', '2')]
    events = [Event(rainfall, context=WorldContext(geo_location=location))
              for location in locations]

    expectations = ((None, 1), (location_matches, 2))
    for matches_fun, expected_count in expectations:
        pa = Preassembler(ontology=world_ontology,
                          stmts=list(events),
                          matches_fun=matches_fun)
        unique_stmts = pa.combine_duplicates()
        assert len(unique_stmts) == expected_count
Beispiel #9
0
def test_influence_event_hash_reference():
    """Check that an Influence serializes its subject with the custom hash.

    Two Events on the same concept but with different GEOIDs must hash
    identically by default and differently under location_matches. The
    subject Event embedded in the Influence JSON must carry the same
    matches hash as the stand-alone Event JSON when both are serialized
    with location_matches.
    """
    rainfall = Concept('rainfall')
    loc1 = RefContext(name='x', db_refs={'GEOID': '1'})
    loc2 = RefContext(name='x', db_refs={'GEOID': '2'})
    ev1 = Event(rainfall, context=WorldContext(geo_location=loc1))
    ev2 = Event(rainfall, context=WorldContext(geo_location=loc2))
    infl = Influence(ev1, ev2)

    h1 = ev1.get_hash(refresh=True)
    h2 = ev2.get_hash(refresh=True)
    hl1 = ev1.get_hash(refresh=True, matches_fun=location_matches)
    hl2 = ev2.get_hash(refresh=True, matches_fun=location_matches)

    assert h1 == h2, (h1, h2)
    assert hl1 != hl2, (hl1, hl2)

    ij = infl.to_json(matches_fun=location_matches)
    ev1j = ev1.to_json(matches_fun=location_matches)
    # Put the JSON dumps into the assertion message itself; a tuple of
    # print() calls would evaluate to (None, None) and hide the details
    # from the failure report.
    assert ev1j['matches_hash'] == ij['subj']['matches_hash'], \
        'Event JSON:\n%s\nInfluence JSON:\n%s' % (
            json.dumps(ev1j, indent=1), json.dumps(ij, indent=1))
Beispiel #10
0
    def get_event(event_entry):
        """Build an Event statement from a single Sofia event entry.

        Parameters
        ----------
        event_entry :
            Mapping of Sofia event fields. 'Relation' and 'Event_Type' are
            accessed directly; 'Time', 'Location', 'Text', 'Source' and
            'Polarity' are optional.

        Returns
        -------
        event :
            An Event statement
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location, when given, are attached to the event context
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        evidence = Evidence(source_api='sofia',
                            pmid=event_entry.get('Source'),
                            text=event_entry.get('Text'))
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept, context=context, evidence=[evidence],
                     delta=delta)
Beispiel #11
0
 def get_event(self, event):
     """Build an Event statement from an extracted event entry.

     The polarity and adjectives found among the entry's states form the
     qualitative delta; a WorldContext is only attached when a time or a
     geo context was extracted.
     """
     concept = self.get_concept(event)
     state_info = self.extract_entity_states(event.get('states', []))
     delta = QualitativeDelta(polarity=state_info.get('polarity'),
                              adjectives=state_info.get('adjectives'))
     timex = state_info.get('time_context', None)
     geo = state_info.get('geo_context', None)
     if timex or geo:
         context = WorldContext(time=timex, geo_location=geo)
     else:
         context = None
     return Event(concept, delta=delta, context=context)
Beispiel #12
0
    def _make_world_context(self, entity):
        """Collect place and time info from the JSON of the given entity.

        Returns a WorldContext when a "place" or a "time" argument resolved
        to a truthy context object, otherwise None. If several arguments of
        the same type are present, the last one wins.
        """
        geo = None
        when = None

        # Scan the arguments for place and time contexts.
        for arg in entity["arguments"]:
            if arg["type"] == "place":
                geo = _resolve_geo(self.concept_dict[arg["value"]["@id"]])
            if arg["type"] == "time":
                when = _resolve_time(self.concept_dict[arg["value"]["@id"]])

        # Assemble the overall context, if there is anything to report
        if not (geo or when):
            return None
        return WorldContext(time=when, geo_location=geo)
Beispiel #13
0
def test_matches_key_fun():
    """Check preassembly with custom location-aware matching/refinement.

    Three Events on the same grounded concept, one at GEOID 1 and two at
    GEOID 2, are expected to be combined into two unique statements, both
    via Preassembler directly and via run_preassembly.
    """
    from indra.statements import WorldContext, RefContext

    def has_location(stmt):
        # True only if the statement's context has a geo location that is
        # grounded with a non-empty GEOID
        context = stmt.context
        if not context:
            return False
        geo = context.geo_location
        if not geo:
            return False
        return bool(geo.db_refs.get('GEOID'))

    def event_location_matches(stmt):
        # Events match on concept plus GEOID, everything else as usual
        if not isinstance(stmt, Event):
            return stmt.matches_key()
        if has_location(stmt):
            geoid = stmt.context.geo_location.db_refs['GEOID']
        else:
            geoid = None
        return str((stmt.concept.matches_key(), geoid))

    def event_location_refinement(st1, st2, ontology, entities_refined):
        # Only pairs of Events are handled; other pairs yield None
        if not (isinstance(st1, Event) and isinstance(st2, Event)):
            return None
        if not st1.refinement_of(st2, ontology):
            return False
        # Anything refines an Event that has no location
        if not has_location(st2):
            return True
        # An Event without location cannot refine one that has a location
        if not has_location(st1):
            return False
        return st1.context.geo_location.db_refs['GEOID'] == \
            st2.context.geo_location.db_refs['GEOID']

    context1 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '1'}))
    context2 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '2'}))

    health = 'wm/concept/causal_factor/health_and_life'
    specs = ((context1, '1'), (context2, '2'), (context2, '3'))
    events = [
        Event(Concept('health', db_refs={'WM': [(health, 1.0)]}),
              context=context,
              evidence=Evidence(text=text, source_api='eidos'))
        for context, text in specs
    ]

    pa = Preassembler(world_ontology, list(events),
                      matches_fun=event_location_matches,
                      refinement_fun=event_location_refinement)

    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2, unique_stmts

    from indra.tools.assemble_corpus import run_preassembly
    stmts = run_preassembly(list(events),
                            matches_fun=event_location_matches,
                            refinement_fun=event_location_refinement)
    assert len(stmts) == 2, stmts
Beispiel #14
0
    def get_evidence(self, event):
        """Return the Evidence object for the INDRA Statement.

        Parameters
        ----------
        event : dict
            The extracted event. Its 'provenance', 'rule' and 'text'
            entries are read here; the event is also passed on to
            ``get_negation`` and ``get_hedging``.

        Returns
        -------
        ev : Evidence
            Evidence with source_api 'eidos' carrying the sentence (or
            event) text, annotations, an optional WorldContext and
            epistemics.
        """
        provenance = event.get('provenance')

        # First try looking up the full sentence through provenance
        text = None
        context = None
        if provenance:
            sentence_tag = provenance[0].get('sentence')
            if sentence_tag and '@id' in sentence_tag:
                sentence = self.sentences.get(sentence_tag['@id'])
                # The sentence ID may not resolve to a known sentence; in
                # that case there is no text, time or location to read and
                # the event's own text is used further below.
                if sentence is not None:
                    text = _sanitize(sentence['text'])
                    # Get temporal constraints if available
                    timexes = sentence.get('timexes', [])
                    if timexes:
                        # We currently handle just one timex per statement
                        tc = self.time_context_from_timex(timexes[0])
                        context = WorldContext(time=tc)
                    # Get geolocation if available
                    geolocs = sentence.get('geolocs', [])
                    if geolocs:
                        # Likewise only the first geolocation is used
                        rc = self.ref_context_from_geoloc(geolocs[0])
                        if context:
                            context.geo_location = rc
                        else:
                            context = WorldContext(geo_location=rc)

            # Here we try to get the title of the document and set it
            # in the provenance
            doc_id = provenance[0].get('document', {}).get('@id')
            if doc_id:
                title = self.documents.get(doc_id, {}).get('title')
                if title:
                    provenance[0]['document']['title'] = title

        annotations = {'found_by': event.get('rule'), 'provenance': provenance}
        if self.dct is not None:
            annotations['document_creation_time'] = self.dct.to_json()

        epistemics = {}
        negations = self.get_negation(event)
        hedgings = self.get_hedging(event)
        if hedgings:
            epistemics['hedgings'] = hedgings
        if negations:
            # This is the INDRA standard to show negation
            epistemics['negated'] = True
            # But we can also save the texts associated with the negation
            # under annotations, just in case it's needed
            annotations['negated_texts'] = negations

        # If the sentence lookup failed, we can still get the text of the
        # event itself
        if text is None:
            text = _sanitize(event.get('text'))

        ev = Evidence(source_api='eidos',
                      text=text,
                      annotations=annotations,
                      context=context,
                      epistemics=epistemics)
        return ev
Beispiel #15
0
    def _build_stmts(self, rel_dict):
        """Build Influence statements from a single Sofia relation entry.

        One Influence is created for every (cause, effect) pair obtained
        from the comma-separated 'Cause Index' and 'Effect Index' fields.

        Parameters
        ----------
        rel_dict : dict
            The relation entry. The 'Cause Index', 'Effect Index',
            'Relation', 'Sentence' and 'Source_File' fields are read.

        Returns
        -------
        stmt_list : list
            The Influence statements built from the entry. The list is
            empty if the cause or effect is missing or if the relation is
            not among the recognized positive/negative/neutral relations.
        """
        stmt_list = []
        cause_entries = rel_dict.get('Cause Index')
        effect_entries = rel_dict.get('Effect Index')

        # FIXME: Handle cases in which there is a missing cause/effect
        if not cause_entries or not effect_entries:
            return []
        causes = [c.strip() for c in cause_entries.split(',')]
        effects = [e.strip() for e in effect_entries.split(',')]
        rel = rel_dict.get('Relation')
        if _in_rels(rel, pos_rels):
            pol = 1
        elif _in_rels(rel, neg_rels):
            pol = -1
        elif _in_rels(rel, neu_rels):
            pol = None
        # If we don't recognize this relation, we don't get any
        # statements
        else:
            return []

        text = rel_dict.get('Sentence')
        annot_keys = ['Relation']
        annots = {k: rel_dict.get(k) for k in annot_keys}
        ref = rel_dict.get('Source_File')

        # NOTE: Time/location is given at the event level, not at the
        # relation level, so we need to choose which event's context to
        # associate with the relation: the cause's value is preferred and
        # the effect's value is the fallback.
        def choose_context(cause_idx, effect_idx, context_type):
            for idx in (cause_idx, effect_idx):
                value = self._events[idx].get(context_type)
                if value:
                    return value.strip()
            return None

        for cause_idx, effect_idx in itertools.product(causes, effects):
            cause_name = self._events[cause_idx]['Relation']
            cause_grounding = self._events[cause_idx]['Event_Type']
            effect_name = self._events[effect_idx]['Relation']
            effect_grounding = self._events[effect_idx]['Event_Type']
            cause_concept = Concept(cause_name, db_refs={'TEXT': cause_name})
            if cause_grounding:
                cause_concept.db_refs['SOFIA'] = cause_grounding
            effect_concept = Concept(effect_name,
                                     db_refs={'TEXT': effect_name})
            if effect_grounding:
                effect_concept.db_refs['SOFIA'] = effect_grounding

            context = WorldContext()
            location = choose_context(cause_idx, effect_idx, 'Location')
            if location:
                # WorldContext keeps its location in `geo_location`;
                # assigning to any other attribute name would leave the
                # real location slot unset.
                context.geo_location = RefContext(name=location)
            time = choose_context(cause_idx, effect_idx, 'Time')
            if time:
                context.time = TimeContext(text=time)
            # Overwrite blank context
            # NOTE(review): this relies on WorldContext defining
            # truthiness for an empty context - confirm
            if not context:
                context = None

            ev = Evidence(source_api='sofia',
                          pmid=ref,
                          annotations=annots,
                          text=text,
                          context=context)
            stmt = Influence(cause_concept, effect_concept, evidence=[ev])
            # Assume unknown polarity on the subject, put the overall
            # polarity in the sign of the object
            stmt.subj_delta['polarity'] = None
            stmt.obj_delta['polarity'] = pol

            stmt_list.append(stmt)
        return stmt_list