def _process_relations(relation_rows, event_dict):
    """Build Influence statements from the rows of a relations table.

    Parameters
    ----------
    relation_rows : iterator
        Iterator over rows, each row being an iterable of cell objects that
        expose a ``value`` attribute. The first row is consumed as the
        header; the remaining rows are looked up by header name.
    event_dict : dict
        Maps an event index (as it appears, comma separated, in the
        'Cause Index' and 'Effect Index' columns) to a dict from which the
        'Relation' and 'Event_Type' entries are read.

    Returns
    -------
    list
        One Influence per (cause, effect) index pair of every row that has
        both index columns filled in, a recognized relation, and indices
        that are present in ``event_dict``.
    """
    header_row = next(relation_rows, None)
    # An empty table has no header and therefore no statements; a bare
    # next() would leak StopIteration to the caller here.
    if header_row is None:
        return []
    header = [cell.value for cell in header_row]

    stmts = []
    for row in relation_rows:
        row_dict = dict(zip(header, [cell.value for cell in row]))
        cause_entries = row_dict.get('Cause Index')
        effect_entries = row_dict.get('Effect Index')
        # FIXME: Handle cases in which there is a missing cause/effect
        if not cause_entries or not effect_entries:
            continue
        causes = [c.strip() for c in cause_entries.split(',')]
        effects = [e.strip() for e in effect_entries.split(',')]

        rel = row_dict.get('Relation')
        if _in_rels(rel, pos_rels):
            pol = 1
        elif _in_rels(rel, neg_rels):
            pol = -1
        elif _in_rels(rel, neu_rels):
            pol = None
        # If we don't recognize this relation, we don't get any statements
        else:
            continue

        text = row_dict.get('Sentence')
        annot_keys = ['Relation']
        ref = row_dict.get('Source_File')

        for cause_index, effect_index in itertools.product(causes, effects):
            cause = event_dict.get(cause_index)
            effect = event_dict.get(effect_index)
            # Skip index pairs that do not resolve to a known event (e.g.
            # a trailing comma or a dangling reference) instead of raising
            # KeyError and losing every other statement of the table.
            if not cause or not effect:
                continue

            cause_name = cause['Relation']
            cause_grounding = cause['Event_Type']
            effect_name = effect['Relation']
            effect_grounding = effect['Event_Type']
            cause_concept = Concept(cause_name,
                                    db_refs={'TEXT': cause_name,
                                             'SOFIA': cause_grounding})
            effect_concept = Concept(effect_name,
                                     db_refs={'TEXT': effect_name,
                                              'SOFIA': effect_grounding})

            # Every statement gets its own Evidence (and annotations dict)
            # so that later edits to one statement's evidence cannot leak
            # into the other statements extracted from the same row.
            annots = {k: row_dict.get(k) for k in annot_keys}
            ev = Evidence(source_api='sofia', pmid=ref,
                          annotations=annots, text=text)
            stmt = Influence(cause_concept, effect_concept, evidence=[ev])
            # Assume unknown polarity on the subject, put the overall
            # polarity in the sign of the object
            stmt.subj_delta['polarity'] = None
            stmt.obj_delta['polarity'] = pol
            stmts.append(stmt)
    return stmts
def _process_row(header, row):
    """Build an Influence statement from a single agent/patient row.

    Parameters
    ----------
    header : list
        Column names, in the same order as the values of ``row``.
    row : list
        The values of one row.

    Returns
    -------
    Influence or None
        None if the 'Agent' or 'Patient' value is missing/empty or if the
        'Relation' value is not in any of the known relation groups;
        otherwise an Influence whose object polarity is 1, -1 or None
        depending on the group the relation belongs to.
    """
    row_dict = dict(zip(header, row))
    subj = row_dict.get('Agent')
    obj = row_dict.get('Patient')
    if not obj or not subj:
        return None

    rel = row_dict.get('Relation')
    if _in_rels(rel, pos_rels):
        pol = 1
    elif _in_rels(rel, neg_rels):
        pol = -1
    elif _in_rels(rel, neu_rels):
        pol = None
    else:
        return None

    subj_concept = Concept(subj, db_refs={'TEXT': subj, 'SOFIA': subj})
    # The object must be grounded to its own text; the previous code reused
    # the subject's text here, giving both concepts identical db_refs.
    obj_concept = Concept(obj, db_refs={'TEXT': obj, 'SOFIA': obj})

    text = row_dict.get('Sentence')
    annot_keys = ['Relation', 'Event_Type', 'Location', 'Time']
    annots = {k: row_dict.get(k) for k in annot_keys}
    ref = row_dict.get('Source_File')
    ev = Evidence(source_api='sofia', pmid=ref, annotations=annots, text=text)

    stmt = Influence(subj_concept, obj_concept, evidence=[ev])
    stmt.obj_delta['polarity'] = pol
    return stmt
def test_print_model():
    """A model assembled from four influences can be printed."""
    edges = [
        ('rainfall', 'crop_yields'),
        ('irrigation', 'crop_yields'),
        ('temperature', 'crop_yields'),
        ('rainfall', 'temperature'),
    ]
    stmts = [Influence(Agent(source), Agent(target))
             for source, target in edges]
    assembler = FigaroAssembler(stmts)
    assembler.make_model()
    model_text = assembler.print_model()
    assert model_text is not None
def test_incremental_assembler_add_statement_duplicate():
    """Adding a statement over e1/e2 yields new evidence but no new statement.

    Uses the module-level fixtures ``e1``, ``e2``, ``s1``, ``s2``, ``s1h``,
    ``s2h``, ``ev1`` and ``ev2``.
    """
    new_ev = Evidence('eidos', text='3')
    new_stmt = Influence(e1, e2, new_ev)
    new_hash = new_stmt.get_hash(matches_fun=location_matches_compositional)

    assembler = IncrementalAssembler([s1, s2])
    delta = assembler.add_statements([new_stmt])

    assert not delta.new_stmts, delta.new_stmts
    assert delta.new_evidences == {new_hash: [new_ev]}, delta.new_evidences
    assert not delta.new_refinements, delta.new_refinements
    # TODO: test beliefs
    s1_support = set(assembler.get_all_supporting_evidence(s1h))
    assert s1_support == {ev1, ev2, new_ev}
    s2_support = set(assembler.get_all_supporting_evidence(s2h))
    assert s2_support == {ev2}
def test_map():
    """Mapping two statements adds the counterpart grounding to each subject."""
    un_concept = Concept('x', db_refs={'UN': [('entities/x', 1.0)]})
    hume_concept = Concept('y', db_refs={'HUME': [('entities/y', 1.0)]})
    target = Concept('z')
    mapper = OntologyMapper([Influence(un_concept, target),
                             Influence(hume_concept, target)])
    mapper.map_statements()

    assert len(mapper.statements) == 2
    first_refs = mapper.statements[0].subj.db_refs
    assert first_refs['HUME'] == [('entities/y', 1.0)], first_refs
    second_refs = mapper.statements[1].subj.db_refs
    assert second_refs['UN'] == [('entities/x', 1.0)], second_refs
def test_incremental_assembler_add_statement_new_refinement():
    """Adding a new statement reports it together with its new refinements.

    Uses the module-level fixtures ``e2``, ``e4``, ``s1``, ``s2``, ``s1h``,
    ``s2h``, ``ev1`` and ``ev2``.
    """
    ev4 = Evidence('eidos', text='4')
    s4 = Influence(e2, e4, ev4)
    s4h = s4.get_hash(matches_fun=location_matches_compositional)
    ia = IncrementalAssembler([s1, s2])
    delta = ia.add_statements([s4])
    # This used to read `assert delta.new_stmts, {s4h: s4}`, where the dict
    # is only the failure message and any non-empty value passes. Check the
    # reported statement hashes instead.
    # NOTE(review): assumes new_stmts is keyed by statement hash, like
    # new_evidences below -- confirm.
    assert set(delta.new_stmts) == {s4h}, delta.new_stmts
    assert delta.new_evidences == {s4h: [ev4]}, delta.new_evidences
    assert delta.new_refinements == {(s1h, s4h), (s2h, s4h)}, \
        delta.new_refinements
    # TODO: test beliefs
    assert set(ia.get_all_supporting_evidence(s1h)) == {ev1, ev2, ev4}
    assert set(ia.get_all_supporting_evidence(s2h)) == {ev2, ev4}
    assert set(ia.get_all_supporting_evidence(s4h)) == {ev4}
def influence_stmt_from_dict(d: Dict) -> Influence:
    """Rebuild an Influence statement from its dictionary representation.

    Parameters
    ----------
    d : dict
        Must provide 'subj' and 'obj' (each with 'name' and 'db_refs'),
        'evidence' (a list of dicts with 'source_api', 'text' and
        'annotations') and 'belief'. 'subj_delta' and 'obj_delta' are
        optional and default to None.

    Returns
    -------
    Influence
        The reconstructed statement with its belief set from ``d``.
    """
    subj_entry = d["subj"]
    subj = Concept(subj_entry["name"], db_refs=subj_entry["db_refs"])
    obj_entry = d["obj"]
    obj = Concept(obj_entry["name"], db_refs=obj_entry["db_refs"])
    subj_delta = d.get("subj_delta")
    obj_delta = d.get("obj_delta")

    evidence = []
    for entry in d["evidence"]:
        evidence.append(
            Evidence(
                entry["source_api"],
                text=entry["text"],
                annotations=entry["annotations"],
            )
        )

    st = Influence(subj, obj, subj_delta, obj_delta, evidence)
    st.belief = d["belief"]
    return st
def contains_concept(s: Influence, concept_name: str, cutoff=0.7) -> bool:
    """Return True if any agent of the statement passes the grounding check.

    Each element of ``s.agent_list()`` is tested with
    ``is_grounded_to_name(agent, concept_name, cutoff)``; evaluation stops
    at the first agent that passes.
    """
    for agent in s.agent_list():
        if is_grounded_to_name(agent, concept_name, cutoff):
            return True
    return False
def _(s: Influence, ontology: str = "UN", cutoff: float = 0.7) -> bool:
    """Return True if every agent of the statement is well grounded.

    Each element of ``s.agent_list()`` is tested with
    ``is_well_grounded(agent, ontology, cutoff)``; evaluation stops at the
    first agent that fails.
    """
    for agent in s.agent_list():
        if not is_well_grounded(agent, ontology, cutoff):
            return False
    return True
def get_events(self):
    """Extract Influence statements from the 'Event' extractions.

    Reads all extractions of type 'Event' from ``self.tree``. When there are
    none, returns immediately without touching any attribute. Otherwise it
    fills ``self.event_dict`` and ``self.entity_dict`` (both keyed by
    '@id'), calls ``self.get_documents()``, and for every event of a known
    type appends an Influence to ``self.statements`` and records it in
    ``self.eid_stmt_dict`` under the event's '@id'.

    Returns
    -------
    None
    """
    events = list(self.tree.execute("$.extractions[(@.@type is 'Event')]"))
    if not events:
        return
    self.event_dict = {ev['@id']: ev for ev in events}

    # List out event types and their default (implied) polarities.
    event_polarities = {
        'causation': 1,
        'precondition': 1,
        'catalyst': 1,
        'mitigation': -1,
        'prevention': -1
    }

    def implied_polarity(event_type):
        """Return the polarity of the known type contained in event_type."""
        # A missing type used to raise TypeError in the containment test.
        if not event_type:
            return None
        for known_type, polarity in event_polarities.items():
            if known_type in event_type:
                return polarity
        return None

    # Restrict to known event types
    events = [e for e in events
              if implied_polarity(e.get('type')) is not None]
    logger.info('%d events of types %s found',
                len(events), ', '.join(event_polarities))

    # Build a dictionary of entities and sentences by ID for convenient
    # lookup
    entities = self.tree.execute("$.extractions[(@.@type is 'Entity')]")
    self.entity_dict = {entity['@id']: entity for entity in entities}
    self.get_documents()

    for event in events:
        subj_concept, subj_delta = self._get_concept(event, 'source')
        obj_concept, obj_delta = self._get_concept(event, 'destination')
        # Check for a missing concept before touching the deltas.
        # NOTE(review): presumably the delta of a missing concept can be
        # None as well, in which case the old order crashed -- confirm.
        if not subj_concept or not obj_concept:
            continue

        # Apply the naive polarity from the type of statement. For the
        # purpose of the multiplication here, if obj_delta['polarity'] is
        # None to begin with, we assume it is positive. The same
        # containment rule as in the filter above is used so that every
        # event that passed the filter has a polarity.
        current_polarity = obj_delta['polarity']
        obj_delta['polarity'] = (
            implied_polarity(event.get('type')) *
            (current_polarity if current_polarity is not None else 1)
        )

        evidence = self._get_evidence(event, subj_concept, obj_concept,
                                      get_states(event))
        st = Influence(subj_concept, obj_concept, subj_delta, obj_delta,
                       evidence=evidence)
        self.eid_stmt_dict[event['@id']] = st
        self.statements.append(st)
    return
def test_curations():
    """A submitted curation can be read back through the project endpoint."""
    sc.db = DbManager(url='sqlite:///:memory:')
    sc.db.create_all()

    new_project = dict(project_id='p1', project_name='Project 1')
    _call_api('post', 'assembly/new_project', json=new_project)

    # Now add a record just on the back-end
    sc.db.add_records_for_project('p1', ['r1'])
    # And now add a statement for that record so we can "curate" it
    stmt = Influence(Event(Concept('x')), Event(Concept('y')))
    stmt_hash = -11334164755554266
    sc.db.add_statements_for_record('r1', [stmt], '1.0')

    curation = {
        'project_id': 'p1',
        'statement_id': 'abcdef',
        'update_type': 'reverse_relation'
    }
    submission = dict(project_id='p1', curations={stmt_hash: curation})
    mappings = _call_api('post', 'assembly/submit_curations',
                         json=submission)
    assert mappings

    res = _call_api('get', 'assembly/get_project_curations',
                    json=dict(project_id='p1'))
    assert len(res) == 1
    # The returned mapping is looked up by the string form of the hash
    assert res[str(stmt_hash)] == curation, res
def extract_statement_from_query_result(self, res):
    """Append an Influence built from one SPARQL query result row.

    Parameters
    ----------
    res: rdflib.query.ResultRow
        Row holding four values: the start and end offsets of the agent
        text and the start and end offsets of the affected text, all
        indexing into ``self.text``.
    """
    raw_agent_start, raw_agent_end, raw_affected_start, raw_affected_end = res

    # rdflib literals cannot be used to index strings, so turn them into
    # plain python integers first
    agent_span = slice(int(raw_agent_start), int(raw_agent_end))
    affected_span = slice(int(raw_affected_start), int(raw_affected_end))

    # Look up the referenced text and drop the surrounding whitespace
    agent_text = self.text[agent_span].strip()
    affected_text = self.text[affected_span].strip()

    # Subject and object are both represented as Agents grounded to text
    subj = Agent(agent_text, db_refs={'TEXT': agent_text})
    obj = Agent(affected_text, db_refs={'TEXT': affected_text})

    self.statements.append(Influence(subj=subj, obj=obj))
def test_assemble_influence():
    """A single influence is assembled into a two-node, one-edge network."""
    influence = Influence(Agent('rainfall'), Agent('crop_yields'))
    assembler = FigaroAssembler([influence])
    assembler.make_model()

    network = assembler.BN
    assert network is not None
    assert len(assembler.BN.nodes()) == 2
    assert len(assembler.BN.edges()) == 1
def make_stmt_from_sort_key(key, verb, agents=None):
    """Make a Statement from the sort key.

    Specifically, the sort key used by `group_and_sort_statements`.

    Parameters
    ----------
    key : sequence
        The sort key; its second element holds the statement inputs.
    verb : str
        Name of the statement type to build.
    agents : list, optional
        If given, the agents created here are appended to this list in
        place (and, for some verbs, the statement is built from the whole
        list). Defaults to a fresh empty list.

    Returns
    -------
    Statement
        An instance of the class returned by ``get_statement_by_name(verb)``
        (or of Influence for the 'Influence' verb).
    """
    def to_agent(name):
        # Both the literal string 'None' and None itself mean "no agent"
        if name == 'None' or name is None:
            return None
        return Agent(name)

    stmt_class = get_statement_by_name(verb)
    inputs = list(key[1])
    if agents is None:
        agents = []

    if verb == 'Complex':
        agents.extend([to_agent(name) for name in inputs])
        # The statement gets a copy so it does not share the caller's list
        return stmt_class(list(agents))

    if verb == 'Conversion':
        agents_from = [to_agent(name) for name in inputs[1]]
        agents_to = [to_agent(name) for name in inputs[2]]
        agents.extend(agents_from + agents_to)
        return stmt_class(to_agent(inputs[0]), agents_from, agents_to)

    if verb in ('ActiveForm', 'HasActivity'):
        agents.extend([to_agent(inputs[0])])
        return stmt_class(agents[0], inputs[1], inputs[2])

    if verb == 'Influence':
        agents.extend([to_agent(name) for name in inputs[:2]])
        events = [Event(agent) for agent in agents]
        return Influence(*events)

    if verb == 'Association':
        agents.extend([to_agent(name) for name in inputs])
        return stmt_class([Event(agent) for agent in agents])

    agents.extend([to_agent(name) for name in inputs])
    return stmt_class(*agents)
def get_causal_relations(self):
    """Extract causal relations as Statements.

    Every extraction labeled both 'DirectedRelation' and 'Causal' that has a
    source and a destination argument is turned into an Influence, which is
    appended to ``self.statements``.
    """
    def is_directed_causal(extraction):
        labels = extraction['labels']
        return 'DirectedRelation' in labels and 'Causal' in labels

    causal_events = [e for e in self.extractions if is_directed_causal(e)]

    for event in causal_events:
        # Only the first source and first destination are used for now;
        # a hypergraph representation may be handled later.
        source_id = self.find_arg(event, 'source')
        destination_id = self.find_arg(event, 'destination')
        if source_id is None or destination_id is None:
            continue

        # Follow coreferences, falling back to the ID itself
        source_id = self.coreferences.get(source_id, source_id)
        destination_id = self.coreferences.get(destination_id,
                                               destination_id)

        # Look up the entities and the states attached to them
        subj = self.entities[source_id]
        obj = self.entities[destination_id]
        subj_delta = self.extract_entity_states(subj.get('states', []))
        obj_delta = self.extract_entity_states(obj.get('states', []))

        evidence = self.get_evidence(event)

        # Time constraints and locations of concepts are currently better
        # kept as annotations at the Evidence level, so they are taken out
        # of the deltas and moved there.
        subj_timex = subj_delta.pop('time_context', None)
        obj_timex = obj_delta.pop('time_context', None)
        subj_geo = subj_delta.pop('geo_context', None)
        obj_geo = obj_delta.pop('geo_context', None)
        contexts = (
            ('subj_context', subj_timex, subj_geo),
            ('obj_context', obj_timex, obj_geo),
        )
        for annotation_key, timex, geo in contexts:
            if timex or geo:
                world_context = WorldContext(time=timex, geo_location=geo)
                evidence.annotations[annotation_key] = world_context.to_json()

        # Adjectives and polarities are, for the time being, duplicated
        # into the annotations as well since they could otherwise get
        # squashed upon preassembly.
        annotations = evidence.annotations
        annotations['subj_adjectives'] = subj_delta['adjectives']
        annotations['obj_adjectives'] = obj_delta['adjectives']
        annotations['subj_polarity'] = subj_delta['polarity']
        annotations['obj_polarity'] = obj_delta['polarity']

        subj_concept = self.get_concept(subj)
        obj_concept = self.get_concept(obj)
        self.statements.append(
            Influence(subj_concept, obj_concept, subj_delta, obj_delta,
                      evidence=[evidence]))
def get_model_checker(statements):
    """Return a ModelChecker over the model assembled from the statements.

    The checker is created for a single query statement (crop_production
    influencing food_security) and its influence map is pruned before it is
    returned.
    """
    assembler = PysbAssembler()
    assembler.add_statements(statements)
    model = assembler.make_model()

    query = Influence(Concept('crop_production'), Concept('food_security'))
    checker = ModelChecker(model, [query])
    checker.prune_influence_map()
    return checker
def from_uncharted_json_serialized_dict(
        cls, _dict, minimum_evidence_pieces_required: int = 1):
    """Build an instance from a dict of serialized statements.

    Parameters
    ----------
    _dict : dict
        Must provide 'statements' (a list of statement dicts with 'subj',
        'obj', 'subj_delta', 'obj_delta' and 'evidence') and
        'concept_to_indicator_mapping' (concept name -> indicator or None).
        Deltas whose polarity is None are set to 1 in place.
    minimum_evidence_pieces_required : int
        Statements with fewer evidence entries than this are ignored.

    Returns
    -------
    An instance of ``cls`` created from the resulting directed graph, after
    ``assign_uuids_to_nodes_and_edges()`` has been called on it.
    """
    G = nx.DiGraph()

    for s in _dict["statements"]:
        if len(s["evidence"]) < minimum_evidence_pieces_required:
            continue
        subj, obj = s["subj"], s["obj"]
        # Statements with an ungrounded subject or object are dropped
        if (subj["db_refs"]["concept"] is None
                or obj["db_refs"]["concept"] is None):
            continue

        subj_name = "/".join(s["subj"]["db_refs"]["concept"].split("/"))
        obj_name = "/".join(s["obj"]["db_refs"]["concept"].split("/"))
        G.add_edge(subj_name, obj_name)

        # TODO : Ensure that all the statements provided by
        # Uncharted have unambiguous polarities.
        for delta in (s["subj_delta"], s["obj_delta"]):
            if delta["polarity"] is None:
                delta["polarity"] = 1

        evidence = [
            Evidence(
                source_api=ev["source_api"],
                annotations=ev["annotations"],
                text=ev["text"],
                epistemics=ev.get("epistemics"),
            )
            for ev in s["evidence"]
        ]
        influence_stmt = Influence(
            Concept(subj_name, db_refs=subj["db_refs"]),
            Concept(obj_name, db_refs=obj["db_refs"]),
            subj_delta=s["subj_delta"],
            obj_delta=s["obj_delta"],
            evidence=evidence,
        )

        # Collect all statements that share this edge
        edge_data = G.edges[subj_name, obj_name]
        edge_statements = edge_data.get("InfluenceStatements", [])
        edge_statements.append(influence_stmt)
        G.edges[subj_name, obj_name]["InfluenceStatements"] = edge_statements

    mapping = _dict["concept_to_indicator_mapping"]
    for concept, indicator in mapping.items():
        if indicator is None:
            continue
        # The source is the part of the indicator before the first slash;
        # the full indicator string serves as its name.
        indicator_source = indicator.split("/")[0]
        indicator_name = indicator
        if concept not in G:
            continue
        if G.nodes[concept].get("indicators") is None:
            G.nodes[concept]["indicators"] = {}
        G.nodes[concept]["indicators"][indicator_name] = Indicator(
            indicator_name, indicator_source)

    self = cls(G)
    self.assign_uuids_to_nodes_and_edges()
    return self
def test_run_preassembly_concepts():
    """Two rainfall-to-flooding influences preassemble into one statement."""
    ontology = _get_extended_wm_hierarchy()

    rainfall_grounding = ('wm/concept/causal_factor/environmental/'
                          'meteorologic/precipitation/rainfall')
    rainfall = Event(Concept('rain', db_refs={'WM': rainfall_grounding}))
    flooding_1 = Event(Concept('flood', db_refs={'WM': 'wm/x/y/z/flooding'}))
    flooding_2 = Event(Concept('flooding',
                               db_refs={'WM': 'wm/a/b/c/flooding'}))

    stmts_in = [Influence(rainfall, flooding_1),
                Influence(rainfall, flooding_2)]
    st_out = ac.run_preassembly(stmts_in,
                                normalize_ns='WM',
                                normalize_equivalences=True,
                                ontology=ontology)
    assert len(st_out) == 1, st_out
def make_statement(event1, event2):
    """Return an Influence from event1 to event2.

    The statement carries one Evidence whose annotations hold the delta
    adjectives of both events.
    """
    annotations = {
        "subj_adjectives": event1.delta.adjectives,
        "obj_adjectives": event2.delta.adjectives,
    }
    evidence = Evidence(annotations=annotations)
    return Influence(event1, event2, evidence=evidence)
def test_wm_scorer():
    """The eidos scorer keeps the other sources and can set prior beliefs."""
    scorer = get_eidos_scorer()

    # Make sure other sources are still in the map
    random_errors = scorer.prior_probs['rand']
    assert 'hume' in random_errors
    systematic_errors = scorer.prior_probs['syst']
    assert 'biopax' in systematic_errors

    eidos_evidence = Evidence(source_api='eidos')
    stmt = Influence(Concept('a'), Concept('b'), evidence=[eidos_evidence])
    belief_engine = BeliefEngine(scorer)
    belief_engine.set_prior_probs([stmt])
def test_influence_duplicate():
    """Duplicate influences are merged; the reversed one stays separate."""
    gov = 'UN/entities/human/government/government_entity'
    agr = 'UN/entities/natural/crop_technology'
    cgov = Event(Concept('government', db_refs={'UN': [(gov, 1.0)]}))
    cagr = Event(Concept('agriculture', db_refs={'UN': [(agr, 1.0)]}))

    # The first and third statement share direction, the second is reversed
    stmts = [
        Influence(cgov, cagr, evidence=[Evidence(source_api='eidos1')]),
        Influence(cagr, cgov, evidence=[Evidence(source_api='eidos2')]),
        Influence(cgov, cagr, evidence=[Evidence(source_api='eidos3')]),
    ]

    here = os.path.dirname(os.path.abspath(__file__))
    eidos_ont = os.path.join(here, '../sources/eidos/eidos_ontology.rdf')
    hierarchies = {'entity': HierarchyManager(eidos_ont, True, True)}

    preassembler = Preassembler(hierarchies, stmts)
    unique_stmts = preassembler.combine_duplicates()

    assert len(unique_stmts) == 2
    assert len(unique_stmts[0].evidence) == 2
    assert len(unique_stmts[1].evidence) == 1
    merged_sources = {e.source_api for e in unique_stmts[0].evidence}
    assert merged_sources == {'eidos1', 'eidos3'}
def test_wm_map():
    """Mapping UN groundings adds BBN and SOFIA entries on both sides."""
    price = Concept('x', db_refs={'UN': [('UN/properties/price', 1.0)]})
    education = Concept(
        'y', db_refs={'UN': [('UN/entities/human/education', 1.0)]})

    mapper = OntologyMapper([Influence(price, education)], wm_ontomap,
                            symmetric=False)
    mapper.map_statements()

    mapped = mapper.statements[0]
    assert 'BBN' in mapped.subj.db_refs
    assert 'BBN' in mapped.obj.db_refs
    assert 'SOFIA' in mapped.subj.db_refs
    assert 'SOFIA' in mapped.obj.db_refs
def test_influence_refinement():
    """The trucking influence ends up supported by the transportation one."""
    tran = 'UN/entities/human/infrastructure/transportation'
    truck = ('UN/entities/human/infrastructure/transportation/'
             'transportation_methods')
    agr = 'UN/entities/human/livelihood'
    ctran = Concept('transportation', db_refs={'UN': [(tran, 1.0)]})
    ctruck = Concept('trucking', db_refs={'UN': [(truck, 1.0)]})
    cagr = Concept('agriculture', db_refs={'UN': [(agr, 1.0)]})

    stmts = [
        Influence(ctran, cagr, evidence=[Evidence(source_api='eidos1')]),
        Influence(ctruck, cagr, evidence=[Evidence(source_api='eidos2')]),
        Influence(cagr, ctran, evidence=[Evidence(source_api='eidos3')]),
    ]

    here = os.path.dirname(os.path.abspath(__file__))
    eidos_ont = os.path.join(here, '../sources/eidos/eidos_ontology.rdf')
    hierarchies = {'entity': HierarchyManager(eidos_ont, True, True)}

    preassembler = Preassembler(hierarchies, stmts)
    rel_stmts = preassembler.combine_related()
    assert len(rel_stmts) == 2

    trucking_stmts = [st for st in rel_stmts if st.subj.name == 'trucking']
    truck_stmt = trucking_stmts[0]
    assert len(truck_stmt.supported_by) == 1
    assert truck_stmt.supported_by[0].subj.name == 'transportation'
def test_influence_event_hash_reference():
    """Event hashes depend on the matches function, also inside Influence.

    Two events differing only in location have equal default hashes and
    different location-aware hashes; the JSON of an Influence carries the
    location-aware hash of its subject.
    """
    rainfall = Concept('rainfall')
    first_location = RefContext(name='x', db_refs={'GEOID': '1'})
    second_location = RefContext(name='x', db_refs={'GEOID': '2'})
    ev1 = Event(rainfall, context=WorldContext(geo_location=first_location))
    ev2 = Event(rainfall, context=WorldContext(geo_location=second_location))
    infl = Influence(ev1, ev2)

    default_hashes = (ev1.get_hash(refresh=True), ev2.get_hash(refresh=True))
    location_hashes = (
        ev1.get_hash(refresh=True, matches_fun=location_matches),
        ev2.get_hash(refresh=True, matches_fun=location_matches),
    )
    assert default_hashes[0] == default_hashes[1], default_hashes
    assert location_hashes[0] != location_hashes[1], location_hashes

    ij = infl.to_json(matches_fun=location_matches)
    ev1j = ev1.to_json(matches_fun=location_matches)
    # On failure both JSON documents are printed for inspection
    assert ev1j['matches_hash'] == ij['subj']['matches_hash'], \
        (print(json.dumps(ev1j, indent=1)), print(json.dumps(ij, indent=1)))
def test_wm_map():
    """Mapping works from UN to HUME/SOFIA and back, scored and unscored."""
    famine = Concept('x', db_refs={'UN': [('UN/events/human/famine', 1.0)]})
    education = Concept(
        'y', db_refs={'UN': [('UN/entities/human/education', 1.0)]})
    mapper = OntologyMapper([Influence(famine, education)], wm_ontomap,
                            symmetric=False)
    mapper.map_statements()
    mapped = mapper.statements[0]
    assert 'HUME' in mapped.subj.db_refs
    assert 'HUME' in mapped.obj.db_refs
    assert 'SOFIA' in mapped.subj.db_refs
    assert 'SOFIA' in mapped.obj.db_refs

    # Test the previously problematic famine case, first with the unscored
    # and then with the scored mapping. Only the score of the object's UN
    # grounding differs between the two.
    for scored, expected_obj_score in ((False, 1.0), (True, 0.81851065)):
        c3 = Concept('z', db_refs={'SOFIA': 'Health/Famine'})
        c4 = Concept('a',
                     db_refs={'HUME': [('event/healthcare/famine', 1.0)]})
        mapper = OntologyMapper([Influence(c4, c3)], wm_ontomap,
                                symmetric=False, scored=scored)
        mapper.map_statements()
        mapped = mapper.statements[0]
        assert mapped.obj.db_refs['UN'] == \
            [('UN/events/human/famine', expected_obj_score)], \
            mapped.obj.db_refs['UN']
        assert mapped.subj.db_refs['UN'] == \
            [('UN/events/human/famine', 1.0)], \
            mapped.subj.db_refs['UN']
def _build_influences(self, rel_dict):
    """Build Influence statements from one relation entry.

    Parameters
    ----------
    rel_dict : dict
        A relation entry. 'Cause Index' and 'Effect Index' hold comma
        separated event indices that are looked up in ``self._events``;
        'Relation', 'Sentence' and 'Source_File' are read as well.

    Returns
    -------
    list
        One Influence per (cause, effect) pair whose events are both known.
        Empty if a cause/effect column is missing or the relation is not
        recognized (in which case ``self.unhandled_relations`` is
        incremented for that relation).
    """
    stmt_list = []
    cause_entries = rel_dict.get('Cause Index')
    effect_entries = rel_dict.get('Effect Index')

    # FIXME: Handle cases in which there is a missing cause/effect
    if not cause_entries or not effect_entries:
        return []
    # Split on the bare comma and strip afterwards. Splitting on ', ' left
    # entries such as '1,2' unsplit, so they matched no event and the
    # statements were silently dropped.
    causes = [c.strip() for c in cause_entries.split(',')]
    effects = [e.strip() for e in effect_entries.split(',')]

    rel = rel_dict.get('Relation')
    if _in_rels(rel, pos_rels):
        pol = 1
    elif _in_rels(rel, neg_rels):
        pol = -1
    elif _in_rels(rel, neu_rels):
        pol = None
    # If we don't recognize this relation, we don't get any
    # statements
    else:
        self.unhandled_relations[rel] += 1
        return []

    text = rel_dict.get('Sentence')
    annot_keys = ['Relation']
    annots = {k: rel_dict.get(k) for k in annot_keys}
    ref = rel_dict.get('Source_File')

    for cause_idx, effect_idx in itertools.product(causes, effects):
        cause = self._events.get(cause_idx)
        effect = self._events.get(effect_idx)
        # Index pairs that do not resolve to known events are skipped
        if not cause or not effect:
            continue
        subj = self.get_event(cause)
        obj = self.get_event(effect)

        text_refs = {'DART': ref}
        ev = Evidence(source_api='sofia', text_refs=text_refs,
                      annotations=annots, text=text)
        stmt = Influence(subj, obj, evidence=[ev])
        # Use the polarity of the events, if object does not have a
        # polarity, use overall polarity
        if stmt.obj.delta.polarity is None:
            stmt.obj.delta.set_polarity(pol)

        stmt_list.append(stmt)
    return stmt_list
def test_apply_grounding_curation():
    """A grounding curation replaces the subject's WM grounding entry."""
    gr1 = [('theme1', 0.8), None, ('process', 0.7), None]
    gr2 = ['theme2', 'property2', None, None]

    def curation_side(subj_grounding):
        # Only the subject's concept differs between "before" and "after"
        return {"subj": {"factor": 'x', "concept": subj_grounding},
                "obj": {"factor": 'y', "concept": 'z'}}

    cur = {
        "before": curation_side(gr1),
        "after": curation_side(gr2),
    }

    subj_concept = Concept('x', db_refs={'WM': [gr1]})
    stmt = Influence(Event(subj_concept), Event('y'))
    IncrementalAssembler.apply_grounding_curation(stmt, cur)

    new_grounding = stmt.subj.concept.db_refs['WM'][0]
    assert new_grounding == \
        [('theme2', 1.0), ('property2', 1.0), None, None]
def get_events(self):
    """Extract Influence statements from the 'EventMention' mentions.

    Only events with exactly two arguments that are labeled 'Causal' or
    'Origin' are used; a warning is logged for every other two-argument
    event. Each resulting Influence is appended to ``self.statements``.
    """
    mentions = self.tree.execute("$.mentions[(@.type is 'EventMention')]")
    for event in list(mentions):
        # Events with missing (or extra) arguments are skipped
        if len(event['arguments']) != 2:
            continue

        labels = event['labels']
        if 'Causal' in labels:
            # Causal events go from cause to effect
            subj_role, obj_role = 'cause', 'effect'
        elif 'Origin' in labels:
            # Origin events go from origin to theme
            subj_role, obj_role = 'origin', 'theme'
        else:
            if 'Correlation' in labels:
                # Correlation events are skipped for now
                logger.warning('Correlation event %s skipped.' % event['id'])
            else:
                logger.warning('Could not classify event with labels: %s' %
                               ', '.join(event['labels']))
            continue
        subj = event['arguments'][subj_role][0]
        obj = event['arguments'][obj_role][0]

        subj_concept = self._get_concept(subj)
        obj_concept = self._get_concept(obj)
        subj_mods = self._get_mods(subj)
        obj_mods = self._get_mods(obj)

        # How several mods should be interpreted is not clear yet, so the
        # first one is taken if there is any
        if subj_mods:
            subj_delta = subj_mods[0]
        else:
            subj_delta = {'adjectives': [], 'polarity': None}
        if obj_mods:
            obj_delta = obj_mods[0]
        else:
            obj_delta = {'adjectives': [], 'polarity': None}

        evidence = self._get_evidence(event)
        self.statements.append(
            Influence(subj_concept, obj_concept, subj_delta, obj_delta,
                      evidence=evidence))
def to_statement(self):
    """Convert to an INDRA statement.

    Returns
    -------
    Influence or None
        None if either the cause polarity or the effect polarity is not
        'Positive'; otherwise an Influence between the cause and effect
        concepts with one 'hume' Evidence.
    """
    if self.cause_polarity != 'Positive' or \
            self.effect_polarity != 'Positive':
        return None

    def build_concept(text, grounding):
        # The concept is always grounded to its text; the HUME entry is
        # only added when a type is available.
        db_refs = {'TEXT': text}
        if grounding is not None:
            db_refs['HUME'] = grounding
        return Concept(text, db_refs=db_refs)

    # Both the full text and the text identified as the cause/effect are
    # listed for each event; the shortest string is the relevant one.
    cause_text = shortest_string_in_list(self.cause_texts)
    effect_text = shortest_string_in_list(self.effect_texts)

    # The evidence holds the full text. Exactly one full text string is
    # expected; if there are more (or none), the whole list is shown.
    # str() converts from rdflib's string representation to a python string.
    evidence_texts = list(self.evidence_texts)
    evidence_text = (evidence_texts[0] if len(evidence_texts) == 1
                     else repr(evidence_texts))
    ev = Evidence(source_api='hume', text=str(evidence_text))

    cause_concept = build_concept(str(cause_text), self.cause_type)
    effect_concept = build_concept(str(effect_text), self.effect_type)
    return Influence(cause_concept, effect_concept, evidence=ev)
def get_events(self):
    """Extract Influence statements from the 'EventMention' mentions.

    Only events with exactly two arguments that are labeled 'Causal' or
    'Origin' are turned into statements, which are appended to
    ``self.statements``. All other events are skipped.
    """
    events = self.tree.execute("$.mentions[(@.type is 'EventMention')]")
    for event in list(events):
        # Skip events with missing arguments
        if len(event['arguments']) != 2:
            continue
        labels = event['labels']
        # Process causal events
        if 'Causal' in labels:
            subj = event['arguments']['cause'][0]
            obj = event['arguments']['effect'][0]
        # Process origin/theme events
        elif 'Origin' in labels:
            subj = event['arguments']['origin'][0]
            obj = event['arguments']['theme'][0]
        # Any other event has no subject/object to extract. Without this
        # branch the code below either failed on unbound names or silently
        # reused the arguments of the previous event.
        else:
            continue

        subj_agent = self._get_agent(subj)
        obj_agent = self._get_agent(obj)
        subj_mods = self._get_mods(subj)
        obj_mods = self._get_mods(obj)
        # The interpretation of multiple mods is not clear yet so we
        # choose the first mod if available
        subj_delta = subj_mods[0] if subj_mods else \
            {'adjectives': None, 'polarity': None}
        obj_delta = obj_mods[0] if obj_mods else \
            {'adjectives': None, 'polarity': None}
        evidence = self._get_evidence(event)
        st = Influence(subj_agent, obj_agent, subj_delta, obj_delta,
                       evidence=evidence)
        self.statements.append(st)