예제 #1
0
def _resolve_geo(hume_loc_entity):
    """Build a RefContext for a Hume location entity.

    The context name is the entity's 'canonicalName' entry, falling back
    to its 'text' entry. A 'GEOID' reference is attached only when the
    entity carries a non-None 'geoname_id'.
    """
    fallback_name = hume_loc_entity.get('text')
    name = hume_loc_entity.get('canonicalName', fallback_name)
    geoname_id = hume_loc_entity.get('geoname_id', None)
    # Without a geoname id there is nothing to ground, so only the name
    # is passed along.
    if geoname_id is None:
        return RefContext(name)
    return RefContext(name=name, db_refs={"GEOID": geoname_id})
예제 #2
0
    def _make_context(self, entity):
        """Get place and time info from the json for this entity.

        Parameters
        ----------
        entity : dict
            An entity entry with an "arguments" list. Arguments whose
            "type" is "place" or "time" reference, through
            ``argument["value"]["@id"]``, an entry of ``self.concept_dict``.

        Returns
        -------
        WorldContext or None
            A context holding the last place and/or time argument found,
            or None if neither a location nor a time context was produced.
        """
        loc_context = None
        time_context = None

        # Look for time and place contexts.
        for argument in entity["arguments"]:
            arg_type = argument["type"]
            if arg_type == "place":
                loc_entity = self.concept_dict[argument["value"]["@id"]]
                place = loc_entity["canonicalName"]
                geo_id = loc_entity.get('geoname_id')
                # Only attach a GEOID reference when one is available so
                # that db_refs never carries a None grounding.
                if geo_id is None:
                    loc_context = RefContext(name=place)
                else:
                    loc_context = RefContext(name=place,
                                             db_refs={"GEOID": geo_id})
            elif arg_type == "time":
                temporal_entity = self.concept_dict[argument["value"]["@id"]]
                text = temporal_entity['mentions'][0]['text']
                # A missing, empty or None interval list means only the
                # raw text is known about this time expression.
                intervals = temporal_entity.get("timeInterval")
                if not intervals:
                    time_context = TimeContext(text=text)
                    continue
                interval = intervals[0]
                start = datetime.strptime(interval['start'],
                                          '%Y-%m-%dT%H:%M')
                end = datetime.strptime(interval['end'], '%Y-%m-%dT%H:%M')
                duration = int(interval['duration'])
                time_context = TimeContext(text=text,
                                           start=start,
                                           end=end,
                                           duration=duration)

        # Put context together
        if loc_context or time_context:
            return WorldContext(time=time_context, geo_location=loc_context)
        return None
예제 #3
0
    def get_event(event_entry):
        """Return an Event built from a single Sofia event entry.

        The 'Relation' entry names the concept, 'Event_Type' (when truthy)
        becomes its SOFIA grounding, 'Time' and 'Location' fill the world
        context, and 'Agent'/'Patient' are kept as evidence annotations.
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location are only set when the entry provides them
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        sentence = event_entry.get('Text')
        source = event_entry.get('Source')

        # Keep agent and patient only if they are non-empty
        annotations = {}
        for role, key in (('agent', 'Agent'), ('patient', 'Patient')):
            value = event_entry.get(key)
            if value:
                annotations[role] = value

        evidence = Evidence(source_api='sofia', pmid=source, text=sentence,
                            annotations=annotations,
                            source_id=event_entry['Event Index'])
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept, context=context, evidence=[evidence],
                     delta=delta)
예제 #4
0
    def get_event_compositional(self, event_entry: Dict[str, str]) -> Event:
        """Get an Event with compositional grounding

        Falls back to the old style Sofia grounding ('Relation' name plus
        optional 'Event_Type') when no compositional grounding is found.

        Parameters
        ----------
        event_entry :
            The event to process

        Returns
        -------
        event :
            An Event statement
        """
        # Get compositional grounding
        comp_name, comp_grnd = self.get_compositional_grounding(event_entry)
        if comp_name is None or comp_grnd[0] is None or \
                comp_grnd[0][0] is None:
            # No usable compositional grounding: use old style Sofia
            # grounding instead
            name = event_entry['Relation']
            concept = Concept(name, db_refs={'TEXT': name})
            if event_entry['Event_Type']:
                concept.db_refs['SOFIA'] = event_entry['Event_Type']
        else:
            comp_refs = {'TEXT': comp_name, 'WM': [comp_grnd]}
            concept = Concept(comp_name, db_refs=comp_refs)

        # Time and location are only set when the entry provides them
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        sentence = event_entry.get('Text')
        source = event_entry.get('Source')

        # Keep agent and patient only if they are non-empty
        annotations = {}
        for role, key in (('agent', 'Agent'), ('patient', 'Patient')):
            value = event_entry.get(key)
            if value:
                annotations[role] = value

        evidence = Evidence(source_api='sofia',
                            text_refs={'DART': source},
                            text=sentence,
                            annotations=annotations,
                            source_id=event_entry['Event Index'])
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept,
                     context=context,
                     evidence=[evidence],
                     delta=delta)
예제 #5
0
def parse_context_entry(entry, grounder, sentence=None):
    """Return a dict of context type and object processed from an entry.

    The entry is expected to look like ``<context type>: <context text>``.
    None is returned when the entry does not have that form or when the
    context type is not one of the allowed contexts. Otherwise the text is
    passed to ``grounder`` (with ``sentence`` as context) and the top
    result, if any, provides the db_refs and standard name of the
    resulting RefContext.
    """
    parsed = re.match(r'(.*): (.*)', entry)
    if parsed is None:
        return None
    context_type, context_txt = parsed.groups()
    if context_type not in allowed_contexts:
        logger.warning('Unknown context type %s' % context_type)
        return None

    terms = grounder(context_txt, context=sentence)
    if terms:
        # Only the top grounding result is used
        top_term = terms[0].term
        db_refs = standardize_db_refs({top_term.db: top_term.id})
        standard_name = bio_ontology.get_name(top_term.db, top_term.id)
    else:
        logger.warning('Could not ground %s context: %s'
                       % (context_type, context_txt))
        db_refs = {}
        standard_name = None
    db_refs['TEXT'] = context_txt

    # Prefer the standard name and fall back to the raw text
    context = RefContext(name=standard_name or context_txt,
                         db_refs=db_refs)
    return {allowed_contexts[context_type]: context}
예제 #6
0
    def get_event(event_entry):
        """Return an Event built from a single Sofia event entry.

        The 'Relation' entry names the concept, 'Event_Type' (when truthy)
        becomes its SOFIA grounding, and 'Time' and 'Location' fill the
        world context. The delta is a plain dict holding the entry's
        'Polarity' and an empty adjective list.
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location are only set when the entry provides them
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        sentence = event_entry.get('Text')
        source = event_entry.get('Source')
        evidence = Evidence(source_api='sofia', pmid=source, text=sentence)
        delta = {
            'polarity': event_entry.get('Polarity'),
            'adjectives': [],
        }
        return Event(concept, context=context, evidence=[evidence],
                     delta=delta)
예제 #7
0
def test_event_assemble_location():
    """Check that location only separates events with location_matches.

    Two events on the same concept that differ only in the GEOID of their
    location are combined by the default matching but kept apart when
    location_matches is used as the matches function.
    """
    rainfall = Concept('rainfall')
    events = [
        Event(rainfall,
              context=WorldContext(
                  geo_location=RefContext(name='x',
                                          db_refs={'GEOID': geoid})))
        for geoid in ('1', '2')
    ]

    # (matches function, expected number of unique statements)
    cases = ((None, 1), (location_matches, 2))
    for matches_fun, expected_count in cases:
        pa = Preassembler(ontology=world_ontology,
                          stmts=list(events),
                          matches_fun=matches_fun)
        unique_stmts = pa.combine_duplicates()
        assert len(unique_stmts) == expected_count
예제 #8
0
def get_cell_line(ekb):
    """Return a RefContext for the cell line mentioned in an EKB, if any.

    Parameters
    ----------
    ekb :
        An object with an ElementTree-style ``find`` method, searched for
        the text of a TERM whose type is ONT::CELL-LINE.

    Returns
    -------
    RefContext or None
        A context named after the cell line text with hyphens removed, or
        None if there is no such term or its text is empty.
    """
    # Look for a term representing a cell line
    cl_tag = ekb.find("TERM/[type='ONT::CELL-LINE']/text")
    if cl_tag is None or cl_tag.text is None:
        return None
    # str.replace returns a new string, so the result has to be kept for
    # the hyphens to actually be removed from the name.
    cell_line = cl_tag.text.replace('-', '')
    # TODO: add grounding here if available
    return RefContext(cell_line)
예제 #9
0
    def get_statements(self):
        """Return the statements extracted from ``self._relations``.

        Only PHOSPHORYLATION relations are turned into statements; any
        other relation type is logged and skipped. Phosphorylation
        relations without a SUBSTRATE agent are skipped as well.
        """
        stmts = []
        for rel_info in self._relations.values():
            # Map each argument role to its entity id.
            role_to_id = {
                arg['role']: arg['entity_duid']
                for arg in rel_info['argument']
            }

            # TRIGGER and SITE are handled separately from the agents.
            agent_ids = dict(role_to_id)
            trigger_id = agent_ids.pop('TRIGGER')
            site_id = agent_ids.pop('SITE', None)

            # Resolve the remaining roles; each value is unpacked below
            # as an (agent, coords) pair.
            entities = {}
            for role, eid in agent_ids.items():
                entities[role] = self._get_agent(eid)

            rel_type = rel_info['relationType']
            if rel_type != 'PHOSPHORYLATION':
                logger.warning("Unhandled statement type: %s" % rel_type)
                continue

            # Get the agents.
            missing = (None, None)
            enz, enz_coords = entities.get('KINASE', missing)
            sub, sub_coords = entities.get('SUBSTRATE', missing)
            if sub is None:
                continue

            # Get the site
            residue, position, site_coords = self._get_site(site_id)

            # Get the evidence
            agent_coords = [enz_coords, sub_coords]
            ev = self._get_evidence(trigger_id, role_to_id, agent_coords,
                                    site_coords)

            # Turn taxonomy into context. TAX is removed from both agents
            # and the substrate comes last so that its value wins.
            tax = None
            for agent in (enz, sub):
                if agent and 'TAX' in agent.db_refs:
                    tax = agent.db_refs.pop('TAX')
            if tax is not None:
                species = RefContext(tax, {'TAXONOMY': tax})
                ev.context = BioContext(species=species)

            stmt = Phosphorylation(enz,
                                   sub,
                                   residue=residue,
                                   position=position,
                                   evidence=[ev])
            stmts.append(stmt)

        return stmts
예제 #10
0
def test_influence_event_hash_reference():
    """Check event hashes with and without a location-aware matches_fun.

    Two events that differ only in the GEOID of their location hash the
    same by default but differently under location_matches. The JSON of an
    Influence must carry the same matches hash for its subject as the JSON
    of the subject event itself.
    """
    rainfall = Concept('rainfall')
    loc1 = RefContext(name='x', db_refs={'GEOID': '1'})
    loc2 = RefContext(name='x', db_refs={'GEOID': '2'})
    ev1 = Event(rainfall, context=WorldContext(geo_location=loc1))
    ev2 = Event(rainfall, context=WorldContext(geo_location=loc2))
    infl = Influence(ev1, ev2)

    h1 = ev1.get_hash(refresh=True)
    h2 = ev2.get_hash(refresh=True)
    hl1 = ev1.get_hash(refresh=True, matches_fun=location_matches)
    hl2 = ev2.get_hash(refresh=True, matches_fun=location_matches)

    assert h1 == h2, (h1, h2)
    assert hl1 != hl2, (hl1, hl2)

    ij = infl.to_json(matches_fun=location_matches)
    ev1j = ev1.to_json(matches_fun=location_matches)
    # Put both JSON dumps into the failure message itself; print() returns
    # None, so using it here would leave the message as (None, None).
    assert ev1j['matches_hash'] == ij['subj']['matches_hash'], \
        '%s\n%s' % (json.dumps(ev1j, indent=1), json.dumps(ij, indent=1))
예제 #11
0
def test_pybel_neighborhood_query():
    """Check statements and context from a PyBEL neighborhood query.

    Processes the neighborhood of TP63 in the small test corpus and
    verifies the evidence context of the resulting statements as well as
    the species mapping of the annotation manager.
    """
    corpus = path_this + '/../../data/small_corpus.bel'
    bp = bel.process_pybel_neighborhood(['TP63'], corpus)
    assert bp.statements
    assert_pmids(bp.statements)
    unicode_strs(bp.statements)
    assert all(s.evidence[0].context.cell_line.name == 'MCF 10A'
               for s in bp.statements)

    context = bp.statements[0].evidence[0].context
    assert repr(context) == str(context)
    assert context == \
        BioContext(location=RefContext(name="Cytoplasm",
                                       db_refs={'MESH': 'D003593'}),
                   cell_line=RefContext(name="MCF 10A",
                                        db_refs={'EFO': '0001200'}),
                   cell_type=RefContext(name="keratinocyte",
                                        db_refs={'CL': '0000312'}),
                   organ=RefContext(name="colon",
                                    db_refs={'UBERON': '0001155'}),
                   disease=RefContext(name="cancer",
                                      db_refs={'DOID': '162'}),
                   species=RefContext(name="Rattus norvegicus",
                                      db_refs={'TAXONOMY': '10116'}))
    # Test annotation manager: taxonomy ID 9606 is Homo sapiens
    assert bp.annot_manager.get_mapping('Species', '9606') == \
        'Homo sapiens'
예제 #12
0
파일: ekb.py 프로젝트: kolusask/bioagents
def get_cell_line(ekb):
    """Return a RefContext for the cell line mentioned in an EKB, if any.

    Parameters
    ----------
    ekb :
        An object with an ElementTree-style ``find`` method, searched for
        a TERM whose type is ONT::CELL-LINE.

    Returns
    -------
    RefContext or None
        A context named after the term's name with the substrings
        'CELLS', 'CELL' and '-' removed, or None if there is no such term,
        it has no name element, or the name element has no text.
    """
    # Look for a term representing a cell line
    cl_tag = ekb.find("TERM/[type='ONT::CELL-LINE']")
    if cl_tag is None:
        return None
    name_tag = cl_tag.find('name')
    # An empty name element has text None, which cannot be cleaned up
    if name_tag is None or name_tag.text is None:
        return None
    name = name_tag.text
    # 'CELLS' has to be removed before 'CELL' so no stray 'S' is left
    for token in ('CELLS', 'CELL', '-'):
        name = name.replace(token, '')
    # TODO: add grounding here if available
    return RefContext(name)
예제 #13
0
    def get_event_flat(self, event_entry: Dict[str, str]) -> Event:
        """Get an Event with flattened grounding

        The concept is named after the entry's 'Relation' and, when
        'Event_Type' is truthy, grounded to it under the SOFIA namespace.

        Parameters
        ----------
        event_entry :
            The event to process

        Returns
        -------
        event :
            An Event statement
        """
        relation = event_entry['Relation']
        concept = Concept(relation, db_refs={'TEXT': relation})
        sofia_type = event_entry['Event_Type']
        if sofia_type:
            concept.db_refs['SOFIA'] = sofia_type

        # Time and location are only set when the entry provides them
        context = WorldContext()
        time_text = event_entry.get('Time')
        if time_text:
            context.time = TimeContext(text=time_text.strip())
        location = event_entry.get('Location')
        if location:
            context.geo_location = RefContext(name=location)

        sentence = event_entry.get('Text')
        source = event_entry.get('Source')

        # Keep agent and patient only if they are non-empty
        annotations = {}
        for role, key in (('agent', 'Agent'), ('patient', 'Patient')):
            value = event_entry.get(key)
            if value:
                annotations[role] = value

        evidence = Evidence(source_api='sofia',
                            text_refs={'DART': source},
                            text=sentence,
                            annotations=annotations,
                            source_id=event_entry['Event Index'])
        delta = QualitativeDelta(polarity=event_entry.get('Polarity'),
                                 adjectives=None)
        return Event(concept,
                     context=context,
                     evidence=[evidence],
                     delta=delta)
예제 #14
0
파일: processor.py 프로젝트: steppi/indra
def _get_evidence(record: Mapping[str, Any]) -> Evidence:
    """Return an Evidence built from the metadata of a record.

    The record's "geo_id" and "cell_type" are stored as annotations, and
    its "organism" becomes the species of the evidence context, grounded
    through the ORGANISMS lookup.
    """
    # TODO how to use the following metadata?
    geo_id = record["geo_id"]
    cell_type = record["cell_type"]
    organism = record["organism"]

    # TODO use Gilda for grounding and put in BioContext?
    annotations = {
        "cell_type": cell_type,
        "geo": geo_id,
    }
    species = RefContext(
        name=organism,
        db_refs={"TAXONOMY": ORGANISMS[organism]},
    )
    return Evidence(
        source_api="creeds",
        annotations=annotations,
        context=BioContext(species=species),
    )
예제 #15
0
def test_matches_key_fun():
    """Check preassembly with custom matches and refinement functions.

    Three events on the same concept are given two distinct locations; with
    a location-aware matches key, both the Preassembler and run_preassembly
    are expected to yield two statements.
    """
    from indra.statements import WorldContext, RefContext

    def has_location(stmt):
        # True only if the statement has a context with a geo location
        # that carries a non-empty GEOID
        context = stmt.context
        return bool(context and context.geo_location and
                    context.geo_location.db_refs.get('GEOID'))

    def event_location_matches(stmt):
        # Non-Event statements keep their regular matches key
        if not isinstance(stmt, Event):
            return stmt.matches_key()
        if has_location(stmt):
            geoid = stmt.context.geo_location.db_refs['GEOID']
        else:
            geoid = None
        return str((stmt.concept.matches_key(), geoid))

    def event_location_refinement(st1, st2, ontology, entities_refined):
        # Anything other than a pair of Events yields None
        if not (isinstance(st1, Event) and isinstance(st2, Event)):
            return None
        if not st1.refinement_of(st2, ontology):
            return False
        # From here on the decision depends on the locations only
        if not has_location(st2):
            return True
        if not has_location(st1):
            return False
        geoid1 = st1.context.geo_location.db_refs['GEOID']
        geoid2 = st2.context.geo_location.db_refs['GEOID']
        return geoid1 == geoid2

    context1 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '1'}))
    context2 = WorldContext(
        geo_location=RefContext('x', db_refs={'GEOID': '2'}))

    health = 'wm/concept/causal_factor/health_and_life'

    def make_event(context, text):
        # All events share the concept and differ in context and evidence
        return Event(Concept('health', db_refs={'WM': [(health, 1.0)]}),
                     context=context,
                     evidence=Evidence(text=text, source_api='eidos'))

    e1 = make_event(context1, '1')
    e2 = make_event(context2, '2')
    e3 = make_event(context2, '3')

    pa = Preassembler(world_ontology, [e1, e2, e3],
                      matches_fun=event_location_matches,
                      refinement_fun=event_location_refinement)

    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2, unique_stmts

    from indra.tools.assemble_corpus import run_preassembly
    stmts = run_preassembly([e1, e2, e3],
                            matches_fun=event_location_matches,
                            refinement_fun=event_location_refinement)
    assert len(stmts) == 2, stmts
예제 #16
0
 def ref_context_from_geoloc(geoloc):
     """Return a RefContext object given a geoloc entry.

     The entry's 'text' becomes the name and its 'geoID' is stored under
     the 'GEOID' key of db_refs; a missing entry comes through as None.
     """
     return RefContext(name=geoloc.get('text'),
                       db_refs={'GEOID': geoloc.get('geoID')})
예제 #17
0
    def _build_stmts(self, rel_dict):
        """Return Influence statements for one relation entry.

        Parameters
        ----------
        rel_dict : dict
            A relation entry. 'Cause Index' and 'Effect Index' hold
            comma-separated keys into ``self._events``, 'Relation' decides
            the polarity, and 'Sentence' and 'Source_File' provide the
            evidence text and reference.

        Returns
        -------
        list
            One Influence per (cause, effect) pair. The list is empty if
            causes or effects are missing or the relation is not
            recognized.
        """
        def choose_context(cause_event, effect_event, context_type):
            # Time/location is given at the event level, not at the
            # relation level, so one event's context has to be chosen for
            # the relation: the cause's value wins over the effect's.
            for event_entry in (cause_event, effect_event):
                value = event_entry.get(context_type)
                if value:
                    return value.strip()
            return None

        stmt_list = []
        cause_entries = rel_dict.get('Cause Index')
        effect_entries = rel_dict.get('Effect Index')

        # FIXME: Handle cases in which there is a missing cause/effect
        if not cause_entries or not effect_entries:
            return []
        causes = [c.strip() for c in cause_entries.split(',')]
        effects = [e.strip() for e in effect_entries.split(',')]
        rel = rel_dict.get('Relation')
        if _in_rels(rel, pos_rels):
            pol = 1
        elif _in_rels(rel, neg_rels):
            pol = -1
        elif _in_rels(rel, neu_rels):
            pol = None
        # If we don't recognize this relation, we don't get any
        # statements
        else:
            return []

        text = rel_dict.get('Sentence')
        annot_keys = ['Relation']
        annots = {k: rel_dict.get(k) for k in annot_keys}
        ref = rel_dict.get('Source_File')

        for cause_idx, effect_idx in itertools.product(causes, effects):
            cause_event = self._events[cause_idx]
            effect_event = self._events[effect_idx]

            cause_name = cause_event['Relation']
            cause_grounding = cause_event['Event_Type']
            effect_name = effect_event['Relation']
            effect_grounding = effect_event['Event_Type']
            cause_concept = Concept(cause_name, db_refs={'TEXT': cause_name})
            if cause_grounding:
                cause_concept.db_refs['SOFIA'] = cause_grounding
            effect_concept = Concept(effect_name,
                                     db_refs={'TEXT': effect_name})
            if effect_grounding:
                effect_concept.db_refs['SOFIA'] = effect_grounding

            context = WorldContext()
            location = choose_context(cause_event, effect_event, 'Location')
            if location:
                # The location attribute of WorldContext is geo_location;
                # assigning to any other name would not register as
                # context.
                context.geo_location = RefContext(name=location)
            time = choose_context(cause_event, effect_event, 'Time')
            if time:
                context.time = TimeContext(text=time)
            # Overwrite blank context
            if not context:
                context = None

            ev = Evidence(source_api='sofia',
                          pmid=ref,
                          annotations=annots,
                          text=text,
                          context=context)
            stmt = Influence(cause_concept, effect_concept, evidence=[ev])
            # Assume unknown polarity on the subject, put the overall
            # polarity in the sign of the object
            stmt.subj_delta['polarity'] = None
            stmt.obj_delta['polarity'] = pol

            stmt_list.append(stmt)
        return stmt_list