Example #1
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)
        if not self.assembly_config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if self.assembly_config.get('standardize_names'):
            ac.standardize_names_groundings(stmts)
        if self.assembly_config.get('filter_ungrounded'):
            score_threshold = self.assembly_config.get('score_threshold')
            stmts = ac.filter_grounded_only(stmts,
                                            score_threshold=score_threshold)
        if self.assembly_config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if self.assembly_config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)
        relevance_policy = self.assembly_config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not self.assembly_config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not self.assembly_config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)
        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_mode = self.assembly_config.get('preassembly_mode')
        if preassembly_mode == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            stmts = ac.run_preassembly(stmts,
                                       return_toplevel=False,
                                       belief_scorer=belief_scorer,
                                       hierarchies=hierarchies)
        else:
            stmts = ac.run_preassembly(stmts, return_toplevel=False)
        belief_cutoff = self.assembly_config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if self.assembly_config.get('filter_direct'):
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if self.assembly_config.get('mechanism_linking'):
            ml = MechLinker(stmts)
            ml.gather_explicit_activities()
            ml.reduce_activities()
            ml.gather_modifications()
            ml.reduce_modifications()
            ml.gather_explicit_activities()
            ml.replace_activations()
            ml.require_active_forms()
            stmts = ml.statements

        self.assembled_stmts = stmts
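
A minimal sketch of an assembly_config dict that would drive the branches above; the keys are taken directly from the .get() calls in run_assembly, while all values are illustrative assumptions, not defaults from any real configuration.

# Illustrative only: keys mirror the .get() calls in run_assembly above.
assembly_config = {
    'skip_map_grounding': False,
    'standardize_names': True,
    'filter_ungrounded': True,
    'score_threshold': 0.7,
    'merge_groundings': True,
    'merge_deltas': True,
    'filter_relevance': None,    # or a policy name accepted by filter_relevance
    'skip_filter_human': True,
    'skip_map_sequence': True,
    'preassembly_mode': 'wm',    # 'wm' selects WM hierarchies + Eidos scorer
    'belief_cutoff': 0.8,
    'filter_direct': False,
    'mechanism_linking': False,
}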
Example #2
def test_preassemble_flatten():
    st_out = ac.run_preassembly([st1, st3, st5, st6], flatten_evidence=False)
    assert len(st_out[0].evidence) == 1
    assert len(st_out[1].evidence) == 1
    st_out = ac.run_preassembly([st1, st3, st5, st6], flatten_evidence=True,
                                flatten_evidence_collect_from='supported_by')
    assert len(st_out[0].evidence) == 2
    assert len(st_out[1].evidence) == 2
    st_out = ac.run_preassembly([st1, st3, st5, st6], flatten_evidence=True,
                                flatten_evidence_collect_from='supports')
    assert len(st_out[0].evidence) == 1
    assert len(st_out[1].evidence) == 1
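
The flatten_evidence flags above control whether a top-level statement also collects evidence from the statements linked to it. A self-contained sketch of the same mechanics, assuming INDRA's standard behavior that the more specific statement ends up top-level with its generalization in supported_by (agent names and evidence texts are illustrative):

from indra.statements import Agent, Phosphorylation, Evidence
from indra.tools import assemble_corpus as ac

general = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'),
                          evidence=[Evidence(text='MEK phosphorylates ERK.')])
specific = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'), 'T', '185',
                           evidence=[Evidence(text='...on T185.')])
# With collect_from='supported_by', the top-level (specific) statement
# should also gain the general statement's evidence.
top = ac.run_preassembly([general, specific], flatten_evidence=True,
                         flatten_evidence_collect_from='supported_by')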
Example #3
def test_merge_deltas():
    def add_annots(stmt):
        for ev in stmt.evidence:
            ev.annotations['subj_adjectives'] = stmt.subj_delta['adjectives']
            ev.annotations['obj_adjectives'] = stmt.obj_delta['adjectives']
            ev.annotations['subj_polarity'] = stmt.subj_delta['polarity']
            ev.annotations['obj_polarity'] = stmt.obj_delta['polarity']
        return stmt

    d1 = {'adjectives': ['a', 'b', 'c'], 'polarity': 1}
    d2 = {'adjectives': [], 'polarity': -1}
    d3 = {'adjectives': ['g'], 'polarity': 1}
    d4 = {'adjectives': ['d', 'e', 'f'], 'polarity': -1}
    d5 = {'adjectives': ['d'], 'polarity': None}
    d6 = {'adjectives': [], 'polarity': None}
    d7 = {'adjectives': [], 'polarity': 1}
    stmts = [
        add_annots(
            Influence(Concept('a'),
                      Concept('b'),
                      subj_delta=sd,
                      obj_delta=od,
                      evidence=[Evidence(source_api='eidos',
                                         text='%d' % idx)]))
        for idx, (sd, od) in enumerate([(d1, d2), (d3, d4)])
    ]
    stmts = ac.run_preassembly(stmts, return_toplevel=True)
    stmts = ac.merge_deltas(stmts)
    assert stmts[0].subj_delta['polarity'] == 1, stmts[0].subj_delta
    assert stmts[0].obj_delta['polarity'] == -1, stmts[0].obj_delta
    assert set(stmts[0].subj_delta['adjectives']) == {'a', 'b', 'c', 'g'}, \
        stmts[0].subj_delta
    assert set(stmts[0].obj_delta['adjectives']) == {'d', 'e', 'f'}, \
        stmts[0].obj_delta

    stmts = [
        add_annots(
            Influence(Concept('a'),
                      Concept('b'),
                      subj_delta=sd,
                      obj_delta=od,
                      evidence=[Evidence(source_api='eidos',
                                         text='%d' % idx)]))
        for idx, (sd, od) in enumerate([(d1, d5), (d6, d7), (d6, d7)])
    ]
    stmts = ac.run_preassembly(stmts, return_toplevel=True)
    stmts = ac.merge_deltas(stmts)
    assert stmts[0].subj_delta['polarity'] is None, stmts[0].subj_delta
    assert stmts[0].obj_delta['polarity'] == 1, stmts[0].obj_delta
    assert set(stmts[0].subj_delta['adjectives']) == {'a', 'b', 'c'}, \
        stmts[0].subj_delta
    assert set(stmts[0].obj_delta['adjectives']) == {'d'}, \
        stmts[0].obj_delta
Example #4
def test_merge_deltas():
    def add_annots(stmt):
        for ev in stmt.evidence:
            ev.annotations['subj_adjectives'] = stmt.subj.delta.adjectives
            ev.annotations['obj_adjectives'] = stmt.obj.delta.adjectives
            ev.annotations['subj_polarity'] = stmt.subj.delta.polarity
            ev.annotations['obj_polarity'] = stmt.obj.delta.polarity
        return stmt
    # d1 = {'adjectives': ['a', 'b', 'c'], 'polarity': 1}
    # d2 = {'adjectives': [], 'polarity': -1}
    # d3 = {'adjectives': ['g'], 'polarity': 1}
    # d4 = {'adjectives': ['d', 'e', 'f'], 'polarity': -1}
    # d5 = {'adjectives': ['d'], 'polarity': None}
    # d6 = {'adjectives': [], 'polarity': None}
    # d7 = {'adjectives': [], 'polarity': 1}

    d1 = QualitativeDelta(polarity=1, adjectives=['a', 'b', 'c'])
    d2 = QualitativeDelta(polarity=-1, adjectives=None)
    d3 = QualitativeDelta(polarity=1, adjectives=['g'])
    d4 = QualitativeDelta(polarity=-1, adjectives=['d', 'e', 'f'])
    d5 = QualitativeDelta(polarity=None, adjectives=['d'])
    d6 = QualitativeDelta(polarity=None, adjectives=None)
    d7 = QualitativeDelta(polarity=1, adjectives=None)

    def make_ev(name, delta):
        return Event(Concept(name), delta=delta)

    stmts = [add_annots(Influence(make_ev('a', sd), make_ev('b', od),
                                  evidence=[Evidence(source_api='eidos',
                                                     text='%d' % idx)]))
             for idx, (sd, od) in enumerate([(d1, d2), (d3, d4)])]
    stmts = ac.run_preassembly(stmts, return_toplevel=True)
    stmts = ac.merge_deltas(stmts)
    assert stmts[0].subj.delta.polarity == 1, stmts[0].subj.delta
    assert stmts[0].obj.delta.polarity == -1, stmts[0].obj.delta
    assert set(stmts[0].subj.delta.adjectives) == {'a', 'b', 'c', 'g'}, \
        stmts[0].subj.delta
    assert set(stmts[0].obj.delta.adjectives) == {'d', 'e', 'f'}, \
        stmts[0].obj.delta

    stmts = [add_annots(Influence(make_ev('a', sd), make_ev('b', od),
                                  evidence=[Evidence(source_api='eidos',
                                                     text='%d' % idx)]))
             for idx, (sd, od) in enumerate([(d1, d5), (d6, d7), (d6, d7)])]
    stmts = ac.run_preassembly(stmts, return_toplevel=True)
    stmts = ac.merge_deltas(stmts)
    assert stmts[0].subj.delta.polarity is None, stmts[0].subj.delta
    assert stmts[0].obj.delta.polarity == 1, stmts[0].obj.delta
    assert set(stmts[0].subj.delta.adjectives) == {'a', 'b', 'c'}, \
        stmts[0].subj.delta
    assert set(stmts[0].obj.delta.adjectives) == {'d'}, \
        stmts[0].obj.delta
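
Examples 3 and 4 are the same test before and after INDRA's move from subj_delta/obj_delta dicts on Influence to Event subject/object arguments carrying a QualitativeDelta; the commented-out dicts in Example 4 are the pre-migration values kept for reference.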
Example #5
def test_normalize_equals_opposites():
    ont = _get_extended_wm_hierarchy()
    flooding1 = 'wm/a/b/c/flooding'
    flooding2 = 'wm/x/y/z/flooding'
    # Note that as of 5/15/2020 food_insecurity and food_security aren't
    # explicitly opposites in the ontology
    food_insec = 'wm/concept/causal_factor/food_insecurity/food_nonaccess'
    food_sec = 'wm/concept/causal_factor/food_security/food_access'

    # Top grounding: flooding1
    dbr = {'WM': [(flooding1, 1.0), (flooding2, 0.5), (food_insec, 0.1)]}
    ev1 = Event(Concept('x', db_refs=dbr))

    # Top grounding: food security
    dbr = {'WM': [(food_sec, 1.0), (flooding2, 0.5)]}
    ev2 = Event(Concept('x', db_refs=dbr), delta=QualitativeDelta(polarity=1))

    # Make sure that by default, things don't get normalized out
    stmts = ac.run_preassembly([ev1, ev2], ontology=ont)
    assert stmts[0].concept.db_refs['WM'][0][0] != \
           stmts[0].concept.db_refs['WM'][1][0]

    # Now we turn on equivalence normalization and expect
    # that flooding1 and flooding2 have been normalized out
    # in ev1's db_refs
    stmts = ac.run_preassembly([ev1, ev2],
                               normalize_equivalences=True,
                               normalize_ns='WM',
                               ontology=ont)
    assert stmts[0].concept.db_refs['WM'][0][0] == \
           stmts[0].concept.db_refs['WM'][1][0], \
        stmts[0].concept.db_refs['WM']

    # Now we turn on opposite normalization and expect that food
    # security and insecurity will get normalized out
    stmts = ac.run_preassembly([ev1, ev2],
                               normalize_equivalences=True,
                               normalize_opposites=True,
                               normalize_ns='WM',
                               ontology=ont)
    assert len(stmts) == 2
    stmts = sorted(stmts,
                   key=lambda x: len(x.concept.db_refs['WM']),
                   reverse=True)
    assert len(stmts[0].concept.db_refs['WM']) == 3, stmts[0].concept.db_refs
    # This is to check that food_insecurity was normalized to food_security
    assert stmts[0].concept.db_refs['WM'][2][0] == \
           stmts[1].concept.db_refs['WM'][0][0], \
        (stmts[0].concept.db_refs['WM'],
         stmts[1].concept.db_refs['WM'])
Example #6
    def respond_get_paper_model(self, content):
        """Get and display the model from a paper, indicated by pmid."""
        pmid_raw = content.gets('pmid')
        prefix = 'PMID-'
        if pmid_raw.startswith(prefix) and pmid_raw[len(prefix):].isdigit():
            pmid = pmid_raw[len(prefix):]
        else:
            return self.make_failure('BAD_INPUT')
        try:
            stmts = get_statements_for_paper([('pmid', pmid)],
                                             simple_response=True)
        except IndraDBRestAPIError as e:
            if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
                logger.error("Could not find pmid: %s" % e.reason)
                return self.make_failure('MISSING_MECHANISM')
            else:
                raise e

        if not stmts:
            resp = KQMLPerformative('SUCCESS')
            resp.set('relations-found', 0)
            return resp
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
        diagrams = _make_diagrams(stmts)
        self.send_display_model(diagrams)
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', len(unique_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp
Example #7
def assemble_one_corpus():
    """For assembling one of the four corpora."""
    path = '/home/bmg16/data/wm/2-Jsonld'
    corpus_size = '16k'
    prefix = '%s%s' % (path, corpus_size)
    fnames = glob.glob('%s/*.jsonld' % prefix)

    # For large corpus
    all_statements = []
    for idx, fname in enumerate(fnames):
        ep = eidos.process_json_file(fname)
        for stmt in ep.statements:
            for ev in stmt.evidence:
                ev.annotations['provenance'][0]['document']['@id'] = \
                    os.path.basename(fname)

        all_statements += ep.statements
        print('%d: %d' % (idx, len(all_statements)))
    with open('%s/3-Indra%s.pkl' % (prefix, corpus_size), 'wb') as fh:
        pickle.dump(all_statements, fh)

    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)

    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('%s/3-Indra%s.json' % (prefix, corpus_size), 'w') as fh:
        json.dump(jd, fh, indent=1)
Example #8
def main(args):
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
                    assemble_corpus.map_grounding(
                        assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
                         assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    binary_stmts = [s for s in stmts_filtered if len(s.agent_list()) == 2 and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)
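
The rows written above are just two tab-separated agent names, so the resulting file can be consumed directly. A hedged sketch with networkx, assuming the default output path used above:

import csv
import networkx as nx

g = nx.DiGraph()
with open('./filtered_indra_network.sif') as fh:
    for row in csv.reader(fh, delimiter='\t'):
        if len(row) == 2:        # source agent name, target agent name
            g.add_edge(row[0], row[1])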
Example #9
def _do_old_fashioned_preassembly(stmts):
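    # use_adeft=True enables Adeft-based disambiguation of ambiguous strings;
    # gilda_mode='local' runs Gilda in-process rather than via its web service.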
    grounded_stmts = ac.map_grounding(stmts,
                                      use_adeft=True,
                                      gilda_mode='local')
    ms_stmts = ac.map_sequence(grounded_stmts, use_cache=True)
    opa_stmts = ac.run_preassembly(ms_stmts, return_toplevel=False)
    return opa_stmts
Example #10
def test_agent_name_custom_preassembly():
    e1 = Event(Concept('price oil'))
    e2 = Event(Concept('oil price'))
    stmts = [e1, e2]
    stmts_out = ac.run_preassembly(stmts,
                                   matches_fun=agent_name_stmt_type_matches)
    assert len(stmts_out) == 1
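
matches_fun replaces the default matches-key used for deduplication. A minimal sketch of a custom matches function, under the assumed contract that it maps a Statement to a hashable key and statements with equal keys are merged as duplicates; the key logic here is purely illustrative:

from indra.statements import Event, Concept
from indra.tools import assemble_corpus as ac

def sorted_token_matches(stmt):
    # Hypothetical key: statement type plus each agent's name tokens,
    # sorted, so 'price oil' and 'oil price' collapse to one key.
    key = tuple(tuple(sorted(a.name.lower().split()))
                for a in stmt.agent_list() if a is not None)
    return str((type(stmt).__name__, key))

stmts = [Event(Concept('price oil')), Event(Concept('oil price'))]
merged = ac.run_preassembly(stmts, matches_fun=sorted_token_matches)  # 1 left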
Example #11
    def respond_get_paper_model(self, content):
        """Get and display the model from a paper, indicated by pmid."""
        pmid_raw = content.gets('pmid')
        prefix = 'PMID-'
        if pmid_raw.startswith(prefix) and pmid_raw[len(prefix):].isdigit():
            pmid = pmid_raw[len(prefix):]
        else:
            return self.make_failure('BAD_INPUT')
        try:
            stmts = get_statements_for_paper([('pmid', pmid)])
        except IndraDBRestAPIError as e:
            if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
                logger.error("Could not find pmid: %s" % e.reason)
                return self.make_failure('MISSING_MECHANISM')
            else:
                raise e

        if not stmts:
            resp = KQMLPerformative('SUCCESS')
            resp.set('relations-found', 0)
            return resp
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
        diagrams = _make_diagrams(stmts)
        self.send_display_model(diagrams)
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', len(unique_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp
Example #12
def run_assembly(stmts, filename):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    #stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one', allow_families=True)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False, poolsize=4)
    ac.dump_statements(stmts, filename)
    return stmts
Example #13
def get_statements(target):
    #tas_stmts = get_tas_stmts(target)
    db_stmts = get_db_stmts(target)
    stmts = db_stmts
    #stmts = tas_stmts + db_stmts
    stmts = filter_misgrounding(target, stmts)
    stmts = ac.run_preassembly(stmts)
    stmts = ac.filter_by_curation(stmts, db_curations)
    stmts = filter_neg(stmts)
    return stmts
Example #14
def get_indirect_stmts(corpus):
    cpath = os.path.join(indra.__path__[0], os.pardir, 'data',
                         f'{corpus}_corpus.bel')
    bp = bel.process_belscript(cpath)
    indirect_stmts = [
        st for st in bp.statements
        if not st.evidence[0].epistemics.get('direct')
    ]
    stmts = ac.run_preassembly(indirect_stmts, return_toplevel=False)
    return stmts
Example #15
def assemble_stmts(stmts):
    print('Running preassembly')
    hm = get_wm_hierarchies()
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts,
                               belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=2)
    return stmts
Example #16
def test_readme_pipeline():
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
Example #17
def print_statements(
    statements: List[Statement],
    file: Union[None, str, TextIO] = None,
    sep: Optional[str] = None,
    limit: Optional[int] = None,
    allow_duplicates: bool = False,
    keep_only_pmids: Union[None, str, Collection[str]] = None,
    sort_attrs: Iterable[str] = ('uuid', 'pmid'),
    allow_ungrounded: bool = True,
    minimum_belief: Optional[float] = None,
    extra_columns: Optional[List[str]] = None,
) -> None:
    """Write statements to a CSV for curation.

    Similar to the other statement writer, but sorts by the BEL string and
    keeps only the first statement in each group.
    """
    sep = sep or '\t'
    extra_columns = extra_columns or []
    extra_columns_placeholders = [''] * len(extra_columns)

    statements = run_preassembly(statements)

    if not allow_ungrounded:
        statements = filter_grounded_only(statements)

    if minimum_belief is not None:
        statements = filter_belief(statements, minimum_belief)

    rows = get_rows_from_statements(statements,
                                    allow_duplicates=allow_duplicates,
                                    keep_only_pmids=keep_only_pmids)
    rows = sorted(rows, key=attrgetter(*sort_attrs))

    if limit is not None:
        rows = rows[:limit]

    if not rows:
        logger.warning('no rows written')
        return

    def _write(_file):
        print(*start_header, *extra_columns, *end_header, sep=sep, file=_file)
        for row in rows:
            print(*row.start_tuple,
                  *extra_columns_placeholders,
                  *row.end_tuple,
                  sep=sep,
                  file=_file)

    if isinstance(file, str):
        with open(file, 'w') as _file:
            _write(_file)
    else:
        _write(file)
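
A hypothetical invocation, assuming `statements` is an existing list of INDRA Statements; the path, limit, and belief cutoff are illustrative:

print_statements(
    statements,
    file='curation_sheet.tsv',
    limit=500,
    allow_ungrounded=False,
    minimum_belief=0.9,
)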
Example #18
def test_readme_wm_pipeline():
    stmts = wm_raw_stmts
    # stmts = ac.filter_grounded_only(stmts)  # Does not work on test stmts
    belief_scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts,
                               return_toplevel=False,
                               belief_scorer=belief_scorer,
                               ontology=world_ontology,
                               normalize_opposites=True,
                               normalize_ns='WM')
    stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
    assert stmts, 'Update example to yield statements list of non-zero length'
Example #19
def normalize_active_forms(stmts):
    af_stmts = ac.filter_by_type(stmts, ActiveForm)
    relevant_af_stmts = []
    for stmt in af_stmts:
        if (not stmt.agent.mods) and (not stmt.agent.mutations):
            continue
        relevant_af_stmts.append(stmt)
    print('%d relevant ActiveForms' % len(relevant_af_stmts))
    non_af_stmts = ac.filter_by_type(stmts, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(relevant_af_stmts)
    stmts = af_stmts + non_af_stmts
    return stmts
Example #20
    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the Statements collected in the model.

        Use INDRA's GroundingMapper, Preassembler and BeliefEngine
        on the IncrementalModel and save the unique statements and
        the top level statements in class attributes.

        Currently the following filter options are implemented:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs).
        """
        stmts = self.get_statements()

        # Filter out hypotheses
        stmts = ac.filter_no_hypothesis(stmts)

        # Fix grounding
        if grounding_map is not None:
            stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
        else:
            stmts = ac.map_grounding(stmts)

        if filters and ('grounding' in filters):
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if filters and 'human_only' in filters:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly
        stmts = ac.run_preassembly(stmts, return_toplevel=False)

        # Run relevance filter
        stmts = self._relevance_filter(stmts, filters)

        # Save Statements
        self.assembled_stmts = stmts
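
An illustrative call, assuming `model` is the IncrementalModel instance this method belongs to; the filter names come from the docstring above, and the grounding-map entry follows its string-to-db_refs format:

model.preassemble(filters=['grounding', 'human_only', 'prior_one'],
                  grounding_map={'ERK': {'FPLX': 'ERK'}})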
Example #22
def run_preassembly():
    """Run preassembly on a list of INDRA Statements."""
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.run_preassembly(stmts)
    if stmts_out:
        stmts_json = stmts_to_json(stmts_out)
        res = {'statements': stmts_json}
        return res
    else:
        res = {'statements': []}
    return res
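
A hedged client-side sketch for such an endpoint, assuming it is mounted at /preassembly on a local server (the URL is hypothetical):

import requests
from indra.statements import stmts_to_json

payload = {'statements': stmts_to_json(stmts)}  # stmts: list of Statements
resp = requests.post('http://localhost:8080/preassembly', json=payload)
preassembled = resp.json()['statements']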
Example #23
def default_assembly(stmts):
    from indra.belief.wm_scorer import get_eidos_scorer
    from indra.preassembler.hierarchy_manager import get_wm_hierarchies
    hm = get_wm_hierarchies()
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=4)
    stmts = ac.merge_groundings(stmts)
    stmts = ac.merge_deltas(stmts)
    stmts = ac.standardize_names_groundings(stmts)
    return stmts
Example #24
def run_assembly(stmts, save_file):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one')
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts
Example #26
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        Returns
        -------
        stmts : list[indra.statements.Statement]
            The list of assembled INDRA Statements.
        """
        stmts = self.get_indra_stmts()
        stmts = ac.filter_no_hypothesis(stmts)
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        return stmts
Example #27
def get_statements(target):
    tas_stmts = get_tas_stmts(target)
    db_stmts = get_db_stmts(target)
    stmts = filter_misgrounding(target, tas_stmts + db_stmts)
    stmts = ac.run_preassembly(stmts)
    stmts = ac.filter_by_curation(stmts, db_curations)

    ev_counts = {s.get_hash(): len(s.evidence) for s in stmts}
    source_counts = {}
    for stmt in stmts:
        stmt_source_counts = get_source_counts_dict()
        for ev in stmt.evidence:
            stmt_source_counts[ev.source_api] += 1
        source_counts[stmt.get_hash()] = stmt_source_counts
    return stmts, ev_counts, source_counts
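
The two auxiliary dicts returned here are keyed by statement hash. A brief hypothetical usage (the target name is illustrative):

stmts, ev_counts, source_counts = get_statements('EGFR')
for stmt in stmts:
    h = stmt.get_hash()
    print(stmt, ev_counts[h], source_counts[h])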
Example #28
def get_indra_phos_stmts():
    stmts = by_gene_role_type(stmt_type='Phosphorylation')
    stmts += by_gene_role_type(stmt_type='Dephosphorylation')
    stmts = ac.map_grounding(stmts)
    # Expand families before site mapping
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.map_sequence(stmts)
    ac.dump_statements(stmts, 'sources/indra_phos_sitemap.pkl')
    stmts = ac.run_preassembly(stmts,
                               poolsize=4,
                               save='sources/indra_phos_stmts_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_phos_stmts.pkl')
    return stmts
Example #29
def run_preassembly():
    """Run preassembly on a list of INDRA Statements."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    scorer = body.get('scorer')
    return_toplevel = body.get('return_toplevel')
    if scorer == 'wm':
        belief_scorer = get_eidos_scorer()
    else:
        belief_scorer = None
    stmts_out = ac.run_preassembly(stmts, belief_scorer=belief_scorer,
                                   return_toplevel=return_toplevel)
    return _return_stmts(stmts_out)
Example #30
def get_indra_reg_act_stmts():
    try:
        stmts = ac.load_statements('sources/indra_reg_act_stmts.pkl')
        return stmts
    except Exception:
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts,
                               poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts
Example #31
def test_run_preassembly_concepts():
    ont = _get_extended_wm_hierarchy()
    rainfall = Event(
        Concept('rain',
                db_refs={
                    'WM':
                    ('wm/concept/causal_factor/environmental/meteorologic/'
                     'precipitation/rainfall')
                }))
    flooding_1 = Event(Concept('flood', db_refs={'WM': 'wm/x/y/z/flooding'}))
    flooding_2 = Event(Concept('flooding', db_refs={'WM':
                                                    'wm/a/b/c/flooding'}))
    st_out = ac.run_preassembly(
        [Influence(rainfall, flooding_1),
         Influence(rainfall, flooding_2)],
        normalize_ns='WM',
        normalize_equivalences=True,
        ontology=ont)
    assert len(st_out) == 1, st_out
Example #32
def test_merge_groundings():
    refs1 = {'UN': [('x', 0.8), ('y', 0.7)], 'B': 'x', 'C': 'y'}
    refs2 = {'UN': [('x', 0.9), ('y', 0.6), ('z', 0.5)], 'B': 'x', 'D': 'z'}
    stmts = [
        Influence(Concept('a', db_refs=refs1),
                  Concept('b', db_refs=refs2),
                  evidence=[Evidence(source_api='eidos', text='1')]),
        Influence(Concept('a', db_refs=refs2),
                  Concept('b', db_refs=refs1),
                  evidence=[Evidence(source_api='eidos', text='2')])
    ]
    stmts = ac.run_preassembly(stmts)
    assert len(stmts) == 1
    stmts = ac.merge_groundings(stmts)
    assert stmts[0].subj.db_refs == \
           {'UN': [('x', 0.9), ('y', 0.7), ('z', 0.5)],
            'B': 'x', 'C': 'y', 'D': 'z'}, \
        stmts[0].subj.db_refs
    assert stmts[0].obj.db_refs == stmts[0].subj.db_refs
Example #33
    def run_preassembly(self, stmts, print_summary=True):
        """Run complete preassembly procedure on the given statements.

        Results are returned, stored in the attribute
        :py:attr:`results`, and saved in the pickle file
        `<basename>_results.pkl`.

        Parameters
        ----------
        stmts : list of :py:class:`indra.statements.Statement`
            Statements to preassemble.
        print_summary : bool
            If True (default), prints a summary of the preassembly process to
            the console.

        Returns
        -------
        list of :py:class:`indra.statements.Statement`
            The preassembled statements, as returned by
            :py:func:`indra.tools.assemble_corpus.run_preassembly`.
        """
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        self.results = ac.run_preassembly(stmts)
        # Save the results if we're caching
        if self.basename is not None:
            results_filename = '%s_results.pkl' % self.basename
            with open(results_filename, 'wb') as f:
                pickle.dump(self.results, f)
        return self.results
Example #34
def test_merge_groundings():
    refs1 = {'UN': [('x', 0.8), ('y', 0.7)],
             'B': 'x',
             'C': 'y'}
    refs2 = {'UN': [('x', 0.9), ('y', 0.6), ('z', 0.5)],
             'B': 'x',
             'D': 'z'}
    stmts = [Influence(Event(Concept('a', db_refs=refs1)),
                       Event(Concept('b', db_refs=refs2)),
                       evidence=[Evidence(source_api='eidos', text='1')]),
             Influence(Event(Concept('a', db_refs=refs2)),
                       Event(Concept('b', db_refs=refs1)),
                       evidence=[Evidence(source_api='eidos', text='2')])]
    stmts = ac.run_preassembly(stmts)
    assert len(stmts) == 1
    stmts = ac.merge_groundings(stmts)
    assert stmts[0].subj.concept.db_refs == \
           {'UN': [('x', 0.9), ('y', 0.7), ('z', 0.5)],
            'B': 'x', 'C': 'y', 'D': 'z'}, \
        stmts[0].subj.db_refs
    assert stmts[0].obj.concept.db_refs == stmts[0].subj.concept.db_refs
Example #35
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'))

    assemble_models = []
    assemble_models.append('sif')
    assemble_models.append('pysb')
    assemble_models.append('cx')

    ### PySB assembly
    if 'pysb' in assemble_models:
        pysb_model = assemble_pysb(stmts, data_genes,
                                   pjoin(outf, 'korkut_model_pysb.py'))
    ### SIF assembly
    if 'sif' in assemble_models:
        sif_str = assemble_sif(stmts, data, pjoin(outf, 'PKN-korkut_all_ab.sif'))
    ### CX assembly
    if 'cx' in assemble_models:
Example #36
def test_run_preassembly():
    st_out = ac.run_preassembly([st1, st3, st5, st6])
    assert len(st_out) == 2
Example #37
def test_run_preassembly_all_stmts():
    st_out = ac.run_preassembly([st1, st3, st5, st6], return_toplevel=False)
    assert len(st_out) == 4
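
Taken together, the last two tests show the effect of return_toplevel: True (the default) yields only the most specific statements, while False also includes the statements they are supported by, in one flat list. A minimal sketch, assuming `stmts` is any list of INDRA Statements:

from indra.tools import assemble_corpus as ac

top_level = ac.run_preassembly(stmts)                    # most specific only
flat = ac.run_preassembly(stmts, return_toplevel=False)  # top-level + supported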