Exemple #1
0
def test_filter_no_hypothesis():
    a = Agent('MAPK1')
    ev1 = Evidence(epistemics={'hypothesis': True})
    ev2 = Evidence(epistemics={'hypothesis': False})
    st1 = Phosphorylation(None, a, evidence=[ev1, ev2])
    st2 = Phosphorylation(None, a, evidence=[ev1, ev1])
    st_out = ac.filter_no_hypothesis([st1, st2])
Exemple #2
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)
        if not self.assembly_config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if self.assembly_config.get('standardize_names'):
            ac.standardize_names_groundings(stmts)
        if self.assembly_config.get('filter_ungrounded'):
            score_threshold = self.assembly_config.get('score_threshold')
            stmts = ac.filter_grounded_only(stmts,
                                            score_threshold=score_threshold)
        if self.assembly_config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if self.assembly_config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)
        relevance_policy = self.assembly_config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not self.assembly_config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not self.assembly_config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)
        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_mode = self.assembly_config.get('preassembly_mode')
        if preassembly_mode == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            stmts = ac.run_preassembly(stmts,
                                       return_toplevel=False,
                                       belief_scorer=belief_scorer,
                                       hierarchies=hierarchies)
        else:
            stmts = ac.run_preassembly(stmts, return_toplevel=False)
        belief_cutoff = self.assembly_config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if self.assembly_config.get('filter_direct'):
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if self.assembly_config.get('mechanism_linking'):
            ml = MechLinker(stmts)
            ml.gather_explicit_activities()
            ml.reduce_activities()
            ml.gather_modifications()
            ml.reduce_modifications()
            ml.gather_explicit_activities()
            ml.replace_activations()
            ml.require_active_forms()
            stmts = ml.statements

        self.assembled_stmts = stmts
def test_filter_no_hypothesis():
    a = Agent('MAPK1')
    ev1 = Evidence(epistemics={'hypothesis': True})
    ev2 = Evidence(epistemics={'hypothesis': False})
    st1 = Phosphorylation(None, a, evidence=[ev1, ev2])
    st2 = Phosphorylation(None, a, evidence=[ev1, ev1])
    st_out = ac.filter_no_hypothesis([st1, st2])
    assert len(st_out) == 1
Exemple #4
0
def test_readme_pipeline():
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the Statements collected in the model.

        Use INDRA's GroundingMapper, Preassembler and BeliefEngine
        on the IncrementalModel and save the unique statements and
        the top level statements in class attributes.

        Currently the following filter options are implemented:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs).
        """
        stmts = self.get_statements()

        # Filter out hypotheses
        stmts = ac.filter_no_hypothesis(stmts)

        # Fix grounding
        if grounding_map is not None:
            stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
        else:
            stmts = ac.map_grounding(stmts)

        if filters and ('grounding' in filters):
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if filters and 'human_only' in filters:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly
        stmts = ac.run_preassembly(stmts, return_toplevel=False)

        # Run relevance filter
        stmts = self._relevance_filter(stmts, filters)

        # Save Statements
        self.assembled_stmts = stmts
Exemple #6
0
    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the Statements collected in the model.

        Use INDRA's GroundingMapper, Preassembler and BeliefEngine
        on the IncrementalModel and save the unique statements and
        the top level statements in class attributes.

        Currently the following filter options are implemented:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs).
        """
        stmts = self.get_statements()

        # Filter out hypotheses
        stmts = ac.filter_no_hypothesis(stmts)

        # Fix grounding
        if grounding_map is not None:
            stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
        else:
            stmts = ac.map_grounding(stmts)

        if filters and ('grounding' in filters):
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if filters and 'human_only' in filters:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly
        stmts = ac.run_preassembly(stmts, return_toplevel=False)

        # Run relevance filter
        stmts = self._relevance_filter(stmts, filters)

        # Save Statements
        self.assembled_stmts = stmts
Exemple #7
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        Returns
        -------
        stmts : list[indra.statements.Statement]
            The list of assembled INDRA Statements.
        """
        stmts = self.get_indra_smts()
        stmts = ac.filter_no_hypothesis(stmts)
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        return stmts
Exemple #8
0
          ', '.join(assemble_models))
    print('##############')

    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf,
                                                    'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts + extra_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl'))
Exemple #9
0
    # If generic assembly needs to be done (instead of just loading the result)
    # set this to True
    reassemble = False

    # The file in which the preassembled statements will be saved
    pre_stmts_file = prefixed_pkl('preassembled')
    if reassemble:
        # Load various files that were previously produced
        sources = [
            'indradb', 'trips', 'bel', 'biopax', 'phosphosite', 'r3', 'sparser'
        ]
        stmts = []
        for source in sources:
            stmts += ac.load_statements(prefixed_pkl(source))
        stmts = ac.filter_no_hypothesis(stmts)
        # Fix grounding and filter to grounded entities and for proteins,
        # filter to the human ones
        stmts = ac.map_grounding(stmts)
        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_human_only(stmts)
        # Combinatorially expand protein families
        stmts = ac.expand_families(stmts)
        # Apply a strict filter to statements based on the gene names
        stmts = ac.filter_gene_list(stmts, gene_names, 'all')
        # Fix errors in references to protein sequences
        stmts = ac.map_sequence(stmts)
        # Run preassembly and save result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        ac.dump_statements(stmts, pre_stmts_file)