Example #1
0
def test_map_sequence():
    """Check how sequence mapping treats three MAPK1 phosphosites."""
    mapk1 = Agent('MAPK1', db_refs={'UP': 'P28482', 'HGNC': '6871'})
    phos_t182 = Phosphorylation(None, mapk1, 'T', '182')
    phos_t185 = Phosphorylation(None, mapk1, 'T', '185')
    phos_y999 = Phosphorylation(None, mapk1, 'Y', '999')
    # Both T182 and T185 are expected to come out at position 185
    for stmt in (phos_t182, phos_t185):
        mapped = ac.map_sequence([stmt])
        assert len(mapped) == 1
        assert mapped[0].position == '185'
    # Y999 is expected to be dropped altogether
    dropped = ac.map_sequence([phos_y999])
    assert len(dropped) == 0
def test_map_sequence():
    """Check how sequence mapping treats three MAPK1 phosphosites.

    On failure of a length check, the mapped statements are shown as the
    assertion message.
    """
    mapk1 = Agent('MAPK1', db_refs={'UP': 'P28482', 'HGNC': '6871'})
    phos_t182 = Phosphorylation(None, mapk1, 'T', '182')
    phos_t185 = Phosphorylation(None, mapk1, 'T', '185')
    phos_y999 = Phosphorylation(None, mapk1, 'Y', '999')
    # Both T182 and T185 are expected to come out at position 185
    for stmt in (phos_t182, phos_t185):
        mapped = ac.map_sequence([stmt])
        assert len(mapped) == 1, mapped
        assert mapped[0].position == '185'
    # Y999 is expected to be dropped altogether
    dropped = ac.map_sequence([phos_y999])
    assert len(dropped) == 0, dropped
Example #3
0
def _do_old_fashioned_preassembly(stmts):
    """Ground, sequence-map and preassemble the given statements.

    Grounding mapping is called with use_adeft=True and gilda_mode='local',
    sequence mapping with use_cache=True, and preassembly with
    return_toplevel=False. The output of preassembly is returned.
    """
    mapped = ac.map_sequence(
        ac.map_grounding(stmts, use_adeft=True, gilda_mode='local'),
        use_cache=True,
    )
    return ac.run_preassembly(mapped, return_toplevel=False)
Example #4
0
    def respond_get_paper_model(self, content):
        """Get and display the model from a paper, indicated by pmid.

        The 'pmid' entry of `content` must have the form 'PMID-<digits>'.
        A missing or malformed value produces a BAD_INPUT failure. If the
        statement lookup fails with a 404 whose reason mentions
        'Invalid or unavailable', a MISSING_MECHANISM failure is returned;
        any other lookup error is re-raised.

        On success, diagrams made from the mapped statements are sent via
        ``self.send_display_model`` and a SUCCESS performative carrying
        'relations-found' (and 'dump-limit' when statements were found) is
        returned.
        """
        prefix = 'PMID-'
        pmid_raw = content.gets('pmid')
        # If there is no 'pmid' entry the lookup yields None; report that as
        # bad input rather than failing on the string operations below.
        if not pmid_raw or not pmid_raw.startswith(prefix):
            return self.make_failure('BAD_INPUT')
        pmid = pmid_raw[len(prefix):]
        if not pmid.isdigit():
            return self.make_failure('BAD_INPUT')

        try:
            stmts = get_statements_for_paper([('pmid', pmid)])
        except IndraDBRestAPIError as e:
            if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
                logger.error("Could not find pmid: %s", e.reason)
                return self.make_failure('MISSING_MECHANISM')
            # Any other API error is unexpected: propagate it unchanged
            raise

        if not stmts:
            resp = KQMLPerformative('SUCCESS')
            resp.set('relations-found', 0)
            return resp
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
        # Diagrams are built from the mapped statements, whereas the count
        # reported below is that of the statements returned by preassembly.
        diagrams = _make_diagrams(stmts)
        self.send_display_model(diagrams)
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', len(unique_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp
Example #5
0
    def respond_get_paper_model(self, content):
        """Get and display the model from a paper, indicated by pmid.

        The 'pmid' entry of `content` must have the form 'PMID-<digits>'.
        A missing or malformed value produces a BAD_INPUT failure. If the
        statement lookup (requested with simple_response=True) fails with a
        404 whose reason mentions 'Invalid or unavailable', a
        MISSING_MECHANISM failure is returned; any other lookup error is
        re-raised.

        On success, diagrams made from the mapped statements are sent via
        ``self.send_display_model`` and a SUCCESS performative carrying
        'relations-found' (and 'dump-limit' when statements were found) is
        returned.
        """
        prefix = 'PMID-'
        pmid_raw = content.gets('pmid')
        # If there is no 'pmid' entry the lookup yields None; report that as
        # bad input rather than failing on the string operations below.
        if not pmid_raw or not pmid_raw.startswith(prefix):
            return self.make_failure('BAD_INPUT')
        pmid = pmid_raw[len(prefix):]
        if not pmid.isdigit():
            return self.make_failure('BAD_INPUT')

        try:
            stmts = get_statements_for_paper([('pmid', pmid)],
                                             simple_response=True)
        except IndraDBRestAPIError as e:
            if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
                logger.error("Could not find pmid: %s", e.reason)
                return self.make_failure('MISSING_MECHANISM')
            # Any other API error is unexpected: propagate it unchanged
            raise

        if not stmts:
            resp = KQMLPerformative('SUCCESS')
            resp.set('relations-found', 0)
            return resp
        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
        # Diagrams are built from the mapped statements, whereas the count
        # reported below is that of the statements returned by preassembly.
        diagrams = _make_diagrams(stmts)
        self.send_display_model(diagrams)
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', len(unique_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp
Example #6
0
    def _make_unique_statement_set(self, stmt_tpls):
        """Ground, sequence-map and de-duplicate a collection of statements.

        `stmt_tpls` is iterated as (sid, statement) pairs. After grounding
        and sequence mapping, the statements are split into matching groups
        by ``self.pa``; a generic copy of the first member of each group
        becomes the unique statement for that group.

        Returns a tuple of the list of unique statements and the result of
        ``flatten_evidence_dict`` applied to a mapping from each unique
        statement's shallow hash to the set of sids of its group members.
        """
        raw_stmts = []
        sid_by_uuid = {}
        for sid, raw_stmt in stmt_tpls:
            sid_by_uuid[raw_stmt.uuid] = sid
            raw_stmts.append(raw_stmt)
        mapped_stmts = ac.map_sequence(ac.map_grounding(raw_stmts))
        groups = self.pa._get_stmt_matching_groups(mapped_stmts)
        unique_stmts = []
        evidence_links = defaultdict(set)
        for _, duplicates in groups:
            for idx, dup in enumerate(duplicates):
                # The first member of the group supplies the representative
                # copy and the hash that all members are linked to
                if idx == 0:
                    representative = dup.make_generic_copy()
                    rep_hash = representative.get_hash(shallow=True)
                evidence_links[rep_hash].add(sid_by_uuid[dup.uuid])
            # This should never be None or anything else
            assert isinstance(representative, type(dup))
            unique_stmts.append(representative)
        return unique_stmts, flatten_evidence_dict(evidence_links)
Example #7
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        Which optional steps run is decided by the entries of
        ``self.assembly_config``. The final list of statements is stored in
        ``self.assembled_stmts``; nothing is returned.
        """
        def option(key):
            # Read the option from the config at the point of use
            return self.assembly_config.get(key)

        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)

        # Grounding-related steps
        if not option('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if option('standardize_names'):
            # The return value of this call is not used
            ac.standardize_names_groundings(stmts)
        if option('filter_ungrounded'):
            threshold = option('score_threshold')
            stmts = ac.filter_grounded_only(stmts, score_threshold=threshold)
        if option('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if option('merge_deltas'):
            stmts = ac.merge_deltas(stmts)

        # Relevance, organism and site filters
        policy = option('filter_relevance')
        if policy:
            stmts = self.filter_relevance(stmts, policy)
        if not option('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not option('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)

        # Use WM hierarchies and belief scorer for WM preassembly
        pa_kwargs = {'return_toplevel': False}
        if option('preassembly_mode') == 'wm':
            wm_hierarchies = get_wm_hierarchies()
            pa_kwargs['belief_scorer'] = get_eidos_scorer()
            pa_kwargs['hierarchies'] = wm_hierarchies
        stmts = ac.run_preassembly(stmts, **pa_kwargs)

        cutoff = option('belief_cutoff')
        if cutoff is not None:
            stmts = ac.filter_belief(stmts, cutoff)
        stmts = ac.filter_top_level(stmts)

        if option('filter_direct'):
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if option('mechanism_linking'):
            linker = MechLinker(stmts)
            linker.gather_explicit_activities()
            linker.reduce_activities()
            linker.gather_modifications()
            linker.reduce_modifications()
            # Explicit activities are gathered a second time before
            # activations are replaced
            linker.gather_explicit_activities()
            linker.replace_activations()
            linker.require_active_forms()
            stmts = linker.statements

        self.assembled_stmts = stmts
Example #8
0
def process_statements(stmts, **generate_id_map_kwargs):
    """Map, de-duplicate and compute match-key maps for statements.

    The statements are grounding- and sequence-mapped, then reduced to a
    unique set with a Preassembler built from the module-level
    `hierarchies`. Extra keyword arguments are forwarded to
    `get_match_key_maps`.

    Returns a tuple of the unique statements and the match-key maps.
    """
    mapped_stmts = ac.map_sequence(ac.map_grounding(stmts))
    preassembler = Preassembler(hierarchies)
    unique_stmts = make_unique_statement_set(preassembler, mapped_stmts)
    key_maps = get_match_key_maps(preassembler, unique_stmts,
                                  **generate_id_map_kwargs)
    return unique_stmts, key_maps
Example #9
0
def get_omnipath_stmts():
    """Return sequence-mapped, human-only (de)phosphorylations from OmniPath.

    All modifications are fetched from the OmniPath client, reduced to
    Phosphorylation and Dephosphorylation statements, and then passed
    through sequence mapping and the human-only filter.
    """
    all_mods = omnipath_client.get_all_modifications()
    phos_and_dephos = (ac.filter_by_type(all_mods, Phosphorylation)
                       + ac.filter_by_type(all_mods, Dephosphorylation))
    mapped = ac.map_sequence(phos_and_dephos)
    # Filtering to specific genes is currently switched off:
    #stmts = ac.filter_genes_only(stmts, specific_only=True)
    return ac.filter_human_only(mapped)
Example #10
0
def map_sequence():
    """Map sequence on a list of INDRA Statements.

    The statements are read from the 'statements' entry of the JSON request
    body. An OPTIONS request is answered with an empty dict.
    """
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(payload.get('statements'))
    return _return_stmts(ac.map_sequence(stmts))
Example #11
0
def map_sequence():
    """Map sequence on a list of INDRA Statements.

    The statements are read from the 'statements' entry of the JSON request
    body. An OPTIONS request is answered with an empty dict.
    """
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(payload.get('statements'))
    return _return_stmts(ac.map_sequence(stmts))
Example #12
0
def run_assembly(stmts, filename):
    """Assemble statements restricted to `gene_names` and dump the result.

    The statements are grounding-mapped, filtered to grounded and human
    entities, filtered against the module-level `gene_names` (policy 'one',
    families allowed), sequence-mapped and preassembled with a pool size
    of 4. The result is dumped to `filename` and returned.
    """
    grounded = ac.filter_grounded_only(ac.map_grounding(stmts))
    human = ac.filter_human_only(grounded)
    # Family expansion is currently switched off:
    #stmts = ac.expand_families(stmts)
    relevant = ac.filter_gene_list(human, gene_names, 'one',
                                   allow_families=True)
    site_mapped = ac.map_sequence(relevant)
    assembled = ac.run_preassembly(site_mapped, return_toplevel=False,
                                   poolsize=4)
    ac.dump_statements(assembled, filename)
    return assembled
Example #13
0
def test_readme_pipeline():
    """Run an assemble_corpus pipeline on `gn_stmts` and expect output.

    The body presumably mirrors the snippet shown in the README (see the
    note on the first line), so the statements are kept in that form. The
    test fails if the belief-filtered result is empty.
    """
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    # Drop hypotheses, fix grounding and keep grounded, human-only content
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    # Fix sites, preassemble and keep statements passing the 0.8 cutoff
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
    def _clean_statements(self, stmts):
        """Run grounding mapping followed by sequence mapping on stmts.

        Each stage is announced through ``self._log``. Sequence mapping is
        called with use_cache=True and its output is returned.
        """
        self._log("Map grounding...")
        grounded = ac.map_grounding(stmts)
        self._log("Map sequences...")
        return ac.map_sequence(grounded, use_cache=True)
Example #15
0
def test_map_sequence_blank_entries():
    """Make sure sites curated as erroneous with no mappings don't
    get treated as valid mappings."""
    mapk1 = Agent('MAPK1', db_refs={'UP': 'P28482'})
    rps6 = Agent('RPS6', db_refs={'UP': 'P62753'})
    rps6_phos_t389 = Agent(
        'RPS6',
        mods=[ModCondition('phosphorylation', 'T', '389')],
        db_refs={'UP': 'P62753'},
    )
    # T389 appears once as the modified site of a statement and once as a
    # modification condition on the enzyme agent
    site_on_stmt = Phosphorylation(mapk1, rps6, 'T', '389')
    site_on_agent = Phosphorylation(rps6_phos_t389, mapk1, 'T', '185')
    result = ac.map_sequence([site_on_stmt, site_on_agent])
    assert len(result) == 0
Example #16
0
    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the Statements collected in the model.

        The model's statements are stripped of hypotheses, grounding-mapped,
        sequence-mapped and preassembled, with optional filters applied in
        between. The statements left after the relevance filter are saved in
        ``self.assembled_stmts``.

        Filter options:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        The first two are applied directly here; the complete `filters`
        value is handed to ``self._relevance_filter``, which presumably
        implements the prior_* options (not visible in this method).

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs). It is only passed on when it is not None.
        """
        requested = filters or ()
        # Only forward the grounding map if the caller supplied one
        gm_kwargs = {}
        if grounding_map is not None:
            gm_kwargs['grounding_map'] = grounding_map

        # Filter out hypotheses, then fix grounding
        stmts = ac.filter_no_hypothesis(self.get_statements())
        stmts = ac.map_grounding(stmts, **gm_kwargs)
        if 'grounding' in requested:
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)
        if 'human_only' in requested:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly, then the relevance filter, and save the result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        self.assembled_stmts = self._relevance_filter(stmts, filters)
Example #17
0
    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the Statements collected in the model.

        The model's statements are stripped of hypotheses, grounding-mapped,
        sequence-mapped and preassembled, with optional filters applied in
        between. The statements left after the relevance filter are saved in
        ``self.assembled_stmts``.

        Filter options:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        The first two are applied directly here; the complete `filters`
        value is handed to ``self._relevance_filter``, which presumably
        implements the prior_* options (not visible in this method).

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs). It is only passed on when it is not None.
        """
        requested = filters or ()
        # Only forward the grounding map if the caller supplied one
        gm_kwargs = {}
        if grounding_map is not None:
            gm_kwargs['grounding_map'] = grounding_map

        # Filter out hypotheses, then fix grounding
        stmts = ac.filter_no_hypothesis(self.get_statements())
        stmts = ac.map_grounding(stmts, **gm_kwargs)
        if 'grounding' in requested:
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)
        if 'human_only' in requested:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly, then the relevance filter, and save the result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        self.assembled_stmts = self._relevance_filter(stmts, filters)
Example #18
0
def map_grounding():
    """Map sequence on a list of INDRA Statements.

    The statements are read from the 'statements' entry of the JSON request
    body; the response is a dict with the mapped statements as JSON under
    'statements' (an empty list if nothing is left after mapping).
    """
    # NOTE(review): the function is named map_grounding but runs
    # ac.map_sequence - confirm which of the two is intended.
    payload = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(payload.get('statements'))
    stmts_out = ac.map_sequence(stmts)
    # An empty result is reported directly, without serialization
    serialized = stmts_to_json(stmts_out) if stmts_out else []
    return {'statements': serialized}
Example #19
0
def run_assembly(stmts, save_file):
    """Assemble direct, high-belief statements and dump them to a file.

    The statements are grounded and filtered (grounded, human-only), their
    families are expanded, and they are restricted to the module-level
    `gene_names` (policy 'one'). After sequence mapping and preassembly,
    only statements that pass the 0.95 belief cutoff and the top-level,
    direct and enzyme-kinase filters are kept. The result is dumped to
    `save_file` and returned.
    """
    # Grounding and entity-level filtering
    grounded = ac.filter_grounded_only(ac.map_grounding(stmts))
    expanded = ac.expand_families(ac.filter_human_only(grounded))
    relevant = ac.filter_gene_list(expanded, gene_names, 'one')
    # Site mapping and preassembly
    preassembled = ac.run_preassembly(ac.map_sequence(relevant),
                                      return_toplevel=False)
    # Post-assembly filters
    believed = ac.filter_belief(preassembled, 0.95)
    top_level = ac.filter_top_level(believed)
    direct = ac.filter_direct(top_level)
    final_stmts = ac.filter_enzyme_kinase(direct)
    ac.dump_statements(final_stmts, save_file)
    return final_stmts
Example #20
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        The statements obtained from ``self.get_indra_smts()`` are stripped
        of hypotheses, grounding- and sequence-mapped, filtered to human
        only, and preassembled with return_toplevel=False.

        Returns
        -------
        stmts : list[indra.statements.Statement]
            The list of assembled INDRA Statements.
        """
        raw_stmts = ac.filter_no_hypothesis(self.get_indra_smts())
        mapped = ac.map_sequence(ac.map_grounding(raw_stmts))
        human_only = ac.filter_human_only(mapped)
        return ac.run_preassembly(human_only, return_toplevel=False)
Example #21
0
def get_indra_phos_stmts():
    """Collect, assemble and dump (de)phosphorylation statements.

    Phosphorylation and Dephosphorylation statements are fetched with
    `by_gene_role_type`, grounded, family-expanded, filtered to grounded
    entities and sequence-mapped. The site-mapped statements and the final
    human, specific-gene-only preassembled statements are each dumped to a
    pickle under 'sources/'; preassembly saves its own output as well.
    The final statements are returned.
    """
    collected = by_gene_role_type(stmt_type='Phosphorylation')
    collected += by_gene_role_type(stmt_type='Dephosphorylation')
    grounded = ac.map_grounding(collected)
    # Expand families before site mapping
    expanded = ac.filter_grounded_only(ac.expand_families(grounded))
    site_mapped = ac.map_sequence(expanded)
    ac.dump_statements(site_mapped, 'sources/indra_phos_sitemap.pkl')
    preassembled = ac.run_preassembly(
        site_mapped,
        poolsize=4,
        save='sources/indra_phos_stmts_pre.pkl',
    )
    final_stmts = ac.filter_genes_only(ac.filter_human_only(preassembled),
                                       specific_only=True)
    ac.dump_statements(final_stmts, 'sources/indra_phos_stmts.pkl')
    return final_stmts
Example #22
0
    def _clean_statements(self, stmts):
        """Run grounding and sequence mapping, tracking lost statements.

        Returns a tuple of the mapped statements and a dict with the keys
        'grounding' and 'sequence mapping', each holding the set of uuids
        that were present before the respective stage but not after it.
        """
        def uuids_of(statements):
            return {s.uuid for s in statements}

        uuids_before = uuids_of(stmts)
        self._log("Map grounding...")
        stmts = ac.map_grounding(stmts, use_adeft=True, gilda_mode='local')
        uuids_grounded = uuids_of(stmts)
        self._log("Map sequences...")
        stmts = ac.map_sequence(stmts, use_cache=True)
        uuids_site_mapped = uuids_of(stmts)
        eliminated_uuids = {
            'grounding': uuids_before - uuids_grounded,
            'sequence mapping': uuids_grounded - uuids_site_mapped,
        }
        return stmts, eliminated_uuids
Example #23
0
def load_statements_from_synapse(synapse_id='syn11273504'):
    """Build Phosphorylation statements from a tab-separated Synapse file.

    In each row, the first column is split at ':' into a substrate name and
    site info (first character: residue, remainder: position), and the
    second column is split at ',' into enzyme names. One statement is
    created per enzyme. The statements are then sequence-mapped and
    filtered to human, specific genes only before being returned.
    """
    client = synapseclient.Synapse()
    client.login()
    # Obtain a pointer and download the data
    entity = client.get(synapse_id)
    stmts = []
    for row in read_unicode_csv(entity.path, delimiter='\t'):
        substrate_name, site = row[0].split(':')
        residue, position = site[0], site[1:]
        for enzyme_name in row[1].split(','):
            # Fresh Agent objects are created for every statement
            enzyme = Agent(enzyme_name, db_refs=get_ids(enzyme_name))
            substrate = Agent(substrate_name,
                              db_refs=get_ids(substrate_name))
            stmts.append(Phosphorylation(enzyme, substrate,
                                         residue, position))
    mapped = ac.map_sequence(stmts)
    human_only = ac.filter_human_only(mapped)
    return ac.filter_genes_only(human_only, specific_only=True)
Example #24
0
    def run_preassembly(self, stmts, print_summary=True):
        """Run grounding mapping, sequence mapping and preassembly.

        The output of ``ac.run_preassembly`` is stored in the attribute
        :py:attr:`results`. If ``self.basename`` is not None, it is also
        saved in the pickle file `<basename>_results.pkl`.

        Parameters
        ----------
        stmts : list of :py:class:`indra.statements.Statement`
            Statements to preassemble.
        print_summary : bool
            Not used by this method body; kept as part of the signature.

        Returns
        -------
        The value returned by ``ac.run_preassembly`` for the mapped
        statements (same object as :py:attr:`results`).
        """
        mapped = ac.map_sequence(ac.map_grounding(stmts))
        self.results = ac.run_preassembly(mapped)
        # Nothing to save unless we're caching
        if self.basename is None:
            return self.results
        with open('%s_results.pkl' % self.basename, 'wb') as f:
            pickle.dump(self.results, f)
        return self.results
def pa_filter_unique_evidence(stmts):
    """Wrapper function for chaining preassembly steps meant to reduce
    the number of statements.

    stmts : list[:py:class:`indra.statements.Statement`]

    Returns
    -------
    stmts : list[:py:class:`indra.statements.Statement`]
        List of preassembled indra statements
    """
    # Ground the statements, then use curated site information to
    # standardize the modification sites in them
    site_mapped = ac.map_sequence(ac.map_grounding(stmts))

    # Combine the raw statements through preassembly (not only top level)
    return ac.run_preassembly(site_mapped, return_toplevel=False)
Example #26
0
def test_gene_network():
    """Walk through the chunks of a gene network example for H2AX.

    Each "Chunk N" section presumably corresponds to a snippet of the
    documented example, so the code is kept in that form. The test fetches
    PMIDs and abstracts, reads them with REACH, assembles the statements
    together with `gn_stmts`, and checks that the CX, IndraNet and PySB
    assemblers each produce a non-empty result.
    """
    # Chunk 1: this is tested in _get_gene_network_stmts
    # from indra.tools.gene_network import GeneNetwork
    # gn = GeneNetwork(['H2AX'])
    # biopax_stmts = gn.get_biopax_stmts()
    # bel_stmts = gn.get_bel_stmts()

    # Chunk 2
    from indra import literature
    pmids = literature.pubmed_client.get_ids_for_gene('H2AX')

    # Chunk 3
    from indra import literature
    paper_contents = {}
    for pmid in pmids:
        content, content_type = literature.get_full_text(pmid, 'pmid')
        # Only abstracts are collected
        if content_type == 'abstract':
            paper_contents[pmid] = content
        if len(paper_contents) == 5:  # Is 10 in actual code
            break

    # Chunk 4
    from indra.sources import reach

    literature_stmts = []
    for pmid, content in paper_contents.items():
        rp = reach.process_text(content, url=reach.local_text_url)
        literature_stmts += rp.statements
    print('Got %d statements' % len(literature_stmts))
    assert literature_stmts  # replaces a print statement

    # Chunk 6
    from indra.tools import assemble_corpus as ac
    # stmts = biopax_stmts + bel_stmts + literature_stmts  # tested elsewhere
    stmts = gn_stmts + literature_stmts  # Added instead of above line
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts)
    assert stmts

    # Chunk 7
    from indra.assemblers.cx import CxAssembler
    from indra.databases import ndex_client
    cxa = CxAssembler(stmts)
    cx_str = cxa.make_model()
    assert cx_str

    # Chunk 8 (not run here: it would upload the network to NDEx)
    # ndex_cred = {'user': '******', 'password': '******'}
    # network_id = ndex_client.create_network(cx_str, ndex_cred)
    # print(network_id)

    # Chunk 9
    from indra.assemblers.indranet import IndraNetAssembler
    indranet_assembler = IndraNetAssembler(statements=stmts)
    indranet = indranet_assembler.make_model()
    assert len(indranet.nodes) > 0, 'indranet conatins no nodes'
    assert len(indranet.edges) > 0, 'indranet conatins no edges'

    # Chunk 10
    import networkx as nx
    paths = nx.single_source_shortest_path(G=indranet, source='H2AX', cutoff=1)
    assert paths

    # Chunk 11
    from indra.assemblers.pysb import PysbAssembler
    pysb = PysbAssembler(statements=stmts)
    pysb_model = pysb.make_model()
    assert pysb_model
Example #27
0
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(reading_stmts,
                                    save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'))

    assemble_models = []
    assemble_models.append('sif')
    assemble_models.append('pysb')
    assemble_models.append('cx')

    ### PySB assembly
    if 'pysb' in assemble_models:
        pysb_model = assemble_pysb(stmts, data_genes,
                                   pjoin(outf, 'korkut_model_pysb.py'))
    ### SIF assembly
    if 'sif' in assemble_models:
        sif_str = assemble_sif(stmts, data, pjoin(outf, 'PKN-korkut_all_ab.sif'))
Example #28
0
    # The file in which the preassembled statements will be saved
    pre_stmts_file = prefixed_pkl('preassembled')
    if reassemble:
        # Load various files that were previously produced
        sources = [
            'indradb', 'trips', 'bel', 'biopax', 'phosphosite', 'r3', 'sparser'
        ]
        stmts = []
        for source in sources:
            stmts += ac.load_statements(prefixed_pkl(source))
        stmts = ac.filter_no_hypothesis(stmts)
        # Fix grounding and filter to grounded entities and for proteins,
        # filter to the human ones
        stmts = ac.map_grounding(stmts)
        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_human_only(stmts)
        # Combinatorially expand protein families
        stmts = ac.expand_families(stmts)
        # Apply a strict filter to statements based on the gene names
        stmts = ac.filter_gene_list(stmts, gene_names, 'all')
        # Fix errors in references to protein sequences
        stmts = ac.map_sequence(stmts)
        # Run preassembly and save result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        ac.dump_statements(stmts, pre_stmts_file)

    # Load the preassembled statements
    stmts = ac.load_statements(pre_stmts_file)
    # Run assembly into a PySB model
    assemble_pysb.assemble_pysb(stmts, gene_names, contextualize=True)
Example #29
0
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf,
                                                    'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts + extra_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts,
                                   return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'),
                                   poolsize=4)

    ### PySB assembly
    if 'pysb' in assemble_models:
        pysb_model = assemble_pysb(stmts, data_genes,
                                   pjoin(outf, 'korkut_model_pysb.py'))
    ### SIF assembly
    if 'sif' in assemble_models:
        sif_str = assemble_sif(stmts, data, pjoin(outf,
                                                  'PKN-korkut_all_ab.sif'))
    ### CX assembly
Example #30
0
def _do_old_fashioned_preassembly(stmts):
    """Ground, sequence-map and preassemble the given statements.

    Preassembly is called with return_toplevel=False and its output is
    returned.
    """
    site_mapped = ac.map_sequence(ac.map_grounding(stmts))
    return ac.run_preassembly(site_mapped, return_toplevel=False)