Esempio n. 1
0
def read_phosphosite_owl(fname=phosphosite_owl_file):
    """Process a BioPAX OWL file and tag all evidence as PhosphoSite-derived.

    :param fname: Path to the OWL file handed to ``biopax.process_owl``;
        defaults to the module-level ``phosphosite_owl_file``.
    :return: The ``statements`` of the resulting processor. Every evidence
        object on every statement has ``source_api`` set to
        ``'phosphosite'`` and ``epistemics`` set to ``{'direct': True}``.
    """
    processor = biopax.process_owl(fname)
    for statement in processor.statements:
        for evidence in statement.evidence:
            # Overwrite the provenance fields on each evidence in place;
            # every evidence gets its own fresh epistemics dict.
            evidence.source_api = 'phosphosite'
            evidence.epistemics = {'direct': True}
    return processor.statements
Esempio n. 2
0
def from_biopax(path: str, **kwargs):
    """Build a BEL graph from a Pathway Commons `BioPAX <http://www.biopax.org/>`_ file via :mod:`indra`.

    The OWL file is processed with INDRA's ``process_owl`` and the resulting
    statements are forwarded to :func:`from_indra_statements`.

    :param path: Path to a BioPAX OWL file
    :param kwargs: Passed through unchanged to :func:`from_indra_statements`
    :rtype: pybel.BELGraph

    .. warning:: Not compatible with all BioPAX! See INDRA documentation.
    """
    # Imported lazily so INDRA is only needed when this function is called.
    from indra.sources.biopax import process_owl

    indra_statements = process_owl(path).statements
    return from_indra_statements(stmts=indra_statements, **kwargs)
Esempio n. 3
0
def from_biopax(path, name=None, version=None, description=None):
    """Build a BEL graph from a Pathway Commons `BioPAX <http://www.biopax.org/>`_ file via :mod:`indra`.

    The OWL file is processed with INDRA's ``process_owl`` and the resulting
    statements, together with the graph metadata, are forwarded to
    :func:`from_indra_statements`.

    :param str path: Path to a BioPAX OWL file
    :param str name: The name for the BEL graph
    :param str version: The version of the BEL graph
    :param str description: The description of the BEL graph
    :rtype: pybel.BELGraph

    .. warning:: Not compatible with all BioPAX! See INDRA documentation.
    """
    # Imported lazily so INDRA is only needed when this function is called.
    from indra.sources.biopax import process_owl

    graph_metadata = {
        'name': name,
        'version': version,
        'description': description,
    }
    indra_statements = process_owl(path).statements
    return from_indra_statements(stmts=indra_statements, **graph_metadata)
Esempio n. 4
0
def from_biopax(
    path: str,
    name: Optional[str] = None,
    version: Optional[str] = None,
    description: Optional[str] = None,
    authors: Optional[str] = None,
    contact: Optional[str] = None,
    license: Optional[str] = None,
    copyright: Optional[str] = None,
    disclaimer: Optional[str] = None,
):
    """Build a BEL graph from a Pathway Commons `BioPAX <http://www.biopax.org/>`_ file via :mod:`indra`.

    The OWL file is processed with INDRA's ``process_owl`` and the resulting
    statements, together with all graph metadata, are forwarded to
    :func:`from_indra_statements`.

    :param path: Path to a BioPAX OWL file
    :param name: The name for the BEL graph
    :param version: The version of the BEL graph
    :param description: The description of the graph
    :param authors: The authors of this graph
    :param contact: The contact email for this graph
    :param license: The license for this graph
    :param copyright: The copyright for this graph
    :param disclaimer: The disclaimer for this graph
    :rtype: pybel.BELGraph

    .. warning:: Not compatible with all BioPAX! See INDRA documentation.
    """
    # Imported lazily so INDRA is only needed when this function is called.
    from indra.sources.biopax import process_owl

    # Collect the document metadata once; it is passed on as keywords.
    graph_metadata = {
        'name': name,
        'version': version,
        'description': description,
        'authors': authors,
        'contact': contact,
        'license': license,
        'copyright': copyright,
        'disclaimer': disclaimer,
    }
    indra_statements = process_owl(path).statements
    return from_indra_statements(stmts=indra_statements, **graph_metadata)
Esempio n. 5
0
    def get_biopax_stmts(self,
                         filter=False,
                         query='pathsbetween',
                         database_filter=None):
        """Get relevant statements from Pathway Commons.

        Performs a "paths between" query for the genes in :py:attr:`gene_list`
        and uses the results to build statements. This function caches two
        files: the list of statements built from the query, which is cached in
        `<basename>_biopax_stmts.pkl`, and the OWL file returned by the Pathway
        Commons Web API, which is cached in `<basename>_pc_pathsbetween.owl`.
        If these cached files are found, then the results are returned based
        on the cached file and Pathway Commons is not queried again. Caching
        is only active when :py:attr:`basename` is not None.

        The statement cache always holds the unfiltered statements; the
        optional gene list filter is applied after loading, so the result
        of a call does not depend on whether the cache was already present.

        Parameters
        ----------
        filter : Optional[bool]
            If True, includes only those statements that exclusively mention
            genes in :py:attr:`gene_list`. Default is False.
        query : Optional[str]
            Defines what type of query is executed. The two options are
            'pathsbetween' which finds paths between the given list of genes
            and only works if more than 1 gene is given, and 'neighborhood'
            which searches the immediate neighborhood of each given gene.
            A 'pathsbetween' query with fewer than 2 genes falls back to
            'neighborhood'. Note that for pathsbetween queries with more
            than 60 genes, the query will be executed in multiple blocks
            for scalability.
        database_filter: Optional[list[str]]
            A list of PathwayCommons databases to include in the query.

        Returns
        -------
        list of :py:class:`indra.statements.Statement`
            List of INDRA statements extracted from Pathway Commons. An
            empty list is returned (and an error logged) if `query` is not
            one of the supported types.
        """
        # If we're using a cache, initialize the appropriate filenames
        caching = self.basename is not None
        if caching:
            biopax_stmt_path = '%s_biopax_stmts.pkl' % self.basename
            biopax_ras_owl_path = '%s_pc_pathsbetween.owl' % self.basename

        if caching and os.path.exists(biopax_stmt_path):
            # Cached statement file found: load it instead of re-processing.
            logger.info("Loading Biopax statements from %s" % biopax_stmt_path)
            with open(biopax_stmt_path, 'rb') as f:
                # NOTE: unpickling is only safe for cache files written by
                # this method; never point basename at untrusted data.
                stmts = pickle.load(f)
        else:
            # Check for cached OWL file before querying the Web API
            if caching and os.path.exists(biopax_ras_owl_path):
                logger.info("Loading Biopax from OWL file %s" %
                            biopax_ras_owl_path)
                bp = biopax.process_owl(biopax_ras_owl_path)
            # OWL file not found; do query and save to file
            else:
                if (len(self.gene_list) < 2) and (query == 'pathsbetween'):
                    logger.warning('Using neighborhood query for one gene.')
                    query = 'neighborhood'
                if query == 'pathsbetween':
                    # Large gene lists are queried in blocks of 60
                    block_size = 60 if len(self.gene_list) > 60 else None
                    bp = biopax.process_pc_pathsbetween(
                        self.gene_list,
                        database_filter=database_filter,
                        block_size=block_size)
                elif query == 'neighborhood':
                    bp = biopax.process_pc_neighborhood(
                        self.gene_list, database_filter=database_filter)
                else:
                    logger.error('Invalid query type: %s' % query)
                    return []
                # Save the OWL model if we're caching
                if caching:
                    bp.save_model(biopax_ras_owl_path)
            stmts = bp.statements
            # Save the (unfiltered) statements to pickle if we're caching
            if caching:
                with open(biopax_stmt_path, 'wb') as f:
                    pickle.dump(stmts, f)

        # Optionally filter by our gene set. This runs for cached and
        # freshly built statements alike so `filter` is never ignored.
        if filter:
            policy = 'one' if len(self.gene_list) > 1 else 'all'
            stmts = ac.filter_gene_list(stmts, self.gene_list, policy)
        return stmts
Esempio n. 6
0
def process_owl(path):
    """Process the BioPAX OWL file at `path` and return its statements.

    This is a thin wrapper around ``biopax.process_owl`` that returns the
    ``statements`` attribute of the processor instead of the processor.
    """
    return biopax.process_owl(path).statements
Esempio n. 7
0
import os
from collections import defaultdict
from indra.sources import biopax
from indra.statements import *
import indra.sources.biopax.processor as bpc
from indra.util import unicode_strs
from nose.plugins.attrib import attr

# The test OWL fixture lives in the same directory as this module.
_here = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(_here, 'biopax_test.owl')

# Process the fixture once at import time; the tests below share the result.
bp = biopax.process_owl(model_path)

# Index statements by the last path segment of each evidence's source ID.
stmts_by_source_id = defaultdict(set)
for stmt in bp.statements:
    for ev in stmt.evidence:
        short_id = ev.source_id.split('/')[-1]
        stmts_by_source_id[short_id].add(stmt)


def test_listify():
    """Check ``_listify`` on a scalar, a two-element list and a one-element list.

    The test expects a scalar to be wrapped in a list and a list to come
    back equal to itself.
    """
    assert bpc._listify(1) == [1]
    # The comparison must sit outside the call. Writing
    # ``_listify([1, 2] == [1, 2])`` would pass ``True`` to ``_listify`` and
    # assert on a truthy result, so the check could never fail.
    assert bpc._listify([1, 2]) == [1, 2]
    assert bpc._listify([1]) == [1]


def test_protein_family_agent():
    """Check that one specific protein entity yields agents MAPK1 and MAPK3."""
    entity = bp.model.objects['Protein_da79d1a005a8eb259b0c09278ae9230e']
    family_members = bp._get_agents_from_entity(entity)
    # Exactly two agents are expected for this entity.
    assert len(family_members) == 2
    member_names = {member.name for member in family_members}
    assert member_names == {'MAPK1', 'MAPK3'}
Esempio n. 8
0
def save_phosphorylation_stmts(owl_file, pkl_file):
    """Extract modification sites from a BioPAX OWL file and pickle them.

    The OWL file is processed with ``biopax.process_owl``; the resulting
    statements are passed to ``get_mod_sites`` and whatever it returns is
    written to `pkl_file` with :mod:`pickle`.

    :param owl_file: Path of the BioPAX OWL file to process.
    :param pkl_file: Path of the pickle file to write (opened in ``'wb'``
        mode, so an existing file is overwritten).
    :return: The value returned by ``get_mod_sites``.
    """
    processor = biopax.process_owl(owl_file)
    mod_sites = get_mod_sites(processor.statements)
    with open(pkl_file, 'wb') as out_file:
        pickle.dump(mod_sites, out_file)
    return mod_sites
Esempio n. 9
0
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import os
from indra.java_vm import autoclass, cast
from indra.sources import biopax
import indra.sources.biopax.processor as bpc
from indra.databases import uniprot_client
from indra.util import unicode_strs
from indra.preassembler import Preassembler
from indra.preassembler.hierarchy_manager import hierarchies
from nose.plugins.attrib import attr

# The test OWL fixture sits in a data directory two levels above this module.
_this_dir = os.path.dirname(os.path.abspath(__file__))
model_path = _this_dir + '/../../data/biopax_test.owl'

uri_prefix = 'http://purl.org/pc2/7/'
# Process the fixture once at import time; the tests below share the result.
bp = biopax.process_owl(model_path)

def test_paxtools_autoclass():
    """Smoke test: ``autoclass`` can resolve the Paxtools ``ProteinImpl`` class."""
    class_name = 'org.biopax.paxtools.impl.level3.ProteinImpl'
    # Passes as long as the lookup does not raise.
    autoclass(class_name)

def test_biopaxpattern_autoclass():
    """Smoke test: ``autoclass`` can resolve the Paxtools ``PatternBox`` class."""
    class_name = 'org.biopax.paxtools.pattern.PatternBox'
    # Passes as long as the lookup does not raise.
    autoclass(class_name)

def test_cpath_autoclass():
    """Smoke test: ``autoclass`` can resolve the ``CPathClient`` class."""
    class_name = 'cpath.client.CPathClient'
    # Passes as long as the lookup does not raise.
    autoclass(class_name)

def test_listify():
    """Check ``_listify`` on a scalar, a two-element list and a one-element list.

    The test expects a scalar to be wrapped in a list and a list to come
    back equal to itself.
    """
    assert bpc._listify(1) == [1]
    # The comparison must sit outside the call. Writing
    # ``_listify([1,2] == [1,2])`` would pass ``True`` to ``_listify`` and
    # assert on a truthy result, so the check could never fail.
    assert bpc._listify([1, 2]) == [1, 2]
    assert bpc._listify([1]) == [1]
Esempio n. 10
0
    def get_biopax_stmts(self, filter=False, query='pathsbetween',
                         database_filter=None):
        """Get relevant statements from Pathway Commons.

        Performs a "paths between" query for the genes in :py:attr:`gene_list`
        and uses the results to build statements. This function caches two
        files: the list of statements built from the query, which is cached in
        `<basename>_biopax_stmts.pkl`, and the OWL file returned by the Pathway
        Commons Web API, which is cached in `<basename>_pc_pathsbetween.owl`.
        If these cached files are found, then the results are returned based
        on the cached file and Pathway Commons is not queried again. Caching
        is only active when :py:attr:`basename` is not None.

        The statement cache always holds the unfiltered statements; the
        optional gene list filter is applied after loading, so the result
        of a call does not depend on whether the cache was already present.

        Parameters
        ----------
        filter : Optional[bool]
            If True, includes only those statements that exclusively mention
            genes in :py:attr:`gene_list`. Default is False.
        query : Optional[str]
            Defines what type of query is executed. The two options are
            'pathsbetween' which finds paths between the given list of genes
            and only works if more than 1 gene is given, and 'neighborhood'
            which searches the immediate neighborhood of each given gene.
            A 'pathsbetween' query with fewer than 2 genes falls back to
            'neighborhood'. Note that for pathsbetween queries with more
            than 60 genes, the query will be executed in multiple blocks
            for scalability.
        database_filter: Optional[list[str]]
            A list of PathwayCommons databases to include in the query.

        Returns
        -------
        list of :py:class:`indra.statements.Statement`
            List of INDRA statements extracted from Pathway Commons. An
            empty list is returned (and an error logged) if `query` is not
            one of the supported types.
        """
        # If we're using a cache, initialize the appropriate filenames
        caching = self.basename is not None
        if caching:
            biopax_stmt_path = '%s_biopax_stmts.pkl' % self.basename
            biopax_ras_owl_path = '%s_pc_pathsbetween.owl' % self.basename

        if caching and os.path.isfile(biopax_stmt_path):
            # Cached statement file found: load it instead of re-processing.
            logger.info("Loading Biopax statements from %s" % biopax_stmt_path)
            with open(biopax_stmt_path, 'rb') as f:
                # NOTE: unpickling is only safe for cache files written by
                # this method; never point basename at untrusted data.
                stmts = pickle.load(f)
        else:
            # Check for cached OWL file before querying the Web API
            if caching and os.path.isfile(biopax_ras_owl_path):
                logger.info("Loading Biopax from OWL file %s" % biopax_ras_owl_path)
                bp = biopax.process_owl(biopax_ras_owl_path)
            # OWL file not found; do query and save to file
            else:
                if (len(self.gene_list) < 2) and (query == 'pathsbetween'):
                    logger.warning('Using neighborhood query for one gene.')
                    query = 'neighborhood'
                if query == 'pathsbetween':
                    # Large gene lists are queried in blocks of 60
                    block_size = 60 if len(self.gene_list) > 60 else None
                    bp = biopax.process_pc_pathsbetween(
                        self.gene_list,
                        database_filter=database_filter,
                        block_size=block_size)
                elif query == 'neighborhood':
                    bp = biopax.process_pc_neighborhood(
                        self.gene_list, database_filter=database_filter)
                else:
                    logger.error('Invalid query type: %s' % query)
                    return []
                # Save the OWL model if we're caching
                if caching:
                    bp.save_model(biopax_ras_owl_path)
            stmts = bp.statements
            # Save the (unfiltered) statements to pickle if we're caching
            if caching:
                with open(biopax_stmt_path, 'wb') as f:
                    pickle.dump(stmts, f)

        # Optionally filter by our gene set. This runs for cached and
        # freshly built statements alike so `filter` is never ignored.
        if filter:
            policy = 'one' if len(self.gene_list) > 1 else 'all'
            stmts = ac.filter_gene_list(stmts, self.gene_list, policy)
        return stmts