def read_phosphosite_owl(fname=phosphosite_owl_file):
    """Process a BioPAX OWL file and tag its evidence as PhosphoSite-derived.

    Parameters
    ----------
    fname : str
        Path of the OWL file handed to ``biopax.process_owl``. Defaults to
        the module-level ``phosphosite_owl_file``.

    Returns
    -------
    list
        The ``statements`` of the resulting processor. Every evidence object
        attached to those statements has ``source_api`` set to
        ``'phosphosite'`` and ``epistemics`` set to ``{'direct': True}``.
    """
    processor = biopax.process_owl(fname)
    # Flatten the statement -> evidence nesting into one lazy stream.
    all_evidence = (
        evidence
        for statement in processor.statements
        for evidence in statement.evidence
    )
    for evidence in all_evidence:
        evidence.source_api = 'phosphosite'
        # A fresh dict per evidence so the objects never share state.
        evidence.epistemics = {'direct': True}
    return processor.statements
def from_biopax(path: str, **kwargs):
    """Build a BEL graph from a Pathway Commons
    `BioPAX <http://www.biopax.org/>`_ file, using :mod:`indra` to extract
    the statements.

    :param path: Path to a BioPAX OWL file
    :rtype: pybel.BELGraph

    All remaining keyword arguments are forwarded unchanged to
    :func:`from_indra_statements`.

    .. warning:: Not compatible with all BioPAX! See INDRA documentation.
    """
    # Imported lazily so that INDRA is only needed when this function is used.
    from indra.sources.biopax import process_owl

    statements = process_owl(path).statements
    return from_indra_statements(stmts=statements, **kwargs)
def from_biopax(path, name=None, version=None, description=None):
    """Build a BEL graph from a Pathway Commons
    `BioPAX <http://www.biopax.org/>`_ file, using :mod:`indra` to extract
    the statements.

    :param str path: Path to a BioPAX OWL file
    :param str name: The name for the BEL graph
    :param str version: The version of the BEL graph
    :param str description: The description of the BEL graph
    :rtype: pybel.BELGraph

    .. warning:: Not compatible with all BioPAX! See INDRA documentation.
    """
    # Imported lazily so that INDRA is only needed when this function is used.
    from indra.sources.biopax import process_owl

    statements = process_owl(path).statements
    graph_metadata = dict(name=name, version=version, description=description)
    return from_indra_statements(stmts=statements, **graph_metadata)
def from_biopax(
    path: str,
    name: Optional[str] = None,
    version: Optional[str] = None,
    description: Optional[str] = None,
    authors: Optional[str] = None,
    contact: Optional[str] = None,
    license: Optional[str] = None,
    copyright: Optional[str] = None,
    disclaimer: Optional[str] = None,
):
    """Build a BEL graph from a Pathway Commons
    `BioPAX <http://www.biopax.org/>`_ file, using :mod:`indra` to extract
    the statements.

    :param path: Path to a BioPAX OWL file
    :param name: The name for the BEL graph
    :param version: The version of the BEL graph
    :param description: The description of the graph
    :param authors: The authors of this graph
    :param contact: The contact email for this graph
    :param license: The license for this graph
    :param copyright: The copyright for this graph
    :param disclaimer: The disclaimer for this graph
    :rtype: pybel.BELGraph

    .. warning:: Not compatible with all BioPAX! See INDRA documentation.
    """
    # Imported lazily so that INDRA is only needed when this function is used.
    from indra.sources.biopax import process_owl

    statements = process_owl(path).statements
    # Collect the graph metadata once and forward it as keyword arguments.
    graph_metadata = dict(
        name=name,
        version=version,
        description=description,
        authors=authors,
        contact=contact,
        license=license,
        copyright=copyright,
        disclaimer=disclaimer,
    )
    return from_indra_statements(stmts=statements, **graph_metadata)
def get_biopax_stmts(self, filter=False, query='pathsbetween',
                     database_filter=None):
    """Get relevant statements from Pathway Commons.

    Performs a "paths between" query for the genes in :py:attr:`gene_list`
    and uses the results to build statements. This function caches two
    files: the list of statements built from the query, which is cached in
    `<basename>_biopax_stmts.pkl`, and the OWL file returned by the Pathway
    Commons Web API, which is cached in `<basename>_pc_pathsbetween.owl`.
    If these cached files are found, then the results are returned based
    on the cached file and Pathway Commons is not queried again.

    The statement cache always stores the unfiltered statements; the
    optional gene-list filter is applied after loading, so `filter` is
    honored regardless of whether the statements came from the cache.

    Parameters
    ----------
    filter : Optional[bool]
        If True, the statements are passed through
        ``ac.filter_gene_list`` with :py:attr:`gene_list`, using policy
        'one' when more than one gene is given and 'all' otherwise.
        Default is False.
    query : Optional[str]
        Defines what type of query is executed. The two options are
        'pathsbetween' which finds paths between the given list of genes
        and only works if more than 1 gene is given, and 'neighborhood'
        which searches the immediate neighborhood of each given gene.
        Note that for pathsbetween queries with more than 60 genes, the
        query will be executed in multiple blocks for scalability.
    database_filter: Optional[list[str]]
        A list of PathwayCommons databases to include in the query.

    Returns
    -------
    list of :py:class:`indra.statements.Statement`
        List of INDRA statements extracted from Pathway Commons. An empty
        list is returned if `query` is not a recognized query type.
    """
    # If we're using a cache, initialize the appropriate filenames
    use_cache = self.basename is not None
    stmt_cache_path = owl_cache_path = None
    if use_cache:
        stmt_cache_path = '%s_biopax_stmts.pkl' % self.basename
        owl_cache_path = '%s_pc_pathsbetween.owl' % self.basename

    if use_cache and os.path.isfile(stmt_cache_path):
        # Cached statements found: load them instead of re-processing.
        logger.info("Loading Biopax statements from %s", stmt_cache_path)
        # NOTE: unpickling can execute arbitrary code; the cache file must
        # come from a trusted location.
        with open(stmt_cache_path, 'rb') as f:
            statements = pickle.load(f)
    else:
        if use_cache and os.path.isfile(owl_cache_path):
            # Cached OWL found: process it rather than querying the Web API
            logger.info("Loading Biopax from OWL file %s", owl_cache_path)
            bp = biopax.process_owl(owl_cache_path)
        else:
            # OWL file not found; do query and save to file
            if len(self.gene_list) < 2 and query == 'pathsbetween':
                logger.warning('Using neighborhood query for one gene.')
                query = 'neighborhood'
            if query == 'pathsbetween':
                # Large gene lists are queried in blocks of 60 genes
                block_size = 60 if len(self.gene_list) > 60 else None
                bp = biopax.process_pc_pathsbetween(
                    self.gene_list, database_filter=database_filter,
                    block_size=block_size)
            elif query == 'neighborhood':
                bp = biopax.process_pc_neighborhood(
                    self.gene_list, database_filter=database_filter)
            else:
                logger.error('Invalid query type: %s', query)
                return []
            # Save the file if we're caching
            if use_cache:
                bp.save_model(owl_cache_path)
        statements = bp.statements
        # Save the (unfiltered) statements to pickle file if we're caching
        if use_cache:
            with open(stmt_cache_path, 'wb') as f:
                pickle.dump(statements, f)

    # Optionally filter by our gene set; this runs for cached statements too
    if filter:
        policy = 'one' if len(self.gene_list) > 1 else 'all'
        statements = ac.filter_gene_list(statements, self.gene_list, policy)
    return statements
def process_owl(path):
    """Return the statements that ``biopax.process_owl`` extracts from `path`.

    Parameters
    ----------
    path : str
        Location of the OWL file, passed straight to ``biopax.process_owl``.

    Returns
    -------
    The ``statements`` attribute of the processor returned by
    ``biopax.process_owl``.
    """
    return biopax.process_owl(path).statements
import os
from collections import defaultdict
from indra.sources import biopax
from indra.statements import *
import indra.sources.biopax.processor as bpc
from indra.util import unicode_strs
from nose.plugins.attrib import attr

# The test model sits next to this module; it is processed once at import
# time and the resulting processor is shared by the tests below.
model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'biopax_test.owl')
bp = biopax.process_owl(model_path)

# Index the statements by the last '/'-separated segment of each evidence's
# source_id.
stmts_by_source_id = defaultdict(set)
for stmt in bp.statements:
    for ev in stmt.evidence:
        stmts_by_source_id[ev.source_id.split('/')[-1]].add(stmt)


def test_listify():
    """Check that _listify wraps scalars and leaves lists unchanged."""
    assert bpc._listify(1) == [1]
    # The comparison must sit outside the call; with it inside, the call
    # receives a boolean and the assertion can never fail.
    assert bpc._listify([1, 2]) == [1, 2]
    assert bpc._listify([1]) == [1]


def test_protein_family_agent():
    """Check that the family entity yields the MAPK1 and MAPK3 agents."""
    bpe = bp.model.objects['Protein_da79d1a005a8eb259b0c09278ae9230e']
    agents = bp._get_agents_from_entity(bpe)
    assert len(agents) == 2
    assert {a.name for a in agents} == {'MAPK1', 'MAPK3'}
def save_phosphorylation_stmts(owl_file, pkl_file):
    """Extract modification sites from an OWL file and pickle them.

    Parameters
    ----------
    owl_file : str
        Path of the OWL file processed with ``biopax.process_owl``.
    pkl_file : str
        Path of the pickle file to write; it is opened in binary write mode.

    Returns
    -------
    The value returned by ``get_mod_sites`` for the processed statements,
    i.e. the same object that was written to `pkl_file`.
    """
    processor = biopax.process_owl(owl_file)
    mod_sites = get_mod_sites(processor.statements)
    with open(pkl_file, 'wb') as out_handle:
        pickle.dump(mod_sites, out_handle)
    return mod_sites
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
import os
from indra.java_vm import autoclass, cast
from indra.sources import biopax
import indra.sources.biopax.processor as bpc
from indra.databases import uniprot_client
from indra.util import unicode_strs
from indra.preassembler import Preassembler
from indra.preassembler.hierarchy_manager import hierarchies
from nose.plugins.attrib import attr

# The test model is located relative to this module; it is processed once at
# import time and the resulting processor is shared by the tests.
model_path = (os.path.dirname(os.path.abspath(__file__)) +
              '/../../data/biopax_test.owl')

bp = biopax.process_owl(model_path)
uri_prefix = 'http://purl.org/pc2/7/'


def test_paxtools_autoclass():
    """Check that the Paxtools ProteinImpl class can be loaded."""
    autoclass('org.biopax.paxtools.impl.level3.ProteinImpl')


def test_biopaxpattern_autoclass():
    """Check that the Paxtools PatternBox class can be loaded."""
    autoclass('org.biopax.paxtools.pattern.PatternBox')


def test_cpath_autoclass():
    """Check that the CPathClient class can be loaded."""
    autoclass('cpath.client.CPathClient')


def test_listify():
    """Check that _listify wraps scalars and leaves lists unchanged."""
    assert bpc._listify(1) == [1]
    # The comparison must sit outside the call; with it inside, the call
    # receives a boolean and the assertion can never fail.
    assert bpc._listify([1, 2]) == [1, 2]
    assert bpc._listify([1]) == [1]
def get_biopax_stmts(self, filter=False, query='pathsbetween',
                     database_filter=None):
    """Get relevant statements from Pathway Commons.

    Performs a "paths between" query for the genes in :py:attr:`gene_list`
    and uses the results to build statements. This function caches two
    files: the list of statements built from the query, which is cached in
    `<basename>_biopax_stmts.pkl`, and the OWL file returned by the Pathway
    Commons Web API, which is cached in `<basename>_pc_pathsbetween.owl`.
    If these cached files are found, then the results are returned based
    on the cached file and Pathway Commons is not queried again.

    The statement cache always stores the unfiltered statements; the
    optional gene-list filter is applied after loading, so `filter` is
    honored regardless of whether the statements came from the cache.

    Parameters
    ----------
    filter : Optional[bool]
        If True, the statements are passed through
        ``ac.filter_gene_list`` with :py:attr:`gene_list`, using policy
        'one' when more than one gene is given and 'all' otherwise.
        Default is False.
    query : Optional[str]
        Defines what type of query is executed. The two options are
        'pathsbetween' which finds paths between the given list of genes
        and only works if more than 1 gene is given, and 'neighborhood'
        which searches the immediate neighborhood of each given gene.
        Note that for pathsbetween queries with more than 60 genes, the
        query will be executed in multiple blocks for scalability.
    database_filter: Optional[list[str]]
        A list of PathwayCommons databases to include in the query.

    Returns
    -------
    list of :py:class:`indra.statements.Statement`
        List of INDRA statements extracted from Pathway Commons. An empty
        list is returned if `query` is not a recognized query type.
    """
    # If we're using a cache, initialize the appropriate filenames
    caching = self.basename is not None
    pkl_path = owl_path = None
    if caching:
        pkl_path = '%s_biopax_stmts.pkl' % self.basename
        owl_path = '%s_pc_pathsbetween.owl' % self.basename

    if caching and os.path.isfile(pkl_path):
        # Cached statements found: load them instead of re-processing.
        logger.info("Loading Biopax statements from %s", pkl_path)
        # NOTE: unpickling can execute arbitrary code; the cache file must
        # come from a trusted location.
        with open(pkl_path, 'rb') as f:
            stmts = pickle.load(f)
    else:
        if caching and os.path.isfile(owl_path):
            # Cached OWL found: process it rather than querying the Web API
            logger.info("Loading Biopax from OWL file %s", owl_path)
            bp = biopax.process_owl(owl_path)
        else:
            # OWL file not found; do query and save to file
            if len(self.gene_list) < 2 and query == 'pathsbetween':
                logger.warning('Using neighborhood query for one gene.')
                query = 'neighborhood'
            if query == 'pathsbetween':
                # Large gene lists are queried in blocks of 60 genes
                block_size = 60 if len(self.gene_list) > 60 else None
                bp = biopax.process_pc_pathsbetween(
                    self.gene_list, database_filter=database_filter,
                    block_size=block_size)
            elif query == 'neighborhood':
                bp = biopax.process_pc_neighborhood(
                    self.gene_list, database_filter=database_filter)
            else:
                logger.error('Invalid query type: %s', query)
                return []
            # Save the file if we're caching
            if caching:
                bp.save_model(owl_path)
        stmts = bp.statements
        # Save the (unfiltered) statements to pickle file if we're caching
        if caching:
            with open(pkl_path, 'wb') as f:
                pickle.dump(stmts, f)

    # Optionally filter by our gene set; this runs for cached statements too
    if filter:
        policy = 'one' if len(self.gene_list) > 1 else 'all'
        stmts = ac.filter_gene_list(stmts, self.gene_list, policy)
    return stmts