def process_belscript(file_name, **kwargs):
    """Process a BEL script file into a PybelProcessor.

    All keyword arguments are forwarded to pybel.from_path; for further
    information, see
    pybel.readthedocs.io/en/latest/io.html#pybel.from_path

    Two options are filled in here when the caller does not supply them,
    with values that differ from the PyBEL defaults: `citation_clearing`
    becomes False and `no_identifier_validation` becomes True. Values
    passed explicitly by the caller are left untouched.

    Parameters
    ----------
    file_name : str
        The path to a BEL script file.
    **kwargs
        Extra options handed to pybel.from_path.

    Returns
    -------
    bp : PybelProcessor
        A PybelProcessor object which contains INDRA Statements in
        bp.statements.
    """
    # setdefault only writes the key when the caller did not provide it.
    kwargs.setdefault('citation_clearing', False)
    kwargs.setdefault('no_identifier_validation', True)
    return process_pybel_graph(pybel.from_path(file_name, **kwargs))
def setUpClass(cls):
    """Set up this class with several pre-loaded BEL graphs.

    The parent ``setUpClass`` runs first. Then, inside the
    ``mock_bel_resources`` context, each test document is parsed once and
    the resulting graph is stored on the class:

    - ``thorough_graph``: ``test_bel_thorough`` parsed with
      ``allow_nested=True``
    - ``slushy_graph``: ``test_bel_slushy`` parsed with
      ``disallow_unqualified_translocations=True``
    - ``simple_graph``: ``test_bel_simple`` loaded through ``from_url``
      using the file's ``file://`` URI
    - ``isolated_graph``: ``test_bel_isolated``
    - ``misordered_graph``: ``test_bel_misordered`` parsed with
      ``citation_clearing=False``

    Every call receives ``cls.manager`` as its ``manager`` argument.
    """
    super(TestInterchange, cls).setUpClass()

    with mock_bel_resources:
        cls.thorough_graph = from_path(test_bel_thorough, manager=cls.manager, allow_nested=True)
        cls.slushy_graph = from_path(
            test_bel_slushy,
            manager=cls.manager,
            disallow_unqualified_translocations=True)
        # Path.as_uri() turns the local path into a URI so the URL loader is exercised too.
        cls.simple_graph = from_url(Path(test_bel_simple).as_uri(), manager=cls.manager)
        cls.isolated_graph = from_path(test_bel_isolated, manager=cls.manager)
        cls.misordered_graph = from_path(test_bel_misordered, manager=cls.manager, citation_clearing=False)
def bel_graph_loader(from_dir: str) -> BELGraph:
    """Build one BELGraph from the BEL documents directly inside a folder.

    Only regular files whose path ends in ``.bel`` (compared
    case-insensitively) are parsed; directories inside ``from_dir`` are
    ignored.

    :param from_dir: The folder with the BEL documents.
    :return: The union of the graphs parsed from those documents.
    """
    logger.info("Loading BEL Graph.")

    bel_paths = []
    for entry in listdir(from_dir):
        candidate = join(from_dir, entry)
        # Regular files only, then keep those carrying the BEL extension.
        if isfile(candidate) and candidate[-4:].lower() == '.bel':
            bel_paths.append(candidate)

    parsed_graphs = [from_path(bel_path) for bel_path in bel_paths]
    return union(parsed_graphs)
def load_paths(paths, connection=None):
    """Parse several BEL scripts and return the union of their graphs.

    Each path is parsed with :func:`pybel.from_path`, and every call is
    given the same manager obtained from ``Manager.ensure(connection)``.

    :param iter[str] paths: An iterable over paths to BEL scripts
    :param connection: A custom database connection string or manager
    :type connection: Optional[str or pybel.manager.Manager]
    :rtype: pybel.BELGraph
    """
    manager = Manager.ensure(connection)
    # Kept as a generator so each script is parsed only when union() asks for it.
    parsed_graphs = (
        from_path(bel_path, manager=manager)
        for bel_path in paths
    )
    return union(parsed_graphs)
def process_belscript(file_name, **kwargs):
    """Process a BEL script file into a PybelProcessor.

    All keyword arguments are forwarded unchanged to pybel.from_path; for
    further information, see
    pybel.readthedocs.io/en/latest/io.html#pybel.from_path

    Parameters
    ----------
    file_name : str
        The path to a BEL script file.
    **kwargs
        Extra options handed to pybel.from_path.

    Returns
    -------
    bp : PybelProcessor
        A PybelProcessor object which contains INDRA Statements in
        bp.statements.
    """
    return process_pybel_graph(pybel.from_path(file_name, **kwargs))
def test_from_path(self, m1, m2, m3):
    """Check the graph parsed from ``test_bel_extensions``.

    Verifies that parsing produced no warnings, that the document metadata
    and the OWL/URL namespace and annotation definitions match the expected
    values, and that the expected nodes and annotated edges are present.

    ``m1``, ``m2`` and ``m3`` are not used in the body; presumably they are
    mocks injected by patch decorators -- confirm against the decorators.
    """
    graph = from_path(test_bel_extensions, manager=self.manager)

    self.assertEqual(0, len(graph.warnings))
    self.assertEqual(expected_test_bel_4_metadata, graph.document)

    expected_namespace_owl = {
        'WINE': wine_iri,
        'PIZZA': 'http://www.lesfleursdunormal.fr/static/_downloads/pizza_onto.owl',
    }
    self.assertEqual(expected_namespace_owl, graph.namespace_owl)
    self.assertEqual({'Wine': wine_iri}, graph.annotation_owl)
    self.assertEqual({'HGNC': HGNC_URL}, graph.namespace_url)

    akt1 = (PROTEIN, 'HGNC', 'AKT1')
    egfr = (PROTEIN, 'HGNC', 'EGFR')
    for node in (akt1, egfr):
        self.assertHasNode(graph, node)
    self.assertHasEdge(graph, akt1, egfr)

    # Citation, evidence and annotation data expected on every pizza/wine edge.
    edge_data = {
        CITATION: {
            CITATION_NAME: 'That one article from last week',
            CITATION_REFERENCE: '123455',
            CITATION_TYPE: 'PubMed',
        },
        EVIDENCE: 'Made up support, not even qualifying as evidence',
        ANNOTATIONS: {
            'Wine': {
                'Cotturi': True,
            },
        },
    }

    wine = (ABUNDANCE, 'WINE', 'Wine')
    for topping in ("MeatTopping", "TomatoTopping"):
        self.assertHasEdge(graph, (ABUNDANCE, "PIZZA", topping), wine, **edge_data)

    white_wine = (ABUNDANCE, 'WINE', 'WhiteWine')
    fish_topping = (ABUNDANCE, "PIZZA", "FishTopping")
    self.assertHasEdge(graph, white_wine, fish_topping, **edge_data)
def load_paths(paths, connection=None):
    """Parse a group of BEL scripts and merge them into one graph.

    A single object is built with ``build_metadata_parser(connection)`` and
    passed as the ``manager`` argument to every :func:`from_path` call, so
    that all scripts share it. Each parsed graph is merged into the result
    with ``left_merge`` in iteration order.

    :param paths: An iterable over paths to BEL scripts
    :type paths: iter
    :param connection: A custom database connection string
    :type connection: str
    :return: A BEL graph comprised of the union of all BEL graphs produced by each BEL script
    :rtype: pybel.BELGraph
    """
    shared_parser = build_metadata_parser(connection)
    merged = BELGraph()

    for bel_path in paths:
        left_merge(merged, from_path(bel_path, manager=shared_parser))

    return merged
def setUp(self):
    """Load the simple test graph and extend it with two dummy genes.

    When the ``PYBEL_BASE`` environment variable is set, the BEL script is
    read from ``<PYBEL_BASE>/tests/bel/test_bel.bel``; otherwise it is
    fetched from the PyBEL GitHub repository. The central dogma is then
    inferred on the graph, two dummy gene nodes are added, and one edge is
    added to each of them.
    """
    pybel_base = os.environ.get('PYBEL_BASE')

    if pybel_base is None:
        # No local checkout configured: fall back to the copy hosted on GitHub.
        test_bel_simple_url = 'https://raw.githubusercontent.com/pybel/pybel/develop/tests/bel/test_bel.bel'
        self.graph = pybel.from_url(test_bel_simple_url)
    else:
        test_bel_simple_path = os.path.join(pybel_base, 'tests', 'bel', 'test_bel.bel')
        self.graph = pybel.from_path(test_bel_simple_path)

    infer_central_dogma(self.graph)

    akt1_gene = (GENE, 'HGNC', 'AKT1')
    egfr_rna = (RNA, 'HGNC', 'EGFR')
    dummy1_gene = (GENE, 'HGNC', 'DUMMY1')
    dummy2_gene = (GENE, 'HGNC', 'DUMMY2')

    self.graph.add_simple_node(*dummy1_gene)
    self.graph.add_simple_node(*dummy2_gene)

    self.graph.add_edge(akt1_gene, dummy1_gene)
    self.graph.add_edge(egfr_rna, dummy2_gene)
def convert_recursive(directory, connection=None, upload=False, pickle=False, store_parts=False,
                      enrich_citations=False):
    """Recursively parse the BEL scripts under a directory, then upload and/or pickle them.

    A script that fails to parse is logged with its traceback and skipped;
    the remaining scripts are still processed.

    :param directory: The directory handed to ``get_paths_recursive``
    :param connection: Passed to ``build_metadata_parser`` to obtain the manager
    :param bool upload: If true, pass each graph to ``safe_upload``
    :param bool pickle: If true, write each graph next to its source as ``<path minus last 4 chars>.gpickle``
    :param bool store_parts: Forwarded to ``safe_upload``
    :param bool enrich_citations: If true, run ``fix_pubmed_citations`` on each graph
    """
    metadata_parser = build_metadata_parser(connection)
    # Materialised so the full list can be logged before parsing starts.
    paths = list(get_paths_recursive(directory))
    log.info('Paths to parse: %s', paths)

    for path in paths:
        try:
            graph = from_path(path, manager=metadata_parser.manager)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
            # must still be able to stop a long-running conversion.
            log.exception('Problem parsing %s', path)
            continue

        if enrich_citations:
            fix_pubmed_citations(graph)

        if upload:
            safe_upload(metadata_parser.manager, graph, store_parts=store_parts)

        if pickle:
            # [:-4] gets rid of .bel at the end of the file name
            new_path = '{}.gpickle'.format(path[:-4])
            to_pickle(graph, new_path)
import sys import pickle import pybel from pybel.struct.filters import has_protein_modification from indra.sources import bel from indra.sources.bel.processor import get_agent from .util import get_mod_sites if __name__ == '__main__': # Parse the BEL script, takes a few minutes if sys.argv[1] == 'parse_belscript': input_file = sys.argv[2] output_file = sys.argv[3] pbg = pybel.from_path(input_file) pybel.to_pickle(pbg, output_file) # Get all variant sites from the graph #elif sys.argv[1] == 'get_pybel_mod_agents': # pbg = pybel.from_pickle('output/large_corpus_pybel.pkl') # mod_nodes = [get_agent(n) for n in pbg.nodes() # if has_protein_modification(n)] # with open('output/bel_mod_agents.pkl', 'wb') as f: # pickle.dump(mod_nodes, f) elif sys.argv[1] == 'get_pybel_stmts_by_site': input_file = sys.argv[2] output_file = sys.argv[3] pbg = pybel.from_pickle(input_file) pbp = bel.process_pybel_graph(pbg) sites = get_mod_sites(pbp.statements) with open(output_file, 'wb') as f: pickle.dump(sites, f) else:
def get_misordered_graph(mock_get):
    """Parse ``test_bel_misordered`` with ``citation_clearing=False`` and return the graph.

    ``mock_get`` is accepted but never used in the body; presumably it is
    injected by a mock/patch decorator -- confirm against the decorator.
    ``cls`` is not a parameter, so it must be provided by the surrounding scope.
    """
    return from_path(test_bel_misordered, manager=cls.manager, citation_clearing=False)
def get_isolated_graph(mock_get):
    """Parse ``test_bel_isolated`` and return the resulting graph.

    ``mock_get`` is accepted but never used in the body; presumably it is
    injected by a mock/patch decorator -- confirm against the decorator.
    ``cls`` is not a parameter, so it must be provided by the surrounding scope.
    """
    return from_path(test_bel_isolated, manager=cls.manager)
def get_slushy_graph(mock):
    """Parse ``test_bel_slushy`` and return the resulting graph.

    ``mock`` is accepted but never used in the body; presumably it is
    injected by a mock/patch decorator -- confirm against the decorator.
    ``cls`` is not a parameter, so it must be provided by the surrounding scope.
    """
    return from_path(test_bel_slushy, manager=cls.manager)
def get_thorough_graph(mock):
    """Parse ``test_bel_thorough`` with ``allow_nested=True`` and return the graph.

    ``mock`` is accepted but never used in the body; presumably it is
    injected by a mock/patch decorator -- confirm against the decorator.
    ``cls`` is not a parameter, so it must be provided by the surrounding scope.
    """
    return from_path(test_bel_thorough, manager=cls.manager, allow_nested=True)
def convert_paths(paths, connection=None, upload=False, pickle=False, canonicalize=True, infer_central_dogma=True,
                  enrich_citations=False, send=False, version_in_path=False, **kwargs):
    """Parse each BEL script in a set of files, then optionally post-process, upload, pickle and send it.

    A script that fails to parse is logged, recorded in the returned list,
    and skipped; the remaining scripts are still processed.

    :param iter[str] paths: The paths to convert
    :param connection: The connection
    :type connection: None or str or pybel.manager.Manager
    :param bool upload: Should the networks be uploaded to the cache?
    :param bool pickle: Should the networks be saved as pickles?
    :param bool canonicalize: Calculate canonical nodes?
    :param bool infer_central_dogma: Should the central dogma be inferred for all proteins, RNAs, and miRNAs
    :param bool enrich_citations: Should the citations be enriched using Entrez Utils?
    :param bool send: Send to PyBEL Web?
    :param bool version_in_path: Add the current pybel version to the pathname
    :param kwargs: Parameters to pass to :func:`pybel.from_path`
    :return: One ``(path, exception)`` pair for every script that could not be parsed
    :rtype: list
    """
    manager = Manager.ensure(connection)
    failures = []

    for path in paths:
        log.info('parsing: %s', path)

        try:
            graph = from_path(path, manager=manager, **kwargs)
        except Exception as exc:
            log.exception('problem parsing %s', path)
            failures.append((path, exc))
            continue

        if canonicalize:
            add_canonical_names(graph)

        # The boolean parameter shadows the function name, hence the *_mutator alias.
        if infer_central_dogma:
            infer_central_dogma_mutator(graph)

        if enrich_citations:
            enrich_pubmed_citations(graph=graph, manager=manager)

        if upload:
            to_database(graph, connection=manager, store_parts=True)

        if pickle:
            # gets rid of .bel at the end of the file name
            stem = path[:-len('.bel')]
            new_path = (
                '{}-{}.gpickle'.format(stem, get_pybel_version())
                if version_in_path
                else '{}.gpickle'.format(stem)
            )
            to_pickle(graph, new_path)
            log.info('output pickle: %s', new_path)

        if send:
            response = to_web(graph)
            log.info('sent to PyBEL Web with response: %s', response.json())

    return failures