def install_and_populate_database(name: str, connection: Optional[str] = None) -> Optional[str]:
    """Install a biological database and export it for PyKEEN.

    The export is resolved in this order:

    1. If the export file already exists in ``OUTPUT_DIR``, its path is returned as is.
    2. If a cached BEL JSON file exists, the export and its summary are rebuilt from it.
    3. Otherwise the ``bio2bel_{name}`` module is imported, populated when its manager
       is an ``AbstractManager`` that is not populated yet, converted to BEL, cached as
       JSON, and finally exported.

    Modules listed in ``_PATHME_MODULES`` are not installed by this function: a pointer
    to the PathMe tutorial is printed and the process exits with status 0.

    :param name: The name of the database
    :param connection: The optional database connection
    :returns: The path to the export file, or ``None`` if no statements were generated
    """
    export_file = os.path.join(OUTPUT_DIR, f'{name}.csv')
    summary_file = os.path.join(OUTPUT_DIR, f'{name}_summary.csv')
    json_file = os.path.join(OUTPUT_DIR, f'{name}.bel.json')

    if os.path.exists(export_file):
        logger.info(f'{EMOJI} {name} has already been retrieved. See: {export_file}')
        return export_file

    if os.path.exists(json_file):
        logger.info(f'{EMOJI} loaded {name} JSON: {json_file}')
        graph = from_json_path(json_file)
        df = to_pykeen_df(graph)
        # Honour the writer's result here exactly like the fresh-build path below does;
        # otherwise a path could be returned for an export that was never written.
        success = to_pykeen_path(df, export_file)
        to_pykeen_summary_path(df, summary_file)
        if success:
            return export_file
        logger.warning(f'{EMOJI} no statements generated')
        return None

    if name in _PATHME_MODULES:
        click.secho(
            f'{EMOJI} You are trying to install {name} which is part of PathMe. '
            'Due to the complexity of the installation, we refer to the tutorial at'
            ' https://github.com/PathwayMerger/PathMe to install it. Alternatively, you can download the dumps of '
            f'{name} directly from the URLs at the DiffuPath README file.'
        )
        # NOTE: this terminates the whole interpreter, not just this call.
        sys.exit(0)

    module_name = f'bio2bel_{name}'
    bio2bel_module = ensure_bio2bel_installation(module_name)
    logger.debug(f'{EMOJI} imported {name}')

    manager_cls = bio2bel_module.Manager
    manager = manager_cls(connection=connection)

    # Only managers deriving from AbstractManager expose is_populated()/populate().
    if issubclass(manager_cls, AbstractManager):
        if not manager.is_populated():
            logger.info(f'{EMOJI} populating {module_name}')
            manager.populate()
        else:
            logger.debug(f'{EMOJI} {module_name} has already been populated')

    logger.debug(f'{EMOJI} generating BEL for {module_name}')
    graph = manager.to_bel()
    logger.debug(f'Summary: {graph.number_of_nodes()} nodes / {graph.number_of_edges()} edges')

    # Cache the BEL graph so later calls can skip the Bio2BEL import/population.
    to_json_path(graph, json_file, indent=2)

    logger.debug(f'{EMOJI} generating PyKEEN TSV for {module_name}')
    df = to_pykeen_df(graph)
    to_pykeen_summary_path(df, summary_file)
    success = to_pykeen_path(df, export_file)

    if success:
        logger.debug(f'{EMOJI} wrote PyKEEN TSV to {export_file}')
        return export_file

    logger.warning(f'{EMOJI} no statements generated')
    return None
def install_bio2bel_module(name: str, connection: Optional[str] = None, rebuild: bool = False) -> Optional[str]:
    """Install a Bio2BEL module and export it as a PyKEEN TSV file.

    Unless ``rebuild`` is set, cached artifacts in ``biokeen_config.data_directory``
    are reused: an existing TSV is returned directly, and an existing BEL JSON file is
    converted without importing the Bio2BEL module. Otherwise the module is imported,
    populated when its manager is an ``AbstractManager`` that is not populated yet,
    converted to BEL, cached as JSON, and exported.

    If the module's manager is not a ``BELManagerMixin``, a warning is logged and the
    process exits with status 1.

    :param name: The name of the Bio2BEL module
    :param connection: The optional database connection
    :param rebuild: Should the cache not be used? Defaults to False.
    :returns: The path to the PyKEEN TSV file, or ``None`` if no statements were generated
    """
    module_name = _SPECIAL_CASES.get(name, f'bio2bel_{name}')

    pykeen_df_path = os.path.join(biokeen_config.data_directory, f'{name}.{biokeen_config.keen_tsv_ext}')
    pykeen_df_summary_path = os.path.join(biokeen_config.data_directory, f'{name}.keen.summary.json')
    json_path = os.path.join(biokeen_config.data_directory, f'{name}.bel.json')

    if os.path.exists(pykeen_df_path) and not rebuild:
        logger.info(f'{EMOJI} {module_name} has already been retrieved. See: {pykeen_df_path}')
        return pykeen_df_path

    if os.path.exists(json_path) and not rebuild:
        logger.info(f'{EMOJI} loaded {module_name} JSON: {json_path}')
        graph = from_json_path(json_path)
        df = to_pykeen_df(graph)
        # Honour the writer's result here exactly like the fresh-build path below does;
        # otherwise a path could be returned for a TSV that was never written.
        success = to_pykeen_path(df, pykeen_df_path)
        to_pykeen_summary_path(df, pykeen_df_summary_path)
        if success:
            return pykeen_df_path
        logger.warning(f'{EMOJI} no statements generated')
        return None

    bio2bel_module = ensure_bio2bel_installation(module_name)
    logger.debug(f'{EMOJI} imported {module_name}')

    manager_cls = bio2bel_module.Manager

    if not issubclass(manager_cls, BELManagerMixin):
        version = pkg_resources.get_distribution(module_name).version
        logger.warning(f'{EMOJI} {module_name} v{version} does not produce BEL')
        # NOTE: this terminates the whole interpreter, not just this call.
        sys.exit(1)

    manager = manager_cls(connection=connection)

    # Only managers deriving from AbstractManager expose is_populated()/populate().
    if issubclass(manager_cls, AbstractManager):
        if not manager.is_populated():
            logger.info(f'{EMOJI} populating {module_name}')
            manager.populate()
        else:
            logger.debug(f'{EMOJI} {module_name} has already been populated')

    logger.debug(f'{EMOJI} generating BEL for {module_name}')
    graph = manager.to_bel()
    logger.debug(f'Summary: {graph.number_of_nodes()} nodes / {graph.number_of_edges()} edges')

    # Cache the BEL graph so later calls can skip the Bio2BEL import/population.
    to_json_path(graph, json_path, indent=2)

    logger.debug(f'{EMOJI} generating PyKEEN TSV for {module_name}')
    df = to_pykeen_df(graph)
    to_pykeen_summary_path(df, pykeen_df_summary_path)
    success = to_pykeen_path(df, pykeen_df_path)

    if success:
        logger.debug(f'{EMOJI} wrote PyKEEN TSV to {pykeen_df_path}')
        return pykeen_df_path

    logger.warning(f'{EMOJI} no statements generated')
    return None
def to_bel_json(self, path: str, **kwargs):
    """Export the BEL model as a node-link JSON file.

    :param path: The path of the JSON file to write
    :param kwargs: Extra keyword arguments forwarded to :func:`pybel.to_json_path`
    """
    bel_graph = self.to_bel()
    pybel.to_json_path(bel_graph, path, **kwargs)
def get_graph(
    self,
    directory: Optional[str] = None,
    use_cached: bool = True,
    use_tqdm: bool = True,
) -> BELGraph:
    """Get the graph from all sources.

    When a cached pickle exists and ``use_cached`` is set, it is loaded and returned
    without rebuilding. Otherwise the graphs from :meth:`get_graphs` are merged, the
    metadata is applied, and the result is exported to ``directory`` as pickle,
    node-link JSON, SIF, GMT and GraphML. INDRA, CX and HTML exports are written as
    well, each one being skipped if it raises :class:`ImportError`.

    :param directory: The output/cache directory. Falls back to ``self.directory``.
    :param use_cached: Should an existing pickle in the directory be returned instead
     of rebuilding the graph?
    :param use_tqdm: Passed through to :meth:`get_graphs`
    :returns: The merged BEL graph
    :raises ValueError: If neither ``directory`` nor ``self.directory`` is set
    """
    if directory is None:
        if self.directory is None:
            raise ValueError(
                'no output directory available: pass `directory` or set `self.directory`'
            )
        directory = self.directory

    pickle_path = os.path.join(directory, f'{self.name}.bel.pickle')
    if use_cached and os.path.exists(pickle_path):
        return pybel.from_pickle(pickle_path)

    # Make sure the target exists before doing the work, so the first export
    # into a fresh directory does not fail after the graph has been built.
    os.makedirs(directory, exist_ok=True)

    rv = union(self.get_graphs(use_tqdm=use_tqdm))
    self.metadata.update(rv)

    pybel.to_pickle(rv, pickle_path)

    nodelink_path = os.path.join(directory, f'{self.name}.bel.nodelink.json')
    pybel.to_json_path(rv, nodelink_path)

    sif_path = os.path.join(directory, f'{self.name}.bel.sif')
    pybel.to_sif_path(rv, sif_path)

    gsea_path = os.path.join(directory, f'{self.name}.bel.gmt')
    pybel.to_gsea_path(rv, gsea_path)

    graphml_path = os.path.join(directory, f'{self.name}.bel.graphml')
    pybel.to_graphml(rv, graphml_path)

    # The remaining exports are best-effort: an ImportError (presumably a missing
    # optional dependency) skips that export instead of failing the whole call.
    try:
        statements = pybel.to_indra_statements(rv)
    except ImportError:
        pass
    else:
        indra_path = os.path.join(directory, f'{self.name}.indra.pickle')
        with open(indra_path, 'wb') as file:
            pickle.dump(statements, file)

    try:
        from pybel_cx import to_cx_file
    except ImportError:
        pass
    else:
        cx_path = os.path.join(directory, f'{self.name}.bel.cx.json')
        # Explicit encoding so the output does not depend on the platform locale.
        with open(cx_path, 'w', encoding='utf-8') as file:
            to_cx_file(rv, file)

    try:
        from pybel_tools.assembler.html import to_html
    except ImportError:
        pass
    else:
        html_path = os.path.join(directory, 'index.html')
        # Explicit encoding so non-ASCII content cannot fail on a non-UTF-8 locale.
        with open(html_path, 'w', encoding='utf-8') as file:
            print(to_html(rv), file=file)

    return rv