def main(directory: str):
    """Get Hetionet, ground it, and write it out in several BEL formats.

    The following files are written into ``directory``:

    - ``hetionet-v1.0.bel.gz`` (BEL script)
    - ``hetionet-v1.0.bel.nodelink.json.gz`` (node-link JSON)
    - ``hetionet-v1.0.bel.graphdati.json.gz`` (GraphDati JSON)
    - ``hetionet-v1.0.tsv.gz`` (TSV)

    :param directory: Directory that receives the export files. It is not
        created by this function.
    """
    def _artifact(file_name: str) -> str:
        """Return the path of an export file inside the output directory."""
        return os.path.join(directory, file_name)

    version = pybel.get_version(with_git_hash=True)
    click.echo(f'Using PyBEL v{version}')

    click.echo('Getting hetionet')
    hetionet = pybel.get_hetionet()

    click.echo('Grounding hetionet')
    hetionet = pybel.grounding.ground(hetionet)

    click.echo('Exporting BEL Script')
    bel_script_target = _artifact('hetionet-v1.0.bel.gz')
    pybel.to_bel_script_gz(hetionet, bel_script_target)

    click.echo('Exporting Nodelink')
    nodelink_target = _artifact('hetionet-v1.0.bel.nodelink.json.gz')
    pybel.to_nodelink_gz(hetionet, nodelink_target)

    click.echo('Exporting GraphDati')
    graphdati_target = _artifact('hetionet-v1.0.bel.graphdati.json.gz')
    pybel.to_graphdati_gz(hetionet, graphdati_target)

    click.echo('Exporting Machine Learning-ready TSV')
    tsv_target = _artifact('hetionet-v1.0.tsv.gz')
    # The TSV writer takes an open text handle, so the gzip stream is opened here.
    with gzip.open(tsv_target, 'wt') as tsv_file:
        pybel.to_tsv(hetionet, tsv_file)
def main(directory: str):
    """Make hetionet exports.

    Whole-graph files are written into ``directory`` only when they do not
    exist yet:

    - ``hetionet.bel.nodelink.json.gz`` (also used as the cache the graph is
      reloaded from on later runs)
    - ``hetionet.bel.gz``
    - ``hetionet.bel.graphdati.jsonl.gz``
    - ``hetionet.bel.graphdati.json.gz``

    If ``hetionet_summary.tsv`` is missing, one edge is picked with ``choice``
    from each value of ``get_metaedge_to_key(graph)``. The summary table, with
    the first eight characters of each edge key and the edge's BEL, is written
    first. The graph is then reduced in place to the picked edges and written
    as ``hetionet_sample.bel`` and ``hetionet_sample.bel.graphdati.json``.

    :param directory: Directory that holds the cache and receives the exports.
    """
    path = os.path.join(directory, 'hetionet.bel.nodelink.json.gz')
    if not os.path.exists(path):
        graph = get_hetionet()
        to_nodelink_gz(graph, path)
    else:
        # NOTE(review): the message says "pickle" although the cache is a
        # gzipped node-link JSON file; wording kept as-is.
        click.echo(f'loading pickle from {path}')
        graph = from_nodelink_gz(path)

    output_bel_gz_path = os.path.join(directory, 'hetionet.bel.gz')
    if not os.path.exists(output_bel_gz_path):
        click.echo(f'outputting whole hetionet as BEL GZ to {output_bel_gz_path}')
        to_bel_script_gz(graph, output_bel_gz_path, use_identifiers=True)

    output_graphdati_jsonl_gz_path = os.path.join(directory, 'hetionet.bel.graphdati.jsonl.gz')
    if not os.path.exists(output_graphdati_jsonl_gz_path):
        click.echo(f'outputting whole hetionet as BEL GraphDati JSONL GZ to {output_graphdati_jsonl_gz_path}')
        to_graphdati_jsonl_gz(graph, output_graphdati_jsonl_gz_path, use_identifiers=True)

    output_graphdati_gz_path = os.path.join(directory, 'hetionet.bel.graphdati.json.gz')
    if not os.path.exists(output_graphdati_gz_path):
        click.echo(f'outputting whole hetionet as BEL GraphDati JSON GZ to {output_graphdati_gz_path}')
        to_graphdati_gz(graph, output_graphdati_gz_path, use_identifiers=True)

    summary_tsv_path = os.path.join(directory, 'hetionet_summary.tsv')
    if os.path.exists(summary_tsv_path):
        # Everything below depends on the edges sampled for the summary, so
        # there is nothing left to do once the summary already exists.
        return

    click.echo('getting metaedges')
    rows = []
    keep_keys = set()
    for value in get_metaedge_to_key(graph).values():
        u, v, key = choice(list(value))
        keep_keys.add(key)
        d = graph[u][v][key]
        bel = edge_to_bel(u, v, d, use_identifiers=True)
        rows.append((key[:8], bel))

    df = pd.DataFrame(rows, columns=['key', 'bel'])
    df.to_csv(summary_tsv_path, sep='\t', index=False)

    non_sample_edges = [
        (u, v, k, d)
        for u, v, k, d in tqdm(graph.edges(keys=True, data=True), desc='Getting non-sample edges to remove')
        if k not in keep_keys
    ]
    click.echo('Removing non-sample edges')
    # From here on ``graph`` is mutated in place and only holds the sample.
    graph.remove_edges_from(non_sample_edges)
    graph.remove_nodes_from(list(nx.isolates(graph)))

    sample_bel_path = os.path.join(directory, 'hetionet_sample.bel')
    click.echo(f'outputting sample hetionet in BEL to {sample_bel_path}')
    to_bel_script(graph, sample_bel_path, use_identifiers=True)

    sample_graphdati_path = os.path.join(directory, 'hetionet_sample.bel.graphdati.json')
    # Fixed: this message used to report ``sample_bel_path`` again instead of
    # the GraphDati file that is actually written next.
    click.echo(f'outputting sample hetionet in BEL to {sample_graphdati_path}')
    to_graphdati_file(graph, sample_graphdati_path, use_identifiers=True, indent=2)
def ensure_graph(name: str, *, manager_kwargs: Optional[Mapping[str, Any]] = None) -> BELGraph:
    """Return the BEL graph of a Bio2BEL package, building and caching it on first use.

    The cache is the file ``{name}.bel.nodelink.json.gz`` inside
    ``get_data_dir(name)``.

    :param name: Name of the Bio2BEL package.
    :param manager_kwargs: Keyword arguments passed to the package's
        ``Manager`` constructor. ``None`` or an empty mapping passes nothing.
    :returns: The graph read from the cache file if it exists, otherwise the
        graph exported by the manager (which is also written to the cache).
    :raises ValueError: If the manager is not a ``BELManagerMixin`` instance.
    """
    cache_path = os.path.join(get_data_dir(name), f'{name}.bel.nodelink.json.gz')

    if not os.path.exists(cache_path):
        _, package = ensure_bio2bel_installation(name)
        manager_cls = package.Manager
        manager = manager_cls(**(manager_kwargs or {}))
        if not isinstance(manager, BELManagerMixin):
            raise ValueError(f'{package} is not enabled for BEL export')

        bel_graph = manager.to_bel()
        to_nodelink_gz(bel_graph, cache_path)
        return bel_graph

    return from_nodelink_gz(cache_path)
def write(
    self,
    *,
    tp_nodes: Mapping[str, Set[Node]],
    tp_edges: Mapping[str, Set[Edge]],
    tn_nodes: Mapping[str, Set[Node]],
    tn_edges: Mapping[str, Set[Edge]],
    **kwargs,
) -> None:
    """Convert the positive and negative node/edge sets to BEL graphs and write them gzipped.

    For each of the names ``positive`` and ``negative``, two files are written
    into ``self.graph_dir_path``: ``{name}.bel.nodelink.json.gz`` and
    ``{name}.bel.gz``.

    :param tp_nodes: Nodes passed to ``convert`` for the ``positive`` graph.
    :param tp_edges: Edges passed to ``convert`` for the ``positive`` graph.
    :param tn_nodes: Nodes passed to ``convert`` for the ``negative`` graph.
    :param tn_edges: Edges passed to ``convert`` for the ``negative`` graph.
    :param kwargs: Accepted but not used by this method.
    """
    from pybel import to_bel_script_gz, to_nodelink_gz

    # Insertion order fixes the processing order: positive first, then negative.
    labelled_parts = {
        "positive": (tp_nodes, tp_edges),
        "negative": (tn_nodes, tn_edges),
    }

    for label, (part_nodes, part_edges) in labelled_parts.items():
        bel_graph = convert(nodes=part_nodes, edges=part_edges, name=label)

        nodelink_target = os.path.join(self.graph_dir_path, f"{label}.bel.nodelink.json.gz")
        to_nodelink_gz(bel_graph, nodelink_target)

        script_target = os.path.join(self.graph_dir_path, f"{label}.bel.gz")
        to_bel_script_gz(bel_graph, script_target)
def main(directory: str):
    """Get Hetionet and write it into ``directory`` in several BEL formats.

    Three files are produced, in this order: ``hetionet-v1.0.bel.gz``,
    ``hetionet-v1.0.bel.nodelink.json.gz`` and
    ``hetionet-v1.0.bel.graphdati.json.gz``.

    :param directory: Directory that receives the export files. It is not
        created by this function.
    """
    click.echo('Getting hetionet')
    hetionet = get_hetionet()

    # (progress message, exporter, output file name), listed in execution order.
    export_plan = (
        ('Exporting BEL Script', to_bel_script_gz, 'hetionet-v1.0.bel.gz'),
        ('Exporting Nodelink', to_nodelink_gz, 'hetionet-v1.0.bel.nodelink.json.gz'),
        ('Exporting GraphDati', to_graphdati_gz, 'hetionet-v1.0.bel.graphdati.json.gz'),
    )
    for message, exporter, file_name in export_plan:
        click.echo(message)
        exporter(hetionet, os.path.join(directory, file_name))
def ensure_graph(name: str, *, manager_kwargs: Optional[Mapping[str, Any]] = None) -> BELGraph:
    """Load the cached BEL graph for a Bio2BEL package, generating the cache if needed.

    When the cache file ``{name}.bel.nodelink.json.gz`` exists in the package's
    data directory it is read and returned without touching the package's
    manager. Otherwise the manager is instantiated, its BEL export is written
    to the cache file, and that graph is returned.

    :param name: The name of the Bio2BEL package
    :param manager_kwargs: Optional mapping given as keyword arguments to the
        manager upon instantiation.
    :returns: The BEL graph for the package.
    :raises ValueError: If the instantiated manager is not a ``BELManagerMixin``.
    """
    data_directory = get_data_dir(name)
    nodelink_path = os.path.join(data_directory, f'{name}.bel.nodelink.json.gz')

    if os.path.exists(nodelink_path):
        result = from_nodelink_gz(nodelink_path)
    else:
        _, bio2bel_module = ensure_bio2bel_installation(name)
        manager = bio2bel_module.Manager(**(manager_kwargs or {}))
        if not isinstance(manager, BELManagerMixin):
            raise ValueError(f'{bio2bel_module} is not enabled for BEL export')
        result = manager.to_bel()
        to_nodelink_gz(result, nodelink_path)

    return result
def write_bel(connection, skip, directory, force):
    """Export every BEL-enabled manager as pickle, node-link JSON, and BEL script.

    For each manager yielded by ``_iterate_managers(connection, skip)`` that is
    a ``BELManagerMixin``, the files ``{name}.bel.pickle``,
    ``{name}.bel.nodelink.json.gz`` and ``{name}.bel.gz`` are written into
    ``directory``. A manager is skipped when its pickle already exists (unless
    ``force`` is truthy) or when it reports that it is not populated.

    :param connection: Passed through to ``_iterate_managers``.
    :param skip: Passed through to ``_iterate_managers``.
    :param directory: Output directory; created if it does not exist.
    :param force: If truthy, export even when the pickle file already exists.
    """
    os.makedirs(directory, exist_ok=True)

    from .manager.bel_manager import BELManagerMixin
    import pybel

    for _, name, manager in _iterate_managers(connection, skip):
        if not isinstance(manager, BELManagerMixin):
            continue

        click.secho(name, fg='cyan', bold=True)

        # The pickle doubles as the "already exported" marker for this manager.
        pickle_path = os.path.join(directory, f'{name}.bel.pickle')
        if os.path.exists(pickle_path) and not force:
            click.echo('👍 already exported')
            continue

        if not manager.is_populated():
            click.echo('👎 unpopulated')
            continue

        bel_graph = manager.to_bel()
        pybel.to_pickle(bel_graph, pickle_path)
        pybel.to_nodelink_gz(bel_graph, os.path.join(directory, f'{name}.bel.nodelink.json.gz'))
        pybel.to_bel_script_gz(bel_graph, os.path.join(directory, f'{name}.bel.gz'))