def test_degradation_nanopub(): """Convert nanopub with degradation assertion to edges""" with open(f"{local_dir}/datasets/nanopub_bel-good-degradation-1.0.0.json", 'r') as f: nanopub = json.load(f) with open(f"{local_dir}/datasets/edges-good-degradation-1.0.0.json", 'r') as f: edges_result = json.load(f) N = nb.Nanopub() edges = N.bel_edges(nanopub) print('Edges:\n', json.dumps(edges, indent=4)) edges = remove_dt_keys(edges) assert edges_result == edges
def test_simple_nanopub(): """Convert simple nanopub to edges""" with open(f"{local_dir}/datasets/nanopub_bel-good-1.0.0.json", "r") as f: nanopub = json.load(f) with open(f"{local_dir}/datasets/edges-good-1.0.0.json", "r") as f: edges_result = json.load(f) N = nb.Nanopub() edges = N.bel_edges(nanopub) print("Edges:\n", json.dumps(edges, indent=4)) edges = remove_dt_keys(edges) assert edges_result == edges
def pipeline( ctx, input_fn, db_save, db_delete, output_fn, rules, species, namespace_targets, version, api, config_fn, ): """BEL Pipeline - BEL Nanopubs into BEL Edges This will process BEL Nanopubs into BEL Edges by validating, orthologizing (if requested), canonicalizing, and then computing the BEL Edges based on the given rule_set. \b input_fn: If input fn has *.gz, will read as a gzip file If input fn has *.jsonl*, will parsed as a JSONLines file IF input fn has *.json*, will be parsed as a JSON file If input fn has *.yaml* or *.yml*, will be parsed as a YAML file \b output_fn: If output fn has *.gz, will written as a gzip file If output fn has *.jsonl*, will written as a JSONLines file IF output fn has *.json*, will be written as a JSON file If output fn has *.yaml* or *.yml*, will be written as a YAML file If output fn has *.jgf, will be written as JSON Graph Formatted file """ if config_fn: config = bel.db.Config.merge_config(ctx.config, override_config_fn=config_fn) else: config = ctx.config # Configuration - will return the first truthy result in list else the default option if namespace_targets: namespace_targets = json.loads(namespace_targets) if rules: rules = rules.replace(" ", "").split(",") namespace_targets = utils.first_true( [namespace_targets, config["bel"]["lang"].get("canonical")], None ) rules = utils.first_true( [rules, config["bel"]["nanopub"].get("pipeline_edge_rules", False)], False ) api = utils.first_true( [api, config["bel_api"]["servers"].get("api_url", None)], None ) version = utils.first_true( [version, config["bel"]["lang"].get("default_bel_version", None)], None ) n = bnn.Nanopub() try: json_flag, jsonl_flag, yaml_flag, jgf_flag = False, False, False, False all_bel_edges = [] fout = None if db_save or db_delete: if db_delete: arango_client = bel.db.arangodb.get_client() bel.db.arangodb.delete_database(arango_client, "edgestore") else: arango_client = bel.db.arangodb.get_client() edgestore_handle = bel.db.arangodb.get_edgestore_handle(arango_client) elif re.search("ya?ml", output_fn): yaml_flag = True elif "jsonl" in output_fn: jsonl_flag = True elif "json" in output_fn: json_flag = True elif "jgf" in output_fn: jgf_flag = True if db_save: pass elif "gz" in output_fn: fout = gzip.open(output_fn, "wt") else: fout = open(output_fn, "wt") nanopub_cnt = 0 with timy.Timer() as timer: for np in bnf.read_nanopubs(input_fn): # print('Nanopub:\n', json.dumps(np, indent=4)) nanopub_cnt += 1 if nanopub_cnt % 100 == 0: timer.track(f"{nanopub_cnt} Nanopubs processed into Edges") bel_edges = n.bel_edges( np, namespace_targets=namespace_targets, orthologize_target=species, rules=rules, ) if db_save: bel.edge.edges.load_edges_into_db(edgestore_handle, edges=bel_edges) elif jsonl_flag: fout.write("{}\n".format(json.dumps(bel_edges))) else: all_bel_edges.extend(bel_edges) if db_save: pass elif yaml_flag: fout.write("{}\n".format(yaml.dumps(all_bel_edges))) elif json_flag: fout.write("{}\n".format(json.dumps(all_bel_edges))) elif jgf_flag: bnf.edges_to_jgf(output_fn, all_bel_edges) finally: if fout: fout.close()