import timy


def listcreator():
    with timy.Timer() as timer:
        li = []
        for i in range(0, 100000, 2):
            li.append(i)
            if i == 50000:
                # Record an intermediate checkpoint without stopping the timer
                timer.track('reached 50000')
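# Illustrative sketch of the tracked-timer pattern above. The function name, chunk
# size, and labels here are assumptions added for illustration, not part of the
# original example; it uses only the timy API already shown (Timer, track).
import timy


def sum_in_chunks(values, checkpoint=10000):
    total = 0
    with timy.Timer("sum_in_chunks") as timer:
        for count, value in enumerate(values, start=1):
            total += value
            if count % checkpoint == 0:
                # track() records a named checkpoint while the timer keeps running
                timer.track(f"{count} values summed")
    return total


if __name__ == "__main__":
    sum_in_chunks(range(100000))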
import json
import subprocess

import timy


def run_borgmatic_cmd(cmd):
    # Label the timer with the command so the timing report identifies it
    with timy.Timer(cmd):
        result = subprocess.run(
            cmd.split(" "),
            check=True,
            stdout=subprocess.PIPE,
        )
        output = result.stdout.decode("utf-8").strip()
        return json.loads(output)
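# Hedged usage sketch for run_borgmatic_cmd() above: timy.Timer(cmd) names the timing
# report after the command itself. The exact borgmatic flags below are an assumption
# for illustration only; substitute whatever invocation your setup actually supports.
if __name__ == "__main__":
    parsed = run_borgmatic_cmd("borgmatic list --json")
    print(f"parsed {len(parsed)} entries")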
def load_orthologs(fo: IO, metadata: dict):
    """Load orthologs into ArangoDB

    Args:
        fo: file obj - orthologs file
        metadata: dict containing the metadata for orthologs
    """
    version = metadata["metadata"]["version"]

    # LOAD ORTHOLOGS INTO ArangoDB
    with timy.Timer("Load Orthologs") as timer:
        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load orthologs without ArangoDB access")
            quit()

        belns_db = arangodb.get_belns_handle(arango_client)
        arangodb.batch_load_docs(belns_db, orthologs_iterator(fo, version), on_duplicate="update")

        log.info("Load orthologs", elapsed=timer.elapsed, source=metadata["metadata"]["source"])

    # Clean up old entries
    remove_old_ortholog_edges = f"""
        FOR edge in ortholog_edges
            FILTER edge.source == "{metadata["metadata"]["source"]}"
            FILTER edge.version != "{version}"
            REMOVE edge IN ortholog_edges
    """
    remove_old_ortholog_nodes = f"""
        FOR node in ortholog_nodes
            FILTER node.source == "{metadata["metadata"]["source"]}"
            FILTER node.version != "{version}"
            REMOVE node IN ortholog_nodes
    """
    arangodb.aql_query(belns_db, remove_old_ortholog_edges)
    arangodb.aql_query(belns_db, remove_old_ortholog_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Orthologs_{metadata['metadata']['source']}"
    try:
        belns_db.collection(arangodb.belns_metadata_name).insert(metadata)
    except ArangoError as ae:
        belns_db.collection(arangodb.belns_metadata_name).replace(metadata)
def load_terms(fo: IO, metadata: dict, forceupdate: bool):
    """Load terms into Elasticsearch and ArangoDB

    Forceupdate will create a new index in Elasticsearch regardless of whether
    an index with the resource version already exists.

    Args:
        fo: file obj - terminology file
        metadata: dict containing the metadata for terminology
        forceupdate: force full update - e.g. don't leave Elasticsearch indexes
            alone if their version ID matches
    """
    version = metadata["metadata"]["version"]

    # LOAD TERMS INTO Elasticsearch
    with timy.Timer("Load Terms") as timer:
        es = bel.db.elasticsearch.get_client()

        es_version = version.replace("T", "").replace("-", "").replace(":", "")
        index_prefix = f"terms_{metadata['metadata']['namespace'].lower()}"
        index_name = f"{index_prefix}_{es_version}"

        # Create index with mapping
        if not elasticsearch.index_exists(es, index_name):
            elasticsearch.create_terms_index(es, index_name)
        elif forceupdate:  # force an update to the index
            index_name += "_alt"
            elasticsearch.create_terms_index(es, index_name)
        else:
            return  # Skip loading if not forced and not a new namespace

        terms_iterator = terms_iterator_for_elasticsearch(fo, index_name)
        elasticsearch.bulk_load_docs(es, terms_iterator)

        # Remove old namespace index
        index_names = elasticsearch.get_all_index_names(es)
        for name in index_names:
            if name != index_name and index_prefix in name:
                elasticsearch.delete_index(es, name)

        # Add terms_alias to this index
        elasticsearch.add_index_alias(es, index_name, terms_alias)

        log.info(
            "Load namespace terms",
            elapsed=timer.elapsed,
            namespace=metadata["metadata"]["namespace"],
        )

    # LOAD EQUIVALENCES INTO ArangoDB
    with timy.Timer("Load Term Equivalences") as timer:
        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load terms without ArangoDB access")
            quit()

        belns_db = arangodb.get_belns_handle(arango_client)
        arangodb.batch_load_docs(belns_db, terms_iterator_for_arangodb(fo, version), on_duplicate="update")

        log.info(
            "Loaded namespace equivalences",
            elapsed=timer.elapsed,
            namespace=metadata["metadata"]["namespace"],
        )

    # Clean up old entries
    remove_old_equivalence_edges = f"""
        FOR edge in equivalence_edges
            FILTER edge.source == "{metadata["metadata"]["namespace"]}"
            FILTER edge.version != "{version}"
            REMOVE edge IN equivalence_edges
    """
    remove_old_equivalence_nodes = f"""
        FOR node in equivalence_nodes
            FILTER node.source == "{metadata["metadata"]["namespace"]}"
            FILTER node.version != "{version}"
            REMOVE node IN equivalence_nodes
    """
    arangodb.aql_query(belns_db, remove_old_equivalence_edges)
    arangodb.aql_query(belns_db, remove_old_equivalence_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Namespace_{metadata['metadata']['namespace']}"
    try:
        belns_db.collection(arangodb.belns_metadata_name).insert(metadata)
    except ArangoError as ae:
        belns_db.collection(arangodb.belns_metadata_name).replace(metadata)
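# Hedged sketch of the "log elapsed time from inside the with-block" pattern used in
# load_orthologs() and load_terms() above, with the project's structured logger and
# batch loader replaced by stdlib stand-ins (an assumption; their real definitions
# are not shown in these excerpts). Only Timer and elapsed from timy are used, as above.
import logging

import timy

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def timed_batch_load(docs):
    loaded = 0
    with timy.Timer("Load docs") as timer:
        for _doc in docs:
            loaded += 1  # stand-in for the real batch-load call
        # timer.elapsed is readable while the Timer is still open, as in load_terms()
        log.info("Loaded %d docs, elapsed=%.3fs", loaded, timer.elapsed)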
def pipeline(
    ctx,
    input_fn,
    db_save,
    db_delete,
    output_fn,
    rules,
    species,
    namespace_targets,
    version,
    api,
    config_fn,
):
    """BEL Pipeline - BEL Nanopubs into BEL Edges

    This will process BEL Nanopubs into BEL Edges by validating, orthologizing
    (if requested), canonicalizing, and then computing the BEL Edges based on
    the given rule_set.

    \b
    input_fn:
        If input fn has *.gz, will be read as a gzip file
        If input fn has *.jsonl*, will be parsed as a JSONLines file
        If input fn has *.json*, will be parsed as a JSON file
        If input fn has *.yaml* or *.yml*, will be parsed as a YAML file

    \b
    output_fn:
        If output fn has *.gz, will be written as a gzip file
        If output fn has *.jsonl*, will be written as a JSONLines file
        If output fn has *.json*, will be written as a JSON file
        If output fn has *.yaml* or *.yml*, will be written as a YAML file
        If output fn has *.jgf, will be written as a JSON Graph Formatted file
    """
    if config_fn:
        config = bel.db.Config.merge_config(ctx.config, override_config_fn=config_fn)
    else:
        config = ctx.config

    # Configuration - will return the first truthy result in list else the default option
    if namespace_targets:
        namespace_targets = json.loads(namespace_targets)
    if rules:
        rules = rules.replace(" ", "").split(",")

    namespace_targets = utils.first_true(
        [namespace_targets, config["bel"]["lang"].get("canonical")], None
    )
    rules = utils.first_true(
        [rules, config["bel"]["nanopub"].get("pipeline_edge_rules", False)], False
    )
    api = utils.first_true(
        [api, config["bel_api"]["servers"].get("api_url", None)], None
    )
    version = utils.first_true(
        [version, config["bel"]["lang"].get("default_bel_version", None)], None
    )

    n = bnn.Nanopub()

    try:
        json_flag, jsonl_flag, yaml_flag, jgf_flag = False, False, False, False
        all_bel_edges = []
        fout = None

        if db_save or db_delete:
            if db_delete:
                arango_client = bel.db.arangodb.get_client()
                bel.db.arangodb.delete_database(arango_client, "edgestore")
            else:
                arango_client = bel.db.arangodb.get_client()
            edgestore_handle = bel.db.arangodb.get_edgestore_handle(arango_client)
        elif re.search("ya?ml", output_fn):
            yaml_flag = True
        elif "jsonl" in output_fn:
            jsonl_flag = True
        elif "json" in output_fn:
            json_flag = True
        elif "jgf" in output_fn:
            jgf_flag = True

        if db_save:
            pass
        elif "gz" in output_fn:
            fout = gzip.open(output_fn, "wt")
        else:
            fout = open(output_fn, "wt")

        nanopub_cnt = 0
        with timy.Timer() as timer:
            for np in bnf.read_nanopubs(input_fn):
                # print('Nanopub:\n', json.dumps(np, indent=4))
                nanopub_cnt += 1
                if nanopub_cnt % 100 == 0:
                    timer.track(f"{nanopub_cnt} Nanopubs processed into Edges")

                bel_edges = n.bel_edges(
                    np,
                    namespace_targets=namespace_targets,
                    orthologize_target=species,
                    rules=rules,
                )

                if db_save:
                    bel.edge.edges.load_edges_into_db(edgestore_handle, edges=bel_edges)
                elif jsonl_flag:
                    fout.write("{}\n".format(json.dumps(bel_edges)))
                else:
                    all_bel_edges.extend(bel_edges)

        if db_save:
            pass
        elif yaml_flag:
            fout.write("{}\n".format(yaml.dump(all_bel_edges)))
        elif json_flag:
            fout.write("{}\n".format(json.dumps(all_bel_edges)))
        elif jgf_flag:
            bnf.edges_to_jgf(output_fn, all_bel_edges)
    finally:
        if fout:
            fout.close()
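# Hedged sketch of the output-format dispatch that pipeline() performs, reduced to the
# gzip/JSONLines/JSON cases. Helper names and the per-line layout are illustrative
# assumptions; the real command also handles YAML, JGF, and direct database loading.
import gzip
import json


def open_output(output_fn: str):
    # ".gz" anywhere in the name selects gzip text mode, mirroring pipeline()
    if "gz" in output_fn:
        return gzip.open(output_fn, "wt")
    return open(output_fn, "wt")


def write_edges(output_fn: str, all_bel_edges):
    with open_output(output_fn) as fout:
        if "jsonl" in output_fn:
            # one JSON document per line
            for edges in all_bel_edges:
                fout.write("{}\n".format(json.dumps(edges)))
        else:
            fout.write("{}\n".format(json.dumps(all_bel_edges)))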