def load_orthologs(fo: IO, metadata: dict):
    """Load orthologs into ArangoDB

    Bulk-loads the documents produced by ``orthologs_iterator`` for this file,
    deletes ortholog edges and nodes of the same source whose version differs
    from the version being loaded, and finally stores the resource metadata
    document (inserted, or replaced when the insert fails).

    Args:
        fo: file obj - orthologs file
        metadata: dict containing the metadata for orthologs. Must provide
            ``metadata["metadata"]["version"]`` and
            ``metadata["metadata"]["source"]``. A ``_key`` entry is added to
            this dict in place before it is stored.

    Raises:
        SystemExit: with exit status 1 when no ArangoDB client is available.
    """

    # Read both required fields up front so that a malformed metadata dict
    # fails before anything is written to the database.
    version = metadata["metadata"]["version"]
    source = metadata["metadata"]["source"]

    # LOAD ORTHOLOGS INTO ArangoDB
    with timy.Timer("Load Orthologs") as timer:
        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load orthologs without ArangoDB access")
            # quit() is only injected by the ``site`` module for interactive
            # sessions and exits with status 0; raise SystemExit directly and
            # report the failure through a non-zero exit status.
            raise SystemExit(1)

        belns_db = arangodb.get_belns_handle(arango_client)
        arangodb.batch_load_docs(
            belns_db, orthologs_iterator(fo, version), on_duplicate="update"
        )

        log.info("Load orthologs", elapsed=timer.elapsed, source=source)

        # Clean up old entries: everything from this source that was not
        # written with the version just loaded.
        # NOTE(review): source and version are interpolated directly into the
        # AQL text; if arangodb.aql_query supports bind variables, prefer them.
        remove_old_ortholog_edges = (
            " FOR edge in ortholog_edges"
            f' FILTER edge.source == "{source}"'
            f' FILTER edge.version != "{version}"'
            " REMOVE edge IN ortholog_edges "
        )
        remove_old_ortholog_nodes = (
            " FOR node in ortholog_nodes"
            f' FILTER node.source == "{source}"'
            f' FILTER node.version != "{version}"'
            " REMOVE node IN ortholog_nodes "
        )
        arangodb.aql_query(belns_db, remove_old_ortholog_edges)
        arangodb.aql_query(belns_db, remove_old_ortholog_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Orthologs_{source}"
    try:
        belns_db.collection(arangodb.belns_metadata_name).insert(metadata)
    except ArangoError:
        # Insert failed (presumably the key already exists), so overwrite the
        # stored metadata document instead.
        belns_db.collection(arangodb.belns_metadata_name).replace(metadata)
def remove_old_db_entries(source, version: str = "", force: bool = False):
    """Delete ortholog edge and node documents belonging to ``source``.

    Args:
        source: value matched against each document's ``source`` attribute
        version: documents carrying this version are kept; when it is the
            empty string every document of the source is removed
        force: when true, ignore ``version`` and remove every document of
            the source
    """

    # No version clause means "remove everything from this source".
    version_clause = (
        "" if force or version == "" else f'FILTER doc.version != "{version}"'
    )

    # Build both statements before running either one: edges first, then nodes.
    purge_queries = [
        (
            f" FOR doc in {collection_name}"
            f' FILTER doc.source == "{source}"'
            f" {version_clause}"
            f" REMOVE doc IN {collection_name} "
        )
        for collection_name in (ortholog_edges_name, ortholog_nodes_name)
    ]

    for purge_query in purge_queries:
        arangodb.aql_query(resources_db, purge_query)
def load_terms(fo: IO, metadata: dict, forceupdate: bool):
    """Load terms into Elasticsearch and ArangoDB

    Forceupdate will create a new index in Elasticsearch regardless of whether
    an index with the resource version already exists.

    When the versioned index already exists and ``forceupdate`` is false the
    function returns immediately: nothing is loaded into Elasticsearch or
    ArangoDB and the metadata document is not written.

    Args:
        fo: file obj - terminology file
        metadata: dict containing the metadata for terminology. Must provide
            ``metadata["metadata"]["version"]`` and
            ``metadata["metadata"]["namespace"]``. A ``_key`` entry is added
            to this dict in place before it is stored.
        forceupdate: force full update - e.g. don't leave Elasticsearch indexes
            alone if their version ID matches

    Raises:
        SystemExit: with exit status 1 when no ArangoDB client is available.
    """

    version = metadata["metadata"]["version"]
    namespace = metadata["metadata"]["namespace"]

    # LOAD TERMS INTO Elasticsearch
    with timy.Timer("Load Terms") as timer:
        es = bel.db.elasticsearch.get_client()

        # Strip the "T", "-" and ":" characters from the version before using
        # it as part of the index name.
        es_version = version.replace("T", "").replace("-", "").replace(":", "")
        index_prefix = f"terms_{namespace.lower()}"
        index_name = f"{index_prefix}_{es_version}"

        # Create index with mapping
        if not elasticsearch.index_exists(es, index_name):
            elasticsearch.create_terms_index(es, index_name)
        elif forceupdate:
            # Force an update: build a fresh index next to the existing one;
            # the existing one is removed below once loading has finished.
            index_name += "_alt"
            elasticsearch.create_terms_index(es, index_name)
        else:
            return  # Skip loading if not forced and not a new namespace

        terms_iterator = terms_iterator_for_elasticsearch(fo, index_name)
        elasticsearch.bulk_load_docs(es, terms_iterator)

        # Remove old namespace indexes. The match is anchored on
        # "<index_prefix>_" so that a namespace whose name merely contains
        # this one (e.g. terms_go vs terms_gobp_...) keeps its index; a plain
        # substring test would delete it.
        stale_prefix = f"{index_prefix}_"
        for name in elasticsearch.get_all_index_names(es):
            if name != index_name and name.startswith(stale_prefix):
                elasticsearch.delete_index(es, name)

        # Add terms_alias to this index
        elasticsearch.add_index_alias(es, index_name, terms_alias)

        log.info(
            "Load namespace terms",
            elapsed=timer.elapsed,
            namespace=namespace,
        )

    # LOAD EQUIVALENCES INTO ArangoDB
    with timy.Timer("Load Term Equivalences") as timer:
        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load terms without ArangoDB access")
            # quit() is only injected by the ``site`` module for interactive
            # sessions and exits with status 0; raise SystemExit directly and
            # report the failure through a non-zero exit status.
            raise SystemExit(1)

        belns_db = arangodb.get_belns_handle(arango_client)
        # NOTE(review): fo was already consumed by the Elasticsearch iterator
        # above; presumably terms_iterator_for_arangodb rewinds it - confirm.
        arangodb.batch_load_docs(
            belns_db, terms_iterator_for_arangodb(fo, version), on_duplicate="update"
        )

        log.info(
            "Loaded namespace equivalences",
            elapsed=timer.elapsed,
            namespace=namespace,
        )

        # Clean up old entries: everything from this namespace that was not
        # written with the version just loaded.
        # NOTE(review): namespace and version are interpolated directly into
        # the AQL text; if arangodb.aql_query supports bind variables, prefer
        # them.
        remove_old_equivalence_edges = (
            " FOR edge in equivalence_edges"
            f' FILTER edge.source == "{namespace}"'
            f' FILTER edge.version != "{version}"'
            " REMOVE edge IN equivalence_edges "
        )
        remove_old_equivalence_nodes = (
            " FOR node in equivalence_nodes"
            f' FILTER node.source == "{namespace}"'
            f' FILTER node.version != "{version}"'
            " REMOVE node IN equivalence_nodes "
        )
        arangodb.aql_query(belns_db, remove_old_equivalence_edges)
        arangodb.aql_query(belns_db, remove_old_equivalence_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Namespace_{namespace}"
    try:
        belns_db.collection(arangodb.belns_metadata_name).insert(metadata)
    except ArangoError:
        # Insert failed (presumably the key already exists), so overwrite the
        # stored metadata document instead.
        belns_db.collection(arangodb.belns_metadata_name).replace(metadata)