Esempio n. 1
0
def load_orthologs(fo: IO, metadata: dict):
    """Load orthologs into ArangoDB.

    Batch-loads the documents produced by ``orthologs_iterator(fo, version)``
    into the BEL namespace database, removes ortholog edges and nodes from the
    same source whose version differs from the one just loaded, and finally
    stores ``metadata`` in the resource metadata collection (insert, falling
    back to replace when the insert raises ``ArangoError``).

    Args:
        fo: file obj - orthologs file
        metadata: dict containing the metadata for orthologs;
            ``metadata["metadata"]["version"]`` and
            ``metadata["metadata"]["source"]`` are read, and a ``_key`` entry
            is added to the dict in place

    Raises:
        SystemExit: if no ArangoDB client could be obtained
    """
    # Local import: `quit()` is injected by the `site` module for interactive
    # use and is not guaranteed to exist; `sys.exit` is the supported API.
    import sys

    version = metadata["metadata"]["version"]
    source = metadata["metadata"]["source"]

    # LOAD ORTHOLOGS INTO ArangoDB
    with timy.Timer("Load Orthologs") as timer:

        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load orthologs without ArangoDB access")
            # Exit with a non-zero status so callers can detect the failure
            sys.exit(1)
        belns_db = arangodb.get_belns_handle(arango_client)
        arangodb.batch_load_docs(
            belns_db, orthologs_iterator(fo, version), on_duplicate="update"
        )

        log.info("Load orthologs", elapsed=timer.elapsed, source=source)

        # Clean up old entries: anything from this source that was not
        # (re)written with the current version is stale.
        remove_old_ortholog_edges = f"""
            FOR edge in ortholog_edges
                FILTER edge.source == "{source}"
                FILTER edge.version != "{version}"
                REMOVE edge IN ortholog_edges
        """
        remove_old_ortholog_nodes = f"""
            FOR node in ortholog_nodes
                FILTER node.source == "{source}"
                FILTER node.version != "{version}"
                REMOVE node IN ortholog_nodes
        """
        arangodb.aql_query(belns_db, remove_old_ortholog_edges)
        arangodb.aql_query(belns_db, remove_old_ortholog_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Orthologs_{source}"
    metadata_coll = belns_db.collection(arangodb.belns_metadata_name)
    try:
        metadata_coll.insert(metadata)
    except ArangoError:
        # Insert failed (presumably the key already exists) - overwrite it
        metadata_coll.replace(metadata)
Esempio n. 2
0
# Third Party Imports
import structlog

# Local Imports
import bel.db.arangodb as arangodb
import bel.db.elasticsearch
import bel.lang.belobj
import bel.utils
from bel.Config import config

# Module-level logger
log = structlog.getLogger(__name__)

# NOTE: everything below runs at import time, so importing this module calls
# the Elasticsearch and ArangoDB client getters.
es = bel.db.elasticsearch.get_client()

# ArangoDB client and the database handle returned by get_belapi_handle()
client = arangodb.get_client()
belapi_db = arangodb.get_belapi_handle(client)

# Collection handle for the collection named by arangodb.bel_validations_name
bel_validations_name = arangodb.bel_validations_name
bel_validation_coll = belapi_db.collection(bel_validations_name)


def convert_msg_to_html(msg):
    r"""Convert every \n in msg into a <br /> for an HTML formatted message.

    Args:
        msg: message text that may contain newline characters

    Returns:
        msg with each newline character replaced by ``<br />``
    """

    # A literal substring replacement needs no regex
    return msg.replace("\n", "<br />")


def get_validation_for_hashes(hashes):
    """Get cached validations from validation cache database in arangodb"""
Esempio n. 3
0
def load_terms(fo: IO, metadata: dict, forceupdate: bool):
    """Load terms into Elasticsearch and ArangoDB

    Forceupdate will create a new index in Elasticsearch regardless of whether
    an index with the resource version already exists.

    Steps performed:
        1. Create an Elasticsearch index named
           ``terms_<namespace>_<version>`` (with an ``_alt`` suffix when the
           index already exists and forceupdate is set) and bulk load the
           terms into it. If the index exists and forceupdate is not set, the
           function returns without loading anything.
        2. Delete the other Elasticsearch indexes of this namespace and add
           ``terms_alias`` to the new index.
        3. Batch load the term equivalences into ArangoDB and remove
           equivalence edges/nodes of this namespace with a different version.
        4. Store ``metadata`` in the resource metadata collection.

    Args:
        fo: file obj - terminology file
        metadata: dict containing the metadata for terminology;
            ``metadata["metadata"]["version"]`` and
            ``metadata["metadata"]["namespace"]`` are read, and a ``_key``
            entry is added to the dict in place
        forceupdate: force full update - e.g. don't leave Elasticsearch indexes
            alone if their version ID matches

    Raises:
        SystemExit: if no ArangoDB client could be obtained
    """
    # Local import: `quit()` is injected by the `site` module for interactive
    # use and is not guaranteed to exist; `sys.exit` is the supported API.
    import sys

    version = metadata["metadata"]["version"]
    namespace = metadata["metadata"]["namespace"]

    # LOAD TERMS INTO Elasticsearch
    with timy.Timer("Load Terms") as timer:
        es = bel.db.elasticsearch.get_client()

        # Strip the date/time separators from the version for the index name
        es_version = version.replace("T", "").replace("-", "").replace(":", "")
        index_prefix = f"terms_{namespace.lower()}"
        index_name = f"{index_prefix}_{es_version}"

        # Create index with mapping
        if not elasticsearch.index_exists(es, index_name):
            elasticsearch.create_terms_index(es, index_name)
        elif forceupdate:  # force an update to the index
            index_name += "_alt"
            elasticsearch.create_terms_index(es, index_name)
        else:
            return  # Skip loading if not forced and not a new namespace

        terms_iterator = terms_iterator_for_elasticsearch(fo, index_name)
        elasticsearch.bulk_load_docs(es, terms_iterator)

        # Remove old namespace index. Match on "<prefix>_" at the start of the
        # name: a plain substring test would also delete indexes of other
        # namespaces whose name merely contains this prefix (e.g. loading
        # "go" would remove "terms_gobp_...").
        old_index_prefix = f"{index_prefix}_"
        for name in elasticsearch.get_all_index_names(es):
            if name != index_name and name.startswith(old_index_prefix):
                elasticsearch.delete_index(es, name)

        # Add terms_alias to this index
        elasticsearch.add_index_alias(es, index_name, terms_alias)

        log.info(
            "Load namespace terms",
            elapsed=timer.elapsed,
            namespace=namespace,
        )

    # LOAD EQUIVALENCES INTO ArangoDB
    with timy.Timer("Load Term Equivalences") as timer:
        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load terms without ArangoDB access")
            # Exit with a non-zero status so callers can detect the failure
            sys.exit(1)
        belns_db = arangodb.get_belns_handle(arango_client)
        # NOTE(review): fo was already consumed by the Elasticsearch pass;
        # presumably terms_iterator_for_arangodb rewinds it - confirm.
        arangodb.batch_load_docs(
            belns_db, terms_iterator_for_arangodb(fo, version), on_duplicate="update"
        )

        log.info(
            "Loaded namespace equivalences",
            elapsed=timer.elapsed,
            namespace=namespace,
        )

        # Clean up old entries: anything from this namespace that was not
        # (re)written with the current version is stale.
        remove_old_equivalence_edges = f"""
            FOR edge in equivalence_edges
                FILTER edge.source == "{namespace}"
                FILTER edge.version != "{version}"
                REMOVE edge IN equivalence_edges
        """
        remove_old_equivalence_nodes = f"""
            FOR node in equivalence_nodes
                FILTER node.source == "{namespace}"
                FILTER node.version != "{version}"
                REMOVE node IN equivalence_nodes
        """
        arangodb.aql_query(belns_db, remove_old_equivalence_edges)
        arangodb.aql_query(belns_db, remove_old_equivalence_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Namespace_{namespace}"
    metadata_coll = belns_db.collection(arangodb.belns_metadata_name)
    try:
        metadata_coll.insert(metadata)
    except ArangoError:
        # Insert failed (presumably the key already exists) - overwrite it
        metadata_coll.replace(metadata)