Example 1
import timy

def listcreator():
    with timy.Timer() as timer:
        li = []
        for i in range(0, 100000, 2):
            li.append(i)
            if i == 50000:
                timer.track('reached 50000')
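
The pattern above combines with timy's other core API, timer.elapsed, used in Examples 3 and 4 below: track() records a named checkpoint while the Timer is open, and elapsed reads the time measured so far. A minimal, self-contained sketch using only calls that appear on this page:

import timy

# Sketch only: Timer as a context manager, track() for a named
# checkpoint, elapsed for the time measured so far
with timy.Timer("evens") as timer:
    evens = [i for i in range(0, 100000, 2)]
    timer.track("list built")
    print(timer.elapsed)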
Example 2
import json
import subprocess

import timy

def run_borgmatic_cmd(cmd):
    # Label the Timer with the command so the timing line identifies it
    with timy.Timer(cmd):
        result = subprocess.run(
            cmd.split(" "),
            check=True,
            stdout=subprocess.PIPE,
        )
    output = result.stdout.decode("utf-8").strip()
    return json.loads(output)
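
Note that the stdout decoding and json.loads run after the with block exits, so the reported time covers only the subprocess itself, not the parsing. A hypothetical call (the borgmatic subcommand shown is an assumption, not taken from this project):

archives = run_borgmatic_cmd("borgmatic list --json")  # hypothetical subcommand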
Example 3
def load_orthologs(fo: IO, metadata: dict):
    """Load orthologs into ArangoDB

    Args:
        fo: file obj - orthologs file
        metadata: dict containing the metadata for orthologs
    """

    version = metadata["metadata"]["version"]

    # LOAD ORTHOLOGS INTO ArangoDB
    with timy.Timer("Load Orthologs") as timer:

        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load orthologs without ArangoDB access")
            quit()
        belns_db = arangodb.get_belns_handle(arango_client)
        arangodb.batch_load_docs(belns_db,
                                 orthologs_iterator(fo, version),
                                 on_duplicate="update")

        log.info("Load orthologs",
                 elapsed=timer.elapsed,
                 source=metadata["metadata"]["source"])

        # Clean up old entries
        remove_old_ortholog_edges = f"""
            FOR edge in ortholog_edges
                FILTER edge.source == "{metadata["metadata"]["source"]}"
                FILTER edge.version != "{version}"
                REMOVE edge IN ortholog_edges
        """
        remove_old_ortholog_nodes = f"""
            FOR node in ortholog_nodes
                FILTER node.source == "{metadata["metadata"]["source"]}"
                FILTER node.version != "{version}"
                REMOVE node IN ortholog_nodes
        """
        arangodb.aql_query(belns_db, remove_old_ortholog_edges)
        arangodb.aql_query(belns_db, remove_old_ortholog_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Orthologs_{metadata['metadata']['source']}"
    try:
        belns_db.collection(arangodb.belns_metadata_name).insert(metadata)
    except ArangoError:  # a document with this _key already exists
        belns_db.collection(arangodb.belns_metadata_name).replace(metadata)
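
The closing try/except is a simple upsert keyed on _key: insert the metadata document, and replace it if one with that key already exists. A standalone sketch of the same pattern, assuming a python-arango collection handle (the helper name is hypothetical):

from arango.exceptions import ArangoError

def upsert_doc(collection, doc):
    # Hypothetical helper: insert doc, or replace it when its _key exists
    try:
        collection.insert(doc)
    except ArangoError:
        collection.replace(doc)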
Example 4
def load_terms(fo: IO, metadata: dict, forceupdate: bool):
    """Load terms into Elasticsearch and ArangoDB

    Forceupdate will create a new index in Elasticsearch regardless of whether
    an index with the resource version already exists.

    Args:
        fo: file obj - terminology file
        metadata: dict containing the metadata for terminology
        forceupdate: force full update - e.g. don't leave Elasticsearch indexes
            alone if their version ID matches
    """

    version = metadata["metadata"]["version"]

    # LOAD TERMS INTO Elasticsearch
    with timy.Timer("Load Terms") as timer:
        es = bel.db.elasticsearch.get_client()

        es_version = version.replace("T", "").replace("-", "").replace(":", "")
        index_prefix = f"terms_{metadata['metadata']['namespace'].lower()}"
        index_name = f"{index_prefix}_{es_version}"

        # Create index with mapping
        if not elasticsearch.index_exists(es, index_name):
            elasticsearch.create_terms_index(es, index_name)
        elif forceupdate:  # force an update to the index
            index_name += "_alt"
            elasticsearch.create_terms_index(es, index_name)
        else:
            return  # Skip loading if not forced and not a new namespace

        terms_iterator = terms_iterator_for_elasticsearch(fo, index_name)
        elasticsearch.bulk_load_docs(es, terms_iterator)

        # Remove old namespace index
        index_names = elasticsearch.get_all_index_names(es)
        for name in index_names:
            if name != index_name and index_prefix in name:
                elasticsearch.delete_index(es, name)

        # Add terms_alias to this index
        elasticsearch.add_index_alias(es, index_name, terms_alias)

        log.info(
            "Load namespace terms",
            elapsed=timer.elapsed,
            namespace=metadata["metadata"]["namespace"],
        )

    # LOAD EQUIVALENCES INTO ArangoDB
    with timy.Timer("Load Term Equivalences") as timer:
        arango_client = arangodb.get_client()
        if not arango_client:
            print("Cannot load terms without ArangoDB access")
            quit()
        belns_db = arangodb.get_belns_handle(arango_client)
        arangodb.batch_load_docs(belns_db,
                                 terms_iterator_for_arangodb(fo, version),
                                 on_duplicate="update")

        log.info(
            "Loaded namespace equivalences",
            elapsed=timer.elapsed,
            namespace=metadata["metadata"]["namespace"],
        )

        # Clean up old entries
        remove_old_equivalence_edges = f"""
            FOR edge in equivalence_edges
                FILTER edge.source == "{metadata["metadata"]["namespace"]}"
                FILTER edge.version != "{version}"
                REMOVE edge IN equivalence_edges
        """
        remove_old_equivalence_nodes = f"""
            FOR node in equivalence_nodes
                FILTER node.source == "{metadata["metadata"]["namespace"]}"
                FILTER node.version != "{version}"
                REMOVE node IN equivalence_nodes
        """
        arangodb.aql_query(belns_db, remove_old_equivalence_edges)
        arangodb.aql_query(belns_db, remove_old_equivalence_nodes)

    # Add metadata to resource metadata collection
    metadata["_key"] = f"Namespace_{metadata['metadata']['namespace']}"
    try:
        belns_db.collection(arangodb.belns_metadata_name).insert(metadata)
    except ArangoError:  # a document with this _key already exists
        belns_db.collection(arangodb.belns_metadata_name).replace(metadata)
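
The index handling above is the versioned-index-plus-alias pattern: create an index named with the resource version, bulk-load into it, delete older indexes sharing the prefix, then point the stable alias at the new index so readers never query a half-built index. A rough sketch against the stock elasticsearch-py client (the index and alias names are made up; the project uses its own elasticsearch wrapper module):

from elasticsearch import Elasticsearch

es = Elasticsearch()
index_name = "terms_example_20180101"  # hypothetical prefix + version suffix

if not es.indices.exists(index=index_name):
    es.indices.create(index=index_name)
# ... bulk-load documents into index_name ...
for name in es.indices.get(index="terms_example_*"):
    if name != index_name:
        es.indices.delete(index=name)
es.indices.put_alias(index=index_name, name="terms")  # repoint the stable alias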
Example 5
def pipeline(
    ctx,
    input_fn,
    db_save,
    db_delete,
    output_fn,
    rules,
    species,
    namespace_targets,
    version,
    api,
    config_fn,
):
    """BEL Pipeline - BEL Nanopubs into BEL Edges

    This will process BEL Nanopubs into BEL Edges by validating, orthologizing (if requested),
    canonicalizing, and then computing the BEL Edges based on the given rule_set.

    \b
    input_fn:
        If input fn has *.gz, will be read as a gzip file
        If input fn has *.jsonl*, will be parsed as a JSONLines file
        If input fn has *.json*, will be parsed as a JSON file
        If input fn has *.yaml* or *.yml*, will be parsed as a YAML file

    \b
    output_fn:
        If output fn has *.gz, will be written as a gzip file
        If output fn has *.jsonl*, will be written as a JSONLines file
        If output fn has *.json*, will be written as a JSON file
        If output fn has *.yaml* or *.yml*, will be written as a YAML file
        If output fn has *.jgf, will be written as a JSON Graph Format file
    """

    if config_fn:
        config = bel.db.Config.merge_config(ctx.config, override_config_fn=config_fn)
    else:
        config = ctx.config

    # Configuration: each first_true call below returns the first truthy
    # result in the list, else the default (see the sketch after this example)
    if namespace_targets:
        namespace_targets = json.loads(namespace_targets)
    if rules:
        rules = rules.replace(" ", "").split(",")

    namespace_targets = utils.first_true(
        [namespace_targets, config["bel"]["lang"].get("canonical")], None
    )
    rules = utils.first_true(
        [rules, config["bel"]["nanopub"].get("pipeline_edge_rules", False)], False
    )
    api = utils.first_true(
        [api, config["bel_api"]["servers"].get("api_url", None)], None
    )
    version = utils.first_true(
        [version, config["bel"]["lang"].get("default_bel_version", None)], None
    )

    n = bnn.Nanopub()

    try:
        json_flag, jsonl_flag, yaml_flag, jgf_flag = False, False, False, False
        all_bel_edges = []
        fout = None

        if db_save or db_delete:
            if db_delete:
                arango_client = bel.db.arangodb.get_client()
                bel.db.arangodb.delete_database(arango_client, "edgestore")
            else:
                arango_client = bel.db.arangodb.get_client()

            edgestore_handle = bel.db.arangodb.get_edgestore_handle(arango_client)

        elif re.search("ya?ml", output_fn):
            yaml_flag = True
        elif "jsonl" in output_fn:
            jsonl_flag = True
        elif "json" in output_fn:
            json_flag = True
        elif "jgf" in output_fn:
            jgf_flag = True

        if db_save:
            pass
        elif "gz" in output_fn:
            fout = gzip.open(output_fn, "wt")
        else:
            fout = open(output_fn, "wt")

        nanopub_cnt = 0
        with timy.Timer() as timer:
            for np in bnf.read_nanopubs(input_fn):
                # print('Nanopub:\n', json.dumps(np, indent=4))

                nanopub_cnt += 1
                if nanopub_cnt % 100 == 0:
                    timer.track(f"{nanopub_cnt} Nanopubs processed into Edges")

                bel_edges = n.bel_edges(
                    np,
                    namespace_targets=namespace_targets,
                    orthologize_target=species,
                    rules=rules,
                )

                if db_save:
                    bel.edge.edges.load_edges_into_db(edgestore_handle, edges=bel_edges)
                elif jsonl_flag:
                    fout.write("{}\n".format(json.dumps(bel_edges)))
                else:
                    all_bel_edges.extend(bel_edges)

        if db_save:
            pass
        elif yaml_flag:
            fout.write("{}\n".format(yaml.dumps(all_bel_edges)))
        elif json_flag:
            fout.write("{}\n".format(json.dumps(all_bel_edges)))
        elif jgf_flag:
            bnf.edges_to_jgf(output_fn, all_bel_edges)

    finally:
        if fout:
            fout.close()
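
The utils.first_true helper used in Example 5 is described only by its inline comment: return the first truthy value in a list, else a default. A minimal sketch of that contract (the real bel.utils implementation may differ):

def first_true(values, default=None):
    # Return the first truthy element of values, else the default
    return next((v for v in values if v), default)

first_true([None, "", "http://localhost:8000"], None)  # -> "http://localhost:8000"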