Beispiel #1
0
def merge_wrapper(merge_config: str, source: List, destination: List,
                  processes: int):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML, and merge them into a single graph.
    The merged graph can then be written to a local/remote Neo4j instance OR be serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``merge-config`` YAML.

    This wrapper never returns normally: it raises ``SystemExit`` with
    status 0 when ``merge`` completes, or status 1 (after logging the
    error) when ``merge`` raises an ``Exception``.

    Parameters
    ----------
    merge_config: str
        Merge config YAML
    source: List
        A list of sources to load, as defined in the YAML
    destination: List
        A list of destinations to write to, as defined in the YAML
    processes: int
        Number of processes to use

    """
    # The bare ``exit`` builtin is injected by the ``site`` module for
    # interactive sessions and is not guaranteed to exist (e.g. ``python -S``);
    # ``sys.exit`` is the supported way to terminate a program.
    import sys

    try:
        merge(merge_config, source, destination, processes)
    except Exception as me:
        get_logger().error(f"kgx.merge error: {str(me)}")
        sys.exit(1)
    # Only reached when ``merge`` returned without raising.
    sys.exit(0)
Beispiel #2
0
def graph_summary_wrapper(inputs: List[str],
                          input_format: str,
                          input_compression: Optional[str],
                          output: Optional[str],
                          report_type: str,
                          report_format: str,
                          stream: bool,
                          graph_name: str,
                          node_facet_properties: Optional[List],
                          edge_facet_properties: Optional[List],
                          error_log: str = ''):
    """
    Loads and summarizes a knowledge graph from a set of input files.
    \f

    This wrapper never returns normally: it raises ``SystemExit`` with
    status 0 when ``graph_summary`` completes, or status 1 (after logging
    the error) when it raises an ``Exception``.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        Input file format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Where to write the output (stdout, by default)
    report_type: str
        The summary report type
    report_format: str
        The summary report format file types: 'yaml' or 'json'  (default is report_type specific)
    stream: bool
        Whether to parse input as a stream
    graph_name: str
        User specified name of graph being summarized
    node_facet_properties: Optional[List]
        A list of node properties from which to generate counts per value for those properties.
        For example, ``['provided_by']``. ``None`` is treated as an empty list.
    edge_facet_properties: Optional[List]
        A list of edge properties from which to generate counts per value for those properties.
        For example, ``['original_knowledge_source', 'aggregator_knowledge_source']``.
        ``None`` is treated as an empty list.
    error_log: str
        Where to write any graph processing error message (stderr, by default, for empty argument)
    """
    # ``sys.exit`` is used instead of the ``site``-injected ``exit`` builtin,
    # which is intended for the interactive shell and may be unavailable.
    import sys

    try:
        # Both facet arguments are declared Optional, but ``list(None)``
        # raises ``TypeError``; substitute an empty list for ``None``.
        # The conversion stays inside the ``try`` so that any failure here
        # is still logged and reported through exit status 1.
        node_facets = (
            [] if node_facet_properties is None
            else list(node_facet_properties)
        )
        edge_facets = (
            [] if edge_facet_properties is None
            else list(edge_facet_properties)
        )
        graph_summary(
            inputs,
            input_format,
            input_compression,
            output,
            report_type,
            report_format,
            stream,
            graph_name,
            node_facet_properties=node_facets,
            edge_facet_properties=edge_facets,
            error_log=error_log,
        )
    except Exception as gse:
        get_logger().error(f"kgx.graph_summary error: {str(gse)}")
        sys.exit(1)
    # Only reached when ``graph_summary`` returned without raising.
    sys.exit(0)
Beispiel #3
0
def neo4j_upload_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: str,
    uri: str,
    username: str,
    password: str,
    stream: bool,
    node_filters: Tuple[str, str],
    edge_filters: Tuple[str, str],
):
    """
    Upload a set of nodes/edges to a Neo4j database.
    \f

    This wrapper never returns normally: it raises ``SystemExit`` with
    status 0 when ``neo4j_upload`` completes, or status 1 (after logging
    the error) when it raises an ``Exception``.

    Parameters
    ----------
    inputs: List[str]
        A list of files that contains nodes/edges
    input_format: str
        The input format
    input_compression: str
        The input compression type
    uri: str
        The full HTTP address for Neo4j database
    username: str
        Username for authentication
    password: str
        Password for authentication
    stream: bool
        Whether to parse input as a stream
    node_filters: Tuple[str, str]
        Node filters
    edge_filters: Tuple[str, str]
        Edge filters

    """
    # ``sys.exit`` is used instead of the ``site``-injected ``exit`` builtin,
    # which is intended for the interactive shell and may be unavailable.
    import sys

    try:
        neo4j_upload(
            inputs,
            input_format,
            input_compression,
            uri,
            username,
            password,
            stream,
            node_filters,
            edge_filters,
        )
    except Exception as nue:
        get_logger().error(f"kgx.neo4j_upload error: {str(nue)}")
        sys.exit(1)
    # Only reached when ``neo4j_upload`` returned without raising.
    sys.exit(0)
Beispiel #4
0
def neo4j_download_wrapper(
    uri: str,
    username: str,
    password: str,
    output: str,
    output_format: str,
    output_compression: str,
    stream: bool,
    node_filters: Tuple,
    edge_filters: Tuple,
):
    """
    Download nodes and edges from Neo4j database.
    \f

    This wrapper never returns normally: it raises ``SystemExit`` with
    status 0 when ``neo4j_download`` completes, or status 1 (after logging
    the error) when it raises an ``Exception``.

    Parameters
    ----------
    uri: str
        Neo4j URI. For example, https://localhost:7474
    username: str
        Username for authentication
    password: str
        Password for authentication
    output: str
        Where to write the output (stdout, by default)
    output_format: str
        The output type (``tsv``, by default)
    output_compression: str
        The output compression type
    stream: bool
        Whether to parse input as a stream
    node_filters: Tuple
        Node filters
    edge_filters: Tuple
        Edge filters

    """
    # ``sys.exit`` is used instead of the ``site``-injected ``exit`` builtin,
    # which is intended for the interactive shell and may be unavailable.
    import sys

    try:
        neo4j_download(
            uri,
            username,
            password,
            output,
            output_format,
            output_compression,
            stream,
            node_filters,
            edge_filters,
        )
    except Exception as nde:
        get_logger().error(f"kgx.neo4j_download error: {str(nde)}")
        sys.exit(1)
    # Only reached when ``neo4j_download`` returned without raising.
    sys.exit(0)
Beispiel #5
0
def validate_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: str,
    output: str,
    stream: bool,
    biolink_release: Optional[str] = None,
):
    """
    Run KGX validator on an input file to check for Biolink Model compliance.
    \f

    This wrapper never returns normally; it always raises ``SystemExit``:

    * status 0 when ``validate`` returns a falsy result (no errors),
    * status 1 when ``validate`` returns a truthy result (errors found),
    * status 2 when ``validate`` itself raises an ``Exception``.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: str
        The input compression type
    output: str
        Path to output file
    stream: bool
        Whether to parse input as a stream
    biolink_release: Optional[str]
        SemVer version of Biolink Model Release used for validation (default: latest Biolink Model Toolkit version)
    """
    # ``sys.exit`` is used instead of the ``site``-injected ``exit`` builtin,
    # which is intended for the interactive shell and may be unavailable.
    import sys

    try:
        errors = validate(inputs, input_format, input_compression, output,
                          stream, biolink_release)
    except Exception as ex:
        get_logger().error(str(ex))
        # Distinct status so callers can tell a validator crash apart from
        # validation errors in the input.
        sys.exit(2)

    if errors:
        get_logger().error(
            "kgx.validate() errors encountered... check the error log")
        sys.exit(1)
    sys.exit(0)
Beispiel #6
0
    Element,
)
from bmt import Toolkit
from cachetools import LRUCache
import pandas as pd
import numpy as np
from prefixcommons.curie_util import contract_uri
from prefixcommons.curie_util import expand_uri

from kgx.config import get_logger, get_jsonld_context, get_biolink_model_schema
from kgx.graph.base_graph import BaseGraph

# Module-level placeholders, initialised to ``None`` at import time.
# NOTE(review): presumably populated lazily by code elsewhere in this
# module — confirm; the assignments are not visible in this chunk.
curie_lookup_service = None
cache = None

# Module-wide logger obtained from ``kgx.config.get_logger``.
log = get_logger()

# Names of the properties treated as "core" for nodes and for edges.
# NOTE(review): how these sets are consumed is not visible in this chunk.
CORE_NODE_PROPERTIES = {"id", "name"}
CORE_EDGE_PROPERTIES = {"id", "subject", "predicate", "object", "type"}


class GraphEntityType(Enum):
    """
    Enumeration of graph entity kinds: a whole graph, a node, or an edge.

    Each member's value is the lowercase string form of its name.
    """

    GRAPH = "graph"
    NODE = "node"
    EDGE = "edge"


# Biolink 2.0 "Knowledge Source" association slots,
# including the deprecated 'provided_by' slot

provenance_slot_types = {
Beispiel #7
0
def transform_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: str,
    output: str,
    output_format: str,
    output_compression: str,
    stream: bool,
    node_filters: Optional[List[Tuple[str, str]]],
    edge_filters: Optional[List[Tuple[str, str]]],
    transform_config: str,
    source: Optional[List],
    knowledge_sources: Optional[List[Tuple[str, str]]],
    processes: int,
    infores_catalog: Optional[str] = None,
):
    """
    Transform a Knowledge Graph from one serialization form to another.
    \f

    This wrapper never returns normally: it raises ``SystemExit`` with
    status 0 when ``transform`` completes, or status 1 (after logging the
    error) when it raises an ``Exception``.

    Parameters
    ----------
    inputs: List[str]
        A list of files that contains nodes/edges
    input_format: str
        The input format
    input_compression: str
        The input compression type
    output: str
        The output file
    output_format: str
        The output format
    output_compression: str
        The output compression type
    stream: bool
        Whether or not to stream
    node_filters: Optional[List[Tuple[str, str]]]
        Node input filters
    edge_filters: Optional[List[Tuple[str, str]]]
        Edge input filters
    transform_config: str
        Transform config YAML
    source: Optional[List]
        A list of source(s) to load from the YAML
    knowledge_sources: Optional[List[Tuple[str, str]]]
        A list of named knowledge sources with (string, boolean or tuple rewrite) specification
    processes: int
        Number of processes to use
    infores_catalog: Optional[str]
        Optional dump of a TSV file of InfoRes CURIE to Knowledge Source mappings

    """
    # ``sys.exit`` is used instead of the ``site``-injected ``exit`` builtin,
    # which is intended for the interactive shell and may be unavailable.
    import sys

    try:
        transform(
            inputs,
            input_format=input_format,
            input_compression=input_compression,
            output=output,
            output_format=output_format,
            output_compression=output_compression,
            stream=stream,
            node_filters=node_filters,
            edge_filters=edge_filters,
            transform_config=transform_config,
            source=source,
            knowledge_sources=knowledge_sources,
            processes=processes,
            infores_catalog=infores_catalog,
        )
    except Exception as te:
        get_logger().error(f"kgx.transform error: {str(te)}")
        sys.exit(1)
    # Only reached when ``transform`` returned without raising.
    sys.exit(0)
Beispiel #8
0
import pytest

from kgx.transformer import Transformer
from tests import RESOURCE_DIR, TARGET_DIR
from tests.unit import clean_database
from kgx.config import get_logger

from tests.integration import (
    check_container,
    CONTAINER_NAME,
    DEFAULT_NEO4J_URL,
    DEFAULT_NEO4J_USERNAME,
    DEFAULT_NEO4J_PASSWORD,
)

# Module-level logger obtained from ``kgx.config.get_logger``.
logger = get_logger()


@pytest.mark.skipif(not check_container(),
                    reason=f"Container {CONTAINER_NAME} is not running")
def test_csv_to_neo4j_load_to_graph_transform(clean_database):
    """
    Test to load a csv KGX file into Neo4j.
    """
    logger.debug("test_csv_to_neo4j_load...")
    input_args1 = {
        "filename": [
            os.path.join(RESOURCE_DIR, "cm_nodes.csv"),
            os.path.join(RESOURCE_DIR, "cm_edges.csv"),
        ],
        "format":