def merge_wrapper(merge_config: str, source: List, destination: List, processes: int):
    """
    Merge nodes and edges, drawn from the files and KGs declared in a
    merge-config YAML, into a single graph. The merged graph can then be
    written to a local/remote Neo4j instance OR be serialized into a file.
    \f
    .. note:: Everything here is driven by the ``merge-config`` YAML.

    Parameters
    ----------
    merge_config: str
        Merge config YAML
    source: List
        A list of source to load from the YAML
    destination: List
        A list of destination to write to, as defined in the YAML
    processes: int
        Number of processes to use

    """
    try:
        merge(merge_config, source, destination, processes)
    except Exception as err:
        # Surface the failure in the log, then signal failure to the shell.
        get_logger().error(f"kgx.merge error: {str(err)}")
        exit(1)
    else:
        # Success path: exit(0) raises SystemExit, which `except Exception`
        # would not catch anyway, but keeping it in `else` makes that explicit.
        exit(0)
def graph_summary_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    report_type: str,
    report_format: str,
    stream: bool,
    graph_name: str,
    node_facet_properties: Optional[List],
    edge_facet_properties: Optional[List],
    error_log: str = '',
):
    """
    Loads and summarizes a knowledge graph from a set of input files.
    \f
    Parameters
    ----------
    inputs: List[str]
        Input file
    input_format: str
        Input file format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Where to write the output (stdout, by default)
    report_type: str
        The summary get_errors type
    report_format: Optional[str]
        The summary get_errors format file types: 'yaml' or 'json'
        (default is report_type specific)
    stream: bool
        Whether to parse input as a stream
    graph_name: str
        User specified name of graph being summarize
    node_facet_properties: Optional[List]
        A list of node properties from which to generate counts per value for
        those properties. For example, ``['provided_by']``
    edge_facet_properties: Optional[List]
        A list of edge properties from which to generate counts per value for
        those properties. For example,
        ``['original_knowledge_source', 'aggregator_knowledge_source']``
    error_log: str
        Where to write any graph processing error message
        (stderr, by default, for empty argument)

    """
    try:
        graph_summary(
            inputs,
            input_format,
            input_compression,
            output,
            report_type,
            report_format,
            stream,
            graph_name,
            # Bug fix: these arguments are Optional, and list(None) raises
            # TypeError. Only coerce to a list when a value was provided.
            node_facet_properties=list(node_facet_properties)
            if node_facet_properties is not None else None,
            edge_facet_properties=list(edge_facet_properties)
            if edge_facet_properties is not None else None,
            error_log=error_log,
        )
        exit(0)
    except Exception as gse:
        get_logger().error(f"kgx.graph_summary error: {str(gse)}")
        exit(1)
def neo4j_upload_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: str,
    uri: str,
    username: str,
    password: str,
    stream: bool,
    node_filters: Tuple[str, str],
    edge_filters: Tuple[str, str],
):
    """
    Upload a set of nodes/edges to a Neo4j database.
    \f
    Parameters
    ----------
    inputs: List[str]
        A list of files that contains nodes/edges
    input_format: str
        The input format
    input_compression: str
        The input compression type
    uri: str
        The full HTTP address for Neo4j database
    username: str
        Username for authentication
    password: str
        Password for authentication
    stream: bool
        Whether to parse input as a stream
    node_filters: Tuple[str, str]
        Node filters
    edge_filters: Tuple[str, str]
        Edge filters

    """
    try:
        neo4j_upload(
            inputs,
            input_format,
            input_compression,
            uri,
            username,
            password,
            stream,
            node_filters,
            edge_filters,
        )
    except Exception as err:
        # Log the failure and report a non-zero status to the caller.
        get_logger().error(f"kgx.neo4j_upload error: {str(err)}")
        exit(1)
    else:
        exit(0)
def neo4j_download_wrapper(
    uri: str,
    username: str,
    password: str,
    output: str,
    output_format: str,
    output_compression: str,
    stream: bool,
    node_filters: Tuple,
    edge_filters: Tuple,
):
    """
    Download nodes and edges from Neo4j database.
    \f
    Parameters
    ----------
    uri: str
        Neo4j URI. For example, https://localhost:7474
    username: str
        Username for authentication
    password: str
        Password for authentication
    output: str
        Where to write the output (stdout, by default)
    output_format: str
        The output type (``tsv``, by default)
    output_compression: str
        The output compression type
    stream: bool
        Whether to parse input as a stream
    node_filters: Tuple[str, str]
        Node filters
    edge_filters: Tuple[str, str]
        Edge filters

    """
    try:
        neo4j_download(
            uri,
            username,
            password,
            output,
            output_format,
            output_compression,
            stream,
            node_filters,
            edge_filters,
        )
    except Exception as err:
        # Log the failure and report a non-zero status to the caller.
        get_logger().error(f"kgx.neo4j_download error: {str(err)}")
        exit(1)
    else:
        exit(0)
def validate_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: str,
    output: str,
    stream: bool,
    # Annotation fixed: the default is None, so the type is Optional[str]
    # (as the docstring already stated).
    biolink_release: Optional[str] = None,
):
    """
    Run KGX validator on an input file to check for Biolink Model compliance.
    \f
    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: str
        The input compression type
    output: str
        Path to output file
    stream: bool
        Whether to parse input as a stream
    biolink_release: Optional[str]
        SemVer version of Biolink Model Release used for validation
        (default: latest Biolink Model Toolkit version)

    """
    errors = []
    try:
        errors = validate(
            inputs, input_format, input_compression, output, stream, biolink_release
        )
    except Exception as ex:
        # Exit code 2 distinguishes an outright processing failure
        # from validation errors (exit code 1).
        get_logger().error(str(ex))
        exit(2)
    if errors:
        get_logger().error(
            "kgx.validate() errors encountered... check the error log")
        exit(1)
    else:
        exit(0)
    Element,
)
from bmt import Toolkit
from cachetools import LRUCache
import pandas as pd
import numpy as np
from prefixcommons.curie_util import contract_uri
from prefixcommons.curie_util import expand_uri

from kgx.config import get_logger, get_jsonld_context, get_biolink_model_schema
from kgx.graph.base_graph import BaseGraph

# Module-level singletons, initialized lazily on first use elsewhere.
curie_lookup_service = None
cache = None

log = get_logger()

# Properties treated as "core" (always-present) fields on nodes and edges;
# everything else is considered a facet/extension property.
CORE_NODE_PROPERTIES = {"id", "name"}
CORE_EDGE_PROPERTIES = {"id", "subject", "predicate", "object", "type"}


class GraphEntityType(Enum):
    """Kind of graph entity an operation applies to: the whole graph, a node, or an edge."""

    GRAPH = "graph"
    NODE = "node"
    EDGE = "edge"


# Biolink 2.0 "Knowledge Source" association slots,
# including the deprecated 'provided_by' slot
provenance_slot_types = {
def transform_wrapper(
    inputs: List[str],
    input_format: str,
    input_compression: str,
    output: str,
    output_format: str,
    output_compression: str,
    stream: bool,
    node_filters: Optional[List[Tuple[str, str]]],
    edge_filters: Optional[List[Tuple[str, str]]],
    transform_config: str,
    source: Optional[List],
    knowledge_sources: Optional[List[Tuple[str, str]]],
    processes: int,
    infores_catalog: Optional[str] = None,
):
    """
    Transform a Knowledge Graph from one serialization form to another.
    \f
    Parameters
    ----------
    inputs: List[str]
        A list of files that contains nodes/edges
    input_format: str
        The input format
    input_compression: str
        The input compression type
    output: str
        The output file
    output_format: str
        The output format
    output_compression: str
        The output compression type
    stream: bool
        Whether or not to stream
    node_filters: Optional[List[Tuple[str, str]]]
        Node input filters
    edge_filters: Optional[List[Tuple[str, str]]]
        Edge input filters
    transform_config: str
        Transform config YAML
    source: List
        A list of source(s) to load from the YAML
    knowledge_sources: Optional[List[Tuple[str, str]]]
        A list of named knowledge sources with (string, boolean or tuple
        rewrite) specification
    processes: int
        Number of processes to use
    infores_catalog: Optional[str]
        Optional dump of a TSV file of InfoRes CURIE to Knowledge Source
        mappings

    """
    try:
        transform(
            inputs,
            input_format=input_format,
            input_compression=input_compression,
            output=output,
            output_format=output_format,
            output_compression=output_compression,
            stream=stream,
            node_filters=node_filters,
            edge_filters=edge_filters,
            transform_config=transform_config,
            source=source,
            knowledge_sources=knowledge_sources,
            processes=processes,
            infores_catalog=infores_catalog,
        )
    except Exception as err:
        # Log the failure and report a non-zero status to the caller.
        get_logger().error(f"kgx.transform error: {str(err)}")
        exit(1)
    else:
        exit(0)
import pytest from kgx.transformer import Transformer from tests import RESOURCE_DIR, TARGET_DIR from tests.unit import clean_database from kgx.config import get_logger from tests.integration import ( check_container, CONTAINER_NAME, DEFAULT_NEO4J_URL, DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, ) logger = get_logger() @pytest.mark.skipif(not check_container(), reason=f"Container {CONTAINER_NAME} is not running") def test_csv_to_neo4j_load_to_graph_transform(clean_database): """ Test to load a csv KGX file into Neo4j. """ logger.debug("test_csv_to_neo4j_load...") input_args1 = { "filename": [ os.path.join(RESOURCE_DIR, "cm_nodes.csv"), os.path.join(RESOURCE_DIR, "cm_edges.csv"), ], "format":