def validate(config: dict, path: str, output: str, output_dir: str, format: str):
    """
    Run KGX validation on an input file to check for BioLink Model compliance.
    \f
    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    path: str
        Path to input file
    output: str
        Path to output file
    output_dir: str
        Path to a directory
    format: str
        The input format
    """
    # Choose a transformer from the explicit format when given; otherwise
    # infer the format from the input file's path/extension.
    if format:
        t = get_transformer(format)()
    else:
        t = get_transformer(get_type(path))()
    t.parse(path, input_format=format)
    validator = Validator()
    errors = validator.validate(t.graph)
    # Use a context manager so the report file handle is always closed
    # (the original leaked the handle returned by open()).
    with open(output, 'w') as report:
        validator.write_report(errors, report)
def test_validate_by_stream_inspector():
    """
    Test the validate function by streaming graph data through a
    graph Transformer.process() Inspector.
    """
    node_file = os.path.join(RESOURCE_DIR, "graph_nodes.tsv")
    edge_file = os.path.join(RESOURCE_DIR, "graph_edges.tsv")
    input_args = {
        "filename": [node_file, edge_file],
        "format": "tsv",
        "aggregator_knowledge_source": True,
    }

    # Validator validates against the currently set Biolink release,
    # so pin it before instantiating the validator.
    Validator.set_biolink_model("1.8.2")
    validator = Validator()

    # The 'null' output sink discards the graph data; the injected
    # inspector observes every record as it streams through
    # Transformer.process().
    transformer = Transformer(stream=True)
    transformer.transform(
        input_args=input_args,
        output_args={"format": "null"},
        inspector=validator,
    )

    validator.write_report()
    assert len(validator.get_errors()) == 0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.
    biolink_release: Optional[str] = None
        SemVer version of Biolink Model Release used for validation
        (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by
        [message_level][error_type][message]
    """
    # Design pattern enabling 'stream' processing on a small memory footprint
    # by injecting an inspector into the Transformer.process() source-to-sink
    # data flow. The Validator (a Callable) is instantiated as the Inspector
    # before the Transformer.
    Validator.set_biolink_model(biolink_release)  # Validator assumes the currently set Biolink Release
    validator = Validator()
    if stream:
        transformer = Transformer(stream=stream)
        transformer.transform(
            input_args={
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
            # Streaming processing throws the graph data away; the injected
            # inspector sees each record as it flows through process().
            output_args={"format": "null"},
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()
        transformer.transform(
            {
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
        )
        # Slight tweak of classical 'validate' function: the list of errors
        # is cached internally in the Validator object
        validator.validate(transformer.store.graph)
    if output:
        # Context manager guarantees the report file is closed
        # (the original leaked the handle returned by open()).
        with open(output, "w") as report:
            validator.write_report(report)
    else:
        validator.write_report(stdout)
    # Return any validation errors directly to the caller
    return validator.get_errors()
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
) -> List:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.

    Returns
    -------
    List
        Returns a list of errors, if any
    """
    # Design pattern enabling 'stream' processing on a small memory footprint
    # by injecting an inspector into the Transformer.process() source-to-sink
    # data flow. The Validator (a Callable) is instantiated as the Inspector
    # before the Transformer.
    #
    # Instantiate once, up front, for both branches (the original created
    # the Validator separately in each branch).
    validator = Validator()
    if stream:
        transformer = Transformer(stream=stream)
        transformer.transform(
            input_args={
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
            # Streaming processing throws the graph data away; the injected
            # inspector sees each record as it flows through process().
            output_args={'format': 'null'},
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()
        transformer.transform(
            {
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
        )
        # Slight tweak of classical 'validate' function: the list of errors
        # is cached internally in the Validator object
        validator.validate(transformer.store.graph)
    if output:
        # Context manager guarantees the report file is closed
        # (the original leaked the handle returned by open()).
        with open(output, 'w') as report:
            validator.write_report(report)
    else:
        validator.write_report(sys.stdout)
    # Return any validation errors directly to the caller
    return validator.get_errors()