def test_validator_explicit_biolink_version():
    """
    A fake test to establish a success condition for validation.
    """
    graph = NxGraph()
    # Register the two endpoint nodes of the edge under test.
    for curie, label, categories in (
        ("CHEMBL.COMPOUND:1222250", "Dextrose", ["Carbohydrate"]),
        ("UBERON:0000001", "fake", ["NamedThing"]),
    ):
        graph.add_node(curie, id=curie, name=label, category=categories)
    graph.add_edge(
        "CHEMBL.COMPOUND:1222250",
        "UBERON:0000001",
        id="CHEMBL.COMPOUND:1222250-part_of-UBERON:0000001",
        relation="RO:1",
        predicate="part_of",
        subject="CHEMBL.COMPOUND:1222250",
        object="UBERON:0000001",
        category=["biolink:Association"],
    )
    # Pin the Biolink Model release explicitly before validating.
    Validator.set_biolink_model(version="1.8.2")
    validator = Validator(verbose=True)
    validator.validate(graph)
    print(validator.get_errors())
    assert len(validator.get_errors()) == 0
def test_validator_good():
    """
    A fake test to establish a success condition for validation.
    """
    graph = NxGraph()
    # Three well-formed nodes: one protein and two anatomical entities.
    for curie, categories in (
        ("UniProtKB:P123456", ["Protein"]),
        ("UBERON:0000001", ["NamedThing"]),
        ("UBERON:0000002", ["NamedThing"]),
    ):
        graph.add_node(curie, id=curie, name="fake", category=categories)
    graph.add_edge(
        "UBERON:0000001",
        "UBERON:0000002",
        id="UBERON:0000001-part_of-UBERON:0000002",
        relation="RO:1",
        predicate="part_of",
        subject="UBERON:0000001",
        object="UBERON:0000002",
        category=["biolink:Association"],
    )
    validator = Validator(verbose=True)
    validator.validate(graph)
    print(validator.get_errors())
    assert len(validator.get_errors()) == 0
def test_validator_bad():
    """
    A fake test to establish a fail condition for validation.
    """
    # Deliberately malformed graph: nodes lack id/name/category and the
    # edge lacks subject/object/predicate, so validation must report errors.
    graph = NxGraph()
    graph.add_node("x", foo=3)
    graph.add_node("ZZZ:3", nosuch=1)
    graph.add_edge("x", "y", baz=6)
    validator = Validator(verbose=True)
    validator.validate(graph)
    assert len(validator.get_errors()) > 0
def test_validate_json():
    """
    Validate against a valid representative Biolink Model compliant JSON.
    """
    # Load a known-good JSON fixture through the Transformer, then
    # confirm the resulting graph validates without errors.
    source = os.path.join(RESOURCE_DIR, "valid.json")
    transformer = Transformer()
    transformer.transform({"filename": [source], "format": "json"})
    validator = Validator()
    validator.validate(transformer.store.graph)
    assert len(validator.get_errors()) == 0
def test_validator_good():
    """
    A fake test to establish a success condition for validation.
    """
    graph = NxGraph()
    # Well-formed nodes with id, name and category attributes.
    for curie, categories in (
        ('UniProtKB:P123456', ['Protein']),
        ('UBERON:0000001', ['NamedThing']),
        ('UBERON:0000002', ['NamedThing']),
    ):
        graph.add_node(curie, id=curie, name='fake', category=categories)
    graph.add_edge(
        'UBERON:0000001',
        'UBERON:0000002',
        id='UBERON:0000001-part_of-UBERON:0000002',
        relation='RO:1',
        predicate='part_of',
        subject='UBERON:0000001',
        object='UBERON:0000002',
        category=['biolink:Association'],
    )
    validator = Validator(verbose=True)
    errors = validator.validate(graph)
    print(validator.report(errors))
    assert len(errors) == 0
def validate(config: dict, path: str, output: str, output_dir: str, format: str):
    """
    Run KGX validation on an input file to check for BioLink Model compliance.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    path: str
        Path to input file
    output: str
        Path to output file
    output_dir:
        Path to a directory
    format:
        The input format

    """
    # Pick the transformer from the explicit format if given; otherwise
    # infer the format from the input file's type.
    # (Dropped the redundant `t = None` pre-assignment: both branches bind t.)
    if format:
        t = get_transformer(format)()
    else:
        t = get_transformer(get_type(path))()
    t.parse(path, input_format=format)
    validator = Validator()
    errors = validator.validate(t.graph)
    # Use a context manager so the report file is flushed and closed even
    # if write_report raises (the original leaked the open file handle).
    with open(output, 'w') as report_file:
        validator.write_report(errors, report_file)
def validate(config, path, output, output_dir):
    # Parse the input with a transformer inferred from the file's type,
    # then run validation over the resulting graph.
    transformer = get_transformer(get_type(path))()
    transformer.parse(path)
    validator = Validator()
    validator.validate(transformer.graph)
    timestamp = datetime.now()
    errors = validator.errors
    if not errors:
        click.echo('No errors found')
    else:
        append_errors_to_file(output, errors, timestamp)
    # Directory output is written regardless of whether errors were found.
    if output_dir is not None:
        append_errors_to_files(output_dir, errors, timestamp)
def test_validator_bad():
    """
    A fake test to establish a fail condition for validation.
    """
    # Graph missing required node/edge properties; validation must fail.
    graph = NxGraph()
    graph.add_node('x', foo=3)
    graph.add_node('ZZZ:3', nosuch=1)
    graph.add_edge('x', 'y', baz=6)
    validator = Validator(verbose=True)
    errors = validator.validate(graph)
    assert len(errors) > 0
def test_validate_json():
    """
    Validate against a valid representative Biolink Model compliant JSON.
    """
    # Transform a known-good JSON fixture and expect zero validation errors.
    fixture = os.path.join(RESOURCE_DIR, 'valid.json')
    transformer = Transformer()
    transformer.transform({'filename': [fixture], 'format': 'json'})
    validator = Validator()
    errors = validator.validate(transformer.store.graph)
    assert len(errors) == 0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.
    biolink_release: Optional[str] = None
        SemVer version of Biolink Model Release used for validation
        (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by
        [message_level][error_type][message]

    """
    # New design pattern enabling 'stream' processing of statistics on a small
    # memory footprint by injecting an inspector in the Transformer.process()
    # source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable
    # class) as an Inspector; in the "Inspector" design pattern, it must be
    # instantiated before the Transformer.
    Validator.set_biolink_model(biolink_release)

    # Validator assumes the currently set Biolink Release
    validator = Validator()

    if stream:
        transformer = Transformer(stream=stream)

        # Second, we inject the Inspector into the transform() call,
        # for the underlying Transformer.process() to use.
        transformer.transform(
            input_args={
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
            # streaming processing throws the graph data away
            output_args={"format": "null"},
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()
        transformer.transform(
            {
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
        )
        # Slight tweak of the classical 'validate' function: the list of
        # errors is cached internally in the Validator object.
        validator.validate(transformer.store.graph)

    if output:
        # Context manager ensures the report file is flushed and closed
        # (the original leaked the open file handle).
        with open(output, "w") as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(stdout)

    # Third, return any validation errors directly to the caller.
    return validator.get_errors()
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
) -> List:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.

    Returns
    -------
    List
        Returns a list of errors, if any

    """
    # New design pattern enabling 'stream' processing of statistics on a small
    # memory footprint by injecting an inspector in the Transformer.process()
    # source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable
    # class) as an Inspector; in the "Inspector" design pattern, it must be
    # instantiated before the Transformer.
    if stream:
        validator = Validator()
        transformer = Transformer(stream=stream)

        # Second, we inject the Inspector into the transform() call,
        # for the underlying Transformer.process() to use.
        transformer.transform(
            input_args={
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
            # streaming processing throws the graph data away
            output_args={'format': 'null'},
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()
        transformer.transform(
            {
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
        )
        validator = Validator()
        # Slight tweak of the classical 'validate' function: the list of
        # errors is cached internally in the Validator object.
        validator.validate(transformer.store.graph)

    if output:
        # Context manager ensures the report file is flushed and closed
        # (the original leaked the open file handle).
        with open(output, 'w') as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(sys.stdout)

    # Third, return any validation errors directly to the caller.
    return validator.get_errors()