Example #1
0
def test_validate_node_properties(query):
    """
    Check Validator.validate_node_properties against a parametrized case.

    ``query[0]`` and ``query[1]`` are forwarded to the validator together
    with the required node properties; ``query[2]`` is the expected
    "no errors were returned" flag.
    """
    required = Validator.get_required_node_properties()
    errors = Validator.validate_node_properties(query[0], query[1], required)
    no_errors = len(errors) == 0
    assert no_errors == query[2]
Example #2
0
def validate(config: dict, path: str, output: str, output_dir: str,
             format: str):
    """
    Run KGX validation on an input file to check for BioLink Model compliance.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
        (not read by this function)
    path: str
        Path to input file
    output: str
        Path to output file; the validation report is written here
    output_dir: str
        Path to a directory (not read by this function)
    format: str
        The input format. When empty/None, the transformer is selected
        from ``get_type(path)`` instead.

    """
    # Pick the transformer from the explicit format when given,
    # otherwise from the type derived from the input path.
    if format:
        t = get_transformer(format)()
    else:
        t = get_transformer(get_type(path))()
    t.parse(path, input_format=format)

    validator = Validator()
    errors = validator.validate(t.graph)

    # Use a context manager so the report file is flushed and closed
    # even if writing the report raises.
    with open(output, 'w') as report_file:
        validator.write_report(errors, report_file)
Example #3
0
def test_validator_good():
    """
    Build a small graph of three nodes and one edge, validate it,
    and expect the validator to return no errors.
    """
    graph = NxGraph()

    # (node identifier, single category) pairs; every node is named 'fake'
    node_specs = (
        ('UniProtKB:P123456', 'Protein'),
        ('UBERON:0000001', 'NamedThing'),
        ('UBERON:0000002', 'NamedThing'),
    )
    for curie, category in node_specs:
        graph.add_node(curie, id=curie, name='fake', category=[category])

    edge_attributes = {
        'id': 'UBERON:0000001-part_of-UBERON:0000002',
        'relation': 'RO:1',
        'predicate': 'part_of',
        'subject': 'UBERON:0000001',
        'object': 'UBERON:0000002',
        'category': ['biolink:Association'],
    }
    graph.add_edge('UBERON:0000001', 'UBERON:0000002', **edge_attributes)

    validator = Validator(verbose=True)
    errors = validator.validate(graph)
    print(validator.report(errors))
    assert len(errors) == 0
Example #4
0
def test_validate_edge_properties(query):
    """
    Check Validator.validate_edge_properties against a parametrized case.

    ``query[0]``..``query[2]`` are forwarded to the validator together with
    the required edge properties; ``query[3]`` is the expected
    "no errors were returned" flag. The error report is printed.
    """
    required = Validator.get_required_edge_properties()
    call_args = (query[0], query[1], query[2])
    errors = Validator.validate_edge_properties(*call_args, required)
    print(Validator.report(errors))
    no_errors = len(errors) == 0
    assert no_errors == query[3]
Example #5
0
def test_validator_bad():
    """
    Validate a graph whose nodes and edge carry only ad-hoc attributes
    and expect the validator to record at least one error.
    """
    graph = NxGraph()
    graph.add_node("x", foo=3)
    graph.add_node("ZZZ:3", nosuch=1)
    graph.add_edge("x", "y", baz=6)

    validator = Validator(verbose=True)
    validator.validate(graph)

    recorded = validator.get_errors()
    assert len(recorded) > 0
Example #6
0
def test_validator_bad():
    """
    Validate a graph whose nodes and edge carry only ad-hoc attributes
    and expect validate() to return at least one error.
    """
    graph = NxGraph()
    graph.add_node('x', foo=3)
    graph.add_node('ZZZ:3', nosuch=1)
    graph.add_edge('x', 'y', baz=6)

    errors = Validator(verbose=True).validate(graph)
    assert len(errors) > 0
Example #7
0
def test_validate_json():
    """
    Load 'valid.json' from the resource directory through a Transformer
    and expect validate() to return no errors for the resulting graph.
    """
    json_path = os.path.join(RESOURCE_DIR, 'valid.json')

    transformer = Transformer()
    transformer.transform({'filename': [json_path], 'format': 'json'})

    validator = Validator()
    errors = validator.validate(transformer.store.graph)
    assert len(errors) == 0
Example #8
0
def test_validate_json():
    """
    Load 'valid.json' from the resource directory through a Transformer
    and expect the validator to have recorded no errors afterwards.
    """
    json_path = os.path.join(RESOURCE_DIR, "valid.json")

    transformer = Transformer()
    transformer.transform({"filename": [json_path], "format": "json"})

    validator = Validator()
    validator.validate(transformer.store.graph)

    recorded = validator.get_errors()
    assert len(recorded) == 0
Example #9
0
def test_validator_explicit_biolink_version():
    """
    Validate a two-node, one-edge graph after explicitly setting the
    Validator's Biolink Model to version 1.8.2, expecting no errors.
    """
    compound = "CHEMBL.COMPOUND:1222250"
    anatomy = "UBERON:0000001"

    graph = NxGraph()
    graph.add_node(compound, id=compound, name="Dextrose", category=["Carbohydrate"])
    graph.add_node(anatomy, id=anatomy, name="fake", category=["NamedThing"])

    edge_attributes = {
        "id": "CHEMBL.COMPOUND:1222250-part_of-UBERON:0000001",
        "relation": "RO:1",
        "predicate": "part_of",
        "subject": compound,
        "object": anatomy,
        "category": ["biolink:Association"],
    }
    graph.add_edge(compound, anatomy, **edge_attributes)

    # The model version is set on the class before the instance is created.
    Validator.set_biolink_model(version="1.8.2")
    validator = Validator(verbose=True)
    validator.validate(graph)

    print(validator.get_errors())
    assert len(validator.get_errors()) == 0
Example #10
0
def test_distinct_class_versus_validator_instance_biolink_version():
    """
    Create a Validator while the class is set to Biolink Model 1.7.0,
    switch the class to 1.8.2, and check that the instance's validation
    model version differs from the class toolkit's model version.
    """
    Validator.set_biolink_model(version="1.7.0")
    instance = Validator()

    Validator.set_biolink_model(version="1.8.2")
    class_toolkit = Validator.get_toolkit()

    instance_version = instance.get_validation_model_version()
    class_version = class_toolkit.get_model_version()
    assert instance_version != class_version
Example #11
0
def validate(config, path, output, output_dir):
    """
    Parse the file at ``path``, validate the resulting graph, and report.

    When the validator holds no errors, 'No errors found' is echoed.
    Otherwise the errors are appended to ``output`` and, if ``output_dir``
    is not None, also appended to files under ``output_dir``. ``config``
    is not read by this function.
    """
    transformer_factory = get_transformer(get_type(path))
    transformer = transformer_factory()
    transformer.parse(path)

    validator = Validator()
    validator.validate(transformer.graph)

    # Timestamp shared by every error record written below.
    timestamp = datetime.now()

    if len(validator.errors) == 0:
        click.echo('No errors found')
        return

    append_errors_to_file(output, validator.errors, timestamp)
    if output_dir is not None:
        append_errors_to_files(output_dir, validator.errors, timestamp)
Example #12
0
def test_validate_edge_property_values(query):
    """
    Run validate_edge_property_values on the shared validator with
    ``query[0]``..``query[2]`` and compare "no errors recorded" with
    the expected flag in ``query[3]``.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()

    call_args = (query[0], query[1], query[2])
    shared.validate_edge_property_values(*call_args)

    no_errors = len(shared.get_errors()) == 0
    assert no_errors == query[3]
Example #13
0
def test_get_required_edge_properties(property):
    """
    Check that ``property`` is among the required edge properties
    reported by the shared validator.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()
    required = shared.get_required_edge_properties()
    assert property in required
Example #14
0
def test_validate_node_property_uriorcurie_types_error(query):
    """
    Run validate_node_property_types on the shared validator with
    ``query[0]`` and ``query[1]`` and check that get_errors() does not
    return None afterwards.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()
    shared.validate_node_property_types(query[0], query[1])

    recorded = shared.get_errors()
    assert recorded is not None
Example #15
0
def test_validate_edge_label(query):
    """
    Run validate_edge_predicate on the shared validator with ``query[0]``,
    ``query[1]`` and a dict built from ``query[2]``, then compare
    "no errors recorded" with the expected flag in ``query[3]``.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()

    edge_data = dict(query[2])
    shared.validate_edge_predicate(query[0], query[1], edge_data)

    no_errors = len(shared.get_errors()) == 0
    assert no_errors == query[3]
Example #16
0
def test_validate_categories(query):
    """
    Run validate_categories on the shared validator with ``query[0]`` and
    ``query[1]`` and compare "no errors recorded" with the expected flag
    in ``query[2]``.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()
    shared.validate_categories(query[0], query[1])

    no_errors = len(shared.get_errors()) == 0
    assert no_errors == query[2]
Example #17
0
def test_get_all_prefixes(prefix):
    """
    Check that ``prefix`` is among the prefixes reported by the
    shared validator's get_all_prefixes().
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()
    known_prefixes = shared.get_all_prefixes()
    assert prefix in known_prefixes
Example #18
0
def test_validate_by_stream_inspector():
    """
    Validate graph data by streaming it through a Transformer with the
    Validator injected as the inspector, then expect no recorded errors.
    """
    tsv_files = [
        os.path.join(RESOURCE_DIR, "graph_nodes.tsv"),
        os.path.join(RESOURCE_DIR, "graph_edges.tsv"),
    ]
    source_args = {
        "filename": tsv_files,
        "format": "tsv",
        "aggregator_knowledge_source": True,
    }
    # streaming processing throws the graph data away
    sink_args = {"format": "null"}

    Validator.set_biolink_model("1.8.2")

    # The Validator is created after the Biolink release has been set
    inspector = Validator()

    streaming_transformer = Transformer(stream=True)

    # The validator is handed to transform() as the inspector argument
    streaming_transformer.transform(
        input_args=source_args,
        output_args=sink_args,
        inspector=inspector,
    )

    inspector.write_report()

    recorded = inspector.get_errors()
    assert len(recorded) == 0
Example #19
0
def test_validate_edge_property_subject_is_int_type_error(query):
    """
    Run validate_edge_property_types on the shared validator with
    ``query[0]``..``query[2]`` and check that both the default model
    version and get_errors() are not None afterwards.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()

    call_args = (query[0], query[1], query[2])
    shared.validate_edge_property_types(*call_args)

    default_version = shared.get_default_model_version()
    assert default_version is not None
    recorded = shared.get_errors()
    assert recorded is not None
Example #20
0
def test_validate_node_properties(query):
    """
    Run validate_node_properties on the shared validator with ``query[0]``,
    ``query[1]`` and the required node properties, then compare
    "no errors recorded" with the expected flag in ``query[2]``.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()

    required = shared.get_required_node_properties()
    shared.validate_node_properties(query[0], query[1], required)

    no_errors = len(shared.get_errors()) == 0
    assert no_errors == query[2]
Example #21
0
def test_validate_edge_property_types_and_prefixes(query):
    """
    Run validate_edge_property_types on the shared validator with
    ``query[0]``..``query[2]``; check the default model version is set,
    that "no errors recorded" matches ``query[3]``, and that "biolink"
    is a known prefix.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()

    call_args = (query[0], query[1], query[2])
    shared.validate_edge_property_types(*call_args)

    default_version = shared.get_default_model_version()
    assert default_version is not None

    no_errors = len(shared.get_errors()) == 0
    assert no_errors == query[3]

    known_prefixes = shared.get_all_prefixes()
    assert "biolink" in known_prefixes
Example #22
0
def test_validator_good():
    """
    Build a small graph of three nodes and one edge, validate it,
    and expect the validator to have recorded no errors.
    """
    graph = NxGraph()

    # (node identifier, single category) pairs; every node is named "fake"
    node_specs = (
        ("UniProtKB:P123456", "Protein"),
        ("UBERON:0000001", "NamedThing"),
        ("UBERON:0000002", "NamedThing"),
    )
    for curie, category in node_specs:
        graph.add_node(curie, id=curie, name="fake", category=[category])

    edge_attributes = {
        "id": "UBERON:0000001-part_of-UBERON:0000002",
        "relation": "RO:1",
        "predicate": "part_of",
        "subject": "UBERON:0000001",
        "object": "UBERON:0000002",
        "category": ["biolink:Association"],
    }
    graph.add_edge("UBERON:0000001", "UBERON:0000002", **edge_attributes)

    validator = Validator(verbose=True)
    validator.validate(graph)

    print(validator.get_errors())
    assert len(validator.get_errors()) == 0
Example #23
0
def test_validate_edge_properties(query):
    """
    Run validate_edge_properties on the shared validator with
    ``query[0]``..``query[2]`` and the required edge properties, write the
    report, then compare "no errors recorded" with ``query[3]``.
    """
    shared = Validator.get_the_validator()
    shared.clear_errors()

    required = shared.get_required_edge_properties()
    call_args = (query[0], query[1], query[2])
    shared.validate_edge_properties(*call_args, required)

    # Banner goes to stderr, followed by the validator's own report
    print("\n*** validator error log:", file=stderr)
    shared.write_report()

    no_errors = len(shared.get_errors()) == 0
    assert no_errors == query[3]
Example #24
0
def test_get_all_prefixes(prefix):
    """
    Check that ``prefix`` is among the prefixes returned by
    Validator.get_all_prefixes().
    """
    known_prefixes = Validator.get_all_prefixes()
    is_known = prefix in known_prefixes
    assert is_known
Example #25
0
def test_validate_categories(query):
    """
    Call Validator.validate_categories with ``query[0]`` and ``query[1]``
    and compare "no errors returned" with the expected flag in ``query[2]``.
    """
    errors = Validator.validate_categories(query[0], query[1])
    no_errors = len(errors) == 0
    assert no_errors == query[2]
Example #26
0
def test_validate_edge_property_values(query):
    """
    Call Validator.validate_edge_property_values with
    ``query[0]``..``query[2]`` and compare "no errors returned" with the
    expected flag in ``query[3]``.
    """
    call_args = (query[0], query[1], query[2])
    errors = Validator.validate_edge_property_values(*call_args)
    no_errors = len(errors) == 0
    assert no_errors == query[3]
Example #27
0
def test_distinct_validator_class_versus_default_toolkit_biolink_version():
    """
    Set the Validator class to Biolink Model 1.8.2 and check that its
    toolkit reports a different model version than the default toolkit.
    """
    Validator.set_biolink_model(version="1.8.2")

    default_toolkit = get_toolkit()
    validator_toolkit = Validator.get_toolkit()

    default_version = default_toolkit.get_model_version()
    validator_version = validator_toolkit.get_model_version()
    assert default_version != validator_version
Example #28
0
def test_validate_edge_label(query):
    """
    Call Validator.validate_edge_predicate with ``query[0]``, ``query[1]``
    and a dict built from ``query[2]``, then compare "no errors returned"
    with the expected flag in ``query[3]``.
    """
    edge_data = dict(query[2])
    errors = Validator.validate_edge_predicate(query[0], query[1], edge_data)
    no_errors = len(errors) == 0
    assert no_errors == query[3]
Example #29
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
         Whether to parse input as a stream.
    biolink_release: Optional[str] = None
        SemVer version of Biolink Model Release used for validation (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by [message_level][error_type][message]

    """
    # New design pattern enabling 'stream' processing of statistics on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable class) as an Inspector ...
    # In the new "Inspector" design pattern, we need to instantiate it before the Transformer.
    #
    Validator.set_biolink_model(biolink_release)

    # Validator assumes the currently set Biolink Release
    validator = Validator()

    # Both processing modes read from the same source description.
    input_args = {
        "filename": inputs,
        "format": input_format,
        "compression": input_compression,
    }

    if stream:
        transformer = Transformer(stream=stream)

        transformer.transform(
            input_args=input_args,
            # streaming processing throws the graph data away
            output_args={"format": "null"},
            # ... Second, we inject the Inspector into the transform() call,
            # for the underlying Transformer.process() to use...
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs

        transformer = Transformer()

        transformer.transform(input_args)

        # Slight tweak of classical 'validate' function: that the
        # list of errors are cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Context manager guarantees the report file is flushed and closed,
        # even if writing the report raises.
        with open(output, "w") as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()
Example #30
0
def test_get_required_edge_properties(property):
    """
    Check that ``property`` is among the required edge properties
    returned by Validator.get_required_edge_properties().
    """
    required = Validator.get_required_edge_properties()
    is_required = property in required
    assert is_required