Ejemplo n.º 1
0
def test_validator_explicit_biolink_version():
    """
    Validate a minimal graph after explicitly selecting a Biolink release.

    Builds a two-node, one-edge graph, pins the Biolink Model release to
    1.8.2 on the Validator class, and expects validation to report no errors.
    """
    chemical_id = "CHEMBL.COMPOUND:1222250"
    anatomy_id = "UBERON:0000001"

    graph = NxGraph()
    graph.add_node(
        chemical_id, id=chemical_id, name="Dextrose", category=["Carbohydrate"]
    )
    graph.add_node(anatomy_id, id=anatomy_id, name="fake", category=["NamedThing"])
    graph.add_edge(
        chemical_id,
        anatomy_id,
        id="CHEMBL.COMPOUND:1222250-part_of-UBERON:0000001",
        relation="RO:1",
        predicate="part_of",
        subject=chemical_id,
        object=anatomy_id,
        category=["biolink:Association"],
    )

    # The release is selected on the Validator class before the instance
    # that performs the validation is created.
    # NOTE(review): this looks like class-level state that is not reset
    # afterwards -- confirm it cannot leak into other tests.
    Validator.set_biolink_model(version="1.8.2")
    checker = Validator(verbose=True)
    checker.validate(graph)

    # Print the reported errors before asserting that there are none.
    print(checker.get_errors())
    assert len(checker.get_errors()) == 0
Ejemplo n.º 2
0
def test_validator_good():
    """
    Establish the success condition for validation.

    Builds a graph of three well-formed nodes and one ``part_of`` edge and
    expects the Validator to report no errors.
    """
    graph = NxGraph()

    # (node identifier, category) pairs; every node shares the name "fake".
    node_specs = (
        ("UniProtKB:P123456", "Protein"),
        ("UBERON:0000001", "NamedThing"),
        ("UBERON:0000002", "NamedThing"),
    )
    for node_id, node_category in node_specs:
        graph.add_node(node_id, id=node_id, name="fake", category=[node_category])

    part_id = "UBERON:0000001"
    whole_id = "UBERON:0000002"
    graph.add_edge(
        part_id,
        whole_id,
        id="UBERON:0000001-part_of-UBERON:0000002",
        relation="RO:1",
        predicate="part_of",
        subject=part_id,
        object=whole_id,
        category=["biolink:Association"],
    )

    checker = Validator(verbose=True)
    checker.validate(graph)

    # Print the reported errors before asserting that there are none.
    print(checker.get_errors())
    assert len(checker.get_errors()) == 0
Ejemplo n.º 3
0
def test_validate_by_stream_inspector():
    """
    Validate graph data while it is streamed through a Transformer.

    A Validator is passed to ``Transformer.transform()`` as the inspector,
    so validation happens during the streaming transform rather than on a
    fully loaded graph. The test expects no validation errors.
    """
    tsv_files = [
        os.path.join(RESOURCE_DIR, basename)
        for basename in ("graph_nodes.tsv", "graph_edges.tsv")
    ]
    source_spec = {
        "filename": tsv_files,
        "format": "tsv",
        "aggregator_knowledge_source": True,
    }
    # Streaming processing throws the graph data away, hence the "null" sink.
    sink_spec = {"format": "null"}

    # The Validator instance assumes whichever Biolink release is currently
    # set on the class, so the release is chosen before instantiation.
    Validator.set_biolink_model("1.8.2")
    stream_inspector = Validator()

    streamer = Transformer(stream=True)

    # The inspector is injected into transform() for the underlying
    # Transformer.process() to use.
    streamer.transform(
        input_args=source_spec,
        output_args=sink_spec,
        inspector=stream_inspector,
    )

    stream_inspector.write_report()

    errors = stream_inspector.get_errors()
    assert len(errors) == 0
Ejemplo n.º 4
0
def test_validator_bad():
    """
    Establish the failure condition for validation.

    Builds a deliberately malformed graph, whose nodes and edge carry only
    arbitrary attributes, and expects at least one validation error.
    """
    graph = NxGraph()
    graph.add_node("x", **{"foo": 3})
    graph.add_node("ZZZ:3", nosuch=1)
    # Node "y" is never added explicitly; it is only named as the edge target.
    graph.add_edge("x", "y", baz=6)

    checker = Validator(verbose=True)
    checker.validate(graph)

    errors = checker.get_errors()
    assert len(errors) > 0
Ejemplo n.º 5
0
def test_validate_json():
    """
    Validate a representative, Biolink Model compliant JSON file.

    Loads ``valid.json`` from the resource directory with a Transformer and
    expects the resulting graph to validate without errors.
    """
    json_path = os.path.join(RESOURCE_DIR, "valid.json")

    loader = Transformer()
    loader.transform({"filename": [json_path], "format": "json"})

    checker = Validator()
    checker.validate(loader.store.graph)

    errors = checker.get_errors()
    assert len(errors) == 0
Ejemplo n.º 6
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.
    biolink_release: Optional[str] = None
        SemVer version of Biolink Model Release used for validation
        (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by
        [message_level][error_type][message]

    """
    # Design pattern enabling 'stream' processing on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink
    # data flow.
    #
    # First, the Biolink release is set on the Validator class and the
    # Validator (a Callable class, usable as an Inspector) is instantiated;
    # the instance assumes the currently set Biolink release. With the
    # "Inspector" pattern it has to exist before the Transformer runs.
    Validator.set_biolink_model(biolink_release)
    validator = Validator()

    # Both modes read the same input description.
    input_args = {
        "filename": inputs,
        "format": input_format,
        "compression": input_compression,
    }

    if stream:
        transformer = Transformer(stream=stream)

        # ... Second, we inject the Inspector into the transform() call,
        # for the underlying Transformer.process() to use. Streaming
        # processing throws the graph data away, hence the "null" output.
        transformer.transform(
            input_args=input_args,
            output_args={"format": "null"},
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()
        transformer.transform(input_args)

        # Slight tweak of classical 'validate' function: the list of
        # errors is cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Use a context manager so the report file is flushed and closed
        # even if writing the report raises.
        with open(output, "w") as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()
Ejemplo n.º 7
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
) -> List:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.

    Returns
    -------
    List
        Returns a list of errors, if any

    """
    # Design pattern enabling 'stream' processing on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink
    # data flow.
    #
    # First, we instantiate a Validator() class (a Callable class) as an
    # Inspector. With the "Inspector" pattern it has to exist before the
    # Transformer runs, so it is created once, ahead of both modes.
    validator = Validator()

    # Both modes read the same input description.
    input_args = {
        'filename': inputs,
        'format': input_format,
        'compression': input_compression,
    }

    if stream:
        transformer = Transformer(stream=stream)

        # ... Second, we inject the Inspector into the transform() call,
        # for the underlying Transformer.process() to use. Streaming
        # processing throws the graph data away, hence the 'null' output.
        transformer.transform(
            input_args=input_args,
            output_args={'format': 'null'},
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()
        transformer.transform(input_args)

        # Slight tweak of classical 'validate' function: the list of
        # errors is cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Use a context manager so the report file is flushed and closed
        # even if writing the report raises.
        with open(output, 'w') as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(sys.stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()