예제 #1
0
def test_validator_explicit_biolink_version():
    """
    Build a two-node, one-edge graph and assert that validation reports no
    errors after the Biolink Model version is explicitly set to 1.8.2.
    """
    compound_id = "CHEMBL.COMPOUND:1222250"
    anatomy_id = "UBERON:0000001"

    graph = NxGraph()
    graph.add_node(
        compound_id, id=compound_id, name="Dextrose", category=["Carbohydrate"]
    )
    graph.add_node(anatomy_id, id=anatomy_id, name="fake", category=["NamedThing"])

    # Edge attributes are collected first so the add_edge call stays short.
    edge_attributes = {
        "id": "CHEMBL.COMPOUND:1222250-part_of-UBERON:0000001",
        "relation": "RO:1",
        "predicate": "part_of",
        "subject": compound_id,
        "object": anatomy_id,
        "category": ["biolink:Association"],
    }
    graph.add_edge(compound_id, anatomy_id, **edge_attributes)

    # The model version is set on the class before the instance is created.
    Validator.set_biolink_model(version="1.8.2")
    checker = Validator(verbose=True)
    checker.validate(graph)
    print(checker.get_errors())
    assert len(checker.get_errors()) == 0
예제 #2
0
def test_validator_good():
    """
    Build a small graph (three nodes, one ``part_of`` edge) and assert that
    the validator reports no errors for it.
    """
    protein_id = "UniProtKB:P123456"
    part_id = "UBERON:0000001"
    whole_id = "UBERON:0000002"

    graph = NxGraph()
    graph.add_node(protein_id, id=protein_id, name="fake", category=["Protein"])
    # Both anatomy nodes carry identical attributes apart from their id.
    for anatomy_id in (part_id, whole_id):
        graph.add_node(
            anatomy_id, id=anatomy_id, name="fake", category=["NamedThing"]
        )

    edge_attributes = {
        "id": "UBERON:0000001-part_of-UBERON:0000002",
        "relation": "RO:1",
        "predicate": "part_of",
        "subject": part_id,
        "object": whole_id,
        "category": ["biolink:Association"],
    }
    graph.add_edge(part_id, whole_id, **edge_attributes)

    checker = Validator(verbose=True)
    checker.validate(graph)
    print(checker.get_errors())
    assert len(checker.get_errors()) == 0
예제 #3
0
def test_validator_bad():
    """
    Build a graph whose nodes and edge carry only ad-hoc attributes
    (``foo``, ``nosuch``, ``baz``) and assert that validation reports at
    least one error.
    """
    graph = NxGraph()
    graph.add_node("x", **{"foo": 3})
    graph.add_node("ZZZ:3", nosuch=1)
    # "y" is never added as a node explicitly; it appears only as the edge target.
    graph.add_edge("x", "y", baz=6)

    checker = Validator(verbose=True)
    checker.validate(graph)
    assert len(checker.get_errors()) > 0
예제 #4
0
def test_validate_json():
    """
    Load ``valid.json`` from the resource directory through a Transformer
    and assert that validating the resulting graph reports no errors.
    """
    json_path = os.path.join(RESOURCE_DIR, "valid.json")
    source_spec = {"filename": [json_path], "format": "json"}

    transformer = Transformer()
    transformer.transform(source_spec)

    checker = Validator()
    checker.validate(transformer.store.graph)
    assert len(checker.get_errors()) == 0
예제 #5
0
def test_validator_good():
    """
    Build a small graph (three nodes, one ``part_of`` edge) and assert that
    the list returned by ``Validator.validate`` is empty.
    """
    protein_id = 'UniProtKB:P123456'
    part_id = 'UBERON:0000001'
    whole_id = 'UBERON:0000002'

    graph = NxGraph()
    graph.add_node(protein_id, id=protein_id, name='fake', category=['Protein'])
    # Both anatomy nodes carry identical attributes apart from their id.
    for anatomy_id in (part_id, whole_id):
        graph.add_node(
            anatomy_id, id=anatomy_id, name='fake', category=['NamedThing']
        )

    edge_attributes = {
        'id': 'UBERON:0000001-part_of-UBERON:0000002',
        'relation': 'RO:1',
        'predicate': 'part_of',
        'subject': part_id,
        'object': whole_id,
        'category': ['biolink:Association'],
    }
    graph.add_edge(part_id, whole_id, **edge_attributes)

    checker = Validator(verbose=True)
    found = checker.validate(graph)
    print(checker.report(found))
    assert len(found) == 0
예제 #6
0
def validate(config: dict, path: str, output: str, output_dir: str,
             format: str):
    """
    Run KGX validation on an input file to check for BioLink Model compliance.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
        (not read by this function)
    path: str
        Path to input file
    output: str
        Path to output file; the validation report is written here
    output_dir: str
        Path to a directory (not read by this function)
    format: str
        The input format; when empty, the transformer is selected from
        ``get_type(path)`` instead

    """
    # Pick the transformer from the explicit format when one is given,
    # otherwise from whatever get_type() reports for the path.
    transformer_cls = get_transformer(format if format else get_type(path))
    t = transformer_cls()
    t.parse(path, input_format=format)

    validator = Validator()
    errors = validator.validate(t.graph)

    # Use a context manager so the report file is flushed and closed even
    # if write_report raises.
    with open(output, 'w') as report_file:
        validator.write_report(errors, report_file)
예제 #7
0
def validate(config, path, output, output_dir):
    """
    Parse the file at ``path``, validate the resulting graph, and report.

    When the validator holds no errors, a short message is echoed.
    Otherwise the errors are appended to ``output`` and, if ``output_dir``
    is not None, also passed to ``append_errors_to_files`` for that
    directory. ``config`` is not read by this function.
    """
    transformer_cls = get_transformer(get_type(path))
    t = transformer_cls()
    t.parse(path)

    validator = Validator()
    validator.validate(t.graph)

    # One timestamp is shared by every report written for this run.
    timestamp = datetime.now()

    if len(validator.errors) == 0:
        click.echo('No errors found')
        return

    append_errors_to_file(output, validator.errors, timestamp)
    if output_dir is not None:
        append_errors_to_files(output_dir, validator.errors, timestamp)
예제 #8
0
def test_validator_bad():
    """
    Build a graph whose nodes and edge carry only ad-hoc attributes
    (``foo``, ``nosuch``, ``baz``) and assert that ``Validator.validate``
    returns at least one error.
    """
    graph = NxGraph()
    graph.add_node('x', **{'foo': 3})
    graph.add_node('ZZZ:3', nosuch=1)
    # 'y' is never added as a node explicitly; it appears only as the edge target.
    graph.add_edge('x', 'y', baz=6)

    checker = Validator(verbose=True)
    found = checker.validate(graph)
    assert len(found) > 0
예제 #9
0
def test_validate_json():
    """
    Load ``valid.json`` from the resource directory through a Transformer
    and assert that ``Validator.validate`` returns no errors for the graph.
    """
    json_path = os.path.join(RESOURCE_DIR, 'valid.json')
    source_spec = {'filename': [json_path], 'format': 'json'}

    transformer = Transformer()
    transformer.transform(source_spec)

    checker = Validator()
    found = checker.validate(transformer.store.graph)
    assert len(found) == 0
예제 #10
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.
    biolink_release: Optional[str] = None
        SemVer version of Biolink Model Release used for validation (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by [message_level][error_type][message]

    """
    # New design pattern enabling 'stream' processing of statistics on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable class) as an Inspector ...
    # In the new "Inspector" design pattern, we need to instantiate it before the Transformer.
    #
    Validator.set_biolink_model(biolink_release)

    # Validator assumes the currently set Biolink Release
    validator = Validator()

    if stream:
        transformer = Transformer(stream=stream)

        transformer.transform(
            input_args={
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
            # streaming processing throws the graph data away
            output_args={"format": "null"},
            # ... Second, we inject the Inspector into the transform() call,
            # for the underlying Transformer.process() to use...
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs

        transformer = Transformer()

        transformer.transform(
            {
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
        )

        # Slight tweak of classical 'validate' function: that the
        # list of errors are cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Context manager guarantees the report file is flushed and closed,
        # even if write_report raises.
        with open(output, "w") as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()
예제 #11
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
) -> List:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.

    Returns
    -------
    List
        Returns a list of errors, if any

    """
    # New design pattern enabling 'stream' processing of statistics on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable class) as an Inspector ...
    # In the new "Inspector" design pattern, we need to instantiate it before the Transformer.
    #
    if stream:
        validator = Validator()

        transformer = Transformer(stream=stream)

        transformer.transform(
            input_args={
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
            # streaming processing throws the graph data away
            output_args={'format': 'null'},
            # ... Second, we inject the Inspector into the transform() call,
            # for the underlying Transformer.process() to use...
            inspector=validator)
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs

        transformer = Transformer()

        transformer.transform(
            {
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            }, )
        validator = Validator()

        # Slight tweak of classical 'validate' function: that the
        # list of errors are cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Context manager guarantees the report file is flushed and closed,
        # even if write_report raises.
        with open(output, 'w') as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(sys.stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()