Example #1
0
def validate(config: dict, path: str, output: str, output_dir: str,
             format: str):
    """
    Run KGX validation on an input file to check for BioLink Model compliance.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    path: str
        Path to input file
    output: str
        Path to output file
    output_dir: str
        Path to a directory
    format: str
        The input format

    """
    # Choose the transformer from the explicit 'format' when given,
    # otherwise infer it from the input file's path.
    if format:
        transformer = get_transformer(format)()
    else:
        transformer = get_transformer(get_type(path))()
    transformer.parse(path, input_format=format)
    validator = Validator()
    errors = validator.validate(transformer.graph)
    # Use a context manager so the report file handle is always closed;
    # the original passed a bare open() handle that was never closed.
    with open(output, 'w') as report_file:
        validator.write_report(errors, report_file)
Example #2
0
def test_validate_by_stream_inspector():
    """
    Validate a graph by streaming its records through a Transformer with
    a Validator injected as the Transformer.process() Inspector.
    """
    node_file = os.path.join(RESOURCE_DIR, "graph_nodes.tsv")
    edge_file = os.path.join(RESOURCE_DIR, "graph_edges.tsv")
    input_args = {
        "filename": [node_file, edge_file],
        "format": "tsv",
        "aggregator_knowledge_source": True,
    }

    # Pin the Biolink Model release the Validator instance will check against
    Validator.set_biolink_model("1.8.2")
    validator = Validator()

    # Stream the graph through the transformer: the 'null' output sink
    # discards the data while the injected inspector sees every record.
    transformer = Transformer(stream=True)
    transformer.transform(
        input_args=input_args,
        output_args={"format": "null"},
        inspector=validator,
    )

    validator.write_report()

    # The test resource graph is expected to be fully compliant
    assert len(validator.get_errors()) == 0
Example #3
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.
    biolink_release: Optional[str]
        SemVer version of Biolink Model Release used for validation
        (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by [message_level][error_type][message]

    """
    # New design pattern enabling 'stream' processing of statistics on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable class) as an Inspector ...
    # In the new "Inspector" design pattern, we need to instantiate it before the Transformer.
    #
    Validator.set_biolink_model(biolink_release)

    # Validator assumes the currently set Biolink Release
    validator = Validator()

    if stream:
        transformer = Transformer(stream=stream)

        transformer.transform(
            input_args={
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
            # streaming processing throws the graph data away
            output_args={"format": "null"},
            # ... Second, we inject the Inspector into the transform() call,
            # for the underlying Transformer.process() to use...
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs

        transformer = Transformer()

        transformer.transform(
            {
                "filename": inputs,
                "format": input_format,
                "compression": input_compression,
            },
        )

        # Slight tweak of classical 'validate' function: that the
        # list of errors are cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Context manager ensures the report file is flushed and closed;
        # the original leaked the handle returned by open().
        with open(output, "w") as report:
            validator.write_report(report)
    else:
        validator.write_report(stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()
Example #4
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
) -> List:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.

    Returns
    -------
    List
        Returns a list of errors, if any

    """
    # New design pattern enabling 'stream' processing of statistics on a small memory footprint
    # by injecting an inspector in the Transformer.process() source-to-sink data flow.
    #
    # First, we instantiate a Validator() class (converted into a Callable class) as an Inspector ...
    # In the new "Inspector" design pattern, we need to instantiate it before the Transformer.
    #
    if stream:
        validator = Validator()

        transformer = Transformer(stream=stream)

        transformer.transform(
            input_args={
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
            # streaming processing throws the graph data away
            output_args={'format': 'null'},
            # ... Second, we inject the Inspector into the transform() call,
            # for the underlying Transformer.process() to use...
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs

        transformer = Transformer()

        transformer.transform(
            {
                'filename': inputs,
                'format': input_format,
                'compression': input_compression
            },
        )
        validator = Validator()

        # Slight tweak of classical 'validate' function: that the
        # list of errors are cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Context manager ensures the report file is flushed and closed;
        # the original leaked the handle returned by open().
        with open(output, 'w') as report:
            validator.write_report(report)
    else:
        validator.write_report(sys.stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()