Example #1
    def load_parquet(
        self,
        path: IOPathLike,
        **kwargs: typing.Any,
    ) -> "KnowledgeGraph":
        """
Wrapper for [`pandas.read_parquet()`](https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html?highlight=read_parquet#pandas.read_parquet) which parses an RDF graph represented as a [Parquet](https://parquet.apache.org/) file, using the [`pyarrow`](https://arrow.apache.org/) engine.

To prepare for upcoming **kglab** features, **this is the preferred method for deserializing an RDF graph.**

Note: this adds relations to an RDF graph; it does not overwrite the existing RDF graph.

    path:
must be a file name (str), path object to a local file reference, or a [*readable, file-like object*](https://docs.python.org/3/glossary.html#term-file-object); a string could be a URL; valid URL schemes include `https`, `http`, `ftp`, `s3`, `gs`, `file`; a file URL can also be a path to a directory that contains multiple partitioned files, including a bucket in cloud storage – based on [`fsspec`](https://github.com/intake/filesystem_spec)

    returns:
this `KnowledgeGraph` object – used for method chaining
        """
        df = pd.read_parquet(path,
                             **chocolate.filter_args(kwargs, pd.read_parquet))

        for _, row in df.iterrows():
            triple = "{} {} {} .".format(row[0], row[1], row[2])
            self._g.parse(data=triple, format="ttl")

        return self
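A minimal usage sketch, assuming **kglab** is installed and `triples.parquet` (a hypothetical file written earlier by `save_parquet()`) exists:

import kglab

kg = kglab.KnowledgeGraph()

# deserialize triples from the Parquet file; load_parquet() returns
# the KnowledgeGraph itself, so it supports method chaining
kg.load_parquet("triples.parquet")
print(len(kg.rdf_graph()))  # count of triples now in the graph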
Example #2
    def save_parquet(
        self,
        path: IOPathLike,
        *,
        compression: str = "snappy",
        storage_options: typing.Optional[dict] = None,  # pylint: disable=W0613
        **kwargs: typing.Any,
    ) -> None:
        """
Wrapper for [`pandas.to_parquet()`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html?highlight=to_parquet) which serializes an RDF graph to a [Parquet](https://parquet.apache.org/) file, using the [`pyarrow`](https://arrow.apache.org/) engine.

To prepare for upcoming **kglab** features, **this is the preferred method for serializing an RDF graph.**

    path:
must be a file name (str), path object to a local file reference, or a [*writable, bytes-like object*](https://docs.python.org/3/glossary.html#term-bytes-like-object); a string could be a URL; valid URL schemes include `https`, `http`, `ftp`, `s3`, `gs`, `file`; accessing cloud storage is based on [`fsspec`](https://github.com/intake/filesystem_spec)

    compression:
name of the compression algorithm to use; defaults to `"snappy"`; can also be `"gzip"`, `"brotli"`, or `None` for no compression

    storage_options:
extra options parsed by [`fsspec`](https://github.com/intake/filesystem_spec) for cloud storage access; **NOT USED** until `pandas` 1.2.x becomes stable
        """
        rows_list = [{
            "s": s.n3(),
            "p": p.n3(),
            "o": o.n3()
        } for s, p, o in self._g]
        df = pd.DataFrame(rows_list, columns=("s", "p", "o"))

        df.to_parquet(
            path,
            compression=compression,
            # storage_options=storage_options,
            **chocolate.filter_args(kwargs, df.to_parquet),
        )
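A usage sketch, assuming a `KnowledgeGraph` populated beforehand (for example via `load_rdf()`); both file names are hypothetical:

import kglab

kg = kglab.KnowledgeGraph()
kg.load_rdf("example.ttl")  # hypothetical input graph in Turtle

# serialize the graph as a three-column ("s", "p", "o") Parquet table
kg.save_parquet("triples.parquet", compression="gzip")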
Example #3
    def save_parquet(
        self,
        path: IOPathLike,
        *,
        compression: str = "snappy",
        storage_options: typing.Optional[dict] = None,  # pylint: disable=W0613
        **kwargs: typing.Any,
    ) -> None:
        """
Wrapper for [`pandas.to_parquet()`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html?highlight=to_parquet) which serializes an RDF graph to a [Parquet](https://parquet.apache.org/) file, using the [`pyarrow`](https://arrow.apache.org/) engine.
Uses the [RAPIDS `cuDF` library](https://docs.rapids.ai/api/cudf/stable/) if GPUs are enabled.

To prepare for upcoming **kglab** features, **this is the preferred method for serializing an RDF graph.**

    path:
must be a file name (str), path object to a local file reference, or a [*writable, bytes-like object*](https://docs.python.org/3/glossary.html#term-bytes-like-object); a string could be a URL; valid URL schemes include `https`, `http`, `ftp`, `s3`, `gs`, `file`; accessing cloud storage is based on [`fsspec`](https://github.com/intake/filesystem_spec)

    compression:
name of the compression algorithm to use; defaults to `"snappy"`; can also be `"gzip"`, `"brotli"`, or `None` for no compression

    storage_options:
extra options parsed by [`fsspec`](https://github.com/intake/filesystem_spec) for cloud storage access; **NOT USED** until `pandas` 1.2.x becomes stable across platforms and also RAPIDS provides support
        """
        rows_list: typing.List[dict] = [
            {
                self._PARQUET_COL_NAMES[0]: s.n3(),
                self._PARQUET_COL_NAMES[1]: p.n3(),
                self._PARQUET_COL_NAMES[2]: o.n3(),
            }
            for s, p, o in self._g
        ]

        if self.use_gpus:
            df = cudf.DataFrame(rows_list, columns=self._PARQUET_COL_NAMES)
        else:
            df = pd.DataFrame(rows_list, columns=self._PARQUET_COL_NAMES)

        df.to_parquet(
            path,
            compression=compression,
            # storage_options=storage_options,
            **chocolate.filter_args(kwargs, df.to_parquet),
        )
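The only change from Example #2 is the CPU/GPU dispatch when building the DataFrame. A minimal sketch of that pattern, assuming `cudf` is importable only in a RAPIDS-enabled environment:

import typing

import pandas as pd

try:
    import cudf  # RAPIDS GPU DataFrame library
except ImportError:
    cudf = None

def make_dataframe(
    rows_list: typing.List[dict],
    columns: typing.Sequence[str],
    use_gpus: bool,
):
    # build the DataFrame on GPU when enabled, otherwise fall back
    # to pandas; both constructors accept a list of row dicts
    if use_gpus and cudf is not None:
        return cudf.DataFrame(rows_list, columns=columns)

    return pd.DataFrame(rows_list, columns=columns)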
Example #4
from chocolate import filter_args
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

args = {"lr": 0.1, "min_lr": 0.001}
var = torch.FloatTensor([0])
optimizer = optim.SGD([var], **filter_args(args, optim.SGD))
lr_scheduler = ReduceLROnPlateau(optimizer,
                                 **filter_args(args, ReduceLROnPlateau))
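Here `filter_args()` routes `lr` to `optim.SGD` and `min_lr` to `ReduceLROnPlateau`, since each callable's signature accepts only one of the two keys. A rough sketch of that behavior (not `chocolate`'s actual implementation), using `inspect`:

import inspect
import typing

def filter_args_sketch(args: dict, func: typing.Callable) -> dict:
    # keep only the entries whose keys name a parameter in the
    # callable's signature; extra keys are silently dropped
    params = inspect.signature(func).parameters
    return {k: v for k, v in args.items() if k in params}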
Example #5
    def validate(
        self,
        *,
        shacl_graph: typing.Optional[typing.Union[GraphLike,
                                                  typing.AnyStr]] = None,
        shacl_graph_format: typing.Optional[str] = None,
        ont_graph: typing.Optional[typing.Union[GraphLike,
                                                typing.AnyStr]] = None,
        ont_graph_format: typing.Optional[str] = None,
        advanced: typing.Optional[bool] = False,
        inference: typing.Optional[str] = None,
        inplace: typing.Optional[bool] = True,
        abort_on_error: typing.Optional[bool] = None,
        **kwargs: typing.Any,
    ) -> typing.Tuple[bool, "KnowledgeGraph", str]:
        """
Wrapper for [`pyshacl.validate()`](https://github.com/RDFLib/pySHACL) for validating the RDF graph using rules expressed in [SHACL](https://www.w3.org/TR/shacl/) (the Shapes Constraint Language).

    shacl_graph:
text representation, file path, or URL of the SHACL *shapes graph* to use in validation

    shacl_graph_format:
RDF format, if the `shacl_graph` parameter is a text representation of the *shapes graph*

    ont_graph:
text representation, file path, or URL of an optional, extra ontology to mix into the RDF graph

    ont_graph_format:
RDF format, if the `ont_graph` parameter is a text representation of the extra ontology

    advanced:
enable advanced SHACL features

    inference:
prior to validation, run an OWL2 RL profile-based expansion of the RDF graph using [OWL-RL](https://github.com/RDFLib/OWL-RL); one of `"rdfs"`, `"owlrl"`, `"both"`, or `None`

    inplace:
when enabled, do not clone the RDF graph prior to inference/expansion, just manipulate it in-place

    abort_on_error:
abort validation on the first error

    returns:
a tuple of `conforms` (RDF graph passes the validation rules); `report_graph` (report as a `KnowledgeGraph` object); `report_text` (report formatted as text)
        """
        conforms, report_graph_data, report_text = pyshacl.validate(
            self._g,
            shacl_graph=shacl_graph,
            shacl_graph_format=shacl_graph_format,
            ont_graph=ont_graph,
            ont_graph_format=ont_graph_format,
            advanced=advanced,
            inference=inference,
            inplace=inplace,
            abort_on_error=abort_on_error,
            serialize_report_graph="ttl",
            **chocolate.filter_args(kwargs, pyshacl.validate),
        )

        g = rdflib.Graph()

        g.parse(data=report_graph_data, format="ttl", encoding="utf-8")

        report_graph = KnowledgeGraph(
            name="SHACL report graph",
            namespaces=self.get_ns_dict(),
            import_graph=g,
        )

        return conforms, report_graph, report_text
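A usage sketch, assuming a populated `KnowledgeGraph` and a SHACL *shapes graph* in `shapes.ttl`; both file names are hypothetical:

import kglab

kg = kglab.KnowledgeGraph()
kg.load_rdf("example.ttl")  # hypothetical data graph

conforms, report_graph, report_text = kg.validate(
    shacl_graph="shapes.ttl",  # hypothetical shapes graph
    inference="rdfs",          # expand the graph before validating
)

print("conforms:", conforms)
print(report_text)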