def load_parquet(
    self,
    path: IOPathLike,
    **kwargs: typing.Any,
) -> "KnowledgeGraph":
    """
    Wrapper for [`pandas.read_parquet()`](https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html?highlight=read_parquet#pandas.read_parquet)
    which parses an RDF graph represented as a [Parquet](https://parquet.apache.org/) file.
    To prepare for upcoming **kglab** features, **this is the preferred method for deserializing an RDF graph.**

    Each row is read as one statement: its first three columns, taken by position, are formatted as `subject predicate object .` and parsed as Turtle.
    Any further columns are ignored.

    Note: this adds relations to an RDF graph, it does not overwrite the existing RDF graph.

        path:
    must be a file name (str), path object to a local file reference, or a [*readable, file-like object*](https://docs.python.org/3/glossary.html#term-file-object); a string could be a URL; valid URL schemes include `https`, `http`, `ftp`, `s3`, `gs`, `file`; a file URL can also be a path to a directory that contains multiple partitioned files, including a bucket in cloud storage – based on [`fsspec`](https://github.com/intake/filesystem_spec)

        kwargs:
    extra keyword arguments, passed through `chocolate.filter_args()` and then on to `pandas.read_parquet()`

        returns:
    this `KnowledgeGraph` object – used for method chaining
    """
    df = pd.read_parquet(path, **chocolate.filter_args(kwargs, pd.read_parquet))

    # `itertuples(name=None)` yields plain tuples, so the integer indexes
    # below are genuinely positional; indexing a label-indexed row Series
    # with integers is deprecated in recent pandas releases
    statements = [
        "{} {} {} .".format(row[0], row[1], row[2])
        for row in df.itertuples(index=False, name=None)
    ]

    # parse everything as one Turtle document: the parser is set up once
    # instead of once per row, and a blank-node label that appears in
    # several rows is read within a single document
    if statements:
        self._g.parse(data="\n".join(statements), format="ttl")

    return self
def save_parquet(
    self,
    path: IOPathLike,
    *,
    compression: str = "snappy",
    storage_options: dict = None, # pylint: disable=W0613
    **kwargs: typing.Any,
) -> None:
    """
    Wrapper for [`pandas.to_parquet()`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html?highlight=to_parquet)
    which serializes an RDF graph to a [Parquet](https://parquet.apache.org/) file.
    To prepare for upcoming **kglab** features, **this is the preferred method for serializing an RDF graph.**

    Every statement in the graph becomes one row with the columns `s`, `p`, `o`, each holding the N3 text of the corresponding term.

        path:
    must be a file name (str), path object to a local file reference, or a [*writable, bytes-like object*](https://docs.python.org/3/glossary.html#term-bytes-like-object); a string could be a URL; valid URL schemes include `https`, `http`, `ftp`, `s3`, `gs`, `file`; accessing cloud storage is based on [`fsspec`](https://github.com/intake/filesystem_spec)

        compression:
    name of the compression algorithm to use; defaults to `"snappy"`; can also be `"gzip"`, `"brotli"`, or `None` for no compression

        storage_options:
    extra options parsed by [`fsspec`](https://github.com/intake/filesystem_spec) for cloud storage access; **NOT USED** until `pandas` 1.2.x becomes stable

        kwargs:
    extra keyword arguments, passed through `chocolate.filter_args()` and then on to `DataFrame.to_parquet()`
    """
    col_names = ("s", "p", "o")
    records = []

    for subj, pred, obj in self._g:
        terms = (subj.n3(), pred.n3(), obj.n3())
        records.append(dict(zip(col_names, terms)))

    frame = pd.DataFrame(records, columns=col_names)
    extra_args = chocolate.filter_args(kwargs, frame.to_parquet)

    # NOTE: `storage_options` is accepted but deliberately not forwarded
    frame.to_parquet(
        path,
        compression=compression,
        **extra_args,
    )
def save_parquet (
    self,
    path: IOPathLike,
    *,
    compression: str = "snappy",
    storage_options: dict = None, # pylint: disable=W0613
    **kwargs: typing.Any,
) -> None:
    """
    Wrapper for [`pandas.to_parquet()`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_parquet.html?highlight=to_parquet)
    which serializes an RDF graph to a [Parquet](https://parquet.apache.org/) file.
    Uses the [RAPIDS `cuDF` library](https://docs.rapids.ai/api/cudf/stable/) if GPUs are enabled.
    To prepare for upcoming **kglab** features, **this is the preferred method for serializing an RDF graph.**

    Every statement in the graph becomes one row; the column names come from `self._PARQUET_COL_NAMES` and each cell holds the N3 text of the corresponding term.

        path:
    must be a file name (str), path object to a local file reference, or a [*writable, bytes-like object*](https://docs.python.org/3/glossary.html#term-bytes-like-object); a string could be a URL; valid URL schemes include `https`, `http`, `ftp`, `s3`, `gs`, `file`; accessing cloud storage is based on [`fsspec`](https://github.com/intake/filesystem_spec)

        compression:
    name of the compression algorithm to use; defaults to `"snappy"`; can also be `"gzip"`, `"brotli"`, or `None` for no compression

        storage_options:
    extra options parsed by [`fsspec`](https://github.com/intake/filesystem_spec) for cloud storage access; **NOT USED** until `pandas` 1.2.x becomes stable across platforms and also RAPIDS provides support

        kwargs:
    extra keyword arguments, passed through `chocolate.filter_args()` and then on to the data frame's `to_parquet()`
    """
    names = self._PARQUET_COL_NAMES
    records: typing.List[dict] = []

    for subj, pred, obj in self._g:
        records.append({
            names[0]: subj.n3(),
            names[1]: pred.n3(),
            names[2]: obj.n3(),
        })

    # the conditional expression only touches `cudf` when GPUs are enabled
    make_frame = cudf.DataFrame if self.use_gpus else pd.DataFrame
    frame = make_frame(records, columns=names)

    extra_args = chocolate.filter_args(kwargs, frame.to_parquet)

    # NOTE: `storage_options` is accepted but deliberately not forwarded
    frame.to_parquet(
        path,
        compression=compression,
        **extra_args,
    )
from chocolate import filter_args
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# A single settings mapping shared by the optimizer and the scheduler.
# NOTE(review): `filter_args` presumably keeps only the entries that the
# given callable accepts -- confirm against the chocolate documentation.
args = dict(lr=0.1, min_lr=0.001)

# the one tensor handed to the optimizer
var = torch.FloatTensor([0])

optimizer = optim.SGD(
    [var],
    **filter_args(args, optim.SGD),
)

lr_scheduler = ReduceLROnPlateau(
    optimizer,
    **filter_args(args, ReduceLROnPlateau),
)
def validate(
    self,
    *,
    shacl_graph: typing.Optional[typing.Union[GraphLike, typing.AnyStr]] = None,
    shacl_graph_format: typing.Optional[str] = None,
    ont_graph: typing.Optional[typing.Union[GraphLike, typing.AnyStr]] = None,
    ont_graph_format: typing.Optional[str] = None,
    advanced: typing.Optional[bool] = False,
    inference: typing.Optional[str] = None,
    inplace: typing.Optional[bool] = True,
    abort_on_error: typing.Optional[bool] = None,
    **kwargs: typing.Any,
) -> typing.Tuple[bool, "KnowledgeGraph", str]:
    """
    Wrapper for [`pyshacl.validate()`](https://github.com/RDFLib/pySHACL)
    for validating the RDF graph using rules expressed in the [SHACL](https://www.w3.org/TR/shacl/) (Shapes Constraint Language).

    The validation report is requested from `pyshacl` serialized as Turtle, parsed back into an RDF graph, and wrapped in a new `KnowledgeGraph` that reuses this graph's namespaces.

        shacl_graph:
    text representation, file path, or URL of the SHACL *shapes graph* to use in validation

        shacl_graph_format:
    RDF format, if the `shacl_graph` parameter is a text representation of the *shapes graph*

        ont_graph:
    text representation, file path, or URL of an optional, extra ontology to mix into the RDF graph

        ont_graph_format:
    RDF format, if the `ont_graph` parameter is a text representation of the extra ontology

        advanced:
    enable advanced SHACL features

        inference:
    prior to validation, run OWL2 RL profile-based expansion of the RDF graph based on [OWL-RL](https://github.com/RDFLib/OWL-RL); `"rdfs"`, `"owlrl"`, `"both"`, `None`

        inplace:
    when enabled, do not clone the RDF graph prior to inference/expansion, just manipulate it in-place

        abort_on_error:
    abort validation on the first error

        kwargs:
    extra keyword arguments, passed through `chocolate.filter_args()` and then on to `pyshacl.validate()`

        returns:
    a tuple of `conforms` (RDF graph passes the validation rules); `report_graph` (report as a `KnowledgeGraph` object); `report_text` (report formatted as text)
    """
    extra_args = chocolate.filter_args(kwargs, pyshacl.validate)

    # the named options stay explicit keywords, so a clash with a key in
    # `extra_args` is still reported by the call itself
    outcome = pyshacl.validate(
        self._g,
        shacl_graph=shacl_graph,
        shacl_graph_format=shacl_graph_format,
        ont_graph=ont_graph,
        ont_graph_format=ont_graph_format,
        advanced=advanced,
        inference=inference,
        inplace=inplace,
        abort_on_error=abort_on_error,
        serialize_report_graph="ttl",
        **extra_args,
    )
    conforms, report_ttl, report_text = outcome

    # rebuild the serialized report as an RDF graph
    report_rdf = rdflib.Graph()
    report_rdf.parse(data=report_ttl, format="ttl", encoding="utf-8")

    report_kg = KnowledgeGraph(
        name="SHACL report graph",
        namespaces=self.get_ns_dict(),
        import_graph=report_rdf,
    )

    return conforms, report_kg, report_text