Example #1
    def __init__(
        self,
        filepath: str,
        bucket_name: str,
        s3fs_args: Dict[str, Any] = None,
        credentials: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``MatplotlibS3Writer``.

        Args:
            bucket_name: Name of the bucket without "s3://" prefix.
            filepath: Key path to the matplotlib object file(s).
            s3fs_args: Arguments for ``S3FileSystem``. See
                https://s3fs.readthedocs.io/en/latest/api.html#s3fs.core.S3FileSystem
            credentials: Arguments for ``client_kwargs``. If needed,
                ``aws_access_key_id`` and ``aws_secret_access_key`` can be
                provided here.
            save_args: Save args passed to `plt.savefig`. See
                https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
        """
        deprecation_warning(self.__class__.__name__)
        _credentials = copy.deepcopy(credentials) or {}

        self._s3fs_args = copy.deepcopy(s3fs_args) or {}
        self._filepath = Path(filepath)
        self._save_args = save_args if save_args else dict()
        self._bucket_name = Path(bucket_name)

        self._s3 = S3FileSystem(client_kwargs=_credentials, **self._s3fs_args)
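A minimal usage sketch, assuming ``MatplotlibS3Writer`` is in scope; the bucket, key, and save options are hypothetical, and ``save`` is assumed to accept a matplotlib figure:

    import matplotlib.pyplot as plt

    plt.plot([1, 2, 3], [4, 5, 6])
    writer = MatplotlibS3Writer(
        filepath="plots/figure.png",  # hypothetical S3 key
        bucket_name="my-bucket",      # hypothetical bucket, no "s3://" prefix
        save_args={"dpi": 300},       # forwarded to plt.savefig
    )
    writer.save(plt.gcf())  # assumed to accept the current figure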
Example #2
    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``JSONLocalDataSet`` pointing to a concrete
        filepath.

        Args:
            filepath: path to a local json file.
            load_args: Arguments passed on to ``json.load``.
                See https://docs.python.org/3/library/json.html for details.
                All defaults are preserved.
            save_args: Arguments passed on to ``json.dump``.
                See https://docs.python.org/3/library/json.html
                for details. All defaults are preserved.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)
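A minimal usage sketch with a hypothetical path; ``Version`` is the ``kedro.io.core`` class named in the docstring:

    from kedro.io.core import Version

    data_set = JSONLocalDataSet(
        filepath="data/parameters.json",        # hypothetical local path
        save_args={"indent": 2},                # forwarded to json.dump
        version=Version(load=None, save=None),  # load latest, autogenerate save version
    )
    data_set.save({"alpha": 0.1})
    parameters = data_set.load()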
Example #3
    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``NetworkXLocalDataSet``.

        Args:
            filepath: The path to the NetworkX graph JSON file.
            load_args: Arguments passed on to ``networkx.node_link_graph``.
                See the details in
                https://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html
            save_args: Arguments passed on to ``networkx.node_link_data``.
                See the details in
                https://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(
            filepath=Path(filepath),
            load_args=load_args,
            save_args=save_args,
            version=version,
        )
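A minimal usage sketch, assuming the class is in scope and the standard ``load``/``save`` pair is available; the path is hypothetical:

    import networkx as nx

    data_set = NetworkXLocalDataSet(filepath="data/graph.json")  # hypothetical path
    data_set.save(nx.complete_graph(4))  # serialised via networkx.node_link_data
    graph = data_set.load()              # rebuilt via networkx.node_link_graph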
Example #4
    def __init__(self,
                 filepath: str,
                 load_args: Dict[str, Any] = None,
                 version: Version = None) -> None:
        """Creates a new instance of ``FeatherLocalDataSet`` pointing to a concrete
        filepath.

        Args:
            filepath: path to a feather file.
            load_args: Pandas options for loading Feather files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html#pandas.read_feather
                All defaults are preserved.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)
        default_load_args = {}  # type: Dict[str, Any]
        self._load_args = ({
            **default_load_args,
            **load_args
        } if load_args is not None else default_load_args)
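A minimal usage sketch (hypothetical path), assuming the data set exposes the usual ``load``/``save`` pair:

    import pandas as pd

    data_set = FeatherLocalDataSet(filepath="data/table.feather")  # hypothetical path
    data_set.save(pd.DataFrame({"x": [1, 2, 3]}))
    df = data_set.load()  # read back via pandas.read_feather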
Example #5
    def __init__(
        self,
        file_url: str,
        file_path: str = None,
        auth: Optional[Union[Tuple[str, str], AuthBase]] = None,
        load_args: Optional[Dict[str, Any]] = None,
        force_download: bool = False,
    ) -> None:
        """Creates a new instance of ``CSVHTTPDataSet`` pointing to a concrete
        csv file over HTTP(S).

        Args:
            file_url: A URL to fetch the CSV file from.
            auth: Anything ``requests.get`` accepts. Normally it's either
                ``('login', 'password')``, or an ``AuthBase`` instance for
                more complex cases.
            load_args: Pandas options for loading csv files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
                All defaults are preserved.
            file_path: Optional local path where the downloaded file is kept.
            force_download: If True, the file is downloaded again even if a
                local copy already exists.
        """
        deprecation_warning(self.__class__.__name__)
        super().__init__()
        self._file_url = file_url
        self._file_path = file_path
        self._auth_backend = auth
        self._load_args = copy.deepcopy(load_args or {})
        self._force_download = force_download
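A minimal usage sketch; the URL and credentials are hypothetical, and the data set is assumed to be load-only over HTTP(S):

    data_set = CSVHTTPDataSet(
        file_url="https://example.com/data.csv",  # hypothetical URL
        auth=("login", "password"),               # or an AuthBase instance
        load_args={"sep": ","},                   # forwarded to pandas.read_csv
    )
    df = data_set.load()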
Example #6
    def __init__(
        self,
        filepath: str,
        bucket_name: str = None,
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        project: str = None,
        gcsfs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``JSONGCSDataSet`` pointing to a concrete
        JSON file on GCS.

        Args:
            filepath: Path to a JSON file. May contain the full path in Google
                Cloud Storage including bucket and protocol, e.g.
                `gcs://bucket-name/path/to/file.json`.
            bucket_name: GCS bucket name. Must be specified **only** if not
                present in ``filepath``.
            credentials: Credentials to access the GCS bucket such as
                ``client_email`` and ``token_uri``, or
                ``refresh_token``, ``client_secret``, ``client_id``.
            load_args: Pandas options for loading JSON files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html
                All defaults are preserved.
            save_args: Pandas options for saving JSON files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html
                All defaults are preserved, except "index", which is set to False.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            project: The GCP project ID, as per:
                https://cloud.google.com/resource-manager/docs/creating-managing-projects
            gcsfs_args: Extra arguments to pass into ``GCSFileSystem``. See
                https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
        """
        deprecation_warning(self.__class__.__name__)
        _credentials = deepcopy(credentials) or {}
        _gcsfs_args = deepcopy(gcsfs_args) or {}
        _gcs = gcsfs.GCSFileSystem(project=project,
                                   token=_credentials,
                                   **_gcsfs_args)
        path = _gcs._strip_protocol(filepath)
        path = PurePosixPath(
            "{}/{}".format(bucket_name, path) if bucket_name else path)

        super().__init__(
            filepath=path,
            version=version,
            exists_function=_gcs.exists,
            glob_function=_gcs.glob,
            load_args=load_args,
            save_args=save_args,
        )
        self._gcs = _gcs
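A minimal usage sketch; the bucket, project, and options are hypothetical:

    data_set = JSONGCSDataSet(
        filepath="gcs://my-bucket/data/records.json",  # hypothetical object path
        project="my-gcp-project",                      # hypothetical GCP project ID
        load_args={"orient": "records"},               # forwarded to pandas.read_json
    )
    df = data_set.load()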
Example #7
    def __init__(
        self,
        filepath: str,
        bucket_name: str = None,
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        s3fs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``CSVS3DataSet`` pointing to a concrete
        csv file on S3.

        Args:
            filepath: Path to a csv file. May contain the full path in S3
                including bucket and protocol, e.g. `s3://bucket-name/path/to/file.csv`.
            bucket_name: S3 bucket name. Must be specified **only** if not
                present in ``filepath``.
            credentials: Credentials to access the S3 bucket, such as
                ``aws_access_key_id``, ``aws_secret_access_key``.
            load_args: Pandas options for loading csv files.
                You can find all available arguments at:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
                All defaults are preserved.
            save_args: Pandas options for saving csv files.
                You can find all available arguments at:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
                All defaults are preserved, except "index", which is set to False.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            s3fs_args: S3FileSystem options. You can see all available arguments at:
                https://s3fs.readthedocs.io/en/latest/api.html#s3fs.core.S3FileSystem

        """
        deprecation_warning(self.__class__.__name__)
        _credentials = copy.deepcopy(credentials) or {}
        _s3fs_args = copy.deepcopy(s3fs_args) or {}
        _s3 = S3FileSystem(client_kwargs=_credentials, **_s3fs_args)
        path = _s3._strip_protocol(filepath)
        path = PurePosixPath(
            "{}/{}".format(bucket_name, path) if bucket_name else path)

        super().__init__(path,
                         version,
                         exists_function=_s3.exists,
                         glob_function=_s3.glob)

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)

        self._s3 = _s3
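A minimal usage sketch; the bucket, key, and credentials are hypothetical (``credentials`` ends up in ``client_kwargs`` of ``S3FileSystem``, as the code above shows):

    data_set = CSVS3DataSet(
        filepath="s3://my-bucket/data/cars.csv",  # hypothetical object path
        credentials={
            "aws_access_key_id": "ACCESS_KEY",      # hypothetical
            "aws_secret_access_key": "SECRET_KEY",  # hypothetical
        },
        load_args={"sep": ","},
        save_args={"index": False},  # already the documented default
    )
    df = data_set.load()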
Example #8
    def __init__(
        self,
        filepath: str,
        bucket_name: str = None,
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        s3fs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``ParquetS3DataSet`` pointing to a concrete
        parquet file on S3.

        Args:
            filepath: Path to a parquet file, parquet collection or the directory
                of a multipart parquet. May contain the full path in S3 including
                bucket and protocol, e.g. `s3://bucket-name/path/to/file.parquet`.
            bucket_name: S3 bucket name. Must be specified **only** if not
                present in ``filepath``.
            credentials: Credentials to access the S3 bucket, such as
                ``aws_access_key_id``, ``aws_secret_access_key``.
            load_args: Additional loading options for `pyarrow`:
                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
                or `fastparquet`:
                https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.ParquetFile.to_pandas
            save_args: Additional saving options for `pyarrow`:
                https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas
                or `fastparquet`:
                https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            s3fs_args: S3FileSystem options. You can see all available arguments at:
                https://s3fs.readthedocs.io/en/latest/api.html#s3fs.core.S3FileSystem
        """
        deprecation_warning(self.__class__.__name__)
        _credentials = copy.deepcopy(credentials) or {}
        _s3fs_args = copy.deepcopy(s3fs_args) or {}
        _s3 = S3FileSystem(client_kwargs=_credentials, **_s3fs_args)
        path = _s3._strip_protocol(filepath)
        path = PurePosixPath(
            "{}/{}".format(bucket_name, path) if bucket_name else path)

        super().__init__(
            load_args=load_args,
            save_args=save_args,
            filepath=path,
            version=version,
            exists_function=_s3.exists,
            glob_function=_s3.glob,
        )

        self._s3 = _s3
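A minimal usage sketch with a hypothetical bucket; ``load_args`` here targets the `pyarrow` reader linked above:

    data_set = ParquetS3DataSet(
        filepath="s3://my-bucket/data/trips.parquet",  # hypothetical object path
        load_args={"columns": ["id", "fare"]},         # pyarrow read_table option
    )
    df = data_set.load()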
Example #9
    def __init__(
        self,
        filepath: str,
        bucket_name: str = None,
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
        project: str = None,
        gcsfs_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``ParquetGCSDataSet`` pointing to a concrete
        Parquet file on GCS.

        Args:
            filepath: Path to a Parquet file. May contain the full path in Google
                Cloud Storage including bucket and protocol, e.g.
                ``gcs://bucket-name/path/to/file.parquet``.
            bucket_name: GCS bucket name. Must be specified **only** if not
                present in ``filepath``.
            credentials: Credentials to access the GCS bucket. Authentication is performed
                by gcsfs according to https://gcsfs.readthedocs.io/en/latest/#credentials
            load_args: Pandas options for loading Parquet files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_parquet.html
                All defaults are preserved.
            save_args: Additional saving options for `pyarrow.parquet.write_table`.
                Here you can find all available arguments:
                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            project: The GCP project. If not specified, then the default is inferred
                by a remote request.
                https://cloud.google.com/resource-manager/docs/creating-managing-projects
            gcsfs_args: Extra arguments to pass into ``GCSFileSystem``. See
                https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
        """
        deprecation_warning(self.__class__.__name__)
        _credentials = deepcopy(credentials) or {}
        _gcsfs_args = deepcopy(gcsfs_args) or {}
        _gcs = gcsfs.GCSFileSystem(project=project, token=_credentials, **_gcsfs_args)
        path = _gcs._strip_protocol(filepath)
        path = PurePosixPath("{}/{}".format(bucket_name, path) if bucket_name else path)
        super().__init__(
            filepath=path,
            version=version,
            exists_function=_gcs.exists,
            glob_function=_gcs.glob,
            load_args=load_args,
            save_args=save_args,
        )
        self._gcs = _gcs
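A minimal usage sketch; the bucket, project, and options are hypothetical:

    import pandas as pd

    data_set = ParquetGCSDataSet(
        filepath="gcs://my-bucket/data/trips.parquet",  # hypothetical object path
        project="my-gcp-project",                       # hypothetical GCP project ID
        save_args={"compression": "snappy"},            # pyarrow write_table option
    )
    data_set.save(pd.DataFrame({"fare": [7.5, 12.0]}))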
Example #10
    def __init__(
        self,
        filepath: str,
        engine: str = "xlsxwriter",
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``ExcelLocalDataSet`` pointing to a concrete
        filepath.

        Args:
            engine: The engine used to write to Excel files. The default
                engine is 'xlsxwriter'.

            filepath: path to an Excel file.

            load_args: Pandas options for loading Excel files. Here you can
                find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html
                The default load engine is 'xlrd'; all other defaults are preserved.

            save_args: Pandas options for saving Excel files. Here you can
                find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html
                All defaults are preserved, except "index", which is set to False.
                If you would like to specify options for the `ExcelWriter`,
                you can include them under "writer" key. Here you can
                find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html

            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)

        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        self._writer_args = {"engine": engine}  # type: Dict[str, Any]
        if save_args is not None:
            writer_args = save_args.pop("writer", {})  # type: Dict[str, Any]
            self._writer_args.update(writer_args)
            self._save_args.update(save_args)
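A minimal usage sketch (hypothetical path) that also shows the special "writer" key being split off into ``ExcelWriter`` options, exactly as the code above does:

    import pandas as pd

    data_set = ExcelLocalDataSet(
        filepath="data/report.xlsx",  # hypothetical local path
        engine="xlsxwriter",
        save_args={
            "sheet_name": "Sheet1",                       # forwarded to DataFrame.to_excel
            "writer": {"datetime_format": "yyyy-mm-dd"},  # popped off for ExcelWriter
        },
    )
    data_set.save(pd.DataFrame({"x": [1, 2]}))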
Example #11
    def __init__(
        self,
        filepath: str,
        engine: str = "auto",
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``ParquetLocalDataSet`` pointing to a
        concrete filepath.

        Args:
            filepath: Path to a parquet file or a metadata file of a multipart
                parquet collection or the directory of a multipart parquet.

            engine: The engine to use, one of: `auto`, `fastparquet`,
                `pyarrow`. If `auto`, then the default behavior is to try
                `pyarrow`, falling back to `fastparquet` if `pyarrow` is
                unavailable.

            load_args: Additional loading options for `pyarrow`:
                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html
                or `fastparquet`:
                https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.ParquetFile.to_pandas

            save_args: Additional saving options for `pyarrow`:
                https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas
                or `fastparquet`:
                https://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write

            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)
        self._engine = engine

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)
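A minimal usage sketch with a hypothetical path; ``engine="auto"`` tries `pyarrow` first, as documented above:

    import pandas as pd

    data_set = ParquetLocalDataSet(filepath="data/table.parquet", engine="auto")
    data_set.save(pd.DataFrame({"x": [1, 2, 3]}))
    df = data_set.load()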
Example #12
    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``MatplotlibLocalWriter``.

        Args:
            filepath: Path to the matplotlib object file.
            load_args: Currently ignored as loading is not supported.
            save_args: Save args passed to `plt.savefig`. See
                https://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html
        """
        deprecation_warning(self.__class__.__name__)
        self._filepath = Path(filepath)
        self._load_args = load_args if load_args else dict()
        self._save_args = save_args if save_args else dict()
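A minimal usage sketch (hypothetical path); ``save`` is assumed to accept a matplotlib figure, and loading is unsupported per the docstring:

    import matplotlib.pyplot as plt

    plt.plot([1, 2, 3])
    writer = MatplotlibLocalWriter(
        filepath="plots/figure.png",         # hypothetical local path
        save_args={"bbox_inches": "tight"},  # forwarded to plt.savefig
    )
    writer.save(plt.gcf())  # assumed to accept the current figure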
Example #13
    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``CSVLocalDataSet`` pointing to a concrete
        filepath.

        Args:
            filepath: path to a csv file.
            load_args: Pandas options for loading csv files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
                All defaults are preserved.
            save_args: Pandas options for saving csv files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
                All defaults are preserved, except "index", which is set to False.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        Raises:
            ValueError: If 'filepath' looks like a remote path.
        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)

        if is_remote_path(filepath):
            raise ValueError(
                "{} seems to be a remote file, which is not supported by {}".
                format(filepath, self.__class__.__name__))

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)

        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)
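A minimal usage sketch with a hypothetical local path; note that a remote-looking path would raise ``ValueError``, as shown above:

    import pandas as pd

    data_set = CSVLocalDataSet(
        filepath="data/cars.csv",    # hypothetical local path
        load_args={"sep": ","},
        save_args={"index": False},  # already the documented default
    )
    data_set.save(pd.DataFrame({"speed": [3, 4]}))
    df = data_set.load()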
Example #14
    def __init__(
        self, filepath: str, save_args: Dict[str, Any] = None, version: Version = None
    ) -> None:
        """Creates a new instance of ``YAMLLocalDataset`` pointing to a concrete
        filepath.

        Args:
            filepath: path to a local yaml file.
            save_args: Arguments passed on to ``yaml.dump``.
                See https://pyyaml.org/wiki/PyYAMLDocumentation for details.
                Defaults to ``{"default_flow_style": False}``.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(filepath=Path(filepath), save_args=save_args, version=version)
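A minimal usage sketch (hypothetical path); the documented ``yaml.dump`` default is shown explicitly:

    data_set = YAMLLocalDataSet(
        filepath="data/config.yaml",              # hypothetical local path
        save_args={"default_flow_style": False},  # the documented default
    )
    data_set.save({"learning_rate": 0.01})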
Example #15
    def __init__(
        self,
        filepath: str,
        load_args: Optional[Dict[str, Any]] = None,
        save_args: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Creates a new instance of ``BioSequenceLocalDataSet`` pointing
        to a concrete filepath.

        Args:
            filepath: path to sequence file
            load_args: Options for loading sequence files. Here you can find
                all supported file formats: https://biopython.org/wiki/SeqIO
            save_args: Arguments supported by Biopython are 'handle' and
                'format'. 'handle' defaults to ``filepath``.

        """
        deprecation_warning(self.__class__.__name__)
        self._filepath = filepath
        super().__init__(load_args, save_args)
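A minimal usage sketch with a hypothetical path; 'format' is one of the Bio.SeqIO-supported formats linked above:

    data_set = BioSequenceLocalDataSet(
        filepath="data/sequences.fasta",  # hypothetical local path
        load_args={"format": "fasta"},    # a Bio.SeqIO-supported format
        save_args={"format": "fasta"},
    )
    sequences = data_set.load()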
Example #16
    def __init__(
        self,
        filepath: str,
        key: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``HDFLocalDataSet`` pointing to a concrete
        filepath.

        Args:
            filepath: Path to an hdf file.
            key: Identifier to the group in the HDF store.
            load_args: Pandas options for loading hdf files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_hdf.html
                All defaults are preserved.
            save_args: Pandas options for saving hdf files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_hdf.html
                All defaults are preserved.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)
        self._key = key

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)
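A minimal usage sketch (hypothetical path and key):

    import pandas as pd

    data_set = HDFLocalDataSet(
        filepath="data/store.hdf",  # hypothetical local path
        key="measurements",         # group identifier within the HDF store
    )
    data_set.save(pd.DataFrame({"t": [0.1, 0.2]}))
    df = data_set.load()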
Example #17
    def __init__(
        self,
        filepath: str,
        backend: str = "pickle",
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
        version: Version = None,
    ) -> None:
        """Creates a new instance of ``PickleLocalDataSet`` pointing to a
        concrete filepath. ``PickleLocalDataSet`` can use two backends to
        serialise objects to disk:

        pickle.dump: https://docs.python.org/3/library/pickle.html#pickle.dump

        joblib.dump: https://pythonhosted.org/joblib/generated/joblib.dump.html

        and it can use two backends to load serialised objects into memory:

        pickle.load: https://docs.python.org/3/library/pickle.html#pickle.load

        joblib.load: https://pythonhosted.org/joblib/generated/joblib.load.html

        Joblib tends to exhibit better performance when the stored objects
        contain NumPy arrays:
        http://gael-varoquaux.info/programming/new_low-overhead_persistence_in_joblib_for_big_data.html.

        Args:
            filepath: path to a pkl file.
            backend: backend to use, must be one of ['pickle', 'joblib'].
            load_args: Options for loading pickle files. Refer to the help
                file of ``pickle.load`` or ``joblib.load`` for options.
            save_args: Options for saving pickle files. Refer to the help
                file of ``pickle.dump`` or ``joblib.dump`` for options.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.

        Raises:
            ValueError: If 'backend' is not one of ['pickle', 'joblib'].
            ImportError: If 'backend' could not be imported.

        """
        deprecation_warning(self.__class__.__name__)
        super().__init__(Path(filepath), version)

        if backend not in ["pickle", "joblib"]:
            raise ValueError(
                "backend should be one of ['pickle', 'joblib'], got %s" %
                backend)
        if backend == "joblib" and joblib is None:
            raise ImportError("selected backend 'joblib' could not be "
                              "imported. Make sure it is installed.")

        self._backend = backend

        # Handle default load and save arguments
        self._load_args = copy.deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = copy.deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)
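A minimal usage sketch with a hypothetical path; switching ``backend`` to "joblib" requires joblib to be installed, as the checks above enforce:

    data_set = PickleLocalDataSet(
        filepath="data/model.pkl",  # hypothetical local path
        backend="pickle",           # or "joblib" for NumPy-heavy objects
    )
    data_set.save({"weights": [0.1, 0.2]})
    model = data_set.load()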