コード例 #1
0
    def _create_catalog(self, conf_catalog, conf_creds) -> DataCatalog:
        """Build the ``DataCatalog``, tagging the save version with the run id.

        The save version starts as a freshly generated version string. When
        ``pai.current_run_uuid()`` returns a truthy id, ``-<run id>`` is
        appended to it.

        Args:
            conf_catalog: Catalog configuration forwarded to
                ``DataCatalog.from_config``.
            conf_creds: Credentials configuration forwarded to
                ``DataCatalog.from_config``.

        Returns:
            The catalog produced by ``DataCatalog.from_config``.
        """
        version = generate_current_version()
        run_uuid = pai.current_run_uuid()

        # No active run id: use the plain generated version.
        if not run_uuid:
            return DataCatalog.from_config(
                conf_catalog, conf_creds, save_version=version)

        run_tag = "-" + run_uuid
        return DataCatalog.from_config(
            conf_catalog, conf_creds, save_version=version + run_tag)
コード例 #2
0
 def test_from_sane_config_versioned(self, sane_config, dummy_dataframe):
     """Round-trip a versioned ``boats`` data set created from config."""
     boats_config = sane_config["catalog"]["boats"]
     boats_config["versioned"] = True

     # Use one version for both saving and loading so the data written
     # below is the data read back.
     current = generate_current_version()
     versioning = {"load_versions": {"boats": current},
                   "save_version": current}
     data_catalog = DataCatalog.from_config(**sane_config, **versioning)

     data_catalog.save("boats", dummy_dataframe)

     # Expected layout checked here: <filepath>/<version>/<file name>.
     base_path = Path(boats_config["filepath"])
     versioned_file = base_path / current / base_path.name
     assert versioned_file.is_file()

     assert_frame_equal(data_catalog.load("boats"), dummy_dataframe)
コード例 #3
0
    def test_from_sane_config_versioned(self, sane_config, dummy_dataframe):
        """Round-trip a versioned ``boats`` data set with a journal attached."""
        boats_config = sane_config["catalog"]["boats"]
        boats_config["versioned"] = True

        # Use one version for both saving and loading so the data written
        # below is the data read back.
        current = generate_current_version()
        data_catalog = DataCatalog.from_config(
            **sane_config,
            load_versions={"boats": current},
            save_version=current
        )

        # The journal handed to the catalog must be the one it stores.
        version_journal = VersionJournal({"project_path": "fake-path"})
        data_catalog.set_version_journal(version_journal)
        assert data_catalog._journal == version_journal  # pylint: disable=protected-access

        data_catalog.save("boats", dummy_dataframe)

        # Expected layout checked here: <filepath>/<version>/<file name>.
        base_path = Path(boats_config["filepath"])
        versioned_file = base_path / current / base_path.name
        assert versioned_file.is_file()

        assert_frame_equal(data_catalog.load("boats"), dummy_dataframe)
コード例 #4
0
ファイル: conftest.py プロジェクト: vishal-qb/kedro
def save_version(request):
    """Return ``request.param`` if truthy, otherwise a newly generated version.

    Presumably a parametrized pytest fixture (the decorator is not visible
    here); confirm against the full ``conftest.py``.

    Args:
        request: Object exposing the requested version as ``param``.

    Returns:
        The requested version, or the result of
        ``generate_current_version()`` when ``request.param`` is falsy.
    """
    requested = request.param
    if requested:
        return requested
    return generate_current_version()
コード例 #5
0
    def from_config(
        cls: Type,
        catalog: Optional[Dict[str, Dict[str, Any]]],
        credentials: Optional[Dict[str, Dict[str, Any]]] = None,
        load_versions: Optional[Dict[str, str]] = None,
        save_version: Optional[str] = None,
    ) -> "DataCatalog":
        """Create a ``DataCatalog`` instance from configuration. This is a
        factory method used to provide developers with a way to instantiate
        ``DataCatalog`` with configuration parsed from configuration files.

        The ``catalog``, ``credentials`` and ``load_versions`` arguments are
        deep-copied before use, so the caller's dictionaries are not modified.

        Args:
            catalog: A dictionary whose keys are the data set names and
                the values are dictionaries with the constructor arguments
                for classes implementing ``AbstractDataSet``. The data set
                class to be loaded is specified with the key ``type`` and its
                fully qualified class name. All ``kedro.io`` data sets can be
                specified by their class name only, i.e. their module name
                can be omitted. ``None`` is treated as an empty catalog.
            credentials: A dictionary containing credentials for different
                data sets. Use the ``credentials`` key in a ``AbstractDataSet``
                to refer to the appropriate credentials as shown in the example
                below.
            load_versions: A mapping between data set names and versions
                to load. Has no effect on data sets without enabled versioning.
            save_version: Version string to be used for ``save`` operations
                by all data sets with enabled versioning. It must: a) be a
                case-insensitive string that conforms with operating system
                filename limitations, b) always return the latest version when
                sorted in lexicographical order. When omitted (or empty), a
                version is generated once and shared by every data set
                created by this call.

        Returns:
            An instantiated ``DataCatalog`` containing all specified
            data sets, created and ready to use.

        Raises:
            DataSetError: When a data set configuration has no ``type`` key,
                or when the method fails to create any of the data sets
                from their config.

        Example:
        ::

            >>> config = {
            >>>     "cars": {
            >>>         "type": "CSVLocalDataSet",
            >>>         "filepath": "cars.csv",
            >>>         "save_args": {
            >>>             "index": False
            >>>         }
            >>>     },
            >>>     "boats": {
            >>>         "type": "CSVS3DataSet",
            >>>         "filepath": "boats.csv",
            >>>         "bucket_name": "mck-147789798-bucket",
            >>>         "credentials": "boats_credentials",
            >>>         "save_args": {
            >>>             "index": False
            >>>         }
            >>>     }
            >>> }
            >>>
            >>> credentials = {
            >>>     "boats_credentials": {
            >>>         "aws_access_key_id": "<your key id>",
            >>>         "aws_secret_access_key": "<your secret>"
            >>>     }
            >>> }
            >>>
            >>> catalog = DataCatalog.from_config(config, credentials)
            >>>
            >>> df = catalog.load("cars")
            >>> catalog.save("boats", df)
        """
        data_sets = {}
        # Work on deep copies: each ``ds_config`` is modified in place below,
        # and the caller's dictionaries must stay untouched. ``None`` (or an
        # empty mapping) collapses to a fresh empty dict.
        catalog = copy.deepcopy(catalog) or {}
        credentials = copy.deepcopy(credentials) or {}
        # Resolve the save version once so all data sets share the same value.
        save_version = save_version or generate_current_version()
        load_versions = copy.deepcopy(load_versions) or {}

        for ds_name, ds_config in catalog.items():
            if "type" not in ds_config:
                raise DataSetError("`type` is missing from DataSet '{}' "
                                   "catalog configuration".format(ds_name))
            if CREDENTIALS_KEY in ds_config:
                # Replace the credentials name stored in the data set config
                # with whatever ``_get_credentials`` returns for that name.
                ds_config[CREDENTIALS_KEY] = _get_credentials(
                    ds_config.pop(CREDENTIALS_KEY),  # credentials name
                    credentials,
                )
            # Data sets without an entry in ``load_versions`` receive ``None``
            # as their load version.
            data_sets[ds_name] = AbstractDataSet.from_config(
                ds_name, ds_config, load_versions.get(ds_name), save_version)
        return cls(data_sets=data_sets)