def test_builder_cls(dummy_register):  # pylint: disable=redefined-outer-name
  """Loading a community builder class installs its package into the cache.

  Checks that:
    * the package is downloaded into a deterministic cache location,
    * a second load reuses the cache instead of re-downloading,
    * the same dataset name can exist under different namespaces,
    * an unknown namespace raises `DatasetNotFoundError`.
  """
  # The dataset will be installed in the cache.
  installed_path = cache.cache_path() / 'modules/tfds_community/kaggle/ds0'
  assert not installed_path.exists()

  builder_cls = dummy_register.builder_cls(utils.DatasetName('kaggle:ds0'))
  assert builder_cls.name == 'dummy_dataset'
  assert 'kaggle' in builder_cls.code_path.parts
  assert issubclass(builder_cls, dataset_builder.DatasetBuilder)

  # Dataset installed in the cache.
  # Filename should be deterministic (content-hash of the package).
  # NOTE: `sorted` already returns a list, so the extra `list(...)` wrapper
  # from the original was redundant.
  assert sorted(installed_path.iterdir()) == [
      installed_path /
      '1de59094bbe913e9a95aa0cff6f46bc06d813bd5c288eac34950b473e4ef199c',
  ]

  # Reusing the dataset should re-use the cache (any download attempt fails).
  with mock.patch.object(
      register_package,
      '_download_and_cache',
      side_effect=ValueError('Dataset should have been cached already')):
    builder_cls2 = dummy_register.builder_cls(utils.DatasetName('kaggle:ds0'))
    assert builder_cls is builder_cls2

  # Datasets from different namespace can have the same name.
  builder_cls = dummy_register.builder_cls(utils.DatasetName('mlds:ds0'))
  assert 'mlds' in builder_cls.code_path.parts
  assert issubclass(builder_cls, dataset_builder.DatasetBuilder)

  with pytest.raises(registered.DatasetNotFoundError):
    dummy_register.builder(utils.DatasetName('other:ds0'))
def __init__(self, path: utils.PathLike):
  """Constructor.

  Args:
    path: Remote location of the package index (file containing the list
      of dataset packages).
  """
  super().__init__()
  # Remote index location, normalized to a path object.
  self._remote_path: utils.ReadOnlyPath = utils.as_path(path)
  # Local on-disk copy of the index, kept inside the TFDS cache dir.
  cached_index = cache.cache_path() / 'community-datasets-list.jsonl'
  self._cached_path: utils.ReadOnlyPath = cached_index
  # Pre-load the index from the cache when a previous run saved one.
  if cached_index.exists():
    self._refresh_from_content(cached_index.read_text())
def test_builder_cls(dummy_register):  # pylint: disable=redefined-outer-name
  """Loading a community builder class installs and caches its package.

  Checks that:
    * the package lands at a deterministic content-hash cache location,
    * checksums (`url_infos`) are installed only when the package ships them,
    * a second load reuses the cache instead of re-downloading,
    * the same dataset name can exist under different namespaces,
    * an unknown namespace raises `DatasetNotFoundError`.
  """
  # The dataset will be installed in the cache.
  installed_path = cache.cache_path()
  installed_path /= 'modules/tfds_community/kaggle/dummy_dataset'
  assert not installed_path.exists()

  ds_name = naming.DatasetName('kaggle:dummy_dataset')
  builder_cls = dummy_register.builder_cls(ds_name)
  assert builder_cls.name == 'dummy_dataset'
  # The install directory is named after the package content hash.
  clshash = 'e58f413affd65c267bae7acbd27fd5ac673d3e3ae13c316ffc2a461d00c8ab56'
  assert installed_path / f'{clshash}/dummy_dataset.py' == builder_cls.code_path
  assert 'kaggle' in builder_cls.code_path.parts
  assert issubclass(builder_cls, dataset_builder.DatasetBuilder)
  assert not builder_cls.url_infos  # No checksums installed with the package

  # Dataset installed in the cache.
  # Filename should be deterministic.
  # NOTE: `sorted` already returns a list, so the extra `list(...)` wrapper
  # from the original was redundant.
  assert sorted(installed_path.iterdir()) == [installed_path / clshash]

  # Reusing the dataset should re-use the cache (any download attempt fails).
  with mock.patch.object(
      register_package,
      '_download_and_cache',
      side_effect=ValueError('Dataset should have been cached already')):
    ds_name = naming.DatasetName('kaggle:dummy_dataset')
    builder_cls2 = dummy_register.builder_cls(ds_name)
    assert builder_cls is builder_cls2

  # Datasets from different namespace can have the same name.
  ds_name = naming.DatasetName('mlds:dummy_dataset')
  builder_cls = dummy_register.builder_cls(ds_name)
  assert 'mlds' in builder_cls.code_path.parts
  assert issubclass(builder_cls, dataset_builder.DatasetBuilder)
  # Checksums have been correctly installed.
  assert 'http://dummy.org/data.txt' in builder_cls.url_infos

  with pytest.raises(registered.DatasetNotFoundError):
    dummy_register.builder(naming.DatasetName('other:ds0'))
def test_mock_cache_path(tmp_path: pathlib.Path):
  """`mock_cache_path` redirects the cache dirs and patches `sys.path`."""
  with mock_cache_path(tmp_path):
    # Only the `modules/` sub-directory should be importable, not the raw
    # tmp directory itself.
    assert os.fspath(tmp_path) not in sys.path
    assert cache.cache_path() == tmp_path
    modules_dir = tmp_path / 'modules'
    assert cache.module_path() == modules_dir
    assert os.fspath(modules_dir) in sys.path