def test_builder_cls(dummy_register):  # pylint: disable=redefined-outer-name
    """Check that `builder_cls` installs, caches and namespaces datasets.

    Args:
      dummy_register: Register under test (presumably a pytest fixture).
    """
    # Location inside the cache where the dataset is expected to be installed.
    cache_root = cache.cache_path()
    install_dir = cache_root / 'modules/tfds_community/kaggle/ds0'

    # Nothing should be installed before the first lookup.
    assert not install_dir.exists()

    kaggle_cls = dummy_register.builder_cls(utils.DatasetName('kaggle:ds0'))
    assert kaggle_cls.name == 'dummy_dataset'
    assert 'kaggle' in kaggle_cls.code_path.parts
    assert issubclass(kaggle_cls, dataset_builder.DatasetBuilder)

    # The lookup installed the dataset in the cache, under a deterministic
    # directory name.
    expected_entries = [
        install_dir /
        '1de59094bbe913e9a95aa0cff6f46bc06d813bd5c288eac34950b473e4ef199c',
    ]
    assert sorted(install_dir.iterdir()) == expected_entries

    # A second lookup must be served from the cache: any attempt to download
    # again raises.
    forbid_download = mock.patch.object(
        register_package,
        '_download_and_cache',
        side_effect=ValueError('Dataset should have been cached already'),
    )
    with forbid_download:
        kaggle_cls_again = dummy_register.builder_cls(
            utils.DatasetName('kaggle:ds0'))
    assert kaggle_cls is kaggle_cls_again

    # The same dataset name can exist under a different namespace.
    mlds_cls = dummy_register.builder_cls(utils.DatasetName('mlds:ds0'))
    assert 'mlds' in mlds_cls.code_path.parts
    assert issubclass(mlds_cls, dataset_builder.DatasetBuilder)

    # Unknown namespaces are reported as a missing dataset.
    with pytest.raises(registered.DatasetNotFoundError):
        dummy_register.builder(utils.DatasetName('other:ds0'))
    def __init__(self, path: utils.PathLike):
        """Constructor.

        Args:
          path: Remote location of the package index (file containing the
            list of dataset packages)
        """
        super().__init__()
        self._remote_path: utils.ReadOnlyPath = utils.as_path(path)

        # Local copy of the index, stored inside the cache directory.
        index_cache = cache.cache_path() / 'community-datasets-list.jsonl'
        self._cached_path: utils.ReadOnlyPath = index_cache

        # Nothing to pre-load until a cached copy of the index exists.
        if not index_cache.exists():
            return
        self._refresh_from_content(index_cache.read_text())
# Example no. 3
# 0
def test_builder_cls(dummy_register):  # pylint: disable=redefined-outer-name
  """Check that `builder_cls` installs, caches and namespaces datasets.

  Args:
    dummy_register: Register under test (presumably a pytest fixture).
  """
  # Location inside the cache where the dataset is expected to be installed.
  install_dir = (
      cache.cache_path() / 'modules/tfds_community/kaggle/dummy_dataset')
  assert not install_dir.exists()

  kaggle_cls = dummy_register.builder_cls(
      naming.DatasetName('kaggle:dummy_dataset'))
  assert kaggle_cls.name == 'dummy_dataset'

  clshash = 'e58f413affd65c267bae7acbd27fd5ac673d3e3ae13c316ffc2a461d00c8ab56'
  expected_code_path = install_dir / f'{clshash}/dummy_dataset.py'
  assert expected_code_path == kaggle_cls.code_path
  assert 'kaggle' in kaggle_cls.code_path.parts
  assert issubclass(kaggle_cls, dataset_builder.DatasetBuilder)
  assert not kaggle_cls.url_infos  # No checksums installed with the package

  # The lookup installed the dataset in the cache, under a deterministic
  # directory name.
  assert sorted(install_dir.iterdir()) == [install_dir / clshash]

  # A second lookup must be served from the cache: any attempt to download
  # again raises.
  forbid_download = mock.patch.object(
      register_package,
      '_download_and_cache',
      side_effect=ValueError('Dataset should have been cached already'),
  )
  with forbid_download:
    kaggle_cls_again = dummy_register.builder_cls(
        naming.DatasetName('kaggle:dummy_dataset'))
  assert kaggle_cls is kaggle_cls_again

  # The same dataset name can exist under a different namespace.
  mlds_cls = dummy_register.builder_cls(
      naming.DatasetName('mlds:dummy_dataset'))
  assert 'mlds' in mlds_cls.code_path.parts
  assert issubclass(mlds_cls, dataset_builder.DatasetBuilder)
  # Checksums have been correctly installed
  assert 'http://dummy.org/data.txt' in mlds_cls.url_infos

  # Unknown namespaces are reported as a missing dataset.
  with pytest.raises(registered.DatasetNotFoundError):
    dummy_register.builder(naming.DatasetName('other:ds0'))
def test_mock_cache_path(tmp_path: pathlib.Path):
    """Check the cache and module paths seen inside `mock_cache_path`.

    Args:
      tmp_path: Temporary directory passed to `mock_cache_path`.
    """
    modules_dir = tmp_path / 'modules'
    with mock_cache_path(tmp_path):
        # Only the `modules` sub-directory is importable, not the cache root.
        assert os.fspath(tmp_path) not in sys.path
        assert cache.cache_path() == tmp_path
        assert cache.module_path() == modules_dir
        assert os.fspath(modules_dir) in sys.path