def test_builder_code_not_found(code_builder: dataset_builder.DatasetBuilder):
    """Check that `load.builder` falls back to files when code is missing.

    `load.builder_cls` is patched to raise `DatasetNotFoundError`, emulating
    a dataset that is not registered. The builder must then be restored as a
    `ReadOnlyBuilder`; when the data directory does not exist either, the
    `DatasetNotFoundError` must propagate.
    """
    ds_name = code_builder.name
    missing_dir = '/tmp/non-existing/tfds/dir'
    not_registered = registered.DatasetNotFoundError(ds_name)

    with mock.patch.object(load, 'builder_cls', side_effect=not_registered):
        # The code lookup fails, but files exist: expect a read-only builder.
        from_files = load.builder(ds_name)
        assert isinstance(from_files, read_only_builder.ReadOnlyBuilder)
        load.load(ds_name, split=[])  # Dataset found -> must not raise.

        if code_builder.builder_config:
            # Without code, the default config is inferred from `.config/`.
            assert (
                from_files.builder_config.name
                == code_builder.BUILDER_CONFIGS[0].name
            )

            # Passing the config explicitly (`name/config`) must work too.
            explicit_name = f'{ds_name}/{code_builder.builder_config.name}'
            from_files = load.builder(explicit_name)
            assert isinstance(from_files, read_only_builder.ReadOnlyBuilder)

        # Neither code nor files can be found: both entry points must raise.
        with pytest.raises(registered.DatasetNotFoundError):
            load.builder(ds_name, data_dir=missing_dir)
        with pytest.raises(registered.DatasetNotFoundError):
            load.load(ds_name, split=[], data_dir=missing_dir)
def test_builder_code_not_found(code_builder: dataset_builder.DatasetBuilder):
    """Check that a dataset without code is loaded from its files.

    `load.builder_cls` is patched to raise `DatasetNotFoundError`. In this
    variant the config name is always passed explicitly (`ds/config`) when
    the fixture builder has a config.
    """
    missing_dir = '/tmp/non-existing/tfds/dir'

    with mock.patch.object(
        load,
        'builder_cls',
        side_effect=registered.DatasetNotFoundError(code_builder.name),
    ):
        # Without code, the config has to be spelled out: `ds/config`.
        ds_name = code_builder.name
        config = code_builder.builder_config
        full_name = f'{ds_name}/{config.name}' if config else ds_name

        # Files exist but code does not: expect a read-only builder.
        from_files = load.builder(full_name)
        assert isinstance(from_files, read_only_builder.ReadOnlyBuilder)
        load.load(full_name, split=[])  # Dataset found -> must not raise.

        # Neither code nor files can be found: both entry points must raise.
        with pytest.raises(registered.DatasetNotFoundError):
            load.builder(full_name, data_dir=missing_dir)
        with pytest.raises(registered.DatasetNotFoundError):
            load.load(full_name, split=[], data_dir=missing_dir)
def test_abstract(self):
    """`UnregisteredBuilder` is not listed and `load.builder` rejects it."""
    builder_name = "unregistered_builder"

    # The class exposes the expected name but is absent from the listing.
    self.assertEqual(builder_name, UnregisteredBuilder.name)
    self.assertNotIn(builder_name, load.list_builders())

    # Requesting it by name fails with a message mentioning abstractness.
    with self.assertRaisesWithPredicateMatch(ValueError, "an abstract class"):
        load.builder(builder_name)
def test_in_development(self):
    """`InDevelopmentDatasetBuilder` is not listed and cannot be built."""
    builder_name = "in_development_dataset_builder"
    self.assertEqual(builder_name, InDevelopmentDatasetBuilder.name)
    self.assertNotIn(builder_name, load.list_builders())

    # Message that the raised `ValueError` is matched against.
    expected_message = (
        "Dataset %s is under active development and is not available yet."
        % builder_name
    )
    with self.assertRaisesWithPredicateMatch(ValueError, expected_message):
        load.builder(builder_name)
def test_registered(self):
    """A registered builder is buildable and listed; unknown names fail."""
    registered_name = "empty_dataset_builder"
    self.assertEqual(registered_name, EmptyDatasetBuilder.name)
    self.assertIsInstance(load.builder(registered_name), EmptyDatasetBuilder)
    self.assertIn(registered_name, load.list_builders())

    missing_name = "nonexistent_foobar_dataset"
    # The error must say "not found" and must also print the registered
    # datasets, hence the second check against the registered name.
    for expected_fragment in ("not found", registered_name):
        with self.assertRaisesWithPredicateMatch(ValueError, expected_fragment):
            load.builder(missing_name)
def test_builder_files_exists(code_builder: dataset_builder.DatasetBuilder):
    """Check which source (code or files) `load.builder` picks.

    Three cases are covered: no version given, a version wildcard with
    files available, and a version wildcard pointing at a data directory
    that does not exist.
    """
    code_cls = type(code_builder)
    read_only_cls = read_only_builder.ReadOnlyBuilder
    versioned_name = f'{code_builder.name}:*.*.*'  # Most recent version.

    # Code available and no version requested: the code class is used.
    from_code = load.builder(code_builder.name)
    assert isinstance(from_code, code_cls)
    assert not isinstance(from_code, read_only_cls)

    # Version requested: loaded from the files (backward support).
    from_files = load.builder(versioned_name)
    assert not isinstance(from_files, code_cls)
    assert isinstance(from_files, read_only_cls)

    # Version requested but no files at that location: back to the code.
    fallback = load.builder(
        versioned_name, data_dir='/tmp/path/tfds/not-exists'
    )
    assert isinstance(fallback, code_cls)
    assert not isinstance(fallback, read_only_cls)
def test_builder_config(code_builder: dataset_builder.DatasetBuilder):
    """Check that a config missing from the code is still loaded from files.

    The test is a no-op for builders that declare no `BUILDER_CONFIGS`.
    """
    if not code_builder.BUILDER_CONFIGS:
        return

    builder_cls = type(code_builder)

    # Strip the registered configs from the builder class.
    with mock.patch.object(builder_cls, 'BUILDER_CONFIGS', []):
        with mock.patch.object(builder_cls, 'builder_configs', {}):
            requested = f'{code_builder.name}/dummy_config'

            # The config is gone from the code and no files exist there.
            with pytest.raises(ValueError, match='BuilderConfig .* not found'):
                load.builder(requested, data_dir='/tmp/path/not-exists')

            # Previously generated configs can still be read from disk.
            restored = load.builder(requested)
            assert not isinstance(restored, builder_cls)
            assert isinstance(restored, read_only_builder.ReadOnlyBuilder)
def test_builder_with_kwargs(self):
    """Kwargs encoded in the name string reach the builder with their types."""
    base_name = "empty_dataset_builder"
    builder = load.builder(
        base_name + "/k1=1,k2=1.,k3=foo,k4=True,k5=False", data_dir="bar"
    )

    expected_kwargs = {
        "k1": 1,
        "k2": 1.,
        "k3": u"foo",
        "k4": True,
        "k5": False,
    }
    for key, expected_value in expected_kwargs.items():
        actual_value = builder.kwargs[key]
        # Compare exact types first: `True == 1` would otherwise pass.
        self.assertEqual(type(actual_value), type(expected_value))
        self.assertEqual(actual_value, expected_value)
def test_notebook_overwrite_dataset(self):
    """Redefining the same builder twice is possible on colab."""
    # Pretend to run inside a notebook for the duration of the test.
    with mock.patch.object(py_utils, "is_notebook", lambda: True):
        builder_name = "colab_builder"
        self.assertNotIn(builder_name, load.list_builders())

        class ColabBuilder(registered.RegisteredDataset):
            pass

        # Defining the class is enough for it to show up under its name.
        self.assertIn(builder_name, load.list_builders())
        self.assertIsInstance(load.builder(builder_name), ColabBuilder)
        first_definition = ColabBuilder

        class ColabBuilder(registered.RegisteredDataset):  # pylint: disable=function-redefined
            pass

        # The name now resolves to the second definition, not the first.
        self.assertIsInstance(load.builder(builder_name), ColabBuilder)
        self.assertNotIsInstance(load.builder(builder_name), first_definition)
def test_builder_fullname(self):
    """A `name/config:version/kwargs` string is split into builder kwargs."""
    built = load.builder(
        "empty_dataset_builder/conf1-attr:1.0.1/k1=1,k2=2", data_dir="bar"
    )
    # Config, version and inline kwargs come from the name string;
    # `data_dir` comes from the keyword argument.
    expected_kwargs = {
        "k1": 1,
        "k2": 2,
        "version": "1.0.1",
        "config": "conf1-attr",
        "data_dir": "bar",
    }
    self.assertEqual(expected_kwargs, built.kwargs)
def test_community_public_load():
    """`load` exposes builders reported by the patched community register."""
    # Stub the community register's listing, then its class lookup.
    with mock.patch(
        'tensorflow_datasets.core.community.community_register.list_builders',
        return_value=['ns:ds'],
    ):
        with mock.patch(
            'tensorflow_datasets.core.community.community_register.builder_cls',
            return_value=testing.DummyDataset,
        ):
            assert load.list_builders() == ['ns:ds']

            # The namespaced name resolves to the stubbed builder class.
            assert load.builder_cls('ns:ds') is testing.DummyDataset
            assert isinstance(load.builder('ns:ds'), testing.DummyDataset)
def test_builder_camel_case(self):
    """The CamelCase class name is accepted by `load.builder`."""
    self.assertIsInstance(
        load.builder("EmptyDatasetBuilder"), EmptyDatasetBuilder
    )
def test_show_examples(self):
    """Smoke test: `show_statistics` runs on a mocked `imagenet2012` info."""
    # NOTE(review): the test name says "examples" but the call below is
    # `show_statistics` - confirm which one is intended.
    with testing.mock_data():
        info = load.builder('imagenet2012').info
        visualization.show_statistics(info)