def test_find_builder_dir_legacy_ds(mock_fs: testing.MockFs):
  """A version directory without `features.json` must not be detected.

  The lookup is run twice on the same version directory: first when it only
  holds an unrelated file (expected result: `None`), then after
  `features.json` has been added (expected result: the version directory).
  """
  # Only an unrelated file is present: the dataset is treated as missing.
  mock_fs.add_file('path/to/ds0/1.0.0/temp.txt')
  found_without_features = _find_builder_dir('ds0')
  assert found_without_features is None

  # Once the feature config file exists, the same directory is returned.
  mock_fs.add_file('path/to/ds0/1.0.0/features.json')
  found_with_features = _find_builder_dir('ds0')
  assert found_with_features == 'path/to/ds0/1.0.0'
def test_get_version_str(mock_fs: testing.MockFs):
  """Checks version resolution for exact, wildcard and missing requests.

  Four versions of `ds` are registered, then both
  `read_only_builder._get_version_str` and `_find_builder_dir` are queried
  with no version, wildcard versions, an exact version and versions that do
  not exist.

  NOTE(review): a second function named `test_get_version_str` appears later
  in this file. Assuming both are defined at module level, the later
  definition rebinds the name, so this body is not the one pytest collects.
  Confirm, then rename or remove one of them.
  """
  for features_path in (
      'path/to/ds/1.0.0/features.json',
      'path/to/ds/1.0.1/features.json',
      'path/to/ds/1.1.0/features.json',
      'path/to/ds/2.0.1/features.json',
  ):
    mock_fs.add_file(features_path)

  get_version_str = functools.partial(
      read_only_builder._get_version_str, 'path/to/ds/'  # pylint: disable=protected-access
  )

  # (requested_version, expected version string)
  resolved_versions = (
      # requested_version is None -> the last version is returned.
      (None, '2.0.1'),
      # Wildcards resolve to the highest matching version.
      ('1.*.*', '1.1.0'),
      ('*.*.*', '2.0.1'),
      # Exact request.
      ('1.0.0', '1.0.0'),
  )
  for requested, expected in resolved_versions:
    assert get_version_str(requested_version=requested) == expected

  # No matching version found.
  for requested in ('1.3.*', '2.3.5'):
    assert get_version_str(requested_version=requested) is None

  # Same scenarios, going through the `name:version` lookup.
  resolved_dirs = (
      ('ds', 'path/to/ds/2.0.1'),
      ('ds:*.*.*', 'path/to/ds/2.0.1'),
      ('ds:1.*.*', 'path/to/ds/1.1.0'),
      ('ds:1.0.0', 'path/to/ds/1.0.0'),
  )
  for query, expected_dir in resolved_dirs:
    assert _find_builder_dir(query) == expected_dir

  for query in ('ds:1.3.*', 'ds:2.3.5'):
    assert _find_builder_dir(query) is None
def test_find_builder_dir_multi_versions(mock_fs: testing.MockFs):
  """Versions are compared numerically, so `10.0.0` ranks above `9.9.9`.

  Also checks that an explicitly requested existing version is returned and
  that a requested version with no directory yields `None`.
  """
  for features_path in (
      'path/to/ds0/1.0.0/features.json',
      'path/to/ds0/9.9.9/features.json',
      'path/to/ds0/10.0.0/features.json',
  ):
    mock_fs.add_file(features_path)

  # No version requested: the numerically highest one wins.
  latest_dir = _find_builder_dir('ds0')
  assert latest_dir == 'path/to/ds0/10.0.0'

  # Explicitly given version.
  pinned_dir = _find_builder_dir('ds0:9.9.9')
  assert pinned_dir == 'path/to/ds0/9.9.9'

  # Non-existing version.
  missing_dir = _find_builder_dir('ds0:9.9.0')
  assert missing_dir is None
def test_get_version_str_empty_builder_dir(mock_fs: testing.MockFs):
  """Queries a builder directory that holds no version sub-directory.

  The only file registered is `features.json` directly inside the builder
  directory. The lookup runs under `_assert_raises(error_msg)`; that helper is
  defined elsewhere and presumably verifies that the "doesn't contain any
  versions" message is reported (confirm against its definition).
  """
  builder_dir = 'path/to/ds/'
  # `features.json` sits at the builder-dir level, not inside a version dir.
  mock_fs.add_file(f'{builder_dir}features.json')

  resolve_version = functools.partial(
      read_only_builder._get_version_str, 'path/to/ds/'  # pylint: disable=protected-access
  )
  expected_msg = (f'The builder directory {builder_dir} doesn\'t contain any '
                  'versions.')

  with _assert_raises(expected_msg):
    assert resolve_version() is None
def test_find_builder_config_code(mock_fs: testing.MockFs):
  """When code exists, extract the default config name.

  A dummy dataset class declaring `default_config` and `other_config` is
  defined, several config/version directories are registered, and the lookup
  is checked for implicit config, explicit config, explicit version, and for
  configs that are broken or unknown.
  """

  class MyDataset(testing.DummyMnist):  # pylint: disable=unused-variable
    """Dummy dataset."""

    BUILDER_CONFIGS = [
        dataset_builder.BuilderConfig(  # pylint: disable=g-complex-comprehension
            name=name, version='2.0.0', description=f'{name} description'
        )
        for name in ('default_config', 'other_config')
    ]

  for features_path in (
      'path/to/my_dataset/default_config/0.0.1/features.json',
      'path/to/my_dataset/default_config/1.0.0/features.json',
      'path/to/my_dataset/other_config/1.0.0/features.json',
      'path/to/my_dataset/old_config/0.8.0/features.json',
      'path/to/my_dataset/old_config/1.0.0/features.json',
      # No version directory under this config.
      'path/to/my_dataset/broken_config/features.json',
      'path/to/my_dataset/0.0.1/features.json',
  ):
    mock_fs.add_file(features_path)

  # (query, expected builder directory)
  resolvable = (
      # If code can be reached, use it to load the default config name.
      # Note that the existing version is loaded, even if the code is at a
      # more recent version.
      ('my_dataset', 'path/to/my_dataset/default_config/1.0.0'),
      # Explicitly given version with implicit config.
      ('my_dataset:0.0.1', 'path/to/my_dataset/default_config/0.0.1'),
      # When config is explicitly given, load the last detected version.
      ('my_dataset/other_config', 'path/to/my_dataset/other_config/1.0.0'),
      ('my_dataset/old_config', 'path/to/my_dataset/old_config/1.0.0'),
      # Explicit config and explicit version.
      ('my_dataset/old_config:0.8.0', 'path/to/my_dataset/old_config/0.8.0'),
  )
  for query, expected_dir in resolvable:
    assert _find_builder_dir(query) == expected_dir

  # Config without a version directory, and config that does not exist.
  for query in ('my_dataset/broken_config', 'my_dataset/unknown_config'):
    assert _find_builder_dir(query) is None
def test_builder_from_directories_splits(mock_fs: testing.MockFs):
  """Builds one builder from two directories and checks the merged splits.

  Directory `a` declares `train` and `test` splits; directory `b` declares
  only `train`. The test asserts that the result is a `ReadOnlyBuilder`, that
  both splits are `MultiSplitInfo` instances, and that their string forms
  list one `SplitInfo` per directory that declared the split.
  """

  def _make_split(name: str, shard_lengths: Sequence[int]) -> proto.SplitInfo:
    """Returns a `SplitInfo` proto with the given name and shard lengths."""
    return proto.SplitInfo(name=name, shard_lengths=shard_lengths)

  def _make_dataset_info(splits):
    """Returns a `DatasetInfo` proto with one `text` feature and `splits`."""
    text_proto = proto.feature_pb2.Feature(
        python_class_name=(
            'tensorflow_datasets.core.features.text_feature.Text'
        ),
        text=proto.feature_pb2.TextFeature(),
    )
    features_proto = proto.feature_pb2.Feature(
        python_class_name=(
            'tensorflow_datasets.core.features.features_dict.FeaturesDict'
        ),
        features_dict=proto.feature_pb2.FeaturesDict(
            features={'text': text_proto}
        ),
    )
    return proto.dataset_info_pb2.DatasetInfo(
        name='ds_name',
        version='1.0.0',
        file_format='tfrecord',
        splits=splits,
        features=features_proto,
    )

  info_by_dir = {
      '/path/dataset/a': _make_dataset_info(
          [_make_split('train', [4, 5]), _make_split('test', [3])]
      ),
      '/path/dataset/b': _make_dataset_info([_make_split('train', [3, 7])]),
  }

  # Write each proto as `dataset_info.json` inside its builder directory.
  for builder_dir, info_proto in info_by_dir.items():
    mock_fs.add_file(
        path=f'{builder_dir}/dataset_info.json',
        content=json_format.MessageToJson(info_proto, sort_keys=True),
    )

  result = read_only_builder.builder_from_directories(list(info_by_dir))
  assert isinstance(result, read_only_builder.ReadOnlyBuilder)

  merged_splits = result.info.splits
  assert isinstance(merged_splits['train'], splits_lib.MultiSplitInfo)
  assert isinstance(merged_splits['test'], splits_lib.MultiSplitInfo)

  # `test` only exists in directory `a`.
  expected_test_repr = (
      'MultiSplitInfo(name=\'test\', '
      'split_infos=[<SplitInfo num_examples=3, num_shards=1>])')
  assert str(merged_splits['test']) == expected_test_repr

  # `train` exists in both directories.
  expected_train_repr = (
      'MultiSplitInfo(name=\'train\', split_infos=['
      '<SplitInfo num_examples=9, num_shards=2>, '
      '<SplitInfo num_examples=10, num_shards=2>])')
  assert str(merged_splits['train']) == expected_train_repr
def test_find_builder_dir_with_multiple_data_dir(mock_fs: testing.MockFs):
  """Looks a dataset up across the data dirs from `constants.list_data_dirs`.

  Three situations are checked, calling `read_only_builder._find_builder_dir`
  directly:
    * without patching, the dataset under `path/to` is not found;
    * with `list_data_dirs` patched to also return `path/to`, it is found;
    * once the same dataset/version also exists under `constants.DATA_DIR`,
      a `ValueError` mentioning multiple locations is raised.
  """
  mock_fs.add_file('path/to/ds0/1.0.0/features.json')

  # Dataset not found.
  assert read_only_builder._find_builder_dir('ds0') is None

  searched_dirs = [constants.DATA_DIR, 'path/to']
  patched_list_data_dirs = mock.patch.object(
      constants,
      'list_data_dirs',
      return_value=searched_dirs,
  )
  with patched_list_data_dirs:
    assert read_only_builder._find_builder_dir('ds0') == 'path/to/ds0/1.0.0'

    # Dataset present in 2 different data_dir. This must run while the patch
    # is active so that both directories are searched.
    duplicate_path = os.path.join(
        constants.DATA_DIR, 'ds0/1.0.0/features.json'
    )
    mock_fs.add_file(duplicate_path)

    with pytest.raises(ValueError, match='detected in multiple locations'):
      read_only_builder._find_builder_dir('ds0')
def test_get_version_str(mock_fs: testing.MockFs):
  """Checks version resolution, including the no-match error context.

  Four versions of `ds` are registered. Inside a
  `error_utils.reraise_with_context(registered.DatasetNotFoundError)` scope,
  `read_only_builder._get_version_str` and `_find_builder_dir` are queried
  with no version, wildcard versions, an exact version and versions that do
  not exist. Afterwards a non-matching request is repeated under
  `_assert_raises(error_msg)`.

  NOTE(review): an earlier function with the same name appears in this file.
  Assuming both are defined at module level, this later definition is the one
  bound to the name and therefore the one pytest collects. Confirm, then
  rename or remove the other.
  """
  mock_fs.add_file('path/to/ds/1.0.0/features.json')
  mock_fs.add_file('path/to/ds/1.0.1/features.json')
  mock_fs.add_file('path/to/ds/1.1.0/features.json')
  mock_fs.add_file('path/to/ds/2.0.1/features.json')

  # Bind the builder directory so each call only varies `requested_version`.
  get_version_str = functools.partial(
      read_only_builder._get_version_str, 'path/to/ds/'  # pylint: disable=protected-access
  )

  with error_utils.reraise_with_context(registered.DatasetNotFoundError):
    # requested_version is None -> Returns last version
    assert get_version_str(requested_version=None) == '2.0.1'
    # Returns highest matching version
    assert get_version_str(requested_version='1.*.*') == '1.1.0'
    assert get_version_str(requested_version='*.*.*') == '2.0.1'
    assert get_version_str(requested_version='1.0.0') == '1.0.0'
    # No matching version found
    assert get_version_str(requested_version='1.3.*') is None
    assert get_version_str(requested_version='2.3.5') is None

    # Same scenarios through the `name:version` lookup helper.
    assert _find_builder_dir('ds') == 'path/to/ds/2.0.1'
    assert _find_builder_dir('ds:*.*.*') == 'path/to/ds/2.0.1'
    assert _find_builder_dir('ds:1.*.*') == 'path/to/ds/1.1.0'
    assert _find_builder_dir('ds:1.0.0') == 'path/to/ds/1.0.0'
    assert _find_builder_dir('ds:1.3.*') is None
    assert _find_builder_dir('ds:2.3.5') is None

  # No matching version found, updated error context.
  requested_version = '1.3.*'
  builder_dir = 'path/to/ds/'
  error_msg = (f'No version matching the requested {requested_version} was '
               f'found in the builder directory: {builder_dir}.')
  # `_assert_raises` is defined elsewhere; presumably it verifies that
  # `error_msg` ends up in the raised error's context — TODO confirm.
  with _assert_raises(error_msg):
    assert get_version_str(requested_version=requested_version) is None
def test_find_builder_dir_bad_version_dir_name(mock_fs: testing.MockFs):
  """Directories whose name is not a well-formed version are skipped.

  With only ill-formatted directory names present the lookup returns `None`;
  after a well-formed version directory is added, that directory is returned.
  """
  # None of these directory names is a valid `x.y.z` version.
  for features_path in (
      'path/to/ds0/9.9./features.json',
      'path/to/ds0/1.0.o/features.json',
      'path/to/ds0/other/features.json',
  ):
    mock_fs.add_file(features_path)
  found_among_invalid = _find_builder_dir('ds0')
  assert found_among_invalid is None

  # A single valid version directory is enough to be picked up.
  mock_fs.add_file('path/to/ds0/1.1.0/features.json')
  found_after_valid = _find_builder_dir('ds0')
  assert found_after_valid == 'path/to/ds0/1.1.0'
def test_find_builder_config_no_code(mock_fs: testing.MockFs):
  """When the code can't be reached, config should be explicit.

  `ds0` has both a config-less version directory and a `config` directory;
  `ds1` only has a `config` directory. The test checks what the lookup
  returns with and without the config in the query.
  """
  for features_path in (
      'path/to/ds0/config/1.0.0/features.json',
      'path/to/ds0/1.1.0/features.json',
  ):
    mock_fs.add_file(features_path)

  # If the original code can't be reached, assume no config.
  implicit_ds0 = _find_builder_dir('ds0')
  assert implicit_ds0 == 'path/to/ds0/1.1.0'
  # Config is explicitly given.
  explicit_ds0 = _find_builder_dir('ds0/config')
  assert explicit_ds0 == 'path/to/ds0/config/1.0.0'

  mock_fs.add_file('path/to/ds1/config/1.0.0/features.json')

  # Config not available, return None.
  implicit_ds1 = _find_builder_dir('ds1')
  assert implicit_ds1 is None
  explicit_ds1 = _find_builder_dir('ds1/config')
  assert explicit_ds1 == 'path/to/ds1/config/1.0.0'
def test_find_builder_wrong_dir(mock_fs: testing.MockFs):
  """A dataset is not found when a different `data_dir` is passed.

  The same dataset is looked up twice: once with the helper's default
  arguments (found) and once with `data_dir='path/to/other/dir'` (expected
  result: `None`).
  """
  mock_fs.add_file('path/to/ds0/1.1.0/features.json')

  found_default = _find_builder_dir('ds0')
  assert found_default == 'path/to/ds0/1.1.0'

  # Same dataset name, but searched under an unrelated data directory.
  found_elsewhere = _find_builder_dir('ds0', data_dir='path/to/other/dir')
  assert found_elsewhere is None