def _iter_builder_names(
    ns2data_dir: Dict[str, utils.ReadOnlyPath],
) -> Iterator[str]:
    """Yields the `ns:name` dataset names.

    Args:
        ns2data_dir: Mapping from namespace name to the directory that is
            scanned for that namespace.

    Yields:
        `str(utils.DatasetName(namespace=..., name=...))` for each entry
        returned by `_maybe_iterdir` whose name is neither explicitly
        filtered out nor rejected by `naming.is_valid_dataset_name`.
    """
    # Directory names that are never reported as datasets.
    skipped_dirnames = frozenset(('downloads',))

    # For better performance, the namespaces could be loaded asynchronously.
    for namespace, root_dir in ns2data_dir.items():
        # Note: `root_dir` might contain non-dataset folders, but checking
        # each dataset individually would cause a significant performance
        # drop, so this is an acceptable trade-off.
        for candidate in _maybe_iterdir(root_dir):
            dirname = candidate.name
            # The cheap membership test runs first; the naming check only
            # runs for names that were not filtered out.
            if (dirname not in skipped_dirnames
                    and naming.is_valid_dataset_name(dirname)):
                yield str(
                    utils.DatasetName(namespace=namespace, name=dirname))
def test_is_valid_dataset_name():
    """Checks `naming.is_valid_dataset_name` on accepted and rejected names."""
    accepted_names = ('dataset123_abc',)
    # Names containing `-` or `.` are expected to be rejected.
    rejected_names = ('dataset-abc', 'dataset.old')

    for name in accepted_names:
        assert naming.is_valid_dataset_name(name)
    for name in rejected_names:
        assert not naming.is_valid_dataset_name(name)
def _is_valid_dataset_name(dataset_name: str) -> bool:
    """Returns whether `dataset_name` is accepted as a dataset name.

    Args:
        dataset_name: Candidate name to check.

    Returns:
        `False` when the name is listed in `FILTERED_DIRNAME`; otherwise the
        result of `naming.is_valid_dataset_name(dataset_name)`.
    """
    # Explicitly filtered names are rejected before the naming check runs.
    if dataset_name in FILTERED_DIRNAME:
        return False
    return naming.is_valid_dataset_name(dataset_name)