def builder(name: str, **builder_init_kwargs: Any) -> dataset_builder.DatasetBuilder:
  """Returns the `tfds.core.DatasetBuilder` registered under `name`.

  Args:
    name: `str`, registered name of the `DatasetBuilder` (class name in camel
      or snake case: `MyDataset` or `my_dataset`). Either `'dataset_name'` or
      `'dataset_name/config_name'` for datasets with `BuilderConfig`s. For
      convenience, comma-separated keyword arguments may be appended, e.g.
      `'foo_bar/a=True,b=3'` builds `FooBar` with `a=True` and `b=3`, and
      `'foo_bar/zoo/a=True,b=3'` additionally selects the `'zoo'` config.
    **builder_init_kwargs: `dict` of keyword arguments forwarded to the
      `DatasetBuilder`. They take precedence over keyword arguments encoded
      in `name`, if any.

  Returns:
    A `tfds.core.DatasetBuilder`.

  Raises:
    DatasetNotFoundError: if `name` is unrecognized.
  """
  dataset_name, init_kwargs = _dataset_name_and_kwargs_from_name_str(name)
  # Explicitly passed keyword arguments win over the ones parsed from `name`.
  init_kwargs.update(builder_init_kwargs)
  error_prefix = f"Failed to construct dataset {dataset_name}"
  # Both the class lookup and the instantiation happen inside the re-raise
  # scope, so failures of either one carry the prefix.
  with py_utils.try_reraise(prefix=error_prefix):
    dataset_cls = builder_cls(dataset_name)
    return dataset_cls(**init_kwargs)  # pytype: disable=not-instantiable
def test_reraise(self):
  """Checks the exception type and message surfaced by `try_reraise`."""

  class CustomError(Exception):

    def __init__(self, *args, **kwargs):  # pylint: disable=super-init-not-called
      pass  # Do not call super() to ensure this would work with bad code.

  prefix = 'Caught: '
  expect = self.assertRaisesRegex
  reraise = py_utils.try_reraise

  # Exception raised without any message.
  with expect(ValueError, 'Caught: '), reraise(prefix):
    raise ValueError

  # Exception raised with a message: the prefix is expected in front of it.
  with expect(ValueError, 'Caught: With message'), reraise(prefix):
    raise ValueError('With message')

  # Custom exception built from a single positional argument.
  with expect(CustomError, 'Caught: 123'), reraise(prefix):
    raise CustomError(123)

  # Custom exception built from several positional arguments.
  with expect(CustomError, "('Caught: ', 123, {})"), reraise(prefix):
    raise CustomError(123, {})

  # Exception whose `args` was overwritten with a non-tuple value.
  with expect(Exception, 'Caught: '), reraise(prefix):
    err = CustomError(123, {})
    err.args = 'Not a tuple'
    raise err

  # TensorFlow error: expected to be catchable as `RuntimeError`.
  with expect(RuntimeError, 'Caught: message'), reraise(prefix):
    raise tf.errors.FailedPreconditionError(None, None, 'message')
def builder(
    name: str,
    *,
    data_dir: Optional[str] = None,
    **builder_init_kwargs: Any
) -> dataset_builder.DatasetBuilder:
  """Fetches a `tfds.core.DatasetBuilder` by string name.

  Args:
    name: `str`, the registered name of the `DatasetBuilder` (the class name
      as camel or snake case: `MyDataset` or `my_dataset`). This can be either
      `'dataset_name'` or `'dataset_name/config_name'` for datasets with
      `BuilderConfig`s. As a convenience, this string may contain
      comma-separated keyword arguments for the builder. For example
      `'foo_bar/a=True,b=3'` would use the `FooBar` dataset passing the
      keyword arguments `a=True` and `b=3` (for builders with configs, it
      would be `'foo_bar/zoo/a=True,b=3'` to use the `'zoo'` config and pass
      to the builder keyword arguments `a=True` and `b=3`).
    data_dir: Path to the dataset(s). See `tfds.load` for more information.
    **builder_init_kwargs: `dict` of keyword arguments passed to the
      `DatasetBuilder`. These will override keyword arguments passed in
      `name`, if any.

  Returns:
    A `tfds.core.DatasetBuilder`.

  Raises:
    DatasetNotFoundError: if `name` is unrecognized (neither the code nor
      generated files are found), or if the dataset class is abstract.
  """
  builder_name, builder_kwargs = _dataset_name_and_kwargs_from_name_str(name)

  # Try loading the code (if it exists)
  try:
    cls = builder_cls(builder_name)
  except DatasetNotFoundError as e:
    if e.is_abstract:
      raise  # Abstract can't be instantiated neither from code nor files.
    cls = None  # Class not found
    not_found_error = e  # Save the exception to eventually reraise

  version_explicitly_given = "version" in builder_kwargs

  # Try loading from files first:
  # * If code not present.
  # * If version explicitly given (backward/forward compatibility).
  # Note: If `builder_init_kwargs` are set (e.g. version='experimental_latest',
  # custom config,...), read from generation code.
  if (not cls or version_explicitly_given) and not builder_init_kwargs:
    builder_dir = find_builder_dir(name, data_dir=data_dir)
    if builder_dir is not None:  # A generated dataset was found on disk
      return read_only_builder.builder_from_directory(builder_dir)

  # If loading from files was skipped (e.g. files not found), load from the
  # source code.
  if cls:
    # Merge into a single dict so that explicitly passed keyword arguments
    # override the ones parsed from `name`. Unpacking both dicts directly in
    # the call would raise `TypeError` on any duplicated key.
    init_kwargs = {**builder_kwargs, **builder_init_kwargs}
    with py_utils.try_reraise(prefix=f"Failed to construct dataset {name}: "):
      return cls(data_dir=data_dir, **init_kwargs)  # pytype: disable=not-instantiable

  # If neither the code nor the files are found, raise DatasetNotFoundError
  raise not_found_error
def builder(
    name: str,
    *,
    try_gcs: bool = False,
    **builder_kwargs: Any
) -> dataset_builder.DatasetBuilder:
  """Returns the `tfds.core.DatasetBuilder` matching the string `name`.

  Args:
    name: `str`, registered name of the `DatasetBuilder` (class name in camel
      or snake case: `MyDataset` or `my_dataset`). Either `'dataset_name'` or
      `'dataset_name/config_name'` for datasets with `BuilderConfig`s. For
      convenience, comma-separated keyword arguments may be appended, e.g.
      `'foo_bar/a=True,b=3'` builds `FooBar` with `a=True` and `b=3`, and
      `'foo_bar/zoo/a=True,b=3'` additionally selects the `'zoo'` config.
    try_gcs: `bool`, if True, tfds.load will see if the dataset exists on the
      public GCS bucket before building it locally.
    **builder_kwargs: `dict` of keyword arguments passed to the
      `tfds.core.DatasetBuilder`.

  Returns:
    A `tfds.core.DatasetBuilder`.

  Raises:
    DatasetNotFoundError: if `name` is unrecognized.
    NotImplementedError: if `name` refers to a namespaced (community) dataset.
    ValueError: if `try_gcs=True` selects the GCS copy while `data_dir` is
      also explicitly set.
  """
  # 'kaggle:my_dataset:1.0.0' -> ('kaggle', 'my_dataset', {'version': '1.0.0'})
  namespace, dataset_name, init_kwargs = naming.parse_builder_name_kwargs(
      name, **builder_kwargs)

  # Community datasets are not supported: bail out before anything else.
  if namespace:
    raise NotImplementedError

  # `try_gcs` currently only support non-community datasets (guaranteed here
  # by the guard above).
  if try_gcs and gcs_utils.is_dataset_on_gcs(dataset_name):
    data_dir = init_kwargs.get('data_dir')
    if data_dir:
      raise ValueError(
          f'Cannot have both `try_gcs=True` and `data_dir={data_dir}` '
          'explicitly set')
    init_kwargs['data_dir'] = gcs_utils.gcs_path('datasets')

  # First check whether code exists or not (imported datasets)
  try:
    cls = builder_cls(dataset_name)
  except registered.DatasetNotFoundError as err:
    cls = None  # Class not found
    not_found_error = err  # Kept around to be raised if files are missing too

  # Eventually try loading from files first
  if _try_load_from_files_first(cls, **init_kwargs):
    try:
      return read_only_builder.builder_from_files(dataset_name, **init_kwargs)
    except registered.DatasetNotFoundError:
      pass  # No files found: fall back to the source code below.

  # If code exists and loading from files was skipped (e.g. files not found),
  # load from the source code.
  if cls:
    with py_utils.try_reraise(
        prefix=f'Failed to construct dataset {name}: '):
      return cls(**init_kwargs)  # pytype: disable=not-instantiable

  # If neither the code nor the files are found, raise DatasetNotFoundError
  raise not_found_error