コード例 #1
0
ファイル: load.py プロジェクト: Unofficial-SRJ/datasets
def builder(name: str,
            **builder_init_kwargs: Any) -> dataset_builder.DatasetBuilder:
    """Fetches a `tfds.core.DatasetBuilder` by string name.

    Args:
      name: `str`, the registered name of the `DatasetBuilder` (the class name
        as camel or snake case: `MyDataset` or `my_dataset`).
        This can be either `'dataset_name'` or
        `'dataset_name/config_name'` for datasets with `BuilderConfig`s.
        As a convenience, this string may contain comma-separated keyword
        arguments for the builder. For example `'foo_bar/a=True,b=3'` would
        use the `FooBar` dataset passing the keyword arguments `a=True` and
        `b=3` (for builders with configs, it would be
        `'foo_bar/zoo/a=True,b=3'` to use the `'zoo'` config and pass to the
        builder keyword arguments `a=True` and `b=3`).
      **builder_init_kwargs: `dict` of keyword arguments passed to the
        `DatasetBuilder`. These will override keyword arguments passed in
        `name`, if any.

    Returns:
      A `tfds.core.DatasetBuilder`.

    Raises:
      DatasetNotFoundError: if `name` is unrecognized.
    """
    # Split the user-supplied string into the dataset name and the keyword
    # arguments that were embedded in it.
    builder_name, builder_kwargs = _dataset_name_and_kwargs_from_name_str(name)
    # Explicit keyword arguments win over the ones parsed from `name`.
    builder_kwargs.update(builder_init_kwargs)
    # The prefix is joined directly to the original error message, so it has
    # to carry its own ": " separator to stay readable.
    with py_utils.try_reraise(
            prefix="Failed to construct dataset {}: ".format(builder_name)):
        return builder_cls(builder_name)(**builder_kwargs)  # pytype: disable=not-instantiable
コード例 #2
0
    def test_reraise(self):
        # Checks that `py_utils.try_reraise` re-raises errors with the given
        # prefix visible in the message, for several kinds of exceptions.
        prefix = 'Caught: '

        class CustomError(Exception):
            def __init__(self, *args, **kwargs):  # pylint: disable=super-init-not-called
                pass  # Do not call super() to ensure this would work with bad code.

        # Built-in exception raised without any message.
        with self.assertRaisesRegex(ValueError, 'Caught: '), \
                py_utils.try_reraise(prefix):
            raise ValueError

        # Built-in exception with a message: prefix directly precedes it.
        with self.assertRaisesRegex(ValueError, 'Caught: With message'), \
                py_utils.try_reraise(prefix):
            raise ValueError('With message')

        # Custom exception (skipping `super().__init__`) with one argument.
        with self.assertRaisesRegex(CustomError, 'Caught: 123'), \
                py_utils.try_reraise(prefix):
            raise CustomError(123)

        # Custom exception with several arguments. NOTE: the parentheses in
        # the pattern are regex group syntax, so the text that must be found
        # is `'Caught: ', 123, {}`.
        with self.assertRaisesRegex(CustomError, "('Caught: ', 123, {})"), \
                py_utils.try_reraise(prefix):
            raise CustomError(123, {})

        # Exception whose `args` was reassigned from a plain string; only
        # some `Exception` carrying the prefix is required here.
        with self.assertRaisesRegex(Exception, 'Caught: '), \
                py_utils.try_reraise(prefix):
            broken_error = CustomError(123, {})
            broken_error.args = 'Not a tuple'
            raise broken_error

        # TensorFlow error: expected to surface as a `RuntimeError`.
        with self.assertRaisesRegex(RuntimeError, 'Caught: message'), \
                py_utils.try_reraise(prefix):
            raise tf.errors.FailedPreconditionError(None, None, 'message')
コード例 #3
0
def builder(
    name: str,
    *,
    data_dir: Optional[str] = None,
    **builder_init_kwargs: Any
) -> dataset_builder.DatasetBuilder:
  """Fetches a `tfds.core.DatasetBuilder` by string name.

  The builder is either restored from a previously generated dataset found on
  disk, or constructed from the registered dataset class.

  Args:
    name: `str`, the registered name of the `DatasetBuilder` (the class name
      as camel or snake case: `MyDataset` or `my_dataset`).
      This can be either `'dataset_name'` or
      `'dataset_name/config_name'` for datasets with `BuilderConfig`s.
      As a convenience, this string may contain comma-separated keyword
      arguments for the builder. For example `'foo_bar/a=True,b=3'` would use
      the `FooBar` dataset passing the keyword arguments `a=True` and `b=3`
      (for builders with configs, it would be `'foo_bar/zoo/a=True,b=3'` to
      use the `'zoo'` config and pass to the builder keyword arguments `a=True`
      and `b=3`).
    data_dir: Path to the dataset(s). See `tfds.load` for more information.
    **builder_init_kwargs: `dict` of keyword arguments passed to the
      `DatasetBuilder`. These will override keyword arguments passed in `name`,
      if any.

  Returns:
    A `tfds.core.DatasetBuilder`.

  Raises:
    DatasetNotFoundError: if `name` is unrecognized, i.e. neither the dataset
      code nor generated files could be found, or if the dataset class is
      abstract.
  """
  builder_name, builder_kwargs = _dataset_name_and_kwargs_from_name_str(name)

  # Try loading the code (if it exists)
  try:
    cls = builder_cls(builder_name)
  except DatasetNotFoundError as e:
    if e.is_abstract:
      raise  # Abstract can be instantiated neither from code nor from files.
    cls = None  # Class not found
    not_found_error = e  # Save the exception to eventually reraise

  version_explicitly_given = "version" in builder_kwargs

  # Try loading from files first:
  # * If code not present.
  # * If version explicitly given (backward/forward compatibility).
  # Note: If `builder_init_kwargs` are set (e.g. version='experimental_latest',
  # custom config,...), read from generation code.
  if (not cls or version_explicitly_given) and not builder_init_kwargs:
    builder_dir = find_builder_dir(name, data_dir=data_dir)
    if builder_dir is not None:  # A generated dataset was found on disk
      return read_only_builder.builder_from_directory(builder_dir)

  # If loading from files was skipped (e.g. files not found), load from the
  # source code.
  if cls:
    # Merge into a single dict so that explicit keyword arguments override
    # the ones parsed from `name`. Unpacking both dicts directly into the call
    # would raise `TypeError` whenever the same key appears in both.
    init_kwargs = {**builder_kwargs, **builder_init_kwargs}
    with py_utils.try_reraise(prefix=f"Failed to construct dataset {name}: "):
      return cls(data_dir=data_dir, **init_kwargs)  # pytype: disable=not-instantiable

  # If neither the code nor the files are found, raise DatasetNotFoundError
  raise not_found_error
コード例 #4
0
ファイル: load.py プロジェクト: daleyang96/datasets-1
def builder(name: str,
            *,
            try_gcs: bool = False,
            **builder_kwargs: Any) -> dataset_builder.DatasetBuilder:
    """Returns the `tfds.core.DatasetBuilder` designated by `name`.

    The builder is looked up in two places: previously generated dataset
    files, and the registered dataset classes. Files are tried first only
    when `_try_load_from_files_first` says so; otherwise, or when no files
    are found, the registered class is instantiated.

    Args:
      name: `str`, the registered name of the `DatasetBuilder` (the class
        name as camel or snake case: `MyDataset` or `my_dataset`). Either
        `'dataset_name'` or `'dataset_name/config_name'` for datasets with
        `BuilderConfig`s. Keyword arguments for the builder may be appended
        as a comma-separated list: `'foo_bar/a=True,b=3'` selects the
        `FooBar` dataset with `a=True` and `b=3`, and
        `'foo_bar/zoo/a=True,b=3'` additionally selects the `'zoo'` config.
      try_gcs: `bool`. If True and the dataset is available on the public GCS
        bucket, `data_dir` is pointed at that bucket rather than a local
        directory.
      **builder_kwargs: `dict` of keyword arguments passed to the
        `tfds.core.DatasetBuilder`.

    Returns:
      A `tfds.core.DatasetBuilder`.

    Raises:
      DatasetNotFoundError: if `name` is unrecognized.
      NotImplementedError: if `name` refers to a namespaced (community)
        dataset.
      ValueError: if `try_gcs` redirects to GCS while `data_dir` is also set
        explicitly.
    """
    # 'kaggle:my_dataset:1.0.0' -> ('kaggle', 'my_dataset', {'version': '1.0.0'})
    namespace, dataset_name, builder_kwargs = naming.parse_builder_name_kwargs(
        name, **builder_kwargs)

    # Community (namespaced) datasets are rejected up front; none of the
    # steps below apply to them.
    if namespace:
        raise NotImplementedError

    # Redirect `data_dir` to the public bucket when the dataset lives there.
    if try_gcs and gcs_utils.is_dataset_on_gcs(dataset_name):
        data_dir = builder_kwargs.get('data_dir')
        if data_dir:
            raise ValueError(
                f'Cannot have both `try_gcs=True` and `data_dir={data_dir}` '
                'explicitly set')
        builder_kwargs['data_dir'] = gcs_utils.gcs_path('datasets')

    # Look for the dataset code (imported datasets). A failure is remembered
    # rather than raised, since generated files may still be available.
    try:
        code_cls = builder_cls(dataset_name)
    except registered.DatasetNotFoundError as err:
        code_cls = None
        lookup_error = err

    # Possibly restore the builder from generated files before using code.
    if _try_load_from_files_first(code_cls, **builder_kwargs):
        try:
            return read_only_builder.builder_from_files(
                dataset_name, **builder_kwargs)
        except registered.DatasetNotFoundError:
            pass  # No files found: fall back to the dataset code below.

    # Code is available and files were skipped or missing: build from code.
    if code_cls:
        with py_utils.try_reraise(
                prefix=f'Failed to construct dataset {name}: '):
            return code_cls(**builder_kwargs)  # pytype: disable=not-instantiable

    # Neither code nor files were found: surface the original lookup error.
    raise lookup_error