Example #1
0
  def __new__(cls, cls_name, bases, class_dict):
    """Creates the builder class and files it in the matching registry.

    The snake_case form of `cls_name` is written to `class_dict["name"]`
    before the class object is built.

    Args:
      cls_name: Name of the class being created.
      bases: Base classes, forwarded unchanged to the parent `__new__`.
      class_dict: Class namespace; mutated to receive the `"name"` entry.

    Returns:
      The newly created class.

    Raises:
      ValueError: If the name is already present in one of the registries
        and `py_utils.is_notebook()` is falsy.
    """
    snake_name = naming.camelcase_to_snakecase(cls_name)
    class_dict["name"] = snake_name
    new_cls = super(RegisteredDataset, cls).__new__(  # pylint: disable=too-many-function-args
        cls, cls_name, bases, class_dict)

    # On Colab/Jupyter, we allow overwriting, so collisions are only checked
    # elsewhere. Note the check runs even when registration is skipped below.
    if not py_utils.is_notebook():
      if snake_name in _DATASET_REGISTRY:
        raise ValueError(
            "Dataset with name %s already registered." % snake_name)
      if snake_name in _IN_DEVELOPMENT_REGISTRY:
        raise ValueError(
            "Dataset with name %s already registered as in development."
            % snake_name)
      if snake_name in _ABSTRACT_DATASET_REGISTRY:
        raise ValueError(
            "Dataset with name %s already registered as abstract."
            % snake_name)

    # Skip dataset registration within the contextmanager.
    if _skip_registration:
      return new_cls

    # Pick exactly one registry: abstract, in-development, or regular.
    if inspect.isabstract(new_cls):
      registry = _ABSTRACT_DATASET_REGISTRY
    elif class_dict.get("IN_DEVELOPMENT"):
      registry = _IN_DEVELOPMENT_REGISTRY
    else:
      registry = _DATASET_REGISTRY
    registry[snake_name] = new_cls
    return new_cls
Example #2
0
    def __new__(mcs, cls_name, bases, class_dict):
        """Creates the class and stores it in one of the dataset registries.

        The snake_case form of `cls_name` is written to `class_dict["name"]`
        before the class object is built.

        Args:
            cls_name: Name of the class being created.
            bases: Base classes, forwarded unchanged to the parent `__new__`.
            class_dict: Class namespace; mutated to receive the `"name"` entry.

        Returns:
            The newly created class.

        Raises:
            ValueError: If the name is already present in one of the
                registries and `py_utils.is_notebook()` is falsy.
        """
        snake_name = naming.camelcase_to_snakecase(cls_name)
        class_dict["name"] = snake_name
        new_cls = super(RegisteredDataset, mcs).__new__(
            mcs, cls_name, bases, class_dict)

        # On Colab/Jupyter, we allow overwriting; otherwise a name may only
        # be registered once across all three registries.
        if not py_utils.is_notebook():
            if snake_name in _DATASET_REGISTRY:
                raise ValueError(
                    "Dataset with name %s already registered." % snake_name)
            if snake_name in _IN_DEVELOPMENT_REGISTRY:
                raise ValueError(
                    "Dataset with name %s already registered as in development."
                    % snake_name)
            if snake_name in _ABSTRACT_DATASET_REGISTRY:
                raise ValueError(
                    "Dataset with name %s already registered as abstract."
                    % snake_name)

        # Pick exactly one registry: abstract, in-development, or regular.
        if inspect.isabstract(new_cls):
            registry = _ABSTRACT_DATASET_REGISTRY
        elif class_dict.get("IN_DEVELOPMENT"):
            registry = _IN_DEVELOPMENT_REGISTRY
        else:
            registry = _DATASET_REGISTRY
        registry[snake_name] = new_cls
        return new_cls
Example #3
0
    def __new__(mcs, cls_name, bases, class_dict):
        """Creates the class and registers it unless it is abstract.

        The snake_case form of `cls_name` is written to `class_dict["name"]`
        before the class object is built.

        Args:
            cls_name: Name of the class being created.
            bases: Base classes, forwarded unchanged to the parent `__new__`.
            class_dict: Class namespace; mutated to receive the `"name"` entry.

        Returns:
            The newly created class.

        Raises:
            ValueError: If the name is already in `_DATASET_REGISTRY`.
        """
        snake_name = naming.camelcase_to_snakecase(cls_name)
        class_dict["name"] = snake_name
        new_cls = super(RegisteredDataset, mcs).__new__(
            mcs, cls_name, bases, class_dict)

        # The collision check applies to abstract classes too, even though
        # they are never added to the registry themselves.
        if snake_name in _DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered." % snake_name)

        if inspect.isabstract(new_cls):
            return new_cls
        _DATASET_REGISTRY[snake_name] = new_cls
        return new_cls
Example #4
0
    def __init_subclass__(cls, skip_registration=False, **kwargs):  # pylint: disable=redefined-outer-name
        """Names the new subclass and adds it to the dataset registries.

        Args:
            skip_registration: When truthy, the subclass is not added to any
                registry (it is still recorded in `_MODULE_TO_DATASETS` if
                concrete).
            **kwargs: Forwarded to the parent `__init_subclass__`.

        Raises:
            ValueError: If registration is not skipped, the name is already
                present in one of the registries, and
                `py_utils.is_notebook()` is falsy.
        """
        super().__init_subclass__(**kwargs)

        # @tag-tfdatasets-datasetbuilder-012
        # Derive a default name only when this exact class does not define a
        # truthy one. Looking at __dict__ (not getattr) ignores inherited
        # names, so subclasses are not affected by their parents' names.
        if not cls.__dict__.get('name'):
            cls.name = naming.camelcase_to_snakecase(cls.__name__)

        is_abstract = inspect.isabstract(cls)
        if not is_abstract:
            # Every concrete dataset is captured here, even when registration
            # is skipped, so that `builder_cls_from_module` can load datasets
            # from modules imported inside a `skip_registration` context.
            _MODULE_TO_DATASETS[cls.__module__].append(cls)

        # Nothing else to do inside the contextmanager, or when
        # skip_registration is passed as meta argument.
        if skip_registration or _skip_registration:
            return

        # Name collisions are errors, except on Colab/Jupyter where
        # overwriting is allowed.
        if not py_utils.is_notebook():
            if cls.name in _DATASET_REGISTRY:
                raise ValueError(
                    f'Dataset with name {cls.name} already registered.')
            if cls.name in _IN_DEVELOPMENT_REGISTRY:
                raise ValueError(
                    f'Dataset with name {cls.name} already registered as in development.'
                )
            if cls.name in _ABSTRACT_DATASET_REGISTRY:
                raise ValueError(
                    f'Dataset with name {cls.name} already registered as abstract.'
                )

        # Pick exactly one registry and store the class in it.
        if is_abstract:
            registry = _ABSTRACT_DATASET_REGISTRY
        elif cls.IN_DEVELOPMENT:
            registry = _IN_DEVELOPMENT_REGISTRY
        else:
            registry = _DATASET_REGISTRY
        registry[cls.name] = cls
Example #5
0
 def from_cls(cls,
              dataset_collection_class: Type["DatasetCollection"],
              release_notes: Mapping[str, str],
              description: Optional[str] = None,
              citation: Optional[str] = None) -> "DatasetCollectionInfo":
     """Builds an instance of `cls` from a dataset collection class.

     The name is the snake_case form of the collection class name. A missing
     or empty `description` / `citation` is replaced by the result of
     `get_file_content_from_dataset_folder` for `DESCRIPTION_FILE` /
     `CITATIONS_FILE`; only the description lookup passes
     `raise_error_if_fails=True`.

     Args:
       dataset_collection_class: Class whose `__name__` provides the name and
         which is handed to the file lookup helper.
       release_notes: Passed through unchanged.
       description: Optional explicit description.
       citation: Optional explicit citation.

     Returns:
       The object produced by calling `cls` with the resolved fields.
     """
     collection_name: str = naming.camelcase_to_snakecase(
         dataset_collection_class.__name__)
     # `or` keeps any truthy caller-supplied value and only triggers the
     # lookup for None / empty values.
     description = description or get_file_content_from_dataset_folder(
         dataset_collection_class,
         DESCRIPTION_FILE,
         raise_error_if_fails=True)
     citation = citation or get_file_content_from_dataset_folder(
         dataset_collection_class, CITATIONS_FILE)
     return cls(
         name=collection_name,
         release_notes=release_notes,
         description=description,
         citation=citation,
     )
Example #6
0
def _dataset_name_and_kwargs_from_name_str(name_str):
  """Extract the dataset name and kwargs from a name str.

  Args:
    name_str: String matched against `_NAME_REG`; the match must expose the
      groups `dataset_name`, `kwargs`, `config` and `version`.

  Returns:
    A `(name, kwargs)` tuple. `name` is the matched dataset name converted to
    snake_case. `kwargs` is the result of `_kwargs_str_to_kwargs` (presumably
    a dict - confirm against that helper), extended with the `config` and
    `version` groups when they matched.

  Raises:
    ValueError: If `name_str` does not match `_NAME_REG`, or if `config` or
      `version` is given both in the name part and in the kwargs part.
  """
  res = _NAME_REG.match(name_str)
  if not res:
    raise ValueError(_NAME_STR_ERR.format(name_str))
  name = res.group("dataset_name")
  # Normalize the name to accept CamelCase
  name = naming.camelcase_to_snakecase(name)
  kwargs = _kwargs_str_to_kwargs(res.group("kwargs"))
  try:
    for attr in ("config", "version"):
      val = res.group(attr)
      if val is None:
        continue
      if attr in kwargs:
        raise ValueError("Dataset %s: cannot pass %s twice." % (name, attr))
      kwargs[attr] = val
  except Exception:
    # Only real errors are logged as a malformed name string. A bare `except`
    # would also log this message for KeyboardInterrupt / SystemExit, which
    # have nothing to do with the name format.
    logging.error(_NAME_STR_ERR.format(name_str))   # pylint: disable=logging-format-interpolation
    raise
  return name, kwargs
Example #7
0
    def __new__(cls, cls_name, bases, class_dict):
        """Creates the builder class, records its module, and registers it.

        The snake_case form of `cls_name` is written to `class_dict["name"]`
        before the class object is built.

        Args:
            cls_name: Name of the class being created.
            bases: Base classes, forwarded unchanged to the parent `__new__`.
            class_dict: Class namespace; mutated to receive the `"name"` entry.

        Returns:
            The newly created class.

        Raises:
            ValueError: If the name is already present in one of the
                registries and `py_utils.is_notebook()` is falsy.
        """
        snake_name = naming.camelcase_to_snakecase(cls_name)
        class_dict["name"] = snake_name
        new_cls = super(RegisteredDataset, cls).__new__(  # pylint: disable=too-many-function-args,redefined-outer-name
            cls, cls_name, bases, class_dict)

        # On Colab/Jupyter, we allow overwriting. Note the check runs even
        # when registration is skipped further down.
        if not py_utils.is_notebook():
            if snake_name in _DATASET_REGISTRY:
                raise ValueError(
                    "Dataset with name %s already registered." % snake_name)
            if snake_name in _IN_DEVELOPMENT_REGISTRY:
                raise ValueError(
                    "Dataset with name %s already registered as in development."
                    % snake_name)
            if snake_name in _ABSTRACT_DATASET_REGISTRY:
                raise ValueError(
                    "Dataset with name %s already registered as abstract."
                    % snake_name)

        is_abstract = inspect.isabstract(new_cls)
        if not is_abstract:
            # Every concrete dataset is captured here, even when registration
            # is skipped, so that `builder_cls_from_module` can load datasets
            # from modules imported inside a `skip_registration` context.
            _MODULE_TO_DATASETS[new_cls.__module__].append(new_cls)

        # Skip dataset registration within the contextmanager.
        if _skip_registration:
            return new_cls

        # Pick exactly one registry: abstract, in-development, or regular.
        if is_abstract:
            registry = _ABSTRACT_DATASET_REGISTRY
        elif class_dict.get("IN_DEVELOPMENT"):
            registry = _IN_DEVELOPMENT_REGISTRY
        else:
            registry = _DATASET_REGISTRY
        registry[snake_name] = new_cls
        return new_cls
Example #8
0
 def test_snake_to_camelcase(self, camel, snake):
   """Checks snake -> camel conversion and that snake_case input is kept."""
   converted = naming.snake_to_camelcase(snake)
   self.assertEqual(converted, camel)
   # camelcase_to_snakecase is a no-op if the name is already snake_case.
   unchanged = naming.camelcase_to_snakecase(snake)
   self.assertEqual(unchanged, snake)
Example #9
0
 def test_camelcase_to_snakecase(self, camel, snake):
   """Checks that the CamelCase name converts to the expected snake_case."""
   converted = naming.camelcase_to_snakecase(camel)
   self.assertEqual(snake, converted)