def __new__(cls, cls_name, bases, class_dict):
    """Create the builder class and record it in the matching registry.

    The snake_case form of `cls_name` is stored in `class_dict["name"]`
    before the class object is created.

    Args:
      cls: The metaclass.
      cls_name: Name of the class being created.
      bases: Base classes of the class being created.
      class_dict: Namespace of the class being created; mutated to add the
        `"name"` entry.

    Returns:
      The newly created class.

    Raises:
      ValueError: If, outside of a notebook, the snake_case name is already
        present in one of the three registries.
    """
    snake_name = naming.camelcase_to_snakecase(cls_name)
    class_dict["name"] = snake_name
    new_cls = super(RegisteredDataset, cls).__new__(  # pylint: disable=too-many-function-args
        cls, cls_name, bases, class_dict)

    # On Colab/Jupyter, overwriting an existing entry is allowed, so the
    # collision checks only run outside of a notebook.
    if not py_utils.is_notebook():
        if snake_name in _DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered." % snake_name)
        if snake_name in _IN_DEVELOPMENT_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered as in development."
                % snake_name)
        if snake_name in _ABSTRACT_DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered as abstract."
                % snake_name)

    # Nothing is registered while the skip-registration flag is set.
    if not _skip_registration:
        if inspect.isabstract(new_cls):
            target_registry = _ABSTRACT_DATASET_REGISTRY
        elif class_dict.get("IN_DEVELOPMENT"):
            target_registry = _IN_DEVELOPMENT_REGISTRY
        else:
            target_registry = _DATASET_REGISTRY
        target_registry[snake_name] = new_cls

    return new_cls
def __new__(mcs, cls_name, bases, class_dict):
    """Create the builder class and add it to the matching registry.

    The snake_case form of `cls_name` is stored in `class_dict["name"]`
    before the class object is created.

    Args:
      mcs: The metaclass.
      cls_name: Name of the class being created.
      bases: Base classes of the class being created.
      class_dict: Namespace of the class being created; mutated to add the
        `"name"` entry.

    Returns:
      The newly created class.

    Raises:
      ValueError: If, outside of a notebook, the snake_case name is already
        present in one of the three registries.
    """
    snake_name = naming.camelcase_to_snakecase(cls_name)
    class_dict["name"] = snake_name
    builder_cls = super(RegisteredDataset, mcs).__new__(
        mcs, cls_name, bases, class_dict)

    # On Colab/Jupyter, overwriting an existing entry is allowed, so the
    # collision checks only run outside of a notebook.
    if not py_utils.is_notebook():
        if snake_name in _DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered." % snake_name)
        if snake_name in _IN_DEVELOPMENT_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered as in development."
                % snake_name)
        if snake_name in _ABSTRACT_DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered as abstract."
                % snake_name)

    # Pick the registry first, then store the class in it.
    if inspect.isabstract(builder_cls):
        target_registry = _ABSTRACT_DATASET_REGISTRY
    elif class_dict.get("IN_DEVELOPMENT"):
        target_registry = _IN_DEVELOPMENT_REGISTRY
    else:
        target_registry = _DATASET_REGISTRY
    target_registry[snake_name] = builder_cls

    return builder_cls
def __new__(mcs, cls_name, bases, class_dict):
    """Create the builder class and register it unless it is abstract.

    The snake_case form of `cls_name` is stored in `class_dict["name"]`
    before the class object is created.

    Args:
      mcs: The metaclass.
      cls_name: Name of the class being created.
      bases: Base classes of the class being created.
      class_dict: Namespace of the class being created; mutated to add the
        `"name"` entry.

    Returns:
      The newly created class.

    Raises:
      ValueError: If the snake_case name is already in `_DATASET_REGISTRY`.
    """
    snake_name = naming.camelcase_to_snakecase(cls_name)
    class_dict["name"] = snake_name
    builder_cls = super(RegisteredDataset, mcs).__new__(
        mcs, cls_name, bases, class_dict)

    if snake_name in _DATASET_REGISTRY:
        raise ValueError(
            "Dataset with name %s already registered." % snake_name)

    # Abstract classes are created but never added to the registry.
    if inspect.isabstract(builder_cls):
        return builder_cls

    _DATASET_REGISTRY[snake_name] = builder_cls
    return builder_cls
def __init_subclass__(cls, skip_registration=False, **kwargs):  # pylint: disable=redefined-outer-name
    """Name the new subclass and record it in the dataset registries.

    Args:
      cls: The subclass being created.
      skip_registration: If true, the subclass is not added to any registry
        (it is still appended to `_MODULE_TO_DATASETS` when concrete).
      **kwargs: Forwarded to the parent `__init_subclass__`.

    Raises:
      ValueError: If, outside of a notebook, `cls.name` is already present in
        one of the three registries.
    """
    super().__init_subclass__(**kwargs)

    # @tag-tfdatasets-datasetbuilder-012

    # Only look at the subclass' own namespace (not inherited attributes), so
    # that a name defined on a parent does not leak into its subclasses.
    own_name = cls.__dict__.get('name')
    if not own_name:
        cls.name = naming.camelcase_to_snakecase(cls.__name__)

    is_abstract = inspect.isabstract(cls)

    # Every concrete dataset is tracked per module, even when registration is
    # skipped. This ensures that `builder_cls_from_module` can load the
    # datasets even when the module has been imported inside a
    # `skip_registration` context.
    if not is_abstract:
        _MODULE_TO_DATASETS[cls.__module__].append(cls)

    # Registration is skipped either through the contextmanager flag or
    # through the `skip_registration` class keyword argument.
    if skip_registration or _skip_registration:
        return

    # Name collisions are only an error outside of Colab/Jupyter, where
    # overwriting an existing entry is allowed.
    if not py_utils.is_notebook():
        if cls.name in _DATASET_REGISTRY:
            raise ValueError(
                f'Dataset with name {cls.name} already registered.')
        if cls.name in _IN_DEVELOPMENT_REGISTRY:
            raise ValueError(
                f'Dataset with name {cls.name} already registered as in development.'
            )
        if cls.name in _ABSTRACT_DATASET_REGISTRY:
            raise ValueError(
                f'Dataset with name {cls.name} already registered as abstract.'
            )

    # Pick the registry first, then store the class in it.
    if is_abstract:
        target_registry = _ABSTRACT_DATASET_REGISTRY
    elif cls.IN_DEVELOPMENT:
        target_registry = _IN_DEVELOPMENT_REGISTRY
    else:
        target_registry = _DATASET_REGISTRY
    target_registry[cls.name] = cls
def from_cls(cls,
             dataset_collection_class: Type["DatasetCollection"],
             release_notes: Mapping[str, str],
             description: Optional[str] = None,
             citation: Optional[str] = None) -> "DatasetCollectionInfo":
    """Build a DatasetCollectionInfo from a dataset collection class.

    Args:
      cls: The class to instantiate.
      dataset_collection_class: Class whose `__name__`, converted to
        snake_case, becomes the info's `name`.
      release_notes: Passed through unchanged.
      description: If falsy, obtained from
        `get_file_content_from_dataset_folder` with `DESCRIPTION_FILE` and
        `raise_error_if_fails=True`.
      citation: If falsy, obtained from
        `get_file_content_from_dataset_folder` with `CITATIONS_FILE`.

    Returns:
      A new instance of `cls`.
    """
    collection_name: str = naming.camelcase_to_snakecase(
        dataset_collection_class.__name__)

    # Explicitly provided (truthy) values win; otherwise the helper is asked
    # for the content.
    description = description or get_file_content_from_dataset_folder(
        dataset_collection_class,
        DESCRIPTION_FILE,
        raise_error_if_fails=True)
    citation = citation or get_file_content_from_dataset_folder(
        dataset_collection_class, CITATIONS_FILE)

    return cls(
        name=collection_name,
        release_notes=release_notes,
        description=description,
        citation=citation)
def _dataset_name_and_kwargs_from_name_str(name_str):
    """Extract the dataset name and kwargs from a name string.

    Args:
      name_str: String matched against `_NAME_REG`.

    Returns:
      A `(name, kwargs)` tuple. `name` is the `dataset_name` group converted
      with `naming.camelcase_to_snakecase`. `kwargs` is the result of
      `_kwargs_str_to_kwargs` on the `kwargs` group, extended with the
      `config` and `version` groups when they matched.

    Raises:
      ValueError: If `name_str` does not match `_NAME_REG`, or if `config` or
        `version` is present both as a regex group and in the kwargs.
    """
    res = _NAME_REG.match(name_str)
    if not res:
        raise ValueError(_NAME_STR_ERR.format(name_str))
    name = res.group("dataset_name")
    # Normalize the name to accept CamelCase
    name = naming.camelcase_to_snakecase(name)
    kwargs = _kwargs_str_to_kwargs(res.group("kwargs"))
    try:
        for attr in ["config", "version"]:
            val = res.group(attr)
            if val is None:
                continue
            if attr in kwargs:
                raise ValueError(
                    "Dataset %s: cannot pass %s twice." % (name, attr))
            kwargs[attr] = val
    except Exception:
        # Log the expected name format alongside the original error, then
        # re-raise. `Exception` (rather than a bare `except`) keeps
        # KeyboardInterrupt/SystemExit from being reported as a malformed
        # name string.
        logging.error(_NAME_STR_ERR.format(name_str))  # pylint: disable=logging-format-interpolation
        raise
    return name, kwargs
def __new__(cls, cls_name, bases, class_dict):
    """Create the builder class and record it in the matching registry.

    The snake_case form of `cls_name` is stored in `class_dict["name"]`
    before the class object is created.

    Args:
      cls: The metaclass.
      cls_name: Name of the class being created.
      bases: Base classes of the class being created.
      class_dict: Namespace of the class being created; mutated to add the
        `"name"` entry.

    Returns:
      The newly created class.

    Raises:
      ValueError: If, outside of a notebook, the snake_case name is already
        present in one of the three registries.
    """
    snake_name = naming.camelcase_to_snakecase(cls_name)
    class_dict["name"] = snake_name
    new_cls = super(RegisteredDataset, cls).__new__(  # pylint: disable=too-many-function-args,redefined-outer-name
        cls, cls_name, bases, class_dict)

    # On Colab/Jupyter, overwriting an existing entry is allowed, so the
    # collision checks only run outside of a notebook.
    if not py_utils.is_notebook():
        if snake_name in _DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered." % snake_name)
        if snake_name in _IN_DEVELOPMENT_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered as in development."
                % snake_name)
        if snake_name in _ABSTRACT_DATASET_REGISTRY:
            raise ValueError(
                "Dataset with name %s already registered as abstract."
                % snake_name)

    is_abstract = inspect.isabstract(new_cls)

    # Every concrete dataset is tracked per module, even when registration is
    # skipped. This ensures that `builder_cls_from_module` can load the
    # datasets even when the module has been imported inside a
    # `skip_registration` context.
    if not is_abstract:
        _MODULE_TO_DATASETS[new_cls.__module__].append(new_cls)

    # Nothing is registered while the skip-registration flag is set.
    if not _skip_registration:
        if is_abstract:
            target_registry = _ABSTRACT_DATASET_REGISTRY
        elif class_dict.get("IN_DEVELOPMENT"):
            target_registry = _IN_DEVELOPMENT_REGISTRY
        else:
            target_registry = _DATASET_REGISTRY
        target_registry[snake_name] = new_cls

    return new_cls
def test_snake_to_camelcase(self, camel, snake):
    """Check snake_case -> CamelCase, and that snake_case input is stable.

    Args:
      camel: Expected CamelCase form of `snake`.
      snake: snake_case input.
    """
    converted = naming.snake_to_camelcase(snake)
    self.assertEqual(converted, camel)

    # camelcase_to_snakecase is a no-op if the name is already snake_case.
    unchanged = naming.camelcase_to_snakecase(snake)
    self.assertEqual(unchanged, snake)
def test_camelcase_to_snakecase(self, camel, snake):
    """Check that `camel` converts to the expected snake_case name.

    Args:
      camel: CamelCase input.
      snake: Expected snake_case form of `camel`.
    """
    converted = naming.camelcase_to_snakecase(camel)
    self.assertEqual(snake, converted)