# Smoke test: every Hub exception can be constructed without raising.
# Assumed import location: these exception classes ship in hub.exceptions.
from hub.exceptions import *  # noqa: F401,F403


def test_exceptions():
    HubException()
    AuthenticationException()
    AuthorizationException(Response())
    AuthorizationException(Response(noerror=True))
    NotFoundException()
    BadRequestException(Response())
    BadRequestException(Response(noerror=True))
    OverLimitException()
    ServerException()
    BadGatewayException()
    GatewayTimeoutException()
    WaitTimeoutException()
    LockedException()
    HubDatasetNotFoundException("Hello")
    PermissionException("Hello")
    ShapeLengthException()
    ShapeArgumentNotFoundException()
    SchemaArgumentNotFoundException()
    ValueShapeError("Shape 1", "Shape 2")
    NoneValueException("Yahoo!")
    ModuleNotInstalledException("my_module")
    WrongUsernameException("usernameX")
    NotHubDatasetToOverwriteException()
    NotHubDatasetToAppendException()
    DynamicTensorNotFoundException()
    DynamicTensorShapeException("none")
    DynamicTensorShapeException("length")
    DynamicTensorShapeException("not_equal")
    DynamicTensorShapeException("another_cause")
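# `Response` is not defined in this snippet; below is a minimal sketch of the
# test double it implies. The attribute and method names (status_code, json,
# the "message" key) are assumptions for illustration, not the actual fixture
# from the hub test suite.
class Response:
    """Hypothetical stand-in for an HTTP response passed to exceptions."""

    def __init__(self, noerror=False):
        # A "good" response carries no error payload; a "bad" one does.
        self.status_code = 200 if noerror else 400
        self._noerror = noerror

    def json(self):
        # Exceptions that inspect the body can fall back to a default
        # message when the "message" key is absent, which is what the
        # noerror=True calls in the test above exercise.
        return {} if self._noerror else {"message": "request failed"}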
def _check_and_prepare_dir(self):
    """
    Checks that the input data is OK and creates or overwrites the dataset
    folder. Returns True if the dataset needs to be created, as opposed to
    being read.
    """
    fs, path, mode = self._fs, self._path, self._mode
    if path.startswith("s3://"):
        # Compare the username stored by the local Activeloop credentials
        # against the one embedded in the S3 path; a mismatch is only fatal
        # if the path is also inaccessible.
        with open(posixpath.expanduser("~/.activeloop/store"), "rb") as f:
            stored_username = json.load(f)["_id"]
        current_username = path.split("/")[-2]
        if stored_username != current_username:
            try:
                fs.listdir(path)
            except Exception:
                raise WrongUsernameException(stored_username)
    exist_meta = fs.exists(posixpath.join(path, "meta.json"))
    if exist_meta:
        if "w" in mode:
            fs.rm(path, recursive=True)
            fs.makedirs(path)
            return True
        return False
    if "r" in mode:
        raise HubDatasetNotFoundException(path)
    exist_dir = fs.exists(path)
    if not exist_dir:
        fs.makedirs(path)
    elif get_file_count(fs, path) > 0:
        # The directory exists and is non-empty but is not a Hub dataset.
        if "w" in mode:
            raise NotHubDatasetToOverwriteException()
        else:
            raise NotHubDatasetToAppendException()
    return True
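# A hedged usage sketch (not from the hub source): exercising two outcomes of
# _check_and_prepare_dir against fsspec's in-memory filesystem. `_DS` is a
# hypothetical minimal holder for the attributes the method reads.
import fsspec


class _DS:
    def __init__(self, fs, path, mode):
        self._fs, self._path, self._mode = fs, path, mode

    _check_and_prepare_dir = _check_and_prepare_dir  # reuse the function above


_fs = fsspec.filesystem("memory")
assert _DS(_fs, "/datasets/example", "w")._check_and_prepare_dir() is True
# Fresh path in "w" mode: the directory is created and True is returned.

try:
    _DS(_fs, "/datasets/example", "r")._check_and_prepare_dir()
except HubDatasetNotFoundException:
    pass  # no meta.json has been written yet, so "r" mode refuses to open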
def load(tag, creds=None, session_creds=True) -> Dataset:
    """Load a dataset from the repository using the given tag and
    (optional) credentials."""
    fs, path = _load_fs_and_path(tag, creds, session_creds=session_creds)
    fs: fsspec.AbstractFileSystem = fs  # annotate for type checkers
    path_2 = f"{path}/meta.json"
    if not fs.exists(path_2):
        raise HubDatasetNotFoundException(tag)
    with fs.open(path_2, "r") as f:
        ds_meta = json.loads(f.read())
    for name in ds_meta["tensors"]:
        assert fs.exists(
            f"{path}/{name}"
        ), f"Tensor {name} of {tag} dataset does not exist"
    # Import dask lazily; probing with an import (rather than sys.modules,
    # which only reflects what has already been imported) correctly accepts
    # an installed-but-unimported dask.
    try:
        global dask
        import dask
        import dask.array
    except ImportError:
        raise ModuleNotInstalledException("dask")
    if ds_meta["len"] == 0:
        logger.warning("The dataset is empty (has 0 samples)")
        return Dataset(
            {
                name: Tensor(
                    tmeta,
                    dask.array.from_array(
                        np.empty(
                            shape=(0,) + tuple(tmeta["shape"][1:]), dtype="uint8"
                        ),
                    ),
                )
                for name, tmeta in ds_meta["tensors"].items()
            },
            metainfo=ds_meta.get("metainfo"),
        )
    len_ = ds_meta["len"]
    # Backward compatibility: datasets written by previous versions may lack
    # a chunksize, so default it to 1.
    for name, tmeta in ds_meta["tensors"].items():
        if "chunksize" not in tmeta:
            tmeta["chunksize"] = 1
    # Each tensor is a lazy concatenation of per-chunk .npy files, one delayed
    # load per chunk; the last chunk may be shorter than `chunksize`.
    return Dataset(
        {
            name: Tensor(
                tmeta,
                _dask_concat(
                    [
                        dask.array.from_delayed(
                            dask.delayed(_numpy_load)(
                                fs,
                                f"{path}/{name}/{i}.npy",
                                codec_from_name(tmeta.get("dcompress")),
                            ),
                            shape=(min(tmeta["chunksize"], len_ - i),)
                            + tuple(tmeta["shape"][1:]),
                            dtype=tmeta["dtype"],
                        )
                        for i in range(0, len_, tmeta["chunksize"])
                    ]
                ),
            )
            for name, tmeta in ds_meta["tensors"].items()
        },
        metainfo=ds_meta.get("metainfo"),
    )
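# Illustration of the chunk layout `load` expects on disk, grounded in the
# range/min arithmetic of the loop above: chunk files are named by their
# starting index, and the final chunk may be partial. For len_ == 10 and
# chunksize == 4, the loader reads 0.npy, 4.npy and 8.npy with leading
# dimensions 4, 4 and 2.
_len, _chunksize = 10, 4
_chunks = [
    (f"{i}.npy", min(_chunksize, _len - i)) for i in range(0, _len, _chunksize)
]
assert _chunks == [("0.npy", 4), ("4.npy", 4), ("8.npy", 2)]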