def test_exceptions():
    """Smoke-test that every custom Hub exception can be constructed."""
    HubException()
    AuthenticationException()
    AuthorizationException(Response())
    AuthorizationException(Response(noerror=True))
    NotFoundException()
    BadRequestException(Response())
    BadRequestException(Response(noerror=True))
    OverLimitException()
    ServerException()
    BadGatewayException()
    GatewayTimeoutException()
    WaitTimeoutException()
    LockedException()
    HubDatasetNotFoundException("Hello")
    PermissionException("Hello")
    ShapeLengthException()
    ShapeArgumentNotFoundException()
    SchemaArgumentNotFoundException()
    ValueShapeError("Shape 1", "Shape 2")
    NoneValueException("Yahoo!")
    ModuleNotInstalledException("my_module")
    WrongUsernameException("usernameX")
    NotHubDatasetToOverwriteException()
    NotHubDatasetToAppendException()
    DynamicTensorNotFoundException()

    # Each recognised cause string, plus an unrecognised one to exercise the fallback message.
    DynamicTensorShapeException("none")
    DynamicTensorShapeException("length")
    DynamicTensorShapeException("not_equal")
    DynamicTensorShapeException("another_cause")
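
The Response(...) calls above rely on a stub that is not part of this excerpt. Below is a minimal sketch of such a stub, assuming AuthorizationException and BadRequestException only try to read an error description from response.json(); the noerror flag and the payload shape are assumptions, not the library's actual test fixture.

class Response:
    """Hypothetical stand-in for an HTTP response object (assumption)."""

    def __init__(self, noerror=False):
        self.noerror = noerror

    def json(self):
        # With noerror=True, return a well-formed error payload; otherwise
        # raise, so the exception's fallback-message path gets exercised.
        if self.noerror:
            return {"description": "example error description"}
        raise ValueError("malformed response body")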
Example #2
def _check_and_prepare_dir(self):
    """
    Checks that the input data is ok.
    Creates or overwrites the dataset folder.
    Returns True if the dataset needs to be created, as opposed to read.
    """
    fs, path, mode = self._fs, self._path, self._mode
    if path.startswith("s3://"):
        # The dataset owner is the second-to-last component of the S3 path;
        # compare it against the username stored by the local login session.
        with open(posixpath.expanduser("~/.activeloop/store"), "rb") as f:
            stored_username = json.load(f)["_id"]
        current_username = path.split("/")[-2]
        if stored_username != current_username:
            try:
                fs.listdir(path)
            except Exception:
                raise WrongUsernameException(stored_username)
    exist_meta = fs.exists(posixpath.join(path, "meta.json"))
    if exist_meta:
        if "w" in mode:
            fs.rm(path, recursive=True)
            fs.makedirs(path)
            return True
        return False
    else:
        if "r" in mode:
            raise HubDatasetNotFoundException(path)
        exist_dir = fs.exists(path)
        if not exist_dir:
            fs.makedirs(path)
        elif get_file_count(fs, path) > 0:
            # The directory exists and is non-empty, but holds no meta.json,
            # so it is not a Hub dataset.
            if "w" in mode:
                raise NotHubDatasetToOverwriteException()
            else:
                raise NotHubDatasetToAppendException()
        return True
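
get_file_count is used above but not defined in this excerpt. A minimal sketch, assuming it simply counts the entries the fsspec filesystem lists under the path (the real helper may filter by entry type):

def get_file_count(fs, path):
    # Hypothetical helper (assumption): count the entries fsspec reports under `path`.
    return len(fs.ls(path))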
Example #3
def load(tag, creds=None, session_creds=True) -> Dataset:
    """Load a dataset from the repository using the given tag and optional credentials."""
    fs, path = _load_fs_and_path(tag, creds, session_creds=session_creds)
    fs: fsspec.AbstractFileSystem = fs  # re-annotate for static type checkers
    meta_path = f"{path}/meta.json"
    if not fs.exists(meta_path):
        raise HubDatasetNotFoundException(tag)

    with fs.open(meta_path, "r") as f:
        ds_meta = json.loads(f.read())

    for name in ds_meta["tensors"]:
        assert fs.exists(
            f"{path}/{name}"
        ), f"Tensor {name} of {tag} dataset does not exist"

    if "dask" not in sys.modules:
        raise ModuleNotInstalledException("dask")
    else:
        import dask
        import dask.array

        global dask

    if ds_meta["len"] == 0:
        logger.warning("The dataset is empty (has 0 samples)")

        return Dataset(
            {
                name: Tensor(
                    tmeta,
                    dask.array.from_array(
                        np.empty(shape=(0,) + tuple(tmeta["shape"][1:]), dtype="uint8"),
                    ),
                )
                for name, tmeta in ds_meta["tensors"].items()
            },
            metainfo=ds_meta.get("metainfo"),
        )
    len_ = ds_meta["len"]

    # Backward compatibility: older datasets did not store a chunksize.
    for name, tmeta in ds_meta["tensors"].items():
        if "chunksize" not in tmeta:
            tmeta["chunksize"] = 1

    return Dataset(
        {
            name: Tensor(
                tmeta,
                _dask_concat(
                    [
                        dask.array.from_delayed(
                            dask.delayed(_numpy_load)(
                                fs,
                                f"{path}/{name}/{i}.npy",
                                codec_from_name(tmeta.get("dcompress")),
                            ),
                            shape=(min(tmeta["chunksize"], len_ - i),)
                            + tuple(tmeta["shape"][1:]),
                            dtype=tmeta["dtype"],
                        )
                        for i in range(0, len_, tmeta["chunksize"])
                    ]
                ),
            )
            for name, tmeta in ds_meta["tensors"].items()
        },
        metainfo=ds_meta.get("metainfo"),
    )
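
_numpy_load and _dask_concat are referenced above but not shown. Minimal sketches, inferred from their call sites; the argument roles are assumptions, and the real helpers may handle errors or edge cases differently:

def _numpy_load(fs, filepath, codec):
    # Assumed behaviour: read one chunk file and decode it into a numpy array.
    with fs.open(filepath, "rb") as f:
        return codec.decode(f.read())

def _dask_concat(arrays):
    # Assumed behaviour: concatenate per-chunk arrays along the sample axis;
    # a single chunk needs no concatenation.
    if len(arrays) == 1:
        return arrays[0]
    return dask.array.concatenate(arrays)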