Example #1
0
    ds = Dataset(
        url=url,
        token=None,
        shape=(1000,),
        mode="w",
        schema=schema,
    )

    ds["first"][0] = np.ones((10, 10))

    pickled_ds = cloudpickle.dumps(ds)
    new_ds = pickle.loads(pickled_ds)
    assert np.all(new_ds["first"][0].compute() == ds["first"][0].compute())


@pytest.mark.skipif(not s3_creds_exist(), reason="requires s3 credentials")
def test_pickleability_s3():
    """Run ``test_pickleability`` against an S3 dataset URL.

    Skipped when ``s3_creds_exist()`` returns a falsy value.
    """
    s3_url = "s3://snark-test/test_dataset_pickle_s3"
    test_pickleability(s3_url)


@pytest.mark.skipif(not gcp_creds_exist(), reason="requires gcp credentials")
def test_pickleability_gcs():
    """Run ``test_pickleability`` against a GCS dataset URL.

    Skipped when ``gcp_creds_exist()`` returns a falsy value.
    """
    # Use a pickle-specific path, mirroring the S3 variant's
    # ``test_dataset_pickle_s3``. The previous path
    # (``gcs://snark-test/test_dataset_gcs``) is the one ``test_dataset_gcs``
    # writes to, so the two tests could clobber each other's data.
    test_pickleability("gcs://snark-test/test_dataset_pickle_gcs")


def test_dataset_dynamic_shaped():
    schema = {
        "first": Tensor(
            shape=(None, None),
            dtype="int32",
            max_shape=(100, 100),
Example #2
0
@pytest.mark.skipif(not dask_loaded(), reason="dask is not installed")
def test_unknown_size_input():
    """Check a dataset generated from ``UnknownCountGenerator`` over 1..10.

    Before storing, both tensors must report ``-1`` as their leading
    dimension. After storing to a local path, the dataset must contain 55
    samples and the spot-checked values must match.
    (Presumably each input ``k`` contributes ``k`` samples, which would give
    1 + ... + 10 = 55 -- confirm against ``UnknownCountGenerator``.)
    """
    generated = dataset.generate(UnknownCountGenerator(), range(1, 11))
    assert generated["arr"].shape == (-1, 5)
    assert generated["rra"].shape == (-1,)

    stored = generated.store("./data/test_store_tmp/unknown_count")
    assert len(stored) == 55

    # First ten "rra" values, then a single element from the same tensor.
    expected_rra_head = np.array([0, 0, 1, 0, 1, 2, 0, 1, 2, 3], dtype="int32")
    assert (stored["rra"][:10].compute() == expected_rra_head).all()
    assert stored["rra"][9].compute() == 3

    # One full row of "arr".
    expected_arr_row = np.array([2, 3, 4, 5, 6], dtype="int32")
    assert (stored["arr"][5].compute() == expected_arr_row).all()


# The skip condition fires when EITHER S3 credentials or dask is missing, so
# the reason names both prerequisites (it previously mentioned only S3).
@pytest.mark.skipif(not s3_creds_exist() or not dask_loaded(),
                    reason="requires s3 credentials and dask")
def test_s3_dataset():
    """Generate a small dataset, store it on S3, and verify its contents.

    Before storing, both tensors must report ``-1`` as their leading
    dimension. After storing, the dataset must contain 3 samples and the
    spot-checked values must match.
    """
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1, )

    # ``store`` returns the dataset used for all remaining checks.
    ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1],
                                                dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4],
                                               dtype="int32")).all()


@pytest.mark.skipif(not gcp_creds_exist() or not dask_loaded(),
Example #3
0

@pytest.mark.skipif(not hub_creds_exist(), reason="requires hub credentials")
def test_dataset_hub():
    """Log in as ``testingacc`` and run ``test_dataset`` on two hub paths.

    The password is read from the ``ACTIVELOOP_HUB_PASSWORD`` environment
    variable. ``test_dataset`` is run first with ``public=False`` on the
    private path, then with default arguments on the public path.
    """
    login_fn("testingacc", os.getenv("ACTIVELOOP_HUB_PASSWORD"))

    private_path = "testingacc/test_dataset_private"
    public_path = "testingacc/test_dataset_public"
    test_dataset(private_path, public=False)
    test_dataset(public_path)


@pytest.mark.skipif(not gcp_creds_exist(), reason="requires gcp credentials")
def test_dataset_gcs():
    """Run ``test_dataset`` against a GCS dataset URL.

    Skipped when ``gcp_creds_exist()`` returns a falsy value.
    """
    gcs_url = "gcs://snark-test/test_dataset_gcs"
    test_dataset(gcs_url)


@pytest.mark.skipif(not s3_creds_exist(), reason="requires s3 credentials")
def test_dataset_s3():
    """Run ``test_dataset`` against an S3 dataset URL.

    Skipped when ``s3_creds_exist()`` returns a falsy value.
    """
    s3_url = "s3://snark-test/test_dataset_s3"
    test_dataset(s3_url)


@pytest.mark.skipif(not azure_creds_exist(), reason="requires azure credentials")
def test_dataset_azure():
    """Run ``test_dataset`` against an Azure blob URL with an account-key token.

    The key is read from the ``ACCOUNT_KEY`` environment variable and passed
    to ``test_dataset`` as ``token={"account_key": ...}``.
    """
    import os

    account_key = os.getenv("ACCOUNT_KEY")
    azure_url = (
        "https://activeloop.blob.core.windows.net/activeloop-hub/test_dataset_azure"
    )
    test_dataset(azure_url, token={"account_key": account_key})

Example #4
0
def test_unknown_size_input():
    """Check a dataset generated from ``UnknownCountGenerator`` over 1..10.

    Before storing, both tensors must report ``-1`` as their leading
    dimension. After storing to a local path, the dataset must contain 55
    samples and the spot-checked values must match.
    (Presumably each input ``k`` contributes ``k`` samples, which would give
    1 + ... + 10 = 55 -- confirm against ``UnknownCountGenerator``.)
    """
    generated = dataset.generate(UnknownCountGenerator(), range(1, 11))
    assert generated["arr"].shape == (-1, 5)
    assert generated["rra"].shape == (-1,)

    stored = generated.store("./data/test_store_tmp/unknown_count")
    assert len(stored) == 55

    # First ten "rra" values, then a single element from the same tensor.
    expected_rra_head = np.array([0, 0, 1, 0, 1, 2, 0, 1, 2, 3], dtype="int32")
    assert (stored["rra"][:10].compute() == expected_rra_head).all()
    assert stored["rra"][9].compute() == 3

    # One full row of "arr".
    expected_arr_row = np.array([2, 3, 4, 5, 6], dtype="int32")
    assert (stored["arr"][5].compute() == expected_arr_row).all()


# The skip condition fires when EITHER S3 credentials or dask is missing, so
# the reason names both prerequisites (it previously mentioned only S3).
@pytest.mark.skipif(
    not s3_creds_exist() or not dask_loaded(),
    reason="requires s3 credentials and dask",
)
def test_s3_dataset():
    """Generate a small dataset, store it on S3, and verify its contents.

    Before storing, both tensors must report ``-1`` as their leading
    dimension. After storing, the dataset must contain 3 samples and the
    spot-checked values must match.
    """
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1,)

    # ``store`` returns the dataset used for all remaining checks.
    ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all()


@pytest.mark.skipif(
    not gcp_creds_exist() or not dask_loaded(), reason="requires gcs credentials"
)