    ds = Dataset(
        url=url,
        token=None,
        shape=(1000,),
        mode="w",
        schema=schema,
    )
    ds["first"][0] = np.ones((10, 10))
    pickled_ds = cloudpickle.dumps(ds)
    new_ds = pickle.loads(pickled_ds)
    assert np.all(new_ds["first"][0].compute() == ds["first"][0].compute())


@pytest.mark.skipif(not s3_creds_exist(), reason="requires s3 credentials")
def test_pickleability_s3():
    test_pickleability("s3://snark-test/test_dataset_pickle_s3")


@pytest.mark.skipif(not gcp_creds_exist(), reason="requires gcp credentials")
def test_pickleability_gcs():
    test_pickleability("gcs://snark-test/test_dataset_gcs")


def test_dataset_dynamic_shaped():
    schema = {
        "first": Tensor(
            shape=(None, None),
            dtype="int32",
            max_shape=(100, 100),
@pytest.mark.skipif(not hub_creds_exist(), reason="requires hub credentials")
def test_dataset_hub():
    password = os.getenv("ACTIVELOOP_HUB_PASSWORD")
    login_fn("testingacc", password)
    test_dataset("testingacc/test_dataset_private", public=False)
    test_dataset("testingacc/test_dataset_public")


@pytest.mark.skipif(not gcp_creds_exist(), reason="requires gcp credentials")
def test_dataset_gcs():
    test_dataset("gcs://snark-test/test_dataset_gcs")


@pytest.mark.skipif(not s3_creds_exist(), reason="requires s3 credentials")
def test_dataset_s3():
    test_dataset("s3://snark-test/test_dataset_s3")


@pytest.mark.skipif(not azure_creds_exist(), reason="requires azure credentials")
def test_dataset_azure():
    import os

    token = {"account_key": os.getenv("ACCOUNT_KEY")}
    test_dataset(
        "https://activeloop.blob.core.windows.net/activeloop-hub/test_dataset_azure",
        token=token,
    )
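# The dataflow tests below use an UnknownCountGenerator defined elsewhere in
# the suite. A minimal sketch of what it could look like, reconstructed from
# the assertions that follow (for input n it emits n samples, where sample i
# carries rra == i and arr == [i, i+1, i+2, i+3, i+4]). The meta()/__call__()
# interface is an assumption about hub's generator protocol, not taken from
# this file:

import numpy as np


class UnknownCountGenerator:
    def meta(self):
        # -1 marks the dimension whose size is unknown until generation runs.
        return {
            "arr": {"shape": (-1, 5), "dtype": "int32"},
            "rra": {"shape": (-1,), "dtype": "int32"},
        }

    def __call__(self, n):
        # One input n expands into n samples, so storing range(1, 11)
        # yields sum(1..10) == 55 rows, matching the len(ds) == 55 assert.
        arr = np.array([np.arange(i, i + 5) for i in range(n)], dtype="int32")
        rra = np.arange(n, dtype="int32")
        return {"arr": arr, "rra": rra}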
@pytest.mark.skipif(not dask_loaded(), reason="dask is not installed")
def test_unknown_size_input():
    ds = dataset.generate(UnknownCountGenerator(), range(1, 11))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1,)
    ds = ds.store("./data/test_store_tmp/unknown_count")
    assert len(ds) == 55
    assert (
        ds["rra"][:10].compute()
        == np.array([0, 0, 1, 0, 1, 2, 0, 1, 2, 3], dtype="int32")
    ).all()
    assert ds["rra"][9].compute() == 3
    assert (ds["arr"][5].compute() == np.array([2, 3, 4, 5, 6], dtype="int32")).all()


@pytest.mark.skipif(
    not s3_creds_exist() or not dask_loaded(), reason="requires s3 credentials"
)
def test_s3_dataset():
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1,)
    ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all()


@pytest.mark.skipif(
    not gcp_creds_exist() or not dask_loaded(), reason="requires gcs credentials"
)