@pytest.mark.skipif(not s3_creds_exist() or not dask_loaded(), reason="requires s3 credentials") def test_s3_dataset(): ds = dataset.generate(UnknownCountGenerator(), range(1, 3)) assert ds["arr"].shape == (-1, 5) assert ds["rra"].shape == (-1, ) ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset") assert len(ds) == 3 assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all() assert ds["rra"][2].compute() == 1 assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all() @pytest.mark.skipif(not gcp_creds_exist() or not dask_loaded(), reason="requires gcs credentials") def test_gcs_dataset(): ds = dataset.generate(UnknownCountGenerator(), range(1, 3)) assert ds["arr"].shape == (-1, 5) assert ds["rra"].shape == (-1, ) ds = ds.store("gcs://snark-test/test_dataflow/test_gcs_dataset") assert len(ds) == 3 assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all() assert ds["rra"][2].compute() == 1 assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all() @pytest.mark.skipif(not pytorch_loaded() or not dask_loaded(),
schema=schema, ) ds["first"][0] = np.ones((10, 10)) pickled_ds = cloudpickle.dumps(ds) new_ds = pickle.loads(pickled_ds) assert np.all(new_ds["first"][0].compute() == ds["first"][0].compute())


@pytest.mark.skipif(not s3_creds_exist(), reason="requires s3 credentials")
def test_pickleability_s3():
    """Run ``test_pickleability`` against an S3 URL.

    Skipped when ``s3_creds_exist()`` is falsy.
    """
    test_pickleability("s3://snark-test/test_dataset_pickle_s3")


@pytest.mark.skipif(not gcp_creds_exist(), reason="requires gcp credentials")
def test_pickleability_gcs():
    """Run ``test_pickleability`` against a GCS URL.

    Skipped when ``gcp_creds_exist()`` is falsy.
    """
    test_pickleability("gcs://snark-test/test_dataset_gcs")


def test_dataset_dynamic_shaped(): schema = { "first": Tensor( shape=(None, None), dtype="int32", max_shape=(100, 100), chunks=(100, ), ) } ds = Dataset(
@pytest.mark.skipif( not s3_creds_exist() or not dask_loaded(), reason="requires s3 credentials" ) def test_s3_dataset(): ds = dataset.generate(UnknownCountGenerator(), range(1, 3)) assert ds["arr"].shape == (-1, 5) assert ds["rra"].shape == (-1,) ds = ds.store("s3://snark-test/test_dataflow/test_s3_dataset") assert len(ds) == 3 assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all() assert ds["rra"][2].compute() == 1 assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all() @pytest.mark.skipif( not gcp_creds_exist() or not dask_loaded(), reason="requires gcs credentials" ) def test_gcs_dataset(): ds = dataset.generate(UnknownCountGenerator(), range(1, 3)) assert ds["arr"].shape == (-1, 5) assert ds["rra"].shape == (-1,) ds = ds.store("gcs://snark-test/test_dataflow/test_gcs_dataset") assert len(ds) == 3 assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all() assert ds["rra"][2].compute() == 1 assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all() @pytest.mark.skipif( not pytorch_loaded() or not dask_loaded(), reason="requires pytorch to be loaded" )