url="./data/test_from_tf/ds4", mode="w") for i in range(10): ds["label", "d", "e", i] = i * np.ones((5, 3)) dsv = ds[5:] tds = dsv.to_tensorflow(include_shapes=True) out_ds = hub.Dataset.from_tensorflow(tds) res_ds = out_ds.store("./data/test_from_tf/ds6", length=5) # generator has no length, argument needed for i in range(5): assert (res_ds["label", "d", "e", i].numpy() == (5 + i) * np.ones( (5, 3))).all() @pytest.mark.skipif(not pytorch_loaded(), reason="requires pytorch to be loaded") def test_to_pytorch(): import torch my_schema = { "image": Tensor((10, 1920, 1080, 3), "uint8"), "label": { "a": Tensor((100, 200), "int32"), "b": Tensor((100, 400), "int64"), "c": Tensor((5, 3), "uint8"), "d": { "e": Tensor((5, 3), "uint8") }, "f": "float", },
import pytest
from hub.utils import pytorch_loaded, tensorflow_loaded
from hub.training.model import Model
import importlib

# Probe for optional backends once at import time; the specs double as
# module-level availability flags.
torch_spec = importlib.util.find_spec("torch")
if torch_spec is not None:
    import torch
tensorflow_spec = importlib.util.find_spec("tensorflow")
if tensorflow_spec is not None:
    import tensorflow as tf


@pytest.mark.skipif(
    not pytorch_loaded(),
    reason="requires pytorch to be loaded",
)
def test_store_load_torch():
    """Store a torch model through ``Model`` and check a fresh load matches.

    Equivalence is checked structurally by comparing ``repr`` of the wrapped
    models, not by comparing weights.
    """
    # Defensive guard in case the skipif marker and find_spec ever disagree.
    if torch_spec is None:
        raise ModuleNotFoundError("Module 'torch' is not installed")

    # Small MLP: 1000 -> 100 -> ReLU -> 10.
    layers = [
        torch.nn.Linear(1000, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 10),
    ]
    model_init = Model(torch.nn.Sequential(*layers))
    model_init.store("./data/")

    # The load path mirrors how store names the file: presumably after the
    # wrapped model's class name — TODO confirm against Model.store.
    restored = Model()
    restored.load(f"./data/{model_init._model.__class__.__name__}.pth")
    assert repr(model_init._model) == repr(restored._model)
@pytest.mark.skipif(not gcp_creds_exist() or not dask_loaded(),
                    reason="requires gcs credentials")
def test_gcs_dataset():
    """Store a generated dataset to GCS and verify its shapes and contents."""
    # UnknownCountGenerator over range(1, 3): sample counts are not known up
    # front, which presumably is why the declared shapes carry a -1 leading
    # dimension below.
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1, )
    # Materializing to GCS resolves the unknown length: 3 samples total.
    ds = ds.store("gcs://snark-test/test_dataflow/test_gcs_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4],
                                               dtype="int32")).all()


@pytest.mark.skipif(not pytorch_loaded() or not dask_loaded(),
                    reason="requires pytorch to be loaded")
def test_to_pytorch():
    """Convert a two-tensor dataset to PyTorch and build a DataLoader.

    NOTE(review): truncated — the DataLoader call is cut off at the end of
    this chunk.
    """
    import torch

    t1 = tensor.from_array(np.array([[1, 2], [3, 4]], dtype="int32"))
    # Ragged second tensor: an object array whose two rows differ in length.
    np_arr = np.empty(2, object)
    np_arr[0] = np.array([5, 6, 7, 8], dtype="int32")
    np_arr[1] = np.array([7, 8, 9], dtype="int32")
    # np_arr[:] = [np_arr0, np_arr1]
    t2 = tensor.from_array(np_arr)
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    torch_ds = ds.to_pytorch()
    train_loader = torch.utils.data.DataLoader(torch_ds,
                                               batch_size=1,
                                               num_workers=0,
                                               # NOTE(review): chunk ends
                                               # mid-call; the remaining
                                               # DataLoader arguments are
                                               # outside this chunk.
# NOTE(review): this chunk starts mid-statement — the tokens below are the
# trailing kwargs of a hub.Dataset(...) constructor whose opening lines are
# outside this chunk.
schema=my_schema, shape=(10,), url="./data/test_from_tf/ds4", mode="w"
    )
    # Write a distinct constant (5, 3) plane into each of the 10 samples of
    # the nested "label"/"d"/"e" tensor, so each sample is identifiable.
    for i in range(10):
        ds["label", "d", "e", i] = i * np.ones((5, 3))
    # View over the last five samples only; round-trip that view through a
    # TensorFlow dataset and back into a hub dataset.
    dsv = ds[5:]
    tds = dsv.to_tensorflow()
    out_ds = hub.Dataset.from_tensorflow(tds)
    res_ds = out_ds.store(
        "./data/test_from_tf/ds6", length=5
    )  # generator has no length, argument needed
    # Samples 5..9 of the source must come back as samples 0..4 of the copy.
    for i in range(5):
        assert (res_ds["label", "d", "e", i].numpy() == (5 + i) * np.ones((5, 3))).all()


@pytest.mark.skipif(not pytorch_loaded(), reason="requires pytorch to be loaded")
def test_to_pytorch():
    """Build a nested-schema dataset for PyTorch conversion.

    NOTE(review): only the setup is visible here — the body of this test
    continues past the end of this chunk.
    """
    import torch

    my_schema = {
        "image": Tensor((10, 1920, 1080, 3), "uint8"),
        "label": {
            "a": Tensor((100, 200), "int32"),
            "b": Tensor((100, 400), "int64"),
            "c": Tensor((5, 3), "uint8"),
            "d": {"e": Tensor((5, 3), "uint8")},
            "f": "float",
        },
    }
    ds = hub.Dataset(
        # NOTE(review): chunk ends mid-call — the closing parenthesis of this
        # constructor is outside this chunk.
        schema=my_schema, shape=(10,), url="./data/test_from_tf/ds5", mode="w"
@pytest.mark.skipif(
    not gcp_creds_exist() or not dask_loaded(), reason="requires gcs credentials"
)
def test_gcs_dataset():
    """Store a generated dataset to GCS and verify its shapes and contents."""
    # UnknownCountGenerator over range(1, 3): sample counts are not known up
    # front, which presumably is why the declared shapes carry a -1 leading
    # dimension below.
    ds = dataset.generate(UnknownCountGenerator(), range(1, 3))
    assert ds["arr"].shape == (-1, 5)
    assert ds["rra"].shape == (-1,)
    # Materializing to GCS resolves the unknown length: 3 samples total.
    ds = ds.store("gcs://snark-test/test_dataflow/test_gcs_dataset")
    assert len(ds) == 3
    assert (ds["rra"][:3].compute() == np.array([0, 0, 1], dtype="int32")).all()
    assert ds["rra"][2].compute() == 1
    assert (ds["arr"][1].compute() == np.array([0, 1, 2, 3, 4], dtype="int32")).all()


@pytest.mark.skipif(
    not pytorch_loaded() or not dask_loaded(), reason="requires pytorch to be loaded"
)
def test_to_pytorch():
    """Convert a two-tensor dataset to PyTorch and build a DataLoader.

    NOTE(review): the constructed train_loader is not consumed within this
    chunk — the test likely continues past the end of what is visible here.
    """
    import torch

    t1 = tensor.from_array(np.array([[1, 2], [3, 4]], dtype="int32"))
    # Ragged second tensor: an object array whose two rows differ in length.
    np_arr = np.empty(2, object)
    np_arr[0] = np.array([5, 6, 7, 8], dtype="int32")
    np_arr[1] = np.array([7, 8, 9], dtype="int32")
    # np_arr[:] = [np_arr0, np_arr1]
    t2 = tensor.from_array(np_arr)
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    torch_ds = ds.to_pytorch()
    # Single-worker loader with the dataset's own collate_fn, needed because
    # of the ragged t2 samples.
    train_loader = torch.utils.data.DataLoader(
        torch_ds, batch_size=1, num_workers=0, collate_fn=torch_ds.collate_fn
    )