def benchmark_iterate_hub_local_tensorflow_setup(
    dataset_name, dataset_split, batch_size, prefetch_factor
):
    """Build a batched, prefetching TensorFlow loader over a locally stored TFDS copy.

    Downloads the dataset via TFDS, persists it under ./hub_data/tfds, reopens
    the stored copy read-only with all caching disabled, and wraps it as a
    TensorFlow pipeline. Returns a 1-tuple so it can feed a benchmark harness
    that unpacks setup results as positional args.
    """
    tfds_dataset = Dataset.from_tfds(dataset_name, split=dataset_split)
    local_path = os.path.join(".", "hub_data", "tfds")
    tfds_dataset.store(local_path)

    # Reopen from disk read-only; caching is off so the benchmark measures raw reads.
    stored = Dataset(local_path, cache=False, storage_cache=False, mode="r")
    tf_loader = stored.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)
    return (tf_loader,)
def time_iter_hub_wasabi_tensorflow(
    dataset_info, batch_size=BATCH_SIZE, prefetch_factor=PREFETCH_SIZE, process=None
):
    """Time a full pass over a remote (Wasabi-hosted) Hub dataset via TensorFlow.

    Opens the dataset named by ``dataset_info["hub_name"]`` read-only with
    caching disabled, iterates every batch under a Timer, and optionally hands
    each (image, label) pair to ``process``.
    """
    remote = Dataset(dataset_info["hub_name"], cache=False, storage_cache=False, mode="r")
    tf_loader = remote.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)

    with Timer("Hub (remote - Wasabi) `.to_tensorflow()`"):
        for sample in tf_loader:
            img = sample["image"]
            lbl = sample["label"]
            if process is not None:
                process(img, lbl)
def time_iter_hub_local_tensorflow(
    dataset_info, batch_size=BATCH_SIZE, prefetch_factor=PREFETCH_SIZE, process=None
):
    """Time a full pass over a locally stored TFDS-sourced Hub dataset via TensorFlow.

    Pulls the dataset from TFDS, stores it under ROOT/Hub_data/tfds, reopens
    the stored copy read-only with caching disabled, then iterates every batch
    under a Timer, optionally handing each (image, label) pair to ``process``.
    """
    tfds_dataset = Dataset.from_tfds(dataset_info["name"], split=dataset_info["split"])
    local_path = os.path.join(ROOT, "Hub_data", "tfds")
    tfds_dataset.store(local_path)

    # Reopen from disk read-only; caching is off so the timing reflects raw reads.
    stored = Dataset(local_path, cache=False, storage_cache=False, mode="r")
    tf_loader = stored.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)

    with Timer("Hub (local) `.to_tensorflow()`"):
        for sample in tf_loader:
            img = sample["image"]
            lbl = sample["label"]
            if process is not None:
                process(img, lbl)
def main():
    """Create a small example Hub dataset and iterate it through TensorFlow.

    Builds a 64-sample dataset whose schema holds two 512x512 float tensors
    per sample, converts it to a TensorFlow pipeline batched by 8, and prints
    each batch's image and label tensors.
    """
    # Create the dataset on local disk.
    hub_ds = Dataset(
        "./data/example/pytorch",
        shape=(64,),
        schema={
            "image": schema.Tensor((512, 512), dtype="float"),
            "label": schema.Tensor((512, 512), dtype="float"),
        },
    )

    # Transform into a TensorFlow dataset, batched by 8.
    tf_ds = hub_ds.to_tensorflow().batch(8)

    # Iterate over the data.
    for batch in tf_ds:
        print(batch["image"], batch["label"])
def time_iter_tensorflow(dataset_name="activeloop/mnist", batch_size=1, prefetch_factor=0, process=None):
    """Time iteration over a named Hub dataset through the TensorFlow pipeline.

    Opens ``dataset_name`` read-only with caching disabled, batches and
    prefetches it, then walks every batch under a Timer whose label records
    the dataset, prefetch factor, and batch size. Each batch's index, image,
    and label are optionally passed to ``process``.
    """
    source = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
    tf_loader = source.to_tensorflow().batch(batch_size).prefetch(prefetch_factor)

    timer_label = f"{dataset_name} TF prefetch {prefetch_factor:03} in batches of {batch_size:03}"
    with Timer(timer_label):
        for i, sample in enumerate(tf_loader):
            img = sample["image"]
            lbl = sample["label"]
            if process is not None:
                process(i, img, lbl)
def benchmark_iterate_hub_tensorflow_setup(dataset_name, batch_size, prefetch_factor):
    """Open a Hub dataset read-only and build a batched, prefetching TF loader.

    Caching is disabled so the benchmark measures raw dataset reads. Returns a
    1-tuple so it can feed a benchmark harness that unpacks setup results as
    positional args.
    """
    hub_ds = Dataset(dataset_name, cache=False, storage_cache=False, mode="r")
    pipeline = hub_ds.to_tensorflow()
    tf_loader = pipeline.batch(batch_size).prefetch(prefetch_factor)
    return (tf_loader,)