def hub_array(args):
    """Return the 'my_array' hub array stored under ``<args.path>/hub``.

    Connects to a filesystem-backed hub rooted at ``os.path.join(args.path,
    'hub')`` and requests an array named ``'my_array'`` with shape
    (50000, 50000), chunk size (1000, 1000) and dtype ``int32``.

    Args:
        args: Object exposing a ``path`` attribute (the base directory).

    Returns:
        Whatever ``bucket.array(...)`` returns for the requested array.
    """
    hub_root = os.path.join(args.path, 'hub')
    return hub.fs(hub_root).connect().array(
        'my_array',
        shape=(50000, 50000),
        chunk=(1000, 1000),
        dtype='int32',
    )
def test_pytorch():
    """Smoke-test iterating a hub dataset through a PyTorch ``DataLoader``.

    Builds an ``images`` and a ``labels`` array in the local cache store,
    groups them into a dataset, converts it with ``to_pytorch()`` and prints
    the batch size seen on every iteration. Nothing is asserted; the function
    only checks that iteration runs to completion.
    """
    print('testing pytorch')

    # Backing arrays live in the local filesystem cache.
    store = hub.fs('./data/cache').connect()
    image_array = store.array(
        name='test/dataloaders/images3',
        shape=(100, 100, 100),
        chunk=(1, 100, 100),
        dtype='uint8',
    )
    label_array = store.array(
        name='test/dataloaders/labels3',
        shape=(100, 1),
        chunk=(100, 1),
        dtype='uint8',
    )

    # Bundle both arrays into one dataset and expose it to PyTorch.
    components = {'images': image_array, 'labels': label_array}
    dataset = store.dataset(name='test/loaders/dataset2', components=components)
    torch_dataset = dataset.to_pytorch()

    loader_options = dict(
        batch_size=32,
        num_workers=2,
        pin_memory=False,
        shuffle=False,
        drop_last=False,
    )
    train_loader = torch.utils.data.DataLoader(torch_dataset, **loader_options)

    # NOTE(review): the original per-batch shape checks, expecting
    # (32, 100, 100) images and (32, 1) labels, are disabled. With
    # drop_last=False the final batch is presumably smaller than 32, which
    # would make them fail -- confirm before re-enabling.
    for i, batch in enumerate(train_loader):
        print(f'iteration {i}: batch size={batch["images"].shape[0]}')

    print('pass')
def test_pytorch_new():
    """Smoke-test a three-component hub dataset through a PyTorch loader.

    Creates ``image``, ``label`` and ``mask`` arrays in the local cache
    store, wraps them in a dataset converted with
    ``to_pytorch(transform=ToTensor())`` and prints the name and shape of
    each of those components for the first batch. Nothing is asserted.
    """
    print('testing pytorch new')

    # Backing arrays live in the local filesystem cache.
    store = hub.fs('./data/cache').connect()
    image_array = store.array(
        'test/test1/image2',
        (1000, 100, 100, 3),
        chunk=(100, 100, 100, 3),
        dtype='uint8',
    )
    label_array = store.array(
        'test/test1/label2',
        (1000, 1),
        chunk=(100, 1),
        dtype='uint8',
    )
    mask_array = store.array(
        'test/test1/mask2',
        (1000, 100, 100),
        chunk=(100, 100, 100),
        dtype='uint8',
    )

    # Bundle the arrays into one dataset and expose it to PyTorch.
    components = {'image': image_array, 'label': label_array, 'mask': mask_array}
    dataset = store.dataset(name='test/test1/loaders2', components=components)
    torch_dataset = dataset.to_pytorch(transform=ToTensor())

    train_loader = torch.utils.data.DataLoader(
        torch_dataset,
        batch_size=32,
        num_workers=4,
        pin_memory=True,
    )

    # The shape assertions that once followed each print are disabled; they
    # expected (32, 100, 100, 3), (32, 1) and (32, 100, 100) respectively.
    reported = ("image", "label", "mask")
    for i, batch in enumerate(train_loader):
        for key, item in batch.items():
            if key in reported:
                print(key)
                print(item.shape)
        # NOTE(review): the loop is taken to stop after the first batch;
        # confirm the intended placement of this break.
        break

    print('pass')
def conn_setup(bucket: str = 'waymo-dataset-upload') -> hub.Bucket:
    """Open a connection to the local upload store at ``/drive/upload``.

    Args:
        bucket: Currently unused; the connection always targets the
            filesystem path ``/drive/upload`` regardless of this value.

    Returns:
        The object returned by ``hub.fs('/drive/upload').connect()``.
    """
    upload_fs = hub.fs('/drive/upload')
    return upload_fs.connect()
# Export five entries of the 'validation/images' hub array as JPEG files under
# ./output so they can be inspected by eye.
import os
# import tensorflow as tf
import math
import numpy as np
import itertools
import io

# tf.enable_eager_execution()

# from waymo_open_dataset.utils import range_image_utils
# from waymo_open_dataset.utils import transform_utils
# from waymo_open_dataset.utils import frame_utils
# from waymo_open_dataset import dataset_pb2 as open_dataset

import hub
from PIL import Image

# Alternative Google Cloud Storage backend, kept for reference:
# client = hub.gs('snark_waymo_open_dataset', creds_path='.creds/gs.json').connect()
client = hub.fs('/drive/upload').connect()
arr = client.array_open('validation/images')

# Image.save() does not create missing directories, so make sure the target
# folder exists before writing into it.
os.makedirs('output', exist_ok=True)

for i in range(5):
    # NOTE(review): presumably the first index selects a frame and the second
    # an image within it, each an (H, W, 3) uint8 array as the 'RGB' mode
    # requires -- confirm against the dataset layout.
    img = arr[10, i]
    print(img.shape)
    Image.fromarray(img, 'RGB').save(f'output/image-{i}.jpg')