def create_datasets():
    """Create one hub array per sensor channel and bundle them into a dataset.

    Every array has ``length`` (a name defined outside this function) as its
    leading dimension; the remaining dimensions, dtype and chunking depend on
    the sensor family (radar, lidar, camera).

    Returns:
        The object returned by ``hub.dataset`` for
        ``'aptiv/nutonomy3:v1.0-trainval'``.
    """
    # (channel names, per-sample shape, dtype, samples per chunk)
    sensor_groups = (
        (
            (
                'RADAR_FRONT',
                'RADAR_BACK_RIGHT',
                'RADAR_FRONT_LEFT',
                'RADAR_FRONT_RIGHT',
                'RADAR_BACK_LEFT',
            ),
            (18, 200),
            np.float32,
            1000,
        ),
        (
            ('LIDAR_TOP',),
            (4, 30000),
            np.float32,
            100,
        ),
        (
            (
                'CAM_FRONT',
                'CAM_FRONT_RIGHT',
                'CAM_BACK_RIGHT',
                'CAM_BACK',
                'CAM_BACK_LEFT',
                'CAM_FRONT_LEFT',
            ),
            (900, 1600, 3),
            np.uint8,
            5,
        ),
    )

    hubs = {}
    for channel_names, sample_shape, dtype, samples_per_chunk in sensor_groups:
        for channel in channel_names:
            hubs[channel] = hub.array(
                (length,) + sample_shape,
                dtype=dtype,
                name='aptiv/nutonomy3:{}'.format(channel),
                chunk_size=(samples_per_chunk,) + sample_shape,
            )

    return hub.dataset(hubs, name='aptiv/nutonomy3:v1.0-trainval')
def test_pytorch():
    """Check that a hub dataset can be consumed through a PyTorch DataLoader.

    Builds a two-array dataset, converts it with ``to_pytorch()`` and verifies
    the shapes of the first batch produced by the loader.
    """
    print('testing pytorch')

    # Arrays are created in the same order as the dict literal: images, labels.
    ds = hub.dataset(
        {
            'images': hub.array((100, 100, 100),
                                name='test/dataloaders:images',
                                dtype='uint8'),
            'labels': hub.array((100, 1),
                                name='test/dataloaders:labels',
                                dtype='uint8'),
        },
        name='test/loaders:dataset',
    )

    loader = torch.utils.data.DataLoader(
        ds.to_pytorch(),
        batch_size=32,
        num_workers=1,
        pin_memory=True,
    )

    # Only the first batch is inspected.
    for image_batch, label_batch in loader:
        print(image_batch.shape, label_batch.shape)
        assert image_batch.shape == (32, 100, 100)
        assert label_batch.shape == (32, 1)
        break

    print('pass')
def test_dataset():
    """Exercise key-based and index-based access on a hub dataset."""
    inputs = hub.array((10000, 512, 512), name='test/example:input', dtype='uint8')
    targets = hub.array((10000, 4), name='test/example:label', dtype='uint8')
    ds = hub.dataset({'input': inputs, 'label': targets}, name='test/dataset:train3')

    num_samples = ds['input'].shape[0]
    assert num_samples == 10000

    # Key plus index selects from a single array.
    # NOTE(review): the original comment called this "equivalent ds['train'][0]",
    # but the dataset has no 'train' key -- presumably ds['label'][0]; confirm.
    first_label = ds['label', 0]
    assert first_label.mean() == 0

    # Integer index only: per the original comment this returns a pair of
    # arrays as long as the requested dimensions are broadcastable.
    first_sample = ds[0]
    assert first_sample[0].mean() == 0
def test_cross_chunk_upload_download():
    """Write a sub-block of ones and read it, and a disjoint region, back."""
    print('- Cross Chunk Upload and Download')
    arr = hub.array((100, 100, 100), name="test/example:2", dtype='uint8')

    patch = np.ones((3, 10, 10), dtype='uint8')
    arr[2:5, 0:10, 0:10] = patch

    written_mean = arr[2:5, 0:10, 0:10].mean()
    assert written_mean == 1

    # The region outside the written patch is expected to still read as zero.
    untouched_mean = arr[2:5, 10:, 10:].mean()
    assert untouched_mean == 0
    print('passed')
def test_init():
    """Check that a new hub array reports the shape it was created with."""
    print('- Initialize array')
    requested = (10, 10, 10, 10)
    arr = hub.array(requested, name="test/example:1", dtype='uint8')

    expected = np.array(requested)
    actual = np.array(arr.shape)
    assert np.all(actual == expected)
    print('passed')
def test_fs():
    """Create an array with ``backend='fs'`` and round-trip a write through it."""
    print('- Test writing local to filesystem')
    requested = (10, 10, 10, 10)
    arr = hub.array(requested, name="test/backend:1", dtype='uint8', backend='fs')

    expected = np.array(requested)
    assert np.all(np.array(arr.shape) == expected)

    # Reads zero before the write, one after assigning a scalar to arr[1].
    assert arr[0, 0, 0, 0] == 0
    arr[1] = 1
    assert arr[1, 0, 0, 0] == 1
    print('passed')
def test_wo_aws_or_hub_creds():
    """Try to create a hub array while ``~/.aws`` and ``~/.hub`` are moved away.

    Both directories are renamed with ``mv`` before the attempt and renamed
    back afterwards. Any ``Exception`` raised by the attempt is printed rather
    than propagated, so the test does not fail on it.
    """
    os.system('mv ~/.aws ~/.aws_arxiv')
    os.system('mv ~/.hub ~/.hub_arxiv')
    try:
        import hub
        x = hub.array((100, 100, 100), 'image/test:smth', dtype='uint8')
        print(x.shape)
    except Exception as err:
        print('pass', err)
    finally:
        # Restore in ``finally`` so the user's credentials are put back even
        # when something other than ``Exception`` (e.g. KeyboardInterrupt)
        # interrupts the attempt.
        os.system('mv ~/.hub_arxiv ~/.hub')
        os.system('mv ~/.aws_arxiv ~/.aws')
def test_broadcasting():
    """Assign scalars and differently-shaped arrays into slices of a hub array."""
    print('- Broadcasting')
    arr = hub.array((100, 100, 100), name="test/example:3", dtype='uint8')

    # Single element.
    arr[0, 0, 0] = 11
    assert arr[0, 0, 0] == 11

    # Scalar assigned to a whole 2-D slice.
    arr[0] = 10
    assert arr[0].mean() == 10

    # Array whose shape matches the slice exactly.
    plane = np.ones((100, 100), dtype='uint8')
    arr[1] = plane
    assert arr[1].mean() == 1

    # Array with extra leading unit dimensions assigned to a 1-D slice.
    row = np.ones((1, 1, 100), dtype='uint8')
    arr[3, 90, :] = row
    assert arr[3, 90].mean() == 1
    print('passed')
def test_multiple():
    """Create an array with ``backend=['fs', 's3']`` and round-trip a write."""
    print('- Test writing local to filesystem and s3')
    requested = (10, 10, 10, 10)
    arr = hub.array(
        requested,
        name="test/backend:3",
        dtype='uint8',
        backend=['fs', 's3'],
    )

    expected = np.array(requested)
    assert np.all(np.array(arr.shape) == expected)

    # Reads zero before the write, one after assigning a scalar to arr[1].
    assert arr[0, 0, 0, 0] == 0
    arr[1] = 1
    assert arr[1, 0, 0, 0] == 1
    print('passed')
def test_cache():
    """Create an array with ``caching=True`` and round-trip a write."""
    print('- Test write to cache')
    requested = (10, 10, 10, 10)
    arr = hub.array(
        requested,
        name="test/backend:4",
        dtype='uint8',
        backend=['fs', 's3'],
        caching=True,
    )

    # The tuple is compared element-wise against the reported shape.
    reported = np.array(arr.shape)
    assert np.all(reported == requested)

    # Reads zero before the write, one after assigning a scalar to arr[1].
    assert arr[0, 0, 0, 0] == 0
    arr[1] = 1
    assert arr[1, 0, 0, 0] == 1
    print('passed')
def test_tensorflow():
    """Check that a hub dataset can be iterated and batched via TensorFlow.

    Builds a two-array dataset, converts it with ``to_tensorflow()``, checks
    the per-sample shapes, then batches it and checks the first batch.
    """
    print('testing tensorflow')
    num_samples = 10
    # Must not exceed num_samples: with drop_remainder=True a batch size larger
    # than the dataset yields no batches at all, which previously made every
    # assertion in the batched loop unreachable.
    batch_size = 5

    # Create arrays
    images = hub.array((num_samples, 100, 100), name='name1', dtype='uint8')
    labels = hub.array((num_samples, 1), name='name2', dtype='uint8')

    # Create dataset
    ds = hub.dataset({
        'images': images,
        'labels': labels
    }, name='test/loaders:dataset')

    # Transform to TensorFlow
    train_dataset = ds.to_tensorflow()
    assert len(train_dataset) == num_samples

    for image, label in train_dataset:
        assert len(image) == 100
        assert len(image[0]) == 100
        assert len(label) == 1

    train_dataset = train_dataset.batch(batch_size, drop_remainder=True)

    # Inspect the first batch only, and make sure there was one.
    batches_checked = 0
    for image_batch, label_batch in train_dataset:
        assert len(image_batch) == batch_size
        assert len(label_batch) == batch_size
        assert len(image_batch[0]) == 100
        assert len(image_batch[0][0]) == 100
        assert len(label_batch[0]) == 1
        batches_checked += 1
        break
    assert batches_checked == 1, 'batched dataset produced no batches'

    print('pass')
def test_squeeze_array():
    """Check which dimensions are dropped when indexing a hub array.

    Integer indices are expected to remove their dimension, while slices
    (even of length one) keep it.
    """
    print('- Squeezing arrays')
    x = hub.array((100, 100, 100), name="test/example:3", dtype='uint8')
    assert len(x[0].shape) == 2
    assert len(x[:1].shape) == 3
    # NOTE(review): a stray docker-compose fragment had been spliced into the
    # middle of ``.shape`` on the next statement, making this function a syntax
    # error. It is preserved here in case it needs to be moved back to its own
    # file:
    #   py2: image: python:2 volumes: - ./:/workspace/ command: bash -c
    #   " cd /workspace && pip install -e . && python -c
    #   'import hub; hub.load(name=\"imagenet/image:train\")'"
    assert len(x[:2, 0, :].shape) == 2
    assert len(x[0, 0, :].shape) == 1
    # NOTE(review): this expects element [0, 0, 0] to read as zero, yet the
    # array name "test/example:3" is also written to by test_broadcasting --
    # confirm the backing store does not persist between tests.
    assert x[0, 0, 0] == 0
    print('passed')
import os
from PIL import Image
import numpy as np
import hub
from pathlib import Path
from pathos.threading import ThreadPool
import time

# Thread pool created at import time; not used by the code visible in this block.
pool = ThreadPool(nodes=20)

# val_path = list(Path('./ILSVRC/Data/CLS-LOC/val').glob('*.JPEG'))
val_path = list(Path('./ILSVRC/Data/CLS-LOC/train').glob('**/*.JPEG'))

# One (500, 375, 3) uint8 slot per image file found.
shape = (len(val_path), 500, 375, 3)
x = hub.array(shape, name='imagenet/test:latest', dtype='uint8')
print(x.shape)
index = 1


def upload_val(index):
    """Load and preprocess the image at ``val_path[index]``.

    The image is resized to 500x375 (width x height), converted to a NumPy
    array, forced to have a channel axis with at most three channels, and
    transposed so the first two axes are swapped.

    Args:
        index: Position in the module-level ``val_path`` list.

    NOTE(review): the original block ends at the "Upload the image" comment
    with no upload code after it, so the step that stores ``img`` is not
    shown here and the function currently returns ``None``.
    """
    t1 = time.time()

    # Preprocess the image
    # Open through a context manager so the file handle is released, and use
    # Image.LANCZOS because the Image.ANTIALIAS alias was removed in Pillow 10.
    with Image.open(val_path[index]) as source:
        img = source.resize((500, 375), Image.LANCZOS)
    img = np.asarray(img)

    if len(img.shape) == 2:
        # Single-channel image: add a trailing channel axis.
        img = np.expand_dims(img, -1)

    if img.shape[-1] == 4:
        # Four channels: keep only the first three.
        img = img[..., :3]

    # Swap the first two axes (rows/columns) and keep the channel axis last.
    img = np.transpose(img, axes=(1, 0, 2))

    # Upload the image
def test_dtypes():
    """Check that a NumPy dtype object is accepted and reported as 'uint8'."""
    print('- Numpy dtypes arrays')
    arr = hub.array(
        (100, 100, 100),
        name="test/example:5",
        dtype=np.uint8,
    )
    reported = arr.dtype
    assert reported == 'uint8'
    print('passed')
def test_chunk_shape():
    """Create an array with an explicit ``chunk_size`` and write one block.

    There are no assertions; the test passes if creation and the write
    complete without raising.
    """
    print('- Chunk shape')
    chunk = (10, 10, 10)
    arr = hub.array(
        (100, 100, 100),
        name="test/example:3",
        dtype='uint8',
        chunk_size=chunk,
    )
    # Write a region with the same extent as chunk_size, starting at the origin.
    arr[0:10, 0:10, 0:10] = 0
    print('passed')
def test_multiple_upload_download():
    """Write ones into the first three leading slices and read them back."""
    print('- Multiple Chunk Upload and Download')
    arr = hub.array((10, 10, 10, 10), name="test/example:1", dtype='uint8')

    block = np.ones((3, 10, 10, 10), dtype='uint8')
    arr[0:3] = block

    read_back_mean = arr[0:3].mean()
    assert read_back_mean == 1
    print('passed')
def test_aws_wo_hub_creds():
    """Create a hub array while ``~/.hub`` is temporarily moved away.

    ``~/.hub`` is renamed with ``mv`` before the attempt and renamed back
    afterwards. Unlike ``test_wo_aws_or_hub_creds``, ``~/.aws`` is left in
    place and any exception from ``hub.array`` propagates to the caller.
    """
    os.system('mv ~/.hub ~/.hub_arxiv')
    try:
        import hub
        x = hub.array((100, 100, 100), 'image/test:smth', dtype='uint8')
        print(x.shape)
    finally:
        # Always put the directory back; previously a failure in hub.array
        # left ~/.hub renamed to ~/.hub_arxiv.
        os.system('mv ~/.hub_arxiv ~/.hub')