def test_load(caplog):
    """Load a deprecated-format dataset and a current one when dask is available.

    Args:
        caplog: pytest fixture capturing log output, used to check for the
            deprecation warning emitted by the old on-disk format.
    """
    if dask_loaded():
        obj = hub.load("./data/new/test")
        # the old on-disk format should log a deprecation warning
        assert "Deprecated Warning" in caplog.text
        obj = hub.load("./data/test/test_dataset2")
        # fixed anti-idiom: isinstance already returns a bool, `== True` was redundant
        assert isinstance(obj, hub.Dataset)
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (image, target) where target is class_index of the target class.
    """
    # Synthetic random label in [0, num_classes); only the image data is real.
    target = torch.randint(0, self.num_classes, size=(1, ), dtype=torch.long)[0]
    # Lazily open the hub dataset on first access.
    # NOTE(review): `chunk_size` is read as a module-level global — confirm it
    # is defined where this class lives.
    if not self.x:
        self.x = hub.load('imagenet/imagenet:{}'.format(str(chunk_size)))
    # Prefetch a whole batch and serve items from it one by one; the batch is
    # refilled once indexbeta reaches batch_size.
    # NOTE(review): the refill always re-reads the FIRST batch_size items and
    # ignores `index` — looks like a throughput-benchmark stub, not a real
    # indexed dataset; confirm intent.
    if self.batch is None or self.indexbeta == self.batch_size:
        self.indexbeta = 0
        self.batch = self.x[:self.batch_size]
    img = self.batch[self.indexbeta]
    self.indexbeta += 1
    # Convert the raw array to a PIL image, then apply the usual torchvision
    # transform hooks.
    img = transforms.ToPILImage()(img)
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return img, target
def _peek(src, dst, k):
    """Copy a random ~k fraction of dataset `src` into `dst`.

    Args:
        src: source dataset tag/path passed to hub.load.
        dst: destination tag the subsample is stored under.
        k: fraction in [0, 1] of samples to keep (probabilistic, not exact —
            each sample is kept with probability k).

    Returns:
        True on completion.
    """
    ds = hub.load(src)
    # keep each sample with probability k: uniform() > (1-k)
    ds3 = ds.filter(lambda x: np.random.uniform() > (1-k))  # creates datasetview

    @transform(schema=ds3.schema)
    def load_transform1(sample, stuff):
        di = _generate_dict(sample, stuff)
        return di

    # NOTE(review): `ds3.keys` is passed WITHOUT calling it — confirm it is a
    # property (otherwise this builds a list over a bound method and fails).
    ds4 = load_transform1(ds3, stuff=list(ds3.keys))
    ds5 = ds4.store(dst)
    return True
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (image, target) where target is class_index of the target class.

    Removed a large body of commented-out code (RNG seeding, timing probes,
    progress printing) that was dead weight; behavior is unchanged.
    """
    # Synthetic random label; this benchmark only measures image throughput.
    target = torch.randint(0, self.num_classes, size=(1, ), dtype=torch.long)[0]
    # Lazily open the hub dataset on first access.
    # NOTE(review): `chunk_size` is read as a module-level global — confirm.
    if not self.x:
        self.x = hub.load("imagenet/benchmark:{}".format(str(chunk_size)))
    # Prefetch a whole batch and serve items one by one, refilling when
    # exhausted; the refill always re-reads the first batch_size items and
    # ignores `index` (benchmark stub behavior, kept as-is).
    if self.batch is None or self.indexbeta == self.batch_size:
        self.indexbeta = 0
        self.batch = self.x[:self.batch_size]
    img = self.batch[self.indexbeta]
    self.indexbeta += 1
    # Convert raw array to PIL, then apply the torchvision transform hooks.
    img = transforms.ToPILImage()(img)
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return img, target
def test_dataset_from_directory():
    """Round-trip a directory of labeled images through Dataset.from_directory.

    Fixes over the original: the misspelled local name `direcotry`, the
    duplicated exists/mkdir branching (replaced by os.makedirs(exist_ok=True)),
    and the shutil import repeated per loop iteration.
    """

    def create_images(directory):
        # local import: PIL is only needed for fixture generation
        from PIL import Image

        shape = (512, 512, 3)
        for i in range(10):
            img = Image.fromarray(np.ones(shape, dtype="uint8"))
            img.save(os.path.join(directory, str(i) + ".png"))

    def fill_dir(directory):
        # create-if-missing, then populate with fixture images
        os.makedirs(directory, exist_ok=True)
        create_images(directory)

    def build_root(root):
        # start from a clean root: one subdirectory per class label
        import shutil

        if os.path.exists(root):
            shutil.rmtree(root)
        os.mkdir(root)
        for i in range(10):
            fill_dir(os.path.join(root, "data_" + str(i)))

    def del_data(*paths):
        # remove all fixture/output trees
        import shutil

        for p in paths:
            shutil.rmtree(p)

    root_url = "./data/categorical_label_data"
    store_url = "./data/categorical_label_data_store"
    build_root(root_url)
    ds = Dataset.from_directory(root_url)
    ds.store(store_url)
    ds = load(store_url)
    # directory names double as class labels
    labels = ClassLabel(names=os.listdir(root_url))
    label = os.listdir(root_url)
    assert ds["image", 0].compute().shape == (512, 512, 3)
    assert ds["label", 0].compute() == labels.str2int(label[0])
    del_data(root_url, store_url)
def upload_data(tasks):
    """Upload one shard of samples into a hub array in chunk-aligned batches.

    Args:
        tasks: sequence of tuples where tasks[0][1] names the target array,
            el[0] is the destination row index, and el[-1] is the payload
            handed to get_data(). Assumes indices within a batch are
            contiguous and ascending — TODO confirm against the producer.
    """
    def batch(iterable, n=1):
        # yield successive slices of length <= n
        l = len(iterable)
        for ndx in range(0, l, n):
            yield iterable[ndx:min(ndx + n, l)]

    name = tasks[0][1]
    x = hub.load('aptiv/nutonomy:{}'.format(name))
    # batch size equals the hub chunk length so each write is chunk-aligned
    for b in batch(tasks, n=x.chunk_shape[0]):
        t1 = time.time()
        # staging buffer shaped like one full chunk
        y = np.zeros(x.chunk_shape, dtype=x.dtype)
        i = 0
        for el in b:
            data = np.array(get_data(el[-1]))
            # write into the leading part of the last axis: individual samples
            # may be narrower than the chunk; the rest stays zero-padded
            y[i, ..., :data.shape[-1]] = data
            i += 1
        # flush only the rows actually filled (the final batch may be short)
        x[b[0][0]:b[-1][0] + 1] = y[:b[-1][0] - b[0][0] + 1]
        t2 = time.time()
        print('{}: uploaded chunk {}-{} in {}s'.format(
            name, b[0][0], b[-1][0], t2 - t1))
import torch
import hub

# Fetch MNIST from Hub and expose it as a PyTorch-compatible dataset.
mnist = hub.load("mnist/mnist")
torch_ds = mnist.to_pytorch()

# Wrap it in a DataLoader, reusing the dataset's own collate function.
loader = torch.utils.data.DataLoader(
    torch_ds, batch_size=8, num_workers=8, collate_fn=torch_ds.collate_fn
)

# Stream batches and print their contents.
for sample in loader:
    print(sample["data"], sample["labels"])
min_value=0.0, max_value=1.0, value=0.0, step=0.2) if subset != 0: selected_dataset = datasets[0] st.write(f'Slicing {subset} of {selected_dataset}') tag = "mynameisvinn/{}-{}-{}".format(selected_dataset, uuid.uuid1(), subset) src = f'activeloop/{selected_dataset}' if _peek(src=src, dst=tag, k=subset): st.write(f"Fetching {selected_dataset} from Hub") body = f"""import hub \nds = hub.load('{tag}')""".format(tag) st.code(body, language='python') ds = hub.load(f'{src}') keys = ds.schema.dict_ code = "" for k in keys: snippet = "{}s = ds['{}'].compute()".format(k, k) code += snippet code += '\n' st.code(code, language='python') else: st.write("Slicing failed, sorry!")
import hub
from hub import tensor, dataset
import numpy as np

# Build a tiny dataset of zero-filled image/label tensors.
images = tensor.from_array(np.zeros((4, 512, 512)))
labels = tensor.from_array(np.zeros((4, 512, 512)))
ds = dataset.from_tensors({"images": images, "labels": labels})

# Store and reload the SAME tag. The original stored "davit/basic4" but then
# loaded "davit/basic2" — a different dataset — which looks like a
# copy-paste slip; the round trip now reads back what was just written.
ds = ds.store("davit/basic4")
ds = hub.load("davit/basic4")
print(ds["images"][0].compute())
def test_load_array():
    """Smoke-test that a named hub array loads and exposes a shape."""
    print('- Loading arrays')
    arr = hub.load(name="test/example:3")
    print(arr.shape)
    print('passed')
import hub
from hub.utils import Timer
from hub import dev_mode

dev_mode()

if __name__ == "__main__":
    # Alternative remote target: "s3://snark-test/coco_dataset"
    target = "./data/test/coco"
    # Time the TFDS ingest + store + reload round trip.
    with Timer("Eurosat TFDS"):
        source = hub.Dataset.from_tfds("coco", num=1000)
        stored = source.store(target)
        reloaded = hub.load(target)
def test_mnist():
    """Check the pixel mean of one known MNIST training sample."""
    print('test mnist')
    ds = hub.load('mnist/mnist:train')
    expected = 32.910714285714285
    assert ds[59995].mean() == expected
    print('passed')
def test_imagenet():
    """Check the pixel mean of one known ImageNet sample."""
    print('test imagenet')
    ds = hub.load('imagenet')
    expected = 163.95653688888888
    assert ds[1000000].mean() == expected
    print('passed')
def test_load_wrong_dataset():
    """Loading a nonexistent path must raise HubDatasetNotFoundException.

    The original asserted only inside `except`, so if hub.load raised
    nothing at all the test passed silently; the `else` branch now fails
    explicitly in that case.
    """
    try:
        obj = hub.load("./data/dataset_that_does_not_exist")
    except Exception as ex:
        # must be the specific not-found error, not just any failure
        assert isinstance(ex, HubDatasetNotFoundException)
    else:
        raise AssertionError("expected HubDatasetNotFoundException was not raised")
def test_public_access_no_creds():
    """Public datasets should be loadable without credentials."""
    ds = hub.load('imagenet')
    assert ds[0].mean() == 1