Example #1
def test_load(caplog):
    if dask_loaded():
        obj = hub.load("./data/new/test")
        assert "Deprecated Warning" in caplog.text

    obj = hub.load("./data/test/test_dataset2")
    assert isinstance(obj, hub.Dataset)
Example #2
    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is class_index of the target class.
        """
        # pick a random class label for this sample
        target = torch.randint(0,
                               self.num_classes,
                               size=(1, ),
                               dtype=torch.long)[0]

        # lazily open the Hub dataset on first access
        # (chunk_size is expected to be defined at module level)
        if not self.x:
            self.x = hub.load('imagenet/imagenet:{}'.format(str(chunk_size)))

        # fetch a fresh batch once the cached one is exhausted
        if self.batch is None or self.indexbeta == self.batch_size:
            self.indexbeta = 0
            self.batch = self.x[:self.batch_size]

        # serve the next image from the cached batch
        img = self.batch[self.indexbeta]
        self.indexbeta += 1

        img = transforms.ToPILImage()(img)

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target
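The method above only makes sense inside a map-style PyTorch dataset that carries the cached batch state. A minimal skeleton of such a wrapper, where the class name, the chunk_size constant, and the dataset length are placeholders rather than code recovered from the original project:

import torch

chunk_size = 128  # assumed module-level constant referenced by __getitem__ above


class HubImageNetDataset(torch.utils.data.Dataset):
    """Hypothetical wrapper carrying the state that __getitem__ above relies on."""

    def __init__(self, num_classes=1000, batch_size=16,
                 transform=None, target_transform=None):
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.transform = transform
        self.target_transform = target_transform
        self.x = None        # Hub dataset, opened lazily on first __getitem__
        self.batch = None    # cached slice of images
        self.indexbeta = 0   # read cursor into the cached batch

    def __len__(self):
        return 10000  # placeholder length

    # __getitem__ exactly as shown above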
Example #3
def _peek(src, dst, k):
    ds = hub.load(src)
    # keep roughly a fraction k of the samples (creates a DatasetView)
    ds3 = ds.filter(lambda x: np.random.uniform() > (1 - k))

    @transform(schema=ds3.schema)
    def load_transform1(sample, stuff):
        di = _generate_dict(sample, stuff)
        return di

    # apply the transform lazily, then materialize the result at dst
    ds4 = load_transform1(ds3, stuff=list(ds3.keys))
    ds5 = ds4.store(dst)
    return True
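A call sketch for the helper above; the source and destination tags here are placeholders, not paths from the original code:

# keep roughly 10% of the samples from src and materialize them under dst
_peek(src="activeloop/mnist", dst="user/mnist_10pct", k=0.1)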
Example #4
    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is class_index of the target class.
        """
        # pick a random class label for this sample
        target = torch.randint(0,
                               self.num_classes,
                               size=(1, ),
                               dtype=torch.long)[0]

        # lazily open the Hub dataset on first access
        # (chunk_size is expected to be defined at module level)
        if not self.x:
            self.x = hub.load("imagenet/benchmark:{}".format(str(chunk_size)))

        # fetch a fresh batch once the cached one is exhausted
        if self.batch is None or self.indexbeta == self.batch_size:
            self.indexbeta = 0
            self.batch = self.x[:self.batch_size]

        # serve the next image from the cached batch
        img = self.batch[self.indexbeta]
        self.indexbeta += 1

        img = transforms.ToPILImage()(img)

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target
Example #5
def test_dataset_from_directory():
    def create_image(path_to_directory):
        from PIL import Image

        shape = (512, 512, 3)
        for i in range(10):
            img = np.ones(shape, dtype="uint8")
            img = Image.fromarray(img)
            img.save(os.path.join(path_to_directory, str(i) + ".png"))

    def data_in_dir(path_to_directory):
        # create the directory if needed, then fill it with images
        if not os.path.exists(path_to_directory):
            os.mkdir(path_to_directory)
        create_image(path_to_directory)

    def root_dir_image(root):
        if os.path.exists(root):
            import shutil

            shutil.rmtree(root)
        os.mkdir(root)
        for i in range(10):
            dir_name = "data_" + str(i)
            data_in_dir(os.path.join(root, dir_name))

    def del_data(*path_to_dir):
        for i in path_to_dir:
            import shutil

            shutil.rmtree(i)

    root_url = "./data/categorical_label_data"
    store_url = "./data/categorical_label_data_store"

    root_dir_image(root_url)

    ds = Dataset.from_directory(root_url)
    ds.store(store_url)

    ds = load(store_url)

    labels = ClassLabel(names=os.listdir(root_url))
    label = os.listdir(root_url)

    assert ds["image", 0].compute().shape == (512, 512, 3)
    assert ds["label", 0].compute() == labels.str2int(label[0])
    del_data(root_url, store_url)
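The test relies on ClassLabel.str2int mapping a directory name to its integer id. A tiny illustration of that mapping, assuming str2int returns the position of the name in the names list (as the assertion above implies) and that the import path below is correct for this version of Hub:

from hub.schema import ClassLabel  # import path is an assumption

labels = ClassLabel(names=["data_0", "data_1", "data_2"])
print(labels.str2int("data_1"))  # expected to print 1 under that assumption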
Example #6
    def upload_data(tasks):
        def batch(iterable, n=1):
            # yield successive chunks of size n from iterable
            length = len(iterable)
            for ndx in range(0, length, n):
                yield iterable[ndx:min(ndx + n, length)]

        name = tasks[0][1]
        x = hub.load('aptiv/nutonomy:{}'.format(name))
        # upload in chunk-aligned batches so each write covers whole chunks
        for b in batch(tasks, n=x.chunk_shape[0]):
            t1 = time.time()
            y = np.zeros(x.chunk_shape, dtype=x.dtype)
            i = 0
            for el in b:
                data = np.array(get_data(el[-1]))
                y[i, ..., :data.shape[-1]] = data
                i += 1
            x[b[0][0]:b[-1][0] + 1] = y[:b[-1][0] - b[0][0] + 1]
            t2 = time.time()
            print('{}: uploaded chunk {}-{} in {}s'.format(
                name, b[0][0], b[-1][0], t2 - t1))
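The inner batch helper is a generic chunking generator and can be exercised on its own; a standalone sketch:

def batch(iterable, n=1):
    # yield successive slices of length n (the last one may be shorter)
    length = len(iterable)
    for ndx in range(0, length, n):
        yield iterable[ndx:min(ndx + n, length)]


print(list(batch(list(range(7)), n=3)))  # [[0, 1, 2], [3, 4, 5], [6]]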
Example #7
import torch

import hub

# Load data
ds = hub.load("mnist/mnist")

# Transform into pytorch
ds = ds.to_pytorch()

loader = torch.utils.data.DataLoader(
    ds, batch_size=8, num_workers=8, collate_fn=ds.collate_fn
)

# Iterate over the data
for batch in loader:
    print(batch["data"], batch["labels"])
Example #8
                       min_value=0.0,
                       max_value=1.0,
                       value=0.0,
                       step=0.2)

    if subset != 0:
        selected_dataset = datasets[0]
        st.write(f'Slicing {subset} of {selected_dataset}')

        tag = "mynameisvinn/{}-{}-{}".format(selected_dataset, uuid.uuid1(),
                                             subset)
        src = f'activeloop/{selected_dataset}'

        if _peek(src=src, dst=tag, k=subset):
            st.write(f"Fetching {selected_dataset} from Hub")
            body = f"import hub\nds = hub.load('{tag}')"
            st.code(body, language='python')

            ds = hub.load(f'{src}')
            keys = ds.schema.dict_
            code = ""

            for k in keys:
                snippet = "{}s = ds['{}'].compute()".format(k, k)
                code += snippet
                code += '\n'
            st.code(code, language='python')

        else:
            st.write("Slicing failed, sorry!")
Example #9
File: basic.py Project: 40a/Hub-1
import hub
from hub import tensor, dataset
import numpy as np

images = tensor.from_array(np.zeros((4, 512, 512)))
labels = tensor.from_array(np.zeros((4, 512, 512)))

ds = dataset.from_tensors({"images": images, "labels": labels})

ds = ds.store("davit/basic4")
# load back the dataset that was just stored
ds = hub.load("davit/basic4")

print(ds["images"][0].compute())
Example #10
def test_load_array():
    print('- Loading arrays')
    x = hub.load(name="test/example:3")
    print(x.shape)
    print('passed')
Example #11
import hub
from hub.utils import Timer
from hub import dev_mode

dev_mode()

if __name__ == "__main__":
    # path = "s3://snark-test/coco_dataset"
    path = "./data/test/coco"
    with Timer("Eurosat TFDS"):
        out_ds = hub.Dataset.from_tfds("coco", num=1000)

        res_ds = out_ds.store(path)
        ds = hub.load(path)
Example #12
def test_mnist():
    print('test mnist')
    x = hub.load('mnist/mnist:train')
    assert x[59995].mean() == 32.910714285714285
    print('passed')
Example #13
def test_imagenet():
    print('test imagenet')
    x = hub.load('imagenet')
    assert x[1000000].mean() == 163.95653688888888
    print('passed')
Example #14
def test_load_wrong_dataset():
    # loading a path that does not exist should raise HubDatasetNotFoundException
    try:
        hub.load("./data/dataset_that_does_not_exist")
    except Exception as ex:
        assert isinstance(ex, HubDatasetNotFoundException)
    else:
        assert False, "expected HubDatasetNotFoundException"
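The same expectation can be written more tightly with pytest.raises, assuming pytest is the test runner and the same imports as the test above (the test name below is hypothetical):

import pytest


def test_load_wrong_dataset_raises():
    # hub.load on a missing path is expected to raise HubDatasetNotFoundException
    with pytest.raises(HubDatasetNotFoundException):
        hub.load("./data/dataset_that_does_not_exist")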
Example #15
def test_public_access_no_creds():
    x = hub.load('imagenet')
    assert x[0].mean() == 1