Esempio n. 1
0
def test_objectview():
    """Exercise indexing into nested ``Sequence`` / ``SchemaDict`` fields.

    A five-sample dataset is created at ``./nested_seq`` in write mode and
    then read and written through dataset slices, tensor views and
    path-style keys. The statements build on each other's writes, so their
    order matters.
    """
    schema = SchemaDict({
        "a":
        Tensor((20, 20), dtype=int, max_shape=(20, 20)),
        "b":
        Sequence(dtype=BBox(dtype=float)),
        # Sequence of dicts, each holding a nested sequence of 5x5 tensors.
        "c":
        Sequence(dtype=SchemaDict(
            {"d": Sequence((), dtype=Tensor((5, 5), dtype=float))})),
        # Sequence whose element type is given as a plain nested dict.
        "e":
        Sequence(dtype={
            "f": {
                "g": Tensor(5, dtype=int),
                "h": Tensor((), dtype=int)
            }
        }),
    })
    ds = hub.Dataset("./nested_seq", shape=(5, ), mode="w", schema=schema)

    # Dataset view -> object view: write "c" through the slice ds[3:5] and
    # read one 5x5 tensor back through the same view.
    dv = ds[3:5]
    dv["c", 0] = {"d": 5 * np.ones((2, 2, 5, 5))}
    assert (dv[0, "c", 0, "d", 0].compute() == 5 * np.ones((5, 5))).all()

    # Dataset view without a sample index: computing is expected to fail.
    with pytest.raises(IndexError):
        dv["c", "d"].compute()

    # Same for the full dataset without a sample index.
    with pytest.raises(IndexError):
        ds["c", "d"].compute()

    # Tensor view -> object view on a sequence of tensors: write the whole
    # sample, overwrite its first row through the view, then read it back.
    ds["b", 0] = 0.5 * np.ones((5, 4))
    tv = ds["b", 0]
    tv[0] = 0.3 * np.ones((4, ))
    assert (tv[0].compute() == 0.3 * np.ones((4, ))).all()

    # Dataset -> object view: sample 3 holds the data written via dv above.
    assert (ds[3, "c", "d"].compute() == 5 * np.ones((2, 2, 5, 5))).all()

    # Sequence of schema dicts: integer indexing into "e" at position 1 is
    # expected to raise KeyError, while the key path reads "h" back.
    ds[0, "e"] = {"f": {"g": np.ones((3, 5)), "h": np.array([42, 25, 15])}}
    with pytest.raises(KeyError):
        ds[0, "e", 1].compute()
    assert (ds[0, "e", "f", "h"].compute() == np.array([42, 25, 15])).all()

    # Same write/read through the dataset view (dv[0] is taken from ds[3:5]).
    dv[0, "e"] = {"f": {"g": np.ones((3, 5)), "h": np.array([1, 25, 1])}}
    # dv[0, "e", 1]["f", "h"] = 25
    assert (dv[0, "e", "f", "h"].compute() == np.array([1, 25, 1])).all()

    # With lazy mode off the indexing result is compared directly, without
    # calling .compute().
    ds.lazy = False
    assert ds[0, "e", 0, "f", "h"] == 42
    with pytest.raises(KeyError):
        # The comparison result is discarded; the indexing expression itself
        # is what is expected to raise.
        ds[0, "e", 1]["f", "h"] == 25
    ds.lazy = True

    # Build an object view without a sample index, then index into it.
    ov = ds["c", "d"]
    with pytest.raises(IndexError):
        ov.compute()
    assert (ov[3].compute() == 5 * np.ones((2, 2, 5, 5))).all()
    # ov[3, 1] = 2 * np.ones((2, 5, 5))
    assert (ov[3][0, 0].compute() == 5 * np.ones((5, 5))).all()
    assert (ov[3][1].compute() == 5 * np.ones((2, 5, 5))).all()
Esempio n. 2
0
from hub.exceptions import DirectoryNotEmptyException
from hub.schema import BBox, ClassLabel, Image, SchemaDict, Sequence, Tensor, Text
from hub.schema.class_label import ClassLabel
from hub.utils import (
    azure_creds_exist,
    gcp_creds_exist,
    hub_creds_exist,
    minio_creds_exist,
    s3_creds_exist,
    transformers_loaded,
)

# Short alias for the dataset class.
# NOTE(review): `dataset` is not imported in the visible lines - presumably
# it is imported above this excerpt; confirm.
Dataset = dataset.Dataset

# Module-level schema with one top-level tensor and a nested "label" group.
# Two of the label tensors request a compressor ("lz4" and "zstd"); the
# "d" entry nests one level deeper.
my_schema = {
    "image": Tensor((10, 1920, 1080, 3), "uint8"),
    "label": {
        "a": Tensor((100, 200), "int32", compressor="lz4"),
        "b": Tensor((100, 400), "int64", compressor="zstd"),
        "c": Tensor((5, 3), "uint8"),
        "d": {"e": Tensor((5, 3), "uint8")},
    },
}


def test_dataset_2():
    """Create a two-sample dataset from string dtypes, then set metadata and a value."""
    float_fields = {"first": "float", "second": "float"}
    ds = Dataset(
        url="./data/test/test_dataset2",
        mode="w",
        shape=(2,),
        schema=float_fields,
    )

    # Free-form metadata stored alongside the dataset.
    ds.meta_information["description"] = "This is my description"

    # Write a single scalar into the first sample of "first".
    ds["first"][0] = 2.3
Esempio n. 3
0
import numpy as np
import zarr

import hub
from hub.schema import Tensor, Image, Text
from hub.utils import Timer

# Fixed-shape schema used by the pipeline tests.
# NOTE(review): the third positional argument of Tensor/Text is presumably
# `max_shape` (dynamic_schema below passes it by keyword) - confirm.
my_schema = {
    "image": Tensor((28, 28, 4), "int32", (28, 28, 4)),
    "label": Text((None, ), "int64", (20, )),
    "confidence": "float",
}

# Variant whose image dimensions are all left open (None) and bounded by
# max_shape instead.
dynamic_schema = {
    "image": Tensor(shape=(None, None, None),
                    dtype="int32",
                    max_shape=(32, 32, 3)),
    "label": Text((None, ), "int64", (20, )),
}


def test_pipeline_basic():
    """Populate every sample of a newly created 100-sample dataset."""
    ds = hub.Dataset(
        "./data/test/test_pipeline_basic",
        schema=my_schema,
        shape=(100, ),
        mode="w",
    )

    sample_count = len(ds)
    for idx in range(sample_count):
        # Same three writes per sample: constant image, indexed text label,
        # constant confidence.
        ds["image", idx] = np.ones((28, 28, 4), dtype="int32")
        ds["label", idx] = f"hello {idx}"
        ds["confidence", idx] = 0.2
Esempio n. 4
0
def benchmark(sample_size=100, width=1000, channels=4, dtype="int8",
              sequential_write=False):
    """Time ``hub.transform`` pipelines and, optionally, raw sequential writes.

    Args:
        sample_size: Number of samples in every array / dataset.
        width: Height and width of each (square) sample.
        channels: Size of the last axis of each sample.
        dtype: Element type for all arrays and for the ``"image"`` field.
        sequential_write: When true, also time sample-by-sample writes into
            the NumPy, Zarr and Hub targets. Defaults to ``False``, which
            matches the previously hard-coded ``if False:`` guard.

    Timings are reported by the ``Timer`` context manager; nothing is
    returned.
    """
    sample_shape = (width, width, channels)
    full_shape = (sample_size, ) + sample_shape

    # All targets are created unconditionally, as before, so the on-disk
    # side effects do not depend on ``sequential_write``.
    numpy_arr = np.zeros(full_shape, dtype=dtype)
    zarr_fs = zarr.zeros(
        full_shape,
        dtype=dtype,
        store=zarr.storage.FSStore("./data/test/array"),
        overwrite=True,
    )
    zarr_lmdb = zarr.zeros(
        full_shape,
        dtype=dtype,
        store=zarr.storage.LMDBStore("./data/test/array2"),
        overwrite=True,
    )

    my_schema = {
        "image": Tensor(sample_shape, dtype, sample_shape),
    }

    # Labelled "Hub FS" below; created with cache=0.
    ds_fs = hub.Dataset(
        "./data/test/test_pipeline_basic_3",
        mode="w",
        shape=(sample_size, ),
        schema=my_schema,
        cache=0,
    )

    # Labelled "Hub FS+Cache" below; uses the default cache setting.
    ds_fs_cache = hub.Dataset(
        "./data/test/test_pipeline_basic_2",
        mode="w",
        shape=(sample_size, ),
        schema=my_schema,
    )

    if sequential_write:
        print(
            f"~~~ Sequential write of {sample_size}x{width}x{width}x{channels} random arrays ~~~"
        )
        for name, arr in [
            ("Numpy", numpy_arr),
            ("Zarr FS", zarr_fs),
            ("Zarr LMDB", zarr_lmdb),
            ("Hub FS", ds_fs["image"]),
            ("Hub FS+Cache", ds_fs_cache["image"]),
        ]:
            with Timer(name):
                for i in range(sample_size):
                    arr[i] = (np.random.rand(width, width, channels) *
                              255).astype(dtype)

    print(
        f"~~~ Pipeline {sample_size}x{width}x{width}x{channels} random arrays ~~~"
    )
    for name, processes in [
        ("single", 1),
        ("processed", 10),
    ]:  # , ("ray", 10), ("green", 10), ("dask", 10)]:

        # Defined inside the loop because the decorator arguments depend on
        # the current scheduler name and process count.
        @hub.transform(schema=my_schema, scheduler=name, processes=processes)
        def my_transform(sample):
            # The input sample is ignored; each output is a fresh random image.
            return {
                "image":
                (np.random.rand(width, width, channels) * 255).astype(dtype),
            }

        with Timer(name):
            out_ds = my_transform(ds_fs)
            out_ds.store(f"./data/test/test_pipeline_basic_output_{name}")
Esempio n. 5
0
def test_tensor_repr():
    """Check the textual representation of a fixed-shape ``<U20`` tensor."""
    expected = "Tensor(shape=(5000,), dtype='<U20')"
    tensor_object_2 = Tensor(shape=(5000,), dtype="<U20")
    assert repr(tensor_object_2) == expected
Esempio n. 6
0
def test_tensor_error():
    """``Tensor(None, max_shape=None)`` must raise ``TypeError`` with a clear message.

    The ``else`` branch makes the test fail when no exception is raised;
    without it the assertion in the handler would simply never run.
    """
    try:
        Tensor(None, max_shape=None)
    except TypeError as ex:
        assert "both shape and max_shape cannot be None at the same time" in str(ex)
    else:
        raise AssertionError(
            "Tensor(None, max_shape=None) did not raise TypeError")
Esempio n. 7
0
from hub import Dataset
from hub.api.datasetview import TensorView
from hub.exceptions import NoneValueException
from hub.schema import Tensor

import numpy as np
import pytest

# Schema with a fully dynamic 4-D uint8 "image" (bounded by max_shape) and
# a scalar float "label".
my_schema = {
    "image":
    Tensor((None, None, None, None), "uint8", max_shape=(10, 1920, 1080, 4)),
    "label":
    float,
}

# Dataset created at import time in write mode and shared by the tests in
# this module.
ds = Dataset("./data/test/dataset", shape=(100, ), mode="w", schema=my_schema)


def test_tensorview_init():
    """``TensorView`` must reject a missing ``subpath`` or ``dataset``.

    The constructor results are not bound to names: each call is expected
    to raise, so an assignment would never take effect.
    """
    with pytest.raises(NoneValueException):
        TensorView(ds, subpath=None)
    with pytest.raises(NoneValueException):
        TensorView(dataset=None, subpath="image")


def test_tensorview_getitem():
    """Indexing a tensor view with a string sample index must raise ``IndexError``."""
    images_tensorview = ds["image"]
    # Same key as ["7", 0:1920, 0:1080, 0:3], spelled out as a tuple.
    bad_key = ("7", slice(0, 1920), slice(0, 1080), slice(0, 3))
    with pytest.raises(IndexError):
        images_tensorview[bad_key].compute()

Esempio n. 8
0
    1024,
    # 2048,
    # 4096,
    # 8192,
    # 8192 * 2,
    # 8192 * 4,
    # 8192 * 8,
]

# Wall-clock durations in seconds. upload_time receives one entry per chunk
# size below; download_time is only initialised in the visible lines.
# NOTE(review): the loop body appears to continue past this excerpt.
download_time = []
upload_time = []
for cs in chunk_sizes:
    # One sample per dataset; the sample is a cs x cs uint8 image whose
    # `chunks` argument is also set to cs.
    shape = (1, )
    my_schema = {
        "img":
        Tensor(shape=(cs, cs), chunks=cs, dtype="uint8", compressor="default")
    }
    # A separate dataset per chunk size, distinguished by the "t<cs>" suffix.
    ds = hub.Dataset("test/benchmark:t{}".format(str(cs)),
                     shape=shape,
                     schema=my_schema)
    # Random payload scaled to 0..255 and truncated to uint8.
    arr = (255 * np.random.rand(shape[0], cs, cs)).astype("uint8")

    # Upload: time the full-slice assignment.
    t1 = time.time()
    ds["img"][:] = arr
    t2 = time.time()
    upload_time.append(t2 - t1)

    # Download: start the timer and read the full slice back; the result
    # is discarded.
    t3 = time.time()
    ds["img"][:]
Esempio n. 9
0
def test_tensor_error_2():
    """Invalid ``shape`` / ``max_shape`` arguments must be rejected by ``Tensor``.

    Each case only checks the exception type. The constructor results are
    not bound to names because every call is expected to raise.
    """
    # Float passed as shape; the original `(5.1)` is a bare float, not a tuple.
    with pytest.raises(TypeError):
        Tensor(shape=5.1)
    # Float dimension inside a shape tuple.
    with pytest.raises(TypeError):
        Tensor(shape=(5.1, ))
    # Float dimension inside max_shape.
    with pytest.raises(TypeError):
        Tensor(shape=(5, 6), max_shape=(7.2, 8))
    # shape and max_shape have different lengths.
    with pytest.raises(ValueError):
        Tensor(shape=(5, 6), max_shape=(7, 8, 9))
    # None dimension in max_shape.
    with pytest.raises(TypeError):
        Tensor(shape=(5, None), max_shape=(5, None))
    # Same arguments as the third case; kept so no existing check is dropped.
    with pytest.raises(TypeError):
        Tensor(shape=(5, 6), max_shape=(7.2, 8))
    # max_shape given without an explicit shape.
    with pytest.raises(ValueError):
        Tensor(max_shape=(10, 15))
    # shape explicitly None.
    with pytest.raises(TypeError):
        Tensor(None)
    # None dimension in shape without a max_shape.
    with pytest.raises(ValueError):
        Tensor((5, 6, None))
    # max_shape of the wrong type.
    with pytest.raises(TypeError):
        Tensor(max_shape="abc")
    # Float dimension in max_shape without an explicit shape.
    with pytest.raises(TypeError):
        Tensor(max_shape=(7.4, 2))
    # Empty max_shape.
    with pytest.raises(ValueError):
        Tensor(max_shape=[])
Esempio n. 10
0
def test_dataset_change_schema():
    """Reopening an existing dataset with a different schema must fail.

    A dataset is first created with a reference schema. Opening the same
    path with each altered schema is then expected to raise
    ``SchemaMismatchException``.
    """
    path = "./data/test_schema_change"
    reference_schema = {
        "abc": "uint8",
        "def": {
            "ghi": Tensor((100, 100)),
            "rst": Tensor((100, 100, 100)),
        },
    }
    ds = Dataset(path, schema=reference_schema, shape=(100, ))

    # Each variant differs from the reference schema in exactly one place.
    altered_schemas = [
        # "def/ghi" has a different first dimension.
        {
            "abc": "uint8",
            "def": {
                "ghi": Tensor((200, 100)),
                "rst": Tensor((100, 100, 100)),
            },
        },
        # Top-level key "abc" is renamed to "abrs".
        {
            "abrs": "uint8",
            "def": {
                "ghi": Tensor((100, 100)),
                "rst": Tensor((100, 100, 100)),
            },
        },
        # Nested key "ghi" is renamed to "ghijk".
        {
            "abc": "uint8",
            "def": {
                "ghijk": Tensor((100, 100)),
                "rst": Tensor((100, 100, 100)),
            },
        },
        # "abc" has a different dtype.
        {
            "abc": "uint16",
            "def": {
                "ghi": Tensor((100, 100)),
                "rst": Tensor((100, 100, 100)),
            },
        },
        # "def/ghi" gains an extra dimension.
        {
            "abc": "uint8",
            "def": {
                "ghi": Tensor((100, 100, 3)),
                "rst": Tensor((100, 100, 100)),
            },
        },
    ]

    for altered in altered_schemas:
        with pytest.raises(SchemaMismatchException):
            ds = Dataset(path, schema=altered, shape=(100, ))
Esempio n. 11
0
def test_tensor_error():
    """``Tensor(None, max_shape=None)`` must raise ``TypeError`` mentioning the shape.

    The ``else`` branch makes the test fail when no exception is raised;
    without it the assertion in the handler would simply never run.
    """
    try:
        Tensor(None, max_shape=None)
    except TypeError as ex:
        assert "shape cannot be None" in str(ex)
    else:
        raise AssertionError(
            "Tensor(None, max_shape=None) did not raise TypeError")
Esempio n. 12
0
"""
License:
This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
"""

import numpy as np

import hub
from hub.schema import Tensor

# Schema with a 4-D uint8 "image" tensor and a nested "label" group holding
# two integer tensors; nested entries are addressed as "label/a" in the
# test below.
schema = {
    "image": Tensor((10, 1920, 1080, 3), "uint8"),
    "label": {
        "a": Tensor((100, 200), "int32"),
        "b": Tensor((100, 400), "int64"),
    },
}


def test_hub_open():
    """Write a scalar and an image region to a new dataset and read both back."""
    ds = hub.Dataset(
        "./data/test/hub_open",
        token=None,
        shape=(10000, ),
        mode="w",
        schema=schema,
    )

    # Scalar round trip through a nested field addressed by its path.
    cell = ("label/a", 5, 50, 50)
    ds[cell] = 9
    assert ds[cell].numpy() == 9

    # Region round trip; the key is the tuple form of
    # ["image", 5, 4, 120:200, 150:300, :].
    region = ("image", 5, 4, slice(120, 200), slice(150, 300), slice(None))
    ds[region] = 3 * np.ones((80, 150, 3), "uint8")
    expected = 3 * np.ones((80, 150, 3), "uint8")
    assert (ds[region].numpy() == expected).all()
Esempio n. 13
0
def test_tensor_repr():
    """Check the textual representation of a default and an explicit ``Tensor``."""
    # Both tensors are built before any assertion runs, as in the original.
    cases = [
        (Tensor(), "Tensor(shape=(None,), dtype='float64')"),
        (Tensor(shape=(5000,), dtype="<U20"),
         "Tensor(shape=(5000,), dtype='<U20')"),
    ]
    for tensor, expected in cases:
        assert repr(tensor) == expected
Esempio n. 14
0
def test_dataset_bug_2(url="./data/test/dataset", token=None):
    """Assign a one-element list of arrays to a length-one slice of ``"image"``.

    Args:
        url: Location at which the dataset is created.
        token: Passed through to ``Dataset`` unchanged.
    """
    image_schema = {"image": Tensor((100, 100), "uint8")}
    ds = Dataset(url, token=token, shape=(10000,), mode="w",
                 schema=image_schema)

    # Equivalent to ds["image", 0:1].
    first_sample = slice(0, 1)
    ds["image", first_sample] = [np.zeros((100, 100))]
Esempio n. 15
0
def test_tensor_init():
    """An integer ``shape`` combined with a 2-D ``max_shape`` must raise ``ValueError``."""
    scalar_shape = 2
    two_dim_max_shape = (2, 2)
    with pytest.raises(ValueError):
        Tensor(shape=scalar_shape, max_shape=two_dim_max_shape)
Esempio n. 16
0
def test_dataset_no_shape(url="./data/test/dataset", token=None):
    """Build a ``Tensor`` whose ``shape`` and ``max_shape`` differ, tolerating ``ValueError``.

    NOTE(review): ``url`` and ``token`` are not used in the body, and the
    test passes whether or not the constructor raises. Confirm whether a
    ``ValueError`` is actually required here.
    """
    try:
        Tensor(shape=(120, 120, 3), max_shape=(120, 120, 4))
    except ValueError:
        # A ValueError is accepted silently; no other outcome is checked.
        pass
Esempio n. 17
0
    give 'max_shape' arguement a maximum possible size of image.
    """
    "image":
    schema.Image(shape=(None, None, 3),
                 max_shape=(1920, 1920, 3),
                 dtype="uint8"),
    "isValidation":
    "float64",
    "img_paths":
    Text(shape=(None, ), max_shape=(15, )),
    "img_width":
    "int32",
    "img_height":
    "int32",
    "objpos":
    Tensor(max_shape=(100, ), dtype="float64"),
    """
    'joint_self' has nested list structure
    """
    "joint_self":
    Tensor(shape=(None, None), max_shape=(100, 100), dtype="float64"),
    "scale_provided":
    "float64",
    "annolist_index":
    "int32",
    "people_index":
    "int32",
    "numOtherPeople":
    "int32",
}
"""