Example #1
def test_dataset_getitem_str_index():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2, "t3": t2})
    ds = ds[["t1", "t2"], 0:2]
    assert (ds["t1"].compute() == np.array([[1, 2], [4, 5]], dtype="int32")).all()
    assert (ds["t2"].compute() == np.array([1, 2], dtype="int32")).all()
Example #2
def test_dataset_getitem_str():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    ds = ds["t1", "t2"]
    assert ds["t1"] is t1
    assert ds["t2"] is t2
Example #3
def main():
    files = ["training", "testing"]
    dicts = []

    # required to generate named labels
    mapping = {0: "T-shirt/top",
               1: "Trouser",
               2: "Pullover",
               3: "Dress",
               4: "Coat",
               5: "Sandal",
               6: "Shirt",
               7: "Sneaker",
               8: "Bag",
               9: "Ankle boot"
               }

    for f in files:
        images, labels = load_fashion_mnist(f, path="./data/fashion-mnist")
        dicts += [{"images": images, "labels": labels}]

    images = np.concatenate([d["images"] for d in dicts])
    labels = np.concatenate([np.array(d["labels"], dtype="int8") for d in dicts])
    named_labels = np.array([mapping[label] for label in labels])
    print(images.shape, labels.shape)

    images_t = tensor.from_array(images, dtag="mask")
    labels_t = tensor.from_array(labels, dtag="text")
    named_labels_t = tensor.from_array(named_labels, dtag="text")

    ds = dataset.from_tensors({"data": images_t, "labels": labels_t, "named_labels": named_labels_t})
    ds.store("mnist/fashion-mnist")
Example #4
def main():

    # "path" is assumed to be defined elsewhere in the original script
    train, test = load_pascal_detection(path)

    train_images = np.concatenate(train["imgs"])
    train_labels = np.concatenate(train["lbls"])

    test_images = np.concatenate(test["imgs"])
    test_labels = np.concatenate(test["lbls"])

    train_images = tensor.from_array(train_images, dtag="imgs")
    train_labels = tensor.from_array(train_labels, dtag="lbls")

    test_images = tensor.from_array(test_images, dtag="imgs")
    test_labels = tensor.from_array(test_labels, dtag="lbls")

    train_ds = dataset.from_tensors({
        "data": train_images,
        "labels": train_labels
    })
    test_ds = dataset.from_tensors({
        "data": test_images,
        "labels": test_labels
    })

    # note: test_ds is built above but never stored in this snippet
    train_ds.store("arenbeglaryan/vocdetection")
Example #5
File: upload.py Project: 40a/Hub-1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dataset_path",
        type=str,
        help="Path to fashion-mnist dataset",
        default="./data/fashion-mnist",
    )
    parser.add_argument(
        "-o",
        "--output_name",
        type=str,
        help="Dataset output name",
        default="fashion-mnist",
    )
    args = parser.parse_args()
    files = ["training", "testing"]
    dicts = []
    for f in files:
        images, labels = load_fashion_mnist(f, path=args.dataset_path)
        dicts += [{"images": images, "labels": labels}]
    images = np.concatenate([d["images"] for d in dicts])
    labels = np.concatenate(
        [np.array(d["labels"], dtype="int8") for d in dicts])
    print(images.shape, labels.shape)

    images_t = tensor.from_array(images, dtag="mask")
    labels_t = tensor.from_array(labels)

    ds = dataset.from_tensors({"data": images_t, "labels": labels_t})
    ds.store(f"{args.output_name}")
Example #6
def test_to_pytorch():

    try:
        import torch
    except ImportError:
        print("Pytorch hasn't been imported and tested")
        return

    t1 = tensor.from_array(np.array([[1, 2], [3, 4]], dtype="int32"))
    # object-dtype array so the two rows can have different lengths (ragged data)
    np_arr = np.empty(2, object)
    np_arr[0] = np.array([5, 6, 7, 8], dtype="int32")
    np_arr[1] = np.array([7, 8, 9], dtype="int32")
    t2 = tensor.from_array(np_arr)
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    torch_ds = ds.to_pytorch()
    train_loader = torch.utils.data.DataLoader(
        torch_ds, batch_size=1, num_workers=0, collate_fn=torch_ds.collate_fn
    )
    data = list(train_loader)
    assert len(data) == 2
    for i in range(2):
        assert "t1" in data[i]
        assert "t2" in data[i]
    assert data[0]["t1"][0].tolist() == [1, 2]
    assert data[0]["t2"][0] == [5, 6, 7, 8]
    assert data[1]["t1"][0].tolist() == [3, 4]
    assert data[1]["t2"][0] == [7, 8, 9]
Example #7
File: upload.py Project: 40a/Hub-1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dataset_path",
        type=str,
        help="Path to cifar dataset",
        default="./data/mnist",
    )
    parser.add_argument(
        "-o",
        "--output_name",
        type=str,
        help="Dataset output name",
        default="mnist",
    )
    args = parser.parse_args()
    files = ["training", "testing"]
    dicts = []
    for f in files:
        images, labels = load_mnist(f, path=args.dataset_path)
        dicts += [{"images": images, "labels": labels}]
    images = np.concatenate([d["images"] for d in dicts])
    labels = np.concatenate(
        [np.array(d["labels"], dtype="int8") for d in dicts])
    print(images.shape, labels.shape)
    images_t = tensor.from_array(images, dtag="mask")
    labels_t = tensor.from_array(labels)
    ds = dataset.from_tensors({"data": images_t, "labels": labels_t})
    ds.store(f"{args.output_name}")
Example #8
def test_dataset_concat():
    t1 = tensor.from_array(np.array([5, 6, 7], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds1 = dataset.from_tensors({"t1": t1})
    ds2 = dataset.from_tensors({"t1": t2})
    ds = dataset.concat([ds1, ds2])
    assert len(ds) == 6
    assert (ds["t1"].compute() == np.array([5, 6, 7, 1, 2, 3], dtype="int32")).all()
Example #9
def test_dataset_len():
    t1 = tensor.from_array(
        np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    t3 = tensor.from_array(
        np.array([[1, 2, 3, 4, 6], [4, 5, 6, 7, 6], [7, 8, 9, 10, 6]],
                 dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2, "t3": t3})
    assert len(ds) == 3
Example #10
def test_dataset_store_load():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    path = "./data/test_store_tmp/store_load"
    ds = ds.store(path)
    assert (ds["t1"].compute() == np.array([[1, 2], [4, 5], [7, 8]],
                                           dtype="int32")).all()
    assert (ds["t2"].compute() == np.array([1, 2, 3], dtype="int32")).all()
Example #11
def test_dataset_merge():
    t1 = tensor.from_array(
        np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    t3 = tensor.from_array(
        np.array([[1, 2, 3, 4, 6], [4, 5, 6, 7, 6], [7, 8, 9, 10, 6]],
                 dtype="int32"))

    ds1 = dataset.from_tensors({"t1": t1, "t2": t2})
    ds2 = dataset.from_tensors({"t3": t3})
    ds = dataset.merge([ds1, ds2])
    assert sorted(ds.keys()) == ["t1", "t2", "t3"]
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        nargs="?",  # optional; argparse ignores "default" on required positionals
        help="Path to cifar10 dataset",
        default="./data/cifar10",
    )
    parser.add_argument(
        "output_name",
        metavar="N",
        type=str,
        nargs="?",
        help="Dataset output name",
        default="cifar10",
    )
    args = parser.parse_args()
    files = sorted([f for f in os.listdir(args.dataset_path) if "_batch" in f])
    dicts = []
    for f in files:
        with open(os.path.join(args.dataset_path, f), "rb") as fh:
            dicts += [pickle.load(fh, encoding="bytes")]
            print(dicts[-1].keys())
    images = np.concatenate([d[b"data"] for d in dicts])
    images = images.reshape((len(images), 3, 32, 32))
    labels = np.concatenate(
        [np.array(d[b"labels"], dtype="int16") for d in dicts])
    print(images.shape, labels.shape)
    Image.fromarray(images[1000].transpose(1, 2, 0)).save("./data/image.png")
    images_t = tensor.from_array(images, dtag="image")
    labels_t = tensor.from_array(labels)
    classes = [
        "airplane",
        "automobile",
        "bird",
        "cat",
        "deer",
        "dog",
        "frog",
        "horse",
        "ship",
        "truck",
    ]
    label_texts_t = tensor.from_array(
        np.array([classes[label] for label in labels], dtype="U16"),
        dtag="text",
    )
    ds = dataset.from_tensors({
        "data": images_t,
        "labels": labels_t,
        "classes": label_texts_t
    })
    ds.store(f"{args.output_name}")
Example #13
def test_store_empty_dataset():
    t = tensor.from_array(np.array([], dtype="uint8"))
    ds = dataset.from_tensors({"empty_tensor": t})
    try:
        ds = ds.store("./data/hub/empty_dataset")
    except Exception as e:
        pytest.fail(f"failed storing empty dataset {str(e)}")
Example #14
def test_tensor_meta():
    t = tensor.from_array(
        np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], dtype="int32"))
    assert t.ndim == 2
    assert len(t) == 3
    assert t.shape == (3, 4)
    assert t.dtype == "int32"
Example #15
def test_dataset_iter():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    items = list(ds)
    assert len(items) == 3
    for item in items:
        assert isinstance(item, dict)
    for item in items:
        assert sorted(item.keys()) == ["t1", "t2"]
    assert (items[0]["t1"].compute() == np.array([1, 2], dtype="int32")).all()
    assert (items[1]["t1"].compute() == np.array([4, 5], dtype="int32")).all()
    assert (items[2]["t1"].compute() == np.array([7, 8], dtype="int32")).all()
    assert items[0]["t2"].compute() == 1
    assert items[1]["t2"].compute() == 2
    assert items[2]["t2"].compute() == 3
Example #16
def test_dataset_store_load_big():
    t1 = tensor.from_array(np.zeros(shape=(2**10, 2**13), dtype="int32"))
    ds = dataset.from_tensors({"t1": t1})
    path = "./data/test_store_tmp/store_load_big"
    ds = ds.store(path)
    assert (ds["t1"].compute() == np.zeros(shape=(2**10, 2**13),
                                           dtype="int32")).all()
Example #17
def test_description_license():
    t1 = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    ds = dataset.from_tensors(
        {"abc": t1, "def": t2},
        license="Some license",
        description="Some description",
        citation="Some citation",
        howtoload="Some howtoload",
    )
    assert ds.license == "Some license"
    assert ds.description == "Some description"
    assert ds.citation == "Some citation"
    assert ds.howtoload == "Some howtoload"
    ds = ds.store("./data/test_store_tmp/test_description_license")
    assert ds.license == "Some license"
    assert ds.description == "Some description"
    assert ds.citation == "Some citation"
    assert ds.howtoload == "Some howtoload"
Example #18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        nargs="?",  # optional; argparse ignores "default" on required positionals
        help="Path to cifar100 dataset",
        default="./data/cifar100",
    )
    parser.add_argument(
        "output_name",
        metavar="N",
        type=str,
        nargs="?",
        help="Dataset output name",
        default="cifar100",
    )
    args = parser.parse_args()
    files = ["train", "test"]
    dicts = []
    for f in files:
        with open(os.path.join(args.dataset_path, f), "rb") as fh:
            dicts += [pickle.load(fh, encoding="bytes")]
            print(dicts[-1].keys())
    images = np.concatenate([d[b"data"] for d in dicts])
    images = images.reshape((len(images), 3, 32, 32))
    classes = {
        "aquatic mammals": ["beaver", "dolphin", "otter", "seal", "whale"],
        "fish": ["aquarium fish", "flatfish", "ray", "shark", "trout"],
        "flowers": ["orchids", "poppies", "roses", "sunflowers", "tulips"],
        "food containers": ["bottles", "bowls", "cans", "cups", "plates"],
        "fruit and vegetables": [
            "apples",
            "mushrooms",
            "oranges",
            "pears",
            "sweet peppers",
        ],
        "household electrical devices": [
            "clock",
            "computer keyboard",
            "lamp",
            "telephone",
            "television",
        ],
        "household furniture": ["bed", "chair", "couch", "table", "wardrobe"],
        "insects": ["bee", "beetle", "butterfly", "caterpillar", "cockroach"],
        "large carnivores": ["bear", "leopard", "lion", "tiger", "wolf"],
        "large man-made outdoor things": [
            "bridge",
            "castle",
            "house",
            "road",
            "skyscraper",
        ],
        "large natural outdoor scenes":
        ["cloud", "forest", "mountain", "plain", "sea"],
        "large omnivores and herbivores": [
            "camel",
            "cattle",
            "chimpanzee",
            "elephant",
            "kangaroo",
        ],
        "medium-sized mammals":
        ["fox", "porcupine", "possum", "raccoon", "skunk"],
        "non-insect invertebrates":
        ["crab", "lobster", "snail", "spider", "worm"],
        "people": ["baby", "boy", "girl", "man", "woman"],
        "reptiles": ["crocodile", "dinosaur", "lizard", "snake", "turtle"],
        "small mammals": ["hamster", "mouse", "rabbit", "shrew", "squirrel"],
        "trees": ["maple", "oak", "palm", "pine", "willow"],
        "vehicles 1":
        ["bicycle", "bus", "motorcycle", "pickup truck", "train"],
        "vehicles 2": ["lawn-mower", "rocket", "streetcar", "tank", "tractor"],
    }

    superclasses = list(classes.keys())
    # NOTE: this flattens subclasses grouped by superclass; CIFAR-100's canonical
    # fine labels are ordered alphabetically, so double-check this mapping
    subclasses = [item for key in superclasses for item in classes[key]]

    fine_labels = np.concatenate(
        [np.array(d[b"fine_labels"], dtype="int16") for d in dicts])
    coarse_labels = np.concatenate(
        [np.array(d[b"coarse_labels"], dtype="int16") for d in dicts])

    print(images.shape, fine_labels.shape, coarse_labels.shape)
    Image.fromarray(images[1000].transpose(1, 2, 0)).save("./data/image.png")

    images_t = tensor.from_array(images, dtag="image")
    fine_labels_t = tensor.from_array(fine_labels)
    coarse_labels_t = tensor.from_array(coarse_labels)
    classes_t = tensor.from_array(
        np.array([subclasses[label] for label in fine_labels], dtype="U64"),
        dtag="text",
    )
    superclasses_t = tensor.from_array(
        np.array([superclasses[label] for label in coarse_labels], dtype="U64"),
        dtag="text",
    )
    ds = dataset.from_tensors({
        "data": images_t,
        "fine_labels": fine_labels_t,
        "coarse_labels": coarse_labels_t,
        "classes": classes_t,
        "superclasses": superclasses_t,
    })
    ds.store(f"{args.output_name}")
Example #19
def test_tensor_getitem1():
    t = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    assert t[2:4].shape == (2,)
    assert (t[2:4].compute() == np.array([3, 4], dtype="int32")).all()
Example #20
def test_tensor_getitem2():
    t = tensor.from_array(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int32"))
    assert t[1:3, 1:3].shape == (2, 2)
    assert (t[1:3, 1:3].compute() == np.array([[5, 6], [8, 9]], dtype="int32")).all()
Example #21
def test_tensor_getitem0():
    t = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    assert t[2].compute() == 3
Example #22
def test_tensor_dtag():
    t = tensor.from_array(np.array([1, 2], dtype="int32"), dtag="image")
    ds = dataset.from_tensors({"name": t})
    ds.store("./data/new/test")
    ds = dataset.load("./data/new/test")
    assert ds["name"].dtag == "image"
Example #23
def test_lz4():
    ds = dataset.from_tensors(
        {"t1": tensor.from_array(np.array([1, 2, 3]), dcompress="lz4:4")}
    )
    ds = ds.store("./data/test_store_tmp/test_lz4")
    assert ds["t1"].compute().tolist() == [1, 2, 3]
Example #24
def test_tensor_iter():
    t = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    n = list(t)
    assert [x.compute() for x in n] == [1, 2, 3, 4, 5]