# Imports for the tests and conversion scripts below. numpy, argparse, os,
# pickle, and PIL are used directly; the `tensor` and `dataset` modules are
# assumed to come from the hub package these snippets exercise.
import argparse
import os
import pickle

import numpy as np
from PIL import Image

from hub import dataset, tensor


def test_dataset_store_load():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    path = "./data/test_store_tmp/store_load"
    ds = ds.store(path)
    assert (
        ds["t1"].compute() == np.array([[1, 2], [4, 5], [7, 8]], dtype="int32")
    ).all()
    assert (ds["t2"].compute() == np.array([1, 2, 3], dtype="int32")).all()

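# A round-trip sketch, not part of the original suite: it combines store()
# from the test above with dataset.load(), the call test_tensor_dtag uses
# further down, to check that data read back from disk matches what was
# written. The path is hypothetical.
def test_dataset_store_then_load():
    t = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t": t})
    ds.store("./data/test_store_tmp/store_then_load")
    loaded = dataset.load("./data/test_store_tmp/store_then_load")
    assert (loaded["t"].compute() == np.array([1, 2, 3], dtype="int32")).all()
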
def test_dataset_len():
    t1 = tensor.from_array(
        np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], dtype="int32")
    )
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    t3 = tensor.from_array(
        np.array([[1, 2, 3, 4, 6], [4, 5, 6, 7, 6], [7, 8, 9, 10, 6]], dtype="int32")
    )
    ds = dataset.from_tensors({"t1": t1, "t2": t2, "t3": t3})
    assert len(ds) == 3

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        help="Path to the CIFAR-10 dataset",
        default="./data/cifar10",
    )
    parser.add_argument(
        "output_name",
        metavar="N",
        type=str,
        help="Dataset output name",
        default="cifar10",
    )
    args = parser.parse_args()

    # Each CIFAR-10 batch file is a pickled dict keyed by bytes.
    files = sorted(f for f in os.listdir(args.dataset_path) if "_batch" in f)
    dicts = []
    for f in files:
        with open(os.path.join(args.dataset_path, f), "rb") as fh:
            dicts += [pickle.load(fh, encoding="bytes")]
        print(dicts[-1].keys())

    images = np.concatenate([d[b"data"] for d in dicts])
    images = images.reshape((len(images), 3, 32, 32))
    labels = np.concatenate([np.array(d[b"labels"], dtype="int16") for d in dicts])
    print(images.shape, labels.shape)

    # Quick visual sanity check on one sample (CHW -> HWC for PIL).
    Image.fromarray(images[1000].transpose(1, 2, 0)).save("./data/image.png")

    images_t = tensor.from_array(images, dtag="image")
    labels_t = tensor.from_array(labels)
    classes = [
        "airplane",
        "automobile",
        "bird",
        "cat",
        "deer",
        "dog",
        "frog",
        "horse",
        "ship",
        "truck",
    ]
    label_texts_t = tensor.from_array(
        np.array([classes[label] for label in labels], dtype="U16"), dtag="text"
    )
    ds = dataset.from_tensors(
        {"data": images_t, "labels": labels_t, "classes": label_texts_t}
    )
    ds.store(args.output_name)

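# Hypothetical invocation of the conversion script above (the filename is
# assumed), plus a minimal sanity check that re-opens the stored dataset
# with dataset.load(), the same call the tests in this section use:
#
#   python convert_cifar10.py ./data/cifar10 cifar10
#
def verify_cifar10(name="cifar10"):
    ds = dataset.load(name)
    assert ds["data"].compute().shape[1:] == (3, 32, 32)
    assert len(ds) == len(ds["labels"].compute())
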
def test_dataset_iter():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    items = list(ds)
    assert len(items) == 3
    for item in items:
        assert isinstance(item, dict)
        assert sorted(item.keys()) == ["t1", "t2"]
    assert (items[0]["t1"].compute() == np.array([1, 2], dtype="int32")).all()
    assert (items[1]["t1"].compute() == np.array([4, 5], dtype="int32")).all()
    assert (items[2]["t1"].compute() == np.array([7, 8], dtype="int32")).all()
    assert items[0]["t2"].compute() == 1
    assert items[1]["t2"].compute() == 2
    assert items[2]["t2"].compute() == 3

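# A minimal consumption sketch built only on what test_dataset_iter shows:
# iterating a dataset yields per-sample dicts whose values still need
# .compute() to materialize as numpy values. The function name is hypothetical.
def print_samples(ds):
    for sample in ds:
        print({key: sample[key].compute() for key in sorted(sample.keys())})
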
def test_description_license():
    t1 = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    ds = dataset.from_tensors(
        {"abc": t1, "def": t2},
        license="Some license",
        description="Some description",
        citation="Some citation",
        howtoload="Some howtoload",
    )
    assert ds.license == "Some license"
    assert ds.description == "Some description"
    assert ds.citation == "Some citation"
    assert ds.howtoload == "Some howtoload"
    # Metadata should survive a round trip through storage.
    ds = ds.store("./data/test_store_tmp/test_description_license")
    assert ds.license == "Some license"
    assert ds.description == "Some description"
    assert ds.citation == "Some citation"
    assert ds.howtoload == "Some howtoload"

def main():
    files = ["training", "testing"]
    dicts = []
    # Required to generate named labels.
    mapping = {
        0: "T-shirt/top",
        1: "Trouser",
        2: "Pullover",
        3: "Dress",
        4: "Coat",
        5: "Sandal",
        6: "Shirt",
        7: "Sneaker",
        8: "Bag",
        9: "Ankle boot",
    }
    for f in files:
        images, labels = load_fashion_mnist(f, path="./data/fashion-mnist")
        dicts += [{"images": images, "labels": labels}]
    images = np.concatenate([d["images"] for d in dicts])
    labels = np.concatenate([np.array(d["labels"], dtype="int8") for d in dicts])
    named_labels = np.array([mapping[label] for label in labels])
    print(images.shape, labels.shape)

    images_t = tensor.from_array(images, dtag="image")
    labels_t = tensor.from_array(labels)
    named_labels_t = tensor.from_array(named_labels, dtag="text")
    ds = dataset.from_tensors(
        {"data": images_t, "labels": labels_t, "named_labels": named_labels_t}
    )
    ds.store("mnist/fashion-mnist")

def test_to_pytorch():
    import torch

    t1 = tensor.from_array(np.array([[1, 2], [3, 4]], dtype="int32"))
    # An object array holds ragged (variable-length) samples.
    np_arr = np.empty(2, object)
    np_arr[0] = np.array([5, 6, 7, 8], dtype="int32")
    np_arr[1] = np.array([7, 8, 9], dtype="int32")
    t2 = tensor.from_array(np_arr)
    ds = dataset.from_tensors({"t1": t1, "t2": t2})

    torch_ds = ds.to_pytorch()
    train_loader = torch.utils.data.DataLoader(
        torch_ds, batch_size=1, num_workers=0, collate_fn=torch_ds.collate_fn
    )
    data = list(train_loader)
    assert len(data) == 2
    for i in range(2):
        assert "t1" in data[i]
        assert "t2" in data[i]
    assert data[0]["t1"][0].tolist() == [1, 2]
    assert data[0]["t2"][0] == [5, 6, 7, 8]
    assert data[1]["t1"][0].tolist() == [3, 4]
    assert data[1]["t2"][0] == [7, 8, 9]

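# A loader sketch generalizing test_to_pytorch (only to_pytorch(), DataLoader,
# and collate_fn from the test above are assumed; batch_size=1 keeps the
# ragged object-array samples intact). The function name is hypothetical.
def iterate_torch_batches(ds):
    import torch

    torch_ds = ds.to_pytorch()
    loader = torch.utils.data.DataLoader(
        torch_ds, batch_size=1, num_workers=0, collate_fn=torch_ds.collate_fn
    )
    for batch in loader:
        yield batch
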
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        help="Path to the CIFAR-100 dataset",
        default="./data/cifar100",
    )
    parser.add_argument(
        "output_name",
        metavar="N",
        type=str,
        help="Dataset output name",
        default="cifar100",
    )
    args = parser.parse_args()

    files = ["train", "test"]
    dicts = []
    for f in files:
        with open(os.path.join(args.dataset_path, f), "rb") as fh:
            dicts += [pickle.load(fh, encoding="bytes")]
        print(dicts[-1].keys())

    images = np.concatenate([d[b"data"] for d in dicts])
    images = images.reshape((len(images), 3, 32, 32))

    # CIFAR-100 superclasses, each mapped to its five fine classes.
    classes = {
        "aquatic mammals": ["beaver", "dolphin", "otter", "seal", "whale"],
        "fish": ["aquarium fish", "flatfish", "ray", "shark", "trout"],
        "flowers": ["orchids", "poppies", "roses", "sunflowers", "tulips"],
        "food containers": ["bottles", "bowls", "cans", "cups", "plates"],
        "fruit and vegetables": [
            "apples",
            "mushrooms",
            "oranges",
            "pears",
            "sweet peppers",
        ],
        "household electrical devices": [
            "clock",
            "computer keyboard",
            "lamp",
            "telephone",
            "television",
        ],
        "household furniture": ["bed", "chair", "couch", "table", "wardrobe"],
        "insects": ["bee", "beetle", "butterfly", "caterpillar", "cockroach"],
        "large carnivores": ["bear", "leopard", "lion", "tiger", "wolf"],
        "large man-made outdoor things": [
            "bridge",
            "castle",
            "house",
            "road",
            "skyscraper",
        ],
        "large natural outdoor scenes": ["cloud", "forest", "mountain", "plain", "sea"],
        "large omnivores and herbivores": [
            "camel",
            "cattle",
            "chimpanzee",
            "elephant",
            "kangaroo",
        ],
        "medium-sized mammals": ["fox", "porcupine", "possum", "raccoon", "skunk"],
        "non-insect invertebrates": ["crab", "lobster", "snail", "spider", "worm"],
        "people": ["baby", "boy", "girl", "man", "woman"],
        "reptiles": ["crocodile", "dinosaur", "lizard", "snake", "turtle"],
        "small mammals": ["hamster", "mouse", "rabbit", "shrew", "squirrel"],
        "trees": ["maple", "oak", "palm", "pine", "willow"],
        "vehicles 1": ["bicycle", "bus", "motorcycle", "pickup truck", "train"],
        "vehicles 2": ["lawn-mower", "rocket", "streetcar", "tank", "tractor"],
    }
    superclasses = list(classes.keys())
    subclasses = [item for key in superclasses for item in classes[key]]

    fine_labels = np.concatenate(
        [np.array(d[b"fine_labels"], dtype="int16") for d in dicts]
    )
    coarse_labels = np.concatenate(
        [np.array(d[b"coarse_labels"], dtype="int16") for d in dicts]
    )
    print(images.shape, fine_labels.shape, coarse_labels.shape)
    Image.fromarray(images[1000].transpose(1, 2, 0)).save("./data/image.png")

    images_t = tensor.from_array(images, dtag="image")
    fine_labels_t = tensor.from_array(fine_labels)
    coarse_labels_t = tensor.from_array(coarse_labels)
    classes_t = tensor.from_array(
        np.array([subclasses[label] for label in fine_labels], dtype="U64"),
        dtag="text",
    )
    superclasses_t = tensor.from_array(
        np.array([superclasses[label] for label in coarse_labels], dtype="U64"),
        dtag="text",
    )
    ds = dataset.from_tensors(
        {
            "data": images_t,
            "fine_labels": fine_labels_t,
            "coarse_labels": coarse_labels_t,
            "classes": classes_t,
            "superclasses": superclasses_t,
        }
    )
    ds.store(args.output_name)

def test_dataset_getitem_index():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    assert (ds[0:2]["t1"].compute() == np.array([[1, 2], [4, 5]], dtype="int32")).all()
    assert (ds[0:2]["t2"].compute() == np.array([1, 2], dtype="int32")).all()

def test_tensor_dtag():
    t = tensor.from_array(np.array([1, 2], dtype="int32"), dtag="image")
    ds = dataset.from_tensors({"name": t})
    ds.store("./data/new/test")
    ds = dataset.load("./data/new/test")
    assert ds["name"].dtag == "image"

def test_lz4():
    ds = dataset.from_tensors(
        {"t1": tensor.from_array(np.array([1, 2, 3]), dcompress="lz4:4")}
    )
    ds = ds.store("./data/test_store_tmp/test_lz4")
    assert ds["t1"].compute().tolist() == [1, 2, 3]

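# A sketch combining the dcompress argument above with the dtag argument used
# elsewhere in this section; passing both together, and the "lz4:9" level,
# are assumptions extrapolated from the "codec:level" form of "lz4:4". The
# function name and path are hypothetical.
def store_compressed_images(images):
    t = tensor.from_array(images, dtag="image", dcompress="lz4:9")
    ds = dataset.from_tensors({"data": t})
    return ds.store("./data/test_store_tmp/compressed_images")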