# Imports for the tests and conversion scripts below. numpy, argparse, os,
# pickle, and PIL are used directly; the `tensor` and `dataset` modules are
# assumed to come from the hub package these snippets exercise.
import argparse
import os
import pickle

import numpy as np
from PIL import Image

from hub import dataset, tensor


def test_dataset_store_load():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    path = "./data/test_store_tmp/store_load"
    ds = ds.store(path)
    assert (
        ds["t1"].compute() == np.array([[1, 2], [4, 5], [7, 8]], dtype="int32")
    ).all()
    assert (ds["t2"].compute() == np.array([1, 2, 3], dtype="int32")).all()

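# A round-trip sketch, not part of the original suite: it combines store()
# from the test above with dataset.load(), the call test_tensor_dtag uses
# further down, to check that data read back from disk matches what was
# written. The path is hypothetical.
def test_dataset_store_then_load():
    t = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t": t})
    ds.store("./data/test_store_tmp/store_then_load")
    loaded = dataset.load("./data/test_store_tmp/store_then_load")
    assert (loaded["t"].compute() == np.array([1, 2, 3], dtype="int32")).all()
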
def test_dataset_len():
    t1 = tensor.from_array(
        np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], dtype="int32")
    )
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    t3 = tensor.from_array(
        np.array([[1, 2, 3, 4, 6], [4, 5, 6, 7, 6], [7, 8, 9, 10, 6]], dtype="int32")
    )
    ds = dataset.from_tensors({"t1": t1, "t2": t2, "t3": t3})
    assert len(ds) == 3

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        help="Path to the CIFAR-10 dataset",
        default="./data/cifar10",
    )
    parser.add_argument(
        "output_name",
        metavar="N",
        type=str,
        help="Dataset output name",
        default="cifar10",
    )
    args = parser.parse_args()

    # Each CIFAR-10 batch file is a pickled dict keyed by bytes.
    files = sorted(f for f in os.listdir(args.dataset_path) if "_batch" in f)
    dicts = []
    for f in files:
        with open(os.path.join(args.dataset_path, f), "rb") as fh:
            dicts += [pickle.load(fh, encoding="bytes")]
        print(dicts[-1].keys())

    images = np.concatenate([d[b"data"] for d in dicts])
    images = images.reshape((len(images), 3, 32, 32))
    labels = np.concatenate([np.array(d[b"labels"], dtype="int16") for d in dicts])
    print(images.shape, labels.shape)

    # Quick visual sanity check on one sample (CHW -> HWC for PIL).
    Image.fromarray(images[1000].transpose(1, 2, 0)).save("./data/image.png")

    images_t = tensor.from_array(images, dtag="image")
    labels_t = tensor.from_array(labels)
    classes = [
        "airplane",
        "automobile",
        "bird",
        "cat",
        "deer",
        "dog",
        "frog",
        "horse",
        "ship",
        "truck",
    ]
    label_texts_t = tensor.from_array(
        np.array([classes[label] for label in labels], dtype="U16"), dtag="text"
    )
    ds = dataset.from_tensors(
        {"data": images_t, "labels": labels_t, "classes": label_texts_t}
    )
    ds.store(args.output_name)

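# Hypothetical invocation of the conversion script above (the filename is
# assumed), plus a minimal sanity check that re-opens the stored dataset
# with dataset.load(), the same call the tests in this section use:
#
#   python convert_cifar10.py ./data/cifar10 cifar10
#
def verify_cifar10(name="cifar10"):
    ds = dataset.load(name)
    assert ds["data"].compute().shape[1:] == (3, 32, 32)
    assert len(ds) == len(ds["labels"].compute())
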
def test_dataset_iter():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    items = list(ds)
    assert len(items) == 3
    for item in items:
        assert isinstance(item, dict)
        assert sorted(item.keys()) == ["t1", "t2"]
    assert (items[0]["t1"].compute() == np.array([1, 2], dtype="int32")).all()
    assert (items[1]["t1"].compute() == np.array([4, 5], dtype="int32")).all()
    assert (items[2]["t1"].compute() == np.array([7, 8], dtype="int32")).all()
    assert items[0]["t2"].compute() == 1
    assert items[1]["t2"].compute() == 2
    assert items[2]["t2"].compute() == 3

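# A minimal consumption sketch built only on what test_dataset_iter shows:
# iterating a dataset yields per-sample dicts whose values still need
# .compute() to materialize as numpy values. The function name is hypothetical.
def print_samples(ds):
    for sample in ds:
        print({key: sample[key].compute() for key in sorted(sample.keys())})
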
def test_description_license():
    t1 = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3, 4, 5], dtype="int32"))
    ds = dataset.from_tensors(
        {"abc": t1, "def": t2},
        license="Some license",
        description="Some description",
        citation="Some citation",
        howtoload="Some howtoload",
    )
    assert ds.license == "Some license"
    assert ds.description == "Some description"
    assert ds.citation == "Some citation"
    assert ds.howtoload == "Some howtoload"
    # Metadata should survive a round trip through storage.
    ds = ds.store("./data/test_store_tmp/test_description_license")
    assert ds.license == "Some license"
    assert ds.description == "Some description"
    assert ds.citation == "Some citation"
    assert ds.howtoload == "Some howtoload"

def main():
    files = ["training", "testing"]
    dicts = []
    # Required to generate named labels.
    mapping = {
        0: "T-shirt/top",
        1: "Trouser",
        2: "Pullover",
        3: "Dress",
        4: "Coat",
        5: "Sandal",
        6: "Shirt",
        7: "Sneaker",
        8: "Bag",
        9: "Ankle boot",
    }
    for f in files:
        images, labels = load_fashion_mnist(f, path="./data/fashion-mnist")
        dicts += [{"images": images, "labels": labels}]
    images = np.concatenate([d["images"] for d in dicts])
    labels = np.concatenate([np.array(d["labels"], dtype="int8") for d in dicts])
    named_labels = np.array([mapping[label] for label in labels])
    print(images.shape, labels.shape)

    images_t = tensor.from_array(images, dtag="image")
    labels_t = tensor.from_array(labels)
    named_labels_t = tensor.from_array(named_labels, dtag="text")
    ds = dataset.from_tensors(
        {"data": images_t, "labels": labels_t, "named_labels": named_labels_t}
    )
    ds.store("mnist/fashion-mnist")

def test_to_pytorch():
    import torch

    t1 = tensor.from_array(np.array([[1, 2], [3, 4]], dtype="int32"))
    # An object array holds ragged (variable-length) samples.
    np_arr = np.empty(2, object)
    np_arr[0] = np.array([5, 6, 7, 8], dtype="int32")
    np_arr[1] = np.array([7, 8, 9], dtype="int32")
    t2 = tensor.from_array(np_arr)
    ds = dataset.from_tensors({"t1": t1, "t2": t2})

    torch_ds = ds.to_pytorch()
    train_loader = torch.utils.data.DataLoader(
        torch_ds, batch_size=1, num_workers=0, collate_fn=torch_ds.collate_fn
    )
    data = list(train_loader)
    assert len(data) == 2
    for i in range(2):
        assert "t1" in data[i]
        assert "t2" in data[i]
    assert data[0]["t1"][0].tolist() == [1, 2]
    assert data[0]["t2"][0] == [5, 6, 7, 8]
    assert data[1]["t1"][0].tolist() == [3, 4]
    assert data[1]["t2"][0] == [7, 8, 9]

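# A loader sketch generalizing test_to_pytorch (only to_pytorch(), DataLoader,
# and collate_fn from the test above are assumed; batch_size=1 keeps the
# ragged object-array samples intact). The function name is hypothetical.
def iterate_torch_batches(ds):
    import torch

    torch_ds = ds.to_pytorch()
    loader = torch.utils.data.DataLoader(
        torch_ds, batch_size=1, num_workers=0, collate_fn=torch_ds.collate_fn
    )
    for batch in loader:
        yield batch
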
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "dataset_path",
        metavar="P",
        type=str,
        help="Path to the CIFAR-100 dataset",
        default="./data/cifar100",
    )
    parser.add_argument(
        "output_name",
        metavar="N",
        type=str,
        help="Dataset output name",
        default="cifar100",
    )
    args = parser.parse_args()

    files = ["train", "test"]
    dicts = []
    for f in files:
        with open(os.path.join(args.dataset_path, f), "rb") as fh:
            dicts += [pickle.load(fh, encoding="bytes")]
        print(dicts[-1].keys())

    images = np.concatenate([d[b"data"] for d in dicts])
    images = images.reshape((len(images), 3, 32, 32))

    # CIFAR-100 superclasses, each mapped to its five fine classes.
    classes = {
        "aquatic mammals": ["beaver", "dolphin", "otter", "seal", "whale"],
        "fish": ["aquarium fish", "flatfish", "ray", "shark", "trout"],
        "flowers": ["orchids", "poppies", "roses", "sunflowers", "tulips"],
        "food containers": ["bottles", "bowls", "cans", "cups", "plates"],
        "fruit and vegetables": [
            "apples",
            "mushrooms",
            "oranges",
            "pears",
            "sweet peppers",
        ],
        "household electrical devices": [
            "clock",
            "computer keyboard",
            "lamp",
            "telephone",
            "television",
        ],
        "household furniture": ["bed", "chair", "couch", "table", "wardrobe"],
        "insects": ["bee", "beetle", "butterfly", "caterpillar", "cockroach"],
        "large carnivores": ["bear", "leopard", "lion", "tiger", "wolf"],
        "large man-made outdoor things": [
            "bridge",
            "castle",
            "house",
            "road",
            "skyscraper",
        ],
        "large natural outdoor scenes": ["cloud", "forest", "mountain", "plain", "sea"],
        "large omnivores and herbivores": [
            "camel",
            "cattle",
            "chimpanzee",
            "elephant",
            "kangaroo",
        ],
        "medium-sized mammals": ["fox", "porcupine", "possum", "raccoon", "skunk"],
        "non-insect invertebrates": ["crab", "lobster", "snail", "spider", "worm"],
        "people": ["baby", "boy", "girl", "man", "woman"],
        "reptiles": ["crocodile", "dinosaur", "lizard", "snake", "turtle"],
        "small mammals": ["hamster", "mouse", "rabbit", "shrew", "squirrel"],
        "trees": ["maple", "oak", "palm", "pine", "willow"],
        "vehicles 1": ["bicycle", "bus", "motorcycle", "pickup truck", "train"],
        "vehicles 2": ["lawn-mower", "rocket", "streetcar", "tank", "tractor"],
    }
    superclasses = list(classes.keys())
    subclasses = [item for key in superclasses for item in classes[key]]

    fine_labels = np.concatenate(
        [np.array(d[b"fine_labels"], dtype="int16") for d in dicts]
    )
    coarse_labels = np.concatenate(
        [np.array(d[b"coarse_labels"], dtype="int16") for d in dicts]
    )
    print(images.shape, fine_labels.shape, coarse_labels.shape)
    Image.fromarray(images[1000].transpose(1, 2, 0)).save("./data/image.png")

    images_t = tensor.from_array(images, dtag="image")
    fine_labels_t = tensor.from_array(fine_labels)
    coarse_labels_t = tensor.from_array(coarse_labels)
    classes_t = tensor.from_array(
        np.array([subclasses[label] for label in fine_labels], dtype="U64"),
        dtag="text",
    )
    superclasses_t = tensor.from_array(
        np.array([superclasses[label] for label in coarse_labels], dtype="U64"),
        dtag="text",
    )
    ds = dataset.from_tensors(
        {
            "data": images_t,
            "fine_labels": fine_labels_t,
            "coarse_labels": coarse_labels_t,
            "classes": classes_t,
            "superclasses": superclasses_t,
        }
    )
    ds.store(args.output_name)

def test_dataset_getitem_index():
    t1 = tensor.from_array(np.array([[1, 2], [4, 5], [7, 8]], dtype="int32"))
    t2 = tensor.from_array(np.array([1, 2, 3], dtype="int32"))
    ds = dataset.from_tensors({"t1": t1, "t2": t2})
    assert (ds[0:2]["t1"].compute() == np.array([[1, 2], [4, 5]], dtype="int32")).all()
    assert (ds[0:2]["t2"].compute() == np.array([1, 2], dtype="int32")).all()

def test_tensor_dtag():
    t = tensor.from_array(np.array([1, 2], dtype="int32"), dtag="image")
    ds = dataset.from_tensors({"name": t})
    ds.store("./data/new/test")
    ds = dataset.load("./data/new/test")
    assert ds["name"].dtag == "image"

def test_lz4():
    ds = dataset.from_tensors(
        {"t1": tensor.from_array(np.array([1, 2, 3]), dcompress="lz4:4")}
    )
    ds = ds.store("./data/test_store_tmp/test_lz4")
    assert ds["t1"].compute().tolist() == [1, 2, 3]

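# A sketch combining the dcompress argument above with the dtag argument used
# elsewhere in this section; passing both together, and the "lz4:9" level,
# are assumptions extrapolated from the "codec:level" form of "lz4:4". The
# function name and path are hypothetical.
def store_compressed_images(images):
    t = tensor.from_array(images, dtag="image", dcompress="lz4:9")
    ds = dataset.from_tensors({"data": t})
    return ds.store("./data/test_store_tmp/compressed_images")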