def generate(cls, root, *, year, num_samples):
    """Generate mock COCO data: one image archive per split plus one shared annotation zip.

    Returns the number of samples created per split.
    """
    ann_dir = root / "annotations"
    ann_dir.mkdir()

    for split in ("train", "val"):
        name = f"{split}{year}"
        meta = cls._make_images_archive(root, name, num_samples=num_samples)
        cls._make_annotations(ann_dir, name, images_meta=meta)

    make_zip(root, f"annotations_trainval{year}.zip", ann_dir)
    return num_samples
def fer2013(info, root, config):
    """Write a mock FER2013 csv (train split carries an extra "emotion" column) and zip it.

    Returns the number of generated rows.
    """
    is_train = config.split == "train"
    count = 5 if is_train else 3

    csv_path = root / f"{config.split}.txt"
    columns = ["emotion", "pixels"] if is_train else ["pixels"]
    with open(csv_path, "w", newline="") as fh:
        # Header is written manually so it is not quoted like the data rows.
        fh.write(",".join(columns) + "\n")

        writer = csv.DictWriter(fh, fieldnames=columns, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
        for _ in range(count):
            pixels = torch.randint(256, (48 * 48,), dtype=torch.uint8)
            row = {"pixels": " ".join(str(int(pixel)) for pixel in pixels)}
            if is_train:
                row["emotion"] = int(torch.randint(7, ()))
            writer.writerow(row)

    make_zip(root, f"{csv_path.name}.zip", csv_path)
    return count
def emnist(info, root, _):
    """Create mock EMNIST idx archives for every config and bundle them into one zip.

    Returns a mapping from config to a dict with the generated sample count.
    """
    # Image sets that merge some lowercase letters into their uppercase variants still
    # use dense labels in the data files, so num_categories != len(categories) there.
    dense_label_sets = ("Balanced", "By_Merge")
    num_categories = defaultdict(lambda: len(info.categories), {image_set: 47 for image_set in dense_label_sets})

    mock_infos = {}
    archive_members = set()
    for config in info._configs:
        stem = f"emnist-{config.image_set.replace('_', '').lower()}-{config.split}"
        images = f"{stem}-images-idx3-ubyte.gz"
        labels = f"{stem}-labels-idx1-ubyte.gz"
        archive_members.add(images)
        archive_members.add(labels)

        num_samples = MNISTMockData.generate(
            root,
            num_categories=num_categories[config.image_set],
            images_file=images,
            labels_file=labels,
        )
        mock_infos[config] = dict(num_samples=num_samples)

    make_zip(root, "emnist-gzip.zip", *archive_members)
    return mock_infos
def _make_images_archive(cls, root, name, *, num_samples):
    """Create ``num_samples`` fake jpgs, zip the folder, and return COCO-style image metadata."""
    paths = create_image_folder(root, name, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_samples)

    meta = []
    for path in paths:
        with PIL.Image.open(path) as image:
            width, height = image.size
        meta.append(dict(file_name=path.name, id=int(path.stem), width=width, height=height))

    make_zip(root, f"{name}.zip")
    return meta
def eurosat(info, root, config):
    """Mock the EuroSAT layout: two class folders with three images each, zipped.

    Returns the total number of generated images.
    """
    data_folder = pathlib.Path(root, "eurosat", "2750")
    data_folder.mkdir(parents=True)

    examples_per_class = 3
    categories = ("AnnualCrop", "Forest")
    for category in categories:
        create_image_folder(
            root=data_folder,
            name=category,
            # Bind the loop variable as a default so the callback does not rely on late binding.
            file_name_fn=lambda idx, category=category: f"{category}_{idx}.jpg",
            num_examples=examples_per_class,
        )

    make_zip(root, "EuroSAT.zip", data_folder)
    return len(categories) * examples_per_class
def generate(cls, root):
    """Create the mock CelebA image archive plus all four annotation files.

    Returns the per-split sample-count map produced by the split file.
    """
    file_names, num_samples_map = cls._make_split_file(root)

    image_files = create_image_folder(
        root, "img_align_celeba", file_name_fn=lambda idx: file_names[idx], num_examples=len(file_names)
    )
    make_zip(root, image_files[0].parent.with_suffix(".zip").name)

    ann_file_makers = (
        cls._make_identity_file,
        cls._make_attributes_file,
        cls._make_bounding_boxes_file,
        cls._make_landmarks_file,
    )
    for make_file in ann_file_makers:
        make_file(root, file_names)

    return num_samples_map
def clevr(info, root, config):
    """Mock the CLEVR v1.0 archive: images for all splits, scene jsons for train/val only.

    Returns a mapping from config to the number of samples in its split.
    """
    dataset_folder = root / "CLEVR_v1.0"
    split_sizes = {"train": 3, "val": 2, "test": 1}

    images_root = dataset_folder / "images"
    image_files = {}
    for split, count in split_sizes.items():
        image_files[split] = create_image_folder(
            images_root,
            split,
            # Bind the loop variable as a default so the callback does not rely on late binding.
            file_name_fn=lambda idx, split=split: f"CLEVR_{split}_{idx:06d}.jpg",
            num_examples=count,
        )

    scenes_folder = dataset_folder / "scenes"
    scenes_folder.mkdir()
    # The test split ships without scene annotations.
    for split in ("train", "val"):
        scenes = [
            {
                "image_filename": image_file.name,
                # We currently only return the number of objects in a scene.
                # Thus, it is sufficient for now to only mock the number of elements.
                "objects": [None] * int(torch.randint(1, 5, ())),
            }
            for image_file in image_files[split]
        ]
        with open(scenes_folder / f"CLEVR_{split}_scenes.json", "w") as fh:
            json.dump({"scenes": scenes}, fh)

    make_zip(root, f"{dataset_folder.name}.zip")
    return {cfg: split_sizes[cfg.split] for cfg in info._configs}
def gtsrb(info, root, config):
    """Mock the GTSRB archives for the given split.

    Train: one image folder per class plus a per-class GT csv, zipped together.
    Test: a flat image folder zipped separately from a single GT csv with random labels.
    Returns the total number of generated images.
    """
    num_examples_per_class = 5 if config.split == "train" else 3
    classes = ("00000", "00042", "00012")
    num_examples = num_examples_per_class * len(classes)

    csv_columns = ["Filename", "Width", "Height", "Roi.X1", "Roi.Y1", "Roi.X2", "Roi.Y2", "ClassId"]

    def _make_ann_file(path, num_examples, class_idx):
        # ``class_idx == "random"`` draws one label for the whole file, mimicking the test GT.
        if class_idx == "random":
            class_idx = torch.randint(1, len(classes) + 1, size=(1,)).item()

        with open(path, "w") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=csv_columns, delimiter=";")
            writer.writeheader()
            for image_idx in range(num_examples):
                writer.writerow(
                    {
                        "Filename": f"{image_idx:05d}.ppm",
                        "Width": torch.randint(1, 100, size=()).item(),
                        "Height": torch.randint(1, 100, size=()).item(),
                        "Roi.X1": torch.randint(1, 100, size=()).item(),
                        "Roi.Y1": torch.randint(1, 100, size=()).item(),
                        "Roi.X2": torch.randint(1, 100, size=()).item(),
                        "Roi.Y2": torch.randint(1, 100, size=()).item(),
                        "ClassId": class_idx,
                    }
                )

    # Fix: use attribute access consistently — the first line of this function and all the
    # other mocks use ``config.split``; this branch previously used ``config["split"]``.
    if config.split == "train":
        train_folder = root / "GTSRB" / "Training"
        train_folder.mkdir(parents=True)

        for class_idx in classes:
            create_image_folder(
                train_folder,
                name=class_idx,
                # Bind the loop variable as a default so the callback does not rely on
                # late binding.
                file_name_fn=lambda image_idx, class_idx=class_idx: f"{class_idx}_{image_idx:05d}.ppm",
                num_examples=num_examples_per_class,
            )
            _make_ann_file(
                path=train_folder / class_idx / f"GT-{class_idx}.csv",
                num_examples=num_examples_per_class,
                class_idx=int(class_idx),
            )
        make_zip(root, "GTSRB-Training_fixed.zip", train_folder)
    else:
        test_folder = root / "GTSRB" / "Final_Test"
        test_folder.mkdir(parents=True)

        create_image_folder(
            test_folder,
            name="Images",
            file_name_fn=lambda image_idx: f"{image_idx:05d}.ppm",
            num_examples=num_examples,
        )
        make_zip(root, "GTSRB_Final_Test_Images.zip", test_folder)

        _make_ann_file(path=root / "GT-final_test.csv", num_examples=num_examples, class_idx="random")
        make_zip(root, "GTSRB_Final_Test_GT.zip", "GT-final_test.csv")

    return num_examples