def __init__(self, root_dir, train: bool = True):
    """Load one MNIST-format split (images + labels) from gzipped IDX files.

    Args:
        root_dir: Directory containing the ``*-idx*-ubyte.gz`` files.
        train: If True, load the "train" split; otherwise the "t10k" (test) split.
    """
    self.root_dir = root_dir
    prefix = "train" if train else "t10k"
    # Build both file paths up front, then convert the loaded arrays to tensors.
    images_path = os.path.join(self.root_dir, f"{prefix}-images-idx3-ubyte.gz")
    labels_path = os.path.join(self.root_dir, f"{prefix}-labels-idx1-ubyte.gz")
    self.images = torch.from_numpy(io.load_idx(images_path))
    self.labels = torch.from_numpy(io.load_idx(labels_path))
    super().__init__(self.images, self.labels)
def __init__(self, root_dir, train: bool = True):
    """Load one MNIST-format split (images + labels) from gzipped IDX files.

    Args:
        root_dir: Directory containing the ``*-idx*-ubyte.gz`` files.
        train: If True, load the "train" split; otherwise the "t10k" (test) split.
    """
    self.root_dir = root_dir
    prefix = "train" if train else "t10k"

    def _load(suffix):
        # Read one IDX array from root_dir and wrap it as a torch tensor.
        path = os.path.join(self.root_dir, prefix + suffix)
        return torch.from_numpy(io.load_idx(path))

    self.images = _load("-images-idx3-ubyte.gz")
    self.labels = _load("-labels-idx1-ubyte.gz")
    super().__init__(self.images, self.labels)
def measure_dir(data_dir, pool):
    """Compute and save morphometrics for both MNIST splits in *data_dir*.

    For each of the test ("t10k") and training ("train") image files, measures
    every digit with ``measure.measure_batch`` and writes the results to a
    ``<split>-morpho.csv`` file alongside the input.

    Args:
        data_dir: Directory holding the ``*-images-idx3-ubyte.gz`` files.
        pool: Multiprocessing pool forwarded to ``measure_batch``.
    """
    for subset in ('t10k', 'train'):
        in_path = os.path.join(data_dir, subset + "-images-idx3-ubyte.gz")
        out_path = os.path.join(data_dir, subset + "-morpho.csv")
        print(f"Processing MNIST data file {in_path}...")
        images = io.load_idx(in_path)
        results = measure.measure_batch(images, pool=pool, chunksize=100)
        results.to_csv(out_path, index_label='index')
        print(f"Morphometrics saved to {out_path}")
def dotted_zero(index: int = 56, data_path: str = "../data/train-images-idx3-ubyte.gz"):
    """Load a training digit and stamp a 2x2 white dot at its centre.

    Generalized: the image index and data path were hard-coded; the defaults
    preserve the original behavior (image #56 of the training set). The four
    repeated ``baseline_set_pixel`` calls are folded into a loop over the
    2x2 block, in the original call order.

    Args:
        index: Index of the image to load from the IDX file.
        data_path: Path to the gzipped IDX image file.
    """
    image = io.load_idx(data_path)[index]
    print(linearize(image))
    # 2x2 block of white (255) pixels at rows/cols 15-16.
    for row in (16, 15):
        for col in (16, 15):
            image = baseline_set_pixel(image, 255, row, col)
    print(linearize(image))
    draw_digit(image, 1000)
def slashed_zero():
    """Load training digit #56 and draw a 3-pixel-wide slash through it.

    The slash consists of three adjacent anti-diagonals of 10 white pixels
    each; this loop replaces the 30 hand-written ``baseline_set_pixel``
    calls of the original, setting exactly the same pixels in the same order.
    """
    image = io.load_idx("../data/train-images-idx3-ubyte.gz")[56]
    label = io.load_idx("../data/train-labels-idx1-ubyte.gz")[56]
    print(linearize(image))
    # Each diagonal begins at (start, 19) and steps down-left: (start+k, 19-k).
    for start in (10, 11, 12):
        for k in range(10):
            image = baseline_set_pixel(image, 255, start + k, 19 - k)
    print(linearize(image))
    print_digit(image)
    draw_digit(image, label)
scale=morph.scale) width = _measure_width(pert_morph) if abs(width - self.target_width) > self._tolerance: print( f"!!! Incorrect width after transformation: {width:.1f}, " f"expected {self.target_width:.1f}.") pert_hires_image = self(pert_morph) return pert_hires_image if __name__ == '__main__': import matplotlib.pyplot as plt from morphomnist import io, measure image_path = "/vol/biomedic/users/dc315/mnist/original/t10k-images-idx3-ubyte.gz" images = io.load_idx(image_path) # pert = SetThickness(3.5) # pert = SetSlant(np.deg2rad(30)) pert = SetWidth(8, validate=True) for n in range(20): print(n, "before:") measure.measure_image(images[n], verbose=True) morph = ImageMorphology(images[n], scale=16) pert_hires_image = pert(morph) pert_image = morph.downscale(pert_hires_image) print(n, "after:") measure.measure_image(pert_image, verbose=True)
def get_image(i):
    """Return training image *i*, loaded from the gzipped IDX training file."""
    return io.load_idx("../data/train-images-idx3-ubyte.gz")[i]
def extract_images(n, m, l):
    """Draw every training digit in ``[n, m)`` whose label equals *l*.

    Fix: the original re-loaded and decompressed both full IDX files from
    disk on every loop iteration; the loads are hoisted out of the loop so
    each file is read exactly once. Output is unchanged.

    Args:
        n: First index (inclusive).
        m: Last index (exclusive).
        l: Label value to match.
    """
    images = io.load_idx("../data/train-images-idx3-ubyte.gz")
    labels = io.load_idx("../data/train-labels-idx1-ubyte.gz")
    for i in range(n, m):
        if labels[i] == l:
            draw_digit(images[i], i)
# NOTE(review): the two methods below belong to an iterator class whose
# header (and the __init__ that initializes self.num, presumably to 0 --
# confirm against the full file) is outside this view.
def __iter__(self):
    """Return self: the object is its own iterator."""
    return self

def __next__(self):
    # Wraps around at 60000 (the MNIST training-set size), so this
    # iterator is infinite -- it never raises StopIteration.
    img = get_image(self.num)
    self.num = (self.num + 1) % 60000
    return img

def get_images():
    """Return all 60000 training images as a list.

    Fix: the original built ``[img for img in ImageIter()]``; since the
    iterator never raises StopIteration (it wraps modulo 60000), that
    comprehension never terminated. Iterate exactly one full pass instead.
    """
    return [get_image(i) for i in range(60000)]

if __name__ == "__main__":
    # hmix(3936, 41172, 0.365234375)
    image1 = io.load_idx("../data/train-images-idx3-ubyte.gz")[3936]
    image2 = io.load_idx("../data/train-images-idx3-ubyte.gz")[41172]
    # draw_digit(cancelFromLeft(image,0.6), 123)
    hmid = squarize(hmix(linearize(image1), linearize(image2), 0.365234375))
    # vmid = squarize(vmix(linearize(image1), linearize(image2), 0.2294921875))
    print_digit(image1)
    print_digit(image2)
    print_digit(hmid)
    # print_digit(vmid)
    # draw_digit(vmid,1337)
    # print(linearize(image))
    # dotted_zero()
def add_swel_frac(data_dir, metrics):
    """Add binary 'swel' and 'frac' indicator columns to *metrics*.

    Reads the test-set perturbation labels from ``t10k-pert-idx1-ubyte.gz``
    in *data_dir*; code 3 marks swelling and code 4 marks fracture.

    Args:
        data_dir: Directory containing the perturbation-label IDX file.
        metrics: Mapping (e.g. DataFrame) mutated in place with the new columns.
    """
    pert_path = os.path.join(data_dir, "t10k-pert-idx1-ubyte.gz")
    test_pert = io.load_idx(pert_path)
    for column, code in (('swel', 3), ('frac', 4)):
        metrics[column] = (test_pert == code).astype(int)
morph = ImageMorphology(img, THRESHOLD, UP_FACTOR) out_imgs = [morph.downscale(morph.binary_image)] + \ [morph.downscale(pert(morph)) for pert in PERTURBATIONS] return out_imgs if __name__ == '__main__': raw_dir = "/vol/biomedic/users/dc315/mnist/raw" dataset_root = "/vol/biomedic/users/dc315/mnist_new" dataset_names = ["plain", "thin", "thic", "swel", "frac"] pool = multiprocessing.Pool() for subset in ["train", "t10k"]: imgs_filename = f"{subset}-images-idx3-ubyte.gz" labels_filename = f"{subset}-labels-idx1-ubyte.gz" raw_imgs = io.load_idx(os.path.join(raw_dir, imgs_filename)) gen = pool.imap(process_image, enumerate(raw_imgs), chunksize=100) try: import tqdm gen = tqdm.tqdm(gen, total=len(raw_imgs), unit='img', ascii=True) except ImportError: def plain_progress(g): print(f"\rProcessing images: 0/{len(raw_imgs)}", end='') for i, res in enumerate(g): print(f"\rProcessing images: {i + 1}/{len(raw_imgs)}", end='') yield res print()
if __name__ == '__main__': data_root = "/vol/biomedic/users/dc315/mnist" dataset_names = ["plain", "thin", "thic", "swel", "frac"] pairings = [(0, 1, 2), (0, 3, 4)] for pairing in pairings[1:]: for subset in ["train", "t10k"]: labels_filename = f"{subset}-labels-idx1-ubyte.gz" images_filename = f"{subset}-images-idx3-ubyte.gz" metrics_filename = f"{subset}-morpho.csv" pert_filename = f"{subset}-pert-idx1-ubyte.gz" data_dirs = [os.path.join(data_root, dataset_names[i]) for i in pairing] imgs_paths = [os.path.join(data_dir, images_filename) for data_dir in data_dirs] metrics_paths = [os.path.join(data_dir, metrics_filename) for data_dir in data_dirs] all_images = np.array([io.load_idx(path) for path in imgs_paths]) all_metrics = [pd.read_csv(path, index_col='index') for path in metrics_paths] num = all_images[0].shape[0] indices = np.random.choice(len(pairing), size=num) pert = np.asarray(pairing)[indices] inter_images = all_images[indices, np.arange(num)] inter_metrics = interleave_dfs(all_metrics, pert, pairing) inter_dir = os.path.join(data_root, '+'.join([dataset_names[i] for i in pairing])) print(f"Saving results to {inter_dir}/...") os.makedirs(inter_dir, exist_ok=True) inter_pert_path = os.path.join(inter_dir, pert_filename) inter_images_path = os.path.join(inter_dir, images_filename) inter_metrics_path = os.path.join(inter_dir, metrics_filename) inter_labels_path = os.path.join(inter_dir, labels_filename)
morph = ImageMorphology(img, THRESHOLD, UP_FACTOR) out_imgs = [morph.downscale(morph.binary_image)] + \ [morph.downscale(pert(morph)) for pert in PERTURBATIONS] return out_imgs if __name__ == '__main__': raw_dir = "/vol/biomedic/users/dc315/mnist/raw" dataset_root = "/vol/biomedic/users/dc315/mnist_new" dataset_names = ["plain", "thin", "thic", "swel", "frac"] pool = multiprocessing.Pool() for subset in ["train", "t10k"]: imgs_filename = f"{subset}-images-idx3-ubyte.gz" labels_filename = f"{subset}-labels-idx1-ubyte.gz" raw_imgs = io.load_idx(os.path.join(raw_dir, imgs_filename)) gen = pool.imap(process_image, enumerate(raw_imgs), chunksize=100) try: import tqdm gen = tqdm.tqdm(gen, total=len(raw_imgs), unit='img', ascii=True) except ImportError: def plain_progress(g): print(f"\rProcessing images: 0/{len(raw_imgs)}", end='') for i, res in enumerate(g): print(f"\rProcessing images: {i + 1}/{len(raw_imgs)}", end='') yield res print() gen = plain_progress(gen) result = zip(*list(gen))
images_filename = f"{subset}-images-idx3-ubyte.gz" metrics_filename = f"{subset}-morpho.csv" pert_filename = f"{subset}-pert-idx1-ubyte.gz" data_dirs = [ os.path.join(data_root, dataset_names[i]) for i in pairing ] imgs_paths = [ os.path.join(data_dir, images_filename) for data_dir in data_dirs ] metrics_paths = [ os.path.join(data_dir, metrics_filename) for data_dir in data_dirs ] all_images = np.array([io.load_idx(path) for path in imgs_paths]) all_metrics = [ pd.read_csv(path, index_col='index') for path in metrics_paths ] num = all_images[0].shape[0] indices = np.random.choice(len(pairing), size=num) pert = np.asarray(pairing)[indices] inter_images = all_images[indices, np.arange(num)] inter_metrics = interleave_dfs(all_metrics, pert, pairing) inter_dir = os.path.join( data_root, '+'.join([dataset_names[i] for i in pairing])) print(f"Saving results to {inter_dir}/...") os.makedirs(inter_dir, exist_ok=True) inter_pert_path = os.path.join(inter_dir, pert_filename)