Example #1
    def __init__(self, root_dir, train: bool = True):
        self.root_dir = root_dir
        # MNIST file names start with "train" for the training set and "t10k" for the test set.
        prefix = "train" if train else "t10k"
        images_filename = prefix + "-images-idx3-ubyte.gz"
        labels_filename = prefix + "-labels-idx1-ubyte.gz"
        self.images = torch.from_numpy(
            io.load_idx(os.path.join(self.root_dir, images_filename)))
        self.labels = torch.from_numpy(
            io.load_idx(os.path.join(self.root_dir, labels_filename)))
        super().__init__(self.images, self.labels)
Example #2
def measure_dir(data_dir, pool):
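    """Compute morphometrics for the MNIST test and training images and save them as CSV files."""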
    for name in ['t10k', 'train']:
        in_path = os.path.join(data_dir, name + "-images-idx3-ubyte.gz")
        out_path = os.path.join(data_dir, name + "-morpho.csv")
        print(f"Processing MNIST data file {in_path}...")
        data = io.load_idx(in_path)
        df = measure.measure_batch(data, pool=pool, chunksize=100)
        df.to_csv(out_path, index_label='index')
        print(f"Morphometrics saved to {out_path}")
Example #3
def dotted_zero():
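    # Stamp a 2x2 white dot near the centre of training digit #56.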
    image = io.load_idx("../data/train-images-idx3-ubyte.gz")[56]
    print(linearize(image))
    image = baseline_set_pixel(image, 255, 16, 16)
    image = baseline_set_pixel(image, 255, 16, 15)
    image = baseline_set_pixel(image, 255, 15, 16)
    image = baseline_set_pixel(image, 255, 15, 15)
    print(linearize(image))
    draw_digit(image, 1000)
Example #4
def slashed_zero():
    image = io.load_idx("../data/train-images-idx3-ubyte.gz")[56]
    label = io.load_idx("../data/train-labels-idx1-ubyte.gz")[56]
    print(linearize(image))
    # Draw a three-pixel-wide diagonal slash across the digit.
    for offset in range(3):
        for k in range(10):
            image = baseline_set_pixel(image, 255, 10 + offset + k, 19 - k)

    print(linearize(image))
    print_digit(image)
    draw_digit(image, label)
Example #5
                                         scale=morph.scale)
            width = _measure_width(pert_morph)
            if abs(width - self.target_width) > self._tolerance:
                print(
                    f"!!! Incorrect width after transformation: {width:.1f}, "
                    f"expected {self.target_width:.1f}.")
                pert_hires_image = self(pert_morph)
        return pert_hires_image


if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from morphomnist import io, measure

    image_path = "/vol/biomedic/users/dc315/mnist/original/t10k-images-idx3-ubyte.gz"
    images = io.load_idx(image_path)

    # pert = SetThickness(3.5)
    # pert = SetSlant(np.deg2rad(30))
    pert = SetWidth(8, validate=True)
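    # Set the first 20 test digits to the target stroke width and print
    # their morphometrics before and after the transformation.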
    for n in range(20):
        print(n, "before:")
        measure.measure_image(images[n], verbose=True)

        morph = ImageMorphology(images[n], scale=16)
        pert_hires_image = pert(morph)
        pert_image = morph.downscale(pert_hires_image)

        print(n, "after:")
        measure.measure_image(pert_image, verbose=True)
Example #6
def get_image(i):
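    # Return training image i from the raw MNIST IDX file.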
    image = io.load_idx("../data/train-images-idx3-ubyte.gz")[i]
    return image
Example #7
def extract_images(n, m, l):
    # Draw every training digit with index in [n, m) whose label equals l.
    images = io.load_idx("../data/train-images-idx3-ubyte.gz")
    labels = io.load_idx("../data/train-labels-idx1-ubyte.gz")
    for i in range(n, m):
        if labels[i] == l:
            draw_digit(images[i], i)
Example #8
    def __iter__(self):
        return self

    def __next__(self):
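        # Return the next training image, wrapping around after 60,000 images.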
        img = get_image(self.num)
        self.num = (self.num + 1) % 60000
        return img


def get_images():
    # ImageIter cycles indefinitely, so take exactly one pass over the 60,000 training images.
    imgiter = ImageIter()
    return [next(imgiter) for _ in range(60000)]


if __name__ == "__main__":
    # hmix(3936, 41172, 0.365234375)
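    # Blend training digits 3936 and 41172 horizontally and print the result.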
    image1 = io.load_idx("../data/train-images-idx3-ubyte.gz")[3936]
    image2 = io.load_idx("../data/train-images-idx3-ubyte.gz")[41172]
    # draw_digit(cancelFromLeft(image,0.6), 123)
    hmid = squarize(hmix(linearize(image1), linearize(image2), 0.365234375))
    # vmid = squarize(vmix(linearize(image1), linearize(image2), 0.2294921875))

    print_digit(image1)
    print_digit(image2)
    print_digit(hmid)
    # print_digit(vmid)
    # draw_digit(vmid,1337)
    # print(linearize(image))
    # dotted_zero()
Example #9
def add_swel_frac(data_dir, metrics):
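    # In the perturbation-label file, code 3 marks swollen digits and code 4 marks fractured ones.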
    test_pert = io.load_idx(os.path.join(data_dir, "t10k-pert-idx1-ubyte.gz"))
    metrics['swel'] = (test_pert == 3).astype(int)
    metrics['frac'] = (test_pert == 4).astype(int)
Example #10
if __name__ == '__main__':
    data_root = "/vol/biomedic/users/dc315/mnist"
    dataset_names = ["plain", "thin", "thic", "swel", "frac"]
    pairings = [(0, 1, 2), (0, 3, 4)]
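    # Indices into dataset_names: (0, 1, 2) mixes plain/thin/thick, (0, 3, 4) mixes plain/swollen/fractured.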
    for pairing in pairings[1:]:
        for subset in ["train", "t10k"]:
            labels_filename = f"{subset}-labels-idx1-ubyte.gz"
            images_filename = f"{subset}-images-idx3-ubyte.gz"
            metrics_filename = f"{subset}-morpho.csv"
            pert_filename = f"{subset}-pert-idx1-ubyte.gz"

            data_dirs = [os.path.join(data_root, dataset_names[i]) for i in pairing]
            imgs_paths = [os.path.join(data_dir, images_filename) for data_dir in data_dirs]
            metrics_paths = [os.path.join(data_dir, metrics_filename) for data_dir in data_dirs]
            all_images = np.array([io.load_idx(path) for path in imgs_paths])
            all_metrics = [pd.read_csv(path, index_col='index') for path in metrics_paths]

            num = all_images[0].shape[0]
            indices = np.random.choice(len(pairing), size=num)
            pert = np.asarray(pairing)[indices]
            inter_images = all_images[indices, np.arange(num)]
            inter_metrics = interleave_dfs(all_metrics, pert, pairing)

            inter_dir = os.path.join(data_root, '+'.join([dataset_names[i] for i in pairing]))
            print(f"Saving results to {inter_dir}/...")
            os.makedirs(inter_dir, exist_ok=True)
            inter_pert_path = os.path.join(inter_dir, pert_filename)
            inter_images_path = os.path.join(inter_dir, images_filename)
            inter_metrics_path = os.path.join(inter_dir, metrics_filename)
            inter_labels_path = os.path.join(inter_dir, labels_filename)
Example #11
    morph = ImageMorphology(img, THRESHOLD, UP_FACTOR)
    out_imgs = [morph.downscale(morph.binary_image)] + \
               [morph.downscale(pert(morph)) for pert in PERTURBATIONS]
    return out_imgs


if __name__ == '__main__':
    raw_dir = "/vol/biomedic/users/dc315/mnist/raw"
    dataset_root = "/vol/biomedic/users/dc315/mnist_new"
    dataset_names = ["plain", "thin", "thic", "swel", "frac"]

    pool = multiprocessing.Pool()
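    # Perturb every raw image in parallel, showing a progress bar when tqdm is available.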
    for subset in ["train", "t10k"]:
        imgs_filename = f"{subset}-images-idx3-ubyte.gz"
        labels_filename = f"{subset}-labels-idx1-ubyte.gz"
        raw_imgs = io.load_idx(os.path.join(raw_dir, imgs_filename))

        gen = pool.imap(process_image, enumerate(raw_imgs), chunksize=100)
        try:
            import tqdm
            gen = tqdm.tqdm(gen, total=len(raw_imgs), unit='img', ascii=True)
        except ImportError:
            def plain_progress(g):
                print(f"\rProcessing images: 0/{len(raw_imgs)}", end='')
                for i, res in enumerate(g):
                    print(f"\rProcessing images: {i + 1}/{len(raw_imgs)}", end='')
                    yield res
                print()
            gen = plain_progress(gen)

        result = zip(*list(gen))