Example #1
0
def split_data(in_dir, out_dir, n=5):
    """Split the class sub-directories of in_dir into n equal parts.

    Produces out_dir/split_0 ... split_{n-1}, each mirroring the class
    sub-directory layout of in_dir, by copying consecutive slices of each
    class's image list.

    :param in_dir: directory containing one sub-directory per class
    :param out_dir: directory in which the split_* directories are created
    :param n: number of splits to create
    """
    # Count images in subdirs and verify equal amounts
    sub_dirs = get_subdirs(in_dir)

    image_lists = {
        basename(sub_dir): find_images(sub_dir, extensions=['*.gif'])
        for sub_dir in sub_dirs
    }
    no_imgs = np.asarray([len(x) for x in image_lists.values()])

    if no_imgs.size == 0:
        # Nothing to split; avoid an IndexError on no_imgs[0] below
        print("No class sub-directories found in '{}'".format(in_dir))
        return

    # BUG FIX: the original asserted array_equal(no_imgs, no_imgs), which is
    # always true. Warn (best-effort, as before) when the counts truly differ.
    if not np.all(no_imgs == no_imgs[0]):
        print("Number of images in directories '{}' must be equal".format(
            image_lists.keys()))

    split_size = int(round(no_imgs[0] / float(n)))

    for i in range(0, n):

        # Create split directory
        split_dir = make_sub_dir(out_dir, 'split_{}'.format(i))

        for dir_name, image_list in image_lists.items():

            # Consecutive, equally-sized slice of this class's images
            sub_list = image_list[i * split_size:(i * split_size) + split_size]
            img_dir = make_sub_dir(split_dir, dir_name)

            for img in sub_list:
                copy(img, img_dir)
Example #2
0
    def run(self):
        """Preprocess (augment) the input images if the pipeline requests it,
        then split the results into training and validation sets.

        Exits the process when the training/validation split fails because a
        class has no images.
        """
        # Get paths to all images
        im_files = find_images(join(self.input_dir, '*'))
        assert (len(im_files) > 0)

        if 'augmentation' in self.pipeline:  # '.keys()' was redundant
            print("Starting preprocessing ({} processes)".format(
                self.processes))
            optimization_pool = Pool(self.processes)
            try:
                # Bind the pipeline parameters; the pool maps over the images.
                # (Renamed from 'subprocess', which shadowed the stdlib module.)
                preprocess_one = partial(preprocess, params=self)
                optimization_pool.map(preprocess_one, im_files)
            finally:
                # BUG FIX: the pool was never closed, leaking worker processes
                optimization_pool.close()
                optimization_pool.join()
        else:
            print("Using previously augmented data")

        # Create training and validation (imbalanced)
        print("Splitting into training/validation")

        try:
            train_imgs, val_imgs = self.train_val_split()
            self.random_sample(train_imgs, val_imgs)
        except AssertionError:
            # train_val_split signals empty classes via AssertionError
            print("No images found in one or more classes - unable to split training and validation")
            print(self.class_distribution)
            exit()
Example #3
0
def unique_images(in_dir):
    """Return one (class, image) pair per distinct image ID found in in_dir.

    For every group of images sharing an 'imID', the first row's class and
    image path are taken as the representative.
    """
    metadata = pd.DataFrame(
        data=[image_to_metadata(path)
              for path in find_images(join(in_dir, '*'))])

    return [
        (group['class'].iloc[0], group['image'].iloc[0])
        for _, group in metadata.groupby('imID')
    ]
Example #4
0
    def train_val_split(self):
        """Split augmented images into training/validation sets, per class.

        Patients are kept whole: all images of one patient land on the same
        side of the split. Patients are sorted by image count (descending)
        and assigned to training until the cumulative image count exceeds
        the requested training fraction; the remainder go to validation.

        :return: (train_imgs, val_imgs) — parallel lists with one sub-list
                 of image paths per class in CLASSES
        :raises AssertionError: when a class has no augmented images, or a
                 class ends up empty on either side (callers catch this)
        """
        # One bucket per class (was hard-coded to exactly three buckets)
        train_imgs = [[] for _ in CLASSES]
        val_imgs = [[] for _ in CLASSES]

        for cidx, class_ in enumerate(CLASSES):

            # Get all augmented images per class
            aug_imgs = find_images(join(self.augment_dir, class_))
            assert (len(aug_imgs) > 0)

            # Create dataframe of per-image metadata
            patient_metadata = pd.DataFrame(
                data=[image_to_metadata(img) for img in aug_imgs])

            # Group images by patient, largest patients first, so training
            # fills up with the fewest patients
            grouped = [(data, len(data))
                       for _, data in patient_metadata.groupby('subjectID')]
            grouped = sorted(grouped, key=lambda x: x[1], reverse=True)

            # Index of the first patient whose cumulative image count
            # exceeds the desired number of training images
            total_images = len(aug_imgs)
            no_train_imgs = np.floor(
                float(total_images) * self.pipeline.train_split)
            cum_sum = np.cumsum([g[1] for g in grouped])
            # BUG FIX: next() without a default raised StopIteration when the
            # training fraction covered every patient; default to "all train"
            # (the empty-validation assert below still fires in that case).
            no_train_patients = next(
                (i for i, count in enumerate(cum_sum) if count > no_train_imgs),
                len(grouped))

            # First no_train_patients patients train, the rest validate
            for idx, (patient_df, _) in enumerate(grouped):
                dest = val_imgs if idx >= no_train_patients else train_imgs
                dest[cidx].extend(
                    row['image'] for row in patient_df.to_dict(orient='records'))

        # Ensure that we ended up with some data in both groups
        assert (all(len(tr_class) > 0 for tr_class in train_imgs))
        assert (all(len(v_class) > 0 for v_class in val_imgs))

        return train_imgs, val_imgs
Example #5
0
    def ensemble(self, results):
        """Combine per-model segmentations by pixel-wise mean and max.

        :param results: dict mapping model IDs to directories of segmented
            images; the evaluation entry (if present) is removed first.
            Mean images are written to self.mean_dir, max images to
            self.max_dir, both under the first model's image file name.
        """
        # Don't include the evaluation data in the ensembling
        # (pop with a default replaces the redundant key-membership check)
        results.pop(self.evaluate, None)

        # One sorted image list per model; sorting aligns the i-th image
        # across models so zip() pairs up the same input image
        segmented_images = [sorted(find_images(x)) for _, x in results.items()]

        for seg_images in zip(*segmented_images):

            print(seg_images)

            im_name = basename(seg_images[0])
            # Stack the models' segmentations depth-wise -> (..., n_models)
            seg_arr = np.dstack([np.asarray(Image.open(seg)) for seg in seg_images])

            # Pixel-wise mean (rounded back to uint8) ensemble
            mean_image = np.round(np.mean(seg_arr, axis=2)).astype(np.uint8)
            Image.fromarray(mean_image).save(join(self.mean_dir, im_name))

            # Pixel-wise max ensemble
            max_image = np.max(seg_arr, axis=2)
            Image.fromarray(max_image).save(join(self.max_dir, im_name))
Example #6
0
    def segment_all(self, img_dir):
        """Segment every image in img_dir with each model in self.models,
        then ensemble the per-model results."""
        imgs = find_images(img_dir)
        result_dirs = {}

        for idx, model_dir in enumerate(self.models):

            print("Instantiating model #{}: {}".format(idx+1, model_dir))

            # Each model writes its segmentations to its own numbered directory
            seg_dir = join(self.out_dir, str(idx))
            print("Segmented images will be written to '{}'".format(seg_dir))

            # Identifier for this particular model, mapped to its result dir
            model_id = basename(model_dir.rstrip(sep))
            result_dirs[model_id] = seg_dir

            # Instantiate the U-Net and segment the whole batch
            unet = SegmentUnet(seg_dir, model_dir, stride=(4, 4))
            unet.segment_batch(imgs)

        # Combine the result of the segmentations
        self.ensemble(result_dirs)
Example #7
0
            np.uint16)[::-1]
    retina_center[0] += x_shift

    # Generate a circle of the approximate size, centered based on the guide mask
    c_mask = np.zeros(img.shape)
    cv2.circle(c_mask, tuple(retina_center), int(radius * cf), (1, 1, 1), -1,
               8, 0)

    return c_mask


def apply_mask(im, mask):
    """Zero out im wherever mask is zero, then move the first axis last.

    im is modified in place; the return value is im with its axes reordered
    (0, 1, 2) -> (1, 2, 0) — presumably channel-first to channel-last
    (TODO: confirm against callers).

    :param im: ndarray with the same shape as mask
    :param mask: ndarray; non-zero entries mark the pixels to keep
    :return: im with masked-out pixels zeroed and axes transposed (1, 2, 0)
    """
    # BUG FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
    # use the builtin bool. '~' on a boolean array equals np.invert.
    im[~mask.astype(bool)] = 0
    return np.transpose(im, (1, 2, 0))


if __name__ == "__main__":

    # Usage: <script> <input_dir> <output_dir>
    import sys
    out_dir = sys.argv[2]

    # Compute and save a circular retina mask for every image found
    for im_path in find_images(sys.argv[1]):

        img = np.asarray(Image.open(im_path))
        # Scale the 0/1 mask up to an 8-bit image (0 or 255)
        mask = circular_mask(img).astype(np.uint8) * 255

        base = splitext(split(im_path)[1])[0]
        Image.fromarray(mask).save(join(out_dir, base + '_mask.gif'))
if __name__ == '__main__':

    from argparse import ArgumentParser

    # Command line: input/output directories plus morphology parameters
    parser = ArgumentParser()
    parser.add_argument('-i', '--input-dir', dest='in_dir', required=True)
    parser.add_argument('-o', '--out-dir', dest='out_dir', required=True)
    parser.add_argument('-t', '--thresh', dest='thresh',
                        help="0 < thresh < 255", type=int, default=200)
    parser.add_argument('-s', '--smallest', dest='smallest',
                        help="Smallest object size allowed", type=int,
                        default=10)
    args = parser.parse_args()

    # Threshold each image and prune objects smaller than args.smallest
    for im_path in find_images(args.in_dir):

        img = cv2.imread(im_path)
        cleaned = binary_morph(img, thresh=args.thresh, min_size=args.smallest)
        cv2.imwrite(join(args.out_dir, basename(im_path)), cleaned)
Example #9
0
                        help='retina-unet dir',
                        dest='model',
                        required=True)
    # Size of the structuring element used to erode the segmentation mask
    parser.add_argument('-e',
                        '--erode',
                        help='Size of structuring element for mask erosion',
                        dest='erode',
                        type=int,
                        default=10)
    # Single int, applied to both stride dimensions when building the tuple
    parser.add_argument('-s',
                        '--stride',
                        help="Stride dimensions (width, height)",
                        type=int,
                        default=8)
    args = parser.parse_args()

    # Get list of images to segment: a directory of images or a single file
    data = []
    if isdir(args.images):
        data.extend(find_images(join(args.images)))
    elif isfile(args.images):
        data.append(args.images)
    else:
        raise IOError("Please specify a valid image path or folder of images")

    # Instantiate the U-Net segmenter and run it over all collected images
    s = SegmentUnet(args.out_dir,
                    args.model,
                    stride=(args.stride, args.stride),
                    erode=args.erode)
    s.segment_batch(data)
Example #10
0
                        type=int,
                        default=10)
    # Single int, applied to both stride dimensions when building the tuple
    parser.add_argument('-s',
                        '--stride',
                        help="Stride dimensions (width, height)",
                        type=int,
                        default=8)
    args = parser.parse_args()

    # Instantiate the U-Net segmenter from the parsed options
    unet = SegmentUnet(args.model,
                       out_dir=args.out_dir,
                       stride=(args.stride, args.stride),
                       erode=args.erode)

    # Get list of images to segment
    data = []
    if isdir(args.images):

        # Directory input: segment every image found inside it
        results = unet.segment_batch(find_images(args.images))

        if results:
            # Reorder axes (0,1,2)->(1,2,0); presumably stacks per-image
            # results (N, H, W) into (H, W, N) — confirm segment_batch's return
            results = np.asarray(results).transpose((1, 2, 0))

    elif isfile(args.images):
        # Single-file input: segment just that image
        seg_result = segment(np.asarray(Image.open(args.images)), unet)

        if args.out_dir:
            # Save a visualization next to the output directory, if given
            visualize(seg_result, join(args.out_dir, basename(args.images)))
    else:
        raise IOError("Please specify a valid image path or folder of images")
Example #11
0
                        dest='images',
                        required=True)
    # Output directory for the segmented images
    parser.add_argument('-o',
                        '--out-dir',
                        help="Output directory",
                        dest="out_dir",
                        required=True)
    # Directory containing the trained retina-unet model
    parser.add_argument('-u',
                        '--unet',
                        help='retina-unet dir',
                        dest='model',
                        required=True)
    # NOTE(review): nargs='*' without type=int yields a list of strings,
    # unlike the sibling scripts that pass an int tuple — confirm that
    # SegmentUnet accepts this form
    parser.add_argument('-s',
                        '--stride',
                        help="Stride dimensions (width, height)",
                        nargs='*',
                        default=(8, 8))
    args = parser.parse_args()

    # Get list of images to segment: a directory of images or a single file
    data = []
    if isdir(args.images):
        data.extend(find_images(args.images))
    elif isfile(args.images):
        data.append(args.images)
    else:
        raise IOError("Please specify a valid image path or folder of images")

    # Run the U-Net over all collected images
    s = SegmentUnet(args.out_dir, args.model, stride=args.stride)
    s.segment_batch(data)