def split_data(in_dir, out_dir, n=5):
    """Split each class subdirectory of in_dir into n folds under out_dir.

    Creates out_dir/split_0 ... split_{n-1}, each containing one
    subdirectory per class with an equal-sized slice of that class's
    images copied into it.

    in_dir: directory whose immediate subdirectories are the classes.
    out_dir: destination directory for the split_* folders.
    n: number of folds (default 5).
    """
    # Count images in subdirs and verify equal amounts
    sub_dirs = get_subdirs(in_dir)
    image_lists = {basename(sub_dir): find_images(sub_dir, extensions=['*.gif'])
                   for sub_dir in sub_dirs}
    no_imgs = np.asarray([len(x) for x in image_lists.values()])

    if no_imgs.size == 0:
        # No class subdirectories found - nothing to split
        return

    # BUG FIX: the original asserted np.array_equal(no_imgs, no_imgs),
    # comparing the array with itself (always true), so unequal class
    # sizes were never caught. Compare every count against the first,
    # and stop instead of splitting with a bogus split_size.
    if not np.all(no_imgs == no_imgs[0]):
        print("Number of images in directories '{}' must be equal".format(
            image_lists.keys()))
        return

    split_size = int(round(no_imgs[0] / float(n)))

    for i in range(n):
        # Create split directory
        split_dir = make_sub_dir(out_dir, 'split_{}'.format(i))

        for dir_name, image_list in image_lists.items():
            # i-th contiguous slice of this class's images
            sub_list = image_list[i * split_size:(i * split_size) + split_size]
            img_dir = make_sub_dir(split_dir, dir_name)
            for img in sub_list:
                copy(img, img_dir)
def run(self):
    """Preprocess all input images (optionally in parallel), then split
    the results into training and validation sets.

    Reads self.input_dir, self.pipeline, self.processes; delegates the
    split to self.train_val_split() / self.random_sample().
    """
    # Get paths to all images
    im_files = find_images(join(self.input_dir, '*'))
    assert (len(im_files) > 0)

    # NOTE(review): membership is tested via .keys() because self.pipeline
    # may be a config object that exposes keys() without __contains__.
    if 'augmentation' in self.pipeline.keys():
        print("Starting preprocessing ({} processes)".format(self.processes))
        optimization_pool = Pool(self.processes)
        subprocess = partial(preprocess, params=self)
        try:
            optimization_pool.map(subprocess, im_files)
        finally:
            # Always release worker processes (the original never closed
            # the pool; the map() result was also unused).
            optimization_pool.close()
            optimization_pool.join()
    else:
        print("Using previously augmented data")

    # Create training and validation (imbalanced)
    print("Splitting into training/validation")
    try:
        train_imgs, val_imgs = self.train_val_split()
        self.random_sample(train_imgs, val_imgs)
    except AssertionError:
        # Message typo fixed: "one more classes" -> "one or more classes"
        print("No images found in one or more classes - unable to split training and validation")
        print(self.class_distribution)
        # Explicit non-zero status instead of the bare site-provided exit()
        raise SystemExit(1)
def unique_images(in_dir):
    """Return one (class, image) pair per distinct imID found in in_dir.

    Builds a metadata frame for every image in the directory, groups by
    imID, and keeps the first class/image of each group.
    """
    metadata = [image_to_metadata(path) for path in find_images(join(in_dir, '*'))]
    frame = pd.DataFrame(data=metadata)
    return [(rows['class'].iloc[0], rows['image'].iloc[0])
            for _, rows in frame.groupby('imID')]
def train_val_split(self):
    """Split augmented images into training/validation sets, per class,
    keeping all images of a patient on the same side of the split.

    Returns:
        (train_imgs, val_imgs): parallel lists with one sub-list of image
        paths per class in CLASSES.
    Raises:
        AssertionError: if any class has no augmented images, or if the
        split leaves a class empty on either side.
    """
    # One bucket per class. BUG FIX: the original hard-coded three empty
    # lists ([[], [], []]) while iterating CLASSES, which breaks whenever
    # len(CLASSES) != 3.
    train_imgs = [[] for _ in CLASSES]
    val_imgs = [[] for _ in CLASSES]

    for cidx, class_ in enumerate(CLASSES):
        # Get all augmented images per class
        aug_imgs = find_images(join(self.augment_dir, class_))
        assert (len(aug_imgs) > 0)

        # Create dataframe of per-image metadata
        patient_metadata = pd.DataFrame(
            data=[image_to_metadata(img) for img in aug_imgs])

        # Group images by patient, sorted by total images per patient
        # (largest first), so training fills up with whole patients.
        grouped = [(data, len(data))
                   for _, data in patient_metadata.groupby('subjectID')]
        grouped = sorted(grouped, key=lambda x: x[1], reverse=True)

        # Calculate how many patients to add to training
        total_images = len(aug_imgs)
        no_train_imgs = np.floor(
            float(total_images) * self.pipeline.train_split)
        cum_sum = np.cumsum([g[1] for g in grouped])
        # ROBUSTNESS: default prevents StopIteration when every patient
        # fits inside the training budget (the validation-side assert
        # below will then report the empty split).
        no_train_patients = next(
            (i for i, c in enumerate(cum_sum) if c > no_train_imgs),
            len(grouped))

        # Create validation and training
        for idx, (group_df, _) in enumerate(grouped):
            images = [row['image'] for row in group_df.to_dict(orient='records')]
            if idx >= no_train_patients:
                val_imgs[cidx].extend(images)
            else:
                train_imgs[cidx].extend(images)

    # Ensure that we ended up with some data in both groups
    assert (all(len(tr_class) > 0 for tr_class in train_imgs))
    assert (all(len(v_class) > 0 for v_class in val_imgs))

    return train_imgs, val_imgs
def ensemble(self, results):
    """Combine per-model segmentations into mean and max ensemble images.

    results: dict mapping model ID -> directory of that model's segmented
        images. Images are matched across models by sorted filename order.
    Writes a rounded-mean image to self.mean_dir and a pixel-wise max
    image to self.max_dir for every matched set.
    """
    if self.evaluate in results:  # plain dict membership; no .keys() needed
        results.pop(self.evaluate)  # don't include the evaluation data in the ensembling

    # Sorted listings so the i-th image from each model corresponds to the
    # same source image. Only the values are needed (PERF: no .items()).
    segmented_images = [sorted(find_images(d)) for d in results.values()]

    for seg_images in zip(*segmented_images):
        print(seg_images)
        im_name = basename(seg_images[0])
        # Stack the per-model outputs depth-wise: last axis = model index
        seg_arr = np.dstack([np.asarray(Image.open(seg)) for seg in seg_images])

        # Pixel-wise mean, rounded back to uint8 for saving
        mean_image = np.round(np.mean(seg_arr, axis=2)).astype(np.uint8)
        Image.fromarray(mean_image).save(join(self.mean_dir, im_name))

        # Pixel-wise max across models
        max_image = np.max(seg_arr, axis=2)
        Image.fromarray(max_image).save(join(self.max_dir, im_name))
def segment_all(self, img_dir, stride=(4, 4)):
    """Segment every image in img_dir with each model, then ensemble.

    img_dir: directory containing the images to segment.
    stride: (width, height) stride passed to each U-Net. Defaults to the
        previously hard-coded (4, 4), so existing callers are unaffected.
    """
    result_dirs = {}
    imgs = find_images(img_dir)

    for i, model_dir in enumerate(self.models):
        print("Instantiating model #{}: {}".format(i + 1, model_dir))

        # Create directory to store segmented images
        result_dir = join(self.out_dir, str(i))
        print("Segmented images will be written to '{}'".format(result_dir))

        # Identifier for this particular model, mapped to its results dir
        model_id = basename(model_dir.rstrip(sep))
        result_dirs[model_id] = result_dir

        model = SegmentUnet(result_dir, model_dir, stride=stride)  # instantiate U-Net
        model.segment_batch(imgs)  # segment images

    self.ensemble(result_dirs)  # combine the result of the segmentations
np.uint16)[::-1]  # NOTE(review): tail of a function whose head is outside this view (presumably circular_mask, per the __main__ block below - confirm)
    retina_center[0] += x_shift
    # Generate a circle of the approximate size, centered based on the guide mask
    c_mask = np.zeros(img.shape)
    cv2.circle(c_mask, tuple(retina_center), int(radius * cf), (1, 1, 1), -1, 8, 0)
    return c_mask


def apply_mask(im, mask):
    """Zero out pixels of im where mask is falsy, then move axis 0 last."""
    # NOTE(review): np.bool is deprecated/removed in modern NumPy (>=1.24);
    # plain bool would be needed there - left unchanged here.
    im[np.invert(mask.astype(np.bool))] = 0
    # Transpose (1, 2, 0): assumes im is channels-first (C, H, W) -> (H, W, C)
    # - TODO confirm against callers
    return np.transpose(im, (1, 2, 0))


if __name__ == "__main__":

    import sys

    # Usage: <script> <input_dir> <out_dir>
    out_dir = sys.argv[2]

    # For every image found, compute its circular mask and save it next to
    # out_dir as '<name>_mask.gif' (binary 0/255 uint8 image).
    for im_path in find_images(sys.argv[1]):
        im = np.asarray(Image.open(im_path))
        mask = circular_mask(im).astype(np.uint8) * 255
        _, file_name = split(im_path)
        name, ext = splitext(file_name)
        Image.fromarray(mask).save(join(out_dir, name + '_mask.gif'))
if __name__ == '__main__':

    from argparse import ArgumentParser

    # CLI: threshold each input image and prune connected components
    # smaller than the given size, writing results to the output directory.
    cli = ArgumentParser()
    cli.add_argument('-i', '--input-dir', dest='in_dir', required=True)
    cli.add_argument('-o', '--out-dir', dest='out_dir', required=True)
    cli.add_argument('-t', '--thresh', dest='thresh', help="0 < thresh < 255",
                     type=int, default=200)
    cli.add_argument('-s', '--smallest', dest='smallest',
                     help="Smallest object size allowed", type=int, default=10)
    opts = cli.parse_args()

    for image_path in find_images(opts.in_dir):
        image_name = basename(image_path)
        raw = cv2.imread(image_path)
        # Threshold + remove objects below the minimum size
        cleaned = binary_morph(raw, thresh=opts.thresh, min_size=opts.smallest)
        cv2.imwrite(join(opts.out_dir, image_name), cleaned)
help='retina-unet dir', dest='model', required=True)  # NOTE(review): continuation of a parser.add_argument call whose opening is outside this view
parser.add_argument('-e', '--erode',
                    help='Size of structuring element for mask erosion',
                    dest='erode', type=int, default=10)
parser.add_argument('-s', '--stride', help="Stride dimensions (width, height)",
                    type=int, default=8)
args = parser.parse_args()

# Get list of images to segment: a directory yields all images within it,
# a single file yields just that file.
data = []
if isdir(args.images):
    data.extend(find_images(join(args.images)))
elif isfile(args.images):
    data.append(args.images)
else:
    raise IOError("Please specify a valid image path or folder of images")

# The single --stride value is used for both dimensions (square stride)
s = SegmentUnet(args.out_dir, args.model, stride=(args.stride, args.stride),
                erode=args.erode)
s.segment_batch(data)
type=int, default=10)  # NOTE(review): continuation of a parser.add_argument call whose opening is outside this view
parser.add_argument('-s', '--stride', help="Stride dimensions (width, height)",
                    type=int, default=8)
args = parser.parse_args()

# Square stride built from the single --stride value
unet = SegmentUnet(args.model, out_dir=args.out_dir,
                   stride=(args.stride, args.stride), erode=args.erode)

# Get list of images to segment
data = []  # NOTE(review): this list is never used below - confirm and remove
if isdir(args.images):
    # Batch mode: segment every image found in the directory
    results = unet.segment_batch(find_images(args.images))
    if results:
        # transpose((1, 2, 0)): presumably stacks per-image results with
        # the image index last - TODO confirm segment_batch's return layout
        results = np.asarray(results).transpose((1, 2, 0))
elif isfile(args.images):
    # Single-image mode: segment one file and optionally visualize it
    seg_result = segment(np.asarray(Image.open(args.images)), unet)
    if args.out_dir:
        visualize(seg_result, join(args.out_dir, basename(args.images)))
else:
    raise IOError("Please specify a valid image path or folder of images")
dest='images', required=True)  # NOTE(review): continuation of a parser.add_argument call whose opening is outside this view
parser.add_argument('-o', '--out-dir', help="Output directory", dest="out_dir",
                    required=True)
parser.add_argument('-u', '--unet', help='retina-unet dir', dest='model',
                    required=True)
parser.add_argument('-s', '--stride', help="Stride dimensions (width, height)",
                    nargs='*', default=(8, 8))
args = parser.parse_args()

# Get list of images to segment: a directory yields all images within it,
# a single file yields just that file.
data = []
if isdir(args.images):
    data.extend(find_images(args.images))
elif isfile(args.images):
    data.append(args.images)
else:
    raise IOError("Please specify a valid image path or folder of images")

# NOTE(review): with nargs='*' and no type=, args.stride is a list of
# strings when passed on the command line (the default stays the tuple
# (8, 8)) - verify SegmentUnet accepts both forms
s = SegmentUnet(args.out_dir, args.model, stride=args.stride)
s.segment_batch(data)