def post_process_predictions(self, img_df, input_type, colour_type):
    """Convert raw predictions in ``img_df`` into per-image nucleus labels.

    Writes the resulting label images into a new 'label' column of
    ``img_df`` (mosaic path) or delegates entirely to
    ``add_labels_to_dataframe`` (non-mosaic path).

    Args:
        img_df: DataFrame holding 'image', 'image_process' and 'prediction'
            columns for one batch of inputs.
        input_type: descriptive name of the input set, used in log messages.
        colour_type: descriptive colour-cluster name, used in log messages.
    """
    self.logger.info(
        'Post processing predicted %s %s images and generate labels for each image',
        input_type, colour_type)

    if not self.MOSAIC:
        # Non-mosaic path: the helper performs all labelling in place.
        add_labels_to_dataframe(img_df)
        return

    # Mosaic path:
    # 1. Rebuild the mosaic tiles from the raw images.
    # 2. Merge the tiles into the full image list so each prediction lines
    #    up with the (possibly mosaicked) image it belongs to.
    # 3. Post-process every (image, prediction) pair into a label image.
    # 4. Split the mosaic labels back to match the original images.
    # 5. Renumber so nuclei ids are consecutive within each label image.
    proc_images = img_df['image_process'].values.tolist()
    predictions = img_df['prediction'].values.tolist()
    n_original = len(proc_images)

    tiles, _, tile_dict, _ = mosaic.make_mosaic(
        img_df['image'].values.tolist(), None)
    tiles = [impr.preprocess_image(tile) for tile in tiles]
    (proc_images, _, predictions) = mosaic.merge_mosaic_images(
        tile_dict, tiles, proc_images, predictions)

    label_images = []
    for image, pred in zip(proc_images, predictions):
        # Channel 0 and channel 1 of each prediction are passed separately
        # to the post-processing step.
        label_images.append(
            impr.post_process_image(image, pred[:, :, 0], pred[:, :, 1]))

    label_images = mosaic.split_merged_mosaic(tile_dict, label_images,
                                              n_original)
    img_df['label'] = pd.Series(
        [renumber_labels(lbl) for lbl in label_images]).values
def batched_predictions(self, img_df, models, input_type, batch_size=100):
    """Predict full-size outputs for every image in ``img_df``, in batches.

    Args:
        img_df: DataFrame holding 'image', 'image_process' and 'size'
            columns for the images to predict on.
        models: models whose predictions are averaged together.
        input_type: descriptive name of the input set, used in log messages.
        batch_size: maximum number of original images per prediction batch.
            Kept modest (default 100) because predicting on the whole set at
            once has caused a Python MemoryError for large datasets.

    Returns:
        List of full-size prediction arrays, one per row of ``img_df``.
    """
    def image_batches(images, sizes, batch_size):
        # Iterator to chunk images/sizes into batches.
        for i in range(0, len(images), batch_size):
            yield (images[i:i + batch_size], sizes[i:i + batch_size])

    predictions_full_size = []
    all_images = img_df['image_process'].values.tolist()
    if self.MOSAIC:
        self.logger.info('Forming mosaics from %d input %s images...',
                         len(all_images), input_type)
        # Mosaics are formed on the raw image, since individual parts of a
        # mosaic may have been altered differently during pre-processing.
        mosaic_images, _, mosaic_dict, not_combined = mosaic.make_mosaic(
            img_df['image'].values.tolist(), None)
        mosaic_images = [impr.preprocess_image(x) for x in mosaic_images]
        self.logger.info(
            'Found %d 4x4 image mosaics, %d images could not be combined into mosaics.',
            len(mosaic_images), len(not_combined))
        self.logger.debug('Mosaic dictionary: %s', mosaic_dict)
        self.logger.debug(
            'Images that could not be combined into mosaics: %s',
            str(not_combined))
        # Any images not included in the mosaic images should be from the
        # list of pre-processed images
        (all_images, all_sizes, _) = mosaic.merge_mosaic_images(
            mosaic_dict, mosaic_images, all_images)
        self.logger.info('Total of %d images after mosaic processing.',
                         len(all_images))
        # Release the tile list before the memory-heavy prediction loop.
        mosaic_images = None
    else:
        all_sizes = img_df['size'].values

    # Split the total set of images into smaller batches. For large datasets
    # (i.e. after applying windowing and rotation), trying to do all images
    # at once encountered a Python MemoryError.
    self.logger.info(
        'Predict on %s images in batches of up to %d original images...',
        input_type, batch_size)
    for (batch, sizes) in image_batches(all_images, all_sizes, batch_size):
        predict_inputs = self.build_model_prediction_inputs(batch)
        predictions = average_model_predictions(predict_inputs, models,
                                                self.WEIGHTS)
        predictions_full_size.extend(
            predict_restore_to_fullsize(predictions, sizes))
        # Drop each batch's raw predictions promptly to bound peak memory.
        del predictions

    if self.MOSAIC:
        self.logger.info('Re-forming full-size images from mosaics.')
        input_len = len(img_df['image_process'].values.tolist())
        predictions_full_size = mosaic.split_merged_mosaic(
            mosaic_dict, predictions_full_size, input_len)
    return predictions_full_size
def build_model_training_inputs(self, cluster_df, cluster_ix):
    """Assemble the (X_train, Y_train) arrays for one image cluster.

    Pipeline: optional mosaic assembly -> augmentation -> windowing into
    (IMG_HEIGHT, IMG_WIDTH) tiles -> optional rotation/mirror amplification
    capped by MAX_TRAIN_SIZE.

    Args:
        cluster_df: DataFrame for one cluster; reads its 'image',
            'image_process' and 'mask_train' columns.
        cluster_ix: cluster index; when equal to ``self.COLOUR_IX`` the
            original 3-channel colour images are also fed to augmentation.

    Returns:
        Tuple ``(X_train, Y_train)`` of windowed (and possibly
        rotated/flipped) training images and masks.
    """
    X_train_3channel = []
    all_proc_images = cluster_df['image_process'].values.tolist()
    num_orig_images = len(all_proc_images)
    all_masks = cluster_df['mask_train'].values.tolist()
    if self.MOSAIC:
        self.logger.info('Forming mosaics from %d input images...',
                         len(all_proc_images))
        # Mosaics are formed on the raw image, since individual parts of a
        # mosaic may have been altered differently during pre-processing.
        mosaic_images, _, mosaic_dict, not_combined = mosaic.make_mosaic(
            cluster_df['image'].values.tolist(), None)
        self.logger.info(
            'Found %d 4x4 image mosaics, %d images could not be combined into mosaics.',
            len(mosaic_images), len(not_combined))
        self.logger.debug('Mosaic dictionary: %s', mosaic_dict)
        self.logger.debug(
            'Images that could not be combined into mosaics: %s',
            str(not_combined))
        # Augmentation needs the original 3-channel colour image in some
        # cases too
        if cluster_ix == self.COLOUR_IX:
            (X_train_3channel, _, _) = mosaic.merge_mosaic_images(
                mosaic_dict, mosaic_images,
                cluster_df['image'].values.tolist())
        mosaic_images = [impr.preprocess_image(x) for x in mosaic_images]
        (X_train, _, Y_train) = mosaic.merge_mosaic_images(
            mosaic_dict, mosaic_images, all_proc_images, all_masks)
        self.logger.info('Total of %d images after mosaic processing.',
                         len(X_train))
        # Drop the tile list so memory can be reclaimed before augmentation.
        mosaic_images = None
    else:
        X_train = all_proc_images
        Y_train = all_masks
        if cluster_ix == self.COLOUR_IX:
            X_train_3channel = cluster_df['image'].values.tolist()
    self.logger.info('%d images of the original training data',
                     len(X_train))
    if len(X_train) > 0:
        (X_train, Y_train) = self.augment_training_inputs(
            X_train, Y_train, X_train_3channel)
    # Colour images are no longer needed after augmentation; release them.
    X_train_3channel = None
    self.logger.info('Windowing on training data')
    X_train = window_images(X_train, self.IMG_HEIGHT, self.IMG_WIDTH)
    Y_train = window_images(Y_train, self.IMG_HEIGHT, self.IMG_WIDTH)
    self.logger.info('%d images to the training data after windowing',
                     X_train.shape[0])
    # Rotations/flips moved here instead of in the main augmentation loop,
    # to ensure all augmented samples are also mirrored/flipped.
    if self.ROTATE_IMAGES and len(X_train) > 0:
        self.logger.info('Rotate and mirror train images')
        # NOTE(review): presumably augment_max yields 8 variants per window
        # (4 rotations x 2 mirrorings) -- confirm against augment_max.
        rotate_amplify_rate = 8
        num_windows = X_train.shape[0]
        estimated_images = num_windows * rotate_amplify_rate
        if estimated_images > self.MAX_TRAIN_SIZE:
            # Rotating everything would exceed MAX_TRAIN_SIZE, so only
            # amplify a leading slice of the windows and keep the rest as-is.
            max_windows_to_rotate = int(self.MAX_TRAIN_SIZE /
                                        rotate_amplify_rate)
            self.logger.info(
                'Only rotating the first %d windows to reduce training size.',
                max_windows_to_rotate)
            augment_half_X = augment_max(X_train[0:max_windows_to_rotate])
            X_train = np.concatenate(
                (augment_half_X, X_train[max_windows_to_rotate:]), axis=0)
            # Free the intermediate array before repeating the step for Y,
            # keeping peak memory down.
            augment_half_X = None
            augment_half_Y = augment_max(Y_train[0:max_windows_to_rotate])
            Y_train = np.concatenate(
                (augment_half_Y, Y_train[max_windows_to_rotate:]), axis=0)
            augment_half_Y = None
        else:
            X_train = augment_max(X_train)
            Y_train = augment_max(Y_train)
        self.logger.info(
            '%d images to the training data after rotations/flips',
            X_train.shape[0])
    if len(X_train) > 0:
        self.logger.info('Final augmentation rate is %d',
                         int(X_train.shape[0] / num_orig_images))
    return (X_train, Y_train)