Example #1
    def post_process_predictions(self, img_df, input_type, colour_type):
        self.logger.info(
            'Post processing predicted %s %s images and generate labels for each image',
            input_type, colour_type)
        if self.MOSAIC:
            # Mosaic prediction steps:
            # 1. Make the list of mosaic images
            # 2. Merge the mosaic images into the list of all images (needed for post-processing), and at the
            #    same time, update the predictions so they match the mosaic images if needed.
            # 3. Perform post-processing to get the output labels (one label for each mosaic image)
            # 4. Split the mosaic labels to match the original images
            # 5. Re-label the split labels, to ensure all nuclei in a single label image are numbered consecutively
            all_images = img_df['image_process'].values.tolist()
            all_predictions = img_df['prediction'].values.tolist()
            input_len = len(all_images)
            mosaic_images, _, mosaic_dict, _ = mosaic.make_mosaic(
                img_df['image'].values.tolist(), None)
            mosaic_images = [impr.preprocess_image(x) for x in mosaic_images]
            (all_images, _, all_predictions) = mosaic.merge_mosaic_images(
                mosaic_dict, mosaic_images, all_images, all_predictions)
            labels = [
                impr.post_process_image(img, prediction[:, :, 0],
                                        prediction[:, :, 1])
                for img, prediction in zip(all_images, all_predictions)
            ]
            labels = mosaic.split_merged_mosaic(mosaic_dict, labels, input_len)
            labels = [renumber_labels(label_img) for label_img in labels]
            img_df['label'] = pd.Series(labels).values
        else:
            add_labels_to_dataframe(img_df)
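
The renumber_labels helper used in step 5 above is not shown on this page. As a rough sketch (an assumption, not the project's actual implementation), a relabeling step that makes the nuclei labels within one label image consecutive could look like this:

import numpy as np

def renumber_labels_sketch(label_img):
    # Hypothetical stand-in for renumber_labels: remap the non-zero label values
    # (0 is treated as background) to the consecutive integers 1..N, preserving order.
    relabeled = np.zeros_like(label_img)
    for new_value, old_value in enumerate(np.unique(label_img[label_img > 0]), start=1):
        relabeled[label_img == old_value] = new_value
    return relabeled
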
Example #2
    def batched_predictions(self, img_df, models, input_type):
        # Iterator to chunk images/sizes into batches.
        def image_batches(images, sizes, batch_size):
            for i in range(0, len(images), batch_size):
                yield (images[i:i + batch_size], sizes[i:i + batch_size])

        predictions_full_size = []
        all_images = img_df['image_process'].values.tolist()
        if self.MOSAIC:
            self.logger.info('Forming mosaics from %d input %s images...',
                             len(all_images), input_type)
            # Mosaics are formed from the raw images, since individual parts of a mosaic may have
            # been altered differently during pre-processing.
            mosaic_images, _, mosaic_dict, not_combined = mosaic.make_mosaic(
                img_df['image'].values.tolist(), None)
            mosaic_images = [impr.preprocess_image(x) for x in mosaic_images]
            self.logger.info(
                'Found %d 4x4 image mosaics, %d images could not be combined into mosaics.',
                len(mosaic_images), len(not_combined))
            self.logger.debug('Mosaic dictionary: %s', mosaic_dict)
            self.logger.debug(
                'Images that could not be combined into mosaics: %s',
                str(not_combined))

            # Any images that could not be combined into mosaics are taken from the list of pre-processed images.
            (all_images, all_sizes,
             _) = mosaic.merge_mosaic_images(mosaic_dict, mosaic_images,
                                             all_images)
            self.logger.info('Total of %d images after mosaic processing.',
                             len(all_images))
            mosaic_images = None
        else:
            all_sizes = img_df['size'].values

        # Split the total set of images into smaller batches. For large datasets (e.g. after applying
        # windowing and rotation), predicting on all images at once can raise a Python MemoryError.
        batch_size = 100
        self.logger.info(
            'Predict on %s images in batches of up to %d original images...',
            input_type, batch_size)
        for (batch, sizes) in image_batches(all_images, all_sizes, batch_size):
            predict_inputs = self.build_model_prediction_inputs(batch)
            predictions = average_model_predictions(predict_inputs, models,
                                                    self.WEIGHTS)
            predictions_full_size.extend(
                predict_restore_to_fullsize(predictions, sizes))
            # Free the per-batch predictions before moving on to the next batch.
            del predictions

        if self.MOSAIC:
            self.logger.info('Re-forming full-size images from mosaics.')
            input_len = len(img_df['image_process'].values.tolist())
            predictions_full_size = mosaic.split_merged_mosaic(
                mosaic_dict, predictions_full_size, input_len)

        return predictions_full_size
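
The image_batches generator above simply slices the image and size lists in lockstep. A minimal standalone illustration, using placeholder values instead of real images, shows how the chunking behaves:

def image_batches(images, sizes, batch_size):
    # Same chunking pattern as in batched_predictions above.
    for i in range(0, len(images), batch_size):
        yield (images[i:i + batch_size], sizes[i:i + batch_size])

dummy_images = list(range(250))       # stand-ins for full-size prediction inputs
dummy_sizes = [(256, 256)] * 250      # stand-ins for the original (height, width) pairs
print([len(batch) for batch, _ in image_batches(dummy_images, dummy_sizes, 100)])
# -> [100, 100, 50]
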
Example #3
    def build_model_training_inputs(self, cluster_df, cluster_ix):
        X_train_3channel = []
        all_proc_images = cluster_df['image_process'].values.tolist()
        num_orig_images = len(all_proc_images)
        all_masks = cluster_df['mask_train'].values.tolist()
        if self.MOSAIC:
            self.logger.info('Forming mosaics from %d input images...',
                             len(all_proc_images))
            # Mosaics are formed from the raw images, since individual parts of a mosaic may have
            # been altered differently during pre-processing.
            mosaic_images, _, mosaic_dict, not_combined = mosaic.make_mosaic(
                cluster_df['image'].values.tolist(), None)
            self.logger.info(
                'Found %d 4x4 image mosaics, %d images could not be combined into mosaics.',
                len(mosaic_images), len(not_combined))
            self.logger.debug('Mosaic dictionary: %s', mosaic_dict)
            self.logger.debug(
                'Images that could not be combined into mosaics: %s',
                str(not_combined))

            # Augmentation needs the original 3-channel colour image in some cases too
            if cluster_ix == self.COLOUR_IX:
                (X_train_3channel, _, _) = mosaic.merge_mosaic_images(
                    mosaic_dict, mosaic_images,
                    cluster_df['image'].values.tolist())

            mosaic_images = [impr.preprocess_image(x) for x in mosaic_images]
            (X_train, _,
             Y_train) = mosaic.merge_mosaic_images(mosaic_dict, mosaic_images,
                                                   all_proc_images, all_masks)
            self.logger.info('Total of %d images after mosaic processing.',
                             len(X_train))
            mosaic_images = None
        else:
            X_train = all_proc_images
            Y_train = all_masks
            if cluster_ix == self.COLOUR_IX:
                X_train_3channel = cluster_df['image'].values.tolist()

        self.logger.info('%d images of the original training data',
                         len(X_train))

        if len(X_train) > 0:
            (X_train,
             Y_train) = self.augment_training_inputs(X_train, Y_train,
                                                     X_train_3channel)
        X_train_3channel = None

        self.logger.info('Windowing on training data')
        X_train = window_images(X_train, self.IMG_HEIGHT, self.IMG_WIDTH)
        Y_train = window_images(Y_train, self.IMG_HEIGHT, self.IMG_WIDTH)
        self.logger.info('%d images to the training data after windowing',
                         X_train.shape[0])

        # Rotations/flips moved here instead of in the main augmentation loop, to ensure all
        # augmented samples are also mirrored/flipped.
        if self.ROTATE_IMAGES and len(X_train) > 0:
            self.logger.info('Rotate and mirror train images')
            rotate_amplify_rate = 8
            num_windows = X_train.shape[0]
            estimated_images = num_windows * rotate_amplify_rate
            if estimated_images > self.MAX_TRAIN_SIZE:
                max_windows_to_rotate = int(self.MAX_TRAIN_SIZE /
                                            rotate_amplify_rate)
                self.logger.info(
                    'Only rotating the first %d windows to reduce training size.',
                    max_windows_to_rotate)
                augment_half_X = augment_max(X_train[0:max_windows_to_rotate])
                X_train = np.concatenate(
                    (augment_half_X, X_train[max_windows_to_rotate:]), axis=0)
                augment_half_X = None
                augment_half_Y = augment_max(Y_train[0:max_windows_to_rotate])
                Y_train = np.concatenate(
                    (augment_half_Y, Y_train[max_windows_to_rotate:]), axis=0)
                augment_half_Y = None
            else:
                X_train = augment_max(X_train)
                Y_train = augment_max(Y_train)
            self.logger.info(
                '%d images to the training data after rotations/flips',
                X_train.shape[0])

        if len(X_train) > 0:
            self.logger.info('Final augmentation rate is %d',
                             int(X_train.shape[0] / num_orig_images))

        return (X_train, Y_train)
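
The rotation cap above keeps the amplified training set close to MAX_TRAIN_SIZE. With assumed (hypothetical) numbers, the arithmetic works out as follows; note that the final size can slightly exceed the cap because the unrotated remainder of the windows is kept as-is:

# Hypothetical values; MAX_TRAIN_SIZE and the window count depend on the actual configuration.
MAX_TRAIN_SIZE = 4000
rotate_amplify_rate = 8            # each window is assumed to yield 8 rotated/mirrored variants
num_windows = 1200

estimated_images = num_windows * rotate_amplify_rate               # 9600, exceeds the cap
max_windows_to_rotate = int(MAX_TRAIN_SIZE / rotate_amplify_rate)  # 500
final_size = (max_windows_to_rotate * rotate_amplify_rate
              + (num_windows - max_windows_to_rotate))             # 4000 + 700 = 4700
print(estimated_images, max_windows_to_rotate, final_size)
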