def predict_autocontext( self,
                         img,
                         mask,
                         nb_labels,
                         nb_autocontext,
                         debug=False,
                         return_all=False ):
    proba = np.ones((nb_labels,img.shape[0],img.shape[1],img.shape[2]),dtype='float32')
    proba /= nb_labels

    header = img.get_header()
    header['dim'][3] = nb_labels
    proba = irtk.Image(proba,header,squeeze=False)
    
    all_steps = []

    for k in xrange(nb_autocontext):
        knowledge = self.get_knowledge(img,proba,mask=mask)

        if debug:
            irtk.imwrite("knowledge_"+str(k)+".nii.gz", knowledge)

        forest = integralForest( folder=self.folder(k),
                                 test=self.params['test'],
                                 parallel=self.params['parallel'],
                                 nb_knowledge_layers=knowledge.shape[0] )
        proba = forest.predict_autocontext( img,
                                            knowledge,
                                            mask,
                                            self.params['ksampling'] )
        proba = irtk.Image(proba,header,squeeze=False)
        if return_all:
            all_steps.append( proba.copy() )
        if debug:
            irtk.imwrite("debug_"+str(k)+".nii.gz", proba)
        
        if k < 1:
            for i in xrange(proba.shape[0]):
                if i == 0:
                    proba[i] = 0
                else:
                    proba[i] = self.get_center(proba[i])
            
        if debug:
            print "done autocontext", k
        #     irtk.imwrite("debug_rescaled_"+str(k)+".nii.gz", proba)

    if not return_all:
        return proba
    else:
        return all_steps
    def fit( self,
             patient_ids,
             n_validation=5,
             min_dice_score=0.7,
             max_autocontext=10,
             start=0 ):
        """
        Train the classifier.
        """
        nb_labels = len(self.params['labels'])+1
        ## Preprocess data only once to speed up training
        ## (requires more memory)

        # split patients to get validation set
        np.random.shuffle(patient_ids)

        n_validation = min(len(patient_ids)/2,n_validation)
        
        training_patients = patient_ids[:-n_validation]
        validation_patients = patient_ids[-n_validation:]

        self.info['training_patients'] = training_patients
        self.info['validation_patients'] = validation_patients

        if self.params['verbose']:
            print "fitting with", len(training_patients), "training patients and", \
                len(validation_patients), "validation patients"

            print "doing preprocessing..."
        gc.collect()
        training_data = Parallel(n_jobs=self.params['n_jobs'])(delayed(self.params['training_preprocessing_function'])( patient_id,
                                                                                                  self.params['img_folder'],
                                                                                                  self.params['seg_folder'],
                                                                                                  self.params["resample"],
                                                                                                  online=False )
                                                               for patient_id in training_patients )

        if self.params['verbose']:
            print "learning"
            
        i = start
        dice_score = 0
        previous_dice_score = -1
        while ( i < max_autocontext and
                dice_score < min_dice_score and
                dice_score > previous_dice_score ):
            forest = integralForest( ntrees=self.params['n_estimators'],
                                     bagging=self.params['bootstrap'],
                                     max_depth=self.params['max_depth'],
                                     min_items=self.params['min_items'],
                                     nb_tests=self.params['nb_tests'],
                                     parallel=self.params['parallel'],
                                     test=self.params['test'],
                                     cx=self.params['cx'],  cy=self.params['cy'],  cz=self.params['cz'],
                                     dx=self.params['dx'],  dy=self.params['dy'],  dz=self.params['dz'],
                                     nb_labels=nb_labels,
                                     nb_knowledge_layers=2*(nb_labels-1),
                                     ksampling=self.params['ksampling'],
                                     verbose=False,
                                     nfeatures=len(self.feature_mapping())
                                     )

            print "predicting training data"
            tmp_probas = Parallel(n_jobs=self.params['n_jobs'])(delayed(predict_autocontext)( self,
                                                                                              data['img'],
                                                                                              data['mask'],
                                                                                              nb_labels,
                                                                                              i )
                                                                for data in training_data )

            # tmp_probas = []
            # for data in training_data:
            #     tmp_probas.append( predict_autocontext( self,
            #                                             data['img'],
            #                                             data['mask'],
            #                                             data['extra_layers'],
            #                                             data['metadata'],
            #                                             nb_labels,
            #                                             all_ga[data['patient_id']],
            #                                             i ) )
            
            for data,proba in zip(training_data,tmp_probas):
                img = data['img']
                mask = data['mask']
                seg = data['seg'].copy()

                # kind of bootstrapping
                for l in range(proba.shape[0]):
                    correct = np.logical_and( proba[l] > 0.5, seg == l )
                    # remove half of the correctly classified voxels
                    points = np.transpose(np.nonzero(correct))
                    if len(points) > 10:
                        np.random.shuffle(points)
                        points = points[:len(points)/2]
                        seg[points[:,0],
                            points[:,1],
                            points[:,2]] = 255

                knowledge = self.get_knowledge(img,proba,mask=mask)
                forest.add_image_autocontext(img,seg,knowledge)

                # irtk.imwrite( "debug/"+data['patient_id']+"_knowledge"+str(i)+".nii.gz",
                #               knowledge )                  

            print "starting to learn autocontext",i
            forest.grow( self.params['nb_samples'],
                         self.params['nb_background_samples'] )

            print "writing"
            forest.write(self.folder(i))
            print "done", i

            feature_importance = forest.get_feature_importance()
            mapping = self.feature_mapping()

            if len(feature_importance) != len(mapping):
                print "ERROR: forest.get_feature_importance() returns", len(feature_importance), "features"
                print "       feature_mapping() expects", len(mapping), "features"
            
            feature_importance = dict( zip(mapping,
                                           feature_importance) )
            self.info['feature_importance'].append( feature_importance )
            print feature_importance
            
            i += 1
            
            # release memory
            del forest
            gc.collect()

            previous_dice_score = dice_score
            print "scoring"
            dice_score = self.score(validation_patients,nb_autocontext=i)
            improvement = dice_score - previous_dice_score
            
            self.info['validation_scores'].append(dice_score)
            self.info['improvements'].append(improvement)
            
            if self.params['verbose']:
                print "Validation score:", dice_score
                print "improvement:", improvement

        self.save()
#!/usr/bin/python

import cv, cv2
import numpy as np
import matplotlib.pyplot as plt
from lib.integralforest import integralForest
from glob import glob
import re

import irtk

# Forest configuration for this test script (patch test; cz and dz are 0).
forest = integralForest( ntrees=5,
                         bagging=0.5,
                         max_depth=7,
                         min_items=10,
                         nb_tests=1000,
                         parallel=-1,
                         test="patch",
                         cx=50,  cy=50,  cz=0,
                         dx=50,  dy=50,  dz=0 )

# Accumulators; initialised once (the original reset `segmentations` twice).
images = []
segmentations = []

# NOTE: machine-specific absolute paths.
raw_data_folder = "/home/kevin/Imperial/PhD/gitlab/ess/brain_test/raw_data/"
ground_truth = glob("/home/kevin/Imperial/PhD/gitlab/ess/brain_test/ground_truth/*")#[:50]

# Ground-truth file names are matched as <raw_file>_<x>_<y>_<w>_<h>.png.
# The pattern does not depend on the loop variable, so it is defined once
# instead of being reassigned on every iteration.
pattern =  r'/(?P<raw_file>[^/]+)_(?P<x>\d+)_(?P<y>\d+)_(?P<w>\d+)_(?P<h>\d+)\.png$'
for f in ground_truth:
    match = re.search( pattern, f )
# Example #4
# 0
import cv, cv2
import numpy as np
import matplotlib.pyplot as plt
from lib.integralforest import integralForest
from glob import glob
import re

import irtk

# Forest configuration for this test script (patch test; cz and dz are 0).
forest = integralForest(ntrees=5,
                        bagging=0.5,
                        max_depth=7,
                        min_items=10,
                        nb_tests=1000,
                        parallel=-1,
                        test="patch",
                        cx=50,
                        cy=50,
                        cz=0,
                        dx=50,
                        dy=50,
                        dz=0)

# Accumulators; initialised once (the original reset `segmentations` twice).
images = []
segmentations = []

# NOTE: machine-specific absolute paths.
raw_data_folder = "/home/kevin/Imperial/PhD/gitlab/ess/brain_test/raw_data/"
ground_truth = glob(
    "/home/kevin/Imperial/PhD/gitlab/ess/brain_test/ground_truth/*")  #[:50]
def predict_autocontext( self,
                         img,
                         mask,
                         extra_layers,
                         metadata,
                         nb_labels,
                         ga,
                         nb_autocontext,
                         debug=False,
                         return_all=False ):
    proba = np.ones((nb_labels,img.shape[0],img.shape[1],img.shape[2]),dtype='float32')
    proba /= nb_labels

    header = img.get_header()
    header['dim'][3] = nb_labels
    proba = irtk.Image(proba,header)
    
    all_steps = []

    for k in xrange(nb_autocontext):
        metadata = self.get_center_axis(proba,k)
        knowledge = self.get_knowledge(img,proba,extra_layers,mask=mask)

        if debug:
            irtk.imwrite("knowledge_"+str(k)+".nii.gz", knowledge)

        forest = integralForest( folder=self.folder(k),
                                 test=self.params['test'],
                                 parallel=self.params['parallel'],
                                 nb_knowledge_layers=knowledge.shape[0] )
        proba = forest.predict_autocontext( img,
                                            knowledge,
                                            mask,
                                            self.params['ksampling'],
                                            metadata )
        proba = irtk.Image(proba,header)
        if return_all:
            all_steps.append( proba.copy() )
        if debug:
            irtk.imwrite("debug_"+str(k)+".nii.gz", proba)
        
        if k < 1:
            for i in xrange(proba.shape[0]):
                if i == 0:
                    proba[i] = 0
                else:
                    proba[i] = self.groups[i-1].get_center(proba[i])
                
        #     # volume constraint
        #     # set not ventricule to 0
        #     tmp_proba = proba[1]
        #     for i in xrange(proba.shape[0]):
        #         if i == 1:
        #             continue
        #         proba[i] = 0
                
        #     # rescale ventricule
        #     target_volume = 182950.0*0.001**3
        #     #target_volume = 151807.0*0.001**3
            
        #     if k == 0:
        #         target_volume *= 0.5
        #     # elif k == 1:
        #     #     target_volume *= 0.25
        #     # elif k == 2:
        #     #     target_volume *= 0.5
                
        #     box_volume = float(proba.shape[1])*proba.header['pixelSize'][2]*float(proba.shape[2])*proba.header['pixelSize'][1]*float(proba.shape[3])*proba.header['pixelSize'][0]

        #     ratio = float(target_volume) / float(box_volume)

        #     #print "ratio", ratio
        #     q0 = mquantiles( tmp_proba.flatten(), prob=[1.0-ratio] )
        #     tmp_proba[proba[1]<q0] = q0
        #     tmp_proba -= tmp_proba.min()
        #     tmp_proba /= tmp_proba.max()

        #     lcc = irtk.largest_connected_component(tmp_proba,fill_holes=False)
        #     tmp_proba[lcc==0] = 0

        #     proba[1] = tmp_proba
            
        if debug:
            print "done autocontext", k
        #     irtk.imwrite("debug_rescaled_"+str(k)+".nii.gz", proba)

    if not return_all:
        return proba
    else:
        return all_steps
# Example #6
# 0
def predict_autocontext(self,
                        img,
                        mask,
                        extra_layers,
                        metadata,
                        nb_labels,
                        ga,
                        nb_autocontext,
                        debug=False,
                        return_all=False):
    proba = np.ones((nb_labels, img.shape[0], img.shape[1], img.shape[2]),
                    dtype='float32')
    proba /= nb_labels

    header = img.get_header()
    header['dim'][3] = nb_labels
    proba = irtk.Image(proba, header)

    all_steps = []

    for k in xrange(nb_autocontext):
        metadata = self.get_center_axis(proba, k)
        knowledge = self.get_knowledge(img, proba, extra_layers, mask=mask)

        if debug:
            irtk.imwrite("knowledge_" + str(k) + ".nii.gz", knowledge)

        forest = integralForest(folder=self.folder(k),
                                test=self.params['test'],
                                parallel=self.params['parallel'],
                                nb_knowledge_layers=knowledge.shape[0])
        proba = forest.predict_autocontext(img, knowledge, mask,
                                           self.params['ksampling'], metadata)
        proba = irtk.Image(proba, header)
        if return_all:
            all_steps.append(proba.copy())
        if debug:
            irtk.imwrite("debug_" + str(k) + ".nii.gz", proba)

        if k < 1:
            for i in xrange(proba.shape[0]):
                if i == 0:
                    proba[i] = 0
                else:
                    proba[i] = self.groups[i - 1].get_center(proba[i])

        #     # volume constraint
        #     # set not ventricule to 0
        #     tmp_proba = proba[1]
        #     for i in xrange(proba.shape[0]):
        #         if i == 1:
        #             continue
        #         proba[i] = 0

        #     # rescale ventricule
        #     target_volume = 182950.0*0.001**3
        #     #target_volume = 151807.0*0.001**3

        #     if k == 0:
        #         target_volume *= 0.5
        #     # elif k == 1:
        #     #     target_volume *= 0.25
        #     # elif k == 2:
        #     #     target_volume *= 0.5

        #     box_volume = float(proba.shape[1])*proba.header['pixelSize'][2]*float(proba.shape[2])*proba.header['pixelSize'][1]*float(proba.shape[3])*proba.header['pixelSize'][0]

        #     ratio = float(target_volume) / float(box_volume)

        #     #print "ratio", ratio
        #     q0 = mquantiles( tmp_proba.flatten(), prob=[1.0-ratio] )
        #     tmp_proba[proba[1]<q0] = q0
        #     tmp_proba -= tmp_proba.min()
        #     tmp_proba /= tmp_proba.max()

        #     lcc = irtk.largest_connected_component(tmp_proba,fill_holes=False)
        #     tmp_proba[lcc==0] = 0

        #     proba[1] = tmp_proba

        if debug:
            print "done autocontext", k
        #     irtk.imwrite("debug_rescaled_"+str(k)+".nii.gz", proba)

    if not return_all:
        return proba
    else:
        return all_steps