def predict_autocontext( self, img, mask, nb_labels, nb_autocontext, debug=False, return_all=False ): proba = np.ones((nb_labels,img.shape[0],img.shape[1],img.shape[2]),dtype='float32') proba /= nb_labels header = img.get_header() header['dim'][3] = nb_labels proba = irtk.Image(proba,header,squeeze=False) all_steps = [] for k in xrange(nb_autocontext): knowledge = self.get_knowledge(img,proba,mask=mask) if debug: irtk.imwrite("knowledge_"+str(k)+".nii.gz", knowledge) forest = integralForest( folder=self.folder(k), test=self.params['test'], parallel=self.params['parallel'], nb_knowledge_layers=knowledge.shape[0] ) proba = forest.predict_autocontext( img, knowledge, mask, self.params['ksampling'] ) proba = irtk.Image(proba,header,squeeze=False) if return_all: all_steps.append( proba.copy() ) if debug: irtk.imwrite("debug_"+str(k)+".nii.gz", proba) if k < 1: for i in xrange(proba.shape[0]): if i == 0: proba[i] = 0 else: proba[i] = self.get_center(proba[i]) if debug: print "done autocontext", k # irtk.imwrite("debug_rescaled_"+str(k)+".nii.gz", proba) if not return_all: return proba else: return all_steps
def fit( self, patient_ids, n_validation=5, min_dice_score=0.7, max_autocontext=10, start=0 ): """ Train the classifier. """ nb_labels = len(self.params['labels'])+1 ## Preprocess data only once to speed up training ## (requires more memory) # split patients to get validation set np.random.shuffle(patient_ids) n_validation = min(len(patient_ids)/2,n_validation) training_patients = patient_ids[:-n_validation] validation_patients = patient_ids[-n_validation:] self.info['training_patients'] = training_patients self.info['validation_patients'] = validation_patients if self.params['verbose']: print "fitting with", len(training_patients), "training patients and", \ len(validation_patients), "validation patients" print "doing preprocessing..." gc.collect() training_data = Parallel(n_jobs=self.params['n_jobs'])(delayed(self.params['training_preprocessing_function'])( patient_id, self.params['img_folder'], self.params['seg_folder'], self.params["resample"], online=False ) for patient_id in training_patients ) if self.params['verbose']: print "learning" i = start dice_score = 0 previous_dice_score = -1 while ( i < max_autocontext and dice_score < min_dice_score and dice_score > previous_dice_score ): forest = integralForest( ntrees=self.params['n_estimators'], bagging=self.params['bootstrap'], max_depth=self.params['max_depth'], min_items=self.params['min_items'], nb_tests=self.params['nb_tests'], parallel=self.params['parallel'], test=self.params['test'], cx=self.params['cx'], cy=self.params['cy'], cz=self.params['cz'], dx=self.params['dx'], dy=self.params['dy'], dz=self.params['dz'], nb_labels=nb_labels, nb_knowledge_layers=2*(nb_labels-1), ksampling=self.params['ksampling'], verbose=False, nfeatures=len(self.feature_mapping()) ) print "predicting training data" tmp_probas = Parallel(n_jobs=self.params['n_jobs'])(delayed(predict_autocontext)( self, data['img'], data['mask'], nb_labels, i ) for data in training_data ) # tmp_probas = [] # for data in training_data: # 
tmp_probas.append( predict_autocontext( self, # data['img'], # data['mask'], # data['extra_layers'], # data['metadata'], # nb_labels, # all_ga[data['patient_id']], # i ) ) for data,proba in zip(training_data,tmp_probas): img = data['img'] mask = data['mask'] seg = data['seg'].copy() # kind of bootstrapping for l in range(proba.shape[0]): correct = np.logical_and( proba[l] > 0.5, seg == l ) # remove half of the correctly classified voxels points = np.transpose(np.nonzero(correct)) if len(points) > 10: np.random.shuffle(points) points = points[:len(points)/2] seg[points[:,0], points[:,1], points[:,2]] = 255 knowledge = self.get_knowledge(img,proba,mask=mask) forest.add_image_autocontext(img,seg,knowledge) # irtk.imwrite( "debug/"+data['patient_id']+"_knowledge"+str(i)+".nii.gz", # knowledge ) print "starting to learn autocontext",i forest.grow( self.params['nb_samples'], self.params['nb_background_samples'] ) print "writing" forest.write(self.folder(i)) print "done", i feature_importance = forest.get_feature_importance() mapping = self.feature_mapping() if len(feature_importance) != len(mapping): print "ERROR: forest.get_feature_importance() returns", len(feature_importance), "features" print " feature_mapping() expects", len(mapping), "features" feature_importance = dict( zip(mapping, feature_importance) ) self.info['feature_importance'].append( feature_importance ) print feature_importance i += 1 # release memory del forest gc.collect() previous_dice_score = dice_score print "scoring" dice_score = self.score(validation_patients,nb_autocontext=i) improvement = dice_score - previous_dice_score self.info['validation_scores'].append(dice_score) self.info['improvements'].append(improvement) if self.params['verbose']: print "Validation score:", dice_score print "improvement:", improvement self.save()
#!/usr/bin/python
# Script set-up: build an integralForest using the "patch" test and list the
# ground-truth files whose names are parsed below.

from glob import glob
import re

import cv
import cv2
import numpy as np
import matplotlib.pyplot as plt
import irtk

from lib.integralforest import integralForest

forest = integralForest(ntrees=5,
                        bagging=0.5,
                        max_depth=7,
                        min_items=10,
                        nb_tests=1000,
                        parallel=-1,
                        test="patch",
                        cx=50, cy=50, cz=0,
                        dx=50, dy=50, dz=0)

images = []
segmentations = []

raw_data_folder = "/home/kevin/Imperial/PhD/gitlab/ess/brain_test/raw_data/"
ground_truth = glob("/home/kevin/Imperial/PhD/gitlab/ess/brain_test/ground_truth/*")#[:50]

segmentations = []
for f in ground_truth:
    # Ground-truth files are expected to be named
    # <raw_file>_<x>_<y>_<w>_<h>.png (x, y, w, h are presumably a bounding
    # box in the raw image -- confirm); `match` is None for other names.
    pattern = r'/(?P<raw_file>[^/]+)_(?P<x>\d+)_(?P<y>\d+)_(?P<w>\d+)_(?P<h>\d+)\.png$'
    match = re.search(pattern, f)
# Script set-up: build an integralForest using the "patch" test and list the
# ground-truth files.

from glob import glob
import re

import cv
import cv2
import numpy as np
import matplotlib.pyplot as plt
import irtk

from lib.integralforest import integralForest

forest = integralForest(ntrees=5,
                        bagging=0.5,
                        max_depth=7,
                        min_items=10,
                        nb_tests=1000,
                        parallel=-1,
                        test="patch",
                        cx=50, cy=50, cz=0,
                        dx=50, dy=50, dz=0)

images = []
segmentations = []

raw_data_folder = "/home/kevin/Imperial/PhD/gitlab/ess/brain_test/raw_data/"
ground_truth = glob("/home/kevin/Imperial/PhD/gitlab/ess/brain_test/ground_truth/*")  #[:50]

segmentations = []
def predict_autocontext( self, img, mask, extra_layers, metadata, nb_labels, ga, nb_autocontext, debug=False, return_all=False ): proba = np.ones((nb_labels,img.shape[0],img.shape[1],img.shape[2]),dtype='float32') proba /= nb_labels header = img.get_header() header['dim'][3] = nb_labels proba = irtk.Image(proba,header) all_steps = [] for k in xrange(nb_autocontext): metadata = self.get_center_axis(proba,k) knowledge = self.get_knowledge(img,proba,extra_layers,mask=mask) if debug: irtk.imwrite("knowledge_"+str(k)+".nii.gz", knowledge) forest = integralForest( folder=self.folder(k), test=self.params['test'], parallel=self.params['parallel'], nb_knowledge_layers=knowledge.shape[0] ) proba = forest.predict_autocontext( img, knowledge, mask, self.params['ksampling'], metadata ) proba = irtk.Image(proba,header) if return_all: all_steps.append( proba.copy() ) if debug: irtk.imwrite("debug_"+str(k)+".nii.gz", proba) if k < 1: for i in xrange(proba.shape[0]): if i == 0: proba[i] = 0 else: proba[i] = self.groups[i-1].get_center(proba[i]) # # volume constraint # # set not ventricule to 0 # tmp_proba = proba[1] # for i in xrange(proba.shape[0]): # if i == 1: # continue # proba[i] = 0 # # rescale ventricule # target_volume = 182950.0*0.001**3 # #target_volume = 151807.0*0.001**3 # if k == 0: # target_volume *= 0.5 # # elif k == 1: # # target_volume *= 0.25 # # elif k == 2: # # target_volume *= 0.5 # box_volume = float(proba.shape[1])*proba.header['pixelSize'][2]*float(proba.shape[2])*proba.header['pixelSize'][1]*float(proba.shape[3])*proba.header['pixelSize'][0] # ratio = float(target_volume) / float(box_volume) # #print "ratio", ratio # q0 = mquantiles( tmp_proba.flatten(), prob=[1.0-ratio] ) # tmp_proba[proba[1]<q0] = q0 # tmp_proba -= tmp_proba.min() # tmp_proba /= tmp_proba.max() # lcc = irtk.largest_connected_component(tmp_proba,fill_holes=False) # tmp_proba[lcc==0] = 0 # proba[1] = tmp_proba if debug: print "done autocontext", k # 
irtk.imwrite("debug_rescaled_"+str(k)+".nii.gz", proba) if not return_all: return proba else: return all_steps
def predict_autocontext(self, img, mask, extra_layers, metadata, nb_labels, ga, nb_autocontext, debug=False, return_all=False): proba = np.ones((nb_labels, img.shape[0], img.shape[1], img.shape[2]), dtype='float32') proba /= nb_labels header = img.get_header() header['dim'][3] = nb_labels proba = irtk.Image(proba, header) all_steps = [] for k in xrange(nb_autocontext): metadata = self.get_center_axis(proba, k) knowledge = self.get_knowledge(img, proba, extra_layers, mask=mask) if debug: irtk.imwrite("knowledge_" + str(k) + ".nii.gz", knowledge) forest = integralForest(folder=self.folder(k), test=self.params['test'], parallel=self.params['parallel'], nb_knowledge_layers=knowledge.shape[0]) proba = forest.predict_autocontext(img, knowledge, mask, self.params['ksampling'], metadata) proba = irtk.Image(proba, header) if return_all: all_steps.append(proba.copy()) if debug: irtk.imwrite("debug_" + str(k) + ".nii.gz", proba) if k < 1: for i in xrange(proba.shape[0]): if i == 0: proba[i] = 0 else: proba[i] = self.groups[i - 1].get_center(proba[i]) # # volume constraint # # set not ventricule to 0 # tmp_proba = proba[1] # for i in xrange(proba.shape[0]): # if i == 1: # continue # proba[i] = 0 # # rescale ventricule # target_volume = 182950.0*0.001**3 # #target_volume = 151807.0*0.001**3 # if k == 0: # target_volume *= 0.5 # # elif k == 1: # # target_volume *= 0.25 # # elif k == 2: # # target_volume *= 0.5 # box_volume = float(proba.shape[1])*proba.header['pixelSize'][2]*float(proba.shape[2])*proba.header['pixelSize'][1]*float(proba.shape[3])*proba.header['pixelSize'][0] # ratio = float(target_volume) / float(box_volume) # #print "ratio", ratio # q0 = mquantiles( tmp_proba.flatten(), prob=[1.0-ratio] ) # tmp_proba[proba[1]<q0] = q0 # tmp_proba -= tmp_proba.min() # tmp_proba /= tmp_proba.max() # lcc = irtk.largest_connected_component(tmp_proba,fill_holes=False) # tmp_proba[lcc==0] = 0 # proba[1] = tmp_proba if debug: print "done autocontext", k # 
irtk.imwrite("debug_rescaled_"+str(k)+".nii.gz", proba) if not return_all: return proba else: return all_steps