def evaluate_segmentation(patch_size, num_epochs, batch_size, lr, dropout,
                          num_patients_train, num_patients_val,
                          patients_segmentation, threshold, data_dirs, dataset,
                          executable_path, csv_path_per_patient, csv_path,
                          measures):
    """
    Score the stored segmentations of one run and export the results to csv.

    For every patient the label file and the stored probability map are
    compared via ``segment_comparison``; the resulting xml is extended by
    ``sensibility_conformity_to_xml`` and written as one row to the
    per-patient csv. After the loop all xml files are summarised into the
    averaged csv.

    :param patch_size: training patch size (part of the run name, reported in csv).
    :param num_epochs: number of training epochs (part of the run name, reported in csv).
    :param batch_size: training batch size (part of the run name, reported in csv).
    :param lr: training learning rate (part of the run name, reported in csv).
    :param dropout: training dropout rate (part of the run name, reported in csv).
    :param num_patients_train: number of patients in the training set (part of the run name).
    :param num_patients_val: number of patients in the validation set (part of the run name).
    :param patients_segmentation: iterable of patient identifiers (strings) to evaluate.
    :param threshold: threshold value forwarded to ``segment_comparison``.
    :param data_dirs: mapping from dataset name to the path prefix under which
        ``<patient>_label.nii`` is found.
    :param dataset: key into ``data_dirs`` (train/val/test).
    :param executable_path: path to EvaluateSegmentation.exe.
    :param csv_path_per_patient: path to csv file with results per patient.
    :param csv_path: path to csv file with results averaged over all patients.
    :param measures: string of measures to calculate.
    """

    def hyperparameters():
        # A fresh dict per call so every consumer receives its own copy.
        return {
            'patch size': patch_size,
            'num epochs': num_epochs,
            'batch size': batch_size,
            'learning rate': lr,
            'dropout': dropout,
        }

    # The run name identifies which stored segmentations belong to this setting.
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr,
                                   dropout, num_patients_train,
                                   num_patients_val)
    print(run_name)

    collected_xmls = []
    for patient in patients_segmentation:
        print('________________________________________________________________________________')
        for caption, value in (('patient:', patient),
                               ('patch size', patch_size),
                               ('batch size', batch_size),
                               ('learning rate', lr),
                               ('dropout', dropout),
                               ('threshold', threshold)):
            print(caption, value)

        # Ground truth and stored segmentation of this patient.
        label_path = data_dirs[dataset] + patient + '_label.nii'
        segmentation_path = config.get_probs_filepath(run_name, patient,
                                                      dataset)

        # Destination of the per-patient evaluation xml.
        xml_path_patient = config.get_eval_segment_dataset_xmlpath(
            run_name, patient, dataset)
        collected_xmls.append(xml_path_patient)

        # Compare against ground truth; the result lands in the xml file.
        segment_comparison(label_path, segmentation_path, executable_path,
                           xml_path_patient, threshold, measures)

        # Extend the generated xml with Sensibility and Conformity.
        sensibility_conformity_to_xml(xml_path_patient)

        # One csv row per patient.
        patient_params = hyperparameters()
        patient_params['patient'] = patient
        parse_xml_to_csv(xml_path_patient, csv_path_per_patient,
                         patient_params)

    # One csv row summarising all patients of this run.
    parse_xml_to_csv_avg_for_patients(collected_xmls, csv_path,
                                      hyperparameters())
for patch_size in patch_size_list: for batch_size in batch_size_list: for lr in learning_rate_list: for dropout in dropout_list: print( '________________________________________________________________________________' ) print('patch size', patch_size) print('batch size', batch_size) print('learning rate', lr) print('dropout', dropout) # create the name of current run run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr, dropout, num_patients_train, num_patients_val) print(run_name) # ----------------------------------------------------------- # LOADING MODEL DATA # ----------------------------------------------------------- train_X, train_y, val_X, val_y, mean, std = create_training_datasets( patch_size, config.NUM_PATCHES, config.PATIENTS) # ----------------------------------------------------------- # LOADING MODEL # ----------------------------------------------------------- model_filepath = config.get_model_filepath(run_name) model = load_model(model_filepath, custom_objects={
def measure_performance_and_save_to_csv(patch_size, num_epochs, batch_size, lr,
                                        dropout, threshold, num_patients_train,
                                        num_patients_val, patients_segmentation,
                                        data_dirs, dataset, result_file):
    """
    Append one csv row with the training and whole-volume performance of a run.

    The row consists of the hyperparameters, the train/val performance stored
    in the run's training metadata pickle, and the per-patient whole-volume
    measures (TP, FN, FP, TN, AUC, accuracy, average class accuracy, Dice)
    averaged over ``patients_segmentation``.

    :param patch_size: training patch size.
    :param num_epochs: number of training epochs.
    :param batch_size: training batch size.
    :param lr: training learning rate.
    :param dropout: training dropout rate.
    :param threshold: probabilities strictly above this value count as class 1.
    :param num_patients_train: number of training patients (part of the run name).
    :param num_patients_val: number of validation patients (part of the run name).
    :param patients_segmentation: iterable of patient identifiers (strings).
    :param data_dirs: mapping from dataset name to the path prefix under which
        ``<patient>_label.nii`` is found.
    :param dataset: key into ``data_dirs``.
    :param result_file: path of the csv file the row is appended to.
    """
    print('________________________________________________________________________________')
    print('patch size', patch_size)
    print('batch size', batch_size)
    print('learning rate', lr)
    print('dropout', dropout)
    print('threshold', threshold)

    start_row = time.time()

    # create the name of current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr,
                                   dropout, num_patients_train,
                                   num_patients_val)
    print(run_name)

    # -----------------------------------------------------------
    # TRAINING RESULTS
    # -----------------------------------------------------------
    train_metadata_filepath = config.get_train_metadata_filepath(run_name)
    # NOTE: pickle.load can execute arbitrary code; only read metadata files
    # written by this pipeline.
    with open(train_metadata_filepath, 'rb') as handle:
        train_metadata = pickle.load(handle)

    print('Train params:')
    print(train_metadata['params'])
    print('Train performance:')
    tr_perf = train_metadata['performance']
    print(tr_perf)

    # Column order of the performance measures, shared by the train and val part.
    measure_keys = ('true_positives', 'false_negatives', 'false_positives',
                    'true_negatives', 'auc', 'acc', 'avg_acc', 'dice')
    row = [patch_size, num_epochs, batch_size, lr, dropout]
    row += [tr_perf['train_' + key] for key in measure_keys]
    row += [tr_perf['val_' + key] for key in measure_keys]

    # -----------------------------------------------------------
    # VALIDATION / TEST RESULTS
    # -----------------------------------------------------------
    # One accumulator per measure, in the order tp, fn, fp, tn, auc, acc,
    # avg_acc, dice.
    per_measure_values = [[] for _ in measure_keys]

    for patient in patients_segmentation:
        print(patient)

        print('> Loading label...')
        label_mat = helper.load_nifti_mat_from_file(
            data_dirs[dataset] + patient + '_label.nii')  # values 0 or 1
        print('> Loading probability map...')
        prob_mat = helper.load_nifti_mat_from_file(
            config.get_probs_filepath(run_name, patient, dataset))  # values between 0 and 1

        # convert from boolean to int, values 0 or 1
        pred_class = (prob_mat > threshold).astype(np.uint8)

        print()
        print('Computing performance measures...')
        label_mat_f = np.asarray(label_mat).flatten()
        prob_mat_f = np.asarray(prob_mat).flatten()
        pred_classes_f = np.asarray(pred_class).flatten()

        val_auc = roc_auc_score(label_mat_f, prob_mat_f)
        val_acc = accuracy_score(label_mat_f, pred_classes_f)
        val_avg_acc, val_tn, val_fp, val_fn, val_tp = avg_class_acc(
            label_mat_f, pred_classes_f)
        val_dice = f1_score(label_mat_f, pred_classes_f)

        patient_values = (val_tp, val_fn, val_fp, val_tn, val_auc, val_acc,
                          val_avg_acc, val_dice)
        for values, value in zip(per_measure_values, patient_values):
            values.append(value)

    row = row + [np.mean(values) for values in per_measure_values]
    print('Complete row:', row)

    print('Writing to csv...')
    # newline='' is required by the csv module; without it the writer emits
    # an extra blank line after every row on Windows.
    with open(result_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)

    duration_row = int(time.time() - start_row)
    # Hours are not wrapped: a modulo here would under-report long runs.
    print('performance assessment took:', duration_row // 3600, 'hours',
          (duration_row // 60) % 60, 'minutes',
          duration_row % 60, 'seconds')
def _patch_starts(coord_min, coord_max, patch_size, dim_size):
    """Return the start indices of the patches covering [coord_min, coord_max] on one axis."""
    if patch_size > dim_size:
        raise ValueError('patch size %d exceeds image dimension %d'
                         % (patch_size, dim_size))
    # The extent is inclusive on both ends, hence "+ 1": even a single voxel
    # needs one patch, and an extent of k * patch_size + 1 voxels needs k + 1.
    num_patches = int(coord_max - coord_min) // patch_size + 1
    starts = []
    for n in range(num_patches):
        start = int(coord_min) + patch_size * n
        # A patch reaching beyond the image is shifted back so that it ends
        # exactly at the image border and keeps its full size.
        if start + patch_size > dim_size:
            start = dim_size - patch_size
        starts.append(start)
    return starts


def predict_and_save(patch_size, num_epochs, batch_size, lr, dropout, patient,
                     num_patients_train, num_patients_val, data_dirs, dataset):
    """
    Predict the probability map of one patient patch by patch and save it as nifti.

    The trained model of the run is loaded, the masked region of every z
    slice is tiled with ``patch_size`` x ``patch_size`` patches, each patch
    is normalised with the mean/std stored in the training metadata and
    predicted, and the assembled probability matrix is written to the path
    given by ``config.get_probs_filepath``.

    :param patch_size: side length of the square patches fed to the model.
    :param num_epochs: number of training epochs (part of the run name).
    :param batch_size: training batch size (part of the run name).
    :param lr: training learning rate (part of the run name).
    :param dropout: training dropout rate (part of the run name).
    :param patient: patient identifier (string) used to build the file names.
    :param num_patients_train: number of training patients (part of the run name).
    :param num_patients_val: number of validation patients (part of the run name).
    :param data_dirs: mapping from dataset name to the path prefix under which
        ``<patient>_img.nii`` and ``<patient>_mask.nii`` are found.
    :param dataset: key into ``data_dirs``.
    :raises ValueError: if the image is not 3D or ``patch_size`` exceeds the
        x or y dimension of a slice that has to be predicted.
    """
    print('________________________________________________________________________________')
    print('patch size', patch_size)
    print('batch size', batch_size)
    print('learning rate', lr)
    print('dropout', dropout)
    print('patient:', patient)

    # create the name of current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr,
                                   dropout, num_patients_train,
                                   num_patients_val)
    print(run_name)

    # -----------------------------------------------------------
    # LOADING MODEL, RESULTS AND WHOLE BRAIN MATRICES
    # -----------------------------------------------------------
    model_filepath = config.get_model_filepath(run_name)
    print(model_filepath)
    model = load_model(model_filepath,
                       custom_objects={'dice_coef_loss': dice_coef_loss,
                                       'dice_coef': dice_coef})

    train_metadata_filepath = config.get_train_metadata_filepath(run_name)
    with open(train_metadata_filepath, 'rb') as handle:
        train_metadata = pickle.load(handle)
    print('train setting: ', train_metadata['params'])

    # normalisation constants calculated over the training set
    train_mean = train_metadata['params']['mean']
    train_std = train_metadata['params']['std']

    print('> Loading image...')
    img_mat = helper.load_nifti_mat_from_file(
        data_dirs[dataset] + patient + '_img.nii')  # values between 0 and 255
    print('> Loading mask...')
    mask_mat = helper.load_nifti_mat_from_file(
        data_dirs[dataset] + patient + '_mask.nii')  # values 0 and 1

    # -----------------------------------------------------------
    # PREDICTION
    # -----------------------------------------------------------
    # the segmentation is going to be saved in this probability matrix
    prob_mat = np.zeros(img_mat.shape, dtype=np.float32)
    x_dim, y_dim, _ = prob_mat.shape

    # get the x, y and z coordinates where there is brain
    x, y, z = np.where(mask_mat)
    print('x shape:', x.shape)
    print('y shape:', y.shape)
    print('z shape:', z.shape)

    # get the z slices with brain
    z_slices = np.unique(z)

    # start cutting out and predicting the patches
    starttime_total = time.time()
    # proceed slice by slice
    for i in z_slices:
        print('Slice:', i)
        starttime_slice = time.time()

        # all x and y coordinates with brain in given slice
        in_slice = z == i
        x_in_slice = x[in_slice]
        y_in_slice = y[in_slice]

        # patch origins covering the bounding box of the brain in this slice
        x_starts = _patch_starts(x_in_slice.min(), x_in_slice.max(),
                                 patch_size, x_dim)
        y_starts = _patch_starts(y_in_slice.min(), y_in_slice.max(),
                                 patch_size, y_dim)
        print('num x patches', len(x_starts))
        print('num y patches', len(y_starts))

        # predict patch by patch in given slice
        for patch_start_x in x_starts:
            patch_end_x = patch_start_x + patch_size
            for patch_start_y in y_starts:
                patch_end_y = patch_start_y + patch_size

                # get the patch with the found coordinates from the image matrix
                img_patch = img_mat[patch_start_x: patch_end_x,
                                    patch_start_y: patch_end_y, i]

                # normalize the patch with mean and standard deviation
                # calculated over training set (np.float was removed from
                # NumPy; float64 is what it used to alias)
                img_patch = img_patch.astype(np.float64)
                img_patch -= train_mean
                img_patch /= train_std

                # predict the patch with the model and save to probability matrix
                prediction = model.predict(
                    np.reshape(img_patch, (1, patch_size, patch_size, 1)),
                    batch_size=1, verbose=0)
                prob_mat[patch_start_x: patch_end_x,
                         patch_start_y: patch_end_y, i] = np.reshape(
                             prediction, (patch_size, patch_size))

        # how long does the prediction take for one slice
        duration_slice = time.time() - starttime_slice
        print('prediction in slice took:', duration_slice // 3600, 'hours',
              (duration_slice // 60) % 60, 'minutes',
              duration_slice % 60, 'seconds')

    # how long does the prediction take for a patient
    duration_total = time.time() - starttime_total
    print('prediction in total took:', duration_total // 3600, 'hours',
          (duration_total // 60) % 60, 'minutes',
          duration_total % 60, 'seconds')

    # -----------------------------------------------------------
    # SAVE AS NIFTI
    # -----------------------------------------------------------
    helper.create_and_save_nifti(
        prob_mat, config.get_probs_filepath(run_name, patient, dataset))
def train_and_save(train_X, train_y, val_X, val_y, patch_size, num_epochs,
                   batch_size, lr, dropout, num_channels, activation,
                   final_activation, optimizer, loss, metrics, num_patches,
                   factor_train_samples, mean, std, threshold, rotation_range,
                   horizontal_flip, vertical_flip, shear_range,
                   width_shift_range, height_shift_range):
    """
    Train (or load) the model of one run, assess it and store the metadata.

    If no model file exists for the run, a new model is built with
    ``get_unet``, trained on augmented patches and saved together with a
    metadata pickle (params + training history). Otherwise the stored model
    and metadata are loaded. In both cases the model is then evaluated on the
    training and validation patches and the metadata pickle is rewritten with
    an additional ``'performance'`` entry and the updated total time.

    :param train_X: training image patches.
    :param train_y: training label patches.
    :param val_X: validation image patches.
    :param val_y: validation label patches.
    :param patch_size: patch size (forwarded to ``get_unet``, part of the run name).
    :param num_epochs: number of training epochs.
    :param batch_size: batch size for training and for the evaluation predictions.
    :param lr: learning rate.
    :param dropout: dropout rate.
    :param num_channels: forwarded to ``get_unet``.
    :param activation: forwarded to ``get_unet``.
    :param final_activation: forwarded to ``get_unet``.
    :param optimizer: forwarded to ``get_unet``.
    :param loss: forwarded to ``get_unet`` and stored in the metadata.
    :param metrics: forwarded to ``get_unet``.
    :param num_patches: divisor turning the number of patches into the number
        of patients used in the run name.
    :param factor_train_samples: multiplier on ``len(train_X)`` that determines
        the number of steps per epoch.
    :param mean: mean used for training data centering (stored in the metadata).
    :param std: std used for training data normalization (stored in the metadata).
    :param threshold: probabilities strictly above this value count as class 1.
    :param rotation_range: augmentation setting for ``ImageDataGenerator``.
    :param horizontal_flip: augmentation setting for ``ImageDataGenerator``.
    :param vertical_flip: augmentation setting for ``ImageDataGenerator``.
    :param shear_range: augmentation setting for ``ImageDataGenerator``.
    :param width_shift_range: augmentation setting for ``ImageDataGenerator``.
    :param height_shift_range: augmentation setting for ``ImageDataGenerator``.
    """
    print('patch size', patch_size)
    print('number of epochs', num_epochs)
    print('batch size', batch_size)
    print('learning rate', lr)
    print('dropout', dropout)

    # create the name of current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr,
                                   dropout, len(train_X) // num_patches,
                                   len(val_X) // num_patches)
    model_filepath = config.get_model_filepath(run_name)
    train_metadata_filepath = config.get_train_metadata_filepath(run_name)

    # if model does not exist, train it
    if not os.path.isfile(model_filepath):
        # -----------------------------------------------------------
        # CREATING MODEL
        # -----------------------------------------------------------
        model = get_unet(patch_size, num_channels, activation,
                         final_activation, optimizer, lr, dropout, loss,
                         metrics)

        # -----------------------------------------------------------
        # CREATING DATA GENERATOR
        # -----------------------------------------------------------
        # transforming images and masks together
        data_gen_args = dict(rotation_range=rotation_range,
                             horizontal_flip=horizontal_flip,
                             vertical_flip=vertical_flip,
                             shear_range=shear_range,
                             width_shift_range=width_shift_range,
                             height_shift_range=height_shift_range,
                             fill_mode='constant')
        X_datagen = ImageDataGenerator(**data_gen_args)
        y_datagen = ImageDataGenerator(**data_gen_args)

        # Provide the same seed and keyword arguments to the fit and flow
        # methods so that images and labels receive identical transforms.
        seed = 1
        X_datagen.fit(train_X, augment=True, seed=seed)
        y_datagen.fit(train_y, augment=True, seed=seed)
        X_generator = X_datagen.flow(train_X, batch_size=batch_size,
                                     seed=seed, shuffle=True)
        y_generator = y_datagen.flow(train_y, batch_size=batch_size,
                                     seed=seed, shuffle=True)

        # combine generators into one which yields image and label
        train_generator = zip(X_generator, y_generator)

        # Sanity check (manual): plot a few entries of train_X / train_y next
        # to one batch drawn from X_generator / y_generator to verify that the
        # augmentation is applied identically to images and labels.

        # -----------------------------------------------------------
        # TRAINING MODEL
        # -----------------------------------------------------------
        start_train = time.time()

        # keras callback for saving the training history to csv file
        csv_logger = CSVLogger(config.get_train_history_filepath(run_name))

        # training
        # NOTE(review): fit_generator is deprecated in recent Keras releases
        # in favour of fit - confirm against the Keras version in use.
        history = model.fit_generator(
            train_generator,
            validation_data=(val_X, val_y),
            steps_per_epoch=factor_train_samples * len(train_X) // batch_size,
            epochs=num_epochs,
            verbose=2,
            shuffle=True,
            callbacks=[csv_logger])

        duration_train = int(time.time() - start_train)
        # Hours are not wrapped: a modulo here would under-report long runs.
        print('training took:', duration_train // 3600, 'hours',
              (duration_train // 60) % 60, 'minutes',
              duration_train % 60, 'seconds')

        # -----------------------------------------------------------
        # SAVING MODEL
        # -----------------------------------------------------------
        print('Saving model to ', model_filepath)
        model.save(model_filepath)

        print('Saving params to ', train_metadata_filepath)
        params = history.params
        params['batchsize'] = batch_size
        params['dropout'] = dropout
        params['patch_size'] = patch_size
        params['learning_rate'] = lr
        params['loss'] = loss
        params['mean'] = mean  # mean used for training data centering
        params['std'] = std  # std used for training data normalization
        params['samples'] = factor_train_samples * len(train_X)
        params['val_samples'] = len(val_X)
        params['total_time'] = duration_train
        params['rotation range'] = rotation_range
        params['horizontal_flip'] = horizontal_flip
        params['vertical_flip'] = vertical_flip
        params['shear_range'] = shear_range
        params['width_shift_range'] = width_shift_range
        params['height_shift_range'] = height_shift_range
        train_history = history.history

        # Store the metadata right away so that it is on disk even if the
        # performance assessment below fails.
        results = {'params': params, 'history': train_history}
        with open(train_metadata_filepath, 'wb') as handle:
            pickle.dump(results, handle)
    else:
        # model exists, load it
        # -----------------------------------------------------------
        # LOADING MODEL
        # -----------------------------------------------------------
        print('Loading model from', model_filepath)
        model = load_model(model_filepath,
                           custom_objects={'dice_coef_loss': dice_coef_loss,
                                           'dice_coef': dice_coef})
        model.summary()

        with open(train_metadata_filepath, 'rb') as handle:
            stored_metadata = pickle.load(handle)
        params = stored_metadata['params']
        train_history = stored_metadata['history']

    # -----------------------------------------------------------
    # PERFORMANCE MEASURES
    # -----------------------------------------------------------
    start_perf = time.time()

    train_y_pred_probs = model.predict(train_X, batch_size=batch_size,
                                       verbose=1)
    # convert from boolean to int, values 0 or 1
    train_y_pred_class = (train_y_pred_probs > threshold).astype(np.uint8)
    val_y_pred_probs = model.predict(val_X, batch_size=batch_size, verbose=1)
    # convert from boolean to int, values 0 or 1
    val_y_pred_class = (val_y_pred_probs > threshold).astype(np.uint8)

    print()
    print("Performance:")

    train_y_f = train_y.flatten()
    train_y_pred_probs_f = train_y_pred_probs.flatten()
    train_y_pred_class_f = train_y_pred_class.flatten()
    val_y_f = val_y.flatten()
    val_y_pred_probs_f = val_y_pred_probs.flatten()
    val_y_pred_class_f = val_y_pred_class.flatten()

    train_auc = roc_auc_score(train_y_f, train_y_pred_probs_f)
    train_acc = accuracy_score(train_y_f, train_y_pred_class_f)
    train_avg_acc, train_tn, train_fp, train_fn, train_tp = avg_class_acc(
        train_y_f, train_y_pred_class_f)
    train_dice = f1_score(train_y_f, train_y_pred_class_f)

    val_auc = roc_auc_score(val_y_f, val_y_pred_probs_f)
    val_acc = accuracy_score(val_y_f, val_y_pred_class_f)
    val_avg_acc, val_tn, val_fp, val_fn, val_tp = avg_class_acc(
        val_y_f, val_y_pred_class_f)
    val_dice = f1_score(val_y_f, val_y_pred_class_f)

    print('train auc:', train_auc)
    print('train acc:', train_acc)
    print('train avg acc:', train_avg_acc)
    print('train dice:', train_dice)
    print('val auc:', val_auc)
    print('val acc:', val_acc)
    print('val avg acc:', val_avg_acc)
    print('val dice:', val_dice)

    duration_perf = int(time.time() - start_perf)
    print('performance assessment took:', duration_perf // 3600, 'hours',
          (duration_perf // 60) % 60, 'minutes',
          duration_perf % 60, 'seconds')

    duration_total = params['total_time'] + duration_perf
    print('total time:', duration_total // 3600, 'hours',
          (duration_total // 60) % 60, 'minutes',
          duration_total % 60, 'seconds')

    # -----------------------------------------------------------
    # SAVING RESULTS
    # -----------------------------------------------------------
    print('Saving training results to ', train_metadata_filepath)
    performance = {'train_true_positives': train_tp,
                   'train_true_negatives': train_tn,
                   'train_false_positives': train_fp,
                   'train_false_negatives': train_fn,
                   'train_auc': train_auc,
                   'train_acc': train_acc,
                   'train_avg_acc': train_avg_acc,
                   'train_dice': train_dice,
                   'val_true_positives': val_tp,
                   'val_true_negatives': val_tn,
                   'val_false_positives': val_fp,
                   'val_false_negatives': val_fn,
                   'val_auc': val_auc,
                   'val_acc': val_acc,
                   'val_avg_acc': val_avg_acc,
                   'val_dice': val_dice}

    params['total_time'] = duration_total
    results = {'params': params,
               'history': train_history,
               'performance': performance}
    with open(train_metadata_filepath, 'wb') as handle:
        pickle.dump(results, handle)

    print('________________________________________________________________________________')