def __init__(self, dim, data_path, temp_path, save_path, num_train_data,
             num_labeled_train, patients_per_batch, nr_class, batch_size,
             dataset_name):
    self.data_path = data_path
    self.temp_path = temp_path
    self.save_path = save_path
    self.num_train_data = num_train_data  # total training data = labelled + unlabelled
    self.num_labeled_train = num_labeled_train  # labelled training data
    self.num_un_labeled_train = num_train_data - num_labeled_train  # unlabelled data
    self.patients_per_batch = patients_per_batch
    self.nr_class = nr_class
    self.dim = dim
    self.batch_size = batch_size
    self.nr_dim = len(dim)

    makedir(self.temp_path)
    makedir(os.path.join(self.temp_path, ENS_GT))
    makedir(os.path.join(save_path, 'tb', dataset_name))
    makedir(os.path.join(save_path, 'csv', dataset_name))
    makedir(os.path.join(save_path, 'model', dataset_name))

    # Write one all-zero ensemble-target array per training patient.
    ens_gt_shape = (dim[0], dim[1], nr_class) if self.nr_dim == 2 \
        else (dim[0], dim[1], dim[2], nr_class)
    ens = np.zeros(ens_gt_shape)
    for patient in np.arange(num_train_data):
        np.save(os.path.join(self.temp_path, ENS_GT, str(patient) + NPY), ens)
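# ---------------------------------------------------------------------------
# Hedged sketch (added for illustration, not part of the original source):
# per-patient ENS_GT arrays like the ones initialised above are the usual
# storage for temporal-ensembling targets, refreshed with an exponential
# moving average of fresh predictions (Laine & Aila, 2017). The function
# below is an assumption about that update; `update_ensemble_target`, `alpha`
# and the 'ens_gt'/'.npy' literals (stand-ins for the project's ENS_GT/NPY
# constants) are not defined anywhere in this project.
# ---------------------------------------------------------------------------
import os
import numpy as np

def update_ensemble_target(temp_path, patient_id, prediction, alpha=0.6):
    """EMA-update the stored ensemble target of one patient with a new prediction."""
    ens_file = os.path.join(temp_path, 'ens_gt', str(patient_id) + '.npy')
    ens = np.load(ens_file)
    ens = alpha * ens + (1.0 - alpha) * prediction  # exponential moving average
    np.save(ens_file, ens)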
def generate_supervised_dataset(dataset_name, fold_num, labelled_perc, seed=1234):
    metadata = get_metadata(dataset_name)
    folds_root_path = metadata[m_data_path]
    save_root_path = os.path.join(folds_root_path, dataset_name,
                                  'fold_' + str(fold_num) + '_P' + str(labelled_perc))
    supervised_fold_path = os.path.join(folds_root_path, dataset_name,
                                        'fold_' + str(fold_num))
    labelled_num = metadata[m_labelled_train]

    # training
    makedir(os.path.join(save_root_path, 'train', 'imgs'), delete_existing=True)
    makedir(os.path.join(save_root_path, 'train', 'gt'), delete_existing=True)
    makedir(os.path.join(save_root_path, 'val', 'imgs'), delete_existing=True)
    makedir(os.path.join(save_root_path, 'val', 'gt'), delete_existing=True)

    num_labeled_train = int(labelled_perc * labelled_num)
    # Seed the generator directly so the subset is reproducible;
    # np.random.seed() has no effect on default_rng().
    rng = default_rng(seed)
    labelled_num_considered = rng.choice(labelled_num, size=num_labeled_train,
                                         replace=False)

    counter = 0
    for i in labelled_num_considered:
        np.save(os.path.join(save_root_path, 'train', 'imgs', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'train', 'imgs',
                                     str(i) + '.npy')))
        np.save(os.path.join(save_root_path, 'train', 'gt', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'train', 'gt',
                                     str(i) + '.npy')))
        counter = counter + 1
        print(i, counter)
    print('copied labelled training images')

    counter = 0
    for i in np.arange(metadata[m_labelled_val]):
        np.save(os.path.join(save_root_path, 'val', 'imgs', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'val', 'imgs',
                                     str(i) + '.npy')))
        np.save(os.path.join(save_root_path, 'val', 'gt', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'val', 'gt',
                                     str(i) + '.npy')))
        counter = counter + 1  # without this, every val case overwrites file '0'
        print(i)
    print('copied labelled val images')
def generate_supervised_dataset(dataset_name, fold_num, labelled_perc, seed=1234):
    metadata = get_metadata(dataset_name)
    save_root_path = os.path.join(metadata[m_data_path], dataset_name)
    supervised_fold_path = os.path.join(save_root_path,
                                        'fold_' + str(fold_num) + '_P' + str(labelled_perc))
    labelled_files_lst = np.load(os.path.join(metadata[m_folds_path],
                                              'train_fold' + str(fold_num) + '.npy'))
    labelled_train_num = len(labelled_files_lst)
    labelled_path = metadata[m_raw_data_path] + '/labelled/train/'
    print(labelled_files_lst[0:10])

    np.random.seed(seed)
    np.random.shuffle(labelled_files_lst)
    labelled_num_considered = labelled_files_lst[:int(labelled_train_num * labelled_perc)]
    validation_imgs = np.load(os.path.join(metadata[m_folds_path],
                                           'val_fold' + str(fold_num) + '.npy'))

    makedir(os.path.join(supervised_fold_path, 'train', 'imgs'), delete_existing=True)
    makedir(os.path.join(supervised_fold_path, 'train', 'gt'), delete_existing=True)
    makedir(os.path.join(supervised_fold_path, 'val', 'imgs'), delete_existing=True)
    makedir(os.path.join(supervised_fold_path, 'val', 'gt'), delete_existing=True)

    print('creating training images')
    counter = 0
    for i in labelled_num_considered:
        print(i, counter)
        # Images and masks are stored as 8-bit arrays; rescale to [0, 1].
        np.save(os.path.join(supervised_fold_path, 'train', 'imgs', str(counter) + '.npy'),
                np.load(os.path.join(labelled_path, 'imgs', i)) / 255)
        GT_lesion = np.load(os.path.join(labelled_path, 'GT',
                                         i.replace('.npy', '_segmentation.npy'))) / 255
        # The background channel is the complement of the lesion channel.
        GT_bg = np.where(GT_lesion == 0, np.ones_like(GT_lesion), np.zeros_like(GT_lesion))
        np.save(os.path.join(supervised_fold_path, 'train', 'gt', str(counter) + '.npy'),
                np.concatenate((GT_bg, GT_lesion), -1))
        counter = counter + 1

    print('creating validation images')
    counter = 0
    for i in validation_imgs:
        print(i, counter)
        np.save(os.path.join(supervised_fold_path, 'val', 'imgs', str(counter) + '.npy'),
                np.load(os.path.join(labelled_path, 'imgs', i)) / 255)
        GT_lesion = np.load(os.path.join(labelled_path, 'GT',
                                         i.replace('.npy', '_segmentation.npy'))) / 255
        GT_bg = np.where(GT_lesion == 0, np.ones_like(GT_lesion), np.zeros_like(GT_lesion))
        np.save(os.path.join(supervised_fold_path, 'val', 'gt', str(counter) + '.npy'),
                np.concatenate((GT_bg, GT_lesion), -1))
        counter = counter + 1
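# ---------------------------------------------------------------------------
# Illustrative sketch (added, not from the original source): how the
# two-channel ground truth above is assembled from a binary lesion mask.
# Self-contained NumPy; the toy shapes are arbitrary.
# ---------------------------------------------------------------------------
import numpy as np

lesion = np.zeros((4, 4, 1), dtype=np.float32)      # toy binary lesion mask
lesion[1:3, 1:3, 0] = 1.0
background = np.where(lesion == 0, 1.0, 0.0)        # complement channel, as in GT_bg
gt = np.concatenate((background, lesion), axis=-1)  # shape (4, 4, 2): [bg, lesion]
assert np.all(gt.sum(axis=-1) == 1.0)               # channels form a valid one-hot encoding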
def train(gpu_id, nb_gpus, dataset_name, labelled_perc, fold_num, model_type,
          is_augmented=True):
    metadata = get_metadata(dataset_name)
    name = 'supervised_F' + str(fold_num) + '_P' + str(labelled_perc)
    data_path = os.path.join(metadata[m_data_path], dataset_name,
                             'fold_' + str(fold_num) + '_P' + str(labelled_perc),
                             'train')
    print('data directory:', data_path)
    tb_log_dir = os.path.join(metadata[m_save_path], 'tb', dataset_name,
                              name + '_' + str(metadata[m_lr]) + '/')
    model_name = os.path.join(metadata[m_trained_model_path], dataset_name, name + H5)
    csv_name = os.path.join(metadata[m_save_path], 'csv', dataset_name, name + '.csv')
    dim = metadata[m_dim]
    bs = metadata[m_batch_size]
    num_labeled_train = int(labelled_perc * metadata[m_labelled_train])  # actual labelled data
    print('Labelled Images:', num_labeled_train)

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)
    inp_shape = dim if len(dim) == 3 else (dim[0], dim[1], metadata[m_nr_channels])
    model = model_type.build_model(img_shape=inp_shape,
                                   learning_rate=metadata[m_lr],
                                   gpu_id=gpu_id,
                                   nb_gpus=nb_gpus)
    model.summary()

    # callbacks
    print('-' * 30)
    print('Creating callbacks...')
    print('-' * 30)
    makedir(os.path.join(metadata[m_save_path], 'csv', dataset_name))
    csv_logger = CSVLogger(csv_name, append=True, separator=';')
    if nb_gpus is not None and nb_gpus > 1:
        model_checkpoint = ModelCheckpointParallel(model_name, monitor='val_loss',
                                                   save_best_only=True, verbose=1,
                                                   mode='min')
    else:
        model_checkpoint = ModelCheckpoint(model_name, monitor='val_loss',
                                           save_best_only=True, verbose=1, mode='min')
    tensorboard = TensorBoard(log_dir=tb_log_dir, write_graph=False, write_grads=False,
                              histogram_freq=0, batch_size=1, write_images=False)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=PATIENCE_EARLY_STOP, min_delta=DELTA)
    cb = [model_checkpoint, tensorboard, csv_logger, es]
    print('Callbacks: ', cb)

    print('-' * 30)
    print('Fitting model...')
    print('-' * 30)
    training_generator = get_supervised_data_generator(dataset_name, data_path,
                                                       num_labeled_train, is_augmented)
    steps = (metadata[m_labelled_train] * metadata[m_aug_num]) // bs
    x_val, y_val = get_supervised_val_data(data_path, dim, metadata[m_nr_class],
                                           metadata[m_nr_channels])
    history = model.fit_generator(generator=training_generator,
                                  steps_per_epoch=steps,
                                  validation_data=(x_val, y_val),
                                  epochs=NUM_EPOCH,
                                  callbacks=cb)
    return history
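if __name__ == '__main__':
    # Hedged usage sketch (added; assumptions throughout): a typical invocation
    # of the supervised trainer above. The `model` import path, the dataset
    # name, fold number and labelled percentage are hypothetical examples --
    # model_type only needs to expose build_model() as called in train().
    from model import weighted_model  # hypothetical import path
    history = train(gpu_id='0', nb_gpus=1, dataset_name='prostate',
                    labelled_perc=0.5, fold_num=1, model_type=weighted_model)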
def __init__(self, dim, data_path, temp_path, save_path, num_train_data,
             num_labeled_train, patients_per_batch, pixel_perc_arr, val_metric_keys,
             nr_class, batch_size, mc_forward_pass_num, dataset_name, mc_model):
    self.data_path = data_path
    self.temp_path = temp_path
    self.save_path = save_path
    self.num_train_data = num_train_data  # total training data = labelled + unlabelled
    self.num_labeled_train = num_labeled_train  # labelled training data
    self.num_un_labeled_train = num_train_data - num_labeled_train  # unlabelled data
    self.patients_per_batch = patients_per_batch
    self.nr_class = nr_class
    self.pixel_perc_arr = pixel_perc_arr
    self.val_metric_keys = val_metric_keys
    # Plain bool/float/int: the np.bool / np.float / np.int aliases are deprecated.
    self.save_flag = np.zeros((self.nr_class), dtype=bool)
    self.val_dice_coef = np.zeros((self.nr_class), dtype=float)
    self.confident_pixels_no_per_batch = np.zeros([self.nr_class], dtype=int)
    self.dim = dim
    self.batch_size = batch_size
    self.nr_dim = len(dim)
    self.mc_forward_pass_num = mc_forward_pass_num
    self.mc_model = mc_model

    # Per-patient flag templates: all-ones for labelled, all-zeros for unlabelled.
    if len(dim) == 3:
        flag_1 = np.ones(shape=dim, dtype='int64')
        flag_0 = np.zeros(shape=dim, dtype='int64')
    else:
        flag_1 = np.ones(shape=(dim[0], dim[1]), dtype='int64')
        flag_0 = np.zeros(shape=(dim[0], dim[1]), dtype='int64')

    makedir(self.temp_path)
    makedir(os.path.join(self.temp_path, ENS_GT))
    makedir(os.path.join(self.temp_path, FLAG))
    makedir(os.path.join(save_path, 'tb', dataset_name))
    makedir(os.path.join(save_path, 'csv', dataset_name))
    makedir(os.path.join(save_path, 'model', dataset_name))

    # Seed every ensemble target with the stored ground truth; labelled patients
    # get the all-ones flag, unlabelled patients the all-zeros flag.
    for patient in np.arange(num_train_data):
        copyfile(os.path.join(data_path, GT, str(patient) + NPY),
                 os.path.join(self.temp_path, ENS_GT, str(patient) + NPY))
        if patient < num_labeled_train:
            np.save(os.path.join(self.temp_path, FLAG, str(patient) + NPY), flag_1)
        else:
            np.save(os.path.join(self.temp_path, FLAG, str(patient) + NPY), flag_0)
    del flag_1, flag_0
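# ---------------------------------------------------------------------------
# Hedged sketch (added, not from the original source): what `mc_model` and
# `mc_forward_pass_num` above are typically used for -- Monte-Carlo dropout.
# The model is assumed to keep its dropout layers active at prediction time;
# the mean/variance aggregation is the standard recipe (Gal & Ghahramani,
# 2016). `mc_dropout_predict` is an illustrative name, not a project function.
# ---------------------------------------------------------------------------
import numpy as np

def mc_dropout_predict(mc_model, x, forward_passes):
    """Aggregate `forward_passes` stochastic predictions into mean and variance."""
    preds = np.stack([mc_model.predict(x) for _ in range(forward_passes)], axis=0)
    return preds.mean(axis=0), preds.var(axis=0)  # prediction, per-voxel uncertainty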
def evaluateFiles_arr(prediction, img_arr, GT_arr, csvName,
                      connected_component=False, eval=True, save_dir=None):
    # `prediction` is expected with shape (classes, cases, depth, height, width).
    nrImgs = img_arr.shape[0]
    if not eval:
        # Predictions below are written under '/baseline', so create it along
        # with the (currently unused) imgs/GT folders.
        makedir(save_dir + '/imgs/')
        makedir(save_dir + '/GT/')
        makedir(save_dir + '/baseline/')

    with open(csvName, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=';', lineterminator='\n',
                               quotechar='|', quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow([
            'Case',
            'Class0 Dice', 'Class0 MeanDis', 'Class0 95-HD',
            'Class1 Dice', 'Class1 MeanDis', 'Class1 95-HD',
            'Class2 Dice', 'Class2 MeanDis', 'Class2 95-HD'
        ])
        if eval:
            dices = np.zeros((nrImgs, 3), dtype=np.float32)
            print(dices.shape)
            mad = np.zeros((nrImgs, 3), dtype=np.float32)
            hdf = np.zeros((nrImgs, 3), dtype=np.float32)

        for imgNumber in range(0, nrImgs):
            if connected_component:
                prediction_temp = removeIslands(np.asarray(prediction)[:, imgNumber, :, :, :])
            else:
                prediction_temp = np.asarray(prediction)[:, imgNumber, :, :, :]

            if not eval:
                # np.save(save_dir + '/imgs/' + str(imgNumber) + '.npy', img_arr[imgNumber])
                # np.save(save_dir + '/GT/' + str(imgNumber) + '.npy', GT_arr[imgNumber])
                np.save(save_dir + '/baseline/' + str(imgNumber) + '.npy', prediction_temp)

            values = ['Case' + str(imgNumber)]
            print('Case' + str(int(imgNumber)))
            if eval:
                for class_idx in range(0, 3):
                    pred_arr = prediction_temp[class_idx, :, :, :]
                    pred_arr = thresholdArray(pred_arr, 0.5)
                    pred_img = sitk.GetImageFromArray(pred_arr)
                    GT_label = sitk.GetImageFromArray(GT_arr[imgNumber, :, :, :, class_idx])
                    GT_label.SetSpacing([1.0, 1.0, 1.0])
                    pred_img = castImage(pred_img, sitk.sitkUInt8)
                    pred_img.SetSpacing([1.0, 1.0, 1.0])
                    GT_label = castImage(GT_label, sitk.sitkUInt8)

                    dice = getDice(pred_img, GT_label)
                    print(class_idx, dice)
                    # avd = relativeAbsoluteVolumeDifference(pred_img, GT_label)
                    [hausdorff, avgDist] = getBoundaryDistances(pred_img, GT_label)

                    dices[imgNumber, class_idx] = dice
                    mad[imgNumber, class_idx] = avgDist
                    hdf[imgNumber, class_idx] = hausdorff
                    # auc[imgNumber, zoneIndex] = roc_auc
                    values.append(dice)
                    values.append(avgDist)
                    values.append(hausdorff)
                csvwriter.writerow(values)

        # Summary rows only exist when metrics were computed; writing them with
        # eval=False would reference undefined arrays.
        if eval:
            csvwriter.writerow('')
            average = ['Average',
                       np.average(dices[:, 0]), np.average(mad[:, 0]), np.average(hdf[:, 0]),
                       np.average(dices[:, 1]), np.average(mad[:, 1]), np.average(hdf[:, 1]),
                       np.average(dices[:, 2]), np.average(mad[:, 2]), np.average(hdf[:, 2])]
            median = ['Median',
                      np.median(dices[:, 0]), np.median(mad[:, 0]), np.median(hdf[:, 0]),
                      np.median(dices[:, 1]), np.median(mad[:, 1]), np.median(hdf[:, 1]),
                      np.median(dices[:, 2]), np.median(mad[:, 2]), np.median(hdf[:, 2])]
            std = ['STD',
                   np.std(dices[:, 0]), np.std(mad[:, 0]), np.std(hdf[:, 0]),
                   np.std(dices[:, 1]), np.std(mad[:, 1]), np.std(hdf[:, 1]),
                   np.std(dices[:, 2]), np.std(mad[:, 2]), np.std(hdf[:, 2])]
            csvwriter.writerow(average)
            csvwriter.writerow(median)
            csvwriter.writerow(std)

    if eval:
        print('Dices')
        print(np.average(dices, axis=0))
        print('Mean Dist')
        print(np.average(mad, axis=0))
        print('Hausdorff 95%')
        print(np.average(hdf, axis=0))
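# ---------------------------------------------------------------------------
# Reference sketch (added): the Dice overlap computed per class above, written
# out in plain NumPy. getDice/getBoundaryDistances are project-internal and
# SimpleITK-based; this standalone version is for cross-checking only.
# ---------------------------------------------------------------------------
import numpy as np

def dice_coefficient(pred, gt, eps=1e-7):
    """Dice = 2*|P & G| / (|P| + |G|) for binary masks of the same shape."""
    pred, gt = pred.astype(bool), gt.astype(bool)
    intersection = np.logical_and(pred, gt).sum()
    return (2.0 * intersection + eps) / (pred.sum() + gt.sum() + eps)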
def train(gpu_id, nb_gpus, dataset_name, ens_folder_name, labelled_perc, fold_num,
          model_type, is_augmented=True, early_stop=True):
    metadata = get_metadata(dataset_name)
    name = 'uats_softmax_F' + str(fold_num) + '_Perct_Labelled_' + str(labelled_perc)
    data_path = os.path.join(metadata[m_data_path], dataset_name,
                             'fold_' + str(fold_num) + '_P' + str(labelled_perc),
                             'train')
    print('data directory:', data_path)
    tb_log_dir = os.path.join(metadata[m_save_path], 'tb', dataset_name,
                              name + '_' + str(metadata[m_lr]) + '/')
    model_name = os.path.join(metadata[m_save_path], 'model', 'uats', dataset_name,
                              name + H5)
    makedir(os.path.join(metadata[m_save_path], 'model', 'uats', dataset_name))
    csv_name = os.path.join(metadata[m_save_path], 'csv', dataset_name, name + '.csv')
    makedir(os.path.join(metadata[m_save_path], 'csv', dataset_name))
    ens_path = os.path.join(metadata[m_root_temp_path], ens_folder_name)
    trained_model_path = os.path.join(
        metadata[m_trained_model_path], dataset_name,
        'supervised_F' + str(fold_num) + '_P' + str(labelled_perc) + H5)

    dim = metadata[m_dim]
    inp_shape = dim if len(dim) == 3 else [dim[0], dim[1], metadata[m_nr_channels]]
    bs = metadata[m_batch_size]
    num_labeled_train = int(labelled_perc * metadata[m_labelled_train])  # actual labelled data
    num_ul = metadata[m_unlabelled_train]
    num_train_data = num_labeled_train + num_ul
    print('Labelled Images:', num_labeled_train)
    print('Unlabeled Images:', num_ul)
    print('Total Images:', num_train_data)

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)
    model = model_type.build_model(img_shape=inp_shape,
                                   learning_rate=metadata[m_lr],
                                   gpu_id=gpu_id,
                                   nb_gpus=nb_gpus,
                                   trained_model=trained_model_path,
                                   temp=1)
    model.summary()

    # callbacks
    print('-' * 30)
    print('Creating callbacks...')
    print('-' * 30)
    csv_logger = CSVLogger(csv_name, append=True, separator=';')
    if nb_gpus is not None and nb_gpus > 1:
        model_checkpoint = ModelCheckpointParallel(model_name, monitor='val_loss',
                                                   save_best_only=True, verbose=1,
                                                   mode='min')
    else:
        model_checkpoint = ModelCheckpoint(model_name, monitor='val_loss',
                                           save_best_only=True, verbose=1, mode='min')
    tensorboard = TensorBoard(log_dir=tb_log_dir, write_graph=False, write_grads=False,
                              histogram_freq=0, batch_size=1, write_images=False)
    tcb = TemporalCallback(dim, data_path, ens_path, metadata[m_save_path],
                           num_train_data, num_labeled_train,
                           metadata[m_patients_per_batch], metadata[m_labelled_perc],
                           metadata[m_metric_keys], metadata[m_nr_class], bs,
                           dataset_name)
    lcb = model_type.LossCallback()
    cb = [model_checkpoint, tcb, tensorboard, lcb, csv_logger]
    if early_stop:
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                           patience=PATIENCE_EARLY_STOP, min_delta=DELTA)
        cb.append(es)
    print('Callbacks: ', cb)

    print('-' * 30)
    print('Fitting model...')
    print('-' * 30)
    training_generator = get_uats_data_generator(dataset_name, data_path, ens_path,
                                                 num_train_data, num_labeled_train,
                                                 bs, is_augmented)
    steps = ((metadata[m_labelled_train] + num_ul) * metadata[m_aug_num]) // bs
    x_val, y_val = get_uats_val_data(data_path, metadata[m_dim],
                                     metadata[m_nr_class], metadata[m_nr_channels])
    history = model.fit_generator(generator=training_generator,
                                  steps_per_epoch=steps,
                                  validation_data=(x_val, y_val),
                                  epochs=NUM_EPOCH,
                                  callbacks=cb)
    return history
def __init__(self, dim, data_path, temp_path, save_path, num_train_data,
             num_labeled_train, patients_per_batch, val_metric_keys, nr_class,
             batch_size, dataset_name):
    self.data_path = data_path
    self.temp_path = temp_path
    self.save_path = save_path
    self.num_train_data = num_train_data  # total training data = labelled + unlabelled
    self.num_labeled_train = num_labeled_train  # labelled training data
    self.num_un_labeled_train = num_train_data - num_labeled_train  # unlabelled data
    self.patients_per_batch = patients_per_batch
    self.nr_class = nr_class
    self.val_metric_keys = val_metric_keys
    # Plain bool/float: the np.bool / np.float aliases are deprecated.
    self.save_flag = np.zeros((self.nr_class), dtype=bool)
    self.val_dice_coef = np.zeros((self.nr_class), dtype=float)
    self.dim = dim
    self.batch_size = batch_size
    self.nr_dim = len(dim)

    # In this variant every patient starts with an all-ones flag array.
    flag_1 = np.ones(shape=dim, dtype='int64')

    makedir(self.temp_path)
    makedir(os.path.join(self.temp_path, ENS_GT))
    makedir(os.path.join(self.temp_path, FLAG))
    makedir(os.path.join(save_path, 'tb', dataset_name))
    makedir(os.path.join(save_path, 'csv', dataset_name))
    makedir(os.path.join(save_path, 'model', dataset_name))

    for patient in np.arange(num_train_data):
        copyfile(os.path.join(data_path, GT, str(patient) + NPY),
                 os.path.join(self.temp_path, ENS_GT, str(patient) + NPY))
        np.save(os.path.join(self.temp_path, FLAG, str(patient) + NPY), flag_1)
    del flag_1
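# ---------------------------------------------------------------------------
# Hedged sketch (assumption, not from the source): the FLAG arrays written by
# the two __init__ variants above typically gate which voxels contribute to a
# consistency / pseudo-label loss against the ensemble target. Minimal NumPy
# illustration; the squared error and the function name are illustrative
# choices, not the project's actual loss.
# ---------------------------------------------------------------------------
import numpy as np

def masked_consistency_loss(pred, ens_target, flag):
    """Mean squared difference to the ensemble target, counted where flag == 1."""
    mask = flag.astype(np.float32)
    if mask.ndim == pred.ndim - 1:  # flag is spatial-only: broadcast over classes
        mask = mask[..., np.newaxis]
    mask = np.broadcast_to(mask, pred.shape)
    per_voxel = (pred - ens_target) ** 2
    return (per_voxel * mask).sum() / np.maximum(mask.sum(), 1.0)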