def get_supervised_data_generator(dataset_name, data_path, num_train, is_augmented=True):
    if dataset_name == PROSTATE_DATASET_NAME:
        metadata = get_metadata(dataset_name)
        train_id_list = np.arange(num_train)
        np.random.shuffle(train_id_list)
        print(train_id_list[0:10])
        from dataset_specific.prostate.generator.baseline import DataGenerator as train_gen
        return train_gen(data_path,
                         train_id_list,
                         batch_size=metadata[m_batch_size],
                         dim=metadata[m_dim],
                         is_augmented=is_augmented)

    elif dataset_name == SKIN_DATASET_NAME:
        metadata = get_metadata(dataset_name)
        train_id_list = np.arange(num_train)
        np.random.shuffle(train_id_list)
        print(train_id_list[0:10])
        from dataset_specific.skin_2D.generator.baseline import DataGenerator as train_gen
        return train_gen(data_path,
                         train_id_list,
                         batch_size=metadata[m_batch_size],
                         dim=metadata[m_dim],
                         is_augmented=is_augmented)
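# Illustrative usage sketch (the path and count below are made up, not taken from the
# repo config). The returned generator plugs directly into Keras' fit_generator,
# assuming the dataset-specific DataGenerator classes implement keras.utils.Sequence:
#
#     gen = get_supervised_data_generator(PROSTATE_DATASET_NAME,
#                                         '/data/prostate/fold_1_P0.5/train',
#                                         num_train=20,
#                                         is_augmented=True)
#     x_batch, y_batch = gen[0]   # first augmented batch of size metadata[m_batch_size]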
def generate_uats_dataset(dataset_name, fold_num, labelled_perc, ul_imgs_path, supervised_model_path):
    """Append pseudo-labelled unlabelled prostate images to the supervised fold's training set."""
    metadata = get_metadata(dataset_name)
    unlabeled_imgs = np.load(ul_imgs_path)
    supervised_fold_path = os.path.join(metadata[m_data_path], dataset_name,
                                        'fold_' + str(fold_num) + '_P' + str(labelled_perc))
    labelled_num_considrd = len(os.listdir(os.path.join(supervised_fold_path, 'train', 'imgs')))
    counter = labelled_num_considrd

    # generate pseudo-labels for the unlabelled images using the supervised model
    pseudolabels = generate_predictions(os.path.join(supervised_model_path, dataset_name),
                                        'supervised_F' + str(fold_num) + '_P' + str(labelled_perc),
                                        unlabeled_imgs)

    # Some unlabelled prostate images were excluded after manual inspection
    # because they looked anomalous.
    questionable = [47, 109, 203, 215]
    bad = [99, 100, 101, 103]
    all_ids = np.arange(unlabeled_imgs.shape[0])
    excluded = questionable + bad
    # sort for a deterministic order, so file numbering is reproducible
    good_imgs_list = sorted(set(all_ids) - set(excluded))
    print('copied labelled training images')
    for i in good_imgs_list:
        np.save(os.path.join(supervised_fold_path, 'train', 'imgs', str(counter)) + '.npy',
                unlabeled_imgs[i])
        np.save(os.path.join(supervised_fold_path, 'train', 'gt', str(counter)) + '.npy',
                pseudolabels[i])
        counter += 1
        print(i, counter)
    print('copied unlabelled training images')
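# Note on the resulting layout (inferred from the counters above): if the supervised fold
# already holds labelled cases 0.npy ... (K-1).npy under train/imgs and train/gt, the
# retained pseudo-labelled cases are appended as K.npy, (K+1).npy, ... so labelled and
# pseudo-labelled samples share one flat training directory.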
def generate_supervised_dataset(dataset_name, fold_num, labelled_perc, seed=1234):
    """Create a supervised prostate fold from `labelled_perc` of the labelled cases, plus the full validation split."""
    metadata = get_metadata(dataset_name)
    folds_root_path = metadata[m_data_path]
    save_root_path = os.path.join(folds_root_path, dataset_name,
                                  'fold_' + str(fold_num) + '_P' + str(labelled_perc))
    supervised_fold_path = os.path.join(folds_root_path, dataset_name, 'fold_' + str(fold_num))
    labelled_num = metadata[m_labelled_train]

    # training
    makedir(os.path.join(save_root_path, 'train', 'imgs'), delete_existing=True)
    makedir(os.path.join(save_root_path, 'train', 'gt'), delete_existing=True)
    makedir(os.path.join(save_root_path, 'val', 'imgs'), delete_existing=True)
    makedir(os.path.join(save_root_path, 'val', 'gt'), delete_existing=True)

    num_labeled_train = int(labelled_perc * labelled_num)
    np.random.seed(seed)
    # seed the generator explicitly so the labelled subset is reproducible
    rng = default_rng(seed)
    labelled_num_considrd = rng.choice(labelled_num, size=num_labeled_train, replace=False)
    # labelled_num_considrd = np.random.randint(0, labelled_num, size=num_labeled_train, dtype=np.int)
    counter = 0
    for i in labelled_num_considrd:
        np.save(os.path.join(save_root_path, 'train', 'imgs', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'train', 'imgs', str(i) + '.npy')))
        np.save(os.path.join(save_root_path, 'train', 'gt', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'train', 'gt', str(i) + '.npy')))
        counter = counter + 1
        print(i, counter)
    print('copied labelled training images')

    counter = 0
    for i in np.arange(metadata[m_labelled_val]):
        np.save(os.path.join(save_root_path, 'val', 'imgs', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'val', 'imgs', str(i) + '.npy')))
        np.save(os.path.join(save_root_path, 'val', 'gt', str(counter)),
                np.load(os.path.join(supervised_fold_path, 'val', 'gt', str(i) + '.npy')))
        counter = counter + 1  # keep the output index in step with the source index
        print(i)
    print('copied labelled val images')
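# Illustrative call (mirrors the commented-out invocation in the __main__ block further
# below; fold number and percentage are examples only):
#
#     generate_supervised_dataset(PROSTATE_DATASET_NAME, fold_num=3, labelled_perc=0.1, seed=1234)
#
# This copies a reproducible random subset of the labelled fold into fold_3_P0.1/train
# and the full validation split into fold_3_P0.1/val.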
def generate_supervised_dataset(dataset_name, fold_num, labelled_perc, seed=1234):
    """Create a supervised skin fold from `labelled_perc` of the labelled 2D images, with two-channel (background/lesion) ground truth."""
    metadata = get_metadata(dataset_name)
    save_root_path = os.path.join(metadata[m_data_path], dataset_name)
    supervised_fold_path = os.path.join(save_root_path,
                                        'fold_' + str(fold_num) + '_P' + str(labelled_perc))
    labelled_files_lst = np.load(os.path.join(metadata[m_folds_path],
                                              'train_fold' + str(fold_num) + '.npy'))
    labelled_train_num = len(labelled_files_lst)
    labelled_path = metadata[m_raw_data_path] + '/labelled/train/'
    print(labelled_files_lst[0:10])

    np.random.seed(seed)
    np.random.shuffle(labelled_files_lst)
    labelled_num_considrd = labelled_files_lst[:int(labelled_train_num * labelled_perc)]
    validation_imgs = np.load(os.path.join(metadata[m_folds_path],
                                           'val_fold' + str(fold_num) + '.npy'))
    counter = 0
    makedir(os.path.join(supervised_fold_path, 'train', 'imgs'), delete_existing=True)
    makedir(os.path.join(supervised_fold_path, 'train', 'gt'), delete_existing=True)
    makedir(os.path.join(supervised_fold_path, 'val', 'imgs'), delete_existing=True)
    makedir(os.path.join(supervised_fold_path, 'val', 'gt'), delete_existing=True)

    print('training images created')
    for i in labelled_num_considrd:
        print(i, counter)
        # intensities are rescaled from [0, 255] to [0, 1]
        np.save(os.path.join(supervised_fold_path, 'train', 'imgs', str(counter) + '.npy'),
                np.load(os.path.join(labelled_path, 'imgs', i)) / 255)
        GT_lesion = np.load(os.path.join(labelled_path, 'GT',
                                         i.replace('.npy', '_segmentation.npy'))) / 255
        # background channel is the complement of the lesion mask
        GT_bg = np.where(GT_lesion == 0, np.ones_like(GT_lesion), np.zeros_like(GT_lesion))
        np.save(os.path.join(supervised_fold_path, 'train', 'gt', str(counter) + '.npy'),
                np.concatenate((GT_bg, GT_lesion), -1))
        counter = counter + 1

    print('validation images created')
    counter = 0
    for i in validation_imgs:
        print(i, counter)
        np.save(os.path.join(supervised_fold_path, 'val', 'imgs', str(counter) + '.npy'),
                np.load(os.path.join(labelled_path, 'imgs', i)) / 255)
        GT_lesion = np.load(os.path.join(labelled_path, 'GT',
                                         i.replace('.npy', '_segmentation.npy'))) / 255
        GT_bg = np.where(GT_lesion == 0, np.ones_like(GT_lesion), np.zeros_like(GT_lesion))
        np.save(os.path.join(supervised_fold_path, 'val', 'gt', str(counter) + '.npy'),
                np.concatenate((GT_bg, GT_lesion), -1))
        counter = counter + 1
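# Shape sketch for the ground-truth encoding used above (the image size below is made up
# and only illustrative; it assumes each lesion mask is stored as an (H, W, 1) array):
#
#     lesion = np.zeros((192, 256, 1)); lesion[50:100, 60:120, 0] = 1
#     bg = np.where(lesion == 0, np.ones_like(lesion), np.zeros_like(lesion))
#     gt = np.concatenate((bg, lesion), -1)   # gt.shape == (192, 256, 2)
#
# so channel 0 is background and channel 1 is lesion, forming a one-hot map per pixel.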
def generate_uats_dataset(dataset_name, fold_num, labelled_perc, ul_imgs_path, supervised_model_path):
    """Append pseudo-labelled skin images (predicted by the supervised baseline) to the supervised fold's training set."""
    metadata = get_metadata(dataset_name)
    dim = metadata[m_dim]
    nr_channels = metadata[m_nr_channels]
    supervised_fold_path = os.path.join(metadata[m_data_path], dataset_name,
                                        'fold_' + str(fold_num) + '_P' + str(labelled_perc))
    labelled_num_considrd = len(os.listdir(os.path.join(supervised_fold_path, 'train', 'imgs')))
    counter = labelled_num_considrd

    # generate predictions for the unlabelled images using the supervised model
    cases = sorted(os.listdir(os.path.join(ul_imgs_path)))
    img_arr = np.zeros((len(cases), dim[0], dim[1], nr_channels), dtype=float)
    for i in range(len(cases)):
        img_arr[i] = np.load(os.path.join(ul_imgs_path, cases[i])) / 255
    print(img_arr.shape[0], ' unlabelled images loaded')

    from dataset_specific.skin_2D.model.baseline import weighted_model
    wm = weighted_model()
    model = wm.build_model(img_shape=(dim[0], dim[1], nr_channels), learning_rate=5e-05)
    model.load_weights(supervised_model_path)
    prediction = model.predict(img_arr, batch_size=1)

    nrImgs = prediction[0].shape[0]
    for imgNumber in range(0, nrImgs):
        prediction_skin = prediction[1]
        pred_arr = prediction_skin[imgNumber]
        pred_arr = thresholdArray(pred_arr, 0.5)
        pred_img = sitk.GetImageFromArray(pred_arr)  # only foreground=skin for evaluation
        pred_img = castImage(pred_img, sitk.sitkUInt8)
        prediction_bg = prediction[0]
        pred_arr_bg = prediction_bg[imgNumber]
        pred_arr_bg = thresholdArray(pred_arr_bg, 0.5)
        pred_img = getConnectedComponents(pred_img)

        GT_out = np.zeros([pred_arr.shape[0], pred_arr.shape[1], 2])
        GT_out[:, :, 0] = pred_arr_bg
        GT_out[:, :, 1] = pred_arr
        np.save(os.path.join(supervised_fold_path, 'train', 'imgs', str(counter)) + '.npy',
                img_arr[imgNumber])
        np.save(os.path.join(supervised_fold_path, 'train', 'gt', str(counter)) + '.npy',
                GT_out)
        counter += 1
        print(imgNumber, counter)
    print('copied unlabelled training images')
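# Illustrative call (all paths below are placeholders; the function expects a directory of
# unlabelled .npy images and the .h5 weights of the supervised skin baseline trained on the
# same fold and labelled percentage):
#
#     generate_uats_dataset(SKIN_DATASET_NAME, fold_num=1, labelled_perc=0.5,
#                           ul_imgs_path='/data/skin/unlabelled/imgs',
#                           supervised_model_path='/models/skin/supervised_F1_P0.5.h5')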
def train(gpu_id, nb_gpus, dataset_name, labelled_perc, fold_num, model_type, is_augmented=True):
    metadata = get_metadata(dataset_name)
    name = 'supervised_F' + str(fold_num) + '_P' + str(labelled_perc)
    data_path = os.path.join(metadata[m_data_path], dataset_name,
                             'fold_' + str(fold_num) + '_P' + str(labelled_perc), 'train')
    print('data directory:', data_path)
    tb_log_dir = os.path.join(metadata[m_save_path], 'tb', dataset_name,
                              name + '_' + str(metadata[m_lr]) + '/')
    model_name = os.path.join(metadata[m_trained_model_path], dataset_name, name + H5)
    csv_name = os.path.join(metadata[m_save_path], 'csv', dataset_name, name + '.csv')
    dim = metadata[m_dim]
    bs = metadata[m_batch_size]
    num_labeled_train = int(labelled_perc * metadata[m_labelled_train])  # actual labelled data
    print("Labelled Images:", num_labeled_train)

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)
    inp_shape = dim if len(dim) == 3 else (dim[0], dim[1], metadata[m_nr_channels])
    model = model_type.build_model(img_shape=inp_shape, learning_rate=metadata[m_lr],
                                   gpu_id=gpu_id, nb_gpus=nb_gpus)
    model.summary()

    # callbacks
    print('-' * 30)
    print('Creating callbacks...')
    print('-' * 30)
    makedir(os.path.join(metadata[m_save_path], 'csv', dataset_name))
    csv_logger = CSVLogger(csv_name, append=True, separator=';')
    if nb_gpus is not None and nb_gpus > 1:
        model_checkpoint = ModelCheckpointParallel(model_name, monitor='val_loss',
                                                   save_best_only=True, verbose=1, mode='min')
    else:
        model_checkpoint = ModelCheckpoint(model_name, monitor='val_loss',
                                           save_best_only=True, verbose=1, mode='min')
    tensorboard = TensorBoard(log_dir=tb_log_dir, write_graph=False, write_grads=False,
                              histogram_freq=0, batch_size=1, write_images=False)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       patience=PATIENCE_EARLY_STOP, min_delta=DELTA)
    cb = [model_checkpoint, tensorboard, csv_logger, es]
    print('Callbacks: ', cb)

    print('-' * 30)
    print('Fitting model...')
    print('-' * 30)
    training_generator = get_supervised_data_generator(dataset_name, data_path,
                                                       num_labeled_train, is_augmented)
    steps = (metadata[m_labelled_train] * metadata[m_aug_num]) // bs
    x_val, y_val = get_supervised_val_data(data_path, dim, metadata[m_nr_class],
                                           metadata[m_nr_channels])
    history = model.fit_generator(generator=training_generator,
                                  steps_per_epoch=steps,
                                  validation_data=[x_val, y_val],
                                  epochs=NUM_EPOCH,
                                  callbacks=cb)
    return history
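# Illustrative single-GPU invocation (mirrors the training script below; the fold number
# and percentage are examples, and weighted_model comes from the dataset-specific
# baseline module):
#
#     wm = weighted_model()
#     history = train(None, None, dataset_name=PROSTATE_DATASET_NAME,
#                     labelled_perc=0.5, fold_num=1, model_type=wm)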
def train(gpu_id, nb_gpus, dataset_name, ens_folder_name, labelled_perc, fold_num, model_type,
          is_augmented=True, early_stop=True):
    global metadata
    metadata = get_metadata(dataset_name)
    name = 'bai_F' + str(fold_num) + '_Perct_Labelled_' + str(labelled_perc)
    data_path = os.path.join(metadata[m_data_path], dataset_name,
                             'fold_' + str(fold_num) + '_P' + str(labelled_perc), 'train')
    print('data directory:', data_path)
    tb_log_dir = os.path.join(metadata[m_save_path], 'tb', dataset_name,
                              name + '_' + str(metadata[m_lr]) + '/')
    model_name = os.path.join(metadata[m_save_path], 'model', 'bai', dataset_name, name + H5)
    csv_name = os.path.join(metadata[m_save_path], 'csv', dataset_name, name + '.csv')
    ens_path = os.path.join(metadata[m_root_temp_path], ens_folder_name)
    trained_model_path = os.path.join(metadata[m_trained_model_path], dataset_name,
                                      'supervised_F' + str(fold_num) + '_P' + str(labelled_perc) + H5)
    dim = metadata[m_dim]
    bs = metadata[m_batch_size]
    num_train_data = metadata[m_labelled_train] + metadata[m_unlabelled_train]
    num_labeled_train = int(labelled_perc * metadata[m_labelled_train])

    print('-' * 30)
    print('Creating and compiling model...')
    print('-' * 30)
    model = model_type.build_model(img_shape=(dim[0], dim[1], dim[2]),
                                   learning_rate=metadata[m_lr],
                                   gpu_id=gpu_id, nb_gpus=nb_gpus,
                                   trained_model=trained_model_path)
    model.summary()

    # callbacks
    print('-' * 30)
    print('Creating callbacks...')
    print('-' * 30)
    csv_logger = CSVLogger(csv_name, append=True, separator=';')
    if nb_gpus is not None and nb_gpus > 1:
        model_checkpoint = ModelCheckpointParallel(model_name, monitor='val_loss',
                                                   save_best_only=True, verbose=1, mode='min')
    else:
        model_checkpoint = ModelCheckpoint(model_name, monitor='val_loss',
                                           save_best_only=True, verbose=1, mode='min')
    tensorboard = TensorBoard(log_dir=tb_log_dir, write_graph=False, write_grads=True,
                              histogram_freq=2, batch_size=1, write_images=False)
    tcb = TemporalCallback(dim, data_path, ens_path, metadata[m_save_path], num_train_data,
                           num_labeled_train, metadata[m_patients_per_batch],
                           metadata[m_nr_class], bs, metadata[m_update_epoch_num], dataset_name)
    lcb = model_type.LossCallback()
    cb = [model_checkpoint, tcb, tensorboard, lcb, csv_logger]
    if early_stop:
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                           patience=PATIENCE_EARLY_STOP, min_delta=DELTA)
        cb.append(es)
    print('Callbacks: ', cb)

    print('-' * 30)
    print('Fitting model...')
    print('-' * 30)
    training_generator = get_temporal_data_generator(dataset_name, data_path, ens_path,
                                                     num_train_data, num_labeled_train, bs,
                                                     is_augmented)
    steps = ((metadata[m_labelled_train] + metadata[m_unlabelled_train]) * metadata[m_aug_num]) // bs
    x_val, y_val = get_temporal_val_data(data_path, dim, metadata[m_nr_class],
                                         metadata[m_nr_channels])
    history = model.fit_generator(generator=training_generator,
                                  steps_per_epoch=steps,
                                  validation_data=[x_val, y_val],
                                  epochs=NUM_EPOCH,
                                  callbacks=cb)
    return history
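# Illustrative invocation of this temporal ('bai') training entry point (mirrors the script
# below; 'sadv4' is just an example name for the scratch folder under
# metadata[m_root_temp_path], which presumably stages the per-epoch ensemble predictions
# written by TemporalCallback):
#
#     wm = weighted_model()
#     history = train(None, None, dataset_name=PROSTATE_DATASET_NAME, ens_folder_name='sadv4',
#                     labelled_perc=1.0, fold_num=1, model_type=wm)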
                    type=float, default=1.0,
                    help='Percentage of labelled data used')  # 0.1 0.25 0.5 1.0
parser.add_argument('-d', '--ds', type=str, default=PROSTATE_DATASET_NAME,
                    help='dataset name')

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
args = parser.parse_args()

try:
    gpu_num = args.gpu_num
    fold_num = args.fold_num
    perc = args.perc
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    metadata = get_metadata(args.ds)

    # Build Model
    wm = weighted_model()
    train(None, None,
          dataset_name=args.ds,
          labelled_perc=perc,
          fold_num=fold_num,
          model_type=wm)
finally:
    print('clean up done!')
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True

try:
    # fold_num = args.fold_num
    # perc = args.perc
    # temp_path = args.temp_path
    # gpu_num = args.gpu_num
    gpu_num = '3'
    fold_num = 1
    perc = 1.0
    temp_path = 'sadv4'
    args = parser.parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    metadata = get_metadata(args.ds, fold_num, perc)

    # Build Model
    wm = weighted_model()
    train(None, None,
          dataset_name=args.ds,
          ens_folder_name=temp_path,
          labelled_perc=perc,
          fold_num=fold_num,
          model_type=wm)
finally:
    if os.path.exists(metadata[m_root_temp_path] + temp_path):
        cleanup(metadata[m_root_temp_path] + temp_path)
    print('clean up done!')
if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = '2'
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    ds = PROSTATE_DATASET_NAME
    metadata = get_metadata(ds)
    perc = 0.1
    fold_num = 3
    # generate_supervised_dataset(ds,
    #                             fold_num=fold_num,
    #                             labelled_perc=perc,
    #                             seed=0)
    generate_uats_dataset(ds,
                          fold_num=fold_num,
                          labelled_perc=perc,
                          ul_imgs_path='/cache/suhita/data/' + ds + '/npy_img_unlabeled.npy',
                          supervised_model_path=metadata[m_trained_model_path])