def main(dataset):
    if dataset not in ('brats', 'isic'):
        raise ValueError('dataset must be "isic" or "brats"')

    if dataset == 'brats':
        data, ids_names_dict, task = get_brats_data()
        subjects = ['Brats18_TCIA01_390_1', 'Brats18_CBICA_AUN_1', 'Brats18_CBICA_ASY_1']
        out_dir = os.path.join(dirs.BRATS_PLOT_DIR, dirs.CALIB_NAME)
    else:
        data, ids_names_dict, task = get_isic_data()
        subjects = ['ISIC_0012388', 'ISIC_0012654', 'ISIC_0012447']
        out_dir = os.path.join(dirs.ISIC_PLOT_DIR, dirs.CALIB_NAME)
    fh.create_dir_if_not_exists(out_dir)

    run_ids = []
    frames = []
    for run_id, file_path in data:
        frames.append(pd.read_csv(file_path))
        run_ids.append(run_id)
    df = pd.concat(frames, keys=run_ids, names=['run_id'])

    # create a calibration plot with all run_ids
    create_pdf_all_run_id(df, out_dir, ids_names_dict, task, legend=False)
    # create a calibration pdf for each subject
    create_subject_pdfs(df, out_dir, ids_names_dict, subjects, legend=False)
    create_legend_only(ids_names_dict, out_dir)

    miscalibration_percentage(df, ids_names_dict, task)
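# Hypothetical entry-point sketch (an assumption, not part of the original
# script): one way main() might be invoked from the command line.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Create calibration plots.')
    parser.add_argument('dataset', choices=('brats', 'isic'))
    args = parser.parse_args()
    main(args.dataset)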
def main(split_type: str):
    if split_type not in ('ensemble', 'k-fold', 'resplit-train'):
        raise ValueError('invalid split type "{}"'.format(split_type))

    data_dir_with_prefix = dirs.ISIC_PREPROCESSED_TRAIN_DATA_DIR
    out_dir = dirs.SPLITS_DIR
    fh.create_dir_if_not_exists(out_dir)

    collector = collect.IsicCollector(data_dir_with_prefix, True)
    subject_files = collector.get_subject_files()
    subject_names = [sf.subject for sf in subject_files]

    # only training data that should be randomized
    train_names = subject_names

    if split_type == 'ensemble':
        k = 10
        splits = split.split_subjects_k_fold(train_names, k)
        train_names_k, _ = zip(*splits)
        valid_names_k = k * [None]
        test_names_k = None
        file_name = 'split_isic-train_k{}_{}-{}-{}.json'.format(
            k, len(train_names_k[0]), 0, 0)
        split.save_split(os.path.join(out_dir, file_name),
                         train_names_k, valid_names_k, test_names_k)
    elif split_type == 'k-fold':
        k = 5
        splits = split.split_subjects_k_fold(train_names, k)
        train_names_k, valid_names_k = zip(*splits)
        nb_valid = int(len(train_names) / k)
        nb_train = len(train_names) - nb_valid
        file_name = 'split_isic_cv_k{}_{}-{}-{}.json'.format(
            k, nb_train, nb_valid, nb_valid)
        # valid is the test set, too, for cross-validation
        split.save_split(os.path.join(out_dir, file_name),
                         train_names_k, valid_names_k, valid_names_k)
    elif split_type == 'resplit-train':
        nb_train_percentage = 0.10
        nb_train_new = int(len(train_names) * nb_train_percentage)
        # shuffle reproducibly without disturbing the global random state
        state = random.getstate()
        random.seed(100)
        random.shuffle(train_names)
        random.setstate(state)
        counts = (nb_train_new, len(train_names) - nb_train_new)
        new_train_names, _ = split.split_subjects(train_names, counts)
        file_name = 'split_isic_sub_{}-{}-{}.json'.format(nb_train_new, 0, 0)
        split.save_split(os.path.join(out_dir, file_name),
                         new_train_names, [None], None)
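# Hedged round-trip sketch (an assumption, not part of the original script):
# split.load_split is used elsewhere in this repo and returns the
# (train, valid, test) name lists, so a freshly written split file can be
# sanity-checked like this.
def _check_split(split_file_path: str):
    train_names, valid_names, test_names = split.load_split(split_file_path)
    print('{}: {} train entries'.format(split_file_path, len(train_names)))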
def main(dataset: str, to_plot: list):
    if dataset not in ('brats', 'isic'):
        raise ValueError('Invalid dataset "{}". Choose "brats" or "isic"'.format(dataset))

    task = dataset
    if task == 'brats':
        eval_data_list = evdata.get_brats_eval_data(to_plot)
        subjects = ['Brats18_TCIA01_390_1', 'Brats18_CBICA_AUN_1', 'Brats18_CBICA_ASY_1']
        min_max_dir = os.path.join(dirs.BRATS_EVAL_DIR, dirs.MINMAX_NAME)
        plot_dir = os.path.join(dirs.BRATS_PLOT_DIR, 'images')
        img_key = 'flair'
    else:
        eval_data_list = evdata.get_isic_eval_data(to_plot)
        subjects = ['ISIC_0012388', 'ISIC_0012654', 'ISIC_0012447']
        min_max_dir = os.path.join(dirs.ISIC_EVAL_DIR, dirs.MINMAX_NAME)
        plot_dir = os.path.join(dirs.ISIC_PLOT_DIR, 'images')
        img_key = 'image'
    fh.create_dir_if_not_exists(plot_dir)

    writer = OutWriterPng(plot_dir, task, img_key)
    for entry in eval_data_list:
        prepare, id_ = analysis.get_uncertainty_preparation(
            entry, rescale_confidence='subject', rescale_sigma='global',
            min_max_dir=min_max_dir)
        print(id_)

        subject_files = [sf for sf in entry.subject_files if sf.subject in subjects]
        for sf in subject_files:
            subject_dir = os.path.join(plot_dir, sf.subject)
            if not os.path.isdir(subject_dir):
                os.makedirs(subject_dir)

            loader = analysis.Loader()
            d = loader.get_data(sf, analysis.Loader.Params(entry.confidence_entry,
                                                           need_target=True,
                                                           need_prediction=True,
                                                           images_needed=[img_key]))
            d = prepare(d)
            writer.on_new_subject(sf.subject, d)
            writer.on_test_id(entry.id_, d)
def build_brats_dataset(params: BuildParameters):
    collector = collect.Brats17Collector(params.in_dir)  # 17 is the same dataset as 18
    subject_files = collector.get_subject_files()

    if params.split_file is not None:
        if params.is_train_data:
            train_subjects, valid_subjects, _ = split.load_split(params.split_file)
            selection = train_subjects + valid_subjects
        else:
            _, _, selection = split.load_split(params.split_file)
        subject_files = [sf for sf in subject_files if sf.subject in selection]
        assert len(subject_files) == len(selection)

    # sort the subject files according to the subject name (identifier)
    subject_files.sort(key=lambda sf: sf.subject)

    if params.prediction_path is not None:
        subject_files = params.add_prediction_fn(subject_files, params.prediction_path)

    fh.create_dir_if_not_exists(params.out_file, is_file=True)
    fh.remove_if_exists(params.out_file)

    with crt.get_writer(params.out_file) as writer:
        callbacks = [
            crt.MonitoringCallback(),
            crt.WriteNamesCallback(writer),
            crt.WriteFilesCallback(writer),
            crt.WriteDataCallback(writer),
            crt.WriteSubjectCallback(writer),
            crt.WriteImageInformationCallback(writer),
        ]
        has_grade = params.in_dir.endswith('Training')
        if has_grade:
            callbacks.append(WriteGradeCallback(writer))
        callback = crt.ComposeCallback(callbacks)

        traverser = crt.SubjectFileTraverser()
        traverser.traverse(subject_files, callback=callback, load=LoadSubject(),
                           transform=tfm.ComposeTransform(params.transforms))
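# Hedged usage sketch (not part of the original file): assumes BuildParameters
# accepts these attributes as keyword arguments; the attribute names are taken
# from build_brats_dataset above, the argument values are placeholders.
def _example_build(in_dir: str, split_file: str, out_file: str):
    params = BuildParameters(in_dir=in_dir,            # e.g. a '.../Training' directory
                             split_file=split_file,    # JSON written by split.save_split
                             is_train_data=True,       # select train + valid subjects
                             prediction_path=None,     # no predictions to attach
                             add_prediction_fn=None,
                             out_file=out_file,        # dataset file to (re)create
                             transforms=[])
    build_brats_dataset(params)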
def main(ds):
    if ds not in ('brats', 'isic'):
        raise ValueError('dataset must be "isic" or "brats"')

    task = ds
    if task == 'brats':
        out_dir = os.path.join(dirs.BRATS_PLOT_DIR, 'suppl_mat')
        data, ids_names_dict = get_brats_data()
    else:
        out_dir = os.path.join(dirs.ISIC_PLOT_DIR, 'suppl_mat')
        data, ids_names_dict = get_isic_data()
    fh.create_dir_if_not_exists(out_dir)

    df = gather_base(data)
    out_file = os.path.join(out_dir, 'error_prec_recall_{}.svg'.format(task))
    plot_precision_recall(df, ids_names_dict, out_file)
    create_legend_only(ids_names_dict, out_dir)
def on_new_subject(self, subject_name: str, img_data: dict):
    self.subject_dir = os.path.join(self.out_dir, subject_name)
    fh.create_dir_if_not_exists(self.subject_dir)

    self.selected_slice = self.get_slice_and_str_fn(img_data, self.task)
    slice_str = '_sl{}'.format(self.selected_slice) \
        if not isinstance(self.selected_slice, slice) else ''

    if self.task == 'isic':
        self.bbox = self._get_bbox(img_data[self.img_key][self.selected_slice],
                                   squared='min', dims=2)
    else:
        self.bbox = self._get_bbox(img_data[self.img_key][self.selected_slice],
                                   squared='max')

    gt = self._apply_bbox(img_data['target'].astype(np.uint8)[self.selected_slice])
    ma_gt = np.ma.masked_where(gt == 0, gt)

    img_path = os.path.join(self.subject_dir,
                            '{}{}.png'.format(self.img_key, slice_str))
    self._save_image(self._apply_bbox(img_data[self.img_key][self.selected_slice]),
                     img_path)

    overlay_path = os.path.join(self.subject_dir,
                                '{}_gt_overlay{}.png'.format(self.img_key, slice_str))
    self.img_to_overlay = self._apply_bbox(img_data[self.img_key][self.selected_slice])
    self._save_label_overlay(self.img_to_overlay, ma_gt, overlay_path,
                             alpha=0.5, cm=self.gt_cm)
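# Minimal standalone sketch (an assumption about how _save_label_overlay works):
# np.ma.masked_where leaves masked pixels undrawn by imshow, so only the label
# region is painted over the base image. All data below is hypothetical.
def _overlay_sketch(out_path='overlay_example.png'):
    import matplotlib.pyplot as plt

    img = np.random.rand(64, 64)           # hypothetical grayscale image
    labels = np.zeros((64, 64), np.uint8)
    labels[20:40, 20:40] = 1                # hypothetical label region
    masked = np.ma.masked_where(labels == 0, labels)

    fig, ax = plt.subplots()
    ax.imshow(img, cmap='gray')
    ax.imshow(masked, alpha=0.5)            # masked background stays transparent
    ax.axis('off')
    fig.savefig(out_path, bbox_inches='tight')
    plt.close(fig)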
def save_checkpoint(checkpoint_path: str, epoch: int, model: nn.Module,
                    optimizer: optim.Optimizer, **others) -> None:
    fh.create_dir_if_not_exists(checkpoint_path, is_file=True)
    save_dict = {'state_dict': model.state_dict(),
                 'epoch': epoch,
                 'optimizer': optimizer.state_dict(),
                 **others}
    torch.save(save_dict, checkpoint_path)
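# Hedged counterpart sketch (not part of the original file): restoring a
# checkpoint written by save_checkpoint above. Returning the epoch is an
# assumed convention.
def load_checkpoint(checkpoint_path: str, model: nn.Module,
                    optimizer: optim.Optimizer = None) -> int:
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch']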
def backup_model_parameters(model_path: str, model_params: cfg.DictableParameter,
                            optimizer_params: cfg.DictableParameter, **others) -> None:
    fh.create_dir_if_not_exists(model_path, is_file=True)
    with open(model_path, 'w') as f:
        json.dump({'model': model_params.to_dict(),
                   'optimizer': optimizer_params.to_dict(),
                   **others}, f)
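# Hedged read-back sketch (assumed, not in the original file): the backup is
# plain JSON, so it can be inspected without any project dependencies.
def read_model_parameter_backup(model_path: str) -> dict:
    with open(model_path) as f:
        return json.load(f)  # {'model': {...}, 'optimizer': {...}, ...}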
def main(split_type: str):
    if split_type not in ('default', 'ensemble', 'k-fold', 'resplit-train'):
        raise ValueError('invalid split type "{}"'.format(split_type))

    data_dir = dirs.BRATS_ORIG_DATA_DIR
    out_dir = dirs.SPLITS_DIR
    fh.create_dir_if_not_exists(out_dir)

    collector = collect.Brats17Collector(data_dir)
    subject_files = collector.get_subject_files()
    subject_names = [sf.subject for sf in subject_files]

    def get_grade_from_subject_file(subject_file):
        first_image_path = list(subject_file.categories['images'].entries.values())[0]
        grade = os.path.basename(os.path.dirname(os.path.dirname(first_image_path)))
        return grade

    grades = [get_grade_from_subject_file(sf) for sf in subject_files]

    nb_train = 100
    nb_valid = 25
    nb_test = len(subject_names) - nb_train - nb_valid
    counts = (nb_train, nb_valid, nb_test)
    grade_ints = [0 if g == 'HGG' else 1 for g in grades]
    train_names, valid_names, test_names = split.create_stratified_shuffled_split(
        subject_names, grade_ints, counts, seed=100)

    if split_type == 'default':
        file_name = 'split_brats18_{}-{}-{}.json'.format(nb_train, nb_valid, nb_test)
        split.save_split(os.path.join(out_dir, file_name),
                         train_names, valid_names, test_names)
    elif split_type == 'ensemble':
        k = 10
        splits = split.split_subjects_k_fold(train_names, k)
        train_names_k, _ = zip(*splits)
        valid_names_k = k * [valid_names]
        test_names_k = k * [test_names]
        file_name = 'split_brats18_k{}_{}-{}-{}.json'.format(
            k, len(train_names) - k, nb_valid, nb_test)
        split.save_split(os.path.join(out_dir, file_name),
                         train_names_k, valid_names_k, test_names_k)
    elif split_type == 'k-fold':
        k = 5
        name_grade_dict = {s: g for s, g in zip(subject_names, grade_ints)}
        to_fold = train_names + valid_names
        to_fold_grades = [name_grade_dict[s] for s in to_fold]
        splits = split.split_subject_k_fold_stratified(to_fold, to_fold_grades, k)
        train_names_k, valid_names_k = zip(*splits)
        nb_valid = int(len(to_fold) / k)
        nb_train = len(to_fold) - nb_valid
        file_name = 'split_brats18_cv_k{}_{}-{}-{}.json'.format(
            k, nb_train, nb_valid, nb_valid)
        # valid is the test set, too, for cross-validation
        split.save_split(os.path.join(out_dir, file_name),
                         train_names_k, valid_names_k, valid_names_k)
    elif split_type == 'resplit-train':
        # note: not stratified, since we want the same subjects to be included
        # in a larger train split
        nb_train_new = 10
        counts = (nb_train_new, len(train_names) - nb_train_new)
        new_train_names, _ = split.split_subjects(train_names, counts)
        file_name = 'split_brats18_sub_{}-{}-{}.json'.format(
            nb_train_new, nb_valid, nb_test)
        split.save_split(os.path.join(out_dir, file_name),
                         new_train_names, valid_names, test_names)
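# Hedged verification sketch (not in the original script): a quick check that
# the stratified split roughly preserves the HGG/LGG (0/1) proportions per
# partition. name_grade_dict maps subject name -> grade int, as built above.
from collections import Counter

def _grade_counts(names, name_grade_dict):
    return Counter(name_grade_dict[n] for n in names)

# Example (hypothetical): after create_stratified_shuffled_split,
# _grade_counts(train_names, name_grade_dict) and
# _grade_counts(test_names, name_grade_dict) should show similar 0/1 ratios.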