Code example #1
0
def main(dataset):
    """Create calibration plots (PDFs) for the 'brats' or 'isic' dataset.

    Loads the per-run CSV results, stacks them into one DataFrame indexed
    by run id, then writes an all-runs calibration PDF, per-subject PDFs,
    a standalone legend, and prints miscalibration percentages.
    """
    if dataset not in ('brats', 'isic'):
        raise ValueError('dataset must be "isic" or "brats"')

    if dataset == 'brats':
        data, ids_names_dict, task = get_brats_data()
        subjects = ['Brats18_TCIA01_390_1', 'Brats18_CBICA_AUN_1', 'Brats18_CBICA_ASY_1']
        out_dir = os.path.join(dirs.BRATS_PLOT_DIR, dirs.CALIB_NAME)
    else:
        data, ids_names_dict, task = get_isic_data()
        subjects = ['ISIC_0012388', 'ISIC_0012654', 'ISIC_0012447']
        out_dir = os.path.join(dirs.ISIC_PLOT_DIR, dirs.CALIB_NAME)

    fh.create_dir_if_not_exists(out_dir)

    # read each run's CSV and stack into one frame keyed by its run id
    run_ids, file_paths = zip(*data)
    frames = [pd.read_csv(fp) for fp in file_paths]
    df = pd.concat(frames, keys=list(run_ids), names=['run_id'])

    # calibration plot with all run_ids
    create_pdf_all_run_id(df, out_dir, ids_names_dict, task, legend=False)
    # one calibration pdf per selected subject, plus a legend-only figure
    create_subject_pdfs(df, out_dir, ids_names_dict, subjects, legend=False)
    create_legend_only(ids_names_dict, out_dir)

    miscalibration_percentage(df, ids_names_dict, task)
def main(split_type: str):
    """Create and save an ISIC training-data split of the requested type.

    Supported types:
        'ensemble'       -- 10 training folds, no validation/test subjects.
        'k-fold'         -- 5-fold cross-validation (valid doubles as test).
        'resplit-train'  -- deterministic random 10% training subset.

    Raises:
        ValueError: if ``split_type`` is not one of the supported values.
    """
    if split_type not in ('ensemble', 'k-fold', 'resplit-train'):
        raise ValueError('invalid split type "{}"'.format(split_type))

    data_dir_with_prefix = dirs.ISIC_PREPROCESSED_TRAIN_DATA_DIR
    out_dir = dirs.SPLITS_DIR
    fh.create_dir_if_not_exists(out_dir)

    # only training data that should be randomized
    collector = collect.IsicCollector(data_dir_with_prefix, True)
    train_names = [sf.subject for sf in collector.get_subject_files()]

    if split_type == 'ensemble':
        nb_folds = 10
        folds = split.split_subjects_k_fold(train_names, nb_folds)
        train_names_k, _ = zip(*folds)
        # ensemble members train on their fold only; no valid/test subjects
        valid_names_k = [None] * nb_folds
        test_names_k = None

        file_name = 'split_isic-train_k{}_{}-{}-{}.json'.format(
            nb_folds, len(train_names_k[0]), 0, 0)
        split.save_split(os.path.join(out_dir, file_name), train_names_k,
                         valid_names_k, test_names_k)

    elif split_type == 'k-fold':
        nb_folds = 5
        folds = split.split_subjects_k_fold(train_names, nb_folds)
        train_names_k, valid_names_k = zip(*folds)

        nb_valid = len(train_names) // nb_folds
        nb_train = len(train_names) - nb_valid

        file_name = 'split_isic_cv_k{}_{}-{}-{}.json'.format(
            nb_folds, nb_train, nb_valid, nb_valid)
        # valid is test, too for cross-validation
        split.save_split(os.path.join(out_dir, file_name), train_names_k,
                         valid_names_k, valid_names_k)

    else:  # 'resplit-train'
        train_fraction = 0.10
        nb_train_new = int(len(train_names) * train_fraction)

        # shuffle deterministically without disturbing the global RNG state
        saved_state = random.getstate()
        random.seed(100)
        random.shuffle(train_names)
        random.setstate(saved_state)

        counts = (nb_train_new, len(train_names) - nb_train_new)
        new_train_names, _ = split.split_subjects(train_names, counts)

        file_name = 'split_isic_sub_{}-{}-{}.json'.format(nb_train_new, 0, 0)
        split.save_split(os.path.join(out_dir, file_name), new_train_names,
                         [None], None)
Code example #3
0
def main(dataset: str, to_plot: list):
    """Render per-subject PNG uncertainty images for 'brats' or 'isic'.

    For each evaluation entry in ``to_plot``, rescales the uncertainty
    maps, loads the selected subjects' data, and hands them to a PNG
    writer.

    Raises:
        ValueError: if ``dataset`` is not 'brats' or 'isic'.
    """
    if dataset not in ('brats', 'isic'):
        raise ValueError(
            'Invalid dataset "{}". Chose "brats" or "isic"'.format(dataset))
    task = dataset

    if task == 'brats':
        eval_data_list = evdata.get_brats_eval_data(to_plot)
        subjects = [
            'Brats18_TCIA01_390_1', 'Brats18_CBICA_AUN_1',
            'Brats18_CBICA_ASY_1'
        ]
        min_max_dir = os.path.join(dirs.BRATS_EVAL_DIR, dirs.MINMAX_NAME)
        plot_dir = os.path.join(dirs.BRATS_PLOT_DIR, 'images')
        img_key = 'flair'
    else:
        eval_data_list = evdata.get_isic_eval_data(to_plot)
        subjects = ['ISIC_0012388', 'ISIC_0012654', 'ISIC_0012447']
        min_max_dir = os.path.join(dirs.ISIC_EVAL_DIR, dirs.MINMAX_NAME)
        plot_dir = os.path.join(dirs.ISIC_PLOT_DIR, 'images')
        img_key = 'image'

    fh.create_dir_if_not_exists(plot_dir)
    writer = OutWriterPng(plot_dir, task, img_key)

    for eval_entry in eval_data_list:
        prepare, id_ = analysis.get_uncertainty_preparation(
            eval_entry,
            rescale_confidence='subject',
            rescale_sigma='global',
            min_max_dir=min_max_dir)
        print(id_)

        # restrict plotting to the hand-picked subjects
        selected = (sf for sf in eval_entry.subject_files
                    if sf.subject in subjects)
        for subject_file in selected:
            subject_dir = os.path.join(plot_dir, subject_file.subject)
            if not os.path.isdir(subject_dir):
                os.makedirs(subject_dir)

            loader_params = analysis.Loader.Params(eval_entry.confidence_entry,
                                                   need_target=True,
                                                   need_prediction=True,
                                                   images_needed=[img_key])
            subject_data = analysis.Loader().get_data(subject_file,
                                                      loader_params)
            subject_data = prepare(subject_data)

            writer.on_new_subject(subject_file.subject, subject_data)
            writer.on_test_id(eval_entry.id_, subject_data)
def build_brats_dataset(params: BuildParameters):
    """Collect BraTS subject files and write them to a dataset file.

    Optionally restricts the subjects to the train+valid or test portion of
    ``params.split_file``, attaches prediction files, and streams everything
    through the creation writer with the configured transforms.

    Args:
        params: build configuration (input/output paths, split file,
            prediction hookup, transforms).

    Raises:
        ValueError: if the split-file selection does not match the
            collected subject files one-to-one.
    """
    collector = collect.Brats17Collector(
        params.in_dir)  # 17 is same dataset as 18
    subject_files = collector.get_subject_files()

    if params.split_file is not None:
        if params.is_train_data:
            train_subjects, valid_subjects, _ = split.load_split(
                params.split_file)
            selection = train_subjects + valid_subjects
        else:
            _, _, selection = split.load_split(params.split_file)

        subject_files = [sf for sf in subject_files if sf.subject in selection]
        # an assert would be stripped under `python -O`; raise with context so
        # a stale/mismatching split file fails loudly in every run mode
        if len(subject_files) != len(selection):
            raise ValueError(
                'split selection mismatch: found {} subject files for {} '
                'selected subjects'.format(len(subject_files), len(selection)))

    # sort the subject files according to the subject name (identifier)
    subject_files.sort(key=lambda sf: sf.subject)

    if params.prediction_path is not None:
        subject_files = params.add_prediction_fn(subject_files,
                                                 params.prediction_path)

    # start from a fresh output file
    fh.create_dir_if_not_exists(params.out_file, is_file=True)
    fh.remove_if_exists(params.out_file)

    with crt.get_writer(params.out_file) as writer:
        callbacks = [
            crt.MonitoringCallback(),
            crt.WriteNamesCallback(writer),
            crt.WriteFilesCallback(writer),
            crt.WriteDataCallback(writer),
            crt.WriteSubjectCallback(writer),
            crt.WriteImageInformationCallback(writer),
        ]

        # tumor grade is only present in the 'Training' data directory
        has_grade = params.in_dir.endswith('Training')
        if has_grade:
            callbacks.append(WriteGradeCallback(writer))
        callback = crt.ComposeCallback(callbacks)

        traverser = crt.SubjectFileTraverser()
        traverser.traverse(subject_files,
                           callback=callback,
                           load=LoadSubject(),
                           transform=tfm.ComposeTransform(params.transforms))
def main(ds):
    """Plot error precision-recall curves (SVG) plus a standalone legend.

    Raises:
        ValueError: if ``ds`` is not 'brats' or 'isic'.
    """
    if ds not in ('brats', 'isic'):
        raise ValueError('dataset must be "isic" or "brats"')

    task = ds

    # pick the dataset-specific output root and data loader
    plot_roots = {'brats': dirs.BRATS_PLOT_DIR, 'isic': dirs.ISIC_PLOT_DIR}
    out_dir = os.path.join(plot_roots[task], 'suppl_mat')
    data, ids_names_dict = (get_brats_data()
                            if task == 'brats' else get_isic_data())

    fh.create_dir_if_not_exists(out_dir)

    df = gather_base(data)
    out_file = os.path.join(out_dir, 'error_prec_recall_{}.svg'.format(task))
    plot_precision_recall(df, ids_names_dict, out_file)

    create_legend_only(ids_names_dict, out_dir)
Code example #6
0
    def on_new_subject(self, subject_name: str, img_data: dict):
        """Set up per-subject output and save the base image plus a GT overlay.

        Creates ``<out_dir>/<subject_name>``, selects the slice to plot,
        computes a crop bounding box, and writes two PNGs: the raw image and
        the image with the ground-truth labels overlaid.

        Args:
            subject_name: subject identifier, used as output sub-directory.
            img_data: dict of image arrays; must contain ``self.img_key`` and
                ``'target'`` (the ground-truth label map).
        """
        self.subject_dir = os.path.join(self.out_dir, subject_name)
        fh.create_dir_if_not_exists(self.subject_dir)

        # choose the slice to plot; a `slice` instance means "whole image"
        # (e.g. 2-D data), so no slice suffix is appended to the file names
        self.selected_slice = self.get_slice_and_str_fn(img_data, self.task)
        slice_str = '_sl{}'.format(self.selected_slice) if not isinstance(
            self.selected_slice, slice) else ''

        # crop bounding box around the content; ISIC uses a 2-D 'min'-squared
        # box, BraTS a 'max'-squared one (exact semantics live in _get_bbox —
        # NOTE(review): presumably min/max side length, confirm there)
        if self.task == 'isic':
            self.bbox = self._get_bbox(
                img_data[self.img_key][self.selected_slice],
                squared='min',
                dims=2)
        else:
            self.bbox = self._get_bbox(
                img_data[self.img_key][self.selected_slice], squared='max')

        # mask out background (label 0) so the overlay stays transparent there
        gt = self._apply_bbox(img_data['target'].astype(
            np.uint8)[self.selected_slice])
        ma_gt = np.ma.masked_where(gt == 0, gt)

        # plain cropped image, e.g. 'flair_sl75.png'
        img_path = os.path.join(self.subject_dir,
                                '{}{}.png'.format(self.img_key, slice_str))
        self._save_image(
            self._apply_bbox(img_data[self.img_key][self.selected_slice]),
            img_path)

        # same image with the masked ground truth blended on top
        overlay_path = os.path.join(
            self.subject_dir,
            '{}_gt_overlay{}.png'.format(self.img_key, slice_str))
        self.img_to_overlay = self._apply_bbox(
            img_data[self.img_key][self.selected_slice])

        self._save_label_overlay(self.img_to_overlay,
                                 ma_gt,
                                 overlay_path,
                                 alpha=0.5,
                                 cm=self.gt_cm)
Code example #7
0
 def save_checkpoint(checkpoint_path: str, epoch: int, model: nn.Module, optimizer: optim.Optimizer, **others) -> None:
     """Persist model and optimizer state (plus extra entries) to a file.

     Args:
         checkpoint_path: destination file; parent directories are created.
         epoch: training epoch stored alongside the state dicts.
         model: network whose ``state_dict`` is saved under 'state_dict'.
         optimizer: optimizer whose state is saved under 'optimizer'.
         **others: any additional entries to store in the checkpoint.
     """
     fh.create_dir_if_not_exists(checkpoint_path, is_file=True)
     checkpoint = {
         'state_dict': model.state_dict(),
         'epoch': epoch,
         'optimizer': optimizer.state_dict(),
     }
     checkpoint.update(others)
     torch.save(checkpoint, checkpoint_path)
Code example #8
0
 def backup_model_parameters(model_path: str, model_params: cfg.DictableParameter,
                             optimizer_params: cfg.DictableParameter, **others) -> None:
     """Write model and optimizer parameter dicts (plus extras) as JSON.

     Args:
         model_path: destination JSON file; parent directories are created.
         model_params: stored under the 'model' key via ``to_dict()``.
         optimizer_params: stored under the 'optimizer' key via ``to_dict()``.
         **others: additional top-level entries for the JSON document.
     """
     fh.create_dir_if_not_exists(model_path, is_file=True)
     payload = {'model': model_params.to_dict(),
                'optimizer': optimizer_params.to_dict()}
     payload.update(others)
     with open(model_path, 'w') as f:
         json.dump(payload, f)
Code example #9
0
def main(split_type: str):
    """Create and save a BraTS'18 subject split of the requested type.

    All types start from the same stratified (by tumor grade) shuffled
    base split of 100 train / 25 valid / rest test subjects, then derive:
    'default' saves that base split; 'ensemble' makes 10 training folds;
    'k-fold' makes a 5-fold CV split over train+valid; 'resplit-train'
    takes a 10-subject training subset.

    Raises:
        ValueError: if ``split_type`` is not a supported value.
    """
    if split_type not in ('default', 'ensemble', 'k-fold', 'resplit-train'):
        raise ValueError('invalid split type "{}"'.format(split_type))

    data_dir = dirs.BRATS_ORIG_DATA_DIR
    out_dir = dirs.SPLITS_DIR
    fh.create_dir_if_not_exists(out_dir)

    collector = collect.Brats17Collector(data_dir)
    subject_files = collector.get_subject_files()
    subject_names = [sf.subject for sf in subject_files]

    def get_grade_from_subject_file(subject_file):
        """Derive the tumor grade from the directory layout (.../<grade>/<subject>/<image>)."""
        first_image_path = list(
            subject_file.categories['images'].entries.values())[0]
        grade = os.path.basename(
            os.path.dirname(os.path.dirname(first_image_path)))
        return grade

    grades = [get_grade_from_subject_file(sf) for sf in subject_files]

    # base split sizes shared by all split types
    nb_train = 100
    nb_valid = 25
    nb_test = len(subject_names) - nb_train - nb_valid
    counts = (nb_train, nb_valid, nb_test)

    # stratify on grade (HGG -> 0, anything else -> 1); fixed seed for
    # reproducibility across runs
    grade_ints = [0 if g == 'HGG' else 1 for g in grades]
    train_names, valid_names, test_names = split.create_stratified_shuffled_split(
        subject_names, grade_ints, counts, seed=100)

    if split_type == 'default':
        file_name = 'split_brats18_{}-{}-{}.json'.format(
            nb_train, nb_valid, nb_test)
        split.save_split(os.path.join(out_dir, file_name), train_names,
                         valid_names, test_names)
    elif split_type == 'ensemble':
        # 10 training folds; each ensemble member reuses the same valid/test sets
        k = 10
        splits = split.split_subjects_k_fold(train_names, k)
        train_names_k, _ = zip(*splits)
        valid_names_k = k * [valid_names]
        test_names_k = k * [test_names]

        # NOTE(review): 'len(train_names) - k' as the per-fold train count
        # assumes equally sized folds — confirm against split_subjects_k_fold
        file_name = 'split_brats18_k{}_{}-{}-{}.json'.format(
            k,
            len(train_names) - k, nb_valid, nb_test)
        split.save_split(os.path.join(out_dir, file_name), train_names_k,
                         valid_names_k, test_names_k)
    elif split_type == 'k-fold':
        k = 5

        # fold over train+valid together, keeping grade stratification
        name_grade_dict = {s: g for s, g in zip(subject_names, grade_ints)}

        to_fold = train_names + valid_names
        to_fold_grades = [name_grade_dict[s] for s in to_fold]

        splits = split.split_subject_k_fold_stratified(to_fold, to_fold_grades,
                                                       k)
        train_names_k, valid_names_k = zip(*splits)

        # rebind the counts to the CV fold sizes (used in the file name only)
        nb_valid = int(len(to_fold) / k)
        nb_train = len(to_fold) - nb_valid

        file_name = 'split_brats18_cv_k{}_{}-{}-{}.json'.format(
            k, nb_train, nb_valid, nb_valid)
        # valid is test, too for cross-validation
        split.save_split(os.path.join(out_dir, file_name), train_names_k,
                         valid_names_k, valid_names_k)
    elif split_type == 'resplit-train':
        # note: not stratified, since we want same subject to be include in a larger train_new split
        nb_train_new = 10

        counts = (nb_train_new, len(train_names) - nb_train_new)
        new_train_names, _ = split.split_subjects(train_names, counts)

        file_name = 'split_brats18_sub_{}-{}-{}.json'.format(
            nb_train_new, nb_valid, nb_test)
        split.save_split(os.path.join(out_dir, file_name), new_train_names,
                         valid_names, test_names)