Example 1
def check_sha256(file_config):
    """
    This function checks that the sha256 is generated according to the config file
    """
    initial_config = imed_config_manager.ConfigurationManager(file_config).get_config()
    result = []
    name = "config_file.json"
    for root, dirs, files in os.walk(os.path.dirname(initial_config["path_output"])):
        if name in files:
            result.append(os.path.join(root, name))
    assert result != []
    for generated_config in result:
        config = imed_config_manager.ConfigurationManager(generated_config).get_config()
        assert 'training_sha256' in config
Example 2
def check_sha256(file_config):
    """
    This function checks that the sha256 is generated according to the config file
    """
    initial_config = imed_config_manager.ConfigurationManager(
        file_config).get_config()
    result = []
    name = "config_file.json"
    for path_object in Path(initial_config["path_output"]).parent.glob("**/*"):
        if path_object.is_file() and name in path_object.name:
            result.append(str(path_object))
    assert result != []
    for generated_config in result:
        config = imed_config_manager.ConfigurationManager(
            generated_config).get_config()
        assert 'training_sha256' in config
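Note that the two variants above are not strictly equivalent: Example 1's `name in files` is an exact filename match, while Example 2's `name in path_object.name` is a substring test. A minimal standalone sketch of a more direct pathlib equivalent of the exact match, using `rglob` with the literal filename:

from pathlib import Path

def find_generated_configs(path_output, name="config_file.json"):
    # rglob(name) recursively matches the exact filename, mirroring
    # Example 1's `name in files` membership test.
    return [str(p) for p in Path(path_output).parent.rglob(name) if p.is_file()]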
Example 3
def test_object_detection(train_lst, target_lst, config):
    # Load config file
    context = imed_config_manager.ConfigurationManager("testing_data/model_config.json").get_config()
    context.update(config)

    command = "ivadomed_download_data -d findcord_tumor"
    subprocess.check_output(command, shell=True)

    imed.run_command(context)
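As a side note, the download call above does not need `shell=True`; a minimal sketch of the same command passed as an argument list, which avoids shell interpolation:

import subprocess

# Equivalent download call without invoking a shell.
subprocess.check_output(["ivadomed_download_data", "-d", "findcord_tumor"])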
Example 4
def run_main(args):
    context = imed_config_manager.ConfigurationManager(args.c).get_config()

    transform_lst = torch_transforms.Compose([
        imed_transforms.Resample(wspace=0.75, hspace=0.75),
        imed_transforms.CenterCrop([128, 128]),
        imed_transforms.NumpyToTensor(),
        imed_transforms.NormalizeInstance(),
    ])

    train_lst, valid_lst, test_lst = imed_loader_utils.split_dataset(
        context["path_data"], context["center_test"], context["split_method"],
        context["random_seed"])

    balance_dct = {}
    for ds_lst, ds_name in zip([train_lst, valid_lst, test_lst],
                               ['train', 'valid', 'test']):
        print("\nLoading {} set.\n".format(ds_name))
        ds = imed_loader.BidsDataset(
            context["path_data"],
            subject_lst=ds_lst,
            target_suffix=context["target_suffix"],
            contrast_lst=context["contrast_test"]
            if ds_name == 'test' else context["contrast_train_validation"],
            metadata_choice=context["metadata"],
            contrast_balance=context["contrast_balance"],
            transform=transform_lst,
            slice_filter_fn=imed_loader_utils.SliceFilter())

        print("Loaded {} axial slices for the {} set.".format(
            len(ds), ds_name))
        ds_loader = DataLoader(ds,
                               batch_size=1,
                               shuffle=False,
                               pin_memory=False,
                               collate_fn=imed_loader_utils.imed_collate,
                               num_workers=1)

        balance_lst = []
        for i, batch in enumerate(ds_loader):
            gt_sample = batch["gt"].numpy().astype(int)[0, 0, :, :]  # np.int is deprecated; use the builtin int
            nb_ones = (gt_sample == 1).sum()
            nb_voxels = gt_sample.size
            balance_lst.append(nb_ones * 100.0 / nb_voxels)

        balance_dct[ds_name] = balance_lst

    for ds_name in balance_dct:
        print('\nClass balance in {} set:'.format(ds_name))
        print_stats(balance_dct[ds_name])

    print('\nClass balance in full set:')
    print_stats([e for d in balance_dct for e in balance_dct[d]])
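`print_stats` is a helper defined elsewhere in this script and is not shown in the example. A hypothetical sketch consistent with its use above (a list of per-slice percentages in, summary statistics printed out):

import numpy as np

def print_stats(arr):
    # Hypothetical helper: summarize a list of values (here, percentages of
    # foreground voxels per slice) with a few robust statistics.
    arr = np.asarray(arr)
    print('\tMean: {:.2f}'.format(np.mean(arr)))
    print('\tMedian: {:.2f}'.format(np.median(arr)))
    print('\tInter-quartile range: [{:.2f}, {:.2f}]'.format(
        np.percentile(arr, 25), np.percentile(arr, 75)))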
Example 5
def run_main():
    imed_utils.init_ivadomed()

    parser = get_parser()
    args = parser.parse_args()

    # Get context from configuration file
    path_config_file = args.config
    context = imed_config_manager.ConfigurationManager(
        path_config_file).get_config()

    # Run command
    run_command(
        context=context,
        n_gif=args.gif if args.gif is not None else 0,
        thr_increment=args.thr_increment if args.thr_increment else None,
        resume_training=bool(args.resume_training))
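`get_parser` is defined elsewhere in the module; a hypothetical sketch consistent with the attributes accessed above (`args.config`, `args.gif`, `args.thr_increment`, `args.resume_training`) — the flag names are assumptions:

import argparse

def get_parser():
    # Hypothetical parser; flag names are assumptions matching the attribute
    # names used in run_main above.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", required=True,
                        help="Path to the configuration file.")
    parser.add_argument("-g", "--gif", type=int, default=None,
                        help="Number of GIF visualizations saved during training.")
    parser.add_argument("-t", "--thr-increment", type=float, default=None,
                        help="Increment used during the threshold (ROC) analysis.")
    parser.add_argument("--resume-training", action="store_true",
                        help="Resume a previously interrupted training.")
    return parser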
Example 6
def run_main(args):
    context = imed_config_manager.ConfigurationManager(args.c).get_config()

    path_folder = os.path.join(context['path_data'], 'derivatives', 'labels')

    bin_struct = generate_binary_structure(3, 2)  # 18-connectivity

    vox_lst, mm3_lst = [], []
    for s in os.listdir(path_folder):
        s_fold = os.path.join(path_folder, s, 'anat')
        if os.path.isdir(s_fold):
            for f in os.listdir(s_fold):
                c = f.split(s + '_')[-1].split(context["target_suffix"])[0]
                if f.endswith(context["target_suffix"] +
                              '.nii.gz') and c in context["contrast_test"]:
                    f_path = os.path.join(s_fold, f)
                    im = nib.load(f_path)
                    data = np.asanyarray(im.dataobj)
                    px, py, pz = im.header['pixdim'][1:4]
                    del im

                    if np.any(data):
                        data_label, n = label(data, structure=bin_struct)
                        for idx in range(1, n + 1):
                            data_idx = (data_label == idx).astype(int)  # np.int is deprecated

                            n_vox = np.count_nonzero(data_idx)
                            vox_lst.append(n_vox)
                            mm3_lst.append(n_vox * px * py * pz)

    print('\nTarget distribution in vox:')
    print_stats(vox_lst)
    plot_distrib(vox_lst, context["target_suffix"] + ' size in vox',
                 [0, np.percentile(vox_lst, 90)],
                 context["target_suffix"] + '_vox.png')

    print('\nTarget distribution in mm3:')
    print_stats(mm3_lst)
    plot_distrib(mm3_lst, context["target_suffix"] + ' size in mm3',
                 [0, np.percentile(mm3_lst, 90)],
                 context["target_suffix"] + '_mm3.png')
Example 7
def main():
    parser = get_parser()
    args = parser.parse_args()

    for logdir, output_path in zip(args.logdir, args.output_path):
        config = os.path.join(logdir, "config_file.json")
        context = imed_config_manager.ConfigurationManager(config).get_config()

        df_list = []
        metrics = []
        for i in range(int(args.iterations)):
            df = get_results(context)
            df = compute_csa(context, df, logdir, args.bids)
            metrics = list(df.columns)
            df_list.append(np.array(df))

        # Get average and std for each subject (intra subject), then average on all subjects
        average = np.average(np.average(np.array(df_list), axis=0), axis=0)
        std = np.average(np.std(np.array(df_list, dtype=float), axis=0),
                         axis=0)
        pd.DataFrame(np.stack([average, std], axis=1),
                     index=metrics,
                     columns=["mean", "std"]).to_csv(output_path + ".csv")
Example 8
def run_visualization(input, config, number, output, roi):
    """Utility function to visualize Data Augmentation transformations.

    Data augmentation is a key part of the Deep Learning training scheme. This script aims at facilitating the
    fine-tuning of data augmentation parameters. To do so, this script provides a step-by-step visualization of the
    transformations that are applied on data.

    This function applies a series of transformations (defined in a configuration file
    ``-c``) to ``-n`` 2D slices randomly extracted from an input image (``-i``), and saves the
    resulting samples as PNG after each transform.

    For example::

        ivadomed_visualize_transforms -i t2s.nii.gz -n 1 -c config.json -r t2s_seg.nii.gz

    Provides a visualization of a series of three transformations on a randomly selected slice:

    .. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/scripts/transforms_im.png
        :width: 600px
        :align: center

    And on a binary mask::

        ivadomed_visualize_transforms -i t2s_gmseg.nii.gz -n 1 -c config.json -r t2s_seg.nii.gz

    Gives:

    .. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/scripts/transforms_gt.png
        :width: 600px
        :align: center

    Args:
         input (string): Image filename. Flag: ``--input``, ``-i``
         config (string): Configuration file filename. Flag: ``--config``, ``-c``
         number (int): Number of slices randomly extracted. Flag: ``--number``, ``-n``
         output (string): Folder path where the results are saved. Flag: ``--ofolder``, ``-o``
         roi (string): Filename of the region of interest. Only needed if ROICrop is part of the transformations.
                       Flag: ``--roi``, ``-r``
    """
    # Load context
    context = imed_config_manager.ConfigurationManager(config).get_config()

    # Create output folder
    if not Path(output).is_dir():
        Path(output).mkdir(parents=True)

    # Slices are extracted along the axis defined below
    axis = imed_utils.AXIS_DCT[context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.SLICE_AXIS]]
    # Get data
    input_img, input_data = get_data(input, axis)
    # Image or Mask
    is_mask = np.array_equal(input_data, input_data.astype(bool))
    # Get zooms
    zooms = imed_loader_utils.orient_shapes_hwd(input_img.header.get_zooms(), slice_axis=axis)
    # Get indexes
    indexes = random.sample(range(0, input_data.shape[2]), number)

    # Get training transforms
    training_transforms, _, _ = imed_transforms.get_subdatasets_transforms(context[ConfigKW.TRANSFORMATION])

    if TransformationKW.ROICROP in training_transforms:
        if roi and Path(roi).is_file():
            roi_img, roi_data = get_data(roi, axis)
        else:
            raise ValueError("\nPlease provide ROI image (-r) in order to apply ROICrop transformation.")

    # Compose transforms
    dict_transforms = {}
    stg_transforms = ""
    for transform_name in training_transforms:
        # We skip the NumpyToTensor transform since it is only a change of data type
        if transform_name == "NumpyToTensor":
            continue

        # Update stg_transforms
        stg_transforms += transform_name + "_"

        # Add new transform to Compose
        dict_transforms.update({transform_name: training_transforms[transform_name]})
        composed_transforms = imed_transforms.Compose(dict_transforms)

        # Loop across slices
        for i in indexes:
            data = [input_data[:, :, i]]
            # Init metadata
            metadata = SampleMetadata({MetadataKW.ZOOMS: zooms, MetadataKW.DATA_TYPE: "gt" if is_mask else "im"})

            # Apply transformations to ROI
            if TransformationKW.CENTERCROP in training_transforms or \
                    (TransformationKW.ROICROP in training_transforms and Path(roi).is_file()):
                metadata.__setitem__(MetadataKW.CROP_PARAMS, {})

            # Apply transformations to image
            stack_im, _ = composed_transforms(sample=data,
                                              metadata=[metadata for _ in range(number)],
                                              data_type="im")

            # Plot before / after transformation
            fname_out = str(Path(output, stg_transforms + "slice" + str(i) + ".png"))
            logger.debug(f"Fname out: {fname_out}.")
            logger.debug(f"\t{dict(metadata)}")
            # rescale intensities
            if len(stg_transforms[:-1].split("_")) == 1:
                before = np.rot90(imed_maths.rescale_values_array(data[0], 0.0, 1.0))
            else:
                before = after
            if isinstance(stack_im[0], torch.Tensor):
                after = np.rot90(imed_maths.rescale_values_array(stack_im[0].numpy(), 0.0, 1.0))
            else:
                after = np.rot90(imed_maths.rescale_values_array(stack_im[0], 0.0, 1.0))
            # Plot
            imed_utils.plot_transformed_sample(before,
                                               after,
                                               list_title=["\n".join(stg_transforms[:-1].split("_")[:-1]),
                                                           "\n".join(stg_transforms[:-1].split("_"))],
                                               fname_out=fname_out,
                                               cmap="jet" if is_mask else "gray")
Example 9
def segment_volume(folder_model, fname_images, gpu_number=0, options=None):
    """Segment an image.
    Segment an image (`fname_images`) using a pre-trained model (`folder_model`). If provided, a region of interest
    (`fname_roi`) is used to crop the image prior to segmenting it.
    Args:
        folder_model (str): foldername which contains
            (1) the model ('folder_model/folder_model.pt') to use
            (2) its configuration file ('folder_model/folder_model.json') used for the training,
            see https://github.com/neuropoly/ivadomed/wiki/configuration-file
        fname_images (list): list of image filenames (e.g. .nii.gz) to segment. Multichannel models require multiple
            images to segment, i.e., len(fname_images) > 1.
        gpu_number (int): Number representing gpu number if available.
        options (dict): Contains postprocessing steps and prior filename (fname_prior), which is an image filename
            (e.g., .nii.gz) containing processing information (e.g., a spinal cord segmentation, spinal location or
            MS lesion classification), such as a spinal cord centerline used to crop the image prior to segmenting
            it, if provided. The segmentation is not performed on the slices that are empty in this image.
    Returns:
        list: List of nibabel objects containing the soft segmentation(s), one per prediction class.
        list: List of target suffix associated with each prediction in `pred_list`

    """
    # Define device
    cuda_available = torch.cuda.is_available()
    device = torch.device("cpu") if not cuda_available else torch.device(
        "cuda:" + str(gpu_number))

    # Check if model folder exists and get filenames
    fname_model, fname_model_metadata = imed_models.get_model_filenames(
        folder_model)

    # Load model training config
    context = imed_config_manager.ConfigurationManager(
        fname_model_metadata).get_config()

    postpro_list = [
        'binarize_prediction', 'keep_largest', 'fill_holes', 'remove_small'
    ]
    if options is not None and any(pp in options for pp in postpro_list):
        postpro = {}
        if 'binarize_prediction' in options and options['binarize_prediction']:
            postpro['binarize_prediction'] = {
                "thr": options['binarize_prediction']
            }
        if 'keep_largest' in options and options['keep_largest'] is not None:
            if options['keep_largest']:
                postpro['keep_largest'] = {}
            # Remove key in context if value set to 0
            elif 'keep_largest' in context['postprocessing']:
                del context['postprocessing']['keep_largest']
        if 'fill_holes' in options and options['fill_holes'] is not None:
            if options['fill_holes']:
                postpro['fill_holes'] = {}
            # Remove key in context if value set to 0
            elif 'fill_holes' in context['postprocessing']:
                del context['postprocessing']['fill_holes']
        if 'remove_small' in options and options['remove_small'] and \
                ('mm' in options['remove_small'][-1] or 'vox' in options['remove_small'][-1]):
            unit = 'mm3' if 'mm3' in options['remove_small'][-1] else 'vox'
            thr = [int(t.replace(unit, "")) for t in options['remove_small']]
            postpro['remove_small'] = {"unit": unit, "thr": thr}

        context['postprocessing'].update(postpro)

    # LOADER
    loader_params = context["loader_parameters"]
    slice_axis = imed_utils.AXIS_DCT[loader_params['slice_axis']]
    metadata = {}
    fname_roi = None
    fname_prior = options['fname_prior'] if (options is not None) and (
        'fname_prior' in options) else None
    if fname_prior is not None:
        if 'roi_params' in loader_params and loader_params['roi_params'][
                'suffix'] is not None:
            fname_roi = fname_prior
        # TRANSFORMATIONS
        # If ROI is not provided then force center cropping
        if fname_roi is None and 'ROICrop' in context["transformation"].keys():
            print(
                "\nWARNING: fname_roi has not been specified; a cropping around the center of the image is "
                "performed instead of a cropping around a region of interest.")

            context["transformation"] = dict(
                (key, value) if key != 'ROICrop' else ('CenterCrop', value)
                for (key, value) in context["transformation"].items())

        if 'object_detection_params' in context and \
                context['object_detection_params']['object_detection_path'] is not None:
            imed_obj_detect.bounding_box_prior(
                fname_prior, metadata, slice_axis,
                context['object_detection_params']['safety_factor'])
            metadata = [metadata] * len(fname_images)

    # Compose transforms
    _, _, transform_test_params = imed_transforms.get_subdatasets_transforms(
        context["transformation"])

    transform_lst, undo_transforms = imed_transforms.prepare_transforms(
        transform_test_params)

    # Force filter_empty_mask to False if fname_roi = None
    if fname_roi is None and 'filter_empty_mask' in loader_params[
            "slice_filter_params"]:
        print(
            "\nWARNING: fname_roi has not been specified; the entire volume will be processed."
        )
        loader_params["slice_filter_params"]["filter_empty_mask"] = False

    filename_pairs = [(fname_images, None, fname_roi,
                       metadata if isinstance(metadata, list) else [metadata])]

    kernel_3D = bool('Modified3DUNet' in context and context['Modified3DUNet']['applied']) or \
                not context['default_model']['is_2d']
    if kernel_3D:
        ds = imed_loader.MRI3DSubVolumeSegmentationDataset(
            filename_pairs,
            transform=transform_lst,
            length=context["Modified3DUNet"]["length_3D"],
            stride=context["Modified3DUNet"]["stride_3D"])
    else:
        ds = imed_loader.MRI2DSegmentationDataset(
            filename_pairs,
            slice_axis=slice_axis,
            cache=True,
            transform=transform_lst,
            slice_filter_fn=imed_loader_utils.SliceFilter(
                **loader_params["slice_filter_params"]))
        ds.load_filenames()

    if kernel_3D:
        print("\nLoaded {} {} volumes of shape {}.".format(
            len(ds), loader_params['slice_axis'],
            context['Modified3DUNet']['length_3D']))
    else:
        print("\nLoaded {} {} slices.".format(len(ds),
                                              loader_params['slice_axis']))

    model_params = {}
    if 'FiLMedUnet' in context and context['FiLMedUnet']['applied']:
        metadata_dict = joblib.load(
            os.path.join(folder_model, 'metadata_dict.joblib'))
        for idx in ds.indexes:
            for i in range(len(idx)):
                idx[i]['input_metadata'][0][context['FiLMedUnet']
                                            ['metadata']] = options['metadata']
                idx[i]['input_metadata'][0]['metadata_dict'] = metadata_dict

        ds = imed_film.normalize_metadata(ds, None, context["debugging"],
                                          context['FiLMedUnet']['metadata'])
        onehotencoder = joblib.load(
            os.path.join(folder_model, 'one_hot_encoder.joblib'))

        model_params.update({
            "name":
            'FiLMedUnet',
            "film_onehotencoder":
            onehotencoder,
            "n_metadata":
            len([ll for l in onehotencoder.categories_ for ll in l])
        })

    # Data Loader
    data_loader = DataLoader(
        ds,
        batch_size=context["training_parameters"]["batch_size"],
        shuffle=False,
        pin_memory=True,
        collate_fn=imed_loader_utils.imed_collate,
        num_workers=0)

    # MODEL
    if fname_model.endswith('.pt'):
        model = torch.load(fname_model, map_location=device)
        # Inference time
        model.eval()

    # Loop across batches
    preds_list, slice_idx_list = [], []
    last_sample_bool, volume, weight_matrix = False, None, None
    for i_batch, batch in enumerate(data_loader):
        with torch.no_grad():
            img = imed_utils.cuda(batch['input'],
                                  cuda_available=cuda_available)

            if ('FiLMedUnet' in context and context['FiLMedUnet']['applied']) or \
                    ('HeMISUnet' in context and context['HeMISUnet']['applied']):
                metadata = imed_training.get_metadata(batch["input_metadata"],
                                                      model_params)
                preds = model(img, metadata)

            else:
                preds = model(img) if fname_model.endswith(
                    '.pt') else onnx_inference(fname_model, img)

            preds = preds.cpu()

        # Set datatype to gt since prediction should be processed the same way as gt
        for b in batch['input_metadata']:
            for modality in b:
                modality['data_type'] = 'gt'

        # Reconstruct 3D object
        for i_slice in range(len(preds)):
            if "bounding_box" in batch['input_metadata'][i_slice][0]:
                imed_obj_detect.adjust_undo_transforms(
                    undo_transforms.transforms, batch, i_slice)

            batch['gt_metadata'] = [[metadata[0]] * preds.shape[1]
                                    for metadata in batch['input_metadata']]
            if kernel_3D:
                preds_undo, metadata, last_sample_bool, volume, weight_matrix = \
                    volume_reconstruction(batch, preds, undo_transforms, i_slice, volume, weight_matrix)
                preds_list = [np.array(preds_undo)]
            else:
                # undo transformations
                preds_i_undo, metadata_idx = undo_transforms(
                    preds[i_slice],
                    batch["input_metadata"][i_slice],
                    data_type='gt')

                # Add new segmented slice to preds_list
                preds_list.append(np.array(preds_i_undo))
                # Store the slice index of preds_i_undo in the original 3D image
                slice_idx_list.append(
                    int(batch['input_metadata'][i_slice][0]['slice_index']))

            # If last batch and last sample of this batch, then reconstruct 3D object
            if (i_batch == len(data_loader) - 1
                    and i_slice == len(batch['gt']) - 1) or last_sample_bool:
                pred_nib = pred_to_nib(
                    data_lst=preds_list,
                    fname_ref=fname_images[0],
                    fname_out=None,
                    z_lst=slice_idx_list,
                    slice_axis=slice_axis,
                    kernel_dim='3d' if kernel_3D else '2d',
                    debug=False,
                    bin_thr=-1,
                    postprocessing=context['postprocessing'])

                pred_list = split_classes(pred_nib)
                target_list = context['loader_parameters']['target_suffix']

    return pred_list, target_list
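A minimal usage sketch based on the docstring above; the model folder and image filename are hypothetical, and the `options` keys follow the postprocessing names handled in the function body:

# Hypothetical paths; replace with a real packaged model and image.
pred_list, target_list = segment_volume(
    folder_model="findcord_tumor",
    fname_images=["sub-01_T2w.nii.gz"],
    gpu_number=0,
    options={"binarize_prediction": 0.5,
             "keep_largest": 1,
             "remove_small": ["5vox"]})
for pred, suffix in zip(pred_list, target_list):
    # Each prediction is a nibabel image, one per class.
    pred.to_filename("pred" + suffix + ".nii.gz")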
Example 10
        try:
            visualize_pca(gammas[layer_no], metadata_values, layer_no,
                          out_dir + f"/pca_gamma_{layer_no}.png")
        except ValueError:
            print(
                f"No PCA for gamma from FiLM layer {layer_no}: the dimension is too small."
            )
        try:
            visualize_pca(betas[layer_no], metadata_values, layer_no,
                          out_dir + f"/pca_beta_{layer_no}.png")
        except ValueError:
            print(
                f"No PCA for beta from FiLM layer {layer_no}: the dimension is too small."
            )

    # save tsne for betas and gammas
    for layer_no in gammas.keys():
        visualize_tsne(gammas[layer_no], metadata_values, layer_no,
                       out_dir + f"/tsne_gamma_{layer_no}.png")
        visualize_tsne(betas[layer_no], metadata_values, layer_no,
                       out_dir + f"/tsne_beta_{layer_no}.png")


if __name__ == "__main__":
    fname_config_file = sys.argv[1]

    context = imed_config_manager.ConfigurationManager(
        fname_config_file).get_config()

    run_main(context)
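`visualize_pca` (and `visualize_tsne`) are defined earlier in the script and are not shown here. A hypothetical sketch of the PCA variant, consistent with the ValueError caught above (scikit-learn's PCA raises one when the requested number of components exceeds the data dimensions):

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

def visualize_pca(values, metadata_values, layer_no, fname_out):
    # Hypothetical helper: project FiLM parameters to 2D and colour points
    # by their (categorical) metadata value.
    proj = PCA(n_components=2).fit_transform(values)  # may raise ValueError
    categories = sorted(set(metadata_values))
    colors = [categories.index(m) for m in metadata_values]
    plt.figure()
    plt.scatter(proj[:, 0], proj[:, 1], c=colors)
    plt.title(f"FiLM layer {layer_no}")
    plt.savefig(fname_out)
    plt.close()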
Example 11
def run_main(args):
    thrPred = None if args.thrPred is None else float(args.thrPred)
    thrUnc = None if args.thrUnc is None else float(args.thrUnc)
    sufUnc = args.suffixUnc

    context = imed_config_manager.ConfigurationManager(args.c).get_config()

    ofolder = args.ofolder
    if not os.path.isdir(ofolder):
        os.makedirs(ofolder)

    pred_folder = os.path.join(context['path_output'], 'pred_masks')
    if not os.path.isdir(pred_folder):
        imed.cmd_test(context)

    subj_acq_lst = list(
        set([
            f.split('_pred')[0] for f in os.listdir(pred_folder)
            if f.endswith('.nii.gz') and '_pred' in f
        ]))
    # subj_acq_lst = [subj_acq_lst[0]]
    gt_folder = os.path.join(context['path_data'], 'derivatives', 'labels')

    if thrPred is None:
        for exp in exp_dct.keys():
            config_dct = exp_dct[exp]
            logger.debug(config_dct['uncertainty_measure'])

            # print_unc_stats is used to determine 'uncertainty_thr'
            print_unc_stats(config_dct['uncertainty_measure'], pred_folder,
                            subj_acq_lst)

            res_ofname = os.path.join(
                ofolder, config_dct['uncertainty_measure'] + '.joblib')
            if not os.path.isfile(res_ofname):
                res = run_experiment(
                    level=config_dct['level'],
                    unc_name=config_dct['uncertainty_measure'],
                    thr_unc_lst=config_dct['uncertainty_thr'],
                    thr_pred_lst=config_dct['prediction_thr'],
                    gt_folder=gt_folder,
                    pred_folder=pred_folder,
                    im_lst=subj_acq_lst,
                    target_suf=context["target_suffix"][0],
                    param_eval=context["eval_params"])
                joblib.dump(res, res_ofname)
            else:
                res = joblib.load(res_ofname)

            print_retained_elt(thr_unc_lst=config_dct['uncertainty_thr'],
                               retained_elt_lst=res['retained_elt'])

            plot_roc(thr_unc_lst=config_dct['uncertainty_thr'],
                     thr_pred_lst=config_dct['prediction_thr'],
                     res_dct=res,
                     metric=config_dct['uncertainty_measure'],
                     fname_out=os.path.join(
                         ofolder, config_dct['uncertainty_measure'] + '.png'))
    else:
        df = run_inference(pred_folder=pred_folder,
                           im_lst=subj_acq_lst,
                           thr_pred=thrPred,
                           gt_folder=gt_folder,
                           target_suf=context["target_suffix"][0],
                           param_eval=context["eval_params"],
                           unc_name=sufUnc,
                           thr_unc=thrUnc)
        logger.debug(df.head())
        vals = [v for v in df.dice_class0 if str(v) != 'nan']
        logger.info(
            f"Median (IQR): {np.median(vals)} ({np.percentile(vals, 25)} - {np.percentile(vals, 75)})."
        )
        df.to_csv(
            os.path.join(
                ofolder,
                '_'.join([str(sufUnc), str(thrUnc),
                          str(thrPred)]) + '.csv'))
Example 12
def automate_training(config,
                      param,
                      fixed_split,
                      all_combin,
                      n_iterations=1,
                      run_test=False,
                      all_logs=False,
                      thr_increment=None,
                      multiple_params=False):
    """Automate multiple training processes on multiple GPUs.

    Hyperparameter optimization of models is tedious and time-consuming. This function automates this optimization
    across multiple GPUs. It runs trainings, on the same training and validation datasets, by combining a given set of
    parameters and set of values for each of these parameters. Results are collected for each combination and reported
    into a dataframe to allow their comparison. The script efficiently allocates each training to one of the available
    GPUs.

    Usage example::

        ivadomed_automate_training -c config.json -p params.json -n n_iterations

    .. csv-table:: Example of dataframe
       :file: ../../images/detailed_results.csv

    Args:
        config (string): Configuration filename, which is used as skeleton to configure the training. Some of its
            parameters (defined in `param` file) are modified across experiments. Flag: ``--config``, ``-c``
        param (string): json file containing the parameter configurations to compare. The parameter "keys" of this
            file need to match the parameter "keys" of the `config` file. Parameter "values" are given in a list.
            Flag: ``--param``, ``-p``

            Example::

                {"default_model": {"depth": [2, 3, 4]}}

        fixed_split (bool): If True, all the experiments are run on the same training/validation/testing subdatasets.
                            Flag: ``--fixed-split``
        all_combin (bool): If True, all parameters combinations are run. Flag: ``--all-combin``
        n_iterations (int): Controls the number of times that each experiment (i.e., set of parameters) is run.
                            Flag: ``--n-iteration``, ``-n``
        run_test (bool): If True, the trained model is also run on the testing subdataset. flag: ``--run-test``
        all_logs (bool): If True, all the log directories are kept for every iteration. Flag: ``--all-logs``, ``-l``
        thr_increment (float): A threshold analysis is performed at the end of the training using the trained model and
            the validation sub-dataset to find the optimal binarization threshold. The specified value indicates the
            increment between 0 and 1 used during the ROC analysis (e.g. 0.1). Flag: ``-t``, ``--thr-increment``
        multiple_params (bool): If True, more than one parameter will be changed at a time from the hyperparameters.
            All the first elements from the hyperparameters list will be applied, then all the second, etc.
    """
    # Load initial config
    initial_config = imed_config_manager.ConfigurationManager(
        config).get_config()

    # Hyperparameters values to experiment
    with open(param, "r") as fhandle:
        hyperparams = json.load(fhandle)
    param_dict, names_dict = {}, {}
    for category in hyperparams.keys():
        assert category in initial_config
        base_item = initial_config[category]
        keys = list(hyperparams[category].keys())
        values = [hyperparams[category][k] for k in keys]
        new_parameters, names = make_category(base_item, keys, values,
                                              all_combin)
        param_dict[category] = new_parameters
        names_dict[category] = names

    # Split dataset if not already done
    if fixed_split and (initial_config.get("split_path") is None):
        train_lst, valid_lst, test_lst = imed_loader_utils.get_new_subject_split(
            path_folder=initial_config["loader_parameters"]["bids_path"],
            center_test=initial_config["split_dataset"]["center_test"],
            split_method=initial_config["split_dataset"]["method"],
            random_seed=initial_config["split_dataset"]["random_seed"],
            train_frac=initial_config["split_dataset"]["train_fraction"],
            test_frac=initial_config["split_dataset"]["test_fraction"],
            log_directory="./",
            balance=initial_config["split_dataset"]['balance']
            if 'balance' in initial_config["split_dataset"] else None)

        # save the subject distribution
        split_dct = {'train': train_lst, 'valid': valid_lst, 'test': test_lst}
        split_path = "./" + "common_split_datasets.joblib"
        joblib.dump(split_dct, split_path)
        initial_config["split_dataset"]["fname_split"] = split_path

    config_list = []
    # Test all combinations (change multiple parameters for each test)
    if all_combin:

        # Cartesian product (all combinations)
        combinations = (dict(zip(param_dict.keys(), values))
                        for values in product(*param_dict.values()))
        names = list(product(*names_dict.values()))

        for idx, combination in enumerate(combinations):

            new_config = copy.deepcopy(initial_config)

            for i, param in enumerate(combination):
                value = combination[param]
                new_config[param] = value
                new_config["log_directory"] = new_config[
                    "log_directory"] + names[idx][i]

            config_list.append(copy.deepcopy(new_config))
    elif multiple_params:
        params_len = set()
        for param in param_dict:
            params_len.add(len(param_dict[param]))
        # All lists in hyperparameter file should be the same length
        if len(params_len) != 1:
            raise ValueError(
                "To use flag --multi-params or -m, all hyperparameter lists need to be the same size."
            )

        for config_idx in range(params_len.pop()):
            new_config = copy.deepcopy(initial_config)
            config_name = ""
            for param in param_dict:
                new_config[param] = param_dict[param][config_idx]
                config_name += names_dict[param][config_idx]
            new_config["log_directory"] = initial_config[
                "log_directory"] + config_name
            config_list.append(copy.deepcopy(new_config))

    # Change a single parameter for each test
    else:
        for param in param_dict:
            new_config = copy.deepcopy(initial_config)
            for value, name in zip(param_dict[param], names_dict[param]):
                new_config[param] = value
                new_config[
                    "log_directory"] = initial_config["log_directory"] + name
                config_list.append(copy.deepcopy(new_config))

    # CUDA problem when forking process
    # https://github.com/pytorch/pytorch/issues/2517
    mp.set_start_method('spawn')

    # Run all configs on a separate process, with a maximum of n_gpus processes at a given time
    pool = mp.Pool(processes=len(initial_config["gpu"]))

    results_df = pd.DataFrame()
    eval_df = pd.DataFrame()
    all_mean = pd.DataFrame()
    for i in range(n_iterations):
        if not fixed_split:
            # Set seed for iteration
            seed = random.randint(1, 10001)
            for config in config_list:
                config["split_dataset"]["random_seed"] = seed
                if all_logs:
                    if i:
                        config["log_directory"] = config[
                            "log_directory"].replace(
                                "_n=" + str(i - 1).zfill(2),
                                "_n=" + str(i).zfill(2))
                    else:
                        config["log_directory"] += "_n=" + str(i).zfill(2)
        validation_scores = pool.map(
            partial(train_worker, thr_incr=thr_increment), config_list)
        val_df = pd.DataFrame(validation_scores,
                              columns=[
                                  'log_directory', 'best_training_dice',
                                  'best_training_loss', 'best_validation_dice',
                                  'best_validation_loss'
                              ])

        if run_test:
            new_config_list = []
            for config in config_list:
                # Delete path_pred
                path_pred = os.path.join(config['log_directory'], 'pred_masks')
                if os.path.isdir(path_pred) and n_iterations > 1:
                    try:
                        shutil.rmtree(path_pred)
                    except OSError as e:
                        print("Error: %s - %s." % (e.filename, e.strerror))

                # Take the config file within the log_directory because binarize_prediction may have been updated
                json_path = os.path.join(config['log_directory'],
                                         'config_file.json')
                new_config = imed_config_manager.ConfigurationManager(
                    json_path).get_config()
                new_config["gpu"] = config["gpu"]
                new_config_list.append(new_config)

            test_results = pool.map(test_worker, new_config_list)

            df_lst = []
            # Merge all eval df together to have a single excel file
            for j, result in enumerate(test_results):
                df = result[-1]

                if i == 0:
                    all_mean = df.mean(axis=0)
                    std_metrics = df.std(axis=0)
                    metrics = pd.concat([all_mean, std_metrics],
                                        sort=False,
                                        axis=1)
                else:
                    all_mean = pd.concat([all_mean, df.mean(axis=0)],
                                         sort=False,
                                         axis=1)
                    mean_metrics = all_mean.mean(axis=1)
                    std_metrics = all_mean.std(axis=1)
                    metrics = pd.concat([mean_metrics, std_metrics],
                                        sort=False,
                                        axis=1)

                metrics.rename({0: "mean"}, axis=1, inplace=True)
                metrics.rename({1: "std"}, axis=1, inplace=True)
                id = result[0].split("_n=")[0]
                cols = metrics.columns.values
                for idx, col in enumerate(cols):
                    metrics.rename({col: col + "_" + id}, axis=1, inplace=True)
                df_lst.append(metrics)
                test_results[j] = result[:2]

            # Init or add eval results to dataframe
            eval_df = pd.concat(df_lst, sort=False, axis=1)

            test_df = pd.DataFrame(test_results,
                                   columns=['log_directory', 'test_dice'])
            combined_df = val_df.set_index('log_directory').join(
                test_df.set_index('log_directory'))
            combined_df = combined_df.reset_index()

        else:
            combined_df = val_df

        results_df = pd.concat([results_df, combined_df])
        results_df.to_csv("temporary_results.csv")
        eval_df.to_csv("average_eval.csv")

    # Merge config and results in a df
    config_df = pd.DataFrame.from_dict(config_list)
    keep = list(param_dict.keys())
    keep.append("log_directory")
    config_df = config_df[keep]

    results_df = config_df.set_index('log_directory').join(
        results_df.set_index('log_directory'))
    results_df = results_df.reset_index()
    results_df = results_df.sort_values(by=['best_validation_loss'])

    results_df.to_csv("detailed_results.csv")

    print("Detailed results")
    print(results_df)

    # Compute avg, std, p-values
    if n_iterations > 1:
        compute_statistics(results_df, n_iterations, run_test)
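The default (change-one-parameter-at-a-time) branch above can be illustrated in isolation. A minimal sketch, with hypothetical values, of how one category with several candidate values expands into per-experiment configs, mirroring the final loop of the function:

import copy

initial_config = {"log_directory": "./logs/", "default_model": {"depth": 3}}
param_dict = {"default_model": [{"depth": 2}, {"depth": 3}, {"depth": 4}]}
names_dict = {"default_model": ["depth=2", "depth=3", "depth=4"]}

config_list = []
for param in param_dict:
    for value, name in zip(param_dict[param], names_dict[param]):
        new_config = copy.deepcopy(initial_config)
        new_config[param] = value
        # The varied hyperparameter is encoded in the log directory name.
        new_config["log_directory"] = initial_config["log_directory"] + name
        config_list.append(new_config)

print([c["log_directory"] for c in config_list])
# ['./logs/depth=2', './logs/depth=3', './logs/depth=4']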
Example 13
def run_visualization(input, config, number, output, roi):
    """Utility function to visualize Data Augmentation transformations.

    Data augmentation is a key part of the Deep Learning training scheme. This script aims at facilitating the
    fine-tuning of data augmentation parameters. To do so, this script provides a step-by-step visualization of the
    transformations that are applied on data.

    This function applies a series of transformations (defined in a configuration file
    ``-c``) to ``-n`` 2D slices randomly extracted from an input image (``-i``), and saves the
    resulting samples as PNG after each transform.

    For example::

        ivadomed_visualize_transforms -i t2s.nii.gz -n 1 -c config.json -r t2s_seg.nii.gz

    Provides a visualization of a series of three transformations on a randomly selected slice:

    .. image:: ../../images/transforms_im.png
        :width: 600px
        :align: center

    And on a binary mask::

        ivadomed_visualize_transforms -i t2s_gmseg.nii.gz -n 1 -c config.json -r t2s_seg.nii.gz

    Gives:

    .. image:: ../../images/transforms_gt.png
        :width: 600px
        :align: center

    Args:
         input (string): Image filename. Flag: ``--input``, ``-i``
         config (string): Configuration file filename. Flag: ``--config``, ``-c``
         number (int): Number of slices randomly extracted. Flag: ``--number``, ``-n``
         output (string): Folder path where the results are saved. Flag: ``--ofolder``, ``-o``
         roi (string): Filename of the region of interest. Only needed if ROICrop is part of the transformations.
                       Flag: ``--roi``, ``-r``
    """
    # Load context
    context = imed_config_manager.ConfigurationManager(config).get_config()

    # Create output folder
    if not os.path.isdir(output):
        os.makedirs(output)

    # Slices are extracted along the axis defined below
    axis = imed_utils.AXIS_DCT[context["loader_parameters"]["slice_axis"]]
    # Get data
    input_img, input_data = get_data(input, axis)
    # Image or Mask
    is_mask = np.array_equal(input_data, input_data.astype(bool))
    # Get zooms
    zooms = imed_loader_utils.orient_shapes_hwd(input_img.header.get_zooms(),
                                                slice_axis=axis)
    # Get indexes
    indexes = random.sample(range(0, input_data.shape[2]), number)

    # Get training transforms
    training_transforms, _, _ = imed_transforms.get_subdatasets_transforms(
        context["transformation"])

    if "ROICrop" in training_transforms:
        if roi and os.path.isfile(roi):
            roi_img, roi_data = get_data(roi, axis)
        else:
            raise ValueError(
                "\nPlease provide an ROI image (-r) in order to apply the ROICrop transformation."
            )

    # Compose transforms
    dict_transforms = {}
    stg_transforms = ""
    for transform_name in training_transforms:
        # We skip the NumpyToTensor transform since it is only a change of data type
        if transform_name == "NumpyToTensor":
            continue

        # Update stg_transforms
        stg_transforms += transform_name + "_"

        # Add new transform to Compose
        dict_transforms.update(
            {transform_name: training_transforms[transform_name]})
        composed_transforms = imed_transforms.Compose(dict_transforms)

        # Loop across slices
        for i in indexes:
            data = [input_data[:, :, i]]
            # Init metadata
            metadata = imed_loader_utils.SampleMetadata({
                "zooms":
                zooms,
                "data_type":
                "gt" if is_mask else "im"
            })

            # Apply transformations to ROI
            if "CenterCrop" in training_transforms or (
                    "ROICrop" in training_transforms and os.path.isfile(roi)):
                metadata.__setitem__('crop_params', {})

            # Apply transformations to image
            stack_im, _ = composed_transforms(
                sample=data,
                metadata=[metadata for _ in range(number)],
                data_type="im")

            # Plot before / after transformation
            fname_out = os.path.join(
                output, stg_transforms + "slice" + str(i) + ".png")
            print("Fname out: {}.".format(fname_out))
            print("\t{}".format(dict(metadata)))
            # rescale intensities
            if len(stg_transforms[:-1].split("_")) == 1:
                before = np.rot90(
                    imed_maths.rescale_values_array(data[0], 0.0, 1.0))
            else:
                before = after
            after = np.rot90(
                imed_maths.rescale_values_array(stack_im[0], 0.0, 1.0))
            # Plot
            imed_utils.plot_transformed_sample(
                before,
                after,
                list_title=[
                    "\n".join(stg_transforms[:-1].split("_")[:-1]),
                    "\n".join(stg_transforms[:-1].split("_"))
                ],
                fname_out=fname_out,
                cmap="jet" if is_mask else "gray")
Example 14
def automate_training(file_config,
                      file_config_hyper,
                      fixed_split,
                      all_combin,
                      path_data=None,
                      n_iterations=1,
                      run_test=False,
                      all_logs=False,
                      thr_increment=None,
                      multi_params=False,
                      output_dir=None,
                      plot_comparison=False):
    """Automate multiple training processes on multiple GPUs.

    Hyperparameter optimization of models is tedious and time-consuming. This function automates
    this optimization across multiple GPUs. It runs trainings, on the same training and validation
    datasets, by combining a given set of parameters and set of values for each of these parameters.
    Results are collected for each combination and reported into a dataframe to allow their
    comparison. The script efficiently allocates each training to one of the available GPUs.

    Usage Example::

        ivadomed_automate_training -c config.json -p config_hyper.json -n n_iterations

    .. csv-table:: Example of dataframe
       :file: ../../images/detailed_results.csv

    Config File:

        The config file is the standard config file used in ``ivadomed`` functions. We use this
        as the basis. We call a key of this config file a ``category``. In the example below,
        we would say that ``training_parameters``, ``default_model``, and ``path_output`` are
        ``categories``.

        .. code-block:: JSON

            {
                "training_parameters": {
                    "batch_size": 18,
                    "loss": {"name": "DiceLoss"}
                },
                "default_model":     {
                    "name": "Unet",
                    "dropout_rate": 0.3,
                    "depth": 3
                },
                "model_name": "seg_tumor_t2",
                "path_output": "./tmp/"
            }

    Hyperparameter Config File:

        The hyperparameter config file should have the same layout as the config file. To select
        a hyperparameter you would like to vary, just list the different options under the
        appropriate key, which we call the ``base_key``. In the example below, we want to vary the
        ``loss``, ``depth``, and ``model_name``; these are our 3 ``base_keys``. As you can see,
        we have listed our different options for these keys. For ``depth``, we have listed
        ``2``, ``3``, and ``4`` as our different options.
        How we implement this depends on 3 settings: ``all_combin``, ``multi_param``,
        or the default.

        .. code-block:: JSON

            {
              "training_parameters": {
                "loss": [
                  {"name": "DiceLoss"},
                  {"name": "FocalLoss", "gamma": 0.2, "alpha" : 0.5}
                ],
              },
              "default_model": {"depth": [2, 3, 4]},
              "model_name": ["seg_sc_t2star", "find_disc_t1"]
            }

    Default:

        The default option is to change only one parameter at a time relative to the base
        config file. We then create a list of config options, called ``config_list``.
        Using the examples above, we would have ``2 + 2 + 3 = 7`` different config options:

        .. code-block:: python

            config_list = [
                {
                    "training_parameters": {
                        "batch_size": 18,
                        "loss": {"name": "DiceLoss"}
                    },
                    "default_model":     {
                        "name": "Unet",
                        "dropout_rate": 0.3,
                        "depth": 3
                    },
                    "model_name": "seg_tumor_t2",
                    "path_output": "./tmp/-loss={'name': 'DiceLoss'}"
                },
                {
                    "training_parameters": {
                        "batch_size": 18,
                        "loss": {"name": "FocalLoss", "gamma": 0.2, "alpha": 0.5}
                    },
                    "default_model":     {
                        "name": "Unet",
                        "dropout_rate": 0.3,
                        "depth": 3
                    },
                    "model_name": "seg_tumor_t2",
                    "path_output": "./tmp/-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}"
                },
                {
                    "training_parameters": {
                        "batch_size": 18,
                        "loss": {"name": "DiceLoss"}
                    },
                    "default_model":     {
                        "name": "Unet",
                        "dropout_rate": 0.3,
                        "depth": 2
                    },
                    "model_name": "seg_tumor_t2",
                    "path_output": "./tmp/-depth=2"
                },
                # etc ...
            ]


    All Combinations:

        If we select the ``all_combin`` option, we will create a list of configuration options
        combinatorically. Using the config examples above, we would have ``2 * 3 * 2 = 12``
        different config options. I'm not going to write out the whole ``config_list`` because it's
        quite long, but here are the combinations:

        .. code-block::

            loss = DiceLoss, depth = 2, model_name = "seg_sc_t2star"
            loss = FocalLoss, depth = 2, model_name = "seg_sc_t2star"
            loss = DiceLoss, depth = 3, model_name = "seg_sc_t2star"
            loss = FocalLoss, depth = 3, model_name = "seg_sc_t2star"
            loss = DiceLoss, depth = 4, model_name = "seg_sc_t2star"
            loss = FocalLoss, depth = 4, model_name = "seg_sc_t2star"
            loss = DiceLoss, depth = 2, model_name = "find_disc_t1"
            loss = FocalLoss, depth = 2, model_name = "find_disc_t1"
            loss = DiceLoss, depth = 3, model_name = "find_disc_t1"
            loss = FocalLoss, depth = 3, model_name = "find_disc_t1"
            loss = DiceLoss, depth = 4, model_name = "find_disc_t1"
            loss = FocalLoss, depth = 4, model_name = "find_disc_t1"

    Multiple Parameters:

        The ``multi_params`` option entails changing all the first elements from the list,
        then all the second parameters from the list, etc. If the lists are different lengths,
        we will just use the first ``n`` elements. In our example above, the lists are of length
        2 or 3, so we will only use the first 2 elements:

        .. code-block::

            loss = DiceLoss, depth = 2, model_name = "seg_sc_t2star"
            loss = FocalLoss, depth = 3, model_name = "find_disc_t1"


    Args:
        file_config (string): Configuration filename, which is used as skeleton to configure the
            training. This is the standard config file used in ``ivadomed`` functions. In the
            code, we call the keys from this config file ``categories``.
            Flag: ``--config``, ``-c``
        file_config_hyper (string): json file containing the parameter configurations to compare.
            The parameter "keys" of this file need to match the parameter "keys" of the `config` file.
            Parameter "values" are given in a list. Flag: ``--config-hyper``, ``-ch``

            Example::

                {"default_model": {"depth": [2, 3, 4]}}

        fixed_split (bool): If True, all the experiments are run on the same
            training/validation/testing subdatasets. Flag: ``--fixed-split``
        all_combin (bool): If True, all parameters combinations are run. Flag: ``--all-combin``
        n_iterations (int): Controls the number of times that each experiment (i.e., set of parameters)
            is run. Flag: ``--n-iteration``, ``-n``
        run_test (bool): If True, the trained model is also run on the testing subdataset and violin plots are
            displayed with the Dice scores for each new output folder created.
            Flag: ``--run-test``
        all_logs (bool): If True, all the log directories are kept for every iteration.
            Flag: ``--all-logs``, ``-l``
        thr_increment (float): A threshold analysis is performed at the end of the training
            using the trained model and the validation sub-dataset to find the optimal binarization
            threshold. The specified value indicates the increment between 0 and 1 used during the
            ROC analysis (e.g. 0.1). Flag: ``-t``, ``--thr-increment``
        multi_params (bool): If True, more than one parameter will be changed at a time from
            the hyperparameters. All the first elements from the hyperparameters list will be
            applied, then all the second, etc.
        path_data (str): If provided, overrides ``loader_parameters.path_data`` from the configuration file.
        output_dir (str): Path to where the results will be saved.
        plot_comparison (bool): If True, violin plots comparing the models are displayed once training is done.
    """
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not output_dir:
        output_dir = ""

    # Load initial config
    initial_config = imed_config_manager.ConfigurationManager(
        file_config).get_config()

    if path_data is not None:
        initial_config["loader_parameters"]["path_data"] = path_data

    # Split dataset if not already done
    if fixed_split and (initial_config.get("split_path") is None):
        initial_config = split_dataset(initial_config)

    # Hyperparameters values to experiment
    with open(file_config_hyper, "r") as fhandle:
        config_hyper = json.load(fhandle)

    param_list = get_param_list(config_hyper, [], [])
    config_list = make_config_list(param_list, initial_config, all_combin,
                                   multi_params)

    # CUDA problem when forking process
    # https://github.com/pytorch/pytorch/issues/2517
    ctx = mp.get_context("spawn")

    # Run all configs on a separate process, with a maximum of n_gpus processes at a given time
    logging.info(initial_config['gpu_ids'])

    results_df = pd.DataFrame()
    eval_df = pd.DataFrame()
    all_mean = pd.DataFrame()

    with ctx.Pool(processes=len(initial_config["gpu_ids"])) as pool:
        for i in range(n_iterations):
            if not fixed_split:
                # Set seed for iteration
                seed = random.randint(1, 10001)
                for config in config_list:
                    config["split_dataset"]["random_seed"] = seed
                    if all_logs:
                        if i:
                            config["path_output"] = config[
                                "path_output"].replace(
                                    "_n=" + str(i - 1).zfill(2),
                                    "_n=" + str(i).zfill(2))
                        else:
                            config["path_output"] += "_n=" + str(i).zfill(2)

            validation_scores = pool.map(
                partial(train_worker, thr_incr=thr_increment), config_list)

            val_df = pd.DataFrame(validation_scores,
                                  columns=[
                                      'path_output', 'best_training_dice',
                                      'best_training_loss',
                                      'best_validation_dice',
                                      'best_validation_loss'
                                  ])

            if run_test:
                new_config_list = []
                for config in config_list:
                    # Delete path_pred
                    path_pred = os.path.join(config['path_output'],
                                             'pred_masks')
                    if os.path.isdir(path_pred) and n_iterations > 1:
                        try:
                            shutil.rmtree(path_pred)
                        except OSError as e:
                            logging.error("Error: %s - %s." %
                                          (e.filename, e.strerror))

                    # Use the config file saved in path_output, because binarize_prediction may have been updated
                    json_path = os.path.join(config['path_output'],
                                             'config_file.json')
                    new_config = imed_config_manager.ConfigurationManager(
                        json_path).get_config()
                    new_config["gpu_ids"] = config["gpu_ids"]
                    new_config_list.append(new_config)

                test_results = pool.map(test_worker, new_config_list)

                df_lst = []
                # Merge all eval dataframes together to produce a single results file
                for j, result in enumerate(test_results):
                    df = result[-1]

                    if i == 0:
                        all_mean = df.mean(axis=0)
                        std_metrics = df.std(axis=0)
                        metrics = pd.concat([all_mean, std_metrics],
                                            sort=False,
                                            axis=1)
                    else:
                        all_mean = pd.concat(
                            [all_mean, df.mean(axis=0)], sort=False, axis=1)
                        mean_metrics = all_mean.mean(axis=1)
                        std_metrics = all_mean.std(axis=1)
                        metrics = pd.concat([mean_metrics, std_metrics],
                                            sort=False,
                                            axis=1)

                    metrics.rename({0: "mean"}, axis=1, inplace=True)
                    metrics.rename({1: "std"}, axis=1, inplace=True)
                    run_id = result[0].split("_n=")[0]
                    for col in metrics.columns.values:
                        metrics.rename({col: col + "_" + run_id},
                                       axis=1,
                                       inplace=True)
                    df_lst.append(metrics)
                    test_results[j] = result[:2]

                # Init or add eval results to dataframe
                eval_df = pd.concat(df_lst, sort=False, axis=1)

                test_df = pd.DataFrame(test_results,
                                       columns=['path_output', 'test_dice'])
                combined_df = val_df.set_index('path_output').join(
                    test_df.set_index('path_output'))
                combined_df = combined_df.reset_index()

            else:
                combined_df = val_df

            results_df = pd.concat([results_df, combined_df])
            results_df.to_csv(os.path.join(output_dir,
                                           "temporary_results.csv"))
            eval_df.to_csv(os.path.join(output_dir, "average_eval.csv"))

    results_df = format_results(results_df, config_list, param_list)
    results_df.to_csv(os.path.join(output_dir, "detailed_results.csv"))

    logging.info("Detailed results")
    logging.info(results_df)

    # Compute avg, std, p-values
    if n_iterations > 1:
        compute_statistics(results_df, n_iterations, run_test)

    # If the test is selected, also show the violin plots
    if plot_comparison:
        output_folders = [
            config_list[i]["path_output"] for i in range(len(config_list))
        ]
        violin_plots.visualize_and_compare_models(ofolders=output_folders)
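
Before the next example, a minimal sketch of how the two hyperparameter expansion modes described in the docstring above could behave. This is an illustrative assumption, not ivadomed's actual get_param_list/make_config_list implementation: all_combin is taken to mean the Cartesian product of all listed values, while multi_params changes all parameters in lockstep (all first elements together, then all second elements, and so on); the default varies one parameter at a time.

# Illustrative sketch only: the hyperparameter dict layout and expand() helper
# below are assumptions, not ivadomed's make_config_list.
import itertools

config_hyper = {
    "batch_size": [8, 16],
    "learning_rate": [1e-3, 1e-4],
}

def expand(hyper, all_combin=False, multi_params=False):
    keys, values = list(hyper), list(hyper.values())
    if all_combin:
        # Cartesian product of every value: 4 configs here.
        return [dict(zip(keys, combo)) for combo in itertools.product(*values)]
    if multi_params:
        # Lockstep: all first elements applied together, then all second: 2 configs.
        return [dict(zip(keys, combo)) for combo in zip(*values)]
    # Default: change one parameter at a time: 4 single-change configs.
    return [{key: val} for key, vals in hyper.items() for val in vals]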
Example no. 15
0
def segment_volume(folder_model: str,
                   fname_images: list,
                   gpu_id: int = 0,
                   options: dict = None):
    """Segment an image.

    Segment one or several images (`fname_images`) using a pre-trained model (`folder_model`). If provided, a region
    of interest (`fname_roi`) is used to crop the image prior to segmenting it.

    Args:
        folder_model (str): Name of the folder that contains
            (1) the model ('folder_model/folder_model.pt') to use
            (2) its configuration file ('folder_model/folder_model.json') used for the training,
            see https://github.com/neuropoly/ivadomed/wiki/configuration-file
        fname_images (list): List of image filenames (e.g., .nii.gz) to segment. Multichannel models require multiple
            images to segment, i.e., len(fname_images) > 1.
        gpu_id (int): ID of the GPU to use, if available. Currently does NOT support multiple-GPU segmentation.
        options (dict): Contains postprocessing steps and the prior filename (fname_prior), an image filename
            (e.g., .nii.gz) containing processing information (e.g., spinal cord segmentation, spinal location or MS
            lesion classification, spinal cord centerline), used to crop the image prior to segmenting it if provided.
            The segmentation is not performed on the slices that are empty in this image.

    Returns:
        list: List of nibabel objects containing the soft segmentation(s), one per prediction class.
        list: List of target suffixes associated with each prediction in `pred_list`.

    """

    # Check if model folder exists and get filenames to be stored as string
    fname_model: str
    fname_model_metadata: str
    fname_model, fname_model_metadata = imed_models.get_model_filenames(
        folder_model)

    # Load model training config
    context = imed_config_manager.ConfigurationManager(
        fname_model_metadata).get_config()

    postpro_list = [
        'binarize_prediction', 'keep_largest', 'fill_holes', 'remove_small'
    ]
    if options is not None and any(pp in options for pp in postpro_list):
        set_postprocessing_options(options, context)

    # LOADER
    loader_params = context["loader_parameters"]
    slice_axis = imed_utils.AXIS_DCT[loader_params['slice_axis']]
    metadata = {}
    fname_roi = None
    fname_prior = options['fname_prior'] if (options is not None) and (
        'fname_prior' in options) else None
    if fname_prior is not None:
        if 'roi_params' in loader_params and loader_params['roi_params'][
                'suffix'] is not None:
            fname_roi = fname_prior
        # TRANSFORMATIONS
        metadata = process_transformations(context, fname_roi, fname_prior,
                                           metadata, slice_axis, fname_images)

    # Compose transforms
    _, _, transform_test_params = imed_transforms.get_subdatasets_transforms(
        context["transformation"])

    transform_lst, undo_transforms = imed_transforms.prepare_transforms(
        transform_test_params)

    # Force filter_empty_mask to False if fname_roi = None
    if fname_roi is None and 'filter_empty_mask' in loader_params[
            "slice_filter_params"]:
        logger.warning(
            "fname_roi has not been specified; the entire volume will be processed."
        )
        loader_params["slice_filter_params"]["filter_empty_mask"] = False

    filename_pairs = [(fname_images, None, fname_roi,
                       metadata if isinstance(metadata, list) else [metadata])]

    kernel_3D = bool('Modified3DUNet' in context and context['Modified3DUNet']['applied']) or \
                not context['default_model']['is_2d']
    if kernel_3D:
        ds = imed_loader.MRI3DSubVolumeSegmentationDataset(
            filename_pairs,
            transform=transform_lst,
            length=context["Modified3DUNet"]["length_3D"],
            stride=context["Modified3DUNet"]["stride_3D"])
        logger.info(
            f"Loaded {len(ds)} {loader_params['slice_axis']} volumes of shape "
            f"{context['Modified3DUNet']['length_3D']}.")
    else:
        ds = imed_loader.MRI2DSegmentationDataset(
            filename_pairs,
            slice_axis=slice_axis,
            cache=True,
            transform=transform_lst,
            slice_filter_fn=imed_loader_utils.SliceFilter(
                **loader_params["slice_filter_params"]))
        ds.load_filenames()
        logger.info(f"Loaded {len(ds)} {loader_params['slice_axis']} slices.")

    model_params = {}
    if 'FiLMedUnet' in context and context['FiLMedUnet']['applied']:
        onehotencoder = get_onehotencoder(context, folder_model, options, ds)
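        # n_metadata counts the total number of one-hot categories across all
        # FiLM metadata fields.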
        model_params.update({
            "name":
            'FiLMedUnet',
            "film_onehotencoder":
            onehotencoder,
            "n_metadata":
            len([cat_val for cats in onehotencoder.categories_ for cat_val in cats])
        })

    # Data Loader
    data_loader = DataLoader(
        ds,
        batch_size=context["training_parameters"]["batch_size"],
        shuffle=False,
        pin_memory=True,
        collate_fn=imed_loader_utils.imed_collate,
        num_workers=0)

    # Loop across batches
    preds_list, slice_idx_list = [], []
    last_sample_bool, weight_matrix, volume = False, None, None
    for i_batch, batch in enumerate(data_loader):
        preds = get_preds(context, fname_model, model_params, gpu_id, batch)

        # Set datatype to gt since prediction should be processed the same way as gt
        for b in batch['input_metadata']:
            for modality in b:
                modality['data_type'] = 'gt'

        # Reconstruct 3D object
        pred_list, target_list, last_sample_bool, weight_matrix, volume = reconstruct_3d_object(
            context, batch, undo_transforms, preds, preds_list, kernel_3D,
            slice_axis, slice_idx_list, data_loader, fname_images, i_batch,
            last_sample_bool, weight_matrix, volume)

    return pred_list, target_list
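
A usage sketch for this function; the paths are placeholders, and the save step assumes, per the docstring, that the returned predictions are nibabel image objects:

# Usage sketch -- model and image paths are placeholders, not real files.
import nibabel as nib

pred_list, target_list = segment_volume(
    folder_model="path/to/folder_model",
    fname_images=["path/to/image.nii.gz"],  # one filename per input channel
    gpu_id=0,
    options={"fname_prior": "path/to/centerline.nii.gz"})

# One soft segmentation per prediction class, paired with its target suffix.
for pred, suffix in zip(pred_list, target_list):
    nib.save(pred, "path/to/image_pred" + suffix + ".nii.gz")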
Example no. 16
0
def segment_volume(folder_model: str,
                   fname_images: list,
                   gpu_id: int = 0,
                   options: dict = None):
    """Segment an image.

    Segment one or several images (`fname_images`) using a pre-trained model (`folder_model`). If provided, a region
    of interest (`fname_roi`) is used to crop the image prior to segmenting it.

    Args:
        folder_model (str): Name of the folder that contains
            (1) the model ('folder_model/folder_model.pt') to use
            (2) its configuration file ('folder_model/folder_model.json') used for the training,
            see https://github.com/neuropoly/ivadomed/wiki/configuration-file
        fname_images (list): List of image filenames (e.g., .nii.gz) to segment. Multichannel models require multiple
            images to segment, i.e., len(fname_images) > 1.
        gpu_id (int): ID of the GPU to use, if available. Currently does NOT support multiple-GPU segmentation.
        options (dict): This can optionally contain any of the following key-value pairs:

            * 'binarize_prediction': (float) Binarize segmentation with specified threshold. \
                Predictions below the threshold become 0, and predictions above or equal to \
                threshold become 1. Set to -1 for no thresholding (i.e., soft segmentation).
            * 'binarize_maxpooling': (bool) Binarize by setting to 1 the voxel having the maximum prediction across \
                all classes. Useful for multiclass models.
            * 'fill_holes': (bool) Fill small holes in the segmentation.
            * 'keep_largest': (bool) Keep the largest connected-object for each class from the output segmentation.
            * 'remove_small': (list of str) Minimal object size to keep with unit (mm3 or vox). A single value can be provided \
                              or one value per prediction class. Single value example: ["1mm3"], ["5vox"]. Multiple values \
                              example: ["10", "20", "10vox"] (remove objects smaller than 10 voxels for class 1 and 3, \
                              and smaller than 20 voxels for class 2).
            * 'pixel_size': (list of float) List of microscopy pixel size in micrometers. \
                            Length equals 2 [PixelSizeX, PixelSizeY] for 2D or 3 [PixelSizeX, PixelSizeY, PixelSizeZ] for 3D, \
                            where X is the width, Y the height and Z the depth of the image.
            * 'pixel_size_units': (str) Units of pixel size (Must be either "mm", "um" or "nm")
            * 'overlap_2D': (list of int) List of overlaps in pixels for 2D patching. Length equals 2 [OverlapX, OverlapY], \
                            where X is the width and Y the height of the image.
            * 'metadata': (str) FiLM metadata.
            * 'fname_prior': (str) An image filename (e.g., .nii.gz) containing processing information \
                (e.g., spinal cord segmentation, spinal location or MS lesion classification, spinal cord centerline), \
                used to crop the image prior to segment it if provided. \
                The segmentation is not performed on the slices that are empty in this image.

    Returns:
        list, list: List of nibabel objects containing the soft segmentation(s), one per prediction class, \
            and the list of target suffixes associated with each prediction in `pred_list`.

    """

    # Check if model folder exists and get filenames to be stored as string
    fname_model: str
    fname_model_metadata: str
    fname_model, fname_model_metadata = imed_models.get_model_filenames(
        folder_model)

    # Load model training config
    context = imed_config_manager.ConfigurationManager(
        fname_model_metadata).get_config()

    postpro_list = [
        'binarize_prediction', 'binarize_maxpooling', 'keep_largest',
        'fill_holes', 'remove_small'
    ]
    if options is not None and any(pp in options for pp in postpro_list):
        set_postprocessing_options(options, context)

    # LOADER
    loader_params = context[ConfigKW.LOADER_PARAMETERS]
    slice_axis = imed_utils.AXIS_DCT[loader_params[LoaderParamsKW.SLICE_AXIS]]
    metadata = {}
    fname_roi = None

    if (options is not None) and (OptionKW.FNAME_PRIOR in options):
        fname_prior = options.get(OptionKW.FNAME_PRIOR)
    else:
        fname_prior = None

    if fname_prior is not None:
        if LoaderParamsKW.ROI_PARAMS in loader_params and loader_params[
                LoaderParamsKW.ROI_PARAMS][ROIParamsKW.SUFFIX] is not None:
            fname_roi = fname_prior
        # TRANSFORMATIONS
        metadata = process_transformations(context, fname_roi, fname_prior,
                                           metadata, slice_axis, fname_images)

    # Compose transforms
    _, _, transform_test_params = imed_transforms.get_subdatasets_transforms(
        context[ConfigKW.TRANSFORMATION])

    transform_lst, undo_transforms = imed_transforms.prepare_transforms(
        transform_test_params)

    # Force filter_empty_mask to False if fname_roi = None
    if fname_roi is None and SliceFilterParamsKW.FILTER_EMPTY_MASK in loader_params[
            LoaderParamsKW.SLICE_FILTER_PARAMS]:
        logger.warning(
            "fname_roi has not been specified; the entire volume will be processed."
        )
        loader_params[LoaderParamsKW.SLICE_FILTER_PARAMS][
            SliceFilterParamsKW.FILTER_EMPTY_MASK] = False

    kernel_3D = bool(ConfigKW.MODIFIED_3D_UNET in context and context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.APPLIED]) or \
                not context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.IS_2D]

    # Assign length_2D and stride_2D for 2D patching
    length_2D = context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.LENGTH_2D] if \
        ModelParamsKW.LENGTH_2D in context[ConfigKW.DEFAULT_MODEL] else []
    stride_2D = context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.STRIDE_2D] if \
        ModelParamsKW.STRIDE_2D in context[ConfigKW.DEFAULT_MODEL] else []
    is_2d_patch = bool(length_2D)
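    # An empty length_2D (no patch size in the model config) means the model
    # operates on full 2D slices rather than patches.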

    if is_2d_patch and (options is not None) and (OptionKW.OVERLAP_2D
                                                  in options):
        overlap_2D = options.get(OptionKW.OVERLAP_2D)
        # Swap OverlapX and OverlapY resulting in an array in order [OverlapY, OverlapX]
        # to match length_2D and stride_2D in [Height, Width] orientation.
        overlap_2D[1], overlap_2D[0] = overlap_2D[0], overlap_2D[1]
        # Adjust stride_2D with overlap_2D
        stride_2D = [x1 - x2 for (x1, x2) in zip(length_2D, overlap_2D)]
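        # Worked example: length_2D = [256, 256] with overlap_2D = [48, 32] after
        # the swap gives stride_2D = [208, 224], i.e., consecutive patches overlap
        # by 48 px in height and 32 px in width.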

    # Add microscopy pixel size and pixel size units from options to metadata for filenames_pairs
    if (options is not None) and (OptionKW.PIXEL_SIZE in options):
        metadata[MetadataKW.PIXEL_SIZE] = options.get(OptionKW.PIXEL_SIZE)
    if (options is not None) and (OptionKW.PIXEL_SIZE_UNITS in options):
        metadata[MetadataKW.PIXEL_SIZE_UNITS] = options.get(
            OptionKW.PIXEL_SIZE_UNITS)

    filename_pairs = [(fname_images, None, fname_roi,
                       metadata if isinstance(metadata, list) else [metadata])]

    if kernel_3D:
        ds = MRI3DSubVolumeSegmentationDataset(
            filename_pairs,
            transform=transform_lst,
            length=context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.LENGTH_3D],
            stride=context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.STRIDE_3D],
            slice_axis=slice_axis)
        logger.info(
            f"Loaded {len(ds)} {loader_params[LoaderParamsKW.SLICE_AXIS]} volumes of shape "
            f"{context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.LENGTH_3D]}.")
    else:
        ds = MRI2DSegmentationDataset(
            filename_pairs,
            length=length_2D,
            stride=stride_2D,
            slice_axis=slice_axis,
            nibabel_cache=True,
            transform=transform_lst,
            slice_filter_fn=SliceFilter(
                **loader_params[LoaderParamsKW.SLICE_FILTER_PARAMS]))
        ds.load_filenames()
        if is_2d_patch:
            logger.info(
                f"Loaded {len(ds)} {loader_params[LoaderParamsKW.SLICE_AXIS]} patches of shape {length_2D}."
            )
        else:
            logger.info(
                f"Loaded {len(ds)} {loader_params[LoaderParamsKW.SLICE_AXIS]} slices."
            )

    model_params = {}
    if ConfigKW.FILMED_UNET in context and context[ConfigKW.FILMED_UNET][
            ModelParamsKW.APPLIED]:
        onehotencoder = get_onehotencoder(context, folder_model, options, ds)
        model_params.update({
            ModelParamsKW.NAME:
            ConfigKW.FILMED_UNET,
            ModelParamsKW.FILM_ONEHOTENCODER:
            onehotencoder,
            ModelParamsKW.N_METADATA:
            len([cat_val for cats in onehotencoder.categories_ for cat_val in cats])
        })

    # Data Loader
    data_loader = DataLoader(ds,
                             batch_size=context[ConfigKW.TRAINING_PARAMETERS][
                                 TrainingParamsKW.BATCH_SIZE],
                             shuffle=False,
                             pin_memory=True,
                             collate_fn=imed_loader_utils.imed_collate,
                             num_workers=0)

    # Loop across batches
    preds_list, slice_idx_list = [], []
    last_sample_bool, weight_matrix, volume, image = False, None, None, None
    for i_batch, batch in enumerate(data_loader):
        preds = get_preds(context, fname_model, model_params, gpu_id, batch)

        # Set datatype to gt since prediction should be processed the same way as gt
        for b in batch[MetadataKW.INPUT_METADATA]:
            for modality in b:
                modality['data_type'] = 'gt'

        # Reconstruct 3D object
        pred_list, target_list, last_sample_bool, weight_matrix, volume, image = reconstruct_3d_object(
            context, batch, undo_transforms, preds, preds_list, kernel_3D,
            is_2d_patch, slice_axis, slice_idx_list, data_loader, fname_images,
            i_batch, last_sample_bool, weight_matrix, volume, image)

    return pred_list, target_list
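
As above, a usage sketch with placeholder paths; the option keys are taken from this variant's docstring, here exercising the postprocessing and microscopy/2D-patching options:

# Usage sketch -- paths are placeholders; option keys come from the docstring above.
pred_list, target_list = segment_volume(
    folder_model="path/to/folder_model",
    fname_images=["path/to/image.nii.gz"],
    gpu_id=0,
    options={
        "binarize_prediction": 0.5,  # threshold the soft segmentation at 0.5
        "keep_largest": True,        # keep the largest connected object per class
        "pixel_size": [0.1, 0.1],    # [PixelSizeX, PixelSizeY]
        "pixel_size_units": "um",    # micrometers
        "overlap_2D": [48, 48],      # [OverlapX, OverlapY] in pixels
    })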