def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                mode,
                                size,
                                target_resolution,
                                force_overwrite=False,
                                cv_fold_num = 1):
    """Load the preprocessed dataset for this configuration, preprocessing first if needed.

    :param input_folder: Folder where the raw data is located
    :param preprocessing_folder: Folder the hdf5 cache file is written to
    :param mode: Preprocessing mode, encoded into the cache file name
    :param size: Size of the output slices/volumes in pixels/voxels
    :param target_resolution: Resolution the data should be resampled to
    :param force_overwrite: Redo preprocessing even if a cache file exists [default: False]
    :param cv_fold_num: Cross-validation fold, encoded into the cache file name [default: 1]
    :return: An open read-only h5py.File handle to the dataset
    """

    # Encode the requested geometry so each (mode, size, resolution, fold)
    # combination gets its own cache file.
    size_str = '_'.join(str(dim) for dim in size)
    res_str = '_'.join(str(dim) for dim in target_resolution)
    data_file_name = 'data_%s_size_%s_res_%s_cv_fold_%d.hdf5' % (mode, size_str, res_str, cv_fold_num)
    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    needs_preprocessing = force_overwrite or not os.path.exists(data_file_path)
    if needs_preprocessing:
        logging.info('This configuration of mode, size and target resolution has not yet been preprocessed')
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, mode, size,
                     target_resolution, cv_fold_num)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    # The returned h5py.File acts like a python dictionary;
    # list(f.keys()) gives the stored dataset names.
    return h5py.File(data_file_path, 'r')
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                input_channels,
                                force_overwrite=False):
    '''
    This function is used to load and if necessary preprocesses the ACDC challenge data

    :param input_folder: Folder where the raw ACDC challenge data is located
    :param preprocessing_folder: Folder where the preprocessed data should be written to
    :param input_channels: Number of input channels; forwarded unchanged to prepare_data
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]

    :return: Returns an h5py.File handle to the dataset
    '''

    # The cache file name is fixed, so every call shares the same 3D hdf5 file
    # regardless of input_channels — TODO confirm this is intended.
    data_file_name = 'data_3D.hdf5'

    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    # Preprocess only when the cache is missing or an overwrite is forced.
    if not os.path.exists(data_file_path) or force_overwrite:
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, input_channels)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
Beispiel #3
0
def main(bmicdatasets_adni_images, processed_images_folder):
    """Run the full ADNI preprocessing pipeline and write a per-image summary CSV."""
    utils.makefolder(processed_images_folder)
    summary_csv_file = os.path.join(processed_images_folder,
                                    'summary_alldata.csv')

    # Enable every available preprocessing stage.
    do_reorientation = do_registration = do_bias_correction = True
    do_cropping = do_skull_stripping = True

    t_start = time.time()

    # NOTE(review): adni_merge_path is a module-level name defined elsewhere
    # in this file/project — verify it is set before main() is called.
    adnimerge_table = pd.read_csv(adni_merge_path)
    do_preprocessing(adnimerge_table,
                     0,
                     processed_images_folder,
                     summary_csv_file,
                     do_reorientation=do_reorientation,
                     do_registration=do_registration,
                     do_bias_correction=do_bias_correction,
                     do_cropping=do_cropping,
                     do_skull_stripping=do_skull_stripping,
                     write_csv=True,
                     bmicdatasets_adni_images=bmicdatasets_adni_images)

    logging.info('Elapsed time %f secs' % (time.time() - t_start))
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                idx_start,
                                idx_end,
                                protocol,
                                size,
                                target_resolution,
                                force_overwrite=False):
    """Load the 3D dataset for the given protocol and subject-index range, preprocessing on demand.

    :param input_folder: Folder where the raw data is located
    :param preprocessing_folder: Folder the hdf5 cache file is written to
    :param idx_start: First index of the requested range (encoded in the cache file name)
    :param idx_end: Last index of the requested range (encoded in the cache file name)
    :param protocol: Protocol identifier, encoded into the cache file name
    :param size: Size of the output volumes in voxels
    :param target_resolution: Resolution the data should be resampled to
    :param force_overwrite: Redo preprocessing even if a cache file exists [default: False]
    :return: An open read-only h5py.File handle to the dataset
    """
    size_str = '_'.join(map(str, size))
    res_str = '_'.join(map(str, target_resolution))

    # One cache file per (protocol, geometry, subject-index range).
    data_file_path = os.path.join(
        preprocessing_folder,
        'data_%s_3d_size_%s_res_%s_from_%d_to_%d.hdf5' % (
            protocol, size_str, res_str, idx_start, idx_end))

    utils.makefolder(preprocessing_folder)

    if force_overwrite or not os.path.exists(data_file_path):
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, idx_start, idx_end,
                     protocol, size, target_resolution, preprocessing_folder)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
def load_data(input_folder,
              preproc_folder,
              protocol,
              idx_start,
              idx_end,
              force_overwrite=False):
    '''
    This function is used to load and if necessary preprocess the HCP data

    :param input_folder: Folder where the raw HCP challenge data is located
    :param preproc_folder: Folder where the preprocessed data should be written to
    :param protocol: Can either be 'T1w_', 'T2w_' or 'both'. Indicates the protocol of the training data
    :param idx_start: First index of the requested image range (encoded in the cache file names)
    :param idx_end: Last index of the requested range (whether inclusive or exclusive is decided inside prepare_data)
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]

    :return: (images, masks, affines, patnames) for the requested range
    '''

    # ==========================
    # create the pre-processing folder, if it does not exist
    # ==========================
    utils.makefolder(preproc_folder)

    logging.info(
        '============================================================')
    logging.info('Loading data for %s images...' % (protocol))

    # ==========================
    # make appropriate filenames according to the requested indices of training, validation and test images
    # ==========================
    config_details = '%sfrom%dto%d_' % (protocol, idx_start, idx_end)
    filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    # ==========================
    # if the images have not already been extracted, do so
    # ==========================
    if not os.path.exists(filepath_images) or force_overwrite:

        logging.info(
            'This configuration of protocol and data indices has not yet been preprocessed'
        )
        logging.info('Preprocessing now...')
        # presumably prepare_data also saves the .npy cache files named above
        # for the next call — TODO confirm against its implementation.
        images, masks, affines, patnames = prepare_data(
            input_folder, preproc_folder, protocol, idx_start, idx_end)

    else:

        logging.info('Already preprocessed this configuration. Loading now...')
        # read from already created npy files
        images = np.load(filepath_images)
        masks = np.load(filepath_masks)
        affines = np.load(filepath_affine)
        patnames = np.load(filepath_patnames)

    return images, masks, affines, patnames
Beispiel #6
0
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                size,
                                target_resolution,
                                label_list,
                                offset=None,
                                rescale_to_one=False,
                                force_overwrite=False):
    '''
    Load the preprocessed ACDC data, generating the hdf5 cache file first if necessary.

    :param input_folder: Folder where the raw ACDC challenge data is located
    :param preprocessing_folder: Folder where the preprocessed data should be written to
    :param size: Size of the output slices/volumes in pixels/voxels
    :param target_resolution: Resolution to which the data should be resampled. Should have same shape as size
    :param label_list: Labels to keep; encoded into the cache file name
    :param offset: Optional crop offset — must be a 3-tuple of ints when given
    :param rescale_to_one: If True, a '_intrangeone' postfix is added to the cache file name
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]

    :return: Returns an h5py.File handle to the dataset
    '''

    size_str = '_'.join(str(v) for v in size)
    res_str = '_'.join(str(v) for v in target_resolution)
    lbl_str = '_'.join(str(v) for v in label_list)

    rescale_postfix = '_intrangeone' if rescale_to_one else ''
    offset_postfix = '_offset_%d_%d_%d' % offset if offset is not None else ''

    data_file_name = 'all_data_size_%s_res_%s_lbl_%s%s%s.hdf5' % (
        size_str, res_str, lbl_str, rescale_postfix, offset_postfix)
    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    if force_overwrite or not os.path.exists(data_file_path):
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(input_folder,
                     data_file_path,
                     size,
                     target_resolution,
                     label_list,
                     offset=offset,
                     rescale_to_one=rescale_to_one)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                mode,
                                size,
                                target_resolution,
                                force_overwrite=False,
                                cv_fold_num=1):
    """Return an h5py.File handle to the cached dataset, running preprocessing on demand.

    :param input_folder: Folder where the raw data is located
    :param preprocessing_folder: Folder the hdf5 cache file is written to
    :param mode: Preprocessing mode, encoded into the cache file name
    :param size: Size of the output slices/volumes in pixels/voxels
    :param target_resolution: Resolution the data should be resampled to
    :param force_overwrite: Redo preprocessing even if a cache file exists [default: False]
    :param cv_fold_num: Cross-validation fold, encoded into the cache file name [default: 1]
    :return: An open read-only h5py.File handle
    """

    def _fmt(values):
        # Join the per-axis values into the filename-friendly form "a_b_c".
        return '_'.join(str(v) for v in values)

    data_file_name = 'data_%s_size_%s_res_%s_cv_fold_%d.hdf5' % (
        mode, _fmt(size), _fmt(target_resolution), cv_fold_num)
    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    if os.path.exists(data_file_path) and not force_overwrite:
        logging.info('Already preprocessed this configuration. Loading now!')
    else:
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, mode, size,
                     target_resolution, cv_fold_num)

    # The h5py.File object acts like a python dictionary;
    # list(f.keys()) gives the stored dataset names.
    return h5py.File(data_file_path, 'r')


# ====================================================
# Each patient examination typically includes between 200 and 280 images, with 20 images per cardiac cycle.
# Manual delineation is performed on ED and ES images, yielding around 15 manually segmented images per patient.
# ====================================================
# Images are named in this format :
# P[TWO DIGIT NB]-[FOUR DIGIT NUMBER].dcm
# TWO DIGIT NB: patient number
# FOUR DIGIT NUMBER: it signifies the position of the image within the series.
# For example, P01-0000  would be the first (ED) phase of the slice closest to the base.
# Taking into account that there are twenty phases per slice, you can then determine that an image with the number 0121 would represent the second phase of the sixth slice from the bottom.
# ED images are P01-0000.dcm, P01-0020.dcm, P01-0040.dcm, etc.
# ES images may be for example, depending on the ES definition, P01-0008.dcm, P01-0028.dcm, P01-0048.dcm, etc.

# Contours are named in this format:
# P[TWO DIGIT NB]-[FOUR DIGIT NUMBER]-[i/o]contour-[manual/auto].txt
# TWO DIGIT NB: patient number
# FOUR DIGIT NUMBER: position of the image within the series (please see image naming)
# i/o: it signifies the area that the contour represents.
# I contours are inner contours, or contours that segment the endocardium. (only extract this, as ACDC labels are for the RV endocardium)
# O contours are outer contours, contours that segment the epicardium.
# manual/auto: it signifies the process through which the contours were obtained.
# All of the expert contours will be denoted manual as they were drawn by humans, and all of the contours that were algorithmically generated should be labelled auto (for automatic).
# ====================================================
Beispiel #8
0
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                site_name,
                                idx_start,
                                idx_end,
                                protocol,
                                size,
                                depth,
                                target_resolution,
                                force_overwrite=False):
    """Load the per-site 2D dataset, preprocessing it into an hdf5 cache first if needed.

    :param input_folder: Folder where the (already bias-corrected) raw images are located
    :param preprocessing_folder: Root folder for per-site cache sub-folders
    :param site_name: Acquisition site; selects the sub-folder and cache file
    :param idx_start: First index of the requested range (encoded in the cache file name)
    :param idx_end: Last index of the requested range (encoded in the cache file name)
    :param protocol: Protocol identifier, encoded into the cache file name
    :param size: In-plane size of the output slices in pixels
    :param depth: Number of slices, encoded into the cache file name
    :param target_resolution: Resolution the data should be resampled to
    :param force_overwrite: Redo preprocessing even if a cache file exists [default: False]
    :return: An open read-only h5py.File handle to the dataset
    """

    # NOTE: on the very first run the raw files used to be copied from the
    # bmicnas server and N4 bias-field corrected here. That step is already
    # done — the corrected images now live in input_folder — so the copy /
    # bias-correction code has been dropped from this function.

    size_str = '_'.join(str(v) for v in size)
    res_str = '_'.join(str(v) for v in target_resolution)

    # Each site gets its own sub-folder inside the preprocessing folder.
    preprocessing_folder = preprocessing_folder + site_name + '/'
    data_file_name = 'data_%s_2d_size_%s_depth_%d_res_%s_from_%d_to_%d.hdf5' % (
        protocol, size_str, depth, res_str, idx_start, idx_end)
    data_file_path = os.path.join(preprocessing_folder, data_file_name)
    utils.makefolder(preprocessing_folder)

    if force_overwrite or not os.path.exists(data_file_path):
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, site_name, idx_start,
                     idx_end, protocol, size, depth, target_resolution,
                     preprocessing_folder)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                mode,
                                size,
                                target_resolution,
                                force_overwrite=False,
                                split_test_train=True):

    '''
    This function is used to load and if necessary preprocesses the ACDC challenge data

    :param input_folder: Folder where the raw ACDC challenge data is located
    :param preprocessing_folder: Folder where the preprocessed data should be written to
    :param mode: Can either be '2D' or '3D'. 2D saves the data slice-by-slice, 3D saves entire volumes
    :param size: Size of the output slices/volumes in pixels/voxels
    :param target_resolution: Resolution to which the data should resampled. Should have same shape as size
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]
    :param split_test_train: If False, the cache file carries an '_onlytrain' suffix

    :return: Returns an h5py.File handle to the dataset
    '''

    size_str = '_'.join([str(i) for i in size])
    res_str = '_'.join([str(i) for i in target_resolution])

    if not split_test_train:
        data_file_name = 'data_%s_size_%s_res_%s_onlytrain.hdf5' % (mode, size_str, res_str)
    else:
        data_file_name = 'data_%s_size_%s_res_%s.hdf5' % (mode, size_str, res_str)

    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    if not os.path.exists(data_file_path) or force_overwrite:

        logging.info('This configuration of mode, size and target resolution has not yet been preprocessed')
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, mode, size, target_resolution, split_test_train=split_test_train)

    elif os.path.getsize(data_file_path) < 10485760:  # If file is smaller than 10MB

        logging.warning('WARNING: Your preprocessed data file is smaller than 10MB. It is likely that something went '
                        'wrong with the preprocessing.')
        # BUG FIX: the original call supplied no argument for the '%s'
        # placeholder, so the literal "%s" was logged instead of the path.
        # Pass the path as a lazy logging argument.
        logging.warning("To make sure, delete '%s' and run the code again", data_file_path)
        logging.info('Continuing anyway...')

    else:

        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
Beispiel #10
0
def load_and_maybe_process_data(image_input_folder,
                                table_input_folder,
                                preprocessing_folder,
                                size,
                                target_resolution,
                                split_path,
                                nr_augmented,
                                noise_mean_dist=5,
                                clip_range=(-200, 800),
                                force_overwrite=False):
    '''
    Load the preprocessed CT chest landmark data, building the hdf5 cache file
    first when it does not exist yet.

    :param image_input_folder: Folder where the raw Nifti images are located
    :param table_input_folder: Folder with the accompanying tables (forwarded to prepare_data)
    :param preprocessing_folder: Folder where the preprocessed data should be written to
    :param size: Size of the output volumes in voxels
    :param target_resolution: Resolution to which the data should be resampled. Should have same shape as size
    :param split_path: Path of the train/test split definition (forwarded to prepare_data)
    :param nr_augmented: Number of augmented samples, encoded into the cache file name
    :param noise_mean_dist: mean distance of noisy point to ground truth, mm
    :param clip_range: The intensity range to clip the image at
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]

    :return: Returns an h5py.File handle to the dataset
    '''

    size_str = '_'.join(map(str, size))
    res_str = '_'.join(map(str, target_resolution))

    data_file_name = 'voiclass_size_%s_res_%s_nraug_%s.hdf5' % (
        size_str, res_str, str(nr_augmented))
    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    if force_overwrite or not os.path.exists(data_file_path):
        logging.info('This configuration of mode, size and target resolution has not yet been preprocessed')
        logging.info('Preprocessing now!')
        prepare_data(image_input_folder,
                     table_input_folder,
                     data_file_path,
                     size,
                     target_resolution,
                     split_path,
                     nr_augmented,
                     noise_mean_dist,
                     clip_range)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                size,
                                target_resolution,
                                force_overwrite=False,
                                split_test_train=True):
    '''
    Load the preprocessed 2D data, running the preprocessing pipeline first if
    the cache file for this configuration is missing.

    :param input_folder: Folder where the raw data is located
    :param preprocessing_folder: Folder where the preprocessed data should be written to
    :param size: Size of the output slices/volumes in pixels/voxels
    :param target_resolution: Resolution to which the data should be resampled. Should have same shape as size
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]
    :param split_test_train: If False, the cache file carries an '_onlytrain' suffix

    :return: Returns an h5py.File handle to the dataset
    '''
    # This loader always works slice-by-slice.
    mode = '2D'
    size_str = '_'.join(str(v) for v in size)
    res_str = '_'.join(str(v) for v in target_resolution)

    suffix = '.hdf5' if split_test_train else '_onlytrain.hdf5'
    data_file_name = 'data_%s_size_%s_res_%s' % (mode, size_str, res_str) + suffix

    data_file_path = os.path.join(preprocessing_folder, data_file_name)

    utils.makefolder(preprocessing_folder)

    if force_overwrite or not os.path.exists(data_file_path):
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(input_folder,
                     data_file_path,
                     mode,
                     size,
                     target_resolution,
                     split_test_train=split_test_train)
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
def load_and_maybe_process_data(input_folder,
                                preprocessing_folder,
                                mode,
                                size,
                                target_resolution,
                                force_overwrite=False,
                                cv_fold_num = 1):
    '''
    Load the preprocessed ACDC challenge data, preprocessing it first when the
    cache file for this configuration does not exist yet.

    :param input_folder: Folder where the raw ACDC challenge data is located
    :param preprocessing_folder: Folder where the preprocessed data should be written to
    :param mode: Can either be '2D' or '3D'. 2D saves the data slice-by-slice, 3D saves entire volumes
    :param size: Size of the output slices/volumes in pixels/voxels
    :param target_resolution: Resolution to which the data should be resampled. Should have same shape as size
    :param force_overwrite: Set this to True if you want to overwrite already preprocessed data [default: False]
    :param cv_fold_num: Cross-validation fold, encoded into the cache file name [default: 1]

    :return: Returns an h5py.File handle to the dataset
    '''

    size_str = '_'.join([str(i) for i in size])
    res_str = '_'.join([str(i) for i in target_resolution])

    data_file_path = os.path.join(
        preprocessing_folder,
        'data_%s_size_%s_res_%s_cv_fold_%d.hdf5' % (mode, size_str, res_str, cv_fold_num))

    utils.makefolder(preprocessing_folder)

    already_cached = os.path.exists(data_file_path) and not force_overwrite
    if already_cached:
        logging.info('Already preprocessed this configuration. Loading now!')
    else:
        logging.info('This configuration of mode, size and target resolution has not yet been preprocessed')
        logging.info('Preprocessing now!')
        prepare_data(input_folder, data_file_path, mode, size,
                     target_resolution, cv_fold_num)

    # h5py.File behaves like a python dictionary;
    # list(f.keys()) enumerates the stored datasets.
    return h5py.File(data_file_path, 'r')
def load_data(input_folder,
              preproc_folder,
              idx_start,
              idx_end,
              bias_correction=False,
              force_overwrite=False):
    '''
    Load the 2D images/annotations for the requested index range, preprocessing
    them into .npy cache files first when they have not been extracted yet.

    :param input_folder: Folder where the raw data is located
    :param preproc_folder: Folder where the preprocessed .npy files live
    :param idx_start: First index of the requested image range (encoded in the cache file names)
    :param idx_end: Last index of the requested range (whether inclusive or exclusive is decided inside prepare_data)
    :param bias_correction: If True, use the bias-corrected image cache
    :param force_overwrite: Set this to True to redo preprocessing even if cached files exist

    :return: (images, masks, affines, patnames)
    '''

    # create the pre-processing folder, if it does not exist
    utils.makefolder(preproc_folder)

    logging.info(
        '============================================================')
    logging.info('Loading data...')

    # make appropriate filenames according to the requested indices of training, validation and test images
    config_details = 'from%dto%d_' % (idx_start, idx_end)
    # FIX: test truthiness instead of the fragile `is True` identity check,
    # which silently took the wrong branch for truthy non-bool values.
    if bias_correction:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    # if the images have not already been extracted, do so
    if not os.path.exists(filepath_images) or force_overwrite:
        logging.info(
            'This configuration of protocol and data indices has not yet been preprocessed'
        )
        logging.info('Preprocessing now...')
        images, masks, affines, patnames = prepare_data(
            input_folder, preproc_folder, idx_start, idx_end, bias_correction)
    else:
        logging.info('Already preprocessed this configuration. Loading now...')
        # read from already created npy files
        images = np.load(filepath_images)
        masks = np.load(filepath_masks)
        affines = np.load(filepath_affine)
        patnames = np.load(filepath_patnames)

    return images, masks, affines, patnames
Beispiel #14
0
def load_data(input_folder,
              preproc_folder,
              idx_start,
              idx_end,
              force_overwrite=False):
    '''
    Load the images/annotations for the requested index range, preprocessing
    them into .npy cache files first when they have not been extracted yet.

    :param input_folder: Folder where the raw data is located
    :param preproc_folder: Folder where the preprocessed .npy files live
    :param idx_start: First index of the requested image range (encoded in the cache file names)
    :param idx_end: Last index of the requested range (whether inclusive or exclusive is decided inside prepare_data)
    :param force_overwrite: Set this to True to redo preprocessing even if cached files exist

    :return: (images, masks, affines, patnames)
    '''

    # create the pre-processing folder, if it does not exist
    utils.makefolder(preproc_folder)

    logging.info(
        '============================================================')
    logging.info('Loading data...')

    # make appropriate filenames according to the requested indices of training, validation and test images
    config_details = 'from%dto%d_' % (idx_start, idx_end)
    filepath_images = preproc_folder + config_details + 'images.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    # if the images have not already been extracted, do so
    if not os.path.exists(filepath_images) or force_overwrite:
        logging.info(
            'This configuration of protocol and data indices has not yet been preprocessed'
        )
        logging.info('Preprocessing now...')
        images, masks, affines, patnames = prepare_data(
            input_folder, preproc_folder, idx_start, idx_end)
    else:
        logging.info('Already preprocessed this configuration. Loading now...')
        # read from already created npy files
        # (CLEANUP: removed commented-out debug loads of histogram-equalised
        # images that pointed at a hard-coded user directory)
        images = np.load(filepath_images)
        masks = np.load(filepath_masks)
        affines = np.load(filepath_affine)
        patnames = np.load(filepath_patnames)

    return images, masks, affines, patnames
def load_and_maybe_generate_data(output_folder,
                                 image_size,
                                 effect_size,
                                 num_samples,
                                 moving_effect,
                                 scale_to_one=False,
                                 force_overwrite=False):
    """Generate the synthetic dataset (or reuse a cached copy) and return an h5py handle.

    :param output_folder: Folder the hdf5 cache file is written to
    :param image_size: Output image extents; only the first entry is used (images are square)
    :param effect_size: Effect magnitude, encoded into the cache file name
    :param num_samples: Number of samples to generate
    :param moving_effect: If True, a '_moving' postfix is added to the cache file name
    :param scale_to_one: If True, a '_intrangeone' postfix is added to the cache file name
    :param force_overwrite: Regenerate even if a cache file exists [default: False]
    :return: An open read-only h5py.File handle to the dataset
    """

    # Images are always square, so only the first extent matters.
    image_size = image_size[0]

    moving_postfix = '_moving' if moving_effect else ''
    rescale_postfix = '_intrangeone' if scale_to_one else ''

    data_file_name = 'synthdata_num_%s_imsize_%s_effect_%s%s%s.hdf5' % (
        str(num_samples), str(image_size), str(effect_size),
        moving_postfix, rescale_postfix)
    data_file_path = os.path.join(output_folder, data_file_name)

    utils.makefolder(output_folder)

    if force_overwrite or not os.path.exists(data_file_path):
        logging.info(
            'This configuration of mode, size and target resolution has not yet been preprocessed'
        )
        logging.info('Preprocessing now!')
        prepare_data(data_file_path,
                     effect_size,
                     num_samples,
                     image_size,
                     moving_effect,
                     scale_to_one,
                     save_type='hdf5')
    else:
        logging.info('Already preprocessed this configuration. Loading now!')

    return h5py.File(data_file_path, 'r')
Beispiel #16
0
def load_data(input_folder,
              preproc_folder,
              idx_start,
              idx_end,
              size,
              target_resolution,
              labeller='kc',
              force_overwrite=False):
    '''
    Load the 2D dataset for the requested subject range, preprocessing it into
    an hdf5 cache file first when necessary.

    :param input_folder: Folder where the raw data is located
    :param preproc_folder: Folder where the preprocessed hdf5 cache is written
    :param idx_start: First index of the requested range (encoded in the cache file name)
    :param idx_end: Last index of the requested range (encoded in the cache file name)
    :param size: Size of the output slices in pixels
    :param target_resolution: Resolution to which the data should be resampled
    :param labeller: Identifier of the annotator whose labels to use [default: 'kc']
    :param force_overwrite: Set this to True to redo preprocessing even if the cache exists

    :return: Returns an h5py.File handle to the dataset
    '''

    # ===============================
    # create the pre-processing folder, if it does not exist
    # ===============================
    utils.makefolder(preproc_folder)

    # ===============================
    # file to create or directly read if it already exists
    # ===============================
    size_str = '_'.join([str(i) for i in size])
    res_str = '_'.join([str(i) for i in target_resolution])
    data_file_name = 'data_2d_from_%d_to_%d_size_%s_res_%s_%s.hdf5' % (
        idx_start, idx_end, size_str, res_str, labeller)
    data_file_path = os.path.join(preproc_folder, data_file_name)

    # ===============================
    # if the images have not already been extracted, do so
    # ===============================
    if not os.path.exists(data_file_path) or force_overwrite:

        logging.info(
            'This configuration of protocol and data indices has not yet been preprocessed'
        )
        logging.info('Preprocessing now...')
        prepare_data(input_folder, data_file_path, idx_start, idx_end, size,
                     target_resolution, labeller)
    else:
        logging.info('Already preprocessed this configuration. Loading now...')

    # CLEANUP: a block of unreachable pasted-in code (referencing undefined
    # names such as `args`, `model_path` and `recursion`) used to follow this
    # return statement; it was dead code and has been removed.
    return h5py.File(data_file_path, 'r')






Beispiel #18
0
    config_file = args.EXP_PATH
    config_module = config_file.split('/')[-1].rstrip('.py')

    if args.LOCAL == 'local':
        print('Running with local configuration')
        import config.local_config as sys_config
        import matplotlib.pyplot as plt
    else:
        import config.system as sys_config

    exp_config = SourceFileLoader(config_module, config_file).load_module()

    log_dir = os.path.join(sys_config.log_root, exp_config.log_dir_name,
                           exp_config.experiment_name)

    utils.makefolder(log_dir)

    shutil.copy(exp_config.__file__, log_dir)

    basic_logger = utils.setup_logger('basic_logger',
                                      log_dir + '/test_log_lowest_ged.log')

    basic_logger.info('Running experiment with script: {}'.format(config_file))

    basic_logger.info('!!!! Copied exp_config file to experiment folder !!!!')

    basic_logger.info(
        '**************************************************************')
    basic_logger.info(' *** Running Experiment: %s',
                      exp_config.experiment_name)
    basic_logger.info(
Beispiel #19
0
def _pad_volume_to_depth(volume, target_depth=130):
    # Most volumes in this dataset are 256x256x130; shallower volumes are
    # zero-padded along the slice axis so all subjects can be stacked into
    # a single array.  (A volume deeper than target_depth would make
    # num_zero_slices negative and np.zeros would raise, matching the
    # original behaviour.)
    # Note: the original code compared depths with `is not 130`, an
    # identity check against an int literal that only worked by accident
    # of CPython's small-int cache; `!=` is the correct comparison.
    if volume.shape[2] != target_depth:
        num_zero_slices = target_depth - volume.shape[2]
        zero_slices = np.zeros(
            (volume.shape[0], volume.shape[1], num_zero_slices))
        volume = np.concatenate((volume, zero_slices), axis=-1)
    return volume


def prepare_data(input_folder, preproc_folder, idx_start, idx_end):
    """Preprocess the subjects with ground-truth segmentations in input_folder.

    Only subjects whose (sorted) index lies in [idx_start, idx_end) are
    processed.  Each image is zero-padded to a common depth of 130 slices,
    intensity-normalised, and written (together with its mask) as NIfTI
    files under preproc_folder.  Finally all volumes are stacked, re-sliced
    along the y-axis into 2d x-z slices, and saved as .npy files.

    :param input_folder: folder containing '*_seg.nii' masks and matching
        '*.nii.gz' images (assumed to end with a path separator)
    :param preproc_folder: output folder prefix for per-subject NIfTIs and
        the combined .npy files (assumed to end with a path separator)
    :param idx_start: first subject index to include (inclusive)
    :param idx_end: last subject index (exclusive)
    :return: (images, masks, affines, patnames) as numpy arrays; images and
        masks are stacks of 2d x-z slices, masks have dtype uint8
    """
    images = []
    affines = []
    patnames = []
    masks = []

    # read the filenames which have segmentations available
    filenames = sorted(glob.glob(input_folder + '*_seg.nii'))
    logging.info(
        'Number of images in the dataset that have ground truth annotations: %s'
        % str(len(filenames)))

    # iterate through all indices
    for idx in range(len(filenames)):

        # only consider images within the indices requested
        if (idx < idx_start) or (idx >= idx_end):
            continue

        logging.info('==============================================')

        # derive the image filename and the patient name from the name of
        # the ground truth annotation for this subject
        filename_seg = filenames[idx]
        filename_img = filename_seg[:-8] + '.nii.gz'
        _patname = filename_seg[filename_seg[:-1].rfind('/') + 1:-8]

        if _patname == 'IXI014-HH-1236-T2':  # this subject has very poor resolution - 256x256x28
            continue

        # read the image, pad it to the common depth and normalise it
        logging.info('reading image: %s' % _patname)
        _img_data, _img_affine, _img_header = utils.load_nii(filename_img)
        _img_data = _pad_volume_to_depth(_img_data)
        _img_data = image_utils.normalise_image(_img_data,
                                                norm_type='div_by_max')
        # save the pre-processed image
        utils.makefolder(preproc_folder + _patname)
        savepath = preproc_folder + _patname + '/preprocessed_image.nii'
        utils.save_nii(savepath, _img_data, _img_affine)
        # append to the list of all images, affines and patient names
        images.append(_img_data)
        affines.append(_img_affine)
        patnames.append(_patname)

        # read the segmentation mask (already grouped), pad it the same way
        _seg_data, _seg_affine, _seg_header = utils.load_nii(filename_seg)
        _seg_data = _pad_volume_to_depth(_seg_data)
        # save the pre-processed segmentation ground truth
        utils.makefolder(preproc_folder + _patname)
        savepath = preproc_folder + _patname + '/preprocessed_gt15.nii'
        utils.save_nii(savepath, _seg_data, _seg_affine)
        # append to the list of all masks
        masks.append(_seg_data)

    # convert the lists to arrays
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # merge along the y-axis to get a stack of x-z slices, for the images as well as the masks
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed images and masks so that they can be directly read the next time
    # make appropriate filenames according to the requested indices of training, validation and test images
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)

    filepath_images = preproc_folder + config_details + 'images.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
Beispiel #20
0
def generate_with_noise(gan_experiment_path_list,
                        noise_list,
                        image_saving_indices=set(),
                        image_saving_path3d=None,
                        image_saving_path2d=None):
    """
    Generate images with every GAN in gan_experiment_path_list: for each
    selected source-domain test image, run the generator once per noise
    vector in noise_list and save the generated 3d volumes and their 2d
    cuts, the difference images g(xs)-xs, and the per-voxel std image over
    all noise samples.

    :param gan_experiment_path_list: list of GAN experiment paths to be evaluated. They must all have the same image settings and source/target field strengths as the classifier
    :param noise_list: list of generator input noise arrays; each must match the generator noise placeholder shape
    :param image_saving_indices: set of indices (positions within the source domain) of the images to be saved
        (the mutable default set() is safe here: the argument is only read, never mutated)
    :param image_saving_path3d: where to save the 3d images. They are saved in subfolders for each experiment
    :param image_saving_path2d: where to save the 2d image cuts. They are saved in subfolders for each experiment
    :return:
    """

    batch_size = 1
    logging.info('batch size %d is used for everything' % batch_size)

    for gan_experiment_path in gan_experiment_path_list:
        gan_config, logdir_gan = utils.load_log_exp_config(gan_experiment_path)

        gan_experiment_name = gan_config.experiment_name

        # experiments continued from a checkpoint have a log dir ending in '_cont'
        log_dir_ending = logdir_gan.split('_')[-1]
        continued_experiment = (log_dir_ending == 'cont')
        if continued_experiment:
            gan_experiment_name += '_cont'

        # make sure the noise has the right dimension
        assert gan_config.use_generator_input_noise
        assert gan_config.generator_input_noise_shape[
            1:] == std_params.generator_input_noise_shape[1:]

        # Load data
        data = adni_data_loader_all.load_and_maybe_process_data(
            input_folder=gan_config.data_root,
            preprocessing_folder=gan_config.preproc_folder,
            size=gan_config.image_size,
            target_resolution=gan_config.target_resolution,
            label_list=gan_config.label_list,
            offset=gan_config.offset,
            rescale_to_one=gan_config.rescale_to_one,
            force_overwrite=False)

        # extract images and indices of source/target images for the test set
        images_test = data['images_test']

        im_s = gan_config.image_size

        img_tensor_shape = [batch_size, im_s[0], im_s[1], im_s[2], 1]

        logging.info('\nGAN Experiment (%.1f T to %.1f T): %s' %
                     (gan_config.source_field_strength,
                      gan_config.target_field_strength, gan_experiment_name))
        logging.info(gan_config)
        # open GAN save file from the selected experiment

        # prevents ResourceExhaustError when a lot of memory is used
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # Do not assign whole gpu memory, just use it on the go
        config.allow_soft_placement = True  # If a operation is not defined in the default device, let it execute in another.

        # split the test set by field strength into source and target domain
        source_indices = []
        target_indices = []
        for i, field_strength in enumerate(data['field_strength_test']):
            if field_strength == gan_config.source_field_strength:
                source_indices.append(i)
            elif field_strength == gan_config.target_field_strength:
                target_indices.append(i)

        num_source_images = len(source_indices)
        num_target_images = len(target_indices)

        logging.info('Data summary:')
        logging.info(' - Images:')
        logging.info(images_test.shape)
        logging.info(images_test.dtype)
        logging.info(' - Domains:')
        logging.info('number of source images: ' + str(num_source_images))
        logging.info('number of target images: ' + str(num_target_images))

        # save real images
        source_image_path = os.path.join(image_saving_path3d, 'source')
        utils.makefolder(source_image_path)
        sorted_saving_indices = sorted(image_saving_indices)

        # map the requested saving positions (within the source domain)
        # to indices into images_test
        source_saving_indices = [
            source_indices[index] for index in sorted_saving_indices
        ]
        for source_index in source_saving_indices:
            source_img_name = 'source_img_%.1fT_%d.nii.gz' % (
                gan_config.source_field_strength, source_index)
            utils.create_and_save_nii(
                images_test[source_index],
                os.path.join(source_image_path, source_img_name))
            logging.info(source_img_name + ' saved')

        logging.info('source images saved')

        logging.info('loading GAN')
        # open the latest GAN savepoint
        init_checkpoint_path_gan, last_gan_step = utils.get_latest_checkpoint_and_step(
            logdir_gan, 'model.ckpt')

        logging.info(init_checkpoint_path_gan)

        # build a separate graph for the generator
        graph_generator, generator_img_pl, z_noise_pl, x_fake_op, init_gan_op, saver_gan = build_gen_graph(
            img_tensor_shape, gan_config)

        # Create a session for running Ops on the Graph.
        sess_gan = tf.Session(config=config, graph=graph_generator)

        # Run the Op to initialize the variables.
        sess_gan.run(init_gan_op)
        saver_gan.restore(sess_gan, init_checkpoint_path_gan)

        # path where the generated images are saved
        # (bugfix: the original referenced the undefined names
        # image_saving_path_3d / image_saving_path_2d instead of the
        # parameters, raising NameError at runtime)
        experiment_generate_path_3d = os.path.join(
            image_saving_path3d, gan_experiment_name +
            ('_%.1fT_source' % gan_config.source_field_strength))
        # make a folder for the generated images
        utils.makefolder(experiment_generate_path_3d)

        # path where the generated image 2d cuts are saved
        experiment_generate_path_2d = os.path.join(
            image_saving_path2d, gan_experiment_name +
            ('_%.1fT_source' % gan_config.source_field_strength))
        # make a folder for the generated images
        utils.makefolder(experiment_generate_path_2d)

        logging.info('image generation begins')
        # loops through all selected images from the source domain
        # (bugfix: the original zipped the indices with
        # itertools.compress(images_test, source_saving_indices), which
        # treats the index list as boolean selectors and therefore paired
        # the wrong image with each index; index images_test directly)
        for image_index in source_saving_indices:
            curr_img = images_test[image_index]
            img_folder_name = 'image_test%d' % image_index
            curr_img_path_3d = os.path.join(experiment_generate_path_3d,
                                            img_folder_name)
            utils.makefolder(curr_img_path_3d)
            curr_img_path_2d = os.path.join(experiment_generate_path_2d,
                                            img_folder_name)
            utils.makefolder(curr_img_path_2d)
            # save source image
            source_img_name = 'source_img'
            utils.save_image_and_cut(np.squeeze(curr_img),
                                     source_img_name,
                                     curr_img_path_3d,
                                     curr_img_path_2d,
                                     vmin=-1,
                                     vmax=1)
            logging.info(source_img_name + ' saved')
            img_list = []
            for noise_index, noise in enumerate(noise_list):
                # run the generator on the current image with this noise sample
                fake_img = sess_gan.run(x_fake_op,
                                        feed_dict={
                                            generator_img_pl:
                                            np.reshape(curr_img,
                                                       img_tensor_shape),
                                            z_noise_pl:
                                            noise
                                        })
                fake_img = np.squeeze(fake_img)
                # make sure the dimensions are right
                assert len(fake_img.shape) == 3

                img_list.append(fake_img)

                generated_img_name = 'generated_img_noise_%d' % (noise_index)
                utils.save_image_and_cut(np.squeeze(fake_img),
                                         generated_img_name,
                                         curr_img_path_3d,
                                         curr_img_path_2d,
                                         vmin=-1,
                                         vmax=1)
                logging.info(generated_img_name + ' saved')

                # save the difference g(xs)-xs
                difference_image_gs = np.squeeze(fake_img) - curr_img
                difference_img_name = 'difference_img_noise_%d' % (noise_index)
                utils.save_image_and_cut(difference_image_gs,
                                         difference_img_name,
                                         curr_img_path_3d,
                                         curr_img_path_2d,
                                         vmin=-1,
                                         vmax=1)
                logging.info(difference_img_name + ' saved')

            # per-voxel std over all noise samples (stacking works because
            # the sample dimension is axis 0)
            all_imgs = np.stack(img_list, axis=0)
            std_img = np.std(all_imgs, axis=0)
            std_img_name = 'std_img'
            utils.save_image_and_cut(std_img,
                                     std_img_name,
                                     curr_img_path_3d,
                                     curr_img_path_2d,
                                     vmin=0,
                                     vmax=1)
            logging.info(std_img_name + ' saved')

        logging.info('generated all images for %s' % (gan_experiment_name))
Beispiel #21
0
def do_preprocessing(adnimerge_table_arg,
                     tmp_index,
                     processed_images_folder,
                     summary_csv_file,
                     do_reorientation=False,
                     do_registration=False,
                     do_bias_correction=False,
                     do_cropping=False,
                     do_skull_stripping=False,
                     write_csv=True,
                     bmicdatasets_adni_images=None):
    """Walk an ADNIMERGE table, locate each subject's NIfTI scan, optionally
    run an FSL/N4/ROBEX preprocessing pipeline on it, and write a summary CSV.

    For every row of the table the function gathers demographics, diagnosis
    and vitals from companion CSV tables, resolves the matching exam
    date(s)/field strength(s) from the MRI meta tables, searches for the
    corresponding .nii file under bmicdatasets_adni_images, processes it
    (reorient, crop, bias-correct, register, skull-strip -- each step gated
    by its flag) into processed_images_folder, and appends one row per exam
    to summary_csv_file.

    :param adnimerge_table_arg: pandas DataFrame of the ADNIMERGE table
        (must contain RID, VISCODE, COLPROT, SITE, AGE, PT*, APOE4, ADAS13,
        MMSE, FAQ, DX_bl, EXAMDATE columns)
    :param tmp_index: identifier mixed into temp-file names so parallel
        invocations do not clobber each other's temp files
    :param processed_images_folder: root output folder for processed images
    :param summary_csv_file: path of the summary CSV to (over)write
    :param do_reorientation: run fslreorient2std (reorient to MNI space)
    :param do_registration: run flirt registration to the MNI 152 template
    :param do_bias_correction: run N4 bias field correction
    :param do_cropping: run robustfov field-of-view cropping
    :param do_skull_stripping: run skull stripping
    :param write_csv: write rows to the summary CSV
    :param bmicdatasets_adni_images: root folder of the raw ADNI images

    NOTE(review): relies on module-level globals not visible in this block
    (vitals_path, mri_3_0_meta_path, mri_1_5_meta_path, diagnosis_path,
    find_by_conditions, diagnosis_to_3categories*, convert_weight_to_kg,
    all_same, map_all_baseline_to_bl, DO_ONLY_TABLE,
    INCLUDE_MISSING_IMAGES_IN_TABLE, N4_executable, robex_executable,
    mni_template_t1) -- confirm against the rest of the file.
    """

    # `|` binds tighter than `==`, so this tests whether ALL pipeline flags
    # are False; postprocessing is enabled if any single step is requested.
    if do_reorientation | do_registration | do_bias_correction | do_cropping | do_skull_stripping == False:
        do_postprocessing = False
    else:
        do_postprocessing = True

    # companion tables: vitals (weight), MRI meta (exam dates per field
    # strength) and diagnosis codes
    vitals_table = pd.read_csv(vitals_path)

    mri_3_0_meta_table = pd.read_csv(mri_3_0_meta_path)
    mri_1_5_meta_table = pd.read_csv(mri_1_5_meta_path)

    diagnosis_table = pd.read_csv(diagnosis_path)

    # temp folder for the intermediate files of the processing pipeline
    tmp_file_folder = os.path.join(processed_images_folder, 'tmp')
    if do_postprocessing:
        utils.makefolder(tmp_file_folder)

    with open(summary_csv_file, 'w') as csvfile:

        if write_csv:
            csvwriter = csv.writer(csvfile, delimiter=',')

            # header row of the summary table
            csvwriter.writerow([
                'rid', 'phase', 'image_exists', 'site', 'viscode', 'exam_date',
                'field_strength', 'diagnosis', 'diagnosis_3cat', 'age',
                'gender', 'weight', 'education', 'ethnicity', 'race', 'apoe4',
                'adas13', 'mmse', 'faq', 'counter'
            ])

        # one iteration per ADNIMERGE row (i.e. per subject visit)
        for ii, row in adnimerge_table_arg.iterrows():

            viscode = row['VISCODE']
            rid = row['RID']

            print('new')
            print(rid)
            print(viscode)

            phase = row['COLPROT']

            # ADNI3 visits are skipped entirely
            if phase in ['ADNI3']:
                continue

            site = row['SITE']

            # demographics / cognitive scores straight from ADNIMERGE
            age_at_bl = row[
                'AGE']  # Note ADNIMERGE age is always the same, even for the follow up scans years later
            gender = row['PTGENDER']
            education = row['PTEDUCAT']
            ethnicity = row['PTETHCAT']
            race = row['PTRACCAT']
            apoe4 = row['APOE4']
            adas13 = row['ADAS13']
            mmse = row['MMSE']
            faq = row['FAQ']

            # diagnosis lookup; ADNI1 stores it in DXCURREN, later phases
            # in DXCHANGE
            diagnosis_row = find_by_conditions(diagnosis_table,
                                               and_condition_dict={
                                                   'RID': rid,
                                                   'VISCODE2': viscode
                                               })
            if phase == 'ADNI1':
                diagnosis = diagnosis_row['DXCURREN'].values
            else:
                diagnosis = diagnosis_row['DXCHANGE'].values

            print('---- rid %s -----' % rid)
            print(viscode)
            print(diagnosis)

            # no diagnosis row found: fall back to the baseline diagnosis
            # for month-3 visits, otherwise mark as unknown
            if len(diagnosis) == 0:
                diagnosis = 0
                if viscode == 'm03':
                    diagnosis_bl = row['DX_bl']
                    diagnosis_3cat = diagnosis_to_3categories_blformat(
                        diagnosis_bl)
                else:
                    diagnosis_3cat = 'unknown'
            else:
                diagnosis = int(diagnosis[0])
                diagnosis_3cat = diagnosis_to_3categories(diagnosis)

            # vitals are taken from the baseline ('bl') or screening ('sc')
            # visit, since exam dates don't always correspond
            vitals_row = find_by_conditions(vitals_table, {
                'RID': rid,
                'VISCODE2': 'bl'
            })  # here also examdates sometimes don't correspond
            if len(vitals_row) == 0:
                vitals_row = find_by_conditions(vitals_table, {
                    'RID': rid,
                    'VISCODE2': 'sc'
                })

            assert len(vitals_row) <= 1, 'in vitals table found %d rows for case with rid=%s, and viscode=bl. Expected one.' \
                                         % (len(vitals_row), rid)

            # Getting some vitals information
            if len(vitals_row) == 1:
                weight = vitals_row['VSWEIGHT'].values[0]
                weight_units = vitals_row['VSWTUNIT'].values[0]
                weight = convert_weight_to_kg(weight, weight_units)
            else:
                weight = 'unknown'

            # exam metadata for 1.5T scans; fall back to the screening visit
            # when the baseline visit has no row
            mri_1_5_meta_row = find_by_conditions(mri_1_5_meta_table,
                                                  and_condition_dict={
                                                      'RID': rid,
                                                      'VISCODE2': viscode
                                                  })
            if len(mri_1_5_meta_row) == 0 and viscode == 'bl':
                mri_1_5_meta_row = find_by_conditions(mri_1_5_meta_table,
                                                      and_condition_dict={
                                                          'RID': rid,
                                                          'VISCODE2': 'sc'
                                                      })

            # same for 3T scans ('scmri' is an additional screening code)
            mri_3_0_meta_row = find_by_conditions(mri_3_0_meta_table,
                                                  and_condition_dict={
                                                      'RID': rid,
                                                      'VISCODE2': viscode
                                                  })
            if len(mri_3_0_meta_row) == 0 and viscode == 'bl':
                mri_3_0_meta_row = find_by_conditions(
                    mri_3_0_meta_table,
                    and_condition_dict={'RID': rid},
                    or_condition_dict={'VISCODE2': ['sc', 'scmri']})

            # merge 1.5T and 3T exam dates into parallel lists of
            # (exam_date, field_strength, viscode)
            exam_dates = list(mri_1_5_meta_row['EXAMDATE'].values) + list(
                mri_3_0_meta_row['EXAMDATE'].values)
            field_strengths = [1.5] * len(mri_1_5_meta_row['EXAMDATE']) + [
                3.0
            ] * len(mri_3_0_meta_row['EXAMDATE'])
            viscodes = list(mri_1_5_meta_row['VISCODE2'].values) + list(
                mri_3_0_meta_row['VISCODE2'].values)

            # ADNI image folders are named like '123_S_4567'
            subj_subfolder = '%s_S_%s' % (str(site).zfill(3),
                                          str(rid).zfill(4))

            # Remove nans from exam dates and corresponding field strengths
            exam_dates_tmp = []
            field_strengths_tmp = []
            viscodes_tmp = []
            for ed, fs, vc in zip(exam_dates, field_strengths, viscodes):
                if str(ed) != 'nan':
                    exam_dates_tmp.append(ed)
                    field_strengths_tmp.append(fs)
                    viscodes_tmp.append(vc)
            exam_dates = exam_dates_tmp
            field_strengths = field_strengths_tmp
            viscodes = viscodes_tmp

            # If all exam dates are the same keep only one
            if len(exam_dates) > 1 and all_same(exam_dates):

                print('Multiple equal exam dates')
                print(field_strengths)

                exam_dates = [exam_dates[0]]
                field_strengths = [field_strengths[0]]
                viscodes = [viscodes[0]]

            # If there are duplicate viscodes keep the first entry (which is
            # 1.5T because the 1.5T rows come first and duplicates are
            # almost always 1.5T)
            if len(viscodes) > 1 and all_same(
                    map_all_baseline_to_bl(viscodes)):
                print('Identical viscodes')
                print(field_strengths)
                exam_dates = [exam_dates[0]]
                if phase in ['ADNI1', 'ADNIGO']:
                    field_strengths = [
                        field_strengths[0]
                    ]  # 1.5 is always the first item anyways
                else:
                    print('!! Multiple viscodes. Duplicate that was in ADNI2')
                    print(field_strengths)
                    field_strengths = [field_strengths[0]]

            # no usable exam date -> nothing to look up for this visit
            if not len(exam_dates) > 0:
                continue

            # Philips scanners do not have the gradwarp preprocessed images. I am assuming MT1__N3m is fine even
            # though B1_Correction is missing.
            # This webpage: http://adni.loni.usc.edu/methods/mri-analysis/mri-pre-processing/ says all files with a N3m
            # in the end are fine to use. I am assuming that MPR____N3 and MPR__GradWarp__N3 also indicate that the
            # whole preprocessing pipeline was applied.
            preproc_subfolders = [
                'MPR__GradWarp__B1_Correction__N3', 'MPR____N3', 'MT1__N3m',
                'MT1__GradWarp__N3m', 'MPR__GradWarp__N3'
            ]

            nii_files = []

            for exam_date, field_strength in zip(exam_dates, field_strengths):

                # figure out age:
                # get baseline examdate from adnimerge
                baseline_row = find_by_conditions(
                    adnimerge_table_arg,
                    and_condition_dict={'RID': rid},
                    or_condition_dict={'VISCODE': ['sc', 'scmri', 'bl']})

                baseline_exam_dates = baseline_row['EXAMDATE'].values

                if len(baseline_exam_dates) <= 0:
                    current_age = 'unknown'
                else:
                    baseline_exam_date = baseline_exam_dates[0]

                    # age at this exam = baseline age + (exam date - baseline
                    # date) converted to a decimal year (approximating a
                    # month as 30 days)
                    year_diff = int(exam_date.split('-')[0]) - int(
                        baseline_exam_date.split('-')[0])
                    month_diff = int(exam_date.split('-')[1]) - int(
                        baseline_exam_date.split('-')[1])
                    day_diff = int(exam_date.split('-')[2]) - int(
                        baseline_exam_date.split('-')[2])

                    decimal_year_diff = year_diff + (1.0 / 12) * month_diff + (
                        1.0 / (12 * 30) * day_diff)

                    assert decimal_year_diff >= -0.75, 'Year diff cannot be (too) negative! Was %f' % decimal_year_diff

                    # small negative diffs (screening before baseline) are
                    # clamped to zero
                    if decimal_year_diff < 0:
                        decimal_year_diff = 0.0

                    current_age = age_at_bl + decimal_year_diff

                # glob for candidate NIfTI files under each accepted
                # preprocessing subfolder
                for preproc_subfolder in preproc_subfolders:
                    nii_search_str = os.path.join(subj_subfolder,
                                                  preproc_subfolder,
                                                  exam_date + '_*', '*/*.nii')
                    print(
                        os.path.join(bmicdatasets_adni_images, nii_search_str))
                    nii_files += glob.glob(
                        os.path.join(bmicdatasets_adni_images, nii_search_str))

                # If some files have gradwarp prefer those files
                contains_GradWarp = any(['GradWarp' in ff for ff in nii_files])
                if contains_GradWarp:
                    nii_files = [ff for ff in nii_files if 'GradWarp' in ff]

                # if some files have MT1 and MPR prefer the MT1
                contains_MT1 = any(['MT1' in ff for ff in nii_files])
                if contains_MT1:
                    nii_files = [ff for ff in nii_files if 'MT1' in ff]

                # if some files have B1 correction prefer those
                contains_B1 = any(['B1_Correction' in ff for ff in nii_files])
                if contains_B1:
                    nii_files = [
                        ff for ff in nii_files if 'B1_Correction' in ff
                    ]

                image_exists = True if len(nii_files) > 0 else False

                if image_exists:

                    start_time = time.time()

                    # DO_ONLY_TABLE is a module-level switch: when set, only
                    # the CSV is written and no image processing happens
                    if not DO_ONLY_TABLE:

                        # take the first (best, after the filters above) file
                        nii_use_file = nii_files[0]
                        logging.info(nii_use_file)

                        gz_postfix = '.gz' if do_postprocessing else ''
                        patient_folder = 'rid_%s' % (str(rid).zfill(4))
                        out_file_name = '%s_%sT_%s_rid%s_%s.nii%s' % (
                            phase.lower(), field_strength, diagnosis_3cat,
                            str(rid).zfill(4), viscode, gz_postfix)

                        out_folder = os.path.join(processed_images_folder,
                                                  patient_folder)
                        utils.makefolder(out_folder)

                        out_file_path = os.path.join(out_folder, out_file_name)

                        # skip images that were already processed in an
                        # earlier run
                        if os.path.exists(out_file_path):
                            logging.info('!!! File already exists. Skipping')
                            continue
                        else:
                            logging.info('--- Doing File: %s' % out_file_path)

                        if not do_postprocessing:
                            logging.info('Not doing any preprocessing...')
                            shutil.copyfile(nii_use_file, out_file_path)
                        else:
                            # each pipeline step below reads tmp_file_path,
                            # writes tmp_file_path2 via an external tool, and
                            # copies the result back; FileNotFoundError means
                            # the tool failed to produce output, in which case
                            # the exam is skipped with a greppable ID in the log
                            tmp_file_path = os.path.join(
                                tmp_file_folder, 'tmp_rid%s_%s.nii.gz' %
                                (str(rid).zfill(4), str(tmp_index)))
                            tmp_file_path2 = os.path.join(
                                tmp_file_folder, 'tmp_rid%s_%s2.nii.gz' %
                                (str(rid).zfill(4), str(tmp_index)))
                            shutil.copyfile(nii_use_file, tmp_file_path)

                            if do_reorientation:
                                # fsl orientation enforcing:
                                logging.info('Reorienting to MNI space...')

                                Popen('fslreorient2std {0} {1}'.format(
                                    tmp_file_path, tmp_file_path2),
                                      shell=True).communicate()
                                try:
                                    os.remove(tmp_file_path)
                                except FileNotFoundError:
                                    pass
                                try:
                                    shutil.copyfile(tmp_file_path2,
                                                    tmp_file_path)
                                except FileNotFoundError:
                                    logging.info('ID6542130: %s' %
                                                 (nii_use_file))
                                    continue
                                try:
                                    os.remove(tmp_file_path2)
                                except FileNotFoundError:
                                    pass

                            if do_cropping:

                                # field of view cropping
                                logging.info('Cropping the field of view...')
                                Popen('robustfov -i {0} -r {1}'.format(
                                    tmp_file_path, tmp_file_path2),
                                      shell=True).communicate()
                                try:
                                    os.remove(tmp_file_path)
                                except FileNotFoundError:
                                    pass
                                try:
                                    shutil.copyfile(tmp_file_path2,
                                                    tmp_file_path)
                                except FileNotFoundError:
                                    logging.info('ID6542131: %s' %
                                                 (nii_use_file))
                                    continue
                                try:
                                    os.remove(tmp_file_path2)
                                except FileNotFoundError:
                                    pass
                            if do_bias_correction:
                                # bias correction with N4:
                                logging.info('Bias correction...')
                                Popen('{0} -i {1} -o {2}'.format(
                                    N4_executable, tmp_file_path,
                                    tmp_file_path2),
                                      shell=True).communicate()
                                try:
                                    os.remove(tmp_file_path)
                                except FileNotFoundError:
                                    pass
                                try:
                                    shutil.copyfile(tmp_file_path2,
                                                    tmp_file_path)
                                except FileNotFoundError:
                                    logging.info('ID6542132: %s' %
                                                 (nii_use_file))
                                    continue
                                try:
                                    os.remove(tmp_file_path2)
                                except FileNotFoundError:
                                    pass

                            if do_registration:

                                # registration with flirt to MNI 152:
                                logging.info(
                                    'Registering the structural image...')
                                Popen(
                                    'flirt -in {0} -ref {1} -out {2} -searchrx -45 45 -searchry -45 45 -searchrz -45 45 -dof 7'
                                    .format(tmp_file_path, mni_template_t1,
                                            tmp_file_path2),
                                    shell=True).communicate()
                                try:
                                    os.remove(tmp_file_path)
                                except FileNotFoundError:
                                    pass
                                try:
                                    shutil.copyfile(tmp_file_path2,
                                                    tmp_file_path)
                                except FileNotFoundError:
                                    logging.info('ID6542133: %s' %
                                                 (nii_use_file))
                                    continue
                                try:
                                    os.remove(tmp_file_path2)
                                except FileNotFoundError:
                                    pass

                            if do_skull_stripping:

                                # skull stripping (NOTE(review): the original
                                # comment said bet2, but the command runs
                                # robex_executable with bet-style flags --
                                # confirm which tool is actually configured)
                                logging.info('Skull stripping...')
                                Popen('{0} {1} {2} -R -f 0.5 -g 0'.format(
                                    robex_executable, tmp_file_path,
                                    tmp_file_path2),
                                      shell=True).communicate()
                                try:
                                    os.remove(tmp_file_path)
                                except FileNotFoundError:
                                    pass
                                try:
                                    shutil.copyfile(tmp_file_path2,
                                                    tmp_file_path)
                                except FileNotFoundError:
                                    logging.info('ID6542134: %s' %
                                                 (nii_use_file))
                                    continue
                                try:
                                    os.remove(tmp_file_path2)
                                except FileNotFoundError:
                                    pass
                                logging.info('Finished.')

                            # pipeline done: move the final temp file to the
                            # output location and clean up
                            logging.info(
                                'Copying tmp file: %s, to output: %s' %
                                (tmp_file_path, out_file_path))
                            shutil.copyfile(tmp_file_path, out_file_path)
                            try:
                                os.remove(tmp_file_path)
                            except FileNotFoundError:
                                pass

                    if write_csv:
                        csvwriter.writerow([
                            rid, phase, image_exists, site, viscode, exam_date,
                            field_strength, diagnosis, diagnosis_3cat,
                            current_age, gender, weight, education, ethnicity,
                            race, apoe4, adas13, mmse, faq, 1
                        ])

                    elapsed_time = time.time() - start_time
                    logging.info('Elapsed time: %.2f secs' % elapsed_time)

                if not image_exists and INCLUDE_MISSING_IMAGES_IN_TABLE and write_csv:
                    # If the include missing images constant is set to true it will write all the rows to the table

                    csvwriter.writerow([
                        rid, phase, image_exists, site, viscode, exam_date,
                        field_strength, diagnosis, diagnosis_3cat, current_age,
                        gender, weight, education, ethnicity, race, apoe4,
                        adas13, mmse, faq, 1
                    ])
def main(model_path, exp_config):
    """Draw 100 segmentation samples from a trained PHiSeg model for a few
    hand-picked test images and save figures of the input, a subset of the
    samples, every ground-truth annotation and the derived error maps."""

    # Build the model and restore the weights selected by `model_selection`
    phiseg_model = phiseg(exp_config=exp_config)
    phiseg_model.load_weights(model_path, type=model_selection)

    data_loader = data_switch(exp_config.data_identifier)
    data = data_loader(exp_config)

    N = data.test.images.shape[0]

    n_images = 16
    n_samples = 16

    # indices = np.arange(N)
    # sample_inds = np.random.choice(indices, n_images)
    sample_inds = [165, 280, 213]  # <-- prostate
    # sample_inds = [1551] #[907, 1296, 1551]  # <-- LIDC

    for img_idx in sample_inds:

        print('------- Processing image %d -------' % img_idx)

        # one output folder per image, under the model folder
        outfolder = os.path.join(model_path,
                                 'samples_%s' % model_selection,
                                 str(img_idx))
        utils.makefolder(outfolder)

        img_batch = data.test.images[img_idx, ...].reshape(
            [1] + list(exp_config.image_size))
        gt_labels = data.test.labels[img_idx, ...]

        # skip (nearly) empty ground truths
        if np.sum(gt_labels) < 10:
            print('WARNING: skipping cases with no structures')
            continue

        # one-hot ground truth with one annotation per leading dimension
        gt_onehot = utils.convert_batch_to_onehot(
            gt_labels.transpose((2, 0, 1)), exp_config.nlabels)

        print('Plotting input image')
        plt.figure()
        plt.imshow(preproc_image(img_batch), cmap='gray')
        plt.axis('off')
        plt.savefig(os.path.join(outfolder, 'input_img_%d.png' % img_idx),
                    bbox_inches='tight')

        print('Generating 100 samples')
        sample_arr = np.squeeze(np.asarray([
            phiseg_model.predict_segmentation_sample(img_batch,
                                                     return_softmax=True)
            for _ in range(100)
        ]))

        print('Plotting %d of those samples' % n_samples)
        for samp_idx in range(n_samples):

            # hard labels for this sampled softmax volume
            hard_sample = np.argmax(sample_arr[samp_idx, ...], axis=-1)

            plt.figure()
            plt.imshow(preproc_image(hard_sample, nlabels=exp_config.nlabels),
                       cmap='gray')
            plt.axis('off')
            plt.savefig(os.path.join(
                outfolder, 'sample_img_%d_samp_%d.png' % (img_idx, samp_idx)),
                bbox_inches='tight')

        print('Plotting ground-truths masks')
        for ann_idx in range(gt_onehot.shape[0]):

            hard_gt = np.argmax(gt_onehot[ann_idx, ...], axis=-1)

            plt.figure()
            plt.imshow(preproc_image(hard_gt, nlabels=exp_config.nlabels),
                       cmap='gray')
            plt.axis('off')
            plt.savefig(os.path.join(
                outfolder, 'gt_img_%d_samp_%d.png' % (img_idx, ann_idx)),
                bbox_inches='tight')

        print('Generating error masks')
        E_ss, E_sy_avg, E_yy_avg = generate_error_maps(sample_arr, gt_onehot)

        print('Plotting them')
        plt.figure()
        plt.imshow(preproc_image(E_ss))
        plt.axis('off')
        plt.savefig(os.path.join(outfolder, 'E_ss_%d.png' % img_idx),
                    bbox_inches='tight')

        print('Plotting them')
        plt.figure()
        plt.imshow(preproc_image(np.log(E_ss)))
        plt.axis('off')
        plt.savefig(os.path.join(outfolder, 'log_E_ss_%d.png' % img_idx),
                    bbox_inches='tight')

        plt.figure()
        plt.imshow(preproc_image(E_sy_avg))
        plt.axis('off')
        plt.savefig(os.path.join(outfolder, 'E_sy_avg_%d_.png' % img_idx),
                    bbox_inches='tight')

        plt.figure()
        plt.imshow(preproc_image(E_yy_avg))
        plt.axis('off')
        plt.savefig(os.path.join(outfolder, 'E_yy_avg_%d_.png' % img_idx),
                    bbox_inches='tight')

        plt.close('all')
def prepare_data(input_folder, preproc_folder, idx_start, idx_end,
                 bias_correction):
    """Pre-process the subjects whose index falls in [idx_start, idx_end):
    skull-strip, normalise, rescale and crop each volume, save per-subject
    nifti files and the stacked 2d x-z slices as npy arrays.

    Returns (images, masks, affines, patnames) as numpy arrays."""

    images = []
    affines = []
    patnames = []
    masks = []

    # one folder per subject (glob keeps the trailing '/')
    foldernames = sorted(glob.glob(input_folder + '*/'))
    logging.info('Number of images in the dataset: %s' % str(len(foldernames)))

    for idx, foldername in enumerate(foldernames):

        # only consider images within the indices requested
        if not (idx_start <= idx < idx_end):
            logging.info('skipping subject: %d' % idx)
            continue

        # extract the patient name (last path component before the trailing '/')
        sep_pos = foldername[:-1].rfind('/')
        _patname = foldername[sep_pos + 1:-1]
        if _patname == 'A00033264':  # this subject has images of a different size
            continue

        # ====================================================
        # read the segmentation mask (~100 raw classes)
        # ====================================================
        name = foldername + 'orig_labels_aligned_with_true_image.nii.gz'  # segmentation mask with ~100 classes
        logging.info('==============================================')
        logging.info('reading segmentation mask: %s' % name)

        _seg_data, _seg_affine, _seg_header = utils.load_nii(name)

        # group the raw segmentation classes as required for training
        _seg_data = utils.group_segmentation_classes(_seg_data)

        # ====================================================
        # read the image file (bias-corrected version if requested)
        # ====================================================
        if bias_correction is True:
            name = foldername + 'MPRAGE_n4.nii'  # read the original image
        else:
            name = foldername + 'MPRAGE.nii'  # read the original image

        logging.info('reading image: %s' % name)
        _img_data, _img_affine, _img_header = utils.load_nii(name)
        # _img_header.get_zooms() = (1.0, 1.0, 1.0)

        # binarise the segmentation and use it to strip the skull
        brain_mask = np.copy(_seg_data)
        brain_mask[_seg_data > 0] = 1
        img_masked = _img_data * brain_mask

        # normalise the skull-stripped image to [0, 1]
        _img_data = utils.normalise_image(img_masked, norm_type='div_by_max')

        # rescale image (linear) and mask (nearest-neighbour) so the voxel
        # size in mm matches that of the hcp images
        img_rescaled = rescale(image=_img_data, scale=10 / 7, order=1,
                               preserve_range=True, multichannel=False)
        seg_rescaled = rescale(image=_seg_data, scale=10 / 7, order=0,
                               preserve_range=True, multichannel=False)

        # A lot of the periphery is just zeros, so crop some of it away:
        # original images are 176 * 256 * 256,
        # rescaling makes them 251 * 366 * 366,
        # cropping brings them down to 224 * 256 * 224
        crop = (slice(13, -14),
                slice(55, -55),
                slice(55 + 16 + 50, -55 - 16 + 50))
        img_rescaled = img_rescaled[crop]
        seg_rescaled = seg_rescaled[crop]

        # save the pre-processed ground truth and image for this subject
        utils.makefolder(preproc_folder + _patname)
        utils.save_nii(preproc_folder + _patname + '/preprocessed_gt15.nii',
                       seg_rescaled, _seg_affine)
        if bias_correction is True:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image_n4.nii',
                img_rescaled, _img_affine)
        else:
            utils.save_nii(
                preproc_folder + _patname + '/preprocessed_image.nii',
                img_rescaled, _img_affine)

        images.append(img_rescaled)
        affines.append(_img_affine)
        patnames.append(_patname)
        masks.append(seg_rescaled)

    # lists -> arrays
    images = np.array(images)
    affines = np.array(affines)
    patnames = np.array(patnames)
    masks = np.array(masks, dtype='uint8')

    # merge along the y-axis to get a stack of x-z slices,
    # for the images as well as the masks
    images = images.swapaxes(1, 2)
    images = images.reshape(-1, images.shape[2], images.shape[3])
    masks = masks.swapaxes(1, 2)
    masks = masks.reshape(-1, masks.shape[2], masks.shape[3])

    # save the processed arrays so that they can be read directly next time;
    # filenames encode the requested index range
    logging.info('Saving pre-processed files...')
    config_details = 'from%dto%d_' % (idx_start, idx_end)

    if bias_correction is True:
        filepath_images = preproc_folder + config_details + 'images_2d_bias_corrected.npy'
    else:
        filepath_images = preproc_folder + config_details + 'images_2d.npy'
    filepath_masks = preproc_folder + config_details + 'annotations15_2d.npy'
    filepath_affine = preproc_folder + config_details + 'affines.npy'
    filepath_patnames = preproc_folder + config_details + 'patnames.npy'

    np.save(filepath_images, images)
    np.save(filepath_masks, masks)
    np.save(filepath_affine, affines)
    np.save(filepath_patnames, patnames)

    return images, masks, affines, patnames
    # NOTE(review): everything below sits after the `return` statement above
    # and is unreachable dead code; it looks like two fragments of unrelated
    # evaluation scripts pasted in. Consider removing it.
    if evaluate_test_set:
        logging.warning('EVALUATING ON TEST SET')
        input_path = sys_config.test_data_root
        output_path = os.path.join(model_path, 'predictions_testset')
    elif evaluate_all:
        logging.warning('EVALUATING ON ALL TRAINING DATA')
        input_path = sys_config.data_root
        output_path = os.path.join(model_path, 'predictions_alltrain')
    else:
        logging.warning('EVALUATING ON VALIDATION SET')
        input_path = sys_config.data_root
        output_path = os.path.join(model_path, 'predictions')

    path_pred = os.path.join(output_path, 'prediction')
    path_image = os.path.join(output_path, 'image')
    utils.makefolder(path_pred)
    utils.makefolder(path_image)

    init_iteration = score_data(input_path,
                                output_path,
                                model_path,
                                exp_config=exp_config,
                                do_postprocessing=True,
                                gt_exists=(not evaluate_test_set),
                                evaluate_all=evaluate_all,
                                use_iter=use_iter,
                                recursion=recursion,
                                apply_crf=apply_crf)

    if not evaluate_test_set:
        # NOTE(review): `path_gt` and `path_eval` are not defined anywhere in
        # this fragment.
        metrics_acdc.main(path_gt, path_pred, path_eval)
    # second fragment: sets up adversarial-example generation paths
    base_path = sys_config.project_root
    model_path = os.path.join(base_path, args.EXP_PATH)
    config_file = glob.glob(model_path + '/*py')[0]
    # NOTE(review): rstrip('.py') strips any trailing '.', 'p' or 'y'
    # characters, not the literal '.py' suffix — a latent bug if ever revived.
    config_module = config_file.split('/')[-1].rstrip('.py')
    exp_config = SourceFileLoader(
        fullname=config_module, path=os.path.join(config_file)).load_module()

    logging.warning("GENERATING EXAMPLES FOR TESTING SET")

    # Setup input and output paths
    input_path = sys_config.test_data_root
    output_path = os.path.join(model_path,
                               'adversarial_examples_' + args.ATTACK)
    image_path = os.path.join(output_path, 'image')
    diff_path = os.path.join(output_path, 'difference')
    utils.makefolder(image_path)
    utils.makefolder(diff_path)

    if args.ATTACK == 'spgd':
        # smoothed PGD: build Gaussian smoothing kernels over a grid of
        # sizes and sigmas
        sizes = [5, 7, 11, 15, 19]
        sigmas = [1.0, 3.0, 5.0, 10.0, 15.0]
        print('sizes:', sizes)
        print('sigmas:', sigmas)
        crafting_sizes = []
        crafting_weights = []
        for size in sizes:
            for sigma in sigmas:
                crafting_sizes.append(size)
                weight = gaussian_kernel(size, size / 2,
                                         sigma)[:, :, tf.newaxis, tf.newaxis]
                crafting_weights.append(weight)
Beispiel #26
0
    return init_iteration


if __name__ == '__main__':

    # Resolve project and model paths from the configuration
    base_path = config.project_root
    logging.info(base_path)
    model_path = config.weights_root
    logging.info(model_path)

    logging.warning('EVALUATING ON TEST SET')
    input_path = config.test_data_root
    output_path = os.path.join(model_path, 'predictions')

    path_pred = os.path.join(output_path, 'prediction')
    utils.makefolder(path_pred)
    # 'eval' folder is only used when ground-truth masks are available
    path_eval = os.path.join(output_path, 'eval')

    # True if the ground-truth images exist; if True, evaluation (eval)
    # will be run afterwards
    gt_exists = config.gt_exists

    # BUG FIX: `gt_exists` was previously passed positionally AFTER keyword
    # arguments, which is a SyntaxError ("positional argument follows keyword
    # argument"); pass it by keyword instead.
    init_iteration = score_data(input_path,
                                output_path,
                                model_path,
                                config=config,
                                do_postprocessing=True,
                                gt_exists=gt_exists)


    if gt_exists:
Beispiel #27
0
def score_data(input_folder, output_folder, model_path, config, do_postprocessing=False, gt_exists=True):
    """Restore the best-dice checkpoint and predict a segmentation mask for
    every slice of every patient/phase under `input_folder`/img, saving the
    predicted masks as png files.

    :param input_folder: root folder containing an 'img' subfolder (and a
        'mask' subfolder when gt_exists is True)
    :param output_folder: root output folder (not used directly in the
        visible part of this function)
    :param model_path: folder containing the tensorflow checkpoint files
    :param config: experiment configuration object (image_size, nlabels,
        pixel_size, data_mode, standardize, normalize, min, max)
    :param do_postprocessing: if True, keep only the largest connected
        component per structure in each predicted volume
    :param gt_exists: whether ground-truth masks are available
    """

    nx, ny = config.image_size[:2]
    batch_size = 1
    num_channels = config.nlabels

    # placeholder for a single-image batch: [1, *image_size, 1]
    image_tensor_shape = [batch_size] + list(config.image_size) + [1]
    images_pl = tf.placeholder(tf.float32, shape=image_tensor_shape, name='images')

    # According to the experiment config, pick a model and predict the output
    mask_pl, softmax_pl = model.predict(images_pl, config)
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:

        sess.run(init)

        # restore the checkpoint that achieved the best validation dice
        checkpoint_path = utils.get_latest_model_checkpoint_path(model_path, 'model_best_dice.ckpt')
        saver.restore(sess, checkpoint_path)

        # checkpoint files are named ...ckpt-<iteration>
        init_iteration = int(checkpoint_path.split('/')[-1].split('-')[-1])

        total_time = 0
        total_volumes = 0
        # NOTE(review): `target_resolution` is not defined in this function —
        # presumably a module-level global; verify before reuse.
        scale_vector = [config.pixel_size[0] / target_resolution[0], config.pixel_size[1] / target_resolution[1]]

        path_img = os.path.join(input_folder, 'img')
        if gt_exists:
            path_mask = os.path.join(input_folder, 'mask')

        for folder in os.listdir(path_img):

            logging.info(' ----- Doing image: -------------------------')
            logging.info('Doing: %s' % folder)
            logging.info(' --------------------------------------------')
            folder_path = os.path.join(path_img, folder)   # loop over patient folders

            # NOTE(review): `path_pred` is also not defined in this function —
            # presumably a global set by the calling script.
            utils.makefolder(os.path.join(path_pred, folder))

            if os.path.isdir(folder_path):

                for phase in os.listdir(folder_path):    # loop over ED/ES phase folders

                    save_path = os.path.join(path_pred, folder, phase)
                    utils.makefolder(save_path)

                    predictions = []
                    mask_arr = []
                    img_arr = []
                    masks = []
                    imgs = []
                    path = os.path.join(folder_path, phase)
                    # read all slices of this phase, optionally standardised /
                    # min-max normalised per the config
                    for file in os.listdir(path):
                        img = plt.imread(os.path.join(path,file))
                        if config.standardize:
                            img = image_utils.standardize_image(img)
                        if config.normalize:
                            img = cv2.normalize(img, dst=None, alpha=config.min, beta=config.max, norm_type=cv2.NORM_MINMAX)
                        img_arr.append(img)
                    if  gt_exists:
                        for file in os.listdir(os.path.join(path_mask,folder,phase)):
                            mask_arr.append(plt.imread(os.path.join(path_mask,folder,phase,file)))

                    # stack slices along the last axis
                    img_arr = np.transpose(np.asarray(img_arr),(1,2,0))      # x,y,N
                    if  gt_exists:
                        mask_arr = np.transpose(np.asarray(mask_arr),(1,2,0))

                    start_time = time.time()

                    if config.data_mode == '2D':

                        for zz in range(img_arr.shape[2]):

                            slice_img = np.squeeze(img_arr[:,:,zz])
                            # resample the slice to the training resolution
                            slice_rescaled = transform.rescale(slice_img,
                                                               scale_vector,
                                                               order=1,
                                                               preserve_range=True,
                                                               multichannel=False,
                                                               anti_aliasing=True,
                                                               mode='constant')

                            # NOTE(review): mask_arr is indexed even when
                            # gt_exists is False — this would fail on an empty
                            # list; confirm gt-less runs are actually exercised.
                            slice_mask = np.squeeze(mask_arr[:, :, zz])
                            slice_cropped = read_data.crop_or_pad_slice_to_size(slice_rescaled, nx, ny)
                            slice_cropped = np.float32(slice_cropped)
                            x = image_utils.reshape_2Dimage_to_tensor(slice_cropped)
                            imgs.append(np.squeeze(x))
                            if gt_exists:
                                # nearest-neighbour rescale for label masks
                                mask_rescaled = transform.rescale(slice_mask,
                                                                  scale_vector,
                                                                  order=0,
                                                                  preserve_range=True,
                                                                  multichannel=False,
                                                                  anti_aliasing=True,
                                                                  mode='constant')

                                mask_cropped = read_data.crop_or_pad_slice_to_size(mask_rescaled, nx, ny)
                                mask_cropped = np.asarray(mask_cropped, dtype=np.uint8)
                                y = image_utils.reshape_2Dimage_to_tensor(mask_cropped)
                                masks.append(np.squeeze(y))

                            # GET PREDICTION
                            feed_dict = {
                            images_pl: x,
                            }

                            mask_out, logits_out = sess.run([mask_pl, softmax_pl], feed_dict=feed_dict)

                            prediction_cropped = np.squeeze(logits_out[0,...])

                            # ASSEMBLE BACK THE SLICES
                            slice_predictions = np.zeros((nx,ny,num_channels))
                            slice_predictions = prediction_cropped
                            # RESCALING ON THE LOGITS
                            if gt_exists:
                                prediction = transform.resize(slice_predictions,
                                                              (nx, ny, num_channels),
                                                              order=1,
                                                              preserve_range=True,
                                                              anti_aliasing=True,
                                                              mode='constant')
                            else:
                                prediction = transform.rescale(slice_predictions,
                                                               (1.0/scale_vector[0], 1.0/scale_vector[1], 1),
                                                               order=1,
                                                               preserve_range=True,
                                                               multichannel=False,
                                                               anti_aliasing=True,
                                                               mode='constant')

                            # hard labels from the rescaled logits/softmax
                            prediction = np.uint8(np.argmax(prediction, axis=-1))

                            predictions.append(prediction)


                        predictions = np.transpose(np.asarray(predictions, dtype=np.uint8), (1,2,0))
                        masks = np.transpose(np.asarray(masks, dtype=np.uint8), (1,2,0))
                        imgs = np.transpose(np.asarray(imgs, dtype=np.float32), (1,2,0))


                    # This is the same for 2D and 3D
                    if do_postprocessing:
                        predictions = image_utils.keep_largest_connected_components(predictions)

                    elapsed_time = time.time() - start_time
                    total_time += elapsed_time
                    total_volumes += 1

                    logging.info('Evaluation of volume took %f secs.' % elapsed_time)


                    # Save predicted mask
                    for ii in range(predictions.shape[2]):
                        image_file_name = os.path.join('paz', str(ii).zfill(3) + '.png')
                        cv2.imwrite(os.path.join(save_path , image_file_name), np.squeeze(predictions[:,:,ii]))

                    # NOTE(review): the function is truncated here in this
                    # source chunk — the body of this gt_exists branch is
                    # missing.
                    if gt_exists:
Beispiel #28
0
def generate_and_evaluate_fieldstrength_classification(gan_experiment_path_list, fclf_experiment_path, verbose=True,
                                                       num_saved_images=0, image_saving_path=None):
    """Old function without the balanced test set.

    For each GAN experiment: translate every source-field-strength image with
    the generator, classify both the original and the generated image with a
    field strength classifier, and tally the four possible
    (source prediction, fake prediction) outcomes.

    :param gan_experiment_path_list: list of GAN experiment folders to evaluate
    :param fclf_experiment_path: experiment folder of the field strength classifier
    :param verbose: if True, log the classifier output for every image
    :param num_saved_images: number of (source, generated) image pairs to save per experiment
    :param image_saving_path: root folder under which generated nii images are saved
    :return: dict mapping GAN experiment name -> statistics from log_stats_fclf
    """
    # bigger does not work currently (because of the statistics)
    batch_size = 1

    fclf_config, logdir_fclf = utils.load_log_exp_config(fclf_experiment_path)

    im_s = fclf_config.image_size
    # single-image 3D batch: [1, x, y, z, 1]
    img_tensor_shape = [batch_size, im_s[0], im_s[1], im_s[2], 1]

    # prevents ResourceExhaustError when a lot of memory is used
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Do not assign whole gpu memory, just use it on the go
    config.allow_soft_placement = True  # If a operation is not defined in the default device, let it execute in another.

    # open field strength classifier save file from the selected experiment;
    # the classifier lives in its own graph + session for the whole run
    logging.info('loading field strength classifier')
    graph_fclf, fclf_pl, predictions_fclf_op, init_fclf_op, saver_fclf = build_clf_graph(img_tensor_shape, fclf_config)
    init_checkpoint_path_fclf = get_latest_checkpoint_and_log(logdir_fclf, 'model_best_diag_f1.ckpt')
    sess_fclf = tf.Session(config=config, graph=graph_fclf)
    sess_fclf.run(init_fclf_op)
    saver_fclf.restore(sess_fclf, init_checkpoint_path_fclf)

    # import data
    data = adni_data_loader.load_and_maybe_process_data(
            input_folder=fclf_config.data_root,
            preprocessing_folder=fclf_config.preproc_folder,
            size=fclf_config.image_size,
            target_resolution=fclf_config.target_resolution,
            label_list = (0, 1, 2),
            force_overwrite=False
        )

    scores = {}
    for gan_experiment_path in gan_experiment_path_list:
        gan_config, logdir_gan = utils.load_log_exp_config(gan_experiment_path)
        gan_experiment_name = gan_config.experiment_name
        logging.info('\nGAN Experiment (%f T to %f T): %s' % (gan_config.source_field_strength,
                                                              gan_config.target_field_strength, gan_experiment_name))

        # extract images and indices of source/target images for the training and validation set
        images_train, source_images_train_ind, target_images_train_ind,\
        images_val, source_images_val_ind, target_images_val_ind = data_utils.get_images_and_fieldstrength_indices(
            data, gan_config.source_field_strength, gan_config.target_field_strength)

        # open GAN save file from the selected experiment
        logging.info('loading GAN')
        init_checkpoint_path_gan = get_latest_checkpoint_and_log(logdir_gan, 'model.ckpt')

        # build a separate graph for the generator and the classifier respectively
        graph_generator, gan_pl, x_fake_op, init_gan_op, saver_gan = test_utils.build_gen_graph(img_tensor_shape, gan_config)


        # Create a session for running Ops on the Graph.
        sess_gan = tf.Session(config=config, graph=graph_generator)

        # Run the Op to initialize the variables.
        sess_gan.run(init_gan_op)
        saver_gan.restore(sess_gan, init_checkpoint_path_gan)

        # path where the generated images are saved
        experiment_generate_path = os.path.join(image_saving_path, gan_experiment_name)
        # make a folder for the generated images
        utils.makefolder(experiment_generate_path)

        # create selectors (boolean masks picking source images out of the sets)
        train_source_sel, val_source_sel = utils.index_sets_to_selectors(source_images_train_ind, source_images_val_ind)

        source_label, target_label = utils.fstr_to_label([gan_config.source_field_strength, gan_config.target_field_strength],
                                                             fclf_config.field_strength_list, fclf_config.fs_label_list)

        # s for source, t for target. First the prediction on the source image, then the prediction on the generated image
        prediction_count = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 0}
        # loops through all images from the source domain
        for img_num, source_img in enumerate(itertools.chain(itertools.compress(images_train, train_source_sel),
                                          itertools.compress(images_val, val_source_sel))):
            source_image_input = np.reshape(source_img, img_tensor_shape)
            # generate image
            feeddict_gan = {gan_pl['source_img']: source_image_input, gan_pl['training']: False}
            fake_img = sess_gan.run(x_fake_op, feed_dict=feeddict_gan)
            # classify images
            feeddict_fclf = {fclf_pl['source_img']: source_image_input, fclf_pl['fake_img']: fake_img, fclf_pl['training']: False}
            fclf_predictions_dict = sess_fclf.run(predictions_fclf_op, feed_dict=feeddict_fclf)

            # save images (only the first num_saved_images of each experiment)
            if img_num < num_saved_images:
                source_img_name = 'source_img_%.1fT_%d.nii.gz' % (gan_config.source_field_strength, img_num)
                generated_img_name = 'generated_img_%.1fT_%d.nii.gz' % (gan_config.target_field_strength, img_num)
                utils.create_and_save_nii(np.squeeze(source_img), os.path.join(experiment_generate_path, source_img_name))
                utils.create_and_save_nii(np.squeeze(fake_img), os.path.join(experiment_generate_path, generated_img_name))
                logging.info('images saved')


            # record occurences of the four possible combinations of source_prediction and fake_prediction
            label_tuple = (fclf_predictions_dict['source_label'], fclf_predictions_dict['fake_label'])
            prediction_count[label_tuple] += 1

            if verbose:
                logging.info("NEW IMAGE")
                logging.info("real label of source image: " + str(source_label))
                logging.info("predictions: " + str(fclf_predictions_dict))

        scores[gan_experiment_name] = log_stats_fclf(prediction_count, source_label, target_label)

    return scores
Beispiel #29
0
def generate_and_evaluate_ad_classification(gan_experiment_path_list, clf_experiment_path, score_functions,
                                            image_saving_indices=set(), image_saving_path=None, max_batch_size=np.inf):
    """
    Translate source-domain test images with each GAN in gan_experiment_path_list and evaluate an
    Alzheimer's disease classifier on the real source images, the real target images and the
    generated (translated) images.

    :param gan_experiment_path_list: list of GAN experiment paths to be evaluated. They must all have
        the same image settings and source/target field strengths as the classifier; only GAN
        experiments with the same source and target field strength are permitted.
    :param clf_experiment_path: path of the AD classifier experiment used for the evaluation
    :param score_functions: score functions passed through to evaluate_scores
        (mapping of score name -> callable(true_labels, predictions))
    :param image_saving_indices: set of indices (into the balanced source/target lists) of the
        images to be saved. NOTE: the default is a shared mutable set; it is only ever read
        (sorted / intersection), never mutated, so this is safe.
    :param image_saving_path: where to save the images (must not be None — it is joined
        unconditionally below). Images are saved in subfolders for each experiment.
    :param max_batch_size: upper bound on the batch size used for all graphs
    :return: dict mapping each GAN experiment name (plus 'source_%.1fT' and 'target_%.1fT' keys)
        to its evaluate_scores result
    """

    clf_config, logdir_clf = utils.load_log_exp_config(clf_experiment_path)

    # Load data (preprocessed on demand; returns an h5py.File-like dict of datasets)
    data = adni_data_loader_all.load_and_maybe_process_data(
        input_folder=clf_config.data_root,
        preprocessing_folder=clf_config.preproc_folder,
        size=clf_config.image_size,
        target_resolution=clf_config.target_resolution,
        label_list=clf_config.label_list,
        offset=clf_config.offset,
        rescale_to_one=clf_config.rescale_to_one,
        force_overwrite=False
    )

    # extract images and indices of source/target images for the test set
    images_test = data['images_test']
    labels_test = data['diagnosis_test']
    ages_test = data['age_test']

    im_s = clf_config.image_size
    # one common batch size for classifier and generator graphs
    batch_size = min(clf_config.batch_size, std_params.batch_size, max_batch_size)
    logging.info('batch size %d is used for everything' % batch_size)
    img_tensor_shape = [batch_size, im_s[0], im_s[1], im_s[2], 1]
    # size of the last (partial) batch when classifying the full test set
    clf_remainder_batch_size = images_test.shape[0] % batch_size

    # prevents ResourceExhaustError when a lot of memory is used
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Do not assign whole gpu memory, just use it on the go
    config.allow_soft_placement = True  # If a operation is not defined in the default device, let it execute in another.

    # open field strength classifier save file from the selected experiment
    logging.info("loading Alzheimer's disease classifier")
    graph_clf, image_pl, predictions_clf_op, init_clf_op, saver_clf = build_clf_graph(img_tensor_shape, clf_config)
    # logging.info("getting savepoint with the best cross entropy")
    # init_checkpoint_path_clf = get_latest_checkpoint_and_log(logdir_clf, 'model_best_xent.ckpt')
    logging.info("getting savepoint with the best f1 score")
    init_checkpoint_path_clf = get_latest_checkpoint_and_log(logdir_clf, 'model_best_diag_f1.ckpt')
    sess_clf = tf.Session(config=config, graph=graph_clf)
    sess_clf.run(init_clf_op)
    saver_clf.restore(sess_clf, init_checkpoint_path_clf)

    # make a separate graph for the last batch where the batchsize is smaller
    if clf_remainder_batch_size > 0:
        img_tensor_shape_gan_remainder = [clf_remainder_batch_size, im_s[0], im_s[1], im_s[2], 1]
        graph_clf_rem, image_pl_rem, predictions_clf_op_rem, init_clf_op_rem, saver_clf_rem = build_clf_graph(img_tensor_shape_gan_remainder, clf_config)
        sess_clf_rem = tf.Session(config=config, graph=graph_clf_rem)
        sess_clf_rem.run(init_clf_op_rem)
        saver_clf_rem.restore(sess_clf_rem, init_checkpoint_path_clf)

    # classifiy all real test images
    logging.info('classify all original images')
    real_pred = []
    for batch in iterate_minibatches(images_test,
                                     [labels_test, ages_test],
                                     batch_size=batch_size,
                                     exp_config=clf_config,
                                     map_labels_to_standard_range=False,
                                     shuffle_data=False,
                                     skip_remainder=False):
        # ignore the labels because data are in order, which means the label list in data can be used
        image_batch, [real_label, real_age] = batch

        current_batch_size = image_batch.shape[0]
        if current_batch_size < batch_size:
            # last partial batch goes through the remainder graph
            clf_prediction_real = sess_clf_rem.run(predictions_clf_op_rem, feed_dict={image_pl_rem: image_batch})
        else:
            clf_prediction_real = sess_clf.run(predictions_clf_op, feed_dict={image_pl: image_batch})

        real_pred = real_pred + list(clf_prediction_real['label'])
        logging.info('new image batch')
        logging.info('ground truth labels: ' + str(real_label))
        logging.info('predicted labels: ' + str(clf_prediction_real['label']))

    # field strengths are read from the first GAN config; all others are asserted equal below
    gan_config0, logdir_gan0 = utils.load_log_exp_config(gan_experiment_path_list[0])

    # split test-set indices by acquisition field strength (source vs target domain)
    source_indices = []
    target_indices = []
    source_true_labels = []
    target_true_labels = []
    for i, field_strength in enumerate(data['field_strength_test']):
        if field_strength == gan_config0.source_field_strength:
            source_indices.append(i)
            source_true_labels.append(labels_test[i])
        elif field_strength == gan_config0.target_field_strength:
            target_indices.append(i)
            target_true_labels.append(labels_test[i])

    # balance the test set (same number of images and same label distribution in both domains)
    (source_indices, source_true_labels), (
    target_indices, target_true_labels) = utils.balance_source_target(
        (source_indices, source_true_labels), (target_indices, target_true_labels), random_seed=0)
    # pick out the predictions that belong to the (balanced) source/target subsets;
    # real_pred is in test-set order, so indexing by position is valid
    source_pred = [pred for ind, pred in enumerate(real_pred) if ind in source_indices]
    target_pred = [pred for ind, pred in enumerate(real_pred) if ind in target_indices]

    assert len(source_pred) == len(source_true_labels)
    assert len(target_pred) == len(target_true_labels)

    # no unexpected labels
    assert all([label in clf_config.label_list for label in source_true_labels])
    assert all([label in clf_config.label_list for label in target_true_labels])
    assert all([label in clf_config.label_list for label in source_pred])
    assert all([label in clf_config.label_list for label in target_pred])

    num_source_images = len(source_indices)
    num_target_images = len(target_indices)

    source_label_count = Counter(source_true_labels)
    target_label_count = Counter(target_true_labels)

    logging.info('Data summary:')
    logging.info(' - Domains:')
    logging.info('number of source images: ' + str(num_source_images))
    logging.info('source label distribution ' + str(source_label_count))
    logging.info('number of target images: ' + str(num_target_images))
    logging.info('target label distribution ' + str(target_label_count))

    # balancing must have equalized counts and label distributions
    assert num_source_images == num_target_images
    assert source_label_count == target_label_count

    #2d image saving folder
    folder_2d = 'coronal_2d'
    image_saving_path2d = os.path.join(image_saving_path, folder_2d)
    utils.makefolder(image_saving_path2d)

    # save real images
    target_image_path = os.path.join(image_saving_path, 'target')
    source_image_path = os.path.join(image_saving_path, 'source')
    utils.makefolder(target_image_path)
    utils.makefolder(source_image_path)
    target_image_path2d = os.path.join(image_saving_path2d, 'target')
    source_image_path2d = os.path.join(image_saving_path2d, 'source')
    utils.makefolder(target_image_path2d)
    utils.makefolder(source_image_path2d)
    sorted_saving_indices = sorted(image_saving_indices)
    target_saving_indices = [target_indices[index] for index in sorted_saving_indices]
    for target_index in target_saving_indices:
        target_img_name = 'target_img_%.1fT_diag%d_ind%d' % (gan_config0.target_field_strength, labels_test[target_index], target_index)
        utils.save_image_and_cut(images_test[target_index], target_img_name, target_image_path, target_image_path2d)
        logging.info(target_img_name + ' saved')

    source_saving_indices = [source_indices[index] for index in sorted_saving_indices]
    for source_index in source_saving_indices:
        source_img_name = 'source_img_%.1fT_diag%d_ind%d' % (gan_config0.source_field_strength, labels_test[source_index], source_index)
        utils.save_image_and_cut(images_test[source_index], source_img_name, source_image_path,
                                 source_image_path2d)
        logging.info(source_img_name + ' saved')

    logging.info('source and target images saved')

    # size of the last (partial) batch when iterating only the source subset
    gan_remainder_batch_size = num_source_images % batch_size

    scores = {}
    for gan_experiment_path in gan_experiment_path_list:
        gan_config, logdir_gan = utils.load_log_exp_config(gan_experiment_path)

        gan_experiment_name = gan_config.experiment_name

        # make sure the experiments all have the same configuration as the classifier
        assert gan_config.source_field_strength == gan_config0.source_field_strength
        assert gan_config.target_field_strength == gan_config0.target_field_strength
        assert gan_config.image_size == clf_config.image_size
        assert gan_config.target_resolution == clf_config.target_resolution
        assert gan_config.offset == clf_config.offset

        logging.info('\nGAN Experiment (%.1f T to %.1f T): %s' % (gan_config.source_field_strength,
                                                              gan_config.target_field_strength, gan_experiment_name))
        logging.info(gan_config)
        # open GAN save file from the selected experiment
        logging.info('loading GAN')
        # open the latest GAN savepoint
        init_checkpoint_path_gan = get_latest_checkpoint_and_log(logdir_gan, 'model.ckpt')

        # build a separate graph for the generator
        graph_generator, generator_img_pl, x_fake_op, init_gan_op, saver_gan = test_utils.build_gen_graph(img_tensor_shape, gan_config)

        # Create a session for running Ops on the Graph.
        sess_gan = tf.Session(config=config, graph=graph_generator)

        # Run the Op to initialize the variables.
        sess_gan.run(init_gan_op)
        saver_gan.restore(sess_gan, init_checkpoint_path_gan)

        # path where the generated images are saved
        experiment_generate_path = os.path.join(image_saving_path, gan_experiment_name)
        experiment_generate_path2d = os.path.join(image_saving_path2d, gan_experiment_name)
        # make a folder for the generated images
        utils.makefolder(experiment_generate_path)
        utils.makefolder(experiment_generate_path2d)

        # make separate graphs for the last batch where the batchsize is smaller.
        # BUGFIX: the loop below iterates over the source subset only, so the remainder size
        # is gan_remainder_batch_size (num_source_images % batch_size), not
        # clf_remainder_batch_size. Using the latter either skipped building the remainder
        # sessions (NameError at the last batch) or built zero-sized graphs.
        if gan_remainder_batch_size > 0:
            img_tensor_shape_gan_remainder = [gan_remainder_batch_size, im_s[0], im_s[1], im_s[2], 1]
            # classifier
            graph_clf_rem, image_pl_rem, predictions_clf_op_rem, init_clf_op_rem, saver_clf_rem = build_clf_graph(img_tensor_shape_gan_remainder, clf_config)
            sess_clf_rem = tf.Session(config=config, graph=graph_clf_rem)
            sess_clf_rem.run(init_clf_op_rem)
            saver_clf_rem.restore(sess_clf_rem, init_checkpoint_path_clf)

            # generator
            graph_generator_rem, generator_img_rem_pl, x_fake_op_rem, init_gan_op_rem, saver_gan_rem = \
                test_utils.build_gen_graph(img_tensor_shape_gan_remainder, gan_config)
            # Create a session for running Ops on the Graph.
            sess_gan_rem = tf.Session(config=config, graph=graph_generator_rem)
            # Run the Op to initialize the variables.
            sess_gan_rem.run(init_gan_op_rem)
            saver_gan_rem.restore(sess_gan_rem, init_checkpoint_path_gan)

        logging.info('image generation begins')
        generated_pred = []
        batch_beginning_index = 0
        # loops through all images from the source domain
        for batch in iterate_minibatches(images_test,
                                     [labels_test, ages_test],
                                     batch_size=batch_size,
                                     exp_config=clf_config,
                                     map_labels_to_standard_range=False,
                                     selection_indices=source_indices,
                                     shuffle_data=False,
                                     skip_remainder=False):
            # ignore the labels because data are in order, which means the label list in data can be used
            image_batch, [real_label, real_age] = batch

            current_batch_size = image_batch.shape[0]
            if current_batch_size < batch_size:
                fake_img = sess_gan_rem.run(x_fake_op_rem, feed_dict={generator_img_rem_pl: image_batch})
                # classify fake image
                clf_prediction_fake = sess_clf_rem.run(predictions_clf_op_rem, feed_dict={image_pl_rem: fake_img})
            else:
                fake_img = sess_gan.run(x_fake_op, feed_dict={generator_img_pl: image_batch})
                # classify fake image
                clf_prediction_fake = sess_clf.run(predictions_clf_op, feed_dict={image_pl: fake_img})

            generated_pred = generated_pred + list(clf_prediction_fake['label'])

            # save images
            current_source_indices = range(batch_beginning_index, batch_beginning_index + current_batch_size)

            # test whether minibatches are really iterated in order by checking if the labels are as expected
            assert [source_true_labels[i] for i in current_source_indices] == list(real_label)

            source_indices_to_save = image_saving_indices.intersection(set(current_source_indices))
            for source_index in source_indices_to_save:
                batch_index = source_index - batch_beginning_index
                # index of the image in the complete test data
                global_index = source_indices[source_index]
                generated_img_name = 'generated_img_%.1fT_diag%d_ind%d' % (gan_config.target_field_strength, labels_test[global_index], global_index)
                utils.save_image_and_cut(np.squeeze(fake_img[batch_index]), generated_img_name, experiment_generate_path, experiment_generate_path2d)
                logging.info(generated_img_name + ' saved')
                # save the difference g(xs)-xs
                corresponding_source_img = images_test[global_index]
                difference_image_gs = np.squeeze(fake_img[batch_index]) - corresponding_source_img
                difference_img_name = 'difference_img_%.1fT_diag%d_ind%d' % (gan_config.target_field_strength, labels_test[global_index], global_index)
                utils.save_image_and_cut(difference_image_gs, difference_img_name,
                                         experiment_generate_path, experiment_generate_path2d)
                logging.info(difference_img_name + ' saved')

            logging.info('new image batch')
            logging.info('ground truth labels: ' + str(real_label))
            logging.info('predicted labels for generated images: ' + str(clf_prediction_fake['label']))
            # no unexpected labels
            assert all([label in clf_config.label_list for label in clf_prediction_fake['label']])

            batch_beginning_index += current_batch_size
        logging.info('generated prediction for %s: %s' % (gan_experiment_name, str(generated_pred)))
        scores[gan_experiment_name] = evaluate_scores(source_true_labels, generated_pred, score_functions)

    logging.info('source prediction: ' + str(source_pred))
    logging.info('source ground truth: ' + str(source_true_labels))
    logging.info('target prediction: ' + str(target_pred))
    logging.info('target ground truth: ' + str(target_true_labels))

    # baseline scores of the classifier on the untranslated source and target images
    scores['source_%.1fT' % gan_config0.source_field_strength] = evaluate_scores(source_true_labels, source_pred, score_functions)
    scores['target_%.1fT' % gan_config0.target_field_strength] = evaluate_scores(target_true_labels, target_pred, score_functions)

    return scores
def main(model_path, exp_config):
    """Sample segmentations from a trained PhiSeg model for one fixed test image and
    render them as a video / GIF / on-screen preview.

    :param model_path: experiment folder containing the trained weights
        ('best_ged' checkpoint) and used as the output folder for video/GIF frames
    :param exp_config: experiment configuration module/object (provides data_identifier,
        image_size, latent_levels, nlabels, ...)

    NOTE(review): relies on module-level globals not visible in this block:
    SAVE_VIDEO, SAVE_GIF, DISPLAY_VIDEO, video_target_size, phiseg, data_switch,
    histogram_equalization, utils, cv2, Image — confirm they are defined at file level.
    """

    # Make and restore vagan model
    phiseg_model = phiseg(exp_config=exp_config)
    phiseg_model.load_weights(model_path, type='best_ged')

    # data loader is selected by the dataset identifier (e.g. 'acdc', 'lidc', 'uzh_prostate')
    data_loader = data_switch(exp_config.data_identifier)
    data = data_loader(exp_config)

    # NOTE: lat_lvls is computed but not used below in this block
    lat_lvls = exp_config.latent_levels

    # RANDOM IMAGE
    # x_b, s_b = data.test.next_batch(1)

    # FIXED IMAGE
    # Cardiac: 100 normal image
    # LIDC: 200 large lesion, 203, 1757 complicated lesion
    # Prostate: 165 nice slice, 170 is a challenging and interesting slice
    index = 165  # #

    if SAVE_GIF:
        outfolder_gif = os.path.join(model_path,
                                     'model_samples_id%d_gif' % index)
        utils.makefolder(outfolder_gif)

    # add a leading batch dimension of 1 for the network input
    x_b = data.test.images[index,
                           ...].reshape([1] + list(exp_config.image_size))

    # display copy of the input: uint8, resized to the video frame size
    x_b_d = utils.convert_to_uint8(np.squeeze(x_b))
    x_b_d = utils.resize_image(x_b_d, video_target_size)

    if exp_config.data_identifier == 'uzh_prostate':
        # rotate
        rows, cols = x_b_d.shape
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 270, 1)
        x_b_d = cv2.warpAffine(x_b_d, M, (cols, rows))

    if SAVE_VIDEO:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        outfile = os.path.join(model_path, 'model_samples_id%d.avi' % index)
        # frame is input and prediction side by side, hence 2x width
        out = cv2.VideoWriter(outfile, fourcc, 5.0,
                              (2 * video_target_size[1], video_target_size[0]))

    # draw `samps` independent samples from the model for the same input image
    samps = 20
    for ii in range(samps):

        # fix all below current level (the correct implementation)
        feed_dict = {}
        feed_dict[phiseg_model.training_pl] = False
        feed_dict[phiseg_model.x_inp] = x_b

        s_p, s_p_list = phiseg_model.sess.run(
            [phiseg_model.s_out_eval, phiseg_model.s_out_eval_list],
            feed_dict=feed_dict)
        s_p = np.argmax(s_p, axis=-1)

        # s_p_d = utils.convert_to_uint8(np.squeeze(s_p))
        # map label ids onto the uint8 range for display (e.g. 4 labels -> 0/63/127/191)
        s_p_d = np.squeeze(np.uint8((s_p / exp_config.nlabels) * 255))
        # nearest-neighbour so label values stay discrete after resizing
        s_p_d = utils.resize_image(s_p_d,
                                   video_target_size,
                                   interp=cv2.INTER_NEAREST)

        if exp_config.data_identifier == 'uzh_prostate':
            #rotate (reuses M/rows/cols computed for the input image above)
            s_p_d = cv2.warpAffine(s_p_d, M, (cols, rows))

        # side-by-side frame: input | sampled segmentation
        img = np.concatenate([x_b_d, s_p_d], axis=1)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        img = histogram_equalization(img)

        # overlay structure contours per dataset; inRange picks one label's grey value
        # NOTE(review): cv2.findContours returns 2 values in OpenCV 2.x/4.x but 3 in 3.x —
        # this code assumes the 2-value signature; confirm the installed OpenCV version.
        if exp_config.data_identifier == 'acdc':
            # labels (0 85 170 255)
            rv = cv2.inRange(s_p_d, 84, 86)
            my = cv2.inRange(s_p_d, 169, 171)
            rv_cnt, hierarchy = cv2.findContours(rv, cv2.RETR_TREE,
                                                 cv2.CHAIN_APPROX_SIMPLE)
            my_cnt, hierarchy = cv2.findContours(my, cv2.RETR_TREE,
                                                 cv2.CHAIN_APPROX_SIMPLE)

            cv2.drawContours(img, rv_cnt, -1, (0, 255, 0), 1)
            cv2.drawContours(img, my_cnt, -1, (0, 0, 255), 1)
        if exp_config.data_identifier == 'uzh_prostate':

            print(np.unique(s_p_d))
            s1 = cv2.inRange(s_p_d, 84, 86)
            s2 = cv2.inRange(s_p_d, 169, 171)
            # s3 = cv2.inRange(s_p_d, 190, 192)
            s1_cnt, hierarchy = cv2.findContours(s1, cv2.RETR_TREE,
                                                 cv2.CHAIN_APPROX_SIMPLE)
            s2_cnt, hierarchy = cv2.findContours(s2, cv2.RETR_TREE,
                                                 cv2.CHAIN_APPROX_SIMPLE)
            # s3_cnt, hierarchy = cv2.findContours(s3, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

            cv2.drawContours(img, s1_cnt, -1, (0, 255, 0), 1)
            cv2.drawContours(img, s2_cnt, -1, (0, 0, 255), 1)
            # cv2.drawContours(img, s3_cnt, -1, (255, 0, 255), 1)
        elif exp_config.data_identifier == 'lidc':
            thresh = cv2.inRange(s_p_d, 127, 255)
            lesion, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
                                                 cv2.CHAIN_APPROX_SIMPLE)
            cv2.drawContours(img, lesion, -1, (0, 255, 0), 1)

        if SAVE_VIDEO:
            out.write(img)

        if SAVE_GIF:
            outfile_gif = os.path.join(outfolder_gif,
                                       'frame_%s.png' % str(ii).zfill(3))
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # scipy.misc.imsave(outfile_gif, img_rgb)
            im = Image.fromarray(img_rgb)
            # NOTE(review): Image.ANTIALIAS was removed in Pillow 10 (use Image.LANCZOS);
            # left unchanged here — confirm the pinned Pillow version.
            im = im.resize((im.size[0] * 2, im.size[1] * 2), Image.ANTIALIAS)

            im.save(outfile_gif)

        if DISPLAY_VIDEO:
            cv2.imshow('frame', img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    if SAVE_VIDEO:
        out.release()
    cv2.destroyAllWindows()