Example #1
def main():
    # hdf5_file_main = createHDF5File(config)
    hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='a')
    # Go inside the "original_data" parent directory.
    # we need to create the validation data dataset again since the shape has changed.
    hdf5_file = hdf5_file_main['original_data']
    del hdf5_file['validation_data']
    del hdf5_file['validation_data_pat_name']
    # Validation Data, with no segmentation masks
    hdf5_file.create_dataset("validation_data", config['val_shape'],
                             np.float32)
    hdf5_file.create_dataset("validation_data_pat_name",
                             (config['val_shape'][0], ),
                             dtype="S100")

    for dataset_splits in glob.glob(
            os.path.join(config['data_dir_prefix'],
                         '*')):  # Training/Validation data?
        # make sure it's a directory
        if os.path.isdir(dataset_splits) and 'Validation' in dataset_splits:
            # VALIDATION data handler
            logger.info('currently loading Validation data.')
            count = 0
            # validation data does not have HGG and LGG distinctions
            for images, pats in dataloader.loadDataGenerator(
                    dataset_splits,
                    batch_size=config['batch_size'],
                    loadSurvival=False,
                    csvFilePath=None,
                    loadSeg=False,
                    preprocess=PREPROCESS_DATA):
                hdf5_file['validation_data'][count:count +
                                             config['batch_size'],
                                             ...] = images
                t = 0

                for i in range(count, count + config['batch_size']):
                    hdf5_file['validation_data_pat_name'][i] = pats[t].split(
                        '/')[-1]
                    t += 1

                # logger.debug('array equal?: {}'.format(np.array_equal(hdf5_file['validation_data'][count:count+config['batch_size'],...], images)))
                logger.info('loaded {} patient(s) from {}'.format(
                    count + config['batch_size'], dataset_splits))
                count += config['batch_size']

        # else:
        # # TRAINING data handler
        #     if os.path.isdir(dataset_splits) and 'Training' in dataset_splits:
        #         for grade_type in glob.glob(os.path.join(dataset_splits, '*')):
        #             # there may be other files in there (like the survival data), ignore them.
        #             if os.path.isdir(grade_type):
        #                 count = 0
        #                 logger.info('currently loading Training data.')
        #                 for images, segmasks, pats in dataloader.loadDataGenerator(grade_type,
        #                                                     batch_size=config['batch_size'], loadSurvival=False,
        #                                                     csvFilePath=None, loadSeg=True,
        #                                                     preprocess=PREPROCESS_DATA):
        #                     logger.info('loading patient {} from {}'.format(count, grade_type))
        #                     if 'HGG' in grade_type:
        #                         hdf5_file['training_data_hgg'][count:count+config['batch_size'],...] = images
        #                         hdf5_file['training_data_segmasks_hgg'][count:count+config['batch_size'], ...] = segmasks
        #                         t = 0
        #                         for i in range(count, count + config['batch_size']):
        #                             hdf5_file['training_data_hgg_pat_name'][i] = pats[t].split('/')[-1]
        #                             t += 1
        #                     elif 'LGG' in grade_type:
        #                         hdf5_file['training_data_lgg'][count:count+config['batch_size'], ...] = images
        #                         hdf5_file['training_data_segmasks_lgg'][count:count+config['batch_size'], ...] = segmasks
        #                         t = 0
        #                         for i in range(count, count + config['batch_size']):
        #                             hdf5_file['training_data_lgg_pat_name'][i] = pats[t].split('/')[-1]
        #                             t += 1
        #
        #                     logger.info('loaded {} patient(s) from {}'.format(count + config['batch_size'], grade_type))
        #                     count += config['batch_size']
    # close the HDF5 file
    hdf5_file_main.close()
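
Example #1 assumes that createHDF5File has already laid out an "original_data" group with pre-allocated datasets. Below is a minimal sketch of such a helper; the file path, shapes, and dataset names are assumptions taken from the usage above, not the project's actual implementation.

import h5py
import numpy as np

def createHDF5File(config):
    # Create (or overwrite) the HDF5 file and pre-allocate the datasets
    # that the example writes into; shapes come from the config dict.
    f = h5py.File(config['hdf5_filepath_prefix'], mode='w')
    grp = f.create_group('original_data')
    grp.create_dataset('validation_data', config['val_shape'], np.float32)
    grp.create_dataset('validation_data_pat_name',
                       (config['val_shape'][0], ), dtype='S100')
    return f

# Hypothetical config values; the real ones depend on the dataset being packed.
config = {
    'hdf5_filepath_prefix': 'brats_data.h5',
    'val_shape': (4, 4, 240, 240, 155),  # assumed (patients, channels, H, W, D)
    'batch_size': 2,
}
hdf5_file_main = createHDF5File(config)
hdf5_file_main.close()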
Example #2
        plt.suptitle('Patient {}'.format(pat))
        plt.show()

# In-depth testing
if options.in_depth:
    logger.info('Performing in-depth tests... this may take some time.')
    for dataset_splits in glob.glob(
            os.path.join(config['data_dir_prefix'], '*')):
        if os.path.isdir(dataset_splits) and 'Validation' in dataset_splits:
            # VALIDATION data handler
            logger.info('currently loading Validation data.')
            count = 0
            # validation data does not have HGG and LGG distinctions
            for images, pats in dataloader.loadDataGenerator(
                    dataset_splits,
                    batch_size=config['batch_size'],
                    loadSurvival=False,
                    csvFilePath=None,
                    loadSeg=False):
                logger.info('data equal?')
                val = np.array_equal(
                    hdf5_file['validation_data'][count:count +
                                                 config['batch_size'], ...],
                    images)
                logger.info(val)
                assert val

                t = 0
                for i in range(count, count + config['batch_size']):
                    logger.info('pat_name equal?')
                    val = hdf5_file['validation_data_pat_name'][i] == pats[
                        t].split('/')[-1]
Example #3
def main():
    hdf5_file_main = createHDF5File(config)
    # hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='a')
    # Go inside the "original_data" parent directory.
    # we need to create the validation data dataset again since the shape has changed.
    hdf5_file = hdf5_file_main['original_data']
    contents = glob.glob(os.path.join(config['data_dir_prefix'], '*'))

    # for debugging: make sure the Training set is loaded first, not Testing, since that has already been tested.
    contents.reverse()
    for dataset_splits in contents:  # Challenge/LeaderBoard data?
        if os.path.isdir(dataset_splits):  # make sure it's a directory
            for grade_type in glob.glob(os.path.join(dataset_splits, '*')):
                # there may be other files in there (like the survival data), ignore them.
                if os.path.isdir(grade_type):
                    count = 0
                    if 'Testing' in dataset_splits:
                        logger.info(
                            'currently loading Testing -> {} data.'.format(
                                os.path.basename(grade_type)))
                        ty = 'Testing'

                        for images, pats in dataloader.loadDataGenerator(
                                grade_type,
                                batch_size=config['batch_size'],
                                loadSurvival=False,
                                csvFilePath=None,
                                loadSeg=False,
                                preprocess=PREPROCESS_DATA,
                                dataset='ISLES'):
                            logger.info('loading patient {} from {}'.format(
                                count, grade_type))
                            if 'HGG_LGG' in grade_type:
                                if ty == 'Testing':
                                    main_data_name = 'testing_hgglgg_patients'
                                    main_data_pat_name = 'testing_hgglgg_patients_pat_name'

                                hdf5_file[main_data_name][count:count +
                                                          config['batch_size'],
                                                          ...] = images
                                t = 0
                                for i in range(count,
                                               count + config['batch_size']):
                                    hdf5_file[main_data_pat_name][i] = pats[
                                        t].split('.')[-2]
                                    t += 1

                            logger.info('loaded {} patient(s) from {}'.format(
                                count + config['batch_size'], grade_type))
                            count += config['batch_size']
                    else:
                        # TRAINING data handler
                        if (os.path.isdir(dataset_splits)
                                and 'Training' in dataset_splits):
                            for grade_type in glob.glob(
                                    os.path.join(dataset_splits, '*')):
                                # there may be other files in there (like the survival data), ignore them.
                                if os.path.isdir(grade_type):
                                    count = 0
                                    logger.info(
                                        'currently loading Training data.')
                                    for images, segmasks, pats in dataloader.loadDataGenerator(
                                            grade_type,
                                            batch_size=config['batch_size'],
                                            loadSurvival=False,
                                            csvFilePath=None,
                                            loadSeg=True,
                                            preprocess=PREPROCESS_DATA,
                                            dataset='ISLES'):
                                        logger.info(
                                            'loading patient {} from {}'.
                                            format(count, grade_type))

                                        hdf5_file['training_data'][
                                            count:count + config['batch_size'],
                                            ...] = images
                                        hdf5_file['training_data_segmasks'][
                                            count:count + config['batch_size'],
                                            ...] = segmasks
                                        t = 0
                                        for i in range(
                                                count,
                                                count + config['batch_size']):
                                            hdf5_file[
                                                'training_data_pat_name'][
                                                    i] = pats[t].split('/')[-1]
                                            t += 1

                                        logger.info(
                                            'loaded {} patient(s) from {}'.
                                            format(
                                                count + config['batch_size'],
                                                grade_type))
                                        count += config['batch_size']
    # close the HDF5 file
    hdf5_file_main.close()
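
The examples call dataloader.loadDataGenerator but never show it. The stub below is only a stand-in that matches the call signature and yield pattern used in Examples #1-#3 (a batch of images plus the corresponding patient paths, with segmentation masks when loadSeg=True); the real loader reads and preprocesses the imaging volumes, which is not reproduced here. Example #4 uses a variant that yields one patient (a pathlib.Path) at a time.

import glob
import os
import numpy as np

def loadDataGenerator(data_dir, batch_size=1, loadSurvival=False,
                      csvFilePath=None, loadSeg=False, preprocess=False,
                      dataset='BraTS'):
    # Stand-in generator: yields zero-filled arrays in place of real scans.
    patients = sorted(glob.glob(os.path.join(data_dir, '*')))
    for start in range(0, len(patients), batch_size):
        batch = patients[start:start + batch_size]
        images = np.zeros((len(batch), 4, 240, 240, 155), np.float32)  # assumed shape
        if loadSeg:
            segmasks = np.zeros((len(batch), 240, 240, 155), np.float32)
            yield images, segmasks, batch
        else:
            yield images, batch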
Example #4
def main():
    hdf5_file_main = createHDF5File(config)
    # hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='w')
    # Go inside the "original_data" parent directory.
    # we need to create the validation data dataset again since the shape has changed.
    hdf5_file = hdf5_file_main['original_data']
    del hdf5_file['validation_data']
    del hdf5_file['validation_data_pat_name']
    # Validation Data, with no segmentation masks
    hdf5_file.create_dataset("validation_data", config['val_shape'],
                             np.float32)
    hdf5_file.create_dataset("validation_data_pat_name",
                             (config['val_shape'][0], ),
                             dtype="S100")

    # Training/Validation data?
    for dataset_splits in config['pathd_src'].iterdir():
        # make sure it's a directory
        if dataset_splits.is_dir() and 'Validation' in dataset_splits.name:
            # VALIDATION data handler
            logger.info('currently loading Validation data.')
            count = 0
            # validation data does not have HGG and LGG distinctions
            for images, paths_pat in dataloader.loadDataGenerator(
                    dataset_splits,
                    loadSurvival=False,
                    csvFilePath=None,
                    loadSeg=False,
                    preprocess=PREPROCESS_DATA):
                hdf5_file['validation_data'][count] = images
                hdf5_file['validation_data_pat_name'][
                    count] = paths_pat.name.encode('utf-8')

                # logger.debug('array equal?: {}'.format(np.array_equal(hdf5_file['validation_data'][count:count+config['batch_size'],...], images)))
                # logger.info('loaded {} patient(s) from {}'.format(count + config['batch_size'], dataset_splits))
                count += 1

        else:
            # TRAINING data handler
            if dataset_splits.is_dir() and 'Training' in dataset_splits.name:
                for grade_type in dataset_splits.iterdir():
                    # there may be other files in there (like the survival data), ignore them.
                    if grade_type.is_dir():
                        count = 0
                        logger.info('currently loading Training data.')
                        for images, segmask, paths_pat in dataloader.loadDataGenerator(
                                grade_type,
                                loadSurvival=False,
                                csvFilePath=None,
                                loadSeg=True,
                                preprocess=PREPROCESS_DATA):
                            logger.info('loading patient {} from {}'.format(
                                paths_pat.name, grade_type))
                            if 'HGG' in grade_type.name:
                                hdf5_file['training_data_hgg'][count] = images
                                hdf5_file['training_data_segmasks_hgg'][
                                    count] = segmask
                                hdf5_file['training_data_hgg_pat_name'][
                                    count] = paths_pat.name.encode('utf-8')

                            elif 'LGG' in grade_type.name:
                                hdf5_file['training_data_lgg'][count] = images
                                hdf5_file['training_data_segmasks_lgg'][
                                    count] = segmask
                                hdf5_file['training_data_lgg_pat_name'][
                                    count] = paths_pat.name.encode('utf-8')
                            count += 1

                        logger.info('loaded {} patient(s) from {}'.format(
                            count, grade_type))

    # close the HDF5 file
    hdf5_file_main.close()
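
A short read-back sketch to verify what was written. The file path and dataset names mirror the examples above and are assumptions; note that fixed-length 'S100' entries come back as bytes and need decoding.

import h5py

# Hypothetical path; use whatever config['hdf5_filepath_prefix'] pointed to.
with h5py.File('brats_data.h5', mode='r') as f:
    grp = f['original_data']
    print(grp['validation_data'].shape)
    # 'S100' stores fixed-length bytes; decode them before display.
    names = [n.decode('utf-8') for n in grp['validation_data_pat_name'][:]]
    print(names[:5])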