def main():
    # hdf5_file_main = createHDF5File(config)
    hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='a')

    # Go inside the "original_data" parent directory.
    # We need to create the validation data datasets again since the shape has changed.
    hdf5_file = hdf5_file_main['original_data']
    del hdf5_file['validation_data']
    del hdf5_file['validation_data_pat_name']

    # Validation data, with no segmentation masks.
    hdf5_file.create_dataset("validation_data", config['val_shape'], np.float32)
    hdf5_file.create_dataset("validation_data_pat_name",
                             (config['val_shape'][0], ), dtype="S100")

    # Training/Validation data?
    for dataset_splits in glob.glob(os.path.join(config['data_dir_prefix'], '*')):
        # make sure it's a directory
        if os.path.isdir(dataset_splits) and 'Validation' in dataset_splits:
            # VALIDATION data handler
            logger.info('currently loading Validation data.')
            count = 0
            # validation data does not have HGG and LGG distinctions
            for images, pats in dataloader.loadDataGenerator(
                    dataset_splits,
                    batch_size=config['batch_size'],
                    loadSurvival=False,
                    csvFilePath=None,
                    loadSeg=False,
                    preprocess=PREPROCESS_DATA):
                hdf5_file['validation_data'][count:count + config['batch_size'], ...] = images
                t = 0
                for i in range(count, count + config['batch_size']):
                    hdf5_file['validation_data_pat_name'][i] = pats[t].split('/')[-1]
                    t += 1
                # logger.debug('array equal?: {}'.format(np.array_equal(
                #     hdf5_file['validation_data'][count:count + config['batch_size'], ...], images)))
                logger.info('loaded {} patient(s) from {}'.format(
                    count + config['batch_size'], dataset_splits))
                count += config['batch_size']
        # else:
        #     # TRAINING data handler
        #     if os.path.isdir(dataset_splits) and 'Training' in dataset_splits:
        #         for grade_type in glob.glob(os.path.join(dataset_splits, '*')):
        #             # there may be other files in there (like the survival data), ignore them.
        #             if os.path.isdir(grade_type):
        #                 count = 0
        #                 logger.info('currently loading Training data.')
        #                 for images, segmasks, pats in dataloader.loadDataGenerator(
        #                         grade_type, batch_size=config['batch_size'], loadSurvival=False,
        #                         csvFilePath=None, loadSeg=True, preprocess=PREPROCESS_DATA):
        #                     logger.info('loading patient {} from {}'.format(count, grade_type))
        #                     if 'HGG' in grade_type:
        #                         hdf5_file['training_data_hgg'][count:count + config['batch_size'], ...] = images
        #                         hdf5_file['training_data_segmasks_hgg'][count:count + config['batch_size'], ...] = segmasks
        #                         t = 0
        #                         for i in range(count, count + config['batch_size']):
        #                             hdf5_file['training_data_hgg_pat_name'][i] = pats[t].split('/')[-1]
        #                             t += 1
        #                     elif 'LGG' in grade_type:
        #                         hdf5_file['training_data_lgg'][count:count + config['batch_size'], ...] = images
        #                         hdf5_file['training_data_segmasks_lgg'][count:count + config['batch_size'], ...] = segmasks
        #                         t = 0
        #                         for i in range(count, count + config['batch_size']):
        #                             hdf5_file['training_data_lgg_pat_name'][i] = pats[t].split('/')[-1]
        #                             t += 1
        #                     logger.info('loaded {} patient(s) from {}'.format(
        #                         count + config['batch_size'], grade_type))
        #                     count += config['batch_size']

    # close the HDF5 file
    hdf5_file_main.close()
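# These excerpts all rely on module-level names defined elsewhere in the project:
# `config` (a dict of paths and shapes), `logger`, `dataloader.loadDataGenerator`,
# and the `PREPROCESS_DATA` flag. The later variants call createHDF5File(config)
# instead of opening the file directly; its implementation is not shown here, so
# what follows is only a minimal sketch of what it might look like, assembled from
# the dataset names and config keys visible in these scripts. The
# 'train_hgg_shape' / 'train_lgg_shape' keys are hypothetical placeholders.
import h5py
import numpy as np


def createHDF5File(config):
    """Sketch: create the HDF5 container with an 'original_data' group and the
    datasets that main() writes into. Patient names are stored as fixed-length
    byte strings (dtype 'S100'), matching the create_dataset calls above."""
    hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='w')  # 'w' truncates any existing file
    grp = hdf5_file_main.create_group('original_data')

    # Validation images and their patient names (no segmentation masks).
    grp.create_dataset('validation_data', config['val_shape'], np.float32)
    grp.create_dataset('validation_data_pat_name',
                       (config['val_shape'][0], ), dtype='S100')

    # Training images, segmentation masks and names per grade. The shape keys are
    # assumptions; masks are given the image shape here purely for illustration.
    for grade, shape_key in (('hgg', 'train_hgg_shape'), ('lgg', 'train_lgg_shape')):
        shape = config[shape_key]
        grp.create_dataset('training_data_{}'.format(grade), shape, np.float32)
        grp.create_dataset('training_data_segmasks_{}'.format(grade), shape, np.float32)
        grp.create_dataset('training_data_{}_pat_name'.format(grade),
                           (shape[0], ), dtype='S100')
    return hdf5_file_main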
        plt.suptitle('Patient {}'.format(pat))
        plt.show()

    # In-depth testing: re-read the source data and compare it against what was
    # written into the HDF5 file.
    if options.in_depth == True:
        logger.info('Performing in-depth tests..this may take some time.')
        for dataset_splits in glob.glob(os.path.join(config['data_dir_prefix'], '*')):
            if os.path.isdir(dataset_splits) and 'Validation' in dataset_splits:
                # VALIDATION data handler
                logger.info('currently loading Validation data.')
                count = 0
                # validation data does not have HGG and LGG distinctions
                for images, pats in dataloader.loadDataGenerator(
                        dataset_splits,
                        batch_size=config['batch_size'],
                        loadSurvival=False,
                        csvFilePath=None,
                        loadSeg=False):
                    logger.info('data equal?')
                    val = np.array_equal(
                        hdf5_file['validation_data'][count:count + config['batch_size'], ...],
                        images)
                    logger.info(val)
                    assert val == True
                    t = 0
                    for i in range(count, count + config['batch_size']):
                        logger.info('pat_name equal?')
                        # Note: h5py returns bytes for 'S100' datasets, so under Python 3
                        # the stored name may need a .decode() before this comparison.
                        val = hdf5_file['validation_data_pat_name'][i] == pats[t].split('/')[-1]
                        logger.info(val)
                        assert val == True
                        t += 1
                    count += config['batch_size']
def main():
    hdf5_file_main = createHDF5File(config)
    # hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='a')

    # Go inside the "original_data" parent directory.
    hdf5_file = hdf5_file_main['original_data']

    contents = glob.glob(os.path.join(config['data_dir_prefix'], '*'))
    # For debugging, make sure the Training set is loaded first, not Testing,
    # since Testing has already been tested.
    contents.reverse()

    # Challenge/LeaderBoard data?
    for dataset_splits in contents:
        if os.path.isdir(dataset_splits):  # make sure it's a directory
            for grade_type in glob.glob(os.path.join(dataset_splits, '*')):
                # there may be other files in there (like the survival data), ignore them.
                if os.path.isdir(grade_type):
                    count = 0
                    if 'Testing' in dataset_splits:
                        logger.info('currently loading Testing -> {} data.'.format(
                            os.path.basename(grade_type)))
                        ty = 'Testing'
                        for images, pats in dataloader.loadDataGenerator(
                                grade_type,
                                batch_size=config['batch_size'],
                                loadSurvival=False,
                                csvFilePath=None,
                                loadSeg=False,
                                preprocess=PREPROCESS_DATA,
                                dataset='ISLES'):
                            logger.info('loading patient {} from {}'.format(count, grade_type))
                            if 'HGG_LGG' in grade_type:
                                if ty == 'Testing':
                                    main_data_name = 'testing_hgglgg_patients'
                                    main_data_pat_name = 'testing_hgglgg_patients_pat_name'
                                hdf5_file[main_data_name][count:count + config['batch_size'], ...] = images
                                t = 0
                                for i in range(count, count + config['batch_size']):
                                    hdf5_file[main_data_pat_name][i] = pats[t].split('.')[-2]
                                    t += 1
                                logger.info('loaded {} patient(s) from {}'.format(
                                    count + config['batch_size'], grade_type))
                                count += config['batch_size']
                    else:
                        # TRAINING data handler
                        if os.path.isdir(dataset_splits) and 'Training' in dataset_splits:
                            for grade_type in glob.glob(os.path.join(dataset_splits, '*')):
                                # there may be other files in there (like the survival data), ignore them.
                                if os.path.isdir(grade_type):
                                    count = 0
                                    logger.info('currently loading Training data.')
                                    for images, segmasks, pats in dataloader.loadDataGenerator(
                                            grade_type,
                                            batch_size=config['batch_size'],
                                            loadSurvival=False,
                                            csvFilePath=None,
                                            loadSeg=True,
                                            preprocess=PREPROCESS_DATA,
                                            dataset='ISLES'):
                                        logger.info('loading patient {} from {}'.format(count, grade_type))
                                        hdf5_file['training_data'][count:count + config['batch_size'], ...] = images
                                        hdf5_file['training_data_segmasks'][count:count + config['batch_size'], ...] = segmasks
                                        t = 0
                                        for i in range(count, count + config['batch_size']):
                                            hdf5_file['training_data_pat_name'][i] = pats[t].split('/')[-1]
                                            t += 1
                                        logger.info('loaded {} patient(s) from {}'.format(
                                            count + config['batch_size'], grade_type))
                                        count += config['batch_size']

    # close the HDF5 file
    hdf5_file_main.close()
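# Patient names end up in fixed-length byte-string datasets (dtype 'S100'), filled
# from pats[t].split('.')[-2] above or paths_pat.name.encode('utf-8') below. h5py
# hands these back as bytes, so downstream code on Python 3 generally has to decode
# them. A minimal read-back sketch, assuming the same `config` dict and the dataset
# names used in the Testing branch above:
import h5py

with h5py.File(config['hdf5_filepath_prefix'], mode='r') as f:
    grp = f['original_data']
    raw_names = grp['testing_hgglgg_patients_pat_name'][:]              # numpy array of bytes
    pat_names = [name.decode('utf-8') for name in raw_names if name]    # skip empty slots
    first_patient = grp['testing_hgglgg_patients'][0]                   # first image volume
    logger.info('read back {} testing patient(s)'.format(len(pat_names)))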
def main():
    hdf5_file_main = createHDF5File(config)
    # hdf5_file_main = h5py.File(config['hdf5_filepath_prefix'], mode='w')

    # Go inside the "original_data" parent directory.
    # We need to create the validation data datasets again since the shape has changed.
    hdf5_file = hdf5_file_main['original_data']
    del hdf5_file['validation_data']
    del hdf5_file['validation_data_pat_name']

    # Validation data, with no segmentation masks.
    hdf5_file.create_dataset("validation_data", config['val_shape'], np.float32)
    hdf5_file.create_dataset("validation_data_pat_name",
                             (config['val_shape'][0], ), dtype="S100")

    # Training/Validation data?
    for dataset_splits in config['pathd_src'].iterdir():
        # make sure it's a directory
        if dataset_splits.is_dir() and 'Validation' in dataset_splits.name:
            # VALIDATION data handler
            logger.info('currently loading Validation data.')
            count = 0
            # validation data does not have HGG and LGG distinctions
            for images, paths_pat in dataloader.loadDataGenerator(
                    dataset_splits,
                    loadSurvival=False,
                    csvFilePath=None,
                    loadSeg=False,
                    preprocess=PREPROCESS_DATA):
                hdf5_file['validation_data'][count] = images
                hdf5_file['validation_data_pat_name'][count] = paths_pat.name.encode('utf-8')
                # logger.debug('array equal?: {}'.format(np.array_equal(
                #     hdf5_file['validation_data'][count:count + config['batch_size'], ...], images)))
                # logger.info('loaded {} patient(s) from {}'.format(count + config['batch_size'], dataset_splits))
                count += 1
        else:
            # TRAINING data handler
            if dataset_splits.is_dir() and 'Training' in dataset_splits.name:
                for grade_type in dataset_splits.iterdir():
                    # there may be other files in there (like the survival data), ignore them.
                    if grade_type.is_dir():
                        count = 0
                        logger.info('currently loading Training data.')
                        for images, segmask, paths_pat in dataloader.loadDataGenerator(
                                grade_type,
                                loadSurvival=False,
                                csvFilePath=None,
                                loadSeg=True,
                                preprocess=PREPROCESS_DATA):
                            logger.info('loading patient {} from {}'.format(
                                paths_pat.name, grade_type))
                            if 'HGG' in grade_type.name:
                                hdf5_file['training_data_hgg'][count] = images
                                hdf5_file['training_data_segmasks_hgg'][count] = segmask
                                hdf5_file['training_data_hgg_pat_name'][count] = paths_pat.name.encode('utf-8')
                            elif 'LGG' in grade_type.name:
                                hdf5_file['training_data_lgg'][count] = images
                                hdf5_file['training_data_segmasks_lgg'][count] = segmask
                                hdf5_file['training_data_lgg_pat_name'][count] = paths_pat.name.encode('utf-8')
                            count += 1
                            logger.info('loaded {} patient(s) from {}'.format(count, grade_type))

    # close the HDF5 file
    hdf5_file_main.close()
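# All of the variants above read from a module-level `config` dict, and the last
# one additionally expects a pathlib.Path under 'pathd_src'. The exact values are
# project-specific; the sketch below only lists the keys actually referenced in
# these scripts, with illustrative, hypothetical values.
from pathlib import Path

config = {
    # where the packed HDF5 file is written / appended to (hypothetical path)
    'hdf5_filepath_prefix': '/data/brats_data.h5',
    # root directory that contains the Training / Validation / Testing splits (hypothetical path)
    'data_dir_prefix': '/data/BRATS2018/',
    # pathlib variant of the source directory, used by the iterdir()-based loader
    'pathd_src': Path('/data/BRATS2018/'),
    # how many patients the glob-based loaders yield per step
    'batch_size': 1,
    # shape of the validation dataset; (num_patients, channels, x, y, z) is an assumed layout
    'val_shape': (66, 4, 240, 240, 155),
}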