def __init__(self,
             volpath,
             ext='.npz',
             nb_restart_cycle=None,  # number of files to restart after
             name='single_vol',      # name
             fixed_vol_size=True,    # assumes each volume is fixed size
             # -- the following were referenced by the original body but never
             #    declared as parameters (guaranteed NameError); added here with
             #    defaults so the signature stays backward-compatible.
             data_proc_fn=None,      # processing function applied to the first volume
             patch_size=None,        # per-dimension patch size (or None entries)
             patch_stride=1,         # per-dimension patch stride
             vol_rand_seed=None):    # seed forwarded to _get_file_list for shuffling
    """
    Validate a directory of volume files and compute the restart cycle.

    Scans `volpath` for files with extension `ext`, loads the first volume to
    determine the patch grid size, and asserts that `nb_restart_cycle` does not
    exceed the total number of patches available across all files.

    NOTE(review): this __init__ computes local values only and assigns nothing
    to `self`; presumably the enclosing class (not visible here) stores state
    elsewhere — confirm against the full class definition.
    """
    # get filenames at given paths
    volfiles = _get_file_list(volpath, ext, vol_rand_seed)
    nb_files = len(volfiles)
    assert nb_files > 0, "Could not find any files at %s with extension %s" % (
        volpath, ext)

    # set up restart cycle for volume files --
    # i.e. after how many volumes do we restart
    if nb_restart_cycle is None:
        nb_restart_cycle = nb_files

    # compute subvolume split from the first volume (assumed representative
    # when fixed_vol_size is True)
    vol_data = _load_medical_volume(os.path.join(volpath, volfiles[0]), ext)

    # process volume
    if data_proc_fn is not None:
        vol_data = data_proc_fn(vol_data)

    # NOTE(review): the original body contained a dead expression statement
    # `[f for f in _npz_headers(npz, namelist=['vol_data.npy'])][0][1]`
    # referencing an undefined name `npz` and discarding its result; removed.

    # number of patches each volume yields; only meaningful when the volume
    # size is fixed and a fully-specified patch_size was given
    nb_patches_per_vol = 1
    if fixed_vol_size and (patch_size is not None) and all(
            f is not None for f in patch_size):
        nb_patches_per_vol = np.prod(
            pl.gridsize(vol_data.shape, patch_size, patch_stride))

    # the restart cycle cannot exceed the total number of patches available
    assert nb_restart_cycle <= (nb_files * nb_patches_per_vol), \
        '%s restart cycle (%s) too big (%s) in %s' % \
        (name, nb_restart_cycle, nb_files * nb_patches_per_vol, volpath)
def vol(
        volpath,
        ext='.npz',
        batch_size=1,
        expected_nb_files=-1,
        expected_files=None,
        data_proc_fn=None,  # processing function that takes in one arg (the volume)
        relabel=None,  # relabeling array
        nb_labels_reshape=0,  # reshape to categorial format for keras, need # labels
        keep_vol_size=False,  # whether to keep the volume size on categorical resizing
        name='single_vol',  # name, optional
        nb_restart_cycle=None,  # number of files to restart after
        patch_size=None,  # split the volume in patches? if so, get patch_size
        patch_stride=1,  # split the volume in patches? if so, get patch_stride
        collapse_2d=None,
        extract_slice=None,  # NOTE(review): unused in this body — confirm intent
        force_binary=False,  # NOTE(review): unused in this body — confirm intent
        nb_feats=1,
        patch_rand=False,
        patch_rand_seed=None,
        vol_rand_seed=None,
        binary=False,
        yield_incomplete_final_batch=True,
        verbose=False):
    """
    generator for single volume (or volume patches) from a list of files

    simple volume generator that loads a volume (via npy/mgz/nii/niigz),
    processes it, and prepares it for keras model formats

    if a patch size is passed, breaks the volume into patches and generates those

    Yields:
        np.ndarray batches of stacked patches (first axis is the batch axis).

    Raises:
        ValueError: if the patch generator produced nothing for a file.
        AssertionError: on missing files, NaN/inf data, or size mismatches.
    """
    # get filenames at given paths
    volfiles = _get_file_list(volpath, ext, vol_rand_seed)
    nb_files = len(volfiles)
    assert nb_files > 0, "Could not find any files at %s with extension %s" % (
        volpath, ext)

    # compute subvolume split: load the first volume to learn its shape
    vol_data = _load_medical_volume(os.path.join(volpath, volfiles[0]), ext)

    # process volume
    if data_proc_fn is not None:
        vol_data = data_proc_fn(vol_data)

    nb_patches_per_vol = 1
    if patch_size is not None and all(f is not None for f in patch_size):
        # if patch_size omits the feature (last) dimension, extend patch_size
        # and patch_stride to cover it with the full extent
        if relabel is None and len(patch_size) == (len(vol_data.shape) - 1):
            patch_size = [*patch_size, vol_data.shape[-1]]
            patch_stride = [f for f in patch_stride]
            patch_stride = [*patch_stride, vol_data.shape[-1]]
        assert len(vol_data.shape) == len(
            patch_size), "Vol dims %d are not equal to patch dims %d" % (len(
                vol_data.shape), len(patch_size))
        nb_patches_per_vol = np.prod(
            pl.gridsize(vol_data.shape, patch_size, patch_stride))

    # set up restart cycle: default is one full pass over the files
    if nb_restart_cycle is None:
        print("setting restart cycle to", nb_files)
        nb_restart_cycle = nb_files

    assert nb_restart_cycle <= (nb_files * nb_patches_per_vol), \
        '%s restart cycle (%s) too big (%s) in %s' % \
        (name, nb_restart_cycle, nb_files * nb_patches_per_vol, volpath)

    # check the number of files matches expected (if passed)
    if expected_nb_files >= 0:
        assert nb_files == expected_nb_files, \
            "number of files do not match: %d, %d" % (nb_files, expected_nb_files)
    if expected_files is not None:
        if not (volfiles == expected_files):
            # deliberate warning rather than hard failure
            print(
                'file lists did not match. You should probably stop execution.',
                file=sys.stderr)
            print(len(volfiles), len(expected_files))

    if verbose:
        print('nb_restart_cycle:', nb_restart_cycle)

    # iterate through files
    fileidx = -1
    batch_idx = -1
    feat_idx = 0
    batch_shape = None  # shape (sans batch axis) of the batch being accumulated
    while True:
        fileidx = np.mod(fileidx + 1, nb_restart_cycle)
        if verbose and fileidx == 0:
            print('starting %s cycle' % name)

        # read next file (circular)
        try:
            if verbose:
                print('opening %s' % os.path.join(volpath, volfiles[fileidx]))
            file_name = os.path.join(volpath, volfiles[fileidx])
            vol_data = _load_medical_volume(file_name, ext, verbose)
        # FIX: was a bare `except:` (would also intercept KeyboardInterrupt
        # just to log); narrowed to Exception — it still re-raises.
        except Exception:
            debug_error_msg = "#files: %d, fileidx: %d, nb_restart_cycle: %d. error: %s"
            print(
                debug_error_msg %
                (len(volfiles), fileidx, nb_restart_cycle, sys.exc_info()[0]))
            raise

        # process volume
        if data_proc_fn is not None:
            vol_data = data_proc_fn(vol_data)

        # the original segmentation files have non-sequential relabel (i.e. some relabel are
        # missing to avoid exploding our model, we only care about the relabel that exist.
        if relabel is not None:
            vol_data = _relabel(vol_data, relabel)

        # split volume into patches if necessary and yield
        if patch_size is None:
            this_patch_size = vol_data.shape
            patch_stride = [1 for f in this_patch_size]
        else:
            this_patch_size = [f for f in patch_size]
            # None entries mean "full extent of that dimension"
            for pi, p in enumerate(this_patch_size):
                if p is None:
                    this_patch_size[pi] = vol_data.shape[pi]
                    patch_stride[pi] = 1

        # FIX: was `assert ~np.any(...)` — bitwise not where logical not is meant
        assert not np.any(
            np.isnan(vol_data)), "Found a nan for %s" % volfiles[fileidx]
        assert np.all(
            np.isfinite(vol_data)), "Found a inf for %s" % volfiles[fileidx]

        patch_gen = patch(vol_data,
                          this_patch_size,
                          patch_stride=patch_stride,
                          nb_labels_reshape=nb_labels_reshape,
                          batch_size=1,
                          infinite=False,
                          collapse_2d=collapse_2d,
                          patch_rand=patch_rand,
                          patch_rand_seed=patch_rand_seed,
                          keep_vol_size=keep_vol_size)

        empty_gen = True
        patch_idx = -1
        for lpatch in patch_gen:
            empty_gen = False
            patch_idx += 1

            # add to feature stack: concatenate nb_feats patches along the
            # last (feature) axis before emitting
            if np.mod(feat_idx, nb_feats) == 0:
                vol_data_feats = lpatch
            else:
                vol_data_feats = np.concatenate([vol_data_feats, lpatch],
                                                np.ndim(lpatch) - 1)
            feat_idx += 1

            if binary:
                vol_data_feats = vol_data_feats.astype(bool)

            if np.mod(feat_idx, nb_feats) == 0:
                # FIX: was `vol_data_feats[1:]` (slices the array, not its
                # shape), so the size-change comparison below compared arrays
                # elementwise and could never work as intended.
                feats_shape = vol_data_feats.shape[1:]

                # yield previous batch if the new volume has different patch sizes
                if batch_shape is not None and (feats_shape != batch_shape):
                    batch_idx = -1
                    batch_shape = None
                    print('switching patch sizes')
                    yield np.vstack(vol_data_batch)

                # add to batch of volume data, unless the batch is currently empty
                if batch_idx == -1:
                    vol_data_batch = [vol_data_feats]
                    # FIX: same shape-vs-slice bug as feats_shape above
                    batch_shape = vol_data_feats.shape[1:]
                else:
                    vol_data_batch = [*vol_data_batch, vol_data_feats]

                # yield patch
                batch_idx += 1
                batch_done = batch_idx == batch_size - 1
                files_done = np.mod(fileidx + 1, nb_restart_cycle) == 0
                final_batch = yield_incomplete_final_batch and files_done and \
                    patch_idx == (nb_patches_per_vol - 1)
                if final_batch:  # verbose and
                    print('last batch in %s cycle %d. nb_batch:%d' %
                          (name, fileidx, len(vol_data_batch)))

                if batch_done or final_batch:
                    batch_idx = -1
                    q = np.vstack(vol_data_batch)
                    yield q

        if empty_gen:
            # FIX: was `ValueError('...%s', volfiles[fileidx])` — comma instead
            # of `%`, so the filename was never interpolated into the message
            raise ValueError('Patch generator was empty for file %s' %
                             volfiles[fileidx])