def __init__(
        self,
        conf_path: str,
        patch_shape: Sequence[int],  # [z]yx
        transform: Callable = transforms.Identity(),
        bounds: Optional[Sequence[Sequence[int]]] = None,  # xyz
        mag: int = 1,
        in_memory: bool = True,
        epoch_size: int = 100,
        disable_memory_check: bool = False,
        verbose: bool = False):
    """Store the configuration and open the KNOSSOS dataset at ``conf_path``.

    Args:
        conf_path: Path handed to ``knossos_utils.KnossosDataset``.
        patch_shape: Patch shape in ([z], y, x) order. A 2-element shape
            gets a z extent of 1 appended in ``self.patch_shape_xyz``.
        transform: Stored as ``self.transform``.
        bounds: ``[lower, upper]`` coordinates in xyz order. If ``None``,
            ``[0, 0, 0]`` and ``self.kd.boundary`` are used.
        mag: Stored as ``self.mag``.
        in_memory: If true, ``self._load_into_memory()`` is called once
            all attributes are set.
        epoch_size: Stored as ``self.epoch_size``.
        disable_memory_check: Stored as ``self.disable_memory_check``.
        verbose: Forwarded to the KNOSSOS dataset as ``show_progress``.
    """
    # Plain configuration values
    self.conf_path = conf_path
    self.patch_shape = np.array(patch_shape)
    self.transform = transform
    self.mag = mag
    self.in_memory = in_memory
    self.epoch_size = epoch_size
    self.disable_memory_check = disable_memory_check
    self.verbose = verbose

    self.kd = knossos_utils.KnossosDataset(
        self.conf_path, show_progress=self.verbose
    )

    # Patch geometry: keep both the zyx and the xyz representation.
    self.dim = len(self.patch_shape)
    shape_xyz = self.patch_shape[::-1]  # zyx -> xyz
    if self.dim == 2:
        shape_xyz = np.array([*shape_xyz, 1])  # z=1 for 2D
    self.patch_shape_xyz = shape_xyz

    # Region of the dataset that is used (defaults to the full extent).
    region = [[0, 0, 0], self.kd.boundary] if bounds is None else bounds
    self.bounds = np.array(region)
    lower, upper = self.bounds[0], self.bounds[1]
    self.shape = upper - lower

    self.raw = None  # Will be filled with raw data if in_memory is True
    if self.in_memory:
        self._load_into_memory()
def __init__(
        self,
        inp_paths,
        target_paths,
        transform=transforms.Identity(),
        offset=None,
        in_memory=True,
        inp_dtype=np.float32,
        target_dtype=np.int64,
        epoch_multiplier=1,  # Pretend to have more data in one epoch
):
    """Store the configuration and optionally pre-load all images.

    Args:
        inp_paths: Iterable of input image file names.
        target_paths: Iterable of target image file names.
        transform: Stored as ``self.transform``.
        offset: Stored as ``self.offset``.
        in_memory: If true, every input and target image is read with
            ``imageio.imread`` right away and kept in ``self.inps`` /
            ``self.targets``.
        inp_dtype: dtype the pre-loaded inputs are converted to.
        target_dtype: dtype the pre-loaded targets are converted to.
        epoch_multiplier: Stored as ``self.epoch_multiplier``.
    """
    super().__init__()
    self.inp_paths = inp_paths
    self.target_paths = target_paths
    self.transform = transform
    self.offset = offset
    self.in_memory = in_memory
    self.inp_dtype = inp_dtype
    self.target_dtype = target_dtype
    self.epoch_multiplier = epoch_multiplier

    if self.in_memory:
        # Use the configured dtypes rather than hard-coded float32/int64,
        # so that inp_dtype/target_dtype also apply to pre-loaded data.
        # With the default arguments the result is unchanged.
        self.inps = [
            # [None] prepends a length-1 axis to each input image.
            np.array(imageio.imread(fname)).astype(self.inp_dtype)[None]
            for fname in self.inp_paths
        ]
        self.targets = [
            np.array(imageio.imread(fname)).astype(self.target_dtype)
            for fname in self.target_paths
        ]
def __init__(
        self,
        inp_paths,
        target_paths,
        transform=transforms.Identity(),
        in_memory=True,
        inp_dtype=np.float32,
        target_dtype=np.int64,
):
    """Store the configuration and optionally pre-load all images.

    Args:
        inp_paths: Iterable of input image file names.
        target_paths: Iterable of target image file names.
        transform: Stored as ``self.transform``.
        in_memory: If true, every input and target image is read with
            ``imageio.imread`` right away and kept in ``self.inps`` /
            ``self.targets``.
        inp_dtype: dtype the pre-loaded inputs are converted to.
        target_dtype: dtype the pre-loaded targets are converted to.
    """
    super().__init__()
    self.inp_paths = inp_paths
    self.target_paths = target_paths
    self.transform = transform
    self.in_memory = in_memory
    self.inp_dtype = inp_dtype
    self.target_dtype = target_dtype

    if self.in_memory:
        # Use the configured dtypes rather than hard-coded float32/int64,
        # so that inp_dtype/target_dtype also apply to pre-loaded data.
        # With the default arguments the result is unchanged.
        self.inps = [
            # [None] prepends a length-1 axis to each input image.
            np.array(imageio.imread(fname)).astype(self.inp_dtype)[None]
            for fname in self.inp_paths
        ]
        self.targets = [
            np.array(imageio.imread(fname)).astype(self.target_dtype)
            for fname in self.target_paths
        ]
def get_preview_batch(
        h5data: Tuple[str, str],
        preview_shape: Optional[Tuple[int, ...]] = None,
        transform: Callable = transforms.Identity(),
        in_memory: bool = False
) -> torch.Tensor:
    """Read a (center-cropped) preview batch from an HDF5 dataset.

    Args:
        h5data: ``(file name, dataset key)`` of the HDF5 data source.
        preview_shape: Shape of the region to cut out around the center of
            the source. If ``None``, the whole source is sliced.
        transform: Called as ``transform(inp, None)``; the first element of
            its result is used.
        in_memory: If true, the whole dataset is read into memory before
            slicing.

    Returns:
        The transformed preview data as a ``torch.Tensor``.

    Raises:
        ValueError: If ``preview_shape`` does not fit into the source.
    """
    fname, key = h5data
    # The context manager closes the file once the data has been read.
    with h5py.File(fname, 'r') as h5file:
        inp_h5 = h5file[key]
        if in_memory:
            # ``Dataset.value`` was removed in h5py 3; ``[()]`` reads everything.
            inp_h5 = inp_h5[()]

        if preview_shape is None:
            # No spatial rank was requested, so use the rank of the source.
            # NOTE(review): this assumes the source has no leading channel
            # axis when preview_shape is None -- confirm against slice_h5.
            dim = len(inp_h5.shape)
        else:
            dim = len(preview_shape)  # 2D or 3D
        inp_shape = np.array(inp_h5.shape[-dim:])

        if preview_shape is None:  # Slice everything
            inp_lo = np.zeros_like(inp_shape)
            inp_hi = inp_shape
        else:  # Slice only a preview_shape-sized region from the center of the input
            halfshape = np.array(preview_shape) // 2
            inp_center = inp_shape // 2
            inp_lo = inp_center - halfshape
            inp_hi = inp_center + halfshape
            if np.any(inp_center < halfshape):
                raise ValueError(
                    'preview_shape is too big for shape of input source. '
                    f'Requested {preview_shape}, but can only deliver {tuple(inp_shape)}.'
                )

        memstr = ' (in memory)' if in_memory else ''
        logger.info(f'\nPreview data{memstr}:')
        logger.info(
            f' input: {fname}[{key}]: {inp_h5.shape} ({inp_h5.dtype})\n')
        inp_np = slice_h5(inp_h5, inp_lo, inp_hi, prepend_empty_axis=True)

    if inp_np.ndim == dim + 1:  # Should be dim + 2 for (N, C) dims
        inp_np = inp_np[:, None]  # Add missing C dim
    inp_np, _ = transform(inp_np, None)
    return torch.from_numpy(inp_np)
def __init__(
        self,
        conf_path: str,
        patch_shape: Sequence[int],  # [z]yx
        transform: Callable = transforms.Identity(),
        bounds: Optional[Sequence[Sequence[int]]] = None,  # xyz
        mag: int = 1,
        mode: str = 'in_memory',
        epoch_size: int = 100,
        disable_memory_check: bool = False,
        verbose: bool = False,
        cache_size: int = 50,
        cache_reuses: int = 10):
    """Store the configuration, open the KNOSSOS dataset and prepare data access.

    Args:
        conf_path: Path handed to ``knossos_utils.KnossosDataset``.
        patch_shape: Patch shape in ([z], y, x) order. A 2-element shape
            gets a z extent of 1 appended in ``self.patch_shape_xyz``.
        transform: Stored as ``self.transform``.
        bounds: ``[lower, upper]`` coordinates in xyz order. If ``None``,
            ``[0, 0, 0]`` and ``self.kd.boundary`` are used.
        mag: Stored as ``self.mag``.
        mode: One of ``'in_memory'``, ``'caching'`` or ``'disk'``.
            ``'in_memory'`` triggers ``self._load_into_memory()``,
            ``'caching'`` triggers ``self._fill_cache()``; ``'disk'``
            triggers neither.
        epoch_size: Stored as ``self.epoch_size``.
        disable_memory_check: Stored as ``self.disable_memory_check``.
        verbose: Forwarded to the KNOSSOS dataset as ``show_progress``.
        cache_size: Stored as ``self.cache_size``.
        cache_reuses: Stored as ``self.cache_reusages``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    # Plain configuration values
    self.conf_path = conf_path
    self.patch_shape = np.array(patch_shape)
    self.transform = transform
    self.mag = mag
    self.epoch_size = epoch_size
    self.disable_memory_check = disable_memory_check
    self.verbose = verbose
    self.cache_size = cache_size
    self.cache_reusages = cache_reuses

    supported_modes = ('in_memory', 'caching', 'disk')
    if mode not in supported_modes:
        raise ValueError(
            f'mode has to be one of ``in_memory``, ``caching`` or ``disk``, but is {mode}'
        )
    self.mode = mode

    self.kd = knossos_utils.KnossosDataset(
        self.conf_path, show_progress=self.verbose
    )

    # Patch geometry: keep both the zyx and the xyz representation.
    self.dim = len(self.patch_shape)
    shape_xyz = self.patch_shape[::-1]  # zyx -> xyz
    if self.dim == 2:
        shape_xyz = np.array([*shape_xyz, 1])  # z=1 for 2D
    self.patch_shape_xyz = shape_xyz

    # Region of the dataset that is used (defaults to the full extent).
    region = [[0, 0, 0], self.kd.boundary] if bounds is None else bounds
    self.bounds = np.array(region)
    lower, upper = self.bounds[0], self.bounds[1]
    self.shape = upper - lower

    self.raw = None  # Will be filled with raw data if mode is 'in_memory'
    if self.mode == 'in_memory':
        self._load_into_memory()
    elif self.mode == 'caching':
        self._fill_cache()
def __init__(
        self,
        inp_path=None,
        target_path=None,
        train=True,
        inp_key='raw',
        target_key='lab',
        # offset=(0, 0, 0),
        pool=(1, 1, 1),
        transform: Callable = transforms.Identity(),
        out_channels: Optional[int] = None,
):
    """Load one input/target cube pair from HDF5 files into memory.

    Args:
        inp_path: Path of the input HDF5 file. If ``None``, a default below
            ``~/neuro_data_cdhw`` is used (cube 0 if ``train`` else cube 2).
        target_path: Path of the target HDF5 file, with the same kind of
            default as ``inp_path``.
        train: Only selects which default cube is used.
        inp_key: Dataset key inside the input file.
        target_key: Dataset key inside the target file.
        pool: Per-axis stride with which the target is subsampled.
        transform: Stored as ``self.transform``.
        out_channels: Stored as ``self.out_channels``.
    """
    super().__init__()
    self.transform = transform
    self.out_channels = out_channels

    cube_id = 0 if train else 2
    if inp_path is None:
        inp_path = expanduser(f'~/neuro_data_cdhw/raw_{cube_id}.h5')
    if target_path is None:
        target_path = expanduser(
            f'~/neuro_data_cdhw/barrier_int16_{cube_id}.h5')
    self.inp_file = h5py.File(os.path.expanduser(inp_path), 'r')
    self.target_file = h5py.File(os.path.expanduser(target_path), 'r')
    self.inp = self.inp_file[inp_key][()].astype(np.float32)
    self.target = self.target_file[target_key][()].astype(np.int64)
    self.target = self.target[0]  # Squeeze superfluous first dimension
    # Handle pooling (dirty hack TODO)
    self.target = self.target[::pool[0], ::pool[1], ::pool[2]]

    # Cut inp and target to same size
    inp_shape = np.array(self.inp.shape[1:])
    target_shape = np.array(self.target.shape)
    diff = inp_shape - target_shape
    offset = diff // 2  # offset from image boundaries
    # Use ``offset + target_shape`` as the upper bound. Cropping ``offset``
    # from both sides would leave inp one element larger than the target
    # whenever the size difference along an axis is odd.
    self.inp = self.inp[
        :,
        offset[0]:offset[0] + target_shape[0],
        offset[1]:offset[1] + target_shape[1],
        offset[2]:offset[2] + target_shape[2],
    ]

    # Using file contents from memory -> no need to keep the file open.
    self.close_files()
def __init__(
        self,
        inp_paths,
        target_paths,
        transform=transforms.Identity(),
        offset=None,
        in_memory=True,
        inp_dtype=np.float32,
        target_dtype=np.int64,
        epoch_multiplier=1,  # Pretend to have more data in one epoch
):
    """Store the configuration and optionally pre-load all images.

    Input images may be single-channel (2 array dimensions) or
    multi-channel (3 array dimensions), but the two kinds must not be mixed.

    Args:
        inp_paths: Iterable of input image file names.
        target_paths: Iterable of target image file names.
        transform: Stored as ``self.transform``.
        offset: Stored as ``self.offset``.
        in_memory: If true, every input and target image is read with
            ``imageio.imread`` right away and kept in ``self.inps`` /
            ``self.targets``.
        inp_dtype: dtype the pre-loaded inputs are converted to.
        target_dtype: dtype the pre-loaded targets are converted to.
        epoch_multiplier: Stored as ``self.epoch_multiplier``.

    Raises:
        RuntimeError: If single- and multi-channel inputs are mixed or an
            input image has neither 2 nor 3 dimensions (only checked when
            ``in_memory`` is true).
    """
    super().__init__()
    self.inp_paths = inp_paths
    self.target_paths = target_paths
    self.transform = transform
    self.offset = offset
    self.in_memory = in_memory
    self.inp_dtype = inp_dtype
    self.target_dtype = target_dtype
    self.epoch_multiplier = epoch_multiplier

    if self.in_memory:
        self.inps = []
        # Remember one file of each kind so that mixing is detected no
        # matter which kind is encountered first.
        rgb_fname = None
        gray_fname = None
        for fname in self.inp_paths:
            # Use the configured dtype (default is float32, as before).
            inp = imageio.imread(fname).astype(self.inp_dtype)
            if inp.ndim == 2:
                if rgb_fname is not None:
                    raise RuntimeError(f'Mixed multi-channel {rgb_fname} and single-channel images {fname} in gt.')
                gray_fname = fname
                inp = inp[None]  # (H, W) -> (C=1, H, W)
            elif inp.ndim == 3:
                if gray_fname is not None:
                    raise RuntimeError(f'Mixed multi-channel {fname} and single-channel images {gray_fname} in gt.')
                rgb_fname = fname
                inp = inp.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)
            else:
                raise RuntimeError(f'Image {fname} has shape {inp.shape}, but ndim should be 2 or 3.')
            self.inps.append(inp)
        self.targets = [
            np.array(imageio.imread(fname)).astype(self.target_dtype)
            for fname in self.target_paths
        ]
def __init__(
        self,
        conf_path_label: str,
        conf_path_raw_data: str,
        dir_path_label: str,
        patch_shape: Sequence[int],  # [z]yx
        transform: Callable = transforms.Identity(),
        mag: int = 1,
        epoch_size: int = 100,
        label_names: Optional[Sequence[str]] = None,
        knossos_bounds: Optional[Sequence[Sequence[Sequence[int]]]] = None,  # xyz
        label_offset: int = 0,
        label_order: Optional[Sequence[int]] = None):
    """Store the configuration, open the label dataset and load the data.

    Args:
        conf_path_label: Path handed to ``knossos_utils.KnossosDataset``.
        conf_path_raw_data: Stored as ``self.conf_path_raw_data``.
        dir_path_label: Stored as ``self.dir_path``.
        patch_shape: Patch shape in ([z], y, x) order. A 2-element shape
            gets a z extent of 1 appended in ``self.patch_shape_xyz``.
        transform: Stored as ``self.transform``.
        mag: Stored as ``self.mag``.
        epoch_size: Stored as ``self.epoch_size``.
        label_names: Passed to ``self._get_file_bounds``.
        knossos_bounds: Stored as ``self.knossos_bounds`` (xyz order).
        label_offset: Stored as ``self.label_offset``.
        label_order: Stored as ``self.label_order``.
    """
    # Paths
    self.conf_path_label = conf_path_label
    self.conf_path_raw_data = conf_path_raw_data
    self.dir_path = dir_path_label

    # Patch geometry: keep both the zyx and the xyz representation.
    shape_zyx = np.array(patch_shape)
    self.patch_shape = shape_zyx
    self.dim = len(shape_zyx)
    shape_xyz = shape_zyx[::-1]  # zyx -> xyz
    if self.dim == 2:
        shape_xyz = np.array([*shape_xyz, 1])  # z=1 for 2D
    self.patch_shape_xyz = shape_xyz

    # Sampling configuration
    self.transform = transform
    self.mag = mag
    self.epoch_size = epoch_size

    self.kd = knossos_utils.KnossosDataset(
        self.conf_path_label, show_progress=False
    )

    # Containers and label settings; all set before the loading calls below.
    self.inp_targets = []
    self.file_bounds = {}
    self.kzip_files_path = []
    self.knossos_bounds = knossos_bounds
    self.label_offset = label_offset  # todo: verify correct handling of this offset
    self.label_order = label_order

    self._get_file_bounds(label_names)
    self._get_data()
def __init__(
        self,
        inp_paths,
        target_paths,
        transform=transforms.Identity(),
        offset: Sequence[int] = (0, 0, 0),
        in_memory=True,
        inp_dtype=np.float32,
        target_dtype=np.int64,
        epoch_multiplier=1,  # Pretend to have more data in one epoch
):
    """Store the configuration and optionally pre-load all images.

    Each entry of ``inp_paths`` is either an image file or a directory. For
    a directory, all files in it are loaded in sorted order and
    concatenated along the channel axis into one multi-layer input.

    Args:
        inp_paths: Iterable of input image files or directories.
        target_paths: Iterable of target image file names.
        transform: Stored as ``self.transform``.
        offset: Stored as ``self.offset``.
        in_memory: If true, all inputs and targets are read right away and
            kept in ``self.inputs`` / ``self.targets``.
        inp_dtype: dtype the pre-loaded inputs are converted to.
        target_dtype: dtype the pre-loaded targets are converted to.
        epoch_multiplier: Stored as ``self.epoch_multiplier``.

    Raises:
        RuntimeError: If single-channel and 3-channel images are mixed, or
            an image has neither 2 nor 3 dimensions (only checked when
            ``in_memory`` is true).
    """
    super().__init__()
    self.inp_paths = inp_paths
    self.target_paths = target_paths
    self.transform = transform
    self.offset = offset
    self.in_memory = in_memory
    self.inp_dtype = inp_dtype
    self.target_dtype = target_dtype
    self.epoch_multiplier = epoch_multiplier

    def load_image(fname):
        """Read one image and return it in (C, H, W) layout."""
        # Use the configured dtype (default is float32, as before).
        inp = imageio.imread(fname).astype(self.inp_dtype)
        if inp.ndim == 2:
            inp = inp[None]  # (H, W) -> (C=1, H, W)
        elif inp.ndim == 3:
            inp = inp.transpose(2, 0, 1)  # (H, W, C) -> (C, H, W)
        else:
            raise RuntimeError(
                f'Image {fname} has shape {inp.shape}, but ndim should be 2 or 3.'
            )
        return inp

    def example_fname(fnames):
        """Return the layer-0 file name if recorded, else any recorded one."""
        if 0 in fnames:
            return fnames[0]
        return next(iter(fnames.values()))

    if self.in_memory:
        self.inputs = []
        # Layer index -> a file seen with 3 channels / 1 channel in that layer
        rgb_fnames = {}
        gray_fnames = {}
        for input_path in self.inp_paths:
            if os.path.isdir(input_path):
                multi_input = []
                layer_files = sorted(glob.glob(str(input_path) + '/*'))
                for channel_idx, input_file in enumerate(layer_files):
                    inp = load_image(str(input_file))
                    if inp.shape[0] == 1:
                        gray_fnames[channel_idx] = input_file
                    elif inp.shape[0] == 3:
                        rgb_fnames[channel_idx] = input_file
                    rgb_fname = rgb_fnames.get(channel_idx)
                    if rgb_fname is not None and inp.shape[0] == 1:
                        raise RuntimeError(
                            f'GT input layer {channel_idx} has mixed multi-channel ({rgb_fname}) and single-channel images ({input_file}).'
                        )
                    gray_fname = gray_fnames.get(channel_idx)
                    if gray_fname is not None and inp.shape[0] == 3:
                        raise RuntimeError(
                            f'GT input layer {channel_idx} has mixed multi-channel ({input_file}) and single-channel images ({gray_fname}).'
                        )
                    multi_input.append(inp)
                self.inputs.append(np.concatenate(multi_input))
            else:
                inp = load_image(input_path)
                n_channels = inp.shape[0]
                if n_channels == 1:
                    gray_fnames[0] = input_path
                elif n_channels == 3:
                    rgb_fnames[0] = input_path
                mixed = (
                    (len(rgb_fnames) > 0 and n_channels == 1)
                    or (len(gray_fnames) > 0 and n_channels == 3)
                )
                if mixed:
                    # The conflicting entry may belong to a directory layer
                    # other than 0, so indexing [0] directly could raise a
                    # KeyError instead of the intended RuntimeError. Both
                    # dicts are non-empty here: one was just filled for this
                    # file, the other triggered the condition.
                    rgb_example = example_fname(rgb_fnames)
                    gray_example = example_fname(gray_fnames)
                    raise RuntimeError(
                        f'Mixed multi-channel ({rgb_example}) and single-channel images ({gray_example}) in gt.'
                    )
                self.inputs.append(inp)
        self.targets = [
            np.array(imageio.imread(fname)).astype(self.target_dtype)
            for fname in self.target_paths
        ]
def __init__(
        self,
        input_sources: List[Tuple[str, str]],
        patch_shape: Sequence[int],
        target_sources: Optional[List[Tuple[str, str]]] = None,
        offset: Sequence[int] = (0, 0, 0),
        cube_prios: Optional[Sequence[float]] = None,
        aniso_factor: int = 2,
        target_discrete_ix: Optional[List[int]] = None,
        input_discrete_ix: Optional[List[int]] = None,
        target_dtype: np.dtype = np.int64,
        train: bool = True,
        warp_prob: Union[bool, float] = False,
        warp_kwargs: Optional[Dict[str, Any]] = None,
        epoch_size: int = 100,
        transform: Callable = transforms.Identity(),
        in_memory: bool = False,
        cube_meta=_DefaultCubeMeta(),
):
    """Store the configuration and open the data sources.

    Args:
        input_sources: List of 2-tuples of strings describing the inputs.
        patch_shape: Shape of the input patches.
        target_sources: Optional list of target sources; if given, it must
            have the same length as ``input_sources``.
        offset: Per-axis margin; the target patch shape is
            ``patch_shape - 2 * offset``.
        cube_prios: Stored as ``self.cube_prios``.
        aniso_factor: Stored as ``self.aniso_factor``.
        target_discrete_ix: Stored as ``self.target_discrete_ix``.
        input_discrete_ix: Stored as ``self.input_discrete_ix``.
        target_dtype: Stored as ``self._target_dtype``.
        train: If false and ``warp_prob > 0``, a warning is logged.
        warp_prob: Stored as ``self.warp_prob``.
        warp_kwargs: Stored as ``self.warp_kwargs``; ``None`` becomes ``{}``.
        epoch_size: Stored as ``self.epoch_size`` and, for later resets, as
            ``self._orig_epoch_size``.
        transform: Stored as ``self.transform``.
        in_memory: Stored as ``self.in_memory``.
        cube_meta: Stored as ``self.cube_meta``.

    Raises:
        ValueError: If ``target_sources`` is given but differs in length
            from ``input_sources``.
    """
    # Early checks
    if target_sources is not None and len(input_sources) != len(target_sources):
        raise ValueError(
            'If target_sources is not None, input_sources and '
            'target_sources must be lists of same length.')
    if not train and warp_prob > 0:
        logger.warning(
            'Augmentations should not be used on validation data.')

    # batch properties
    self.train = train
    self.warp_prob = warp_prob
    self.warp_kwargs = warp_kwargs if warp_kwargs is not None else {}

    # general properties
    self.input_sources = input_sources
    self.target_sources = target_sources
    self.cube_meta = cube_meta
    self.cube_prios = cube_prios
    self.aniso_factor = aniso_factor
    self.target_discrete_ix = target_discrete_ix
    self.input_discrete_ix = input_discrete_ix
    self.epoch_size = epoch_size
    self._orig_epoch_size = epoch_size  # Store original epoch_size so it can be reset later.
    self.in_memory = in_memory

    # ``np.int`` was an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24); using ``int`` yields the same dtype.
    self.patch_shape = np.array(patch_shape, dtype=int)
    # NOTE(review): this is the ndim of the shape *array* itself (1 for a
    # flat patch_shape), not the number of patch dimensions -- confirm
    # that this is what users of self.ndim expect.
    self.ndim = self.patch_shape.ndim
    self.offset = np.array(offset)
    self.target_patch_shape = self.patch_shape - self.offset * 2
    self._target_dtype = target_dtype
    self.transform = transform

    # Setup internal stuff
    self.pid = os.getpid()

    # The following fields will be filled when reading data
    self.n_labelled_pixels = 0
    self.inputs: List[DataSource] = []
    self.targets: List[DataSource] = []

    self.load_data()  # Open dataset files

    self.n_successful_warp = 0
    self.n_failed_warp = 0
    self._failed_warp_warned = False
def __init__(
        self,
        input_h5data: List[Tuple[str, str]],
        target_h5data: List[Tuple[str, str]],
        patch_shape: Sequence[int],
        cube_prios: Optional[Sequence[float]] = None,
        aniso_factor: int = 2,
        target_discrete_ix: Optional[List[int]] = None,
        train: bool = True,
        preview_shape: Optional[Sequence[int]] = None,
        warp: Union[bool, float] = False,
        warp_kwargs: Optional[Dict[str, Any]] = None,
        epoch_size: int = 100,
        transform: Callable = transforms.Identity(),
        classes: Optional[Sequence[int]] = None
):
    """Store the configuration, open the HDF5 sources and load the preview.

    Args:
        input_h5data: List of ``(file name, dataset key)`` tuples for the
            inputs. ``~`` in file names is expanded.
        target_h5data: Same as ``input_h5data`` but for the targets; must
            have the same length.
        patch_shape: Shape of the input patches.
        cube_prios: Stored as ``self.cube_prios``.
        aniso_factor: Stored as ``self.aniso_factor``.
        target_discrete_ix: Stored as ``self.target_discrete_ix``.
        train: If true, ``preview_shape`` must be ``None``. If false and
            ``warp`` is truthy, a warning is logged.
        preview_shape: Stored as ``self.preview_shape``.
        warp: Stored as ``self.warp``.
        warp_kwargs: Stored as ``self.warp_kwargs``.
        epoch_size: Stored as ``self.epoch_size`` and, for later resets, as
            ``self._orig_epoch_size``.
        transform: Stored as ``self.transform``; ``None`` is replaced by an
            identity function.
        classes: If given, ``self.num_classes`` is set to its length.

    Raises:
        ValueError: If the input and target lists differ in length, or if
            ``preview_shape`` is given while ``train`` is true.
    """
    # Early checks
    if len(input_h5data) != len(target_h5data):
        raise ValueError("input_h5data and target_h5data must be lists of same length!")
    if not train:
        if warp:
            logger.warning(
                'Augmentations should not be used on validation data.'
            )
    else:
        if preview_shape is not None:
            raise ValueError('preview_shape must be None if train is True.')

    # batch properties
    self.train = train
    self.warp = warp
    self.warp_kwargs = warp_kwargs

    # general properties
    input_h5data = [(expanduser(fn), key) for (fn, key) in input_h5data]
    target_h5data = [(expanduser(fn), key) for (fn, key) in target_h5data]
    self.input_h5data = input_h5data
    self.target_h5data = target_h5data
    self.cube_prios = cube_prios
    self.aniso_factor = aniso_factor
    self.target_discrete_ix = target_discrete_ix
    self.epoch_size = epoch_size
    self._orig_epoch_size = epoch_size  # Store original epoch_size so it can be reset later.

    # TODO: This is currently only used for determining num_classes. It
    #       could be used for adding support for targets that are not
    #       labelled in the expected order [0, 1, ..., num_classes - 1] or
    #       as a whitelist that excludes classes that should be ignored.
    self.classes = classes
    self.num_classes = None if classes is None else len(classes)

    # ``np.int`` was an alias of the builtin ``int`` and has been removed
    # from NumPy (1.24); using ``int`` yields the same dtype.
    self.patch_shape = np.array(patch_shape, dtype=int)
    # NOTE(review): this is the ndim of the shape *array* itself (1 for a
    # flat patch_shape), not the number of patch dimensions -- confirm
    # that this is what users of self.ndim expect.
    self.ndim = self.patch_shape.ndim
    # TODO: Make strides and offsets for targets configurable
    # self.strides = ...
    # strides will need to be applied *during* dataset iteration now
    # (-> strided reading in slice_h5()... or should strides be applied
    #     with some fancy downscaling operator? Naively strided reading
    #     could mess up targets in unfortunate cases:
    #     e.g. ``[0, 1, 0, 1, 0, 1][::2] == [0, 0, 0]``, discarding all 1s).
    self.offsets = np.array([0, 0, 0])
    self.target_patch_size = self.patch_shape - self.offsets * 2
    self._target_dtype = np.int64
    # The following will be inferred when reading data
    self.n_labelled_pixels = 0

    # Actual data fields
    self.inputs = []
    self.targets = []

    self.preview_shape = preview_shape
    self._preview_batch = None

    # Setup internal stuff
    # Seed the RNG from the fractional part of the scaled wall-clock time.
    self.rng = np.random.RandomState(
        np.uint32((time.time() * 0.0001 - int(time.time() * 0.0001)) * 4294967295)
    )
    self.pid = os.getpid()

    self._sampling_weight = None
    self._training_count = None
    self._count = None
    self.n_successful_warp = 0
    self.n_failed_warp = 0
    self.n_read_failures = 0

    self.load_data()  # Open dataset files

    if transform is None:
        # NOTE(review): this fallback takes a single argument; confirm it
        # matches how self.transform is called elsewhere in the class.
        transform = lambda x: x
    self.transform = transform

    # Load preview data on initialization so read errors won't occur late
    # and reading doesn't have to be done by each background worker process separately.
    _ = self.preview_batch