def __init__(self, array: np.ndarray, voxel_offset: Cartesian = None, voxel_size: Cartesian = None):
    assert 3 <= array.ndim <= 4
    assert isinstance(array, (np.ndarray, Chunk))
    self.array = array

    if voxel_offset is None:
        if isinstance(array, Chunk):
            self.array = array.array
            voxel_offset = array.voxel_offset
        else:
            voxel_offset = Cartesian(0, 0, 0)

    if voxel_offset is not None:
        if len(voxel_offset) == 4:
            assert voxel_offset[0] == 0
            voxel_offset = voxel_offset[1:]
        assert len(voxel_offset) == 3
        if not isinstance(voxel_offset, Cartesian):
            voxel_offset = Cartesian.from_collection(voxel_offset)
    self.voxel_offset = voxel_offset

    if voxel_size is not None and not isinstance(voxel_size, Cartesian):
        voxel_size = Cartesian.from_collection(voxel_size)
    self.voxel_size = voxel_size
    if voxel_size is not None:
        assert len(voxel_size) == 3
        assert all(vs > 0 for vs in voxel_size)
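# Usage sketch (not part of the original source; the array shape and offsets
# below are made-up example values): wrap a numpy array together with its
# spatial metadata using the constructor above.
arr = np.zeros((4, 64, 64, 64), dtype=np.float32)
chunk = Chunk(arr,
              voxel_offset=Cartesian(0, 2048, 4096),
              voxel_size=Cartesian(40, 8, 8))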
def properties(self) -> dict:
    # only record non-default properties
    props = dict()
    if self.voxel_offset is not None and self.voxel_offset != Cartesian(0, 0, 0):
        props['voxel_offset'] = self.voxel_offset
    if self.voxel_size is not None and self.voxel_size != Cartesian(1, 1, 1):
        props['voxel_size'] = self.voxel_size
    return props
def __call__(self, output_bbox: BoundingBox):
    assert isinstance(output_bbox, BoundingBox)
    # clone the bounding box, otherwise the bounding box in the task
    # would be modified in place!
    output_bbox = output_bbox.clone()
    output_bbox.adjust(self.expand_margin_size)
    chunk_slices = output_bbox.to_slices()

    if self.dry_run:
        # we can not use pattern='zero' since the chunk might get skipped
        # by the skip-all-zero operator
        return Chunk.from_bbox(
            output_bbox,
            pattern='random',
            dtype=self.vol.dtype,
            voxel_size=Cartesian.from_collection(self.vol.resolution[::-1]),
        )

    logging.info(f'cutout {chunk_slices[::-1]} from {self.volume_path}')

    # always reverse the indexes since cloudvolume uses x,y,z indexing
    chunk = self.vol[chunk_slices[::-1]]
    chunk = np.asarray(chunk)
    # the cutout is Fortran ordered, so transpose it to make it C order;
    # we do not need to make it contiguous here
    chunk = chunk.transpose()

    # if the channel number is 1, squeeze it to a 3D array
    # TODO: remove this step and use 4D arrays all over this package;
    # always using 4D arrays would simplify some operations
    if chunk.shape[0] == 1:
        chunk = np.squeeze(chunk, axis=0)

    chunk = Chunk(
        chunk,
        voxel_offset=output_bbox.start,
        voxel_size=Cartesian.from_collection(self.vol.resolution[::-1]))

    if self.blackout_sections:
        chunk = self._blackout_sections(chunk)

    if self.validate_mip:
        self._validate_chunk(chunk)
    return chunk
def __init__(self,
             volume_path: str,
             mip: int = 0,
             expand_margin_size: Cartesian = Cartesian(0, 0, 0),
             expand_direction: int = None,
             fill_missing: bool = False,
             validate_mip: int = None,
             blackout_sections: bool = None,
             dry_run: bool = False,
             name: str = 'cutout'):
    super().__init__(name=name)
    self.volume_path = volume_path
    self.mip = mip
    self.fill_missing = fill_missing
    self.validate_mip = validate_mip
    self.blackout_sections = blackout_sections
    self.dry_run = dry_run

    if isinstance(expand_margin_size, tuple):
        expand_margin_size = Cartesian.from_collection(expand_margin_size)

    # optionally expand in only one direction by padding zeros on the other side
    if expand_direction == 1:
        expand_margin_size = (0, 0, 0, *expand_margin_size)
    elif expand_direction == -1:
        expand_margin_size = (*expand_margin_size, 0, 0, 0)
    else:
        assert expand_direction is None
    self.expand_margin_size = expand_margin_size

    if blackout_sections:
        stor = CloudFiles(volume_path)
        self.blackout_section_ids = stor.get_json(
            'blackout_section_ids.json')['section_ids']

    verbose = (logging.getLogger().getEffectiveLevel() <= 30)
    self.vol = CloudVolume(
        self.volume_path,
        bounded=False,
        fill_missing=self.fill_missing,
        progress=verbose,
        mip=self.mip,
        cache=False,
        green_threads=True)
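# Hypothetical usage sketch: the operator class name `CutoutOperator` and the
# volume path below are assumptions for illustration; only the __init__ and
# __call__ signatures shown above come from the source.
reader = CutoutOperator(
    'gs://my-bucket/my-dataset/image',
    mip=1,
    expand_margin_size=Cartesian(4, 64, 64),
    fill_missing=True)
image = reader(BoundingBox.from_list([0, 0, 0, 128, 1024, 1024]))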
def maskout(self, chunk: Chunk):
    """Black out part of the chunk according to this mask."""
    assert chunk.voxel_size is not None
    assert self.voxel_size is not None
    assert self.voxel_size >= chunk.voxel_size
    # the voxel size should be divisible
    assert Cartesian(0, 0, 0) == self.voxel_size % chunk.voxel_size
    factor = self.voxel_size // chunk.voxel_size
    # the mask is in a coarser mip level; apply it to every strided
    # sub-grid of the high resolution chunk
    for offset in np.ndindex(factor):
        chunk.array[
            ...,
            np.s_[offset[0]::factor[0]],
            np.s_[offset[1]::factor[1]],
            np.s_[offset[2]::factor[2]]] *= self.array
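# Minimal numpy sketch of the strided masking idea used in `maskout` above
# (illustration only; the shapes and values are made up). A (2, 2, 2) mask at
# a 2x coarser resolution zeroes out the matching (2, 2, 2) blocks of the
# high resolution array.
import numpy as np

high = np.ones((4, 4, 4), dtype=np.uint8)  # high resolution data
mask = np.array([[[1, 0], [1, 1]],
                 [[0, 1], [1, 1]]], dtype=np.uint8)  # coarse mask
factor = tuple(h // m for h, m in zip(high.shape, mask.shape))  # (2, 2, 2)
for offset in np.ndindex(factor):
    high[offset[0]::factor[0],
         offset[1]::factor[1],
         offset[2]::factor[2]] *= mask
# every 2x2x2 block of `high` whose coarse mask voxel is 0 is now all zeros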
def to_h5(self, file_name: str, with_offset: bool = True,
          chunk_size: Cartesian = Cartesian(64, 64, 64),
          with_unique: bool = True, compression="gzip",
          voxel_size: tuple = None):
    """
    :param file_name: output file name. If it does not end with `.h5`,
        the bounding box will be appended to the file name.
    :param with_offset: save the voxel offset or not
    :param chunk_size: HDF5 dataset chunk size
    :param with_unique: if this is a segmentation chunk, save the unique
        object ids or not.
    :param compression: HDF5 compression to use. Options are gzip, lzf
    :param voxel_size: physical size of each voxel; defaults to the
        chunk's own voxel size.
    """
    if chunk_size:
        assert len(chunk_size) == 3

    if not file_name.endswith('.h5'):
        file_name += self.bbox.to_filename() + '.h5'
    logging.info(f'write chunk to file: {file_name}')
    if os.path.exists(file_name):
        print(yellow(f'deleting existing file: {file_name}'))
        os.remove(file_name)

    with h5py.File(file_name, 'w') as f:
        f.create_dataset('/main', data=self.array,
                         chunks=chunk_size, compression=compression)
        if voxel_size is None and self.voxel_size is not None:
            voxel_size = self.voxel_size
        if voxel_size is not None:
            f.create_dataset('/voxel_size', data=voxel_size)
        if with_offset and self.voxel_offset is not None:
            f.create_dataset('/voxel_offset', data=self.voxel_offset)
        if with_unique and self.is_segmentation:
            unique = np.unique(self.array)
            if unique[0] == 0:
                # drop the background id
                unique = unique[1:]
            f.create_dataset('/unique_nonzeros', data=unique)
    return file_name
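# Usage sketch (assumes only the Chunk class in this repo; the output path is
# an arbitrary example): write an image chunk to HDF5 with its voxel offset.
arr = np.random.randint(0, 256, size=(64, 64, 64), dtype=np.uint8)
chunk = Chunk(arr, voxel_offset=Cartesian(0, 1024, 2048))
file_name = chunk.to_h5('/tmp/example_chunk.h5')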
def __call__(self, chunk):
    """Black out part of the chunk according to a mask chunk."""
    assert isinstance(chunk, Chunk)
    mask_voxel_size = Cartesian.from_collection(self.mask_vol.resolution[::-1])
    factor = mask_voxel_size // chunk.voxel_size
    for m, c in zip(mask_voxel_size, chunk.voxel_size):
        assert m >= c
        assert m % c == 0

    if np.all(chunk == 0):
        logging.warning("chunk is all black, return directly")
        return chunk

    mask_in_high_mip = self._read_mask_in_high_mip(chunk.bbox, factor)

    if np.all(mask_in_high_mip == 0):
        logging.warning('the mask is all black, mask all the voxels directly')
        np.multiply(chunk, 0, out=chunk)
        return chunk
    if np.all(mask_in_high_mip):
        logging.warning("mask elements are all positive, return directly")
        return chunk
    assert np.any(mask_in_high_mip)

    # make the mask the same data type as the input chunk
    mask_in_high_mip = mask_in_high_mip.astype(chunk.dtype)

    # the mask is in a coarser mip level; apply it to every strided
    # sub-grid of the high resolution chunk
    for offset in np.ndindex(factor):
        chunk.array[
            ...,
            np.s_[offset[0]::factor[0]],
            np.s_[offset[1]::factor[1]],
            np.s_[offset[2]::factor[2]]] *= mask_in_high_mip
    return chunk
def test_bounding_box():
    bbox = Bbox.from_delta((1, 3, 2), (64, 32, 8))
    bbox = BoundingBox.from_bbox(bbox)
    assert bbox.start == Cartesian(1, 3, 2)
    assert bbox.stop == Cartesian(65, 35, 10)

    bbox = bbox.clone()
    assert isinstance(bbox, BoundingBox)

    minpt = Cartesian(1, 2, 3)
    maxpt = Cartesian(2, 3, 4)
    bbox = BoundingBox(minpt, maxpt)

    bbox = BoundingBox.from_center(Cartesian(1, 2, 3), 3)
    assert bbox == BoundingBox.from_list([-2, -1, 0, 4, 5, 6])

    bbox = BoundingBox.from_center(Cartesian(1, 2, 3), 3, even_size=False)
    assert bbox == BoundingBox.from_list([-2, -1, 0, 5, 6, 7])
def test_cartesian():
    assert to_cartesian(None) is None
    ct = (1, 2, 3)
    assert to_cartesian(ct) == Cartesian(1, 2, 3)

    ct = Cartesian(1, 2, 3)
    ct += 2
    assert ct == Cartesian(3, 4, 5)
    ct -= 2
    assert ct == Cartesian(1, 2, 3)
    np.testing.assert_equal(ct.vec, Vec(1, 2, 3))

    ct = Cartesian(3, 4, 5)
    ct = ct // 2
    assert ct == Cartesian(1, 2, 2)

    # note that 2*ct will repeat the elements of ct!
    ct2 = ct * 2
    assert ct2 > ct
    assert ct2 >= ct
    assert ct < ct2
    assert ct <= ct2

    ct3 = ct / 2
    assert ct3 == Cartesian(0.5, 1, 1)

    ct4 = Cartesian.from_collection((1, 2, 3))
    assert ct4 == Cartesian(1, 2, 3)

    assert Cartesian(0, 0, 0) * Cartesian(1, 2, 3) == Cartesian(0, 0, 0)
    assert Cartesian(4, 6, 8) / Cartesian(2, 3, 2) == Cartesian(2, 2, 4)
    assert -Cartesian(1, -2, 3) == Cartesian(-1, 2, -3)
def test_read_write_aff(self):
    print('test affinitymap io...')
    arr = np.random.rand(3, 8, 16, 16).astype(np.float32)
    chunk = Chunk(arr, voxel_offset=Cartesian(1, 2, 3))
    read_write_h5(chunk)
def test_read_write_image(self):
    print('test image io...')
    arr = np.random.randint(0, 256, size=(8, 16, 16), dtype=np.uint8)
    chunk = Chunk(arr, voxel_offset=Cartesian(1, 2, 3))
    read_write_h5(chunk)
    read_write_tif(chunk)
@classmethod
def from_h5(cls, file_name: str,
            voxel_offset: tuple = None,
            dataset_path: str = None,
            voxel_size: tuple = None,
            cutout_start: tuple = None,
            cutout_stop: tuple = None,
            cutout_size: tuple = None,
            zero_filling: bool = False,
            dtype: str = None):
    assert os.path.exists(file_name)

    if cutout_start is not None and cutout_size is not None:
        cutout_stop = tuple(t + s for t, s in zip(cutout_start, cutout_size))

    if not h5py.is_hdf5(file_name):
        # file_name is a prefix; append the bounding box to build the real file name
        assert cutout_start is not None
        assert cutout_stop is not None
        bbox = BoundingBox.from_list([*cutout_start, *cutout_stop])
        file_name += f'{bbox.to_filename()}.h5'

    if zero_filling and (not os.path.exists(file_name) or os.path.getsize(file_name) == 0):
        # fill with zero
        assert dtype is not None
        logging.info(f'{file_name} does not exist, will return None.')
        # return cls.from_bbox(bbox, dtype=dtype, voxel_size=voxel_size, all_zero=True)
        return None

    with h5py.File(file_name, 'r') as f:
        if dataset_path is None:
            # pick the first dataset whose name does not contain 'offset' or 'unique'
            for key in f.keys():
                if 'offset' not in key and 'unique' not in key:
                    dataset_path = key
                    break
        dset = f[dataset_path]

        if voxel_offset is None:
            if 'voxel_offset' in f:
                voxel_offset = Cartesian(*f['voxel_offset'])
            else:
                voxel_offset = Cartesian(0, 0, 0)

        if voxel_size is None:
            if 'voxel_size' in f:
                voxel_size = Cartesian(*f['voxel_size'])
            else:
                voxel_size = Cartesian(1, 1, 1)

        if cutout_start is None:
            cutout_start = voxel_offset
        if cutout_size is None:
            cutout_size = dset.shape[-3:]
        if cutout_stop is None:
            cutout_stop = tuple(t + s for t, s in zip(cutout_start, cutout_size))

        for c, v in zip(cutout_start, voxel_offset):
            assert c >= v, "can only cutout after the global voxel offset."

        assert len(cutout_start) == 3
        assert len(cutout_stop) == 3
        dset = dset[
            ...,
            cutout_start[0] - voxel_offset[0]:cutout_stop[0] - voxel_offset[0],
            cutout_start[1] - voxel_offset[1]:cutout_stop[1] - voxel_offset[1],
            cutout_start[2] - voxel_offset[2]:cutout_stop[2] - voxel_offset[2],
        ]

    logging.info(
        f'read from HDF5 file: {file_name}, start: {cutout_start}, '
        f'stop: {cutout_stop}, size: {cutout_size}, voxel size: {voxel_size}.')

    arr = np.asarray(dset)
    # make sure the data type is in native byte order
    if arr.dtype == np.dtype('<f4'):
        arr = arr.astype('float32')
    elif arr.dtype == np.dtype('<f8'):
        arr = arr.astype('float64')

    logging.info(f'new chunk voxel offset: {cutout_start}')

    return cls(arr, voxel_offset=cutout_start, voxel_size=voxel_size)
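# Usage sketch (reads the file written in the to_h5 example above; the cutout
# coordinates are made-up example values): load a sub-volume relative to the
# stored voxel_offset.
sub = Chunk.from_h5(
    '/tmp/example_chunk.h5',
    cutout_start=(0, 1024, 2048),
    cutout_size=(32, 32, 32),
)
assert sub.voxel_offset == Cartesian(0, 1024, 2048)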
@classmethod
def create(cls, size: Cartesian = Cartesian(64, 64, 64),
           dtype: type = np.uint8,
           voxel_offset: Cartesian = Cartesian(0, 0, 0),
           voxel_size: Cartesian = None,
           pattern: str = 'sin',
           high: int = 255):
    """Create a fake chunk for tests.

    Args:
        size (tuple, Cartesian, optional): chunk size or shape. Defaults to (64, 64, 64).
        dtype (type, optional): data type, as in numpy. Defaults to np.uint8.
        voxel_offset (Cartesian, optional): coordinate of the starting voxel. Defaults to Cartesian(0, 0, 0).
        voxel_size (Cartesian, optional): physical size of each voxel. Defaults to None.
        pattern (str, optional): way to create the array, one of ['sin', 'random', 'zero']. Defaults to 'sin'.
        high (int, optional): the exclusive upper bound of the random integer array. Defaults to 255.

    Raises:
        NotImplementedError: an unsupported pattern or data type was used.

    Returns:
        Chunk: the chunk created.
    """
    if isinstance(dtype, str):
        dtype = np.dtype(dtype)

    if pattern == 'zero':
        arr = np.zeros(size, dtype=dtype)
    elif pattern == 'sin':
        ix, iy, iz = np.meshgrid(
            *[np.linspace(0, 1, n) for n in size[-3:]], indexing='ij')
        arr = np.abs(np.sin(4 * (ix + iy + iz)))
        if len(size) == 4:
            arr = np.expand_dims(arr, axis=0)
            arr = np.repeat(arr, size[0], axis=0)
        if dtype == np.uint8:
            arr = (arr * 255).astype(dtype)
        elif dtype == np.uint32 or dtype == np.uint64:
            arr = (arr > 0.5).astype(dtype)
            arr = cc3d.connected_components(arr, connectivity=6)
        elif np.issubdtype(dtype, np.floating):
            arr = arr.astype(dtype)
        else:
            raise NotImplementedError(f'do not support this data type: {dtype}')
    elif pattern == 'random':
        if np.issubdtype(dtype, np.floating):
            arr = np.random.rand(*size)
            arr = arr.astype(dtype)
        elif np.issubdtype(dtype, np.integer):
            arr = np.random.randint(high, size=size, dtype=dtype)
            arr = cc3d.connected_components(arr, connectivity=6)
        else:
            raise NotImplementedError(f'do not support this data type: {dtype}')
    else:
        raise NotImplementedError(f'do not support the pattern: {pattern}')
    return cls(arr, voxel_offset=voxel_offset, voxel_size=voxel_size)
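# Usage sketch: create a synthetic segmentation chunk for tests. The pattern
# and dtype values are taken from the options documented above; the sizes are
# arbitrary example values.
seg = Chunk.create(size=Cartesian(4, 64, 64),
                   dtype=np.uint32,
                   pattern='sin',
                   voxel_size=Cartesian(40, 8, 8))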
def __init__(self,
             convnet_model: Union[str, PatchInferencerBase],
             convnet_weight_path: str,
             input_patch_size: Union[tuple, list, Cartesian],
             output_patch_size: Union[tuple, list, Cartesian] = None,
             patch_num: Union[tuple, list, Cartesian] = None,
             num_output_channels: int = 3,
             output_patch_overlap: Union[tuple, list, Cartesian] = None,
             output_crop_margin: Union[tuple, list, Cartesian] = None,
             dtype='float32',
             framework: str = 'universal',
             batch_size: int = 1,
             bump: str = 'wu',
             input_size: Union[tuple, list, Cartesian] = None,
             mask_output_chunk: bool = True,
             mask_myelin_threshold=None,
             test_time_augmentation: bool = False,
             dry_run: bool = False):
    """ConvNet inference patch by patch in a chunk.

    Args:
        convnet_model (Union[str, PatchInferencerBase]): the path of the convnet model
        convnet_weight_path (str): the path of the trained model weights
        input_patch_size (Union[tuple, list, Cartesian]): input patch size, zyx
        output_patch_size (Union[tuple, list, Cartesian], optional): output patch size. Defaults to the input patch size.
        patch_num (Union[tuple, list, Cartesian], optional): number of patches. Computed automatically by default.
        num_output_channels (int, optional): number of output channels. Defaults to 3.
        output_patch_overlap (Union[tuple, list, Cartesian], optional): the overlap size of the output patches. Defaults to half of the output patch size.
        output_crop_margin (Union[tuple, list, Cartesian], optional): crop some margin of the output patch. Defaults to None.
        dtype (str, optional): data type, named consistently with numpy. Defaults to 'float32'.
        framework (str, optional): ['universal', 'identity', 'pytorch']. Defaults to 'universal'.
        batch_size (int, optional): batch size in one pass. This parameter does not seem to accelerate computation. Defaults to 1.
        bump (str, optional): bump function. Defaults to 'wu'.
        input_size (Union[tuple, list, Cartesian], optional): input chunk size. Defaults to None.
        mask_output_chunk (bool, optional): normalize on the chunk level rather than the patch level. Defaults to True.
        mask_myelin_threshold (optional): threshold to segment the myelin. Defaults to None.
        test_time_augmentation (bool, optional): augment the image patch, run inference, transform back and blend. Defaults to False.
        dry_run (bool, optional): only compute parameters and set up, do not perform any real computation. Defaults to False.
""" assert input_size is None or patch_num is None if logging.getLogger().getEffectiveLevel() <= 30: self.verbose = True else: self.verbose = False input_patch_size = to_cartesian(input_patch_size) patch_num = to_cartesian(patch_num) input_size = to_cartesian(input_size) output_patch_size = to_cartesian(output_patch_size) output_patch_overlap = to_cartesian(output_patch_overlap) output_crop_margin = to_cartesian(output_crop_margin) if output_patch_size is None: output_patch_size = input_patch_size if output_patch_overlap is None: output_patch_overlap = output_patch_size // 2 self.input_patch_size = input_patch_size self.output_patch_size = output_patch_size self.output_patch_overlap = output_patch_overlap self.patch_num = patch_num self.batch_size = batch_size self.input_size = input_size if output_crop_margin is None: if mask_output_chunk: self.output_crop_margin = Cartesian(0, 0, 0) else: self.output_crop_margin = self.output_patch_overlap else: self.output_crop_margin = output_crop_margin # we should always crop more than the patch overlap # since the overlap region is reweighted by patch mask assert self.output_crop_margin >= self.output_patch_overlap # if self.input_patch_size != self.output_patch_size: # breakpoint() # self.output_patch_crop_margin = tuple((ips-ops)//2 for ips, ops in zip( # input_patch_size, output_patch_size)) self.output_patch_crop_margin = (input_patch_size - output_patch_size) // 2 #self.output_offset = tuple(opcm+ocm for opcm, ocm in zip( # self.output_patch_crop_margin, self.output_crop_margin)) self.output_offset = self.output_crop_margin self.output_patch_stride = tuple( s - o for s, o in zip(output_patch_size, output_patch_overlap)) self.input_patch_overlap = tuple(opcm * 2 + oo for opcm, oo in zip( self.output_patch_crop_margin, self.output_patch_overlap)) self.input_patch_stride = tuple( ps - po for ps, po in zip(input_patch_size, self.input_patch_overlap)) # no chunk wise mask, the patches should be aligned inside chunk if not mask_output_chunk: assert (self.input_size is not None) or (self.patch_num is not None) if patch_num is None: assert input_size is not None self.patch_num = tuple((isz - o) // s for isz, o, s in zip( self.input_size, self.input_patch_overlap, self.input_patch_stride)) if self.input_size is None: assert self.patch_num is not None self.input_size = tuple(pst * pn + po for pst, pn, po in zip( self.input_patch_stride, self.patch_num, self.input_patch_overlap)) self.output_size = tuple( pst * pn + po - 2 * ocm for pst, pn, po, ocm in zip( self.output_patch_stride, self.patch_num, self.output_patch_overlap, self.output_crop_margin)) else: # we can handle arbitrary input and output size self.input_size = None self.output_size = None self.num_output_channels = num_output_channels self.mask_output_chunk = mask_output_chunk self.output_chunk_mask = None self.dtype = dtype self.mask_myelin_threshold = mask_myelin_threshold self.dry_run = dry_run # allocate a buffer to avoid redundant memory allocation self.input_patch_buffer = np.zeros((batch_size, 1, *input_patch_size), dtype=dtype) self.patch_slices_list = [] if isinstance(convnet_model, str): convnet_model = os.path.expanduser(convnet_model) if isinstance(convnet_weight_path, str): convnet_weight_path = os.path.expanduser(convnet_weight_path) self._prepare_patch_inferencer(framework, convnet_model, convnet_weight_path, bump) self.test_time_augmentation = test_time_augmentation
@classmethod
def from_dvid_list(cls, syns: list, resolution: Cartesian = None):
    """Construct Synapses from a list fetched from DVID using fivol.

    Args:
        syns (list): the synapse list fetched from DVID
        resolution (Cartesian, optional): the voxel size of the coordinates

    Returns:
        Synapses: a Synapses instance

    Example:
        syns = fivol.get_syndata(dvid_url, uuid)
        synapses = Synapses.from_dvid_list(syns)
    """
    print(f'loading {len(syns)} synapses...')
    pre_list = []
    post_list = []
    pre_confidence = []
    pre_users = []
    for syn in syns:
        if 'Pre' in syn['Kind']:
            # map from xyz to zyx
            pos = syn['Pos'][::-1]
            pos = Cartesian(*pos)
            pre_list.append(pos)

            if 'conf' in syn['Prop']:
                conf = float(syn['Prop']['conf'])
            else:
                conf = 1.0
            pre_confidence.append(conf)

            user = syn['Prop']['user']
            pre_users.append(user)

    print('loading post synapses...')
    pre_set = set(pre_list)
    post_users = []
    for syn in syns:
        if 'Post' in syn['Kind']:
            # map from xyz to zyx
            pos = syn['Pos'][::-1]
            pos = Cartesian(*pos)
            if len(syn['Rels']) > 0:
                pre_pos = syn['Rels'][0]['To'][::-1]
                pre_pos = Cartesian(*pre_pos)
                if pre_pos in pre_set:
                    post_list.append((pos, pre_pos))
                    user = syn['Prop']['user']
                    post_users.append(user)
                else:
                    print('found a postsynapse with deleted presynapse: ', syn)
            else:
                print('found a post synapse without presynapse: ', syn)

    # build a map from pre position to index
    pre_pos2idx = {}
    for idx, pos in enumerate(pre_list):
        pre_pos2idx[pos] = idx
    assert len(pre_pos2idx) == len(pre_list)

    post_to_pre_indices = []
    for _, pre_pos in post_list:
        pre_idx = pre_pos2idx[pre_pos]
        post_to_pre_indices.append(pre_idx)
    assert len(post_to_pre_indices) == len(post_list)

    pre = np.asarray(pre_list, dtype=np.int32)
    pre_confidence = np.asarray(pre_confidence, dtype=np.float32)
    post_to_pre_indices = np.asarray(post_to_pre_indices, dtype=np.int32)
    post_list = [x[0] for x in post_list]
    post_list = np.asarray(post_list, dtype=np.int32)
    post_to_pre_indices = np.expand_dims(post_to_pre_indices, 1)
    # the first column of post is the index of the presynapse,
    # followed by the zyx coordinate of the postsynapse
    post = np.hstack((post_to_pre_indices, post_list))

    # map user names to integer ids
    users = list(set(pre_users).union(set(post_users)))
    user2id = {}
    for idx, user in enumerate(users):
        user2id[user] = idx
    for idx, user in enumerate(pre_users):
        pre_users[idx] = user2id[user]
    for idx, user in enumerate(post_users):
        post_users[idx] = user2id[user]
    pre_users = np.asarray(pre_users, dtype=np.int32)
    post_users = np.asarray(post_users, dtype=np.int32)

    return cls(
        pre,
        post=post,
        pre_confidence=pre_confidence,
        resolution=resolution,
        users=users,
        pre_users=pre_users,
        post_users=post_users,
    )