def gt_region_for_roi(self, target_spec):
    gt_spec = target_spec.copy()
    if self.mask_distances:
        gt_spec.roi = gt_spec.roi.grow(
            Coordinate((self.max_distance,) * gt_spec.voxel_size.dims),
            Coordinate((self.max_distance,) * gt_spec.voxel_size.dims),
        ).snap_to_grid(gt_spec.voxel_size, mode="shrink")
    return gt_spec
def __init__(self, array_config):
    self.name = array_config.name
    self._source_array = array_config.source_array_config.array_type(
        array_config.source_array_config
    )
    self.upsample = Coordinate(max(u, 1) for u in array_config.upsample)
    self.downsample = Coordinate(max(d, 1) for d in array_config.downsample)
    self.interp_order = array_config.interp_order

    assert (
        self.voxel_size * self.upsample
    ) / self.downsample == self._source_array.voxel_size, (
        "The requested upsample/downsample factors are inconsistent with the "
        "source array's voxel size"
    )
def padding(neighborhood, voxel_size):
    """
    Get the padding (in world units) needed on the negative and positive side
    so that every affinity offset in the neighborhood can be computed from
    valid ground truth.
    """
    dims = voxel_size.dims
    padding_neg = (
        Coordinate(min([0] + [a[d] for a in neighborhood]) for d in range(dims))
        * -1
        * voxel_size
    )
    padding_pos = (
        Coordinate(max([0] + [a[d] for a in neighborhood]) for d in range(dims))
        * voxel_size
    )
    return padding_neg, padding_pos
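# A minimal usage sketch of `padding` above (not part of the original module;
# the neighborhood and voxel size are illustrative assumptions). Negative
# affinity offsets contribute to the negative padding, positive offsets to the
# positive padding, both scaled to world units by the voxel size.
def _example_affinity_padding():
    from funlib.geometry import Coordinate

    neighborhood = [Coordinate(-1, 0, 0), Coordinate(0, -1, 0), Coordinate(0, 0, 2)]
    voxel_size = Coordinate(8, 8, 8)
    pad_neg, pad_pos = padding(neighborhood, voxel_size)
    assert pad_neg == Coordinate(8, 8, 0)
    assert pad_pos == Coordinate(0, 0, 16)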
def __getitem__(self, roi: Roi) -> np.ndarray[Any, Any]:
    if not self.roi.contains(roi):
        raise ValueError(f"Cannot fetch data from outside my roi: {self.roi}!")
    assert roi.offset % self.voxel_size == Coordinate((0,) * self.dims), (
        f"Given roi offset: {roi.offset} is not a multiple of voxel_size: {self.voxel_size}"
    )
    assert roi.shape % self.voxel_size == Coordinate((0,) * self.dims), (
        f"Given roi shape: {roi.shape} is not a multiple of voxel_size: {self.voxel_size}"
    )
    slices = tuple(self._slices(roi))
    return self.data[slices]
def gt_region_for_roi(self, target_spec):
    gt_spec = target_spec.copy()
    pad_neg, pad_pos = aff_padding(self.neighborhood, target_spec.voxel_size)
    if self.lsds:
        pad_neg = Coordinate(
            *[
                max(a, b)
                for a, b in zip(pad_neg, self.lsd_pad(target_spec.voxel_size))
            ]
        )
        pad_pos = Coordinate(
            *[
                max(a, b)
                for a, b in zip(pad_pos, self.lsd_pad(target_spec.voxel_size))
            ]
        )
    gt_spec.roi = gt_spec.roi.grow(pad_neg, pad_pos)
    gt_spec.dtype = None
    return gt_spec
def __get_output_shape(
    self, input_shape: Coordinate, in_channels: int
) -> Tuple[int, Coordinate]:
    # run a dummy forward pass on the model's device to infer the number of
    # output channels and the spatial output shape
    device = torch.device("cpu")
    for parameter in self.parameters():
        device = parameter.device
        break

    dummy_data = torch.zeros((1, in_channels) + input_shape, device=device)
    with torch.no_grad():
        out = self.forward(dummy_data)
    return out.shape[1], Coordinate(out.shape[2:])
def register_hooks(converter):
    """Central place to register all conversion hooks with the given
    converter."""

    #########################
    # DaCapo specific hooks #
    #########################

    # class hierarchies:
    register_hierarchy_hooks(converter)

    #################
    # general hooks #
    #################

    # path to string and back
    converter.register_unstructure_hook(
        Path,
        lambda o: str(o),
    )
    converter.register_structure_hook(
        Path,
        lambda o, _: Path(o),
    )

    # Coordinate to tuple and back
    converter.register_unstructure_hook(
        Coordinate,
        lambda o: tuple(o),
    )
    converter.register_structure_hook(
        Coordinate,
        lambda o, _: Coordinate(o),
    )

    # Roi to coordinate tuple and back
    converter.register_unstructure_hook(
        Roi,
        lambda o: (converter.unstructure(o.offset), converter.unstructure(o.shape)),
    )
    converter.register_structure_hook(
        Roi,
        lambda o, _: Roi(*o),
    )
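# Minimal round-trip sketch for the hooks above (assumes `converter` is the
# cattrs-style converter used elsewhere; values are illustrative):
#
#   register_hooks(converter)
#   converter.unstructure(Roi((0, 0, 0), (10, 20, 30)))
#   # -> ((0, 0, 0), (10, 20, 30))
#   converter.structure(((0, 0, 0), (10, 20, 30)), Roi)
#   # -> Roi((0, 0, 0), (10, 20, 30))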
def voxel_size(self):
    return Coordinate(1, 2, 2)
def predict(
    model: Model,
    raw_array: Array,
    prediction_array_identifier: LocalArrayIdentifier,
    num_cpu_workers: int = 4,
    compute_context: ComputeContext = LocalTorch(),
    output_roi: Optional[Roi] = None,
):
    # get the model's input and output size
    input_voxel_size = Coordinate(raw_array.voxel_size)
    output_voxel_size = model.scale(input_voxel_size)
    input_shape = Coordinate(model.eval_input_shape)
    input_size = input_voxel_size * input_shape
    output_size = output_voxel_size * model.compute_output_shape(input_shape)[1]

    logger.info(
        "Predicting with input size %s, output size %s", input_size, output_size
    )

    # calculate input and output rois
    context = (input_size - output_size) / 2
    if output_roi is None:
        input_roi = raw_array.roi
        output_roi = input_roi.grow(-context, -context)
    else:
        input_roi = output_roi.grow(context, context)

    logger.info("Total input ROI: %s, output ROI: %s", input_roi, output_roi)

    # prepare prediction dataset
    axes = ["c"] + [axis for axis in raw_array.axes if axis != "c"]
    ZarrArray.create_from_array_identifier(
        prediction_array_identifier,
        axes,
        output_roi,
        model.num_out_channels,
        output_voxel_size,
        np.float32,
    )

    # create gunpowder keys
    raw = gp.ArrayKey("RAW")
    prediction = gp.ArrayKey("PREDICTION")

    # assemble prediction pipeline

    # prepare data source
    pipeline = DaCapoArraySource(raw_array, raw)
    # raw: (c, d, h, w)
    pipeline += gp.Pad(raw, Coordinate((None,) * input_voxel_size.dims))
    # raw: (c, d, h, w)
    pipeline += gp.Unsqueeze([raw])
    # raw: (1, c, d, h, w)

    gt_padding = (output_size - output_roi.shape) % output_size
    prediction_roi = output_roi.grow(gt_padding)

    # predict
    pipeline += gp_torch.Predict(
        model=model,
        inputs={"x": raw},
        outputs={0: prediction},
        array_specs={
            prediction: gp.ArraySpec(
                roi=prediction_roi, voxel_size=output_voxel_size, dtype=np.float32
            )
        },
        spawn_subprocess=False,
        device=str(compute_context.device),
    )
    # raw: (1, c, d, h, w)
    # prediction: (1, [c,] d, h, w)

    # prepare writing
    pipeline += gp.Squeeze([raw, prediction])
    # raw: (c, d, h, w)
    # prediction: (c, d, h, w)

    # write to zarr
    pipeline += gp.ZarrWrite(
        {prediction: prediction_array_identifier.dataset},
        prediction_array_identifier.container.parent,
        prediction_array_identifier.container.name,
    )

    # create reference batch request
    ref_request = gp.BatchRequest()
    ref_request.add(raw, input_size)
    ref_request.add(prediction, output_size)
    pipeline += gp.Scan(ref_request)

    # build pipeline and predict in complete output ROI
    with gp.build(pipeline):
        pipeline.request_batch(gp.BatchRequest())

    container = zarr.open(prediction_array_identifier.container)
    dataset = container[prediction_array_identifier.dataset]
    dataset.attrs["axes"] = (
        raw_array.axes if "c" in raw_array.axes else ["c"] + raw_array.axes
    )
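# Hypothetical usage sketch (every name below -- `trained_model`, `raw_array`,
# the output container path, and the LocalArrayIdentifier argument order -- is
# an assumption, not taken from this function):
#
#   predict(
#       trained_model,
#       raw_array,
#       LocalArrayIdentifier(Path("predictions.zarr"), "prediction"),
#       num_cpu_workers=8,
#   )
#
# With output_roi=None the output covers the raw ROI shrunk by the model's
# context on each side; an explicit output_roi instead grows the input request
# outward by that same context.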
def shape(self) -> Coordinate:
    data_shape = self.data.shape
    spatial_shape = Coordinate(
        [data_shape[self.axes.index(axis)] for axis in self.spatial_axes]
    )
    return spatial_shape
def lsd_pad(self, voxel_size):
    multiplier = 3  # from AddLocalShapeDescriptor Node in funlib.lsd
    padding = Coordinate(self.sigma(voxel_size) * multiplier)
    return padding
def padding(self, gt_voxel_size: Coordinate) -> Coordinate:
    return Coordinate((self.max_distance,) * gt_voxel_size.dims)
def build_batch_provider(self, datasets, model, task, snapshot_container=None):
    input_shape = Coordinate(model.input_shape)
    output_shape = Coordinate(model.output_shape)

    # get voxel sizes
    raw_voxel_size = datasets[0].raw.voxel_size
    prediction_voxel_size = model.scale(raw_voxel_size)

    # define input and output size:
    # switch to world units
    input_size = raw_voxel_size * input_shape
    output_size = prediction_voxel_size * output_shape

    # padding of groundtruth/mask
    gt_mask_padding = output_size + task.predictor.padding(prediction_voxel_size)

    # define keys:
    raw_key = gp.ArrayKey("RAW")
    gt_key = gp.ArrayKey("GT")
    mask_key = gp.ArrayKey("MASK")
    target_key = gp.ArrayKey("TARGET")
    weight_key = gp.ArrayKey("WEIGHT")

    # Get source nodes
    dataset_sources = []
    for dataset in datasets:
        raw_source = DaCapoArraySource(dataset.raw, raw_key)
        raw_source += gp.Pad(raw_key, None, 0)
        gt_source = DaCapoArraySource(dataset.gt, gt_key)
        gt_source += gp.Pad(gt_key, gt_mask_padding, 0)
        if dataset.mask is not None:
            mask_source = DaCapoArraySource(dataset.mask, mask_key)
        else:
            # Always provide a mask. By default it is simply an array
            # of ones with the same shape/roi as gt. Avoids making us
            # specially handle no mask case and allows padding of the
            # ground truth without worrying about training on incorrect
            # data.
            mask_source = DaCapoArraySource(OnesArray.like(dataset.gt), mask_key)
        mask_source += gp.Pad(mask_key, gt_mask_padding, 0)

        array_sources = [raw_source, gt_source, mask_source]

        dataset_source = (
            tuple(array_sources) + gp.MergeProvider() + gp.RandomLocation()
        )
        dataset_sources.append(dataset_source)

    pipeline = tuple(dataset_sources) + gp.RandomProvider()

    for augment in self.augments:
        pipeline += augment.node(raw_key, gt_key, mask_key)

    pipeline += gp.Reject(mask_key, min_masked=self.min_masked)

    # Add predictor nodes to pipeline
    pipeline += DaCapoTargetFilter(
        task.predictor,
        gt_key=gt_key,
        target_key=target_key,
        weights_key=weight_key,
        mask_key=mask_key,
    )

    # Trainer attributes:
    if self.num_data_fetchers > 1:
        pipeline += gp.PreCache(num_workers=self.num_data_fetchers)

    # stack to create a batch dimension
    pipeline += gp.Stack(self.batch_size)

    # print profiling stats
    pipeline += gp.PrintProfilingStats(every=self.print_profiling)

    # generate request for all necessary inputs to training
    request = gp.BatchRequest()
    request.add(raw_key, input_size)
    request.add(target_key, output_size)
    request.add(weight_key, output_size)
    # request additional keys for snapshots
    request.add(gt_key, output_size)
    request.add(mask_key, output_size)

    self._request = request
    self._pipeline = pipeline
    self._raw_key = raw_key
    self._gt_key = gt_key
    self._mask_key = mask_key
    self._weight_key = weight_key
    self._target_key = target_key
    self._loss = task.loss

    self.snapshot_container = snapshot_container
def eval_shape_increase(self) -> Coordinate:
    """
    How much to increase the input shape during prediction.
    """
    return Coordinate((0,) * self.input_shape.dims)
def input_shape(self):
    return Coordinate(40, 20, 20)
def padding(self, gt_voxel_size: Coordinate) -> Coordinate:
    return Coordinate((0,) * gt_voxel_size.dims)
def create_from_array_identifier(
    cls,
    array_identifier,
    axes,
    roi,
    num_channels,
    voxel_size,
    dtype,
    write_size=None,
    name=None,
):
    """
    Create a new ZarrArray given an array identifier. It is assumed that
    this array_identifier points to a dataset that does not yet exist.
    """
    if write_size is None:
        # total storage per block is approx c*x*y*z*dtype_size
        # appropriate block size about 5MB.
        axis_length = (
            (
                1024**2
                * 5
                / (num_channels if num_channels is not None else 1)
                / np.dtype(dtype).itemsize
            )
            ** (1 / voxel_size.dims)
        ) // 1
        write_size = Coordinate((axis_length,) * voxel_size.dims) * voxel_size
    write_size = Coordinate((min(a, b) for a, b in zip(write_size, roi.shape)))
    zarr_container = zarr.open(array_identifier.container, "a")
    try:
        daisy.prepare_ds(
            f"{array_identifier.container}",
            array_identifier.dataset,
            roi,
            voxel_size,
            dtype,
            num_channels=num_channels,
            write_size=write_size,
        )
        zarr_dataset = zarr_container[array_identifier.dataset]
        zarr_dataset.attrs["offset"] = roi.offset
        zarr_dataset.attrs["resolution"] = voxel_size
        zarr_dataset.attrs["axes"] = axes
    except zarr.errors.ContainsArrayError:
        zarr_dataset = zarr_container[array_identifier.dataset]
        assert (
            tuple(zarr_dataset.attrs["offset"]) == roi.offset
        ), f"{zarr_dataset.attrs['offset']}, {roi.offset}"
        assert (
            tuple(zarr_dataset.attrs["resolution"]) == voxel_size
        ), f"{zarr_dataset.attrs['resolution']}, {voxel_size}"
        assert tuple(zarr_dataset.attrs["axes"]) == tuple(
            axes
        ), f"{zarr_dataset.attrs['axes']}, {axes}"
        assert (
            zarr_dataset.shape
            == ((num_channels,) if num_channels is not None else ())
            + roi.shape / voxel_size
        ), f"{zarr_dataset.shape}, {((num_channels,) if num_channels is not None else ()) + roi.shape / voxel_size}"
        zarr_dataset[:] = np.zeros(zarr_dataset.shape, dtype)

    zarr_array = cls.__new__(cls)
    zarr_array.file_name = array_identifier.container
    zarr_array.dataset = array_identifier.dataset
    zarr_array._axes = None
    zarr_array._attributes = zarr_array.data.attrs
    zarr_array.snap_to_grid = None
    return zarr_array
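# Hypothetical usage sketch (container path, dataset name, ROI, channel count,
# and voxel size are assumptions): allocate an empty float32 dataset covering
# `roi` before writing block-wise predictions into it.
#
#   array_id = LocalArrayIdentifier(Path("predictions.zarr"), "affs")
#   ZarrArray.create_from_array_identifier(
#       array_id,
#       axes=["c", "z", "y", "x"],
#       roi=roi,
#       num_channels=3,
#       voxel_size=Coordinate(8, 8, 8),
#       dtype=np.float32,
#   )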
def sigma(self, voxel_size):
    voxel_dist = max(voxel_size)  # arbitrarily chosen
    num_voxels = 10  # arbitrarily chosen
    sigma = voxel_dist * num_voxels
    return Coordinate((sigma,) * self.dims)
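# Worked example (illustrative, not from the original source): for a 3D
# predictor with voxel_size = Coordinate(4, 4, 4), max(voxel_size) is 4, so
# sigma() returns Coordinate(40, 40, 40); lsd_pad() (3 * sigma, see lsd_pad
# above) would then return Coordinate(120, 120, 120).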