def _open_file(self, filename):
    """Open `filename` with z5py in read-only mode (``mode='r'``) and return the file handle."""
    path = ensure_str(filename)
    return z5py.File(path, mode='r')
def _open_file(self, filename):
    """Open `filename` via `ZarrFile` with ``mode='a'`` and return the resulting handle."""
    # NOTE(review): 'a' is presumably read/write access that creates the container if missing -
    # confirm against the ZarrFile implementation that this is intended here.
    path = ensure_str(filename)
    return ZarrFile(path, mode='a')
def _make_crop_source(crop: Dict[str, Any],
                      data_dir: Optional[str],
                      subsample_variant: Optional[Union[int, str]],
                      gt_version: str,
                      labels: List[CNNectome.utils.label.Label],
                      ak_raw: ArrayKey,
                      ak_labels: ArrayKey,
                      ak_labels_downsampled: ArrayKey,
                      ak_mask: ArrayKey,
                      input_size: Coordinate,
                      output_size: Coordinate,
                      voxel_size_input: Coordinate,
                      voxel_size: Coordinate,
                      crop_width: Coordinate,
                      keep_thr: float) -> gunpowder.batch_provider_tree.BatchProviderTree:
    """
    Generate a batch provider for a specific crop, including label data, raw data, generating per label mask,
    rejection based on `keep_thr` and contrast scaling.

    Args:
        crop: Instance of an entry in the crop database.
        data_dir: Path to directory where data is stored. If None, read from config file.
        subsample_variant: If using raw data that has been subsampled from its original resolution,
                           `subsample_variant` is the name of the dataset in the group "volumes/subsampled/raw"
                           containing the subsampled raw data. If None, use the raw data at original resolution
                           from "volumes/raw/s0" (or "volumes/raw" if the former does not exist in the file).
        gt_version: Version of groundtruth annotations, e.g. "v0003"
        labels: List of labels that the network needs to be trained for.
        ak_raw: array key for raw data
        ak_labels: array key for label data
        ak_labels_downsampled: array key for downsampled label data
        ak_mask: array key for mask
        input_size: Size of input arrays of network.
        output_size: Size of output arrays of network.
        voxel_size_input: Voxel size of the input arrays.
        voxel_size: Voxel size of the output arrays.
        crop_width: additional padding width on top of `output_size`
        keep_thr: threshold for ratio of voxels that need to be annotated for a batch to not be rejected.

    Returns:
        Gunpowder batch provider tree for grabbing batches from the specified crop.

    Raises:
        AssertionError: If a label with a separate labelset is annotated in the crop, but its dataset is missing
                        from the file.
    """

    def _mask_annotated(val):
        # Evaluates to True wherever the downsampled labels are nonzero.
        return ((val > 0) * 1).astype(bool)

    def _mask_unannotated(val):
        # Same shape as the input, but False everywhere.
        return ((val > 0) * 0).astype(bool)

    if data_dir is None:
        data_dir = CNNectome.utils.config_loader.get_config()["organelles"]["data_path"]
    crop_path = os.path.join(data_dir, crop["parent"])
    n5file = zarr.open(ensure_str(crop_path), mode='r')

    blueprint_label_ds = "volumes/groundtruth/{version:}/crop{cropno:}/labels/{{label:}}"
    blueprint_labelmask_ds = "volumes/groundtruth/{version:}/crop{cropno:}/masks/{{label:}}"
    blueprint_mask_ds = "volumes/masks/groundtruth/{version:}"
    src_msg = "Adding ZarrSource ({file:}/{ds:}) for crop {cropno:}, providing {ak}"

    if subsample_variant is None:
        if "volumes/raw/s0" in n5file:
            raw_ds = "volumes/raw/s0"
        else:
            raw_ds = "volumes/raw"
    else:
        raw_ds = "volumes/subsampled/raw/{0:}".format(subsample_variant)

    # The doubled braces in the blueprints survive this first formatting pass, so `label_ds` and
    # `labelmask_ds` remain templates with an open `{label:}` field that is filled in per label below.
    version = gt_version.lstrip("v")
    label_ds = blueprint_label_ds.format(version=version, cropno=crop["number"])
    labelmask_ds = blueprint_labelmask_ds.format(version=version, cropno=crop["number"])
    mask_ds = blueprint_mask_ds.format(version=version)
    label_padding = Coordinate(output_size) + crop_width

    # add sources for all groundtruth labels
    all_srcs = []
    # We should really only be adding this with the above if statement, but need it for now because we need to
    # construct masks from it as separate labelsets contain zeros
    all_labels_ds = label_ds.format(label="all")
    logging.debug(src_msg.format(cropno=crop["number"], file=crop_path, ds=all_labels_ds, ak=ak_labels))
    all_srcs.append(
        ZarrSource(crop_path, {ak_labels: all_labels_ds})
        + Pad(ak_labels, label_padding)
        + DownSample(ak_labels, (2, 2, 2), ak_labels_downsampled)
    )

    for label in _label_filter(lambda l: l.separate_labelset, labels):
        # Look the annotated ids up once per label instead of once per label id.
        present_annotated = get_label_ids_by_category(crop, "present_annotated")
        if all(lid in present_annotated for lid in label.labelid):
            ds = label_ds.format(label=label.labelname)
            assert ds in n5file, "separate dataset {ds:} not present in file {file:}".format(
                ds=ds, file=n5file.store.path)
        else:
            ds = label_ds.format(label="all")
        logging.debug(src_msg.format(cropno=crop["number"], file=crop_path, ds=ds, ak=label.gt_key))
        all_srcs.append(ZarrSource(crop_path, {label.gt_key: ds}) + Pad(label.gt_key, label_padding))

    # add mask source per label
    labelmask_srcs = []
    for label in labels:
        # Format into a fresh variable: assigning the result back to `labelmask_ds` would consume the
        # `{label:}` placeholder on the first iteration and make every later label resolve to the first
        # label's mask dataset.
        this_labelmask_ds = labelmask_ds.format(label=label.labelname)
        if this_labelmask_ds in n5file:  # specified mask available
            logging.debug(src_msg.format(cropno=crop["number"], file=crop_path, ds=this_labelmask_ds,
                                         ak=label.mask_key))
            labelmask_srcs.append(
                ZarrSource(crop_path, {label.mask_key: this_labelmask_ds})
                + Pad(label.mask_key, label_padding)
            )
        else:
            annotated = get_all_annotated_label_ids(crop)
            if label.generic_label is not None:
                specific_labels = set(label.labelid) - set(label.generic_label)
                generic_condition = (all(lid in annotated for lid in label.generic_label) or
                                     all(lid in annotated for lid in specific_labels))
            else:
                generic_condition = False
            if all(lid in annotated for lid in label.labelid) or generic_condition:
                mask_fn = _mask_annotated
            else:
                mask_fn = _mask_unannotated
            # The mask is derived from the downsampled "all" labels with a LambdaFilter. Providing it from a
            # standalone LambdaSource does not work because there are crops that are very close to each
            # other, which would lead to issues with masking.
            all_srcs[0] += LambdaFilter(mask_fn, ak_labels_downsampled, target_key=label.mask_key,
                                        target_spec=ArraySpec(dtype=bool, interpolatable=False))
    all_srcs.extend(labelmask_srcs)

    # add raw source
    logging.debug(src_msg.format(cropno=crop["number"], file=crop_path, ds=raw_ds, ak=ak_raw))
    raw_src = (
        ZarrSource(crop_path, {ak_raw: raw_ds}, array_specs={ak_raw: ArraySpec(voxel_size=voxel_size_input)})
        + Pad(ak_raw, Coordinate(input_size), 0)
    )
    all_srcs.append(raw_src)

    # add gt mask source
    logging.debug(src_msg.format(cropno=crop["number"], file=crop_path, ds=mask_ds, ak=ak_mask))
    mask_src = ZarrSource(
        crop_path,
        {ak_mask: mask_ds},
        array_specs={ak_mask: ArraySpec(interpolatable=False, voxel_size=voxel_size)}
    )
    all_srcs.append(mask_src)

    # combine all sources and pick a random location
    crop_src = (
        tuple(all_srcs)
        + MergeProvider()
        + RandomLocation()
        + Reject(ak_mask, min_masked=keep_thr)
    )

    # contrast adjustment: the parameters are always read from the attributes of the full resolution raw
    # dataset, even if a subsampled variant provides the raw data
    if "volumes/raw/s0" in n5file:
        contr_info_ds = "volumes/raw/s0"
    else:
        contr_info_ds = "volumes/raw"
    contr_adj = n5file[contr_info_ds].attrs["contrastAdjustment"]
    # linear map sending contr_adj["min"] to 0 and contr_adj["max"] to 255
    scale = 255.0 / (float(contr_adj["max"]) - float(contr_adj["min"]))
    shift = - scale * float(contr_adj["min"])
    logging.debug("Adjusting contrast with scale {scale:} and shift {shift:}".format(scale=scale, shift=shift))
    crop_src += IntensityScaleShift(ak_raw, scale, shift)

    return crop_src