Example #1
0
 def _open_file(self, filename):
     """Open ``filename`` as a ``z5py.File`` using ``mode='r'``.

     The name is first passed through ``ensure_str`` (a helper defined outside
     this block; presumably it normalizes the value to ``str`` -- confirm).
     """
     path = ensure_str(filename)
     handle = z5py.File(path, mode='r')
     return handle
Example #2
0
 def _open_file(self, filename):
     """Return a ``ZarrFile`` for ``filename``, opened with ``mode='a'``.

     ``ensure_str`` is applied to the name before it is handed to ``ZarrFile``
     (helper defined outside this block; presumably a str coercion -- confirm).
     """
     open_kwargs = {'mode': 'a'}
     return ZarrFile(ensure_str(filename), **open_kwargs)
def _make_crop_source(crop: Dict[str, Any],
                      data_dir: Optional[str],
                      subsample_variant: Optional[
                         Union[int, str]],
                      gt_version: str,
                      labels: List[CNNectome.utils.label.Label],
                      ak_raw: ArrayKey,
                      ak_labels: ArrayKey,
                      ak_labels_downsampled: ArrayKey,
                      ak_mask: ArrayKey,
                      input_size: Coordinate,
                      output_size: Coordinate,
                      voxel_size_input: Coordinate,
                      voxel_size: Coordinate,
                      crop_width: Coordinate,
                      keep_thr: float) -> gunpowder.batch_provider_tree.BatchProviderTree:
    """
    Generate a batch provider for a specific crop, including label data, raw data, generating per label mask,
    rejection based on `keep_thr` and contrast scaling.

    Args:
        crop: Instance of an entry in the crop database. The keys "parent" (container path relative to
              `data_dir`) and "number" are read here.
        data_dir: Path to directory where data is stored. If None, read from config file.
        subsample_variant: If using raw data that has been subsampled from its original resolution,
                           `subsample_variant` is the name of the dataset in the group "volumes/subsampled/raw"
                           containing the subsampled raw data. If None, use the raw data at original resolution
                           from "volumes/raw/s0" (or "volumes/raw" if the container has no "s0" dataset).
        gt_version: Version of groundtruth annotations, e.g. "v0003"
        labels: List of labels that the network needs to be trained for.
        ak_raw: array key for raw data
        ak_labels: array key for label data
        ak_labels_downsampled: array key for downsampled label data
        ak_mask: array key for mask
        input_size: Size of input arrays of network.
        output_size: Size of output arrays of network.
        voxel_size_input: Voxel size of the input arrays.
        voxel_size: Voxel size of the output arrays.
        crop_width: additional padding width on top of `output_size`
        keep_thr: threshold for ratio of voxels that need to be annotated for a batch to not be rejected.

    Returns:
        Gunpowder  batch provider tree for grabbing batches from the specified crop.

    Raises:
        AssertionError: If a label with a separate labelset is fully annotated as present in this crop but its
                        dedicated dataset is missing from the container.
    """
    if data_dir is None:
        data_dir = CNNectome.utils.config_loader.get_config()["organelles"]["data_path"]
    # Every dataset of this crop lives in the same container, so build its path only once.
    container_path = os.path.join(data_dir, crop["parent"])
    n5file = zarr.open(ensure_str(container_path), mode='r')
    add_source_msg = "Adding ZarrSource ({file:}/{ds:}) for crop {cropno:}, providing {ak}"

    # The doubled braces "{{label:}}" survive this first format() call as "{label:}", which turns `label_ds` and
    # `labelmask_ds` into per-label templates. They must never be overwritten with a formatted result.
    version_no = gt_version.lstrip("v")
    label_ds = "volumes/groundtruth/{version:}/crop{cropno:}/labels/{{label:}}".format(
        version=version_no, cropno=crop["number"])
    labelmask_ds = "volumes/groundtruth/{version:}/crop{cropno:}/masks/{{label:}}".format(
        version=version_no, cropno=crop["number"])
    mask_ds = "volumes/masks/groundtruth/{version:}".format(version=version_no)

    # Full-resolution raw dataset; also the dataset carrying the contrast adjustment attributes used below.
    full_res_raw_ds = "volumes/raw/s0" if "volumes/raw/s0" in n5file else "volumes/raw"
    if subsample_variant is None:
        raw_ds = full_res_raw_ds
    else:
        raw_ds = "volumes/subsampled/raw/{0:}".format(subsample_variant)

    pad_size = Coordinate(output_size) + crop_width

    # add sources for all groundtruth labels
    all_srcs = []
    # The combined "all" labelset is always added (as the first source): per-label masks are constructed from its
    # downsampled version further down, because separate labelsets contain zeros.
    all_labels_ds = label_ds.format(label="all")
    logging.debug(add_source_msg.format(cropno=crop["number"], file=container_path, ds=all_labels_ds, ak=ak_labels))
    all_srcs.append(
        ZarrSource(container_path, {ak_labels: all_labels_ds})
        + Pad(ak_labels, pad_size)
        + DownSample(ak_labels, (2, 2, 2), ak_labels_downsampled)
    )

    for label in _label_filter(lambda l: l.separate_labelset, labels):
        present_annotated_ids = get_label_ids_by_category(crop, "present_annotated")
        if all(l in present_annotated_ids for l in label.labelid):
            ds = label_ds.format(label=label.labelname)
            assert ds in n5file, "separate dataset {ds:} not present in file {file:}".format(ds=ds,
                                                                                             file=n5file.store.path)
        else:
            ds = all_labels_ds
        logging.debug(add_source_msg.format(cropno=crop["number"], file=container_path, ds=ds, ak=label.gt_key))
        all_srcs.append(ZarrSource(container_path, {label.gt_key: ds}) + Pad(label.gt_key, pad_size))

    # Mask generators for labels without a stored mask dataset. Builtin `bool` replaces the `np.bool` alias, which
    # was removed from NumPy.
    def _mask_where_labeled(val):
        # True wherever the (downsampled) label array is > 0
        return ((val > 0) * 1).astype(bool)

    def _mask_nowhere(val):
        # all-False array with the shape of `val`
        return ((val > 0) * 0).astype(bool)

    # add mask source per label
    labelmask_srcs = []
    for label in labels:
        # Format into a separate variable so that the `labelmask_ds` template stays intact for the next label.
        label_mask_ds = labelmask_ds.format(label=label.labelname)
        if label_mask_ds in n5file:  # specified mask available:
            logging.debug(add_source_msg.format(
                cropno=crop["number"], file=container_path, ds=label_mask_ds, ak=label.mask_key))
            labelmask_srcs.append(
                ZarrSource(container_path, {label.mask_key: label_mask_ds})
                + Pad(label.mask_key, pad_size)
            )
            continue

        annotated_ids = get_all_annotated_label_ids(crop)
        if label.generic_label is not None:
            specific_labels = list(set(label.labelid) - set(label.generic_label))
            generic_condition = (all(l in annotated_ids for l in label.generic_label) or
                                 all(l in annotated_ids for l in specific_labels))
        else:
            generic_condition = False

        if all(l in annotated_ids for l in label.labelid) or generic_condition:
            f = _mask_where_labeled
        else:
            f = _mask_nowhere
        # The mask is derived from the downsampled labels of the first source instead of coming from a standalone
        # lambda source: the latter does not work because there are crops that are very close to each other, which
        # would lead to issues with masking.
        all_srcs[0] += LambdaFilter(f, ak_labels_downsampled, target_key=label.mask_key, target_spec=ArraySpec(
            dtype=bool, interpolatable=False))
    all_srcs.extend(labelmask_srcs)

    # add raw source
    logging.debug(add_source_msg.format(cropno=crop["number"], file=container_path, ds=raw_ds, ak=ak_raw))
    raw_src = (
        ZarrSource(
            container_path,
            {ak_raw: raw_ds},
            array_specs={ak_raw: ArraySpec(voxel_size=voxel_size_input)})
        + Pad(ak_raw, Coordinate(input_size), 0)
    )
    all_srcs.append(raw_src)

    # add gt mask source
    logging.debug(add_source_msg.format(cropno=crop["number"], file=container_path, ds=mask_ds, ak=ak_mask))
    mask_src = ZarrSource(
        container_path,
        {ak_mask: mask_ds},
        array_specs={ak_mask: ArraySpec(interpolatable=False, voxel_size=voxel_size)}
    )
    all_srcs.append(mask_src)

    # combine all sources and pick a random location
    crop_src = (
        tuple(all_srcs)
        + MergeProvider()
        + RandomLocation()
        + Reject(ak_mask, min_masked=keep_thr)
    )

    # contrast adjustment: linearly map the stored [min, max] range onto [0, 255]
    contr_adj = n5file[full_res_raw_ds].attrs["contrastAdjustment"]
    scale = 255.0 / (float(contr_adj["max"]) - float(contr_adj["min"]))
    shift = - scale * float(contr_adj["min"])
    logging.debug("Adjusting contrast with scale {scale:} and shift {shift:}".format(scale=scale, shift=shift))
    crop_src += IntensityScaleShift(ak_raw, scale, shift)
    return crop_src