Example #1
    def run(self):
        with h5py.File(self._filepath, "r") as h5_file:
            (internal_path, slicing) = self._request_queue_recv.recv()
            # 'None' means stop the process.
            while internal_path is not None:
                try:
                    if METHOD == "shared-array":
                        read_roi = slice_to_roi(slicing, h5_file[internal_path].shape)
                        read_roi = numpy.array(read_roi)
                        read_shape = read_roi[1] - read_roi[0]
                        num_bytes = h5_file[internal_path].dtype.itemsize * bigintprod(read_shape)
                        assert num_bytes <= self.available_bytes, "I don't yet support really big slicings"
                        read_array = numpy.frombuffer(self.transfer_buffer, dtype=numpy.uint8, count=num_bytes)
                        read_array.setflags(write=True)
                        read_array = read_array.view(h5_file[internal_path].dtype).reshape(read_shape)

                        h5_file[internal_path].read_direct(read_array, slicing)

                    if METHOD == "pipe-bytes" or METHOD == "pipe-array":
                        read_array = h5_file[internal_path][slicing]
                except Exception as ex:
                    self._result_queue_send.send(ex)
                    raise
                else:
                    self._result_queue_send.send((read_array.shape, read_array.dtype))

                    if METHOD == "pipe-array":
                        self._result_queue_send.send(read_array)

                    if METHOD == "pipe-bytes":
                        self._result_queue_send.send_bytes(numpy.getbuffer(read_array))

                # Wait for the next request
                (internal_path, slicing) = self._request_queue_recv.recv()
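Every snippet on this page relies on a bigintprod helper that is never shown. A minimal sketch of such a helper, assuming it simply multiplies the entries of a shape tuple using arbitrary-precision Python ints (so the product cannot overflow the way a fixed-width numpy integer could):

import functools
import operator


def bigintprod(xs):
    """Product of an iterable of integers, computed with plain Python ints."""
    return functools.reduce(operator.mul, (int(x) for x in xs), 1)

For example, bigintprod((2, 3, 4)) returns 24, and the product of a very large shape stays exact instead of wrapping around.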
Example #2
    def _handleCompletedRequest(self, roi, result):
        try:
            if self._allowParallelResults:
                # Signal the user with the result before the critical section
                self.resultSignal(roi, result)
        except Exception:
            # Always notify.
            with self._condition:
                self._failure_excinfo = sys.exc_info()
                self._condition.notify()
            raise

        with self._condition:
            try:
                if not self._allowParallelResults:
                    # Signal here, inside the critical section.
                    self.resultSignal(roi, result)

                # Report progress (if possible)
                if self._totalVolume is not None:
                    self._processedVolume += bigintprod(numpy.subtract(roi[1], roi[0]))
                    progress = 100 * self._processedVolume // self._totalVolume
                    self.progressSignal(progress)

                logger.debug("Request completed for roi: {}".format(roi))
                self._completed_count += 1
            finally:
                # Always notify in this finally section,
                #  even if the client result/progress handler raised.
                self._condition.notify()
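The handler above always calls notify(), even when the client handler raises, so a waiting thread can rely on being woken up. A hypothetical counterpart that consumes these notifications (the attribute name _total_requests is an assumption; the other attributes appear in the snippet):

    def _wait_until_finished(self):
        with self._condition:
            while self._completed_count < self._total_requests:
                if self._failure_excinfo is not None:
                    _, exc_value, exc_tb = self._failure_excinfo
                    raise exc_value.with_traceback(exc_tb)
                self._condition.wait()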
Example #3
    def _calculate_probabilities(self, roi):
        classifier = self.Classifier.value

        assert isinstance(
            classifier, LazyflowVectorwiseClassifierABC
        ), f"Classifier {classifier} must be sublcass of {LazyflowVectorwiseClassifierABC}"

        key = roi.toSlice()
        newKey = key[:-1]
        newKey += (slice(0, self.Image.meta.shape[-1], None),)

        with Timer() as features_timer:
            input_data = self.Image[newKey].wait()

        input_data = numpy.asarray(input_data, numpy.float32)
        shape = input_data.shape
        prod = bigintprod(shape[:-1])
        features = input_data.reshape((prod, shape[-1]))

        with Timer() as prediction_timer:
            probabilities = classifier.predict_probabilities(features)

        logger.debug(
            f"Features took {features_timer.seconds()} seconds."
            f" Prediction took {prediction_timer.seconds()} seconds. {roi}"
        )

        probabilities.shape = shape[:-1] + (probabilities.shape[-1],)
        return probabilities
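A tiny illustration of the reshape round-trip used above: the trailing channel axis is kept, all other axes are flattened into a single "pixel" axis for the classifier, and the per-class output is folded back into an image. The classifier call itself is left commented out here.

import numpy

image = numpy.random.rand(4, 5, 3).astype(numpy.float32)      # (y, x, c)
features = image.reshape((-1, image.shape[-1]))                # (20, 3)
# probabilities = classifier.predict_probabilities(features)   # (20, n_classes)
# probabilities.shape = image.shape[:-1] + (probabilities.shape[-1],)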
Example #4
    def setupOutputs(self):
        # Read the dataset meta-info from the HDF5 dataset
        self._h5N5File = self.H5N5File.value
        internalPath = self.InternalPath.value

        if internalPath not in self._h5N5File:
            raise OpStreamingH5N5Reader.DatasetReadError(internalPath)

        dataset = self._h5N5File[internalPath]

        try:
            # Read the axistags property without actually importing the data
            # Throws KeyError if 'axistags' can't be found
            axistagsJson = self._h5N5File[internalPath].attrs["axistags"]
            axistags = vigra.AxisTags.fromJSON(axistagsJson)
            axisorder = "".join(tag.key for tag in axistags)
            if "?" in axisorder:
                raise KeyError("?")
        except KeyError:
            # No axistags found.
            if "axes" in dataset.attrs:
                axisorder = "".join(dataset.attrs["axes"][::-1]).lower()
            else:
                axisorder = get_default_axisordering(dataset.shape)
            axistags = vigra.defaultAxistags(str(axisorder))

        assert len(axistags) == len(
            dataset.shape
        ), f"Mismatch between shape {dataset.shape} and axisorder {axisorder}"

        # Configure our slot meta-info
        self.OutputImage.meta.dtype = dataset.dtype.type
        self.OutputImage.meta.shape = dataset.shape
        self.OutputImage.meta.axistags = axistags

        # If the dataset specifies a datarange, add it to the slot metadata
        if "drange" in self._h5N5File[internalPath].attrs:
            self.OutputImage.meta.drange = tuple(
                self._h5N5File[internalPath].attrs["drange"])

        # Same for display_mode
        if "display_mode" in self._h5N5File[internalPath].attrs:
            self.OutputImage.meta.display_mode = str(
                self._h5N5File[internalPath].attrs["display_mode"])

        total_volume = bigintprod(self._h5N5File[internalPath].shape)
        chunks = self._h5N5File[internalPath].chunks
        if not chunks and total_volume > 1e8:
            self.OutputImage.meta.inefficient_format = True
            logger.warning(
                f"This dataset ({self._h5N5File.filename}{internalPath}) is NOT chunked. "
                f"Performance for 3D access patterns will be bad!")
        if chunks:
            self.OutputImage.meta.ideal_blockshape = chunks
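A small worked illustration of the fallback branch above, which reverses the stored "axes" attribute (presumably because N5 lists axes fastest-varying first, while vigra expects a C-order string). The attribute value here is made up:

axes_attr = ["x", "y", "z"]                 # hypothetical dataset.attrs["axes"]
axisorder = "".join(axes_attr[::-1]).lower()
assert axisorder == "zyx"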
Example #5
def export_from_tiled_volume(tiles_description_json_path, roi,
                             output_hdf5_path, output_dataset_name):
    """
    Export a cutout volume from a TiledVolume into an hdf5 dataset.

    Args:
        tiles_description_json_path: path to the TiledVolume's json description file.
        roi: The (start, stop) corners of the cutout region to export. (Must be tuple-of-tuples.)
        output_hdf5_path: The HDF5 file to export to.
        output_dataset_name: The name of the HDF5 dataset to write.  Will be deleted first if necessary.
    """
    if not os.path.exists(tiles_description_json_path):
        raise Exception("Description file does not exist: " +
                        tiles_description_json_path)

    start, stop = numpy.array(roi)
    shape = tuple(stop - start)

    tiled_volume = TiledVolume(tiles_description_json_path)

    with Timer() as timer:
        result_array = numpy.ndarray(shape, tiled_volume.description.dtype)

        logger.info("Reading cutout volume of shape: {}".format(shape))
        tiled_volume.read((start, stop), result_out=result_array)

        logger.info("Writing data to: {}/{}".format(output_hdf5_path,
                                                    output_dataset_name))
        with h5py.File(output_hdf5_path, "a") as output_h5_file:
            if output_dataset_name in output_h5_file:
                del output_h5_file[output_dataset_name]
            dset = output_h5_file.create_dataset(output_dataset_name,
                                                 shape,
                                                 result_array.dtype,
                                                 chunks=True,
                                                 data=result_array)
            try:
                import vigra
            except ImportError:
                pass
            else:
                # Attach axistags to the exported dataset, so ilastik
                #  automatically interprets the volume correctly.
                output_axes = tiled_volume.description.output_axes
                dset.attrs["axistags"] = vigra.defaultAxistags(
                    output_axes).toJSON()

        logger.info("Exported {:.1e} pixels in {:.1f} seconds.".format(
            bigintprod(shape), timer.seconds()))
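A worked example of the roi handling above: roi must be a tuple-of-tuples of (start, stop) corners, and the cutout shape is simply stop - start (the corner values below are made up):

import numpy

roi = ((10, 20, 0), (110, 220, 64))
start, stop = numpy.array(roi)
shape = tuple(stop - start)
assert shape == (100, 200, 64)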
Example #6
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            if self.cache is None:
                shape = self.Input.meta.shape
                # self.blockShape may contain None entries to indicate that the corresponding
                # axis should not be handled block-wise; None is replaced with the full image
                # extent along that axis.
                fullBlockShape = []
                for u, v in zip(self.blockShape.value, shape):
                    if u is not None:
                        fullBlockShape.append(u)
                    else:
                        fullBlockShape.append(v)
                fullBlockShape = numpy.array(fullBlockShape,
                                             dtype=numpy.float64)

                # data = self.inputs["Input"][:].wait()
                # split up requests into blocks

                numBlocks = numpy.ceil(shape / fullBlockShape).astype("int")
                blockCache = numpy.ndarray(shape=bigintprod(numBlocks),
                                           dtype=self.Output.meta.dtype)
                pool = RequestPool()
                # blocks holds the different roi keys for each of the blocks
                blocks = itertools.product(
                    *[list(range(i)) for i in numBlocks])
                blockKeys = []
                for b in blocks:
                    start = b * fullBlockShape
                    stop = b * fullBlockShape + fullBlockShape
                    stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                    blockKey = roiToSlice(start, stop)
                    blockKeys.append(blockKey)

                fun = self.inputs["Function"].value

                def predict_block(i):
                    data = self.Input[blockKeys[i]].wait()
                    blockCache[i] = fun(data)

                for i in range(len(blockKeys)):
                    pool.request(partial(predict_block, i))

                pool.wait()
                pool.clean()

                self.cache = [fun(blockCache)]
            return self.cache
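A small illustration of the block splitting used above: block indices from itertools.product are scaled by the block shape and clipped to the image shape, so blocks at the image border come out smaller. The shapes here are made up:

import itertools
import numpy

shape = numpy.array((5, 7))
fullBlockShape = numpy.array((4.0, 4.0))
numBlocks = numpy.ceil(shape / fullBlockShape).astype("int")    # [2 2]
for b in itertools.product(*[range(i) for i in numBlocks]):
    start = b * fullBlockShape
    stop = numpy.min(numpy.vstack((start + fullBlockShape, shape)), axis=0)
    print(start, stop)   # (0,0)-(4,4), (0,4)-(4,7), (4,0)-(5,4), (4,4)-(5,7)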
Example #7

    def get_intersecting_blocks(blockshape, roi, shape):
        """Find block indices for the given roi

        Wraps around lazyflow.roi.getIntersectingBlocks; the idea is that only
        the required blocks need to be allocated.

        Everything is in 'czyx' order.

        Args:
            blockshape (iterable): block shape
            roi (tuple): (start, stop); start is inclusive, stop is exclusive
            shape (iterable): shape of the full image

        Returns a tuple of
          * 'blocks_array': the array of intersecting blocks
          * 'block_offsets': the offset of each block within the sub-image
          * 'subimage_roi': the roi expressed in sub-image coordinates
          * 'subimage_shape': the shape of the sub-image
        """
        blocks = lazyflow.roi.getIntersectingBlocks(blockshape,
                                                    roi,
                                                    asarray=True)

        num_indexes = bigintprod(blocks.shape[0:-1])
        axiscount = blocks.shape[-1]
        blocks_array = numpy.reshape(blocks, (num_indexes, axiscount))

        block_aligned_subimage_start = blocks_array.min(axis=0)
        block_aligned_subimage_end = blocks_array.max(axis=0)

        assert (block_aligned_subimage_start == blocks_array).all(
            axis=1).any(), "roi does not seem to be block aligned"
        assert (block_aligned_subimage_end == blocks_array).all(
            axis=1).any(), "roi does not seem to be block aligned"

        # get the real end of the image:
        block_aligned_subimage_end += blockshape
        # take care of image border
        block_aligned_subimage_end = numpy.min(
            [block_aligned_subimage_end, shape], axis=0)

        subimage_shape = block_aligned_subimage_end - block_aligned_subimage_start
        block_offsets = blocks_array - block_aligned_subimage_start
        subimage_start = roi[0] - block_aligned_subimage_start
        subimage_roi = ((subimage_start), (subimage_start + (roi[1] - roi[0])))

        return blocks_array, block_offsets, subimage_roi, subimage_shape
Example #8
    def _ensureCached(self, entire_block_roi):
        """
        Ensure that the cache file for the given block is up-to-date.
        (Refresh it if it's dirty.)
        """
        block_start = tuple(entire_block_roi[0])
        block_file = self._getCacheFile(entire_block_roi)
        if block_start in self._dirtyBlocks:
            updated_cache = False
            with self._blockLocks[block_start]:
                # Check AGAIN now that we have the lock.
                # (Avoid doing this twice in parallel requests.)
                if block_start in self._dirtyBlocks:
                    # Can't write directly into the hdf5 dataset because
                    #  h5py.dataset.__getitem__ creates a copy, not a view.
                    # We must use a temporary numpy array to hold the data.
                    data = self.Input(*entire_block_roi).wait()
                    block_file["data"][...] = data
                    if self.Output.meta.has_mask:
                        block_file["mask"][...] = data.mask
                        block_file["fill_value"][...] = data.fill_value

                    if logger.isEnabledFor(logging.DEBUG):
                        uncompressed_size = bigintprod(
                            data.shape) * self._getDtypeBytes(data.dtype)
                        storage_size = block_file["data"].id.get_storage_size()
                        if "mask" in block_file:
                            storage_size += block_file[
                                "mask"].id.get_storage_size()
                        if "fill_value" in block_file:
                            storage_size += block_file[
                                "fill_value"].id.get_storage_size()
                        logger.debug(
                            "Storage for block: {} is {}. ({}% of original)".
                            format(block_start, storage_size,
                                   100 * storage_size / uncompressed_size))
                    with self._lock:
                        self._dirtyBlocks.remove(block_start)
                    updated_cache = True

            if updated_cache:
                # Now that the lock is released, signal that the cache was updated.
                self.Output._sig_value_changed()
                self.OutputHdf5._sig_value_changed()
                self.CleanBlocks._sig_value_changed()
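A generic sketch of the double-checked locking idiom used above (the names here are illustrative, not the operator's real attributes): the dirty set is checked once without the lock and again while holding it, so concurrent requests refresh a given block only once.

import threading

_dirty_blocks = {(0, 0, 0)}
_block_lock = threading.Lock()


def ensure_cached(block_start):
    if block_start in _dirty_blocks:              # cheap, unlocked check
        with _block_lock:
            if block_start in _dirty_blocks:      # authoritative check under the lock
                # ... expensive refresh happens exactly once ...
                _dirty_blocks.discard(block_start)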
Example #9
    def execute(self, slot, subindex, roi, result):
        if bigintprod(roi.stop - roi.start) > 1e9:
            logger.error(
                "Requesting a very large volume from DVID: {}\nIs that really what you meant to do?"
                .format(roi))

        # TODO: Modify accessor implementation to accept a pre-allocated array.

        # FIXME: Disabled throttling for now.  Need a better heuristic or explicit setting.
        #         # For "heavy" requests, we'll use the throttled accessor
        #         HEAVY_REQ_SIZE = 256*256*10
        #         if bigintprod(result.shape) > HEAVY_REQ_SIZE:
        #             accessor = self._throttled_accessor
        #         else:
        #             accessor = self._default_accessor

        accessor = self._default_accessor  # FIXME (see above)
        result[:] = accessor.get_ndarray(roi.start, roi.stop)
        return result
Example #10
    def read_subvolume(self, internal_path, slicing):
        self._request_queue_send.send((internal_path, slicing))
        response_info = self._result_queue_recv.recv()
        if isinstance(response_info, Exception):
            raise response_info
        shape, dtype = response_info

        if METHOD == "pipe-array":
            result = self._result_queue_recv.recv()

        if METHOD == "pipe-bytes":
            raw_buffer = self._result_queue_recv.recv_bytes()
            result = numpy.frombuffer(raw_buffer, dtype=dtype).reshape(shape)
            result.setflags(write=True)

        if METHOD == "shared-array":
            result = numpy.frombuffer(self.transfer_buffer, dtype=dtype, count=bigintprod(shape)).copy()
            result = result.reshape(shape)
        return result
Example #11
    def predict_probabilities_pixelwise(self, X, roi, axistags=None):
        """
        For each pixel in the given feature_image, predict the probabilities that the
        pixel belongs to each label class the classifier was trained with.

        X: An ND image.  Last axis must be channel.
        roi: The region of interest (start, stop) within feature_image to predict (e.g. without the halo region)
             Note: roi parameter should not include channel.
                   For example, a valid roi for a zyxc image could be ((0,0,0), (10,20,30))
        axistags: Optional.  A vigra.AxisTags object describing the feature_image.

        Returns: A multi-channel image (each channel corresponds to a different label class).
                 The result image size is determined by the roi parameter.
        """
        logger.debug("predicting PIXELWISE vigra RF")

        # This classifier doesn't benefit from any context around the input,
        #  so just strip it off and only use the given roi.
        assert len(roi[0]) == len(roi[1]) == X.ndim - 1
        X = X[roi_to_slice(*roi)]

        # reshape the image into a 2D feature matrix
        matrix_shape = (bigintprod(X.shape[:-1]), X.shape[-1])
        feature_matrix = numpy.reshape(X, matrix_shape)

        # Run classifier
        probabilities = self._vigra_rf.predictProbabilities(
            feature_matrix.view(numpy.ndarray))

        # Reshape into an image.
        # Choose the prediction image shape carefully:
        #
        # Most classifiers omit a channel entirely if there are no labels given for a particular class,
        # So the number of prediction channels we got is the same as the number of known_classes
        # But if the classifier attempts to "help us out" by including channels for "missing" labels,
        #  then we want to just return the whole thing.
        num_probability_channels = max(len(self.known_classes),
                                       probabilities.shape[-1])

        prediction_shape = X.shape[:-1] + (num_probability_channels, )
        return numpy.reshape(probabilities, prediction_shape)
Example #12
    def execute(self, slot, subindex, roi, result):
        input_data = self.Input(roi.start, roi.stop).wait()
        assert slot == self.Output

        n_segments = self.NumSegments.value

        if n_segments == 0:
            # If the number of supervoxels was not given, use a default proportional to the number of voxels
            n_segments = int(bigintprod(input_data.shape) / 2500)

        logger.debug(
            "calling skimage.segmentation.slic with {}".format(
                dict(
                    n_segments=n_segments,
                    compactness=self.Compactness.value,
                    max_iter=self.MaxIter.value,
                    multichannel=True,
                    enforce_connectivity=True,
                    convert2lab=False,
                )
            )
        )
        slic_sp = skimage.segmentation.slic(
            input_data,
            n_segments=n_segments,
            compactness=self.Compactness.value,
            max_iter=self.MaxIter.value,
            multichannel=True,
            enforce_connectivity=True,
            convert2lab=False,
        )  # Use multichannel=True with caution:
        # it gives slic() special behavior for 3-channel data (treating it as RGB),
        # which is only appropriate if the channels really are color channels
        # (not, say, 3 unrelated image features).

        # slic_sp has no channel axis, so insert that axis before copying to 'result'
        result[:] = slic_sp[..., None]
        # import IPython; IPython.embed()

        return result
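A worked example of the default supervoxel count above, roughly one supervoxel per 2500 voxels (the volume shape is made up; bigintprod is the same helper used throughout these examples):

shape = (100, 200, 50)
n_segments = int(bigintprod(shape) / 2500)
assert n_segments == 400      # 1,000,000 voxels / 2500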
Example #13
def remove_wrongly_sized_connected_components(a, min_size, max_size=None, in_place=False, bin_out=False):
    original_dtype = a.dtype

    if not in_place:
        a = a.copy()
    if min_size == 0 and (max_size is None or max_size > bigintprod(a.shape)):  # shortcut for efficiency
        if bin_out:
            numpy.place(a, a, 1)
        return a

    component_sizes = vigra_bincount(a)
    bad_sizes = component_sizes < min_size
    if max_size is not None:
        numpy.logical_or(bad_sizes, component_sizes > max_size, out=bad_sizes)
    del component_sizes

    bad_locations = bad_sizes[a]
    a[bad_locations] = 0
    del bad_locations
    if bin_out:
        # Replace non-zero values with 1
        numpy.place(a, a, 1)
    return numpy.asarray(a, dtype=original_dtype)
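A minimal usage sketch of the function above, assuming vigra_bincount counts label occurrences like numpy.bincount: label 2 below covers only one pixel, so with min_size=2 it is zeroed out while the other components survive.

import numpy

labels = numpy.array([0, 1, 1, 1, 2, 0, 3, 3], dtype=numpy.uint32)
cleaned = remove_wrongly_sized_connected_components(labels, min_size=2)
# cleaned == [0, 1, 1, 1, 0, 0, 3, 3]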
Example #14
    def __init__(self, slot: Slot, batchsize: int, iterate_axes: str):
        """
        Args:
            slot: slot to request data from
            batchsize: maximum number of requests to launch in parallel
        """
        self._slot = slot
        self._batchsize = batchsize
        self._q: Queue[Tuple[ROI_TUPLE, numpy.ndarray]] = Queue()
        self._items: Dict[int, numpy.ndarray] = {}

        self._index = 0
        self._roi_iter = _RoiIter(self._slot, iterate_axes=iterate_axes)
        self._max = len(self._roi_iter)

        self._roi_request_batch = RoiRequestBatch(
            outputSlot=self._slot,
            roiIterator=iter(self._roi_iter),
            totalVolume=bigintprod(self._slot.meta.shape),
            batchSize=self._batchsize,
            allowParallelResults=False,
        )
        self._roi_request_batch.resultSignal.subscribe(self._put)
Example #15
 def normalized_surface_area(shape):
     pairs = numpy.array(list(combinations(shape, 2)))
     surface_area = 2 * (pairs[:, 0] * pairs[:, 1]).sum()
     volume = bigintprod(shape)
     return surface_area / volume
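A worked example using the function above: for the same volume, a cube scores much lower than a thin slab, which is why the blockshape search in the later example keeps the candidate with the minimum normalized surface area.

print(normalized_surface_area((100, 100, 100)))   # 60000 / 1e6   = 0.06
print(normalized_surface_area((1000, 1000, 1)))   # 2004000 / 1e6 ≈ 2.0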
Example #16
    def execute(self, slot, subindex, roi, result):
        classifier = self.Classifier.value

        # Training operator may return 'None' if there was no data to train with
        skip_prediction = classifier is None

        # Shortcut: If the mask is totally zero, skip this request entirely
        if not skip_prediction and self.PredictionMask.ready():
            mask_roi = numpy.array((roi.start, roi.stop))
            mask_roi[:, -1:] = [[0], [1]]
            start, stop = list(map(tuple, mask_roi))
            mask = self.PredictionMask(start, stop).wait()
            skip_prediction = not numpy.any(mask)
            del mask

        if skip_prediction:
            result[:] = 0.0
            return result

        assert issubclass(
            type(classifier), LazyflowVectorwiseClassifierABC
        ), "Classifier is of type {}, which does not satisfy the LazyflowVectorwiseClassifierABC interface.".format(
            type(classifier)
        )

        key = roi.toSlice()
        newKey = key[:-1]
        newKey += (slice(0, self.Image.meta.shape[-1], None),)

        with Timer() as features_timer:
            input_data = self.Image[newKey].wait()

        input_data = numpy.asarray(input_data, numpy.float32)

        shape = input_data.shape
        prod = bigintprod(shape[:-1])
        features = input_data.reshape((prod, shape[-1]))
        features = self.SupervoxelFeatures.value
        # print("features before prediction {}".format(features))
        # features = get_supervoxel_features(features, self.SupervoxelSegmentation.value)
        # import ipdb; ipdb.set_trace()
        with Timer() as prediction_timer:
            probabilities = classifier.predict_probabilities(features)
        # import ipdb; ipdb.set_trace()
        probabilities = slic_to_mask(self.SupervoxelSegmentation.value, probabilities).reshape(
            -1, probabilities.shape[-1]
        )
        logger.debug(
            "Features took {} seconds, Prediction took {} seconds for roi: {} : {}".format(
                features_timer.seconds(), prediction_timer.seconds(), roi.start, roi.stop
            )
        )

        assert probabilities.shape[1] <= self.PMaps.meta.shape[-1], (
            "Error: Somehow the classifier has more label classes than expected:"
            " Got {} classes, expected {} classes".format(probabilities.shape[1], self.PMaps.meta.shape[-1])
        )

        # We're expecting a channel for each label class.
        # If we didn't provide at least one sample for each label,
        #  we may get back fewer channels.
        if probabilities.shape[1] < self.PMaps.meta.shape[-1]:
            # Copy to an array of the correct shape
            # This is slow, but it's an unusual case
            assert probabilities.shape[-1] == len(classifier.known_classes)
            full_probabilities = numpy.zeros(
                probabilities.shape[:-1] + (self.PMaps.meta.shape[-1],), dtype=numpy.float32
            )
            for i, label in enumerate(classifier.known_classes):
                full_probabilities[:, label - 1] = probabilities[:, i]

            probabilities = full_probabilities

        # Reshape to image
        probabilities.shape = shape[:-1] + (self.PMaps.meta.shape[-1],)

        # Copy only the prediction channels the client requested.
        result[...] = probabilities[..., roi.start[-1] : roi.stop[-1]]
        return result
Example #17
    def _chooseChunkshape(self, blockshape):
        """
        Choose an optimal chunkshape for our blockshape and Input shape.
        We assume access patterns vary more in space than in time or channel,
        so we keep t and c at single slices and aim for chunks of about 1 MiB.
        Furthermore, we use the function
          lazyflow.utility.chunkHelpers.chooseChunkShape()
        to preserve the aspect ratio of the input (at least approximately).
        """
        if blockshape is None:
            return None

        def isConsistent(idealshape):
            """
            check if ideal block shape and given block shape are consistent

            shapes are consistent if, for each dimension,
                * input is unready, or
                * blockshape equals fullshape, or
                * idealshape divides blockshape evenly
            """
            if not self.Input.ready():
                return True

            fullshape = self.Input.meta.shape
            return all(
                b == f or b % i == 0
                for i, b, f in zip(idealshape, blockshape, fullshape)
            )

        if not self._ignore_ideal_blockshape and self.Input.ready():
            # take the ideal chunk shape, but check if sane
            ideal = self.Input.meta.ideal_blockshape
            if ideal is not None:
                if len(ideal) == len(blockshape):
                    ideal = numpy.asarray(ideal, dtype=int)
                    for i, d in enumerate(ideal):
                        if d == 0:
                            ideal[i] = blockshape[i]
                    if not isConsistent(ideal):
                        logger.warning(
                            "{}: BlockShape and ideal_blockshape are "
                            "inconsistent {} vs {}".format(
                                self.name, blockshape, ideal))
                    else:
                        return tuple(ideal)
                else:
                    logger.warning(
                        "{}: Encountered meta.ideal_blockshape that does not fit the data"
                        .format(self.name))

        # we need to figure out an ideal chunk shape on our own

        # Start with a copy of blockshape
        axes = list(self.Output.meta.getTaggedShape().keys())
        taggedBlockShape = collections.OrderedDict(
            list(zip(axes, self._blockshape)))

        dtypeBytes = self._getDtypeBytes(self.Output.meta.dtype)

        desiredSpace = 1024**2 / float(dtypeBytes)

        if bigintprod(blockshape) <= desiredSpace:
            return blockshape

        # set t and c to 1
        for key in "tc":
            if key in taggedBlockShape:
                taggedBlockShape[key] = 1
        logger.debug("desired space: {}".format(desiredSpace))

        # extract only the spatial shape
        spatialKeys = [k for k in list(taggedBlockShape.keys()) if k in "xyz"]
        spatialShape = [taggedBlockShape[k] for k in spatialKeys]
        newSpatialShape = chooseChunkShape(spatialShape, desiredSpace)
        for k, v in zip(spatialKeys, newSpatialShape):
            taggedBlockShape[k] = v
        chunkShape = tuple(taggedBlockShape.values())
        logger.debug("Using chunk shape: {}".format(chunkShape))
        return chunkShape
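A worked example of the 1 MiB chunk target above: for float32 data the desired chunk volume is 1024**2 / 4 = 262144 pixels, so a 64 x 64 x 64 spatial chunk hits the target exactly.

dtypeBytes = 4                                  # float32
desiredSpace = 1024 ** 2 / float(dtypeBytes)
assert desiredSpace == 64 * 64 * 64 == 262144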
Example #18
 def __len__(self) -> int:
     return bigintprod(self._iterate_shape)
Example #19
    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.
        """
        input_shape = outputSlot.meta.shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel
        max_blockshape = outputSlot.meta.max_blockshape or input_shape

        num_channels = 1
        tagged_shape = outputSlot.meta.getTaggedShape()

        available_ram = Memory.getAvailableRamComputation()

        # Generally, we don't want to split requests across channels.
        if "c" in list(tagged_shape.keys()):
            num_channels = tagged_shape["c"]
            channel_index = list(tagged_shape.keys()).index("c")
            input_shape = input_shape[:channel_index] + input_shape[channel_index + 1:]
            max_blockshape = max_blockshape[:channel_index] + max_blockshape[channel_index + 1:]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the channel dimension.
                num_channels = ideal_blockshape[channel_index]
                ideal_blockshape = ideal_blockshape[:channel_index] + ideal_blockshape[channel_index + 1:]
            del tagged_shape["c"]

        # Generally, we don't want to join time slices
        if "t" in tagged_shape.keys():
            blockshape_time_steps = 1
            time_index = list(tagged_shape.keys()).index("t")
            input_shape = input_shape[:time_index] + input_shape[time_index + 1:]
            max_blockshape = max_blockshape[:time_index] + max_blockshape[time_index + 1:]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the time dimension.
                blockshape_time_steps = ideal_blockshape[time_index]
                ideal_blockshape = ideal_blockshape[:time_index] + ideal_blockshape[time_index + 1:]
                available_ram /= blockshape_time_steps
            del tagged_shape["t"]

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2*(bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 2 * outputSlot.meta.dtype(
            ).nbytes * num_channels + 4
            warnings.warn(
                "Unknown per-pixel RAM requirement.  Making a guess.")

        # Safety factor (fudge factor): Double the estimated RAM usage per pixel
        safety_factor = 2.0
        logger.info(
            "Estimated RAM usage per pixel is {} * safety factor ({})".format(
                Memory.format(ram_usage_per_requested_pixel), safety_factor))
        ram_usage_per_requested_pixel *= safety_factor

        if ideal_blockshape is None:
            blockshape = determineBlockShape(
                input_shape,
                (available_ram //
                 (self._num_threads * ram_usage_per_requested_pixel)))
            blockshape = tuple(numpy.minimum(max_blockshape, blockshape))
            warnings.warn("Chose an arbitrary request blockshape")
        else:
            logger.info("determining blockshape assuming available_ram is {}"
                        ", split between {} threads".format(
                            Memory.format(available_ram), self._num_threads))

            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            #
            # Also, it rarely makes sense to request more than one time slice, so we omit that, too. (See above.)
            blockshape = determine_optimal_request_blockshape(
                max_blockshape, ideal_blockshape,
                ram_usage_per_requested_pixel, self._num_threads,
                available_ram)
        # compute the RAM size of the block before adding back the t and c dimensions
        fmt = Memory.format(ram_usage_per_requested_pixel *
                            bigintprod(blockshape))
        # If we removed time and channel from consideration, add them back now before returning
        if "t" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:time_index] + (
                blockshape_time_steps, ) + blockshape[time_index:]

        if "c" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:channel_index] + (
                num_channels, ) + blockshape[channel_index:]

        logger.info("Chose blockshape: {}".format(blockshape))
        logger.info("Estimated RAM usage per block is {}".format(fmt))

        return blockshape
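A worked example of the conservative per-pixel RAM guess above, for a hypothetical float32 image with 3 channels:

import numpy

ram_guess = 2 * numpy.float32().nbytes * 3 + 4   # 2 * 4 bytes * 3 channels + 4
assert ram_guess == 28                           # doubled again by the safety factor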
Example #20
    def __init__(self,
                 outputSlot,
                 roi,
                 blockshape=None,
                 batchSize=None,
                 blockAlignment="absolute",
                 allowParallelResults=False):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request. If omitted, a default blockshape is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel.  This should not be necessary if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how the requests are aligned to blocks.  Choices are 'absolute' or 'relative'.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._num_threads = max(1, Request.global_thread_pool.num_workers)

        totalVolume = bigintprod(numpy.subtract(roi[1], roi[0]))

        if batchSize is None:
            batchSize = self._num_threads

        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ["relative", "absolute"]
        if blockAlignment == "relative":
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0]  # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    try:
                        block_start = next(block_iter)
                    except StopIteration:
                        # As of Python 3.7, not allowed to let StopIteration exceptions escape a generator
                        # https://www.python.org/dev/peps/pep-0479
                        break
                    else:
                        # Use offset blocking
                        offset_block_start = block_start - self._bigRoi[0]
                        offset_data_shape = numpy.subtract(
                            self._bigRoi[1], self._bigRoi[0])
                        offset_block_bounds = getBlockBounds(
                            offset_data_shape, blockshape, offset_block_start)

                        # Un-offset
                        block_bounds = (
                            offset_block_bounds[0] + self._bigRoi[0],
                            offset_block_bounds[1] + self._bigRoi[0],
                        )
                        logger.debug("Requesting Roi: {}".format(block_bounds))
                        yield block_bounds

        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)

            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    try:
                        block_start = next(block_iter)
                    except StopIteration:
                        # As of Python 3.7, not allowed to let StopIteration exceptions escape a generator
                        # https://www.python.org/dev/peps/pep-0479
                        break
                    else:
                        block_bounds = getBlockBounds(outputSlot.meta.shape,
                                                      blockshape, block_start)
                        block_intersecting_portion = getIntersection(
                            block_bounds, roi)

                        logger.debug("Requesting Roi: {}".format(block_bounds))
                        yield block_intersecting_portion

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(),
                                             totalVolume, batchSize,
                                             allowParallelResults)
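For reference, the "absolute" roiGen above can be written as a plain for loop, since PEP 479 only concerns StopIteration raised inside a generator body, which a for loop never leaks (the debug logging is omitted here):

def roiGen_absolute(block_starts, data_shape, blockshape, roi):
    for block_start in block_starts:
        block_bounds = getBlockBounds(data_shape, blockshape, block_start)
        yield getIntersection(block_bounds, roi)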
Example #21
def determine_optimal_request_blockshape(max_blockshape, ideal_blockshape,
                                         ram_usage_per_requested_pixel,
                                         num_threads, available_ram):
    """
    Choose a blockshape for requests subject to the following constraints:
    - not larger than max_blockshape in any dimension
    - not too large to run in parallel without exceeding available ram (according to num_threads and available_ram)

    Within those constraints, choose the largest blockshape possible.
    The blockshape will be chosen according to the following heuristics:
    - If any dimensions in ideal_blockshape are 0, prefer to expand those first until max_blockshape is reached.
    (The result is known as atomic_blockshape.)
    - After that, attempt to expand the blockshape by incrementing a dimension according to its width in atomic_blockshape.

    Note: For most use-cases, the ``ram_usage_per_requested_pixel`` parameter refers to the ram consumed when requesting ALL channels of an image.
          Therefore, you probably want to omit the channel dimension from your max_blockshape and ideal_blockshape parameters.

    >>> determine_optimal_request_blockshape( (1000,1000,100), (0,0,1), 4, 10, 1e6 )
    (158, 158, 1)

    >>> determine_optimal_request_blockshape( (1000,1000,100), (0,0,1), 4, 10, 1e9 )
    (1000, 1000, 24)

    """
    assert len(max_blockshape) == len(ideal_blockshape)

    # Convert to numpy for convenience.
    max_blockshape = numpy.asarray(max_blockshape)
    ideal_blockshape = numpy.asarray(ideal_blockshape)

    target_block_volume_bytes = available_ram // num_threads
    target_block_volume_pixels = max(
        4096, target_block_volume_bytes // ram_usage_per_requested_pixel)

    # Replace 0's in the ideal_blockshape with the corresponding piece of max_blockshape
    complete_ideal_blockshape = numpy.where(ideal_blockshape == 0,
                                            max_blockshape, ideal_blockshape)

    # Clip to max
    clipped_ideal_blockshape = numpy.minimum(max_blockshape,
                                             complete_ideal_blockshape)

    atomic_blockshape = determineBlockShape(clipped_ideal_blockshape,
                                            target_block_volume_pixels)
    atomic_blockshape = numpy.asarray(atomic_blockshape)

    if bigintprod(clipped_ideal_blockshape) >= target_block_volume_pixels:
        # Target volume is too small for us to stack the atomic blockshape, anyway
        return tuple(atomic_blockshape)

    # Need to stack the ideal_blockshape to come up with something larger.
    # Start with an isotropic block, clipped to the nearest multiple of the atomic_blockshape
    blockshape = numpy.array(
        determineBlockShape(clipped_ideal_blockshape,
                            target_block_volume_pixels))
    blockshape -= blockshape % atomic_blockshape

    while True:
        # Find a dimension of atomic_blockshape that isn't already maxed out,
        # and check whether we have enough RAM to expand the block along that dimension.
        candidate_blockshapes = []
        for index in range(len(blockshape)):
            # If we were to expand the blockshape in this dimension, would the block still fit in RAM?
            candidate_blockshape = blockshape.copy()
            candidate_blockshape[index] += clipped_ideal_blockshape[index]
            if (candidate_blockshape <=
                    max_blockshape).all() and (bigintprod(candidate_blockshape)
                                               < target_block_volume_pixels):
                candidate_blockshapes.append(candidate_blockshape)

        if len(candidate_blockshapes) == 0:
            break

        def normalized_surface_area(shape):
            pairs = numpy.array(list(combinations(shape, 2)))
            surface_area = 2 * (pairs[:, 0] * pairs[:, 1]).sum()
            volume = bigintprod(shape)
            return surface_area / volume

        # Choose the best among the candidates
        scores = list(map(normalized_surface_area, candidate_blockshapes))
        (best_shape, best_score) = min(zip(candidate_blockshapes, scores),
                                       key=lambda shape_score: shape_score[1])
        blockshape = best_shape

    return tuple(blockshape)
Example #22
    def execute(self, slot, subindex, roi, result):
        dtypeBytes = self._getDtypeBytes()
        totalBytes = dtypeBytes * bigintprod(self.Input.meta.shape)
        totalMB = totalBytes // (1000 * 1000)
        logger.info(
            "Clusterizing computation of {} MB dataset, outputting according to {}".format(
                totalMB, self.OutputDatasetDescription.value
            )
        )

        configFilePath = self.ConfigFilePath.value
        self._config = parseClusterConfigFile(configFilePath)

        # Create the destination file if necessary
        blockwiseFileset, taskInfos = self._prepareDestination()

        try:
            # Figure out which work doesn't need to be recomputed (if any)
            unneeded_rois = []
            for roi in list(taskInfos.keys()):
                if blockwiseFileset.getBlockStatus(
                    roi[0]
                ) == BlockwiseFileset.BLOCK_AVAILABLE or blockwiseFileset.isBlockLocked(
                    roi[0]
                ):  # We don't attempt to process currently locked blocks.
                    unneeded_rois.append(roi)

            # Remove any tasks that we don't need to compute (they were finished in a previous run)
            for roi in unneeded_rois:
                logger.info("No need to run task: {} for roi: {}".format(taskInfos[roi].taskName, roi))
                del taskInfos[roi]

            absWorkDir, _ = getPathVariants(self._config.server_working_directory, os.path.split(configFilePath)[0])
            if self._config.task_launch_server == "localhost":

                def localCommand(cmd):
                    cwd = os.getcwd()
                    os.chdir(absWorkDir)
                    subprocess.call(cmd, shell=True)
                    os.chdir(cwd)

                launchFunc = localCommand
            else:
                # We use fabric for executing remote tasks
                # Import it here because it isn't required that the nodes can use it.
                import fabric.api as fab

                @fab.hosts(self._config.task_launch_server)
                def remoteCommand(cmd):
                    with fab.cd(absWorkDir):
                        fab.run(cmd)

                launchFunc = functools.partial(fab.execute, remoteCommand)

            # Spawn each task
            for taskInfo in list(taskInfos.values()):
                logger.info("Launching node task: " + taskInfo.command)
                launchFunc(taskInfo.command)

            # Return immediately.  We do not attempt to monitor the task progress.
            result[0] = True
            return result
        finally:
            blockwiseFileset.close()
Example #23
def test_bigintprod(nums, result):
    assert bigintprod(nums) == result
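The bare test above is presumably driven by pytest parametrization; a minimal sketch of such a setup (the cases are chosen here purely for illustration):

import pytest


@pytest.mark.parametrize(
    "nums,result",
    [
        ((2, 3, 4), 24),
        ((1024, 1024, 1024, 1024), 2 ** 40),
    ],
)
def test_bigintprod(nums, result):
    assert bigintprod(nums) == result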
Example #24

    def predict_probabilities_pixelwise(self, X, roi, axistags=None):
        logger.debug("predicting PIXELWISE vigra RF")

        # This classifier doesn't benefit from any context around the input, (does it?)
        #  so just strip it off and only use the given roi.
        assert len(roi[0]) == len(roi[1]) == X.ndim - 1
        X = X[roi_to_slice(*roi)]

        FRAME_SPAN = 10  # Number of frames to wait until the mask is recalculated
        DILATION_RADIUS = 50  # In pixels
        BACKGROUND_LABEL = 1

        # Allocate memory for probability volume and mask
        prob_vol = numpy.zeros((X.shape[:-1] + (len(self._known_labels),)), dtype=numpy.float32)
        mask = numpy.ones(bigintprod(X.shape[1:-1]), dtype=bool)

        frm_cnt = 0

        for X_t in X:
            if frm_cnt % FRAME_SPAN == 0:
                mask = numpy.ones(bigintprod(X.shape[1:-1]), dtype=bool)

            prob_mat = numpy.zeros((bigintprod(X.shape[1:-1]), len(self._known_labels)), dtype=numpy.float32)

            # Reshape the image into a 2D feature matrix
            mat_shape = (bigintprod(X_t.shape[:-1]), X_t.shape[-1])
            feature_mat = numpy.reshape(X_t, mat_shape)

            # Mask the feature matrix
            feature_mat_masked = feature_mat[mask == 1, :]

            # Run classifier
            prob_mat_masked = self._vigra_rf.predictProbabilities(feature_mat_masked.view(numpy.ndarray))

            prob_mat[mask == 1, :] = prob_mat_masked
            prob_mat[mask == 0, 0] = 1.0  # Fill background

            prob_img = prob_mat.reshape((1,) + X_t.shape[:-1] + (prob_mat.shape[-1],))

            # Recalculate the mask every FRAME_SPAN frames
            if frm_cnt % FRAME_SPAN == 0:
                predicted_labels = numpy.argmax(prob_img[0], axis=-1) + 1
                prob_slice = (predicted_labels != BACKGROUND_LABEL).astype(bool)

                kernel = numpy.ones((DILATION_RADIUS * 2 + 1), dtype=bool)

                with Timer() as morpho_timer:
                    prob_slice_dilated = scipy.ndimage.binary_dilation(prob_slice, kernel[None, :])
                    prob_slice_dilated = scipy.ndimage.binary_dilation(prob_slice_dilated, kernel[:, None])

                logger.debug("[PROF] Morphology took {} ".format(morpho_timer.seconds()))

                mask = prob_slice_dilated.reshape(bigintprod(prob_slice_dilated.shape))

                # vigra.impex.writeHDF5(prob_slice_dilated, 'mask.h5', 'data')

            prob_vol[frm_cnt, :, :, :] = prob_img

            frm_cnt = frm_cnt + 1

        # Reshape into an image.
        # Choose the prediction image shape carefully:
        #
        # Most classifiers omit a channel entirely if there are no labels given for a particular class,
        # So the number of prediction channels we got is the same as the number of known_classes
        # But if the classifier attempts to "help us out" by including channels for "missing" labels,
        #  then we want to just return the whole thing.
        num_probability_channels = max(len(self.known_classes), prob_vol.shape[-1])

        prediction_shape = X.shape[:-1] + (num_probability_channels,)
        return numpy.reshape(prob_vol, prediction_shape)
Example #25
def getIntersectingBlocks(blockshape, roi, asarray=False):
    """
    Returns the start coordinate of each block that the given roi intersects.
    By default, returned as an array of shape (N,M) (N indexes with M coordinates each).
    If asarray=True, then the blocks are returned as an array of shape (D1,D2,D3,...DN,M)
    such that coordinates of spatially adjacent blocks are returned in adjacent entries of the array.

    (SEE ALSO: ``lazyflow.utility.blockwise_view``)

    For example:

    >>> block_starts = getIntersectingBlocks( (10, 20), [(15, 25),(23, 40)] )
    >>> block_starts.shape
    (2, 2)
    >>> print(block_starts)
    [[10 20]
     [20 20]]

    >>> block_starts = getIntersectingBlocks( (10, 20), [(15, 25),(23, 41)] )
    >>> block_starts.shape
    (4, 2)
    >>> print(block_starts)
    [[10 20]
     [10 40]
     [20 20]
     [20 40]]

    Now the same two examples, with asarray=True.  Note the shape of the result.

    >>> block_start_matrix = getIntersectingBlocks( (10, 20), [(15, 25),(23, 40)], asarray=True )
    >>> block_start_matrix.shape
    (2, 1, 2)
    >>> print(block_start_matrix)
    [[[10 20]]
    <BLANKLINE>
     [[20 20]]]

    >>> block_start_matrix = getIntersectingBlocks( (10, 20), [(15, 25),(23, 41)], asarray=True )
    >>> block_start_matrix.shape
    (2, 2, 2)
    >>> print(block_start_matrix)
    [[[10 20]
      [10 40]]
    <BLANKLINE>
     [[20 20]
      [20 40]]]


    This function works for negative rois, too.

    >>> block_starts = getIntersectingBlocks( (10, 20), [(-10, -5),(5, 5)] )
    >>> print(block_starts)
    [[-10 -20]
     [-10   0]
     [  0 -20]
     [  0   0]]
    """
    assert len(blockshape) == len(roi[0]) == len(
        roi[1]), "blockshape and roi are mismatched: {} vs {}".format(
            blockshape, roi)
    assert not numpy.any(numpy.isclose(
        blockshape,
        0)), f"blockshape ({blockshape}) should not contain zero elements"
    roistart = TinyVector(roi[0])
    roistop = TinyVector(roi[1])
    blockshape = TinyVector(blockshape)

    block_index_map_start = roistart // blockshape
    block_index_map_stop = (
        roistop + (blockshape - 1)
    ) // blockshape  # Add (blockshape-1) first as a faster alternative to ceil()
    block_index_map_shape = block_index_map_stop - block_index_map_start

    num_axes = len(blockshape)
    block_indices = numpy.indices(block_index_map_shape)
    block_indices = numpy.rollaxis(block_indices, 0, num_axes + 1)
    block_indices += block_index_map_start

    # Multiply by blockshape to get the list of start coordinates
    block_indices *= blockshape

    if asarray:
        return block_indices
    else:
        # Reshape into N*M matrix for easy iteration
        num_indexes = bigintprod(block_indices.shape[0:-1])
        axiscount = block_indices.shape[-1]
        return numpy.reshape(block_indices, (num_indexes, axiscount))