def create_bounding_boxes(chunk_size: tuple,
                          chunk_overlap: tuple = (0, 0, 0),
                          roi_start: tuple = None,
                          roi_stop: tuple = None,
                          layer_path: str = None,
                          mip: int = 0,
                          grid_size: tuple = None,
                          verbose: bool = True):
    """Tile a region of interest with a regular grid of chunk bounding boxes.

    Chunks are placed every ``chunk_size - chunk_overlap`` voxels, starting at
    ``roi_start``. Every box has the full ``chunk_size``, so the last box along
    an axis may reach past ``roi_stop``. All coordinates use the same axis
    order as the reversed dataset shape below (z, y, x).

    Parameters:
    -----------
    chunk_size : size of every chunk.
    chunk_overlap : overlap between neighbouring chunks.
    roi_start : first corner of the region. When omitted and ``layer_path``
        is given, it defaults to the dataset offset minus ``chunk_overlap``.
    roi_stop : end corner of the region. When omitted and ``layer_path`` is
        given, it defaults to dataset offset + dataset size; otherwise it is
        derived from ``grid_size``.
    layer_path : optional CloudVolume path used to fill in missing bounds.
    mip : mip level used when reading the dataset bounds.
    grid_size : number of chunks per axis; derived from the region when
        omitted.
    verbose : print the resulting grid layout.

    Returns:
    --------
    list of Bbox, one per grid position, with the last axis varying fastest.

    Raises:
    -------
    AssertionError : if ``roi_start`` cannot be determined, or the stride is
        not positive along an axis that holds more than one chunk.
    ValueError : if neither ``roi_stop`` nor ``grid_size`` can be determined.
    """
    if layer_path:
        vol = CloudVolume(layer_path, mip=mip)
        # dataset shape as z,y,x
        dataset_size = vol.mip_shape(mip)[:3][::-1]
        dataset_offset = vol.mip_voxel_offset(mip)[::-1]
        if roi_stop is None:
            roi_stop = Vec(
                *[o + s for o, s in zip(dataset_offset, dataset_size)])
        if roi_start is None:
            # note that we normally start from -overlap to keep the chunks aligned!
            roi_start = dataset_offset - chunk_overlap

    # Validate before any arithmetic so that missing bounds are reported
    # clearly instead of surfacing as a TypeError from `stride * None`.
    assert roi_start is not None, \
        'roi_start is required when layer_path is not given'
    if roi_stop is None and grid_size is None:
        raise ValueError(
            'either roi_stop, grid_size or layer_path must be given')

    chunk_size = Vec(*chunk_size)
    chunk_overlap = Vec(*chunk_overlap)
    stride = chunk_size - chunk_overlap
    if isinstance(grid_size, tuple):
        grid_size = Vec(*grid_size)
    if isinstance(roi_start, tuple):
        roi_start = Vec(*roi_start)

    if roi_stop is None:
        roi_stop = roi_start + stride * grid_size + chunk_overlap
    elif isinstance(roi_stop, tuple):
        roi_stop = Vec(*roi_stop)
    roi_size = roi_stop - roi_start

    if grid_size is None:
        grid_size = (roi_size - chunk_overlap) // stride + 1

    # the stride should not be zero if there is more than one chunk
    for g, s in zip(grid_size, stride):
        if g > 1:
            assert s > 0, 'stride must be positive when there are several chunks'

    final_output_stop = roi_start + (grid_size - 1) * stride + chunk_size
    if verbose:
        print('\nroi start: ', roi_start)
        print('stride: ', stride)
        print('grid size: ', grid_size)
        print('final output stop: ', final_output_stop)

    bboxes = []
    for (z, y, x) in product(range(grid_size[0]), range(grid_size[1]),
                             range(grid_size[2])):
        chunk_start = roi_start + Vec(z, y, x) * stride
        bboxes.append(Bbox.from_delta(chunk_start, chunk_size))
    return bboxes
def create_bounding_boxes(chunk_size:tuple, overlap: tuple=(0,0,0),
                    start:tuple=None, layer_path: str=None, mip:int=0, 
                    grid_size: tuple=None, verbose: bool=True):
    """Tile a volume with a regular grid of chunk bounding boxes.

    Chunks are placed every ``chunk_size - overlap`` voxels starting at
    ``start``; every box has the full ``chunk_size``. Coordinates use the same
    axis order as the reversed dataset shape below (z, y, x).

    Parameters:
    -----------
    chunk_size : size of every chunk.
    overlap : overlap between neighbouring chunks.
    start : first corner of the grid. Defaults to the dataset offset minus
        ``overlap`` (requires ``layer_path``).
    layer_path : CloudVolume path used to look up the dataset bounds. It is
        required whenever ``start`` or ``grid_size`` is omitted.
    mip : mip level used when reading the dataset bounds.
    grid_size : number of chunks per axis; derived from the dataset bounds
        when omitted (requires ``layer_path``).
    verbose : print the resulting grid layout.

    Returns:
    --------
    list of Bbox, one per grid position, with the last axis varying fastest.

    Raises:
    -------
    ValueError : if ``layer_path`` is missing although ``start`` or
        ``grid_size`` has to be derived from the dataset.
    AssertionError : if the stride is not positive along an axis that holds
        more than one chunk.
    """
    # The dataset bounds are only known with a layer; fail early with a clear
    # message instead of a NameError on dataset_offset / dataset_shape.
    if not layer_path and (start is None or grid_size is None):
        raise ValueError(
            'layer_path is required when start or grid_size is not given')

    if layer_path:
        vol = CloudVolume(layer_path, mip=mip)
        # dataset shape as z,y,x
        dataset_shape = vol.mip_shape(mip)[:3][::-1]
        dataset_offset = vol.mip_voxel_offset(mip)[::-1]

    chunk_size = Vec(*chunk_size)
    overlap = Vec(*overlap)
    stride = chunk_size - overlap

    if start is None:
        # note that we normally start from -overlap to keep the chunks aligned!
        start = dataset_offset - overlap
    else:
        start = Vec(*start)

    if grid_size is None:
        volume_size = dataset_shape - (start - dataset_offset)
        grid_size = (volume_size - overlap) // stride + 1

    # the stride should not be zero if there is more than one chunk
    for g, s in zip(grid_size, stride):
        if g > 1:
            assert s > 0, 'stride must be positive when there are several chunks'

    if verbose:
        print('\nstart: ', start)
        print('stride: ', stride)
        print('grid size: ', grid_size)
        print('chunk_size: ', chunk_size, '\n')

    # product() has no length, so tell tqdm how many boxes to expect
    total = int(grid_size[0]) * int(grid_size[1]) * int(grid_size[2])
    bboxes = []
    for (z, y, x) in tqdm(product(range(grid_size[0]), range(grid_size[1]),
                                  range(grid_size[2])), total=total):
        chunk_start = start + Vec(z, y, x) * stride
        bboxes.append(Bbox.from_delta(chunk_start, chunk_size))

    return bboxes
Example #3
0
    def from_manual_setup(cls,
                          chunk_size: Union[Vec, tuple],
                          chunk_overlap: Union[Vec, tuple] = Vec(0, 0, 0),
                          roi_start: Union[Vec, tuple] = None,
                          roi_stop: Union[Vec, tuple] = None,
                          roi_size: Union[Vec, tuple] = None,
                          grid_size: Union[Vec, tuple] = None,
                          respect_chunk_size: bool = True,
                          aligned_block_size: Union[Vec, tuple] = None,
                          layer_path: str = None,
                          mip: int = 0):
        """Build an instance from a regular grid of chunk bounding boxes.

        Chunks are placed every ``chunk_size - chunk_overlap`` voxels starting
        at ``roi_start``. Coordinates use the same axis order as the reversed
        dataset shape below (z, y, x).

        Parameters:
        -----------
        chunk_size : size of every chunk.
        chunk_overlap : overlap between neighbouring chunks.
        roi_start : first corner of the region. Required unless it can be
            read from ``layer_path``.
        roi_stop : end corner of the region.
        roi_size : size of the region.
        grid_size : number of chunks per axis. When ``roi_size``, ``roi_stop``
            and ``grid_size`` are all missing, a single chunk is produced.
        respect_chunk_size : when False, every box is cropped so that it does
            not reach past ``roi_stop``.
        aligned_block_size : when given, ``roi_start`` is rounded down and
            ``roi_stop`` rounded up to multiples of this block size. It must
            not exceed ``chunk_size`` and must divide it evenly.
        layer_path : optional source of the region bounds. For a path ending
            in ``.h5``, ``roi_start`` is read from any key containing
            ``offset`` (replacing the argument), ``roi_size`` defaults to the
            last three dimensions of a dataset whose key contains neither
            ``offset`` nor ``voxel_size``, and ``roi_stop`` is always set to
            ``roi_start + roi_size``. Any other path is opened with
            CloudVolume to fill in bounds that were not given.
        mip : mip level used when reading the CloudVolume bounds.

        Returns:
        --------
        ``cls(bboxes)`` where ``bboxes`` is the list of Bbox, one per grid
        position, with the last axis varying fastest.

        Raises:
        -------
        AssertionError : if ``roi_start`` cannot be determined, the h5 file
            does not exist, the alignment constraints are violated, or the
            stride is not positive along an axis with more than one chunk.
        """

        if layer_path:
            if layer_path.endswith('.h5'):
                assert os.path.exists(layer_path)
                with h5py.File(layer_path, mode='r') as file:
                    for key in file.keys():
                        if 'offset' in key:
                            roi_start = Vec(*(file[key]))
                        elif 'voxel_size' not in key:
                            if roi_size is None:
                                roi_size = Vec(*file[key].shape[-3:])
                if roi_start is None:
                    roi_start = Vec(0, 0, 0)
                roi_stop = roi_start + roi_size
            else:
                vol = CloudVolume(layer_path, mip=mip)
                # dataset shape as z,y,x
                dataset_size = vol.mip_shape(mip)[:3][::-1]
                dataset_offset = vol.mip_voxel_offset(mip)[::-1]
                if roi_size is None:
                    roi_size = Vec(*dataset_size)
                if roi_stop is None:
                    roi_stop = Vec(
                        *[o + s for o, s in zip(dataset_offset, dataset_size)])
                if roi_start is None:
                    # note that we normally start from -overlap to keep the chunks aligned!
                    roi_start = dataset_offset - chunk_overlap
        assert roi_start is not None

        if roi_size is None and roi_stop is None and grid_size is None:
            grid_size = Vec(1, 1, 1)

        if isinstance(chunk_size, tuple):
            chunk_size = Vec(*chunk_size)
        if isinstance(chunk_overlap, tuple):
            chunk_overlap = Vec(*chunk_overlap)
        if isinstance(roi_start, tuple):
            roi_start = Vec(*roi_start)
        if isinstance(roi_size, tuple):
            roi_size = Vec(*roi_size)
        if isinstance(grid_size, tuple):
            grid_size = Vec(*grid_size)
        if isinstance(roi_stop, tuple):
            roi_stop = Vec(*roi_stop)

        stride = chunk_size - chunk_overlap
        if roi_stop is None:
            if grid_size is not None:
                roi_stop = roi_start + stride * grid_size + chunk_overlap
            else:
                # Only roi_size was given (the all-missing case got a default
                # grid_size above); derive the stop from it rather than
                # multiplying the stride by None.
                roi_stop = roi_start + roi_size

        if aligned_block_size is not None:
            if not isinstance(aligned_block_size, Vec):
                aligned_block_size = Vec(*aligned_block_size)
            assert np.all(aligned_block_size <= chunk_size)
            # np.alltrue was removed in NumPy 2.0; np.all is the replacement
            assert np.all(chunk_size % aligned_block_size == 0)
            assert len(aligned_block_size) == 3
            assert len(roi_stop) == 3
            # Rebind instead of updating in place, so that Vec objects passed
            # in by the caller are not modified.
            roi_start = roi_start - roi_start % aligned_block_size
            stop_remainder = roi_stop % aligned_block_size
            # round up to the next block boundary; already aligned axes get +0
            roi_stop = roi_stop + (
                aligned_block_size - stop_remainder) % aligned_block_size
            # NOTE(review): an explicitly given roi_size is not adjusted to
            # the aligned bounds -- confirm whether that is intended.

        if roi_size is None:
            roi_size = roi_stop - roi_start

        if grid_size is None:
            grid_size = (roi_size - chunk_overlap) / stride
            grid_size = tuple(ceil(x) for x in grid_size)
            grid_size = Vec(*grid_size)

        # the stride should not be zero if there is more than one chunk
        for g, s in zip(grid_size, stride):
            if g > 1:
                assert s > 0

        final_output_stop = roi_start + (grid_size - 1) * stride + chunk_size
        logging.info(f'\nroi start: {roi_start}')
        logging.info(f'stride: {stride}')
        logging.info(f'grid size: {grid_size}')
        logging.info(f'final output stop: {final_output_stop}')

        print('grid size: ', grid_size)

        bboxes = []
        for (gz, gy, gx) in product(range(grid_size[0]), range(grid_size[1]),
                                    range(grid_size[2])):
            chunk_start = roi_start + Vec(gz, gy, gx) * stride
            bbox = Bbox.from_delta(chunk_start, chunk_size)
            if not respect_chunk_size:
                # crop boxes that stick out of the region
                bbox.maxpt = np.minimum(bbox.maxpt, roi_stop)
            bboxes.append(bbox)

        return cls(bboxes)
Example #4
0
class BigBrainVolume:
    """
    TODO use siibra requests cache
    
    """
    # function to switch x/y coordinates on a vector or matrix.
    # Note that direction doesn't matter here since the inverse is the same.
    switch_xy = lambda X: np.dot(np.identity(4)[[1, 0, 2, 3], :], X)

    # Gigabyte size that is considered feasible for ad-hoc downloads of
    # BigBrain data. This is used to avoid accidental huge downloads.
    gbyte_feasible = 0.5

    def __init__(self, ngsite, fill_missing=True):
        """
        Connect to a neuroglancer precomputed source and collect its metadata.

        Parameters:
        -----------
        ngsite : str
            base url of neuroglancer http location
        fill_missing : Boolean (default: True)
            passed through to CloudVolume
        """
        with requests.get(ngsite + '/transform.json') as r:
            # last column of the transform matrix holds the translation
            self._translation_nm = np.array(json.loads(r.content))[:, -1]
        with requests.get(ngsite + '/info') as r:
            self.info = json.loads(r.content)
        self.volume = CloudVolume(ngsite,
                                  fill_missing=fill_missing,
                                  progress=False)
        self.ngsite = ngsite
        self.nbits = np.iinfo(self.volume.info['data_type']).bits

        # Build the resolution table and help text BEFORE estimating the
        # bounding box: _bbox_phys() goes through _load_data(), which prints
        # self.helptext when a request exceeds the download threshold.
        self.resolutions_available = {
            np.min(v['resolution']) / 1000: {
                'mip': i,
                'GBytes': np.prod(v['size']) * self.nbits / (8 * 1024**3)
            }
            for i, v in enumerate(self.volume.scales)
        }
        self.helptext = "\n".join([
            "{:7.0f} micron {:10.4f} GByte".format(k, v['GBytes'])
            for k, v in self.resolutions_available.items()
        ])
        self.bbox_phys = self._bbox_phys()

    def largest_feasible_resolution(self):
        # returns the highest resolution in micrometer that is available and
        # still below the threshold of downloadable volume sizes.
        return min([
            res for res, v in self.resolutions_available.items()
            if v['GBytes'] < self.gbyte_feasible
        ])

    def affine(self, mip, clip=False):
        """
        Builds the affine matrix that maps voxels 
        at the given mip to physical space in mm.
        Parameters:
        -----------
        clip : Boolean, or Bbox
            If true, clip by computing the bounding box from nonempty pixels
            if False, get the complete data of the selected mip
            If Bbox, clip by this bounding box
        """

        # correct clipping offset, if needed
        voxelshift = np.identity(4)
        if (type(clip) == bool) and clip is True:
            voxelshift[:3, -1] = self._clipcoords(mip)[:3, 0]
        elif isinstance(clip, Bbox):
            voxelshift[:3, -1] = clip.minpt

        # retrieve the pixel resolution
        resolution_nm = self.info['scales'][mip]['resolution']

        # build affine matrix in nm physical space
        affine = np.identity(4)
        for i in range(3):
            affine[i, i] = resolution_nm[i]
            affine[i, -1] = self._translation_nm[i]

        # warp from nm to mm
        affine[:3, :] /= 1000000.

        return np.dot(affine, voxelshift)
        #return BigBrainVolume.switch_xy(np.dot(affine,voxelshift))

    def _clipcoords(self, mip):
        """
        Compute clip coordinates in voxels for the given mip from the
        pre-computed physical bounding box (self.bbox_phys).

        Column 0 of the result is limited from below by 0, column 1 from
        above by the shape of the mip.
        """
        logger.debug(
            "Computing bounding box coordinates at mip {}".format(mip))
        vox_from_phys = np.linalg.inv(self.affine(mip))
        corners = vox_from_phys.dot(self.bbox_phys).astype('int')

        # keep the box inside the actual extent of the mip
        lower = corners[:, 0]
        upper = corners[:, 1]
        corners[:, 0] = lower.clip(min=0)
        corners[:, 1] = np.minimum(upper, self.volume.mip_shape(mip))
        return corners

    def _load_data(self, mip, clip=False, force=False):
        """
        Actually load image data.
        The size of the request is checked beforehand against the
        gbyte_feasible threshold (TODO: estimate the actual data size).
        NOTE: this function caches the downloaded data as a numpy array
        (*.npy) in the file returned by retrieval.cachefile.
        
        Parameters:
        -----------
        mip : mip level to load
        clip : Boolean, or Bbox
            If true, clip by computing the bounding box from nonempty pixels
            if False, get the complete data of the selected mip
            If Bbox, clip by this bounding box
        force : Boolean (default: False)
            if true, will start downloads even if they exceed the download
            threshold set in the gbyte_feasible member variable.

        Raises:
        -------
        RuntimeError : if the request exceeds the threshold and force is
            not set.
        """
        if clip is True:
            clipcoords = self._clipcoords(mip)
            bbox = Bbox(clipcoords[:3, 0], clipcoords[:3, 1])
        elif isinstance(clip, Bbox):
            # make sure the bounding box is integer, some are not
            bbox = Bbox(
                np.array(clip.minpt).astype('int'),
                np.array(clip.maxpt).astype('int'))
        else:
            bbox = Bbox([0, 0, 0], self.volume.mip_shape(mip))

        gbytes = bbox.volume() * self.nbits / (8 * 1024**3)
        # Compare against self.gbyte_feasible (not the class attribute), so
        # the check agrees with the limit reported in the message below and
        # with largest_feasible_resolution().
        if not force and gbytes > self.gbyte_feasible:
            # TODO would better do an estimate of the actual data size
            logger.error(
                "Data request is too large (would result in an ~{:.2f} GByte download, the limit is {})."
                .format(gbytes, self.gbyte_feasible))
            print(self.helptext)
            raise RuntimeError(
                "The requested resolution is too high to provide a feasible download, but you can override this behavior with the 'force' parameter."
            )

        # the cache key combines source url, bounding box and mip
        cachefile = retrieval.cachefile("{}{}{}".format(
            self.ngsite, bbox.serialize(), str(mip)).encode('utf8'),
                                        suffix='npy')
        if os.path.exists(cachefile):
            return np.load(cachefile)

        data = np.array(self.volume.download(bbox=bbox, mip=mip))
        np.save(cachefile, data)
        return data

    def determine_mip(self, resolution=None):
        # given a resolution in micrometer, try to determine the mip that can
        # be used to move on.
        if resolution is None:
            maxres = self.largest_feasible_resolution()
            logger.info(
                'Using the largest feasible resolution of {} micron'.format(
                    maxres))
            return self.resolutions_available[maxres]['mip']
        elif resolution in self.resolutions_available.keys():
            return self.resolutions_available[resolution]['mip']
        logger.error(
            'The requested resolution ({} micron) is not available. Choose one of:'
            .format(resolution))
        print(self.helptext)
        return None

    def build_image(self,
                    resolution,
                    clip=True,
                    transform=lambda I: I,
                    force=False):
        """
        Compute and return a spatial image for the given resolution.
        
        Parameters:
        -----------
        resolution : resolution in micrometer, or None to use the largest
            feasible resolution (see determine_mip)
        clip : Boolean, or Bbox
            If true, clip by computing the bounding box from nonempty pixels
            if False, get the complete data of the selected mip
            If Bbox, clip by this bounding box
        transform : callable applied to the loaded array before it is
            wrapped into the image (default: identity)
        force : Boolean (default: False)
            If true, will start downloads even if they exceed the download
            threshold set in the gbyte_feasible member variable.

        Raises:
        -------
        ValueError : if no mip is available for the requested resolution.
        """
        mip = self.determine_mip(resolution)
        # determine_mip signals failure with None; mip 0 is a valid index and
        # must not be rejected by a truthiness test.
        if mip is None:
            raise ValueError(
                "Invalid image resolution for this neuroglancer precomputed tile source."
            )
        return nib.Nifti1Image(transform(self._load_data(mip, clip, force)),
                               affine=self.affine(mip, clip))

    def _enclosing_chunkgrid(self, mip, bbox_phys):
        """
        Produce grid points representing the chunks of the mip 
        which enclose a given bounding box. The bounding box is given in
        physical coordinates, but the grid is returned in voxel spaces of the
        given mip.
        """

        # some helperfunctions to produce the smallest range on a grid enclosing another range
        cfloor = lambda x, s: int(np.floor(x / s) * s)
        cceil = lambda x, s: int(np.ceil(x / s) * s) + 1
        crange = lambda x0, x1, s: np.arange(cfloor(x0, s), cceil(x1, s), s)

        # project the bounding box to the voxel grid of the selected mip
        bb = np.dot(np.linalg.inv(self.affine(mip)), bbox_phys)

        # compute the enclosing chunk grid
        chunksizes = self.volume.scales[mip]['chunk_sizes'][0]
        x, y, z = [crange(bb[i][0], bb[i][1], chunksizes[i]) for i in range(3)]
        xx, yy, zz = np.meshgrid(x, y, z)
        return np.vstack(
            [xx.ravel(),
             yy.ravel(),
             zz.ravel(),
             zz.ravel() * 0 + 1])

    def _bbox_phys(self):
        """
        Estimate the bounding box of the data volume in physical
        coordinates: the box that bbox3d computes on the unclipped data
        of mip -1 is mapped through the affine of that mip.
        According to the original note, this is the lowest resolution
        (chosen for efficiency), so the estimate is not fully accurate.
        """
        coarsest = -1
        data = self._load_data(coarsest, clip=False)
        vox2phys = self.affine(coarsest, clip=False)
        return np.dot(vox2phys, bbox3d(data))