Example #1
    def delete(self, bbox, mip=None):
        if mip is None:
            mip = self.config.mip

        # Deletion is only permitted on chunk-aligned bounding boxes, so snap
        # the request to chunk boundaries and verify nothing changed.
        bbox = Bbox.create(bbox, self.meta.bounds(mip), bounded=True)
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to delete non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        # Enumerate every chunk file covered by the bounding box.
        cloudpaths = list(
            chunknames(realized_bbox,
                       self.meta.bounds(mip),
                       self.meta.key(mip),
                       self.meta.chunk_size(mip),
                       protocol=self.meta.path.protocol))

        # Remove the chunk files from cloud storage.
        with Storage(self.meta.cloudpath,
                     progress=self.config.progress) as storage:
            storage.delete_files(cloudpaths)

        # Also remove any locally cached copies of those chunks.
        if self.cache.enabled:
            with Storage('file://' + self.cache.path,
                         progress=self.config.progress) as storage:
                storage.delete_files(cloudpaths)
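
A minimal usage sketch for this method, assuming a CloudVolume instance `vol` opened on a precomputed layer and that `delete` is exposed on the volume frontend (the bucket path and bounds below are hypothetical):

    from cloudvolume import CloudVolume, Bbox

    vol = CloudVolume('gs://example-bucket/dataset/layer')  # hypothetical path
    # The region must be chunk aligned or an AlignmentError is raised.
    vol.delete(Bbox((0, 0, 0), (1024, 1024, 64)), mip=0)
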
Example #2
    def delete(self, bbox, mip=None):
        if mip is None:
            mip = self.config.mip

        # Refuse to delete from a mip level that is write locked.
        if mip in self.meta.locked_mips():
            raise exceptions.ReadOnlyException(
                "MIP {} is currently write locked. If this should not be the case, run vol.meta.unlock_mip({})."
                .format(mip, mip))

        bbox = Bbox.create(bbox, self.meta.bounds(mip), bounded=True)
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to delete non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        cloudpaths = lambda: chunknames(
            realized_bbox,
            self.meta.bounds(mip),
            self.meta.key(mip),
            self.meta.chunk_size(mip),
            protocol=self.meta.path.protocol
        )  # need to regenerate so that generator isn't used up

        # Delete the chunk files from cloud storage.
        CloudFiles(self.meta.cloudpath, progress=self.config.progress, secrets=self.config.secrets) \
          .delete(cloudpaths())

        if self.cache.enabled:
            CloudFiles('file://' + self.cache.path, progress=self.config.progress, secrets=self.config.secrets) \
              .delete(cloudpaths())
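
The lambda wrapping of `cloudpaths` matters because Python generators are single-use; the second `.delete()` call would otherwise receive an exhausted generator. A standalone sketch of that pitfall:

    # Generators are single-use: iterating twice yields nothing the second time.
    gen = (i for i in range(3))
    print(list(gen))  # [0, 1, 2]
    print(list(gen))  # [] -- exhausted

    # Wrapping construction in a lambda produces a fresh generator per call.
    make_gen = lambda: (i for i in range(3))
    print(list(make_gen()))  # [0, 1, 2]
    print(list(make_gen()))  # [0, 1, 2]
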
Example #3
    def make_shard(self, img, bbox, mip=None, spec=None, progress=False):
        """
    Convert an image that represents a single complete shard 
    into a shard file.
  
    img: a volumetric numpy array image
    bbox: the bbox it represents in voxel coordinates
    mip: if specified, use the sharding specification from 
      this mip level, otherwise use the sharding spec from
      the current implicit mip level in config.
    spec: use the provided specification (overrides mip parameter)

    Returns: (filename, shard_file)
    """
        mip = mip if mip is not None else self.config.mip
        scale = self.meta.scale(mip)

        if spec is None:
            if 'sharding' in scale:
                spec = sharding.ShardingSpecification.from_dict(
                    scale['sharding'])
            else:
                raise ValueError(
                    "mip {} does not have a sharding specification.".format(
                        mip))

        bbox = Bbox.create(bbox)
        if bbox.subvoxel():
            raise ValueError(
                "Bounding box is too small to make a shard. Got: {}".format(
                    bbox))

        # Alignment Checks:
        # 1. Aligned to atomic chunks - required for grid point generation
        aligned_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        if bbox != aligned_bbox:
            raise exceptions.AlignmentError(
                "Unable to create shard from a non-chunk aligned bounding box. Requested: {}, Aligned: {}"
                .format(bbox, aligned_bbox))

        # 2. Covers the dataset at least partially
        aligned_bbox = Bbox.clamp(aligned_bbox, self.meta.bounds(mip))
        if aligned_bbox.subvoxel():
            raise exceptions.OutOfBoundsError(
                "Shard completely outside dataset: Requested: {}, Dataset: {}".
                format(bbox, self.meta.bounds(mip)))

        grid_size = self.grid_size(mip)
        chunk_size = self.meta.chunk_size(mip)
        reader = sharding.ShardReader(self.meta, self.cache, spec)

        # 3. Gridpoints all within this one shard
        gpts = list(gridpoints(aligned_bbox, self.meta.bounds(mip),
                               chunk_size))
        morton_codes = compressed_morton_code(gpts, grid_size)
        # reduce() collapses the filenames to a single value that stays truthy
        # only if every morton code maps to the same shard file.
        all_same_shard = bool(
            reduce(lambda a, b: operator.eq(a, b) and a,
                   map(reader.get_filename, morton_codes)))

        if not all_same_shard:
            raise exceptions.AlignmentError(
                "The gridpoints for this image did not all correspond to the same shard. Got: {}"
                .format(bbox))

        labels = {}
        pt_anchor = gpts[0] * chunk_size
        for pt_abs, morton_code in zip(gpts, morton_codes):
            cutout_bbx = Bbox(pt_abs * chunk_size, (pt_abs + 1) * chunk_size)

            # Neuroglancer expects border chunks not to extend beyond dataset bounds
            cutout_bbx.maxpt = cutout_bbx.maxpt.clip(
                None, self.meta.volume_size(mip))
            cutout_bbx -= pt_anchor

            chunk = img[cutout_bbx.to_slices()]
            labels[morton_code] = chunks.encode(
                chunk,
                self.meta.encoding(mip),
                block_size=self.meta.compressed_segmentation_block_size(mip),
            )

        shard_filename = reader.get_filename(first(labels.keys()))

        return (shard_filename, spec.synthesize_shard(labels,
                                                      progress=progress))
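
A hedged usage sketch: download exactly one shard's worth of data and re-encode it as a shard file. The paths are hypothetical, and this assumes the method is reachable as `vol.image.make_shard` and that the bounding box covers exactly one shard:

    from cloudvolume import CloudVolume, Bbox
    from cloudfiles import CloudFiles

    vol = CloudVolume('gs://example-bucket/dataset/layer', mip=0)  # hypothetical
    bbox = Bbox((0, 0, 0), (2048, 2048, 64))  # chunk aligned, exactly one shard
    img = vol.download(bbox, mip=0)

    filename, shard = vol.image.make_shard(img, bbox, mip=0)
    CloudFiles(vol.cloudpath).put(f'{vol.key}/{filename}', shard)
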
Example #4
    def transfer_to(self,
                    cloudpath,
                    bbox,
                    mip,
                    block_size=None,
                    compress=True,
                    compress_level=None):
        """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed.
    compress_level (int): compression level to pass to the compressor
      when compress is enabled.
    """
        from cloudvolume import CloudVolume

        if mip is None:
            mip = self.config.mip

        if self.is_sharded(mip):
            raise exceptions.UnsupportedFormatError(
                f"Sharded sources are not supported. got: {self.meta.cloudpath}"
            )

        bbox = Bbox.create(bbox, self.meta.bounds(mip))
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        default_block_size_MB = 50  # MB
        chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
            self.meta.dtype).itemsize * self.meta.num_channels
        if self.meta.layer_type == 'image':
            # a rough average guess for EM datasets; observed ratios range from 1.1x to 1.9x
            # affinities are also images, but have very different compression ratios, e.g. 3x for kempressed
            chunk_MB /= 1.3
        else:  # segmentation
            chunk_MB /= 100.0  # compression ratios between 80 and 800....
        chunk_MB /= 1024.0 * 1024.0

        if block_size:
            step = block_size
        else:
            step = int(default_block_size_MB // chunk_MB) + 1

        try:
            destvol = CloudVolume(cloudpath, mip=mip)
        except exceptions.InfoUnavailableError:
            destvol = CloudVolume(cloudpath,
                                  mip=mip,
                                  info=self.meta.info,
                                  provenance=self.meta.provenance.serialize())
            destvol.commit_info()
            destvol.commit_provenance()
        except exceptions.ScaleUnavailableError:
            destvol = CloudVolume(cloudpath)
            for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
                destvol.scales.append(self.meta.scales[i])
            destvol.commit_info()
            destvol.commit_provenance()

        if destvol.image.is_sharded(mip):
            raise exceptions.UnsupportedFormatError(
                f"Sharded destinations are not supported. got: {destvol.cloudpath}"
            )

        num_blocks = np.ceil(
            self.meta.bounds(mip).volume() /
            self.meta.chunk_size(mip).rectVolume()) / step
        num_blocks = int(np.ceil(num_blocks))

        cloudpaths = chunknames(bbox,
                                self.meta.bounds(mip),
                                self.meta.key(mip),
                                self.meta.chunk_size(mip),
                                protocol=self.meta.path.protocol)

        pbar = tqdm(
            desc='Transferring Blocks of {} Chunks'.format(step),
            unit='blocks',
            disable=(not self.config.progress),
            total=num_blocks,
        )

        cfsrc = CloudFiles(self.meta.cloudpath, secrets=self.config.secrets)
        cfdest = CloudFiles(cloudpath)

        def check(files):
            errors = [
                file for file in files
                if file['content'] is None or file['error'] is not None
            ]
            if errors:
                error_paths = [f['path'] for f in errors]
                raise exceptions.EmptyFileException(
                    "{} were empty or had IO errors.".format(
                        ", ".join(error_paths)))
            return files

        with pbar:
            for srcpaths in sip(cloudpaths, step):
                files = check(cfsrc.get(srcpaths, raw=True))
                cfdest.puts(compression.transcode(files,
                                                  encoding=compress,
                                                  level=compress_level,
                                                  in_place=True),
                            compress=compress,
                            content_type=tx.content_type(destvol),
                            raw=True)
                pbar.update()
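
A minimal usage sketch, assuming `transfer_to` is exposed on the volume frontend and using hypothetical bucket paths:

    from cloudvolume import CloudVolume

    src = CloudVolume('gs://example-bucket/src-layer', mip=0)  # hypothetical
    # Copies raw chunk files; both source and destination must be unsharded.
    src.transfer_to('gs://example-bucket/dest-layer', src.bounds, mip=0)
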
Example #5
    def transfer_to(self,
                    cloudpath,
                    bbox,
                    mip,
                    block_size=None,
                    compress=True,
                    compress_level=None):
        """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed.
    compress_level (int): compression level to pass to the compressor
      when compress is enabled.
    """
        from cloudvolume import CloudVolume

        if mip is None:
            mip = self.config.mip

        bbox = Bbox.create(bbox, self.meta.bounds(mip))
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        default_block_size_MB = 50  # MB
        chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
            self.meta.dtype).itemsize * self.meta.num_channels
        if self.meta.layer_type == 'image':
            # a rough average guess for EM datasets; observed ratios range from 1.1x to 1.9x
            # affinities are also images, but have very different compression ratios, e.g. 3x for kempressed
            chunk_MB /= 1.3
        else:  # segmentation
            chunk_MB /= 100.0  # compression ratios between 80 and 800....
        chunk_MB /= 1024.0 * 1024.0

        if block_size:
            step = block_size
        else:
            step = int(default_block_size_MB // chunk_MB) + 1

        try:
            destvol = CloudVolume(cloudpath, mip=mip)
        except exceptions.InfoUnavailableError:
            destvol = CloudVolume(cloudpath,
                                  mip=mip,
                                  info=self.meta.info,
                                  provenance=self.meta.provenance.serialize())
            destvol.commit_info()
            destvol.commit_provenance()
        except exceptions.ScaleUnavailableError:
            destvol = CloudVolume(cloudpath)
            for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
                destvol.scales.append(self.meta.scales[i])
            destvol.commit_info()
            destvol.commit_provenance()

        num_blocks = np.ceil(
            self.meta.bounds(mip).volume() /
            self.meta.chunk_size(mip).rectVolume()) / step
        num_blocks = int(np.ceil(num_blocks))

        cloudpaths = chunknames(bbox,
                                self.meta.bounds(mip),
                                self.meta.key(mip),
                                self.meta.chunk_size(mip),
                                protocol=self.meta.path.protocol)

        pbar = tqdm(
            desc='Transferring Blocks of {} Chunks'.format(step),
            unit='blocks',
            disable=(not self.config.progress),
            total=num_blocks,
        )

        with pbar:
            with Storage(self.meta.cloudpath) as src_stor:
                with Storage(cloudpath) as dest_stor:
                    for _ in range(num_blocks, 0, -1):
                        srcpaths = list(itertools.islice(cloudpaths, step))
                        files = src_stor.get_files(srcpaths)
                        files = [(f['filename'], f['content']) for f in files]
                        dest_stor.put_files(
                            files=files,
                            compress=compress,
                            compress_level=compress_level,
                            content_type=tx.content_type(destvol),
                        )
                        pbar.update()
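
The batching loop above pulls `step` paths at a time from the `cloudpaths` generator via `itertools.islice`; a standalone sketch of that pattern:

    import itertools

    paths = (f'chunk_{i}' for i in range(10))
    while True:
        batch = list(itertools.islice(paths, 4))
        if not batch:
            break
        print(batch)  # at most 4 paths per batch
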
Example #6
    def make_shard(self, img, bbox, mip=None, spec=None):
        """
    Convert an image that represents a single complete shard 
    into a shard file.
  
    img: a volumetric numpy array image
    bbox: the bbox it represents in voxel coordinates
    mip: if specified, use the sharding specification from 
      this mip level, otherwise use the sharding spec from
      the current implicit mip level in config.
    spec: use the provided specification (overrides mip parameter)

    Returns: (filename, shard_file)
    """
        mip = mip if mip is not None else self.config.mip
        scale = self.meta.scale(mip)

        if spec is None:
            if 'sharding' in scale:
                spec = sharding.ShardingSpecification.from_dict(
                    scale['sharding'])
            else:
                raise ValueError(
                    "mip {} does not have a sharding specification.".format(
                        mip))

        # Normalize the input first; a raw list or tuple has no subvoxel().
        bbox = Bbox.create(bbox, self.meta.bounds(mip), bounded=True)
        if bbox.subvoxel():
            raise ValueError(
                "Bounding box is too small to make a shard. Got: {}".format(
                    bbox))

        aligned_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        aligned_bbox = Bbox.clamp(aligned_bbox, self.meta.bounds(mip))

        if bbox != aligned_bbox:
            raise exceptions.AlignmentError(
                "Unable to create shard from a non-chunk aligned bounding box. Requested: {}, Aligned: {}"
                .format(bbox, aligned_bbox))

        grid_size = self.grid_size(mip)
        chunk_size = self.meta.chunk_size(mip)
        reader = sharding.ShardReader(self.meta, self.cache, spec)

        gpts = lambda: gridpoints(bbox, self.meta.bounds(mip), chunk_size)
        all_same_shard = bool(
            reduce(
                lambda a, b: operator.eq(a, b) and a,
                map(
                    reader.get_filename,
                    map(lambda gpt: compressed_morton_code(gpt, grid_size),
                        gpts()))))

        if not all_same_shard:
            raise exceptions.AlignmentError(
                "The gridpoints for this image did not all correspond to the same shard. Got: {}"
                .format(bbox))

        labels = {}
        pt_anchor = next(gpts())
        for pt_abs in gpts():
            pt_rel = pt_abs - pt_anchor
            cutout_bbx = Bbox(pt_rel * chunk_size, (pt_rel + 1) * chunk_size)
            chunk = img[cutout_bbx.to_slices()]
            morton_code = compressed_morton_code(pt_abs, grid_size)
            labels[morton_code] = chunks.encode(chunk, self.meta.encoding(mip))

        shard_filename = reader.get_filename(first(labels.keys()))

        return (shard_filename, spec.synthesize_shard(labels))
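
For reference, an illustrative sketch of a plain 3D Morton code, which interleaves the bits of the x, y, z grid coordinates so nearby chunks map to nearby codes. This `morton3` helper is hypothetical and is not the library's exact `compressed_morton_code`; the compressed variant additionally skips bits for grid dimensions that are already exhausted:

    def morton3(x, y, z, bits=10):
        # Interleave bit i of x, y, z into positions 3i, 3i+1, 3i+2.
        code = 0
        for i in range(bits):
            code |= ((x >> i) & 1) << (3 * i)
            code |= ((y >> i) & 1) << (3 * i + 1)
            code |= ((z >> i) & 1) << (3 * i + 2)
        return code

    print(morton3(1, 0, 0))  # 1
    print(morton3(0, 1, 0))  # 2
    print(morton3(1, 1, 1))  # 7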