Example #1
def test_sharded_image_bits(scale):
    dataset_size = Vec(*scale["size"])
    chunk_size = Vec(*scale["chunk_sizes"][0])

    spec = create_sharded_image_info(dataset_size=dataset_size,
                                     chunk_size=chunk_size,
                                     encoding=scale["encoding"],
                                     dtype=np.uint8)

    shape = image_shard_shape_from_spec(spec, dataset_size, chunk_size)

    # A shard may not exceed the dataset bounds.
    shape = lib.min2(shape, dataset_size)
    dataset_bbox = Bbox.from_vec(dataset_size)
    gpts = list(gridpoints(dataset_bbox, dataset_bbox, chunk_size))
    grid_size = np.ceil(dataset_size / chunk_size).astype(np.int64)

    spec = ShardingSpecification.from_dict(spec)
    # No actual storage is needed here; only shard filename computation is used.
    reader = ShardReader(None, None, spec)

    morton_codes = compressed_morton_code(gpts, grid_size)
    min_num_shards = prod(dataset_size / shape)
    max_num_shards = prod(np.ceil(dataset_size / shape))

    # The spec must allocate enough shard bits to address every shard.
    assert 0 < min_num_shards <= 2**spec.shard_bits
    assert 0 < max_num_shards <= 2**spec.shard_bits

    # Count the distinct shard files the full chunk grid actually touches.
    real_num_shards = len(set(map(reader.get_filename, morton_codes)))

    assert min_num_shards <= real_num_shards <= max_num_shards
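
This test assumes the cloudvolume/igneous helpers it references (Vec, Bbox, gridpoints, compressed_morton_code, ShardingSpecification, ShardReader) are importable. A minimal sketch of driving it with a hand-built scale dictionary in Neuroglancer precomputed format; the sizes and encoding below are illustrative, not taken from the original:

scale = {
    "size": [2048, 2048, 512],        # dataset dimensions in voxels (illustrative)
    "chunk_sizes": [[128, 128, 64]],  # one chunk size per scale (illustrative)
    "encoding": "raw",
}
test_sharded_image_bits(scale)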
Example #2
def create_image_shard_downsample_tasks(cloudpath,
                                        mip=0,
                                        fill_missing=False,
                                        sparse=False,
                                        chunk_size=None,
                                        encoding=None,
                                        memory_target=MEMORY_TARGET,
                                        agglomerate=False,
                                        timestamp=None,
                                        factor=(2, 2, 1),
                                        bounds=None,
                                        bounds_mip=0):
    """
  Downsamples an existing image layer that may be
  sharded or unsharded to create a sharded layer.
  
  Only 2x2x1 downsamples are supported for now.
  """
    cv = downsample_scales.add_scale(cloudpath,
                                     mip,
                                     preserve_chunk_size=True,
                                     chunk_size=chunk_size,
                                     encoding=encoding,
                                     factor=factor)
    cv.mip = mip + 1
    # Attach a sharding spec to the newly added downsample scale.
    cv.scale["sharding"] = create_sharded_image_info(
        dataset_size=cv.scale["size"],
        chunk_size=cv.scale["chunk_sizes"][0],
        encoding=cv.scale["encoding"],
        dtype=cv.dtype,
        uncompressed_shard_bytesize=int(memory_target),
    )
    cv.commit_info()

    # The shard shape is computed at the downsampled (mip + 1) scale, then
    # scaled up by the factor to get the task size at the source mip.
    shape = image_shard_shape_from_spec(cv.scale["sharding"], cv.volume_size,
                                        cv.chunk_size)
    shape = Vec(*shape) * factor

    cv.mip = mip
    # Compute task bounds at the source mip, aligned to the destination
    # (mip + 1) chunk grid.
    bounds = get_bounds(cv,
                        bounds,
                        mip,
                        bounds_mip=bounds_mip,
                        chunk_size=cv.meta.chunk_size(mip + 1))

    class ImageShardDownsampleTaskIterator(FinelyDividedTaskIterator):
        def task(self, shape, offset):
            return partial(
                ImageShardDownsampleTask,
                cloudpath,
                shape=tuple(shape),
                offset=tuple(offset),
                mip=int(mip),
                fill_missing=bool(fill_missing),
                sparse=bool(sparse),
                agglomerate=bool(agglomerate),
                timestamp=timestamp,
                factor=tuple(factor),
            )

        def on_finish(self):
            job_details = {
                "method": {
                    "task": "ImageShardDownsampleTask",
                    "cloudpath": cloudpath,
                    "shape": list(map(int, shape)),
                    "fill_missing": fill_missing,
                    "sparse": bool(sparse),
                    "bounds": [bounds.minpt.tolist(),
                               bounds.maxpt.tolist()],
                    "mip": mip,
                    "agglomerate": agglomerate,
                    "timestamp": timestamp,
                },
                "by": operator_contact(),
                "date": strftime("%Y-%m-%d %H:%M %Z"),
            }

            cv.provenance.sources = [cloudpath]
            cv.provenance.processing.append(job_details)
            cv.commit_provenance()

    return ImageShardDownsampleTaskIterator(bounds, shape)
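
In typical usage the returned iterator is handed to a task queue for execution. A minimal local-execution sketch, assuming the python-task-queue package (LocalTaskQueue) and a hypothetical cloudpath:

from taskqueue import LocalTaskQueue

tasks = create_image_shard_downsample_tasks(
    "gs://example-bucket/image",  # hypothetical cloudpath
    mip=0,
    factor=(2, 2, 1),
)
tq = LocalTaskQueue(parallel=4)
tq.insert(tasks)
tq.execute()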
Example #3
def create_image_shard_transfer_tasks(
        src_layer_path: str,
        dst_layer_path: str,
        mip: int = 0,
        chunk_size: Optional[ShapeType] = None,
        encoding: Optional[str] = None,
        bounds: Optional[Bbox] = None,
        bounds_mip: int = 0,
        fill_missing: bool = False,
        translate: Optional[ShapeType] = (0, 0, 0),
        dest_voxel_offset: Optional[ShapeType] = None,
        agglomerate: bool = False,
        timestamp: Optional[int] = None,
        memory_target: int = MEMORY_TARGET,
        clean_info: bool = False):
    src_vol = CloudVolume(src_layer_path, mip=mip)

    if dest_voxel_offset:
        dest_voxel_offset = Vec(*dest_voxel_offset, dtype=int)
    else:
        dest_voxel_offset = src_vol.voxel_offset.clone()

    if not chunk_size:
        chunk_size = src_vol.info['scales'][mip]['chunk_sizes'][0]
    chunk_size = Vec(*chunk_size)

    # Create the destination layer from the source's info if it doesn't exist yet.
    try:
        dest_vol = CloudVolume(dst_layer_path, mip=mip)
    except cloudvolume.exceptions.InfoUnavailableError:
        info = copy.deepcopy(src_vol.info)
        dest_vol = CloudVolume(dst_layer_path, info=info, mip=mip)
        dest_vol.commit_info()

    if dest_voxel_offset is not None:
        dest_vol.scale["voxel_offset"] = dest_voxel_offset

    # If translate is not set but dest_voxel_offset is, it should naturally
    # be just the difference between the two datasets' offsets.
    if translate is None:
        translate = dest_vol.voxel_offset - src_vol.voxel_offset  # vector pointing from src to dest
    else:
        translate = Vec(*translate) // src_vol.downsample_ratio

    if encoding is not None:
        dest_scale = dest_vol.info['scales'][mip]
        dest_scale['encoding'] = encoding
        if (encoding == 'compressed_segmentation'
                and 'compressed_segmentation_block_size' not in dest_scale):
            dest_scale['compressed_segmentation_block_size'] = (8, 8, 8)
    # Drop scales beyond the transferred mip and pin the chunk size.
    dest_vol.info['scales'] = dest_vol.info['scales'][:mip + 1]
    dest_vol.info['scales'][mip]['chunk_sizes'] = [chunk_size.tolist()]

    spec = create_sharded_image_info(
        dataset_size=dest_vol.scale["size"],
        chunk_size=dest_vol.scale["chunk_sizes"][0],
        encoding=dest_vol.scale["encoding"],
        dtype=dest_vol.dtype,
        uncompressed_shard_bytesize=memory_target,
    )
    dest_vol.scale["sharding"] = spec
    if clean_info:
        dest_vol.info = clean_xfer_info(dest_vol.info)
    dest_vol.commit_info()

    shape = image_shard_shape_from_spec(spec, dest_vol.scale["size"],
                                        chunk_size)

    bounds = get_bounds(
        dest_vol,
        bounds,
        mip,
        bounds_mip=bounds_mip,
        chunk_size=chunk_size,
    )

    class ImageShardTransferTaskIterator(FinelyDividedTaskIterator):
        def task(self, shape, offset):
            return partial(
                ImageShardTransferTask,
                src_layer_path,
                dst_layer_path,
                shape=shape,
                offset=offset,
                fill_missing=fill_missing,
                translate=translate,
                mip=mip,
                agglomerate=agglomerate,
                timestamp=timestamp,
            )

        def on_finish(self):
            job_details = {
                "method": {
                    "task": "ImageShardTransferTask",
                    "src": src_layer_path,
                    "dest": dst_layer_path,
                    "shape": list(map(int, shape)),
                    "fill_missing": fill_missing,
                    "translate": list(map(int, translate)),
                    "bounds": [bounds.minpt.tolist(),
                               bounds.maxpt.tolist()],
                    "mip": mip,
                },
                "by": operator_contact(),
                "date": strftime("%Y-%m-%d %H:%M %Z"),
            }

            dvol = CloudVolume(dst_layer_path)
            dvol.provenance.sources = [src_layer_path]
            dvol.provenance.processing.append(job_details)
            dvol.commit_provenance()

    return ImageShardTransferTaskIterator(bounds, shape)
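
The same queueing pattern applies to transfers. A minimal sketch, again assuming python-task-queue and hypothetical source and destination layer paths:

from taskqueue import LocalTaskQueue

tasks = create_image_shard_transfer_tasks(
    "gs://example-bucket/src",  # hypothetical source layer
    "gs://example-bucket/dst",  # hypothetical destination layer
    mip=0,
)
tq = LocalTaskQueue(parallel=4)
tq.insert(tasks)
tq.execute()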