Example #1
def test_ingest_segmentation():
    delete_layer()
    storage, data = create_layer(size=(256,256,128,1), offset=(0,0,0), layer_type='segmentation')
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 3
    assert len(cv.available_mips) == 3

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    assert len(cv.available_mips) == 3
    assert np.array_equal(cv.mip_volume_size(0), [ 256, 256, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [  64,  64, 128 ])
    
    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1 = downsample.downsample_segmentation(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2 = downsample.downsample_segmentation(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])
Example #2
def test_downsample_with_offset():
    delete_layer()
    storage, data = create_layer(size=(512,512,128,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 4
    assert len(cv.available_mips) == 4

    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [  64,  64, 128 ])

    assert np.all(cv.mip_voxel_offset(3) == (0,0,11))
    
    cv.mip = 0
    assert np.all(cv[3:67, 7:71, 11:75] == data[0:64, 0:64, 0:64])

    data_ds1 = downsample.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[1:33, 3:35, 11:75] == data_ds1[0:32, 0:32, 0:64])

    data_ds2 = downsample.downsample_with_averaging(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[0:16, 1:17, 11:75] == data_ds2[0:16, 0:16, 0:64])

    data_ds3 = downsample.downsample_with_averaging(data_ds2, factor=[2, 2, 1, 1])
    cv.mip = 3
    assert np.all(cv[0:8, 0:8, 11:75] == data_ds3[0:8,0:8,0:64])
Example #3
def test_downsample_w_missing():
    delete_layer()
    storage, data = create_layer(size=(512,512,128,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 4
    assert len(cv.available_mips) == 4
    delete_layer()

    cv.commit_info()

    try:
        create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3, fill_missing=False)
    except EmptyVolumeException:
        pass

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3, fill_missing=True)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [  64,  64, 128 ])

    assert np.all(cv.mip_voxel_offset(3) == (0,0,11))
    
    cv.mip = 0
    cv.fill_missing = True
    assert np.count_nonzero(cv[3:67, 7:71, 11:75]) == 0
Example #4
def test_boss_download():
    vol = CloudVolume('gs://seunglab-test/test_v0/image')
    bossvol = CloudVolume('boss://automated_testing/test_v0/image')

    vimg = vol[:, :, :5]
    bimg = bossvol[:, :, :5]

    assert np.all(bimg == vimg)
    assert bimg.dtype == vimg.dtype

    vol.bounded = False
    vol.fill_missing = True
    bossvol.bounded = False
    bossvol.fill_missing = True

    assert np.all(vol[-100:100, -100:100, -10:10] == bossvol[-100:100,
                                                             -100:100, -10:10])

    # BOSS uses a different algorithm for creating downsamples,
    # so it's hard to compare 1:1 w/ pixels.
    bossvol.bounded = True
    bossvol.fill_missing = False
    bossvol.mip = 1
    bimg = bossvol[:, :, 5:6]
    assert np.any(bimg > 0)
Example #5
def test_downsample_no_offset(compression_method):
    delete_layer()
    storage, data = create_layer(size=(1024, 1024, 128, 1), offset=(0, 0, 0))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(storage.layer_path,
                                      mip=0,
                                      num_mips=4,
                                      compress=compression_method)
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 5
    assert np.array_equal(cv.mip_volume_size(0), [1024, 1024, 128])
    assert np.array_equal(cv.mip_volume_size(1), [512, 512, 128])
    assert np.array_equal(cv.mip_volume_size(2), [256, 256, 128])
    assert np.array_equal(cv.mip_volume_size(3), [128, 128, 128])
    assert np.array_equal(cv.mip_volume_size(4), [64, 64, 128])

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1, = tinybrain.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2, = tinybrain.downsample_with_averaging(data, factor=[4, 4, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3, = tinybrain.downsample_with_averaging(data, factor=[8, 8, 1, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])

    data_ds4, = tinybrain.downsample_with_averaging(data,
                                                    factor=[16, 16, 1, 1])
    cv.mip = 4
    assert np.all(cv[slice64] == data_ds4[slice64])
Example #6
def MultiResShardedMeshMergeTask(cloudpath: str,
                                 shard_no: str,
                                 draco_compression_level: int = 1,
                                 mesh_dir: Optional[str] = None,
                                 num_lod: int = 1,
                                 spatial_index_db: Optional[str] = None,
                                 progress: bool = False):
    cv = CloudVolume(cloudpath, spatial_index_db=spatial_index_db)
    cv.mip = cv.mesh.meta.mip
    if mesh_dir is None and 'mesh' in cv.info:
        mesh_dir = cv.info['mesh']

    # This looks messy because we are trying to avoid retaining
    # unnecessary memory. In the original skeleton iteration, this was
    # using 50 GB+ memory on minnie65. So it makes sense to be just
    # as careful with a heavier type of object.
    locations = locations_for_labels(cv, labels_for_shard(cv, shard_no))
    filenames = set(itertools.chain(*locations.values()))
    labels = set(locations.keys())
    del locations
    meshes = collect_mesh_fragments(cv, labels, filenames, mesh_dir, progress)
    del labels
    del filenames
    meshes = {
        label: process_mesh(cv, label, mesh_frags, num_lod,
                            draco_compression_level)
        for label, mesh_frags in tqdm(meshes.items(), disable=(not progress))
    }
    data_offset = {
        label: len(manifest)
        for label, (manifest, mesh) in meshes.items()
    }
    meshes = {
        label: mesh + manifest.to_binary()
        for label, (manifest, mesh) in meshes.items()
    }

    if len(meshes) == 0:
        return

    shard_files = synthesize_shard_files(cv.mesh.reader.spec, meshes,
                                         data_offset)
    del meshes
    del data_offset

    if len(shard_files) != 1:
        raise ValueError("Only one shard file should be generated per task. "
                         "Expected: {} Got: {} ".format(
                             str(shard_no), ", ".join(shard_files.keys())))

    cf = CloudFiles(cv.mesh.meta.layerpath, progress=progress)
    cf.puts(
        ((fname, data) for fname, data in shard_files.items()),
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
Example #7
  def execute(self):
    vol = CloudVolume(self.layer_path)

    highres_bbox = Bbox( self.offset, self.offset + self.shape )
    for mip in vol.available_mips:
      vol.mip = mip
      slices = vol.slices_from_global_coords(highres_bbox.to_slices())
      bbox = Bbox.from_slices(slices).round_to_chunk_size(vol.underlying, offset=vol.bounds.minpt)
      vol.delete(bbox)
Example #8
def test_downsample_no_offset_2x2x2():
    delete_layer()
    cf, data = create_layer(size=(512,512,512,1), offset=(0,0,0))
    cv = CloudVolume(cf.cloudpath)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(
        cf.cloudpath, mip=0, num_mips=3, 
        compress=None, factor=(2,2,2)
    )
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 512 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 256 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [  64,  64,  64 ])
    
    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1, = tinybrain.downsample_with_averaging(data, factor=[2, 2, 2, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2, = tinybrain.downsample_with_averaging(data, factor=[4, 4, 4, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3, = tinybrain.downsample_with_averaging(data, factor=[8, 8, 8, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])
Example #9
def test_downsample_no_offset():
    delete_layer()
    storage, data = create_layer(size=(1024,1024,128,1), offset=(0,0,0))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 5
    assert len(cv.available_mips) == 5

    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=4)

    cv.refresh_info()

    assert len(cv.available_mips) == 5
    assert np.array_equal(cv.mip_volume_size(0), [ 1024, 1024, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [  512,  512, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [  256,  256, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [  128,  128, 128 ])
    assert np.array_equal(cv.mip_volume_size(4), [   64,   64, 128 ])
    
    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1 = downsample.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2 = downsample.downsample_with_averaging(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3 = downsample.downsample_with_averaging(data_ds2, factor=[2, 2, 1, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])

    data_ds4 = downsample.downsample_with_averaging(data_ds3, factor=[2, 2, 1, 1])
    cv.mip = 4
    assert np.all(cv[slice64] == data_ds4[slice64])
Example #10
def test_transfer_task_rechunk(tq, src_cv, transfer_data):
    tasks = tc.create_transfer_tasks(src_cv.cloudpath,
                                     destpath,
                                     chunk_size=(50, 50, 50))
    tq.insert_all(tasks)
    dest_cv = CloudVolume(destpath)
    assert len(dest_cv.scales) == 4
    assert np.all(src_cv[:] == dest_cv[:])
    for mip in range(1, 4):
        dest_cv.mip = mip
        assert np.all(dest_cv[:] == transfer_data[mip])
    rmsrc()
    rmdest()
Example #11
def MultiResShardedMeshMergeTask(
  cloudpath:str,
  shard_no:str,
  draco_compression_level:int = 1,
  mesh_dir:Optional[str] = None,
  num_lod:int = 1,
  spatial_index_db:Optional[str] = None,
  progress:bool = False
):
  cv = CloudVolume(cloudpath, spatial_index_db=spatial_index_db)
  cv.mip = cv.mesh.meta.mip
  if mesh_dir is None and 'mesh' in cv.info:
    mesh_dir = cv.info['mesh']

  # This looks messy because we are trying to avoid retaining
  # unnecessary memory. In the original skeleton iteration, this was 
  # using 50 GB+ memory on minnie65. So it makes sense to be just
  # as careful with a heavier type of object.
  locations = locations_for_labels(cv, labels_for_shard(cv, shard_no))
  filenames = set(itertools.chain(*locations.values()))
  labels = set(locations.keys())
  del locations
  meshes = collect_mesh_fragments(
    cv, labels, filenames, mesh_dir, progress
  )
  del filenames

  # important to iterate this way to avoid
  # creating a copy of meshes vs. { ... for in }
  for label in labels:
    meshes[label] = Mesh.concatenate(*meshes[label])
  del labels

  fname, shard = create_mesh_shard(
    cv, meshes, 
    num_lod, draco_compression_level,
    progress, shard_no
  )
  del meshes

  if shard is None:
    return

  cf = CloudFiles(cv.mesh.meta.layerpath)
  cf.put(
    fname, shard,
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
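The comment above about avoiding a copy of `meshes` refers to mutating the dict in place instead of rebuilding it with a comprehension. A minimal, self-contained sketch of the difference, using byte strings in place of mesh fragments (illustrative only):

# Sketch: why in-place replacement keeps peak memory lower than a comprehension.
meshes = {1: [b'frag-a', b'frag-b'], 2: [b'frag-c']}

# Dict comprehension: the old dict and the new dict coexist until the
# assignment completes, so the fragment lists and the merged values are
# all held in memory at the same time.
merged = {label: b''.join(frags) for label, frags in meshes.items()}

# In-place loop (the pattern in the example above): each entry is replaced
# as soon as it is merged, so at most one label's fragments are duplicated
# at any moment.
for label in list(meshes.keys()):
    meshes[label] = b''.join(meshes[label])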
Example #12
def test_transfer_task_dest_offset(tq, src_cv, transfer_data):
    tasks = tc.create_transfer_tasks(src_cv.cloudpath,
                                     destpath,
                                     chunk_size=(50, 50, 50),
                                     dest_voxel_offset=(100, 100, 100))
    tq.insert_all(tasks)
    dest_cv = CloudVolume(destpath)
    assert len(dest_cv.scales) == 4
    assert tuple(dest_cv.voxel_offset) == (100, 100, 100)
    assert tuple(src_cv.voxel_offset) == (0, 0, 0)
    assert np.all(src_cv[:] == dest_cv[:])
    for mip in range(1, 4):
        dest_cv.mip = mip
        assert np.all(dest_cv[:] == transfer_data[mip])
    rmsrc()
    rmdest()
Example #13
def test_downsample_higher_mip():
    delete_layer()
    storage, data = create_layer(size=(512,512,64,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    cv.info['scales'] = cv.info['scales'][:1]
    
    cv.commit_info()
    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=2)
    cv.refresh_info()
    assert len(cv.available_mips) == 3

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=1, num_mips=2)
    cv.refresh_info()
    assert len(cv.available_mips) == 4

    cv.mip = 3
    assert cv[:,:,:].shape == (64,64,64,1)
Example #14
  def execute(self):
    vol = CloudVolume(self.layer_path, mip=self.mip)

    highres_bbox = Bbox( self.offset, self.offset + self.shape )

    top_mip = min(vol.available_mips[-1], self.mip + self.num_mips)

    for mip in range(self.mip, top_mip + 1):
      vol.mip = mip
      bbox = vol.bbox_to_mip(highres_bbox, self.mip, mip)
      bbox = bbox.round_to_chunk_size(vol.underlying, offset=vol.bounds.minpt)
      bbox = Bbox.clamp(bbox, vol.bounds)

      if bbox.volume() == 0: 
        continue

      vol.delete(bbox)
Example #15
def test_quantize():
    qpath = 'file:///tmp/removeme/quantized/'

    delete_layer()
    delete_layer(qpath)

    storage, _ = create_layer(size=(256, 256, 128, 3),
                              offset=(0, 0, 0),
                              layer_type="affinities")
    cv = CloudVolume(storage.layer_path)

    shape = (128, 128, 64)
    slices = np.s_[:shape[0], :shape[1], :shape[2], :1]

    data = cv[slices]
    data *= 255.0
    data = data.astype(np.uint8)

    task = QuantizeTask(
        source_layer_path=storage.layer_path,
        dest_layer_path=qpath,
        shape=shape,
        offset=(0, 0, 0),
        mip=0,
    )

    info = create_quantized_affinity_info(storage.layer_path,
                                          qpath,
                                          shape,
                                          mip=0,
                                          chunk_size=[64, 64, 64],
                                          encoding='raw')
    qcv = CloudVolume(qpath, info=info)
    qcv.commit_info()

    create_downsample_scales(qpath, mip=0, ds_shape=shape)

    task.execute()

    qcv.mip = 0

    qdata = qcv[slices]

    assert np.all(data.shape == qdata.shape)
    assert np.all(data == qdata)
    assert data.dtype == np.uint8
Example #16
def DeleteTask(layer_path:str, shape, offset, mip=0, num_mips=5):
  """Delete a block of images inside a layer on all mip levels."""
  shape = Vec(*shape)
  offset = Vec(*offset)
  vol = CloudVolume(layer_path, mip=mip, max_redirects=0)

  highres_bbox = Bbox( offset, offset + shape )

  top_mip = min(vol.available_mips[-1], mip + num_mips)

  for mip_i in range(mip, top_mip + 1):
    vol.mip = mip_i
    bbox = vol.bbox_to_mip(highres_bbox, mip, mip_i)
    bbox = bbox.round_to_chunk_size(vol.chunk_size, offset=vol.bounds.minpt)
    bbox = Bbox.clamp(bbox, vol.bounds)

    if bbox.volume() == 0:
      continue

    vol.delete(bbox)
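A hypothetical invocation of the task above; the layer path and geometry are placeholders and assume a precomputed layer already exists at that location:

# Sketch only: the path, shape, and offset below are placeholders.
# DeleteTask as defined above is a plain function, so calling it runs the deletion.
DeleteTask(
    layer_path='file:///tmp/removeme/layer',  # hypothetical layer
    shape=(128, 128, 64),   # highres extent of the block to delete
    offset=(0, 0, 0),       # highres voxel offset of that block
    mip=0,                  # starting mip level
    num_mips=3,             # also clear the region in up to 3 coarser mips (bounded by available_mips)
)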
Example #17
def test_downsample_higher_mip():
    delete_layer()
    cf, data = create_layer(size=(512,512,64,1), offset=(3,7,11))
    cv = CloudVolume(cf.cloudpath)
    cv.info['scales'] = cv.info['scales'][:1]
    
    tq = MockTaskQueue()

    cv.commit_info()
    tasks = create_downsampling_tasks(cf.cloudpath, mip=0, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 3

    tasks = create_downsampling_tasks(cf.cloudpath, mip=1, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 4

    cv.mip = 3
    assert cv[:,:,:].shape == (64,64,64,1)
Example #18
def create_sharded_skeleton_merge_tasks(layer_path,
                                        dust_threshold,
                                        tick_threshold,
                                        shard_index_bytes=2**13,
                                        minishard_index_bytes=2**15,
                                        minishard_index_encoding='gzip',
                                        data_encoding='gzip',
                                        max_cable_length=None,
                                        spatial_index_db=None):
    cv = CloudVolume(layer_path,
                     progress=True,
                     spatial_index_db=spatial_index_db)
    cv.mip = cv.skeleton.meta.mip

    # 17 sec to download for pinky100
    all_labels = cv.skeleton.spatial_index.query(cv.bounds * cv.resolution)

    (shard_bits, minishard_bits, preshift_bits) = \
      compute_shard_params_for_hashed(
        num_labels=len(all_labels),
        shard_index_bytes=int(shard_index_bytes),
        minishard_index_bytes=int(minishard_index_bytes),
      )

    spec = ShardingSpecification(
        type='neuroglancer_uint64_sharded_v1',
        preshift_bits=preshift_bits,
        hash='murmurhash3_x86_128',
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )
    cv.skeleton.meta.info['sharding'] = spec.to_dict()
    cv.skeleton.meta.commit_info()

    # rebuild b/c sharding changes the skeleton source
    cv = CloudVolume(layer_path,
                     progress=True,
                     spatial_index_db=spatial_index_db)
    cv.mip = cv.skeleton.meta.mip

    # perf: ~36k hashes/sec
    shardfn = lambda lbl: cv.skeleton.reader.spec.compute_shard_location(
        lbl).shard_number

    shard_labels = defaultdict(list)
    for label in tqdm(all_labels, desc="Hashes"):
        shard_labels[shardfn(label)].append(label)

    cf = CloudFiles(cv.skeleton.meta.layerpath, progress=True)
    files = ((str(shardno) + '.labels', labels)
             for shardno, labels in shard_labels.items())
    cf.put_jsons(files,
                 compress="gzip",
                 cache_control="no-cache",
                 total=len(shard_labels))

    cv.provenance.processing.append({
        'method': {
            'task': 'ShardedSkeletonMergeTask',
            'cloudpath': layer_path,
            'mip': cv.skeleton.meta.mip,
            'dust_threshold': dust_threshold,
            'tick_threshold': tick_threshold,
            'max_cable_length': max_cable_length,
            'preshift_bits': preshift_bits,
            'minishard_bits': minishard_bits,
            'shard_bits': shard_bits,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    cv.commit_provenance()

    return [
        ShardedSkeletonMergeTask(layer_path,
                                 shard_no,
                                 dust_threshold,
                                 tick_threshold,
                                 max_cable_length=max_cable_length)
        for shard_no in shard_labels.keys()
    ]
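The function above returns a list of ShardedSkeletonMergeTask objects rather than executing them. A sketch of how such a list is typically drained, assuming the python-task-queue package's LocalTaskQueue (the layer path and thresholds are placeholders; other examples on this page use a MockTaskQueue test double for the same purpose):

# Sketch only: placeholder path and thresholds.
from taskqueue import LocalTaskQueue

tasks = create_sharded_skeleton_merge_tasks(
    'file:///tmp/removeme/skeleton_layer',
    dust_threshold=1000,
    tick_threshold=3500,
)

tq = LocalTaskQueue(parallel=1)
tq.insert(tasks)   # older taskqueue releases use tq.insert_all(tasks)
tq.execute()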
Example #19
def create_sharded_skeletons_from_unsharded_tasks(
    src: str,
    dest: str,
    shard_index_bytes=2**13,
    minishard_index_bytes=2**15,
    min_shards: int = 1,
    minishard_index_encoding='gzip',
    data_encoding='gzip',
    skel_dir: Optional[str] = None,
) -> Iterator[ShardedFromUnshardedSkeletonMergeTask]:
    cv_src = CloudVolume(src)
    cv_src.mip = cv_src.skeleton.meta.mip

    cf = CloudFiles(cv_src.skeleton.meta.layerpath)

    all_labels = []
    SEGID_RE = re.compile(r'(\d+)(?:\.gz|\.br|\.zstd)?$')
    for path in cf.list():
        match = SEGID_RE.search(path)
        if match is None:
            continue
        (segid, ) = match.groups()
        all_labels.append(int(segid))

    cv_dest = CloudVolume(dest, skel_dir=skel_dir)
    cv_dest.skeleton.meta.info = copy.deepcopy(cv_src.skeleton.meta.info)
    cv_dest.skeleton.meta.info["vertex_attributes"] = [
        attr for attr in cv_dest.skeleton.meta.info["vertex_attributes"]
        if attr["data_type"] in ("float32", "float64")
    ]

    (shard_bits, minishard_bits, preshift_bits) = \
      compute_shard_params_for_hashed(
        num_labels=len(all_labels),
        shard_index_bytes=int(shard_index_bytes),
        minishard_index_bytes=int(minishard_index_bytes),
        min_shards=int(min_shards),
      )

    spec = ShardingSpecification(
        type='neuroglancer_uint64_sharded_v1',
        preshift_bits=preshift_bits,
        hash='murmurhash3_x86_128',
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )

    cv_dest.skeleton.meta.info['sharding'] = spec.to_dict()
    cv_dest.skeleton.meta.commit_info()

    cv_dest = CloudVolume(dest, skel_dir=skel_dir)

    # perf: ~66.5k hashes/sec on M1 ARM64
    shardfn = lambda lbl: cv_dest.skeleton.reader.spec.compute_shard_location(
        lbl).shard_number

    shard_labels = defaultdict(list)
    for label in tqdm(all_labels, desc="Hashes"):
        shard_labels[shardfn(label)].append(label)
    del all_labels

    cf = CloudFiles(cv_dest.skeleton.meta.layerpath, progress=True)
    files = ((str(shardno) + '.labels', labels)
             for shardno, labels in shard_labels.items())
    cf.put_jsons(files,
                 compress="gzip",
                 cache_control="no-cache",
                 total=len(shard_labels))

    cv_dest.provenance.processing.append({
        'method': {
            'task': 'ShardedFromUnshardedSkeletonMergeTask',
            'src': src,
            'dest': dest,
            'preshift_bits': preshift_bits,
            'minishard_bits': minishard_bits,
            'shard_bits': shard_bits,
            'skel_dir': skel_dir,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    cv_dest.commit_provenance()

    return [
        partial(
            ShardedFromUnshardedSkeletonMergeTask,
            src=src,
            dest=dest,
            shard_no=shard_no,
            skel_dir=skel_dir,
        ) for shard_no in shard_labels.keys()
    ]
Example #20
def create_sharded_multires_mesh_tasks(
  cloudpath:str, 
  shard_index_bytes=2**13, 
  minishard_index_bytes=2**15,
  num_lod:int = 1, 
  draco_compression_level:int = 1,
  vertex_quantization_bits:int = 16,
  minishard_index_encoding="gzip", 
  mesh_dir:Optional[str] = None, 
  spatial_index_db:Optional[str] = None
) -> Iterator[MultiResShardedMeshMergeTask]: 

  configure_multires_info(
    cloudpath, 
    vertex_quantization_bits, 
    mesh_dir
  )

  # rebuild b/c sharding changes the mesh source class
  cv = CloudVolume(cloudpath, progress=True, spatial_index_db=spatial_index_db) 
  cv.mip = cv.mesh.meta.mip

  # 17 sec to download for pinky100
  all_labels = cv.mesh.spatial_index.query(cv.bounds * cv.resolution)
  
  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
    )

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding="raw", # draco encoded meshes
  )

  cv.mesh.meta.info['sharding'] = spec.to_dict()
  cv.mesh.meta.commit_info()

  cv = CloudVolume(cloudpath)

  # perf: ~66.5k hashes/sec on M1 ARM64
  shardfn = lambda lbl: cv.mesh.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)
  del all_labels

  cf = CloudFiles(cv.mesh.meta.layerpath, progress=True)  # write .labels files to the mesh layer, not the skeleton layer
  files = ( 
    (str(shardno) + '.labels', labels) 
    for shardno, labels in shard_labels.items() 
  )
  cf.put_jsons(
    files, compress="gzip", 
    cache_control="no-cache", total=len(shard_labels)
  )

  cv.provenance.processing.append({
    'method': {
      'task': 'MultiResShardedMeshMergeTask',
      'cloudpath': cloudpath,
      'mip': cv.mesh.meta.mip,
      'num_lod': num_lod,
      'vertex_quantization_bits': vertex_quantization_bits,
      'preshift_bits': preshift_bits, 
      'minishard_bits': minishard_bits, 
      'shard_bits': shard_bits,
      'mesh_dir': mesh_dir,
      'draco_compression_level': draco_compression_level,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  }) 
  cv.commit_provenance()

  return [
    partial(MultiResShardedMeshMergeTask,
      cloudpath, shard_no, 
      num_lod=num_lod,
      mesh_dir=mesh_dir, 
      spatial_index_db=spatial_index_db,
      draco_compression_level=draco_compression_level,
    )
    for shard_no in shard_labels.keys()
  ]