Example #1
def test_downsample_higher_mip():
    delete_layer()
    storage, data = create_layer(size=(512, 512, 64, 1), offset=(3, 7, 11))
    cv = CloudVolume(storage.layer_path)
    cv.info['scales'] = cv.info['scales'][:1]

    tq = MockTaskQueue()

    cv.commit_info()
    tasks = create_downsampling_tasks(storage.layer_path, mip=0, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 3

    tasks = create_downsampling_tasks(storage.layer_path, mip=1, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 4

    cv.mip = 3
    assert cv[:, :, :].shape == (64, 64, 64, 1)
Example #2
def make_info_file(brain, home_dir, volume_size, type_vol="647", commit=True):
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="image",  # "image" or "segmentation"
        data_type="uint16",  # 32-bit not necessary for Princeton atlas, but was for Allen atlas
        encoding="raw",  # other options: "jpeg", "compressed_segmentation" (req. uint32 or uint64)
        resolution=[1810, 1810, 2000],  # X,Y,Z values in nanometers, 40 microns in each dim.
        voxel_offset=[0, 0, 0],  # X,Y,Z offset in voxels
        chunk_size=[1024, 1024, 32],  # rechunk of image X,Y,Z in voxels
        volume_size=volume_size,  # X,Y,Z size in voxels
    )

    # If you're using Amazon or the local file system, you can replace "gs" with "s3" or "file"
    vol = CloudVolume("file://" + home_dir + "/" + brain + "/" + type_vol, info=info)
    vol.provenance.description = "TP tracing"
    vol.provenance.owners = ["*****@*****.**"]  # list of contact email addresses
    if commit:
        vol.commit_info()  # generates gs://bucket/dataset/layer/info json file
        vol.commit_provenance()  # generates gs://bucket/dataset/layer/provenance json file
        print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
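A minimal usage sketch for the helper above; the brain name, home directory, and volume size are placeholders, not values from the original example (numpy is assumed imported as np).
# Hypothetical values for illustration only.
vol = make_info_file(
    brain="test_brain",
    home_dir="/home/user/lightsheet",
    volume_size=[2160, 2560, 687],
)
# A Z slab can then be written in X,Y,Z[,channel] order, e.g.:
# vol[:, :, 0:32] = np.zeros((2160, 2560, 32, 1), dtype=np.uint16)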
Example #3
def test_downsample_no_offset_2x2x2():
    delete_layer()
    cf, data = create_layer(size=(512,512,512,1), offset=(0,0,0))
    cv = CloudVolume(cf.cloudpath)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(
        cf.cloudpath, mip=0, num_mips=3, 
        compress=None, factor=(2,2,2)
    )
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 512 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 256 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [  64,  64,  64 ])
    
    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1, = tinybrain.downsample_with_averaging(data, factor=[2, 2, 2, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2, = tinybrain.downsample_with_averaging(data, factor=[4, 4, 4, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3, = tinybrain.downsample_with_averaging(data, factor=[8, 8, 8, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])
Example #4
def test_downsample_no_offset():
    delete_layer()
    storage, data = create_layer(size=(1024,1024,128,1), offset=(0,0,0))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 5
    assert len(cv.available_mips) == 5

    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=4)

    cv.refresh_info()

    assert len(cv.available_mips) == 5
    assert np.array_equal(cv.mip_volume_size(0), [ 1024, 1024, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [  512,  512, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [  256,  256, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [  128,  128, 128 ])
    assert np.array_equal(cv.mip_volume_size(4), [   64,   64, 128 ])
    
    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1 = downsample.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2 = downsample.downsample_with_averaging(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3 = downsample.downsample_with_averaging(data_ds2, factor=[2, 2, 1, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])

    data_ds4 = downsample.downsample_with_averaging(data_ds3, factor=[2, 2, 1, 1])
    cv.mip = 4
    assert np.all(cv[slice64] == data_ds4[slice64])
Example #5
def test_downsample_w_missing():
    delete_layer()
    storage, data = create_layer(size=(512, 512, 128, 1), offset=(3, 7, 11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1
    delete_layer()

    cv.commit_info()

    tq = MockTaskQueue()

    try:
        tasks = create_downsampling_tasks(storage.layer_path,
                                          mip=0,
                                          num_mips=3,
                                          fill_missing=False)
        tq.insert_all(tasks)
    except EmptyVolumeException:
        pass

    tasks = create_downsampling_tasks(storage.layer_path,
                                      mip=0,
                                      num_mips=3,
                                      fill_missing=True)
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [512, 512, 128])
    assert np.array_equal(cv.mip_volume_size(1), [256, 256, 128])
    assert np.array_equal(cv.mip_volume_size(2), [128, 128, 128])
    assert np.array_equal(cv.mip_volume_size(3), [64, 64, 128])

    assert np.all(cv.mip_voxel_offset(3) == (0, 0, 11))

    cv.mip = 0
    cv.fill_missing = True
    assert np.count_nonzero(cv[3:67, 7:71, 11:75]) == 0
Example #6
def test_background_color():
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='image',
        data_type='uint8',
        encoding='raw',
        resolution=[1, 1, 1],
        voxel_offset=[0, 0, 0],
        volume_size=[128, 128, 1],
        mesh='mesh',
        chunk_size=[64, 64, 1],
    )

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume', mip=0, info=info)
    vol.commit_info()

    vol.cache.flush()

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume',
                      mip=0,
                      background_color=1,
                      fill_missing=True)
    assert np.count_nonzero(vol[:] - 1) == 0

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume',
                      mip=0,
                      background_color=1,
                      fill_missing=True,
                      bounded=False)
    assert np.count_nonzero(vol[0:129, 0:129, 0:1] - 1) == 0

    vol = CloudVolume('file:///tmp/cloudvolume/empty_volume',
                      mip=0,
                      background_color=1,
                      fill_missing=True,
                      bounded=False,
                      parallel=2)
    assert np.count_nonzero(vol[0:129, 0:129, 0:1] - 1) == 0
    vol.cache.flush()
    delete_layer('/tmp/cloudvolume/empty_volume')
Example #7
def make_info_file(volume_size,
                   resolution,
                   layer_dir,
                   voxel_offset=[0, 0, 0],
                   commit=True):
    """ 
	---PURPOSE---
	Make the cloudvolume info file.
	---INPUT---
	volume_size     [Nx,Ny,Nz] in voxels, e.g. [2160,2560,1271]
	pix_scale_nm    [size of x pix in nm,size of y pix in nm,size of z pix in nm], e.g. [5000,5000,10000]
	commit          if True, will write the info/provenance file to disk. 
					if False, just creates it in memory
	atlas_type      if provided, will add a key to the info file: 
					'atlas_type': atlas_type
	"""
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',  # 'image' or 'segmentation'
        data_type='uint8',
        encoding='raw',  # other options: 'jpeg', 'compressed_segmentation' (req. uint32 or uint64)
        resolution=resolution,  # size of X,Y,Z pixels in nanometers
        voxel_offset=voxel_offset,  # X,Y,Z offset in voxels
        chunk_size=[1024, 1024, 1],  # rechunk of image X,Y,Z in voxels -- only used for downsampling task I think
        volume_size=volume_size,  # X,Y,Z size in voxels
    )

    vol = CloudVolume(f'file://{layer_dir}', info=info)
    vol.provenance.description = "Test on spock for profiling precomputed creation"
    vol.provenance.owners = ['*****@*****.**']  # list of contact email addresses
    if commit:
        vol.commit_info()  # generates info json file
        vol.commit_provenance()  # generates provenance json file
        print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
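A hedged usage sketch for this variant, reusing the example values given in the docstring; the layer directory is a placeholder.
vol = make_info_file(
    volume_size=[2160, 2560, 1271],
    resolution=[5000, 5000, 10000],
    layer_dir="/tmp/atlas_layer",  # placeholder path
    commit=True,
)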
Example #8
def prepare_precomputed(precomputed_path, offset, size, resolution, chunk_size, factor=(2,2,1), dtype='uint32'):
  cv_args = dict(
    bounded=True, fill_missing=False, autocrop=False,
    cache=False, compress_cache=None, cdn_cache=False,
    progress=False, provenance=None, compress=True, 
    non_aligned_writes=True, parallel=False)
  info = CloudVolume.create_new_info(
    num_channels=1,
    layer_type='segmentation',
    data_type=dtype,
    # encoding='compressed_segmentation',
    encoding='raw',
    resolution=list(resolution),
    voxel_offset=np.array(offset),
    volume_size=np.array(size),
    chunk_size=chunk_size,
    max_mip=0,
    factor=factor,
    )
  cv = CloudVolume('file://'+precomputed_path, mip=0, info=info, **cv_args)
  cv.commit_info()
  return cv
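A short sketch of how prepare_precomputed might be called; every concrete value below is a placeholder (numpy is assumed imported as np).
cv = prepare_precomputed(
    "/tmp/removeme/seg",        # placeholder local path
    offset=(0, 0, 0),
    size=(256, 256, 64),
    resolution=(8, 8, 40),
    chunk_size=(64, 64, 16),
)
cv[0:256, 0:256, 0:64] = np.zeros((256, 256, 64), dtype=np.uint32)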
Example #9
def configure_multires_info(
  cloudpath:str,
  vertex_quantization_bits:int, 
  mesh_dir:str
):
  """
  Computes properties and uploads a multires 
  mesh info file
  """
  assert vertex_quantization_bits in (10, 16)

  vol = CloudVolume(cloudpath)

  mesh_dir = mesh_dir or vol.info.get("mesh", None)

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  res = vol.meta.resolution(vol.mesh.meta.mip)

  cf = CloudFiles(cloudpath)
  info_filename = f'{mesh_dir}/info'
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = "neuroglancer_multilod_draco"
  new_mesh_info['vertex_quantization_bits'] = vertex_quantization_bits
  new_mesh_info['transform'] = [ 
    res[0], 0,      0,      0,
    0,      res[1], 0,      0,
    0,      0,      res[2], 0,
  ]
  new_mesh_info['lod_scale_multiplier'] = 1.0

  if new_mesh_info != mesh_info:
    cf.put_json(
      info_filename, new_mesh_info, 
      cache_control="no-cache"
    )
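A hedged invocation sketch; the cloudpath and mesh directory are placeholders, and the layer must already be a segmentation volume with a mesh source.
configure_multires_info(
    "file:///tmp/removeme/seg",   # placeholder path to an existing segmentation layer
    vertex_quantization_bits=16,
    mesh_dir="mesh_mip_0_err_40",
)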
Example #10
def test_quantize_affinities():
    qpath = 'file:///tmp/removeme/quantized/'

    delete_layer()
    delete_layer(qpath)

    storage, _ = create_layer(size=(256,256,128,3), offset=(0,0,0), layer_type="affinities")
    cv = CloudVolume(storage.layer_path)

    shape = (128, 128, 64)
    slices = np.s_[ :shape[0], :shape[1], :shape[2], :1 ]

    data = cv[slices]
    data *= 255.0
    data = data.astype(np.uint8)

    task = QuantizeAffinitiesTask(
        source_layer_path=storage.layer_path,
        dest_layer_path=qpath,
        shape=shape,
        offset=(0,0,0),
    )

    info = create_quantized_affinity_info(storage.layer_path, qpath, shape)
    qcv = CloudVolume(qpath, info=info)
    qcv.commit_info()

    create_downsample_scales(qpath, mip=0, ds_shape=shape)

    task.execute()

    qcv.mip = 0

    qdata = qcv[slices]

    assert np.all(data.shape == qdata.shape)
    assert np.all(data == qdata)
    assert data.dtype == np.uint8
Example #11
def create_meshing_tasks(task_queue, layer_path, mip, shape=Vec(512, 512, 512)):
    shape = Vec(*shape)
    max_simplification_error = 40

    vol = CloudVolume(layer_path, mip)

    if not 'mesh' in vol.info:
        vol.info['mesh'] = 'mesh_mip_{}_err_{}'.format(
            mip, max_simplification_error)
        vol.commit_info()

    for startpt in tqdm(xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape),
                        desc="Inserting Mesh Tasks"):
        task = MeshTask(
            layer_path=layer_path,
            mip=vol.mip,
            shape=shape.clone(),
            offset=startpt.clone(),
            max_simplification_error=max_simplification_error,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading MeshTasks')
Example #12
def test_transfer():
    # Bbox version
    delete_layer()
    cv, _ = create_layer(size=(128, 64, 64, 1), offset=(0, 0, 0))

    cv.transfer_to('file:///tmp/removeme/transfer/', cv.bounds)

    ls = os.listdir('/tmp/removeme/transfer/1_1_1/')

    assert '0-64_0-64_0-64.gz' in ls
    assert len(ls) == 2

    assert os.path.exists('/tmp/removeme/transfer/info')
    assert os.path.exists('/tmp/removeme/transfer/provenance')

    dcv = CloudVolume("file:///tmp/removeme/transfer")
    dcv.info["dont_touch_me_bro"] = True
    dcv.commit_info()

    cv.transfer_to('file:///tmp/removeme/transfer/', cv.bounds)
    dcv.refresh_info()

    assert 'dont_touch_me_bro' in dcv.info
Example #13
def test_downsample_with_offset():
    delete_layer()
    storage, data = create_layer(size=(512, 512, 128, 1), offset=(3, 7, 11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1

    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(storage.layer_path, mip=0, num_mips=3)
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [512, 512, 128])
    assert np.array_equal(cv.mip_volume_size(1), [256, 256, 128])
    assert np.array_equal(cv.mip_volume_size(2), [128, 128, 128])
    assert np.array_equal(cv.mip_volume_size(3), [64, 64, 128])

    assert np.all(cv.mip_voxel_offset(3) == (0, 0, 11))

    cv.mip = 0
    assert np.all(cv[3:67, 7:71, 11:75] == data[0:64, 0:64, 0:64])

    data_ds1, = tinybrain.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[1:33, 3:35, 11:75] == data_ds1[0:32, 0:32, 0:64])

    data_ds2, = tinybrain.downsample_with_averaging(data, factor=[4, 4, 1, 1])
    cv.mip = 2
    assert np.all(cv[0:16, 1:17, 11:75] == data_ds2[0:16, 0:16, 0:64])

    data_ds3, = tinybrain.downsample_with_averaging(data, factor=[8, 8, 1, 1])
    cv.mip = 3
    assert np.all(cv[0:8, 0:8, 11:75] == data_ds3[0:8, 0:8, 0:64])
Example #14
def create_downsample_scales(
    layer_path, mip, ds_shape, axis='z', 
    preserve_chunk_size=False, chunk_size=None,
    encoding=None
  ):
  vol = CloudVolume(layer_path, mip)
  shape = min2(vol.volume_size, ds_shape)

  # sometimes we downsample a base layer of 512x512 
  # into underlying chunks of 64x64 which permits more scales
  underlying_mip = (mip + 1) if (mip + 1) in vol.available_mips else mip
  underlying_shape = vol.mip_underlying(underlying_mip).astype(np.float32)

  if chunk_size:
    underlying_shape = Vec(*chunk_size).astype(np.float32)

  toidx = { 'x': 0, 'y': 1, 'z': 2 }
  preserved_idx = toidx[axis]
  underlying_shape[preserved_idx] = float('inf')

  scales = downsample_scales.compute_plane_downsampling_scales(
    size=shape, 
    preserve_axis=axis, 
    max_downsampled_size=int(min(*underlying_shape)),
  ) 
  scales = scales[1:] # omit (1,1,1)
  scales = [ list(map(int, vol.downsample_ratio * Vec(*factor3))) for factor3 in scales ]

  if len(scales) == 0:
    print("WARNING: No scales generated.")

  for scale in scales:
    vol.add_scale(scale, encoding=encoding, chunk_size=chunk_size)

  if chunk_size is None:
    if preserve_chunk_size or len(scales) == 0:
      chunk_size = vol.scales[mip]['chunk_sizes']
    else:
      chunk_size = vol.scales[mip + 1]['chunk_sizes']
  else:
    chunk_size = [ chunk_size ]

  if encoding is None:
    encoding = vol.scales[mip]['encoding']

  for i in range(mip + 1, mip + len(scales) + 1):
    vol.scales[i]['chunk_sizes'] = chunk_size

  return vol.commit_info()
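A minimal usage sketch, mirroring how the tests above call this helper (the layer path is a placeholder); it registers the new scales in the info file before any downsampling tasks run.
create_downsample_scales(
    "file:///tmp/removeme/layer",  # placeholder layer path
    mip=0,
    ds_shape=(2048, 2048, 64),
)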
Example #15
def test_mesh(compress):
    delete_layer()
    storage, _ = create_layer(size=(64, 64, 64, 1),
                              offset=(0, 0, 0),
                              layer_type="segmentation")
    cv = CloudVolume(storage.layer_path)
    # create a box of ones surrounded by zeroes
    data = np.zeros(shape=(64, 64, 64, 1), dtype=np.uint32)
    data[1:-1, 1:-1, 1:-1, :] = 1
    cv[0:64, 0:64, 0:64] = data
    cv.info['mesh'] = 'mesh'
    cv.commit_info()

    t = MeshTask(shape=(64, 64, 64),
                 offset=(0, 0, 0),
                 layer_path=storage.layer_path,
                 mip=0,
                 remap_table={"1": "10"},
                 low_padding=0,
                 high_padding=1,
                 compress=compress)
    t.execute()
    assert storage.get_file('mesh/10:0:0-64_0-64_0-64') is not None
    assert list(storage.list_files('mesh/')) == ['mesh/10:0:0-64_0-64_0-64']
Example #16
def create_image_layer(s3_bucket, tif_dimensions, voxel_size, num_resolutions):
    """Creates segmentation layer for skeletons

    Arguments:
        s3_bucket {str} -- path to SWC file
        voxel_size {list} -- 3 floats for voxel size in nm
        num_resolutions {int} -- number of resolutions for the image
    Returns:
        vols {list} -- List of num_resolutions CloudVolume objects, starting from lowest resolution
    """
    # create cloudvolume info
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="image",
        data_type="uint16",  # Channel images might be 'uint8'
        encoding="raw",  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution=voxel_size,  # Voxel scaling, units are in nanometers
        voxel_offset=[0, 0, 0],  # x,y,z offset in voxels from the origin
        # Pick a convenient size for your underlying chunk representation
        # Powers of two are recommended, doesn't need to cover image exactly
        chunk_size=[int(d / 4) for d in tif_dimensions],  # units are voxels
        # USING MAXIMUM VOLUME size
        volume_size=[i * 2**(num_resolutions - 1) for i in tif_dimensions],
    )
    # get cloudvolume info
    vol = CloudVolume(s3_bucket, info=info, parallel=False)  # compress = False
    # scales resolution up, volume size down
    [vol.add_scale((2**i, 2**i, 2**i))
     for i in range(num_resolutions)]  # ignore chunk size
    vol.commit_info()
    vols = [
        CloudVolume(s3_bucket, mip=i,
                    parallel=False)  # parallel False, compress
        for i in range(num_resolutions - 1, -1, -1)
    ]
    return vols
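A hedged usage sketch; the bucket path, TIFF dimensions, and voxel size below are placeholders.
vols = create_image_layer(
    "s3://my-bucket/image_layer",   # placeholder bucket path
    tif_dimensions=(2048, 2048, 512),
    voxel_size=[1810, 1810, 2000],
    num_resolutions=3,
)
lowest_res_vol = vols[0]  # mip num_resolutions-1 comes first, per the docstring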
Example #17
def create_downsample_scales(layer_path,
                             mip,
                             ds_shape,
                             axis='z',
                             preserve_chunk_size=False):
    vol = CloudVolume(layer_path, mip)
    shape = min2(vol.volume_size, ds_shape)

    # sometimes we downsample a base layer of 512x512
    # into underlying chunks of 64x64 which permits more scales
    underlying_mip = (mip + 1) if (mip + 1) in vol.available_mips else mip
    underlying_shape = vol.mip_underlying(underlying_mip).astype(np.float32)

    toidx = {'x': 0, 'y': 1, 'z': 2}
    preserved_idx = toidx[axis]
    underlying_shape[preserved_idx] = float('inf')

    scales = downsample_scales.compute_plane_downsampling_scales(
        size=shape,
        preserve_axis=axis,
        max_downsampled_size=int(min(*underlying_shape)),
    )
    scales = scales[1:]  # omit (1,1,1)
    scales = [
        list(map(int, vol.downsample_ratio * Vec(*factor3)))
        for factor3 in scales
    ]

    for scale in scales:
        vol.add_scale(scale)

    if preserve_chunk_size:
        for i in range(1, len(vol.scales)):
            vol.scales[i]['chunk_sizes'] = vol.scales[0]['chunk_sizes']

    return vol.commit_info()
Example #18
def ingest(args):
    """
    Ingest an HDF file to a CloudVolume bucket
    """
    if args.local_hdf_path:
        hdf_file = h5py.File(args.local_hdf_path, "r")
    else:
        with Storage(args.cloud_src_path) as storage:
            hdf_file = h5py.File(storage.get_file(args.cloud_hdf_filename),
                                 "r")
    cur_hdf_group = hdf_file
    for group_name in args.hdf_keys_to_dataset:
        cur_hdf_group = cur_hdf_group[group_name]
    hdf_dataset = cur_hdf_group
    if args.zyx:
        dataset_shape = np.array(
            [hdf_dataset.shape[2], hdf_dataset.shape[1], hdf_dataset.shape[0]])
    else:
        dataset_shape = np.array([*hdf_dataset.shape])
    if args.layer_type == "image":
        data_type = "uint8"
    else:
        data_type = "uint64"
    voxel_offset = args.voxel_offset
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=args.layer_type,
        data_type=data_type,
        encoding="raw",
        resolution=args.resolution,
        voxel_offset=voxel_offset,
        chunk_size=args.chunk_size,
        volume_size=dataset_shape,
    )
    provenance = {
        "description": args.provenance_description,
        "owners": [args.owner]
    }
    vol = CloudVolume(args.dst_path, info=info, provenance=provenance)
    vol.commit_info()
    vol.commit_provenance()

    all_files = set()
    for x in np.arange(voxel_offset[0], voxel_offset[0] + dataset_shape[0],
                       args.chunk_size[0]):
        for y in np.arange(voxel_offset[1], voxel_offset[1] + dataset_shape[1],
                           args.chunk_size[1]):
            for z in np.arange(voxel_offset[2],
                               voxel_offset[2] + dataset_shape[2],
                               args.chunk_size[2]):
                all_files.add(tuple((x, y, z)))

    progress_dir = mkdir("progress/")  # unlike os.mkdir, doesn't crash on preexisting
    done_files = set()
    for done_file in os.listdir(progress_dir):
        done_files.add(tuple(done_file.split(",")))
    to_upload = all_files.difference(done_files)

    for chunk_start_tuple in to_upload:
        chunk_start = np.array(list(chunk_start_tuple))
        end_of_dataset = np.array(voxel_offset) + dataset_shape
        chunk_end = chunk_start + np.array(args.chunk_size)
        chunk_end = Vec(*chunk_end)
        chunk_end = Vec.clamp(chunk_end, Vec(0, 0, 0), end_of_dataset)
        chunk_hdf_start = chunk_start - voxel_offset
        chunk_hdf_end = chunk_end - voxel_offset
        if args.zyx:
            chunk = hdf_dataset[chunk_hdf_start[2]:chunk_hdf_end[2],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[0]:chunk_hdf_end[0], ]
            chunk = chunk.T
        else:
            chunk = hdf_dataset[chunk_hdf_start[0]:chunk_hdf_end[0],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[2]:chunk_hdf_end[2], ]
        print("Processing ", chunk_start_tuple)
        array = np.array(chunk, dtype=np.dtype(data_type), order="F")
        vol[chunk_start[0]:chunk_end[0], chunk_start[1]:chunk_end[1],
            chunk_start[2]:chunk_end[2], ] = array
        touch(os.path.join(progress_dir, str(chunk_start_tuple)))
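A hedged driver sketch for ingest(); it fills in only the attributes the function actually reads, and every concrete value is a placeholder.
from argparse import Namespace

args = Namespace(
    local_hdf_path="/data/volume.h5",      # placeholder; set to None to read from cloud_src_path
    cloud_src_path=None,
    cloud_hdf_filename=None,
    hdf_keys_to_dataset=["main"],          # nesting of HDF5 groups down to the dataset
    zyx=False,
    layer_type="image",                    # "image" -> uint8, otherwise uint64
    voxel_offset=[0, 0, 0],
    resolution=[4, 4, 40],
    chunk_size=[64, 64, 64],
    dst_path="file:///tmp/removeme/ingested",
    provenance_description="HDF5 ingest test",
    owner="user@example.com",
)
ingest(args)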
Example #19
    def transfer_to(self,
                    cloudpath,
                    bbox,
                    mip,
                    block_size=None,
                    compress=True,
                    compress_level=None):
        """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed
    """
        from cloudvolume import CloudVolume

        if mip is None:
            mip = self.config.mip

        if self.is_sharded(mip):
            raise exceptions.UnsupportedFormatError(
                f"Sharded sources are not supported. got: {self.meta.cloudpath}"
            )

        bbox = Bbox.create(bbox, self.meta.bounds(mip))
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        default_block_size_MB = 50  # MB
        chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
            self.meta.dtype).itemsize * self.meta.num_channels
        if self.meta.layer_type == 'image':
            # kind of an average guess for some EM datasets, have seen up to 1.9x and as low as 1.1
            # affinities are also images, but have very different compression ratios. e.g. 3x for kempressed
            chunk_MB /= 1.3
        else:  # segmentation
            chunk_MB /= 100.0  # compression ratios between 80 and 800....
        chunk_MB /= 1024.0 * 1024.0

        if block_size:
            step = block_size
        else:
            step = int(default_block_size_MB // chunk_MB) + 1

        try:
            destvol = CloudVolume(cloudpath, mip=mip)
        except exceptions.InfoUnavailableError:
            destvol = CloudVolume(cloudpath,
                                  mip=mip,
                                  info=self.meta.info,
                                  provenance=self.meta.provenance.serialize())
            destvol.commit_info()
            destvol.commit_provenance()
        except exceptions.ScaleUnavailableError:
            destvol = CloudVolume(cloudpath)
            for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
                destvol.scales.append(self.meta.scales[i])
            destvol.commit_info()
            destvol.commit_provenance()

        if destvol.image.is_sharded(mip):
            raise exceptions.UnsupportedFormatError(
                f"Sharded destinations are not supported. got: {destvol.cloudpath}"
            )

        num_blocks = np.ceil(
            self.meta.bounds(mip).volume() /
            self.meta.chunk_size(mip).rectVolume()) / step
        num_blocks = int(np.ceil(num_blocks))

        cloudpaths = chunknames(bbox,
                                self.meta.bounds(mip),
                                self.meta.key(mip),
                                self.meta.chunk_size(mip),
                                protocol=self.meta.path.protocol)

        pbar = tqdm(
            desc='Transferring Blocks of {} Chunks'.format(step),
            unit='blocks',
            disable=(not self.config.progress),
            total=num_blocks,
        )

        cfsrc = CloudFiles(self.meta.cloudpath, secrets=self.config.secrets)
        cfdest = CloudFiles(cloudpath)

        def check(files):
            errors = [
              file for file in files if \
              (file['content'] is None or file['error'] is not None)
            ]
            if errors:
                error_paths = [f['path'] for f in errors]
                raise exceptions.EmptyFileException(
                    "{} were empty or had IO errors.".format(
                        ", ".join(error_paths)))
            return files

        with pbar:
            for srcpaths in sip(cloudpaths, step):
                files = check(cfsrc.get(srcpaths, raw=True))
                cfdest.puts(compression.transcode(files,
                                                  encoding=compress,
                                                  level=compress_level,
                                                  in_place=True),
                            compress=compress,
                            content_type=tx.content_type(destvol),
                            raw=True)
                pbar.update()
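A minimal sketch of calling this method through a CloudVolume instance, as in the transfer test earlier; both paths are placeholders, the bounding box must be chunk aligned, and mip falls back to the volume's configured mip when omitted.
src = CloudVolume("file:///tmp/removeme/layer")   # placeholder source layer
src.transfer_to("file:///tmp/removeme/transfer_copy", src.bounds)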
Example #20
def save_cloudvolume(img,
                     path,
                     mode,
                     origin,
                     mip=0,
                     resolution=None,
                     flip_xy=False,
                     voxel_offset=None,
                     volume_size=None,
                     chunk_size=(64, 64, 64),
                     factor=(2, 2, 2)):
    """Save images to a CloudVolume layer.

    Parameters
    ----------
    img : array_like
        The image/volume to save.
    path : str
        The directory to write the layer to.
    mode : {'image', 'segmentation'}
    """
    if mode not in ['image', 'segmentation']:
        raise ValueError(
            'Invalid mode {}. Must be one of "image", "segmentation"'.format(
                mode))

    if not re.search(r'^[a-zA-Z\d]+://$', path.split(os.path.sep)[0]):
        raise ValueError('No protocol specified in {}.'.format(path))

    if not os.path.isfile(os.path.join(path, 'info')):
        if MPI.COMM_WORLD.Get_rank() == 0:
            if mode == 'image':
                info = CloudVolume.create_new_info(
                    num_channels=img.shape[-1],
                    layer_type='image',
                    data_type='uint8',
                    encoding='raw',
                    resolution=resolution,
                    voxel_offset=voxel_offset,
                    volume_size=list(volume_size),
                    chunk_size=chunk_size,
                    max_mip=mip,
                    factor=factor)
                cv_args = dict(bounded=True,
                               fill_missing=True,
                               autocrop=False,
                               cache=False,
                               compress_cache=None,
                               cdn_cache=False,
                               progress=False,
                               info=info,
                               provenance=None,
                               compress=True,
                               non_aligned_writes=True,
                               parallel=1)
                cv = CloudVolume(path, mip=0, **cv_args)
                cv.commit_info()
            elif mode == 'segmentation':
                info = CloudVolume.create_new_info(
                    num_channels=img.shape[-1],
                    layer_type='segmentation',
                    data_type='uint32',
                    encoding='compressed_segmentation',
                    resolution=resolution,
                    voxel_offset=voxel_offset,
                    volume_size=list(volume_size),
                    chunk_size=chunk_size,
                    max_mip=mip,
                    factor=factor)

                if mip >= 1:
                    for i in range(1, mip + 1):
                        info['scales'][i]['compressed_segmentation_block_size'] = \
                            info['scales'][0]['compressed_segmentation_block_size']

                cv_args = dict(bounded=True,
                               fill_missing=True,
                               autocrop=False,
                               cache=False,
                               compress_cache=None,
                               cdn_cache=False,
                               progress=False,
                               info=info,
                               provenance=None,
                               compress=True,
                               non_aligned_writes=True,
                               parallel=1)
                cv = CloudVolume(path, mip=0, **cv_args)
                cv.commit_info()

        if MPI.COMM_WORLD.Get_size() > 1:
            MPI.COMM_WORLD.barrier()

    if flip_xy:
        img = np.transpose(img, axes=(1, 2, 0))
    else:
        img = np.transpose(img, axes=(2, 1, 0))

    cv_args = dict(bounded=True,
                   fill_missing=True,
                   autocrop=False,
                   cache=False,
                   compress_cache=None,
                   cdn_cache=False,
                   progress=False,
                   info=None,
                   provenance=None,
                   compress=(mode == 'segmentation'),
                   non_aligned_writes=True,
                   parallel=1)

    for m in range(mip + 1):
        cv = CloudVolume(path, mip=m, **cv_args)

        offset = cv.mip_voxel_offset(m)
        step = np.power(np.asarray(factor), m)
        cv_z_start = origin[0] // step[2] + offset[2]
        cv_z_size = img.shape[2]
        cv[:, :, cv_z_start:cv_z_start + cv_z_size] = img
        img = img[::factor[0], ::factor[1], ::factor[2]]

    return cv
Example #21
def create_hypersquare_ingest_tasks(hypersquare_bucket_name, dataset_name,
                                    hypersquare_chunk_size, resolution,
                                    voxel_offset, volume_size, overlap):
    def crtinfo(layer_type, dtype, encoding):
        return CloudVolume.create_new_info(
            num_channels=1,
            layer_type=layer_type,
            data_type=dtype,
            encoding=encoding,
            resolution=resolution,
            voxel_offset=voxel_offset,
            volume_size=volume_size,
            chunk_size=[56, 56, 56],
        )

    imginfo = crtinfo('image', 'uint8', 'jpeg')
    seginfo = crtinfo('segmentation', 'uint16', 'raw')

    scales = downsample_scales.compute_plane_downsampling_scales(
        hypersquare_chunk_size)[1:]  # omit (1,1,1)

    IMG_LAYER_NAME = 'image'
    SEG_LAYER_NAME = 'segmentation'

    imgvol = CloudVolume(dataset_name, IMG_LAYER_NAME, 0, info=imginfo)
    segvol = CloudVolume(dataset_name, SEG_LAYER_NAME, 0, info=seginfo)

    print("Creating info files for image and segmentation...")
    imgvol.commit_info()
    segvol.commit_info()

    def crttask(volname, tasktype, layer_name):
        return HyperSquareTask(
            bucket_name=hypersquare_bucket_name,
            dataset_name=dataset_name,
            layer_name=layer_name,
            volume_dir=volname,
            layer_type=tasktype,
            overlap=overlap,
            resolution=resolution,
        )

    print("Listing hypersquare bucket...")
    volumes_listing = lib.gcloud_ls('gs://{}/'.format(hypersquare_bucket_name))

    # download this from:
    # with open('e2198_volumes.json', 'r') as f:
    #   volumes_listing = json.loads(f.read())

    volumes_listing = [x.split('/')[-2] for x in volumes_listing]

    class CreateHypersquareIngestTaskIterator(object):
        def __len__(self):
            return len(volumes_listing)

        def __iter__(self):
            for cloudpath in volumes_listing:
                # img_task = crttask(cloudpath, 'image', IMG_LAYER_NAME)
                yield crttask(cloudpath, 'segmentation', SEG_LAYER_NAME)
                # seg_task.execute()

    return CreateHypersquareIngestTaskIterator()
Example #22
def create_contrast_normalization_tasks(src_path,
                                        dest_path,
                                        levels_path=None,
                                        shape=None,
                                        mip=0,
                                        clip_fraction=0.01,
                                        fill_missing=False,
                                        translate=(0, 0, 0),
                                        minval=None,
                                        maxval=None,
                                        bounds=None):

    srcvol = CloudVolume(src_path, mip=mip)

    try:
        dvol = CloudVolume(dest_path, mip=mip)
    except Exception:  # no info file
        info = copy.deepcopy(srcvol.info)
        dvol = CloudVolume(dest_path, mip=mip, info=info)
        dvol.info['scales'] = dvol.info['scales'][:mip + 1]
        dvol.commit_info()

    if shape is None:
        shape = Bbox((0, 0, 0), (2048, 2048, 64))
        shape = shape.shrink_to_chunk_size(dvol.underlying).size3()
        shape = Vec.clamp(shape, (1, 1, 1), bounds.size3())

    shape = Vec(*shape)

    create_downsample_scales(dest_path,
                             mip=mip,
                             ds_shape=shape,
                             preserve_chunk_size=True)
    dvol.refresh_info()

    bounds = get_bounds(srcvol, bounds, shape, mip)

    class ContrastNormalizationTaskIterator(object):
        def __len__(self):
            return int(reduce(operator.mul, np.ceil(bounds.size3() / shape)))

        def __iter__(self):
            for startpt in xyzrange(bounds.minpt, bounds.maxpt, shape):
                task_shape = min2(shape.clone(), srcvol.bounds.maxpt - startpt)
                yield ContrastNormalizationTask(
                    src_path=src_path,
                    dest_path=dest_path,
                    levels_path=levels_path,
                    shape=task_shape,
                    offset=startpt.clone(),
                    clip_fraction=clip_fraction,
                    mip=mip,
                    fill_missing=fill_missing,
                    translate=translate,
                    minval=minval,
                    maxval=maxval,
                )

            dvol.provenance.processing.append({
                'method': {
                    'task': 'ContrastNormalizationTask',
                    'src_path': src_path,
                    'dest_path': dest_path,
                    'shape': Vec(*shape).tolist(),
                    'clip_fraction': clip_fraction,
                    'mip': mip,
                    'translate': Vec(*translate).tolist(),
                    'minval': minval,
                    'maxval': maxval,
                    'bounds': [bounds.minpt.tolist(),
                               bounds.maxpt.tolist()],
                },
                'by': OPERATOR_CONTACT,
                'date': strftime('%Y-%m-%d %H:%M %Z'),
            })
            dvol.commit_provenance()

    return ContrastNormalizationTaskIterator()
Example #23
def create_transfer_tasks(src_layer_path,
                          dest_layer_path,
                          chunk_size=None,
                          shape=Vec(2048, 2048, 64),
                          fill_missing=False,
                          translate=(0, 0, 0),
                          bounds=None,
                          mip=0,
                          preserve_chunk_size=True,
                          encoding=None):
    """
  Transfer data from one data layer to another. It's possible
  to transfer from a lower resolution mip level within a given
  bounding box. The bounding box should be specified in terms of
  the highest resolution.
  """
    shape = Vec(*shape)
    vol = CloudVolume(src_layer_path, mip=mip)
    translate = Vec(*translate) // vol.downsample_ratio

    if not chunk_size:
        chunk_size = vol.info['scales'][mip]['chunk_sizes'][0]
    chunk_size = Vec(*chunk_size)

    try:
        dvol = CloudVolume(dest_layer_path, mip=mip)
    except Exception:  # no info file
        info = copy.deepcopy(vol.info)
        dvol = CloudVolume(dest_layer_path, info=info)
        dvol.commit_info()

    if encoding is not None:
        dvol.info['scales'][mip]['encoding'] = encoding
    dvol.info['scales'] = dvol.info['scales'][:mip + 1]
    dvol.info['scales'][mip]['chunk_sizes'] = [chunk_size.tolist()]
    dvol.commit_info()

    create_downsample_scales(dest_layer_path,
                             mip=mip,
                             ds_shape=shape,
                             preserve_chunk_size=preserve_chunk_size,
                             encoding=encoding)

    if bounds is None:
        bounds = vol.bounds.clone()
    else:
        bounds = vol.bbox_to_mip(bounds, mip=0, to_mip=mip)
        bounds = Bbox.clamp(bounds, dvol.bounds)

    dvol_bounds = dvol.mip_bounds(mip).clone()

    class TransferTaskIterator(object):
        def __len__(self):
            return int(reduce(operator.mul, np.ceil(bounds.size3() / shape)))

        def __iter__(self):
            for startpt in xyzrange(bounds.minpt, bounds.maxpt, shape):
                task_shape = min2(shape.clone(), dvol_bounds.maxpt - startpt)
                yield TransferTask(
                    src_path=src_layer_path,
                    dest_path=dest_layer_path,
                    shape=task_shape,
                    offset=startpt.clone(),
                    fill_missing=fill_missing,
                    translate=translate,
                    mip=mip,
                )

            job_details = {
                'method': {
                    'task': 'TransferTask',
                    'src': src_layer_path,
                    'dest': dest_layer_path,
                    'shape': list(map(int, shape)),
                    'fill_missing': fill_missing,
                    'translate': list(map(int, translate)),
                    'bounds': [bounds.minpt.tolist(),
                               bounds.maxpt.tolist()],
                    'mip': mip,
                },
                'by': OPERATOR_CONTACT,
                'date': strftime('%Y-%m-%d %H:%M %Z'),
            }

            dvol = CloudVolume(dest_layer_path)
            dvol.provenance.sources = [src_layer_path]
            dvol.provenance.processing.append(job_details)
            dvol.commit_provenance()

            if vol.path.protocol != 'boss':
                vol.provenance.processing.append(job_details)
                vol.commit_provenance()

    return TransferTaskIterator()
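A hedged sketch of enqueuing the transfer tasks, following the MockTaskQueue pattern used in the tests above; both layer paths are placeholders.
tq = MockTaskQueue()
tasks = create_transfer_tasks(
    "file:///tmp/removeme/src_layer",   # placeholder source
    "file:///tmp/removeme/dst_layer",   # placeholder destination
    mip=0,
)
tq.insert_all(tasks)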
Example #24
def create_meshing_tasks(
    layer_path, mip, shape=(448, 448, 448), 
    simplification=True, max_simplification_error=40,
    mesh_dir=None, cdn_cache=False, dust_threshold=None,
    object_ids=None, progress=False, fill_missing=False,
    encoding='precomputed', spatial_index=True, sharded=False,
    compress='gzip'
  ):
  shape = Vec(*shape)

  vol = CloudVolume(layer_path, mip)

  if mesh_dir is None:
    mesh_dir = 'mesh_mip_{}_err_{}'.format(mip, max_simplification_error)

  if not 'mesh' in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cf = CloudFiles(layer_path)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cf.get_json(info_filename) or {}
  mesh_info['@type'] = 'neuroglancer_legacy_mesh'
  mesh_info['mip'] = int(vol.mip)
  mesh_info['chunk_size'] = shape.tolist()
  if spatial_index:
    mesh_info['spatial_index'] = {
        'resolution': vol.resolution.tolist(),
        'chunk_size': (shape*vol.resolution).tolist(),
    }
  cf.put_json(info_filename, mesh_info)

  class MeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return MeshTask(
        shape=shape.clone(),
        offset=offset.clone(),
        layer_path=layer_path,
        mip=vol.mip,
        simplification_factor=(0 if not simplification else 100),
        max_simplification_error=max_simplification_error,
        mesh_dir=mesh_dir, 
        cache_control=('' if cdn_cache else 'no-cache'),
        dust_threshold=dust_threshold,
        progress=progress,
        object_ids=object_ids,
        fill_missing=fill_missing,
        encoding=encoding,
        spatial_index=spatial_index,
        sharded=sharded,
        compress=compress,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshTask',
          'layer_path': layer_path,
          'mip': vol.mip,
          'shape': shape.tolist(),
          'simplification': simplification,
          'max_simplification_error': max_simplification_error,
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'cdn_cache': cdn_cache,
          'dust_threshold': dust_threshold,
          'encoding': encoding,
          'object_ids': object_ids,
          'spatial_index': spatial_index,
          'sharded': sharded,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      }) 
      vol.commit_provenance()

  return MeshTaskIterator(vol.mip_bounds(mip), shape)
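A hedged sketch of generating and running the mesh tasks produced by the function above, again with the in-process queue pattern; the segmentation path is a placeholder.
tq = MockTaskQueue()
tasks = create_meshing_tasks(
    "file:///tmp/removeme/segmentation",   # placeholder segmentation layer
    mip=0,
    shape=(448, 448, 448),
)
tq.insert_all(tasks)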
Example #25
def test_redirects():
    info = CloudVolume.create_new_info(
        num_channels=1,  # Increase this number when we add more tests for RGB
        layer_type='image',
        data_type='uint8',
        encoding='raw',
        resolution=[1, 1, 1],
        voxel_offset=[0, 0, 0],
        volume_size=[128, 128, 64],
        mesh='mesh',
        chunk_size=[64, 64, 64],
    )

    vol = CloudVolume('file:///tmp/cloudvolume/redirects_0', mip=0, info=info)
    vol.commit_info()
    vol.refresh_info()

    vol.info['redirect'] = 'file:///tmp/cloudvolume/redirects_0'
    vol.commit_info()
    vol.refresh_info()

    del vol.info['redirect']

    for i in range(0, 10):
        info['redirect'] = 'file:///tmp/cloudvolume/redirects_' + str(i + 1)
        vol = CloudVolume('file:///tmp/cloudvolume/redirects_' + str(i),
                          mip=0,
                          info=info)
        vol.commit_info()
    else:
        del vol.info['redirect']
        vol.commit_info()

    vol = CloudVolume('file:///tmp/cloudvolume/redirects_0', mip=0)

    assert vol.cloudpath == 'file:///tmp/cloudvolume/redirects_9'

    info['redirect'] = 'file:///tmp/cloudvolume/redirects_10'
    vol = CloudVolume('file:///tmp/cloudvolume/redirects_9', mip=0, info=info)
    vol.commit_info()

    try:
        CloudVolume('file:///tmp/cloudvolume/redirects_0', mip=0)
        assert False
    except exceptions.TooManyRedirects:
        pass

    vol = CloudVolume('file:///tmp/cloudvolume/redirects_9', max_redirects=0)
    del vol.info['redirect']
    vol.commit_info()

    vol = CloudVolume('file:///tmp/cloudvolume/redirects_5', max_redirects=0)
    vol.info['redirect'] = 'file:///tmp/cloudvolume/redirects_1'
    vol.commit_info()

    try:
        vol = CloudVolume('file:///tmp/cloudvolume/redirects_5')
        assert False
    except exceptions.CyclicRedirect:
        pass

    vol.info['redirect'] = 'file:///tmp/cloudvolume/redirects_6'
    vol.commit_info()

    vol = CloudVolume('file:///tmp/cloudvolume/redirects_1')

    try:
        vol[:, :, :] = 1
        assert False
    except exceptions.ReadOnlyException:
        pass

    for i in range(0, 10):
        delete_layer('/tmp/cloudvolume/redirects_' + str(i))
Example #26
def setup_environment(dry_run, volume_start, volume_stop, volume_size,
                      layer_path, max_ram_size, output_patch_size,
                      input_patch_size, channel_num, dtype,
                      output_patch_overlap, crop_chunk_margin, mip,
                      thumbnail_mip, max_mip, queue_name, visibility_timeout,
                      thumbnail, encoding, voxel_size, overwrite_info,
                      verbose):
    """Prepare storage info files and produce tasks."""
    assert not (volume_stop is None and volume_size is None)
    if isinstance(volume_start, tuple):
        volume_start = Vec(*volume_start)
    if isinstance(volume_stop, tuple):
        volume_stop = Vec(*volume_stop)
    if isinstance(volume_size, tuple):
        volume_size = Vec(*volume_size)

    if input_patch_size is None:
        input_patch_size = output_patch_size

    if volume_size:
        assert volume_stop is None
        volume_stop = volume_start + volume_size
    else:
        volume_size = volume_stop - volume_start
    print('\noutput volume start: ' + tuple2string(volume_start))
    print('output volume stop: ' + tuple2string(volume_stop))
    print('output volume size: ' + tuple2string(volume_size))

    if output_patch_overlap is None:
        # use 50% patch overlap by default
        output_patch_overlap = tuple(s // 2 for s in output_patch_size)
    assert output_patch_overlap[1] == output_patch_overlap[2]

    if crop_chunk_margin is None:
        crop_chunk_margin = output_patch_overlap
    assert crop_chunk_margin[1] == crop_chunk_margin[2]
    print('margin size: ' + tuple2string(crop_chunk_margin))

    if thumbnail:
        # thumbnail requires maximum mip level of 5
        thumbnail_mip = max(thumbnail_mip, 5)

    patch_stride = tuple(
        s - o for s, o in zip(output_patch_size, output_patch_overlap))
    # total number of voxels per patch in one stride
    patch_voxel_num = np.product(patch_stride)
    # use half of the maximum ram size to store output buffer
    ideal_total_patch_num = int(max_ram_size * 1e9 / 2 / 4 / channel_num /
                                patch_voxel_num)
    # the xy size should be the same
    assert output_patch_size[1] == output_patch_size[2]
    # compute the output chunk/block size in cloud storage
    # assume that the crop margin size is the same with the patch overlap
    patch_num_start = int(ideal_total_patch_num**(1. / 3.) / 2)
    patch_num_stop = patch_num_start * 3

    # find the patch number solution with minimum cost by bruteforce search
    cost = sys.float_info.max
    patch_num = None
    # patch number in x and y
    max_factor = 2**max_mip
    factor = 2**mip
    for pnxy in range(patch_num_start, patch_num_stop):
        if (pnxy * patch_stride[2] + output_patch_overlap[2] -
                2 * crop_chunk_margin[2]) % max_factor != 0:
            continue
        # patch number in z
        for pnz in range(patch_num_start, patch_num_stop):
            if (pnz * patch_stride[0] + output_patch_overlap[0] -
                    2 * crop_chunk_margin[0]) % factor != 0:
                continue
            current_cost = (pnxy * pnxy * pnz / ideal_total_patch_num -
                            1)**2  #+ (pnxy / pnz - 1) ** 2
            if current_cost < cost:
                cost = current_cost
                patch_num = (pnz, pnxy, pnxy)

    print('\n--input-patch-size ', tuple2string(input_patch_size))
    print('--output-patch-size ', tuple2string(output_patch_size))
    print('--output-patch-overlap ', tuple2string(output_patch_overlap))
    print('--output-patch-stride ', tuple2string(patch_stride))
    print('--patch-num ', patch_num)

    assert mip >= 0
    block_mip = (mip + thumbnail_mip) // 2
    block_factor = 2**block_mip

    output_chunk_size = tuple(n * s + o - 2 * c for n, s, o, c in zip(
        patch_num, patch_stride, output_patch_overlap, crop_chunk_margin))

    input_chunk_size = tuple(
        ocs + ccm * 2 + ips - ops
        for ocs, ccm, ips, ops in zip(output_chunk_size, crop_chunk_margin,
                                      input_patch_size, output_patch_size))

    expand_margin_size = tuple(
        (ics - ocs) // 2
        for ics, ocs in zip(input_chunk_size, output_chunk_size))

    input_chunk_start = tuple(
        vs - ccm - (ips - ops) // 2
        for vs, ccm, ips, ops in zip(volume_start, crop_chunk_margin,
                                     input_patch_size, output_patch_size))

    block_size = (output_chunk_size[0] // factor,
                  output_chunk_size[1] // block_factor,
                  output_chunk_size[2] // block_factor)

    print('\n--input-chunk-size ' + tuple2string(input_chunk_size))
    print('--input-volume-start ' + tuple2string(input_chunk_start))
    print('--output-chunk-size ' + tuple2string(output_chunk_size))
    print('cutout expand margin size ' + tuple2string(expand_margin_size))

    print('output volume start: ' + tuple2string(volume_start))
    print('block size ' + tuple2string(block_size))
    print('RAM size of each block: ',
          np.prod(output_chunk_size) / 1024 / 1024 / 1024 * 4 * channel_num,
          ' GB')
    voxel_utilization = np.prod(output_chunk_size) / np.prod(
        patch_num) / np.prod(output_patch_size)
    print('voxel utilization: {:.2f}'.format(voxel_utilization))

    if not dry_run:
        storage = SimpleStorage(layer_path)
        thumbnail_layer_path = os.path.join(layer_path, 'thumbnail')
        thumbnail_storage = SimpleStorage(thumbnail_layer_path)

        if not overwrite_info:
            print('\ncheck that we are not overwriting existing info file.')
            assert storage.exists('info')
            assert thumbnail_storage.exists('info')

        print('create and upload info file to ', layer_path)
        # Note that cloudvolume uses Fortran order rather than C order
        info = CloudVolume.create_new_info(channel_num,
                                           layer_type='image',
                                           data_type=dtype,
                                           encoding=encoding,
                                           resolution=voxel_size[::-1],
                                           voxel_offset=volume_start[::-1],
                                           volume_size=volume_size[::-1],
                                           chunk_size=block_size[::-1],
                                           max_mip=mip)
        vol = CloudVolume(layer_path, info=info)
        if overwrite_info:
            vol.commit_info()

        thumbnail_factor = 2**thumbnail_mip
        thumbnail_block_size = (output_chunk_size[0] // factor,
                                output_chunk_size[1] // thumbnail_factor,
                                output_chunk_size[2] // thumbnail_factor)
        print('thumbnail block size: ' + tuple2string(thumbnail_block_size))
        thumbnail_info = CloudVolume.create_new_info(
            1,
            layer_type='image',
            data_type='uint8',
            encoding='raw',
            resolution=voxel_size[::-1],
            voxel_offset=volume_start[::-1],
            volume_size=volume_size[::-1],
            chunk_size=thumbnail_block_size[::-1],
            max_mip=thumbnail_mip)
        thumbnail_vol = CloudVolume(thumbnail_layer_path, info=thumbnail_info)
        if overwrite_info:
            thumbnail_vol.commit_info()

    print('create a list of bounding boxes...')
    roi_start = (volume_start[0], volume_start[1] // factor,
                 volume_start[2] // factor)
    roi_size = (volume_size[0], volume_size[1] // factor,
                volume_size[2] // factor)
    roi_stop = tuple(s + z for s, z in zip(roi_start, roi_size))

    # create bounding boxes and ingest to queue
    bboxes = create_bounding_boxes(output_chunk_size,
                                   roi_start=roi_start,
                                   roi_stop=roi_stop,
                                   verbose=verbose)
    print('total number of tasks: ', len(bboxes))

    if verbose > 1:
        print('bounding boxes: ', bboxes)

    return bboxes
Example #27
def create_graphene_meshing_tasks(
  cloudpath, timestamp, mip,
  simplification=True, max_simplification_error=40,
  mesh_dir=None, cdn_cache=False, object_ids=None, 
  progress=False, fill_missing=False, sharding=None,
  draco_compression_level=1, bounds=None
):
  cv = CloudVolume(cloudpath, mip=mip)

  if mip < cv.meta.watershed_mip:
    raise ValueError("Must mesh at or above the watershed mip level. Watershed MIP: {} Got: {}".format(
      cv.meta.watershed_mip, mip
    ))

  if mesh_dir is None:
    mesh_dir = 'meshes'

  cv.info['mesh'] = mesh_dir # necessary to set the mesh.commit_info() dir right
  if not 'mesh' in cv.info:
    cv.commit_info()

  watershed_downsample_ratio = cv.resolution // cv.meta.resolution(cv.meta.watershed_mip)
  shape = Vec(*cv.meta.graph_chunk_size) // watershed_downsample_ratio

  cv.mesh.meta.info['@type'] = 'neuroglancer_legacy_mesh'
  cv.mesh.meta.info['mip'] = cv.mip
  cv.mesh.meta.info['chunk_size'] = list(shape)
  if sharding:
    cv.mesh.meta.info['sharding'] = sharding
  cv.mesh.meta.commit_info()

  simplification = (0 if not simplification else 100)

  class GrapheneMeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return GrapheneMeshTask(
        cloudpath=cloudpath,
        shape=shape.clone(),
        offset=offset.clone(),
        mip=int(mip),
        simplification_factor=simplification,
        max_simplification_error=max_simplification_error,
        draco_compression_level=draco_compression_level,
        mesh_dir=mesh_dir, 
        cache_control=('' if cdn_cache else 'no-cache'),
        progress=progress,
        fill_missing=fill_missing,
        timestamp=timestamp,
      )

    def on_finish(self):
      cv.provenance.processing.append({
        'method': {
          'task': 'GrapheneMeshTask',
          'cloudpath': cv.cloudpath,
          'shape': cv.meta.graph_chunk_size,
          'mip': int(mip),
          'simplification': simplification,
          'max_simplification_error': max_simplification_error,
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'cdn_cache': cdn_cache,
          'timestamp': timestamp,
          'draco_compression_level': draco_compression_level,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      }) 
      cv.commit_provenance()

  if bounds is None:
    bounds = cv.meta.bounds(mip).clone()
  else:
    bounds = cv.bbox_to_mip(bounds, mip=0, to_mip=mip)
    bounds = Bbox.clamp(bounds, cv.bounds)

  bounds = bounds.expand_to_chunk_size(shape, cv.voxel_offset)

  return GrapheneMeshTaskIterator(bounds, shape)
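A short usage sketch following the LocalTaskQueue pattern used elsewhere in these examples; the graphene cloudpath and timestamp below are placeholders:

tq = LocalTaskQueue(parallel=1)
tasks = create_graphene_meshing_tasks(
    'graphene://https://example.com/segmentation/table/my_dataset',  # placeholder
    timestamp=None,
    mip=2,
)
tq.insert(tasks)
tq.execute()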
Example #28
0
def create_structures(animal):
    """
    The main method called from main(); it does all of the work.
    Args:
        animal: string to identify the animal/stack

    Returns:
        Nothing; creates a directory containing the precomputed volume. Copy this directory somewhere Apache can read it, e.g.,
        /net/birdstore/Active_Atlas_Data/data_root/pipeline_data/
    """


    sqlController = SqlController(animal)
    fileLocationManager = FileLocationManager(animal)
    # Set all relevant directories
    THUMBNAIL_PATH = os.path.join(fileLocationManager.prep, 'CH1', 'thumbnail')
    CSV_PATH = '/net/birdstore/Active_Atlas_Data/data_root/atlas_data/foundation_brain_annotations'
    CLEANED = os.path.join(fileLocationManager.prep, 'CH1', 'thumbnail_cleaned')
    PRECOMPUTE_PATH = f'/net/birdstore/Active_Atlas_Data/data_root/atlas_data/foundation_brain_annotations/{animal}'

    width = sqlController.scan_run.width
    height = sqlController.scan_run.height
    width = int(width * SCALING_FACTOR)
    height = int(height * SCALING_FACTOR)
    aligned_shape = np.array((width, height))
    THUMBNAILS = sorted(os.listdir(THUMBNAIL_PATH))
    num_section = len(THUMBNAILS)
    structure_dict = sqlController.get_structures_dict()
    csvfile = os.path.join(CSV_PATH, f'{animal}_annotation.csv')

    hand_annotations = pd.read_csv(csvfile)
    # Normalize the vertex strings before parsing: turn spaces and newlines into
    # commas, drop trailing commas before ']', and collapse runs of commas
    # (the ',,' -> ',' replacement is repeated to handle longer runs).
    hand_annotations['vertices'] = hand_annotations['vertices'] \
        .apply(lambda x: x.replace(' ', ','))\
        .apply(lambda x: x.replace('\n', ','))\
        .apply(lambda x: x.replace(',]', ']'))\
        .apply(lambda x: x.replace(',,', ','))\
        .apply(lambda x: x.replace(',,', ','))\
        .apply(lambda x: x.replace(',,', ','))\
        .apply(lambda x: x.replace(',,', ','))
    hand_annotations['vertices'] = hand_annotations['vertices'].apply(ast.literal_eval)

    structures = list(hand_annotations['name'].unique())
    section_structure_vertices = defaultdict(dict)
    for structure in tqdm(structures):
        contour_annotations, first_sec, last_sec = get_contours_from_annotations(animal, structure, hand_annotations, densify=4)
        for section in contour_annotations:
            section_structure_vertices[section][structure] = contour_annotations[section][structure][1]


    ##### Reproduce create_clean transform
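    # Assumption based on the create_clean step being reproduced here: each raw
    # thumbnail was centered on a common canvas of size aligned_shape (width,
    # height), so the per-section offset below is half of the size difference,
    # in (x, y) order.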
    section_offset = {}
    for file_name in tqdm(THUMBNAILS):
        filepath = os.path.join(THUMBNAIL_PATH, file_name)
        img = io.imread(filepath)
        section = int(file_name.split('.')[0])
        section_offset[section] = (aligned_shape - img.shape[:2][::-1]) // 2


    ##### Reproduce create_alignment transform

    image_name_list = sorted(os.listdir(CLEANED))
    anchor_idx = len(image_name_list) // 2
    transformation_to_previous_sec = {}

    for i in range(1, len(image_name_list)):
        fixed_fn = os.path.splitext(image_name_list[i - 1])[0]
        moving_fn = os.path.splitext(image_name_list[i])[0]
        transformation_to_previous_sec[i] = load_consecutive_section_transform(animal, moving_fn, fixed_fn)

    transformation_to_anchor_sec = {}
    # Compose each section's transformation relative to the anchor section
    for moving_idx in range(len(image_name_list)):
        if moving_idx == anchor_idx:
            transformation_to_anchor_sec[image_name_list[moving_idx]] = np.eye(3)
        elif moving_idx < anchor_idx:
            T_composed = np.eye(3)
            for i in range(anchor_idx, moving_idx, -1):
                T_composed = np.dot(np.linalg.inv(transformation_to_previous_sec[i]), T_composed)
            transformation_to_anchor_sec[image_name_list[moving_idx]] = T_composed
        else:
            T_composed = np.eye(3)
            for i in range(anchor_idx + 1, moving_idx + 1):
                T_composed = np.dot(transformation_to_previous_sec[i], T_composed)
            transformation_to_anchor_sec[image_name_list[moving_idx]] = T_composed
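    # Composition summary: sections before the anchor chain the *inverse*
    # pairwise transforms walking backwards from the anchor, while sections
    # after it chain the forward pairwise transforms, so every section ends up
    # expressed in the anchor section's coordinate frame.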


    warp_transforms = create_warp_transforms(animal, transformation_to_anchor_sec, 'thumbnail', 'thumbnail')
    ordered_transforms = sorted(warp_transforms.items())
    section_transform = {}

    for section, transform in ordered_transforms:
        section_num = int(section.split('.')[0])
        transform = np.linalg.inv(transform)
        section_transform[section_num] = transform

    ##### Alignment of annotation coordinates
    keys = [k for k in structure_dict.keys()]
    # This missing_sections will need to be manually built up from Beth's spreadsheet
    missing_sections = {k: [117] for k in keys}
    fill_sections = defaultdict(dict)
    pr5_sections = []
    other_structures = set()
    volume = np.zeros((aligned_shape[1], aligned_shape[0], num_section), dtype=np.uint8)
    for section in section_structure_vertices:
        template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
        for structure in section_structure_vertices[section]:
            points = np.array(section_structure_vertices[section][structure])
            points = points // 32
            points = points + section_offset[section]  # create_clean offset
            points = transform_create_alignment(points, section_transform[section])  # create_alignment transform
            points = points.astype(np.int32)

            # Sections manually marked as missing for this structure.
            missing_list = missing_sections.get(structure, [])

            if section in missing_list:
                fill_sections[structure][section] = points


            if 'pr5' in structure.lower():
                pr5_sections.append(section)

            try:
                # color = colors[structure.upper()]
                # structure_dict maps each key to [description, color]
                color = structure_dict[structure][1]
            except KeyError:
                # Unknown structure: draw it in white and track it for reporting.
                color = 255
                other_structures.add(structure)

            cv2.polylines(template, [points], True, color, 2, lineType=cv2.LINE_AA)
        volume[:, :, section - 1] = template

    # fill up missing sections
    template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
    for structure, v in fill_sections.items():
        color = structure_dict[structure][1]
        for section, points in v.items():
            cv2.polylines(template, [points], True, color, 2, lineType=cv2.LINE_AA)
            volume[:, :, section] = template
    volume_filepath = os.path.join(CSV_PATH, f'{animal}_annotations.npy')

    volume = np.swapaxes(volume, 0, 1)
    print('Saving:', volume_filepath, 'with shape', volume.shape)
    with open(volume_filepath, 'wb') as file:
        np.save(file, volume)


    # Now use the 9-1 notebook to convert this volume to a precomputed layer.
    # Voxel resolution in nanometers (how many nanometers each element of the numpy array represents)
    resol = (14464, 14464, 20000)
    # Voxel offset
    offset = (0, 0, 0)
    # Layer type
    layer_type = 'segmentation'
    # number of channels
    num_channels = 1
    # segmentation properties in the format [(number1, label1), (number2, label2), ...]
    # where number is an integer present in the volume and label is a string describing that segmentation

    segmentation_properties = [(number, f'{structure}: {label}') for structure, (label, number) in structure_dict.items()]
    extra_structures = ['Pr5', 'VTg', 'DRD', 'IF', 'MPB', 'Op', 'RPC', 'LSO', 'MVe', 'CnF',
                        'pc', 'DTgC', 'LPB', 'Pr5DM', 'DTgP', 'RMC', 'VTA', 'IPC', 'DRI', 'LDTg',
                        'IPA', 'PTg', 'DTg', 'IPL', 'SuVe', 'Sol', 'IPR', '8n', 'Dk', 'IO',
                        'Cb', 'Pr5VL', 'APT', 'Gr', 'RR', 'InC', 'X', 'EW']
    segmentation_properties += [(len(structure_dict) + index + 1, structure) for index, structure in enumerate(extra_structures)]
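    # Resulting format, with hypothetical values:
    #   [(1, 'SC: superior colliculus'), ..., (len(structure_dict) + 1, 'Pr5'), ...]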

    cloudpath = f'file://{PRECOMPUTE_PATH}'
    info = CloudVolume.create_new_info(
        num_channels = num_channels,
        layer_type = layer_type,
        data_type = str(volume.dtype), # Channel images might be 'uint8'
        encoding = 'raw', # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution = resol, # Voxel scaling, units are in nanometers
        voxel_offset = offset, # x,y,z offset in voxels from the origin
        chunk_size = [64, 64, 64], # units are voxels
        volume_size = volume.shape, # e.g. a cubic millimeter dataset
    )
    vol = CloudVolume(cloudpath, mip=0, info=info, compress=False)
    vol.commit_info()
    vol[:, :, :] = volume[:, :, :]

    vol.info['segment_properties'] = 'names'
    vol.commit_info()

    segment_properties_path = os.path.join(PRECOMPUTE_PATH, 'names')
    os.makedirs(segment_properties_path, exist_ok=True)

    info = {
        "@type": "neuroglancer_segment_properties",
        "inline": {
            "ids": [str(number) for number, label in segmentation_properties],
            "properties": [{
                "id": "label",
                "description": "Name of structures",
                "type": "label",
                "values": [str(label) for number, label in segmentation_properties]
            }]
        }
    }
    print('Creating names in', segment_properties_path)
    with open(os.path.join(segment_properties_path, 'info'), 'w') as file:
        json.dump(info, file, indent=2)


    # Setting parallel to a number > 1 hangs the script. It still runs fast with parallel=1
    tq = LocalTaskQueue(parallel=1)
    tasks = tc.create_downsampling_tasks(cloudpath, compress=False) # Downsample the volumes
    tq.insert(tasks)
    tq.execute()
    print('Finished')
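A small optional sanity check, not part of the original script, that could be run after the upload; it assumes read access to the same file:// cloudpath and reuses the imports already present in the example:

# Read the committed segmentation back at mip 0 and list the label ids that are
# actually present; they should match the ids written to the 'names' info file.
check_vol = CloudVolume(cloudpath, mip=0)
labels = np.unique(check_vol[:, :, :])
print('label ids present in the volume:', labels)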
Example #29
0
def setup_environment(dry_run, volume_start, volume_stop, volume_size, layer_path, 
              max_ram_size, output_patch_size, 
              input_patch_size, channel_num, dtype, 
              output_patch_overlap, crop_chunk_margin, mip, thumbnail_mip, max_mip,
              thumbnail, encoding, voxel_size, 
              overwrite_info):
    """Prepare storage info files and produce tasks."""
    assert not (volume_stop is None and volume_size is None)
    if isinstance(volume_start, tuple):
        volume_start = Vec(*volume_start)
    if isinstance(volume_stop, tuple):
        volume_stop = Vec(*volume_stop)
    if isinstance(volume_size, tuple):
        volume_size = Vec(*volume_size)

    if input_patch_size is None:
        input_patch_size = output_patch_size

    if volume_size is not None:
        assert len(volume_size) == 3
        assert volume_stop is None
        volume_stop = volume_start + volume_size
    else:
        volume_size = volume_stop - volume_start
    print('\noutput volume start: ' + tuple2string(volume_start))
    print('output volume stop: ' + tuple2string(volume_stop))
    print('output volume size: ' + tuple2string(volume_size))
    
    if output_patch_overlap is None:
        # use 50% patch overlap by default
        output_patch_overlap = tuple(s//2 for s in output_patch_size)
    assert output_patch_overlap[1] == output_patch_overlap[2]

    if crop_chunk_margin is None:
        crop_chunk_margin = output_patch_overlap
    assert crop_chunk_margin[1] == crop_chunk_margin[2]
    print('margin size: ' + tuple2string(crop_chunk_margin))
    
    if thumbnail:
        # the thumbnail requires a mip level of at least 5
        thumbnail_mip = max(thumbnail_mip, 5)

    block_size, output_chunk_size, factor = get_optimized_block_size(
        output_patch_size, output_patch_overlap, max_ram_size,
        channel_num, max_mip, crop_chunk_margin,
        input_patch_size, mip, thumbnail_mip, volume_start
    )

    if not dry_run:
        storage = SimpleStorage(layer_path)
        thumbnail_layer_path = os.path.join(layer_path, 'thumbnail')
        thumbnail_storage = SimpleStorage(thumbnail_layer_path)

        if not overwrite_info:
            print('\ncheck that we are not overwriting an existing info file.')
            assert storage.exists('info')
            assert thumbnail_storage.exists('info')

        if overwrite_info:
            print('create and upload info file to ', layer_path)
            # Note that cloudvolume uses Fortran order rather than C order
            info = CloudVolume.create_new_info(channel_num, layer_type='image',
                                            data_type=dtype,
                                            encoding=encoding,
                                            resolution=voxel_size[::-1],
                                            voxel_offset=volume_start[::-1],
                                            volume_size=volume_size[::-1],
                                            chunk_size=block_size[::-1],
                                            max_mip=mip)
            vol = CloudVolume(layer_path, info=info)
            vol.commit_info()
      
        if overwrite_info:
            thumbnail_factor = 2**thumbnail_mip
            thumbnail_block_size = (output_chunk_size[0]//factor,
                                    output_chunk_size[1]//thumbnail_factor,
                                    output_chunk_size[2]//thumbnail_factor)
            print('thumbnail block size: ' + tuple2string(thumbnail_block_size))
            thumbnail_info = CloudVolume.create_new_info(
                1, layer_type='image', 
                data_type='uint8',
                encoding='raw',
                resolution=voxel_size[::-1],
                voxel_offset=volume_start[::-1],
                volume_size=volume_size[::-1],
                chunk_size=thumbnail_block_size[::-1],
                max_mip=thumbnail_mip)
            thumbnail_vol = CloudVolume(thumbnail_layer_path, info=thumbnail_info)
            thumbnail_vol.commit_info()
       
    print('create a list of bounding boxes...')
    roi_start = (volume_start[0],
                 volume_start[1]//factor,
                 volume_start[2]//factor)
    roi_size = (volume_size[0],
                volume_size[1]//factor,
                volume_size[2]//factor)
    roi_stop = tuple(s+z for s, z in zip(roi_start, roi_size))

    # create bounding boxes and ingest to queue
    bboxes = BoundingBoxes.from_manual_setup(
            output_chunk_size,
            roi_start=roi_start, roi_stop=roi_stop)
    logging.info(f'total number of tasks: {len(bboxes)}')
    
    logging.debug(f'bounding boxes: {bboxes}')
    
    print(yellow(
        'Note that you should reuse the printed parameters in the production run.' +
        ' These parameters are not ingested into the AWS SQS queue.'))
    return bboxes
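A tiny illustration, with hypothetical values, of the volume_start / volume_stop / volume_size contract enforced at the top of this function: at least one of volume_stop and volume_size must be supplied, and when volume_size is given, volume_stop is derived from it. The zyx axis ordering is the convention assumed throughout this example.

from cloudvolume.lib import Vec  # the same Vec helper used by the example

volume_start = Vec(0, 1024, 1024)    # assumed (z, y, x) ordering
volume_size  = Vec(128, 2048, 2048)
volume_stop  = volume_start + volume_size
print(volume_stop)                   # stop corner: (128, 3072, 3072)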
Example #30
0
def create_spatial_index_mesh_tasks(
  cloudpath:str, 
  shape:Tuple[int,int,int] = (448,448,448), 
  mip:int = 0, 
  fill_missing:bool = False, 
  compress:Optional[Union[str,bool]] = 'gzip', 
  mesh_dir:Optional[str] = None
):
  """
  The main way to add a spatial index is to use the MeshTask,
  but old or broken datasets may need it to be reconstituted.
  An alternative use is to create the spatial index over a
  different area size than the mesh task.
  """
  shape = Vec(*shape)

  vol = CloudVolume(cloudpath, mip=mip)

  if mesh_dir is None:
    mesh_dir = f"mesh_mip_{mip}_err_40"

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cf = CloudFiles(cloudpath)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = new_mesh_info.get('@type', 'neuroglancer_legacy_mesh') 
  new_mesh_info['mip'] = new_mesh_info.get("mip", int(vol.mip))
  new_mesh_info['chunk_size'] = shape.tolist()
  new_mesh_info['spatial_index'] = {
    'resolution': vol.resolution.tolist(),
    'chunk_size': (shape * vol.resolution).tolist(),
  }
  if new_mesh_info != mesh_info:
    cf.put_json(info_filename, new_mesh_info)

  class SpatialIndexMeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return partial(MeshSpatialIndex, 
        cloudpath=cloudpath,
        shape=shape,
        offset=offset,
        mip=int(mip),
        fill_missing=bool(fill_missing),
        compress=compress,
        mesh_dir=mesh_dir,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshSpatialIndex',
          'cloudpath': vol.cloudpath,
          'shape': shape.tolist(),
          'mip': int(mip),
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      }) 
      vol.commit_provenance()

  return SpatialIndexMeshTaskIterator(vol.bounds, shape)
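A minimal usage sketch following the same LocalTaskQueue pattern used in the other examples; the file:// path below is a placeholder:

tq = LocalTaskQueue(parallel=1)
tasks = create_spatial_index_mesh_tasks(
    'file:///tmp/my_segmentation',  # placeholder segmentation layer path
    shape=(448, 448, 448),
    mip=0,
)
tq.insert(tasks)
tq.execute()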