Beispiel #1
0
def create_meshing_tasks(task_queue, layer_path, mip, shape=Vec(512, 512,
                                                                512),
                         max_simplification_error=40):
    """Insert one MeshTask per ``shape``-sized block of a layer into a queue.

    Opens the layer at ``mip``, registers a mesh directory in the layer's
    info if none is present, inserts a MeshTask for every start point
    produced by ``xyzrange`` over the volume bounds, waits on the queue,
    and finally appends a provenance record and commits it.

    Args:
        task_queue: object exposing ``insert(task)`` and ``wait(message)``.
        layer_path: path of the layer to mesh.
        mip: mip level passed to CloudVolume.
        shape: x, y, z size of each task; copied into a fresh Vec.
        max_simplification_error: forwarded to every MeshTask and used to
            build the default mesh directory name. Defaults to 40, the
            value that used to be hard-coded here.
    """
    shape = Vec(*shape)

    vol = CloudVolume(layer_path, mip)

    # Only set the mesh directory when the info does not already name one,
    # so an existing configuration is never overwritten.
    if 'mesh' not in vol.info:
        vol.info['mesh'] = 'mesh_mip_{}_err_{}'.format(
            mip, max_simplification_error)
        vol.commit_info()

    for startpt in tqdm(xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape),
                        desc="Inserting Mesh Tasks"):
        task = MeshTask(
            layer_path=layer_path,
            mip=vol.mip,
            # Each task gets its own copies of shape and offset.
            shape=shape.clone(),
            offset=startpt.clone(),
            max_simplification_error=max_simplification_error,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading MeshTasks')

    vol.provenance.processing.append({
        'method': {
            'task': 'MeshTask',
            'layer_path': layer_path,
            'mip': vol.mip,
            'shape': shape.tolist(),
            'max_simplification_error': max_simplification_error,
        },
        'by': USER_EMAIL,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
Beispiel #2
0
def ingest(args):
    """Set up the destination volume for CATMAID tiles sharing one row, col.

    Builds a new single-channel uint8 image info whose x/y extent is one
    hard-coded chunk positioned by ``args.row`` / ``args.col`` and whose z
    extent is ``[args.z_start, args.z_stop)``, opens the destination
    CloudVolume with it, and appends a provenance record.

    Args:
        args: ArgParse object from main. Reads ``mip``, ``row``, ``col``,
            ``z_start``, ``z_stop``, ``resolution``, ``url_base``,
            ``dst_path`` and ``owner``.
    """
    mip = args.mip
    row = args.row
    col = args.col
    # The chunk size is fixed here; the x/y start is the row/col index
    # multiplied by it.
    chunk_size = [1024, 1024, 1]
    x_start = row * chunk_size[0]
    y_start = col * chunk_size[1]
    z_start = args.z_start
    z_stop = args.z_stop
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='image',
        data_type='uint8',
        encoding='raw',
        # Only x and y resolution are scaled by the mip level.
        resolution=[
            args.resolution[0] * 2**mip,
            args.resolution[1] * 2**mip,
            args.resolution[2],
        ],
        voxel_offset=[x_start, y_start, z_start],
        chunk_size=chunk_size,
        volume_size=[chunk_size[0], chunk_size[1], z_stop - z_start],
    )

    url_base = args.url_base
    vol = CloudVolume(args.dst_path, info=info)
    vol.provenance.description = 'Cutout from CATMAID'
    vol.provenance.processing.append({
        'method': {
            'task': 'ingest',
            'src_path': url_base,
            'dst_path': args.dst_path,
            'row': row,
            'col': col,
            'z_range': [z_start, z_stop],
            # chunk_size is already a plain list, so it is stored as is.
            'chunk_size': chunk_size,
            'mip': args.mip,
        },
        'by': args.owner,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    # NOTE(review): neither the info nor the provenance is committed and no
    # tiles are downloaded in this function -- confirm whether the remainder
    # of the routine lives elsewhere.
Beispiel #3
0
def create_transfer_tasks(task_queue,
                          src_layer_path,
                          dest_layer_path,
                          chunk_size=None,
                          shape=Vec(2048, 2048, 64),
                          fill_missing=False,
                          translate=(0, 0, 0)):
    """Insert TransferTasks covering the source layer into ``task_queue``.

    If the destination layer cannot be opened, it is initialised from a copy
    of the source info restricted to the first scale with the requested
    chunk size. Downsample scales are then created on the destination, one
    TransferTask per ``shape``-sized block of the source bounds is inserted,
    and after waiting on the queue a provenance record is committed to the
    destination.

    Args:
        task_queue: object exposing ``insert(task)`` and ``wait(message)``.
        src_layer_path: layer to read from.
        dest_layer_path: layer to write to.
        chunk_size: chunk size for a newly created destination; falls back
            to the first chunk size of the source's first scale.
        shape: x, y, z size of each task.
        fill_missing: forwarded to each TransferTask.
        translate: forwarded to each TransferTask as a Vec.
    """
    shape = Vec(*shape)
    translate = Vec(*translate)
    src_vol = CloudVolume(src_layer_path)

    chunk_size = Vec(
        *(chunk_size or src_vol.info['scales'][0]['chunk_sizes'][0]))

    try:
        CloudVolume(dest_layer_path)
    except Exception:  # no info file
        # Seed the destination with the source info, first scale only.
        seeded = CloudVolume(dest_layer_path,
                             info=copy.deepcopy(src_vol.info))
        seeded.info['scales'] = seeded.info['scales'][:1]
        seeded.info['scales'][0]['chunk_sizes'] = [chunk_size.tolist()]
        seeded.commit_info()

    create_downsample_scales(dest_layer_path,
                             mip=0,
                             ds_shape=shape,
                             preserve_chunk_size=True)

    src_bounds = src_vol.bounds.clone()
    start_points = xyzrange(src_bounds.minpt, src_bounds.maxpt, shape)
    for origin in tqdm(start_points, desc="Inserting Transfer Tasks"):
        task_queue.insert(
            TransferTask(
                src_path=src_layer_path,
                dest_path=dest_layer_path,
                shape=shape.clone(),
                offset=origin.clone(),
                fill_missing=fill_missing,
                translate=translate,
            ))
    task_queue.wait('Uploading Transfer Tasks')

    # Re-open the destination so the provenance is written against its
    # current state.
    method = {
        'task': 'TransferTask',
        'src': src_layer_path,
        'dest': dest_layer_path,
        'shape': list(map(int, shape)),
        'fill_missing': fill_missing,
        'translate': list(map(int, translate)),
    }
    dest_vol = CloudVolume(dest_layer_path)
    dest_vol.provenance.processing.append({
        'method': method,
        'by': USER_EMAIL,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dest_vol.commit_provenance()
Beispiel #4
0
def create_transfer_tasks(src_layer_path,
                          dest_layer_path,
                          chunk_size=None,
                          shape=Vec(2048, 2048, 64),
                          fill_missing=False,
                          translate=(0, 0, 0),
                          bounds=None,
                          mip=0,
                          preserve_chunk_size=True,
                          encoding=None):
    """Build an iterable of TransferTasks copying one layer into another.

    It's possible to transfer from a lower resolution mip level within a
    given bounding box. The bounding box should be specified in terms of
    the highest resolution.

    Args:
        src_layer_path: layer to read from.
        dest_layer_path: layer to write to; created from a copy of the
            source info when it cannot be opened.
        chunk_size: chunk size written to the destination scale at ``mip``;
            defaults to the source's first chunk size at that scale.
        shape: x, y, z size of each task.
        fill_missing: forwarded to each TransferTask.
        translate: divided by the source's downsample ratio and forwarded
            to each TransferTask.
        bounds: optional bounding box at mip 0; converted to ``mip`` and
            clamped to the destination bounds. Defaults to the source
            bounds.
        mip: mip level to transfer.
        preserve_chunk_size: forwarded to ``create_downsample_scales``.
        encoding: if given, written to the destination scale and forwarded
            to ``create_downsample_scales``.

    Returns:
        An object supporting ``len()`` and iteration. Iterating yields the
        TransferTasks and, once exhausted, commits provenance to the
        destination (and to the source unless its protocol is 'boss').
    """
    shape = Vec(*shape)
    vol = CloudVolume(src_layer_path, mip=mip)
    translate = Vec(*translate) // vol.downsample_ratio

    if not chunk_size:
        chunk_size = vol.info['scales'][mip]['chunk_sizes'][0]
    chunk_size = Vec(*chunk_size)

    try:
        dvol = CloudVolume(dest_layer_path, mip=mip)
    except Exception:  # no info file
        info = copy.deepcopy(vol.info)
        # Open at the same mip as the normal path so that dvol.bounds below
        # is expressed at ``mip`` and matches the converted ``bounds``.
        dvol = CloudVolume(dest_layer_path, info=info, mip=mip)
        dvol.commit_info()

    if encoding is not None:
        dvol.info['scales'][mip]['encoding'] = encoding
    # Drop every scale above ``mip``; create_downsample_scales is called
    # below to add scales on top of it.
    dvol.info['scales'] = dvol.info['scales'][:mip + 1]
    dvol.info['scales'][mip]['chunk_sizes'] = [chunk_size.tolist()]
    dvol.commit_info()

    create_downsample_scales(dest_layer_path,
                             mip=mip,
                             ds_shape=shape,
                             preserve_chunk_size=preserve_chunk_size,
                             encoding=encoding)

    if bounds is None:
        bounds = vol.bounds.clone()
    else:
        bounds = vol.bbox_to_mip(bounds, mip=0, to_mip=mip)
        bounds = Bbox.clamp(bounds, dvol.bounds)

    dvol_bounds = dvol.mip_bounds(mip).clone()

    class TransferTaskIterator(object):
        def __len__(self):
            # Number of shape-sized blocks needed to tile the bounds.
            return int(reduce(operator.mul, np.ceil(bounds.size3() / shape)))

        def __iter__(self):
            for startpt in xyzrange(bounds.minpt, bounds.maxpt, shape):
                # Shrink tasks that would run past the destination bounds.
                task_shape = min2(shape.clone(), dvol_bounds.maxpt - startpt)
                yield TransferTask(
                    src_path=src_layer_path,
                    dest_path=dest_layer_path,
                    shape=task_shape,
                    offset=startpt.clone(),
                    fill_missing=fill_missing,
                    translate=translate,
                    mip=mip,
                )

            # Everything below runs only after the last task was consumed.
            job_details = {
                'method': {
                    'task': 'TransferTask',
                    'src': src_layer_path,
                    'dest': dest_layer_path,
                    'shape': list(map(int, shape)),
                    'fill_missing': fill_missing,
                    'translate': list(map(int, translate)),
                    'bounds': [bounds.minpt.tolist(),
                               bounds.maxpt.tolist()],
                    'mip': mip,
                },
                'by': OPERATOR_CONTACT,
                'date': strftime('%Y-%m-%d %H:%M %Z'),
            }

            dest_vol = CloudVolume(dest_layer_path)
            dest_vol.provenance.sources = [src_layer_path]
            dest_vol.provenance.processing.append(job_details)
            dest_vol.commit_provenance()

            if vol.path.protocol != 'boss':
                vol.provenance.processing.append(job_details)
                vol.commit_provenance()

    return TransferTaskIterator()
Beispiel #5
0
def create_meshing_tasks(
    layer_path, mip, shape=(448, 448, 448), 
    simplification=True, max_simplification_error=40,
    mesh_dir=None, cdn_cache=False, dust_threshold=None,
    object_ids=None, progress=False, fill_missing=False,
    encoding='precomputed', spatial_index=True, sharded=False,
    compress='gzip'
  ):
  """Prepare a layer for meshing and return an iterator of MeshTasks.

  Registers ``mesh_dir`` in the layer info when no mesh directory is set,
  writes or updates the ``<mesh_dir>/info`` JSON file, and returns a task
  iterator built over the volume bounds at ``mip`` with ``shape``-sized
  tasks. The iterator's ``on_finish`` hook appends a provenance record to
  the layer and commits it.

  mesh_dir defaults to 'mesh_mip_{mip}_err_{max_simplification_error}'.
  All remaining keyword arguments are forwarded to each MeshTask and
  recorded in the provenance entry.
  """
  shape = Vec(*shape)
  vol = CloudVolume(layer_path, mip)

  if mesh_dir is None:
    mesh_dir = 'mesh_mip_{}_err_{}'.format(mip, max_simplification_error)

  # Never overwrite a mesh directory already configured on the layer.
  if 'mesh' not in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  files = CloudFiles(layer_path)
  mesh_info_path = '{}/info'.format(mesh_dir)
  mesh_info = files.get_json(mesh_info_path) or {}
  mesh_info.update({
    '@type': 'neuroglancer_legacy_mesh',
    'mip': int(vol.mip),
    'chunk_size': shape.tolist(),
  })
  if spatial_index:
    mesh_info['spatial_index'] = {
        'resolution': vol.resolution.tolist(),
        'chunk_size': (shape*vol.resolution).tolist(),
    }
  files.put_json(mesh_info_path, mesh_info)

  class MeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      # Build the MeshTask for one block of the volume.
      return MeshTask(
        shape=shape.clone(),
        offset=offset.clone(),
        layer_path=layer_path,
        mip=vol.mip,
        simplification_factor=(100 if simplification else 0),
        max_simplification_error=max_simplification_error,
        mesh_dir=mesh_dir, 
        cache_control=('' if cdn_cache else 'no-cache'),
        dust_threshold=dust_threshold,
        progress=progress,
        object_ids=object_ids,
        fill_missing=fill_missing,
        encoding=encoding,
        spatial_index=spatial_index,
        sharded=sharded,
        compress=compress,
      )

    def on_finish(self):
      # Record the parameters of this meshing run on the layer.
      method = {
        'task': 'MeshTask',
        'layer_path': layer_path,
        'mip': vol.mip,
        'shape': shape.tolist(),
        'simplification': simplification,
        'max_simplification_error': max_simplification_error,
        'mesh_dir': mesh_dir,
        'fill_missing': fill_missing,
        'cdn_cache': cdn_cache,
        'dust_threshold': dust_threshold,
        'encoding': encoding,
        'object_ids': object_ids,
        'spatial_index': spatial_index,
        'sharded': sharded,
        'compress': compress,
      }
      record = {
        'method': method,
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      }
      vol.provenance.processing.append(record)
      vol.commit_provenance()

  return MeshTaskIterator(vol.mip_bounds(mip), shape)
Beispiel #6
0
def segment(args):
    """Run segmentation on contiguous block of affinities from CV

    Creates the destination segmentation layer (info and provenance are
    committed unconditionally), then optionally runs watershed plus
    agglomeration on the downloaded affinities and optionally meshes the
    result with a local task queue.

    Args:
        args: ArgParse object from main. Reads ``bbox_start``,
            ``bbox_size``, ``chunk_size``, ``src_path``, ``dst_path``,
            ``mip``, ``parallel``, ``owner``, ``max_simplification_error``,
            ``segment``, ``threshold``, ``mesh`` and ``magnitude``.
    """
    bbox_start = Vec(*args.bbox_start)
    bbox_size = Vec(*args.bbox_size)
    chunk_size = Vec(*args.chunk_size)
    bbox = Bbox(bbox_start, bbox_start + bbox_size)
    # NOTE(review): the source is opened without mip=args.mip while the
    # destination resolution below is taken from scale args.mip -- confirm
    # that reading at the default mip is intended.
    src_cv = CloudVolume(args.src_path,
                         fill_missing=True,
                         parallel=args.parallel)
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',
        data_type='uint64',
        encoding='raw',
        resolution=src_cv.info['scales'][args.mip]['resolution'],
        voxel_offset=bbox_start,
        chunk_size=chunk_size,
        volume_size=bbox_size,
        mesh='mesh_mip_{}_err_{}'.format(args.mip,
                                         args.max_simplification_error))
    dst_cv = CloudVolume(args.dst_path, info=info, parallel=args.parallel)
    dst_cv.provenance.description = 'ws+agg using waterz'
    dst_cv.provenance.processing.append({
        'method': {
            'task': 'watershed+agglomeration',
            'src_path': args.src_path,
            'dst_path': args.dst_path,
            'mip': args.mip,
            'shape': bbox_size.tolist(),
            'bounds': [
                bbox.minpt.tolist(),
                bbox.maxpt.tolist(),
            ],
        },
        'by': args.owner,
        # Date and time are separated by a space.
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dst_cv.provenance.owners = [args.owner]
    dst_cv.commit_info()
    dst_cv.commit_provenance()
    if args.segment:
        print('Downloading affinities')
        aff = src_cv[bbox.to_slices()]
        # Move the last axis to the front and hand waterz a contiguous
        # float32 array.
        aff = np.transpose(aff, (3, 0, 1, 2))
        aff = np.ascontiguousarray(aff, dtype=np.float32)
        thresholds = [args.threshold]
        print('Starting ws+agg')
        seg_gen = waterz.agglomerate(aff, thresholds)
        # Only one threshold is supplied, so only the first result is taken.
        seg = next(seg_gen)
        print('Deleting affinities')
        # Drop the affinities before uploading to reduce peak memory.
        del aff
        print('Uploading segmentation')
        dst_cv[bbox.to_slices()] = seg
    if args.mesh:
        print('Starting meshing')
        with LocalTaskQueue(parallel=args.parallel) as tq:
            tasks = tc.create_meshing_tasks(
                layer_path=args.dst_path,
                mip=args.mip,
                shape=args.chunk_size,
                simplification=True,
                max_simplification_error=args.max_simplification_error,
                progress=True)
            tq.insert_all(tasks)
            tasks = tc.create_mesh_manifest_tasks(layer_path=args.dst_path,
                                                  magnitude=args.magnitude)
            tq.insert_all(tasks)
        print("Meshing complete")
Beispiel #7
0
def create_transfer_tasks(src_layer_path,
                          dest_layer_path,
                          chunk_size=None,
                          shape=None,
                          fill_missing=False,
                          translate=None,
                          bounds=None,
                          mip=0,
                          preserve_chunk_size=True,
                          encoding=None,
                          skip_downsamples=False,
                          delete_black_uploads=False,
                          background_color=0,
                          agglomerate=False,
                          timestamp=None,
                          compress='gzip',
                          factor=None,
                          sparse=False,
                          dest_voxel_offset=None,
                          memory_target=MEMORY_TARGET,
                          max_mips=5,
                          clean_info=False,
                          no_src_update=False):
    """
  Transfer data to a new data layer. You can use this operation
  to make changes to the dataset representation as well. For
  example, you can change the chunk size, compression, bounds,
  and offset.

  Downsamples will be automatically generated while transferring
  unless skip_downsamples is set. The number of downsamples will
  be determined by the chunk size and the task shape.

  bounds: Bbox specified in terms of the destination image and its
    highest resolution.
  translate: Vec3 pointing from source bounds to dest bounds
    and is in terms of the highest resolution of the source image.
    This allows you to compensate for differing voxel offsets
    or enables you to move part of the image to a new location.
  dest_voxel_offset: When creating a new image, move the
    global coordinate origin to this point. This is commonly
    used to "zero" a newly aligned image (e.g. (0,0,0))

  background_color: Designates which color should be considered background.
  chunk_size: (overrides preserve_chunk_size) force chunk size for new layers to be this.
  clean_info: scrub additional fields from the info file that might interfere
    with later processing (e.g. mesh and skeleton related info).
  compress: None, 'gzip', or 'br' Determines which compression algorithm to use
    for new uploaded files.
  delete_black_uploads: issue delete commands instead of upload chunks
    that are all background.
  encoding: "raw", "jpeg", "compressed_segmentation", "compresso", "fpzip", or "kempressed"
    depending on which kind of data you're dealing with. raw works for everything (no compression)
    but you might get better compression with another encoding. You can think of encoding as the
    image type-specific first stage of compression and the "compress" flag as the data
    agnostic second stage compressor. For example, compressed_segmentation and gzip work
    well together, but not jpeg and gzip.
  factor: (overrides axis) can manually specify what each downsampling round is
    supposed to do: e.g. (2,2,1), (2,2,2), etc
  fill_missing: Treat missing image tiles as zeroed for both src and dest.
  max_mips: (pairs with memory_target) maximum number of downsamples to generate even
    if the memory budget is large enough for more.
  memory_target: given a task size in bytes, pick the task shape that will produce the
    maximum number of downsamples. Only works for (2,2,1) or (2,2,2).
  no_src_update: don't update the source's provenance file
  preserve_chunk_size: if true, maintain chunk size of starting mip, else, find the closest
    evenly divisible chunk size to 64,64,64 for this shape and use that. The latter can be
    useful when mip 0 uses huge chunks and you want to simply visualize the upper mips.
  shape: (overrides memory_target) The 3d size of each task. Choose a shape that meets
    the following criteria unless you're doing something out of the ordinary.
    (a) 2^n multiple of destination chunk size (b) doesn't consume too much memory
    (c) n is related to the downsample factor for each axis, so for a factor of (2,2,1) (default)
      z only needs to be a single chunk, but x and y should be 2, 4, 8,or 16 times the chunk size.
    Remember to multiply 4/3 * shape.x * shape.y * shape.z * data_type to estimate how much memory
    each task will require. If downsamples are off, you can skip the 4/3. In the future, if chunk
    sizes match we might be able to do a simple file transfer. The problem can be formulated as
    producing the largest number of downsamples within a given memory target.

    EXAMPLE: destination is uint64 with chunk size (128, 128, 64) with a memory target of
      at most 3GB per task and a downsample factor of (2,2,1).

      The largest number of downsamples is 4 using 2048 * 2048 * 64 sized tasks which will
      use 2.9 GB of memory. The next size up would use 11.5GB and is too big.

  sparse: When downsampling segmentation, if true, don't count black pixels when computing
    the mode. Useful for e.g. synapses and point labels.

  agglomerate: (graphene only) remap the watershed layer to a proofread segmentation.
  timestamp: (graphene only) integer UNIX timestamp indicating the proofreading state
    to represent.

  Returns: a TransferTaskIterator built over the destination bounds and the
    task shape. Its task() method produces functools.partial objects wrapping
    TransferTask, and its on_finish() method writes provenance.

  Raises: ValueError if neither shape nor memory_target is specified.
  """
    src_vol = CloudVolume(src_layer_path, mip=mip)

    # NOTE(review): this is a truthiness test, so any falsy value (None, an
    # empty sequence) falls back to the source's voxel offset. Confirm how
    # array-like inputs are expected to behave here.
    if dest_voxel_offset:
        dest_voxel_offset = Vec(*dest_voxel_offset, dtype=int)
    else:
        dest_voxel_offset = src_vol.voxel_offset.clone()

    if factor is None:
        factor = (2, 2, 1)

    # skip_downsamples takes precedence over any factor given by the caller.
    if skip_downsamples:
        factor = (1, 1, 1)

    if not chunk_size:
        chunk_size = src_vol.info['scales'][mip]['chunk_sizes'][0]
    chunk_size = Vec(*chunk_size)

    # If the destination has no info yet, start from a copy of the source's.
    try:
        dest_vol = CloudVolume(dest_layer_path, mip=mip)
    except cloudvolume.exceptions.InfoUnavailableError:
        info = copy.deepcopy(src_vol.info)
        dest_vol = CloudVolume(dest_layer_path, info=info, mip=mip)
        dest_vol.commit_info()

    # NOTE(review): both branches above assign a value, so this condition
    # appears to always hold at this point -- confirm.
    if dest_voxel_offset is not None:
        dest_vol.scale["voxel_offset"] = dest_voxel_offset

    # If translate is not set but dest_voxel_offset is, then it should
    # naturally be only the difference between the datasets.
    if translate is None:
        translate = dest_vol.voxel_offset - src_vol.voxel_offset  # vector pointing from src to dest
    else:
        translate = Vec(*translate) // src_vol.downsample_ratio

    if encoding is not None:
        dest_vol.info['scales'][mip]['encoding'] = encoding
        # Supply a default block size when the scale does not define one.
        if encoding == 'compressed_segmentation' and 'compressed_segmentation_block_size' not in dest_vol.info[
                'scales'][mip]:
            dest_vol.info['scales'][mip][
                'compressed_segmentation_block_size'] = (8, 8, 8)
    # Discard every scale above `mip` and force the requested chunk size on
    # the scale being written.
    dest_vol.info['scales'] = dest_vol.info['scales'][:mip + 1]
    dest_vol.info['scales'][mip]['chunk_sizes'] = [chunk_size.tolist()]

    if clean_info:
        dest_vol.info = clean_xfer_info(dest_vol.info)

    dest_vol.commit_info()

    # An explicit shape wins; otherwise derive one from the memory budget.
    if shape is None:
        if memory_target is not None:
            shape = downsample_scales.downsample_shape_from_memory_target(
                np.dtype(src_vol.dtype).itemsize, dest_vol.chunk_size.x,
                dest_vol.chunk_size.y, dest_vol.chunk_size.z, factor,
                memory_target, max_mips)
        else:
            raise ValueError(
                "Either shape or memory_target must be specified.")

    shape = Vec(*shape)

    # When z is not downsampled, round shape.z to the nearest multiple of
    # the destination chunk depth.
    if factor[2] == 1:
        shape.z = int(dest_vol.chunk_size.z *
                      round(shape.z / dest_vol.chunk_size.z))

    if not skip_downsamples:
        downsample_scales.create_downsample_scales(
            dest_layer_path,
            mip=mip,
            ds_shape=shape,
            preserve_chunk_size=preserve_chunk_size,
            encoding=encoding)

    dest_bounds = get_bounds(dest_vol, bounds, mip, chunk_size)

    class TransferTaskIterator(FinelyDividedTaskIterator):
        def task(self, shape, offset):
            # Returns a partial rather than a constructed TransferTask.
            return partial(
                TransferTask,
                src_path=src_layer_path,
                dest_path=dest_layer_path,
                shape=shape.clone(),
                offset=offset.clone(),
                fill_missing=fill_missing,
                translate=translate,
                mip=mip,
                skip_downsamples=skip_downsamples,
                delete_black_uploads=bool(delete_black_uploads),
                background_color=background_color,
                agglomerate=agglomerate,
                timestamp=timestamp,
                compress=compress,
                factor=factor,
                sparse=sparse,
            )

        # presumably invoked by FinelyDividedTaskIterator once iteration
        # completes -- verify against the base class.
        def on_finish(self):
            job_details = {
                'method': {
                    'task':
                    'TransferTask',
                    'src':
                    src_layer_path,
                    'dest':
                    dest_layer_path,
                    'shape':
                    list(map(int, shape)),
                    'fill_missing':
                    fill_missing,
                    'translate':
                    list(map(int, translate)),
                    'skip_downsamples':
                    skip_downsamples,
                    'delete_black_uploads':
                    bool(delete_black_uploads),
                    'background_color':
                    background_color,
                    'bounds':
                    [dest_bounds.minpt.tolist(),
                     dest_bounds.maxpt.tolist()],
                    'mip':
                    mip,
                    'agglomerate':
                    bool(agglomerate),
                    'timestamp':
                    timestamp,
                    'compress':
                    compress,
                    'encoding':
                    encoding,
                    'memory_target':
                    memory_target,
                    'factor': (tuple(factor) if factor else None),
                    'sparse':
                    bool(sparse),
                },
                'by': operator_contact(),
                'date': strftime('%Y-%m-%d %H:%M %Z'),
            }

            # Re-open the destination before writing its provenance.
            dest_vol = CloudVolume(dest_layer_path)
            dest_vol.provenance.sources = [src_layer_path]
            dest_vol.provenance.processing.append(job_details)
            dest_vol.commit_provenance()

            # The source's provenance is only updated for the gs, s3 and
            # file protocols, and only when no_src_update is false.
            if not no_src_update and src_vol.meta.path.protocol in ('gs', 's3',
                                                                    'file'):
                src_vol.provenance.processing.append(job_details)
                src_vol.commit_provenance()

    return TransferTaskIterator(dest_bounds, shape)