Example 1
  def _compute_meshes(self):
    with Storage(self.layer_path) as storage:
      data = self._data[:, :, :, 0].T
      self._mesher.mesh(data)
      for obj_id in self._mesher.ids():
        if self.options['remap_table'] is None:
          remapped_id = obj_id
        else:
          remapped_id = self._remap_list[obj_id]

        storage.put_file(
            file_path='{}/{}:{}:{}'.format(
                self._mesh_dir, remapped_id, self.options['lod'],
                self._bounds.to_filename()
            ),
            content=self._create_mesh(obj_id),
            compress=True,
            cache_control=self.options['cache_control']
        )

        if self.options['generate_manifests']:
          fragments = []
          fragments.append('{}:{}:{}'.format(remapped_id, self.options['lod'],
                                             self._bounds.to_filename()))

          storage.put_file(
              file_path='{}/{}:{}'.format(
                  self._mesh_dir, remapped_id, self.options['lod']),
              content=json.dumps({"fragments": fragments}),
              content_type='application/json',
              cache_control=self.options['cache_control']
          )
Example 2
  def fetch_z_levels(self):
    bounds = Bbox(self.offset, self.shape[:3] + self.offset)

    levelfilenames = [
      'levels/{}/{}'.format(self.mip, z) \
      for z in range(bounds.minpt.z, bounds.maxpt.z)
    ]
    
    with Storage(self.levels_path) as stor:
      levels = stor.get_files(levelfilenames)

    errors = [ 
      level['filename'] \
      for level in levels if level['content'] is None
    ]

    if len(errors):
      raise Exception(", ".join(
          errors) + " were not defined. Did you run a LuminanceLevelsTask for these slices?")

    levels = [(
      int(os.path.basename(item['filename'])),
      json.loads(item['content'].decode('utf-8'))
    ) for item in levels ]

    levels.sort(key=lambda x: x[0])
    levels = [x[1] for x in levels]
    return [ np.array(x['levels'], dtype=np.uint64) for x in levels ]
Example 3
    def fetch_z_levels(self, bounds):
        """
        Read out the histograms for each corresponding section.
        TODO: use local cache for the z levels
        """
        levelfilenames = [
            'levels/{}/{}'.format(self.mip, z) \
            for z in range(bounds.minpt.z, bounds.maxpt.z)
        ]

        with Storage(self.levels_path) as stor:
            levels = stor.get_files(levelfilenames)

        errors = [
            level['filename'] \
            for level in levels if level['content'] is None
        ]

        if len(errors):
            raise Exception(
                ", ".join(errors) +
                " were not defined. Did you run a LuminanceLevelsTask for these slices?"
            )

        levels = [(int(os.path.basename(item['filename'])),
                   json.loads(item['content'].decode('utf-8')))
                  for item in levels]

        levels.sort(key=lambda x: x[0])
        levels = [x[1] for x in levels]
        return [np.array(x['levels'], dtype=np.uint64) for x in levels]
Example 4
def put_chunk_edges(
    edges_dir: str, chunk_coordinates: np.ndarray, edges_d, compression_level: int
) -> None:
    """
    :param edges_dir: cloudvolume storage path
    :type str:
    :param chunk_coordinates: chunk coords x,y,z
    :type np.ndarray:
    :param edges_d: edges_d with keys "in", "cross", "between"
    :type dict:
    :param compression_level: zstandard compression level (1-22, higher - better ratio)
    :type int:
    :return None:
    """

    chunk_edges = ChunkEdgesMsg()
    chunk_edges.in_chunk.CopyFrom(serialize(edges_d[EDGE_TYPES.in_chunk]))
    chunk_edges.between_chunk.CopyFrom(serialize(edges_d[EDGE_TYPES.between_chunk]))
    chunk_edges.cross_chunk.CopyFrom(serialize(edges_d[EDGE_TYPES.cross_chunk]))

    cctx = zstd.ZstdCompressor(level=compression_level)
    chunk_str = "_".join(str(coord) for coord in chunk_coordinates)

    # filename format - edges_x_y_z.serialization.compression
    file = f"edges_{chunk_str}.proto.zst"
    with Storage(edges_dir) as storage:
        storage.put_file(
            file_path=file,
            content=cctx.compress(chunk_edges.SerializeToString()),
            compress=None,
            cache_control="no-cache",
        )
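A hedged usage sketch (not part of the original source): the bucket path and chunk coordinates below are placeholders, and edges_d is assumed to already be keyed by the EDGE_TYPES constants used above, mapping to Edges objects that serialize() accepts.

import numpy as np

# Hypothetical call; "gs://my-bucket/edges" and the coordinates are placeholders,
# and edges_d is assumed to hold serializable Edges objects keyed by EDGE_TYPES.
put_chunk_edges(
    edges_dir="gs://my-bucket/edges",
    chunk_coordinates=np.array([3, 7, 11]),
    edges_d=edges_d,
    compression_level=17,
)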
Example 5
def create_info_file_from_build(layer_path, layer_type, resolution, encoding):
    assert layer_type in ('image', 'segmentation', 'affinities')

    with Storage(layer_path) as storage:
        bounds, build_chunk_size = compute_build_bounding_box(storage)
        data_type, num_channels = get_build_data_type_and_shape(storage)

    neuroglancer_chunk_size = find_closest_divisor(build_chunk_size,
                                                   closest_to=[64, 64, 64])

    info = CloudVolume.create_new_info(
        num_channels=num_channels,
        layer_type=layer_type,
        data_type=data_type,
        encoding=encoding,
        resolution=resolution,
        voxel_offset=bounds.minpt.tolist(),
        volume_size=bounds.size3(),
        mesh=(layer_type == 'segmentation'),
        chunk_size=list(map(int, neuroglancer_chunk_size)),
    )

    vol = CloudVolume(layer_path, mip=0, info=info).commit_info()
    vol = create_downsample_scales(layer_path,
                                   mip=0,
                                   ds_shape=build_chunk_size,
                                   axis='z')

    return vol.info
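A minimal invocation sketch, assuming a layer at a placeholder path that already contains the 'build/' chunks the helper inspects; the resolution is expressed in nanometers per voxel, as is conventional for CloudVolume info files.

# Placeholder path; the bucket is assumed to already contain build chunks
# so that compute_build_bounding_box() has something to measure.
info = create_info_file_from_build(
    layer_path='gs://my-bucket/my-layer',
    layer_type='image',
    resolution=[4, 4, 40],
    encoding='raw',
)
print(info['scales'][0]['size'])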
Example 6
def remesh_pending(cg: ChunkedGraph):
    mesh_dir = cg.meta.dataset_info["mesh"]
    mesh_info = cg.meta.custom_data.get("mesh", {})
    unsharded_mesh_path = join(
        cg.meta.data_source.WATERSHED,
        mesh_dir,
        cg.meta.dataset_info["mesh_metadata"]["unsharded_mesh_dir"],
    )

    pending_path = f"{unsharded_mesh_path}/in-progress"
    for task in _get_pending_tasks(pending_path):
        fname, l2ids = task
        print(f"remeshing IDs {l2ids} from {fname}")
        remeshing(
            cg,
            l2ids,
            stop_layer=mesh_info["max_layer"],
            mip=mesh_info["mip"],
            max_err=mesh_info["max_error"],
            cv_sharded_mesh_dir=mesh_dir,
            cv_unsharded_mesh_path=unsharded_mesh_path,
        )

        with Storage(pending_path) as storage:  # pylint: disable=not-context-manager
            storage.delete_file(fname)
        print(f"remesh job for {fname} with {l2ids} complete")
Example 7
  def execute(self):
    with Storage(self.layer_path) as storage:
      self._info = json.loads(storage.get_file('info').decode('utf8'))

      if self.mesh_dir is None and 'mesh' in self._info:
        self.mesh_dir = self._info['mesh']

      self._generate_manifests(storage)
Example 8
  def __iter__(self):
    with Storage(cloudpath) as storage:
      for filename in storage.list_files(prefix='build/'):
        yield IngestTask(
          chunk_path=storage.get_path_to_file('build/' + filename),
          chunk_encoding='npz',
          layer_path=cloudpath,
        )
Example 9
def _get_pending_tasks(pending_path: str) -> list:
    from numpy import frombuffer, uint64

    tasks = []
    with Storage(pending_path) as storage:  # pylint: disable=not-context-manager
        for f in storage.get_files(
                list(storage.list_files(prefix=REMESH_PREFIX))):
            tasks.append((f["filename"], frombuffer(f["content"],
                                                    dtype=uint64)))
    return tasks
Example 10
  def _compute_meshes(self):
    with Storage(self.layer_path) as storage:
      data = self._data[:, :, :, 0].T
      self._mesher.mesh(data.flatten(), *data.shape[:3])
      for obj_id in self._mesher.ids():
        storage.put_file(
          file_path='{}/{}:{}:{}'.format(
            self._mesh_dir, obj_id, self.lod, self._bounds.to_filename()),
          content=self._create_mesh(obj_id),
          compress=True,
        )
Example 11
def load_raw_skeletons():
    print("Downloading list of files...")
    print(cv.skeleton.meta.layerpath)
    with Storage(cv.skeleton.meta.layerpath, progress=True) as stor:
        all_files = list(stor.list_files())

    all_files = [
        fname for fname in all_files if os.path.splitext(fname)[1] == '.frags'
    ]

    print("Downloading files...")
    with Storage(cv.skeleton.meta.layerpath, progress=True) as stor:
        all_files = stor.get_files(all_files)

    # CHECKPOINT?

    for i, res in enumerate(tqdm(all_files, desc='Unpickling')):
        all_files[i] = pickle.loads(res['content'])

    # group by segid

    unfused_skeletons = defaultdict(list)
    while all_files:
        fragment = all_files.pop()
        for label, skel_frag in fragment.items():
            unfused_skeletons[label].append(skel_frag)

    # CHECKPOINT?

    skeletons = crt_dict()
    labels = list(unfused_skeletons.keys())
    for label in tqdm(labels, desc='Simple Merging'):
        skels = unfused_skeletons[label]
        skeleton = PrecomputedSkeleton.simple_merge(skels)
        skeleton.id = label
        skeleton.extra_attributes = [
          attr for attr in skeleton.extra_attributes \
          if attr['data_type'] == 'float32'
        ]
        skeletons[label] = skeleton
        del unfused_skeletons[label]

    return skeletons
Example 12
def get_highest_child_nodes_with_meshes(
    cg,
    node_id: np.uint64,
    stop_layer=2,
    start_layer=None,
    verify_existence=False,
    bounding_box=None,
    flexible_start_layer=None,
):
    if flexible_start_layer is not None:
        # Get highest children that are at flexible_start_layer or below
        # (do this because of skip connections)
        candidates = cg.get_children_at_layer(node_id, flexible_start_layer, True)
    elif start_layer is None:
        candidates = np.array([node_id], dtype=np.uint64)
    else:
        candidates = cg.get_subgraph_nodes(
            node_id,
            bounding_box=bounding_box,
            bb_is_coordinate=True,
            return_layers=[start_layer],
        )

    if verify_existence:
        valid_node_ids = []
        with Storage(cg.cv_mesh_path) as stor:
            while True:
                filenames = [get_mesh_name(cg, c) for c in candidates]

                time_start = time.time()
                existence_dict = stor.files_exist(filenames)
                print("Existence took: %.3fs" % (time.time() - time_start))

                missing_meshes = []
                for mesh_key in existence_dict:
                    node_id = np.uint64(mesh_key.split(":")[0])
                    if existence_dict[mesh_key]:
                        valid_node_ids.append(node_id)
                    else:
                        if cg.get_chunk_layer(node_id) > stop_layer:
                            missing_meshes.append(node_id)

                time_start = time.time()
                if missing_meshes:
                    candidates = cg.get_children(missing_meshes, flatten=True)
                else:
                    break
                print("ChunkedGraph lookup took: %.3fs" % (time.time() - time_start))

    else:
        valid_node_ids = candidates

    return valid_node_ids
Example 13
def _create_manifest_files_thread(args):
    cg_info, cv_path, cv_mesh_dir, root_id_start, root_id_end, \
        highest_mesh_level = args

    cg = ChunkedGraph(**cg_info)

    with Storage(cv_path) as cv_storage:
        for root_seg_id in range(root_id_start, root_id_end):

            root_id = cg.get_node_id(np.uint64(root_seg_id),
                                     cg.get_chunk_id(layer=int(cg.n_layers),
                                                     x=0, y=0, z=0))
Example 14
def test_mesh_manifests():
    directory = '/tmp/removeme/mesh_manifests/'
    layer_path = 'file://' + directory
    mesh_dir = 'mesh_mip_3_error_40'

    delete_layer(layer_path)

    to_path = lambda filename: os.path.join(directory, mesh_dir, filename)

    n_segids = 100
    n_lods = 2
    n_fragids = 5

    with Storage(layer_path) as stor:
        stor.put_file('info', '{"mesh":"mesh_mip_3_error_40"}'.encode('utf8'))

    for segid in range(n_segids):
        for lod in range(n_lods):
            for fragid in range(n_fragids):
                filename = '{}:{}:{}'.format(segid, lod, fragid)
                lib.touch(to_path(filename))

    for i in range(10):
        MeshManifestTask(layer_path=layer_path, prefix=i, lod=0).execute()

    for segid in range(n_segids):
        for fragid in range(n_fragids):
            filename = '{}:0'.format(segid)
            assert os.path.exists(to_path(filename))
            filename = '{}:1'.format(segid)
            assert not os.path.exists(to_path(filename))

    for i in range(10):
        MeshManifestTask(layer_path=layer_path, prefix=i, lod=1).execute()

    for segid in range(n_segids):
        for fragid in range(n_fragids):
            filename = '{}:0'.format(segid)
            assert os.path.exists(to_path(filename))
            filename = '{}:1'.format(segid)
            assert os.path.exists(to_path(filename))

    with open(to_path('50:0'), 'r') as f:
        content = json.loads(f.read())
        assert content == {
            "fragments": ["50:0:0", "50:0:1", "50:0:2", "50:0:3", "50:0:4"]
        }

    if os.path.exists(directory):
        shutil.rmtree(directory)
Example 15
def mesh_node_and_parents(node_id, cg, cv_path, cv_mesh_dir=None, mip=3,
                          highest_mesh_level=1, create_manifest_root=True,
                          lod=0):
    layer = cg.get_chunk_layer(node_id)

    parents = [node_id] + list(cg.get_all_parents(node_id))
    for i_layer in range(layer, highest_mesh_level + 1):
        mesh_single_component(parents[i_layer], cg=cg, cv_path=cv_path,
                              cv_mesh_dir=cv_mesh_dir, mip=mip)

    if create_manifest_root:
        with Storage(cv_path) as cv_storage:
            create_manifest_file(cg=cg, cv_storage=cv_storage,
                                 cv_mesh_dir=cv_mesh_dir, node_id=parents[-1],
                                 highest_mesh_level=highest_mesh_level,
                                 mip=mip, lod=lod)
Example 16
def compute_mesh_centroids_of_l2_ids(cg, l2_ids, flatten=False):
    """
    Given a list of l2_ids, return a tuple containing a dict that maps l2_ids to their
    mesh's centroid (a global coordinate), and a list of the l2_ids for which the mesh does not exist.

    :param cg: ChunkedGraph object
    :param l2_ids: Sequence[np.uint64]
    :return: Union[Dict[np.uint64, np.ndarray], [np.uint64], [np.uint64]]
    """
    fragments_to_fetch = [
        f"{l2_id}:0:{meshgen_utils.get_chunk_bbox_str(cg, cg.get_chunk_id(l2_id))}"
        for l2_id in l2_ids
    ]
    if flatten:
        centroids_with_chunk_boundary_points = []
    else:
        centroids_with_chunk_boundary_points = {}
    last_l2_id = None
    failed_l2_ids = []
    with Storage(cg.cv_mesh_path) as storage:
        files_contents = storage.get_files(fragments_to_fetch)
        fragment_map = {}
        for i in range(len(files_contents)):
            fragment_map[files_contents[i]["filename"]] = files_contents[i]
        for i in range(len(fragments_to_fetch)):
            fragment_to_fetch = fragments_to_fetch[i]
            l2_id = l2_ids[i]
            try:
                fragment = fragment_map[fragment_to_fetch]
                if fragment["content"] is not None and fragment["error"] is None:
                    mesh = meshgen.decode_draco_mesh_buffer(fragment["content"])
                    if flatten:
                        centroids_with_chunk_boundary_points.extend(
                            compute_centroid_with_chunk_boundary(
                                cg, mesh["vertices"], l2_id, last_l2_id
                            )
                        )
                    else:
                        centroids_with_chunk_boundary_points[
                            l2_id
                        ] = compute_centroid_with_chunk_boundary(
                            cg, mesh["vertices"], l2_id, last_l2_id
                        )
            except Exception:
                failed_l2_ids.append(l2_id)
            last_l2_id = l2_id
    return centroids_with_chunk_boundary_points, failed_l2_ids
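A hedged call sketch: cg is assumed to be an already-constructed ChunkedGraph whose cv_mesh_path holds draco-encoded fragments, and the level-2 IDs below are placeholders.

import numpy as np

# cg is assumed to be an existing ChunkedGraph instance; the IDs are placeholders.
l2_ids = np.array([144115188075855872, 144115188075855873], dtype=np.uint64)
centroids, missing = compute_mesh_centroids_of_l2_ids(cg, l2_ids, flatten=False)
for l2_id, centroid in centroids.items():
    print(l2_id, centroid)
print("no mesh found for:", missing)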
Example 17
def get_highest_child_nodes_with_meshes(cg,
                                        node_id: np.uint64,
                                        stop_layer=1,
                                        start_layer=None,
                                        verify_existence=False):
    if start_layer is None:
        start_layer = cg.n_layers

    # FIXME: Read those from config
    HIGHEST_MESH_LAYER = min(start_layer, cg.n_layers - 3)
    MESH_MIP = 2

    highest_node = get_downstream_multi_child_node(cg, node_id, stop_layer)
    highest_node_layer = cg.get_chunk_layer(highest_node)
    if highest_node_layer <= HIGHEST_MESH_LAYER:
        candidates = [highest_node]
    else:
        candidates = cg.get_subgraph_nodes(highest_node,
                                           return_layers=[HIGHEST_MESH_LAYER])

    if verify_existence:
        valid_node_ids = []
        with Storage(cg.cv_mesh_path) as stor:
            while True:
                filenames = [
                    get_mesh_name(cg, c, MESH_MIP) for c in candidates
                ]
                existence_dict = stor.files_exist(filenames)

                missing_meshes = []
                for mesh_key in existence_dict:
                    node_id = np.uint64(mesh_key.split(':')[0])
                    if existence_dict[mesh_key]:
                        valid_node_ids.append(node_id)
                    else:
                        if cg.get_chunk_layer(node_id) > stop_layer:
                            missing_meshes.append(node_id)

                if missing_meshes:
                    candidates = cg.get_children(missing_meshes, flatten=True)
                else:
                    break
    else:
        valid_node_ids = candidates

    return valid_node_ids
Example 18
def cache(task, cloudpath):
  layer_path, filename = os.path.split(cloudpath)

  classname = task.__class__.__name__
  lcldir = mkdir(os.path.join('/tmp/', classname))
  lclpath = os.path.join(lcldir, filename)

  if os.path.exists(lclpath):
    with open(lclpath, 'rb') as f:
      filestr = f.read()
  else:
    with Storage(layer_path, n_threads=0) as stor:
      filestr = stor.get_file(filename)

    with open(lclpath, 'wb') as f:
      f.write(filestr)

  return filestr
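A short usage sketch under the assumption that task is any object (only its class name labels the local cache directory) and that the placeholder cloudpath points at an existing JSON file.

import json

# 'task' only contributes its class name to the /tmp cache directory;
# the cloudpath below is a placeholder.
filestr = cache(task, 'gs://my-bucket/my-layer/info')
info = json.loads(filestr)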
Example 19
def remesh(cg: ChunkedGraph, operation_id: int, l2ids: ndarray):
    from cloudvolume.storage import SimpleStorage as Storage

    mesh_info = cg.meta.custom_data.get("mesh", {})
    mesh_dir, unsharded_mesh_path, bucket_path, file_name = get_remesh_info(
        cg, operation_id)

    remeshing(
        cg,
        l2ids,
        stop_layer=mesh_info["max_layer"],
        mip=mesh_info["mip"],
        max_err=mesh_info["max_error"],
        cv_sharded_mesh_dir=mesh_dir,
        cv_unsharded_mesh_path=unsharded_mesh_path,
    )
    with Storage(bucket_path) as storage:  # pylint: disable=not-context-manager
        storage.delete_file(file_name)
Example 20
def create_segment_metadata_file(cv_input_path, cv_output_path,
                                 output_filename, override_default_size_limit):
    mip = 0
    cv = CloudVolume(cv_input_path, mip)
    max_allowed_size = np.int64(3000000000)
    vol_size = np.prod(cv.volume_size)
    if vol_size > max_allowed_size and not override_default_size_limit:
        raise ValueError(
            f'Volume size of {vol_size} exceeds maximum of 3 billion voxels')
    volume_bbox = Bbox(cv.voxel_offset, cv.shape[0:3] + cv.voxel_offset)
    data = cv[volume_bbox]
    unique_segids = np.unique(data, return_counts=True)
    del data
    arr = np.array([])
    for x in zip(unique_segids[0], unique_segids[1]):
        if x[0] != 0:
            arr = np.append(arr, {"segmentId": str(x[0]), "voxelCount": x[1]})
    with Storage(cv_output_path) as storage:
        storage.put_file(file_path=output_filename,
                         content=json.dumps(arr, cls=NumpyEncoder),
                         compress=False,
                         cache_control='no-cache')
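A hedged invocation: both cloud paths are placeholders, and the input layer is assumed to be a segmentation small enough to pass the 3-billion-voxel guard above (otherwise the override flag must be set).

# Placeholder paths; only set override_default_size_limit=True if the
# volume deliberately exceeds the size guard above.
create_segment_metadata_file(
    cv_input_path='gs://my-bucket/segmentation',
    cv_output_path='gs://my-bucket/segmentation',
    output_filename='segment_metadata.json',
    override_default_size_limit=False,
)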
Example 21
def get_chunk_edges(
    edges_dir: str, chunks_coordinates: List[np.ndarray], cv_threads: int = 1
) -> Dict:
    """
    :param edges_dir: cloudvolume storage path
    :type str:    
    :param chunks_coordinates: list of chunk coords for which to load edges
    :type List[np.ndarray]:
    :param cv_threads: cloudvolume storage client thread count
    :type int:     
    :return: dictionary {"edge_type": Edges}
    """
    fnames = []
    for chunk_coords in chunks_coordinates:
        chunk_str = "_".join(str(coord) for coord in chunk_coords)
        # filename format - edges_x_y_z.serialization.compression
        fnames.append(f"edges_{chunk_str}.proto.zst")

    storage = (
        Storage(edges_dir, n_threads=cv_threads)
        if cv_threads > 1
        else SimpleStorage(edges_dir)
    )

    chunk_edge_dicts = []
    with storage:
        files = storage.get_files(fnames)
        for _file in files:
            # cv error
            if _file["error"]:
                raise ValueError(_file["error"])
            # empty chunk
            if not _file["content"]:
                continue
            edges_dict = _decompress_edges(_file["content"])
            chunk_edge_dicts.append(edges_dict)
    return concatenate_chunk_edges(chunk_edge_dicts)
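A brief read-side sketch mirroring put_chunk_edges above; the storage path and chunk coordinates are placeholders.

import numpy as np

# Placeholder path and coordinates; filenames are rebuilt as
# edges_x_y_z.proto.zst, matching put_chunk_edges above.
edges_by_type = get_chunk_edges(
    edges_dir="gs://my-bucket/edges",
    chunks_coordinates=[np.array([3, 7, 11]), np.array([3, 7, 12])],
    cv_threads=4,
)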
Example 22
  def execute(self):
    srccv = CloudVolume(self.src_path, mip=self.mip, fill_missing=True)

    # Accumulate a histogram of the luminance levels
    nbits = np.dtype(srccv.dtype).itemsize * 8
    levels = np.zeros(shape=(2 ** nbits,), dtype=np.uint64)

    bounds = Bbox(self.offset, self.shape[:3] + self.offset)
    bounds = Bbox.clamp(bounds, srccv.bounds)

    bboxes = self.select_bounding_boxes(bounds)
    for bbox in bboxes:
      img2d = srccv[bbox.to_slices()].reshape((bbox.volume()))
      cts = np.bincount(img2d)
      levels[0:len(cts)] += cts.astype(np.uint64)

    covered_area = sum([bbx.volume() for bbx in bboxes])

    bboxes = [(bbox.volume(), bbox.size3()) for bbox in bboxes]
    bboxes.sort(key=lambda x: x[0])
    biggest = bboxes[-1][1]

    output = {
      "levels": levels.tolist(),
      "patch_size": biggest.tolist(),
      "num_patches": len(bboxes),
      "coverage_ratio": covered_area / self.shape.rectVolume(),
    }

    path = self.levels_path if self.levels_path else self.src_path
    path = os.path.join(path, 'levels')
    with Storage(path, n_threads=0) as stor:
      stor.put_json(
        file_path="{}/{}".format(self.mip, self.offset.z),
        content=output,
        cache_control='no-cache'
      )
Example 23
missing_report = []
invalid_report = []
success_report = []

for dataset in datasets:
    layers = ls(dataset)

    for layer in layers:
        if not valid_paths.match(layer):
            continue

        if 'removeme' in layer:
            continue

        with Storage(layer, n_threads=0) as stor:
            if not stor.exists('provenance'):
                missing_report.append(layer)
            else:
                prov = stor.get_file('provenance')

                try:
                    prov = DataLayerProvenance().from_json(prov)
                except Exception:
                    invalid_report.append(layer)
                else:
                    success_report.append(layer)

RESET_COLOR = "\033[m"
YELLOW = "\033[1;93m"
RED = '\033[1;91m'
Example 24
            pool = pathos.pools.ProcessPool(parallel)
            for skel in pool.uimap(complex_merge, skeletons.values()):
                merged_skeletons[skel.id] = skel.to_precomputed()
                pbar.update(1)
            pool.close()
            pool.join()
            pool.clear()

    return merged_skeletons


if has_checkpoint('complex-merge'):
    merged_skeletons = load_checkpoint('complex-merge')
else:
    skeletons = checkpoint('simple-merge', load_raw_skeletons)
    postprocessfn = lambda: postprocess(skeletons)
    merged_skeletons = checkpoint('complex-merge', postprocessfn)
    del skeletons
    del postprocessfn

shard_files = synthesize_shard_files(spec, merged_skeletons, progress=True)

uploadable = [(fname, data) for fname, data in shard_files.items()]
with Storage(cv.skeleton.meta.layerpath) as stor:
    stor.put_files(
        files=uploadable,
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
Example 25
  def _download_input_chunk(self, bounds):
    storage = Storage(self.layer_path, n_threads=0)
    relpath = 'build/{}'.format(bounds.to_filename())
    return storage.get_file(relpath)
Example 26
def ingest(args):
    """
    Ingest an HDF file to a CloudVolume bucket
    """
    if args.local_hdf_path:
        hdf_file = h5py.File(args.local_hdf_path, "r")
    else:
        with Storage(args.cloud_src_path) as storage:
            hdf_file = h5py.File(storage.get_file(args.cloud_hdf_filename),
                                 "r")
    cur_hdf_group = hdf_file
    for group_name in args.hdf_keys_to_dataset:
        cur_hdf_group = cur_hdf_group[group_name]
    hdf_dataset = cur_hdf_group
    if args.zyx:
        dataset_shape = np.array(
            [hdf_dataset.shape[2], hdf_dataset.shape[1], hdf_dataset.shape[0]])
    else:
        dataset_shape = np.array([*hdf_dataset.shape])
    if args.layer_type == "image":
        data_type = "uint8"
    else:
        data_type = "uint64"
    voxel_offset = args.voxel_offset
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=args.layer_type,
        data_type=data_type,
        encoding="raw",
        resolution=args.resolution,
        voxel_offset=voxel_offset,
        chunk_size=args.chunk_size,
        volume_size=dataset_shape,
    )
    provenance = {
        "description": args.provenance_description,
        "owners": [args.owner]
    }
    vol = CloudVolume(args.dst_path, info=info, provenance=provenance)
    vol.commit_info()
    vol.commit_provenance()

    all_files = set()
    for x in np.arange(voxel_offset[0], voxel_offset[0] + dataset_shape[0],
                       args.chunk_size[0]):
        for y in np.arange(voxel_offset[1], voxel_offset[1] + dataset_shape[1],
                           args.chunk_size[1]):
            for z in np.arange(voxel_offset[2],
                               voxel_offset[2] + dataset_shape[2],
                               args.chunk_size[2]):
                all_files.add(tuple((x, y, z)))

    progress_dir = mkdir(
        "progress/")  # unlike os.mkdir, doesn't crash on a preexisting directory
    done_files = set()
    for done_file in os.listdir(progress_dir):
        done_files.add(tuple(done_file.split(",")))
    to_upload = all_files.difference(done_files)

    for chunk_start_tuple in to_upload:
        chunk_start = np.array(list(chunk_start_tuple))
        end_of_dataset = np.array(voxel_offset) + dataset_shape
        chunk_end = chunk_start + np.array(args.chunk_size)
        chunk_end = Vec(*chunk_end)
        chunk_end = Vec.clamp(chunk_end, Vec(0, 0, 0), end_of_dataset)
        chunk_hdf_start = chunk_start - voxel_offset
        chunk_hdf_end = chunk_end - voxel_offset
        if args.zyx:
            chunk = hdf_dataset[chunk_hdf_start[2]:chunk_hdf_end[2],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[0]:chunk_hdf_end[0], ]
            chunk = chunk.T
        else:
            chunk = hdf_dataset[chunk_hdf_start[0]:chunk_hdf_end[0],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[2]:chunk_hdf_end[2], ]
        print("Processing ", chunk_start_tuple)
        array = np.array(chunk, dtype=np.dtype(data_type), order="F")
        vol[chunk_start[0]:chunk_end[0], chunk_start[1]:chunk_end[1],
            chunk_start[2]:chunk_end[2], ] = array
        touch(os.path.join(progress_dir, str(chunk_start_tuple)))
Example 27
def run_task_bundle(settings: Mapping, roi: Tuple[slice, slice, slice]):
    # Remember: DB must be cleared before starting a whole new run
    with open("/secrets/mysql") as passwd:
        mysql_conn = MySQLdb.connect(host=settings["mysql"]["host"],
                                     user=settings["mysql"]["user"],
                                     db=settings["mysql"]["db"],
                                     passwd=passwd.read().strip())

    cgraph = chunkedgraph.ChunkedGraph(
        table_id=settings["chunkedgraph"]["table_id"],
        instance_id=settings["chunkedgraph"]["instance_id"])

    # Things to check:
    # - Agglomeration and Input Watershed have the same offset/size
    # - Taskbundle Offset and ROI is a multiple of cgraph.chunksize
    # - Output Watershed chunksize must be a multiple of cgraph.chunksize

    agglomeration_input = CloudVolume(
        settings["layers"]["agglomeration_path_input"], bounded=False)
    watershed_input = CloudVolume(settings["layers"]["watershed_path_input"],
                                  bounded=False)
    watershed_output = CloudVolume(settings["layers"]["watershed_path_output"],
                                   bounded=False,
                                   autocrop=True)
    regiongraph_input = Storage(
        settings["regiongraph"]["regiongraph_path_input"])
    regiongraph_output = Storage(
        settings["regiongraph"]["regiongraph_path_output"])
    regiongraph_chunksize = tuple(settings["regiongraph"]["chunksize"])

    chunkgraph_chunksize = np.array(cgraph.chunk_size, dtype=np.int64)
    output_watershed_chunksize = np.array(watershed_output.underlying,
                                          dtype=np.int64)
    outer_chunksize = np.maximum(chunkgraph_chunksize,
                                 output_watershed_chunksize,
                                 dtype=np.int64)

    # Iterate through TaskBundle using a minimal chunk size that is a multiple
    # of the output watershed chunk size and the Chunked Graph chunk size.
    for ox in range(roi[0].start, roi[0].stop, outer_chunksize[0]):
        for oy in range(roi[1].start, roi[1].stop, outer_chunksize[1]):
            for oz in range(roi[2].start, roi[2].stop, outer_chunksize[2]):

                watershed_output_buffer = np.zeros((*outer_chunksize, 1),
                                                   dtype=np.uint64)

                # Iterate through ChunkGraph chunk-sized tasks:
                for ix_start in range(0, outer_chunksize[0],
                                      chunkgraph_chunksize[0]):
                    for iy_start in range(0, outer_chunksize[1],
                                          chunkgraph_chunksize[1]):
                        for iz_start in range(0, outer_chunksize[2],
                                              chunkgraph_chunksize[2]):
                            ix_end = ix_start + chunkgraph_chunksize[0]
                            iy_end = iy_start + chunkgraph_chunksize[1]
                            iz_end = iz_start + chunkgraph_chunksize[2]

                            # One voxel overlap in each dimension to get
                            # consistent labeling across chunks
                            edgetask_roi = (slice(ox + ix_start,
                                                  ox + ix_end + 1),
                                            slice(oy + iy_start,
                                                  oy + iy_end + 1),
                                            slice(oz + iz_start,
                                                  oz + iz_end + 1))

                            edgetask = EdgeTask(
                                cgraph=cgraph,
                                mysql_conn=mysql_conn,
                                agglomeration_input=agglomeration_input,
                                watershed_input=watershed_input,
                                regiongraph_input=regiongraph_input,
                                regiongraph_output=regiongraph_output,
                                regiongraph_chunksize=regiongraph_chunksize,
                                roi=edgetask_roi)
                            edgetask.execute()

                            # Write relabeled ChunkGraph chunk to (possibly larger)
                            # watershed-chunk aligned buffer
                            watershed_output_buffer[ix_start:ix_end,
                                                    iy_start:iy_end,
                                                    iz_start:iz_end, :] = \
                                edgetask.get_relabeled_watershed()

                watershed_output[ox:ox + outer_chunksize[0],
                                 oy:oy + outer_chunksize[1],
                                 oz:oz + outer_chunksize[2], :] = \
                    watershed_output_buffer
Example 28
def create_storage(layer_name='layer'):
    stor_path = os.path.join(layer_path, layer_name)
    return Storage('file://' + stor_path, n_threads=0)
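A small round-trip sketch, assuming layer_path is the module-level temporary directory this helper implies; it only uses put_file/get_file calls that appear in the other examples.

# Round trip through the helper; 'layer' matches the default layer_name.
with create_storage('layer') as stor:
    stor.put_file('info', b'{"layer_type": "image"}')
    assert stor.get_file('info') == b'{"layer_type": "image"}'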
Example 29
"""

import sys
import os
from functools import reduce

from tqdm import tqdm

from cloudvolume.lib import max2
from cloudvolume import CloudVolume, Storage, Bbox

layer_path = sys.argv[1]

cv = CloudVolume(layer_path)

bboxes = []

with Storage(layer_path) as stor:
  for filename in tqdm(stor.list_files(prefix=cv.key), desc="Computing Bounds"):
    bboxes.append( Bbox.from_filename(filename) )

bounds = Bbox.expand(*bboxes)
chunk_size = reduce(max2, map(lambda bbox: bbox.size3(), bboxes))
print('bounds={} (size: {}); chunk_size={}'.format(bounds, bounds.size3(), chunk_size))
Example 30
def record_remesh_ids(cg: ChunkedGraph, operation_id: int, l2ids: ndarray):
    from cloudvolume.storage import SimpleStorage as Storage

    _, _, bucket_path, file_name = get_remesh_info(cg, operation_id)
    with Storage(bucket_path) as storage:  # pylint: disable=not-context-manager
        storage.put_file(file_path=file_name, content=l2ids.tobytes())