Example #1
class SkeletonizeOperator(OperatorBase):
    """Create mesh files from segmentation."""
    def __init__(self, output_path, name: str = 'skeletonize'):
        """
        Parameters
        ------------
        output_path:
            where to put the skeleton files
        name: 
            operator name.
        """
        super().__init__(name=name)
        self.storage = CloudFiles(output_path)

    def __call__(self, seg, voxel_size):
        if seg is None:
            print('no segmentation, skip computation.')
            return None

        logging.info('skeletonize segmentation...')

        seg = Segmentation.from_chunk(seg)
        skels = seg.skeletonize(voxel_size)
        bbox_str = seg.bbox.to_filename()
        for neuron_id, skel in skels.items():
            file_name = f'{neuron_id}:{bbox_str}'
            self.storage.put(file_name, skel.to_precomputed())
        return skels
Example #2
def test_compress_level(compression_method):
    from cloudfiles import CloudFiles, exceptions
    from shutil import rmtree
    filepath = "/tmp/cloudfiles/compress_level"
    url = "file://" + filepath

    content = b'some_string' * 1000

    compress_levels = range(1, 9, 2)
    for compress_level in compress_levels:
        cf = CloudFiles(url, num_threads=5)
        cf.put('info',
               content,
               compress=compression_method,
               compression_level=compress_level)

        retrieved = cf.get('info')
        assert content == retrieved

        conn = cf._get_connection()
        _, encoding, server_md5, hash_type = conn.get_file("info")
        assert encoding == compression_method
        assert hash_type in ('md5', None)

        assert cf.get('nonexistentfile') is None

        rmtree(filepath)
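Outside the test harness the same two knobs apply to any write: compress picks the codec and compression_level trades write speed for size. A minimal sketch, assuming a hypothetical local file:// path:

from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cloudfiles/compress_demo")  # hypothetical path
payload = b"some_string" * 1000

# compress selects the codec; compression_level trades speed for ratio.
cf.put("info", payload, compress="gzip", compression_level=9)
assert cf.get("info") == payload  # get() decompresses transparently
cf.delete("info")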
Example #3
def MultiResUnshardedMeshMergeTask(
  cloudpath:str, 
  prefix:str,
  cache_control:bool = False,
  draco_compression_level:int = 1,
  mesh_dir:Optional[str] = None,
  num_lod:int = 1,
  progress:bool = False,
):
  cv = CloudVolume(cloudpath)
  
  if mesh_dir is None and 'mesh' in cv.info:
    mesh_dir = cv.info['mesh']

  files_per_label = get_mesh_filenames_subset(
    cloudpath, mesh_dir, prefix
  )

  cf = CloudFiles(cv.meta.join(cloudpath, mesh_dir))
  for label, filenames in tqdm(files_per_label.items(), disable=(not progress)):
    files = cf.get(filenames)
    # we should handle draco as well
    files = [ Mesh.from_precomputed(f["content"]) for f in files ]

    (manifest, mesh) = process_mesh(
      cv, label, files, 
      num_lod, draco_compression_level
    )

    cf.put(f"{label}.index", manifest.to_binary(), cache_control="no-cache")
    cf.put(f"{label}", mesh, cache_control="no-cache")
Example #4
from io import BytesIO
from cloudfiles import CloudFiles
from PIL import Image

def write_to_cloud_bucket(dst_dir, img_arr, extension="tif"):
    cf = CloudFiles(dst_dir)
    for k in range(img_arr.shape[2]):
        img = Image.fromarray(img_arr[:, :, k].T)
        img_bytes = BytesIO()
        img.save(img_bytes, format="tiff" if extension == "tif" else extension)
        cf.put("{0:03d}.{1}".format(k + 1, extension), img_bytes.getvalue())
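For reference, a minimal invocation of the helper above, assuming a small uint8 stack and a hypothetical local file:// destination; each z-slice is written as 001.tif, 002.tif, and so on.

import numpy as np

# three 64 x 64 z-slices, written out as separate single-page TIFFs
stack = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
write_to_cloud_bucket("file:///tmp/cloudfiles/imgs", stack)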
Example #5
    def execute(self):
        corgie_logger.info(
            f"Skeletonizing {self.seg_layer} at MIP{self.mip}, region: {self.bcube}"
        )
        seg_data = self.seg_layer.read(bcube=self.bcube,
                                       mip=self.mip,
                                       timestamp=self.timestamp)
        resolution = self.seg_layer.cv[self.mip].resolution
        skeletons = kimimaro.skeletonize(
            seg_data,
            self.teasar_params,
            object_ids=self.object_ids,
            anisotropy=resolution,
            dust_threshold=self.dust_threshold,
            progress=False,
            fix_branching=self.fix_branching,
            fix_borders=self.fix_borders,
            fix_avocados=self.fix_avocados,
        ).values()

        minpt = self.bcube.minpt(self.mip)
        for skel in skeletons:
            skel.vertices[:] += minpt * resolution

        cf = CloudFiles(self.dst_path)
        for skel in skeletons:
            path = "{}:{}".format(skel.id, self.bcube.to_filename(self.mip))
            cf.put(
                path=path,
                content=pickle.dumps(skel),
                compress="gzip",
                content_type="application/python-pickle",
                cache_control=False,
            )
Example #6
def test_isdir(s3, protocol):
  from cloudfiles import CloudFiles, exceptions
  url = compute_url(protocol, "isdir")

  cf = CloudFiles(url, num_threads=5)
  assert not cf.isdir()

  content = b'some_string'
  cf.put('info', content, compress=None)
  
  assert cf.isdir()
  cf.delete('info')
Example #7
    def _upload_batch(self, meshes, bbox):
        cf = CloudFiles(self.layer_path, progress=self.options['progress'])

        mbuf = MapBuffer(meshes, compress="br")

        cf.put(
            f"{self._mesh_dir}/{bbox.to_filename()}.frags",
            content=mbuf.tobytes(),
            compress=None,
            content_type="application/x.mapbuffer",
            cache_control=False,
        )
Example #8
    def upload_batch(self, vol, path, bbox, skeletons):
        mbuf = MapBuffer(skeletons,
                         compress="br",
                         tobytesfn=lambda skel: skel.to_precomputed())

        cf = CloudFiles(path, progress=vol.progress)
        cf.put(
            path="{}.frags".format(bbox.to_filename()),
            content=mbuf.tobytes(),
            compress=None,
            content_type="application/x-mapbuffer",
            cache_control=False,
        )
Example #9
class AggregateSkeletonFragmentsOperator(OperatorBase):
    """Merge skeleton fragments for Neuroglancer visualization."""
    def __init__(self,
                 fragments_path: str,
                 output_path: str,
                 name: str = 'aggregate-skeleton-fragments'):
        """
        Parameters
        ------------
        fragments_path: 
            path to store fragment files
        output_path:
            save the merged skeleton file here.
        """
        super().__init__(name=name)
        self.fragments_storage = CloudFiles(fragments_path)
        self.output_storage = CloudFiles(output_path)

    def __call__(self, prefix: str):
        logging.info(f'aggregate skeletons with prefix of {prefix}')

        id2filenames = defaultdict(list)
        for filename in self.fragments_storage.list_files(prefix=prefix):
            filename = os.path.basename(filename)
            # re.match anchors at the start of the string; re.search scans anywhere in it
            matches = re.search(r'(\d+):', filename)

            if not matches:
                continue

            # skeleton ID
            skl_id = int(matches.group(0)[:-1])
            id2filenames[skl_id].append(filename)

        for skl_id, filenames in id2filenames.items():
            logging.info(f'skeleton id: {skl_id}')
            frags = self.fragments_storage.get(filenames)
            frags = [
                PrecomputedSkeleton.from_precomputed(x['content'])
                for x in frags
            ]
            skel = PrecomputedSkeleton.simple_merge(frags).consolidate()
            skel = kimimaro.postprocess(skel,
                                        dust_threshold=1000,
                                        tick_threshold=3500)
            self.output_storage.put(
                path=str(skl_id),
                content=skel.to_precomputed(),
            )
            # the last few hundred files will not be uploaded without sleeping!
            sleep(0.01)
Example #10
def MultiResShardedMeshMergeTask(
  cloudpath:str,
  shard_no:str,
  draco_compression_level:int = 1,
  mesh_dir:Optional[str] = None,
  num_lod:int = 1,
  spatial_index_db:Optional[str] = None,
  progress:bool = False
):
  cv = CloudVolume(cloudpath, spatial_index_db=spatial_index_db)
  cv.mip = cv.mesh.meta.mip
  if mesh_dir is None and 'mesh' in cv.info:
    mesh_dir = cv.info['mesh']

  # This looks messy because we are trying to avoid retaining
  # unnecessary memory. In the original skeleton iteration, this was 
  # using 50 GB+ memory on minnie65. So it makes sense to be just
  # as careful with a heavier type of object.
  locations = locations_for_labels(cv, labels_for_shard(cv, shard_no))
  filenames = set(itertools.chain(*locations.values()))
  labels = set(locations.keys())
  del locations
  meshes = collect_mesh_fragments(
    cv, labels, filenames, mesh_dir, progress
  )
  del filenames

  # important to mutate meshes in place here instead of building a new
  # dict with { ... for ... in }, which would briefly hold a second copy
  for label in labels:
    meshes[label] = Mesh.concatenate(*meshes[label])
  del labels

  fname, shard = create_mesh_shard(
    cv, meshes, 
    num_lod, draco_compression_level,
    progress, shard_no
  )
  del meshes

  if shard is None:
    return

  cf = CloudFiles(cv.mesh.meta.layerpath)
  cf.put(
    fname, shard,
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
Example #11
def test_delete(s3, green, protocol):
  from cloudfiles import CloudFiles, exceptions
  if protocol == 'file':
    url = "file:///tmp/cloudfiles/delete"
  else:
    url = "{}://cloudfiles/delete".format(protocol)

  cf = CloudFiles(url, green=green, num_threads=1)    
  content = b'some_string'
  cf.put('delete-test', content, compress=None, cache_control='no-cache')
  cf.put('delete-test-compressed', content, compress='gzip', cache_control='no-cache')
  assert cf.get('delete-test') == content
  cf.delete('delete-test')
  assert cf.get('delete-test') is None

  assert cf.get('delete-test-compressed') == content
  cf.delete('delete-test-compressed')
  assert cf.get('delete-test-compressed') is None

  # Reset for batch delete
  cf.put('delete-test', content, compress=None, cache_control='no-cache')
  cf.put('delete-test-compressed', content, compress='gzip', cache_control='no-cache')
  assert cf.get('delete-test') == content
  assert cf.get('delete-test-compressed') == content

  cf.delete(['delete-test', 'delete-nonexistent', 'delete-test-compressed'])
  assert cf.get('delete-test') is None
  assert cf.get('delete-test-compressed') is None
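delete() also accepts any iterable of paths (Example #19 further down passes an iterator), so a prefix-scoped cleanup can stream straight from list(). A small sketch against a hypothetical local path:

from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cloudfiles/delete_demo")
cf.put("tmp/a", b"x", compress=None)
cf.put("tmp/b", b"x", compress=None)

# list() yields lazily and delete() consumes the iterable directly,
# so the full listing never has to be materialized.
cf.delete(cf.list(prefix="tmp/"))
assert cf.get("tmp/a") is None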
Example #12
    def execute(self):

        corgie_logger.info(
            f"Generate new skeleton vertices task for id {self.skeleton_id_str}"
        )
        skeleton = get_skeleton(self.src_path, self.skeleton_id_str)
        if self.vertex_sort:
            vertex_sort = skeleton.vertices[:, 2].argsort()
        else:
            vertex_sort = np.arange(0, len(skeleton.vertices))
        number_vertices = len(skeleton.vertices)
        index_points = list(range(0, number_vertices, self.task_vertex_size))
        cf = CloudFiles(f"{self.dst_path}")
        array_filenames = []
        for i in range(len(index_points)):
            start_index = index_points[i]
            if i + 1 == len(index_points):
                end_index = number_vertices
            else:
                end_index = index_points[i + 1]
            array_filenames.append(
                f"intermediary_arrays/{self.skeleton_id_str}:{start_index}-{end_index}"
            )
        array_files = cf.get(array_filenames)
        # Dict to make sure arrays are concatenated in correct order
        array_dict = {}
        for array_file in array_files:
            array_dict[array_file["path"]] = pickle.loads(
                array_file["content"])
        array_arrays = []
        for array_filename in array_filenames:
            array_arrays.append(array_dict[array_filename])
        array_arrays = np.concatenate(array_arrays)
        # Restore the correct order of the vertices
        restore_sort = vertex_sort.argsort()
        new_vertices = array_arrays[restore_sort]
        new_skeleton = Skeleton(
            vertices=new_vertices,
            edges=skeleton.edges,
            radii=skeleton.radius,
            vertex_types=skeleton.vertex_types,
            space=skeleton.space,
            transform=skeleton.transform,
        )
        cf.put(
            path=self.skeleton_id_str,
            content=new_skeleton.to_precomputed(),
            compress="gzip",
        )
Example #13
def test_exists(s3, protocol):
  from cloudfiles import CloudFiles, exceptions
  url = compute_url(protocol, "exists")

  cf = CloudFiles(url, num_threads=5)
  content = b'some_string'
  cf.put('info', content, compress=None)
  
  assert cf.exists('info')
  assert not cf.exists('doesntexist')

  assert cf.exists(['info'])['info']
  assert not cf.exists(['doesntexist'])['doesntexist']

  cf.delete('info')
Example #14
def test_access_non_cannonical_minimal_path(s3, protocol):
  from cloudfiles import CloudFiles, exceptions
  if protocol == 'file':
    url = "file:///tmp/"
  else:
    url = "{}://cloudfiles/".format(protocol)
  
  cf = CloudFiles(url, num_threads=5)
  content = b'some_string'
  cf.put('info', content, compress=None)
  
  # time.sleep(0.5) # sometimes it takes a moment for google to update the list
  
  assert cf.get('info') == content
  assert cf.get('nonexistentfile') is None
  cf.delete('info')
Example #15
def test_list(s3, protocol):
    from cloudfiles import CloudFiles, exceptions
    from shutil import rmtree
    url = compute_url(protocol, "list")

    cf = CloudFiles(url, num_threads=5)
    content = b'some_string'
    cf.put('info1', content, compress=None)
    cf.put('info2', content, compress=None)
    cf.put('build/info3', content, compress=None)
    cf.put('level1/level2/info4', content, compress=None)
    cf.put('info5', content, compress='gzip')
    cf.put('info.txt', content, compress=None)

    # time.sleep(1) # sometimes it takes a moment for google to update the list
    assert set(cf.list(prefix='')) == set([
        'build/info3', 'info1', 'info2', 'level1/level2/info4', 'info5',
        'info.txt'
    ])
    assert set(list(cf)) == set(cf.list(prefix=''))

    assert set(cf.list(prefix='inf')) == set(
        ['info1', 'info2', 'info5', 'info.txt'])
    assert set(cf.list(prefix='info1')) == set(['info1'])
    assert set(cf.list(prefix='build')) == set(['build/info3'])
    assert set(cf.list(prefix='build/')) == set(['build/info3'])
    assert set(cf.list(prefix='level1/')) == set(['level1/level2/info4'])
    assert set(cf.list(prefix='nofolder/')) == set([])

    # Tests (1)
    assert set(cf.list(prefix='', flat=True)) == set(
        ['info1', 'info2', 'info5', 'info.txt'])
    assert set(cf.list(prefix='inf', flat=True)) == set(
        ['info1', 'info2', 'info5', 'info.txt'])
    # Tests (2)
    assert set(cf.list(prefix='build', flat=True)) == set([])
    # Tests (3)
    assert set(cf.list(prefix='level1/', flat=True)) == set([])
    assert set(cf.list(prefix='build/', flat=True)) == set(['build/info3'])
    # Tests (4)
    assert set(cf.list(prefix='build/inf', flat=True)) == set(['build/info3'])

    for file_path in ('info1', 'info2', 'build/info3', 'level1/level2/info4',
                      'info5', 'info.txt'):
        cf.delete(file_path)

    if protocol == 'file':
        rmtree("/tmp/cloudfiles/list")
Example #16
def test_read_write(s3, protocol, num_threads, green):
    from cloudfiles import CloudFiles, exceptions
    from shutil import rmtree
    url = compute_url(protocol, "rw")

    cf = CloudFiles(url, num_threads=num_threads, green=green)

    content = b'some_string'
    cf.put('info', content, compress=None, cache_control='no-cache')
    cf['info2'] = content

    assert cf.get('info') == content
    assert cf['info2'] == content
    assert cf['info2', 0:3] == content[0:3]
    assert cf['info2', :] == content[:]
    assert cf.get('nonexistentfile') is None

    assert cf.get('info', return_dict=True) == {"info": content}
    assert cf.get(['info', 'info2'], return_dict=True) == {
        "info": content,
        "info2": content
    }

    del cf['info2']
    assert cf.exists('info2') == False

    num_infos = max(num_threads, 1)
    results = cf.get(['info' for i in range(num_infos)])

    assert len(results) == num_infos
    assert results[0]['path'] == 'info'
    assert results[0]['content'] == content
    assert all(map(lambda x: x['error'] is None, results))
    assert cf.get(['nonexistentfile'])[0]['content'] is None

    cf.delete('info')

    cf.put_json('info', {'omg': 'wow'}, cache_control='no-cache')
    results = cf.get_json('info')
    assert results == {'omg': 'wow'}

    cf.delete('info')

    if protocol == 'file':
        rmtree(url.replace("file://", ""))
Example #17
    def upload_meshes(self, meshes):
        if len(meshes) == 0:
            return

        reader = self.cv.mesh.readers[self.layer_id]

        shard_binary = reader.spec.synthesize_shard(meshes)
        # the shard filename is derived from the chunk position,
        # so any label inside this L2 chunk will do
        shard_filename = reader.get_filename(list(meshes.keys())[0])

        cf = CloudFiles(self.cv.cloudpath)
        cf.put(
            f"{self.get_mesh_dir()}/initial/{self.layer_id}/{shard_filename}",
            shard_binary,
            compress=None,
            content_type="application/octet-stream",
            cache_control="no-cache",
        )
Example #18
    def commit_provenance(self):
        """
    Save the current provenance object as JSON into cache (if enabled) 
    and primary storage.
    """
        prov = self.provenance.serialize()

        # hack to pretty print provenance files
        prov = json.loads(prov)
        prov = jsonify(prov, sort_keys=True, indent=2, separators=(',', ': '))

        # need to use put vs put_json to preserve formatting
        cf = CloudFiles(self.cloudpath)
        cf.put('provenance',
               prov,
               cache_control='no-cache',
               content_type='application/json')

        if self.cache:
            self.cache.maybe_cache_provenance()
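The same formatting-preserving pattern works with the standard json module, assuming jsonify above behaves like json.dumps with those keyword arguments; the path below is hypothetical.

import json
from cloudfiles import CloudFiles

doc = {"description": "demo", "owners": ["lab@example.org"]}
pretty = json.dumps(doc, sort_keys=True, indent=2, separators=(",", ": "))

cf = CloudFiles("file:///tmp/cloudfiles/provenance_demo")
# put() stores the string exactly as formatted; put_json() would
# re-serialize it and drop the indentation.
cf.put("provenance", pretty.encode("utf-8"),
       cache_control="no-cache",
       content_type="application/json")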
Example #19
def test_compression(s3, protocol, method, green):
  from cloudfiles import CloudFiles, exceptions
  url = compute_url(protocol, "compress")

  cf = CloudFiles(url, num_threads=5, green=green)
  content = b'some_string'

  cf.put('info', content, compress=method)
  retrieved = cf.get('info')
  assert content == retrieved

  assert cf.get('nonexistentfile') is None

  try:
    cf.put('info', content, compress='nonexistent')
    assert False
  except ValueError:
    pass

  cf.delete(iter(cf))
Example #20
    def commit_info(self):
        """
    Save the current info dict as JSON into cache and primary storage.

    Raises KeyError if an encoding of 'compressed_segmentation' is specified
    without 'compressed_segmentation_block_size'.

    Raises ValueError if 'compressed_segmentation' is specified and the 
    data type is not uint32 or uint64.
    """
        for scale in self.scales:
            if scale['encoding'] == 'compressed_segmentation':
                if 'compressed_segmentation_block_size' not in scale.keys():
                    raise KeyError("""
            'compressed_segmentation_block_size' must be set if 
            compressed_segmentation is set as the encoding.

            A typical value for compressed_segmentation_block_size is (8,8,8)

            Info file specification:
            https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/README.md#info-json-file-specification
          """)
                elif self.data_type not in ('uint32', 'uint64'):
                    raise ValueError(
                        "compressed_segmentation can only be used with uint32 and uint64 data types."
                    )

        infojson = jsonify(self.info,
                           sort_keys=True,
                           indent=2,
                           separators=(',', ': '))
        # use put instead of put_json to preserve formatting
        cf = CloudFiles(self.cloudpath)
        cf.put('info',
               infojson,
               cache_control='no-cache',
               content_type='application/json')

        if self.cache:
            self.cache.maybe_cache_info()
Example #21
class MergeSkeletonTask(scheduling.Task):
    def __init__(self,
                 dst_path,
                 mip,
                 dust_threshold,
                 tick_threshold,
                 prefix=""):
        super().__init__(self)
        self.dst_path = dst_path
        self.cf = CloudFiles(self.dst_path)
        self.mip = mip
        self.dust_threshold = dust_threshold
        self.tick_threshold = tick_threshold
        self.prefix = prefix

    def execute(self):
        corgie_logger.info(f"Merging skeletons at {self.dst_path}")
        fragment_filenames = self.cf.list(prefix=self.prefix, flat=True)
        skeleton_files = self.cf.get(fragment_filenames)
        skeletons = defaultdict(list)
        for skeleton_file in skeleton_files:
            try:
                colon_index = skeleton_file["path"].index(":")
            except ValueError:
                # File is full skeleton, not fragment
                continue
            seg_id = skeleton_file["path"][0:colon_index]
            skeleton_fragment = pickle.loads(skeleton_file["content"])
            if not skeleton_fragment.empty():
                skeletons[seg_id].append(skeleton_fragment)
        for seg_id, skeleton_fragments in skeletons.items():
            skeleton = PrecomputedSkeleton.simple_merge(
                skeleton_fragments).consolidate()
            skeleton = kimimaro.postprocess(skeleton, self.dust_threshold,
                                            self.tick_threshold)
            skeleton.id = int(seg_id)
            self.cf.put(path=seg_id,
                        content=skeleton.to_precomputed(),
                        compress="gzip")
            corgie_logger.info(f"Finished skeleton {seg_id}")
Example #22
def test_size(s3, protocol, compress, green):
  from cloudfiles import CloudFiles, exceptions, compression

  url = compute_url(protocol, 'size')
  cf = CloudFiles(url)
  
  content = b'some_string'
  cf.put('info', content, compress=compress, cache_control='no-cache')
  cf['info2'] = content
  cf.put('zero', b'', compress=None, cache_control='no-cache')

  compressed_content = compression.compress(content, compress)

  assert cf.size('info') == len(compressed_content)
  assert cf.size(['info', 'info2']) == { 
    "info": len(compressed_content), 
    "info2": len(content) 
  }
  assert cf.size('nonexistent') is None
  assert cf.size('zero') == 0

  cf.delete(['info', 'info2', 'zero'])
Example #23
def MultiResShardedFromUnshardedMeshMergeTask(
  src:str,
  dest:str,
  shard_no:str,
  cache_control:bool = False,
  draco_compression_level:int = 1,
  mesh_dir:Optional[str] = None,
  num_lod:int = 1,
  progress:bool = False,
):
  cv_src = CloudVolume(src)

  if mesh_dir is None and 'mesh' in cv_src.info:
    mesh_dir = cv_src.info['mesh']

  cv_dest = CloudVolume(dest, mesh_dir=mesh_dir, progress=True)

  labels = labels_for_shard(cv_dest, shard_no)
  meshes = cv_src.mesh.get(labels, fuse=False)
  del labels
    
  fname, shard = create_mesh_shard(
    cv_dest, meshes, 
    num_lod, draco_compression_level,
    progress, shard_no
  )
  del meshes

  if shard is None:
    return

  cf = CloudFiles(cv_dest.mesh.meta.layerpath)
  cf.put(
    fname, shard, # fname, data
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
Example #24
class MeshOperator(OperatorBase):
    """Create mesh files from segmentation."""
    def __init__(self,
                 output_path: str,
                 output_format: str,
                 mip: int = None,
                 voxel_size: tuple = (1, 1, 1),
                 simplification_factor: int = 100,
                 max_simplification_error: int = 8,
                 manifest: bool = False,
                 shard: bool = False,
                 name: str = 'mesh'):
        """
        Parameters
        ------------
        output_path:
            path to store mesh files
        output_format:
            format of output {'ply', 'obj', 'precomputed'}
        voxel_size:
            size of voxels
        simplification_factor:
            mesh simplification factor.
        max_simplification_error:
            maximum tolerance error of meshing.
        manifest:
            create manifest files or not. This should 
            not be True if you are only doing meshing for a segmentation chunk.
        name: 
            operator name.

        Note that some functions are adapted from igneous.
        """
        super().__init__(name=name)
        self.simplification_factor = simplification_factor
        self.max_simplification_error = max_simplification_error
        # zmesh uses Fortran order, so translate zyx to xyz
        self.output_path = output_path
        self.output_format = output_format
        self.manifest = manifest
        self.shard = shard

        if manifest:
            assert output_format == 'precomputed'

        if output_format == 'precomputed':
            # adjust the mesh path according to info
            vol = CloudVolume(self.output_path, mip)
            info = vol.info
            if 'mesh' not in info:
                # add mesh to info and update it
                info['mesh'] = 'mesh_err_{}'.format(max_simplification_error)
                vol.info = info
                vol.commit_info()
            self.mesh_path = os.path.join(output_path, info['mesh'])
            self.voxel_size = vol.resolution[::-1]
            self.mesher = Mesher( vol.resolution )
        else: 
            self.mesh_path = output_path
            self.mesher = Mesher(voxel_size[::-1])

        self.storage = CloudFiles(self.mesh_path)

    def _get_mesh_data(self, obj_id, offset):
        mesh = self.mesher.get_mesh(
            obj_id,
            normals=False,
            simplification_factor=self.simplification_factor,
            max_simplification_error=self.max_simplification_error)
        # delete high resolution mesh
        self.mesher.erase(obj_id)

        if self.output_format == 'precomputed':
            mesh.vertices[:] += offset[::-1] * self.voxel_size[::-1]
            data = mesh.to_precomputed()
        elif self.output_format == 'ply':
            data = mesh.to_ply()
        elif self.output_format == 'obj':
            data = mesh.to_obj()
        else:
            raise NotImplementedError

        mesh_bounds = Bbox(
            np.amin(mesh.vertices, axis=0),
            np.amax(mesh.vertices, axis=0)
        )
        return data, mesh_bounds

    def _get_file_name(self, bbox, obj_id):
        if self.output_format == 'precomputed':
            # bbox is in z,y,x order, should transform to x,y,z order 
            bbox2 = Bbox.from_slices(bbox.to_slices()[::-1])
            return '{}:0:{}'.format(obj_id, bbox2.to_filename())
        elif self.output_format == 'ply':
            return '{}.ply'.format(obj_id)
        elif self.output_format == 'obj':
            return '{}.obj'.format(obj_id)
        else:
            raise ValueError('unsupported format!')

    def __call__(self, seg: Chunk):
        """Meshing the segmentation.
        
        Parameters
        ------------
        seg:
            3D segmentation chunk.
        """
        if seg is None:
            return

        assert isinstance(seg, Chunk)
        assert seg.ndim == 3
        assert np.issubdtype(seg.dtype, np.integer)
        
        bbox = seg.bbox
        # use ndarray after getting the bounding box
        seg = seg.array

        logging.info('computing meshes from segmentation...')
        self.mesher.mesh(seg)

        logging.info('write mesh to storage...')
        if self.shard:
            assert 'precomputed' in self.output_format
            meshes = []
            mesh_bboxes = {}
            for obj_id in self.mesher.ids():
                data, mesh_bbox = self._get_mesh_data(obj_id, bbox.minpt)
                meshes.append(data)
                mesh_bboxes[obj_id] = mesh_bbox.to_list()

            # use the sharded format by default
            self.storage.put(
                f"{self.mesh_path}/{bbox.to_filename()}.frags",
                content=pickle.dumps(meshes),
                compress='gzip',
                content_type="application/python-pickle",
                cache_control=False,
            )
            self.storage.put_json(
                f"{self.mesh_path}/{bbox.to_filename()}.spatial",
                mesh_bboxes,
                compress='gzip',
                cache_control=False,
            )
        else:
            if 'precomputed' in self.output_format:
                compress = 'gzip'
            else:
                compress = None

            for obj_id in tqdm(self.mesher.ids(), desc='writing out meshes'):
                # print('object id: ', obj_id)
                data, _ = self._get_mesh_data(obj_id, bbox.minpt)
                file_name = self._get_file_name(bbox, obj_id)
                self.storage.put(
                    file_name, data,
                    cache_control=None,
                    compress=compress
                )

                # create manifest file
                if self.manifest:
                    self.storage.put_json(
                        f'{obj_id}:0',
                        {'fragments': [file_name]}
                    )
                    self.storage.put_json(
                        'info',
                        {"@type": "neuroglancer_legacy_mesh"}
                    )

        # release memory
        self.mesher.clear()
Example #25
    def execute(self):
        corgie_logger.info(
            f"Starting transform skeleton vertices task for id {self.skeleton_id_str}"
        )

        skeleton = get_skeleton(self.src_path, self.skeleton_id_str)

        if self.vertex_sort:
            vertex_sort = skeleton.vertices[:, 2].argsort()
        else:
            vertex_sort = np.arange(0, len(skeleton.vertices))

        # How many vertices we will use at once to get a bcube to download from the vector field
        vertex_process_size = 50
        vertices_to_transform = skeleton.vertices[
            vertex_sort[self.start_vertex_index:self.end_vertex_index]]
        index_vertices = list(
            range(0, self.number_vertices, vertex_process_size))
        new_vertices = []
        for i in range(len(index_vertices)):
            if i + 1 == len(index_vertices):
                current_batch_vertices = vertices_to_transform[
                    index_vertices[i]:]
            else:
                current_batch_vertices = vertices_to_transform[
                    index_vertices[i]:index_vertices[i + 1]]
            field_resolution = np.array(
                self.vector_field_layer.resolution(self.field_mip))
            bcube = get_bcube_from_vertices(
                vertices=current_batch_vertices,
                resolution=field_resolution,
                mip=self.field_mip,
            )
            field_data = self.vector_field_layer.read(
                bcube=bcube, mip=self.field_mip).permute(2, 3, 0, 1)
            current_batch_vertices_to_mip = current_batch_vertices / field_resolution
            bcube_minpt = bcube.minpt(self.field_mip)
            field_indices = current_batch_vertices_to_mip.astype(int) - bcube_minpt
            vector_resolution = (
                self.vector_field_layer.resolution(0) * np.array([
                    2**(self.field_mip - self.vector_field_layer.data_mip),
                    2**(self.field_mip - self.vector_field_layer.data_mip),
                    1,
                ]) if self.mip0_field else self.vector_field_layer.resolution(
                    self.field_mip))
            vectors_to_add = []
            corgie_logger.info(f"{field_data.shape}, {field_indices.max(0)}")
            for i in range(len(field_data.shape) - 1):
                if field_indices.max(0)[i] >= field_data.shape[i]:
                    import pdb
                    pdb.set_trace()
            for cur_field_index in field_indices:
                vector_at_point = field_data[cur_field_index[0],
                                             cur_field_index[1],
                                             cur_field_index[2]]
                # Each vector is stored in [Y,X] format
                vectors_to_add.append([
                    int(vector_resolution[0] * vector_at_point[1].item()),
                    int(vector_resolution[1] * vector_at_point[0].item()),
                    0,
                ])
            vectors_to_add = np.array(vectors_to_add)
            current_batch_warped_vertices = current_batch_vertices + vectors_to_add
            new_vertices.append(current_batch_warped_vertices)

        new_vertices = np.concatenate(new_vertices)
        cf = CloudFiles(f"{self.dst_path}/intermediary_arrays/")
        cf.put(
            path=
            f"{self.skeleton_id_str}:{self.start_vertex_index}-{self.end_vertex_index}",
            content=pickle.dumps(new_vertices),
        )
Example #26
 def maybe_cache_provenance(self):
     if self.enabled and self.meta.provenance:
         cf = CloudFiles('file://' + self.path)
         cf.put('provenance',
                self.meta.provenance.serialize().encode('utf8'))
Example #27
    def task_generator(self):
        skeletons = self.get_skeletons(self.src_path)
        if self.z_start is not None and self.z_end is not None:
            bbox = Bbox((0, 0, self.z_start*40), (10e8, 10e8, self.z_end*40))
        else:
            bbox = None

        lengths = []
        for skeleton_id_str, sk in skeletons.items():
            deleted = 1
            if bbox is not None:
                sk = sk.crop(bbox)
            while deleted != 0:
                deleted = 0
                verts = sk.vertices
                vert_zs = verts[:, 2].copy()
                vert_zs /= 40 #hack
                vert_zs = vert_zs.astype(np.int32)

                bad_verts = np.where(np.isin(vert_zs, self.bad_sections))[0]

                # Find the next set of vertices to remove so that
                # we don't mess with indices; remove them in the next pass
                deleted = 0
                for bv in bad_verts:
                    neighbors = get_skeleton_vert_neighbor_ids(sk, bv)

                    # filter out other bad vertices
                    bad_index = np.where(np.isin(neighbors, bad_verts))
                    neighbors = np.delete(neighbors, bad_index)

                    assert (np.isin(neighbors.flatten(), bad_verts).sum() == 0)

                    # if there's a good neighbor, reassign all the endpoints to it
                    # else wait until the next round
                    if len(neighbors) != 0:
                        replacement = neighbors[0]
                        assert (replacement not in bad_verts)
                        ed = np.expand_dims(sk.edges, -1)
                        ed[np.where(ed == bv)] = replacement
                        sk.edges = ed.squeeze(-1)
                        # this leaves self-edges
                        deleted += 1
                # This removes self edges
                sk = sk.consolidate()
                new_v_count = sk.vertices.shape[0]

            verts = sk.vertices
            vert_zs = verts[:, 2].copy()
            vert_zs /= 40 #hack
            vert_zs = vert_zs.astype(np.int32)
            bad_verts = np.where(np.isin(vert_zs, self.bad_sections))[0]

            lengths.append((skeleton_id_str, sk.cable_length()))
            cf = CloudFiles(self.dst_path)
            cf.put(
                path=skeleton_id_str,
                content=sk.to_precomputed(),
                compress="gzip",
            )
        for n, l in sorted(lengths):
            print (l)
        import sys; sys.exit(1)
Example #28
def _cp_single(ctx, source, destination, recursive, compression, progress,
               block_size):
    use_stdin = (source == '-')

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x.replace(prefix, "") for x in xferpaths]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix,
                                                         flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size,
                xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        fn = partial(_cp, srcpath, destpath, compression, False, block_size)
        with tqdm(desc="Transferring", total=total,
                  disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
Example #29
def _cp_single(ctx, source, destination, recursive, compression, progress,
               block_size):
    use_stdin = (source == '-')
    use_stdout = (destination == '-')

    if use_stdout:
        progress = False  # can't have the progress bar interfering

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    # For more information see:
    # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
    # Try to follow cp rules. If the directory exists,
    # copy the base source directory into the dest directory
    # If the directory does not exist, then we copy into
    # the dest directory.
    # Both x* and x** should not copy the base directory
    if recursive and nsrc[-1] != "*":
        if CloudFiles(ndest).isdir():
            if nsrc[-1] == '/':
                nsrc = nsrc[:-1]
            ndest = cloudpathjoin(ndest, os.path.basename(nsrc))

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x.replace(prefix, "") for x in xferpaths]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix,
                                                         flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size,
                xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        if use_stdout:
            fn = partial(_cp_stdout, srcpath)
        else:
            fn = partial(_cp, srcpath, destpath, compression, False,
                         block_size)

        with tqdm(desc="Transferring", total=total,
                  disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        if use_stdout:
            _cp_stdout(srcpath, xferpaths)
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
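When no transcoding is requested, the final else branch reduces to a raw pass-through copy: raw=True on get returns the stored bytes without decompressing them, and raw=True on put writes them back untouched. A minimal sketch of that single-file case, with hypothetical local source and destination paths:

from cloudfiles import CloudFiles

cfsrc = CloudFiles("file:///tmp/cloudfiles/src")
cfdest = CloudFiles("file:///tmp/cloudfiles/dst")

if cfsrc.exists("info"):
    blob = cfsrc.get("info", raw=True)   # stored bytes, no decompression
    cfdest.put("info", blob, raw=True)   # written back verbatim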