class SkeletonizeOperator(OperatorBase):
    """Create skeleton files from segmentation."""
    def __init__(self, output_path, name: str = 'skeletonize'):
        """
        Parameters
        ------------
        output_path:
            where to put the skeleton files
        name:
            operator name.
        """
        super().__init__(name=name)
        self.storage = CloudFiles(output_path)

    def __call__(self, seg, voxel_size):
        if seg is None:
            print('no segmentation, skip computation.')
            return None

        logging.info('skeletonize segmentation...')
        seg = Segmentation.from_chunk(seg)
        skels = seg.skeletonize(voxel_size)
        bbox_str = seg.bbox.to_filename()
        for neuron_id, skel in skels.items():
            file_name = f'{neuron_id}:{bbox_str}'
            self.storage.put(file_name, skel.to_precomputed())
        return skels
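# Hypothetical usage sketch (not from the original sources) of the operator
# above. The output path and voxel size are made-up values; `seg_chunk` stands
# in for whatever segmentation chunk object the surrounding pipeline provides.
def run_skeletonize_example(seg_chunk):
    op = SkeletonizeOperator('file:///tmp/skeletons')
    # voxel size in z, y, x nanometers (illustrative values)
    skels = op(seg_chunk, voxel_size=(40, 4, 4))
    return skels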
def test_compress_level(compression_method):
    from cloudfiles import CloudFiles, exceptions
    filepath = "/tmp/cloudfiles/compress_level"
    url = "file://" + filepath

    content = b'some_string' * 1000

    compress_levels = range(1, 9, 2)
    for compress_level in compress_levels:
        cf = CloudFiles(url, num_threads=5)
        cf.put('info', content, compress=compression_method, compression_level=compress_level)

        retrieved = cf.get('info')
        assert content == retrieved

        conn = cf._get_connection()
        _, encoding, server_md5, hash_type = conn.get_file("info")
        assert encoding == compression_method
        assert hash_type in ('md5', None)

        assert cf.get('nonexistentfile') is None

        rmtree(filepath)
def MultiResUnshardedMeshMergeTask(
    cloudpath:str,
    prefix:str,
    cache_control:bool = False,
    draco_compression_level:int = 1,
    mesh_dir:Optional[str] = None,
    num_lod:int = 1,
    progress:bool = False,
):
    cv = CloudVolume(cloudpath)

    if mesh_dir is None and 'mesh' in cv.info:
        mesh_dir = cv.info['mesh']

    files_per_label = get_mesh_filenames_subset(
        cloudpath, mesh_dir, prefix
    )

    cf = CloudFiles(cv.meta.join(cloudpath, mesh_dir))
    for label, filenames in tqdm(files_per_label.items(), disable=(not progress)):
        files = cf.get(filenames)
        # we should handle draco as well
        files = [ Mesh.from_precomputed(f["content"]) for f in files ]
        (manifest, mesh) = process_mesh(
            cv, label, files,
            num_lod, draco_compression_level
        )
        cf.put(f"{label}.index", manifest.to_binary(), cache_control="no-cache")
        cf.put(f"{label}", mesh, cache_control="no-cache")
def write_to_cloud_bucket(dst_dir, img_arr, extension="tif"):
    from io import BytesIO

    from cloudfiles import CloudFiles
    from PIL import Image

    cf = CloudFiles(dst_dir)
    for k in range(img_arr.shape[2]):
        img = Image.fromarray(img_arr[:, :, k].T)
        img_bytes = BytesIO()
        img.save(img_bytes, format="tiff" if extension == "tif" else extension)
        cf.put("{0:03d}.{1}".format(k + 1, extension), img_bytes.getvalue())
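# Illustrative usage of write_to_cloud_bucket above (not from the original
# sources): write a small random image stack to a local "bucket". The path and
# array shape are made-up values; any CloudFiles-supported URL (gs://, s3://,
# file://) works the same way.
def write_example_stack():
    import numpy as np

    img_arr = np.random.randint(0, 255, size=(64, 64, 4), dtype=np.uint8)
    write_to_cloud_bucket("file:///tmp/example_stack", img_arr, extension="tif")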
def execute(self): corgie_logger.info( f"Skeletonizing {self.seg_layer} at MIP{self.mip}, region: {self.bcube}" ) seg_data = self.seg_layer.read(bcube=self.bcube, mip=self.mip, timestamp=self.timestamp) resolution = self.seg_layer.cv[self.mip].resolution skeletons = kimimaro.skeletonize( seg_data, self.teasar_params, object_ids=self.object_ids, anisotropy=resolution, dust_threshold=self.dust_threshold, progress=False, fix_branching=self.fix_branching, fix_borders=self.fix_borders, fix_avocados=self.fix_avocados, ).values() minpt = self.bcube.minpt(self.mip) for skel in skeletons: skel.vertices[:] += minpt * resolution cf = CloudFiles(self.dst_path) for skel in skeletons: path = "{}:{}".format(skel.id, self.bcube.to_filename(self.mip)) cf.put( path=path, content=pickle.dumps(skel), compress="gzip", content_type="application/python-pickle", cache_control=False, )
def test_isdir(s3, protocol):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "isdir")

    cf = CloudFiles(url, num_threads=5)
    assert not cf.isdir()

    content = b'some_string'
    cf.put('info', content, compress=None)
    assert cf.isdir()

    cf.delete('info')
def _upload_batch(self, meshes, bbox):
    cf = CloudFiles(self.layer_path, progress=self.options['progress'])
    mbuf = MapBuffer(meshes, compress="br")

    cf.put(
        f"{self._mesh_dir}/{bbox.to_filename()}.frags",
        content=mbuf.tobytes(),
        compress=None,
        content_type="application/x.mapbuffer",
        cache_control=False,
    )
def upload_batch(self, vol, path, bbox, skeletons):
    mbuf = MapBuffer(skeletons, compress="br", tobytesfn=lambda skel: skel.to_precomputed())

    cf = CloudFiles(path, progress=vol.progress)
    cf.put(
        path="{}.frags".format(bbox.to_filename()),
        content=mbuf.tobytes(),
        compress=None,
        content_type="application/x-mapbuffer",
        cache_control=False,
    )
class AggregateSkeletonFragmentsOperator(OperatorBase):
    """Merge skeleton fragments for Neuroglancer visualization."""
    def __init__(self, fragments_path: str, output_path: str,
                 name: str = 'aggregate-skeleton-fragments'):
        """
        Parameters
        ------------
        fragments_path:
            path to store fragment files
        output_path:
            save the merged skeleton file here.
        """
        super().__init__(name=name)
        self.fragments_storage = CloudFiles(fragments_path)
        self.output_storage = CloudFiles(output_path)

    def __call__(self, prefix: str):
        logging.info(f'aggregate skeletons with prefix of {prefix}')

        id2filenames = defaultdict(list)
        for filename in self.fragments_storage.list_files(prefix=prefix):
            filename = os.path.basename(filename)
            # `match` anchors at the beginning of the string (^);
            # `search` scans the whole string for the first match.
            matches = re.search(r'(\d+):', filename)
            if not matches:
                continue
            # skeleton ID
            skl_id = int(matches.group(0)[:-1])
            id2filenames[skl_id].append(filename)

        for skl_id, filenames in id2filenames.items():
            logging.info(f'skeleton id: {skl_id}')
            frags = self.fragments_storage.get(filenames)
            frags = [PrecomputedSkeleton.from_precomputed(x['content']) for x in frags]
            skel = PrecomputedSkeleton.simple_merge(frags).consolidate()
            skel = kimimaro.postprocess(skel, dust_threshold=1000, tick_threshold=3500)
            self.output_storage.put(
                file_path=str(skl_id),
                content=skel.to_precomputed(),
            )
            # the last few hundred files will not be uploaded without sleeping!
            sleep(0.01)
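# Small self-contained check (not from the original sources) of the fragment
# naming convention the aggregator above relies on: skeleton fragments are
# stored as "<skeleton_id>:<bbox>" and the id is recovered with a regex. The
# example filename is made up.
def example_parse_fragment_name():
    import re

    filename = '1234:0-1024_0-1024_0-100'
    matches = re.search(r'(\d+):', filename)
    assert matches is not None
    # group(1) is just the digits, so there is no trailing ':' to strip
    assert int(matches.group(1)) == 1234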
def MultiResShardedMeshMergeTask(
    cloudpath:str,
    shard_no:str,
    draco_compression_level:int = 1,
    mesh_dir:Optional[str] = None,
    num_lod:int = 1,
    spatial_index_db:Optional[str] = None,
    progress:bool = False
):
    cv = CloudVolume(cloudpath, spatial_index_db=spatial_index_db)
    cv.mip = cv.mesh.meta.mip
    if mesh_dir is None and 'mesh' in cv.info:
        mesh_dir = cv.info['mesh']

    # This looks messy because we are trying to avoid retaining
    # unnecessary memory. In the original skeleton iteration, this was
    # using 50 GB+ memory on minnie65. So it makes sense to be just
    # as careful with a heavier type of object.
    locations = locations_for_labels(cv, labels_for_shard(cv, shard_no))
    filenames = set(itertools.chain(*locations.values()))
    labels = set(locations.keys())
    del locations
    meshes = collect_mesh_fragments(
        cv, labels, filenames, mesh_dir, progress
    )
    del filenames

    # important to iterate this way to avoid
    # creating a copy of meshes vs. { ... for ... in }
    for label in labels:
        meshes[label] = Mesh.concatenate(*meshes[label])
    del labels

    fname, shard = create_mesh_shard(
        cv, meshes, num_lod,
        draco_compression_level, progress, shard_no
    )
    del meshes

    if shard is None:
        return

    cf = CloudFiles(cv.mesh.meta.layerpath)
    cf.put(
        fname,
        shard,
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
def test_delete(s3, green, protocol):
    from cloudfiles import CloudFiles, exceptions
    if protocol == 'file':
        url = "file:///tmp/cloudfiles/delete"
    else:
        url = "{}://cloudfiles/delete".format(protocol)

    cf = CloudFiles(url, green=green, num_threads=1)
    content = b'some_string'

    cf.put('delete-test', content, compress=None, cache_control='no-cache')
    cf.put('delete-test-compressed', content, compress='gzip', cache_control='no-cache')
    assert cf.get('delete-test') == content

    cf.delete('delete-test')
    assert cf.get('delete-test') is None

    assert cf.get('delete-test-compressed') == content
    cf.delete('delete-test-compressed')
    assert cf.get('delete-test-compressed') is None

    # Reset for batch delete
    cf.put('delete-test', content, compress=None, cache_control='no-cache')
    cf.put('delete-test-compressed', content, compress='gzip', cache_control='no-cache')
    assert cf.get('delete-test') == content
    assert cf.get('delete-test-compressed') == content

    cf.delete(['delete-test', 'delete-nonexistent', 'delete-test-compressed'])
    assert cf.get('delete-test') is None
    assert cf.get('delete-test-compressed') is None
def execute(self): corgie_logger.info( f"Generate new skeleton vertices task for id {self.skeleton_id_str}" ) skeleton = get_skeleton(self.src_path, self.skeleton_id_str) if self.vertex_sort: vertex_sort = skeleton.vertices[:, 2].argsort() else: vertex_sort = np.arange(0, len(skeleton.vertices)) number_vertices = len(skeleton.vertices) index_points = list(range(0, number_vertices, self.task_vertex_size)) cf = CloudFiles(f"{self.dst_path}") array_filenames = [] for i in range(len(index_points)): start_index = index_points[i] if i + 1 == len(index_points): end_index = number_vertices else: end_index = index_points[i + 1] array_filenames.append( f"intermediary_arrays/{self.skeleton_id_str}:{start_index}-{end_index}" ) array_files = cf.get(array_filenames) # Dict to make sure arrays are concatenated in correct order array_dict = {} for array_file in array_files: array_dict[array_file["path"]] = pickle.loads( array_file["content"]) array_arrays = [] for array_filename in array_filenames: array_arrays.append(array_dict[array_filename]) array_arrays = np.concatenate(array_arrays) # Restore the correct order of the vertices restore_sort = vertex_sort.argsort() new_vertices = array_arrays[restore_sort] new_skeleton = Skeleton( vertices=new_vertices, edges=skeleton.edges, radii=skeleton.radius, vertex_types=skeleton.vertex_types, space=skeleton.space, transform=skeleton.transform, ) cf.put( path=self.skeleton_id_str, content=new_skeleton.to_precomputed(), compress="gzip", )
def test_exists(s3, protocol):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "exists")

    cf = CloudFiles(url, num_threads=5)
    content = b'some_string'
    cf.put('info', content, compress=None)

    assert cf.exists('info')
    assert not cf.exists('doesntexist')

    assert cf.exists(['info'])['info']
    assert not cf.exists(['doesntexist'])['doesntexist']

    cf.delete('info')
def test_access_non_cannonical_minimal_path(s3, protocol):
    from cloudfiles import CloudFiles, exceptions
    if protocol == 'file':
        url = "file:///tmp/"
    else:
        url = "{}://cloudfiles/".format(protocol)

    cf = CloudFiles(url, num_threads=5)
    content = b'some_string'
    cf.put('info', content, compress=None)
    # time.sleep(0.5)  # sometimes it takes a moment for google to update the list

    assert cf.get('info') == content
    assert cf.get('nonexistentfile') is None

    cf.delete('info')
def test_list(s3, protocol):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "list")

    cf = CloudFiles(url, num_threads=5)
    content = b'some_string'

    cf.put('info1', content, compress=None)
    cf.put('info2', content, compress=None)
    cf.put('build/info3', content, compress=None)
    cf.put('level1/level2/info4', content, compress=None)
    cf.put('info5', content, compress='gzip')
    cf.put('info.txt', content, compress=None)

    # time.sleep(1)  # sometimes it takes a moment for google to update the list
    assert set(cf.list(prefix='')) == set([
        'build/info3', 'info1', 'info2', 'level1/level2/info4', 'info5', 'info.txt'
    ])
    assert set(list(cf)) == set(cf.list(prefix=''))

    assert set(cf.list(prefix='inf')) == set(['info1', 'info2', 'info5', 'info.txt'])
    assert set(cf.list(prefix='info1')) == set(['info1'])
    assert set(cf.list(prefix='build')) == set(['build/info3'])
    assert set(cf.list(prefix='build/')) == set(['build/info3'])
    assert set(cf.list(prefix='level1/')) == set(['level1/level2/info4'])
    assert set(cf.list(prefix='nofolder/')) == set([])

    # Tests (1)
    assert set(cf.list(prefix='', flat=True)) == set(['info1', 'info2', 'info5', 'info.txt'])
    assert set(cf.list(prefix='inf', flat=True)) == set(['info1', 'info2', 'info5', 'info.txt'])
    # Tests (2)
    assert set(cf.list(prefix='build', flat=True)) == set([])
    # Tests (3)
    assert set(cf.list(prefix='level1/', flat=True)) == set([])
    assert set(cf.list(prefix='build/', flat=True)) == set(['build/info3'])
    # Tests (4)
    assert set(cf.list(prefix='build/inf', flat=True)) == set(['build/info3'])

    for file_path in ('info1', 'info2', 'build/info3', 'level1/level2/info4', 'info5', 'info.txt'):
        cf.delete(file_path)

    if protocol == 'file':
        rmtree("/tmp/cloudfiles/list")
def test_read_write(s3, protocol, num_threads, green):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "rw")

    cf = CloudFiles(url, num_threads=num_threads, green=green)
    content = b'some_string'
    cf.put('info', content, compress=None, cache_control='no-cache')
    cf['info2'] = content

    assert cf.get('info') == content
    assert cf['info2'] == content
    assert cf['info2', 0:3] == content[0:3]
    assert cf['info2', :] == content[:]
    assert cf.get('nonexistentfile') is None

    assert cf.get('info', return_dict=True) == { "info": content }
    assert cf.get(['info', 'info2'], return_dict=True) == {
        "info": content, "info2": content
    }

    del cf['info2']
    assert cf.exists('info2') == False

    num_infos = max(num_threads, 1)
    results = cf.get(['info' for i in range(num_infos)])

    assert len(results) == num_infos
    assert results[0]['path'] == 'info'
    assert results[0]['content'] == content
    assert all(map(lambda x: x['error'] is None, results))
    assert cf.get(['nonexistentfile'])[0]['content'] is None

    cf.delete('info')

    cf.put_json('info', {'omg': 'wow'}, cache_control='no-cache')
    results = cf.get_json('info')
    assert results == {'omg': 'wow'}

    cf.delete('info')

    if protocol == 'file':
        rmtree(url)
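# A minimal standalone sketch of the CloudFiles calls exercised by the tests
# above, run against a local file:// path instead of the s3/gs fixtures. The
# path is illustrative.
def cloudfiles_quickstart():
    from cloudfiles import CloudFiles

    cf = CloudFiles("file:///tmp/cloudfiles/quickstart")
    cf.put("hello", b"world", compress="gzip")     # stored gzip-compressed
    assert cf.get("hello") == b"world"             # transparently decompressed
    cf.put_json("meta", {"answer": 42})
    assert cf.get_json("meta") == {"answer": 42}
    assert set(cf.list()) == {"hello", "meta"}
    cf.delete(["hello", "meta"])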
def upload_meshes(self, meshes):
    if len(meshes) == 0:
        return
    reader = self.cv.mesh.readers[self.layer_id]
    shard_binary = reader.spec.synthesize_shard(meshes)
    # the shard filename is derived from the chunk position,
    # so any label inside this L2 chunk will do
    shard_filename = reader.get_filename(list(meshes.keys())[0])

    cf = CloudFiles(self.cv.cloudpath)
    cf.put(
        f"{self.get_mesh_dir()}/initial/{self.layer_id}/{shard_filename}",
        shard_binary,
        compress=None,
        content_type="application/octet-stream",
        cache_control="no-cache",
    )
def commit_provenance(self):
    """
    Save the current provenance object as JSON
    into cache (if enabled) and primary storage.
    """
    prov = self.provenance.serialize()

    # hack to pretty print provenance files
    prov = json.loads(prov)
    prov = jsonify(prov, sort_keys=True, indent=2, separators=(',', ': '))

    # need to use put vs put_json to preserve formatting
    cf = CloudFiles(self.cloudpath)
    cf.put('provenance', prov, cache_control='no-cache', content_type='application/json')

    if self.cache:
        self.cache.maybe_cache_provenance()
def test_compression(s3, protocol, method, green):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "compress")

    cf = CloudFiles(url, num_threads=5, green=green)
    content = b'some_string'

    cf.put('info', content, compress=method)
    retrieved = cf.get('info')
    assert content == retrieved
    assert cf.get('nonexistentfile') is None

    try:
        cf.put('info', content, compress='nonexistent')
        assert False
    except ValueError:
        pass

    cf.delete(iter(cf))
def commit_info(self):
    """
    Save the current info dict as JSON into cache and primary storage.

    Raises KeyError if an encoding of 'compressed_segmentation' is specified
    without 'compressed_segmentation_block_size'.

    Raises ValueError if 'compressed_segmentation' is specified
    and the data type is not uint32 or uint64.
    """
    for scale in self.scales:
        if scale['encoding'] == 'compressed_segmentation':
            if 'compressed_segmentation_block_size' not in scale.keys():
                raise KeyError("""
'compressed_segmentation_block_size' must be set if compressed_segmentation
is set as the encoding.

A typical value for compressed_segmentation_block_size is (8,8,8)

Info file specification:
https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/README.md#info-json-file-specification
""")
            elif self.data_type not in ('uint32', 'uint64'):
                raise ValueError(
                    "compressed_segmentation can only be used with uint32 and uint64 data types."
                )

    infojson = jsonify(self.info, sort_keys=True, indent=2, separators=(',', ': '))

    # use put instead of put_json to preserve formatting
    cf = CloudFiles(self.cloudpath)
    cf.put('info', infojson, cache_control='no-cache', content_type='application/json')

    if self.cache:
        self.cache.maybe_cache_info()
class MergeSkeletonTask(scheduling.Task):
    def __init__(self, dst_path, mip, dust_threshold, tick_threshold, prefix=""):
        super().__init__(self)
        self.dst_path = dst_path
        self.cf = CloudFiles(self.dst_path)
        self.mip = mip
        self.dust_threshold = dust_threshold
        self.tick_threshold = tick_threshold
        self.prefix = prefix

    def execute(self):
        corgie_logger.info(f"Merging skeletons at {self.dst_path}")
        fragment_filenames = self.cf.list(prefix=self.prefix, flat=True)
        skeleton_files = self.cf.get(fragment_filenames)
        skeletons = defaultdict(list)
        for skeleton_file in skeleton_files:
            try:
                colon_index = skeleton_file["path"].index(":")
            except ValueError:
                # File is a full skeleton, not a fragment
                continue
            seg_id = skeleton_file["path"][0:colon_index]
            skeleton_fragment = pickle.loads(skeleton_file["content"])
            if not skeleton_fragment.empty():
                skeletons[seg_id].append(skeleton_fragment)

        for seg_id, skeleton_fragments in skeletons.items():
            skeleton = PrecomputedSkeleton.simple_merge(skeleton_fragments).consolidate()
            skeleton = kimimaro.postprocess(skeleton, self.dust_threshold, self.tick_threshold)
            skeleton.id = int(seg_id)
            self.cf.put(path=seg_id, content=skeleton.to_precomputed(), compress="gzip")
            corgie_logger.info(f"Finished skeleton {seg_id}")
def test_size(s3, protocol, compress, green):
    from cloudfiles import CloudFiles, exceptions, compression
    url = compute_url(protocol, 'size')

    cf = CloudFiles(url)
    content = b'some_string'

    cf.put('info', content, compress=compress, cache_control='no-cache')
    cf['info2'] = content
    cf.put('zero', b'', compress=None, cache_control='no-cache')

    compressed_content = compression.compress(content, compress)

    assert cf.size('info') == len(compressed_content)
    assert cf.size(['info', 'info2']) == {
        "info": len(compressed_content),
        "info2": len(content)
    }
    assert cf.size('nonexistent') is None
    assert cf.size('zero') == 0

    cf.delete(['info', 'info2', 'zero'])
def MultiResShardedFromUnshardedMeshMergeTask(
    src:str,
    dest:str,
    shard_no:str,
    cache_control:bool = False,
    draco_compression_level:int = 1,
    mesh_dir:Optional[str] = None,
    num_lod:int = 1,
    progress:bool = False,
):
    cv_src = CloudVolume(src)

    if mesh_dir is None and 'mesh' in cv_src.info:
        mesh_dir = cv_src.info['mesh']

    cv_dest = CloudVolume(dest, mesh_dir=mesh_dir, progress=True)

    labels = labels_for_shard(cv_dest, shard_no)
    meshes = cv_src.mesh.get(labels, fuse=False)
    del labels

    fname, shard = create_mesh_shard(
        cv_dest, meshes, num_lod,
        draco_compression_level, progress, shard_no
    )
    del meshes

    if shard is None:
        return

    cf = CloudFiles(cv_dest.mesh.meta.layerpath)
    cf.put(
        fname, shard,  # fname, data
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
class MeshOperator(OperatorBase):
    """Create mesh files from segmentation."""
    def __init__(self,
                 output_path: str,
                 output_format: str,
                 mip: int = None,
                 voxel_size: tuple = (1, 1, 1),
                 simplification_factor: int = 100,
                 max_simplification_error: int = 8,
                 manifest: bool = False,
                 shard: bool = False,
                 name: str = 'mesh'):
        """
        Parameters
        ------------
        output_path:
            path to store mesh files
        output_format:
            format of output {'ply', 'obj', 'precomputed'}
        voxel_size:
            size of voxels
        simplification_factor:
            mesh simplification factor.
        max_simplification_error:
            maximum tolerance error of meshing.
        manifest:
            create manifest files or not. This should not be True if you are
            only doing meshing for a segmentation chunk.
        name:
            operator name.

        Note that some functions are adopted from igneous.
        """
        super().__init__(name=name)
        self.simplification_factor = simplification_factor
        self.max_simplification_error = max_simplification_error
        # zmesh uses fortran order, translate zyx to xyz
        self.output_path = output_path
        self.output_format = output_format
        self.manifest = manifest
        self.shard = shard

        if manifest:
            assert output_format == 'precomputed'

        if output_format == 'precomputed':
            # adjust the mesh path according to info
            vol = CloudVolume(self.output_path, mip)
            info = vol.info
            if 'mesh' not in info:
                # add mesh to info and update it
                info['mesh'] = 'mesh_err_{}'.format(max_simplification_error)
                vol.info = info
                vol.commit_info()
            self.mesh_path = os.path.join(output_path, info['mesh'])
            self.voxel_size = vol.resolution[::-1]
            self.mesher = Mesher(vol.resolution)
        else:
            self.mesh_path = output_path
            self.mesher = Mesher(voxel_size[::-1])

        self.storage = CloudFiles(self.mesh_path)

    def _get_mesh_data(self, obj_id, offset):
        mesh = self.mesher.get_mesh(
            obj_id,
            normals=False,
            simplification_factor=self.simplification_factor,
            max_simplification_error=self.max_simplification_error)
        # delete high resolution mesh
        self.mesher.erase(obj_id)

        if self.output_format == 'precomputed':
            mesh.vertices[:] += offset[::-1] * self.voxel_size[::-1]
            data = mesh.to_precomputed()
        elif self.output_format == 'ply':
            data = mesh.to_ply()
        elif self.output_format == 'obj':
            data = mesh.to_obj()
        else:
            raise NotImplementedError

        mesh_bounds = Bbox(
            np.amin(mesh.vertices, axis=0),
            np.amax(mesh.vertices, axis=0)
        )
        return data, mesh_bounds

    def _get_file_name(self, bbox, obj_id):
        if self.output_format == 'precomputed':
            # bbox is in z,y,x order, should transform to x,y,z order
            bbox2 = Bbox.from_slices(bbox.to_slices()[::-1])
            return '{}:0:{}'.format(obj_id, bbox2.to_filename())
        elif self.output_format == 'ply':
            return '{}.ply'.format(obj_id)
        elif self.output_format == 'obj':
            return '{}.obj'.format(obj_id)
        else:
            raise ValueError('unsupported format!')

    def __call__(self, seg: Chunk):
        """Mesh the segmentation.

        Parameters
        ------------
        seg:
            3D segmentation chunk.
        """
        if seg is None:
            return
        assert isinstance(seg, Chunk)
        assert seg.ndim == 3
        assert np.issubdtype(seg.dtype, np.integer)

        bbox = seg.bbox
        # use ndarray after getting the bounding box
        seg = seg.array

        logging.info('computing meshes from segmentation...')
        self.mesher.mesh(seg)

        logging.info('write mesh to storage...')
        if self.shard:
            assert 'precomputed' in self.output_format
            meshes = []
            mesh_bboxes = {}
            for obj_id in self.mesher.ids():
                data, mesh_bbox = self._get_mesh_data(obj_id, bbox.minpt)
                meshes.append(data)
                mesh_bboxes[obj_id] = mesh_bbox.to_list()

            # use sharded format by default!
            self.storage.put(
                f"{self.mesh_path}/{bbox.to_filename()}.frags",
                content=pickle.dumps(meshes),
                compress='gzip',
                content_type="application/python-pickle",
                cache_control=False,
            )
            self.storage.put_json(
                f"{self.mesh_path}/{bbox.to_filename()}.spatial",
                mesh_bboxes,
                compress='gzip',
                cache_control=False,
            )
        else:
            if 'precomputed' in self.output_format:
                compress = 'gzip'
            else:
                compress = None

            for obj_id in tqdm(self.mesher.ids(), desc='writing out meshes'):
                # print('object id: ', obj_id)
                data, _ = self._get_mesh_data(obj_id, bbox.minpt)
                file_name = self._get_file_name(bbox, obj_id)
                self.storage.put(
                    file_name, data,
                    cache_control=None,
                    compress=compress
                )

                # create manifest file
                if self.manifest:
                    self.storage.put_json(
                        f'{obj_id}:0',
                        {'fragments': [file_name]}
                    )
                    self.storage.put_json(
                        'info',
                        {"@type": "neuroglancer_legacy_mesh"}
                    )

        # release memory
        self.mesher.clear()
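# Hypothetical usage sketch (not from the original sources) of the MeshOperator
# above in per-object (non-sharded) mode. The output path, format, and voxel
# size are made-up values; `seg_chunk` stands in for the segmentation Chunk the
# surrounding pipeline provides.
def run_mesh_example(seg_chunk):
    op = MeshOperator(
        output_path='file:///tmp/meshes',
        output_format='obj',
        voxel_size=(40, 4, 4),  # z, y, x (illustrative)
    )
    op(seg_chunk)  # writes one .obj file per object id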
def execute(self): corgie_logger.info( f"Starting transform skeleton vertices task for id {self.skeleton_id_str}" ) skeleton = get_skeleton(self.src_path, self.skeleton_id_str) if self.vertex_sort: vertex_sort = skeleton.vertices[:, 2].argsort() else: vertex_sort = np.arange(0, len(skeleton.vertices)) # How many vertices we will use at once to get a bcube to download from the vector field vertex_process_size = 50 vertices_to_transform = skeleton.vertices[ vertex_sort[self.start_vertex_index:self.end_vertex_index]] index_vertices = list( range(0, self.number_vertices, vertex_process_size)) new_vertices = [] for i in range(len(index_vertices)): if i + 1 == len(index_vertices): current_batch_vertices = vertices_to_transform[ index_vertices[i]:] else: current_batch_vertices = vertices_to_transform[ index_vertices[i]:index_vertices[i + 1]] field_resolution = np.array( self.vector_field_layer.resolution(self.field_mip)) bcube = get_bcube_from_vertices( vertices=current_batch_vertices, resolution=field_resolution, mip=self.field_mip, ) field_data = self.vector_field_layer.read( bcube=bcube, mip=self.field_mip).permute(2, 3, 0, 1) current_batch_vertices_to_mip = current_batch_vertices / field_resolution bcube_minpt = bcube.minpt(self.field_mip) field_indices = current_batch_vertices_to_mip.astype( np.int) - bcube_minpt vector_resolution = ( self.vector_field_layer.resolution(0) * np.array([ 2**(self.field_mip - self.vector_field_layer.data_mip), 2**(self.field_mip - self.vector_field_layer.data_mip), 1, ]) if self.mip0_field else self.vector_field_layer.resolution( self.field_mip)) vectors_to_add = [] corgie_logger.info(f"{field_data.shape}, {field_indices.max(0)}") for i in range(len(field_data.shape) - 1): if field_indices.max(0)[i] >= field_data.shape[i]: import pdb pdb.set_trace() for cur_field_index in field_indices: vector_at_point = field_data[cur_field_index[0], cur_field_index[1], cur_field_index[2]] # Each vector is stored in [Y,X] format vectors_to_add.append([ int(vector_resolution[0] * vector_at_point[1].item()), int(vector_resolution[1] * vector_at_point[0].item()), 0, ]) vectors_to_add = np.array(vectors_to_add) current_batch_warped_vertices = current_batch_vertices + vectors_to_add new_vertices.append(current_batch_warped_vertices) new_vertices = np.concatenate(new_vertices) cf = CloudFiles(f"{self.dst_path}/intermediary_arrays/") cf.put( path= f"{self.skeleton_id_str}:{self.start_vertex_index}-{self.end_vertex_index}", content=pickle.dumps(new_vertices), )
def maybe_cache_provenance(self):
    if self.enabled and self.meta.provenance:
        cf = CloudFiles('file://' + self.path)
        cf.put('provenance', self.meta.provenance.serialize().encode('utf8'))
def task_generator(self):
    skeletons = self.get_skeletons(self.src_path)
    if self.z_start is not None and self.z_end is not None:
        bbox = Bbox((0, 0, self.z_start*40), (10e8, 10e8, self.z_end*40))
    else:
        bbox = None
    lengths = []
    for skeleton_id_str, sk in skeletons.items():
        deleted = 1
        if bbox is not None:
            sk = sk.crop(bbox)
        while deleted != 0:
            deleted = 0
            verts = sk.vertices
            vert_zs = verts[:, 2].copy()
            vert_zs /= 40  # hack
            vert_zs = vert_zs.astype(np.int32)
            bad_verts = np.where(np.isin(vert_zs, self.bad_sections))[0]

            # Find the next set of vertices to remove so that we don't mess
            # with indices; remove them in the next pass
            deleted = 0
            for bv in bad_verts:
                neighbors = get_skeleton_vert_neighbor_ids(sk, bv)
                # filter out other bad vertices
                bad_index = np.where(np.isin(neighbors, bad_verts))
                neighbors = np.delete(neighbors, bad_index)
                assert (np.isin(neighbors.flatten(), bad_verts).sum() == 0)
                # if there's a good neighbor, reassign all the endpoints to it,
                # else wait until the next round
                if len(neighbors) != 0:
                    replacement = neighbors[0]
                    assert (replacement not in bad_verts)
                    ed = np.expand_dims(sk.edges, -1)
                    ed[np.where(ed == bv)] = replacement
                    sk.edges = ed.squeeze(-1)
                    # this leaves self-edges
                    deleted += 1

            # This removes self edges
            sk = sk.consolidate()
            new_v_count = sk.vertices.shape[0]
            verts = sk.vertices
            vert_zs = verts[:, 2].copy()
            vert_zs /= 40  # hack
            vert_zs = vert_zs.astype(np.int32)
            bad_verts = np.where(np.isin(vert_zs, self.bad_sections))[0]

        lengths.append((skeleton_id_str, sk.cable_length()))
        cf = CloudFiles(self.dst_path)
        cf.put(
            path=skeleton_id_str,
            content=sk.to_precomputed(),
            compress="gzip",
        )

    for n, l in sorted(lengths):
        print(l)
    import sys; sys.exit(1)
def _cp_single(ctx, source, destination, recursive, compression, progress, block_size):
    use_stdin = (source == '-')

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x.replace(prefix, "") for x in xferpaths]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix, flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size, xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        fn = partial(_cp, srcpath, destpath, compression, False, block_size)
        with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
def _cp_single(ctx, source, destination, recursive, compression, progress, block_size):
    use_stdin = (source == '-')
    use_stdout = (destination == '-')

    if use_stdout:
        progress = False  # can't have the progress bar interfering

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    # For more information see:
    # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
    # Try to follow cp rules. If the directory exists,
    # copy the base source directory into the dest directory.
    # If the directory does not exist, then we copy into
    # the dest directory.
    # Both x* and x** should not copy the base directory.
    if recursive and nsrc[-1] != "*":
        if CloudFiles(ndest).isdir():
            if nsrc[-1] == '/':
                nsrc = nsrc[:-1]
            ndest = cloudpathjoin(ndest, os.path.basename(nsrc))

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x.replace(prefix, "") for x in xferpaths]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix, flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size, xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        if use_stdout:
            fn = partial(_cp_stdout, srcpath)
        else:
            fn = partial(_cp, srcpath, destpath, compression, False, block_size)

        with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        if use_stdout:
            _cp_stdout(srcpath, xferpaths)
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
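# Condensed sketch (not part of the actual CLI) of the single-file branch
# above: copy one object between two CloudFiles locations using raw=True so the
# stored bytes are moved without a decompress/recompress round trip, as
# _cp_single does. Unlike the CLI, this skips transcode(), so it assumes the
# compression encoding should be left unchanged. Paths are made up.
def copy_one_file(src="file:///tmp/bucket_a", dest="file:///tmp/bucket_b", name="info"):
    from cloudfiles import CloudFiles

    blob = CloudFiles(src).get(name, raw=True)   # bytes exactly as stored
    CloudFiles(dest).put(name, blob, raw=True)   # re-uploaded verbatim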