def __init__(self,
             volume_path: str,
             mip: int,
             upload_log: bool = True,
             create_thumbnail: bool = False,
             name: str = 'save'):
    super().__init__(name=name)
    self.upload_log = upload_log
    self.create_thumbnail = create_thumbnail
    self.mip = mip
    # if not volume_path.startswith('precomputed://'):
    #     volume_path = 'precomputed://' + volume_path
    self.volume_path = volume_path
    # gevent.monkey.patch_all(thread=False)
    self.volume = CloudVolume(self.volume_path,
                              fill_missing=True,
                              bounded=False,
                              autocrop=True,
                              mip=self.mip,
                              cache=False,
                              green_threads=True,
                              progress=True)
    if upload_log:
        log_path = os.path.join(volume_path, 'log')
        self.log_storage = Storage(log_path)
class ViewerServerHandler(BaseHTTPRequestHandler):
    def __init__(self, cloudpath, *args):
        self.storage = Storage(cloudpath)
        BaseHTTPRequestHandler.__init__(self, *args)

    def __del__(self):
        self.storage.kill_threads()

    def do_GET(self):
        if self.path.find('..') != -1:
            raise ValueError("Relative paths are not allowed.")

        path = self.path[1:]
        data = self.storage.get_file(path)
        if data is None:
            self.send_response(404)
            self.end_headers()
            return

        self.send_response(200)
        self.serve_data(data)

    def serve_data(self, data):
        self.send_header('Content-type', 'application/octet-stream')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Content-length', str(len(data)))
        self.end_headers()
        self.wfile.write(data)
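# A minimal serving sketch, assuming the handler above; the layer path and port
# are hypothetical. functools.partial binds the cloudpath before HTTPServer
# supplies the usual (request, client_address, server) handler arguments:
#
#     from functools import partial
#     from http.server import HTTPServer
#
#     handler = partial(ViewerServerHandler, 'file:///tmp/my_dataset/image')
#     HTTPServer(('localhost', 8080), handler).serve_forever()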
class SkeletonizeOperator(OperatorBase):
    """Create skeleton files from segmentation."""
    def __init__(self, output_path, name: str = 'skeletonize'):
        """
        Parameters
        ------------
        output_path:
            where to put the skeleton files
        name:
            operator name.
        """
        super().__init__(name=name)
        self.storage = Storage(output_path)

    def __call__(self, seg, voxel_size):
        if seg is None:
            print('no segmentation, skip computation.')
            return None

        logging.info('skeletonize segmentation...')
        seg = Segmentation.from_chunk(seg)
        skels = seg.skeletonize(voxel_size)
        bbox_str = seg.bbox.to_filename()
        for neuron_id, skel in skels.items():
            file_name = f'{neuron_id}:{bbox_str}'
            self.storage.put_file(file_name, skel.to_precomputed())
        return skels
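# A minimal usage sketch, assuming the SkeletonizeOperator above; the output
# path and voxel size are hypothetical, and `seg` is a segmentation chunk
# produced earlier in the pipeline:
#
#     op = SkeletonizeOperator('file:///tmp/my_dataset/skeleton')
#     skels = op(seg, voxel_size=(40, 8, 8))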
def test_compression():
    urls = [
        "file:///tmp/removeme/compress",
        "gs://seunglab-test/cloudvolume/compress",
        "s3://seunglab-test/cloudvolume/compress"
    ]
    compression_tests = [
        '',
        None,
        True,
        False,
        'gzip',
    ]

    for url in urls:
        url = url + '-' + str(TEST_NUMBER)
        for method in compression_tests:
            with Storage(url, n_threads=5) as s:
                content = b'some_string'
                s.put_file('info', content, compress=method)
                s.wait()
                retrieved = s.get_file('info')
                assert content == retrieved
                assert s.get_file('nonexistentfile') is None

        with Storage(url, n_threads=5) as s:
            content = b'some_string'
            try:
                s.put_file('info', content, compress='nonexistent').wait()
                assert False
            except NotImplementedError:
                pass

    delete_layer("/tmp/removeme/compress-" + str(TEST_NUMBER))
class AggregateSkeletonFragmentsOperator(OperatorBase):
    """Merge skeleton fragments for Neuroglancer visualization."""
    def __init__(self,
                 fragments_path: str,
                 output_path: str,
                 name: str = 'aggregate-skeleton-fragments',
                 verbose: bool = True):
        """
        Parameters
        ------------
        fragments_path:
            path to store fragment files
        output_path:
            save the merged skeleton file here.
        """
        super().__init__(name=name, verbose=verbose)
        self.fragments_storage = Storage(fragments_path)
        self.output_storage = Storage(output_path)

    def __call__(self, prefix: str):
        if self.verbose:
            print('aggregate skeletons with prefix of ', prefix)

        id2filenames = defaultdict(list)
        for filename in self.fragments_storage.list_files(prefix=prefix):
            filename = os.path.basename(filename)
            # `match` only anchors at the beginning (^);
            # `search` scans the whole string for the pattern.
            matches = re.search(r'(\d+):', filename)
            if not matches:
                continue
            # skeleton ID
            skl_id = int(matches.group(1))
            id2filenames[skl_id].append(filename)

        for skl_id, filenames in id2filenames.items():
            if self.verbose:
                print('skeleton id: ', skl_id)
            frags = self.fragments_storage.get_files(filenames)
            frags = [
                PrecomputedSkeleton.from_precomputed(x['content'])
                for x in frags
            ]
            skel = PrecomputedSkeleton.simple_merge(frags).consolidate()
            skel = kimimaro.postprocess(skel,
                                        dust_threshold=1000,
                                        tick_threshold=3500)
            self.output_storage.put_file(
                file_path=str(skl_id),
                content=skel.to_precomputed(),
            )
            # the last few hundred files will not be uploaded without sleeping!
            sleep(0.01)
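# A minimal usage sketch, assuming the aggregation operator above; the bucket
# paths and prefix are hypothetical. The prefix selects which fragment files
# get merged into a single skeleton per id:
#
#     op = AggregateSkeletonFragmentsOperator(
#         'gs://my-bucket/skeleton_fragments', 'gs://my-bucket/skeleton')
#     op('77')  # merge all fragments whose file names start with "77"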
def __init__(self, output_path, name: str = 'skeletonize'):
    """
    Parameters
    ------------
    output_path:
        where to put the skeleton files
    name:
        operator name.
    """
    super().__init__(name=name)
    self.storage = Storage(output_path)
def test_data_layer_provenance():
    fs = '/tmp/removeme/provenance/layer/'
    delete_layer(fs)

    prov = DataLayerProvenance()
    prov.description = 'example dataset'
    prov.sources = ['gs://seunglab-test/example/image']
    prov.processing = [
        {'method': 'convnet', 'by': '*****@*****.**'},
    ]
    prov.owners = ['*****@*****.**']

    with Storage('file://' + fs) as stor:
        stor.put_file('provenance', prov.serialize(),
                      content_type='application/json')

    path = os.path.join(fs, 'provenance')
    with open(path, 'r') as f:
        data = json.loads(f.read())

    assert data == {
        'description': 'example dataset',
        'sources': ['gs://seunglab-test/example/image'],
        'processing': [
            {'method': 'convnet', 'by': '*****@*****.**'},
        ],
        'owners': ['*****@*****.**'],
    }

    with Storage('file://' + fs) as stor:
        provjson = stor.get_file('provenance')
        provjson = provjson.decode('utf-8')
        prov = DataLayerProvenance().from_json(provjson)

    assert prov.description == 'example dataset'
    assert prov.sources == ['gs://seunglab-test/example/image']
    assert prov.processing == [{'method': 'convnet', 'by': '*****@*****.**'}]
    assert prov.owners == ['*****@*****.**']
def test_compression():
    urls = [
        "file:///tmp/removeme/compress",
        "gs://seunglab-test/cloudvolume/compress",
        "s3://seunglab-test/cloudvolume/compress"
    ]
    compression_tests = [
        '',
        None,
        True,
        False,
        'gzip',
        'br',
    ]

    for url in urls:
        url = url + '-' + str(TEST_NUMBER)
        for method in compression_tests:
            with Storage(url, n_threads=5) as s:
                content = b'some_string'
                # remove when GCS enables "br"
                if method == "br" and "gs://" in url:
                    with pytest.raises(
                            exceptions.UnsupportedCompressionType,
                            match="Brotli unsupported on google cloud storage"):
                        s.put_file('info', content, compress=method)
                        s.wait()
                        retrieved = s.get_file('info')
                else:
                    s.put_file('info', content, compress=method)
                    s.wait()
                    retrieved = s.get_file('info')
                    assert content == retrieved
                    assert s.get_file('nonexistentfile') is None

        with Storage(url, n_threads=5) as s:
            content = b'some_string'
            try:
                s.put_file('info', content, compress='nonexistent').wait()
                assert False
            except NotImplementedError:
                pass

        if "file://" in url:
            delete_layer("/tmp/removeme/compress-" + str(TEST_NUMBER))
def __init__(self,
             fragments_path: str,
             output_path: str,
             name: str = 'aggregate-skeleton-fragments'):
    """
    Parameters
    ------------
    fragments_path:
        path to store fragment files
    output_path:
        save the merged skeleton file here.
    """
    super().__init__(name=name)
    self.fragments_storage = Storage(fragments_path)
    self.output_storage = Storage(output_path)
def test_dataset_provenance():
    fs = '/tmp/removeme/provenance/'
    delete_layer(fs)

    prov = DatasetProvenance()
    prov.dataset_name = 'ur-mom-2039'
    prov.dataset_description = 'EM serial section of your mom\'s brain'
    prov.organism = 'Male wild-type (C57BL/6) mouse'
    prov.imaged_date = 'March-Feb 2010'
    prov.imaged_by = '*****@*****.**'
    prov.references = ['doi:presigiousjournalofyourmom-12142']
    prov.owners = ['*****@*****.**', '*****@*****.**']

    with Storage('file://' + fs) as stor:
        stor.put_file('provenance', prov.serialize(),
                      content_type='application/json')

    path = os.path.join(fs, 'provenance')
    with open(path, 'r') as f:
        data = json.loads(f.read())

    assert data == {
        'dataset_name': 'ur-mom-2039',
        'dataset_description': 'EM serial section of your mom\'s brain',
        'organism': 'Male wild-type (C57BL/6) mouse',
        'imaged_date': 'March-Feb 2010',
        'imaged_by': '*****@*****.**',
        'references': ['doi:presigiousjournalofyourmom-12142'],
        'owners': ['*****@*****.**', '*****@*****.**'],
    }

    with Storage('file://' + fs) as stor:
        provjson = stor.get_file('provenance')
        provjson = provjson.decode('utf-8')
        prov = DatasetProvenance().from_json(provjson)

    assert prov.dataset_name == 'ur-mom-2039'
    assert prov.dataset_description == 'EM serial section of your mom\'s brain'
    assert prov.organism == 'Male wild-type (C57BL/6) mouse'
    assert prov.imaged_date == 'March-Feb 2010'
    assert prov.imaged_by == '*****@*****.**'
    assert prov.references == ['doi:presigiousjournalofyourmom-12142']
    assert prov.owners == ['*****@*****.**', '*****@*****.**']
def __init__(self, output_path, name: str = 'skeletonize', verbose: int = 1):
    """
    Parameters
    ------------
    output_path:
        where to put the skeleton files
    name:
        operator name.
    verbose:
        print out information or not.
    """
    super().__init__(name=name, verbose=verbose)
    self.storage = Storage(output_path)
def __init__(self,
             volume_path: str,
             mip: int = 0,
             expand_margin_size=(0, 0, 0),
             fill_missing: bool = False,
             validate_mip: int = None,
             blackout_sections: bool = None,
             dry_run: bool = False,
             name: str = 'cutout'):
    super().__init__(name=name)
    self.volume_path = volume_path
    self.mip = mip
    self.expand_margin_size = expand_margin_size
    self.fill_missing = fill_missing
    self.validate_mip = validate_mip
    self.blackout_sections = blackout_sections
    self.dry_run = dry_run

    if blackout_sections:
        with Storage(volume_path) as stor:
            self.blackout_section_ids = stor.get_json(
                'blackout_section_ids.json')['section_ids']

    # show progress when the effective logging level is WARNING (30) or lower
    verbose = (logging.getLogger().getEffectiveLevel() <= 30)
    self.vol = CloudVolume(self.volume_path,
                           bounded=False,
                           fill_missing=self.fill_missing,
                           progress=verbose,
                           mip=self.mip,
                           cache=False,
                           green_threads=True)
def execute(self):
    # cache is necessary for local computation, but on GCE download is
    # very fast so cache isn't necessary.
    cv = CloudVolume(self.cloudpath, cache=False, progress=self.progress)

    labels = self.labels_for_shard(cv)
    locations = self.locations_for_labels(labels, cv)
    skeletons = self.process_skeletons(locations, cv)

    if len(skeletons) == 0:
        return

    shard_files = synthesize_shard_files(cv.skeleton.reader.spec, skeletons)

    if len(shard_files) != 1:
        raise ValueError(
            "Only one shard file should be generated per task. Expected: {} Got: {} "
            .format(str(self.shard_no), ", ".join(shard_files.keys())))

    uploadable = [(fname, data) for fname, data in shard_files.items()]
    with Storage(cv.skeleton.meta.layerpath, progress=self.progress) as stor:
        stor.put_files(
            files=uploadable,
            compress=False,
            content_type='application/octet-stream',
            cache_control='no-cache',
        )
def test_delete():
    urls = [
        "file:///tmp/removeme/delete",
        "gs://seunglab-test/cloudvolume/delete",
        "s3://seunglab-test/cloudvolume/delete"
    ]

    for url in urls:
        url = url + '-' + str(TEST_NUMBER)
        with Storage(url, n_threads=1) as s:
            content = b'some_string'
            s.put_file('delete-test', content, compress=None).wait()
            s.put_file('delete-test-compressed', content, compress='gzip').wait()
            assert s.get_file('delete-test') == content

            s.delete_file('delete-test').wait()
            assert s.get_file('delete-test') is None

            assert s.get_file('delete-test-compressed') == content
            s.delete_file('delete-test-compressed').wait()
            assert s.get_file('delete-test-compressed') is None

            # Reset for batch delete
            s.put_file('delete-test', content, compress=None).wait()
            s.put_file('delete-test-compressed', content, compress='gzip').wait()
            assert s.get_file('delete-test') == content
            assert s.get_file('delete-test-compressed') == content

            s.delete_files([
                'delete-test', 'delete-nonexistent', 'delete-test-compressed'
            ]).wait()
            assert s.get_file('delete-test') is None
            assert s.get_file('delete-test-compressed') is None
def execute(self):
    with Storage(self.layer_path) as storage:
        self._info = json.loads(storage.get_file('info').decode('utf8'))

        if self.mesh_dir is None and 'mesh' in self._info:
            self.mesh_dir = self._info['mesh']

        self._generate_manifests(storage)
def __init__(self,
             volume_path: str,
             mip: int,
             upload_log: bool = True,
             create_thumbnail: bool = False,
             verbose: bool = True,
             name: str = 'save'):
    super().__init__(name=name, verbose=verbose)
    self.upload_log = upload_log
    self.create_thumbnail = create_thumbnail
    self.mip = mip
    self.verbose = verbose
    self.volume_path = volume_path

    if upload_log:
        log_path = os.path.join(volume_path, 'log')
        self.log_storage = Storage(log_path)
def test_list():
    urls = [
        "file:///tmp/removeme/list",
        "gs://seunglab-test/cloudvolume/list",
        "s3://seunglab-test/cloudvolume/list"
    ]

    for url in urls:
        url = url + '-' + str(TEST_NUMBER)
        with Storage(url, n_threads=5) as s:
            print('testing service:', url)
            content = b'some_string'
            s.put_file('info1', content, compress=None)
            s.put_file('info2', content, compress=None)
            s.put_file('build/info3', content, compress=None)
            s.put_file('level1/level2/info4', content, compress=None)
            s.put_file('info5', content, compress='gzip')
            s.put_file('info.txt', content, compress=None)
            s.wait()
            time.sleep(1)  # sometimes it takes a moment for google to update the list

            assert set(s.list_files(prefix='')) == set([
                'build/info3', 'info1', 'info2', 'level1/level2/info4',
                'info5', 'info.txt'
            ])
            assert set(s.list_files(prefix='inf')) == set(
                ['info1', 'info2', 'info5', 'info.txt'])
            assert set(s.list_files(prefix='info1')) == set(['info1'])
            assert set(s.list_files(prefix='build')) == set(['build/info3'])
            assert set(s.list_files(prefix='build/')) == set(['build/info3'])
            assert set(s.list_files(prefix='level1/')) == set(
                ['level1/level2/info4'])
            assert set(s.list_files(prefix='nofolder/')) == set([])

            # Tests (1)
            assert set(s.list_files(prefix='', flat=True)) == set(
                ['info1', 'info2', 'info5', 'info.txt'])
            assert set(s.list_files(prefix='inf', flat=True)) == set(
                ['info1', 'info2', 'info5', 'info.txt'])
            # Tests (2)
            assert set(s.list_files(prefix='build', flat=True)) == set([])
            # Tests (3)
            assert set(s.list_files(prefix='level1/', flat=True)) == set([])
            assert set(s.list_files(prefix='build/', flat=True)) == set(['build/info3'])
            # Tests (4)
            assert set(s.list_files(prefix='build/inf', flat=True)) == set(['build/info3'])

            for file_path in ('info1', 'info2', 'build/info3',
                              'level1/level2/info4', 'info5', 'info.txt'):
                s.delete_file(file_path)

    delete_layer("/tmp/removeme/list-" + str(TEST_NUMBER))
def test_http_read_brotli_image():
    fn = "2_2_50/4096-4608_4096-4608_112-128"
    bbox = Bbox.from_filename(fn)  # possible off by one error w/ exclusive bounds

    with Storage("https://open-neurodata.s3.amazonaws.com/kharris15/apical/em") as stor:
        img_bytes = stor.get_file(fn)

    img = chunks.decode(img_bytes, 'raw', shape=bbox.size3(), dtype="uint8")
    assert img.shape == (512, 512, 16)
def mask_except(self, selected_obj_ids: Union[str, list, set]):
    if selected_obj_ids is None:
        logging.warning('empty selected object ids to mask, do nothing!')
        return

    if isinstance(selected_obj_ids, str) and selected_obj_ids.endswith('.json'):
        # assume that ids is a json file in the storage path
        json_storage = Storage(os.path.dirname(selected_obj_ids))
        ids_str = json_storage.get_file(os.path.basename(selected_obj_ids))
        selected_obj_ids = set(json.loads(ids_str))
        assert len(selected_obj_ids) > 0
        logging.info(f'number of selected objects: {len(selected_obj_ids)}')
    elif isinstance(selected_obj_ids, str):
        # a simple string, like "34,45,56,23"
        # this is used for small object numbers
        selected_obj_ids = set([int(id) for id in selected_obj_ids.split(',')])

    self.array = fastremap.mask_except(self.array, list(selected_obj_ids))
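# A minimal usage sketch, assuming `chunk` is a segmentation chunk exposing the
# mask_except method above; the ids and the JSON path are hypothetical. Both
# forms keep only the selected objects and zero out everything else:
#
#     chunk.mask_except('34,45,56,23')
#     chunk.mask_except('gs://my-bucket/selected_ids.json')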
def __init__(self,
             volume_path: str,
             mip: int,
             upload_log: bool = True,
             create_thumbnail: bool = False,
             nproc: int = 0,
             verbose: bool = True,
             name: str = 'save'):
    super().__init__(name=name, verbose=verbose)

    # CloudVolume's `parallel` parameter: True uses all cores, False disables
    # multiprocessing, and a positive integer sets the process count.
    if nproc < 0:
        nproc = True
    elif nproc == 0:
        nproc = False

    self.upload_log = upload_log
    self.create_thumbnail = create_thumbnail
    self.mip = mip

    self.volume = CloudVolume(volume_path,
                              fill_missing=True,
                              bounded=False,
                              autocrop=True,
                              mip=mip,
                              parallel=nproc,
                              progress=verbose)

    if upload_log:
        log_path = os.path.join(volume_path, 'log')
        self.log_storage = Storage(log_path)

    if create_thumbnail:
        self.thumbnail_volume = CloudVolume(os.path.join(volume_path, 'thumbnail'),
                                            compress='gzip',
                                            fill_missing=True,
                                            bounded=False,
                                            autocrop=True,
                                            mip=mip,
                                            progress=verbose)
def execute(self):
    with Storage(self.layer_path) as storage:
        for segid, frags in tqdm(self.segids.items()):
            storage.put_file(
                file_path='{}/{}:{}'.format(self.mesh_dir, segid, self.lod),
                content=json.dumps({"fragments": frags}),
                content_type='application/json',
            )
def __init__(self,
             dust_size_threshold: int,
             selected_obj_ids: str,
             name: str = 'mask-out-objects',
             verbose: int = 1):
    """
    Parameters
    ------------
    dust_size_threshold:
        do not mesh tiny objects with voxel number less than this threshold.
    selected_obj_ids:
        only mesh the selected segmentation ids; other segments will not be meshed.
    name:
        operator name.
    verbose:
        print out information or not.

    Note that some functions are adopted from igneous.
    """
    super().__init__(name=name, verbose=verbose)
    self.dust_size_threshold = dust_size_threshold

    if selected_obj_ids:
        if selected_obj_ids.endswith('.json'):
            # assume that ids is a json file in the storage path
            json_storage = Storage(os.path.dirname(selected_obj_ids))
            ids_str = json_storage.get_file(os.path.basename(selected_obj_ids))
            self.ids = set(json.loads(ids_str))
            assert len(self.ids) > 0
            if self.verbose:
                print(f'number of selected objects: {len(self.ids)}')
        else:
            # a simple string, like "34,45,56,23"
            # this is used for small object numbers
            self.ids = set([int(id) for id in selected_obj_ids.split(',')])
def upload(self, vol, path, bbox, skeletons):
    if not self.will_postprocess:
        vol.skeleton.upload(skeletons)
        return

    bbox = bbox * vol.resolution
    with Storage(path, progress=vol.progress) as stor:
        for skel in skeletons:
            stor.put_file(
                file_path="{}:{}".format(skel.id, bbox.to_filename()),
                content=pickle.dumps(skel),
                compress='gzip',
                content_type="application/python-pickle",
                cache_control=False,
            )
def get_skeletons_by_segid(self, filenames):
    with Storage(self.cloudpath, progress=True) as stor:
        skels = stor.get_files(filenames)

    skeletons = defaultdict(list)
    for skel in skels:
        try:
            segid = filename_to_segid(skel['filename'])
        except ValueError:
            # Typically this is due to preexisting fully
            # formed skeletons e.g. skeletons_mip_3/1588494
            continue

        skeletons[segid].append(
            (Bbox.from_filename(skel['filename']), pickle.loads(skel['content'])))

    return skeletons
def execute(self):
    self.vol = CloudVolume(self.cloudpath, mip=self.mip, cdn_cache=False)

    fragment_filenames = self.get_filenames()
    skels = self.get_skeletons_by_segid(fragment_filenames)

    skeletons = []
    for segid, frags in skels.items():
        skeleton = self.fuse_skeletons(frags)
        skeleton = trim_skeleton(skeleton, self.dust_threshold, self.tick_threshold)
        skeleton.id = segid
        skeletons.append(skeleton)

    self.vol.skeleton.upload(skeletons)

    if self.delete_fragments:
        with Storage(self.cloudpath, progress=True) as stor:
            stor.delete_files(fragment_filenames)
def test_exists():
    urls = [
        "file:///tmp/removeme/exists",
        "gs://seunglab-test/cloudvolume/exists",
        "s3://seunglab-test/cloudvolume/exists"
    ]

    for url in urls:
        url = url + '-' + str(TEST_NUMBER)
        with Storage(url, n_threads=5) as s:
            content = b'some_string'
            s.put_file('info', content, compress=None)
            s.wait()
            time.sleep(1)  # sometimes it takes a moment for google to update the list

            assert s.exists('info')
            assert not s.exists('doesntexist')

            s.delete_file('info')
def test_access_non_cannonical_paths():
    urls = [
        "file:///tmp/noncanon",
        "gs://seunglab-test/noncanon",
        "s3://seunglab-test/noncanon"
    ]

    for url in urls:
        url = url + '-' + str(TEST_NUMBER)
        with Storage(url, n_threads=5) as s:
            content = b'some_string'
            s.put_file('info', content, compress=None)
            s.wait()
            time.sleep(0.5)  # sometimes it takes a moment for google to update the list

            assert s.get_file('info') == content
            assert s.get_file('nonexistentfile') is None
            s.delete_file('info')
            s.wait()
def setUp(self):
    print('test volume cutout...')
    # compute parameters
    self.mip = 0
    self.size = (36, 448, 448)

    # create image dataset using cloud-volume
    img = np.random.randint(0, 256, size=self.size)
    self.img = img.astype(np.uint8)
    # save the input to disk
    self.volume_path = 'file:///tmp/test/cutout/' + generate_random_string()
    CloudVolume.from_numpy(np.transpose(self.img), vol_path=self.volume_path)

    # prepare blackout section ids
    self.blackout_section_ids = [17, 20]
    ids = {'section_ids': self.blackout_section_ids}
    with Storage(self.volume_path) as stor:
        stor.put_json('blackout_section_ids.json', ids)
def build_segid_map(layer_path):
    with Storage(layer_path) as storage:
        _info = json.loads(storage.get_file('info').decode('utf8'))

    mesh_dir = os.path.join(layer_path.replace('file://', ''), _info['mesh'])
    print(mesh_dir)

    segids = defaultdict(list)
    for entry in tqdm(os.scandir(mesh_dir), desc='Build Segid map'):
        match = re.match(r'(\d+):(\d+).*\.gz', entry.name)
        if not match:
            continue
        fname, segid, lod = match.group(0), int(match.group(1)), int(match.group(2))
        segids[segid].append(fname)

    return segids, _info['mesh']
def test_gc_stresstest():
    with Storage('gs://seunglab-test/cloudvolume/connection_pool/',
                 n_threads=0) as stor:
        stor.put_file('test', 'some string')

    n_trials = 500
    pbar = tqdm(total=n_trials)

    @retry
    def create_conn(interface):
        # assert GC_POOL.total_connections() <= GC_POOL.max_connections * 5
        bucket = GC_POOL.get_connection()
        blob = bucket.get_blob('cloudvolume/connection_pool/test')
        blob.download_as_string()
        GC_POOL.release_connection(bucket)
        pbar.update()

    with ThreadedQueue(n_threads=20) as tq:
        for _ in range(n_trials):
            tq.put(create_conn)

    pbar.close()