def test_single_threaded_insertion():
    """Insert tasks one at a time and check the queue's bookkeeping.

    The scenario is repeated for every entry of QTYPES. The ``enqueued``
    counter is only asserted for queue types other than 'aws'.
    """
    n_inserts = 5

    for qtype in QTYPES:
        counts_checked = qtype != 'aws'
        tq = TaskQueue(
            n_threads=0,
            queue_name=QUEUE_NAME,
            queue_server=qtype,
            qurl=QURL,
        )

        # Start from an empty queue.
        tq.purge()
        if counts_checked:
            assert tq.enqueued == 0

        for _ in range(n_inserts):
            tq.insert(PrintTask())
        tq.wait()

        if counts_checked:
            assert tq.enqueued == n_inserts

        # Everything listed must be exactly a PrintTask.
        assert all(type(listed) == PrintTask for listed in tq.list())

        # Leave the queue empty for whatever runs next.
        tq.purge()
        if counts_checked:
            assert tq.enqueued == 0
def mesh_sharded_merge(
    ctx, path, queue, vqb, compress_level,
    shard_index_bytes, minishard_index_bytes,
    minishard_index_encoding, spatial_index_db
):
    """
    (2) Postprocess fragments into finished sharded multires meshes.

    Only use this command if you used the --sharded flag
    during the forging step. Some reasonable defaults
    are selected for a dataset with a few million labels,
    but for smaller or larger datasets they may not be
    appropriate.

    The shard and minishard index default sizes are set to
    accommodate efficient access for a 100 Mbps connection.
    """
    cloudpath = cloudfiles.paths.normalize(path)

    # Left-hand names are the task creation function's keywords; the
    # right-hand values are this command's own arguments.
    merge_options = dict(
        draco_compression_level=compress_level,
        vertex_quantization_bits=vqb,
        shard_index_bytes=shard_index_bytes,
        minishard_index_bytes=minishard_index_bytes,
        minishard_index_encoding=minishard_index_encoding,
        spatial_index_db=spatial_index_db,
    )
    tasks = tc.create_sharded_multires_mesh_tasks(cloudpath, **merge_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def crtq():
    """Build a purged, rezeroed queue at FILE_QURL holding N PrintTasks.

    Returns:
        The TaskQueue after PrintTask(0) .. PrintTask(N - 1) were inserted.
    """
    queue = TaskQueue(FILE_QURL)

    # Clear out any leftovers before filling the queue.
    queue.purge()
    queue.rezero()

    numbered_tasks = (PrintTask(index) for index in range(N))
    queue.insert(numbered_tasks)
    return queue
def test_multi_threaded_insertion():
    """Insert 100 tasks through a 40-thread queue and check what is listed.

    The scenario is repeated for every entry of QTYPES. The ``enqueued``
    counter is only asserted for queue types other than 'aws'.
    """
    n_inserts = 100

    for qtype in QTYPES:
        is_aws = qtype == 'aws'
        tq = TaskQueue(
            n_threads=40,
            queue_name=QUEUE_NAME,
            queue_server=qtype,
            qurl=QURL,
        )

        tq.purge()
        tq.wait()
        if not is_aws:
            assert tq.enqueued == 0

        for _ in range(n_inserts):
            tq.insert(PrintTask())
        tq.wait()

        # The expected length of one listing differs for 'aws'.
        list_len = 10 if is_aws else 100

        lst = tq.list()
        # task list api only lists 100 items at a time
        assert len(lst) == list_len
        assert all(type(listed) == PrintTask for listed in lst)

        tq.purge()
        if not is_aws:
            assert tq.enqueued == 0
def test_renew():
    """Check that renew() re-stamps a queued task file without changing its identity.

    Files in the queue directory are named ``<timestamp>--<identity>``. A
    freshly inserted task is expected to carry timestamp 0; after
    ``renew(filename, 1)`` the timestamp must be at least one second past
    the moment renew() was called, and the identity part must be unchanged.
    """
    def timestamp_of(fname):
        return int(fname.split('--')[0])

    def identity_of(fname):
        return fname.split('--')[1]

    def only_queue_file():
        # The queue must hold exactly one file at every point of this test.
        filenames = os.listdir(tq.api.queue_path)
        assert len(filenames) == 1
        return filenames[0]

    tq = TaskQueue(FILE_QURL)
    tq.purge()
    tq.insert(PrintTask('hello'))

    filename = only_queue_file()
    assert timestamp_of(filename) == 0
    identity = identity_of(filename)

    # Read the clock BEFORE renewing. Comparing against a clock value read
    # after renew() can fail spuriously when a second boundary is crossed
    # between the renewal and the assertion.
    before_renew = time.time()
    tq.renew(filename, 1)

    filename = only_queue_file()
    assert timestamp_of(filename) >= int(before_renew) + 1
    assert identity_of(filename) == identity
def mv(src, dest):
    """
    Moves the contents of a queue to another
    service or location. Do not run this
    process while a queue is being worked.

    Moving an sqs queue to a file queue
    may result in duplicated tasks.
    """
    src = normalize_path(src)
    dest = normalize_path(dest)

    destination = TaskQueue(dest, progress=False)
    source = TaskQueue(src, progress=False)

    with tqdm(total=source.enqueued, desc="Moving") as progress_bar:
        while True:
            # Lease a batch; an empty source queue ends the move.
            try:
                batch = source.lease(num_tasks=10, seconds=10)
            except QueueEmptyError:
                break

            # Insert into the destination first and only then delete from
            # the source.
            destination.insert(batch)
            source.delete(batch)
            progress_bar.update(len(batch))
def skeleton_sharded_merge(
    ctx, path, queue,
    min_cable_length, max_cable_length,
    tick_threshold,
    preshift_bits, minishard_bits, shard_bits,
    minishard_index_encoding, data_encoding
):
    """
    (2) Postprocess fragments into finished skeletons.

    Only use this command if you used the --sharded flag
    during the forging step. Some reasonable defaults
    are selected for a dataset with a few million labels,
    but for smaller or larger datasets they may not be
    appropriate.
    """
    # min_cable_length is handed to the task factory as its dust_threshold.
    merge_options = dict(
        dust_threshold=min_cable_length,
        max_cable_length=max_cable_length,
        tick_threshold=tick_threshold,
        preshift_bits=preshift_bits,
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )
    tasks = tc.create_sharded_skeleton_merge_tasks(path, **merge_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def mesh_forge(
    ctx, path, queue, mip, shape,
    simplify, fill_missing, max_error,
    dust_threshold, dir, compress,
    spatial_index
):
    """
    (1) Synthesize meshes from segmentation cutouts.

    A large labeled image is divided into a regular
    grid. zmesh is applied to each grid point, which
    performs marching cubes and a quadratic mesh
    simplifier.

    Note that using task shapes with axes less than
    or equal to 511x1023x511 (don't ask) will be more
    memory efficient as it can use a 32-bit mesher.

    zmesh is used: https://github.com/seung-lab/zmesh

    Sharded format is not currently supported. Coming soon.
    """
    # shape arrives as a comma separated string such as "448,448,448".
    task_shape = list(map(int, shape.split(",")))

    # Several options are pinned here rather than exposed on the command.
    meshing_options = dict(
        simplification=simplify,
        max_simplification_error=max_error,
        mesh_dir=dir,
        cdn_cache=False,
        dust_threshold=dust_threshold,
        object_ids=None,
        progress=False,
        fill_missing=fill_missing,
        encoding='precomputed',
        spatial_index=spatial_index,
        sharded=False,
        compress=compress,
    )
    tasks = tc.create_meshing_tasks(path, mip, task_shape, **meshing_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def delete_images(ctx, path, queue, mip, num_mips, shape):
    """
    Delete the image layer of a dataset.
    """
    cloudpath = cloudfiles.paths.normalize(path)
    tasks = tc.create_deletion_tasks(
        cloudpath, mip,
        num_mips=num_mips,
        shape=shape,
    )

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def test_multi_threaded_insertion(sqs, protocol, green, threads):
    """Insert 40 PrintTasks into a queue built with the given thread settings.

    The test makes no assertions; it passes when the purge / insert / purge
    sequence completes without raising.
    """
    n_inserts = 40
    queue = TaskQueue(getpath(protocol), n_threads=threads, green=green)

    queue.purge()
    queue.insert(PrintTask() for _ in range(n_inserts))
    queue.purge()
def skeleton_forge(
    ctx, path, queue, mip, shape,
    fill_missing, dust_threshold, spatial_index,
    fix_branching, fix_borders, fix_avocados,
    fill_holes, scale, const, soma_detect, soma_accept,
    soma_scale, soma_const, max_paths, sharded
):
    """
    (1) Synthesize skeletons from segmentation cutouts.

    A large labeled image is divided into a regular
    grid. Kimimaro is applied to each grid point, which
    performs a TEASAR based skeletonization.

    You can read more about the parameters here:
    https://github.com/seung-lab/kimimaro

    Tutorials are located here:

    - https://github.com/seung-lab/kimimaro/wiki/A-Pictorial-Guide-to-TEASAR-Skeletonization

    - https://github.com/seung-lab/kimimaro/wiki/Intuition-for-Setting-Parameters-const-and-scale

    A guide to how much this might cost is located here:

    - https://github.com/seung-lab/kimimaro/wiki/The-Economics:-Skeletons-for-the-People
    """
    cloudpath = cloudfiles.paths.normalize(path)

    # pdrf_exponent and pdrf_scale are fixed here; the rest come from the
    # command's arguments.
    teasar_params = dict(
        scale=scale,
        const=const,  # physical units
        pdrf_exponent=4,
        pdrf_scale=100000,
        soma_detection_threshold=soma_detect,  # physical units
        soma_acceptance_threshold=soma_accept,  # physical units
        soma_invalidation_scale=soma_scale,
        soma_invalidation_const=soma_const,  # physical units
        max_paths=max_paths,  # default None
    )

    # Task creation itself is asked for parallel=1; the context's "parallel"
    # setting is applied to the queue insertion below.
    skeletonizing_options = dict(
        teasar_params=teasar_params,
        fix_branching=fix_branching,
        fix_borders=fix_borders,
        fix_avocados=fix_avocados,
        fill_holes=fill_holes,
        dust_threshold=dust_threshold,
        progress=False,
        parallel=1,
        fill_missing=fill_missing,
        sharded=sharded,
        spatial_index=spatial_index,
    )
    tasks = tc.create_skeletonizing_tasks(
        cloudpath, mip, shape, **skeletonizing_options
    )

    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def test_parallel_insert_all(sqs, protocol):
    """Insert tasks from ``pathos_issue.crt_tasks(5, 20)`` with parallel=2.

    The test makes no assertions; it passes when insertion and the final
    purge complete without raising.
    """
    # Imported locally, as in the rest of this test suite's parallel tests.
    import pathos_issue

    queue = TaskQueue(getpath(protocol), green=True)
    batch = pathos_issue.crt_tasks(5, 20)

    queue.insert(batch, parallel=2)
    queue.purge()
def test_get(sqs, protocol):
    """Insert five PrintTasks, then lease and delete them one by one."""
    n_inserts = 5
    queue = TaskQueue(getpath(protocol), n_threads=0)

    queue.purge()
    queue.insert(PrintTask() for _ in range(n_inserts))

    # One lease per inserted task; each leased task is deleted right away.
    for _ in range(n_inserts):
        leased = queue.lease()
        queue.delete(leased)
def test_single_threaded_insertion(sqs, protocol):
    """Insert five PrintTasks without threads and check the listed types."""
    n_inserts = 5
    queue = TaskQueue(getpath(protocol), n_threads=0)

    queue.purge()
    queue.insert(PrintTask() for _ in range(n_inserts))

    # Everything listed must be exactly a PrintTask.
    assert all(type(listed) == PrintTask for listed in queue.list())

    queue.purge()
def test_single_threaded_insertion_fns(sqs, protocol):
    """Insert five partials of ``printfn`` and check they list as FunctionTasks."""
    n_inserts = 5
    queue = TaskQueue(getpath(protocol), n_threads=0)
    queue.purge()

    # Each partial binds a distinct message: "hello world 0", "hello world 1", ...
    bound_calls = (
        partial(printfn, "hello world " + str(number))
        for number in range(n_inserts)
    )
    queue.insert(bound_calls)

    assert all(isinstance(listed, FunctionTask) for listed in queue.list())

    queue.purge()
def test_get():
    """Insert five PrintTasks into each queue type, wait, then purge.

    The test makes no assertions; it passes when the sequence completes
    without raising for every entry of QTYPES.
    """
    n_inserts = 5

    for qtype in QTYPES:
        tq = TaskQueue(
            n_threads=0,
            queue_name=QUEUE_NAME,
            queue_server=qtype,
            qurl=QURL,
        )

        tq.purge()
        for _ in range(n_inserts):
            tq.insert(PrintTask())
        tq.wait()
        tq.purge()
def test_is_empty():
    """Follow ``is_empty()`` through purge, insert, and lease + delete."""
    queue = TaskQueue(FILE_QURL)

    # A purged queue reports empty.
    queue.purge()
    assert queue.is_empty() == True

    # One inserted task makes it non-empty.
    queue.insert(PrintTask("hello"))
    assert queue.is_empty() == False

    # Leasing and deleting that task empties it again.
    leased = queue.lease()
    queue.delete(leased)
    assert queue.is_empty() == True
def mesh_merge(ctx, path, queue, magnitude, dir):
    """
    (2) Merge the mesh pieces produced from the forging step.

    The per-cutout mesh fragments are then assembled and
    merged. However, this process occurs by compiling
    a list of fragment files and uploading a "mesh manifest"
    file that is an index for locating the fragments.
    """
    manifest_options = dict(magnitude=magnitude, mesh_dir=dir)
    tasks = tc.create_mesh_manifest_tasks(path, **manifest_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def skeleton_merge(
    ctx, path, queue,
    min_cable_length, max_cable_length,
    tick_threshold, delete_fragments,
    magnitude
):
    """
    (2) Postprocess fragments into finished skeletons.
    """
    cloudpath = cloudfiles.paths.normalize(path)

    # min_cable_length is handed to the task factory as its dust_threshold.
    merge_options = dict(
        magnitude=magnitude,
        dust_threshold=min_cable_length,
        max_cable_length=max_cable_length,
        tick_threshold=tick_threshold,
        delete_fragments=delete_fragments,
    )
    tasks = tc.create_unsharded_skeleton_merge_tasks(cloudpath, **merge_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def test_blackout_tasks():
    """Check that BlackoutTask fills the requested region with a value.

    First the whole 128x64x64 layer is set to 11 via a task run through a
    file queue. Then a 37x64x64 region at the origin is set to 23 by calling
    BlackoutTask directly. The final voxel counts must match both writes.
    """
    delete_layer()
    cf, _ = create_layer(size=(128,64,64,1), offset=(0,0,0), layer_type="image")
    cv = CloudVolume(cf.cloudpath)

    # Pass 1: queue a blackout of the full volume and execute the leased task.
    tq = TaskQueue("fq:///tmp/removeme/blackout/")
    whole_volume_blackout = partial(
        BlackoutTask,
        cloudpath=cf.cloudpath,
        mip=0,
        offset=(0,0,0),
        shape=(128, 64, 64),
        value=11,
        non_aligned_writes=False
    )
    tq.insert(whole_volume_blackout)
    tq.lease().execute()

    assert np.all(cv[:,:,:] == 11)

    # Pass 2: called directly rather than through the queue. The assertions
    # below expect the write to have happened by the time the call returns.
    BlackoutTask(
        cloudpath=cf.cloudpath,
        mip=0,
        offset=(0,0,0),
        shape=(37, 64, 64),
        value=23,
        non_aligned_writes=True
    )

    assert np.all(cv[:37,:,:] == 23)

    # Exactly two distinct values are expected; tally the voxels of each.
    values, tallies = np.unique(cv[:], return_counts=True)
    voxels_with = {
        values[0]: tallies[0],
        values[1]: tallies[1],
    }

    twenty_threes = 37 * 64 * 64
    assert voxels_with[23] == twenty_threes
    assert voxels_with[11] == (128 * 64 * 64) - twenty_threes
def test_enumerating_tasks():
    """Insert ten 'hello' and ten 'world' tasks and enumerate all twenty."""
    queue = TaskQueue(FILE_QURL)
    queue.purge()

    # Alternate the two messages so both kinds are interleaved in the queue.
    for _ in range(10):
        queue.insert(PrintTask('hello'))
        queue.insert(PrintTask('world'))

    enumerated = list(queue.tasks())
    assert len(enumerated) == 20

    hello = sum(int(task.txt == "hello") for task in enumerated)
    world = sum(int(task.txt == "world") for task in enumerated)

    assert hello == 10
    assert world == 10
def test_polling(sqs):
    """Run ``poll()`` twice on a file queue, each time with a different stop_fn.

    The first run stops on an ``executed`` threshold, the second on an
    ``elapsed_time`` threshold. The test makes no assertions; it passes when
    both poll() calls return.
    """
    # NOTE(review): the parameter names `executed` and `elapsed_time` are
    # kept exactly — presumably poll() supplies values by parameter name;
    # confirm against the TaskQueue.poll documentation before renaming.
    def stop_after_five_executions(executed):
        return executed >= 5

    def stop_after_one_second(elapsed_time):
        return elapsed_time >= 1

    N = 100
    tasks = [PrintTask(number) for number in range(N)]
    queue = TaskQueue(getpath('fq'), green=False)

    # Both rounds start from a purged queue refilled with the same tasks.
    for stop_condition in (stop_after_five_executions, stop_after_one_second):
        queue.purge()
        queue.insert(tasks)
        queue.poll(
            lease_seconds=1,
            verbose=False,
            tally=True,
            stop_fn=stop_condition,
        )
def mesh_spatial_index_create(ctx, path, queue, shape, mip, fill_missing):
    """
    Create a spatial index on a pre-existing mesh.

    Sometimes datasets were meshed without a
    spatial index or need it to be updated.
    This function provides a more efficient
    way to accomplish that than remeshing.
    """
    index_options = dict(
        cloudpath=cloudfiles.paths.normalize(path),
        shape=shape,
        mip=mip,
        fill_missing=fill_missing,
    )
    tasks = tc.create_spatial_index_mesh_tasks(**index_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def xfer(
    ctx, src, dest, queue, translate, downsample, mip,
    fill_missing, num_mips, cseg, shape, sparse,
    chunk_size, compress, volumetric,
    delete_bg, bg_color
):
    """
    Transfer an image layer to another location.

    It is crucial to choose a good task shape. The task
    shape must be a multiple of two of the destination
    image layer chunk size. Too small, and you'll have
    an inefficient transfer. Too big, and you'll run out
    of memory and also have an inefficient transfer.

    Downsamples will by default be automatically calculated
    from whatever material is available. For the default
    2x2x1 downsampling, larger XY dimension is desirable
    compared to Z as more downsamples can be computed for
    each 2x2 increase in the task size.
    """
    # NOTE(review): num_mips is accepted but is not forwarded to the task
    # creation call below — confirm whether that is intended.
    encoding = "compressed_segmentation" if cseg else None
    factor = (2,2,2) if volumetric else (2,2,1)

    # shape and translate arrive as comma separated strings, e.g. "1024,1024,64".
    task_shape = list(map(int, shape.split(",")))
    translation = list(map(int, translate.split(",")))

    transfer_options = dict(
        chunk_size=chunk_size,
        fill_missing=fill_missing,
        translate=translation,
        mip=mip,
        shape=task_shape,
        encoding=encoding,
        skip_downsamples=(not downsample),
        delete_black_uploads=delete_bg,
        background_color=bg_color,
        compress=compress,
        factor=factor,
        sparse=sparse,
    )
    tasks = tc.create_transfer_tasks(src, dest, **transfer_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def test_lease(sqs):
    """Lease 20 tasks in two batches of ten, then expect the queue to be empty."""
    n_inserts = 20
    queue = TaskQueue(getpath("sqs"), n_threads=0)

    queue.purge()
    queue.insert(PrintTask(str(number)) for number in range(n_inserts))

    # Two full batches of ten drain everything that was inserted.
    for _ in range(2):
        batch = queue.lease(num_tasks=10, wait_sec=0)
        assert len(batch) == 10
        queue.delete(batch)

    # A third lease must raise because nothing is left.
    try:
        queue.lease(num_tasks=10, wait_sec=0)
    except QueueEmptyError:
        pass
    else:
        assert False
def cp(src, dest):
    """
    Copy the contents of a queue to another
    service or location. Do not run this
    process while a queue is being worked.

    Currently sqs queues are not copiable,
    but you can copy an fq to sqs. The mv
    command supports sqs queues.
    """
    source_path = normalize_path(src)
    dest_path = normalize_path(dest)

    # Refuse sqs sources up front with a message instead of an exception.
    if get_protocol(source_path) == "sqs":
        print("ptq: cp does not support sqs:// as a source.")
        return

    # The destination queue is constructed first, then the source queue is
    # inserted into it wholesale.
    TaskQueue(dest_path).insert(TaskQueue(source_path))
def downsample(
    ctx, path, queue, mip, fill_missing,
    num_mips, cseg, sparse,
    chunk_size, compress, volumetric,
    delete_bg, bg_color
):
    """
    Create an image pyramid for grayscale or labeled images.

    By default, we use 2x2x1 downsampling.

    The levels of the pyramid are called "mips" (from the fake latin
    "Multum in Parvo" or "many in small"). The base of the pyramid,
    the highest resolution layer, is mip 0. Each level of the pyramid
    is one mip level higher.

    The general strategy is to downsample starting from mip 0. This
    builds several levels. Once that job is complete, pass in the
    current top mip level of the pyramid. This builds it even taller
    (referred to as "superdownsampling").
    """
    encoding = "compressed_segmentation" if cseg else None
    factor = (2,2,2) if volumetric else (2,2,1)

    downsample_options = dict(
        mip=mip,
        fill_missing=fill_missing,
        num_mips=num_mips,
        sparse=sparse,
        chunk_size=chunk_size,
        encoding=encoding,
        delete_black_uploads=delete_bg,
        background_color=bg_color,
        compress=compress,
        factor=factor,
    )
    tasks = tc.create_downsampling_tasks(path, **downsample_options)

    # The "parallel" entry of the context object (default 1) is forwarded
    # to the queue insertion.
    n_parallel = int(ctx.obj.get("parallel", 1))
    TaskQueue(normalize_path(queue)).insert(tasks, parallel=n_parallel)
def test_parallel_insert_all(sqs, protocol):
    """Insert tasks with parallel=2 and check how many were reported inserted.

    ``insert`` must return 15 for the tasks from ``pathos_issue.crt_tasks(5, 20)``.
    For the 'fq' protocol the queue's ``inserted`` counter is checked as well.
    """
    import pathos_issue

    is_file_queue = protocol == 'fq'
    queue = TaskQueue(getpath(protocol), green=True)

    queue.purge()
    if is_file_queue:
        # Reset the counters so `inserted` reflects this test only.
        queue.rezero()

    batch = pathos_issue.crt_tasks(5, 20)
    amt = queue.insert(batch, parallel=2)

    assert amt == 15
    if is_file_queue:
        assert queue.inserted == 15

    queue.purge()
def test_queue_transfer(sqs):
    """Transfer a file queue into an sqs queue, and check the reverse is refused."""
    sqs_queue = TaskQueue(getpath("sqs"))
    sqs_queue.purge()
    file_queue = TaskQueue(getpath("fq"))
    file_queue.purge()

    assert sqs_queue.enqueued == 0

    # fq -> sqs: all ten tasks must arrive and come back out as PrintTasks.
    file_queue.insert(PrintTask() for _ in range(10))
    sqs_queue.insert(file_queue)

    assert sqs_queue.enqueued == 10
    leased = sqs_queue.lease()
    assert isinstance(leased, PrintTask)

    # sqs -> fq: inserting an sqs queue into a file queue must raise.
    try:
        file_queue.insert(sqs_queue)
    except taskqueue.UnsupportedProtocolError:
        pass
    else:
        assert False
# NOTE(review): the statements down to the "sec" print are the tail of a
# definition that begins before this chunk; they are reproduced unchanged.
processed_patch = norm_sample['src'].squeeze()
cv_processed_data = get_np(processed_patch.unsqueeze(2).unsqueeze(2)).astype(np.float32)
print (z, np.mean(cv_img_data), np.mean(cv_processed_data))
img_dst_cv[cv_xy_start[0]:cv_xy_end[0], cv_xy_start[1]:cv_xy_end[1], z] = cv_processed_data
e = time.time()
print (e - s, " sec")


def work(tq):
    """Poll ``tq`` for tasks, requesting a 300 second lease.

    Args:
        tq: the queue object whose ``poll`` method is invoked.
    """
    # The keyword must be spelled `lease_seconds`; a capitalised
    # `lease_Seconds` is a different, unknown keyword argument.
    tq.poll(lease_seconds=300)


if __name__ == "__main__":
    # Usage: <script> worker|master <queue path>
    tq = TaskQueue(sys.argv[2])
    if sys.argv[1] == 'worker':
        work(tq)
    elif sys.argv[1] == 'master':
        # w000ohhooooo
        start = 14780
        end = 27883
        # One task per index, each covering [i, i + 1).
        for i in range(start, end):
            tq.insert(NormalizeTask(i, 1 + i))
        #work(tq)
        # NOTE(review): `st` is not defined in the visible code — presumably
        # a debugger breakpoint helper; confirm before removing.
        st()
        #t = NormalizeTask(15000, 16000)
        #t.execute()