def mesh_sharded_merge(
    ctx, path, queue, vqb, compress_level,
    shard_index_bytes, minishard_index_bytes,
    minishard_index_encoding, spatial_index_db,
):
    """
    (2) Postprocess fragments into finished sharded multires meshes.

    Only use this command if you used the --sharded flag
    during the forging step. Some reasonable defaults
    are selected for a dataset with a few million labels,
    but for smaller or larger datasets they may not be
    appropriate.

    The shard and minishard index default sizes are set to
    accommodate efficient access for a 100 Mbps connection.
    """
    cloudpath = cloudfiles.paths.normalize(path)

    # Collect the task options in one place so the factory call stays short.
    task_options = {
        "draco_compression_level": compress_level,
        "vertex_quantization_bits": vqb,
        "shard_index_bytes": shard_index_bytes,
        "minishard_index_bytes": minishard_index_bytes,
        "minishard_index_encoding": minishard_index_encoding,
        "spatial_index_db": spatial_index_db,
    }
    merge_tasks = tc.create_sharded_multires_mesh_tasks(cloudpath, **task_options)

    # "parallel" is read from the shared context object; defaults to 1.
    n_procs = int(ctx.obj.get("parallel", 1))
    task_queue = TaskQueue(normalize_path(queue))
    task_queue.insert(merge_tasks, parallel=n_procs)
def skeleton_sharded_merge(
    ctx, path, queue,
    min_cable_length, max_cable_length,
    tick_threshold,
    preshift_bits, minishard_bits, shard_bits,
    minishard_index_encoding, data_encoding
):
    """
    (2) Postprocess fragments into finished skeletons.

    Only use this command if you used the --sharded flag
    during the forging step. Some reasonable defaults
    are selected for a dataset with a few million labels,
    but for smaller or larger datasets they may not be
    appropriate.
    """
    # Normalize the dataset path the same way the sibling commands
    # (mesh_sharded_merge, skeleton_forge, delete_images) do.
    path = cloudfiles.paths.normalize(path)

    tasks = tc.create_sharded_skeleton_merge_tasks(
        path,
        dust_threshold=min_cable_length,  # exposed to the user as min_cable_length
        max_cable_length=max_cable_length,
        tick_threshold=tick_threshold,
        preshift_bits=preshift_bits,
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )

    # "parallel" is read from the shared context object; defaults to 1.
    parallel = int(ctx.obj.get("parallel", 1))
    tq = TaskQueue(normalize_path(queue))
    tq.insert(tasks, parallel=parallel)
def main(configfilename, tagfilename=None):
    """Parse the config file and enqueue the chunk-edge tasks.

    Args:
        configfilename: path handed to ``parser.parse``.
        tagfilename: optional file of bounding-box tags; when given it is
            read with ``io.utils.read_bbox_tag_filename`` and passed to the
            task factory as ``bboxes``. Otherwise ``bboxes`` is ``None``.
    """
    config = parser.parse(configfilename)

    bboxes = None
    if tagfilename is not None:
        bboxes = io.utils.read_bbox_tag_filename(tagfilename)

    # storagestrs[0] is the primary storage string, storagestrs[1] the directory.
    storagestrs = config["storagestrs"]
    chunk_edge_tasks = tc.create_chunk_edges_tasks(
        config["image"],
        config["tempoutput"],
        config["baseseg"],
        storagestr=storagestrs[0],
        hashmax=config["nummergetasks"],
        storagedir=storagestrs[1],
        volshape=config["volshape"],
        chunkshape=config["chunkshape"],
        startcoord=config["startcoord"],
        patchsz=config["patchshape"],
        normcloudpath=config["normcloudpath"],
        resolution=config["voxelres"],
        aggscratchpath=config["aggscratchpath"],
        aggchunksize=config["aggchunksize"],
        aggmaxmip=config["aggmaxmip"],
        aggstartcoord=config["startcoord"],
        bboxes=bboxes,
    )

    TaskQueue(config["queueurl"]).insert_all(chunk_edge_tasks)
def testPubSub(self):
    """Exercise subscribe/unsubscribe/publish and the three notify targets."""
    basic_sub = {'secret': 'SECRET'}
    message = {'message': 123}

    # A subscription can be removed exactly once.
    subscription_id = PubSub.subscribe('EVENT', 'not_an_url', basic_sub)
    self.assertTrue(PubSub.unsubscribe(subscription_id))
    self.assertFalse(PubSub.unsubscribe(subscription_id))

    # With no subscribers nothing is published; with one, one notification.
    self.assertEqual(0, PubSub.publish('EVENT', message))
    PubSub.subscribe('EVENT', 'not_an_url', basic_sub)
    self.assertEqual(1, PubSub.publish('EVENT', message))

    notify_response = self.executeTask()  # /pubsub/notify
    self.assertEqual(notify_response.status_int, 200)
    notify_response.mustcontain('unknown url type')

    # Subscriber pointing at the test endpoint.
    PubSub.subscribe('EVENT2', "/pubsub/test", basic_sub)
    self.assertEqual(1, PubSub.publish('EVENT2', message))

    notify_response = self.executeTask()  # /pubsub/notify
    self.assertEqual(notify_response.json['status'], 'OK')

    test_response = self.executeTask()  # /pubsub/test
    self.assertEqual(test_response.json['pub_data']["message"], 123)

    # Subscriber pointing at the task endpoint, which should enqueue a task.
    task_sub = {'secret': 'SECRET', 'channel': 'CHANNEL', 'taskname': 'NAME'}
    PubSub.subscribe('EVENT3', "/pubsub/task", task_sub)
    self.assertEqual(1, PubSub.publish('EVENT3', message))

    notify_response = self.executeTask()  # /pubsub/notify
    self.assertEqual(notify_response.json['status'], 'OK')

    task_response = self.executeTask()  # /pubsub/task
    self.assertEqual(task_response.json['status'], 'OK')

    # The task created by the endpoint must be leasable from its channel.
    leased = TaskQueue().lease(channel='CHANNEL')
    self.assertIsNotNone(leased)
    self.assertEqual(leased['id'], task_response.json['id'])
def mesh_forge(
    ctx, path, queue, mip, shape,
    simplify, fill_missing, max_error,
    dust_threshold, dir, compress,
    spatial_index
):
    """
    (1) Synthesize meshes from segmentation cutouts.

    A large labeled image is divided into a regular
    grid. zmesh is applied to each grid point, which
    performs marching cubes and a quadratic mesh
    simplifier.

    Note that using task shapes with axes less than
    or equal to 511x1023x511 (don't ask) will be more
    memory efficient as it can use a 32-bit mesher.

    zmesh is used: https://github.com/seung-lab/zmesh

    Sharded format is not currently supported. Coming soon.
    """
    # Normalize the dataset path the same way the sibling commands
    # (mesh_sharded_merge, skeleton_forge, delete_images) do.
    path = cloudfiles.paths.normalize(path)

    # shape arrives as a comma separated string, e.g. "448,448,448".
    shape = [int(axis) for axis in shape.split(",")]

    tasks = tc.create_meshing_tasks(
        path, mip, shape,
        simplification=simplify,
        max_simplification_error=max_error,
        mesh_dir=dir,
        cdn_cache=False,
        dust_threshold=dust_threshold,
        object_ids=None,
        progress=False,
        fill_missing=fill_missing,
        encoding='precomputed',
        spatial_index=spatial_index,
        sharded=False,  # see docstring: sharded output is not supported here
        compress=compress,
    )

    # "parallel" is read from the shared context object; defaults to 1.
    parallel = int(ctx.obj.get("parallel", 1))
    tq = TaskQueue(normalize_path(queue))
    tq.insert(tasks, parallel=parallel)
def main(configfilename):
    """Parse the config file and enqueue the merge-overlaps task.

    The object returned by ``tc.create_merge_overlaps_task`` is wrapped in
    a one-element list before being handed to ``insert_all``.
    """
    config = parser.parse(configfilename)
    storagestr = config["storagestrs"][0]

    merge_task = tc.create_merge_overlaps_task(storagestr)

    queue = TaskQueue(config["queueurl"])
    queue.insert_all([merge_task])
def main(configfilename):
    """Parse the config file and enqueue the index-seg-map task."""
    config = parser.parse(configfilename)
    storagestr = config["storagestrs"][0]

    # insert_all is given a list, so the single task is wrapped in one.
    pending = [tc.create_index_seg_map_task(storagestr)]

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(pending)
def skeleton_forge(
    ctx, path, queue, mip, shape,
    fill_missing, dust_threshold, spatial_index,
    fix_branching, fix_borders, fix_avocados,
    fill_holes, scale, const, soma_detect, soma_accept,
    soma_scale, soma_const, max_paths, sharded
):
    """
    (1) Synthesize skeletons from segmentation cutouts.

    A large labeled image is divided into a regular
    grid. Kimimaro is applied to each grid point, which
    performs a TEASAR based skeletonization.

    You can read more about the parameters here:
    https://github.com/seung-lab/kimimaro

    Tutorials are located here:

    - https://github.com/seung-lab/kimimaro/wiki/A-Pictorial-Guide-to-TEASAR-Skeletonization

    - https://github.com/seung-lab/kimimaro/wiki/Intuition-for-Setting-Parameters-const-and-scale

    A guide to how much this might cost is located here:

    - https://github.com/seung-lab/kimimaro/wiki/The-Economics:-Skeletons-for-the-People
    """
    cloudpath = cloudfiles.paths.normalize(path)

    teasar_params = dict(
        scale=scale,
        const=const,  # physical units
        pdrf_exponent=4,
        pdrf_scale=100000,
        soma_detection_threshold=soma_detect,  # physical units
        soma_acceptance_threshold=soma_accept,  # physical units
        soma_invalidation_scale=soma_scale,
        soma_invalidation_const=soma_const,  # physical units
        max_paths=max_paths,  # default None
    )

    skeleton_tasks = tc.create_skeletonizing_tasks(
        cloudpath, mip, shape,
        teasar_params=teasar_params,
        fix_branching=fix_branching,
        fix_borders=fix_borders,
        fix_avocados=fix_avocados,
        fill_holes=fill_holes,
        dust_threshold=dust_threshold,
        progress=False,
        parallel=1,
        fill_missing=fill_missing,
        sharded=sharded,
        spatial_index=spatial_index,
    )

    # Insertion parallelism comes from the shared context object (default 1);
    # it is separate from the parallel=1 passed to the task factory above.
    n_procs = int(ctx.obj.get("parallel", 1))
    task_queue = TaskQueue(normalize_path(queue))
    task_queue.insert(skeleton_tasks, parallel=n_procs)
def main(configfilename):
    """Parse the config file and enqueue the seg-graph connected-components task."""
    config = parser.parse(configfilename)
    storagestr = config["storagestrs"][0]
    num_merge_tasks = config["nummergetasks"]

    cc_task = tc.create_seg_graph_cc_task(storagestr, num_merge_tasks)

    # insert_all is given a list, so the single task is wrapped in one.
    queue = TaskQueue(config["queueurl"])
    queue.insert_all([cc_task])
def main(configfilename):
    """Parse the config file and enqueue the pick-edge tasks."""
    config = parser.parse(configfilename)
    storagestr = config["storagestrs"][0]
    num_merge_tasks = config["nummergetasks"]

    pick_edge_tasks = tc.create_pick_edge_tasks(storagestr, num_merge_tasks)

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(pick_edge_tasks)
def delete_images(ctx, path, queue, mip, num_mips, shape):
    """
    Delete the image layer of a dataset.
    """
    cloudpath = cloudfiles.paths.normalize(path)
    deletion_tasks = tc.create_deletion_tasks(
        cloudpath, mip,
        num_mips=num_mips,
        shape=shape,
    )

    # "parallel" is read from the shared context object; defaults to 1.
    n_procs = int(ctx.obj.get("parallel", 1))
    task_queue = TaskQueue(normalize_path(queue))
    task_queue.insert(deletion_tasks, parallel=n_procs)
def main(configfilename):
    """Parse the config file and enqueue the merge-connected-components task."""
    config = parser.parse(configfilename)

    merge_ccs_task = tc.create_merge_ccs_task(
        config["storagestrs"][0],
        config["szthresh"],
        config["maxfaceshape"],
    )

    # insert_all is given a list, so the single task is wrapped in one.
    queue = TaskQueue(config["queueurl"])
    queue.insert_all([merge_ccs_task])
def mv(src, dest):
    """
    Moves the contents of a queue to another
    service or location. Do not run this
    process while a queue is being worked.

    Moving an sqs queue to a file queue
    may result in duplicated tasks.
    """
    source_path = normalize_path(src)
    dest_path = normalize_path(dest)

    dest_queue = TaskQueue(dest_path, progress=False)
    source_queue = TaskQueue(source_path, progress=False)

    with tqdm(total=source_queue.enqueued, desc="Moving") as progress_bar:
        while True:
            # Lease a small batch; QueueEmptyError ends the transfer.
            try:
                batch = source_queue.lease(num_tasks=10, seconds=10)
            except QueueEmptyError:
                break

            # Copy into the destination first, then remove from the source.
            dest_queue.insert(batch)
            source_queue.delete(batch)
            progress_bar.update(len(batch))
def main(configfilename):
    """Parse the config file and enqueue the match-continuations tasks."""
    config = parser.parse(configfilename)
    storagestr = config["storagestrs"][0]
    num_merge_tasks = config["nummergetasks"]

    match_tasks = tc.create_match_contins_tasks(
        storagestr,
        num_merge_tasks,
        max_faceshape=config["maxfaceshape"],
    )

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(match_tasks)
def execute(tag, queue, seconds, loop):
    """Pull work from ``queue`` and run it.

    Args:
        tag: accepted but not used by this function.
        queue: queue path handed to ``TaskQueue``.
        seconds: lease duration; coerced to ``int``.
        loop: when truthy, poll the queue; otherwise lease one task and
            execute it.
    """
    task_queue = TaskQueue(queue)
    print("Pulling from {}".format(task_queue.qualified_path))

    lease_seconds = int(seconds)

    if not loop:
        # One-shot mode: lease one task and run it.
        task_queue.lease(seconds=lease_seconds).execute()
        return

    task_queue.poll(lease_seconds=lease_seconds, verbose=True)
def test_get():
    """Insert a handful of PrintTasks into every configured queue type."""
    global QUEUE_NAME

    n_inserts = 5
    for queue_type in QTYPES:
        queue = TaskQueue(
            n_threads=0,
            queue_name=QUEUE_NAME,
            queue_server=queue_type,
            qurl=QURL,
        )

        # Start from an empty queue, insert one task at a time, then clean up.
        queue.purge()
        for _ in range(n_inserts):
            queue.insert(PrintTask())
        queue.wait()
        queue.purge()
def main(configfilename):
    """Parse the config file and enqueue the merge-duplicates tasks."""
    config = parser.parse(configfilename)
    primary_storage, secondary_storage = (
        config["storagestrs"][0],
        config["storagestrs"][1],
    )

    # All arguments are positional; keep this order in sync with the factory.
    merge_dup_tasks = tc.create_merge_dup_tasks(
        primary_storage,
        config["nummergetasks"],
        config["mergethresh"],
        config["szthresh"],
        config["voxelres"],
        secondary_storage,
    )

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(merge_dup_tasks)
def test_renew():
    """Renewing a file-queue task rewrites its timestamp but keeps its identity.

    Queue entries are files named ``<timestamp>--<identity>``. A freshly
    inserted task carries timestamp 0; after ``renew(filename, 1)`` the
    timestamp must lie at least one second past the moment renew was called.
    """
    def timestamp_of(fname):
        return int(fname.split('--')[0])

    def identity_of(fname):
        return fname.split('--')[1]

    tq = TaskQueue(FILE_QURL)
    tq.purge()
    tq.insert(PrintTask('hello'))

    filenames = os.listdir(tq.api.queue_path)
    assert len(filenames) == 1
    filename = filenames[0]

    assert timestamp_of(filename) == 0
    identity = identity_of(filename)

    # Sample the clock BEFORE renewing. Re-reading it afterwards lets the
    # test fail spuriously whenever a second boundary is crossed between
    # renew() and the assertion.
    # NOTE(review): presumably renew stamps the file with roughly
    # int(time.time()) + seconds -- confirm against the file queue API.
    now = time.time()
    tq.renew(filename, 1)

    filenames = os.listdir(tq.api.queue_path)
    assert len(filenames) == 1
    filename = filenames[0]

    assert timestamp_of(filename) >= int(now) + 1
    assert identity_of(filename) == identity
def crtq():
    """Return a file queue that was purged, rezeroed, and filled with N PrintTasks."""
    queue = TaskQueue(FILE_QURL)

    # Reset contents and counters so every caller starts from the same state.
    queue.purge()
    queue.rezero()

    print_tasks = (PrintTask(index) for index in range(N))
    queue.insert(print_tasks)
    return queue
def main(configfilename):
    """Parse the config file and enqueue the overlap tasks."""
    config = parser.parse(configfilename)

    # Volume geometry options are forwarded unchanged from the config.
    geometry = {
        "volshape": config["volshape"],
        "chunkshape": config["chunkshape"],
        "startcoord": config["startcoord"],
        "resolution": config["voxelres"],
    }
    overlap_tasks = tc.create_overlap_tasks(
        config["output"],
        config["baseseg"],
        config["storagestrs"][0],
        **geometry
    )

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(overlap_tasks)
def test_get(sqs, protocol):
    """Insert five PrintTasks, then lease and delete each one in turn."""
    n_inserts = 5

    queue = TaskQueue(getpath(protocol), n_threads=0)
    queue.purge()
    queue.insert(PrintTask() for _ in range(n_inserts))

    for _ in range(n_inserts):
        leased = queue.lease()
        queue.delete(leased)
def main(configfilename):
    """Parse the config file and enqueue the merge-seginfo tasks.

    The size threshold is only read from the config, and only forwarded,
    when the workflow type is "Segmentation"; otherwise ``None`` is passed.
    """
    config = parser.parse(configfilename)

    if config["workflowtype"] == "Segmentation":
        size_threshold = config["szthresh"]
    else:
        size_threshold = None

    storagestrs = config["storagestrs"]
    seginfo_tasks = tc.create_merge_seginfo_tasks(
        storagestrs[0],
        config["nummergetasks"],
        aux_storagestr=storagestrs[1],
        szthresh=size_threshold,
    )

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(seginfo_tasks)
def execute(tag, queue, server, qurl, loop):
    """Pull work from the named queue and run it.

    Args:
        tag: accepted but not used by this function.
        queue: queue name.
        server: queue server type.
        qurl: queue URL.
        loop: when truthy, poll the queue; otherwise lease one task and
            execute it.
    """
    task_queue = TaskQueue(
        queue_name=queue,
        queue_server=server,
        n_threads=0,
        qurl=qurl,
    )
    print("Pulling from {}://{}".format(server, queue))

    # LEASE_SECONDS is a module-level setting; coerce it to int.
    lease_seconds = int(LEASE_SECONDS)

    if not loop:
        # One-shot mode: lease one task and run it.
        task_queue.lease(seconds=lease_seconds).execute()
        return

    task_queue.poll(lease_seconds=lease_seconds, verbose=True)
def test_single_threaded_insertion(sqs, protocol):
    """Tasks inserted without threads come back from ``list()`` as PrintTasks."""
    path = getpath(protocol)
    tq = TaskQueue(path, n_threads=0)

    tq.purge()

    n_inserts = 5
    tq.insert(PrintTask() for _ in range(n_inserts))

    # isinstance (rather than an exact type() comparison) matches the check
    # used by the sibling FunctionTask test.
    assert all(isinstance(task, PrintTask) for task in tq.list())

    tq.purge()
def test_single_threaded_insertion_fns(sqs, protocol):
    """Partials inserted without threads come back from ``list()`` as FunctionTasks."""
    n_inserts = 5

    queue = TaskQueue(getpath(protocol), n_threads=0)
    queue.purge()

    greeters = (
        partial(printfn, "hello world " + str(index))
        for index in range(n_inserts)
    )
    queue.insert(greeters)

    assert all(isinstance(entry, FunctionTask) for entry in queue.list())

    queue.purge()
def downsample_dataset(dataset_name, from_mip=-1, num_mips=1, local=False,
                       n_download_workers=1, n_threads=32):
    """Create downsampling tasks for one of the known watershed datasets.

    Args:
        dataset_name: one of "pinky", "basil" or "pinky100".
        from_mip: forwarded as ``mip`` to the task factory.
        num_mips: forwarded to the task factory.
        local: when truthy, run against a local queue (a MockTaskQueue if
            ``n_threads == 1``, else a LocalTaskQueue with ``n_threads``
            processes); otherwise use the hard-coded SQS queue.
        n_download_workers: forwarded to the task factory.
        n_threads: parallelism for the local queue.

    Raises:
        Exception: if ``dataset_name`` is not a known dataset.
    """
    known_datasets = (
        ("pinky", "gs://neuroglancer/svenmd/pinky40_v11/watershed/"),
        ("basil", "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"),
        ("pinky100", "gs://neuroglancer/nkem/pinky100_v0/ws/lost_no-random/bbox1_0/"),
    )
    ws_path = None
    for known_name, known_path in known_datasets:
        if dataset_name == known_name:
            ws_path = known_path
            break
    if ws_path is None:
        raise Exception("Dataset unknown")

    # Pick the queue flavour once; the task creation call is the same for all.
    if not local:
        queue_context = TaskQueue(
            queue_server='sqs',
            qurl="https://sqs.us-east-1.amazonaws.com/098703261575/nkem-igneous",
        )
    elif n_threads == 1:
        queue_context = MockTaskQueue()
    else:
        queue_context = LocalTaskQueue(parallel=n_threads)

    with queue_context as task_queue:
        tc.create_downsampling_tasks(
            task_queue, ws_path,
            mip=from_mip,
            fill_missing=True,
            num_mips=num_mips,
            n_download_workers=n_download_workers,
            preserve_chunk_size=True,
        )
def mesh_merge(ctx, path, queue, magnitude, dir):
    """
    (2) Merge the mesh pieces produced from the forging step.

    The per-cutout mesh fragments are then assembled and
    merged. However, this process occurs by compiling
    a list of fragment files and uploading a "mesh manifest"
    file that is an index for locating the fragments.
    """
    # Normalize the dataset path the same way the sibling commands
    # (mesh_sharded_merge, skeleton_forge, delete_images) do.
    path = cloudfiles.paths.normalize(path)

    tasks = tc.create_mesh_manifest_tasks(
        path,
        magnitude=magnitude,
        mesh_dir=dir,
    )

    # "parallel" is read from the shared context object; defaults to 1.
    parallel = int(ctx.obj.get("parallel", 1))
    tq = TaskQueue(normalize_path(queue))
    tq.insert(tasks, parallel=parallel)
def lease(self, request):
    """Lease a task in the queue for a specified period of time

    Args:
        request: a LeaseTaskRequest

    lease_seconds is the number of seconds to wait for the task to be
    completed. If the task is not deleted from the queue in that interval,
    then it will become available again for lease

    Returns:
        A LeaseTaskResponse. status == 'OK' on success. If there are no
        tasks available in the requested channel, then status == 'NOT FOUND'.
        Any other value for status indicates failure (it carries the text
        of the exception that was raised).

    the task_id is a unique identifier assigned to the task request when it
    is added. It is needed in order to delete the task

    the task attribute in the response is a copy of the AddTaskRequest
    message that was supplied in the call to taskqueue.add
    """
    # Start pessimistic; only a successful lease upgrades the status.
    response = LeaseTaskResponse(status='NOT FOUND', task_id='')
    try:
        EndpointsHelper.authenticate()
        task = TaskQueue.lease(request.channel, request.lease_seconds)
        if task:
            response.status = 'OK'
            response.task = AddTaskRequest(
                channel=task['channel'],
                name=task['name'],
                content=task['content'],
            )
            response.task_id = task['id']
            response.retry_count = task['retry_count']
    except Exception as err:
        # Failures (including authentication) are reported through the
        # status field instead of propagating to the caller.
        response.status = str(err)
    return response
def testGeoFeed(self):
    """Publish to a GeoFeed, list/get items, and check the PubSub-to-task hookup."""
    item = {
        'topic': 'T',
        'key': 'K',
        'latitude': 39,
        'longitude': -79,
    }

    published_id = GeoFeed.publish(**item)
    self.assertEqual(published_id, item['key'])

    # Pause so the second item is published measurably later than the first.
    time.sleep(2)

    item['key'] = 'L'
    published_id = GeoFeed.publish(**item)
    self.assertEqual(published_id, item['key'])

    # Each listed doc must have a 'published' value below the previous one.
    previous_published = '9999'
    for doc in GeoFeed.list(item['topic']):
        self.assertLess(doc['published'], previous_published)
        previous_published = doc['published']

    # An unknown topic yields nothing at all.
    for doc in GeoFeed.list('NOT_FOUND'):
        self.assertFalse('Should never get here')

    fetched = GeoFeed.get(item['topic'], item['key'])
    self.assertIsNotNone(fetched)

    # Set up a PubSub subscription so that a task is created when an item
    # is published to the feed.
    channel = 'ProcessNew%s' % item['topic']
    PubSub.subscribe(
        GeoFeed._indexname(item['topic']),
        "/pubsub/task",
        {'secret': 'SECRET', 'channel': channel, 'pubname': 'key'},
    )

    # Now publish a new item to the feed. This should trigger creation of
    # a new task in the queue.
    published_id = GeoFeed.publish(**item)

    # Need to manually process the task queue because we're in test mode.
    notify_response = self.executeTask()  # /pubsub/notify
    self.assertEqual(notify_response.json['status'], 'OK')

    task_response = self.executeTask()  # /pubsub/task
    self.assertEqual(task_response.json['status'], 'OK')

    # Now make sure there is a task in the queue.
    leased = TaskQueue().lease(channel=channel)
    self.assertIsNotNone(leased)
    self.assertEqual(leased['id'], task_response.json['id'])
    self.assertEqual(leased['content']['pub_data']['key'], item['key'])
def test_multi_threaded_insertion(sqs, protocol, green, threads):
    """Insert forty PrintTasks through a threaded (or green) queue, then purge."""
    n_inserts = 40

    queue = TaskQueue(getpath(protocol), n_threads=threads, green=green)
    queue.purge()

    queue.insert(PrintTask() for _ in range(n_inserts))

    queue.purge()
def test_400_errors():
    """Deleting a nonexistent task must not raise, for every queue type."""
    global QUEUE_NAME

    for queue_type in QTYPES:
        queue_context = TaskQueue(
            n_threads=1,
            queue_name=QUEUE_NAME,
            queue_server=queue_type,
            qurl=QURL,
        )
        with queue_context as queue:
            queue.delete('nonexistent')
def execute(tag, queue, server, qurl):
    """Worker loop: lease, execute and delete tasks while LOOP is truthy.

    An empty queue triggers a randomized backoff sleep and a retry. An
    EmptyVolumeException is logged and the task is deleted. Any other
    exception is logged and re-raised.
    """
    task_queue = TaskQueue(
        queue_name=queue,
        queue_server=server,
        n_threads=0,
        qurl=qurl,
    )
    print("Pulling from {}://{}".format(server, queue))

    lease_seconds_setting = LEASE_SECONDS
    tries = 0
    with task_queue:
        while LOOP:
            # Placeholder so the log calls below have something to print if
            # the lease itself raises.
            task = 'unknown'
            try:
                task = task_queue.lease(tag=tag, seconds=int(lease_seconds_setting))
                tries += 1
                print(task)
                task.execute()
                task_queue.delete(task)
                logger.log('INFO', task, "succesfully executed")
                tries = 0
            except TaskQueue.QueueEmpty:
                # NOTE(review): tries is only bumped after a successful
                # lease, so on this path it is always 0 or 1 and the backoff
                # window never grows -- confirm whether that is intended.
                time.sleep(random_exponential_window_backoff(tries))
                continue
            except EmptyVolumeException:
                logger.log('WARNING', task, "raised an EmptyVolumeException")
                task_queue.delete(task)
            except Exception as e:
                logger.log('ERROR', task, "raised {}\n {}".format(e, traceback.format_exc()))
                raise  # this will restart the container in kubernetes
def main(configfilename):
    """Parse the config file and enqueue overlap tasks over the configured bounds.

    The bounding box spans from ``startcoord`` to ``startcoord + volshape``.
    """
    config = parser.parse(configfilename)

    origin = Vec(*config["startcoord"])
    extent = Vec(*config["volshape"])
    volume_bounds = Bbox(origin, origin + extent)

    overlap_tasks = tc.create_overlap_tasks(
        config["output"],
        config["baseseg"],
        config["storagestrs"][0],
        bounds=volume_bounds,
        shape=config["chunkshape"],
        # NOTE(review): the voxel resolution is passed as `mip` -- confirm
        # this is what the task factory expects.
        mip=config["voxelres"],
    )

    queue = TaskQueue(config["queueurl"])
    queue.insert_all(overlap_tasks)
def post(self):
    """Create a queue task from a PubSub notification body.

    The JSON request body must contain ``sub_data`` (with a ``channel`` and
    either a ``taskname`` or a ``pubname``) and ``pub_data``. The task name
    is ``sub_data['taskname']`` when present, otherwise the value of
    ``pub_data[sub_data['pubname']]``.

    Writes a JSON object to the response: ``{'status': 'OK', 'id': ...}``
    on success, or ``{'status': 'ERR', 'message': ...}`` on failure.
    """
    self.response.headers.add_header('Content-Type', 'application/json')
    r = {'status': 'OK'}
    try:
        data = json.loads(self.request.body)
        sub_data = data['sub_data']
        channel = sub_data['channel']
        name = sub_data.get('taskname')
        if not name:
            # Fall back to a name taken from the published payload.
            name = data['pub_data'].get(sub_data.get('pubname'))
        r['id'] = TaskQueue.add(channel, name, data)
    except Exception as e:
        # Report the failure in the response body instead of propagating it.
        r['status'] = 'ERR'
        r['message'] = str(e)

    # Send the result back to the caller; without this the JSON body that
    # was built above is never emitted.
    self.response.out.write(json.dumps(r))
def add(self, request):
    """Add a task to the queue

    Args:
        request: an AddTaskRequest

    Returns:
        An AddTaskResponse. status=='OK' on success. Any other value
        indicates failure (it carries the text of the exception that was
        raised) and the task is not added
    """
    response = AddTaskResponse(status='OK')
    try:
        EndpointsHelper.authenticate()
        TaskQueue.add(request.channel, request.name, request.content)
        # TODO: Maybe we should return the task id?
    except Exception as err:
        # Failures (including authentication) are reported through the
        # status field instead of propagating to the caller.
        response.status = str(err)
    return response
def delete(self, request):
    """Remove a task from the queue

    Args:
        request: A DeleteTaskRequest containing:
            a LeaseTaskRequest that was acquired from a prior call to
            taskqueue.lease
            a task_result that indicates whether the task was completed
            successfully

    If the task execution failed and you want it to be re-tried
    automatically, then do NOT call delete, just come back later after the
    lease times out and the task will be available for lease again (unless
    the retry limit is reached).

    Returns:
        A DeleteTaskResponse. status == 'OK' on success, or 'NOT FOUND' if
        the task_id does not exist. Any other value indicates failure (it
        carries the text of the exception that was raised).
    """
    # Start pessimistic; only a successful delete upgrades the status.
    response = DeleteTaskResponse(status='NOT FOUND')
    try:
        EndpointsHelper.authenticate()
        # Rebuild the lease dict that TaskQueue.delete takes from the
        # fields of the request.
        lease = {
            'id': request.lease.task_id,
            'channel': request.lease.task.channel,
            'name': request.lease.task.name,
        }
        if TaskQueue.delete(lease, request.task_result):
            response.status = 'OK'
    except Exception as err:
        # Failures (including authentication) are reported through the
        # status field instead of propagating to the caller.
        response.status = str(err)
    return response
def testretry(self):
    """A task whose lease expires can be re-leased three times, then no more."""
    channel_name = 'RERTY_TEST'
    queue = TaskQueue()

    task_id = queue.add(channel=channel_name, name='N', content=dict(xxx=123))
    self.assertIsNotNone(task_id)

    # Each one-second lease is left to expire; the retry count goes up by one.
    for expected_retry_count in (1, 2, 3):
        leased = queue.lease(channel=channel_name, lease_seconds=1)
        self.assertEqual(task_id, leased['id'])
        self.assertEqual(leased['retry_count'], expected_retry_count)
        time.sleep(1)

    # After the third expiry the task is no longer offered.
    leased = queue.lease(channel=channel_name, lease_seconds=1)
    self.assertIsNone(leased)
from task import Task
from taskqueue import TaskQueue
from taskresults import TaskResults
import sys

if __name__ == '__main__':
    # Demo: queue two ping commands, run them in order, report each result.
    simple_tasks = TaskQueue()
    taskresults = TaskResults()

    demo_jobs = (
        ('My first task', "ping -n 3 google-public-dns-a.google.com"),
        ('My second task', "ping -n 3 google-public-dns-b.google.com"),
    )
    for job_description, job_command in demo_jobs:
        simple_tasks.push(Task(description=job_description,
                               command=job_command,
                               taskresults=taskresults))

    # Drain the queue; peek_next() turns falsy once nothing is left.
    while simple_tasks.peek_next():
        current = simple_tasks.pop()
        sys.stdout.write("Running task " + str(current.GUID) + "\n")
        current.execute()

        # Results are looked up in the shared TaskResults by the task's GUID.
        outcome = taskresults.result(current.GUID)
        sys.stdout.write("Task suceeded: %s\n" % str(outcome.succeeded))
def testall(self):
    """Walk through add, lease and delete on a single channel."""
    queue = TaskQueue()
    payload = {'xxx': 123}

    # A freshly added task is leasable and carries its id, name and channel.
    task_id = queue.add(channel='C', name='N', content=dict(payload))
    self.assertIsNotNone(task_id)

    leased = queue.lease(channel='C')
    self.assertIsNotNone(leased)
    self.assertEqual(leased['id'], task_id)
    self.assertEqual(leased['name'], 'N')
    self.assertEqual(leased['channel'], 'C')

    # The only task is already leased, so nothing more is available here...
    self.assertIsNone(queue.lease(channel='C'))
    # ...and an unknown channel has nothing either.
    self.assertIsNone(queue.lease(channel='NOT FOUND'))

    # Two more tasks: each can be leased and deleted once.
    for task_name in ('N1', 'N2'):
        queue.add(channel='C', name=task_name, content=dict(payload))

    leased = queue.lease(channel='C')
    self.assertTrue(queue.delete(leased, 'DONE'))
    # A second delete of the same lease reports failure.
    self.assertFalse(queue.delete(leased, 'DONE'))

    leased = queue.lease(channel='C')
    self.assertTrue(queue.delete(leased, 'DONE'))

    self.assertIsNone(queue.lease(channel='C'))