def func_for_mock_tmgr_test(mq_hostname, port, pending_queue, completed_queue):
    """Feed a bulk of tasks to a task manager under test and await replies.

    One JSON message holding 16 tasks (state ``SCHEDULING``, executable
    ``/bin/echo``) is published to ``pending_queue``.  ``completed_queue``
    is then polled until 15 messages carrying a task in state ``DONE`` have
    been received; every received message is acknowledged.

    Args:
        mq_hostname: host name of the RabbitMQ server.
        port: port of the RabbitMQ server.
        pending_queue: name of the queue the task bulk is published to.
        completed_queue: name of the queue polled for completed tasks.
    """
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=mq_hostname, port=port))

    # Release the broker connection even if publishing or decoding fails.
    try:
        mq_channel = mq_connection.channel()

        tasks = list()
        for _ in range(16):
            task = Task()
            task.state = states.SCHEDULING
            task.executable = '/bin/echo'
            tasks.append(task.to_dict())

        tasks_as_json = json.dumps(tasks)
        mq_channel.basic_publish(exchange='',
                                 routing_key=pending_queue,
                                 body=tasks_as_json)

        # NOTE(review): 16 tasks are published but only 15 DONE tasks are
        # awaited -- confirm whether the off-by-one is intentional.
        cnt = 0
        while cnt < 15:
            method_frame, props, body = mq_channel.basic_get(
                queue=completed_queue)

            # basic_get yields an empty body when the queue has no message.
            if not body:
                continue

            task = Task()
            task.from_dict(json.loads(body))

            if task.state == states.DONE:
                cnt += 1

            mq_channel.basic_ack(delivery_tag=method_frame.delivery_tag)
    finally:
        # Guard so a broken connection does not mask the original error.
        if mq_connection.is_open:
            mq_connection.close()
def func_for_mock_tmgr_test(mq_hostname, port, pending_queue, completed_queue):
    """Publish a task bulk for a mocked task manager and collect the results.

    Sends a single JSON message with 16 ``SCHEDULING`` tasks (executable
    ``/bin/echo``) to ``pending_queue`` and then polls ``completed_queue``
    until 15 tasks in state ``DONE`` were seen.  Each non-empty message is
    acknowledged after it has been decoded.

    Args:
        mq_hostname: host name of the RabbitMQ server.
        port: port of the RabbitMQ server.
        pending_queue: queue name used as routing key for the task bulk.
        completed_queue: queue name polled for finished tasks.
    """
    conn_params = pika.ConnectionParameters(host=mq_hostname, port=port)
    mq_connection = pika.BlockingConnection(conn_params)

    try:
        mq_channel = mq_connection.channel()

        tasks = list()
        for _ in range(16):
            t = Task()
            t.state = states.SCHEDULING
            t.executable = '/bin/echo'
            tasks.append(t.to_dict())

        mq_channel.basic_publish(exchange='',
                                 routing_key=pending_queue,
                                 body=json.dumps(tasks))

        # NOTE(review): the loop stops after 15 DONE tasks although 16 were
        # published -- confirm this is the intended count.
        cnt = 0
        while cnt < 15:
            method_frame, props, body = mq_channel.basic_get(
                queue=completed_queue)

            if body:
                task = Task()
                task.from_dict(json.loads(body))

                if task.state == states.DONE:
                    cnt += 1

                mq_channel.basic_ack(delivery_tag=method_frame.delivery_tag)
    finally:
        # Always give the connection back, also when an exception escapes;
        # the guard avoids masking that exception if the link already died.
        if mq_connection.is_open:
            mq_connection.close()
def test_wfp_workflow_incomplete():
    """
    **Purpose**: Test that 'workflow_incomplete' of the WFprocessor reports
    True right after the workflow is initialized and False once the single
    task of the workflow has been published as completed and dequeued
    """

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    assert wfp.workflow_incomplete()

    amgr.workflow = [p]

    # NOTE(review): 'profiler' is never read below; the call is kept in case
    # constructing the profiler has side effects the test relies on.
    profiler = ru.Profiler(name='radical.entk.temp')

    # NOTE(review): the next two statements are comparisons whose result is
    # discarded, not assignments -- confirm whether '=' was intended.
    p.stages[0].state == states.SCHEDULING
    p.state == states.SCHEDULED

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    # 't' is the last task visited by the loop above.
    task_as_dict = json.dumps(t.to_dict())

    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    try:
        mq_channel = mq_connection.channel()
        mq_channel.basic_publish(exchange='',
                                 routing_key='%s-completedq-1' % amgr._sid,
                                 body=task_as_dict)
    finally:
        # The connection is only needed for this one publish; close it so
        # the test does not leak a broker connection.
        if mq_connection.is_open:
            mq_connection.close()

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer,
                         name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test,
                   name='temp-proc',
                   args=(wfp, ))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
def test_wfp_dequeue():
    """
    **Purpose**: Test that a task published to the completed queue is picked
    up by the running WFprocessor, i.e. that pipeline, stage and task are in
    state DONE after the processor has been started and terminated
    """

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.initialize_workflow()

    assert p.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    # NOTE(review): the next two statements are comparisons whose result is
    # discarded, not assignments -- confirm whether '=' was intended.
    p.state == states.SCHEDULED
    p.stages[0].state == states.SCHEDULING

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    # 't' is the last task visited by the loop above.
    task_as_dict = json.dumps(t.to_dict())

    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._hostname, port=amgr._port))
    try:
        mq_channel = mq_connection.channel()
        mq_channel.basic_publish(exchange='',
                                 routing_key='%s' % amgr._completed_queue[0],
                                 body=task_as_dict)
    finally:
        # Only needed for the single publish above; do not leak it.
        if mq_connection.is_open:
            mq_connection.close()

    wfp.start_processor()

    th = mt.Thread(target=func_for_dequeue_test,
                   name='temp-proc',
                   args=(p, ))
    th.start()
    th.join()

    wfp.terminate_processor()

    assert p.state == states.DONE
    assert p.stages[0].state == states.DONE

    for t in p.stages[0].tasks:
        assert t.state == states.DONE
def test_wfp_workflow_incomplete():
    """
    **Purpose**: Test that the WFprocessor reports the workflow as incomplete
    after initialization, and as complete after the only task was published
    to the completed queue and the dequeue helper process has finished
    """

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    assert wfp.workflow_incomplete()

    amgr.workflow = [p]

    # NOTE(review): 'profiler' is unused afterwards; kept because creating
    # it may have side effects -- confirm and drop if it does not.
    profiler = ru.Profiler(name='radical.entk.temp')

    # NOTE(review): these are bare comparisons (result discarded), not
    # assignments -- confirm whether the states were meant to be set.
    p.stages[0].state == states.SCHEDULING
    p.state == states.SCHEDULED

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    # Serialize the last task visited by the loop above.
    task_as_dict = json.dumps(t.to_dict())

    conn_params = pika.ConnectionParameters(host=amgr._mq_hostname,
                                            port=amgr._port)
    mq_connection = pika.BlockingConnection(conn_params)
    try:
        mq_channel = mq_connection.channel()
        mq_channel.basic_publish(exchange='',
                                 routing_key='%s-completedq-1' % amgr._sid,
                                 body=task_as_dict)
    finally:
        # Close the short-lived publisher connection instead of leaking it.
        if mq_connection.is_open:
            mq_connection.close()

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer,
                         name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test,
                   name='temp-proc',
                   args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
def sendr(qname, bulk_size, num_tasks):
    """Publish ``num_tasks`` tasks to ``qname`` in bulks and log the timing.

    ``num_tasks`` Task objects are created up front.  Until at least
    ``num_tasks`` tasks have been sent, a bulk made of the first
    ``bulk_size`` tasks of that list (all of them if the list is shorter or
    ``bulk_size`` is never reached) is serialized to JSON and published.
    Start and stop timestamps are written to ``sendr.txt``.

    Any exception is caught and printed together with its traceback; it is
    not propagated to the caller.

    Args:
        qname: name of the queue, used as routing key.
        bulk_size: number of tasks per published message.
        num_tasks: total number of tasks to send.
    """
    connection = None

    try:
        tasks = list()
        for cnt in range(num_tasks):
            task = Task()
            task.name = str(cnt)
            tasks.append(task)

        connection = pika.BlockingConnection(pika.ConnectionParameters(
            host=hostname, port=port, heartbeat=0))
        channel = connection.channel()

        cur_task_cnt = 0

        # 'with' guarantees the log is flushed and closed, also on errors.
        with open('sendr.txt', 'w') as f:
            f.write('start: %f\n' % time.time())

            while cur_task_cnt < num_tasks:
                workload = list()
                wld_size = 0

                # The same leading tasks are re-sent in every bulk; the
                # task list itself is never consumed.
                for task in tasks:
                    workload.append(task.to_dict())
                    wld_size += 1
                    if wld_size == bulk_size:
                        break

                cur_task_cnt += wld_size
                wld_as_json = json.dumps(workload)

                channel.basic_publish(exchange='',
                                      routing_key=qname,
                                      body=wld_as_json)

            f.write('stop: %f\n' % time.time())

    except Exception as ex:
        # Single-argument print calls behave the same on Python 2 and 3.
        print('Error in sendr: %s' % ex)
        print(traceback.format_exc())

    finally:
        # Release the broker connection if it was established.
        if connection is not None and connection.is_open:
            connection.close()
def test_wfp_workflow_incomplete():
    """
    **Purpose**: Test that 'workflow_incomplete' of the WFprocessor returns
    False after the only task of the workflow was published to the completed
    queue and the processor has been started and terminated
    """

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port,
                username=username, password=password)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      rmq_conn_params=amgr._rmq_conn_params,
                      resubmit_failed=False)

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    # 't' is the last task visited by the loop above.
    task_as_dict = json.dumps(t.to_dict())

    credentials = pika.PlainCredentials(amgr._username, amgr._password)
    mq_connection = pika.BlockingConnection(pika.ConnectionParameters(
        host=amgr._hostname, port=amgr._port, credentials=credentials))
    try:
        mq_channel = mq_connection.channel()
        mq_channel.basic_publish(exchange='',
                                 routing_key='%s' % amgr._completed_queue[0],
                                 body=task_as_dict)
    finally:
        # The connection only serves the publish above; do not leak it.
        if mq_connection.is_open:
            mq_connection.close()

    wfp.start_processor()

    th = mt.Thread(target=func_for_dequeue_test,
                   name='temp-proc',
                   args=(p,))
    th.start()
    th.join()

    wfp.terminate_processor()

    assert not wfp.workflow_incomplete()
def test_task_to_dict():
    """
    **Purpose**: Verify that 'Task.to_dict' exports every expected attribute,
    first for an untouched Task and then for a Task whose attributes were set
    """

    # --- untouched task: only default values are expected ------------------
    expected_default = {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': [],
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {'processes': 1,
                     'process_type': None,
                     'threads_per_process': 1,
                     'thread_type': None},
        'gpu_reqs': {'processes': 0,
                     'process_type': None,
                     'threads_per_process': 0,
                     'thread_type': None},
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {'uid': None, 'name': None},
        'parent_pipeline': {'uid': None, 'name': None},
    }

    fresh = Task()
    assert fresh.to_dict() == expected_default

    # --- customised task: every assigned value must show up ----------------
    task = Task()
    task.uid = 'test.0000'
    task.name = 'new'
    task.pre_exec = ['module load abc']
    task.executable = ['sleep']
    task.arguments = ['10']
    task.cpu_reqs['processes'] = 10
    task.cpu_reqs['threads_per_process'] = 2
    task.gpu_reqs['processes'] = 5
    task.gpu_reqs['threads_per_process'] = 3
    task.lfs_per_process = 1024
    task.upload_input_data = ['test1']
    task.copy_input_data = ['test2']
    task.link_input_data = ['test3']
    task.move_input_data = ['test4']
    task.copy_output_data = ['test5']
    task.move_output_data = ['test6']
    task.download_output_data = ['test7']
    task.stdout = 'out'
    task.stderr = 'err'
    task.exit_code = 1
    task.path = 'a/b/c'
    task.tag = 'task.0010'
    task.parent_stage = {'uid': 's1', 'name': 'stage1'}
    task.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    expected_custom = {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': ['sleep'],
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': {'processes': 10,
                     'process_type': None,
                     'threads_per_process': 2,
                     'thread_type': None},
        'gpu_reqs': {'processes': 5,
                     'process_type': None,
                     'threads_per_process': 3,
                     'thread_type': None},
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {'uid': 's1', 'name': 'stage1'},
        'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'},
    }

    assert task.to_dict() == expected_custom
"--wldelta=100", "--equilibrated=False", "--lambda_state=0", "--seed=1"] t.cores = 20 t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr'] t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc', 'PLCpep7.log > PLCpep7_run1_gen0.log', 'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg', 'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg', 'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg', 'PLCpep7.gro > PLCpep7_run1_gen0.gro' ] t = json.dumps(t.to_dict()) msg_num = 0 start = time.time() while msg_num < num_tasks: #message = 'message_%s'%msg_num msg_num+=1 channel.basic_publish(exchange='', routing_key=worker_queue[msg_num%num_queues], body=t) #print(" [x] Sent %r" % message) f = open(DATA +'/producer.txt','w')
def _write_push_log(ind, push_times, proc_mem):
    """Write the collected '<timestamp> <free MB>' samples of pusher ``ind``."""
    with open(DATA + '/push_%s.txt' % ind, 'w') as f:
        # zip stops at the shorter list, so a sample whose memory reading is
        # missing (interrupted between the two appends) is skipped.
        for stamp, mem in zip(push_times, proc_mem):
            f.write('%s %s\n' % (stamp, mem))


def push_function(ind, num_push, num_queues):
    """Push copies of one task to a RabbitMQ queue and record the timing.

    Connects to the broker at ``localhost:32769`` and publishes the same
    serialized task, wrapped with a fresh correlation id, to the queue
    ``queue_<ind % num_queues>`` until ``MAX_TASKS / num_push`` messages were
    sent or ``kill_pusher`` is set.  After every publish the current time
    and the available system memory (in MBytes) are sampled; the samples are
    written to ``DATA/push_<ind>.txt`` on normal completion, on
    ``KeyboardInterrupt`` and on any other exception.  Exceptions are
    printed, not propagated.

    Args:
        ind: index of this pusher; selects the queue and the log file name.
        num_push: number of pushers the ``MAX_TASKS`` tasks are split over.
        num_queues: number of queues the pushers are distributed over.
    """
    # Created before the try block so the handlers below can always dump
    # what was collected, even when connecting to the broker fails.
    push_times = []
    proc_mem = []
    mq_connection = None

    try:
        mq_connection = pika.BlockingConnection(
            pika.ConnectionParameters(host='localhost', port=32769))
        mq_channel = mq_connection.channel()

        tasks_pushed = 0
        proc_tasks = MAX_TASKS / num_push

        t = Task()
        t.arguments = ["--template=PLCpep7_template.mdp",
                       "--newname=PLCpep7_run.mdp",
                       "--wldelta=100",
                       "--equilibrated=False",
                       "--lambda_state=0",
                       "--seed=1"]
        t.cores = 20
        t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
        t.download_output_data = [
            'PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
            'PLCpep7.log > PLCpep7_run1_gen0.log',
            'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
            'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
            'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
            'PLCpep7.gro > PLCpep7_run1_gen0.gro'
        ]

        t_dict = t.to_dict()

        # Single-argument print calls give the same output on Python 2 and
        # 3; the double space reproduces the old "print a, b" separator.
        print('Size of task:  %s' % asizeof.asizeof(t_dict))

        name = 'queue_%s' % (ind % num_queues)

        while (tasks_pushed < proc_tasks) and (not kill_pusher.is_set()):
            corr_id = str(uuid.uuid4())
            obj = {'task': t_dict, 'id': corr_id}

            mq_channel.basic_publish(
                exchange='',
                routing_key=name,
                properties=pika.BasicProperties(correlation_id=corr_id),
                body=json.dumps(obj))

            tasks_pushed += 1

            push_times.append(time.time())
            mem = psutil.virtual_memory().available / (2**20)  # MBytes
            proc_mem.append(mem)

        print('Push:  %s' % tasks_pushed)

        _write_push_log(ind, push_times, proc_mem)
        print('Push proc killed')

    except KeyboardInterrupt:
        print(len(push_times))
        _write_push_log(ind, push_times, proc_mem)
        print('Push proc killed')

    except Exception as ex:
        print('Unexpected error: %s' % ex)
        print(traceback.format_exc())
        _write_push_log(ind, push_times, proc_mem)

    finally:
        # Release the broker connection if it was established.
        if mq_connection is not None and mq_connection.is_open:
            mq_connection.close()
def generate_simulation_pipeline(i):
    """Build the pipeline that runs simulation ``i`` stage after stage.

    The returned pipeline is named ``"<i>"`` and starts with one stage that
    holds a single task running ``simulation.py``.  Every stage gets
    ``post_stage`` as its ``post_exec`` callback; as long as the file
    ``<run_dir>/aggregator/stop.aggregator`` does not exist, that callback
    appends another, identically built stage to the pipeline.

    Creating a stage also runs ``ln -s`` to link the stage's directory under
    ``simulations/all`` into ``simulations/new``.

    Args:
        i: identifier of the simulation; used in pipeline/task names and in
            the simulation directory names.

    Returns:
        The Pipeline holding the first simulation stage.
    """
    p = Pipeline()

    def add_simulation_stage():
        # Append one simulation stage to `p`; return (stage index, stage, task).
        nstages = len(p.stages)

        s = Stage()
        s.name = f"{nstages}"

        t = Task()
        t.cpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 4,
            'thread_type': 'OpenMP'
        }
        t.gpu_reqs = {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        }
        t.name = f" {i}_{nstages} "
        t.executable = PYTHON
        t.arguments = [
            f'{current_dir}/simulation.py',
            f'{run_dir}/simulations/all/{i}_{nstages}',
            ADIOS_XML
        ]

        # The command's status and output are deliberately ignored, exactly
        # as before.
        subprocess.getstatusoutput(
            f'ln -s {run_dir}/simulations/all/{i}_{nstages} {run_dir}/simulations/new/{i}_{nstages}'
        )

        s.add_tasks(t)
        s.post_exec = post_stage
        p.add_stages(s)

        return nstages, s, t

    def post_stage():
        # Keep extending the pipeline until the stop marker file shows up.
        if not os.path.exists(f'{run_dir}/aggregator/stop.aggregator'):
            add_simulation_stage()

    p.name = f"{i}"
    nstages, s, t = add_simulation_stage()

    print(f"In generate_simulation_pipelin({i}): {nstages}")
    print("=" * 20)
    print(p.to_dict())
    print("=" * 20)
    print('-' * 15)
    print(s.to_dict())
    print('-' * 15)
    print('_' * 10)
    print(t.to_dict())
    print('_' * 10)

    return p
t.arguments = [f'{current_dir}/aggregator.py', current_dir, run_dir] subprocess.getstatusoutput(f'mkdir -p {run_dir}/aggregator') s.add_tasks(t) p.add_stages(s) pipelines.append(p) print("After creating an aggregation pipeline") print("=" * 20) print(p.to_dict()) print("=" * 20) print('-' * 15) print(s.to_dict()) print('-' * 15) print('_' * 10) print(t.to_dict()) print('_' * 10) appman = AppManager(hostname=hostname, port=port) print(config) res_dict = { 'resource': RESOURCE, 'walltime': 30, 'cpus': config[RESOURCE]['cores'], 'gpus': config[RESOURCE]['gpus'], 'project': config[RESOURCE]['project'], 'schema': config[RESOURCE]['schema'], 'queue': config[RESOURCE]['queue'] }
def test_task_to_dict():
    """
    **Purpose**: Verify that 'Task.to_dict' exports every expected attribute:
    for an untouched Task, for a Task whose executable was given as a list,
    and for the same Task after the executable was given as a string
    """

    # --- untouched task: only default values are expected ------------------
    expected_default = {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': '',
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {'processes': 1,
                     'process_type': None,
                     'threads_per_process': 1,
                     'thread_type': None},
        'gpu_reqs': {'processes': 0,
                     'process_type': None,
                     'threads_per_process': 0,
                     'thread_type': None},
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {'uid': None, 'name': None},
        'parent_pipeline': {'uid': None, 'name': None},
    }

    fresh = Task()
    assert fresh.to_dict() == expected_default

    # --- customised task ----------------------------------------------------
    task = Task()
    task.uid = 'test.0000'
    task.name = 'new'
    task.pre_exec = ['module load abc']
    task.executable = ['sleep']
    task.arguments = ['10']
    task.cpu_reqs['processes'] = 10
    task.cpu_reqs['threads_per_process'] = 2
    task.gpu_reqs['processes'] = 5
    task.gpu_reqs['threads_per_process'] = 3
    task.lfs_per_process = 1024
    task.upload_input_data = ['test1']
    task.copy_input_data = ['test2']
    task.link_input_data = ['test3']
    task.move_input_data = ['test4']
    task.copy_output_data = ['test5']
    task.move_output_data = ['test6']
    task.download_output_data = ['test7']
    task.stdout = 'out'
    task.stderr = 'err'
    task.exit_code = 1
    task.path = 'a/b/c'
    task.tag = 'task.0010'
    task.parent_stage = {'uid': 's1', 'name': 'stage1'}
    task.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    # The exported dictionary is expected to be the same whether the
    # executable was assigned as a one-element list or as a plain string.
    expected_custom = {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': 'sleep',
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': {'processes': 10,
                     'process_type': None,
                     'threads_per_process': 2,
                     'thread_type': None},
        'gpu_reqs': {'processes': 5,
                     'process_type': None,
                     'threads_per_process': 3,
                     'thread_type': None},
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {'uid': 's1', 'name': 'stage1'},
        'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'},
    }

    assert task.to_dict() == expected_custom

    task.executable = 'sleep'
    assert task.to_dict() == expected_custom
def helper(self):
    """Mimic an execution plugin by marking pending tasks as DONE.

    Until ``self._helper_terminate`` is set, messages are fetched from the
    first pending queue.  Each received task is loaded, set to state
    ``DONE``, published to the ``fork`` exchange and then acknowledged.
    When ``slow_run`` is true the helper sleeps one second after each
    processed task.

    Raises:
        KeyboardInterrupt: re-raised when the user interrupts the helper.
        UnknownError: for any other exception raised while fetching,
            converting or publishing a task.
    """
    mq_connection = None

    try:
        self._logger.info('Helper process started')

        # Thread should run till terminate condition is encountered
        mq_connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=self._mq_hostname))
        mq_channel = mq_connection.channel()

        while not self._helper_terminate.is_set():

            try:
                method_frame, header_frame, body = mq_channel.basic_get(
                    queue=self._pending_queue[0])

                if body:

                    try:
                        task = Task()
                        task.load_from_dict(json.loads(body))
                        task.state = states.DONE
                        task_as_dict = json.dumps(task.to_dict())

                        self._logger.debug(
                            'Got task %s from pending_queue %s'
                            % (task.uid, self._pending_queue[0]))

                        mq_channel.basic_publish(exchange='fork',
                                                 routing_key='',
                                                 body=task_as_dict)

                        self._logger.debug(
                            'Pushed task %s with state %s to completed queue %s and synchronizerq'
                            % (task.uid, task.state,
                               self._completed_queue[0]))

                        # Acknowledge only after the task was forwarded.
                        mq_channel.basic_ack(
                            delivery_tag=method_frame.delivery_tag)

                    except Exception as ex:
                        # Rolling back queue and task status
                        self._logger.error(
                            'Error while pushing task to completed queue, rolling back: %s'
                            % ex)
                        raise UnknownError(text=ex)

                    if slow_run:
                        time.sleep(1)

            except Exception as ex:
                self._logger.error(
                    'Error getting messages from pending queue: %s' % ex)
                raise UnknownError(text=ex)

    except KeyboardInterrupt:
        self._logger.error(
            'Execution interrupted by user (you probably hit Ctrl+C), '
            + 'trying to cancel enqueuer thread gracefully...')
        raise

    except Exception as ex:
        self._logger.error('Unknown error in helper process: %s' % ex)
        # Single-argument print call: same output on Python 2 and 3.
        print(traceback.format_exc())
        raise UnknownError(text=ex)

    finally:
        # Release the broker connection if it was established.
        if mq_connection is not None and mq_connection.is_open:
            mq_connection.close()