def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create 10 Tasks, each of which concatenates two staged input files
    # into 'output.txt' and downloads the result
    for x in range(10):
        t1 = Task()
        t1.executable = 'cat'
        t1.arguments = ['file1.txt', 'file2.txt', '>', 'output.txt']
        t1.copy_input_data = ['$SHARED/file1.txt', '$SHARED/file2.txt']
        t1.download_output_data = ['output.txt > %s/output_%s.txt' % (cur_dir, x + 1)]

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
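# A minimal driver sketch for the pipeline above; an assumption, not part of
# the original snippet. It presumes a local RabbitMQ broker on the default
# port and that file1.txt and file2.txt exist next to this script. Files
# listed in `shared_data` are staged once and become visible to tasks under
# the $SHARED placeholder.
if __name__ == '__main__':

    import os
    from radical.entk import AppManager

    cur_dir = os.path.dirname(os.path.abspath(__file__))

    appman = AppManager(hostname='localhost', port=5672)
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 10,
                            'cpus'    : 1}

    # Stage the two input files; tasks reference them via $SHARED
    appman.shared_data = ['%s/file1.txt' % cur_dir, '%s/file2.txt' % cur_dir]

    appman.workflow = [generate_pipeline()]
    appman.run()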
def GeneratePipeline(pcfg, ecfg, pipe_name, exp_dir):

    # Append the exp_dir to the ecfg dictionary to simplify things a bit
    ecfg['exp_dir'] = exp_dir

    # Append the input file to the list of options (if need be)
    if "input_data_file" in ecfg.keys():
        ecfg['options']['input_data_file'] = ecfg['input_data_file']

    # Append the pipeline id to the list of options
    ecfg['options']['pipeline_id'] = pipe_name

    # Initialize the pipeline object
    p = Pipeline()

    # Give the pipeline a name
    p.name = pipe_name

    # Loop through the necessary stages for this module
    stage_names = ["pre-process", "fit", "project", "post-process"]
    for this_stage in stage_names:
        if this_stage in pcfg.keys():
            # Populate the pipeline with the stages
            p.add_stages(GenerateStage(pcfg[this_stage], ecfg, p.name, this_stage))

    return p
def test_wfp_check_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      rmq_conn_params=amgr._rmq_conn_params,
                      resubmit_failed=False)

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def test_wfp_initialization(s, i, b, l):

    p = Pipeline()
    st = Stage()
    t = Task()
    t.executable = ['/bin/date']
    st.add_tasks(t)
    p.add_stages(st)

    wfp = WFprocessor(sid='rp.session.local.0000',
                      workflow=set([p]),
                      pending_queue=['pending'],
                      completed_queue=['completed'],
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=True)

    assert len(wfp._uid.split('.')) == 2
    assert 'wfprocessor' == wfp._uid.split('.')[0]
    assert wfp._pending_queue == ['pending']
    assert wfp._completed_queue == ['completed']
    assert wfp._mq_hostname == hostname
    assert wfp._port == port
    assert wfp._wfp_process is None
    assert wfp._workflow == set([p])

    if not isinstance(s, str):
        wfp = WFprocessor(sid=s,
                          workflow=set([p]),
                          pending_queue=l,
                          completed_queue=l,
                          mq_hostname=s,
                          port=i,
                          resubmit_failed=b)
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'
    s1_task_uids = []

    for cnt in range(128):
        # Create a Task object
        t1 = Task()
        t1.name = 't%s' % (cnt + 1)

        # To make a python script executable:
        # 1) add a "shebang" as the first line: #!/usr/bin/env python
        # 2) chmod +x SerialCode.py
        # The executable must already exist on the target machine.
        t1.executable = '~/SerialCode.py'

        # Add the Task to the Stage
        s1.add_tasks(t1)
        s1_task_uids.append(t1.name)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_wfp_start_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    assert wfp.start_processor()
    assert not wfp._enqueue_thread
    assert not wfp._dequeue_thread
    assert not wfp._enqueue_thread_terminate.is_set()
    assert not wfp._dequeue_thread_terminate.is_set()
    assert not wfp._wfp_terminate.is_set()
    assert wfp._wfp_process.is_alive()

    wfp._wfp_terminate.set()
    wfp._wfp_process.join()
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s' % s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'          # Assign a name to the task (optional)
            t.executable = '/bin/echo'  # Assign executable to the task

            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s' % (t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
def generate_aggregating_task(self):
    """ Function to concatenate the MD trajectory (h5 contact map) """

    p = Pipeline()
    p.name = 'aggregating'
    s2 = Stage()
    s2.name = 'aggregating'

    # Aggregation task
    t2 = Task()

    # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
    # conda_path, agg_path and md_path are assumed to be defined elsewhere
    # (e.g. as module-level configuration)
    t2.pre_exec = []
    t2.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
    t2.pre_exec += ['conda activate %s' % conda_path]
    t2.pre_exec += ['cd %s' % agg_path]
    t2.executable = ['%s/bin/python' % conda_path]  # MD_to_CVAE.py
    t2.arguments = ['%s/MD_to_CVAE.py' % agg_path,
                    '--sim_path', md_path,
                    '--train_frames', 100000]

    # Assign hardware requirements for the task
    t2.cpu_reqs = {'processes'          : 1,
                   'process_type'       : None,
                   'threads_per_process': 4,
                   'thread_type'        : 'OpenMP'}

    # Add the aggregation task to the aggregating stage
    s2.add_tasks(t2)
    p.add_stages(s2)

    return p
def test_pipeline_decrement_stage():

    p = Pipeline()

    s1 = Stage()
    t = Task()
    t.executable = '/bin/date'
    s1.tasks = t

    s2 = Stage()
    t = Task()
    t.executable = '/bin/date'
    s2.tasks = t

    p.add_stages([s1, s2])

    p._increment_stage()
    p._increment_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 2
    assert p._completed_flag.is_set() == True

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p._completed_flag.is_set() == False

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 0
    assert p._completed_flag.is_set() == False
def generate_pipeline():

    global CUR_TASKS, CUR_CORES, duration, MAX_NEW_STAGE

    def func_condition():
        global CUR_NEW_STAGE, MAX_NEW_STAGE
        if CUR_NEW_STAGE < MAX_NEW_STAGE:
            return True
        return False

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        for t in p.stages[CUR_NEW_STAGE].tasks:
            cores = randint(1, 16)
            t.arguments = ['-c', str(cores), '-t', str(duration)]

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):
            t1 = Task()
            t1.pre_exec = ['export PATH=/u/sciteam/balasubr/modules/stress-ng-0.09.34:$PATH']
            t1.executable = ['stress-ng']
            t1.arguments = ['-c', str(CUR_CORES), '-t', str(duration)]
            t1.cpu_reqs = {'processes'          : 1,
                           'process_type'       : '',
                           'threads_per_process': CUR_CORES,
                           'thread_type'        : ''}

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {'condition': func_condition,
                        'on_true'  : func_on_true,
                        'on_false' : func_on_false}

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def test_wfp_check_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def test_wfp_initialization(s, b, l):

    p = Pipeline()
    stage = Stage()
    t = Task()
    t.executable = '/bin/date'
    stage.add_tasks(t)
    p.add_stages(stage)

    rmq_conn_params = pika.ConnectionParameters(host=hostname, port=port)

    wfp = WFprocessor(sid='rp.session.local.0000',
                      workflow=set([p]),
                      pending_queue=['pending'],
                      completed_queue=['completed'],
                      rmq_conn_params=rmq_conn_params,
                      resubmit_failed=True)

    assert len(wfp._uid.split('.')) == 2
    assert 'wfprocessor' == wfp._uid.split('.')[0]
    assert wfp._pending_queue == ['pending']
    assert wfp._completed_queue == ['completed']
    assert wfp._rmq_conn_params == rmq_conn_params
    assert wfp._wfp_process is None
    assert wfp._workflow == set([p])

    if not isinstance(s, str):
        wfp = WFprocessor(sid=s,
                          workflow=set([p]),
                          pending_queue=l,
                          completed_queue=l,
                          rmq_conn_params=rmq_conn_params,
                          resubmit_failed=b)
def __init__(self, cfg: ExperimentConfig):
    self.cfg = cfg
    self.stage_idx = 0
    self.api = DeepDriveMD_API(cfg.experiment_directory)
    self.pipeline = Pipeline()
    self.pipeline.name = self.PIPELINE_NAME
    self._init_experiment_dir()
def test_pipeline_to_dict():

    p = Pipeline()
    d = p.to_dict()

    assert d == {'uid'          : None,
                 'name'         : None,
                 'state'        : states.INITIAL,
                 'state_history': [states.INITIAL],
                 'completed'    : False}
def test_wfp_dequeue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.initialize_workflow()

    assert p.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL
    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    p.state = states.SCHEDULED
    p.stages[0].state = states.SCHEDULING

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED
        task_as_dict = json.dumps(t.to_dict())
        mq_connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=amgr._hostname, port=amgr._port))
        mq_channel = mq_connection.channel()
        mq_channel.basic_publish(exchange='',
                                 routing_key='%s' % amgr._completed_queue[0],
                                 body=task_as_dict)

    wfp.start_processor()

    th = mt.Thread(target=func_for_dequeue_test, name='temp-proc', args=(p,))
    th.start()
    th.join()

    wfp.terminate_processor()

    assert p.state == states.DONE
    assert p.stages[0].state == states.DONE
    for t in p.stages[0].tasks:
        assert t.state == states.DONE
def test_state_order():
    """
    **Purpose**: Test if the Pipeline, Stage and Task are assigned their
    states in the correct order
    """

    def create_single_task():
        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/date']
        t1.copy_input_data = []
        t1.copy_output_data = []
        return t1

    p1 = Pipeline()
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {'resource': 'local.localhost',
                'walltime': 5,
                'cpus'    : 1,
                'project' : ''}

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['RP_ENABLE_OLD_DEFINES'] = 'True'

    appman = Amgr(hostname=hostname, port=port)
    appman.resource_desc = res_dict

    appman.workflow = [p1]
    appman.run()

    p_state_hist = p1.state_history
    assert p_state_hist == ['DESCRIBED', 'SCHEDULING', 'DONE']

    s_state_hist = p1.stages[0].state_history
    assert s_state_hist == ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'DONE']

    for t in p1.stages[0].tasks:
        t_state_hist = t.state_history
        assert t_state_hist == ['DESCRIBED', 'SCHEDULING', 'SCHEDULED',
                                'SUBMITTING', 'SUBMITTED', 'EXECUTED',
                                'DEQUEUEING', 'DEQUEUED', 'DONE']
def test_wfp_workflow_incomplete():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()
    assert wfp.workflow_incomplete()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    p.stages[0].state = states.SCHEDULING
    p.state = states.SCHEDULED

    import json
    import pika

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED
        task_as_dict = json.dumps(t.to_dict())
        mq_connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
        mq_channel = mq_connection.channel()
        mq_channel.basic_publish(exchange='',
                                 routing_key='%s-completedq-1' % amgr._sid,
                                 body=task_as_dict)

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
def generate_pipeline():

    def func_condition():
        global CUR_NEW_STAGE, MAX_NEW_STAGE
        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            func_on_true()
        func_on_false()

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1

        s = Stage()
        for i in range(10):
            t = Task()
            t.executable = '/bin/sleep'
            t.arguments = ['30']
            s.add_tasks(t)

        # Add post-exec to the Stage
        s.post_exec = func_condition
        p.add_stages(s)

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):
        t1 = Task()
        t1.executable = 'sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = func_condition

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
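# A minimal driver sketch for the adaptive pipeline above; an assumption,
# not part of the original snippet. Note that `post_exec` here is a single
# callable (the newer EnTK form) rather than the condition/on_true/on_false
# dictionary used elsewhere in this collection; the callable itself decides
# which branch to take. The two globals are required by the snippet.
if __name__ == '__main__':

    from radical.entk import AppManager

    CUR_NEW_STAGE = 0
    MAX_NEW_STAGE = 4

    appman = AppManager(hostname='localhost', port=5672)
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 30,
                            'cpus'    : 2}
    appman.workflow = [generate_pipeline()]
    appman.run()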
def test_pipeline_decrement_stage():

    p = Pipeline()

    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t

    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t

    p.add_stages([s1, s2])

    p._increment_stage()
    p._increment_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 2
    assert p._completed_flag.is_set() == True

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p._completed_flag.is_set() == False

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 0
    assert p._completed_flag.is_set() == False
def test_pipeline_state_assignment(self, mocked_generate_id, mocked_Lock, mocked_Event, t, l, i, b):

    p = Pipeline()

    data_type = [l, i, b]
    for data in data_type:
        with self.assertRaises(TypeError):
            p.state = data

    if isinstance(t, str):
        with self.assertRaises(ValueError):
            p.state = t

    state_history = list()
    p = Pipeline()
    p._state = None
    p._state_history = list()
    states_list = list(states._pipeline_state_values.keys())
    shuffle(states_list)

    for val in states_list:
        p.state = val
        if val != states.SUSPENDED:
            state_history.append(val)
        self.assertEqual(p._state, val)
        self.assertEqual(p._state_history, state_history)
def test_pipeline_stage_assignment(self, mocked_generate_id, mocked_Lock, mocked_Event):

    p = Pipeline()
    s = mock.MagicMock(spec=Stage)
    p.stages = s

    self.assertEqual(type(p.stages), list)
    self.assertEqual(p._stage_count, 1)
    self.assertEqual(p._cur_stage, 1)
    self.assertEqual(p.stages[0], s)
def test_uid_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()

    s.tasks = t
    p.stages = s

    assert t._parent_pipeline == p.uid
    assert t._parent_stage == s.uid
    assert s._parent_pipeline == p.uid
def test_pipeline_stage_addition(self, mocked_generate_id, mocked_Lock, mocked_Event):

    p = Pipeline()
    s1 = mock.MagicMock(spec=Stage)
    s2 = mock.MagicMock(spec=Stage)
    p.add_stages([s1, s2])

    self.assertEqual(type(p.stages), list)
    self.assertEqual(p._stage_count, 2)
    self.assertEqual(p._cur_stage, 1)
    self.assertEqual(p.stages[0], s1)
    self.assertEqual(p.stages[1], s2)
def test_amgr_run():

    amgr = Amgr(hostname=host, port=port)

    with pytest.raises(MissingError):
        amgr.run()

    p1 = Pipeline()
    p2 = Pipeline()
    p3 = Pipeline()

    with pytest.raises(MissingError):
        amgr.workflow = [p1, p2, p3]
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
def test_pipeline_validate():

    p = Pipeline()
    p._state = 'test'
    with pytest.raises(ValueError):
        p._validate()

    p = Pipeline()
    with pytest.raises(MissingError):
        p._validate()
def generate_pipeline():

    p = Pipeline()

    s1 = Stage()
    t1 = Task()
    t1.executable = '/bin/sleep'
    t1.arguments = ['3']
    s1.add_tasks(t1)
    p.add_stages(s1)

    s2 = Stage()
    t2 = Task()
    t2.executable = '/bin/sleep'
    t2.arguments = ['3']
    s2.add_tasks(t2)
    p.add_stages(s2)

    s3 = Stage()
    t3 = Task()
    t3.executable = '/bin/sleep'
    t3.arguments = ['3']
    s3.add_tasks(t3)
    p.add_stages(s3)

    return p
def test_wfp_enqueue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_enqueue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULED
    assert p.stages[0].state == states.SCHEDULED
    assert p.state == states.SCHEDULING
def generate_pipeline():

    def func_condition():
        global CUR_NEW_STAGE, MAX_NEW_STAGE
        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True
        return False

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        shuffle(p.stages[CUR_NEW_STAGE:])

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):
            t1 = Task()
            t1.executable = '/bin/sleep'
            t1.arguments = ['30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {'condition': func_condition,
                        'on_true'  : func_on_true,
                        'on_false' : func_on_false}

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def generate_pipeline():

    def func_condition():
        global CUR_NEW_STAGE, MAX_NEW_STAGE
        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True
        return False

    def func_on_true():
        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1
        for t in p.stages[CUR_NEW_STAGE].tasks:
            dur = randint(10, 30)
            t.arguments = [str(dur)]

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE + 1):

        # Create a Stage object
        s1 = Stage()

        for _ in range(CUR_TASKS):
            t1 = Task()
            t1.executable = ['sleep']
            t1.arguments = ['30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {'condition': func_condition,
                        'on_true'  : func_on_true,
                        'on_false' : func_on_false}

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def test_amgr_synchronizer():

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 10 tasks to the stage
    for cnt in range(10):
        t = Task()
        t.executable = 'some-executable-%s' % cnt
        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    sid = 'test.0016'
    rmgr = BaseRmgr({}, sid, None, {})
    tmgr = BaseTmgr(sid=sid,
                    pending_queue=['pending-1'],
                    completed_queue=['completed-1'],
                    rmgr=rmgr,
                    mq_hostname=hostname,
                    port=port,
                    rts=None)

    amgr._rmgr = rmgr
    rmgr._task_manager = tmgr

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = mt.Event()
    sync_thread = mt.Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Run the test function in a separate process
    proc = mp.Process(target=func_for_synchronizer_test, name='temp-proc',
                      args=(amgr._sid, p, tmgr))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.COMPLETED
def test_pipeline_assign_uid():

    p = Pipeline()
    try:
        import glob
        import shutil
        import os
        home = os.environ.get('HOME', '/home')
        test_fold = glob.glob('%s/.radical/utils/test*' % home)
        for f in test_fold:
            shutil.rmtree(f)
    except Exception:
        pass

    p._assign_uid('test')
    assert p.uid == 'pipeline.0000'
def test_pipeline_from_dict():

    d = {'uid'          : 're.Pipeline.0000',
         'name'         : 'p1',
         'state'        : states.DONE,
         'state_history': [states.INITIAL, states.DONE],
         'completed'    : True}

    p = Pipeline()
    p.from_dict(d)

    assert p.uid == d['uid']
    assert p.name == d['name']
    assert p.state == d['state']
    assert p.state_history == d['state_history']
    assert p.completed == d['completed']
def create_pipeline():

    p = Pipeline()
    s = Stage()

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['sleep']
    t1.arguments = ['10']

    s.add_tasks(t1)
    p.add_stages(s)

    return p
def test_pipeline_validate_entities(t, l, i, b, se):

    p = Pipeline()

    data_type = [t, l, i, b, se]
    for data in data_type:
        with pytest.raises(TypeError):
            p._validate_entities(data)

    s = Stage()
    assert isinstance(p._validate_entities(s), list)

    s1 = Stage()
    s2 = Stage()
    assert [s1, s2] == p._validate_entities([s1, s2])
def test_pipeline_state_assignment(t, l, i, b):

    p = Pipeline()

    data_type = [l, i, b]
    for data in data_type:
        with pytest.raises(TypeError):
            p.state = data

    if isinstance(t, str):
        with pytest.raises(ValueError):
            p.state = t

    for val in states._pipeline_state_values.keys():
        p.state = val
def test_pipeline_assignment_exceptions(t, l, i, b, se):

    p = Pipeline()

    data_type = [t, l, i, b, se]
    for data in data_type:
        if not isinstance(data, str):
            with pytest.raises(TypeError):
                p.name = data

        with pytest.raises(TypeError):
            p.stages = data

        with pytest.raises(TypeError):
            p.add_stages(data)
def create_pipeline():

    p = Pipeline()
    s = Stage()

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']
    t1.copy_input_data = []
    t1.copy_output_data = []

    s.add_tasks(t1)
    p.add_stages(s)

    return p
def test_pipeline_stage_addition():

    p = Pipeline()

    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t

    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t

    p.add_stages([s1, s2])

    assert type(p.stages) == list
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p.stages[0] == s1
    assert p.stages[1] == s2
def generate_pipeline():

    def func_condition():
        p.suspend()
        print('Suspending pipeline %s for 10 seconds' % p.uid)
        sleep(10)
        return True

    def func_on_true():
        print('Resuming pipeline %s' % p.uid)
        p.resume()

    def func_on_false():
        pass

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):
        t1 = Task()
        t1.executable = '/bin/sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = {'condition': func_condition,
                    'on_true'  : func_on_true,
                    'on_false' : func_on_false}

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
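# A minimal driver sketch for the suspend/resume example above; an
# assumption, not part of the original snippet. The snippet expects `sleep`
# to be importable at module scope (time.sleep), and a local RabbitMQ
# broker is assumed. The pipeline suspends itself after the first stage,
# waits ten seconds, then resumes.
if __name__ == '__main__':

    from time import sleep
    from radical.entk import AppManager

    appman = AppManager(hostname='localhost', port=5672)
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 15,
                            'cpus'    : 2}
    appman.workflow = [generate_pipeline()]
    appman.run()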
def test_integration_local():
    """
    **Purpose**: Run an EnTK application on localhost
    """

    def create_single_task():
        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []
        return t1

    p1 = Pipeline()
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {'resource': 'local.localhost',
                'walltime': 5,
                'cpus'    : 1,
                'project' : ''}

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which sleeps for 300 seconds
    t1 = Task()
    t1.executable = ['/bin/sleep']
    t1.arguments = ['300']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which moves the uploaded file 'temp' to /tmp/
    t1 = Task()
    t1.executable = ['mv']
    t1.arguments = ['temp', '/tmp/']
    t1.upload_input_data = ['%s/temp' % cur_dir]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_wfp_initialize_workflow():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    wfp = WFprocessor(sid='test',
                      workflow=[p],
                      pending_queue=list(),
                      completed_queue=list(),
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=False)

    wfp._initialize_workflow()
    assert p.uid is not None
    assert p.stages[0].uid is not None
    for t in p.stages[0].tasks:
        assert t.uid is not None
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = '/bin/bash'
    t1.arguments = ['-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()

    # Create a Task object
    t2 = Task()
    t2.executable = '/bin/bash'
    t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']

    # Copy data from the task in the first stage to the current task's location
    t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)]

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create another Stage object to hold checksum tasks
    s3 = Stage()

    # Create a Task object
    t3 = Task()
    t3.executable = '/bin/bash'
    t3.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']

    # Copy data from the task in the second stage to the current task's location
    t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/ccount.txt' % (p.uid, s2.uid, t2.uid)]

    # Download the output of the current task to the current location;
    # 'cnt' is a free variable expected to be bound by the caller
    t3.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]

    # Add the Task to the Stage
    s3.add_tasks(t3)

    # Add Stage to the Pipeline
    p.add_stages(s3)

    return p
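# A minimal driver sketch for the three-stage pipeline above; an assumption,
# not part of the original snippet. Since `cnt` is a free variable inside
# generate_pipeline(), the driver binds it as a loop index so each pipeline
# downloads its checksum to a distinct chksum_<cnt>.txt file.
if __name__ == '__main__':

    from radical.entk import AppManager

    appman = AppManager(hostname='localhost', port=5672)
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 10,
                            'cpus'    : 1}

    pipelines = list()
    for cnt in range(2):
        pipelines.append(generate_pipeline())

    appman.workflow = pipelines
    appman.run()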
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task object which always fails (/bin/false)
    t1 = Task()
    t1.name = 't1'
    t1.executable = ['/bin/false']
    # t1.arguments = ['"Hello World"','>>','temp.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create 4K tasks to ensure we don't hit any RMQ connection drops
    for _ in range(4096):
        t1 = Task()
        t1.executable = ['/bin/echo']
        t1.arguments = ['"Hello World"']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_amgr_run_mock():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.name = 'simulation'
    t.executable = ['/bin/date']
    s.tasks = t
    p.add_stages(s)

    res_dict = {'resource': 'local.localhost',
                'walltime': 5,
                'cpus'    : 1,
                'project' : ''}

    appman = Amgr(hostname=hostname, port=port, rts="mock")
    appman.resource_desc = res_dict
    appman.workflow = [p]
    appman.run()
def test_amgr_synchronizer():

    logger = ru.Logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 100 tasks to the stage
    for cnt in range(100):
        t = Task()
        t.executable = ['some-executable-%s' % cnt]
        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Run the test function in a separate process
    proc = Process(target=func_for_synchronizer_test, name='temp-proc',
                   args=(amgr._sid, p, logger, profiler))
    proc.start()
    proc.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULING
    assert p.stages[0].state == states.SCHEDULING
    assert p.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()