def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = '/bin/date'
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/date']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def foo(value):

    t1 = Task(name='simulation')
    t1.environment = ['module load gromacs']
    t1.executable = ['gmx mdrun']
    t1.arguments = ['a', 'b', 'c']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def create_single_task():

    t1 = Task()
    t1.name = 'dummy_task'
    t1.executable = ['placeholder']
    t1.arguments = ['a', 'b', 'c']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
def create_single_task():

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['gmx mdrun']
    t1.arguments = ['a', 'b', 'c']
    t1.copy_input_data = []
    t1.copy_output_data = []

    return t1
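# ----------------------------------------------------------------------
# Aside (not part of the original snippets): the variants above assign
# `executable` both as a plain string and as a single-element list. Which
# form applies depends on the EnTK release -- the to_dict tests further
# below show str() as the default in one version and [] in another -- so
# a rough sketch of both forms:
t_str = Task()
t_str.executable = '/bin/date'       # string form, as in the first variant
t_list = Task()
t_list.executable = ['/bin/date']    # single-element list form, as in the later variants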
def create_pipeline():

    p = Pipeline()
    s = Stage()

    t = Task()
    t.name = 'simulation'
    t.executable = ['/bin/echo']
    t.arguments = ['hello']
    t.copy_input_data = []
    t.copy_output_data = []

    s.add_tasks(t)
    p.add_stages(s)

    return p
def create_pipeline():

    p = Pipeline()
    s = Stage()

    t1 = Task()
    t1.name = 'simulation'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']
    t1.copy_input_data = []
    t1.copy_output_data = []

    s.add_tasks(t1)
    p.add_stages(s)

    return p
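# ----------------------------------------------------------------------
# Sketch of how such a pipeline is typically executed (not part of the
# original snippets): hostname/port point at EnTK's RabbitMQ broker, and
# the resource description values are placeholders, not tested settings.
from radical.entk import AppManager

if __name__ == '__main__':

    amgr = AppManager(hostname='localhost', port=5672)

    # placeholder resource request: one core for ten minutes on localhost
    amgr.resource_desc = {'resource': 'local.localhost',
                          'walltime': 10,
                          'cpus': 1}

    amgr.workflow = [create_pipeline()]
    amgr.run()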
def test_assignment_exceptions():

    t = Task()

    data_type = [1, 'a', True, list()]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': str(),
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None},
                 'gpu_reqs': {'processes': 0, 'process_type': None,
                              'threads_per_process': 0, 'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    expected = {'uid': 'test.0000',
                'name': 'new',
                'state': states.INITIAL,
                'state_history': [states.INITIAL],
                'pre_exec': ['module load abc'],
                'executable': 'sleep',
                'arguments': ['10'],
                'post_exec': [],
                'cpu_reqs': {'processes': 10, 'process_type': None,
                             'threads_per_process': 2, 'thread_type': None},
                'gpu_reqs': {'processes': 5, 'process_type': None,
                             'threads_per_process': 3, 'thread_type': None},
                'lfs_per_process': 1024,
                'upload_input_data': ['test1'],
                'copy_input_data': ['test2'],
                'link_input_data': ['test3'],
                'move_input_data': ['test4'],
                'copy_output_data': ['test5'],
                'move_output_data': ['test6'],
                'download_output_data': ['test7'],
                'stdout': 'out',
                'stderr': 'err',
                'exit_code': 1,
                'path': 'a/b/c',
                'tag': 'task.0010',
                'parent_stage': {'uid': 's1', 'name': 'stage1'},
                'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}

    d = t.to_dict()
    assert d == expected

    # assigning the executable as a plain string must serialize identically
    t.executable = 'sleep'
    d = t.to_dict()
    assert d == expected
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0
    # for non-zero iterations, files are assumed to be in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)

    # if cur_iter == 0:
    #     restart_iter = 0
    # else:
    #     restart_iter = cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = ['export tasks=pre_proc_task',
                                  'export iter=%s' % cur_iter,
                                  'export OMP_NUM_THREADS=1']
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [combined_path,
                                   combined_path + time.strftime("%Y-%m-%d-%H-%M")]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
    # ------------------------------------------------------------------

    while cur_iter < int(Kconfig.num_iterations):

        # --------------------------------------------------------------
        # sim_stage:
        # Purpose:   In iter=1, use the input files from pre_loop, else use
        #            the outputs of the analysis stage in the previous
        #            iteration. Run gromacs on each of the smaller files.
        #            Parameter files and executables are input from pre_loop.
        #            There are 'numCUs' instances of gromacs per iteration.
        # Arguments:
        #            grompp = gromacs parameters filename
        #            topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        def_rep_per_thread = int(num_replicas / num_parallel) + 1
        num_allocated_rep = 0
        num_used_threads = 0

        while num_allocated_rep < num_replicas:

            if num_used_threads == num_parallel:
                print("ALERT: tried to use more GPUs than allocated")

            if (num_replicas - num_allocated_rep) > def_rep_per_thread:
                use_replicas = def_rep_per_thread
            else:
                use_replicas = num_replicas - num_allocated_rep

            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = ['module unload PrgEnv-cray',
                            'module load PrgEnv-gnu',
                            'module unload bwpy',
                            'module load bwpy',
                            'module add bwpy-mpi',
                            'module add fftw',
                            'module add cray-netcdf',
                            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                            'module add cmake',
                            'module unload darshan xalt',
                            'export CRAYPE_LINK_TYPE=dynamic',
                            'export CRAY_ADD_RPATH=yes',
                            'export FC=ftn',
                            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                            'export tasks=md',
                            'export iter=%s' % cur_iter,
                            'export OMP_NUM_THREADS=1']
            # if cur_iter == 0 and num_allocated_rep == 0:
            #     pre_exec_arr = pre_exec_arr + ['mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {'processes': 1, 'process_type': None,
                                 'threads_per_process': 1, 'thread_type': None}
            sim_task.cpu_reqs = {'processes': 0, 'process_type': None,
                                 'threads_per_process': 0, 'thread_type': None}
            sim_task.arguments = ['run_openmm.py',
                                  '--trajstride', '10',
                                  '--idxstart', str(num_allocated_rep),
                                  '--idxend', str(num_allocated_rep + use_replicas),
                                  '--path', combined_path,
                                  '--iter', str(cur_iter),
                                  '--md_steps', str(Kconfig.md_steps),
                                  '--save_traj', 'True',
                                  '>', 'md.log']

            link_arr = ['$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file))]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
                    # copy_arr = copy_arr + ['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_arr = copy_arr + ['$SHARED/%s > %s/iter0_input%s.pdb' %
                                           (Kconfig.md_input_file, combined_path, idx)]
                # if cur_iter == 0 and num_allocated_rep == 0:
                #     copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]

            sim_task.link_input_data = link_arr  # + copy_arr
            sim_task.copy_input_data = copy_arr

            if str(Kconfig.strategy) == 'extend':
                copy_out = []
                for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
                    copy_out = copy_out + ['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                                           (combined_path, cur_iter, idx,
                                            combined_path, cur_iter + 1, idx)]
                sim_task.copy_output_data = copy_out

            # if Kconfig.ndx_file is not None:
            #     sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------

        # --------------------------------------------------------------
        # pre_ana_task:
        # Purpose:   The output of each gromacs instance in the simulation
        #            stage is a small coordinate file. Concatenate such files
        #            from each of the gromacs instances to form a larger file.
        # Arguments:
        #            numCUs = number of simulation instances / number of
        #                     small files to be concatenated

        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ['module unload PrgEnv-cray',
                                 'module load PrgEnv-gnu',
                                 'module unload bwpy',
                                 'module load bwpy/0.3.0',
                                 'module add bwpy-mpi',
                                 'module add fftw',
                                 'module add cray-netcdf',
                                 'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                                 'module add cmake',
                                 'module unload darshan xalt',
                                 'export CRAYPE_LINK_TYPE=dynamic',
                                 'export CRAY_ADD_RPATH=yes',
                                 'export FC=ftn',
                                 'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                                 'export tasks=tica_msm_ana',
                                 'export PYEMMA_NJOBS=1',
                                 'export iter=%s' % cur_iter,
                                 'export OMP_NUM_THREADS=1']
            ana_task.executable = ['python']
            ana_task.arguments = ['run-tica-msm.py',
                                  '--path', combined_path,
                                  '--n_select', str(num_replicas),
                                  '--cur_iter', str(cur_iter),
                                  '--Kconfig', str(args.Kconfig),
                                  '>', 'analyse.log']
            ana_task.cpu_reqs = {'processes': 1, 'process_type': None,
                                 'threads_per_process': 1, 'thread_type': None}
            ana_task.link_input_data = ['$SHARED/run-tica-msm.py > run-tica-msm.py',
                                        '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)]
            # for sim_num in range(min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))):
            ana_task.copy_output_data = ['analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)]
            # ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)]
            # 'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter
            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------

        # --------------------------------------------------------------
        # lsdmap:
        # Purpose:   Perform LSDMap on the large coordinate file to generate
        #            weights and eigen values.
        # Arguments:
        #            config = name of the config file to be used during LSDMap
        #
        # if cur_iter % Kconfig.nsave == 0:
        #     post_ana_task.download_output_data = [
        #         'out.gro > output/iter_%s/out.gro' % cur_iter,
        #         'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #         'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % cur_iter,
        #         'ncopies.nc > output/iter_%s/ncopies.nc' % cur_iter,
        #         '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path, cur_iter, cur_iter)]
        # post_ana_task.copy_output_data = [
        #     'ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path, cur_iter),
        #     'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path, cur_iter),
        #     'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
        #     'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path, cur_iter),
        #     'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path, cur_iter),
        #     'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path, cur_iter)]
        # post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, post_ana_stage.uid, post_ana_task.uid)
        # post_ana_stage.add_tasks(post_ana_task)
        # wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
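# ----------------------------------------------------------------------
# Hypothetical driver for create_workflow() above (not part of the
# original snippet): the argparse flag and the use of imp.load_source to
# turn the settings file into a module are assumptions about how the
# surrounding script wires things up.
import imp
import argparse

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--Kconfig', required=True, help='path to the Kconfig settings file')
    args, _ = parser.parse_known_args()

    # load the settings file as a module, so attributes like
    # Kconfig.num_iterations resolve as plain attribute access
    Kconfig = imp.load_source('Kconfig', args.Kconfig)

    wf = create_workflow(Kconfig, args)
    # wf would then be handed to an AppManager, as sketched after create_pipeline()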
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data
            with pytest.raises(TypeError):
                t.path = data
            with pytest.raises(TypeError):
                t.parent_stage = data
            with pytest.raises(TypeError):
                t.parent_pipeline = data
            with pytest.raises(TypeError):
                t.stdout = data
            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.executable = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.move_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, unicode):
            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}
                t.gpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}

        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
                t.gpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
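# ----------------------------------------------------------------------
# A guess at how the (s, l, i, b) parameters above are supplied: the EnTK
# test suite drives this kind of test with hypothesis strategies, roughly
# as below. The exact strategies are an assumption, not taken from the
# original file.
from hypothesis import given, strategies as st

@given(s=st.text(), l=st.lists(st.text()), i=st.integers(), b=st.booleans())
def test_task_exceptions_hypothesis(s, l, i, b):
    # delegate to the parameterized test body defined above
    test_task_exceptions(s, l, i, b)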
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': [],
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None},
                 'gpu_reqs': {'processes': 0, 'process_type': None,
                              'threads_per_process': 0, 'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {'uid': 'test.0000',
                 'name': 'new',
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': ['module load abc'],
                 'executable': ['sleep'],
                 'arguments': ['10'],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 10, 'process_type': None,
                              'threads_per_process': 2, 'thread_type': None},
                 'gpu_reqs': {'processes': 5, 'process_type': None,
                              'threads_per_process': 3, 'thread_type': None},
                 'lfs_per_process': 1024,
                 'upload_input_data': ['test1'],
                 'copy_input_data': ['test2'],
                 'link_input_data': ['test3'],
                 'move_input_data': ['test4'],
                 'copy_output_data': ['test5'],
                 'move_output_data': ['test6'],
                 'download_output_data': ['test7'],
                 'stdout': 'out',
                 'stderr': 'err',
                 'exit_code': 1,
                 'path': 'a/b/c',
                 'tag': 'task.0010',
                 'parent_stage': {'uid': 's1', 'name': 'stage1'},
                 'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}
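# ----------------------------------------------------------------------
# A complementary round-trip sketch (not in the original suite), assuming
# this EnTK version also exposes Task.from_dict: a serialized task should
# reconstruct to an equal dictionary.
def test_task_dict_roundtrip():

    t = Task()
    t.name = 'roundtrip'
    t.executable = ['sleep']
    t.arguments = ['10']

    d = t.to_dict()

    t2 = Task()
    t2.from_dict(d)

    assert t2.to_dict() == d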
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0
    # for non-zero iterations, files are assumed to be in combined_path
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]

    try:
        systemxml = str(Kconfig.systemxml)
    except AttributeError:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except AttributeError:
        integratorxml = 'integrator-5.xml'

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # resume from the last iteration for which all replica inputs exist
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' % (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    # if cur_iter == 0:
    #     pre_proc_stage2 = Stage()
    #     pre_proc_task2 = Task()
    #     pre_proc_task2.pre_exec = ['export tasks=pre_proc_task',
    #                                'export iter=%s' % cur_iter,
    #                                'export OMP_NUM_THREADS=1']
    #     pre_proc_task2.executable = ['ls']
    #     pre_proc_task2.arguments = ['-l']
    #     pre_proc_task2.copy_input_data = [
    #         '$SHARED/%s > %s/%s' % (config_file, combined_path, config_file),
    #         '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
    #         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
    #         '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path, Kconfig.md_reference)]
    #         # '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py')
    #     pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
    #     pre_proc_stage2.add_tasks(pre_proc_task2)
    #     wf.add_stages(pre_proc_stage2)
    # ------------------------------------------------------------------

    start_iter = cur_iter
    while cur_iter < int(Kconfig.num_iterations) and cur_iter < start_iter + 1:

        # --------------------------------------------------------------
        # sim_stage:
        # Purpose:   In iter=1, use the input files from pre_loop, else use
        #            the outputs of the analysis stage in the previous
        #            iteration. Run gromacs on each of the smaller files.
        #            Parameter files and executables are input from pre_loop.
        #            There are 'numCUs' instances of gromacs per iteration.
        # Arguments:
        #            grompp = gromacs parameters filename
        #            topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0

        while num_allocated_rep < num_replicas:

            def_rep_per_thread = int(math.ceil(float(num_replicas - num_allocated_rep) /
                                               float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread, num_replicas - num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)

            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = md_settings + ['export tasks=md', 'export iter=%s' % cur_iter]
            # if cur_iter == 0 and num_allocated_rep == 0:
            #     pre_exec_arr = pre_exec_arr + ['mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {'processes': 1, 'process_type': None,
                                 'threads_per_process': 1, 'thread_type': 'CUDA'}
            sim_task.cpu_reqs = {'processes': 1, 'process_type': None,
                                 'threads_per_process': 10, 'thread_type': 'OpenMP'}
            sim_task.arguments = ['run_openmm.py',
                                  '--Kconfig', config_file,
                                  '--idxstart', str(num_allocated_rep),
                                  '--idxend', str(num_allocated_rep + use_replicas),
                                  '--path', combined_path,
                                  '>', 'md.log']
            # previously also:
            # '--trajstride', str(Kconfig.trajstride), '--Kconfig', str(args.Kconfig),
            # '--iter', str(cur_iter), '--md_steps', str(Kconfig.md_steps),
            # '--save_traj', 'True'

            if Kconfig.md_use_xml == 'yes':
                link_arr = ['$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)),
                            '$SHARED/%s > %s' % (systemxml, systemxml),
                            '$SHARED/%s > %s' % (integratorxml, integratorxml),
                            '$SHARED/%s > %s' % (config_file, config_file)]
            else:
                link_arr = ['$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)),
                            '$SHARED/%s > %s' % (config_file, config_file)]

            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + ['$SHARED/%s > %s/iter0_input%s.pdb' %
                                           (Kconfig.md_input_file, combined_path, idx)]
                # if num_allocated_rep == 0:
                #     copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
                #                            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)]
                # if cur_iter == 0 and num_allocated_rep == 0:
                #     copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]

            sim_task.link_input_data = link_arr  # + copy_arr
            sim_task.copy_input_data = copy_arr

            copy_out = []
            # if str(Kconfig.strategy) == 'extend':
            #     for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
            #         copy_out = copy_out + ['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
            #                                (combined_path, cur_iter, idx, combined_path, cur_iter + 1, idx)]
            # for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
            #     copy_out = copy_out + ['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]
            sim_task.copy_output_data = copy_out
            # if Kconfig.ndx_file is not None:
            #     sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                pre_exec_arr = ana_settings
                ana_task.pre_exec = pre_exec_arr
                ana_task.link_input_data = ['$SHARED/%s > %s' % (script_ana, script_ana),
                                            '$SHARED/%s > %s' % (config_file, config_file)]
                ana_task.gpu_reqs = {'processes': 1, 'process_type': None,
                                     'threads_per_process': 1, 'thread_type': 'CUDA'}
                ana_task.cpu_reqs = {'processes': 1, 'process_type': None,
                                     'threads_per_process': 10, 'thread_type': 'OpenMP'}
                ana_task.arguments = [script_ana, '--Kconfig', config_file, '>', 'analysis.log']
                ana_task.copy_output_data = ['analysis.log > %s/analysis_iter%s_r%s.log' %
                                             (combined_path, cur_iter, anatask)]
                ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, sim_stage.uid, ana_task.uid)
                sim_stage.add_tasks(ana_task)

        wf.add_stages(sim_stage)

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
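# ----------------------------------------------------------------------
# Standalone sketch of the packing rule used in the sim loop above:
# distribute `num_replicas` across at most `num_parallel` tasks as evenly
# as possible, each chunk given as an (idxstart, idxend) pair.
import math

def pack_replicas(num_replicas, num_parallel):

    chunks = []
    allocated = 0
    used = 0
    while allocated < num_replicas:
        per_task = int(math.ceil(float(num_replicas - allocated) /
                                 float(num_parallel - used)))
        take = min(per_task, num_replicas - allocated)
        chunks.append((allocated, allocated + take))
        allocated += take
        used += 1
    return chunks

# e.g. pack_replicas(10, 4) -> [(0, 3), (3, 6), (6, 8), (8, 10)]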
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
    ComputeUnitDescription with the complete Task description
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholder_dict = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4, 'process_type': 'MPI',
                   'threads_per_process': 1, 'thread_type': 'OpenMP'}
    t1.gpu_reqs = {'processes': 4, 'process_type': 'MPI',
                   'threads_per_process': 2, 'thread_type': 'OpenMP'}
    t1.post_exec = ['echo test']
    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s
    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'], t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment
    # was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']
    assert cud.post_exec == t1.post_exec

    assert {'source': 'upload_input.dat', 'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat', 'target': 'download_output.dat'} in cud.output_staging
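# ----------------------------------------------------------------------
# For reference, the Task-to-CUD field mapping the assertions above imply,
# written out by hand against the (older) radical.pilot
# ComputeUnitDescription API. Attribute names are taken from the test's
# assertions, so treat the exact set as version-dependent; this is a
# sketch, not the library's create_cud_from_task implementation.
import radical.pilot as rp

def cud_from_task_sketch(task):

    cud = rp.ComputeUnitDescription()
    cud.name = '%s,%s,%s,%s,%s,%s' % (task.uid, task.name,
                                      task.parent_stage['uid'], task.parent_stage['name'],
                                      task.parent_pipeline['uid'], task.parent_pipeline['name'])
    cud.pre_exec = task.pre_exec
    cud.executable = task.executable
    cud.arguments = task.arguments
    cud.post_exec = task.post_exec
    cud.cpu_processes = task.cpu_reqs['processes']
    cud.cpu_threads = task.cpu_reqs['threads_per_process']
    cud.cpu_process_type = task.cpu_reqs['process_type']
    cud.cpu_thread_type = task.cpu_reqs['thread_type']
    cud.gpu_processes = task.gpu_reqs['processes']
    cud.gpu_threads = task.gpu_reqs['threads_per_process']
    cud.gpu_process_type = task.gpu_reqs['process_type']
    cud.gpu_thread_type = task.gpu_reqs['thread_type']

    return cud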
def get_pipeline(instance, iterations):

    # Create a Pipeline object
    p = Pipeline()

    # Create Stage 1
    s1 = Stage()

    # Create a Task
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = ['analysis_1.py',
                    '--template', 'CB7G3_template.mdp',
                    '--newname', 'CB7G3_run.mdp',
                    '--wldelta', '2',
                    '--equilibrated', 'False',
                    '--lambda_state', '0',
                    '--seed', '%s' % SEED]
    t1.cores = 1
    t1.copy_input_data = ['$SHARED/CB7G3_template.mdp',
                          '$SHARED/analysis_1.py']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Create Stage 2
        s2 = Stage()

        # Create a Task
        t2 = Task()
        t2.pre_exec = ['source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash']
        t2.executable = ['gmx grompp']
        t2.arguments = ['-f', 'CB7G3_run.mdp',
                        '-c', 'CB7G3.gro',
                        '-p', 'CB7G3.top',
                        '-n', 'CB7G3.ndx',
                        '-o', 'CB7G3.tpr',
                        '-maxwarn', '10']
        t2.cores = 1
        t2.copy_input_data = ['$SHARED/CB7G3.ndx',
                              '$SHARED/CB7G3.top',
                              '$SHARED/3atomtypes.itp',
                              '$SHARED/3_GMX.itp',
                              '$SHARED/cucurbit_7_uril_GMX.itp']

        if it == 0:
            t2.copy_input_data += ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s1.uid, t1.uid),
                                   '$SHARED/CB7G3.gro']
        else:
            t2.copy_input_data += ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s4.uid, t4.uid),
                                   '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' % (p.uid, s3.uid, t3.uid)]

        # Add the Task to the Stage
        s2.add_tasks(t2)

        # Add Stage to the Pipeline
        p.add_stages(s2)

        # Create Stage 3
        s3 = Stage()

        # Create a Task
        t3 = Task()
        t3.pre_exec = ['source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash']
        t3.executable = ['gmx mdrun']
        t3.arguments = ['-nt', 20,
                        '-deffnm', 'CB7G3',
                        '-dhdl', 'CB7G3_dhdl.xvg']
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % (p.uid, s2.uid, t2.uid)]
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)]

        # Add the Task to the Stage
        s3.add_tasks(t3)

        # Add Stage to the Pipeline
        p.add_stages(s3)

        # Create Stage 4
        s4 = Stage()

        # Create a Task
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data']
        t4.executable = ['python']
        t4.arguments = ['--newname=CB7G3_run.mdp',
                        '--template=CB7G3_template.mdp',
                        '--dir=./data',
                        # '--prev_data=%s' % DATA_LOC
                        '--gen={0}'.format(it, instance),
                        '--run={1}'.format(it, instance)]
        t4.cores = 1
        t4.link_input_data = ['$SHARED/analysis_2.py',
                              '$SHARED/alchemical_analysis.py',
                              '$SHARED/CB7G3_template.mdp']
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(it, instance)]

        # Add the Task to the Stage
        s4.add_tasks(t4)

        # Add Stage to the Pipeline
        p.add_stages(s4)

    return p
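# ----------------------------------------------------------------------
# Note on the '$Pipeline_*_Stage_*_Task_*' strings used in get_pipeline():
# EnTK resolves '$SHARED' to the pilot-wide staging area and
# '$Pipeline_<x>_Stage_<y>_Task_<z>' to the work directory of that task,
# so a later stage can consume a predecessor's output by reference. A
# minimal sketch, reusing the p/s1/t1 names from get_pipeline() above:
t_follow = Task()
t_follow.executable = ['/bin/cat']
t_follow.arguments = ['CB7G3_run.mdp']
t_follow.copy_input_data = [
    '$SHARED/CB7G3_template.mdp',                                            # from the pilot staging area
    '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s1.uid, t1.uid)  # from an earlier task's sandbox
]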
def test_output_list_from_task():
    """
    **Purpose**: Test if the 'get_output_list_from_task' function generates
    the correct RP output transfer directives when given a Task.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_output_list_from_task(t, placeholders)

    # Test copy output data
    t = Task()
    t.copy_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_output_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0])

    t = Task()
    t.copy_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_output_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0].split('>')[1].strip())

    # Test move output data
    t = Task()
    t.move_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_output_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0])

    t = Task()
    t.move_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_output_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0].split('>')[1].strip())

    # Test download output data
    t = Task()
    t.download_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholders)
    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.download_output_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0])

    t = Task()
    t.download_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholders)
    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.download_output_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0].split('>')[1].strip())
def create_workflow(Kconfig):

    wf = Pipeline()

    # ------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0
    # for non-zero iterations, files are assumed to be in combined_path
    combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-grlsd'

    if cur_iter == 0:
        restart_iter = 0
    else:
        restart_iter = cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = ['module load bwpy',
                                  'export tasks=pre_proc',
                                  'export iter=-1',
                                  'export OMP_NUM_THREADS=1']
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = ['spliter.py',
                                   '-n', Kconfig.num_parallel_MD_sim,
                                   '-gro', 'input.gro',
                                   '--clone', str(Kconfig.num_replicas)]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/iter_%s/input.gro' % (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter),
            '$SHARED/%s > input.gro' % os.path.basename(Kconfig.md_input_file),
            '$SHARED/spliter.py > spliter.py',
            '$SHARED/gro.py > gro.py']
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
    # ------------------------------------------------------------------
    else:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = ['module load bwpy',
                                  'export tasks=pre_proc',
                                  'export iter=-1',
                                  'export OMP_NUM_THREADS=1']
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = ['spliter.py',
                                   '-n', Kconfig.num_parallel_MD_sim,
                                   '-gro', 'input.gro']
        pre_proc_task.copy_input_data = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
            '$SHARED/spliter.py > spliter.py',
            '$SHARED/gro.py > gro.py']
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    while cur_iter < int(Kconfig.num_iterations):

        # --------------------------------------------------------------
        # sim_stage:
        # Purpose:   In iter=1, use the input files from pre_loop, else use
        #            the outputs of the analysis stage in the previous
        #            iteration. Run gromacs on each of the smaller files.
        #            Parameter files and executables are input from pre_loop.
        #            There are 'numCUs' instances of gromacs per iteration.
        # Arguments:
        #            grompp = gromacs parameters filename
        #            topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()

        for sim_num in range(min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))):

            sim_task = Task()

            if Kconfig.use_gpus == 'False':
                sim_task.executable = ['/sw/bw/bwpy/0.3.0/python-single/usr/bin/python']
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter]
                # on Blue Waters, tasks on one node are executed concurrently
                sim_task.cores = int(Kconfig.num_CUs_per_MD_replica)
            else:
                sim_task.executable = ['python']
                sim_task.pre_exec = [
                    'module swap PrgEnv-cray PrgEnv-gnu',
                    'module add bwpy',
                    'module add bwpy-mpi',
                    'module add fftw',
                    'module add cray-netcdf',
                    'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                    'module add cmake',
                    'module unload darshan xalt',
                    'export CRAYPE_LINK_TYPE=dynamic',
                    'export CRAY_ADD_RPATH=yes',
                    'export FC=ftn',
                    'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                    'export tasks=md',
                    'export iter=%s' % cur_iter,
                    'export OMP_NUM_THREADS=1']
                sim_task.gpu_reqs = {'processes': 1, 'process_type': None,
                                     'threads_per_process': 1, 'thread_type': None}
                sim_task.cpu_reqs = {'processes': 0, 'process_type': None,
                                     'threads_per_process': 0, 'thread_type': None}

            sim_task.arguments = ['run_openmm.py',
                                  '--gro', 'start.gro',
                                  '--out', 'out.gro',
                                  '--md_steps', str(Kconfig.md_steps),
                                  '--save_traj', 'False',
                                  '>', 'md.log']
            sim_task.link_input_data = ['$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file))]

            # if Kconfig.ndx_file is not None:
            #     sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            if restart_iter == cur_iter:
                sim_task.link_input_data.append('%s/temp/start%s.gro > start.gro' % (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append('%s/temp/start%s.gro > start.gro' % (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------

        # --------------------------------------------------------------
        # pre_ana_task:
        # Purpose:   The output of each gromacs instance in the simulation
        #            stage is a small coordinate file. Concatenate such files
        #            from each of the gromacs instances to form a larger file.
        # Arguments:
        #            numCUs = number of simulation instances / number of
        #                     small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'module swap PrgEnv-cray PrgEnv-gnu',
            'module add bwpy',
            'module add bwpy-mpi',
            'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake',
            'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic',
            'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=pre_ana',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1']
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']
        pre_ana_task.link_input_data = ['$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py']

        for sim_num in range(min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))):
            pre_ana_task.link_input_data += ['%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)]
        # 'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------

        # --------------------------------------------------------------
        # lsdmap:
        # Purpose:   Perform LSDMap on the large coordinate file to generate
        #            weights and eigen values.
        # Arguments:
        #            config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu',
            'module unload bwpy',
            'module load bwpy/0.3.0',
            'module add bwpy-mpi',
            'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake',
            'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic',
            'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=lsdmap',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1']
        ana_task.executable = ['lsdmap']  # /u/sciteam/hruska/local/bin/lsdmap
        ana_task.arguments = ['-f', os.path.basename(Kconfig.lsdm_config_file),
                              '-c', 'tmpha.gro',
                              '-n', 'out.nn',
                              '-w', 'weight.w']
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.lsdm_config_file)),
            '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter)]
        ana_task.copy_output_data = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
            # 'lsdmap.log > output/iter_%s/lsdmap.log' % cur_iter,
            'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter),
            'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter),
            'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter),
            'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter),
            'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter)]

        if cur_iter > 0:
            ana_task.link_input_data += ['%s/iter_%s/weight_out.w > weight.w' % (combined_path, cur_iter - 1)]
        if cur_iter % Kconfig.nsave == 0:
            ana_task.download_output_data = ['lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter]

        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, ana_stage.uid, ana_task.uid)
        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------

        # --------------------------------------------------------------
        # post_lsdmap:
        # Purpose:   Use the weights and eigen values generated in lsdmap,
        #            along with other parameter files from pre_loop, to
        #            generate the new coordinate file to be used by the
        #            simulation step in the next iteration.
        # Arguments:
        #            num_replicas        = number of configurations to be
        #                                  generated in the new coordinate file
        #            out                 = output filename
        #            cycle               = iteration number
        #            max_dead_neighbors  = max dead neighbors to be considered
        #            max_alive_neighbors = max alive neighbors to be considered
        #            numCUs              = number of simulation instances /
        #                                  number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task._name = 'post_ana_task'

        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray',
                'module load PrgEnv-gnu',
                'module unload bwpy',
                'module add bwpy/0.3.0',
                'module add bwpy-mpi',
                'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake',
                'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic',
                'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=post_ana',
                'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1']
            post_ana_task.executable = ['python']
            post_ana_task.arguments = ['post_analyze.py',
                                       Kconfig.num_replicas, 'tmpha.ev', 'ncopies.nc',
                                       'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
                                       Kconfig.max_alive_neighbors,
                                       Kconfig.max_dead_neighbors,
                                       'input.gro', cur_iter,
                                       Kconfig.num_parallel_MD_sim,
                                       'weight_out.w', 'tmpha.eg']
            post_ana_task.link_input_data = [
                '$SHARED/post_analyze.py > post_analyze.py',
                '$SHARED/selection.py > selection.py',
                '$SHARED/selection-cluster.py > selection-cluster.py',
                '$SHARED/reweighting.py > reweighting.py',
                '$SHARED/spliter.py > spliter.py',
                '$SHARED/gro.py > gro.py',
                '%s/iter_%s/weight_out.w > weight.w' % (combined_path, cur_iter - 1),
                '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter),
                '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter)]

            if cur_iter % Kconfig.nsave == 0:
                post_ana_task.download_output_data = [
                    'out.gro > output/iter_%s/out.gro' % cur_iter,
                    'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                    'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % cur_iter,
                    'ncopies.nc > output/iter_%s/ncopies.nc' % cur_iter,
                    '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path, cur_iter, cur_iter)]

            post_ana_task.copy_output_data = [
                'ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path, cur_iter),
                'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path, cur_iter),
                'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png' % cur_iter,
                'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png' % cur_iter,
                'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png' % cur_iter]

        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, post_ana_stage.uid, post_ana_task.uid)
        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def test_output_list_from_task():
    """
    **Purpose**: Test if the 'get_output_list_from_task' function generates
    the correct RP output transfer directives when given a Task
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholder_dict = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_output_list_from_task(t, placeholder_dict)

    # Test copy output data
    t = Task()
    t.copy_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.copy_output_data[0]
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0])

    t = Task()
    t.copy_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.copy_output_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0].split('>')[1].strip())

    # Test move output data
    t = Task()
    t.move_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.move_output_data[0]
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0])

    t = Task()
    t.move_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.move_output_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0].split('>')[1].strip())

    # Test download output data
    t = Task()
    t.download_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.download_output_data[0]
    assert 'action' not in ip_list[0]
    assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0])

    t = Task()
    t.download_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholder_dict)
    assert ip_list[0]['source'] == t.download_output_data[0].split('>')[0].strip()
    assert 'action' not in ip_list[0]
    assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0].split('>')[1].strip())
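# ----------------------------------------------------------------------
# Minimal sketch of the 'source > target' convention the two tests above
# exercise (source/target parsing only; the per-attribute COPY/MOVE action
# is layered on top of this): a plain path stages to its basename, while
# 'src > dst' renames the file on staging.
import os

def parse_staging_directive(entry):

    if '>' in entry:
        source, target = [x.strip() for x in entry.split('>')]
    else:
        source, target = entry, entry
    return {'source': source, 'target': os.path.basename(target)}

assert parse_staging_directive('/home/vivek/test.dat') == \
       {'source': '/home/vivek/test.dat', 'target': 'test.dat'}
assert parse_staging_directive('/home/vivek/test.dat > new_test.dat') == \
       {'source': '/home/vivek/test.dat', 'target': 'new_test.dat'}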
def func_on_true():

    global cur_iter, book

    # Create Stage 2
    s2 = Stage()
    s2.name = 'iter%s-s2' % cur_iter[instance]

    # Create a Task
    t2 = Task()
    t2.name = 'iter%s-s2-t2' % cur_iter[instance]
    t2.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    t2.executable = ['gmx grompp']
    t2.arguments = ['-f', 'CB7G3_run.mdp',
                    '-c', 'CB7G3.gro',
                    '-p', 'CB7G3.top',
                    '-n', 'CB7G3.ndx',
                    '-o', 'CB7G3.tpr',
                    '-maxwarn', '10']
    t2.cores = 1
    t2.copy_input_data = ['$SHARED/CB7G3.ndx',
                          '$SHARED/CB7G3.top',
                          '$SHARED/3atomtypes.itp',
                          '$SHARED/3_GMX.itp',
                          '$SHARED/cucurbit_7_uril_GMX.itp']

    if cur_iter[instance] == 1:
        t2.copy_input_data += ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                               (p.name,
                                book[p.name]['stages'][-1]['name'],
                                book[p.name]['stages'][-1]['task']),
                               '$SHARED/CB7G3.gro']
    else:
        t2.copy_input_data += ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                               (p.name,
                                book[p.name]['stages'][-1]['name'],
                                book[p.name]['stages'][-1]['task']),
                               '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
                               (p.name,
                                book[p.name]['stages'][-2]['name'],
                                book[p.name]['stages'][-2]['task'])]

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s2.name, 'task': t2.name})

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create Stage 3
    s3 = Stage()
    s3.name = 'iter%s-s3' % cur_iter[instance]

    # Create a Task
    t3 = Task()
    t3.name = 'iter%s-s3-t3' % cur_iter[instance]
    t3.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    t3.executable = ['gmx mdrun']
    t3.arguments = ['-nt', 20,
                    '-deffnm', 'CB7G3',
                    '-dhdl', 'CB7G3_dhdl.xvg']
    t3.cores = 20
    # t3.mpi = True
    t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' %
                          (p.name,
                           book[p.name]['stages'][-1]['name'],
                           book[p.name]['stages'][-1]['task'])]
    t3.copy_output_data = [
        'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(cur_iter[instance], instance),
        'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(cur_iter[instance], instance)]
    t3.download_output_data = [
        'CB7G3.xtc > ./output/CB7G3_run{1}_gen{0}.xtc'.format(cur_iter[instance], instance),
        'CB7G3.log > ./output/CB7G3_run{1}_gen{0}.log'.format(cur_iter[instance], instance),
        'CB7G3_dhdl.xvg > ./output/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullf.xvg > ./output/CB7G3_run{1}_gen{0}_pullf.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullx.xvg > ./output/CB7G3_run{1}_gen{0}_pullx.xvg'.format(cur_iter[instance], instance),
        'CB7G3.gro > ./output/CB7G3_run{1}_gen{0}.gro'.format(cur_iter[instance], instance)]

    # Add the Task to the Stage
    s3.add_tasks(t3)

    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s3.name, 'task': t3.name})

    # Add Stage to the Pipeline
    p.add_stages(s3)

    # Create Stage 4
    s4 = Stage()
    s4.name = 'iter%s-s4' % cur_iter[instance]

    # Create a Task
    t4 = Task()
    t4.name = 'iter%s-s4-t4' % cur_iter[instance]
    t4.pre_exec = ['module load python/2.7.7-anaconda',
                   'export PYTHONPATH=%s/alchemical_analysis:$PYTHONPATH' % ALCH_ANA_PATH,
                   'export PYTHONPATH=%s:$PYTHONPATH' % ALCH_ANA_PATH,
                   'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
                   'ln -s ../staging_area data']
    t4.executable = ['python']
    t4.arguments = ['analysis_2.py',
                    '--newname=CB7G3_run.mdp',
                    '--template=CB7G3_template.mdp',
                    '--dir=./data',
                    # '--prev_data=%s' % DATA_LOC
                    '--gen={0}'.format(cur_iter[instance], instance),
                    '--run={1}'.format(cur_iter[instance], instance)]
    t4.cores = 1
    t4.copy_input_data = ['$SHARED/analysis_2.py',
                          '$SHARED/alchemical_analysis.py',
                          '$SHARED/CB7G3_template.mdp']
    t4.download_output_data = [
        'analyze_1/results.txt > ./output/results_run{1}_gen{0}.txt'.format(cur_iter[instance], instance),
        'STDOUT > ./output/stdout_run{1}_gen{0}'.format(cur_iter[instance], instance),
        'STDERR > ./output/stderr_run{1}_gen{0}'.format(cur_iter[instance], instance),
        'CB7G3_run.mdp > ./output/CB7G3_run{1}_gen{0}.mdp'.format(cur_iter[instance], instance),
        'results_average.txt > ./output/results_average_run{1}_gen{0}.txt'.format(cur_iter[instance], instance)]

    s4.post_exec = {'condition': func_condition,
                    'on_true': func_on_true,
                    'on_false': func_on_false}

    # Add the Task to the Stage
    s4.add_tasks(t4)

    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s4.name, 'task': t4.name})

    # Add Stage to the Pipeline
    p.add_stages(s4)

    print(book)
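# ----------------------------------------------------------------------
# Sketch of the callbacks that s4.post_exec above refers to. This is an
# assumption about how the original script defines them: `condition`
# advances the per-instance counter and returns a boolean, after which
# EnTK invokes `on_true` (appending another generation of stages) or
# `on_false`. MAX_ITERS is a hypothetical bound, not taken from the snippet.
MAX_ITERS = 10

def func_condition():
    # advance the per-instance iteration counter and decide whether to loop
    cur_iter[instance] += 1
    return cur_iter[instance] <= MAX_ITERS

def func_on_false():
    # adaptivity ends: no further stages are appended to the pipeline
    print('pipeline %s ended after %s iterations' % (p.name, cur_iter[instance]))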
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0
    # For a non-zero start iteration, the input files are assumed to already
    # be present in combined_path.
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  # run-tica-msm4.py

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # Resume from the last iteration for which a complete set of replica
    # input files already exists in combined_path.
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' % (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    if cur_iter == 0:
        #pre_proc_stage = Stage()
        #pre_proc_task = Task()
        #pre_proc_task.pre_exec = ['export tasks=pre_proc_task', 'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
        #pre_proc_task.executable = ['mv']
        #pre_proc_task.arguments = [combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")]
        #pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        #pre_proc_stage.add_tasks(pre_proc_task)
        #wf.add_stages(pre_proc_stage)

        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path, Kconfig.md_reference)
            # '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py')
        ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    # ------------------------------------------------------------------------------------------------------------------
    start_iter = cur_iter
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        # Purpose:   In iter=1, use the input files from pre_loop; else use
        #            the outputs of the analysis stage in the previous
        #            iteration. Run gromacs on each of the smaller files.
        #            Parameter files and executables are input from pre_loop.
        #            There are 'numCUs' instances of gromacs per iteration.
        # Arguments:
        #            grompp = gromacs parameters filename
        #            topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        #num_used_threads = 0
        #print(def_rep_per_thread)
        while num_allocated_rep < num_replicas:
            #if num_used_threads >= num_parallel:
            #    print("ALERT: tried to use more GPUs than allocated")

            # Spread the remaining replicas evenly over the remaining slots.
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            #if (num_replicas - num_allocated_rep) > def_rep_per_thread:  # use all threads
            #    use_replicas = def_rep_per_thread
            #else:                                                        # use only part of the threads
            #    use_replicas = num_replicas - num_allocated_rep
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)

            sim_task = Task()
            sim_task.executable = ['python']
            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter == 0 and num_allocated_rep == 0:
            #    pre_exec_arr = pre_exec_arr + ['mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--trajstride', str(Kconfig.trajstride),
                '--Kconfig', str(args.Kconfig),
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(num_allocated_rep + use_replicas),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', str(Kconfig.save_alltraj),
                '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep == 0:
                #    copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
                #                           '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                           '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)]
            #if cur_iter == 0 and num_allocated_rep == 0:
            #    copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  # + copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy) == 'extend':
            #    for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
            #        copy_out = copy_out + ['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter + 1), idx)]
            #for idx in range(num_allocated_rep, num_allocated_rep + use_replicas):
            #    copy_out = copy_out + ['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]
            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        # Purpose:   The output of each gromacs instance in the simulation
        #            stage is a small coordinate file. Concatenate such files
        #            from each of the gromacs instances to form a larger file.
        # Arguments:
        #            numCUs = number of simulation instances / number of
        #                     small files to be concatenated

        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                script_ana,
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '--ref', str(Kconfig.md_reference),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)
            ]
            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            #                             'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)]
            #                             'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter
            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        # Purpose:   Perform LSDMap on the large coordinate file to generate
        #            weights and eigen values.
        # Arguments:
        #            config = name of the config file to be used during LSDMap

        #if (cur_iter % Kconfig.nsave == 0):
        #    post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                          'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                          'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % cur_iter,
        #                                          'ncopies.nc > output/iter_%s/ncopies.nc' % cur_iter,
        #                                          '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path, cur_iter, cur_iter)]
        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path, cur_iter),
        #                                  'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path, cur_iter),
        #                                  'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
        #                                  'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path, cur_iter),
        #                                  'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path, cur_iter),
        #                                  'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path, cur_iter)]
        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, post_ana_stage.uid, post_ana_task.uid)
        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
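# For context, a create_workflow like the one above is normally handed to an
# EnTK AppManager by a small driver script. A minimal sketch, assuming a
# local RabbitMQ endpoint, a Blue Waters allocation named 'bamm', and that
# Kconfig/args come from the caller's usual config parsing:

# Minimal driver sketch (all resource values here are assumptions).
from radical.entk import AppManager

amgr = AppManager(hostname='localhost', port=5672)
amgr.resource_desc = {
    'resource': 'ncsa.bw_aprun',  # target resource label
    'walltime': 60,               # minutes
    'cpus': 32,
    'project': 'bamm',            # assumed allocation name
}
amgr.workflow = set([create_workflow(Kconfig, args)])
amgr.run()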
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0
    # For a non-zero start iteration, the input files are assumed to already
    # be present in combined_path.
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  # run-tica-msm4.py

    vpy4_settings = [
        'module swap PrgEnv-cray PrgEnv-gnu',
        'module load bwpy/1.2.4',
        'module add bwpy-mpi',
        'module add fftw/3.3.4.10',
        'module add cray-netcdf',
        'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
        'module add cmake/3.1.3',
        'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic',
        'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy4_2_settings = [
        'module unload bwpy',
        'module load bwpy',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LIBRARY_PATH="${BWPY_LIBRARY_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enabled',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugin',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'bwpy-environ',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy8_settings = [
        'module unload PrgEnv-cray',
        'module load PrgEnv-gnu',
        'module unload gcc',
        'module load gcc/5.3.0',
        'module unload bwpy',
        'module load bwpy/2.0.0-pre0',
        'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy8/bin/activate',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy9_settings = [
        'module unload PrgEnv-cray',
        'module load PrgEnv-gnu',
        'module unload gcc',
        'module load gcc/5.3.0',
        'module unload bwpy',
        'module load bwpy/2.0.0-pre1',
        'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy9/bin/activate',
        'printenv > env.log',
        'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    #'module load craype-ml-plugin-py3/1.1.0'

    # NOTE: md_settings/ana_settings remain unbound (NameError later) if the
    # configured environment name matches none of the cases below.
    if Kconfig.md_env == 'vpy4':
        md_settings = vpy4_settings
    if Kconfig.md_env == 'vpy8':
        md_settings = vpy8_settings
    if Kconfig.ana_env == 'vpy8':
        ana_settings = vpy8_settings
    if Kconfig.md_env == 'vpy9':
        md_settings = vpy9_settings
    if Kconfig.ana_env == 'vpy9':
        ana_settings = vpy9_settings

    #if cur_iter == 0:
    #    restart_iter = 0
    #else:
    #    restart_iter = cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path,
            combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path, Kconfig.md_reference),
            '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py')
        ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    # ------------------------------------------------------------------------------------------------------------------
    while cur_iter < int(Kconfig.num_iterations):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        # Purpose:   In iter=1, use the input files from pre_loop; else use
        #            the outputs of the analysis stage in the previous
        #            iteration. Run gromacs on each of the smaller files.
        #            Parameter files and executables are input from pre_loop.
        #            There are 'numCUs' instances of gromacs per iteration.
        # Arguments:
        #            grompp = gromacs parameters filename
        #            topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        # Note: under Python 2, num_replicas / num_parallel is integer
        # division, so the ceil has no effect there.
        def_rep_per_thread = int(np.ceil(num_replicas / num_parallel))
        num_allocated_rep = 0
        num_used_threads = 0
        while num_allocated_rep < num_replicas:
            # num_used_threads is never incremented below, so this check
            # never fires.
            if num_used_threads == num_parallel:
                print("ALERT: tried to use more GPUs than allocated")
            if (num_replicas - num_allocated_rep) > def_rep_per_thread:
                use_replicas = def_rep_per_thread
            else:
                use_replicas = num_replicas - num_allocated_rep

            sim_task = Task()
            sim_task.executable = ['bwpy-environ']  # ['python']
            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter == 0 and num_allocated_rep == 0:
            #    pre_exec_arr = pre_exec_arr + ['mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'python', 'run_openmm.py',
                '--trajstride', str(Kconfig.trajstride),
                '--Kconfig', str(args.Kconfig),
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(num_allocated_rep + use_replicas),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', 'True',
                '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep == 0:
                #    copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
                #                           '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                           '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)]
            #if cur_iter == 0 and num_allocated_rep == 0:
            #    copy_arr = copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  # + copy_arr
            sim_task.copy_input_data = copy_arr

            copy_out = []
            if str(Kconfig.strategy) == 'extend':
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr = copy_arr + ['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                        (combined_path, cur_iter, idx, combined_path,
                         cur_iter + 1, idx)
                    ]
            for idx in range(num_allocated_rep,
                             num_allocated_rep + use_replicas):
                #copy_arr = copy_arr + ['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                copy_out = copy_out + [
                    'md.log > %s/md_logs/iter%s_md%s.log' %
                    (combined_path, cur_iter, idx)
                ]
            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        # Purpose:   The output of each gromacs instance in the simulation
        #            stage is a small coordinate file. Concatenate such files
        #            from each of the gromacs instances to form a larger file.
        # Arguments:
        #            numCUs = number of simulation instances / number of
        #                     small files to be concatenated

        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['bwpy-environ']
            ana_task.arguments = [
                'python', script_ana,
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '--ref', str(Kconfig.md_reference),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)
            ]
            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            #                             'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)]
            #                             'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter
            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        # Purpose:   Perform LSDMap on the large coordinate file to generate
        #            weights and eigen values.
        # Arguments:
        #            config = name of the config file to be used during LSDMap

        #if (cur_iter % Kconfig.nsave == 0):
        #    post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                          'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                          'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % cur_iter,
        #                                          'ncopies.nc > output/iter_%s/ncopies.nc' % cur_iter,
        #                                          '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path, cur_iter, cur_iter)]
        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path, cur_iter),
        #                                  'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path, cur_iter),
        #                                  'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
        #                                  'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path, cur_iter),
        #                                  'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path, cur_iter),
        #                                  'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path, cur_iter)]
        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, post_ana_stage.uid, post_ana_task.uid)
        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
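# One fragile spot above: if Kconfig.md_env or Kconfig.ana_env names an
# environment with no matching `if` (for example 'vpy4' on the analysis
# side), md_settings/ana_settings are never bound and the first use raises a
# NameError. A small defensive rewrite (a sketch, not part of the original
# script) would fail fast instead:

# Sketch: map environment names to the settings lists defined above and fail
# fast on unknown names instead of hitting a NameError much later.
env_settings = {
    'vpy4': vpy4_settings,
    'vpy8': vpy8_settings,
    'vpy9': vpy9_settings,
}
try:
    md_settings = env_settings[Kconfig.md_env]
    ana_settings = env_settings[Kconfig.ana_env]
except KeyError as exc:
    raise ValueError('unknown environment name in Kconfig: %s' % exc)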
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
    ComputeUnitDescription with the complete Task description
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = 'grompp'
    t1.arguments = ['hello']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    t1.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP'
    }
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    cud = create_cud_from_task(t1, placeholders)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (
        t1.uid, t1.name,
        t1.parent_stage['uid'], t1.parent_stage['name'],
        t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # RP returns the executable as a string, regardless of whether it was
    # assigned as a string or a list.
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.post_exec == t1.post_exec
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']

    assert {'source': 'upload_input.dat',
            'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat',
            'action': rp.COPY,
            'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat',
            'action': rp.LINK,
            'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat',
            'action': rp.COPY,
            'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat',
            'target': 'download_output.dat'} in cud.output_staging
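# The staging dictionaries asserted above encode EnTK's 'source > target'
# directive convention. A hypothetical helper (not the library's actual
# code) showing the translation the test exercises:

import os
import radical.pilot as rp

# Hypothetical illustration of the 'source > target' convention; the real
# translation happens inside create_cud_from_task.
def to_staging_dict(directive, action=None):
    if '>' in directive:
        source, target = (x.strip() for x in directive.split('>', 1))
    else:
        source = directive.strip()
        target = os.path.basename(source)
    d = {'source': source, 'target': target}
    if action is not None:  # e.g. rp.COPY or rp.LINK; omitted for transfers
        d['action'] = action
    return d

assert to_staging_dict('copy_input.dat', rp.COPY) == {
    'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'}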
def create_workflow(Kconfig):

    # User settings
    ENSEMBLE_SIZE = int(Kconfig.num_CUs)       # number of ensemble members
    TOTAL_ITERS = int(Kconfig.num_iterations)  # number of iterations to run in the current trial

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    '''
    pre_proc_stage :
        Purpose   : Transfer files and split the input file into smaller
                    files, to be used by each of the gromacs instances in
                    the first iteration.
        Arguments :
            inputfile = file to be split
            numCUs    = number of simulation instances / number of smaller files
    '''
    pre_proc_stage = Stage()
    pre_proc_task = Task()
    pre_proc_task.pre_exec = ['module load bwpy']
    pre_proc_task.executable = ['python']
    pre_proc_task.arguments = [
        'spliter.py', Kconfig.num_CUs,
        os.path.basename(Kconfig.md_input_file)
    ]
    pre_proc_task.copy_input_data = [
        '$SHARED/%s' % os.path.basename(Kconfig.md_input_file),
        '$SHARED/spliter.py',
        '$SHARED/gro.py'
    ]
    pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
        wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
    pre_proc_stage.add_tasks(pre_proc_task)
    wf.add_stages(pre_proc_stage)

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = 0
    while cur_iter < TOTAL_ITERS:

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        # Purpose:   In iter=1, use the input files from pre_loop; else use
        #            the outputs of the analysis stage in the previous
        #            iteration. Run gromacs on each of the smaller files.
        #            Parameter files and executables are input from pre_loop.
        #            There are 'numCUs' instances of gromacs per iteration.
        # Arguments:
        #            grompp = gromacs parameters filename
        #            topol  = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(ENSEMBLE_SIZE):
            sim_task = Task()
            sim_task.pre_exec = [
                'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
                'module load bwpy',
                'module load platform-mpi',
                'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
                'export PATH=/u/sciteam/balasubr/.local/bin:$PATH'
            ]
            sim_task.executable = ['python']
            sim_task.cores = 16
            sim_task.arguments = [
                'run.py',
                '--mdp', os.path.basename(Kconfig.mdp_file),
                '--top', os.path.basename(Kconfig.top_file),
                '--gro', 'start.gro',
                '--out', 'out.gro'
            ]
            sim_task.link_input_data = [
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.mdp_file)),
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.top_file)),
                '$SHARED/run.py > run.py'
            ]
            if Kconfig.ndx_file is not None:
                sim_task.link_input_data.append(
                    '$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))
            if cur_iter == 0:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)
        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        # Purpose:   The output of each gromacs instance in the simulation
        #            stage is a small coordinate file. Concatenate such files
        #            from each of the gromacs instances to form a larger file.
        # Arguments:
        #            numCUs = number of simulation instances / number of
        #                     small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
            'module load bwpy'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze.py', Kconfig.num_CUs, 'tmp.gro', '.']
        pre_ana_task.link_input_data = ['$SHARED/pre_analyze.py > pre_analyze.py']
        for sim_num in range(ENSEMBLE_SIZE):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]
        pre_ana_task.copy_output_data = [
            'tmpha.gro > $SHARED/iter_%s/tmpha.gro' % cur_iter,
            'tmp.gro > $SHARED/iter_%s/tmp.gro' % cur_iter
        ]
        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        # Purpose:   Perform LSDMap on the large coordinate file to generate
        #            weights and eigen values.
        # Arguments:
        #            config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load bwpy',
            'module load platform-mpi',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        ana_task.executable = ['lsdmap']
        ana_task.arguments = [
            '-f', os.path.basename(Kconfig.lsdm_config_file),
            '-c', 'tmpha.gro',
            '-n', 'out.nn',
            '-w', 'weight.w'
        ]
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.lsdm_config_file)),
            '$SHARED/iter_%s/tmpha.gro > tmpha.gro' % cur_iter
        ]
        ana_task.copy_output_data = [
            'tmpha.ev > $SHARED/iter_%s/tmpha.ev' % cur_iter,
            'out.nn > $SHARED/iter_%s/out.nn' % cur_iter
        ]
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/weight.w > weight.w' % ana_task_ref
            ]
            ana_task.copy_output_data += [
                'weight.w > $SHARED/iter_%s/weight.w' % cur_iter
            ]
        if cur_iter % Kconfig.nsave == 0:
            ana_task.download_output_data = [
                'lsdmap.log > output/iter%s/lsdmap.log' % cur_iter
            ]
        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)
        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        # Purpose:   Use the weights and eigen values generated in lsdmap,
        #            along with other parameter files from pre_loop, to
        #            generate the new coordinate file to be used by the
        #            simulation step in the next iteration.
        # Arguments:
        #            num_runs            = number of configurations to be generated in the new coordinate file
        #            out                 = output filename
        #            cycle               = iteration number
        #            max_dead_neighbors  = max dead neighbors to be considered
        #            max_alive_neighbors = max alive neighbors to be considered
        #            numCUs              = number of simulation instances / number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task.pre_exec = [
            'module load bwpy',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        post_ana_task.executable = ['python']
        post_ana_task.arguments = [
            'post_analyze.py', Kconfig.num_runs, 'tmpha.ev', 'ncopies.nc',
            'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
            Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
            'input.gro', cur_iter, Kconfig.num_CUs
        ]
        post_ana_task.link_input_data = [
            '$SHARED/post_analyze.py > post_analyze.py',
            '$SHARED/selection.py > selection.py',
            '$SHARED/reweighting.py > reweighting.py',
            '$SHARED/spliter.py > spliter.py',
            '$SHARED/gro.py > gro.py',
            '$SHARED/iter_%s/tmp.gro > tmp.gro' % cur_iter,
            '$SHARED/iter_%s/tmpha.ev > tmpha.ev' % cur_iter,
            '$SHARED/iter_%s/out.nn > out.nn' % cur_iter,
            '$SHARED/input.gro > input.gro'
        ]
        if cur_iter > 0:
            post_ana_task.link_input_data += [
                '%s/weight.w > weight_new.w' % ana_task_ref
            ]
        if cur_iter % Kconfig.nsave == 0:
            post_ana_task.download_output_data = [
                'out.gro > output/iter%s/out.gro' % cur_iter,
                'weight.w > output/iter%s/weight.w' % cur_iter
            ]
        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)
        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1

    return wf
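# The $Pipeline_..._Stage_..._Task_... references used throughout this
# function are substituted by EnTK at runtime with the referenced task's
# sandbox path, which is how each iteration links its start.gro out of the
# previous stage's output. Illustrative values only:

# Illustrative only: the uids shown here are examples; EnTK assigns the real
# ones at runtime and later resolves the reference to the task's working
# directory.
pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
    'pipeline.0000', 'stage.0000', 'task.0000')
# -> '$Pipeline_pipeline.0000_Stage_stage.0000_Task_task.0000'
# so '%s/temp/start0.gro > start.gro' % pre_proc_task_ref links the file
# temp/start0.gro from the pre-processing task's sandbox into the new task.
print(pre_proc_task_ref)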
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data
            with pytest.raises(TypeError):
                t.path = data
            with pytest.raises(TypeError):
                t.parent_stage = data
            with pytest.raises(TypeError):
                t.parent_pipeline = data
            with pytest.raises(TypeError):
                t.stdout = data
            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.move_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, list):
            with pytest.raises(TypeError):
                t.executable = data

        # 'unicode' exists on Python 2 only; on Python 3, checking 'str'
        # alone covers both cases.
        if not isinstance(data, str) and not isinstance(data, unicode):
            # Each assignment gets its own 'raises' block so that every
            # invalid field is actually exercised; statements after the
            # first raise inside a single block would never run.
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }
            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
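# test_task_exceptions takes four generated values (s, l, i, b) but no
# decorator survives above; in this test suite such tests are typically
# driven by hypothesis. A plausible sketch (the concrete strategy choices
# are assumptions, not taken from the original file):

# Sketch: a hypothesis driver for test_task_exceptions.
import hypothesis.strategies as st
from hypothesis import given

@given(s=st.text(), l=st.lists(st.text()), i=st.integers(), b=st.booleans())
def test_task_exceptions(s, l, i, b):
    ...  # body as defined above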