def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object that uploads the local file 'temp' and moves it to /tmp/
    t1 = Task()
    t1.executable = 'mv'
    t1.arguments = ['temp', '/tmp/']
    t1.upload_input_data = ['%s/temp' % cur_dir]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
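# A minimal sketch of how such a pipeline is typically submitted with EnTK's
# AppManager; the RabbitMQ endpoint and the resource description below are
# assumptions and must be adapted to the target machine.
from radical.entk import AppManager

if __name__ == '__main__':

    amgr = AppManager(hostname='localhost', port=5672)   # assumed RMQ endpoint
    amgr.resource_desc = {'resource': 'local.localhost', # assumed resource
                          'walltime': 10,
                          'cpus'    : 1}
    amgr.workflow = set([generate_pipeline()])
    amgr.run()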
def setup_replicas(replicas, min_temp, max_temp, timesteps, basename):

    writeInputs.writeInputs(max_temp=max_temp, min_temp=min_temp,
                            replicas=replicas, timesteps=timesteps,
                            basename=basename)

    # Tar up the input files so they can be staged with a single transfer
    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))
    tar.close()

    # Delete the per-replica input files that are now inside the tarball
    for r in range(replicas):
        os.remove('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))

    setup_p = Pipeline()
    setup_p.name = 'untarPipe'

    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir

    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    # Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untarTsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = {'processes': 1, 'process_type': None,
                          'threads_per_process': 1, 'thread_type': None}
    untar_tsk.post_exec = []

    untar_stg.add_tasks(untar_tsk)
    setup_p.add_stages(untar_stg)

    global replica_sandbox
    replica_sandbox = '$Pipeline_%s_Stage_%s_Task_%s' % (setup_p.name,
                                                         untar_stg.name,
                                                         untar_tsk.name)
    print(replica_sandbox)

    return setup_p
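# 'untar_input_files.py' is uploaded and invoked above but not shown in this
# section; a minimal sketch of what such a helper plausibly does (unpack the
# tarball named on the command line into the task sandbox) is:
import sys
import tarfile

if __name__ == '__main__':
    # argv[1] is the tarball staged into the task sandbox
    with tarfile.open(sys.argv[1], 'r') as tar:
        tar.extractall()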
def test_assignment_exceptions():

    t = Task()

    data_type = [1, 'a', True, list()]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.executable = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
def generate_task(self):

    task = Task()
    task.name = self.name

    task.pre_exec = [
        'env > env.log',
        'export PATH=/home/dakka/miniconda3/bin:$PATH',
        'export LD_LIBRARY_PATH=/home/dakka/miniconda3/lib:$LD_LIBRARY_PATH',
        'source activate ve_hyperspace'
    ]

    task.executable = ['python']
    task.arguments = ['optimize.py',
                      '--data_path', self.data_path,
                      '--results_dir', self.results_dir]

    task.cpu_reqs = {'processes': self.hyperparameters**2,
                     'process_type': None,
                     'threads_per_process': 32,
                     'thread_type': 'MPI'}

    task.upload_input_data = [self.optimization_file]

    return task
def InitCycle(self, Replicas, Replica_Cores, md_executable, ExchangeMethod,
              timesteps):
    # "Cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first Cycle
    """

    # Initialize Pipeline
    #self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict  = dict()  # Bookkeeping
    tar_dict = dict()  # Bookkeeping

    # Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(max_temp=350, min_temp=250, replicas=Replicas,
                            timesteps=timesteps)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    # Create tarball of input data
    tar = tarfile.open("Input_Files.tar", "w")
    for name in ["prmtop", "inpcrd", "mdin"]:
        tar.add(name)
    for r in range(Replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    # Delete all input files outside the tarball
    for r in range(Replicas):
        os.remove('mdin_{0}'.format(r))
    self._prof.prof('EndTar', uid=self._uid)

    # Create Untar Stage
    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    # Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.cores = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, untar_stg.name,
                                                     untar_tsk.name)

    # First MD stage: needs to be defined separately since the workflow is
    # not built from a predetermined order
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(Replicas):

        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]  # Use for full temperature exchange
            #'%s/mdin' % tar_dict[0]               # Testing only
        ]
        md_tsk.arguments = ['-O',
                            '-p', 'prmtop',
                            '-i', 'mdin_{0}'.format(r),  # Use this for full temperature exchange
                            '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]

        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, md_stg.name,
                                                        md_tsk.name)
        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print(md_tsk.uid)

    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    #with open('logfile.log', 'a') as logfile:
    #    logfile.write('%.5f' % time.time() + ',' + 'InitEx0' + '\n')

    # Create Exchange Task. The exchange task performs a Metropolis-Hastings
    # thermodynamic balance condition check and writes the exchangePairs.dat
    # file, which contains a sorted list of ordered pairs. Said pairs then
    # exchange configurations by linking output configuration files
    # appropriately.
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)

    self.Book.append(md_dict)

    return p
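# 'AMBERTask' is used above but not defined in this section. A plausible
# minimal sketch of such a factory, assuming it only pre-populates the
# defaults shared by every MD replica (executable, pre-exec and resource
# requirements), is:
def AMBERTask(cores, MD_Executable):

    md_tsk = Task()
    md_tsk.executable = [MD_Executable]
    md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # assumed pre-exec
    md_tsk.cores = cores
    md_tsk.mpi = True

    return md_tsk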
def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):

    # Initialize a task object
    t = Task()

    # Define magic variable dictionary
    mvar_dict = {"PIPELINE_ID": pipe_name}

    # Give this task object a name
    t.name = task_name

    # Pre-exec lets you load modules and set up the environment before
    # executing the workload
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    # Executable to use for the task
    t.executable = tcfg['executable']

    # If there's a user-defined input file (likely for genmod modules), add it
    # to the options list and upload file list if needed
    if "input_data_file" in tcfg['options']:
        tcfg['upload_input_data'].append(
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file']))

    # List of arguments for the executable
    t.arguments = [tcfg['script']] + match_options(tcfg['options'],
                                                   ecfg['options'])

    # CPU requirements for this task
    t.cpu_reqs = {
        'processes': tcfg['cpu']['processes'],
        'process-type': tcfg['cpu']['process-type'],
        'threads-per-process': tcfg['cpu']['threads-per-process'],
        'thread-type': tcfg['cpu']['thread-type'],
    }

    # Upload data from your local machine to the remote machine
    # Note: the remote machine can be the local machine
    t.upload_input_data = tcfg['upload_input_data']

    # Copy data from other stages/tasks for use in this task
    copy_list = []
    if "copy_input_data" in tcfg.keys():
        for copy_stage in tcfg['copy_input_data'].keys():
            for copy_task in tcfg['copy_input_data'][copy_stage].keys():
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(
                    pipe_name, copy_stage, copy_task)
                copy_list.extend([
                    '{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                    for x in tcfg['copy_input_data'][copy_stage][copy_task]
                ])

    # Append the copy list (if any) to the task object
    t.copy_input_data = copy_list

    # Set the download data for the task
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg.keys():
        download_list.extend([
            '{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
            for x in tcfg['download_output_data']
        ])

    # Append the download list to this task
    t.download_output_data = download_list

    # Return the task object
    return t
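# 'mvar_replace_dict' is referenced above but not defined in this section; a
# plausible minimal sketch, assuming magic variables appear in strings as
# '%KEY%' (the delimiter is an assumption), is:
def mvar_replace_dict(mvar_dict, s):
    # Substitute every magic variable for its value in the given string
    for key, val in mvar_dict.items():
        s = s.replace('%{}%'.format(key), str(val))
    return s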
def init_cycle():

    # Create Pipeline Obj
    p = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()
    dict_tarball = dict()

    # Create Tarball stage
    tar_stg = Stage()

    # Create Tar/untar task
    tar_tsk = Task()
    tar_tsk.executable = ['python']
    tar_tsk.upload_input_data = ['Input_Files.tar', 'untar_input_files.py']
    tar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    tar_tsk.cores = 1

    tar_stg.add_tasks(tar_tsk)
    #task_uids.append(tar_tsk.uid)
    p.add_stages(tar_stg)
    #stage_uids.append(tar_stg.uid)

    dict_tarball[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, tar_stg.uid,
                                                         tar_tsk.uid)

    # Create initial MD stage
    md_stg = Stage()

    # Create MD tasks
    for n0 in range(Replicas):

        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  # MD Engine, BW
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  # MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        #md_tsk.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
        #md_tsk.upload_input_data = ['inpcrd', 'prmtop', 'mdin']
        md_tsk.link_input_data += ['%s/inpcrd' % dict_tarball[0],
                                   '%s/prmtop' % dict_tarball[0],
                                   '%s/mdin' % dict_tarball[0]]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(n0),
                            '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True

        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                   md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)

    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)
    #print d

    # Create Exchange Stage
    ex_stg = Stage()

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(d)
    #print Book

    return p
def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']      # runs for this campaign

    # setup S1 workload
    cfg  = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create cfg with requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1-RAPTOR'
    s = Stage()

    # create cfg
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d    = rs.filesystem.Directory(rurl)
    ls   = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, nodes, runtime in runs:

        print('%30s %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt  = '%s.%s.gz' % (name, workload.output)

      # rec = False
      # if tgt in ls:
      #     if workload.recompute:
      #         rec += 1
      #         d.move(tgt, tgt + '.bak')
      #     else:
      #         print('skip 1 %s' % name)
      #         continue
      #
      # if smiles in ls:
      #     if smiles not in subs:
      #         subs[smiles] = [str(u).split('/')[-1]
      #                         for u in d.list('%s/*' % smiles)]
      #     if tgt in subs[smiles]:
      #         if workload.recompute:
      #             rec += 2
      #             d.move('%s/%s' % (smiles, tgt),
      #                    '%s/%s.bak' % (smiles, tgt))
      #         else:
      #             print('skip 2 %s' % name)
      #             continue
      #
      ## if os.path.exists('results/%s.%s.gz' % (name, workload.output)):
      ##     print('skip 3 %s' % name)
      ##     continue
      #
      # if rec: print('recompute %d %s' % (rec, name))
      # else  : print('compute 2 %s' % name)

        cpn = cfg.cpn
        gpn = cfg.gpn
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles   = smiles
        cfg.workload.name     = name
        cfg.nodes             = nodes
        cfg.runtime           = runtime
        cfg.n_workers         = int(nodes / n_masters - 1)
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):

            t = Task()
            t.pre_exec = ['. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate']
            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_threads = cpn
            t.upload_input_data = ['wf0_master.py',
                                   'wf0_worker.py',
                                   'configs/wf0.%s.cfg > wf0.cfg' % name,
                                   'read_ligand_dict.py']
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            t.download_output_data = ['%s.%s.gz > results/%s.%s.gz'
                                      % (name, workload.output,
                                         name, workload.output)]

          # t.input_staging = [{'source': 'wf0_master.py',
          #                     'target': 'wf0_master.py',
          #                     'action': rp.TRANSFER,
          #                     'flags' : rp.DEFAULT_FLAGS},
          #                    {'source': 'wf0_worker.py',
          #                     'target': 'wf0_worker.py',
          #                     'action': rp.TRANSFER,
          #                     'flags' : rp.DEFAULT_FLAGS},
          #                    {'source': 'configs/wf0.%s.cfg' % name,
          #                     'target': 'wf0.cfg',
          #                     'action': rp.TRANSFER,
          #                     'flags' : rp.DEFAULT_FLAGS},
          #                    {'source': workload.input_dir,
          #                     'target': 'input_dir',
          #                     'action': rp.LINK,
          #                     'flags' : rp.DEFAULT_FLAGS},
          #                    {'source': workload.impress_dir,
          #                     'target': 'impress_md',
          #                     'action': rp.LINK,
          #                     'flags' : rp.DEFAULT_FLAGS},
          #                    {'source': 'read_ligand_dict.py',
          #                     'target': 'read_ligand_dict.py',
          #                     'action': rp.TRANSFER,
          #                     'flags' : rp.DEFAULT_FLAGS}]
          # t.output_staging = [{'source': '%s.%s.gz' % (name, workload.output),
          #                      'target': 'results/%s.%s.gz' % (name, workload.output),
          #                      'action': rp.TRANSFER,
          #                      'flags' : rp.DEFAULT_FLAGS}]

            s.add_tasks(t)

    p.add_stages(s)

    return p
def cycle(k):

    # read exchangePairs.dat
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    p = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    # Create initial MD stage
    md_stg = Stage()

    # Create MD task
    for n0 in range(Replicas):

        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  # MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  # MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
                                  '%s/prmtop' % (Book[k - 1][n0]),
                                  #'%s/mdin_{0}'.format(n0) % (Book[k - 1][n0])]
                                  '%s/mdin' % (Book[k - 1][n0])]
        # Above: copy from previous PIPELINE, make sure bookkeeping is correct
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Pre-exec, Blue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(n0),
                            '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True

        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                   md_tsk.uid)
        #print d
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)

    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    # Create exchange stage
    ex_stg = Stage()

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(d)
    #print d
    #print Book

    return p
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': str(),
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None},
                 'gpu_reqs': {'processes': 0,
                              'process_type': None,
                              'threads_per_process': 0,
                              'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    expected = {'uid': 'test.0000',
                'name': 'new',
                'state': states.INITIAL,
                'state_history': [states.INITIAL],
                'pre_exec': ['module load abc'],
                'executable': 'sleep',
                'arguments': ['10'],
                'post_exec': [],
                'cpu_reqs': {'processes': 10,
                             'process_type': None,
                             'threads_per_process': 2,
                             'thread_type': None},
                'gpu_reqs': {'processes': 5,
                             'process_type': None,
                             'threads_per_process': 3,
                             'thread_type': None},
                'lfs_per_process': 1024,
                'upload_input_data': ['test1'],
                'copy_input_data': ['test2'],
                'link_input_data': ['test3'],
                'move_input_data': ['test4'],
                'copy_output_data': ['test5'],
                'move_output_data': ['test6'],
                'download_output_data': ['test7'],
                'stdout': 'out',
                'stderr': 'err',
                'exit_code': 1,
                'path': 'a/b/c',
                'tag': 'task.0010',
                'parent_stage': {'uid': 's1', 'name': 'stage1'},
                'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}

    assert d == expected

    # Assigning the executable as a string (rather than a list) must yield
    # the same serialization
    t.executable = 'sleep'
    d = t.to_dict()
    assert d == expected
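# A complementary round-trip check, assuming EnTK's Task also exposes a
# 'from_dict' method (used internally for deserialization): a task rebuilt
# from the dictionary should serialize back to the same dictionary.
def test_task_from_dict_roundtrip():

    t1 = Task()
    t1.name = 'roundtrip'
    t1.executable = 'sleep'
    t1.arguments = ['10']
    d = t1.to_dict()

    t2 = Task()
    t2.from_dict(d)
    assert t2.to_dict() == d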
def InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod):
    # "Cycle" = 1 MD stage plus the subsequent exchange computation

    # Initialize Pipeline
    p = Pipeline()

    md_dict  = dict()  # Bookkeeping
    tar_dict = dict()  # Bookkeeping

    # Create tarball of input data

    # Create Untar Stage
    untar_stg = Stage()

    # Untar Task
    untar_tsk = Task()
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py',
                                   '../../Input_Files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.cores = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, untar_stg.uid,
                                                     untar_tsk.uid)
    print(tar_dict[0])

    # First MD stage: needs to be defined separately since the workflow is
    # not built from a predetermined order
    md_stg = Stage()

    # MD tasks
    for r in range(Replicas):

        md_tsk = Task()
        md_tsk.executable = [MD_Executable]
        md_tsk.link_input_data += ['%s/inpcrd' % tar_dict[0],
                                   '%s/prmtop' % tar_dict[0],
                                   #'%s/mdin_{0}'.format(r) % tar_dict[0]
                                   '%s/mdin' % tar_dict[0]]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from the user?
        md_tsk.arguments = ['-O',
                            '-p', 'prmtop',
                            '-i', 'mdin',  # 'mdin_{0}'.format(r): use this for full temperature exchange
                            '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True

        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                        md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)

    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()

    # Create Exchange Task. The exchange task performs a Metropolis-Hastings
    # thermodynamic balance condition check and writes the exchangePairs.dat
    # file, which contains a sorted list of ordered pairs. Said pairs then
    # exchange configurations by linking output configuration files
    # appropriately.
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    #ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    Book.append(md_dict)
    #print Book

    return p
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the
    correct RP input transfer directives when given a Task.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage    = str(ru.generate_id('stage'))
    task     = str(ru.generate_id('task'))

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    # Anything that is not a Task must be rejected
    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(t, placeholders)

    # Test link input data
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0])

    t = Task()
    t.link_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.link_input_data[0].split('>')[1].strip())

    # Test copy input data
    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0])

    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.copy_input_data[0].split('>')[1].strip())

    # Test move input data
    t = Task()
    t.move_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0])

    t = Task()
    t.move_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.move_input_data[0].split('>')[1].strip())

    # Test upload input data (no 'action' key: plain transfer)
    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0])

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)
    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.upload_input_data[0].split('>')[1].strip())
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle.
    'Cycles' is used as the index of the current cycle.
    """

    # Read exchangePairs.dat
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    # Create initial MD stage
    md_stg = Stage()

    for r in range(Replicas):

        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  # MD Engine, Blue Waters
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[Cycles - 1][ExchangeArray[r]]),
                                  '%s/prmtop' % (Book[Cycles - 1][r]),
                                  #'%s/mdin_{0}'.format(r) % (Book[Cycles - 1][r])]
                                  '%s/mdin' % (Book[Cycles - 1][r])]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from user?
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r), '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True

        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid,
                                                        md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)

    q.add_stages(md_stg)

    ex_stg = Stage()

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    Book.append(md_dict)
    #print Book

    return q
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
    ComputeUnitDescription with the complete Task description
    """

    pipeline = 'p1'
    stage    = 's1'
    task     = 't1'

    placeholder_dict = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 1,
                   'thread_type': 'OpenMP'}
    t1.gpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 2,
                   'thread_type': 'OpenMP'}
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s
    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'],
                                              t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'],
                                              t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether the assignment
    # used a string or a list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']
    assert cud.post_exec == t1.post_exec

    assert {'source': 'upload_input.dat',
            'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat',
            'action': rp.COPY,
            'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat',
            'action': rp.LINK,
            'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat',
            'action': rp.COPY,
            'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat',
            'target': 'download_output.dat'} in cud.output_staging
p = Pipeline()

# Bookkeeping
stage_uids = list()
task_uids = dict()

Stages = 1
Replicas = 2

for N_Stg in range(Stages):
    stg = Stage()  ## initialization
    task_uids['Stage_%s' % N_Stg] = list()

    if N_Stg == 0:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  # MD Engine
            t.upload_input_data = ['inpcrd', 'prmtop', 'mdin']
            t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
            t.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                           '-o', 'out']
            t.cores = 32
            t.mpi = True
            stg.add_tasks(t)
            task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    else:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  # MD Engine
def init_cycle(self, replicas, replica_cores, python_path, md_executable,
               exchange_method, min_temp, max_temp, timesteps, basename,
               pre_exec):
    # "cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first cycle
    """

    # Initialize Pipeline
    self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict  = dict()  # bookkeeping
    tar_dict = dict()  # bookkeeping

    # Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(max_temp=max_temp, min_temp=min_temp,
                            replicas=replicas, timesteps=timesteps,
                            basename=basename)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    # Create tarball of input data
    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop",
                 basename + ".inpcrd",
                 basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    # Delete all input files outside the tarball
    for r in range(replicas):
        os.remove('mdin_{0}'.format(r))
    self._prof.prof('EndTar', uid=self._uid)

    # Create Untar Stage
    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir

    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    # Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [
        str(aux_function_path) + '/repex/untar_input_files.py',
        'input_files.tar'
    ]
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = {'processes': 1, 'process_type': None,
                          'threads_per_process': 1, 'thread_type': None}
    #untar_tsk.post_exec = ['']

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, untar_stg.name,
                                                     untar_tsk.name)

    # First MD stage: needs to be defined separately since the workflow is
    # not built from a predetermined order; also, equilibration needs to
    # happen first.
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(replicas):

        md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable,
                           pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]  # Use for full temperature exchange
        ]
        md_tsk.arguments = ['-O',
                            '-p', 'prmtop',
                            '-i', 'mdin_{0}'.format(r),
                            '-c', 'inpcrd',
                            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=0),
                            '-r', 'restrt',
                            #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r, cycle=0),
                            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
                            #'-o', '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r, cycle=0),
                            #'-r', '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r, cycle=0),
                            #'-x', '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
                            '-inf', 'mdinfo_{0}'.format(r)]

        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, md_stg.name,
                                                        md_tsk.name)
        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print(md_tsk.uid)

    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    #ex_tsk.pre_exec = ['module load python/2.7.10']
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)

    self.book.append(md_dict)

    return p
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data
            with pytest.raises(TypeError):
                t.path = data
            with pytest.raises(TypeError):
                t.parent_stage = data
            with pytest.raises(TypeError):
                t.parent_pipeline = data
            with pytest.raises(TypeError):
                t.stdout = data
            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.executable = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.move_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, unicode):
            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1,
                              'process_type': data,
                              'threads_per_process': 1,
                              'thread_type': None}
                t.cpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': data}
                t.gpu_reqs = {'processes': 1,
                              'process_type': data,
                              'threads_per_process': 1,
                              'thread_type': None}
                t.gpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': data}

        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': data,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None}
                t.cpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': data,
                              'thread_type': None}
                t.gpu_reqs = {'processes': data,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None}
                t.gpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': data,
                              'thread_type': None}
def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle
    Pulls up the exchange pairs file and generates the new workflow
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)

    # Read exchangePairs.dat: column 1 holds the exchange partner of each replica
    with open('exchangePairs_{0}.dat'.format(Cycle), 'r') as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(Cycle)

    # Bookkeeping
    stage_uids = list()
    task_uids = list()
    md_dict = dict()

    # Create MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(Cycle)

    self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=Cycle)
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (self.Book[Cycle - 1][ExchangeArray[r]]),
            '%s/prmtop' % (self.Book[0][r]),
            '%s/mdin_{0}'.format(r) % (self.Book[0][r])
        ]
        md_tsk.arguments = [
            '-O',
            '-i', 'mdin_{0}'.format(r),
            '-p', 'prmtop',
            '-c', 'inpcrd',
            '-o', 'out_{0}'.format(r),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.name, md_stg.name, md_tsk.name)

        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(Cycle + 1)

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(Cycle + 1)
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '{0}'.format(Cycle + 1)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # Finds exchange partners and generates the exchange history trace
    ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(Cycle + 1)]

    ex_stg.add_tasks(ex_tsk)
    self.ex_task_list.append(ex_tsk)
    q.add_stages(ex_stg)

    self.Book.append(md_dict)

    return q
def general_cycle(self, replicas, replica_cores, cycle, python_path,
                  md_executable, exchange_method, pre_exec):
    """
    All cycles after the initial cycle
    Pulls up the exchange pairs file and generates the new workflow
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid)

    # Read exchangePairs.dat: column 1 holds the exchange partner of each replica
    with open('exchangePairs_{0}.dat'.format(cycle), 'r') as f:
        exchange_array = []
        for line in f:
            exchange_array.append(int(line.split()[1]))

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(cycle)

    # Bookkeeping
    stage_uids = list()
    task_uids = list()
    md_dict = dict()

    # Create MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(cycle)

    self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores,
                           md_executable=md_executable,
                           pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=cycle)
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (self.book[cycle - 1][exchange_array[r]]),
            '%s/prmtop' % (self.book[0][r]),
            '%s/mdin_{0}'.format(r) % (self.book[0][r])
        ]
        # NOTE: an alternative softlinking scheme via '$NODE_LFS_PATH/' exists
        # for node-local filesystems, but it is not fully supported yet.
        md_tsk.arguments = [
            '-O',
            '-i', 'mdin_{0}'.format(r),
            '-p', 'prmtop',
            '-c', 'inpcrd',
            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=cycle),
            '-r', 'restrt',
            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.name, md_stg.name, md_tsk.name)

        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(cycle + 1)

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(cycle + 1)
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py',
                        '{0}'.format(replicas),
                        '{0}'.format(cycle + 1)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # Finds exchange partners and generates the exchange history trace
    ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(cycle + 1)]

    ex_stg.add_tasks(ex_tsk)
    self.ex_task_list.append(ex_tsk)
    q.add_stages(ex_stg)

    self.book.append(md_dict)

    return q
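# A sketch of the driver loop assumed to sit above init_cycle/general_cycle
# (the 'exchange' instance and 'amgr' AppManager are illustrative names): the
# pipeline returned by init_cycle downloads exchangePairs_0.dat, which
# general_cycle(cycle=0) then reads to build the next workflow, and so on.
def run_cycles(exchange, amgr, n_cycles, replicas, replica_cores, python_path,
               md_executable, exchange_method, min_temp, max_temp, timesteps,
               basename, pre_exec):
    p = exchange.init_cycle(replicas, replica_cores, python_path,
                            md_executable, exchange_method, min_temp,
                            max_temp, timesteps, basename, pre_exec)
    amgr.workflow = [p]
    amgr.run()                          # produces exchangePairs_0.dat locally
    for cycle in range(n_cycles):
        q = exchange.general_cycle(replicas, replica_cores, cycle, python_path,
                                   md_executable, exchange_method, pre_exec)
        amgr.workflow = [q]
        amgr.run()                      # produces exchangePairs_<cycle+1>.dat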
def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']   # resource and workload config
    run_file = cfg['run_file']       # runs for this campaign

    # setup S1 workload
    cfg  = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    #   - create cfg with requested receptor and smiles
    #   - create a number of masters as EnTK tasks and add them to a pipeline
    #   - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1.RAPTOR'
    s = Stage()

    # list results already present on the target filesystem
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d    = rs.filesystem.Directory(rurl)
    ls   = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, n_workers, runtime in runs:

        print('%30s %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt  = '%s.%s.gz' % (name, workload.output)

        cpw       = cfg.cpw
        gpw       = cfg.gpw
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles   = smiles
        cfg.workload.name     = name
        cfg.runtime           = runtime
        cfg.n_workers         = n_workers
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()
            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]
            t.executable = 'python3'
            t.arguments  = ['wf0_master.py', i]
            t.cpu_reqs   = {'processes'           : 1,
                            'threads_per_process' : 4,
                            'thread_type'         : None,
                            'process_type'        : None}
            t.upload_input_data = [
                'wf0_master.py',
                'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name,
                'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            # t.download_output_data = ['%s.%s.gz > results/%s.%s.gz' %
            #                           (name, workload.output, name, workload.output)]
            s.add_tasks(t)

    p.add_stages(s)

    return p
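# An illustrative (assumed, not normative) shape of the JSON behind
# cfg['run_cfg_file'], showing only the fields generate_pipeline(cfg) above
# actually dereferences; all values are placeholders.
example_run_cfg = {
    'fs_url'   : 'file://localhost/',       # base URL where result files land
    'cpw'      : 4,                         # cores per worker (assumed meaning)
    'gpw'      : 1,                         # gpus per worker (assumed meaning)
    'n_masters': 2,
    'workload' : {
        'results'  : 'results/',
        'output'   : 'out',
        'input_dir': '/path/to/input_dir',
    },
}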
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all
    expected attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': [],
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None},
                 'gpu_reqs': {'processes': 0,
                              'process_type': None,
                              'threads_per_process': 0,
                              'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()
    assert d == {'uid': 'test.0000',
                 'name': 'new',
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': ['module load abc'],
                 'executable': ['sleep'],
                 'arguments': ['10'],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 10,
                              'process_type': None,
                              'threads_per_process': 2,
                              'thread_type': None},
                 'gpu_reqs': {'processes': 5,
                              'process_type': None,
                              'threads_per_process': 3,
                              'thread_type': None},
                 'lfs_per_process': 1024,
                 'upload_input_data': ['test1'],
                 'copy_input_data': ['test2'],
                 'link_input_data': ['test3'],
                 'move_input_data': ['test4'],
                 'copy_output_data': ['test5'],
                 'move_output_data': ['test6'],
                 'download_output_data': ['test7'],
                 'stdout': 'out',
                 'stderr': 'err',
                 'exit_code': 1,
                 'path': 'a/b/c',
                 'tag': 'task.0010',
                 'parent_stage': {'uid': 's1', 'name': 'stage1'},
                 'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}
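# A short usage sketch: to_dict() is assumed to pair with Task.from_dict(),
# mirroring EnTK's internal (de)serialization path, to round-trip a task.
def example_task_roundtrip():
    t = Task()
    t.name = 'roundtrip'
    t.executable = ['sleep']
    d = t.to_dict()

    t2 = Task()
    t2.from_dict(d)                  # rebuild the task from its dictionary form
    assert t2.name == 'roundtrip'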
def init_cycle(self, replicas, replica_cores, python_path, md_executable,
               exchange_method, min_temp, max_temp, timesteps, basename,
               pre_exec):
    # A "cycle" is one MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first cycle
    """

    # Initialize Pipeline
    self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict  = dict()   # bookkeeping
    tar_dict = dict()   # bookkeeping

    # Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(max_temp=max_temp,
                            min_temp=min_temp,
                            replicas=replicas,
                            timesteps=timesteps,
                            basename=basename)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    # Create tarball of input data
    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop",
                 basename + ".inpcrd",
                 basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    # Delete all input files outside the tarball
    for r in range(replicas):
        os.remove('mdin_{0}'.format(r))

    self._prof.prof('EndTar', uid=self._uid)

    # Create untar stage
    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir

    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    # Untar task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [
        str(aux_function_path) + '/repex/untar_input_files.py',
        'input_files.tar'
    ]
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = {'processes'           : 1,
                          'process_type'        : None,
                          'threads_per_process' : 1,
                          'thread_type'         : None}

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name,
                                                     untar_stg.name,
                                                     untar_tsk.name)

    # First MD stage: defined separately since the workflow is not built from
    # a predetermined order, and equilibration needs to happen first.
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores,
                           md_executable=md_executable,
                           pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]   # use for full temperature exchange
        ]
        md_tsk.arguments = [
            '-O',
            '-p', 'prmtop',
            '-i', 'mdin_{0}'.format(r),
            '-c', 'inpcrd',
            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=0),
            '-r', 'restrt',
            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        # NOTE: writing out/restrt/mdcrd under '$NODE_LFS_PATH/' is a
        # node-local filesystem alternative, not fully supported yet.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name,
                                                        md_stg.name,
                                                        md_tsk.name)
        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)

    p.add_stages(md_stg)

    # First exchange stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    # Create exchange task
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']

    ex_stg.add_tasks(ex_tsk)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)

    self.book.append(md_dict)

    return p
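# For reference: the '$Pipeline_<name>_Stage_<name>_Task_<name>' strings kept
# in tar_dict/md_dict are EnTK data-staging placeholders that resolve at
# runtime to the named task's sandbox path, e.g. (illustrative resolution):
#   tar_dict[0]               == '$Pipeline_initpipeline_Stage_untarStg_Task_untartsk'
#   '%s/inpcrd' % tar_dict[0] -> '<untartsk sandbox>/inpcrd'
# which is how later tasks link inputs produced by earlier ones.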
Replica_Cores = 1
Pilot_Cores = Replicas * Replica_Cores

for N_Stg in range(Stages):
    stg = Stage()                                # initialization
    task_uids['Stage_%s' % N_Stg] = list()

    # Initial MD stage
    if N_Stg == 0:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']   # MD engine
            t.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
            t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
            t.arguments = ['-O',
                           '-i', 'mdin_{0}'.format(n0),
                           '-p', 'prmtop',
                           '-c', 'inpcrd',
                           '-o', 'out']
            t.cores = Replica_Cores
            stg.add_tasks(t)
            task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    # Exchange stages
    elif N_Stg != 0 and N_Stg % 2 == 1:
        t = Task()
        t.executable = ['python']
        t.upload_input_data = ['exchangeMethods/RandEx.py']
# Create a Pipeline object
p = Pipeline()

# Bookkeeping
stage_uids = list()
task_uids = dict()
Stages = 3
Replicas = 4

for N_Stg in range(Stages):
    stg = Stage()                                # initialization
    task_uids['Stage_%s' % N_Stg] = list()

    if N_Stg == 0:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d']   # MD engine
            t.upload_input_data = ['in.gro', 'in.top', 'FNF.itp',
                                   'martini_v2.2.itp', 'in.mdp']
            t.pre_exec = [
                'module load gromacs',
                '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'
            ]
            t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']
            t.cores = 32
            stg.add_tasks(t)
            task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    else:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d']   # MD engine
def generate_pipeline(name, stages):
    # Generate the pipeline of prediction and blob detection

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        if s_cnt == 0:
            # Create a Stage object
            s0 = Stage()
            s0.name = 'Stage %s' % s_cnt

            # Create Task 1, prediction
            t1 = Task()
            t1.name = 'Predictor'
            t1.pre_exec = [
                'module load psc_path/1.1',
                'module load slurm/default',
                'module load intel/17.4',
                'module load python3',
                'module load cuda',
                'mkdir -p classified_images/crabeater',
                'mkdir -p classified_images/weddel',
                'mkdir -p classified_images/pack-ice',
                'mkdir -p classified_images/other',
                'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate'
            ]
            t1.executable = 'python3'   # Assign executable to the task
            # Assign arguments for the task executable
            t1.arguments = ['pt_predict.py', '-class_names',
                            'crabeater', 'weddel', 'pack-ice', 'other']
            t1.link_input_data = [
                '/pylon5/mc3bggp/paraskev/seal_test/nn_model.pth.tar',
                '/pylon5/mc3bggp/paraskev/nn_images',
                '/pylon5/mc3bggp/paraskev/seal_test/test_images'
            ]
            t1.upload_input_data = ['pt_predict.py', 'sealnet_nas_scalable.py']
            t1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                           'thread_type': 'OpenMP'}
            t1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
                           'thread_type': 'OpenMP'}

            s0.add_tasks(t1)
            # Add Stage to the Pipeline
            p.add_stages(s0)

        else:
            # Create a Stage object
            s1 = Stage()
            s1.name = 'Stage %s' % s_cnt

            # Create Task 2, blob detection
            t2 = Task()
            t2.name = 'Blob_detector'
            t2.pre_exec = [
                'module load psc_path/1.1',
                'module load slurm/default',
                'module load intel/17.4',
                'module load python3',
                'module load cuda',
                'module load opencv',
                'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate',
                'mkdir -p blob_detected'
            ]
            t2.executable = ['python3']   # Assign executable to the task
            # Assign arguments for the task executable
            t2.arguments = ['blob_detector.py']
            t2.upload_input_data = ['blob_detector.py']
            # Data-staging placeholders resolve by task *name*, not uid
            t2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/classified_images'
                                  % (p.name, s0.name, t1.name)]
            t2.download_output_data = ['blob_detected/']   # Download resulting images
            t2.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                           'thread_type': 'OpenMP'}
            t2.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
                           'thread_type': 'OpenMP'}

            s1.add_tasks(t2)
            # Add Stage to the Pipeline
            p.add_stages(s1)

    return p
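# Minimal submission sketch for the pipeline above. The hostname/port and the
# resource description values (resource label, walltime, core/gpu counts,
# project) are illustrative assumptions, not a validated configuration.
from radical.entk import AppManager

amgr = AppManager(hostname='localhost', port=5672)
amgr.resource_desc = {
    'resource': 'xsede.bridges',    # assumed target resource
    'walltime': 60,
    'cpus'    : 32,
    'gpus'    : 2,
    'project' : 'mc3bggp',          # assumed allocation
}
amgr.workflow = [generate_pipeline(name='seal_pipeline', stages=2)]
amgr.run()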