def get_pipeline(tasks):
    """Build a two-stage EnTK pipeline: one grompp pre-processing task,
    then `tasks` parallel mdrun tasks consuming its output.

    Relies on the module-level `app_coll` (application descriptions) and
    `num_cores` globals.
    """
    pipeline = Pipeline()

    # Stage 1: single grompp task that produces topol.tpr.
    grompp_stage = Stage()
    grompp_task = Task()
    grompp_task.pre_exec = ['module load gromacs/5.0/INTEL-140-MVAPICH2-2.0']
    grompp_task.executable = app_coll['grompp']['executable']
    grompp_task.arguments = app_coll['grompp']['arguments']
    grompp_task.cores = app_coll['grompp']['cores']
    grompp_task.link_input_data = [
        '$SHARED/grompp.mdp > grompp.mdp',
        '$SHARED/input.gro > input.gro',
        '$SHARED/topol.top > topol.top'
    ]
    grompp_stage.add_tasks(grompp_task)
    pipeline.add_stages(grompp_stage)

    # Stage 2: `tasks` mdrun tasks, each copying topol.tpr from stage 1.
    mdrun_stage = Stage()
    for _ in range(tasks):
        mdrun_task = Task()
        mdrun_task.pre_exec = [
            'module load gromacs/5.0/INTEL-140-MVAPICH2-2.0',
            'export OMP_NUM_THREADS=%s' % num_cores
        ]
        mdrun_task.executable = app_coll['mdrun']['executable']
        mdrun_task.arguments = app_coll['mdrun']['arguments']
        #mdrun_task.cores = app_coll['mdrun']['cores']
        mdrun_task.cores = num_cores
        mdrun_task.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/topol.tpr'
            % (pipeline.uid, grompp_stage.uid, grompp_task.uid)
        ]
        mdrun_stage.add_tasks(mdrun_task)
    pipeline.add_stages(mdrun_stage)

    return pipeline
def generate_task(self, **ensembles):
    """
    Generate a `radical.entk` task.

    Parameters
    ----------
    ensembles: dict
        Dictionary of the *current* values of variables that are
        ensembles. All the variables that were declared with
        `add_ensemble` should be specified here so that a correct task
        object can be generated.

    Returns
    -------
    Task
        A fully configured `radical.entk` Task.

    Raises
    ------
    ValueError
        If any declared ensemble variable is still undefined after
        applying `ensembles`.
    """
    # BUG FIX (idiom): the original used a list comprehension purely
    # for its setattr side effect; use a plain loop instead.
    for key, value in ensembles.iteritems():
        setattr(self, key, value)

    if not self.all_variables_defined():
        raise ValueError('Some variables are not defined!')

    task = Task()
    # Deterministic, descriptive name per ensemble point; fall back to
    # "sim" when no ensemble variables were passed.
    task.name = "-".join(
        "{}-{}".format(k, w) for k, w in ensembles.iteritems()) or "sim"

    # Engine configuration (executable, launch options, MPI flag).
    task.pre_exec += self.engine.pre_exec
    task.executable += self.engine.executable
    task.arguments += self.engine.arguments
    task.mpi = self.engine.uses_mpi
    task.cores = self._cores

    # Simulation- and system-specific inputs.
    task.arguments.extend(self.arguments)
    task.copy_input_data.extend(self.copied_files)
    task.copy_input_data.extend(self.system.copied_files)

    # Leave the task name in the sandbox for provenance.
    task.post_exec.append('echo "{}" > sim_desc.txt'.format(task.name))

    # Chain inputs from the preceding simulation, if any.
    if self._input_sim:
        task.link_input_data.extend(
            self._input_sim.output_data(for_ensemble=ensembles))
    task.link_input_data.extend(self.system.linked_files)

    # In-place sed substitutions for templated input files.
    task.pre_exec.extend(
        self._sed.format(n, v, f)
        for f, vs in self.get_variables().items()
        for n, v in vs)
    return task
def get_pipeline():
    """Create a single-stage pipeline holding one task configured from
    the module-level `app_coll[app_name]` entry."""
    pipeline = Pipeline()

    stage = Stage()

    task = Task()
    task.executable = [app_coll[app_name]['executable']]
    # The argument spec is repeated 100 times, matching the workload.
    task.arguments = [app_coll[app_name]['arguments']] * 100
    task.cores = app_coll[app_name]['cores']

    stage.add_tasks(task)
    pipeline.add_stages(stage)

    return pipeline
def get_pipeline(stages):
    """Create a pipeline of `stages` identical single-task stages, each
    configured from the module-level `app_coll[app_name]` entry."""
    pipeline = Pipeline()

    for _ in range(stages):
        stage = Stage()

        task = Task()
        task.executable = [app_coll[app_name]['executable']]
        # The argument spec is repeated 100 times, matching the workload.
        task.arguments = [app_coll[app_name]['arguments']] * 100
        task.cores = app_coll[app_name]['cores']

        stage.add_tasks(task)
        pipeline.add_stages(stage)

    return pipeline
for N_Stg in range(Stages): stg = Stage() ## initialization task_uids['Stage_%s' % N_Stg] = list() if N_Stg == 0: for n0 in range(Replicas): t = Task() t.executable = ['/opt/gromacs/bin/gmx_mpi'] #MD Engine t.upload_input_data = [ 'in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp' ] t.pre_exec = [ 'module load gromacs', '/opt/gromacs/bin/gmx_mpi grompp -f in.mdp -c in.gro -o in.tpr -p in.top' ] t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t.cores = 24 stg.add_tasks(t) task_uids['Stage_%s' % N_Stg].append(t.uid) p.add_stages(stg) stage_uids.append(stg.uid) else: for n0 in range(Replicas): t = Task() t.executable = ['/opt/gromacs/bin/gmx_mpi'] #MD Engine t.copy_input_data = [ '$Pipeline_%s_Stage_%s_Task_%s/out.gro > in.gro' % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' % (N_Stg - 1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.top' %
def generate_pipeline(self): pipeline = Pipeline() # generate replicas # create a wrapper task that assigns the values of replica_i and replica_j # ================= stage_1 = Stage() for _gibbs_step in range(self.n_gibbs_steps): task = Task() # assign replica_i and replica_j task.name = assign_replica_numbers task.executable = [NULL] task.cores = self.cores task.arguments = ['I am task %s'%_gibbs_step] stage_1.add_tasks(task) pipeline.add_stages(stage_1) # replica exchange Metropolis criteria # invoke repex from RepEx 3.0 # ================= stage_2 = Stage() for _gibbs_step in range(self.n_gibbs_steps): task = Task() task.name = repex task.executable = [NULL] task.cores = self.cores task.arguments = ['I am task %s'%_gibbs_step] stage_2.add_tasks(task) pipeline.add_stages(stage_2) # rotation (MC) # ================= stage_3 = Stage() for replica in range(self.number_of_replicas): task = Task() task.name = rotation task.executable = [NULL] task.cores = self.cores task.arguments = ['I am task %s'%replica] stage_3.add_tasks(task) pipeline.add_stages(stage_3) # translation (MC) # ================= stage_4 = Stage() for replica in range(self.number_of_replicas): task = Task() task.name = rotation task.executable = [NULL] task.cores = self.cores task.arguments = ['I am task %s'%replica] stage_4.add_tasks(task) pipeline.add_stages(stage_4) # propagation (MC) # ================= stage_5 = Stage() for replica in range(self.number_of_replicas): task = Task() task.name = rotation task.executable = [NULL] task.cores = self.cores task.arguments = ['I am task %s'%replica] stage_5.add_tasks(task) pipeline.add_stages(stage_5) # energy matrix # for every replica pull the sampler state # compute the energy matrix of each thermo state in thermo_matrix, given that replica's sampler state # ================= stage_6 = Stage() for replica in range(self.number_of_replicas): for thermo_state in range(self.thermo_state) task = Task() task.name = rotation task.executable = [NULL] 
task.cores = self.cores task.arguments = ['I am task %s'%replica] stage_6.add_tasks(task) pipeline.add_stages(stage_6) print 'TIES pipeline has', len(pipeline.stages), 'stages. Tasks counts:', [len(s.tasks) for s in pipeline.stages] return pipeline
def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle.

    Pulls up the exchange-pairs file written by the previous cycle and
    generates the next MD + exchange workflow as a new pipeline.

    Parameters
    ----------
    Replicas : int
        Number of replicas (one MD task each).
    Replica_Cores : int
        Cores per MD task.
    Cycle : int
        Index of the cycle being generated (expected >= 1, since
        `self.Book[Cycle - 1]` is consulted).
    MD_Executable : str
        Path of the MD engine passed through to `AMBERTask`.
    ExchangeMethod : str
        Path of the exchange script to upload (executed as `TempEx.py`).

    Returns
    -------
    Pipeline
        The two-stage (MD then exchange) pipeline for this cycle.
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)

    # Read exchangePairs_<Cycle>.dat: column 2 holds the exchange
    # partner index for each replica from the previous exchange step.
    with open('exchangePairs_{0}.dat'.format(Cycle), 'r') as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(Cycle)

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    #Create initial MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(Cycle)

    self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=Cycle)
        # Restart coordinates come from the *exchange partner* of the
        # previous cycle; self.Book keeps per-cycle sandbox paths.
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (self.Book[Cycle - 1][ExchangeArray[r]]),
            '%s/prmtop' % (self.Book[0][r]),
            #'%s/prmtop'%(self.Tarball_path[0]),
            '%s/mdin_{0}'.format(r) % (self.Book[0][r])
        ]
        #'%s/mdin'%(self.Book[0][r])]
        #'%s/mdin'%(self.Tarball_path[0])]
        md_tsk.arguments = [
            '-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c',
            'inpcrd', '-o', 'out_{0}'.format(r), '-inf',
            'mdinfo_{0}'.format(r)
        ]
        #md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        # Record this task's sandbox reference for the exchange stage.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            q.name, md_stg.name, md_tsk.name)
        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(Cycle + 1)

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(Cycle + 1)
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    # Link every replica's mdinfo file into the exchange sandbox.
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = [
        'TempEx.py',
        '{0}'.format(Replicas),
        '{0}'.format(Cycle + 1)
    ]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # Finds exchange partners, also Generates exchange history trace
    ex_tsk.download_output_data = [
        'exchangePairs_{0}.dat'.format(Cycle + 1)
    ]
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    self.ex_task_list.append(ex_tsk)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    self.Book.append(md_dict)
    #self._prof.prof('EndEx_{0}'.format(Cycle), uid=self._uid)
    #print d
    #print self.Book
    return q
def init_cycle(self, replicas, replica_cores, python_path, md_executable,
               exchange_method, min_temp, max_temp, timesteps, basename,
               pre_exec):  # "cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first cycle

    Returns the assembled `Pipeline` (untar stage, MD stage with one
    task per replica, then the first exchange stage).
    """

    #Initialize Pipeline
    self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()  #bookkeeping
    tar_dict = dict()  #bookkeeping

    #Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(
        max_temp=max_temp,
        min_temp=min_temp,
        replicas=replicas,
        timesteps=timesteps,
        basename=basename)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    #Create Tarball of input data
    tar = tarfile.open("input_files.tar", "w")
    for name in [
            basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"
    ]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    #delete all input files outside the tarball
    for r in range(replicas):
        os.remove('mdin_{0}'.format(r))

    self._prof.prof('EndTar', uid=self._uid)

    #Create Untar Stage
    # Locate the repo root so the untar helper script can be uploaded.
    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir

    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [
        str(aux_function_path) + '/repex/untar_input_files.py',
        'input_files.tar'
    ]
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = 1
    #untar_tsk.post_exec = ['']
    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    # Sandbox reference of the untar task; MD tasks link inputs from it.
    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
        p.name, untar_stg.name, untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is
    # not built from a predetermined order, also equilibration needs to
    # happen first.
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]  #Use for full temperature exchange
        ]
        md_tsk.arguments = [
            '-O',
            '-p', 'prmtop',
            '-i', 'mdin_{0}'.format(r),
            '-c', 'inpcrd',
            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=0),
            '-r', 'restrt'.format(replica=r, cycle=0),
            #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
            #'-o', '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r,cycle=0),
            #'-r', '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
            #'-x', '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r,cycle=0),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        # Record this task's sandbox reference for the exchange stage.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, md_stg.name, md_tsk.name)

        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print md_tsk.uid

    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    #ex_tsk.pre_exec = ['module load python/2.7.10']
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    # Normalize the uploaded script name regardless of its source path.
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)

    self.book.append(md_dict)
    return p
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle.

    Builds a two-stage pipeline: one MD task per replica (restarting
    each replica from its exchange partner's previous output, as read
    from exchangePairs.dat), followed by a single exchange task.

    Parameters
    ----------
    Replicas : int
        Number of replicas.
    Replica_Cores : int
        Cores per MD task.
    Cycles : int
        Index of the current cycle; `Book[Cycles - 1]` (module-level
        bookkeeping list) is consulted for previous sandboxes.
    MD_Executable : str
        Path to the MD engine.
    ExchangeMethod : str
        Unused here; the exchange script path is hard-coded below.

    Returns
    -------
    Pipeline
    """

    with open("exchangePairs.dat", "r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    #Create initial MD stage
    md_stg = Stage()
    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  #MD Engine, Blue Waters
        # BUG FIX: the original indexed `Book[Cycle - 1]` although the
        # parameter is named `Cycles` (NameError at runtime).
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[Cycles - 1][ExchangeArray[r]]),
            '%s/prmtop' % (Book[Cycles - 1][r]),
            #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
            '%s/mdin' % (Book[Cycles - 1][r])
        ]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from user?
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c',
                            'inpcrd', '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # BUG FIX: the original referenced the undefined name `p`; the
        # pipeline object in this function is `q`.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid, md_tsk.uid)

        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # BUG FIX: the original read `d[n1]`, a name that does not exist
        # in this function; the bookkeeping dict here is `md_dict`.
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    Book.append(md_dict)
    #print d
    #print Book
    return q
def push_function(pipe_conn, name):
    # Benchmark helper: pushes up to 1M copies of a representative Task
    # through `pipe_conn`, sampling wall-clock time and this process's
    # RSS after every push; samples are dumped to DATA/<name>.txt.
    # Depends on module-level globals: tasks_pushed, kill_pusher, DATA.
    try:
        start_time = time.time()
        #tasks_pushed = 0
        global tasks_pushed
        push_times = []
        proc_mem = []

        # Representative GROMACS task used as the payload for every push.
        t = Task()
        t.arguments = [
            "--template=PLCpep7_template.mdp", "--newname=PLCpep7_run.mdp",
            "--wldelta=100", "--equilibrated=False", "--lambda_state=0",
            "--seed=1"
        ]
        t.cores = 20
        t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
        t.download_output_data = [
            'PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
            'PLCpep7.log > PLCpep7_run1_gen0.log',
            'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
            'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
            'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
            'PLCpep7.gro > PLCpep7_run1_gen0.gro'
        ]
        print 'Size of task: ', asizeof.asizeof(t)

        # Push until the cap is reached or the kill event is set.
        while (tasks_pushed < 1000000) and (not kill_pusher.is_set()):
            #t = DATA
            pipe_conn.send(t)
            tasks_pushed += 1
            cur_time = time.time()
            push_times.append(cur_time)
            mem = psutil.Process(os.getpid()).memory_info().rss / float(
                2**20)  # MB
            proc_mem.append(mem)

        print len(push_times)
        f = open(DATA + '/%s.txt' % name, 'w')
        for ind in range(len(push_times)):
            f.write('%s %s\n' % (push_times[ind], proc_mem[ind]))
        f.close()
        print 'Push proc killed'

    except KeyboardInterrupt:
        # Flush whatever samples were collected before the interrupt.
        print len(push_times)
        f = open(DATA + '/%s.txt' % name, 'w')
        for ind in range(min(len(push_times), len(proc_mem))):
            f.write('%s %s\n' % (push_times[ind], proc_mem[ind]))
        f.close()
        print 'Push proc killed'
        print traceback.format_exc()

    except Exception, ex:
        # Best-effort flush on any other failure, then report it.
        print len(push_times)
        f = open(DATA + '/%s.txt' % name, 'w')
        for ind in range(min(len(push_times), len(proc_mem))):
            f.write('%s %s\n' % (push_times[ind], proc_mem[ind]))
        f.close()
        print 'Unexpected error: %s' % ex
        print traceback.format_exc()
# List to catch all the uids of the AnEn tasks anen_task_uids = list() stations_subset = list() for ind in range(16): # Create a new task t1 = Task() # task executable t1.executable = ['canalogs'] # All modules to be loaded for the executable to be detected t1.pre_exec = resource_key['xsede.supermic'] # Number of cores for this task t1.cores = int(initial_config['cores']) # List of arguments to the executable t1.arguments = [ '-N', '-p', '--forecast-nc', os.path.basename(initial_config['file.forecast']), '--observation-nc', os.path.basename(initial_config['file.observation']), '-o', './' + os.path.basename(initial_config['output.AnEn']), '--stations-ID' ] #t1.arguments.extend(initial_config['stations.ID'][ind*10:(ind+1)*10]) t1.arguments.extend(initial_config['stations.ID']) t1.arguments.extend([
def init_cycle():
    # Build the initial pipeline: untar the input files, run one MD task
    # per replica, then a single temperature-exchange task.
    # Relies on module-level globals: Replicas, Replica_Cores, Book.

    # Create Pipeline Obj
    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()
    dict_tarball = dict()

    #Create Tarball stage
    tar_stg = Stage()
    #Create Tar/untar task
    tar_tsk = Task()
    tar_tsk.executable = ['python']
    tar_tsk.upload_input_data = ['Input_Files.tar', 'untar_input_files.py']
    tar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    tar_tsk.cores = 1
    tar_stg.add_tasks(tar_tsk)
    #task_uids.append(tar_tsk.uid)
    p.add_stages(tar_stg)
    #stage_uids.append(tar_stg.uid)
    # Sandbox reference of the untar task; MD tasks link inputs from it.
    dict_tarball[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, tar_stg.uid, tar_tsk.uid)

    #Create initial MD stage
    md_stg = Stage()
    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine, BW
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        #md_tsk.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
        #md_tsk.upload_input_data = ['inpcrd','prmtop','mdin']
        md_tsk.link_input_data += ['%s/inpcrd' % dict_tarball[0],
                                   '%s/prmtop' % dict_tarball[0],
                                   '%s/mdin' % dict_tarball[0]]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # Record this task's sandbox reference for the exchange stage.
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)
    #print d

    #Create Exchange Stage
    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(d)
    #print Book
    return p
def cycle(k):
    # Build the pipeline for cycle k (> 0): one MD task per replica,
    # restarting each replica from its exchange partner's output of the
    # previous cycle, followed by a single temperature-exchange task.
    # Relies on module-level globals: Replicas, Replica_Cores, Book.

    #read exchangePairs.dat
    #
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            # Column 2 holds the partner replica index.
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    #Create initial MD stage
    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
                                  '%s/prmtop' % (Book[k - 1][n0]),
                                  #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
                                  '%s/mdin' % (Book[k - 1][n0])]
        ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  #Preexec, BLue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # Record this task's sandbox reference for the exchange stage.
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid, md_tsk.uid)
        #print d
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    #Create exchange stage
    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(d)
    #print d
    #print Book
    return p
# Create stage. s1 = Stage() s1_task_uids = [] s2_task_uids = [] for cnt in range(4): # Create a Task object t1 = Task() ##GROMPP t1.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'] #MD Engine t1.upload_input_data = ['in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp'] t1.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] t1.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t1.cores = 5 # Add the Task to the Stage s1.add_tasks(t1) s1_task_uids.append(t1.uid) # Add Stage to the Pipeline p.add_stages(s1) # Create another Stage object to hold checksum tasks s2 = Stage() #HARD-CODED EXCHANGE FOLLOWED BY MD # Create a Task object
def create_workflow(Kconfig):
    """Build the full ExTASY-style adaptive-sampling workflow.

    Per iteration: pre-process (split/clone .gro), run parallel MD
    simulations, concatenate outputs, run LSDMap, then post-analyze to
    produce the next iteration's start structures.

    NOTE(review): this function was reconstructed from a
    whitespace-collapsed source; block nesting (especially inside the
    GPU/CPU branch and the `restarts == 'clustering'` branch) should be
    confirmed against the original file.
    """
    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #assumed of iteration non zero that files are in combined_path
    combined_path = str(Kconfig.remote_output_directory)  #'/u/sciteam/hruska/scratch/extasy-grlsd'
    if cur_iter == 0:
        restart_iter = 0
    else:
        restart_iter = cur_iter

    if cur_iter == 0:
        # Fresh start: upload the MD input file and clone it into
        # `num_replicas` start structures.
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro', '--clone', str(Kconfig.num_replicas)
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/iter_%s/input.gro' %
            (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter),
            '$SHARED/%s > input.gro' % os.path.basename(Kconfig.md_input_file),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
    # ------------------------------------------------------------------------------------------------------------------
    else:
        # Restart: split the previous iteration's combined output.
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro'
        ]
        pre_proc_task.copy_input_data = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    while (cur_iter < int(Kconfig.num_iterations)):
        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #    Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #              previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #              are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #    Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename
        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            sim_task = Task()
            if Kconfig.use_gpus == 'False':
                sim_task.executable = [
                    '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python'
                ]
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter
                ]
                sim_task.cores = int(
                    Kconfig.num_CUs_per_MD_replica
                )  #on bluewaters tasks on one node are executed concurently
            else:
                sim_task.executable = ['python']
                sim_task.pre_exec = [
                    'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
                    'module add bwpy-mpi', 'module add fftw',
                    'module add cray-netcdf',
                    'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                    'module add cmake', 'module unload darshan, xalt',
                    'export CRAYPE_LINK_TYPE=dynamic',
                    'export CRAY_ADD_RPATH=yes', 'export FC=ftn',
                    'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                    'export tasks=md', 'export iter=%s' % cur_iter,
                    'export OMP_NUM_THREADS=1'
                ]
                sim_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                sim_task.cpu_reqs = {
                    'processes': 0,
                    'process_type': None,
                    'threads_per_process': 0,
                    'thread_type': None
                }
            sim_task.arguments = [
                'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro',
                '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'False', '>', 'md.log'
            ]
            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            # Start structure comes from pre-processing on the first
            # pass, otherwise from the previous post-analysis task.
            if restart_iter == cur_iter:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)
        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #    Purpose:   The output of each gromacs instance in the simulaxftion stage is a small coordinate file.
        #               Concatenate such files from each of the gromacs instances to form a larger file.
        #    Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
            'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan, xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=pre_ana', 'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']
        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py'
        ]
        # Pull every simulation's out.gro into this sandbox.
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]
        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)
        ]
        #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter
        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #    Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #    Arguments:
        #             config = name of the config file to be used during LSDMap
        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu', 'module unload bwpy',
            'module load bwpy/0.3.0', 'module add bwpy-mpi', 'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=lsdmap', 'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        ana_task.executable = ['lsdmap']  #/u/sciteam/hruska/local/bin/lsdmap
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter)
        ]
        ana_task.copy_output_data = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
            #'lsdmap.log > output/iter_%s/lsdmap.log'%cur_iter,
            'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter),
            'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter),
            'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter),
            'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter),
            'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter)
        ]
        # Reuse the previous iteration's weights when available.
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/iter_%s/weight_out.w > weight.w' % (combined_path,
                                                        cur_iter - 1)
            ]
        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter
            ]
        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)
        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #    Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #               to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #    Arguments:
        #             num_replicas        = number of configurations to be generated in the new coordinate file
        #             out                 = output filename
        #             cycle               = iteration number
        #             max_dead_neighbors  = max dead neighbors to be considered
        #             max_alive_neighbors = max alive neighbors to be considered
        #             numCUs              = number of simulation instances/ number of smaller files
        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task._name = 'post_ana_task'
        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module add bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=post_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            post_ana_task.executable = ['python']
            post_ana_task.arguments = [
                'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev',
                'ncopies.nc', 'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
                Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
                'input.gro', cur_iter, Kconfig.num_parallel_MD_sim,
                'weight_out.w', 'tmpha.eg'
            ]
            post_ana_task.link_input_data = [
                '$SHARED/post_analyze.py > post_analyze.py',
                '$SHARED/selection.py > selection.py',
                '$SHARED/selection-cluster.py > selection-cluster.py',
                '$SHARED/reweighting.py > reweighting.py',
                '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
                '%s/iter_%s/weight_out.w > weight.w' % (combined_path,
                                                        cur_iter - 1),
                '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter),
                '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter)
            ]
            if (cur_iter % Kconfig.nsave == 0):
                post_ana_task.download_output_data = [
                    'out.gro > output/iter_%s/out.gro' % cur_iter,
                    'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                    'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png'
                    % (cur_iter),
                    'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
                    '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' %
                    (combined_path, cur_iter, cur_iter)
                ]
            post_ana_task.copy_output_data = [
                'ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,
                                                        cur_iter),
                'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,
                                                            cur_iter),
                'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png'
                % cur_iter,
                'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png'
                % cur_iter,
                'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png'
                % cur_iter
            ]
        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)
        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1

    # Persist the iteration counter so a restart resumes correctly.
    Kconfig.start_iter = str(cur_iter)

    return wf
def cycle(k):
    """Build the k-th REMD cycle pipeline: one MD stage (one sander.MPI task
    per replica) followed by a serial temperature-exchange stage.

    Reads ``exchangePairs.dat`` (produced by the previous cycle's exchange
    task) to decide which replica's restart file seeds each new MD task.
    Relies on module-level ``Pipeline``/``Stage``/``Task``, ``Book``,
    ``Replicas`` and ``Replica_Cores`` being defined by the enclosing script.
    """
    # BUG FIX: the original commented out the open() but still iterated over
    # `f`, raising NameError. Read the exchange pairs properly; column 1 of
    # each row holds the partner replica index.
    ExchangeArray = []
    with open("exchangePairs.dat", "r") as f:
        for line in f:
            ExchangeArray.append(int(line.split()[1]))

    p = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()
    d = dict()  # replica index -> MD task sandbox reference

    # Create initial MD stage
    md_stg = Stage()
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [
            '/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'
        ]  # MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  # MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        # Restart coordinates come from the exchange partner chosen above;
        # topology and input come from the same replica of the previous cycle.
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
            '%s/prmtop' % (Book[k - 1][n0]),
            '%s/mdin' % (Book[k - 1][n0])
        ]
        ## Above: copy from previous PIPELINE, make sure bookkeeping is correct
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Pre-exec, Blue Waters
        #md_tsk.pre_exec = ['module load amber']
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
            '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    # Create exchange stage: a single serial python task
    ex_stg = Stage()
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # the exchange computation needs every replica's mdinfo output
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    return p
for cnt in range(4): # Create a Task object t1 = Task() ##GROMPP t1.executable = [ '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d' ] #MD Engine t1.upload_input_data = [ 'in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp' ] t1.pre_exec = [ 'module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top' ] t1.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t1.cores = 5 # Add the Task to the Stage s1.add_tasks(t1) s1_task_uids.append(t1.uid) # Add Stage to the Pipeline p.add_stages(s1) # Create another Stage object to hold checksum tasks s2 = Stage() #HARD-CODED EXCHANGE FOLLOWED BY MD # Create a Task object t2 = Task() t2.executable = [ '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'
def init_cycle():
    """Build the initial REMD pipeline: untar the staged input files, run one
    MD task per replica, then run a serial temperature-exchange task.

    Uses module-level ``Pipeline``/``Stage``/``Task``, ``Book``, ``Replicas``
    and ``Replica_Cores`` supplied by the enclosing script.
    """
    pipeline = Pipeline()

    # Bookkeeping containers
    stage_uids = list()
    task_uids = list()
    sandbox = dict()   # replica index -> MD task sandbox reference
    tar_refs = dict()  # untar task sandbox reference

    # --- Stage 0: unpack the input tarball -------------------------------
    tar_stg = Stage()
    tar_tsk = Task()
    tar_tsk.executable = ['python']
    tar_tsk.upload_input_data = ['Input_Files.tar', 'untar_input_files.py']
    tar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    tar_tsk.cores = 1
    tar_stg.add_tasks(tar_tsk)
    pipeline.add_stages(tar_stg)
    tar_refs[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (pipeline.uid, tar_stg.uid, tar_tsk.uid)

    # --- Stage 1: one MD task per replica --------------------------------
    md_stg = Stage()
    for idx in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [
            '/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'
        ]  # MD Engine, BW
        # All replicas read the unpacked inputs from the untar task's sandbox.
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_refs[0],
            '%s/prmtop' % tar_refs[0],
            '%s/mdin' % tar_refs[0]
        ]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
            '-o', 'out_{0}'.format(idx), '-inf', 'mdinfo_{0}'.format(idx)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        sandbox[idx] = '$Pipeline_%s_Stage_%s_Task_%s' % (pipeline.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    pipeline.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    # --- Stage 2: serial exchange computation ----------------------------
    ex_stg = Stage()
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    # The exchange script consumes every replica's mdinfo file.
    ex_tsk.link_input_data += [
        '%s/mdinfo_%s' % (sandbox[idx], idx) for idx in range(Replicas)
    ]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    pipeline.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(sandbox)
    return pipeline
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle.

    Parameters
    ----------
    Replicas : int
        Number of replica MD tasks to create.
    Replica_Cores : int
        Cores per MD task.
    Cycles : int
        Index of the current cycle; the previous cycle's bookkeeping entry
        (``Book[Cycles-1]``) seeds this one.
    MD_Executable : str
        Path of the MD engine.
    ExchangeMethod : str
        Local path of the exchange script to upload.

    Returns
    -------
    Pipeline
        Pipeline with one MD stage and one exchange stage.
    """
    # Read exchangePairs.dat written by the previous exchange task:
    # column 1 holds the partner replica index for each replica.
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))

    q = Pipeline()

    # Bookkeeping
    stage_uids = list()
    task_uids = list()
    md_dict = dict()  # replica index -> MD task sandbox reference

    # MD stage: each replica restarts from its exchange partner's output
    md_stg = Stage()
    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  # MD Engine, Blue Waters
        # BUG FIX: the original wrote Book[Cycle-1], indexing with the
        # function object itself (TypeError). The cycle-index parameter is
        # `Cycles` (see the commented-out call site that passed it third).
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[Cycles - 1][ExchangeArray[r]]),
            '%s/prmtop' % (Book[Cycles - 1][r]),
            '%s/mdin' % (Book[Cycles - 1][r])
        ]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from user?
        #md_tsk.pre_exec = ['module load amber']
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
            '-o', 'out_{0}'.format(r), '-inf', 'mdinfo_{0}'.format(r)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # BUG FIX: the pipeline in this function is `q`; the original
        # referenced the undefined name `p` (NameError).
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    # Exchange stage: serial python task producing the next exchange pairs
    ex_stg = Stage()
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # BUG FIX: the sandbox map is `md_dict`; the original referenced the
        # undefined name `d` (NameError).
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    Book.append(md_dict)
    return q
def general_cycle(self, replicas, replica_cores, cycle, python_path, md_executable, exchange_method, pre_exec):
    """
    All cycles after the initial cycle
    Pulls up exchange pairs file and generates the new workflow

    Parameters
    ----------
    replicas : int
        Number of replica MD tasks in this cycle.
    replica_cores : int
        Cores per MD task (forwarded to AMBERTask).
    cycle : int
        Index of the *previous* cycle; its exchange output seeds this one.
    python_path : str
        Interpreter used to run the exchange script.
    md_executable : str
        MD engine path, forwarded to AMBERTask.
    exchange_method : str
        Local path of the exchange script to upload.
    pre_exec : list
        Shell commands run before each MD task.

    Returns
    -------
    Pipeline
        One MD stage (``replicas`` tasks) plus one exchange stage.
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid)

    # Exchange pairs computed by the previous cycle's exchange task:
    # column 1 holds the partner replica index for each replica.
    with open('exchangePairs_{0}.dat'.format(cycle), 'r') as f:  # Read exchangePairs.dat
        exchange_array = []
        for line in f:
            exchange_array.append(int(line.split()[1]))
            #exchange_array.append(line)
            #print exchange_array

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(cycle)

    #bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()  # replica index -> MD task sandbox reference

    #Create MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(cycle)

    self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=cycle)
        # Restart file comes from this replica's exchange partner in the
        # previous cycle; topology and per-replica mdin come from cycle 0.
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (self.book[cycle - 1][exchange_array[r]]),
            '%s/prmtop' % (self.book[0][r]),
            '%s/mdin_{0}'.format(r) % (self.book[0][r])
        ]
        ### The Following softlinking scheme is to be used ONLY if node local file system is to be used: not fully supported yet.
        #md_tsk.link_input_data = ['$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=exchange_array[r],cycle=cycle-1) > '$NODE_LFS_PATH/inpcrd',
        #                          #'%s/restrt > inpcrd'%(self.book[cycle-1][exchange_array[r]]),
        #                          '%s/prmtop'%(self.book[0][r]),
        #                          '%s/mdin_{0}'.format(r)%(self.Book[0][r])]
        md_tsk.arguments = [
            '-O',
            '-i', 'mdin_{0}'.format(r),
            '-p', 'prmtop',
            '-c', 'inpcrd',
            #'-c', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle-1),
            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=cycle),
            '-r', 'restrt',
            #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle),
            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        #md_tsk.tag = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.name, md_stg.name, md_tsk.name)
        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(cycle + 1)

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(cycle + 1)
    ex_tsk.executable = [python_path]  #['/usr/bin/python']  #['/opt/python/bin/python']
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        # the exchange computation needs every replica's mdinfo output
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    # uploaded script keeps its original name; normalize it for the call below
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = [
        'exchange_method.py', '{0}'.format(replicas), '{0}'.format(cycle + 1)
    ]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = [
        'exchangePairs_{0}.dat'.format(cycle + 1)
    ]  # Finds exchange partners, also Generates exchange history trace

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    self.ex_task_list.append(ex_tsk)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    self.book.append(md_dict)
    #self._prof.prof('EndEx_{0}'.format(cycle), uid=self._uid)
    #print d
    #print self.book
    return q
def general_cycle(self, replicas, replica_cores, cycle, python_path, md_executable, exchange_method, pre_exec):
    """
    Build the pipeline for one post-initial REMD cycle.

    Reads ``exchangePairs_<cycle>.dat`` (written by the previous cycle's
    exchange task), wires each replica's new MD task to its partner's restart
    file, then appends a serial exchange task that emits the next pairs file.
    """
    self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid)

    # Column 1 of each row names the exchange partner for that row's replica.
    with open('exchangePairs_{0}.dat'.format(cycle), 'r') as pairs_file:
        exchange_array = [int(row.split()[1]) for row in pairs_file]

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(cycle)

    md_dict = dict()  # replica index -> MD task sandbox reference

    # MD stage: one AMBERTask per replica
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(cycle)

    self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

    for rep in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores,
                           md_executable=md_executable,
                           pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=rep, cycle=cycle)
        # Coordinates restart from the exchange partner's previous-cycle
        # output; topology and per-replica mdin come from cycle 0.
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (self.book[cycle - 1][exchange_array[rep]]),
            '%s/prmtop' % (self.book[0][rep]),
            '%s/mdin_{0}'.format(rep) % (self.book[0][rep])
        ]
        md_tsk.arguments = [
            '-O',
            '-i', 'mdin_{0}'.format(rep),
            '-p', 'prmtop',
            '-c', 'inpcrd',
            '-o', 'out-{replica}-{cycle}'.format(replica=rep, cycle=cycle),
            '-r', 'restrt',
            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=rep, cycle=cycle),
            '-inf', 'mdinfo_{0}'.format(rep)
        ]
        md_dict[rep] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.name, md_stg.name, md_tsk.name)
        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)

    q.add_stages(md_stg)

    # Exchange stage: one serial task that computes the next pairs file
    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(cycle + 1)

    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(cycle + 1)
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for rep in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[rep], rep)]
    # The uploaded script arrives under its original name; give it a fixed
    # name so the argument list below can refer to it.
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = [
        'exchange_method.py',
        '{0}'.format(replicas),
        '{0}'.format(cycle + 1)
    ]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # Finds exchange partners; also generates the exchange history trace.
    ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(cycle + 1)]
    ex_stg.add_tasks(ex_tsk)
    self.ex_task_list.append(ex_tsk)
    q.add_stages(ex_stg)

    self.book.append(md_dict)
    return q
def init_cycle(self, replicas, replica_cores, python_path, md_executable, exchange_method, min_temp, max_temp, timesteps, basename, pre_exec):  # "cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first cycle

    Parameters
    ----------
    replicas : int
        Number of replica MD tasks.
    replica_cores : int
        Cores per MD task (forwarded to AMBERTask).
    python_path : str
        Interpreter used to run the exchange script.
    md_executable : str
        MD engine path, forwarded to AMBERTask.
    exchange_method : str
        Local path of the exchange script to upload.
    min_temp, max_temp : float
        Temperature ladder bounds handed to writeInputs.
    timesteps : int
        MD step count handed to writeInputs.
    basename : str
        Basename of the prmtop/inpcrd/mdin input files.
    pre_exec : list
        Shell commands run before each MD task.

    Returns
    -------
    Pipeline
        untar stage -> first MD stage -> first exchange stage.
    """

    # NOTE(review): 'InitTar' is profiled both here and again after
    # EndWriteInputs; kept as-is to preserve the recorded trace.
    self._prof.prof('InitTar', uid=self._uid)
    # Initialize Pipeline
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()   # bookkeeping: replica -> MD task sandbox reference
    tar_dict = dict()  # bookkeeping: untar task sandbox reference

    # Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(max_temp=max_temp,
                            min_temp=min_temp,
                            replicas=replicas,
                            timesteps=timesteps,
                            basename=basename)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    # Create tarball of input data; context manager guarantees the archive
    # is closed (and flushed) even if an add fails.
    with tarfile.open("input_files.tar", "w") as tar:
        for name in [
                basename + ".prmtop", basename + ".inpcrd",
                basename + ".mdin"
        ]:
            tar.add(name)
        for r in range(replicas):
            tar.add('mdin_{0}'.format(r))
    # delete all input files outside the tarball
    for r in range(replicas):
        os.remove('mdin_{0}'.format(r))
    self._prof.prof('EndTar', uid=self._uid)

    # Locate the repo root so the untar helper script can be uploaded.
    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir

    # Create Untar Stage
    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    # Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [
        str(aux_function_path) + '/repex/untar_input_files.py',
        'input_files.tar'
    ]
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    # NOTE(review): sibling implementations set `cores`; `cpu_reqs = 1` may
    # not be the dict EnTK expects — confirm against the entk version in use.
    untar_tsk.cpu_reqs = 1
    #untar_tsk.post_exec = ['']
    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
        p.name, untar_stg.name, untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is not
    # built from a predetermined order; also equilibration needs to happen first.
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores,
                           md_executable=md_executable,
                           pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]  # Use for full temperature exchange
        ]
        md_tsk.arguments = [
            '-O',
            '-p', 'prmtop',
            '-i', 'mdin_{0}'.format(r),
            '-c', 'inpcrd',
            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=0),
            # BUG FIX: the original called 'restrt'.format(replica=r, cycle=0)
            # — a no-op on a placeholder-free string; dropped.
            '-r', 'restrt',
            #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
            #'-o', '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r,cycle=0),
            #'-r', '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
            #'-x', '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r,cycle=0),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, md_stg.name, md_tsk.name)
        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print md_tsk.uid
    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    #ex_tsk.pre_exec = ['module load python/2.7.10']
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        # the exchange computation needs every replica's mdinfo output
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    # uploaded script keeps its original name; normalize it for the call below
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)
    self.book.append(md_dict)
    return p
def push_function(ind, num_push, num_queues):
    """
    Pusher process body (Python 2): publishes copies of one serialized Task
    to a RabbitMQ queue and records per-push timestamps and available memory.

    Parameters
    ----------
    ind : int
        Index of this pusher; selects the target queue (round-robin) and
        names the output file push_<ind>.txt.
    num_push : int
        Total number of pusher processes; this pusher handles
        MAX_TASKS/num_push tasks.
    num_queues : int
        Number of RabbitMQ queues to spread pushers across.

    Side effects: writes DATA + '/push_<ind>.txt' with one
    "<timestamp> <available MB>" line per push (also on interrupt/error).
    """
    try:
        # Local RabbitMQ broker; port is hard-coded for this benchmark setup.
        mq_connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost', port=32769))
        mq_channel = mq_connection.channel()
        tasks_pushed = 0
        global MAX_TASKS
        # This pusher's share of the total task count (py2 int division).
        proc_tasks = MAX_TASKS/num_push
        push_times = []
        proc_mem = []

        # Template task; its dict form is what gets published repeatedly.
        t = Task()
        t.arguments = ["--template=PLCpep7_template.mdp", "--newname=PLCpep7_run.mdp", "--wldelta=100", "--equilibrated=False", "--lambda_state=0", "--seed=1"]
        t.cores = 20
        t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
        t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
                                  'PLCpep7.log > PLCpep7_run1_gen0.log',
                                  'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
                                  'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
                                  'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
                                  'PLCpep7.gro > PLCpep7_run1_gen0.gro'
                                  ]
        t_dict = t.to_dict()
        print 'Size of task: ', asizeof.asizeof(t_dict)

        # Round-robin assignment of this pusher to a queue.
        name = 'queue_%s'%(ind%num_queues)

        # Push until quota is met or the kill event fires.
        while (tasks_pushed < proc_tasks)and(not kill_pusher.is_set()):
            corr_id = str(uuid.uuid4())
            obj = {'task': t_dict, 'id': corr_id}
            mq_channel.basic_publish(exchange='',
                                     routing_key=name,
                                     properties=pika.BasicProperties(correlation_id = corr_id),
                                     body=json.dumps(obj)
                                     )
            tasks_pushed += 1
            cur_time = time.time()
            push_times.append(cur_time)
            mem = psutil.virtual_memory().available/(2**20)  # MBytes
            proc_mem.append(mem)
            # print '%s: Push average throughput: %s tasks/sec'%(name,
            #     float(tasks_pushed/(cur_time - start_time)))
            # NOTE(review): indentation lost in source; per-push progress
            # print assumed to sit inside the loop like the commented one.
            print 'Push: ', tasks_pushed

        # Dump per-push timestamps and available memory for offline analysis.
        f = open(DATA + '/push_%s.txt'%ind, 'w')
        for i in range(len(push_times)):
            f.write('%s %s\n'%(push_times[i], proc_mem[i]))
            #f.write('%s\n'%(push_times[ind]))
        f.close()
        print 'Push proc killed'

    except KeyboardInterrupt:
        # Best-effort dump of whatever was recorded before the interrupt.
        print len(push_times)
        f = open(DATA + '/push_%s.txt'%ind, 'w')
        for i in range(min(len(push_times), len(proc_mem))):
            f.write('%s %s\n'%(push_times[i], proc_mem[i]))
        f.close()
        print 'Push proc killed'

    except Exception as ex:
        # Unexpected failure: log the traceback, still dump the samples.
        print 'Unexpected error: %s'%ex
        print traceback.format_exc()
        f = open(DATA + '/push_%s.txt'%ind, 'w')
        for i in range(min(len(push_times), len(proc_mem))):
            f.write('%s %s\n'%(push_times[i], proc_mem[i]))
        f.close()
def func_on_true():
    """
    Adaptivity hook: extend pipeline `p` with one more generation
    (grompp -> mdrun -> alchemical analysis) for the current iteration.

    Relies on enclosing-scope names: `p` (pipeline), `instance` (run index),
    `cur_iter` (per-instance iteration counters), `book` (stage/task name
    bookkeeping), GMX_PATH / ALCH_ANA_PATH, and the func_condition /
    func_on_true / func_on_false hooks re-armed on stage 4.
    """
    global cur_iter, book

    # Create Stage 2 (grompp: assemble the run input)
    s2 = Stage()
    s2.name = 'iter%s-s2' % cur_iter[instance]

    # Create a Task
    t2 = Task()
    t2.name = 'iter%s-s2-t2' % cur_iter[instance]
    t2.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    t2.executable = ['gmx grompp']
    t2.arguments = [
        '-f', 'CB7G3_run.mdp',
        '-c', 'CB7G3.gro',
        '-p', 'CB7G3.top',
        '-n', 'CB7G3.ndx',
        '-o', 'CB7G3.tpr',
        '-maxwarn', '10'
    ]
    t2.cores = 1
    t2.copy_input_data = [
        '$SHARED/CB7G3.ndx',
        '$SHARED/CB7G3.top',
        '$SHARED/3atomtypes.itp',
        '$SHARED/3_GMX.itp',
        '$SHARED/cucurbit_7_uril_GMX.itp'
    ]
    # The run mdp always comes from the most recent stage recorded in the
    # book; starting coordinates come from $SHARED on iteration 1 and from
    # the previous mdrun stage (book[...][-2]) afterwards.
    if cur_iter[instance] == 1:
        t2.copy_input_data += [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
            (p.name, book[p.name]['stages'][-1]['name'],
             book[p.name]['stages'][-1]['task']),
            '$SHARED/CB7G3.gro'
        ]
    else:
        t2.copy_input_data += [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
            (p.name, book[p.name]['stages'][-1]['name'],
             book[p.name]['stages'][-1]['task']),
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
            (p.name, book[p.name]['stages'][-2]['name'],
             book[p.name]['stages'][-2]['task'])
        ]

    # Add the Task to the Stage
    s2.add_tasks(t2)
    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s2.name, 'task': t2.name})
    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create Stage 3 (mdrun)
    s3 = Stage()
    s3.name = 'iter%s-s3' % cur_iter[instance]

    # Create a Task
    t3 = Task()
    t3.name = 'iter%s-s3-t3' % cur_iter[instance]
    t3.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    t3.executable = ['gmx mdrun']
    t3.arguments = [
        '-nt', 20,
        '-deffnm', 'CB7G3',
        '-dhdl', 'CB7G3_dhdl.xvg',
    ]
    t3.cores = 20
    # t3.mpi = True
    t3.copy_input_data = [
        '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' %
        (p.name, book[p.name]['stages'][-1]['name'],
         book[p.name]['stages'][-1]['task'])
    ]
    # Keep per-generation copies in $SHARED for the analysis stage.
    t3.copy_output_data = [
        'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
            cur_iter[instance], instance),
        'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
            cur_iter[instance], instance),
        'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
            cur_iter[instance], instance),
        'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(
            cur_iter[instance], instance)
    ]
    t3.download_output_data = [
        'CB7G3.xtc > ./output/CB7G3_run{1}_gen{0}.xtc'.format(
            cur_iter[instance], instance),
        'CB7G3.log > ./output/CB7G3_run{1}_gen{0}.log'.format(
            cur_iter[instance], instance),
        'CB7G3_dhdl.xvg > ./output/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
            cur_iter[instance], instance),
        'CB7G3_pullf.xvg > ./output/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
            cur_iter[instance], instance),
        'CB7G3_pullx.xvg > ./output/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
            cur_iter[instance], instance),
        'CB7G3.gro > ./output/CB7G3_run{1}_gen{0}.gro'.format(
            cur_iter[instance], instance)
    ]

    # Add the Task to the Stage
    s3.add_tasks(t3)
    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s3.name, 'task': t3.name})
    # Add Stage to the Pipeline
    p.add_stages(s3)

    # Create Stage 4 (alchemical analysis; output mdp feeds next iteration)
    s4 = Stage()
    s4.name = 'iter%s-s4' % cur_iter[instance]

    # Create a Task
    t4 = Task()
    t4.name = 'iter%s-s4-t4' % cur_iter[instance]
    t4.pre_exec = [
        'module load python/2.7.7-anaconda',
        'export PYTHONPATH=%s/alchemical_analysis:$PYTHONPATH' % ALCH_ANA_PATH,
        'export PYTHONPATH=%s:$PYTHONPATH' % ALCH_ANA_PATH,
        'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
        'ln -s ../staging_area data'
    ]
    t4.executable = ['python']
    # NOTE: '--gen={0}'/'--run={1}' each receive both values and select one
    # by index — gen gets the iteration, run gets the instance.
    t4.arguments = [
        'analysis_2.py',
        '--newname=CB7G3_run.mdp',
        '--template=CB7G3_template.mdp',
        '--dir=./data',
        # '--prev_data=%s'%DATA_LOC
        '--gen={0}'.format(cur_iter[instance], instance),
        '--run={1}'.format(cur_iter[instance], instance)
    ]
    t4.cores = 1
    t4.copy_input_data = [
        '$SHARED/analysis_2.py',
        '$SHARED/alchemical_analysis.py',
        '$SHARED/CB7G3_template.mdp',
    ]
    t4.download_output_data = [
        'analyze_1/results.txt > ./output/results_run{1}_gen{0}.txt'.format(
            cur_iter[instance], instance),
        'STDOUT > ./output/stdout_run{1}_gen{0}'.format(
            cur_iter[instance], instance),
        'STDERR > ./output/stderr_run{1}_gen{0}'.format(
            cur_iter[instance], instance),
        'CB7G3_run.mdp > ./output/CB7G3_run{1}_gen{0}.mdp'.format(
            cur_iter[instance], instance),
        'results_average.txt > ./output/results_average_run{1}_gen{0}.txt'.format(
            cur_iter[instance], instance)
    ]

    # Re-arm the adaptivity hook so the loop can continue after this stage.
    s4.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false
    }

    # Add the Task to the Stage
    s4.add_tasks(t4)
    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s4.name, 'task': t4.name})
    # Add Stage to the Pipeline
    p.add_stages(s4)

    print book
def InitCycle(self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps):  # "Cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first Cycle

    Parameters
    ----------
    Replicas : int
        Number of replica MD tasks.
    Replica_Cores : int
        Cores per MD task (forwarded to AMBERTask).
    md_executable : str
        MD engine path, forwarded to AMBERTask.
    ExchangeMethod : str
        Local path of the exchange script to upload.
    timesteps : int
        MD step count handed to writeInputs.

    Returns
    -------
    Pipeline
        untar stage -> first MD stage -> first exchange stage.
    """

    #Initialize Pipeline
    #self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()   #Bookkeeping: replica -> MD task sandbox reference
    tar_dict = dict()  #Bookkeeping: untar task sandbox reference

    ##Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)

    # NOTE(review): temperature bounds are hard-coded here (350/250), unlike
    # the parameterized sibling init_cycle — confirm this is intended.
    writeInputs.writeInputs(max_temp=350, min_temp=250, replicas=Replicas, timesteps=timesteps)

    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    #Create Tarball of input data
    tar = tarfile.open("Input_Files.tar", "w")
    for name in ["prmtop", "inpcrd", "mdin"]:
        tar.add(name)
    for r in range(Replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()
    #delete all input files outside the tarball
    for r in range(Replicas):
        os.remove('mdin_{0}'.format(r))
    self._prof.prof('EndTar', uid=self._uid)

    #Create Untar Stage
    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [
        'untar_input_files.py', 'Input_Files.tar'
    ]
    untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.cores = 1
    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
        p.name, untar_stg.name, untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is not built from a predetermined order
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        # All replicas read the unpacked inputs from the untar task sandbox.
        md_tsk.link_input_data += [
            '%s/inpcrd' % tar_dict[0],
            '%s/prmtop' % tar_dict[0],
            '%s/mdin_{0}'.format(r) % tar_dict[0]  #Use for full temperature exchange
            #'%s/mdin'%tar_dict[0]  #Testing only
        ]
        md_tsk.arguments = [
            '-O',
            '-p', 'prmtop',
            '-i', 'mdin_{0}'.format(r),  # Use this for full Temperature Exchange
            '-c', 'inpcrd',
            '-o', 'out_{0}'.format(r),
            '-inf', 'mdinfo_{0}'.format(r)
        ]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, md_stg.name, md_tsk.name)
        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print md_tsk.uid
    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)
    #with open('logfile.log', 'a') as logfile:
    #    logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n')
    # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition
    # check and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs.
    # Said pairs then exchange configurations by linking output configuration files appropriately.
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        # the exchange computation needs every replica's mdinfo output
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    # NOTE(review): argument assumes the uploaded ExchangeMethod file is
    # named TempEx.py — verify against the caller.
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)
    self.Book.append(md_dict)
    return p
# Build the stage sequence: stage 0 runs MD for every replica; odd-numbered
# stages run the exchange computation. (Fragment: the exchange task is never
# added to a stage/pipeline in the visible code — continuation not shown.)
for N_Stg in range(Stages):
    stg = Stage()  ## initialization
    task_uids['Stage_%s' % N_Stg] = list()

    ##### Initial MD stage
    if N_Stg == 0:
        for n0 in range(Replicas):
            t = Task()
            t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine
            t.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
            t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
            t.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out']
            t.cores = Replica_Cores
            stg.add_tasks(t)
            task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    ##### Exchange Stages
    # BUG FIX: the original condition used a single '=' (assignment) —
    # a SyntaxError; equality comparison '==' is intended.
    elif N_Stg != 0 and N_Stg % 2 == 1:
        t = Task()
        t.executable = ['python']
        t.upload_input_data = ['exchangeMethods/RandEx.py']
        #t.link_input_data = ['']
        t.arguments = ['RandEx.py', Replicas]
        t.cores = 1
def get_pipeline(instance, iterations):
    """
    Build an adaptive free-energy pipeline for one run instance.

    Stage 1 creates the initial run mdp from a template; each iteration then
    runs grompp (stage 2), mdrun (stage 3) and the alchemical analysis
    (stage 4), feeding the analysis output into the next iteration's grompp.

    Parameters
    ----------
    instance : int
        Index of this run instance (used in output file names).
    iterations : int
        Number of grompp/mdrun/analysis generations to chain.

    Returns
    -------
    Pipeline
    """
    # Create a Pipeline object
    p = Pipeline()

    # Stage 1: generate CB7G3_run.mdp from the template
    s1 = Stage()
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py',
        '--template', 'CB7G3_template.mdp',
        '--newname', 'CB7G3_run.mdp',
        '--wldelta', '2',
        '--equilibrated', 'False',
        '--lambda_state', '0',
        '--seed', '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp',
        '$SHARED/analysis_1.py'
    ]
    s1.add_tasks(t1)
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Stage 2: grompp — assemble the run input (tpr)
        s2 = Stage()
        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp',
            '-c', 'CB7G3.gro',
            '-p', 'CB7G3.top',
            '-n', 'CB7G3.ndx',
            '-o', 'CB7G3.tpr',
            '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx',
            '$SHARED/CB7G3.top',
            '$SHARED/3atomtypes.itp',
            '$SHARED/3_GMX.itp',
            '$SHARED/cucurbit_7_uril_GMX.itp'
        ]
        # BUG FIX: the loop starts at 1, so the original `if it == 0:` branch
        # was unreachable and the first iteration hit the else-branch while
        # s3/s4/t3/t4 were still undefined (NameError). The first iteration
        # must pull its inputs from stage 1 and $SHARED.
        if it == 1:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s1.uid, t1.uid),
                '$SHARED/CB7G3.gro'
            ]
        else:
            # Later iterations chain off the previous generation's analysis
            # (mdp) and mdrun (gro) tasks.
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' % (p.uid, s3.uid, t3.uid)
            ]
        s2.add_tasks(t2)
        p.add_stages(s2)

        # Stage 3: mdrun
        s3 = Stage()
        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt', 20,
            '-deffnm', 'CB7G3',
            '-dhdl', 'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % (p.uid, s2.uid, t2.uid)
        ]
        # Keep per-generation copies in $SHARED for the analysis stage.
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]
        s3.add_tasks(t3)
        p.add_stages(s3)

        # Stage 4: alchemical analysis; its CB7G3_run.mdp feeds iteration it+1
        s4 = Stage()
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        # BUG FIX: the interpreter was invoked with no script — the linked
        # analysis_2.py must be the first argument (matches the sibling
        # implementation of this workflow).
        # NOTE: '--gen={0}'/'--run={1}' each receive both values and select
        # one by index: gen <- iteration, run <- instance.
        t4.arguments = [
            'analysis_2.py',
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            #'--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it, instance),
            '--run={1}'.format(it, instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(it, instance)
        ]
        s4.add_tasks(t4)
        p.add_stages(s4)

    return p
def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):
    """
    Build the pipeline for every cycle after the first.

    Reads the exchange pairs chosen by the previous cycle, wires each
    replica's MD task to the restart file of its exchange partner, and
    appends the exchange task that selects the next set of pairs.
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)

    # Column 1 of exchangePairs_<Cycle>.dat holds, for each replica, the
    # index of the replica whose restart file it continues from.
    with open('exchangePairs_{0}.dat'.format(Cycle), 'r') as pairs_file:
        ExchangeArray = [int(line.split()[1]) for line in pairs_file]

    pipeline = Pipeline()
    pipeline.name = 'genpipeline{0}'.format(Cycle)

    # Bookkeeping (kept for parity with the initial-cycle builder; unused here)
    stage_uids = list()
    task_uids = list()

    # replica index -> staging path of its MD task
    md_dict = dict()

    # --- MD stage: one AMBER task per replica ------------------------------
    md_stage = Stage()
    md_stage.name = 'mdstage{0}'.format(Cycle)
    self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

    for replica in range(Replicas):
        md_task = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
        md_task.name = 'mdtsk-{replica}-{cycle}'.format(replica=replica, cycle=Cycle)

        # restrt comes from the exchange partner picked last cycle; prmtop
        # and the per-replica mdin come from cycle 0's bookkeeping.
        md_task.link_input_data = [
            '%s/restrt > inpcrd' % (self.Book[Cycle - 1][ExchangeArray[replica]]),
            '%s/prmtop' % (self.Book[0][replica]),
            '%s/mdin_{0}'.format(replica) % (self.Book[0][replica])
        ]
        md_task.arguments = [
            '-O',
            '-i', 'mdin_{0}'.format(replica),
            '-p', 'prmtop',
            '-c', 'inpcrd',
            '-o', 'out_{0}'.format(replica),
            '-inf', 'mdinfo_{0}'.format(replica)
        ]

        md_dict[replica] = '$Pipeline_%s_Stage_%s_Task_%s' % (pipeline.name,
                                                              md_stage.name,
                                                              md_task.name)
        self.md_task_list.append(md_task)
        md_stage.add_tasks(md_task)

    pipeline.add_stages(md_stage)

    # --- Exchange stage: one task that picks next cycle's pairs ------------
    ex_stage = Stage()
    ex_stage.name = 'exstg{0}'.format(Cycle + 1)

    ex_task = Task()
    ex_task.name = 'extsk{0}'.format(Cycle + 1)
    ex_task.executable = ['python']
    ex_task.upload_input_data = [ExchangeMethod]
    ex_task.link_input_data += ['%s/mdinfo_%s' % (md_dict[replica], replica)
                                for replica in range(Replicas)]
    ex_task.arguments = ['TempEx.py', '{0}'.format(Replicas), '{0}'.format(Cycle + 1)]
    ex_task.cores = 1
    ex_task.mpi = False
    # Finds exchange partners and also generates the exchange history trace.
    ex_task.download_output_data = ['exchangePairs_{0}.dat'.format(Cycle + 1)]

    ex_stage.add_tasks(ex_task)
    self.ex_task_list.append(ex_task)
    pipeline.add_stages(ex_stage)

    # Record this cycle's MD task locations for the next cycle's wiring.
    self.Book.append(md_dict)

    return pipeline
# Spin up the worker threads; each thread is bound round-robin to one of the
# `num_queues` worker queues. (num_workers, worker, worker_queue, num_queues
# are defined elsewhere in this script.)
workers = []
for i in range(num_workers):
    w = Thread(target=worker, name='worker', args=(worker_queue[i%num_queues],i))
    w.start()
    workers.append(w)

# Build a template Task whose serialized form will be published to the queues.
t = Task()
t.arguments = ["--template=PLCpep7_template.mdp",
               "--newname=PLCpep7_run.mdp",
               "--wldelta=100",
               "--equilibrated=False",
               "--lambda_state=0",
               "--seed=1"]
t.cores = 20
t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
                          'PLCpep7.log > PLCpep7_run1_gen0.log',
                          'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
                          'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
                          'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
                          'PLCpep7.gro > PLCpep7_run1_gen0.gro'
                          ]

# NOTE(review): `t` is rebound from the Task object to its JSON string here;
# the Task itself is no longer reachable afterwards.
t = json.dumps(t.to_dict())

msg_num = 0
start = time.time()
# Publishing loop -- its body continues beyond this chunk of the file.
while msg_num < num_tasks:
def InitCycle(self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps):     # "Cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first Cycle
    """

    #Initialize Pipeline
    #self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()    #Bookkeeping: replica index -> staging path of its MD task
    tar_dict = dict()   #Bookkeeping: staging path of the untar task's sandbox

    ##Write the input files (mdin_<r> per replica, plus prmtop/inpcrd/mdin)
    self._prof.prof('InitWriteInputs', uid=self._uid)
    writeInputs.writeInputs(max_temp=350,min_temp=250,replicas=Replicas,timesteps=timesteps)
    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)
    #Create Tarball of input data so only one file needs staging
    tar = tarfile.open("Input_Files.tar","w")
    for name in ["prmtop", "inpcrd", "mdin"]:
        tar.add(name)
    for r in range (Replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    #delete all input files outside the tarball (they live in the tar now)
    for r in range (Replicas):
        os.remove('mdin_{0}'.format(r))

    self._prof.prof('EndTar', uid=self._uid)

    #Create Untar Stage: unpacks the tarball inside the pilot sandbox
    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py','Input_Files.tar']
    untar_tsk.arguments = ['untar_input_files.py','Input_Files.tar']
    untar_tsk.cores = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    # Staging path of the untar task; MD tasks link their inputs from here.
    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.name,
                                                   untar_stg.name,
                                                   untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is not
    # built from a predetermined order
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks: one AMBER task per replica, each with its own mdin_<r>
    for r in range (Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
        md_tsk.link_input_data += [
            '%s/inpcrd'%tar_dict[0],
            '%s/prmtop'%tar_dict[0],
            '%s/mdin_{0}'.format(r)%tar_dict[0]   #Use for full temperature exchange
            #'%s/mdin'%tar_dict[0]                #Testing only
        ]
        md_tsk.arguments = ['-O','-p','prmtop', '-i', 'mdin_{0}'.format(r),  # Use this for full Temperature Exchange
                            '-c','inpcrd','-o','out_{0}'.format(r),
                            '-inf','mdinfo_{0}'.format(r)]
        # Record where this task's outputs will live for the exchange stage.
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.name, md_stg.name, md_tsk.name)

        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print md_tsk.uid
    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)
    #with open('logfile.log', 'a') as logfile:
    #   logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n')

    # Create Exchange Task. Exchange task performs a Metropolis Hastings
    # thermodynamic balance condition check and spits out the
    # exchangePairs.dat file that contains a sorted list of ordered pairs.
    # Said pairs then exchange configurations by linking output
    # configuration files appropriately.
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    # Link every replica's mdinfo so the exchange script can read energies.
    for r in range (Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[r],r)]
    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)

    # Keep this cycle's MD task locations for GeneralCycle to wire against.
    self.Book.append(md_dict)
    return p
# (Fragment: this chunk begins inside a `for N_Stg in ...` loop whose header
# is outside the visible region, and is truncated mid-expression at the end.)

# Track the task uids created for this stage under a per-stage key.
task_uids['Stage_%s' % N_Stg] = list()
if N_Stg == 0:
    # First stage: stage in all inputs and preprocess with grompp.
    for n0 in range(Replicas):
        t = Task()
        t.executable = [
            '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'
        ]  #MD Engine
        t.upload_input_data = [
            'in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp'
        ]
        t.pre_exec = [
            'module load gromacs',
            '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'
        ]
        t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']
        t.cores = 32
        stg.add_tasks(t)
        task_uids['Stage_%s' % N_Stg].append(t.uid)
    p.add_stages(stg)
    stage_uids.append(stg.uid)
else:
    # Later stages: continue each replica from the previous stage's output.
    for n0 in range(Replicas):
        t = Task()
        t.executable = [
            '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'
        ]  #MD Engine
        # NOTE(review): chunk is truncated here, mid copy_input_data entry.
        t.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/out.gro > in.gro'
            % (p.uid, stage_uids[N_Stg - 1], task_uids['Stage_%s' %
def InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod):  # "Cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Build the first-cycle pipeline: untar the staged input files, run one
    MD task per replica, then run the exchange computation over their
    mdinfo files.
    """

    #Initialize Pipeline
    pipeline = Pipeline()

    md_refs = dict()    #Bookkeeping: replica index -> staging path of its MD task
    tar_refs = dict()   #Bookkeeping: staging path of the untar task's sandbox

    #Create Tarball of input data
    #Create Untar Stage
    untar_stage = Stage()

    #Untar Task
    untar_task = Task()
    untar_task.executable = ['python']
    untar_task.upload_input_data = ['untar_input_files.py','../../Input_Files.tar']
    untar_task.arguments = ['untar_input_files.py','Input_Files.tar']
    untar_task.cores = 1

    untar_stage.add_tasks(untar_task)
    pipeline.add_stages(untar_stage)

    tar_refs[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(pipeline.uid,
                                                   untar_stage.uid,
                                                   untar_task.uid)
    print(tar_refs[0])

    # First MD stage: needs to be defined separately since workflow is not
    # built from a predetermined order
    md_stage = Stage()

    # MD tasks: one per replica, all reading the shared mdin for now
    for replica in range(Replicas):
        md_task = Task()
        md_task.executable = [MD_Executable]
        md_task.link_input_data += ['%s/inpcrd'%tar_refs[0],
                                    '%s/prmtop'%tar_refs[0],
                                    '%s/mdin'%tar_refs[0]]
        #Should be abstracted from the user?
        md_task.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        md_task.arguments = ['-O','-p','prmtop', '-i', 'mdin',
                             '-c','inpcrd','-o','out_{0}'.format(replica),
                             '-inf','mdinfo_{0}'.format(replica)]
        md_task.cores = Replica_Cores
        md_task.mpi = True

        md_refs[replica] = '$Pipeline_%s_Stage_%s_Task_%s'%(pipeline.uid,
                                                            md_stage.uid,
                                                            md_task.uid)
        md_stage.add_tasks(md_task)

    pipeline.add_stages(md_stage)

    # First Exchange Stage.
    # The exchange task performs a Metropolis Hastings thermodynamic balance
    # condition check and spits out the exchangePairs.dat file that contains
    # a sorted list of ordered pairs. Said pairs then exchange configurations
    # by linking output configuration files appropriately.
    ex_stage = Stage()

    ex_task = Task()
    ex_task.executable = ['python']
    ex_task.upload_input_data = [ExchangeMethod]
    ex_task.link_input_data += ['%s/mdinfo_%s'%(md_refs[replica], replica)
                                for replica in range(Replicas)]
    ex_task.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_task.cores = 1
    ex_task.mpi = False
    ex_task.download_output_data = ['exchangePairs.dat']

    ex_stage.add_tasks(ex_task)
    pipeline.add_stages(ex_stage)

    # Record MD task locations for subsequent cycles (module-level Book).
    Book.append(md_refs)

    return pipeline
# Bookkeeping stage_uids = list() task_uids = dict() Stages = 3 Replicas = 4 for N_Stg in range(Stages): stg = Stage() ## initialization task_uids['Stage_%s'%N_Stg] = list() if N_Stg == 0: for n0 in range(Replicas): t = Task() t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'] #MD Engine t.upload_input_data = ['in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp'] t.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out'] t.cores = 32 stg.add_tasks(t) task_uids['Stage_%s'%N_Stg].append(t.uid) p.add_stages(stg) stage_uids.append(stg.uid) else: for n0 in range(Replicas): t = Task() t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'] #MD Engine t.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/out.gro > in.gro'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.top'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/FNF.itp'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/martini_v2.2.itp'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0]), '$Pipeline_%s_Stage_%s_Task_%s/in.mdp'%(p.uid, stage_uids[N_Stg-1], task_uids['Stage_%s'%(N_Stg-1)][n0])] t.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']
def create_workflow(Kconfig):
    """
    Build the ExTASY gromacs/LSDMap workflow pipeline.

    Each iteration runs: an ensemble of gromacs simulations, concatenation
    of their outputs, LSDMap analysis, and a post-analysis step that
    produces the start coordinates for the next iteration.

    Parameters
    ----------
    Kconfig : object
        Parsed configuration; this function reads num_CUs, num_iterations,
        md_input_file, mdp_file, top_file, ndx_file, lsdm_config_file,
        num_runs, max_alive_neighbors, max_dead_neighbors and nsave.

    Returns
    -------
    Pipeline
        The assembled workflow pipeline.
    """

    # User settings
    ENSEMBLE_SIZE = int(Kconfig.num_CUs)  # Number of ensemble members
    TOTAL_ITERS = int(
        Kconfig.num_iterations)  # Number of iterations to run current trial

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    '''
    pre_proc_stage :

        Purpose : Transfers files, Split the input file into smaller files to be used by each of the
        gromacs instances in the first iteration.

        Arguments :
            inputfile = file to be split
            numCUs    = number of simulation instances/ number of smaller files
    '''
    pre_proc_stage = Stage()

    pre_proc_task = Task()
    pre_proc_task.pre_exec = ['module load bwpy']
    pre_proc_task.executable = ['python']
    pre_proc_task.arguments = [
        'spliter.py', Kconfig.num_CUs,
        os.path.basename(Kconfig.md_input_file)
    ]
    pre_proc_task.copy_input_data = [
        '$SHARED/%s' % os.path.basename(Kconfig.md_input_file),
        '$SHARED/spliter.py', '$SHARED/gro.py'
    ]
    # Staging path of this task; the first iteration's sim tasks link their
    # start<k>.gro files from here.
    pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
        wf.uid, pre_proc_stage.uid, pre_proc_task.uid)

    pre_proc_stage.add_tasks(pre_proc_task)
    wf.add_stages(pre_proc_stage)
    # ------------------------------------------------------------------------------------------------------------------

    cur_iter = 0
    while (cur_iter < TOTAL_ITERS):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename
        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(ENSEMBLE_SIZE):

            sim_task = Task()
            sim_task.pre_exec = [
                'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
                'module load bwpy',
                'module load platform-mpi',
                'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
                'export PATH=/u/sciteam/balasubr/.local/bin:$PATH'
            ]
            sim_task.executable = ['python']
            sim_task.cores = 16
            sim_task.arguments = [
                'run.py', '--mdp',
                os.path.basename(Kconfig.mdp_file), '--top',
                os.path.basename(Kconfig.top_file), '--gro', 'start.gro',
                '--out', 'out.gro'
            ]
            sim_task.link_input_data = [
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.mdp_file)),
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.top_file)),
                '$SHARED/run.py > run.py'
            ]
            if Kconfig.ndx_file is not None:
                sim_task.link_input_data.append('$SHARED/{0}'.format(
                    os.path.basename(Kconfig.ndx_file)))

            # First iteration starts from the pre-processing split; later
            # iterations start from the previous post-analysis outputs.
            if (cur_iter == 0):
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        pre_ana_stage = Stage()

        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
            'module load bwpy'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = [
            'pre_analyze.py', Kconfig.num_CUs, 'tmp.gro', '.'
        ]
        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze.py > pre_analyze.py'
        ]
        # Pull in every ensemble member's output coordinates.
        for sim_num in range(ENSEMBLE_SIZE):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]
        # Keep the concatenated coordinates per iteration in $SHARED for the
        # lsdmap and post-analysis stages.
        pre_ana_task.copy_output_data = [
            'tmpha.gro > $SHARED/iter_%s/tmpha.gro' % cur_iter,
            'tmp.gro > $SHARED/iter_%s/tmp.gro' % cur_iter
        ]

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap
        ana_stage = Stage()

        ana_task = Task()
        ana_task.pre_exec = [
            'module load bwpy', 'module load platform-mpi',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        ana_task.executable = ['lsdmap']
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]
        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '$SHARED/iter_%s/tmpha.gro > tmpha.gro' % cur_iter
        ]
        ana_task.copy_output_data = [
            'tmpha.ev > $SHARED/iter_%s/tmpha.ev' % cur_iter,
            'out.nn > $SHARED/iter_%s/out.nn' % cur_iter
        ]

        # ana_task_ref here still refers to the PREVIOUS iteration's lsdmap
        # task (it is reassigned below), so the weights chain across
        # iterations.
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/weight.w > weight.w' % ana_task_ref
            ]
            ana_task.copy_output_data += [
                'weight.w > $SHARED/iter_%s/weight.w' % cur_iter
            ]
        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > output/iter%s/lsdmap.log' % cur_iter
            ]
        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)

        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #                to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #     Arguments:
        #             num_runs              = number of configurations to be generated in the new coordinate file
        #             out                   = output filename
        #             cycle                 = iteration number
        #             max_dead_neighbors    = max dead neighbors to be considered
        #             max_alive_neighbors   = max alive neighbors to be considered
        #             numCUs                = number of simulation instances/ number of smaller files
        post_ana_stage = Stage()

        post_ana_task = Task()
        post_ana_task.pre_exec = [
            'module load bwpy',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        post_ana_task.executable = ['python']
        post_ana_task.arguments = [
            'post_analyze.py', Kconfig.num_runs, 'tmpha.ev', 'ncopies.nc',
            'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
            Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
            'input.gro', cur_iter, Kconfig.num_CUs
        ]
        post_ana_task.link_input_data = [
            '$SHARED/post_analyze.py > post_analyze.py',
            '$SHARED/selection.py > selection.py',
            '$SHARED/reweighting.py > reweighting.py',
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
            '$SHARED/iter_%s/tmp.gro > tmp.gro' % cur_iter,
            '$SHARED/iter_%s/tmpha.ev > tmpha.ev' % cur_iter,
            '$SHARED/iter_%s/out.nn > out.nn' % cur_iter,
            '$SHARED/input.gro > input.gro'
        ]
        if cur_iter > 0:
            post_ana_task.link_input_data += [
                '%s/weight.w > weight_new.w' % ana_task_ref
            ]
        if (cur_iter % Kconfig.nsave == 0):
            post_ana_task.download_output_data = [
                'out.gro > output/iter%s/out.gro' % cur_iter,
                'weight.w > output/iter%s/weight.w' % cur_iter
            ]
        # Staging path consumed by the NEXT iteration's sim tasks.
        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)

        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1

    return wf