def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create ten Task objects, each concatenating the two shared input files into 'output.txt'
    for x in range(10):
        t1 = Task()
        t1.executable = 'cat'
        t1.arguments = ['file1.txt', 'file2.txt', '>', 'output.txt']
        t1.copy_input_data = ['$SHARED/file1.txt', '$SHARED/file2.txt']
        t1.download_output_data = ['output.txt > %s/output_%s.txt' % (cur_dir, x + 1)]

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = '/bin/bash'
    t1.arguments = ['-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()

    # Create a Task object
    t2 = Task()
    t2.executable = '/bin/bash'
    t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']
    # Copy data from the task in the first stage to the current task's location
    t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)]

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create another Stage object to hold checksum tasks
    s3 = Stage()

    # Create a Task object
    t3 = Task()
    t3.executable = '/bin/bash'
    t3.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
    # Copy data from the task in the second stage to the current task's location
    t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/ccount.txt' % (p.uid, s2.uid, t2.uid)]
    # Download the output of the current task to the current location
    t3.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]

    # Add the Task to the Stage
    s3.add_tasks(t3)

    # Add Stage to the Pipeline
    p.add_stages(s3)

    return p
def selection(self, ps_file, select_file):

    tasks = []

    t = Task()
    t.pre_exec = ['/bin/cp {0} {1}'.format(self.param_space, ps_file)]
    t.executable = self.analysis
    t.arguments = [ps_file, self.n_samples, select_file]
    t.download_output_data = [select_file]

    tasks.append(t)

    return tasks
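# Usage sketch (not from the original source): the tasks returned by selection() are plain
# EnTK Task objects, so they can be added to a Stage and Pipeline with the same
# add_tasks()/add_stages() calls used throughout these snippets. The `sampler` object and
# the file names below are hypothetical placeholders.
from radical.entk import Pipeline, Stage

sel_stage = Stage()
for task in sampler.selection('param_space.dat', 'selected.dat'):
    sel_stage.add_tasks(task)

sel_pipeline = Pipeline()
sel_pipeline.add_stages(sel_stage)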
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task object which writes "Hello World" to 'temp.txt'
    t1 = Task()
    t1.name = 't1'
    t1.executable = ['/bin/echo']
    t1.arguments = ['"Hello World"']
    t1.stdout = 'temp.txt'

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create a Stage object
    s2 = Stage()
    s2.name = 's2'

    # Create a Task object which reads 'temp.txt' from the first stage and writes 'output.txt'
    t2 = Task()
    t2.name = 't2'
    t2.executable = ['/bin/cat']
    t2.arguments = ['$Pipeline_%s_Stage_%s_Task_%s/temp.txt' % (p.name, s1.name, t1.name)]
    t2.stdout = 'output.txt'
    t2.download_output_data = ['output.txt']

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    return p
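# Submission sketch (assumed, not part of the original snippet): a pipeline built by
# generate_pipeline() is handed to an AppManager together with a resource description,
# as other snippets in this collection do. The RabbitMQ hostname/port and the
# 'local.localhost' resource values are illustrative placeholders.
from radical.entk import AppManager

appman = AppManager(hostname='localhost', port=5672)
appman.resource_desc = {'resource': 'local.localhost',
                        'walltime': 10,
                        'cpus': 1}
appman.workflow = set([generate_pipeline()])
appman.run()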
def test_assignment_exceptions():

    t = Task()

    data_type = [1, 'a', True, list()]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.executable = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
def generate_task(self):

    task = Task()
    task.name = self.name
    task.executable = ["tar", "czvfh"]
    task.arguments = [self.output_name, "*{}".format(self.extension)]
    task.cpu_reqs = {'processes': 1, 'process_type': None,
                     'threads_per_process': 1, 'thread_type': None}

    links = [self.input_data([self.extension], **x)
             for x in self._input_sim._ensemble_product()]
    links = [l for link in links for l in link]
    task.link_input_data.extend(links)

    task.download_output_data = [self.output_name]

    return task
def generate_task(self):

    task = Task()
    task.name = self.name
    task.executable = [NAMD_TI_ANALYSIS]
    task.arguments = ['-f', '>', self.output]
    task.cpu_reqs = {'processes': 1, 'process_type': None,
                     'threads_per_process': 1, 'thread_type': None}

    links = [self.input_data([self.extension], **x)
             for x in self._input_sim._ensemble_product()]
    links = [l for link in links for l in link]
    task.link_input_data.extend(links)

    task.download_output_data = [self.output_name]

    return task
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data
            with pytest.raises(TypeError):
                t.path = data
            with pytest.raises(TypeError):
                t.parent_stage = data
            with pytest.raises(TypeError):
                t.parent_pipeline = data
            with pytest.raises(TypeError):
                t.stdout = data
            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):
            with pytest.raises(TypeError):
                t.pre_exec = data
            with pytest.raises(TypeError):
                t.executable = data
            with pytest.raises(TypeError):
                t.arguments = data
            with pytest.raises(TypeError):
                t.post_exec = data
            with pytest.raises(TypeError):
                t.upload_input_data = data
            with pytest.raises(TypeError):
                t.copy_input_data = data
            with pytest.raises(TypeError):
                t.link_input_data = data
            with pytest.raises(TypeError):
                t.move_input_data = data
            with pytest.raises(TypeError):
                t.copy_output_data = data
            with pytest.raises(TypeError):
                t.download_output_data = data
            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, unicode):
            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}
                t.gpu_reqs = {'processes': 1, 'process_type': data,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': data}

        if not isinstance(data, int):
            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
                t.gpu_reqs = {'processes': data, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
                t.gpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': data, 'thread_type': None}
workers.append(w)

t = Task()
t.arguments = ["--template=PLCpep7_template.mdp",
               "--newname=PLCpep7_run.mdp",
               "--wldelta=100",
               "--equilibrated=False",
               "--lambda_state=0",
               "--seed=1"]
t.cores = 20
t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
                          'PLCpep7.log > PLCpep7_run1_gen0.log',
                          'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
                          'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
                          'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
                          'PLCpep7.gro > PLCpep7_run1_gen0.gro']

t = json.dumps(t.to_dict())

msg_num = 0
start = time.time()
while msg_num < num_tasks:
    #message = 'message_%s'%msg_num
    msg_num += 1
    channel.basic_publish(exchange='',
                          routing_key=worker_queue[msg_num % num_queues],
# Create another Stage object
s2 = Stage()
s2.name = 'Stage 2'

# Create a Task object
t2 = Task()
t2.executable = '/bin/bash'
t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']
# Copy data from the task in the first stage to the current task's location
t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)]
# Download the output of the current task to the current location
t2.download_output_data = ['ccount.txt']

# Add the Task to the Stage
s2.add_tasks(t2)

# Add Stage to the Pipeline
p.add_stages(s2)

# Create Application Manager
appman = AppManager(hostname=hostname, port=port)

# Assign the workflow as a set or list of Pipelines to the Application Manager
appman.workflow = set([p])

# Create a dictionary to describe the four mandatory keys:
# resource, walltime, cpus and project
]
t1.executable = ['./bin/xspecfem3D']
t1.cpu_reqs = {'processes': 4, 'process_type': 'MPI',
               'threads_per_process': 1, 'thread_type': 'OpenMP'}
t1.copy_input_data = ['/projects/TROMP/entk/scratch/specfem_data.tar.gz']
t1.post_exec = [
    # Tar output files
    'tar -zcf specfem_final.tar.gz bin DATA DATABASES_MPI OUTPUT_FILES',
    # Copy to scratch folder
    'cp specfem_final.tar.gz /projects/TROMP/entk/scratch/',
]
t1.download_output_data = ['STDOUT', 'STDERR', 'specfem_final.tar.gz']

specfem_stage.add_tasks(t1)

p.add_stages(specfem_stage)

res_dict = {
    'resource': 'princeton.tiger_cpu',
    'project': 'geo',
    'queue': 'cpu',
    'schema': 'local',
    'walltime': 15,
    'cpus': 4,
}

try:
def InitCycle(self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps):
    # "Cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first Cycle
    """

    #Initialize Pipeline
    #self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()    #Bookkeeping
    tar_dict = dict()   #Bookkeeping

    ##Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)

    writeInputs.writeInputs(max_temp=350, min_temp=250,
                            replicas=Replicas, timesteps=timesteps)

    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)

    #Create Tarball of input data
    tar = tarfile.open("Input_Files.tar", "w")
    for name in ["prmtop", "inpcrd", "mdin"]:
        tar.add(name)
    for r in range(Replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    #delete all input files outside the tarball
    for r in range(Replicas):
        os.remove('mdin_{0}'.format(r))

    self._prof.prof('EndTar', uid=self._uid)

    #Create Untar Stage
    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    untar_tsk.cores = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, untar_stg.name, untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is not built from a predetermined order
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += ['%s/inpcrd' % tar_dict[0],
                                   '%s/prmtop' % tar_dict[0],
                                   '%s/mdin_{0}'.format(r) % tar_dict[0]
                                   #Use for full temperature exchange
                                   #'%s/mdin'%tar_dict[0]  #Testing only
                                   ]
        md_tsk.arguments = ['-O',
                            '-p', 'prmtop',
                            '-i', 'mdin_{0}'.format(r),   # Use this for full Temperature Exchange
                            '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, md_stg.name, md_tsk.name)

        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print md_tsk.uid

    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    #with open('logfile.log', 'a') as logfile:
    #    logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n')

    # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance
    # condition check and spits out the exchangePairs.dat file that contains a sorted list of
    # ordered pairs. Said pairs then exchange configurations by linking output configuration
    # files appropriately.
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)

    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)

    self.Book.append(md_dict)

    return p
def cycle(k):

    #read exchangePairs.dat
    #
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    #Create initial MD stage
    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
                                  '%s/prmtop' % (Book[k - 1][n0]),
                                  #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
                                  '%s/mdin' % (Book[k - 1][n0])]
        ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct

        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  #Preexec, Blue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid, md_tsk.uid)
        #print d

        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)

    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    #Create exchange stage
    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(d)
    #print d
    #print Book

    return p
def init_cycle(self, replicas, replica_cores, python_path, md_executable, exchange_method,
               min_temp, max_temp, timesteps, basename, pre_exec):
    # "cycle" = 1 MD stage plus the subsequent exchange computation
    """
    Initial cycle consists of:
    1) Create tarball of MD input data
    2) Transfer the tarball to pilot sandbox
    3) Untar the tarball
    4) Run first cycle
    """

    #Initialize Pipeline
    self._prof.prof('InitTar', uid=self._uid)
    p = Pipeline()
    p.name = 'initpipeline'

    md_dict = dict()    #bookkeeping
    tar_dict = dict()   #bookkeeping

    #Write the input files
    self._prof.prof('InitWriteInputs', uid=self._uid)

    writeInputs.writeInputs(max_temp=max_temp, min_temp=min_temp,
                            replicas=replicas, timesteps=timesteps, basename=basename)

    self._prof.prof('EndWriteInputs', uid=self._uid)

    self._prof.prof('InitTar', uid=self._uid)

    #Create Tarball of input data
    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin_{0}'.format(r))
    tar.close()

    #delete all input files outside the tarball
    for r in range(replicas):
        os.remove('mdin_{0}'.format(r))

    self._prof.prof('EndTar', uid=self._uid)

    #Create Untar Stage
    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir

    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task
    untar_tsk = Task()
    untar_tsk.name = 'untartsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = [str(aux_function_path) + '/repex/untar_input_files.py',
                                   'input_files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = 1
    #untar_tsk.post_exec = ['']

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)

    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, untar_stg.name, untar_tsk.name)

    # First MD stage: needs to be defined separately since workflow is not built from a
    # predetermined order, also equilibration needs to happen first.
    md_stg = Stage()
    md_stg.name = 'mdstg0'
    self._prof.prof('InitMD_0', uid=self._uid)

    # MD tasks
    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
        md_tsk.link_input_data += ['%s/inpcrd' % tar_dict[0],
                                   '%s/prmtop' % tar_dict[0],
                                   '%s/mdin_{0}'.format(r) % tar_dict[0]
                                   #Use for full temperature exchange
                                   ]
        md_tsk.arguments = ['-O',
                            '-p', 'prmtop',
                            '-i', 'mdin_{0}'.format(r),
                            '-c', 'inpcrd',
                            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=0),
                            '-r', 'restrt'.format(replica=r, cycle=0),
                            #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
                            #'-o', '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r,cycle=0),
                            #'-r', '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                            #'-x', '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r,cycle=0),
                            '-inf', 'mdinfo_{0}'.format(r)]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.name, md_stg.name, md_tsk.name)

        md_stg.add_tasks(md_tsk)
        self.md_task_list.append(md_tsk)
        #print md_tsk.uid

    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)

    # First Exchange Stage
    ex_stg = Stage()
    ex_stg.name = 'exstg0'
    self._prof.prof('InitEx_0', uid=self._uid)

    # Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk0'
    #ex_tsk.pre_exec = ['module load python/2.7.10']
    ex_tsk.executable = [python_path]
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs_0.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)

    self.ex_task_list.append(ex_tsk)
    #self.ex_task_uids.append(ex_tsk.uid)

    self.book.append(md_dict)

    return p
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected
    attributes of the Task into a dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'pre_exec': [],
                 'executable': str(),
                 'arguments': [],
                 'post_exec': [],
                 'cpu_reqs': {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None},
                 'gpu_reqs': {'processes': 0, 'process_type': None,
                              'threads_per_process': 0, 'thread_type': None},
                 'lfs_per_process': 0,
                 'upload_input_data': [],
                 'copy_input_data': [],
                 'link_input_data': [],
                 'move_input_data': [],
                 'copy_output_data': [],
                 'move_output_data': [],
                 'download_output_data': [],
                 'stdout': None,
                 'stderr': None,
                 'exit_code': None,
                 'path': None,
                 'tag': None,
                 'parent_stage': {'uid': None, 'name': None},
                 'parent_pipeline': {'uid': None, 'name': None}}

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    expected = {'uid': 'test.0000',
                'name': 'new',
                'state': states.INITIAL,
                'state_history': [states.INITIAL],
                'pre_exec': ['module load abc'],
                'executable': 'sleep',
                'arguments': ['10'],
                'post_exec': [],
                'cpu_reqs': {'processes': 10, 'process_type': None,
                             'threads_per_process': 2, 'thread_type': None},
                'gpu_reqs': {'processes': 5, 'process_type': None,
                             'threads_per_process': 3, 'thread_type': None},
                'lfs_per_process': 1024,
                'upload_input_data': ['test1'],
                'copy_input_data': ['test2'],
                'link_input_data': ['test3'],
                'move_input_data': ['test4'],
                'copy_output_data': ['test5'],
                'move_output_data': ['test6'],
                'download_output_data': ['test7'],
                'stdout': 'out',
                'stderr': 'err',
                'exit_code': 1,
                'path': 'a/b/c',
                'tag': 'task.0010',
                'parent_stage': {'uid': 's1', 'name': 'stage1'},
                'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}

    assert d == expected

    # Assigning the executable as a plain string should produce the same dictionary
    t.executable = 'sleep'
    d = t.to_dict()

    assert d == expected
def general_cycle(self, replicas, replica_cores, cycle, python_path, md_executable,
                  exchange_method, pre_exec):
    """
    All cycles after the initial cycle
    Pulls up exchange pairs file and generates the new workflow
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid)

    with open('exchangePairs_{0}.dat'.format(cycle), 'r') as f:  # Read exchangePairs.dat
        exchange_array = []
        for line in f:
            exchange_array.append(int(line.split()[1]))
            #exchange_array.append(line)
            #print exchange_array

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(cycle)

    #bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    #Create MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(cycle)

    self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

    for r in range(replicas):
        md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=cycle)
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (self.book[cycle - 1][exchange_array[r]]),
                                  '%s/prmtop' % (self.book[0][r]),
                                  '%s/mdin_{0}'.format(r) % (self.book[0][r])]

        ### The following softlinking scheme is to be used ONLY if node local file system is
        ### to be used: not fully supported yet.
        #md_tsk.link_input_data = ['$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=exchange_array[r],cycle=cycle-1) > '$NODE_LFS_PATH/inpcrd',
        #                          #'%s/restrt > inpcrd'%(self.book[cycle-1][exchange_array[r]]),
        #                          '%s/prmtop'%(self.book[0][r]),
        #                          '%s/mdin_{0}'.format(r)%(self.Book[0][r])]

        md_tsk.arguments = ['-O',
                            '-i', 'mdin_{0}'.format(r),
                            '-p', 'prmtop',
                            '-c', 'inpcrd',
                            #'-c', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle-1),
                            '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                            '-r', 'restrt',
                            #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle),
                            '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                            '-inf', 'mdinfo_{0}'.format(r)]
        #md_tsk.tag = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.name, md_stg.name, md_tsk.name)

        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(cycle + 1)

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(cycle + 1)
    ex_tsk.executable = [python_path]  #['/usr/bin/python']  #['/opt/python/bin/python']
    ex_tsk.upload_input_data = [exchange_method]
    for r in range(replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.pre_exec = ['mv *.py exchange_method.py']
    ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '{0}'.format(cycle + 1)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # Finds exchange partners, also generates exchange history trace
    ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(cycle + 1)]

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    self.ex_task_list.append(ex_tsk)

    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    self.book.append(md_dict)
    #self._prof.prof('EndEx_{0}'.format(cycle), uid=self._uid)
    #print d
    #print self.book

    return q
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
    ComputeUnitDescription with the complete Task description.
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = 'grompp'
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4, 'process_type': 'MPI',
                   'threads_per_process': 1, 'thread_type': 'OpenMP'}
    t1.gpu_reqs = {'processes': 4, 'process_type': 'MPI',
                   'threads_per_process': 2, 'thread_type': 'OpenMP'}
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    cud = create_cud_from_task(t1, placeholders)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'], t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.post_exec == t1.post_exec
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']

    assert {'source': 'upload_input.dat',
            'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat',
            'action': rp.COPY,
            'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat',
            'action': rp.LINK,
            'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat',
            'action': rp.COPY,
            'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat',
            'target': 'download_output.dat'} in cud.output_staging
def test_output_list_from_task():
    """
    **Purpose**: Test if the 'get_output_list_from_task' function generates the correct
    RP output transfer directives when given a Task.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_output_list_from_task(t, placeholders)

    # Test copy output data
    t = Task()
    t.copy_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_output_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0])

    t = Task()
    t.copy_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_output_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0].split('>')[1].strip())

    # Test move output data
    t = Task()
    t.move_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_output_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0])

    t = Task()
    t.move_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_output_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0].split('>')[1].strip())

    # Test download output data
    t = Task()
    t.download_output_data = ['/home/vivek/test.dat']
    ip_list = get_output_list_from_task(t, placeholders)

    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.download_output_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0])

    t = Task()
    t.download_output_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_output_list_from_task(t, placeholders)

    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.download_output_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0].split('>')[1].strip())
def init_cycle():

    # Create Pipeline Obj
    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()
    dict_tarball = dict()

    #Create Tarball stage
    tar_stg = Stage()

    #Create Tar/untar task
    tar_tsk = Task()
    tar_tsk.executable = ['python']
    tar_tsk.upload_input_data = ['Input_Files.tar', 'untar_input_files.py']
    tar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    tar_tsk.cores = 1

    tar_stg.add_tasks(tar_tsk)
    #task_uids.append(tar_tsk.uid)
    p.add_stages(tar_stg)
    #stage_uids.append(tar_stg.uid)

    dict_tarball[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, tar_stg.uid, tar_tsk.uid)

    #Create initial MD stage
    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine, BW
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI']  #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        #md_tsk.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
        #md_tsk.upload_input_data = ['inpcrd','prmtop','mdin']
        md_tsk.link_input_data += ['%s/inpcrd' % dict_tarball[0],
                                   '%s/prmtop' % dict_tarball[0],
                                   '%s/mdin' % dict_tarball[0]]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid, md_tsk.uid)

        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)

    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)
    #print d

    #Create Exchange Stage
    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)

    Book.append(d)
    #print Book

    return p
# Add Stage to the Pipeline
p.add_stages(s1)

# Create another Stage object
s2 = Stage()
s2.name = 'Stage 2'

# Create a Task object
t2 = Task()
t2.executable = ['/bin/bash']
t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']
# Copy data from the task in the first stage to the current task's location
t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)]
# Download the output of the current task to the current location
t2.download_output_data = ['ccount.txt']

# Add the Task to the Stage
s2.add_tasks(t2)

# Add Stage to the Pipeline
p.add_stages(s2)

# Create Application Manager
appman = AppManager(hostname=hostname, port=port)

# Assign the workflow as a set or list of Pipelines to the Application Manager
appman.workflow = set([p])

# Create a dictionary to describe our resource request for XSEDE Stampede
res_dict = {
def push_function(ind, num_push, num_queues):

    try:

        mq_connection = pika.BlockingConnection(
                            pika.ConnectionParameters(host='localhost', port=32769))
        mq_channel = mq_connection.channel()

        tasks_pushed = 0
        global MAX_TASKS
        proc_tasks = MAX_TASKS/num_push
        push_times = []
        proc_mem = []

        t = Task()
        t.arguments = ["--template=PLCpep7_template.mdp",
                       "--newname=PLCpep7_run.mdp",
                       "--wldelta=100",
                       "--equilibrated=False",
                       "--lambda_state=0",
                       "--seed=1"]
        t.cores = 20
        t.copy_input_data = ['$STAGE_2_TASK_1/PLCpep7.tpr']
        t.download_output_data = ['PLCpep7.xtc > PLCpep7_run1_gen0.xtc',
                                  'PLCpep7.log > PLCpep7_run1_gen0.log',
                                  'PLCpep7_dhdl.xvg > PLCpep7_run1_gen0_dhdl.xvg',
                                  'PLCpep7_pullf.xvg > PLCpep7_run1_gen0_pullf.xvg',
                                  'PLCpep7_pullx.xvg > PLCpep7_run1_gen0_pullx.xvg',
                                  'PLCpep7.gro > PLCpep7_run1_gen0.gro']

        t_dict = t.to_dict()

        print 'Size of task: ', asizeof.asizeof(t_dict)

        name = 'queue_%s' % (ind % num_queues)

        while (tasks_pushed < proc_tasks) and (not kill_pusher.is_set()):

            corr_id = str(uuid.uuid4())

            obj = {'task': t_dict, 'id': corr_id}

            mq_channel.basic_publish(exchange='',
                                     routing_key=name,
                                     properties=pika.BasicProperties(correlation_id=corr_id),
                                     body=json.dumps(obj))

            tasks_pushed += 1
            cur_time = time.time()
            push_times.append(cur_time)
            mem = psutil.virtual_memory().available/(2**20)  # MBytes
            proc_mem.append(mem)

            # print '%s: Push average throughput: %s tasks/sec'%(name,
            #       float(tasks_pushed/(cur_time - start_time)))

        print 'Push: ', tasks_pushed

        f = open(DATA + '/push_%s.txt' % ind, 'w')
        for i in range(len(push_times)):
            f.write('%s %s\n' % (push_times[i], proc_mem[i]))
            #f.write('%s\n'%(push_times[ind]))
        f.close()

        print 'Push proc killed'

    except KeyboardInterrupt:

        print len(push_times)

        f = open(DATA + '/push_%s.txt' % ind, 'w')
        for i in range(min(len(push_times), len(proc_mem))):
            f.write('%s %s\n' % (push_times[i], proc_mem[i]))
        f.close()

        print 'Push proc killed'

    except Exception as ex:

        print 'Unexpected error: %s' % ex
        print traceback.format_exc()

        f = open(DATA + '/push_%s.txt' % ind, 'w')
        for i in range(min(len(push_times), len(proc_mem))):
            f.write('%s %s\n' % (push_times[i], proc_mem[i]))
        f.close()
    s.name = 'Stage %s' % s_cnt

    if stage == 1:
        # Create Task 1, training
        t = Task()
        t.name = 'my-task1'
        t.executable = ['sbatch']   # Assign executable to the task
        # Assign arguments for the task executable
        t.arguments = ['/Code/trainbatch.bat']
    else:
        # Create Task 2
        t = Task()
        t.name = 'my-task2'
        t.executable = ['sbatch']   # Assign executable to the task
        # Assign arguments for the task executable
        t.arguments = ['/Code/predscript.bat']
        t.download_output_data = ['classified_images']   # Download resulting images

    s.add_tasks(t)

    # Add Stage to the Pipeline
    p.add_stages(s)

    return p


if __name__ == '__main__':

    p1 = generate_pipeline(name='Pipeline 1', stages=2)
def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):

    # Initialize a task object
    t = Task()

    # Define magic variable dictionary
    mvar_dict = {"PIPELINE_ID": pipe_name}

    # Give this task object a name
    t.name = task_name

    # Pre exec lets you load modules and set up the environment before executing the workload
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    # Executable to use for the task
    t.executable = tcfg['executable']

    # If there's a user-defined input file (likely for genmod modules), add it to the
    # options list and upload file list if needed
    if "input_data_file" in tcfg['options']:
        tcfg['upload_input_data'].append(
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file']))

    # List of arguments for the executable
    t.arguments = [tcfg['script']] + match_options(tcfg['options'], ecfg['options'])

    # CPU requirements for this task
    t.cpu_threads = {'processes': tcfg['cpu']['processes'],
                     'process-type': tcfg['cpu']['process-type'],
                     'threads-per-process': tcfg['cpu']['threads-per-process'],
                     'thread-type': tcfg['cpu']['thread-type']}

    # Upload data from your local machine to the remote machine
    # Note: Remote machine can be the local machine
    t.upload_input_data = tcfg['upload_input_data']

    # Copy data from other stages/tasks for use in this task
    copy_list = []
    if "copy_input_data" in tcfg.keys():
        for copy_stage in tcfg['copy_input_data'].keys():
            for copy_task in tcfg['copy_input_data'][copy_stage].keys():
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(pipe_name, copy_stage, copy_task)
                copy_list.extend(['{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                                  for x in tcfg['copy_input_data'][copy_stage][copy_task]])

    # Append the copy list (if any) to the task object
    t.copy_input_data = copy_list

    # Set the download data for the task
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg.keys():
        download_list.extend(['{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
                              for x in tcfg['download_output_data']])

    # Append the download list to this task
    t.download_output_data = download_list

    # Return the task object
    return (t)
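# Hypothetical invocation sketch (not from the original source): minimal tcfg/ecfg
# dictionaries carrying only the keys that GenerateTask() reads above. All values are
# illustrative placeholders, and the sketch assumes the module's own match_options()
# and mvar_replace_dict() helpers are importable alongside GenerateTask().
tcfg = {'pre_exec': '',
        'executable': 'python',
        'script': 'my_module.py',
        'options': {},
        'cpu': {'processes': 1, 'process-type': None,
                'threads-per-process': 1, 'thread-type': None},
        'upload_input_data': ['my_module.py'],
        'download_output_data': ['output_PIPELINE_ID.nc']}
ecfg = {'exp_dir': './experiment', 'options': {}}

t = GenerateTask(tcfg, ecfg, 'pipe0', 'stage0', 'task0')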
def func_on_true():

    global cur_iter, book

    # Create Stage 2
    s2 = Stage()
    s2.name = 'iter%s-s2' % cur_iter[instance]

    # Create a Task
    t2 = Task()
    t2.name = 'iter%s-s2-t2' % cur_iter[instance]
    t2.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    t2.executable = ['gmx grompp']
    t2.arguments = ['-f', 'CB7G3_run.mdp',
                    '-c', 'CB7G3.gro',
                    '-p', 'CB7G3.top',
                    '-n', 'CB7G3.ndx',
                    '-o', 'CB7G3.tpr',
                    '-maxwarn', '10']
    t2.cores = 1
    t2.copy_input_data = ['$SHARED/CB7G3.ndx',
                          '$SHARED/CB7G3.top',
                          '$SHARED/3atomtypes.itp',
                          '$SHARED/3_GMX.itp',
                          '$SHARED/cucurbit_7_uril_GMX.itp']

    if cur_iter[instance] == 1:
        t2.copy_input_data += ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                               (p.name, book[p.name]['stages'][-1]['name'],
                                book[p.name]['stages'][-1]['task']),
                               '$SHARED/CB7G3.gro']
    else:
        t2.copy_input_data += ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                               (p.name, book[p.name]['stages'][-1]['name'],
                                book[p.name]['stages'][-1]['task']),
                               '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
                               (p.name, book[p.name]['stages'][-2]['name'],
                                book[p.name]['stages'][-2]['task'])]

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s2.name, 'task': t2.name})

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create Stage 3
    s3 = Stage()
    s3.name = 'iter%s-s3' % cur_iter[instance]

    # Create a Task
    t3 = Task()
    t3.name = 'iter%s-s3-t3' % cur_iter[instance]
    t3.pre_exec = ['source %s/bin/GMXRC.bash' % GMX_PATH]
    t3.executable = ['gmx mdrun']
    t3.arguments = ['-nt', 20,
                    '-deffnm', 'CB7G3',
                    '-dhdl', 'CB7G3_dhdl.xvg']
    t3.cores = 20
    # t3.mpi = True
    t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' %
                          (p.name, book[p.name]['stages'][-1]['name'],
                           book[p.name]['stages'][-1]['task'])]
    t3.copy_output_data = [
        'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(cur_iter[instance], instance),
        'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(cur_iter[instance], instance)
    ]
    t3.download_output_data = [
        'CB7G3.xtc > ./output/CB7G3_run{1}_gen{0}.xtc'.format(cur_iter[instance], instance),
        'CB7G3.log > ./output/CB7G3_run{1}_gen{0}.log'.format(cur_iter[instance], instance),
        'CB7G3_dhdl.xvg > ./output/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullf.xvg > ./output/CB7G3_run{1}_gen{0}_pullf.xvg'.format(cur_iter[instance], instance),
        'CB7G3_pullx.xvg > ./output/CB7G3_run{1}_gen{0}_pullx.xvg'.format(cur_iter[instance], instance),
        'CB7G3.gro > ./output/CB7G3_run{1}_gen{0}.gro'.format(cur_iter[instance], instance)
    ]

    # Add the Task to the Stage
    s3.add_tasks(t3)

    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s3.name, 'task': t3.name})

    # Add Stage to the Pipeline
    p.add_stages(s3)

    # Create Stage 4
    s4 = Stage()
    s4.name = 'iter%s-s4' % cur_iter[instance]

    # Create a Task
    t4 = Task()
    t4.name = 'iter%s-s4-t4' % cur_iter[instance]
    t4.pre_exec = ['module load python/2.7.7-anaconda',
                   'export PYTHONPATH=%s/alchemical_analysis:$PYTHONPATH' % ALCH_ANA_PATH,
                   'export PYTHONPATH=%s:$PYTHONPATH' % ALCH_ANA_PATH,
                   'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
                   'ln -s ../staging_area data']
    t4.executable = ['python']
    t4.arguments = ['analysis_2.py',
                    '--newname=CB7G3_run.mdp',
                    '--template=CB7G3_template.mdp',
                    '--dir=./data',
                    # '--prev_data=%s'%DATA_LOC
                    '--gen={0}'.format(cur_iter[instance], instance),
                    '--run={1}'.format(cur_iter[instance], instance)]
    t4.cores = 1
    t4.copy_input_data = ['$SHARED/analysis_2.py',
                          '$SHARED/alchemical_analysis.py',
                          '$SHARED/CB7G3_template.mdp']
    t4.download_output_data = [
        'analyze_1/results.txt > ./output/results_run{1}_gen{0}.txt'.format(cur_iter[instance], instance),
        'STDOUT > ./output/stdout_run{1}_gen{0}'.format(cur_iter[instance], instance),
        'STDERR > ./output/stderr_run{1}_gen{0}'.format(cur_iter[instance], instance),
        'CB7G3_run.mdp > ./output/CB7G3_run{1}_gen{0}.mdp'.format(cur_iter[instance], instance),
        'results_average.txt > ./output/results_average_run{1}_gen{0}.txt'.format(cur_iter[instance], instance)
    ]

    s4.post_exec = {'condition': func_condition,
                    'on_true': func_on_true,
                    'on_false': func_on_false}

    # Add the Task to the Stage
    s4.add_tasks(t4)

    # Add current Task and Stage to our book
    book[p.name]['stages'].append({'name': s4.name, 'task': t4.name})

    # Add Stage to the Pipeline
    p.add_stages(s4)

    print book
def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):
    """
    All cycles after the initial cycle
    Pulls up exchange pairs file and generates the new workflow
    """

    self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)

    with open('exchangePairs_{0}.dat'.format(Cycle), 'r') as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()
    q.name = 'genpipeline{0}'.format(Cycle)

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    #Create initial MD stage
    md_stg = Stage()
    md_stg.name = 'mdstage{0}'.format(Cycle)

    self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

    for r in range(Replicas):
        md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=Cycle)
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (self.Book[Cycle - 1][ExchangeArray[r]]),
                                  '%s/prmtop' % (self.Book[0][r]),
                                  #'%s/prmtop'%(self.Tarball_path[0]),
                                  '%s/mdin_{0}'.format(r) % (self.Book[0][r])]
                                  #'%s/mdin'%(self.Book[0][r])]
                                  #'%s/mdin'%(self.Tarball_path[0])]
        md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r), '-inf', 'mdinfo_{0}'.format(r)]
        #md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.name, md_stg.name, md_tsk.name)

        self.md_task_list.append(md_tsk)
        md_stg.add_tasks(md_tsk)

    q.add_stages(md_stg)

    ex_stg = Stage()
    ex_stg.name = 'exstg{0}'.format(Cycle + 1)

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.name = 'extsk{0}'.format(Cycle + 1)
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = [ExchangeMethod]
    for r in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '{0}'.format(Cycle + 1)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    # Finds exchange partners, also Generates exchange history trace
    ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(Cycle + 1)]

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    self.ex_task_list.append(ex_tsk)

    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    self.Book.append(md_dict)
    #self._prof.prof('EndEx_{0}'.format(Cycle), uid=self._uid)
    #print d
    #print self.Book

    return q
        task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    #####Exchange Stages
    elif N_Stg != 0 and N_Stg % 2 == 1:
        t = Task()
        t.executable = ['python']
        t.upload_input_data = ['exchangeMethods/RandEx.py']
        #t.link_input_data = ['']
        t.arguments = ['RandEx.py', Replicas]
        t.cores = 1
        t.mpi = False
        t.download_output_data = ['exchangePairs.txt']

        stg.add_tasks(t)
        task_uids['Stage_%s' % N_Stg].append(t.uid)
        p.add_stages(stg)
        stage_uids.append(stg.uid)

    ######Subsequent MD stages
    else:
        ### Open file,
        ExchangePairs = []
        with open('exchangePairs.txt', "rb") as file:
            ### read file into list,
            ### use list to populate data staging placeholders
            for i in file.readlines():
def get_pipeline(instance, iterations):

    # Create a Pipeline object
    p = Pipeline()

    # Create Stage 1
    s1 = Stage()

    # Create a Task
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py', '--template', 'CB7G3_template.mdp', '--newname',
        'CB7G3_run.mdp', '--wldelta', '2', '--equilibrated', 'False',
        '--lambda_state', '0', '--seed', '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp',
        '$SHARED/analysis_1.py'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Create Stage 2
        s2 = Stage()

        # Create a Task
        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp', '-c', 'CB7G3.gro', '-p', 'CB7G3.top',
            '-n', 'CB7G3.ndx', '-o', 'CB7G3.tpr', '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx', '$SHARED/CB7G3.top',
            '$SHARED/3atomtypes.itp', '$SHARED/3_GMX.itp',
            '$SHARED/cucurbit_7_uril_GMX.itp'
        ]

        # The loop starts at 1, so the first iteration takes its inputs from
        # Stage 1; later iterations reuse the outputs of Stages 3 and 4 of the
        # previous iteration
        if it == 1:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s1.uid, t1.uid),
                '$SHARED/CB7G3.gro'
            ]
        else:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' % (p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' % (p.uid, s3.uid, t3.uid)
            ]

        # Add the Task to the Stage
        s2.add_tasks(t2)

        # Add Stage to the Pipeline
        p.add_stages(s2)

        # Create Stage 3
        s3 = Stage()

        # Create a Task
        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt', 20,
            '-deffnm', 'CB7G3',
            '-dhdl', 'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % (p.uid, s2.uid, t2.uid)
        ]
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]

        # Add the Task to the Stage
        s3.add_tasks(t3)

        # Add Stage to the Pipeline
        p.add_stages(s3)

        # Create Stage 4
        s4 = Stage()

        # Create a Task
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        t4.arguments = [
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            #'--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it, instance),
            '--run={1}'.format(it, instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(it, instance)
        ]

        # Add the Task to the Stage
        s4.add_tasks(t4)

        # Add Stage to the Pipeline
        p.add_stages(s4)

    return p
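# A minimal, hypothetical driver for get_pipeline() above, modeled on the
# AppManager usage in the tests further below. The import path, the RabbitMQ
# environment variables and the resource description values are assumptions /
# placeholders, not taken from the original script.
if __name__ == '__main__':

    import os
    from radical.entk import AppManager

    SEED = 12345                                   # module-level seed read by get_pipeline()

    hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
    port = int(os.environ.get('RMQ_PORT', 5672))

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = {'resource': 'local.localhost',   # placeholder resource label
                            'walltime': 60,
                            'cpus': 20}

    # e.g. two independent instances of the expanded-ensemble pipeline
    appman.workflow = [get_pipeline(instance=i, iterations=2) for i in range(1, 3)]
    appman.run()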
def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod): """ All cycles after the initial cycle Pulls up exchange pairs file and generates the new workflow """ self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid) with open('exchangePairs_{0}.dat'.format(Cycle),'r') as f: # Read exchangePairs.dat ExchangeArray = [] for line in f: ExchangeArray.append(int(line.split()[1])) #ExchangeArray.append(line) #print ExchangeArray q = Pipeline() q.name = 'genpipeline{0}'.format(Cycle) #Bookkeeping stage_uids = list() task_uids = list() ## = dict() md_dict = dict() #Create initial MD stage md_stg = Stage() md_stg.name = 'mdstage{0}'.format(Cycle) self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid) for r in range (Replicas): md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable) md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=Cycle) md_tsk.link_input_data = ['%s/restrt > inpcrd'%(self.Book[Cycle-1][ExchangeArray[r]]), '%s/prmtop'%(self.Book[0][r]), #'%s/prmtop'%(self.Tarball_path[0]), '%s/mdin_{0}'.format(r)%(self.Book[0][r])] #'%s/mdin'%(self.Book[0][r])] #'%s/mdin'%(self.Tarball_path[0])] md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)] #md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)] md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s'%(q.name, md_stg.name, md_tsk.name) self.md_task_list.append(md_tsk) md_stg.add_tasks(md_tsk) q.add_stages(md_stg) ex_stg = Stage() ex_stg.name = 'exstg{0}'.format(Cycle+1) #Create Exchange Task ex_tsk = Task() ex_tsk.name = 'extsk{0}'.format(Cycle+1) ex_tsk.executable = ['python'] ex_tsk.upload_input_data = [ExchangeMethod] for r in range (Replicas): ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[r],r)] ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas), '{0}'.format(Cycle+1)] ex_tsk.cores = 1 ex_tsk.mpi = False ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(Cycle+1)] # Finds exchange partners, also Generates exchange history trace ex_stg.add_tasks(ex_tsk) #task_uids.append(ex_tsk.uid) self.ex_task_list.append(ex_tsk) q.add_stages(ex_stg) #stage_uids.append(ex_stg.uid) self.Book.append(md_dict) #self._prof.prof('EndEx_{0}'.format(Cycle), uid=self._uid) #print d #print self.Book return q
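# GeneralCycle() consumes only the second whitespace-separated column of
# exchangePairs_<cycle>.dat. The layout below is an assumed illustration (the
# real file is produced by the exchange-method script, e.g. TempEx.py), parsed
# exactly as in the function above:
example_pairs = """\
0 2
1 3
2 0
3 1
"""
exchange_array = [int(line.split()[1]) for line in example_pairs.splitlines() if line.strip()]
assert exchange_array == [2, 3, 0, 1]   # replica r pulls 'restrt' from replica exchange_array[r]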
def test_rp_da_scheduler_bw(): """ **Purpose**: Run an EnTK application on localhost """ p1 = Pipeline() p1.name = 'p1' n = 10 s1 = Stage() s1.name = 's1' for x in range(n): t = Task() t.name = 't%s' % x t.executable = ['/bin/hostname'] t.arguments = ['>', 'hostname.txt'] t.cpu_reqs['processes'] = 1 t.cpu_reqs['threads_per_process'] = 16 t.cpu_reqs['thread_type'] = '' t.cpu_reqs['process_type'] = '' t.lfs_per_process = 10 t.download_output_data = ['hostname.txt > s1_t%s_hostname.txt' % (x)] s1.add_tasks(t) p1.add_stages(s1) s2 = Stage() s2.name = 's2' for x in range(n): t = Task() t.executable = ['/bin/hostname'] t.arguments = ['>', 'hostname.txt'] t.cpu_reqs['processes'] = 1 t.cpu_reqs['threads_per_process'] = 16 t.cpu_reqs['thread_type'] = '' t.cpu_reqs['process_type'] = '' t.download_output_data = ['hostname.txt > s2_t%s_hostname.txt' % (x)] t.tag = 't%s' % x s2.add_tasks(t) p1.add_stages(s2) res_dict = { 'resource': 'ncsa.bw_aprun', 'walltime': 10, 'cpus': 128, 'project': 'gk4', 'queue': 'high' } os.environ['RADICAL_PILOT_DBURL'] = MLAB appman = AppManager(hostname=hostname, port=port) appman.resource_desc = res_dict appman.workflow = [p1] appman.run() for i in range(n): assert open('s1_t%s_hostname.txt' % i, 'r').readline().strip() == open('s2_t%s_hostname.txt' % i, 'r').readline().strip() txts = glob('%s/*.txt' % os.getcwd()) for f in txts: os.remove(f)
def InitCycle(self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps): # "Cycle" = 1 MD stage plus the subsequent exchange computation """ Initial cycle consists of: 1) Create tarball of MD input data 2) Transfer the tarball to pilot sandbox 3) Untar the tarball 4) Run first Cycle """ #Initialize Pipeline #self._prof.prof('InitTar', uid=self._uid) p = Pipeline() p.name = 'initpipeline' md_dict = dict() #Bookkeeping tar_dict = dict() #Bookkeeping ##Write the input files self._prof.prof('InitWriteInputs', uid=self._uid) writeInputs.writeInputs(max_temp=350,min_temp=250,replicas=Replicas,timesteps=timesteps) self._prof.prof('EndWriteInputs', uid=self._uid) self._prof.prof('InitTar', uid=self._uid) #Create Tarball of input data tar = tarfile.open("Input_Files.tar","w") for name in ["prmtop", "inpcrd", "mdin"]: tar.add(name) for r in range (Replicas): tar.add('mdin_{0}'.format(r)) tar.close() #delete all input files outside the tarball for r in range (Replicas): os.remove('mdin_{0}'.format(r)) self._prof.prof('EndTar', uid=self._uid) #Create Untar Stage untar_stg = Stage() untar_stg.name = 'untarStg' #Untar Task untar_tsk = Task() untar_tsk.name = 'untartsk' untar_tsk.executable = ['python'] untar_tsk.upload_input_data = ['untar_input_files.py','Input_Files.tar'] untar_tsk.arguments = ['untar_input_files.py','Input_Files.tar'] untar_tsk.cores = 1 untar_stg.add_tasks(untar_tsk) p.add_stages(untar_stg) tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.name, untar_stg.name, untar_tsk.name) # First MD stage: needs to be defined separately since workflow is not built from a predetermined order md_stg = Stage() md_stg.name = 'mdstg0' self._prof.prof('InitMD_0', uid=self._uid) # MD tasks for r in range (Replicas): md_tsk = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable) md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0) md_tsk.link_input_data += [ '%s/inpcrd'%tar_dict[0], '%s/prmtop'%tar_dict[0], '%s/mdin_{0}'.format(r)%tar_dict[0] #Use for full temperature exchange #'%s/mdin'%tar_dict[0] #Testing only ] md_tsk.arguments = ['-O','-p','prmtop', '-i', 'mdin_{0}'.format(r), # Use this for full Temperature Exchange '-c','inpcrd','-o','out_{0}'.format(r), '-inf','mdinfo_{0}'.format(r)] md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.name, md_stg.name, md_tsk.name) md_stg.add_tasks(md_tsk) self.md_task_list.append(md_tsk) #print md_tsk.uid p.add_stages(md_stg) #stage_uids.append(md_stg.uid) # First Exchange Stage ex_stg = Stage() ex_stg.name = 'exstg0' self._prof.prof('InitEx_0', uid=self._uid) #with open('logfile.log', 'a') as logfile: # logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n') # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition # check and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs. # Said pairs then exchange configurations by linking output configuration files appropriately. ex_tsk = Task() ex_tsk.name = 'extsk0' ex_tsk.executable = ['python'] ex_tsk.upload_input_data = [ExchangeMethod] for r in range (Replicas): ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[r],r)] ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas), '0'] ex_tsk.cores = 1 ex_tsk.mpi = False ex_tsk.download_output_data = ['exchangePairs_0.dat'] ex_stg.add_tasks(ex_tsk) #task_uids.append(ex_tsk.uid) p.add_stages(ex_stg) self.ex_task_list.append(ex_tsk) #self.ex_task_uids.append(ex_tsk.uid) self.Book.append(md_dict) return p
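# InitCycle() uploads and invokes 'untar_input_files.py', but that helper is
# not shown in this document. A minimal sketch of what such a script could
# look like (an assumption, not the actual file) is:
#
#   # untar_input_files.py -- usage: python untar_input_files.py Input_Files.tar
#   import sys
#   import tarfile
#
#   if __name__ == '__main__':
#       with tarfile.open(sys.argv[1], 'r') as tar:
#           tar.extractall()   # unpack prmtop, inpcrd and the per-replica mdin_* files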
def InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod): # "Cycle" = 1 MD stage plus the subsequent exchange computation #Initialize Pipeline p = Pipeline() md_dict = dict() #Bookkeeping tar_dict = dict() #Bookkeeping #Create Tarball of input data #Create Untar Stage untar_stg = Stage() #Untar Task untar_tsk = Task() untar_tsk.executable = ['python'] untar_tsk.upload_input_data = ['untar_input_files.py','../../Input_Files.tar'] untar_tsk.arguments = ['untar_input_files.py','Input_Files.tar'] untar_tsk.cores = 1 untar_stg.add_tasks(untar_tsk) p.add_stages(untar_stg) tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid, untar_stg.uid, untar_tsk.uid) print tar_dict[0] # First MD stage: needs to be defined separately since workflow is not built from a predetermined order md_stg = Stage() # MD tasks for r in range (Replicas): md_tsk = Task() md_tsk.executable = [MD_Executable] md_tsk.link_input_data += ['%s/inpcrd'%tar_dict[0], '%s/prmtop'%tar_dict[0], #'%s/mdin_{0}'.format(r)%tar_dict[0] '%s/mdin'%tar_dict[0] ] md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'] #Should be abstracted from the user? md_tsk.arguments = ['-O','-p','prmtop', '-i', 'mdin', #'mdin_{0}'.format(r), # Use this for full Temperature Exchange '-c','inpcrd','-o','out_{0}'.format(r), '-inf','mdinfo_{0}'.format(r)] md_tsk.cores = Replica_Cores md_tsk.mpi = True md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid, md_stg.uid, md_tsk.uid) md_stg.add_tasks(md_tsk) #task_uids.append(md_tsk.uid) p.add_stages(md_stg) #stage_uids.append(md_stg.uid) # First Exchange Stage ex_stg = Stage() # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition # and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs. # Said pairs then exchange configurations by linking output configuration files appropriately. ex_tsk = Task() ex_tsk.executable = ['python'] #ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py'] ex_tsk.upload_input_data = [ExchangeMethod] for r in range (Replicas): ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[r],r)] ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)] ex_tsk.cores = 1 ex_tsk.mpi = False ex_tsk.download_output_data = ['exchangePairs.dat'] ex_stg.add_tasks(ex_tsk) #task_uids.append(ex_tsk.uid) p.add_stages(ex_stg) #stage_uids.append(ex_stg.uid) Book.append(md_dict) #print Book return p
def test_task_to_dict(): """ **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a dictionary """ t = Task() d = t.to_dict() assert d == { 'uid': None, 'name': None, 'state': states.INITIAL, 'state_history': [states.INITIAL], 'pre_exec': [], 'executable': [], 'arguments': [], 'post_exec': [], 'cpu_reqs': { 'processes': 1, 'process_type': None, 'threads_per_process': 1, 'thread_type': None }, 'gpu_reqs': { 'processes': 0, 'process_type': None, 'threads_per_process': 0, 'thread_type': None }, 'lfs_per_process': 0, 'upload_input_data': [], 'copy_input_data': [], 'link_input_data': [], 'move_input_data': [], 'copy_output_data': [], 'move_output_data': [], 'download_output_data': [], 'stdout': None, 'stderr': None, 'exit_code': None, 'path': None, 'tag': None, 'parent_stage': { 'uid': None, 'name': None }, 'parent_pipeline': { 'uid': None, 'name': None } } t = Task() t.uid = 'test.0000' t.name = 'new' t.pre_exec = ['module load abc'] t.executable = ['sleep'] t.arguments = ['10'] t.cpu_reqs['processes'] = 10 t.cpu_reqs['threads_per_process'] = 2 t.gpu_reqs['processes'] = 5 t.gpu_reqs['threads_per_process'] = 3 t.lfs_per_process = 1024 t.upload_input_data = ['test1'] t.copy_input_data = ['test2'] t.link_input_data = ['test3'] t.move_input_data = ['test4'] t.copy_output_data = ['test5'] t.move_output_data = ['test6'] t.download_output_data = ['test7'] t.stdout = 'out' t.stderr = 'err' t.exit_code = 1 t.path = 'a/b/c' t.tag = 'task.0010' t.parent_stage = {'uid': 's1', 'name': 'stage1'} t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'} d = t.to_dict() assert d == { 'uid': 'test.0000', 'name': 'new', 'state': states.INITIAL, 'state_history': [states.INITIAL], 'pre_exec': ['module load abc'], 'executable': ['sleep'], 'arguments': ['10'], 'post_exec': [], 'cpu_reqs': { 'processes': 10, 'process_type': None, 'threads_per_process': 2, 'thread_type': None }, 'gpu_reqs': { 'processes': 5, 'process_type': None, 'threads_per_process': 3, 'thread_type': None }, 'lfs_per_process': 1024, 'upload_input_data': ['test1'], 'copy_input_data': ['test2'], 'link_input_data': ['test3'], 'move_input_data': ['test4'], 'copy_output_data': ['test5'], 'move_output_data': ['test6'], 'download_output_data': ['test7'], 'stdout': 'out', 'stderr': 'err', 'exit_code': 1, 'path': 'a/b/c', 'tag': 'task.0010', 'parent_stage': { 'uid': 's1', 'name': 'stage1' }, 'parent_pipeline': { 'uid': 'p1', 'name': 'pipeline1' } }
def create_workflow(Kconfig): wf = Pipeline() # ------------------------------------------------------------------------------------------------------------------ cur_iter = int(Kconfig.start_iter) #0 #assumed of iteration non zero that files are in combined_path combined_path = str(Kconfig.remote_output_directory ) #'/u/sciteam/hruska/scratch/extasy-grlsd' if cur_iter == 0: restart_iter = 0 else: restart_iter = cur_iter if cur_iter == 0: pre_proc_stage = Stage() pre_proc_task = Task() pre_proc_task.pre_exec = [ 'module load bwpy', 'export tasks=pre_proc', 'export iter=-1', 'export OMP_NUM_THREADS=1' ] pre_proc_task.executable = ['python'] pre_proc_task.arguments = [ 'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro', 'input.gro', '--clone', str(Kconfig.num_replicas) ] pre_proc_task.copy_input_data = [ '$SHARED/%s > %s/iter_%s/input.gro' % (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter), '$SHARED/%s > input.gro' % os.path.basename(Kconfig.md_input_file), '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py' ] pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % ( wf.uid, pre_proc_stage.uid, pre_proc_task.uid) pre_proc_stage.add_tasks(pre_proc_task) wf.add_stages(pre_proc_stage) # ------------------------------------------------------------------------------------------------------------------ else: pre_proc_stage = Stage() pre_proc_task = Task() pre_proc_task.pre_exec = [ 'module load bwpy', 'export tasks=pre_proc', 'export iter=-1', 'export OMP_NUM_THREADS=1' ] pre_proc_task.executable = ['python'] pre_proc_task.arguments = [ 'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro', 'input.gro' ] pre_proc_task.copy_input_data = [ '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1), '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py' ] pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % ( wf.uid, pre_proc_stage.uid, pre_proc_task.uid) pre_proc_stage.add_tasks(pre_proc_task) wf.add_stages(pre_proc_stage) while (cur_iter < int(Kconfig.num_iterations)): # -------------------------------------------------------------------------------------------------------------- # sim_stage: # Purpose: In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the # previous iteration. Run gromacs on each of the smaller files. Parameter files and executables # are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration. 
# Arguments : # grompp = gromacs parameters filename # topol = topology filename sim_stage = Stage() sim_task_ref = list() for sim_num in range( min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))): sim_task = Task() if Kconfig.use_gpus == 'False': sim_task.executable = [ '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python' ] sim_task.pre_exec = [ 'module load bwpy', 'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"', 'export PATH=/u/sciteam/hruska/local/bin:$PATH', 'export iter=%s' % cur_iter ] sim_task.cores = int( Kconfig.num_CUs_per_MD_replica ) #on bluewaters tasks on one node are executed concurently else: sim_task.executable = ['python'] sim_task.pre_exec = [ 'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy', 'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf', 'module add cudatoolkit/7.5.18-1.0502.10743.2.1', 'module add cmake', 'module unload darshan, xalt', 'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes', 'export FC=ftn', 'source /projects/sciteam/bamm/hruska/vpy2/bin/activate', 'export tasks=md', 'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1' ] sim_task.gpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 1, 'thread_type': None } sim_task.cpu_reqs = { 'processes': 0, 'process_type': None, 'threads_per_process': 0, 'thread_type': None } sim_task.arguments = [ 'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro', '--md_steps', str(Kconfig.md_steps), '--save_traj', 'False', '>', 'md.log' ] sim_task.link_input_data = [ '$SHARED/%s > run_openmm.py' % (os.path.basename(Kconfig.md_run_file)) ] #if Kconfig.ndx_file is not None: # sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file))) if restart_iter == cur_iter: sim_task.link_input_data.append( '%s/temp/start%s.gro > start.gro' % (pre_proc_task_ref, sim_num)) else: sim_task.link_input_data.append( '%s/temp/start%s.gro > start.gro' % (post_ana_task_ref, sim_num)) sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, sim_stage.uid, sim_task.uid)) sim_stage.add_tasks(sim_task) wf.add_stages(sim_stage) # -------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------------------- # pre_ana_task: # Purpose: The output of each gromacs instance in the simulaxftion stage is a small coordinate file. # Concatenate such files from each of the gromacs instances to form a larger file. 
# Arguments: # numCUs = number of simulation instances / number of small files to be concatenated pre_ana_stage = Stage() pre_ana_task = Task() pre_ana_task.pre_exec = [ 'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy', 'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf', 'module add cudatoolkit/7.5.18-1.0502.10743.2.1', 'module add cmake', 'module unload darshan, xalt', 'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes', 'export FC=ftn', 'source /projects/sciteam/bamm/hruska/vpy2/bin/activate', 'export tasks=pre_ana', 'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1' ] pre_ana_task.executable = ['python'] pre_ana_task.arguments = ['pre_analyze_openmm.py'] pre_ana_task.link_input_data = [ '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py' ] for sim_num in range( min(int(Kconfig.num_parallel_MD_sim), int(Kconfig.num_replicas))): pre_ana_task.link_input_data += [ '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num) ] pre_ana_task.copy_output_data = [ 'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter), 'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter) ] #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter pre_ana_stage.add_tasks(pre_ana_task) wf.add_stages(pre_ana_stage) # -------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------------------- # lsdmap: # Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values. # Arguments: # config = name of the config file to be used during LSDMap ana_stage = Stage() ana_task = Task() ana_task.pre_exec = [ 'module load PrgEnv-gnu', 'module unload bwpy', 'module load bwpy/0.3.0', 'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf', 'module add cudatoolkit/7.5.18-1.0502.10743.2.1', 'module add cmake', 'module unload darshan xalt', 'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes', 'export FC=ftn', 'source /projects/sciteam/bamm/hruska/vpy2/bin/activate', 'export tasks=lsdmap', 'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1' ] ana_task.executable = ['lsdmap'] #/u/sciteam/hruska/local/bin/lsdmap ana_task.arguments = [ '-f', os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro', '-n', 'out.nn', '-w', 'weight.w' ] ana_task.cores = 1 ana_task.link_input_data = [ '$SHARED/{0} > {0}'.format( os.path.basename(Kconfig.lsdm_config_file)), '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter) ] ana_task.copy_output_data = [ 'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter, 'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter, #'lsdmap.log > output/iter_%s/lsdmap.log'%cur_iter, 'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter), 'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter), 'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter), 'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter), 'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter) ] if cur_iter > 0: ana_task.link_input_data += [ '%s/iter_%s/weight_out.w > weight.w' % (combined_path, cur_iter - 1) ] if (cur_iter % Kconfig.nsave == 0): ana_task.download_output_data = [ 'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter ] ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % ( wf.uid, ana_stage.uid, ana_task.uid) ana_stage.add_tasks(ana_task) wf.add_stages(ana_stage) # 
-------------------------------------------------------------------------------------------------------------- # -------------------------------------------------------------------------------------------------------------- # post_lsdmap: # Purpose: Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop # to generate the new coordinate file to be used by the simulation_step in the next iteration. # Arguments: # num_replicas = number of configurations to be generated in the new coordinate file # out = output filename # cycle = iteration number # max_dead_neighbors = max dead neighbors to be considered # max_alive_neighbors = max alive neighbors to be considered # numCUs = number of simulation instances/ number of smaller files post_ana_stage = Stage() post_ana_task = Task() post_ana_task._name = 'post_ana_task' if Kconfig.restarts == 'clustering': post_ana_task.pre_exec = [ 'module unload PrgEnv-cray', 'module load PrgEnv-gnu', 'module unload bwpy', 'module add bwpy/0.3.0', 'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf', 'module add cudatoolkit/7.5.18-1.0502.10743.2.1', 'module add cmake', 'module unload darshan xalt', 'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes', 'export FC=ftn', 'source /projects/sciteam/bamm/hruska/vpy2/bin/activate', 'export tasks=post_ana', 'export PYEMMA_NJOBS=1', 'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1' ] post_ana_task.executable = ['python'] post_ana_task.arguments = [ 'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev', 'ncopies.nc', 'tmp.gro', 'out.nn', 'weight.w', 'out.gro', Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors, 'input.gro', cur_iter, Kconfig.num_parallel_MD_sim, 'weight_out.w', 'tmpha.eg' ] post_ana_task.link_input_data = [ '$SHARED/post_analyze.py > post_analyze.py', '$SHARED/selection.py > selection.py', '$SHARED/selection-cluster.py > selection-cluster.py', '$SHARED/reweighting.py > reweighting.py', '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py', '%s/iter_%s/weight_out.w > weight.w' % (combined_path, cur_iter - 1), '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter), '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter), '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter), '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter) ] if (cur_iter % Kconfig.nsave == 0): post_ana_task.download_output_data = [ 'out.gro > output/iter_%s/out.gro' % cur_iter, 'weight_out.w > output/iter_%s/weight_out.w' % cur_iter, 'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter), 'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter), '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path, cur_iter, cur_iter) ] post_ana_task.copy_output_data = [ 'ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path, cur_iter), 'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path, cur_iter), 'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter), 'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path, cur_iter), 'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path, cur_iter), 'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path, cur_iter), 'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png' % cur_iter, 'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png' % cur_iter, 
'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png' % cur_iter ] post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % ( wf.uid, post_ana_stage.uid, post_ana_task.uid) post_ana_stage.add_tasks(post_ana_task) wf.add_stages(post_ana_stage) # -------------------------------------------------------------------------------------------------------------- cur_iter += 1 Kconfig.start_iter = str(cur_iter) return wf
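# create_workflow() above pulls all of its settings from a 'Kconfig' object.
# How that object is normally constructed is not shown in this document; the
# sketch below simply enumerates, with illustrative placeholder values, the
# attributes the function actually reads (a plain namespace stands in for the
# real config loader).
from argparse import Namespace

Kconfig = Namespace(
    start_iter=0,
    num_iterations=2,
    remote_output_directory='/u/sciteam/hruska/scratch/extasy-grlsd',
    num_parallel_MD_sim=4,
    num_replicas=16,
    num_CUs_per_MD_replica=32,
    use_gpus='False',
    md_input_file='input.gro',
    md_run_file='run_openmm.py',
    md_steps=1000,
    lsdm_config_file='lsdmap.ini',
    nsave=1,
    restarts='clustering',
    max_alive_neighbors=10,
    max_dead_neighbors=1)

wf = create_workflow(Kconfig)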
def test_create_cud_from_task(): """ **Purpose**: Test if the 'create_cud_from_task' function generates a RP ComputeUnitDescription with the complete Task description """ pipeline = 'p1' stage = 's1' task = 't1' placeholder_dict = { pipeline: { stage: { task: '/home/vivek/some_file.txt' } } } t1 = Task() t1.name = 't1' t1.pre_exec = ['module load gromacs'] t1.executable = ['grompp'] t1.arguments = ['hello'] t1.cpu_reqs = {'processes': 4, 'process_type': 'MPI', 'threads_per_process': 1, 'thread_type': 'OpenMP' } t1.gpu_reqs = {'processes': 4, 'process_type': 'MPI', 'threads_per_process': 2, 'thread_type': 'OpenMP' } t1.post_exec = ['echo test'] t1.upload_input_data = ['upload_input.dat'] t1.copy_input_data = ['copy_input.dat'] t1.link_input_data = ['link_input.dat'] t1.copy_output_data = ['copy_output.dat'] t1.download_output_data = ['download_output.dat'] p = Pipeline() p.name = 'p1' s = Stage() s.name = 's1' s.tasks = t1 p.stages = s p._assign_uid('test') cud = create_cud_from_task(t1, placeholder_dict) assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name, t1.parent_stage['uid'], t1.parent_stage['name'], t1.parent_pipeline['uid'], t1.parent_pipeline['name']) assert cud.pre_exec == t1.pre_exec # rp returns executable as a string regardless of whether assignment was using string or list assert cud.executable == t1.executable assert cud.arguments == t1.arguments assert cud.cpu_processes == t1.cpu_reqs['processes'] assert cud.cpu_threads == t1.cpu_reqs['threads_per_process'] assert cud.cpu_process_type == t1.cpu_reqs['process_type'] assert cud.cpu_thread_type == t1.cpu_reqs['thread_type'] assert cud.gpu_processes == t1.gpu_reqs['processes'] assert cud.gpu_threads == t1.gpu_reqs['threads_per_process'] assert cud.gpu_process_type == t1.gpu_reqs['process_type'] assert cud.gpu_thread_type == t1.gpu_reqs['thread_type'] assert cud.post_exec == t1.post_exec assert {'source': 'upload_input.dat', 'target': 'upload_input.dat'} in cud.input_staging assert {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'} in cud.input_staging assert {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'} in cud.input_staging assert {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'} in cud.output_staging assert {'source': 'download_output.dat', 'target': 'download_output.dat'} in cud.output_staging
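# The placeholder_dict used above maps pipeline -> stage -> task names to the
# task's sandbox path. A toy resolver illustrating that convention (not the
# library's internal implementation) could look like this:
import re

def resolve_placeholder(path, placeholder_dict):
    """Expand '$Pipeline_<p>_Stage_<s>_Task_<t>/file' using placeholder_dict."""
    m = re.match(r'\$Pipeline_(.+)_Stage_(.+)_Task_([^/]+)(/.*)?$', path)
    if not m:
        return path                          # nothing to expand
    pipeline, stage, task, rest = m.groups()
    return placeholder_dict[pipeline][stage][task] + (rest or '')

# resolve_placeholder('$Pipeline_p1_Stage_s1_Task_t1/out.txt', placeholder_dict)
# -> '/home/vivek/some_file.txt/out.txt'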
def general_cycle(self, replicas, replica_cores, cycle, python_path, md_executable, exchange_method, pre_exec): """ All cycles after the initial cycle Pulls up exchange pairs file and generates the new workflow """ self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid) with open('exchangePairs_{0}.dat'.format(cycle), 'r') as f: # Read exchangePairs.dat exchange_array = [] for line in f: exchange_array.append(int(line.split()[1])) #exchange_array.append(line) #print exchange_array q = Pipeline() q.name = 'genpipeline{0}'.format(cycle) #bookkeeping stage_uids = list() task_uids = list() ## = dict() md_dict = dict() #Create MD stage md_stg = Stage() md_stg.name = 'mdstage{0}'.format(cycle) self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid) for r in range(replicas): md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec) md_tsk.name = 'mdtsk-{replica}-{cycle}'.format( replica=r, cycle=cycle) md_tsk.link_input_data = [ '%s/restrt > inpcrd' % (self.book[cycle - 1][exchange_array[r]]), '%s/prmtop' % (self.book[0][r]), '%s/mdin_{0}'.format(r) % (self.book[0][r]) ] ### The Following softlinking scheme is to be used ONLY if node local file system is to be used: not fully supported yet. #md_tsk.link_input_data = ['$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=exchange_array[r],cycle=cycle-1) > '$NODE_LFS_PATH/inpcrd', # #'%s/restrt > inpcrd'%(self.book[cycle-1][exchange_array[r]]), # '%s/prmtop'%(self.book[0][r]), # '%s/mdin_{0}'.format(r)%(self.Book[0][r])] md_tsk.arguments = [ '-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c', 'inpcrd', #'-c', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle-1), '-o', 'out-{replica}-{cycle}'.format(replica=r, cycle=cycle), '-r', 'restrt', #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle), '-x', 'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle), '-inf', 'mdinfo_{0}'.format(r) ] #md_tsk.tag = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0) md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % ( q.name, md_stg.name, md_tsk.name) self.md_task_list.append(md_tsk) md_stg.add_tasks(md_tsk) q.add_stages(md_stg) ex_stg = Stage() ex_stg.name = 'exstg{0}'.format(cycle + 1) #Create Exchange Task ex_tsk = Task() ex_tsk.name = 'extsk{0}'.format(cycle + 1) ex_tsk.executable = [python_path]#['/usr/bin/python'] #['/opt/python/bin/python'] ex_tsk.upload_input_data = [exchange_method] for r in range(replicas): ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)] ex_tsk.pre_exec = ['mv *.py exchange_method.py'] ex_tsk.arguments = [ 'exchange_method.py', '{0}'.format(replicas), '{0}'.format(cycle + 1) ] ex_tsk.cores = 1 ex_tsk.mpi = False ex_tsk.download_output_data = [ 'exchangePairs_{0}.dat'.format(cycle + 1) ] # Finds exchange partners, also Generates exchange history trace ex_stg.add_tasks(ex_tsk) #task_uids.append(ex_tsk.uid) self.ex_task_list.append(ex_tsk) q.add_stages(ex_stg) #stage_uids.append(ex_stg.uid) self.book.append(md_dict) #self._prof.prof('EndEx_{0}'.format(cycle), uid=self._uid) #print d #print self.book return q
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):

    """
    All cycles after the initial cycle
    """

    with open("exchangePairs.dat", "r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray

    q = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    md_dict = dict()

    #Create initial MD stage
    md_stg = Stage()
    for r in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [MD_Executable]  #MD Engine, Blue Waters
        md_tsk.link_input_data = ['%s/restrt > inpcrd' % (Book[Cycle-1][ExchangeArray[r]]),
                                  '%s/prmtop' % (Book[Cycle-1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
                                  '%s/mdin' % (Book[Cycle-1][r])]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Should be abstracted from user?
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd',
                            '-o', 'out_{0}'.format(r), '-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        # Placeholders must reference this pipeline (q), not an undefined 'p'
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (q.uid, md_stg.uid, md_tsk.uid)

        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    ex_stg = Stage()

    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        # link mdinfo files from the MD tasks recorded in md_dict
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[n1], n1)]

    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']

    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)

    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)

    Book.append(md_dict)
    #print Book

    return q
def test_task_exceptions(s,l,i,b): """ **Purpose**: Test if all attribute assignments raise exceptions for invalid values """ t = Task() data_type = [s,l,i,b] for data in data_type: if not isinstance(data,str): with pytest.raises(TypeError): t.name = data with pytest.raises(TypeError): t.path = data with pytest.raises(TypeError): t.parent_stage = data with pytest.raises(TypeError): t.parent_pipeline = data with pytest.raises(TypeError): t.stdout = data with pytest.raises(TypeError): t.stderr = data if not isinstance(data,list): with pytest.raises(TypeError): t.pre_exec = data with pytest.raises(TypeError): t.arguments = data with pytest.raises(TypeError): t.post_exec = data with pytest.raises(TypeError): t.upload_input_data = data with pytest.raises(TypeError): t.copy_input_data = data with pytest.raises(TypeError): t.link_input_data = data with pytest.raises(TypeError): t.move_input_data = data with pytest.raises(TypeError): t.copy_output_data = data with pytest.raises(TypeError): t.download_output_data = data with pytest.raises(TypeError): t.move_output_data = data if not isinstance(data, str) and not isinstance(data, list): with pytest.raises(TypeError): t.executable = data if not isinstance(data, str) and not isinstance(data, unicode): with pytest.raises(ValueError): t.cpu_reqs = { 'processes': 1, 'process_type': data, 'threads_per_process': 1, 'thread_type': None } t.cpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 1, 'thread_type': data } t.gpu_reqs = { 'processes': 1, 'process_type': data, 'threads_per_process': 1, 'thread_type': None } t.gpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 1, 'thread_type': data } if not isinstance(data, int): with pytest.raises(TypeError): t.cpu_reqs = { 'processes': data, 'process_type': None, 'threads_per_process': 1, 'thread_type': None } t.cpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': data, 'thread_type': None } t.gpu_reqs = { 'processes': data, 'process_type': None, 'threads_per_process': 1, 'thread_type': None } t.gpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': data, 'thread_type': None }
def test_output_list_from_task(): """ **Purpose**: Test if the 'get_output_list_from_task' function generates the correct RP output transfer directives when given a Task """ pipeline = str(ru.generate_id('pipeline')) stage = str(ru.generate_id('stage')) task = str(ru.generate_id('task')) placeholder_dict = { pipeline: { stage: { task: '/home/vivek/some_file.txt' } } } for t in [1, 'a', list(), dict(), True]: with pytest.raises(TypeError): t = list() get_output_list_from_task(t, placeholder_dict) # Test copy output data t = Task() t.copy_output_data = ['/home/vivek/test.dat'] ip_list = get_output_list_from_task(t, placeholder_dict) assert ip_list[0]['source'] == t.copy_output_data[0] assert ip_list[0]['action'] == rp.COPY assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0]) t = Task() t.copy_output_data = ['/home/vivek/test.dat > new_test.dat'] ip_list = get_output_list_from_task(t, placeholder_dict) assert ip_list[0]['source'] == t.copy_output_data[0].split('>')[0].strip() assert ip_list[0]['action'] == rp.COPY assert ip_list[0]['target'] == os.path.basename(t.copy_output_data[0].split('>')[1].strip()) # Test move output data t = Task() t.move_output_data = ['/home/vivek/test.dat'] ip_list = get_output_list_from_task(t, placeholder_dict) assert ip_list[0]['source'] == t.move_output_data[0] assert ip_list[0]['action'] == rp.MOVE assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0]) t = Task() t.move_output_data = ['/home/vivek/test.dat > new_test.dat'] ip_list = get_output_list_from_task(t, placeholder_dict) assert ip_list[0]['source'] == t.move_output_data[0].split('>')[0].strip() assert ip_list[0]['action'] == rp.MOVE assert ip_list[0]['target'] == os.path.basename(t.move_output_data[0].split('>')[1].strip()) # Test download input data t = Task() t.download_output_data = ['/home/vivek/test.dat'] ip_list = get_output_list_from_task(t, placeholder_dict) assert ip_list[0]['source'] == t.download_output_data[0] assert 'action' not in ip_list[0] assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0]) t = Task() t.download_output_data = ['/home/vivek/test.dat > new_test.dat'] ip_list = get_output_list_from_task(t, placeholder_dict) assert ip_list[0]['source'] == t.download_output_data[0].split('>')[0].strip() assert 'action' not in ip_list[0] assert ip_list[0]['target'] == os.path.basename(t.download_output_data[0].split('>')[1].strip())