def test_issue_239():
    """Check that partial ``cpu_reqs``/``gpu_reqs`` assignments read back complete.

    For both requirement attributes, a dict naming only ``processes`` and then
    a dict naming only ``threads_per_process`` is assigned; after each
    assignment the attribute must compare equal to the full four-key dict.
    """
    expected = {
        'processes': 1,
        'process_type': None,
        'threads_per_process': 1,
        'thread_type': None,
    }
    t = Task()

    for attr in ('cpu_reqs', 'gpu_reqs'):
        # Fresh partial dicts per attribute, assigned in the original order.
        for partial in ({'processes': 1}, {'threads_per_process': 1}):
            setattr(t, attr, partial)
            assert getattr(t, attr) == expected
def test_issue_239():
    """Verify single-key requirement updates yield the full requirement dict.

    Each of ``cpu_reqs`` and ``gpu_reqs`` is assigned a one-key dict twice
    (first ``processes``, then ``threads_per_process``) and must equal the
    complete four-key description after every assignment.
    """
    full_reqs = dict(processes=1,
                     thread_type=None,
                     threads_per_process=1,
                     process_type=None)
    task = Task()

    # CPU requirements
    task.cpu_reqs = dict(processes=1)
    assert task.cpu_reqs == full_reqs
    task.cpu_reqs = dict(threads_per_process=1)
    assert task.cpu_reqs == full_reqs

    # GPU requirements
    task.gpu_reqs = dict(processes=1)
    assert task.gpu_reqs == full_reqs
    task.gpu_reqs = dict(threads_per_process=1)
    assert task.gpu_reqs == full_reqs
def generate_ml_stage(self) -> Stage:
    """Build the "learning" stage holding one task for the current iteration.

    The task is configured from ``self.cfg.ml_stage``.  The stage's run
    configuration is updated with the input/output paths of the current
    iteration (and, after the first iteration, the previous checkpoint as
    initial weights), dumped to ``ml_<iteration>.yaml`` under the "ml_runs"
    experiment directory and handed to the task via ``-c <yaml path>``.

    Returns:
        The ``Stage`` containing the single configured task.
    """
    stage = Stage()
    stage.name = "learning"
    cfg = self.cfg.ml_stage

    task = Task()
    task.cpu_reqs = cfg.cpu_reqs.dict()
    task.gpu_reqs = cfg.gpu_reqs.dict()
    # Copy the list-valued settings: ``task.arguments += ...`` below may extend
    # the underlying list in place, which would otherwise append "-c <path>"
    # to the shared ``cfg.arguments`` on every call.
    task.pre_exec = cfg.pre_exec.copy()
    task.executable = cfg.executable
    task.arguments = cfg.arguments.copy()

    # Update base parameters
    cfg.run_config.input_path = self.aggregated_data_path(self.cur_iteration)
    cfg.run_config.output_path = self.model_path(self.cur_iteration)
    if self.cur_iteration > 0:
        cfg.run_config.init_weights_path = self.latest_ml_checkpoint_path(
            self.cur_iteration - 1)

    cfg_path = self.experiment_dirs["ml_runs"].joinpath(
        f"ml_{self.cur_iteration:03d}.yaml")
    cfg.run_config.dump_yaml(cfg_path)

    task.arguments += ["-c", cfg_path]
    stage.add_tasks(task)
    return stage
def generate_outlier_detection_stage(self) -> Stage:
    """Build the "outlier_detection" stage holding one task for this iteration.

    The task is configured from ``self.cfg.od_stage``.  The output directory
    for outlier PDBs is created, the stage's run configuration is pointed at
    the current iteration's data, weights and restart-point paths, dumped to
    ``od_<iteration>.yaml`` under the "od_runs" experiment directory and
    passed to the task via ``-c <yaml path>``.

    Returns:
        The ``Stage`` containing the single configured task.
    """
    stage = Stage()
    stage.name = "outlier_detection"
    cfg = self.cfg.od_stage

    task = Task()
    task.cpu_reqs = cfg.cpu_reqs.dict()
    task.gpu_reqs = cfg.gpu_reqs.dict()
    # Copy the list-valued settings: ``task.arguments += ...`` below may extend
    # the underlying list in place, which would otherwise append "-c <path>"
    # to the shared ``cfg.arguments`` on every call.
    task.pre_exec = cfg.pre_exec.copy()
    task.executable = cfg.executable
    task.arguments = cfg.arguments.copy()

    # mkdir() without exist_ok: raises if the directory is already present.
    self.outlier_pdbs_path(self.cur_iteration).mkdir()

    # Update base parameters
    cfg.run_config.experiment_directory = self.cfg.experiment_directory
    cfg.run_config.input_path = self.aggregated_data_path(self.cur_iteration)
    cfg.run_config.output_path = self.outlier_pdbs_path(self.cur_iteration)
    cfg.run_config.weights_path = self.latest_ml_checkpoint_path(
        self.cur_iteration)
    cfg.run_config.restart_points_path = self.restart_points_path(
        self.cur_iteration)

    cfg_path = self.experiment_dirs["od_runs"].joinpath(
        f"od_{self.cur_iteration:03d}.yaml")
    cfg.run_config.dump_yaml(cfg_path)

    task.arguments += ["-c", cfg_path]
    stage.add_tasks(task)
    return stage
def post_stage():
    """Append another one-task simulation stage to pipeline ``p``.

    Does nothing once ``<run_dir>/aggregator/stop.aggregator`` exists.
    Otherwise a new stage named after the current stage count is created with
    a single ``simulation.py`` task, the simulation directory is symlinked
    from ``simulations/all`` into ``simulations/new`` and the stage registers
    this function as its own ``post_exec``.

    Uses the enclosing-scope names ``p``, ``i``, ``run_dir``, ``current_dir``,
    ``PYTHON`` and ``ADIOS_XML``.
    """
    if os.path.exists(f'{run_dir}/aggregator/stop.aggregator'):
        return

    stage_count = len(p.stages)
    tag = f'{i}_{stage_count}'

    new_stage = Stage()
    new_stage.name = f"{stage_count}"

    sim_task = Task()
    sim_task.cpu_reqs = dict(processes=1,
                             process_type=None,
                             threads_per_process=4,
                             thread_type='OpenMP')
    sim_task.gpu_reqs = dict(processes=0,
                             process_type=None,
                             threads_per_process=0,
                             thread_type=None)
    sim_task.name = f" {tag} "
    sim_task.executable = PYTHON
    sim_task.arguments = [
        f'{current_dir}/simulation.py',
        f'{run_dir}/simulations/all/{tag}',
        ADIOS_XML,
    ]

    # Best effort: the exit status and output of `ln` are discarded.
    subprocess.getstatusoutput(
        f'ln -s {run_dir}/simulations/all/{tag} {run_dir}/simulations/new/{tag}'
    )

    new_stage.add_tasks(sim_task)
    new_stage.post_exec = post_stage
    p.add_stages(new_stage)
def esmacs(self, rct_stage, stage, outdir="equilibration", name=None):
    """Add twelve replica tasks (rep1..rep12) to the stage ``self.<rct_stage>``.

    Each task sets ``WDIR`` to ``<self.run_dir>/<name>``, activates the conda
    environment and modules stored on ``self``, moves into
    ``$WDIR/replicas/rep<i>/<outdir>``, removes old ``<stage>`` outputs and
    runs ``python3 $WDIR/<stage>.py``.

    Parameters:
        rct_stage: attribute name on ``self`` of the stage receiving tasks.
        stage: base name of the script to run and of the files to clean up.
        outdir: per-replica sub-directory to work in.
        name: directory name below ``self.run_dir`` used for ``WDIR``.
    """
    for replica in range(1, 13):
        replica_dir = f"$WDIR/replicas/rep{replica}/{outdir}"

        task = Task()
        task.pre_exec = [
            f'export WDIR="{self.run_dir}/{name}"',
            f". {self.conda_init}",
            f"conda activate {self.esmacs_tenv}",
            f"module load {self.esmacs_tmodules}",
            f"mkdir -p {replica_dir}",
            f"cd {replica_dir}",
            f"rm -f {stage}.log {stage}.xml {stage}.dcd {stage}.chk",
            "export OMP_NUM_THREADS=1",
        ]
        task.executable = 'python3'
        task.arguments = [f'$WDIR/{stage}.py']
        task.post_exec = []
        task.cpu_reqs = dict(processes=1,
                             process_type=None,
                             threads_per_process=4,
                             thread_type='OpenMP')
        task.gpu_reqs = dict(processes=1,
                             process_type=None,
                             threads_per_process=1,
                             thread_type='CUDA')

        getattr(self, rct_stage).add_tasks(task)
        # NOTE(review): debug dump assumed to run once per replica - confirm
        # against the original indentation.
        print(getattr(self, rct_stage).to_dict())
def generate_task(cfg: BaseStageConfig) -> Task:
    """Create a ``Task`` from a stage configuration.

    The requirement dicts and the ``pre_exec``/``arguments`` lists are copied,
    so later changes to the returned task's containers do not touch ``cfg``.

    Args:
        cfg: stage configuration providing ``cpu_reqs``, ``gpu_reqs``,
            ``pre_exec``, ``executable`` and ``arguments``.

    Returns:
        The newly configured task.
    """
    cpu_reqs = cfg.cpu_reqs.dict().copy()
    gpu_reqs = cfg.gpu_reqs.dict().copy()
    pre_exec = cfg.pre_exec.copy()

    new_task = Task()
    new_task.cpu_reqs = cpu_reqs
    new_task.gpu_reqs = gpu_reqs
    new_task.pre_exec = pre_exec
    new_task.executable = cfg.executable
    new_task.arguments = cfg.arguments.copy()
    return new_task
def generate_ML_stage(num_ML=1):
    """Generate the 'learning' stage with ``num_ML`` CVAE training tasks.

    Task ``i`` trains with latent dimension ``i + 3`` inside its own
    ``cvae_runs_<dim>_<stamp>`` directory.

    Args:
        num_ML: number of training tasks to add to the stage.

    Returns:
        The ``Stage`` holding the training tasks.
    """
    s3 = Stage()
    s3.name = 'learning'

    # Take the timestamp once and offset it by the task index instead of
    # sleeping one second per task to obtain distinct stamps: run directories
    # stay unique and building the stage no longer blocks.
    time_stamp = int(time.time())

    # learn task
    for i in range(num_ML):
        dim = i + 3
        cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp + i)

        t3 = Task()
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
        t3.pre_exec = [
            'module reset',
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
            'module load cuda/9.1.85',
            'conda activate rp.copy',
            'export CUDA_VISIBLE_DEVICES=0',
            'export PYTHONPATH=/gpfs/alpine/scratch/hrlee/bip179/hyperspace/microscope/experiments/CVAE_exps:$PYTHONPATH',
            'cd /gpfs/alpine/scratch/hrlee/bip179/hyperspace/microscope/experiments/CVAE_exps',
            'mkdir -p {0} && cd {0}'.format(cvae_dir),
        ]
        t3.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python']
        # train_cvae.py
        t3.arguments = [
            '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/CVAE_exps/train_cvae.py',
            '-f',
            '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE/cvae_input.h5',
            '-d', dim
        ]
        t3.cpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 4,
            'thread_type': 'OpenMP'
        }
        t3.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': 'CUDA'
        }

        # Add the learn task to the learning stage
        s3.add_tasks(t3)

    return s3
def generate_interfacing_stage():
    """Build the 'scanning' stage: one outlier-search task plus a post-exec hook.

    The task prepares modules and a conda environment, changes into
    ``<base_path>/Outlier_search`` and runs ``outlier_locator.py`` on the MD
    and CVAE outputs.  Uses the module-level names ``base_path``, ``md_path``,
    ``cvae_path``, ``pdb_file`` and ``func_condition``.

    Returns:
        The configured ``Stage``; ``func_condition`` is its ``post_exec``.
    """
    s4 = Stage()
    s4.name = 'scanning'

    # Scanning for outliers and prepare the next stage of MDs
    t4 = Task()

    module_setup = [
        'module unload prrte',
        'module unload python',
        'module load xl',
        'module load xalt',
        'module load spectrum-mpi',
        'module load cuda',
        'module list',
    ]
    conda_setup = [
        '. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
        'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
        'conda deactivate',
        'conda deactivate',
        'conda activate /gpfs/alpine/proj-shared/med110/wf-2/conda/envs/ibm-wml-ce-cloned',
    ]
    workdir_setup = [
        'export PYTHONPATH=%s/CVAE_exps:%s/CVAE_exps/cvae:$PYTHONPATH' %
        (base_path, base_path),
        'cd %s/Outlier_search' % base_path,
    ]
    t4.pre_exec = module_setup + conda_setup + workdir_setup

    t4.executable = ['python']
    t4.arguments = ['outlier_locator.py']
    t4.arguments += ['--md', md_path]
    t4.arguments += ['--cvae', cvae_path]
    t4.arguments += ['--pdb', pdb_file]

    t4.cpu_reqs = dict(processes=1,
                       process_type=None,
                       threads_per_process=16,
                       thread_type='OpenMP')
    t4.gpu_reqs = dict(processes=1,
                       process_type=None,
                       threads_per_process=1,
                       thread_type='CUDA')

    s4.add_tasks(t4)
    s4.post_exec = func_condition
    return s4
def generate_interfacing_stage():
    """Build the 'scanning' stage: one outlier-search task plus a post-exec hook.

    The task resets modules, activates the ``rp.copy`` conda environment,
    changes into the ``Outlier_search`` experiment directory and runs
    ``outlier_locator.py`` with relative paths to the MD runs, the CVAE runs
    and the reference PDB.  Uses the module-level name ``func_condition``.

    Returns:
        The configured ``Stage``; ``func_condition`` is its ``post_exec``.
    """
    s4 = Stage()
    s4.name = 'scanning'

    # Scanning for outliers and prepare the next stage of MDs
    t4 = Task()
    t4.pre_exec = [
        'module reset',
        '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
        'module load cuda/9.1.85',
        'conda activate rp.copy',
        'export CUDA_VISIBLE_DEVICES=0',
        'export PYTHONPATH=/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/CVAE_exps:$PYTHONPATH',
        'cd /gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/Outlier_search',
    ]

    # python outlier_locator.py -m ../MD_exps/fs-pep -c ../CVAE_exps -p ../MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb
    t4.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python']
    # Every flag and every value is its own list element.  The previous
    # '../CVAE_exps --pdb' element fused the --cvae value with the next flag,
    # which only works if the launcher re-splits arguments on whitespace.
    t4.arguments = [
        'outlier_locator.py',
        '--md', '../MD_exps/fs-pep',
        '--cvae', '../CVAE_exps',
        '--pdb', '../MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb'
    ]

    t4.cpu_reqs = {
        'processes': 1,
        'process_type': None,
        'threads_per_process': 12,
        'thread_type': 'OpenMP'
    }
    t4.gpu_reqs = {
        'processes': 1,
        'process_type': None,
        'threads_per_process': 1,
        'thread_type': 'CUDA'
    }

    s4.add_tasks(t4)
    s4.post_exec = func_condition
    return s4
def generate_MD_stage(num_MD=1):
    """Generate the 'MD' stage with ``num_MD`` OpenMM simulation tasks.

    Every task runs ``run_openmm.py`` inside its own ``omm_runs_<stamp + i>``
    directory below ``<base_path>/MD_exps/fs-pep``.  Uses the module-level
    names ``conda_path``, ``base_path`` and ``LEN_sim``.

    Args:
        num_MD: number of simulation tasks to add to the stage.

    Returns:
        The ``Stage`` holding the simulation tasks.
    """
    md_stage = Stage()
    md_stage.name = 'MD'

    stamp = int(time.time())
    for idx in range(num_MD):
        run_id = stamp + idx

        md_task = Task()
        md_task.pre_exec = [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
            'module load cuda/9.1.85',
            'conda activate %s' % conda_path,
            'export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path,
            'cd %s/MD_exps/fs-pep' % base_path,
            'mkdir -p omm_runs_%d && cd omm_runs_%d' % (run_id, run_id),
        ]
        md_task.executable = ['%s/bin/python' % conda_path]
        md_task.arguments = [
            '%s/MD_exps/fs-pep/run_openmm.py' % base_path,
            '--pdb_file',
            '%s/MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb' % base_path,
            '--length',
            LEN_sim,
        ]

        # Hardware requested per task.
        md_task.cpu_reqs = dict(processes=1,
                                process_type=None,
                                threads_per_process=4,
                                thread_type='OpenMP')
        md_task.gpu_reqs = dict(processes=1,
                                process_type=None,
                                threads_per_process=1,
                                thread_type='CUDA')

        md_stage.add_tasks(md_task)

    return md_stage
def generate_task(self, **ensembles):
    """
    Generate a `radical.entk` task.

    Parameters
    ----------
    ensembles: dict, OrderedDict
        Dictionary of the *current* values of variables that are ensembles.
        All the variables that were declared with `add_ensemble` should be
        specified here so that a correct task object can be generated.
        Must contain the key ``'task_name'``, which becomes the task name.

    Returns
    -------
    Task
        The configured task.

    Raises
    ------
    ValueError
        If ``all_variables_defined()`` is false after the ensemble values
        have been set on ``self``.
    KeyError
        If ``'task_name'`` is missing from ``ensembles``.
    """
    # Plain loop over ``items()``: ``dict.iteritems()`` does not exist on
    # Python 3, and ``items()`` behaves the same on Python 2.
    for key, value in ensembles.items():
        setattr(self, key, value)

    if not self.all_variables_defined():
        raise ValueError('Some variables are not defined!')

    task = Task()
    task.name = ensembles['task_name']

    task.pre_exec += self.engine.pre_exec
    task.executable += str(self.engine.executable)
    task.arguments += self.engine.arguments
    task.cpu_reqs = {
        'processes': self._processes,
        'process_type': 'MPI' if self.engine.uses_mpi else None,
        'threads_per_process': self._threads_per_process,
        'thread_type': None
    }
    task.gpu_reqs = {
        'processes': self._gpu_processes,
        'process_type': 'MPI' if self.engine.gpu_uses_mpi else None,
        'threads_per_process': self._gpu_threads_per_process,
        'thread_type': None
    }

    task.arguments.extend(self.arguments)
    task.copy_input_data.extend(self.copied_files)
    task.copy_input_data.extend(self.system.copied_files)

    task.post_exec.append('echo "{}" > sim_desc.txt'.format(task.name))

    task.link_input_data.extend(self.input_data(**ensembles))
    task.link_input_data.extend(self.system.linked_files)

    # One command per (name, value) pair of every file returned by
    # get_variables(), rendered with the ``self._sed`` template.
    task.pre_exec.extend(
        self._sed.format(n, v, f)
        for f, vs in self.get_variables().items() for n, v in vs)

    return task
def esmacs(cfg, names, stage, outdir):
    """Build stage ``S3.<stage>`` with one task per component and replica.

    For every entry of ``names`` and every replica index in
    ``range(1, cfg['n_replicas'])`` a task is added that sets ``WDIR`` to the
    component, activates the configured conda environment and modules, moves
    into ``$WDIR/replicas/rep<i>/<outdir>`` and runs
    ``python3 $WDIR/<stage>.py``.

    Args:
        cfg: mapping providing ``n_replicas``, ``conda_init``,
            ``conda_esmacs_task_env`` and ``esmacs_task_modules``.
        names: iterable of component working directories.
        stage: script base name, also used in the stage name.
        outdir: per-replica sub-directory to work in.

    Returns:
        The populated ``Stage``.
    """
    esmacs_stage = Stage()
    esmacs_stage.name = 'S3.%s' % stage

    for component in names:
        for replica in range(1, cfg['n_replicas']):
            replica_dir = "$WDIR/replicas/rep{}/{}".format(replica, outdir)

            task = Task()
            # RCT native
            task.pre_exec = [
                'export WDIR="{}"'.format(component),
                ". {}".format(cfg['conda_init']),
                "conda activate {}".format(cfg['conda_esmacs_task_env']),
                "module load {}".format(cfg['esmacs_task_modules']),
                "mkdir -p " + replica_dir,
                "cd " + replica_dir,
                "export OMP_NUM_THREADS=1",
            ]
            task.executable = 'python3'
            task.arguments = ['$WDIR/{}.py'.format(stage)]

            task.cpu_reqs = dict(processes=1,
                                 process_type=None,
                                 threads_per_process=4,
                                 thread_type='OpenMP')
            task.gpu_reqs = dict(processes=1,
                                 process_type=None,
                                 threads_per_process=1,
                                 thread_type='CUDA')

            esmacs_stage.add_tasks(task)

    return esmacs_stage
def generate_interfacing_stage():
    """Build the 'scanning' stage running the distributed outlier detection.

    The single task collects the ``model-cvae_runs*`` directories into the
    shell variable ``$models`` and launches ``optics.py`` through ``jsrun``
    and the ``run_optics_dist_summit_entk.sh`` wrapper.  All settings come
    from the module-level ``cfg`` mapping; ``func_condition`` becomes the
    stage's ``post_exec``.

    Returns:
        The configured ``Stage``.
    """
    base = cfg['base_path']
    molecules = cfg['molecules_path']
    python_bin = '%s/bin/python' % cfg['conda_pytorch']

    s4 = Stage()
    s4.name = 'scanning'

    # Scanning for outliers and prepare the next stage of MDs
    t4 = Task()
    t4.pre_exec = [
        '. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh',
        'conda activate %s' % cfg['conda_pytorch'],
        'mkdir -p %s/Outlier_search/outlier_pdbs' % base,
        # Comma-join every CVAE model directory into $models.
        'export models=""; for i in `ls -d %s/CVAE_exps/model-cvae_runs*/`; do if [ "$models" != "" ]; then models=$models","$i; else models=$i; fi; done;cat /dev/null' % base,
        'export LANG=en_US.utf-8',
        'export LC_ALL=en_US.utf-8',
        'unset CUDA_VISIBLE_DEVICES',
        'export OMP_NUM_THREADS=4',
    ]

    noop_cmd = 'cat /dev/null'
    jsrun_cmd = 'jsrun -n %s -a 6 -g 6 -r 1 -c 7' % cfg['node_counts']
    t4.executable = [
        ' %s; %s %s/examples/outlier_detection/run_optics_dist_summit_entk.sh'
        % (noop_cmd, jsrun_cmd, molecules)
    ]

    t4.arguments = [
        python_bin,
        '%s/examples/outlier_detection/optics.py' % molecules,
        '--sim_path', '%s/MD_exps/%s' % (base, cfg['system_name']),
        '--pdb_out_path', '%s/Outlier_search/outlier_pdbs' % base,
        '--restart_points_path',
        '%s/Outlier_search/restart_points.json' % base,
        '--data_path', '%s/MD_to_CVAE/cvae_input.h5' % base,
        '--model_paths', '$models',
        '--model_type', cfg['model_type'],
        '--min_samples', 10,
        '--n_outliers', 500,
        '--dim1', cfg['residues'],
        '--dim2', cfg['residues'],
        '--cm_format', 'sparse-concat',
        '--batch_size', cfg['batch_size'],
        '--distributed',
        '-iw', cfg['init_weights'],
    ]

    t4.cpu_reqs = dict(processes=1,
                       process_type=None,
                       threads_per_process=12,
                       thread_type='OpenMP')
    t4.gpu_reqs = dict(processes=1,
                       process_type=None,
                       threads_per_process=1,
                       thread_type='CUDA')

    s4.add_tasks(t4)
    s4.post_exec = func_condition
    return s4
def generate_ML_stage(num_ML=1):
    """Generate the 'learning' stage with ``num_ML`` CVAE training tasks.

    Task ``i`` trains with latent dimension ``i + 3`` inside
    ``cvae_runs_<dim>_<stamp + i>`` below ``cvae_path``.  Uses the
    module-level names ``conda_path``, ``base_path``, ``cvae_path`` and
    ``agg_path``.

    Args:
        num_ML: number of training tasks to add to the stage.

    Returns:
        The ``Stage`` holding the training tasks.
    """
    learn_stage = Stage()
    learn_stage.name = 'learning'

    stamp = int(time.time())
    for idx in range(num_ML):
        dim = idx + 3
        run_dir = 'cvae_runs_%.2d_%d' % (dim, stamp + idx)

        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
        learn_task = Task()
        learn_task.pre_exec = [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
            'module load cuda/9.1.85',
            'conda activate %s' % conda_path,
            'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path,
            'cd %s' % cvae_path,
            'mkdir -p {0} && cd {0}'.format(run_dir),
        ]
        learn_task.executable = ['%s/bin/python' % conda_path]
        learn_task.arguments = [
            '%s/train_cvae.py' % cvae_path,
            '--h5_file',
            '%s/cvae_input.h5' % agg_path,
            '--dim',
            dim,
        ]
        learn_task.cpu_reqs = dict(processes=1,
                                   process_type=None,
                                   threads_per_process=4,
                                   thread_type='OpenMP')
        learn_task.gpu_reqs = dict(processes=1,
                                   process_type=None,
                                   threads_per_process=1,
                                   thread_type='CUDA')

        learn_stage.add_tasks(learn_task)

    return learn_stage
def describe_MD_pipeline():
    """Describe the 'MD' pipeline: one 'OpenMM' stage with a single 'md' task.

    The task loads the python/cuda/gcc modules, activates the ``openmm``
    environment, changes into the benchmark results directory and runs
    ``run_openmm.py`` on the fs-peptide PDB, requesting one CPU process and
    one CUDA GPU process.

    Returns:
        The ``Pipeline`` containing the single stage.
    """
    pipeline = Pipeline()
    pipeline.name = 'MD'

    # MD stage
    md_stage = Stage()
    md_stage.name = 'OpenMM'

    # The task is an OpenMM executable requesting a single GPU process.
    md_task = Task()
    md_task.name = 'md'
    md_task.pre_exec = [
        'module load python/2.7.15-anaconda2-5.3.0',
        'module load cuda/9.1.85',
        'module load gcc/6.4.0',
        'source activate openmm',
        'cd /gpfs/alpine/scratch/jdakka/bip178/benchmarks/MD_exps/fs-pep/results_2',
    ]
    md_task.executable = '/ccs/home/jdakka/.conda/envs/openmm/bin/python'
    md_task.arguments = [
        'run_openmm.py',
        '-f',
        '/gpfs/alpine/scratch/jdakka/bip178/benchmarks/MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb',
    ]
    md_task.cpu_reqs = dict(processes=1,
                            process_type=None,
                            threads_per_process=1,
                            thread_type=None)
    md_task.gpu_reqs = dict(processes=1,
                            process_type=None,
                            threads_per_process=1,
                            thread_type='CUDA')

    md_stage.add_tasks(md_task)
    pipeline.add_stages(md_stage)
    return pipeline
def generate_ML_tasks(self):
    """Build the 'learning' pipeline: one 'training' stage of CVAE tasks.

    ``self.num_ML`` tasks are created; task ``i`` sleeps ``i`` seconds in its
    pre-exec and trains with latent dimension ``i + 3``.  Uses the
    module-level names ``conda_path``, ``base_path``, ``cvae_path`` and
    ``agg_path``.

    Returns:
        The ``Pipeline`` containing the training stage.
    """
    pipeline = Pipeline()
    pipeline.name = 'learning'

    train_stage = Stage()
    train_stage.name = 'training'

    for idx in range(self.num_ML):
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
        train_task = Task()
        train_task.pre_exec = [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
            'module load cuda/10.1.168',
            'conda activate %s' % conda_path,
            'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path,
            'cd %s' % cvae_path,
            f"sleep {idx}",
        ]

        dim = idx + 3
        train_task.executable = ['%s/bin/python' % conda_path]
        train_task.arguments = [
            '%s/train_cvae.py' % cvae_path,
            '--h5_file',
            '%s/cvae_input.h5' % agg_path,
            '--dim',
            dim,
        ]
        train_task.cpu_reqs = dict(processes=1,
                                   process_type=None,
                                   threads_per_process=4,
                                   thread_type='OpenMP')
        train_task.gpu_reqs = dict(processes=1,
                                   process_type=None,
                                   threads_per_process=1,
                                   thread_type='CUDA')

        train_stage.add_tasks(train_task)

    pipeline.add_stages(train_stage)
    return pipeline
def generate_MD_tasks(self):
    """Build the 'MD' pipeline: one 'MD' stage of OpenMM simulation tasks.

    ``self.num_MD`` identical tasks run ``run_openmm.py`` from ``md_path``
    with ``--length 1000``; ``--topol`` is passed only when ``top_file`` is
    truthy.  Uses the module-level names ``conda_path``, ``base_path``,
    ``md_path``, ``pdb_file`` and ``top_file``.

    Returns:
        The ``Pipeline`` containing the simulation stage.
    """
    pipeline = Pipeline()
    pipeline.name = "MD"

    md_stage = Stage()
    md_stage.name = 'MD'

    for _ in range(self.num_MD):
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
        md_task = Task()
        md_task.pre_exec = [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
            'module load cuda/10.1.168',
            'conda activate %s' % conda_path,
            'export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path,
            'cd %s' % md_path,
        ]
        md_task.executable = ['%s/bin/python' % conda_path]

        cli = ['%s/run_openmm.py' % md_path, '--pdb_file', pdb_file]
        if top_file:
            cli += ['--topol', top_file]
        cli += ['--length', 1000]
        md_task.arguments = cli

        # Hardware requested per task.
        md_task.cpu_reqs = dict(processes=1,
                                process_type=None,
                                threads_per_process=4,
                                thread_type='OpenMP')
        md_task.gpu_reqs = dict(processes=1,
                                process_type=None,
                                threads_per_process=1,
                                thread_type='CUDA')

        md_stage.add_tasks(md_task)

    pipeline.add_stages(md_stage)
    return pipeline
def generate_interfacing_task(self):
    """Build the 'interfacing' pipeline: one 'scanning' stage, one task.

    The task runs ``outlier_locator.py`` from ``<base_path>/Outlier_search``
    with the MD/CVAE paths, the PDB files, ``self.num_outliers`` and
    ``self.t_timeout``.  Uses the module-level names ``conda_path``,
    ``base_path``, ``md_path``, ``cvae_path``, ``pdb_file`` and
    ``ref_pdb_file``.

    Returns:
        The ``Pipeline`` containing the scanning stage.
    """
    pipeline = Pipeline()
    pipeline.name = 'interfacing'

    scan_stage = Stage()
    scan_stage.name = 'scanning'

    # Scanning for outliers and prepare the next stage of MDs
    scan_task = Task()
    scan_task.pre_exec = [
        '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh',
        'module load cuda/10.1.168',
        'conda activate %s' % conda_path,
        'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path,
        'cd %s/Outlier_search' % base_path,
    ]
    scan_task.executable = ['%s/bin/python' % conda_path]
    scan_task.arguments = [
        'outlier_locator.py',
        '--md', md_path,
        '--cvae', cvae_path,
        '--pdb', pdb_file,
        '--ref', ref_pdb_file,
        '--n_out', self.num_outliers,
        '--timeout', self.t_timeout,
    ]
    scan_task.cpu_reqs = dict(processes=1,
                              process_type=None,
                              threads_per_process=12,
                              thread_type='OpenMP')
    scan_task.gpu_reqs = dict(processes=1,
                              process_type=None,
                              threads_per_process=1,
                              thread_type='CUDA')

    scan_stage.add_tasks(scan_task)
    pipeline.add_stages(scan_stage)
    return pipeline
def generate_md_stage(self) -> Stage:
    """Build the "MD" stage with ``cfg.num_jobs`` simulation tasks.

    Starting structures come from the previous iteration's restart points
    (or from the initial PDB directory on iteration 0) and are cycled over
    the jobs.  For each job the run configuration is updated, dumped to
    ``md_<iteration>_<job>.yaml`` in the "tmp" experiment directory and
    passed to the task via ``-c <yaml path>``.

    Returns:
        The ``Stage`` containing one task per job.
    """
    stage = Stage()
    stage.name = "MD"
    cfg = self.cfg.md_stage

    if self.cur_iteration > 0:
        outlier_filename = self.restart_points_path(self.cur_iteration - 1)
        pdb_filenames = get_outlier_pdbs(outlier_filename)
    else:
        pdb_filenames = get_initial_pdbs(cfg.run_config.initial_pdb_dir)

    for i, pdb_filename in zip(range(cfg.num_jobs), cycle(pdb_filenames)):
        task = Task()
        task.cpu_reqs = cfg.cpu_reqs.dict()
        task.gpu_reqs = cfg.gpu_reqs.dict()
        # Each task gets its own copies: ``task.arguments += ...`` below may
        # extend the underlying list in place, and sharing ``cfg.arguments``
        # would make every task inherit the "-c <path>" pairs of all tasks
        # created before it.
        task.pre_exec = cfg.pre_exec.copy()
        task.executable = cfg.executable
        task.arguments = cfg.arguments.copy()

        # Set unique output directory name for task
        dir_prefix = f"md_{self.cur_iteration:03d}_{i:04d}"

        # Update base parameters
        cfg.run_config.result_dir = self.experiment_dirs["md_runs"]
        cfg.run_config.dir_prefix = dir_prefix
        cfg.run_config.pdb_file = pdb_filename

        # Write MD yaml to tmp directory to be picked up and moved by MD job
        cfg_path = self.experiment_dirs["tmp"].joinpath(f"{dir_prefix}.yaml")
        cfg.run_config.dump_yaml(cfg_path)

        task.arguments += ["-c", cfg_path]
        stage.add_tasks(task)

    return stage
def create_workflow(Kconfig):
    """Assemble the whole iterative workflow as a single ``Pipeline``.

    A pre-processing stage (``spliter.py``) is added first.  Then, for every
    iteration from ``Kconfig.start_iter`` up to ``Kconfig.num_iterations``,
    four stages are appended in order: simulation (``run_openmm.py``),
    pre-analysis (``pre_analyze_openmm.py``), ``lsdmap`` and post-analysis
    (``post_analyze.py``).  Files are passed between stages through
    ``$Pipeline_<uid>_Stage_<uid>_Task_<uid>`` references and through
    ``<Kconfig.remote_output_directory>/iter_<n>/``.

    Parameters:
        Kconfig: configuration object.  Attributes read here: start_iter,
            remote_output_directory, num_parallel_MD_sim, num_replicas,
            md_input_file, num_iterations, use_gpus, num_CUs_per_MD_replica,
            md_steps, md_run_file, lsdm_config_file, nsave, restarts,
            max_alive_neighbors, max_dead_neighbors.

    Returns:
        The populated ``Pipeline``.

    Side effects:
        ``Kconfig.start_iter`` is overwritten with the next iteration number
        (as a string) at the end of every loop iteration.
    """

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    # For a non-zero start iteration the files are assumed to already be in
    # combined_path.
    combined_path = str(Kconfig.remote_output_directory
                        )  #'/u/sciteam/hruska/scratch/extasy-grlsd'
    # Both branches leave restart_iter equal to cur_iter.
    if cur_iter == 0:
        restart_iter = 0
    else:
        restart_iter = cur_iter

    if cur_iter == 0:
        # Fresh start: split the shared MD input file and clone it
        # num_replicas times.
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro', '--clone',
            str(Kconfig.num_replicas)
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/iter_%s/input.gro' %
            (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter),
            '$SHARED/%s > input.gro' % os.path.basename(Kconfig.md_input_file),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        # ------------------------------------------------------------------------------------------------------------------
    else:
        # Restart: split the out.gro written by the previous iteration.
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro'
        ]
        pre_proc_task.copy_input_data = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #               are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):

            sim_task = Task()
            # use_gpus is compared against the string 'False', not a bool.
            if Kconfig.use_gpus == 'False':
                sim_task.executable = [
                    '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python'
                ]
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter
                ]
                sim_task.cores = int(
                    Kconfig.num_CUs_per_MD_replica
                )  # on Blue Waters, tasks on one node are executed concurrently
            else:
                sim_task.executable = ['python']
                sim_task.pre_exec = [
                    'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
                    'module add bwpy-mpi', 'module add fftw',
                    'module add cray-netcdf',
                    'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                    'module add cmake', 'module unload darshan, xalt',
                    'export CRAYPE_LINK_TYPE=dynamic',
                    'export CRAY_ADD_RPATH=yes', 'export FC=ftn',
                    'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                    'export tasks=md',
                    'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
                ]
                # GPU run: one GPU process and no CPU processes requested.
                sim_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                sim_task.cpu_reqs = {
                    'processes': 0,
                    'process_type': None,
                    'threads_per_process': 0,
                    'thread_type': None
                }
            # NOTE(review): arguments and input links are assumed to apply to
            # both branches above - confirm against the original indentation.
            sim_task.arguments = [
                'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro',
                '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'False', '>', 'md.log'
            ]
            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]

            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))
            # First iteration of this run: start files come from the
            # pre-processing task; afterwards from the previous iteration's
            # post-analysis task.
            if restart_iter == cur_iter:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
            'module add bwpy-mpi', 'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan, xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=pre_ana',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']

        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py'
        ]

        # Link the out.gro of every simulation task as out<sim_num>.gro.
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)
        ]
        #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu', 'module unload bwpy',
            'module load bwpy/0.3.0', 'module add bwpy-mpi',
            'module add fftw', 'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=lsdmap',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        ana_task.executable = ['lsdmap']  #/u/sciteam/hruska/local/bin/lsdmap
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]

        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter)
        ]
        ana_task.copy_output_data = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
            #'lsdmap.log > output/iter_%s/lsdmap.log'%cur_iter,
            'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter),
            'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter),
            'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter),
            'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter),
            'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter)
        ]
        # From the second iteration on, reuse the weights written by the
        # previous iteration's post-analysis.
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1)
            ]

        # Download the log only every nsave-th iteration.
        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter
            ]

        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)

        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #                 to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #     Arguments:
        #             num_replicas        = number of configurations to be generated in the new coordinate file
        #             out                 = output filename
        #             cycle               = iteration number
        #             max_dead_neighbors  = max dead neighbors to be considered
        #             max_alive_neighbors = max alive neighbors to be considered
        #             numCUs              = number of simulation instances/ number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task._name = 'post_ana_task'
        # NOTE(review): the whole task configuration below is assumed to sit
        # inside this branch - confirm against the original indentation.  For
        # any other value of Kconfig.restarts the task is left unconfigured.
        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module add bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic',
                'export CRAY_ADD_RPATH=yes', 'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=post_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            post_ana_task.executable = ['python']
            post_ana_task.arguments = [
                'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev',
                'ncopies.nc', 'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
                Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
                'input.gro', cur_iter, Kconfig.num_parallel_MD_sim,
                'weight_out.w', 'tmpha.eg'
            ]

            # NOTE(review): weight_out.w is linked from iter_<cur_iter - 1>
            # unconditionally, i.e. from iter_-1 when cur_iter is 0 - confirm
            # this is intended.
            post_ana_task.link_input_data = [
                '$SHARED/post_analyze.py > post_analyze.py',
                '$SHARED/selection.py > selection.py',
                '$SHARED/selection-cluster.py > selection-cluster.py',
                '$SHARED/reweighting.py > reweighting.py',
                '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1),
                '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter),
                '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter)
            ]

            # Download results only every nsave-th iteration.
            if (cur_iter % Kconfig.nsave == 0):
                post_ana_task.download_output_data = [
                    'out.gro > output/iter_%s/out.gro' % cur_iter,
                    'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                    'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png'
                    % (cur_iter),
                    'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
                    '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' %
                    (combined_path, cur_iter, cur_iter)
                ]

            post_ana_task.copy_output_data = [
                'ncopies.nc > %s/iter_%s/ncopies.nc' %
                (combined_path, cur_iter),
                'weight_out.w > %s/iter_%s/weight_out.w' %
                (combined_path, cur_iter),
                'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png'
                % cur_iter,
                'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png'
                % cur_iter,
                'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png'
                % cur_iter
            ]

        # Read by the next iteration's simulation tasks to locate their
        # start files.
        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)

        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        # Record the next iteration on the config object (as a string).
        Kconfig.start_iter = str(cur_iter)

    return wf
def test_create_cud_from_task():
    """
    **Purpose**: Check that 'create_cud_from_task' produces a RP
    ComputeUnitDescription that carries over every field of the Task
    description it was built from.
    """

    # Placeholder lookup handed to the converter: pipeline -> stage -> task -> path
    placeholder_dict = {'p1': {'s1': {'t1': '/home/vivek/some_file.txt'}}}

    # Fully populated task
    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP',
    }
    t1.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP',
    }
    t1.post_exec = ['echo test']
    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    # Attach the task to a stage and pipeline so the parent references used
    # in the unit name are populated once uids are assigned.
    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s
    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    # The unit name encodes uid and name of the task, its stage and its pipeline
    expected_name = '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                           t1.parent_stage['uid'],
                                           t1.parent_stage['name'],
                                           t1.parent_pipeline['uid'],
                                           t1.parent_pipeline['name'])
    assert cud.name == expected_name

    assert cud.pre_exec == t1.pre_exec
    # rp returns executable as a string regardless of whether assignment was
    # using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments

    # Hardware requirements: unit attribute suffix -> key in the reqs dict
    reqs_fields = (('processes', 'processes'),
                   ('threads', 'threads_per_process'),
                   ('process_type', 'process_type'),
                   ('thread_type', 'thread_type'))
    for suffix, key in reqs_fields:
        assert getattr(cud, 'cpu_' + suffix) == t1.cpu_reqs[key]
    for suffix, key in reqs_fields:
        assert getattr(cud, 'gpu_' + suffix) == t1.gpu_reqs[key]

    assert cud.post_exec == t1.post_exec

    # Staging directives
    expected_inputs = [
        {'source': 'upload_input.dat', 'target': 'upload_input.dat'},
        {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'},
        {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'},
    ]
    for directive in expected_inputs:
        assert directive in cud.input_staging

    expected_outputs = [
        {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'},
        {'source': 'download_output.dat', 'target': 'download_output.dat'},
    ]
    for directive in expected_outputs:
        assert directive in cud.output_staging
def create_workflow(Kconfig, args):
    """
    Build the simulation/analysis pipeline described by ``Kconfig``.

    Starting at ``Kconfig.start_iter``, every iteration below
    ``Kconfig.num_iterations`` contributes one simulation stage and, unless
    ``Kconfig.strategy`` is ``'extend'``, one analysis stage.  When the start
    iteration is 0 a pre-processing stage is added first.

    :param Kconfig: configuration object.  Attributes read here:
        ``start_iter``, ``remote_output_directory``, ``NODESIZE``,
        ``num_replicas``, ``num_iterations``, ``md_run_file``,
        ``md_input_file``, ``md_steps`` and ``strategy``.  ``start_iter`` is
        overwritten with the next iteration number after each iteration.
    :param args: only ``args.Kconfig`` is read; it is used as a file name
        below ``$SHARED``.
    :return: the populated ``Pipeline``.
    """
    wf = Pipeline()

    # For a non-zero start iteration the files are assumed to already be in
    # combined_path.
    cur_iter = int(Kconfig.start_iter)

    combined_path = str(Kconfig.remote_output_directory)
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path += '-giotto'

    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)

    # ------------------------------------------------------------------
    # Pre-processing (first iteration only): move combined_path to a
    # time-stamped name and stage the config and scripts into combined_path.
    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path,
            combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path,
                                    Kconfig.md_run_file)
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    # ------------------------------------------------------------------
    while cur_iter < int(Kconfig.num_iterations):

        # --------------------------------------------------------------
        # Simulation stage: the replicas are split into consecutive index
        # ranges and each range is handled by one task running
        # run_openmm.py.
        sim_stage = Stage()
        sim_task_ref = []
        replicas_per_task = int(num_replicas / num_parallel) + 1
        first_idx = 0
        num_used_threads = 0

        while first_idx < num_replicas:
            # NOTE(review): num_used_threads is never incremented in this
            # function, so this warning can only fire when num_parallel
            # is 0.
            if num_used_threads == num_parallel:
                print("ALLERT tried use more gpus than allocated")

            # Full batch, or whatever is left for the final task.
            batch = min(replicas_per_task, num_replicas - first_idx)
            last_idx = first_idx + batch

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = [
                'module unload PrgEnv-cray',
                'module load PrgEnv-gnu',
                'module unload bwpy',
                'module load bwpy',
                'module add bwpy-mpi',
                'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake',
                'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic',
                'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=md',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1'
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--trajstride', '10',
                '--idxstart', str(first_idx),
                '--idxend', str(last_idx),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', 'True',
                '>', 'md.log'
            ]

            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py'
                % (os.path.basename(Kconfig.md_run_file))
            ]

            # Only the first iteration seeds every replica of this task
            # with the shared input structure.
            if cur_iter == 0:
                seed_copies = [
                    '$SHARED/%s > %s/iter0_input%s.pdb'
                    % (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(first_idx, last_idx)
                ]
            else:
                seed_copies = []
            sim_task.copy_input_data = seed_copies

            # With the 'extend' strategy each replica's output becomes its
            # own input for the next iteration.
            if str(Kconfig.strategy) == 'extend':
                sim_task.copy_output_data = [
                    '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb'
                    % (combined_path, cur_iter, idx,
                       combined_path, (cur_iter + 1), idx)
                    for idx in range(first_idx, last_idx)
                ]

            first_idx = last_idx
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s'
                                % (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)

        # --------------------------------------------------------------
        # Analysis stage: a single task running run-tica-msm.py; skipped
        # for the 'extend' strategy.
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = [
                'module unload PrgEnv-cray',
                'module load PrgEnv-gnu',
                'module unload bwpy',
                'module load bwpy/0.3.0',
                'module add bwpy-mpi',
                'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake',
                'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic',
                'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=tica_msm_ana',
                'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter,
                'export OMP_NUM_THREADS=1'
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                'run-tica-msm.py',
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/run-tica-msm.py > run-tica-msm.py',
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)
            ]
            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)

        # --------------------------------------------------------------
        # No post-analysis stage is added in this variant.
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values

    Every invalid assignment is wrapped in its own ``pytest.raises`` block.
    A ``raises`` context is left as soon as the first exception is raised,
    so statements stacked behind a raising statement in the same block would
    never run and their behaviour would go unchecked.

    :param s, l, i, b: sample values supplied by the test harness; judging
        by the names a string, a list, an integer and a boolean --
        TODO confirm against the fixture/strategy definition.
    """

    def reqs_with(key, value):
        # Valid requirements dict with exactly one entry replaced.
        reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        }
        reqs[key] = value
        return reqs

    str_attrs = ('name', 'path', 'parent_stage', 'parent_pipeline',
                 'stdout', 'stderr')
    list_attrs = ('pre_exec', 'executable', 'arguments', 'post_exec',
                  'upload_input_data', 'copy_input_data', 'link_input_data',
                  'move_input_data', 'copy_output_data',
                  'download_output_data', 'move_output_data')
    reqs_attrs = ('cpu_reqs', 'gpu_reqs')

    t = Task()

    for data in [s, l, i, b]:

        # String-valued attributes reject anything that is not a str
        if not isinstance(data, str):
            for attr in str_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        # List-valued attributes reject anything that is not a list
        if not isinstance(data, list):
            for attr in list_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        # Process/thread types: a non-string value is expected to raise
        # ValueError for both cpu and gpu requirements
        if not isinstance(data, str) and not isinstance(data, unicode):
            for reqs_attr in reqs_attrs:
                for key in ('process_type', 'thread_type'):
                    with pytest.raises(ValueError):
                        setattr(t, reqs_attr, reqs_with(key, data))

        # Process/thread counts: a non-integer value is expected to raise
        # TypeError for both cpu and gpu requirements
        if not isinstance(data, int):
            for reqs_attr in reqs_attrs:
                for key in ('processes', 'threads_per_process'):
                    with pytest.raises(TypeError):
                        setattr(t, reqs_attr, reqs_with(key, data))
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for
    invalid values

    In this variant ``executable`` accepts either a string or a list, so it
    is checked separately from the list-only attributes.

    Every invalid assignment is wrapped in its own ``pytest.raises`` block.
    A ``raises`` context is left as soon as the first exception is raised,
    so statements stacked behind a raising statement in the same block would
    never run and their behaviour would go unchecked.

    :param s, l, i, b: sample values supplied by the test harness; judging
        by the names a string, a list, an integer and a boolean --
        TODO confirm against the fixture/strategy definition.
    """

    def reqs_with(key, value):
        # Valid requirements dict with exactly one entry replaced.
        reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        }
        reqs[key] = value
        return reqs

    str_attrs = ('name', 'path', 'parent_stage', 'parent_pipeline',
                 'stdout', 'stderr')
    list_attrs = ('pre_exec', 'arguments', 'post_exec',
                  'upload_input_data', 'copy_input_data', 'link_input_data',
                  'move_input_data', 'copy_output_data',
                  'download_output_data', 'move_output_data')
    reqs_attrs = ('cpu_reqs', 'gpu_reqs')

    t = Task()

    for data in [s, l, i, b]:

        # String-valued attributes reject anything that is not a str
        if not isinstance(data, str):
            for attr in str_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        # List-valued attributes reject anything that is not a list
        if not isinstance(data, list):
            for attr in list_attrs:
                with pytest.raises(TypeError):
                    setattr(t, attr, data)

        # The executable may be given as a string or as a list
        if not isinstance(data, str) and not isinstance(data, list):
            with pytest.raises(TypeError):
                t.executable = data

        # Process/thread types: a non-string value is expected to raise
        # ValueError for both cpu and gpu requirements
        if not isinstance(data, str) and not isinstance(data, unicode):
            for reqs_attr in reqs_attrs:
                for key in ('process_type', 'thread_type'):
                    with pytest.raises(ValueError):
                        setattr(t, reqs_attr, reqs_with(key, data))

        # Process/thread counts: a non-integer value is expected to raise
        # TypeError for both cpu and gpu requirements
        if not isinstance(data, int):
            for reqs_attr in reqs_attrs:
                for key in ('processes', 'threads_per_process'):
                    with pytest.raises(TypeError):
                        setattr(t, reqs_attr, reqs_with(key, data))
def create_workflow(Kconfig, args):
    """
    Build a pipeline covering at most one iteration of simulation + analysis.

    The iteration to run is derived from the files on disk: the highest
    iteration for which at least ``num_replicas`` files match
    ``<combined_path>/iter<N>_input*.pdb`` (0 if there is none).  One
    simulation stage is added, followed by an analysis stage unless
    ``Kconfig.strategy`` is ``'extend'``.  When the iteration is 0 a
    pre-processing stage that stages config and scripts is added first.

    :param Kconfig: configuration object.  Attributes read here:
        ``start_iter``, ``remote_output_directory``, ``NODESIZE``,
        ``GPUs_per_NODE``, ``num_replicas``, ``script_ana``, ``md_env``,
        ``env_ana_same``, ``ana_env``, ``md_run_file``, ``md_reference``,
        ``num_iterations``, ``trajstride``, ``md_steps``, ``save_alltraj``,
        ``md_use_xml``, ``md_input_file`` and ``strategy``.  ``start_iter``
        is overwritten with the next iteration number.
    :param args: only ``args.Kconfig`` is read; it is used as a file name
        below ``$SHARED``.
    :return: the populated ``Pipeline``.
    """
    wf = Pipeline()

    # Parsed for validation only; the value is replaced by the on-disk
    # detection further down.
    cur_iter = int(Kconfig.start_iter)

    combined_path = str(Kconfig.remote_output_directory)
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path += '-giotto'

    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)

    # Environment set-up commands for the MD and the analysis tasks
    md_settings = Kconfig.md_env
    ana_settings = (md_settings if Kconfig.env_ana_same == 'True'
                    else Kconfig.ana_env)
    print("set", num_parallel, md_settings)

    # Count consecutive iterations that already have a complete set of
    # input structures and resume from the last complete one.
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb'
                        % (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    # ------------------------------------------------------------------
    # Pre-processing (first iteration only): stage config, analysis script,
    # MD script and reference file into combined_path.
    if cur_iter == 0:
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter,
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' % (Kconfig.md_run_file, combined_path,
                                    Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path,
                                    Kconfig.md_reference)
        ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

    # ------------------------------------------------------------------
    # The second condition limits the loop to a single iteration per call.
    start_iter = cur_iter
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------
        # Simulation stage: the remaining replicas are spread evenly over
        # the remaining parallel slots, one task per slot.
        sim_stage = Stage()
        sim_task_ref = []
        first_idx = 0
        num_used_parallel = 0

        while first_idx < num_replicas:
            remaining = num_replicas - first_idx
            def_rep_per_thread = int(
                math.ceil(float(remaining)
                          / float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread, remaining)
            last_idx = first_idx + use_replicas
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, first_idx, num_used_parallel)

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--trajstride', str(Kconfig.trajstride),
                '--Kconfig', str(args.Kconfig),
                '--idxstart', str(first_idx),
                '--idxend', str(last_idx),
                '--path', combined_path,
                '--iter', str(cur_iter),
                '--md_steps', str(Kconfig.md_steps),
                '--save_traj', str(Kconfig.save_alltraj),
                '>', 'md.log'
            ]

            # Files linked into the task sandbox; the XML system and
            # integrator files are only added when requested.
            linked = [
                '$SHARED/%s > run_openmm.py'
                % (os.path.basename(Kconfig.md_run_file))
            ]
            if Kconfig.md_use_xml == 'yes':
                linked += [
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml'
                ]
            linked.append('$SHARED/%s > %s' % (args.Kconfig, args.Kconfig))

            # Only the first iteration seeds every replica of this task
            # with the shared input structure.
            if cur_iter == 0:
                seed_copies = [
                    '$SHARED/%s > %s/iter0_input%s.pdb'
                    % (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(first_idx, last_idx)
                ]
            else:
                seed_copies = []

            sim_task.link_input_data = linked
            sim_task.copy_input_data = seed_copies
            # No output files are copied back by the simulation tasks.
            sim_task.copy_output_data = []

            first_idx = last_idx
            num_used_parallel += 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s'
                                % (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)

        # --------------------------------------------------------------
        # Analysis stage: a single task running the configured analysis
        # script; skipped for the 'extend' strategy.
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                script_ana,
                '--path', combined_path,
                '--n_select', str(num_replicas),
                '--cur_iter', str(cur_iter),
                '--Kconfig', str(args.Kconfig),
                '--ref', str(Kconfig.md_reference),
                '>', 'analyse.log'
            ]
            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }
            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' % (combined_path, cur_iter)
            ]
            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)

        # --------------------------------------------------------------
        # No post-analysis stage is added in this variant.
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def generate_MD_stage(num_MD=1):
    """
    Function to generate MD stage.

    Creates ``num_MD`` OpenMM tasks.  If
    ``<base_path>/Outlier_search/restart_points.json`` exists, its entries
    are used as starting points (one per task, by index); tasks without a
    matching entry, and all tasks of the initial run, start from
    ``cfg['pdb_file']``.

    :param num_MD: number of MD tasks to create.
    :return: the populated ``Stage`` named ``'MD'``.
    """
    s1 = Stage()
    s1.name = 'MD'

    # The first round has no restart points yet.  Read them through a
    # context manager so the file is closed even if the JSON is malformed.
    outlier_filepath = '%s/Outlier_search/restart_points.json' % cfg['base_path']
    initial_MD = not os.path.exists(outlier_filepath)
    outlier_list = []
    if not initial_MD:
        with open(outlier_filepath, 'r') as outlier_file:
            outlier_list = json.load(outlier_file)

    # MD tasks
    time_stamp = int(time.time())
    for i in range(num_MD):
        t1 = Task()

        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
        pre_exec = [
            '. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh',
            'module load cuda/9.1.85',
            'conda activate %s' % cfg['conda_openmm'],
            'export PYTHONPATH=%s/MD_exps:%s/MD_exps/MD_utils:$PYTHONPATH'
            % (cfg['base_path'], cfg['base_path']),
            'cd %s/MD_exps/%s' % (cfg['base_path'], cfg['system_name']),
            # each task runs in its own time-stamped directory
            'mkdir -p omm_runs_%d && cd omm_runs_%d'
            % (time_stamp + i, time_stamp + i),
        ]

        # run_openmm.py
        arguments = ['%s/MD_exps/%s/run_openmm.py'
                     % (cfg['base_path'], cfg['system_name'])]
        if 'top_file' in cfg:
            arguments += ['--topol', cfg['top_file']]

        # pick initial point of simulation
        # NOTE(review): a restart point ending in neither 'pdb' nor 'chk'
        # adds no '--pdb_file' argument at all -- confirm this is intended.
        if initial_MD or i >= len(outlier_list):
            arguments += ['--pdb_file', cfg['pdb_file']]
        elif outlier_list[i].endswith('pdb'):
            arguments += ['--pdb_file', outlier_list[i]]
            pre_exec += ['cp %s ./' % outlier_list[i]]
        elif outlier_list[i].endswith('chk'):
            arguments += ['--pdb_file', cfg['pdb_file'],
                          '-c', outlier_list[i]]
            pre_exec += ['cp %s ./' % outlier_list[i]]

        # how long to run the simulation
        if initial_MD:
            arguments += ['--length', cfg['LEN_initial']]
        else:
            arguments += ['--length', cfg['LEN_iter']]

        t1.pre_exec = pre_exec
        t1.executable = ['%s/bin/python' % cfg['conda_openmm']]
        t1.arguments = arguments

        # assign hardware the task
        t1.cpu_reqs = {'processes': 1,
                       'process_type': None,
                       'threads_per_process': 4,
                       'thread_type': 'OpenMP'}
        t1.gpu_reqs = {'processes': 1,
                       'process_type': None,
                       'threads_per_process': 1,
                       'thread_type': 'CUDA'}

        # Add the MD task to the simulating stage
        s1.add_tasks(t1)

    return s1
def generate_ML_stage(num_ML=1):
    """
    Function to generate the learning stage

    Creates one stage named ``'learning'`` holding ``num_ML`` tasks.  Task
    ``i`` trains with ``--dim i + 3`` inside its own time-stamped
    ``cvae_runs_*`` directory below ``cvae_path``.

    :param num_ML: number of learning tasks to create.
    :return: the populated ``Stage``.
    """
    s3 = Stage()
    s3.name = 'learning'

    # learn task
    time_stamp = int(time.time())
    for i in range(num_ML):
        dim = i + 3
        cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp + i)

        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
        # The training itself is launched through jsrun as the last
        # pre_exec command; the task executable is just 'date'.
        setup_cmds = [
            # module environment
            'module unload prrte',
            'module unload python',
            'module load xl',
            'module load xalt',
            'module load spectrum-mpi',
            'module load cuda',
            'module list',
            'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path,
            # per-task working directory
            'cd %s' % cvae_path,
            'mkdir -p {0}/benchmarks && cd {0}'.format(cvae_dir),
            # IBM WML-CE conda environment
            'module load ibm-wml-ce',
            'env',
            '. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            'conda deactivate',
            'conda deactivate',
            'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2',
            'export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5"',
            # training run
            'jsrun --erf_input /gpfs/alpine/med110/scratch/atrifan2/covid19/PLPro/entk_cvae_md_hvd/RANKFILE /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s'
            % (cvae_path, dim),
        ]

        t3 = Task()
        t3.pre_exec = setup_cmds
        t3.executable = ['date']
        t3.cpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 4,
            'thread_type': 'OpenMP'
        }
        t3.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': 'CUDA'
        }

        # Add the learn task to the learning stage
        s3.add_tasks(t3)

    return s3
def generate_ML_stage(num_ML=1):
    """
    Build ``num_ML`` single-task 'learning' stages, one per entry of
    ``cfg['ml_hpo']``.

    Stage ``i`` runs the AAE training script from ``cfg['molecules_path']``
    through ``jsrun`` on one node, using the hyper-parameters found in
    ``cfg['ml_hpo'][i]`` and writing to a ``cvae_runs_<dim>_<stamp>`` model
    directory name (``dim = i + 3``).

    Reads ``cfg``, which is defined outside this function.
    NOTE(review): ``cfg['ml_hpo']`` is indexed by ``i``, so it presumably
    needs at least ``num_ML`` entries - confirm against the config schema.

    :param num_ML: number of learning stages to generate
    :return: list of ``Stage`` objects, in index order
    """
    base_stamp = int(time.time())
    node_count = 1  # each training task is launched on a single node
    learning_stages = []

    for idx in range(num_ML):
        stage = Stage()
        stage.name = 'learning'

        # Training script reference:
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
        task = Task()
        task.pre_exec = [
            '. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh',
            'module load gcc/7.4.0',
            'module load cuda/10.1.243',
            'module load hdf5/1.10.4',
            'export LANG=en_US.utf-8',
            'export LC_ALL=en_US.utf-8',
            'conda activate %s' % cfg['conda_pytorch'],
            'cd %s/CVAE_exps' % cfg['base_path'],
            'export LD_LIBRARY_PATH=/gpfs/alpine/proj-shared/med110/atrifan/scripts/cuda/targets/ppc64le-linux/lib/:$LD_LIBRARY_PATH',
            # Distributed (DDP) training manages device visibility itself.
            'unset CUDA_VISIBLE_DEVICES',
            'export OMP_NUM_THREADS=4',
        ]

        dim = idx + 3
        model_id = 'cvae_runs_%.2d_%d' % (dim, base_stamp + idx)
        hyper = cfg['ml_hpo'][idx]

        launcher = 'jsrun -n %s -r 1 -g 6 -a 6 -c 42 -d packed' % node_count
        train_script = ('%s/examples/bin/summit/run_aae_dist_summit_entk.sh'
                        % cfg['molecules_path'])

        # The whole launch line is passed as the executable string.
        task.executable = ['cat /dev/null; ' + launcher + ' ' + train_script]

        # Positional arguments handed to the training script.
        task.arguments = [
            '%s/MD_to_CVAE/cvae_input.h5' % cfg["base_path"],
            "./",
            model_id,
            str(cfg['residues']),
            str(hyper['latent_dim']),
            'non-amp',
            'distributed',
            str(hyper['batch_size']),
            str(cfg['epoch']),
            str(cfg['sample_interval']),
            hyper['optimizer'],
            hyper['loss_weights'],
            cfg['init_weights'],
        ]

        task.cpu_reqs = dict(
            processes=1,
            process_type='MPI',
            threads_per_process=4,
            thread_type='OpenMP',
        )
        task.gpu_reqs = dict(
            processes=1,
            process_type=None,
            threads_per_process=1,
            thread_type='CUDA',
        )

        stage.add_tasks(task)
        learning_stages.append(stage)

    return learning_stages
def create_workflow(Kconfig, args):
    """
    Build the pipeline for the next MD (+ optional analysis) iteration.

    The iteration to run is derived from the files already present in
    ``Kconfig.remote_output_directory``: iterations are counted for as long
    as at least ``num_replicas`` files named ``iter<N>_input*.pdb`` exist,
    and the last such iteration (or 0) is used.  At most one stage is added
    per call.  The replicas are spread over
    ``NODESIZE * GPUs_per_NODE`` simulation tasks, each running
    ``run_openmm.py`` on a contiguous replica index range.  Unless
    ``Kconfig.strategy`` is ``'extend'``, one analysis task running
    ``Kconfig.script_ana`` is added to the same stage.

    Side effect: when a stage is built, ``Kconfig.start_iter`` is updated to
    the next iteration number (as a string).

    :param Kconfig: configuration object; attributes are read by name
    :param args: parsed command-line arguments; only ``args.Kconfig``
                 (path of the configuration file) is used
    :return: the ``Pipeline`` containing zero or one stage
    :raises ValueError: if ``num_replicas`` is not positive, or if a stage
                        has to be built but no parallel slot is configured
    """
    wf = Pipeline()

    # Parsed so that a malformed start_iter still fails early; the effective
    # iteration is re-derived from the files on disk further down.
    cur_iter = int(Kconfig.start_iter)
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    if num_replicas < 1:
        # With a non-positive replica count the file scan below would never
        # terminate (any file count is >= num_replicas).
        raise ValueError(
            'num_replicas must be a positive integer, got %s' % num_replicas)
    script_ana = str(Kconfig.script_ana)
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]

    # Optional settings: fall back to the default file names when the
    # configuration does not provide them.
    try:
        systemxml = str(Kconfig.systemxml)
    except Exception:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except Exception:
        integratorxml = 'integrator-5.xml'

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)

    # Find the latest iteration for which a full set of input files exists.
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb'
                        % (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)

    start_iter = cur_iter
    # The second condition limits each call to a single iteration.
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):
        if num_parallel < 1:
            # The allocation below divides by the number of free slots.
            raise ValueError(
                'NODESIZE * GPUs_per_NODE must be positive, got %s'
                % num_parallel)

        # ----------------------------------------------------------------
        # Simulation tasks: split the replicas as evenly as possible over
        # the remaining parallel slots; each task handles the index range
        # [num_allocated_rep, num_allocated_rep + use_replicas).
        sim_stage = Stage()
        num_allocated_rep = 0
        num_used_parallel = 0
        while num_allocated_rep < num_replicas:
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)

            sim_task = Task()
            sim_task.executable = ['python']
            sim_task.pre_exec = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter,
            ]
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 10,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py',
                '--Kconfig', config_file,
                '--idxstart', str(num_allocated_rep),
                '--idxend', str(num_allocated_rep + use_replicas),
                '--path', combined_path,
                '>', 'md.log'
            ]

            # Files linked into the task sandbox; the MD run script is
            # always exposed under the fixed name run_openmm.py.
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]

            # In the first iteration every replica starts from the same
            # input structure, copied once per replica index.
            copy_arr = []
            if cur_iter == 0:
                copy_arr = [
                    '$SHARED/%s > %s/iter0_input%s.pdb' %
                    (Kconfig.md_input_file, combined_path, idx)
                    for idx in range(num_allocated_rep,
                                     num_allocated_rep + use_replicas)
                ]

            sim_task.link_input_data = link_arr
            sim_task.copy_input_data = copy_arr
            sim_task.copy_output_data = []

            num_allocated_rep += use_replicas
            num_used_parallel += 1
            sim_stage.add_tasks(sim_task)

        # ----------------------------------------------------------------
        # Analysis task(s): run in the same stage as the simulations,
        # skipped entirely for the 'extend' strategy.
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                ana_task.pre_exec = ana_settings
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 10,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log' %
                    (combined_path, cur_iter, anatask)
                ]
                sim_stage.add_tasks(ana_task)

        wf.add_stages(sim_stage)
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
def main(cmt_filename):
    """Build and run the pipeline for one CMT solution.

    The pipeline has three sequential stages:

    1. create the database entry for the earthquake
       (``01_Create_Database_Entry.py``),
    2. write the source files (``03_Write_Sources.py``),
    3. run the specfem solver (``./bin/xspecfem3D``) in the simulation
       directory below ``<eq_dir>/CMT_SIMs``.

    Parameters are read from the YAML files in the ``params`` directory that
    sits next to the directory containing this file.  The application
    manager is created from the names ``hostname`` and ``port``, which are
    defined outside this function.

    Args:
        cmt_filename: str containing the path to the CMT solution that is
                      supposed to be inverted for

    Usage:
        From the command line:
            python pipeline <path/to/cmtsolution>
    """

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters: directory and earthquake id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)

    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [DB_params["conda-activate"]]  # activate conda env
    t1.executable = [DB_params['bin-python']]
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    database_entry.add_tasks(t1)
    p.add_stages(database_entry)

    # ---- REQUEST DATA ----------------------------------------------------- #
    # NOTE: the data request stage (02_Request_Data.py) is currently not
    # part of the pipeline.

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [DB_params["conda-activate"]]  # activate conda env
    w_sources_t.executable = [DB_params['bin-python']]
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    w_sources.add_tasks(w_sources_t)
    p.add_stages(w_sources)

    # ---- Run Specfem ----------------------------------------------------- #

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(
        param_path, "SpecfemParams/CompilersAndModules.yml")

    # Load parameters.  ``specfemspecs`` is not used below; the file is still
    # read so that a missing or unreadable parameter file is reported here.
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    # Only the first entry ("CMT") is simulated.  The selection must be a
    # list slice: iterating over ``attr[0]`` directly would loop over the
    # characters "C", "M", "T" and point the tasks at non-existent
    # simulation directories.
    # NOTE(review): iterate over ``attr`` instead to run every simulation.
    for at in attr[:1]:
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module loading: start from a clean module environment
        sf_t.pre_exec = ['module purge']
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to the specfem directory of this simulation
        sf_t.pre_exec.append("cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }

        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Resource request description: target resource, project, queue, access
    # schema, walltime and the number of CPUs / GPUs.
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set of Pipelines to the Application Manager
    appman.workflow = {p}

    # Run the Application Manager
    appman.run()
# Untar the specfem input data 'tar xf specfem_data_event_%s.tar' % event, # Link to common DATABASES_MPI containing mesh files (~55GB) 'ln -s /lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/DATABASES_MPI DATABASES_MPI' ] t.executable = ['./bin/xspecfem3D'] t.cpu_reqs = { 'processes': 0, 'process_type': 'MPI', 'threads_per_process': 0, 'thread_type': 'OpenMP' } t.gpu_reqs = { 'processes': 384, 'process_type': 'MPI', 'threads_per_process': 1, 'thread_type': 'OpenMP' } t.copy_input_data = [ '/lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/specfem_data_event_%s.tar' % event, '/lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/specfem_validator.py' ] t.post_exec = [ 'python specfem_validator.py OUTPUT_FILES/output_solver.txt' ] specfem_stage.add_tasks(t2) p.add_stages(specfem_stage) res_dict = {
p = Pipeline() s = Stage() aggregator_dir = f'{run_dir}/aggregator' for i in range(ntasks): t = Task() t.cpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 4, 'thread_type': 'OpenMP' } t.gpu_reqs = { 'processes': 0, 'process_type': None, 'threads_per_process': 0, 'thread_type': None } t.executable = PYTHON t.arguments = [ f'{current_dir}/simulation.py', f'{run_dir}/simulations/all', ADIOS_XML, i, aggregator_dir ] s.add_tasks(t) t = Task() t.cpu_reqs = { 'processes': 1, 'process_type': None, 'threads_per_process': 4, 'thread_type': 'OpenMP'