def test_issue_239():

    t = Task()
    t.cpu_reqs = {'processes': 1}
    assert t.cpu_reqs == {  'processes': 1,
                            'thread_type': None,
                            'threads_per_process': 1,
                            'process_type': None}

    t.cpu_reqs = {'threads_per_process': 1}
    assert t.cpu_reqs == {  'processes': 1,
                            'thread_type': None,
                            'threads_per_process': 1,
                            'process_type': None}

    t.gpu_reqs = {'processes': 1}
    assert t.gpu_reqs == {  'processes': 1,
                            'thread_type': None,
                            'threads_per_process': 1,
                            'process_type': None}

    t.gpu_reqs = {'threads_per_process': 1}
    assert t.gpu_reqs == {  'processes': 1,
                            'thread_type': None,
                            'threads_per_process': 1,
                            'process_type': None}
    def generate_ml_stage(self) -> Stage:
        stage = Stage()
        stage.name = "learning"
        cfg = self.cfg.ml_stage

        task = Task()
        task.cpu_reqs = cfg.cpu_reqs.dict()
        task.gpu_reqs = cfg.gpu_reqs.dict()
        task.pre_exec = cfg.pre_exec
        task.executable = cfg.executable
        task.arguments = cfg.arguments

        # Update base parameters
        cfg.run_config.input_path = self.aggregated_data_path(
            self.cur_iteration)
        cfg.run_config.output_path = self.model_path(self.cur_iteration)
        if self.cur_iteration > 0:
            cfg.run_config.init_weights_path = self.latest_ml_checkpoint_path(
                self.cur_iteration - 1)

        cfg_path = self.experiment_dirs["ml_runs"].joinpath(
            f"ml_{self.cur_iteration:03d}.yaml")
        cfg.run_config.dump_yaml(cfg_path)

        task.arguments += ["-c", cfg_path]
        stage.add_tasks(task)

        return stage
    def generate_outlier_detection_stage(self) -> Stage:
        stage = Stage()
        stage.name = "outlier_detection"
        cfg = self.cfg.od_stage

        task = Task()
        task.cpu_reqs = cfg.cpu_reqs.dict()
        task.gpu_reqs = cfg.gpu_reqs.dict()
        task.pre_exec = cfg.pre_exec
        task.executable = cfg.executable
        task.arguments = cfg.arguments

        self.outlier_pdbs_path(self.cur_iteration).mkdir()

        # Update base parameters
        cfg.run_config.experiment_directory = self.cfg.experiment_directory
        cfg.run_config.input_path = self.aggregated_data_path(
            self.cur_iteration)
        cfg.run_config.output_path = self.outlier_pdbs_path(self.cur_iteration)
        cfg.run_config.weights_path = self.latest_ml_checkpoint_path(
            self.cur_iteration)
        cfg.run_config.restart_points_path = self.restart_points_path(
            self.cur_iteration)

        cfg_path = self.experiment_dirs["od_runs"].joinpath(
            f"od_{self.cur_iteration:03d}.yaml")
        cfg.run_config.dump_yaml(cfg_path)

        task.arguments += ["-c", cfg_path]
        stage.add_tasks(task)

        return stage
Example 5
def post_stage():
    if (not os.path.exists(f'{run_dir}/aggregator/stop.aggregator')):
        nstages = len(p.stages)
        s = Stage()
        s.name = f"{nstages}"
        t = Task()
        t.cpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 4,
            'thread_type': 'OpenMP'
        }
        t.gpu_reqs = {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        }
        t.name = f"{i}_{nstages}"
        t.executable = PYTHON
        t.arguments = [
            f'{current_dir}/simulation.py',
            f'{run_dir}/simulations/all/{i}_{nstages}', ADIOS_XML
        ]
        subprocess.getstatusoutput(
            f'ln -s {run_dir}/simulations/all/{i}_{nstages} {run_dir}/simulations/new/{i}_{nstages}'
        )
        s.add_tasks(t)
        s.post_exec = post_stage
        p.add_stages(s)
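A minimal sketch (not part of the original snippet) of how this adaptive callback could be seeded: calling post_stage() once adds stage "0", and every finished stage then schedules its successor through its post_exec hook until the aggregator writes stop.aggregator. The simulation index i, run_dir, current_dir, PYTHON and ADIOS_XML are assumed to be defined elsewhere.

# Hypothetical bootstrap for the adaptive pipeline above; names other than
# post_stage and p are assumptions carried over from the snippet.
p = Pipeline()
post_stage()          # adds stage "0"; each stage re-adds the next one via post_exec
# p would then be submitted for execution through an EnTK AppManager.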
Example 6
    def esmacs(self, rct_stage, stage, outdir="equilibration", name=None):

        for i in range(1, 13):
            t = Task()
            t.pre_exec = [
                "export WDIR=\"{}/{}\"".format(self.run_dir, name),
                ". {}".format(self.conda_init),
                "conda activate {}".format(self.esmacs_tenv),
                "module load {}".format(self.esmacs_tmodules),
                "mkdir -p $WDIR/replicas/rep{}/{}".format(i, outdir),
                "cd $WDIR/replicas/rep{}/{}".format(i, outdir),
                "rm -f {}.log {}.xml {}.dcd {}.chk".format(
                    stage, stage, stage, stage), "export OMP_NUM_THREADS=1"
            ]
            # t.executable = '/ccs/home/litan/miniconda3/envs/wf3/bin/python3.7'
            t.executable = 'python3'
            t.arguments = ['$WDIR/{}.py'.format(stage)]
            t.post_exec = []
            t.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            getattr(self, rct_stage).add_tasks(t)
            print(getattr(self, rct_stage).to_dict())
Example 7
def generate_task(cfg: BaseStageConfig) -> Task:
    task = Task()
    task.cpu_reqs = cfg.cpu_reqs.dict().copy()
    task.gpu_reqs = cfg.gpu_reqs.dict().copy()
    task.pre_exec = cfg.pre_exec.copy()
    task.executable = cfg.executable
    task.arguments = cfg.arguments.copy()
    return task
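A minimal usage sketch for this helper (not from the source): it assumes a BaseStageConfig-style object cfg like the ones consumed by the stage generators in the other examples, and a placeholder path for the run configuration.

# Hypothetical usage of generate_task(); cfg and the YAML path are assumptions.
stage = Stage()
stage.name = "example"

task = generate_task(cfg)
task.arguments += ["-c", "/tmp/example.yaml"]   # placeholder run-config path
stage.add_tasks(task)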
Example 8
    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        s3 = Stage()
        s3.name = 'learning'

        # learn task
        for i in range(num_ML):
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            t3.pre_exec = ['module reset']
            t3.pre_exec += [
                '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
            ]
            t3.pre_exec += ['module load cuda/9.1.85']
            t3.pre_exec += ['conda activate rp.copy']
            t3.pre_exec += ['export CUDA_VISIBLE_DEVICES=0']

            t3.pre_exec += [
                'export PYTHONPATH=/gpfs/alpine/scratch/hrlee/bip179/hyperspace/microscope/experiments/CVAE_exps:$PYTHONPATH'
            ]
            t3.pre_exec += [
                'cd /gpfs/alpine/scratch/hrlee/bip179/hyperspace/microscope/experiments/CVAE_exps'
            ]
            time_stamp = int(time.time())
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp)
            t3.pre_exec += ['mkdir -p {0} && cd {0}'.format(cvae_dir)]
            t3.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python']  # train_cvae.py
            t3.arguments = [
                '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/CVAE_exps/train_cvae.py',
                '-f',
                '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE/cvae_input.h5',
                '-d', dim
            ]

            t3.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t3.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the learn task to the learning stage
            s3.add_tasks(t3)
            time.sleep(1)
        return s3
Example 9
    def generate_interfacing_stage():
        s4 = Stage()
        s4.name = 'scanning'

        # Scanning for outliers and preparing the next stage of MDs
        t4 = Task()
        t4.pre_exec = []
        #t4.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
        #t4.pre_exec += ['module load cuda/9.1.85']
        #t4.pre_exec += ['conda activate %s' % conda_path]
        t4.pre_exec += [
            'module unload prrte', 'module unload python', 'module load xl',
            'module load xalt', 'module load spectrum-mpi', 'module load cuda',
            'module list'
        ]
        t4.pre_exec += [
            '. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            'conda deactivate', 'conda deactivate',
            'conda activate /gpfs/alpine/proj-shared/med110/wf-2/conda/envs/ibm-wml-ce-cloned'
        ]
        #'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2']

        t4.pre_exec += [
            'export PYTHONPATH=%s/CVAE_exps:%s/CVAE_exps/cvae:$PYTHONPATH' %
            (base_path, base_path)
        ]
        t4.pre_exec += ['cd %s/Outlier_search' % base_path]
        #t4.executable = ['%s/bin/python' % conda_path]
        t4.executable = ['python']
        t4.arguments = [
            'outlier_locator.py', '--md', md_path, '--cvae', cvae_path,
            '--pdb', pdb_file
        ]
        #'--ref', ref_pdb_file]

        t4.cpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 16,
            'thread_type': 'OpenMP'
        }
        t4.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': 'CUDA'
        }
        s4.add_tasks(t4)
        s4.post_exec = func_condition

        return s4
Example 10
    def generate_interfacing_stage():
        s4 = Stage()
        s4.name = 'scanning'

        # Scanning for outliers and preparing the next stage of MDs
        t4 = Task()
        t4.pre_exec = []
        t4.pre_exec = ['module reset']
        t4.pre_exec += [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
        ]
        t4.pre_exec += ['module load cuda/9.1.85']
        t4.pre_exec += ['conda activate rp.copy']
        t4.pre_exec += ['export CUDA_VISIBLE_DEVICES=0']

        t4.pre_exec += [
            'export PYTHONPATH=/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/CVAE_exps:$PYTHONPATH'
        ]
        t4.pre_exec += [
            'cd /gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/Outlier_search'
        ]
        # python outlier_locator.py -m ../MD_exps/fs-pep -c ../CVAE_exps -p ../MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb
        t4.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python']
        t4.arguments = [
            'outlier_locator.py', '--md', '../MD_exps/fs-pep',
            '--cvae', '../CVAE_exps',
            '--pdb', '../MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb'
        ]
        #     t4.arguments = ['/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/Outlier_search/outlier_locator.py',
        #             '-m', '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_exps/fs-pep',
        #             '-c', '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/CVAE_exps',
        #             '-p', '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb'
        #             ]

        t4.cpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 12,
            'thread_type': 'OpenMP'
        }
        t4.gpu_reqs = {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': 'CUDA'
        }
        s4.add_tasks(t4)
        s4.post_exec = func_condition

        return s4
Example 11
    def generate_MD_stage(num_MD=1):
        """
        Function to generate MD stage. 
        """
        s1 = Stage()
        s1.name = 'MD'

        # MD tasks
        time_stamp = int(time.time())
        for i in range(num_MD):
            t1 = Task()
            t1.pre_exec = [
                '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
            ]
            t1.pre_exec += ['module load cuda/9.1.85']
            t1.pre_exec += ['conda activate %s' % conda_path]
            t1.pre_exec += [
                'export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path
            ]
            t1.pre_exec += ['cd %s/MD_exps/fs-pep' % base_path]
            t1.pre_exec += [
                'mkdir -p omm_runs_%d && cd omm_runs_%d' %
                (time_stamp + i, time_stamp + i)
            ]
            t1.executable = ['%s/bin/python' % conda_path]  # run_openmm.py
            t1.arguments = ['%s/MD_exps/fs-pep/run_openmm.py' % base_path]
            #   t1.arguments += ['--topol', '%s/MD_exps/fs-pep/pdb/topol.top' % base_path]
            t1.arguments += [
                '--pdb_file',
                '%s/MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb' % base_path,
                '--length', LEN_sim
            ]

            # assign hardware to the task
            t1.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t1.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the MD task to the simulating stage
            s1.add_tasks(t1)
        return s1
Example 12
    def generate_task(self, **ensembles):
        """ Generate a `radical.entk` task.

        Parameters
        ----------
        ensembles: dict, OrderedDict
            Dictionary of the *current* values of variables that are ensembles. All the variables
            that were declared with `add_ensemble` should be specified here so that a correct
            task object can be generated.
        """

        for k, w in ensembles.items():
            setattr(self, k, w)

        if not self.all_variables_defined():
            raise ValueError('Some variables are not defined!')

        task = Task()
        task.name = ensembles['task_name']

        task.pre_exec += self.engine.pre_exec
        task.executable += str(self.engine.executable)
        task.arguments += self.engine.arguments
        task.cpu_reqs = {
            'processes': self._processes,
            'process_type': 'MPI' if self.engine.uses_mpi else None,
            'threads_per_process': self._threads_per_process,
            'thread_type': None
        }

        task.gpu_reqs = {
            'processes': self._gpu_processes,
            'process_type': 'MPI' if self.engine.gpu_uses_mpi else None,
            'threads_per_process': self._gpu_threads_per_process,
            'thread_type': None
        }

        task.arguments.extend(self.arguments)
        task.copy_input_data.extend(self.copied_files)
        task.copy_input_data.extend(self.system.copied_files)

        task.post_exec.append('echo "{}" > sim_desc.txt'.format(task.name))

        task.link_input_data.extend(self.input_data(**ensembles))
        task.link_input_data.extend(self.system.linked_files)

        task.pre_exec.extend(
            self._sed.format(n, v, f)
            for f, vs in self.get_variables().items() for n, v in vs)

        return task
Example 13
def esmacs(cfg, names, stage, outdir):

    s = Stage()
    s.name = 'S3.%s' % stage
    #print("DEBUG:instantiation:  %s" % len(s._tasks))

    for comp in names:
        #print("DEBUG:first loop: %s" % len(s._tasks))
        for i in range(1, cfg['n_replicas']):
            #print("DEBUG:second loop:start: %s" % len(s._tasks))
            t = Task()

            # RCT native
            t.pre_exec = [
                #". /sw/summit/lmod/lmod/init/profile",
                "export WDIR=\"{}\"".format(comp),
                ". {}".format(cfg['conda_init']),
                "conda activate {}".format(cfg['conda_esmacs_task_env']),
                "module load {}".format(cfg['esmacs_task_modules']),
                "mkdir -p $WDIR/replicas/rep{}/{}".format(i, outdir),
                "cd $WDIR/replicas/rep{}/{}".format(i, outdir),
                #"rm -f {}.log {}.xml {}.dcd {}.chk".format(stage, stage, stage, stage),
                "export OMP_NUM_THREADS=1"]

            t.executable = 'python3'
            t.arguments = ['$WDIR/{}.py'.format(stage)]

            # Bash wrapper
            #t.executable = '%s/wf3.sh' % comp
            #t.arguments  = [comp, i, outdir, stage,
            #                cfg['conda_init'],
            #                cfg['conda_esmacs_task_env'],
            #                cfg['esmacs_task_modules']]

            t.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'}

            t.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'}
            s.add_tasks(t)
            #print("DEBUG:second loop:end: %s" % len(s._tasks))

    return s
Example 14
    def generate_interfacing_stage():
        s4 = Stage()
        s4.name = 'scanning'

        # Scanning for outliers and preparing the next stage of MDs
        t4 = Task()

        t4.pre_exec  = ['. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh']
        t4.pre_exec += ['conda activate %s' % cfg['conda_pytorch']]
        t4.pre_exec += ['mkdir -p %s/Outlier_search/outlier_pdbs' % cfg['base_path']]
        t4.pre_exec += ['export models=""; for i in `ls -d %s/CVAE_exps/model-cvae_runs*/`; do if [ "$models" != "" ]; then    models=$models","$i; else models=$i; fi; done;cat /dev/null' % cfg['base_path']]
        t4.pre_exec += ['export LANG=en_US.utf-8', 'export LC_ALL=en_US.utf-8']
        t4.pre_exec += ['unset CUDA_VISIBLE_DEVICES', 'export OMP_NUM_THREADS=4']

        cmd_cat = 'cat /dev/null'
        cmd_jsrun = 'jsrun -n %s -a 6 -g 6 -r 1 -c 7' % cfg['node_counts']

        #molecules_path = '/gpfs/alpine/world-shared/ven201/tkurth/molecules/'
        t4.executable = [' %s; %s %s/examples/outlier_detection/run_optics_dist_summit_entk.sh' % (cmd_cat, cmd_jsrun, cfg['molecules_path'])]
        t4.arguments = ['%s/bin/python' % cfg['conda_pytorch']]
        t4.arguments += ['%s/examples/outlier_detection/optics.py' % cfg['molecules_path'],
                        '--sim_path', '%s/MD_exps/%s' % (cfg['base_path'], cfg['system_name']),
                        '--pdb_out_path', '%s/Outlier_search/outlier_pdbs' % cfg['base_path'],
                        '--restart_points_path',
                        '%s/Outlier_search/restart_points.json' % cfg['base_path'],
                        '--data_path', '%s/MD_to_CVAE/cvae_input.h5' % cfg['base_path'],
                        '--model_paths', '$models',
                        '--model_type', cfg['model_type'],
                        '--min_samples', 10,
                        '--n_outliers', 500,
                        '--dim1', cfg['residues'],
                        '--dim2', cfg['residues'],
                        '--cm_format', 'sparse-concat',
                        '--batch_size', cfg['batch_size'],
                        '--distributed',
                        '-iw', cfg['init_weights']]

        t4.cpu_reqs = {'processes'          : 1,
                       'process_type'       : None,
                       'threads_per_process': 12,
                       'thread_type'        : 'OpenMP'}
        t4.gpu_reqs = {'processes'          : 1,
                       'process_type'       : None,
                       'threads_per_process': 1,
                       'thread_type'        : 'CUDA'}

        s4.add_tasks(t4)
        s4.post_exec = func_condition
        return s4
Example 15
    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        s3 = Stage()
        s3.name = 'learning'

        # learn task
        time_stamp = int(time.time())
        for i in range(num_ML):
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            t3.pre_exec += [
                '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
            ]
            t3.pre_exec += ['module load cuda/9.1.85']
            t3.pre_exec += ['conda activate %s' % conda_path]

            t3.pre_exec += [
                'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path
            ]
            t3.pre_exec += ['cd %s' % cvae_path]
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp + i)
            t3.pre_exec += ['mkdir -p {0} && cd {0}'.format(cvae_dir)]
            t3.executable = ['%s/bin/python' % conda_path]  # train_cvae.py
            t3.arguments = [
                '%s/train_cvae.py' % cvae_path, '--h5_file',
                '%s/cvae_input.h5' % agg_path, '--dim', dim
            ]

            t3.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t3.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the learn task to the learning stage
            s3.add_tasks(t3)

        return s3
Example 16
    def describe_MD_pipeline():
        p = Pipeline()
        p.name = 'MD'

        # MD stage
        s1 = Stage()
        s1.name = 'OpenMM'

        # Each Task() is an OpenMM executable that will run on a single GPU.
        # Set sleep time for local testing
        # for i in range(18):

        task = Task()
        task.name = 'md' 
        
        task.pre_exec    = []

        # task.pre_exec   += ['export MINICONDA=/gpfs/alpine/scratch/jdakka/bip178/miniconda']
        # task.pre_exec   += ['export PATH=$MINICONDA/bin:$PATH']
        # task.pre_exec   += ['export LD_LIBRARY_PATH=$MINICONDA/lib:$LD_LIBRARY_PATH']
        task.pre_exec   += ['module load python/2.7.15-anaconda2-5.3.0']
        task.pre_exec   += ['module load cuda/9.1.85']
        task.pre_exec   += ['module load gcc/6.4.0']
        task.pre_exec   += ['source activate openmm']
        task.pre_exec   += ['cd /gpfs/alpine/scratch/jdakka/bip178/benchmarks/MD_exps/fs-pep/results_2']
        task.executable  = '/ccs/home/jdakka/.conda/envs/openmm/bin/python'
        task.arguments = ['run_openmm.py', '-f', 
        '/gpfs/alpine/scratch/jdakka/bip178/benchmarks/MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb']
        task.cpu_reqs = {'processes': 1,
                         'process_type': None,
                         'threads_per_process': 1,
                         'thread_type': None
                         }

        task.gpu_reqs = {'processes': 1,
                         'process_type': None,
                         'threads_per_process': 1,
                         'thread_type': 'CUDA'
                        }

        # Add the MD task to the MD stage
        s1.add_tasks(task)

        # Add MD stage to the MD Pipeline
        p.add_stages(s1)


        return p
Example 17
    def generate_ML_tasks(self): 
        """
        Function to generate the learning stage
        """
        p = Pipeline() 
        p.name = 'learning' 
        s3 = Stage()
        s3.name = 'training'

        # learn task
        for i in range(self.num_ML): 
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            t3.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
            t3.pre_exec += ['module load cuda/10.1.168']
            t3.pre_exec += ['conda activate %s' % conda_path] 

            t3.pre_exec += ['export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path]
            t3.pre_exec += ['cd %s' % cvae_path]
            t3.pre_exec += [f"sleep {i}"]
            dim = i + 3 
            t3.executable = ['%s/bin/python' % conda_path]  # train_cvae.py
            t3.arguments = [
                    '%s/train_cvae.py' % cvae_path, 
                    '--h5_file', '%s/cvae_input.h5' % agg_path, 
                    '--dim', dim] 
            
            t3.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 4,
                    'thread_type': 'OpenMP'
                    }
            t3.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                    }
        
            # Add the learn task to the learning stage
            s3.add_tasks(t3)
        p.add_stages(s3)

        return p 
Example 18
    def generate_MD_tasks(self): 
        """
        Function to generate MD tasks. 
        """
        p = Pipeline() 
        p.name = "MD"
        s1 = Stage()
        s1.name = 'MD'

        # MD tasks
        for i in range(self.num_MD):
            t1 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
            t1.pre_exec = ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
            t1.pre_exec += ['module load cuda/10.1.168']
            t1.pre_exec += ['conda activate %s' % conda_path] 
            t1.pre_exec += ['export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path] 
            t1.pre_exec += ['cd %s' % md_path] 
            # t1.pre_exec += [f"sleep {i}"]
            t1.executable = ['%s/bin/python' % conda_path]  # run_openmm.py
            t1.arguments = ['%s/run_openmm.py' % md_path] 
            t1.arguments += ['--pdb_file', pdb_file]
            if top_file: 
                t1.arguments += ['--topol', top_file]
            t1.arguments += ['--length', 1000]

            # assign hardware to the task
            t1.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 4,
                    'thread_type': 'OpenMP'
                    }
            t1.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                    }
                              
            # Add the MD task to the simulating stage
            s1.add_tasks(t1)
        p.add_stages(s1)
        return p
Example 19
    def generate_interfacing_task(self): 
        p = Pipeline() 
        p.name = 'interfacing'
        s4 = Stage() 
        s4.name = 'scanning'

        # Scanning for outliers and preparing the next stage of MDs
        t4 = Task() 
        t4.pre_exec = [] 
        t4.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
        t4.pre_exec += ['module load cuda/10.1.168']
        t4.pre_exec += ['conda activate %s' % conda_path] 

        t4.pre_exec += ['export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path] 
        t4.pre_exec += ['cd %s/Outlier_search' % base_path] 
        t4.executable = ['%s/bin/python' % conda_path] 
        t4.arguments = [
                'outlier_locator.py', 
                '--md',  md_path, 
                '--cvae', cvae_path, 
                '--pdb', pdb_file, 
                '--ref', ref_pdb_file,
                '--n_out', self.num_outliers, 
                '--timeout', self.t_timeout]

        t4.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 12,
                'thread_type': 'OpenMP'
                }
        t4.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
                }
        s4.add_tasks(t4) 
        p.add_stages(s4)
        
        return p
    def generate_md_stage(self) -> Stage:
        stage = Stage()
        stage.name = "MD"
        cfg = self.cfg.md_stage

        if self.cur_iteration > 0:
            outlier_filename = self.restart_points_path(self.cur_iteration - 1)
            pdb_filenames = get_outlier_pdbs(outlier_filename)
        else:
            pdb_filenames = get_initial_pdbs(cfg.run_config.initial_pdb_dir)

        for i, pdb_filename in zip(range(cfg.num_jobs), cycle(pdb_filenames)):
            task = Task()
            task.cpu_reqs = cfg.cpu_reqs.dict()
            task.gpu_reqs = cfg.gpu_reqs.dict()
            task.pre_exec = cfg.pre_exec
            task.executable = cfg.executable
            task.arguments = cfg.arguments

            # Set unique output directory name for task
            dir_prefix = f"md_{self.cur_iteration:03d}_{i:04d}"

            # Update base parameters
            cfg.run_config.result_dir = self.experiment_dirs["md_runs"]
            cfg.run_config.dir_prefix = dir_prefix
            cfg.run_config.pdb_file = pdb_filename

            # Write MD yaml to tmp directory to be picked up and moved by MD job
            cfg_path = self.experiment_dirs["tmp"].joinpath(
                f"{dir_prefix}.yaml")
            cfg.run_config.dump_yaml(cfg_path)

            task.arguments += ["-c", cfg_path]
            stage.add_tasks(task)

        return stage
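A hedged sketch of how the stage generators above (generate_md_stage, generate_ml_stage, generate_outlier_detection_stage) might be chained into a single pipeline; the build_pipeline name and the MD -> ML -> outlier-detection ordering are assumptions, not taken from the source.

    def build_pipeline(self) -> Pipeline:
        # Hypothetical composition of the stage generators shown above.
        pipeline = Pipeline()
        pipeline.add_stages(self.generate_md_stage())
        pipeline.add_stages(self.generate_ml_stage())
        pipeline.add_stages(self.generate_outlier_detection_stage())
        return pipeline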
def create_workflow(Kconfig):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    # for non-zero iterations, files are assumed to already be in combined_path
    combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-grlsd'
    if cur_iter == 0:
        restart_iter = 0
    else:
        restart_iter = cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro', '--clone',
            str(Kconfig.num_replicas)
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/iter_%s/input.gro' %
            (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter),
            '$SHARED/%s > input.gro' % os.path.basename(Kconfig.md_input_file),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]

        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        # ------------------------------------------------------------------------------------------------------------------
    else:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro'
        ]
        pre_proc_task.copy_input_data = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):

            sim_task = Task()
            if Kconfig.use_gpus == 'False':
                sim_task.executable = [
                    '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python'
                ]
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter
                ]
                sim_task.cores = int(Kconfig.num_CUs_per_MD_replica)
                # on Blue Waters, tasks on one node are executed concurrently
            else:
                sim_task.executable = ['python']
                sim_task.pre_exec = [
                    'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
                    'module add bwpy-mpi', 'module add fftw',
                    'module add cray-netcdf',
                    'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                    'module add cmake', 'module unload darshan, xalt',
                    'export CRAYPE_LINK_TYPE=dynamic',
                    'export CRAY_ADD_RPATH=yes', 'export FC=ftn',
                    'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                    'export tasks=md',
                    'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
                ]
                sim_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                sim_task.cpu_reqs = {
                    'processes': 0,
                    'process_type': None,
                    'threads_per_process': 0,
                    'thread_type': None
                }
            sim_task.arguments = [
                'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro',
                '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'False', '>', 'md.log'
            ]
            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]

            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))
            if restart_iter == cur_iter:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
            'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan, xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=pre_ana',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']

        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py'
        ]

        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)
        ]
        #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu', 'module unload bwpy',
            'module load bwpy/0.3.0', 'module add bwpy-mpi', 'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=lsdmap',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        ana_task.executable = ['lsdmap']  #/u/sciteam/hruska/local/bin/lsdmap
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]

        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter)
        ]
        ana_task.copy_output_data = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
            #'lsdmap.log > output/iter_%s/lsdmap.log'%cur_iter,
            'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter),
            'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter),
            'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter),
            'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter),
            'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter)
        ]
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1)
            ]

        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter
            ]

        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)

        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #                 to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #     Arguments:
        #             num_replicas              = number of configurations to be generated in the new coordinate file
        #             out                   = output filename
        #             cycle                 = iteration number
        #             max_dead_neighbors    = max dead neighbors to be considered
        #             max_alive_neighbors   = max alive neighbors to be considered
        #             numCUs                = number of simulation instances/ number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task._name = 'post_ana_task'
        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module add bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=post_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            post_ana_task.executable = ['python']
            post_ana_task.arguments = [
                'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev',
                'ncopies.nc', 'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
                Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
                'input.gro', cur_iter, Kconfig.num_parallel_MD_sim,
                'weight_out.w', 'tmpha.eg'
            ]

            post_ana_task.link_input_data = [
                '$SHARED/post_analyze.py > post_analyze.py',
                '$SHARED/selection.py > selection.py',
                '$SHARED/selection-cluster.py > selection-cluster.py',
                '$SHARED/reweighting.py > reweighting.py',
                '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1),
                '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter),
                '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter)
            ]

            if (cur_iter % Kconfig.nsave == 0):
                post_ana_task.download_output_data = [
                    'out.gro > output/iter_%s/out.gro' % cur_iter,
                    'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                    'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png'
                    % (cur_iter),
                    'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
                    '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' %
                    (combined_path, cur_iter, cur_iter)
                ]

            post_ana_task.copy_output_data = [
                'ncopies.nc > %s/iter_%s/ncopies.nc' %
                (combined_path, cur_iter),
                'weight_out.w > %s/iter_%s/weight_out.w' %
                (combined_path, cur_iter),
                'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png'
                % cur_iter,
                'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png'
                % cur_iter,
                'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png'
                % cur_iter
            ]

        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)

        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
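A hedged sketch of how the pipeline returned by create_workflow might be submitted through an EnTK AppManager; the resource description and the RabbitMQ hostname/port are placeholders, and recent EnTK releases may not require the hostname/port arguments at all.

from radical.entk import AppManager

# Placeholder broker endpoint and resource request (assumptions, not from the source).
amgr = AppManager(hostname='localhost', port=5672)
amgr.resource_desc = {
    'resource': 'local.localhost',   # placeholder resource label
    'walltime': 60,                  # minutes (placeholder)
    'cpus'    : 1,
}
amgr.workflow = [create_workflow(Kconfig)]   # Kconfig: configuration object, loaded elsewhere
amgr.run()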
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates an RP ComputeUnitDescription with the complete
    Task description
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 1,
                   'thread_type': 'OpenMP'
                   }
    t1.gpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 2,
                   'thread_type': 'OpenMP'
                   }
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'], t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']
    assert cud.post_exec == t1.post_exec

    assert {'source': 'upload_input.dat', 'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat', 'target': 'download_output.dat'} in cud.output_staging
Example 23
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    # for non-zero iterations, files are assumed to already be in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    #if cur_iter==0:
    #	restart_iter=0
    #else:
    #	restart_iter=cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        # ------------------------------------------------------------------------------------------------------------------

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        def_rep_per_thread = int(num_replicas / num_parallel) + 1
        num_allocated_rep = 0
        num_used_threads = 0
        while (num_allocated_rep < num_replicas):
            if (num_used_threads == num_parallel):
                print("ALLERT tried use more gpus than allocated")
            if ((num_replicas - num_allocated_rep) > def_rep_per_thread):
                use_replicas = def_rep_per_thread
            else:
                use_replicas = (num_replicas - num_allocated_rep)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module load bwpy',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=md',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'run_openmm.py', '--trajstride', '10', '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'True', '>', 'md.log'
            ]
            link_arr = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            if str(Kconfig.strategy) == 'extend':
                copy_out = []
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                        (combined_path, cur_iter, idx, combined_path,
                         (cur_iter + 1), idx)
                    ]

                sim_task.copy_output_data = copy_out
                #if Kconfig.ndx_file is not None:
                #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module load bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=tica_msm_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                'run-tica-msm.py', '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '>', 'analyse.log'
            ]

            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }

            ana_task.link_input_data = [
                '$SHARED/run-tica-msm.py > run-tica-msm.py',
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]

            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]

            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                   'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                   'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                   'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                   '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                   ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                           'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                           'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)

        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
Example #24
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str):

            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
def test_task_exceptions(s,l,i,b):

    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s,l,i,b]

    for data in data_type:

        if not isinstance(data,str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data,list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, list):

            with pytest.raises(TypeError):
                t.executable = data

        if not isinstance(data, str):

            with pytest.raises(ValueError):
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': data,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': data
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': data,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': data
                            }

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                                'processes': data,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': data,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': data,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': data,
                                'thread_type': None
                            }
Example #26
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    # assumes that for non-zero iterations the input files are already in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # '/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  #run-tica-msm4.py

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)
    if cur_iter == 0:
        #pre_proc_stage = Stage()
        #pre_proc_task = Task()
        #pre_proc_task.pre_exec = ['export tasks=pre_proc_task','export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
        #pre_proc_task.executable = ['mv']
        #pre_proc_task.arguments = [ combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M") ]
        #pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        #pre_proc_stage.add_tasks(pre_proc_task)
        #wf.add_stages(pre_proc_stage)
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_reference, combined_path, Kconfig.md_reference)
        ]  # '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py') ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

        # ------------------------------------------------------------------------------------------------------------------
    start_iter = cur_iter
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        #num_used_threads=0
        #print(def_rep_per_thread)
        while (num_allocated_rep < num_replicas):
            #if (num_used_threads>=num_parallel):
            #   print("ALLERT tried use more gpus than allocated")
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            #if ((num_replicas-num_allocated_rep)>def_rep_per_thread):  # check if use all threads
            #   use_replicas=def_rep_per_thread
            #else:  #use pnly part of threads
            #   use_replicas=(num_replicas-num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py', '--trajstride',
                str(Kconfig.trajstride), '--Kconfig',
                str(args.Kconfig), '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj',
                str(Kconfig.save_alltraj), '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #  for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##     #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #     copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                script_ana, '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '--ref',
                str(Kconfig.md_reference), '>', 'analyse.log'
            ]

            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }

            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]

            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]

            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                   'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                   'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                   'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                   '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                   ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                           'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                           'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)

        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
Example #27
    def generate_MD_stage(num_MD=1):
        """
        Function to generate MD stage.
        """
        s1 = Stage()
        s1.name = 'MD'

        initial_MD = True
        outlier_filepath = '%s/Outlier_search/restart_points.json' % cfg['base_path']

        if os.path.exists(outlier_filepath):
            initial_MD = False
            outlier_file = open(outlier_filepath, 'r')
            outlier_list = json.load(outlier_file)
            outlier_file.close()

        # MD tasks
        time_stamp = int(time.time())
        for i in range(num_MD):
            t1 = Task()

            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
            t1.pre_exec  = ['. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh']
            t1.pre_exec += ['module load cuda/9.1.85']
            t1.pre_exec += ['conda activate %s' % cfg['conda_openmm']]
            t1.pre_exec += ['export PYTHONPATH=%s/MD_exps:%s/MD_exps/MD_utils:$PYTHONPATH' %
                (cfg['base_path'], cfg['base_path'])]
            t1.pre_exec += ['cd %s/MD_exps/%s' % (cfg['base_path'], cfg['system_name'])]
            t1.pre_exec += ['mkdir -p omm_runs_%d && cd omm_runs_%d' % (time_stamp+i, time_stamp+i)]

            t1.executable = ['%s/bin/python' % cfg['conda_openmm']]  # run_openmm.py
            t1.arguments = ['%s/MD_exps/%s/run_openmm.py' % (cfg['base_path'], cfg['system_name'])]
            #t1.arguments += ['--topol', '%s/MD_exps/fs-pep/pdb/topol.top' % cfg['base_path']]

            if 'top_file' in cfg:
                t1.arguments += ['--topol', cfg['top_file']]

            # pick initial point of simulation
            if initial_MD or i >= len(outlier_list):
                t1.arguments += ['--pdb_file', cfg['pdb_file'] ]
            elif outlier_list[i].endswith('pdb'):
                t1.arguments += ['--pdb_file', outlier_list[i]]
                t1.pre_exec += ['cp %s ./' % outlier_list[i]]
            elif outlier_list[i].endswith('chk'):
                t1.arguments += ['--pdb_file', cfg['pdb_file'],
                        '-c', outlier_list[i]]
                t1.pre_exec += ['cp %s ./' % outlier_list[i]]

            # how long to run the simulation
            if initial_MD:
                t1.arguments += ['--length', cfg['LEN_initial']]
            else:
                t1.arguments += ['--length', cfg['LEN_iter']]

            # assign hardware the task
            t1.cpu_reqs = {'processes'          : 1,
                           'process_type'       : None,
                           'threads_per_process': 4,
                           'thread_type'        : 'OpenMP'}
            t1.gpu_reqs = {'processes'          : 1,
                           'process_type'       : None,
                           'threads_per_process': 1,
                           'thread_type'        : 'CUDA'}

            # Add the MD task to the simulating stage
            s1.add_tasks(t1)
        return s1
Example #28
    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        s3 = Stage()
        s3.name = 'learning'

        # learn task
        time_stamp = int(time.time())
        for i in range(num_ML):
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            #t3.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
            #t3.pre_exec += ['module load cuda/9.1.85']
            #t3.pre_exec += ['conda activate %s' % conda_path]

            #t3.pre_exec += ['module unload python']
            #t3.pre_exec += ['. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            #        'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
            #        'conda deactivate',
            #        'conda deactivate',
            #        'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2']
            #t3.pre_exec += ['module load ibm-wml-ce']
            t3.pre_exec += [
                'module unload prrte', 'module unload python',
                'module load xl', 'module load xalt',
                'module load spectrum-mpi', 'module load cuda', 'module list'
            ]
            t3.pre_exec += [
                'export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path
            ]
            t3.pre_exec += ['cd %s' % cvae_path]
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp + i)
            t3.pre_exec += [
                'mkdir -p {0}/benchmarks && cd {0}'.format(cvae_dir)
            ]
            #t3.pre_exec += ['/sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/ddlrun /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s' % (cvae_path, dim)]
            t3.pre_exec += ['module load ibm-wml-ce', 'env']
            t3.pre_exec += [
                '. /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
                'source /sw/summit/ibm-wml-ce/anaconda-base/etc/profile.d/conda.sh',
                'conda deactivate', 'conda deactivate',
                'conda activate /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2'
            ]
            t3.pre_exec += ['export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5"']
            t3.pre_exec += [
                'jsrun --erf_input /gpfs/alpine/med110/scratch/atrifan2/covid19/PLPro/entk_cvae_md_hvd/RANKFILE /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s'
                % (cvae_path, dim)
            ]
            #t3.pre_exec += ['/sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/ddlrun /sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python %s/cvae/train_cvae.py -f ../bytes-train.tfrecords --dim %s' % (cvae_path, dim)]
            t3.executable = [
                'date'
            ]  #t3.executable = ['/sw/summit/ibm-wml-ce/anaconda-base/envs/ibm-wml-ce-1.7.0-2/bin/python']
            #t3.arguments = [ '%s/cvae/train_cvae.py' % (cvae_path),
            #        '-f', '../bytes-train.tfrecords',
            #        '--dim', dim]
            #t3.executable = ['%s/bin/python' % conda_path]  # train_cvae.py
            #t3.arguments = ['%s/train_cvae.py' % cvae_path,
            #        '--h5_file', '%s/cvae_input.h5' % agg_path,
            #        '--dim', dim]

            t3.cpu_reqs = {
                'processes': 6,
                'process_type': 'MPI',
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t3.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the learn task to the learning stage
            s3.add_tasks(t3)

        return s3
Example #29
    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        # learn task
        time_stamp = int(time.time())
        stages=[]
        for i in range(num_ML):
            s3 = Stage()
            s3.name = 'learning'


            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec  = ['. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh']
            t3.pre_exec += ['module load gcc/7.4.0',
                            'module load cuda/10.1.243',
                            'module load hdf5/1.10.4',
                            'export LANG=en_US.utf-8',
                            'export LC_ALL=en_US.utf-8']
            t3.pre_exec += ['conda activate %s' % cfg['conda_pytorch']]
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp+i)
            t3.pre_exec += ['cd %s/CVAE_exps' % cfg['base_path']]
            t3.pre_exec += ['export LD_LIBRARY_PATH=/gpfs/alpine/proj-shared/med110/atrifan/scripts/cuda/targets/ppc64le-linux/lib/:$LD_LIBRARY_PATH']
            #t3.pre_exec += ['mkdir -p %s && cd %s' % (cvae_dir, cvae_dir)] # model_id creates sub-dir
            # this is for ddp, distributed
            t3.pre_exec += ['unset CUDA_VISIBLE_DEVICES', 'export OMP_NUM_THREADS=4']
            #pnodes = cfg['node_counts'] // num_ML # partition
            pnodes = 1  # max(1, pnodes)

            hp = cfg['ml_hpo'][i]
            cmd_cat    = 'cat /dev/null'
            cmd_jsrun  = 'jsrun -n %s -r 1 -g 6 -a 6 -c 42 -d packed' % pnodes

            # VAE config
            # cmd_vae    = '%s/examples/run_vae_dist_summit_entk.sh' % cfg['molecules_path']
            # cmd_sparse = ' '.join(['%s/MD_to_CVAE/cvae_input.h5' % cfg["base_path"],
            #                        "./", cvae_dir, 'sparse-concat', 'resnet',
            #                        str(cfg['residues']), str(cfg['residues']),
            #                        str(hp['latent_dim']), 'amp', 'non-distributed',
            #                        str(hp['batch_size']), str(cfg['epoch']),
            #                        str(cfg['sample_interval']),
            #                        hp['optimizer'], cfg['init_weights']])

            # AAE config
            cmd_vae    = '%s/examples/bin/summit/run_aae_dist_summit_entk.sh' % cfg['molecules_path']
            t3.executable = ['%s; %s %s' % (cmd_cat, cmd_jsrun, cmd_vae)]
            t3.arguments = ['%s/MD_to_CVAE/cvae_input.h5' % cfg["base_path"],
                                   "./",
                                   cvae_dir,
                                   str(cfg['residues']),
                                   str(hp['latent_dim']),
                                   'non-amp',
                                   'distributed',
                                   str(hp['batch_size']),
                                   str(cfg['epoch']),
                                   str(cfg['sample_interval']),
                                   hp['optimizer'],
                                   hp['loss_weights'],
                                   cfg['init_weights']]

            #+ f'{cfg['molecules_path']}/examples/run_vae_dist_summit.sh -i {sparse_matrix_path} -o ./ --model_id {cvae_dir} -f sparse-concat -t resnet --dim1 168 --dim2 168 -d 21 --amp --distributed -b {batch_size} -e {epoch} -S 3']
        #     ,
        #             '-i', sparse_matrix_path,
        #             '-o', './',
        #             '--model_id', cvae_dir,
        #             '-f', 'sparse-concat',
        #             '-t', 'resnet',
        #             # fs-pep
        #             '--dim1', 168,
        #             '--dim2', 168,
        #             '-d', 21,
        #             '--amp',      # sparse matrix
        #             '--distributed',
        #             '-b', batch_size, # batch size
        #             '-e', epoch,# epoch
        #             '-S', 3
        #             ]

            t3.cpu_reqs = {'processes'          : 1,
                           'process_type'       : 'MPI',
                           'threads_per_process': 4,
                           'thread_type'        : 'OpenMP'}
            t3.gpu_reqs = {'processes'          : 1,
                           'process_type'       : None,
                           'threads_per_process': 1,
                           'thread_type'        : 'CUDA'}

            # Add the learn task to the learning stage
            s3.add_tasks(t3)
            stages.append(s3)
        return stages
Example #30
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    # assumes that for non-zero iterations the input files are already in combined_path
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]
    try:
        systemxml = str(Kconfig.systemxml)
    except:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except:
        integratorxml = 'integrator-5.xml'
    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)
    #if cur_iter==0:
    #  pre_proc_stage2 = Stage()
    #  pre_proc_task2 = Task()
    #  pre_proc_task2.pre_exec = ['export tasks=pre_proc_task','export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
    #  pre_proc_task2.executable = ['ls']
    #  pre_proc_task2.arguments = ['-l']
    #  pre_proc_task2.copy_input_data = ['$SHARED/%s > %s/%s' % (config_file,combined_path, config_file),
    #                                 '$SHARED/%s > %s/%s' % (script_ana,combined_path,script_ana),
    #                                 '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file),
    #                                   '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path, Kconfig.md_reference)]# '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py') ]
    #  pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
    #  pre_proc_stage2.add_tasks(pre_proc_task2)
    #  wf.add_stages(pre_proc_stage2)

    # ------------------------------------------------------------------------------------------------------------------
    start_iter = cur_iter
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        while (num_allocated_rep < num_replicas):
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 10,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py', '--Kconfig', config_file, '--idxstart',
                str(num_allocated_rep), '--idxend',
                str(num_allocated_rep + use_replicas), '--path', combined_path,
                '>', 'md.log'
            ]
            #'--trajstride', str(Kconfig.trajstride),'--Kconfig', str(args.Kconfig),
            #'--idxstart',str(num_allocated_rep), '--idxend',str((num_allocated_rep+use_replicas)),
            #'--path',combined_path,'--iter',str(cur_iter),
            #'--md_steps',str(Kconfig.md_steps), '--save_traj', 'True','>', 'md.log']
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #  for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##     #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #     copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                pre_exec_arr = ana_settings
                ana_task.pre_exec = pre_exec_arr
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 10,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log' %
                    (combined_path, cur_iter, anatask)
                ]
                ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                    wf.uid, sim_stage.uid, ana_task.uid)
                sim_stage.add_tasks(ana_task)
        wf.add_stages(sim_stage)
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
Example #31
def main(cmt_filename):
    '''Builds and runs the inversion pipeline for the given CMT solution.

    Args:
        cmt_filename: str containing the path to the cmt solution that is
                      supposed to be inverted for

    Usage:
        From the commandline:
            python pipeline <path/to/cmtsolution>

    '''

    # Path to pipeline file
    pipelinepath = os.path.abspath(__file__)
    pipelinedir = os.path.dirname(pipelinepath)

    # Define parameter directory
    param_path = os.path.join(os.path.dirname(pipelinedir), "params")
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)
    print(DB_params)

    # Earthquake specific database parameters
    # Dir and eq_id
    eq_dir, eq_id = get_eq_entry_path(DB_params["databasedir"], cmt_filename)
    # Earthquake file in the database
    cmt_file_db = os.path.join(eq_dir, "eq_" + eq_id + ".cmt")

    # Create a Pipeline object
    p = Pipeline()

    # ---- DATABASE ENTRY TASK ---------------------------------------------- #

    # Path to function
    create_database_func = os.path.join(pipelinedir,
                                        "01_Create_Database_Entry.py")

    # Create a Stage object
    database_entry = Stage()

    t1 = Task()
    t1.name = 'database-entry'
    t1.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    t1.executable = [DB_params['bin-python']]  # Assign executable to the task
    t1.arguments = [create_database_func, os.path.abspath(cmt_filename)]

    # In the future maybe to database dir as a total log?
    t1.stdout = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stdout")
    t1.stderr = os.path.join(pipelinedir,
                             "database-entry." + eq_id + ".stderr")

    # Add Task to the Stage
    database_entry.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(database_entry)

    # # ---- REQUEST DATA ----------------------------------------------------- #
    #
    # # Path to function
    # request_data_func = os.path.join(pipelinedir, "02_Request_Data.py")
    #
    # # Create a Stage object
    # datarequest = Stage()
    #
    # datarequest_t = Task()
    # datarequest_t.name = 'data-request'
    # datarequest_t.pre_exec = [  # Conda activate
    #     DB_params["conda-activate"]]
    # datarequest_t.executable = [DB_params['bin-python']]  # Assign executable
    #                                                       # to the task
    # datarequest_t.arguments = [request_data_func, cmt_file_db]
    #
    # # In the future maybe to database dir as a total log?
    # datarequest_t.stdout = os.path.join(pipelinedir,
    #                                   "datarequest." + eq_id + ".stdout")
    # datarequest_t.stderr = os.path.join(pipelinedir,
    #                                   "datarequest." + eq_id + ".stderr")
    #
    # # Add Task to the Stage
    # datarequest.add_tasks(datarequest_t)
    #
    # # Add Stage to the Pipeline
    # p.add_stages(datarequest)

    # ---- Write Sources ---------------------------------------------------- #

    # Path to function
    write_source_func = os.path.join(pipelinedir, "03_Write_Sources.py")

    # Create a Stage object
    w_sources = Stage()
    w_sources.name = 'Write-Sources'

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = 'Write-Sources'
    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    w_sources_t.executable = [DB_params['bin-python']]  # Assign executable
    # to the task
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stdout")
    w_sources_t.stderr = os.path.join(pipelinedir,
                                      "write_sources." + eq_id + ".stderr")

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    # Add Stage to the Pipeline
    p.add_stages(w_sources)

    # ---- Run Specfem ----------------------------------------------------- #

    specfemspec_path = os.path.join(param_path,
                                    "SpecfemParams/SpecfemParams.yml")
    comp_and_modules_path = os.path.join(
        param_path, "SpecfemParams/CompilersAndModules.yml")

    # Load Parameters
    specfemspecs = read_yaml_file(specfemspec_path)
    cm_dict = read_yaml_file(comp_and_modules_path)

    attr = [
        "CMT", "CMT_rr", "CMT_tt", "CMT_pp", "CMT_rt", "CMT_rp", "CMT_tp",
        "CMT_depth", "CMT_lat", "CMT_lon"
    ]

    simdir = os.path.join(eq_dir, "CMT_SIMs")

    # Create a Stage object
    runSF3d = Stage()
    runSF3d.name = 'Simulation'

    for at in attr[:1]:
        sf_t = Task()
        sf_t.name = 'run-' + at

        # Module Loading
        sf_t.pre_exec = [  # Get rid of existing modules
            'module purge'
        ]
        for module in cm_dict["modulelist"]:
            sf_t.pre_exec.append("module load %s" % module)
        sf_t.pre_exec.append("module load %s" % cm_dict["gpu_module"])

        # Change directory to specfem directories
        sf_t.pre_exec.append(  # Change directory
            "cd %s" % os.path.join(simdir, at))

        sf_t.executable = ['./bin/xspecfem3D']  # Assign executable

        # In the future maybe to database dir as a total log?
        sf_t.stdout = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stdout")
        sf_t.stderr = os.path.join(pipelinedir,
                                   "run_specfem." + eq_id + ".stderr")

        sf_t.gpu_reqs = {
            'processes': 6,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }

        # Add Task to the Stage
        runSF3d.add_tasks(sf_t)

    # Add Simulation stage to the Pipeline
    p.add_stages(runSF3d)

    # Create Application Manager
    appman = AppManager(hostname=hostname, port=port)

    # Create a dictionary describe four mandatory keys:
    # resource, walltime, and cpus
    # resource is 'local.localhost' to execute locally
    res_dict = {
        'resource': 'princeton.tiger_gpu',
        'project': 'geo',
        'queue': 'gpu',
        'schema': 'local',
        'walltime': 300,
        'cpus': 2,
        'gpus': 6
    }

    # Assign resource request description to the Application Manager
    appman.resource_desc = res_dict

    # Assign the workflow as a set or list of Pipelines to the Application Manager
    # Note: The list order is not guaranteed to be preserved
    appman.workflow = set([p])

    # Run the Application Manager
    appman.run()
Example #32
            # Untar the specfem input data
            'tar xf specfem_data_event_%s.tar' % event,

            # Link to common DATABASES_MPI containing mesh files (~55GB)
            'ln -s /lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/DATABASES_MPI DATABASES_MPI'
        ]
        t.executable = ['./bin/xspecfem3D']
        t.cpu_reqs = {
            'processes': 0,
            'process_type': 'MPI',
            'threads_per_process': 0,
            'thread_type': 'OpenMP'
        }
        t.gpu_reqs = {
            'processes': 384,
            'process_type': 'MPI',
            'threads_per_process': 1,
            'thread_type': 'OpenMP'
        }
        t.copy_input_data = [
            '/lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/specfem_data_event_%s.tar'
            % event,
            '/lustre/atlas/scratch/vivekb/bip149/ssflow-N-seq-events/specfem_validator.py'
        ]
        t.post_exec = [
            'python specfem_validator.py OUTPUT_FILES/output_solver.txt'
        ]
        specfem_stage.add_tasks(t)

    p.add_stages(specfem_stage)

    res_dict = {
Example #33
p = Pipeline()
s = Stage()

aggregator_dir = f'{run_dir}/aggregator'

for i in range(ntasks):
    t = Task()
    t.cpu_reqs = {
        'processes': 1,
        'process_type': None,
        'threads_per_process': 4,
        'thread_type': 'OpenMP'
    }
    t.gpu_reqs = {
        'processes': 0,
        'process_type': None,
        'threads_per_process': 0,
        'thread_type': None
    }
    t.executable = PYTHON
    t.arguments = [
        f'{current_dir}/simulation.py', f'{run_dir}/simulations/all',
        ADIOS_XML, i, aggregator_dir
    ]
    s.add_tasks(t)

t = Task()
t.cpu_reqs = {
    'processes': 1,
    'process_type': None,
    'threads_per_process': 4,
    'thread_type': 'OpenMP'