Example no. 1
    def generate_aggregating_task(self): 
        """ 
        Function to concatenate the MD trajectory (h5 contact map) 
        """ 
        p = Pipeline() 
        p.name = 'aggregating'
        s2 = Stage()
        s2.name = 'aggregating'

        # Aggregation task
        t2 = Task()
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
        t2.pre_exec = [] 
        t2.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
        t2.pre_exec += ['conda activate %s' % conda_path] 
        t2.pre_exec += ['cd %s' % agg_path]
        t2.executable = ['%s/bin/python' % conda_path]  # MD_to_CVAE.py
        t2.arguments = [
                '%s/MD_to_CVAE.py' % agg_path, 
                '--sim_path', md_path, 
                '--train_frames', 100000]

        # assign hardware to the task
        t2.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
                }
        # Add the aggregation task to the aggregating stage
        s2.add_tasks(t2)
        p.add_stages(s2) 
        return p
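For context, a pipeline built by a generator like this is executed by handing it to an AppManager, as the test examples further down in this collection do. A minimal, illustrative driver sketch, assuming a local RabbitMQ endpoint and the local.localhost resource description used in those tests ('wf' is a hypothetical stand-in for the object that owns generate_aggregating_task):

from radical.entk import AppManager

p = wf.generate_aggregating_task()                    # 'wf' is a hypothetical owner object
appman = AppManager(hostname='localhost', port=5672)  # RabbitMQ endpoint assumed
appman.resource_desc = {'resource': 'local.localhost',
                        'walltime': 5,
                        'cpus': 1,
                        'project': ''}
appman.workflow = [p]
appman.run()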
Example no. 2
def generate_pipeline(nid):

    p       = Pipeline()
    s1      = Stage()
    s2      = Stage()
    t1      = Task()

    p.name  = 'p%s' % nid
    s1.name = 's1'
    s2.name = 's2'
    t1.name = 't1'

    t1.executable = '/bin/echo'
    t1.arguments  = ['hello']

    s1.add_tasks(t1)
    p.add_stages(s1)

    for cnt in range(10):

        tn            = Task()
        tn.name       = 't%s' % (cnt + 1)
        tn.executable = '/bin/echo'
        tn.arguments  = ['world']

        # Copy data from the task in first stage to the current task's location
        tn.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt'
                              % (p.name, s1.name, t1.name)]
        s2.add_tasks(tn)

    p.add_stages(s2)

    return p
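Note that t1 only echoes 'hello' to stdout, while the second stage copies output.txt from t1's sandbox. For that placeholder path to contain the file, the first task presumably needs to capture its stdout, as Example no. 23 below does with temp.txt:

t1.stdout = 'output.txt'   # assumed fix; mirrors t1.stdout = 'temp.txt' in Example no. 23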
Example no. 3
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name


    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s'%s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'        # Assign a name to the task (optional)
            t.executable = '/bin/echo'   # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s'%(t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
Example no. 4
def GeneratePipeline(pcfg, ecfg, pipe_name, exp_dir):

    # Append the exp_dir to the ecfg dictionary to simplify things a bit
    ecfg['exp_dir'] = exp_dir

    # Append the input file to the list of options (if need be)
    if "input_data_file" in ecfg.keys():
        ecfg['options']['input_data_file'] = ecfg['input_data_file']

    # Append the pipeline id to the list of options
    ecfg['options']['pipeline_id'] = pipe_name

    # Initialize the pipeline object
    p = Pipeline()

    # Give the pipeline a name
    p.name = pipe_name

    # Loop through the necessary stages for this module
    stage_names = ["pre-process", "fit", "project", "post-process"]
    for this_stage in stage_names:
        if this_stage in pcfg.keys():

            # Populate the pipeline with the stages
            p.add_stages(
                GenerateStage(pcfg[this_stage], ecfg, p.name, this_stage))

    return (p)
Example no. 5
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s' % s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'  # Assign a name to the task (optional)
            t.executable = '/bin/echo'  # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s' % (t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
Example no. 6
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'
    s1_task_uids = []

    for cnt in range(128):

        # Create a Task object
        t1 = Task()
        t1.name = 't%s' % (cnt + 1)
        # to make a python script executable:
        # 1) add to first line "shebang": #!/usr/bin/env python
        # 2) chmod +x SerialCode.py
        # The executable must already exist on the target machine
        t1.executable = '~/SerialCode.py'

        # Add the Task to the Stage
        s1.add_tasks(t1)
        s1_task_uids.append(t1.name)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
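The shebang/chmod comment above can be made concrete. A hypothetical ~/SerialCode.py (the name is taken from the snippet; the body is purely illustrative) would look like:

#!/usr/bin/env python
# hypothetical ~/SerialCode.py; it must already exist on the target machine
# and be made executable there with: chmod +x SerialCode.py
print('hello from SerialCode')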
Example no. 8
def test_state_order():

    """
    **Purpose**: Test if the Pipeline, Stage and Task are assigned their states in the correct order
    """

    def create_single_task():

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/date']
        t1.copy_input_data = []
        t1.copy_output_data = []

        return t1

    p1 = Pipeline()
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 5,
            'cpus': 1,
            'project': ''

    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['RP_ENABLE_OLD_DEFINES'] = 'True'
    
    appman = Amgr(hostname=hostname, port=port)
    appman.resource_desc = res_dict

    appman.workflow = [p1]
    appman.run()

    p_state_hist = p1.state_history
    assert p_state_hist == ['DESCRIBED', 'SCHEDULING', 'DONE']

    s_state_hist = p1.stages[0].state_history
    assert s_state_hist == ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'DONE']

    tasks = p1.stages[0].tasks

    for t in tasks:

        t_state_hist = t.state_history
        assert t_state_hist == ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'SUBMITTING', 'SUBMITTED',
                            'EXECUTED', 'DEQUEUEING', 'DEQUEUED', 'DONE']
Example no. 9
def get_pipeline(shared_fs=False, size=1):

    p = Pipeline()
    p.name = 'p'

    n = 4

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x

        # dd if=/dev/random bs=<byte size of a chunk> count=<number of chunks> of=<output file name>

        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s1_t%s.txt'%x]
        else:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=/home/vivek91/s1_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1        
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 1024

        s1.add_tasks(t)

    p.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=$NODE_LFS_PATH/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s2_t%s.txt'%x]
        else:
            t.arguments = ['if=/home/vivek91/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=/home/vivek91/s2_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.tag = 't%s'%x

        s2.add_tasks(t)


    p.add_stages(s2)    

    return p
Example no. 10
def get_pipeline(shared_fs=False, size=1):

    p = Pipeline()
    p.name = 'p'

    n = 4

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x

        # dd if=/dev/random bs=<byte size of a chunk> count=<number of chunks> of=<output file name>

        t.executable = 'dd'

        if not shared_fs:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s1_t%s.txt'%x]
        else:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=/home/vivek91/s1_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 1024

        s1.add_tasks(t)

    p.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=$NODE_LFS_PATH/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s2_t%s.txt'%x]
        else:
            t.arguments = ['if=/home/vivek91/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=/home/vivek91/s2_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.tag = 't%s'%x

        s2.add_tasks(t)


    p.add_stages(s2)

    return p
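In both variants above, lfs_per_process reserves node-local scratch space (in MB) and of=$NODE_LFS_PATH/... writes into it; setting t.tag in the second stage to the name of the corresponding first-stage task is what lets the scheduler co-locate each s2 task with the s1 task that wrote its input. An illustrative driver, reusing the ncsa.bw_aprun resource description from the test near the end of this collection (RabbitMQ endpoint assumed):

from radical.entk import AppManager

p = get_pipeline(shared_fs=False, size=1)
appman = AppManager(hostname='localhost', port=5672)  # assumed endpoint
appman.resource_desc = {'resource': 'ncsa.bw_aprun',
                        'walltime': 10,
                        'cpus': 128,
                        'project': 'gk4',
                        'queue': 'high'}
appman.workflow = [p]
appman.run()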
Example no. 11
    def test_pipeline_assignment_exceptions(self, mocked_generate_id,
                                            mocked_Lock, mocked_Event, l, i,
                                            b, se):

        p = Pipeline()

        data_type = [l, i, b, se]
        print(data_type)

        for data in data_type:
            if not isinstance(data, str):
                with self.assertRaises(TypeError):
                    p.name = data

            if isinstance(data,str):
                with self.assertRaises(ValueError):
                    p.name = data

            with self.assertRaises(TypeError):
                p.stages = data

            with self.assertRaises(TypeError):
                p.add_stages(data)
Example no. 12
def generate_pipeline(nid):

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p%s' % nid

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create the first-stage Task (a simple /bin/echo whose sandbox is read by stage 2)
    t1 = Task()
    t1.name = 't2'
    t1.executable = ['/bin/echo']
    t1.arguments = ['hello']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()
    s2.name = 's2'
    s2_task_uids = []

    for cnt in range(10):

        # Create a Task object
        t2 = Task()
        t2.name = 't%s' % (cnt + 1)
        t2.executable = ['/bin/echo']
        t2.arguments = ['world']
        # Copy data from the task in the first stage to the current task's location
        t2.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/output.txt' %
            (p.name, s1.name, t1.name)
        ]

        # Add the Task to the Stage
        s2.add_tasks(t2)
        s2_task_uids.append(t2.name)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    return p
Example no. 13
    def describe_MD_pipeline():
        p = Pipeline()
        p.name = 'MD'

        # MD stage
        s1 = Stage()
        s1.name = 'OpenMM'

        # Each Task() is an OpenMM executable that will run on a single GPU.
        # Set sleep time for local testing
        # for i in range(18):

        task = Task()
        task.name = 'md' 
        
        task.pre_exec    = []

        # task.pre_exec   += ['export MINICONDA=/gpfs/alpine/scratch/jdakka/bip178/miniconda']
        # task.pre_exec   += ['export PATH=$MINICONDA/bin:$PATH']
        # task.pre_exec   += ['export LD_LIBRARY_PATH=$MINICONDA/lib:$LD_LIBRARY_PATH']
        task.pre_exec   += ['module load python/2.7.15-anaconda2-5.3.0']
        task.pre_exec   += ['module load cuda/9.1.85']
        task.pre_exec   += ['module load gcc/6.4.0']
        task.pre_exec   += ['source activate openmm']
        task.pre_exec   += ['cd /gpfs/alpine/scratch/jdakka/bip178/benchmarks/MD_exps/fs-pep/results_2']
        task.executable  = '/ccs/home/jdakka/.conda/envs/openmm/bin/python'
        task.arguments = ['run_openmm.py', '-f', 
        '/gpfs/alpine/scratch/jdakka/bip178/benchmarks/MD_exps/fs-pep/pdb/100-fs-peptide-400K.pdb']
        task.cpu_reqs = {'processes': 1,
                         'process_type': None,
                         'threads_per_process': 1,
                         'thread_type': None
                         }

        task.gpu_reqs = {'processes': 1,
                         'process_type': None,
                         'threads_per_process': 1,
                         'thread_type': 'CUDA'
                        }

        # Add the MD task to the MD stage
        s1.add_tasks(task)

        # Add MD stage to the MD Pipeline
        p.add_stages(s1)


        return p
Example no. 15
def test_pipeline_pass_uid():

    p = Pipeline()
    p._uid = 'test'
    p.name = 'p1'

    s1 = Stage()
    s2 = Stage()
    p.add_stages([s1,s2])

    p._pass_uid()

    assert s1.parent_pipeline['uid'] == p.uid
    assert s1.parent_pipeline['name'] == p.name
    assert s2.parent_pipeline['uid'] == p.uid
    assert s2.parent_pipeline['name'] == p.name
Example no. 16
def generate_ML_pipeline():

    p = Pipeline()
    p.name = 'ML'

    s1 = Stage()
    s1.name = 'Generator-ML'

    # The generator/ML pipeline consists of 1 stage with 2 tasks.
    # Task 1: generator; Task 2: ConvNet/active-learning model.
    # NOTE: Generator and ML/AL are alive across the whole workflow execution.
    # For local testing, sleep time is longer than the total execution time of
    # the MD pipelines.

    t1 = Task()
    t1.name = "generator"
    t1.pre_exec = [
        # 'module load python/2.7.15-anaconda2-5.3.0',
        # 'module load cuda/9.1.85',
        # 'module load gcc/6.4.0',
        # 'source activate snakes'
    ]
    # t1.executable = ['python']
    # t1.arguments  = ['/ccs/home/jdakka/tf.py']
    t1.executable = ['sleep']
    t1.arguments = ['5']
    s1.add_tasks(t1)

    t2 = Task()
    t2.name = "ml-al"
    t2.pre_exec = [
        # 'module load python/2.7.15-anaconda2-5.3.0',
        # 'module load cuda/9.1.85',
        # 'module load gcc/6.4.0',
        # 'source activate snakes'
    ]
    # t2.executable = ['python']
    # t2.arguments  = ['/ccs/home/jdakka/tf.py']
    t2.executable = ['sleep']
    t2.arguments = ['10']
    s1.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
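Since the comments state that the generator and ML/AL tasks stay alive across the whole workflow execution, this pipeline is presumably submitted alongside the MD pipelines in a single workflow set; EnTK executes the pipelines of a workflow concurrently. A hedged sketch (md_pipelines assumed to come from an MD-pipeline generator such as the one in Example no. 13):

appman.workflow = [generate_ML_pipeline()] + md_pipelines  # pipelines run concurrently
appman.run()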
Example no. 17
    def generate_ML_tasks(self): 
        """
        Function to generate the learning stage
        """
        p = Pipeline() 
        p.name = 'learning' 
        s3 = Stage()
        s3.name = 'training'

        # learn task
        for i in range(self.num_ML): 
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            t3.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
            t3.pre_exec += ['module load cuda/10.1.168']
            t3.pre_exec += ['conda activate %s' % conda_path] 

            t3.pre_exec += ['export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path]
            t3.pre_exec += ['cd %s' % cvae_path]
            t3.pre_exec += [f"sleep {i}"]
            dim = i + 3 
            t3.executable = ['%s/bin/python' % conda_path]  # train_cvae.py
            t3.arguments = [
                    '%s/train_cvae.py' % cvae_path, 
                    '--h5_file', '%s/cvae_input.h5' % agg_path, 
                    '--dim', dim] 
            
            t3.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 4,
                    'thread_type': 'OpenMP'
                    }
            t3.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                    }
        
            # Add the learn task to the learning stage
            s3.add_tasks(t3)
        p.add_stages(s3)

        return p 
Example no. 19
def get_pipeline(workflow_cfg, resource):

    ## Extract resource-independent global parameters
    total_iters = workflow_cfg['global']['total_iters']
    ensemble_size = workflow_cfg['global']['ensemble_size']
    sim_duration = workflow_cfg['global']['sim_duration']

    # Create one Pipeline for the entire workflow. The Pipeline contains 1
    # Simulation stage and 1 Analysis stage per iteration.
    # Please refer to the API reference for more details about Pipeline, Stage,
    # Task. Link: https://radicalentk.readthedocs.io/en/latest/api/app_create.html
    p = Pipeline()
    p.name = 'simple-mdff'

    for _ in range(total_iters):
        one_cycle(p, workflow_cfg, resource)  # update pipeline, p
    return p
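one_cycle is not shown here; per the comments it appends one simulation stage and one analysis stage to p per iteration. A purely illustrative sketch of that shape, with placeholder executables rather than the real simple-mdff tasks (Stage and Task imported from radical.entk as elsewhere):

def one_cycle(p, workflow_cfg, resource):
    # illustrative only: one simulation stage ...
    sim = Stage()
    for _ in range(workflow_cfg['global']['ensemble_size']):
        t = Task()
        t.executable = 'sleep'                                  # placeholder
        t.arguments = [workflow_cfg['global']['sim_duration']]
        sim.add_tasks(t)
    p.add_stages(sim)

    # ... followed by one analysis stage
    ana = Stage()
    t = Task()
    t.executable = 'sleep'                                      # placeholder
    t.arguments = ['1']
    ana.add_tasks(t)
    p.add_stages(ana)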
Example no. 20
def test_pipeline_assignment_exceptions(t, l, i, b, se):

    p = Pipeline()

    data_type = [t, l, i, b, se]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                p.name = data

        with pytest.raises(TypeError):
            p.stages = data

        with pytest.raises(TypeError):
            p.add_stages(data)
Example no. 21
    def generate_MD_tasks(self): 
        """
        Function to generate MD tasks. 
        """
        p = Pipeline() 
        p.name = "MD"
        s1 = Stage()
        s1.name = 'MD'

        # MD tasks
        for i in range(self.num_MD):
            t1 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
            t1.pre_exec = ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
            t1.pre_exec += ['module load cuda/10.1.168']
            t1.pre_exec += ['conda activate %s' % conda_path] 
            t1.pre_exec += ['export PYTHONPATH=%s/MD_exps:$PYTHONPATH' % base_path] 
            t1.pre_exec += ['cd %s' % md_path] 
            # t1.pre_exec += [f"sleep {i}"]
            t1.executable = ['%s/bin/python' % conda_path]  # run_openmm.py
            t1.arguments = ['%s/run_openmm.py' % md_path] 
            t1.arguments += ['--pdb_file', pdb_file]
            if top_file: 
                t1.arguments += ['--topol', top_file]
            t1.arguments += ['--length', 1000]

            # assign hardware to the task
            t1.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 4,
                    'thread_type': 'OpenMP'
                    }
            t1.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                    }
                              
            # Add the MD task to the simulating stage
            s1.add_tasks(t1)

        # Add the simulating stage to the pipeline (once, after the task loop)
        p.add_stages(s1)
        return p
Example no. 22
    def describe_MD_pipline():
        p = Pipeline()
        p.name = 'MD'

        # Docking stage
        s1 = Stage()
        s1.name = 'Docking'

        # Docking task
        t1 = Task()
        t1.executable = ['sleep']
        t1.arguments = ['30']

        # Add the Docking task to the Docking Stage
        s1.add_tasks(t1)

        # Add Docking stage to the pipeline
        p.add_stages(s1)

        # MD stage
        s2 = Stage()
        s2.name = 'Simulation'

        # Each Task() is an OpenMM executable that will run on a single GPU.
        # Set sleep time for local testing
        for i in range(6):
            t2 = Task()
            t2.executable = ['sleep']
            t2.arguments = ['60']

            # Add the MD task to the Simulation stage
            s2.add_tasks(t2)

        # Add post-exec to the Stage
        s2.post_exec = {
            'condition': func_condition,
            'on_true': func_on_true,
            'on_false': func_on_false
        }

        # Add MD stage to the MD Pipeline
        p.add_stages(s2)

        return p
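func_condition, func_on_true and func_on_false are not defined in this snippet; Example no. 40 below shows the full adaptive pattern. In outline (the iteration counters are assumptions):

CUR_STAGE, MAX_STAGE = 0, 10          # assumed iteration counters

def func_condition():
    global CUR_STAGE
    if CUR_STAGE < MAX_STAGE:
        func_on_true()
    else:
        func_on_false()

def func_on_true():
    global CUR_STAGE
    CUR_STAGE += 1
    # extend the pipeline with the next round of stages here

def func_on_false():
    print('Done')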
Example no. 23
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task that writes "Hello World" into 'temp.txt' via stdout
    t1 = Task()
    t1.name = 't1'
    t1.executable = ['/bin/echo']
    t1.arguments = ['"Hello World"']
    t1.stdout = 'temp.txt'

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create a Stage object
    s2 = Stage()
    s2.name = 's2'

    # Create a Task that concatenates the first task's output into 'output.txt'
    t2 = Task()
    t2.name = 't2'
    t2.executable = ['/bin/cat']
    t2.arguments = [
        '$Pipeline_%s_Stage_%s_Task_%s/temp.txt' % (p.name, s1.name, t1.name)
    ]
    t2.stdout = 'output.txt'
    t2.download_output_data = ['output.txt']

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    return p
Example no. 24
def describe_ML_pipline():
    p = Pipeline()
    p.name = 'ML'

    # Learning stage
    s1 = Stage()
    s1.name = 'learning'

    # Docking rescoring task
    t1 = Task()
    t1.executable = ['/gpfs/alpine/scratch/mturilli1/bip179/bin/run_learner_wrapper.sh']

    # Add the docking rescoring task to the docking rescoring stage
    s1.add_tasks(t1)

    # Add the docking rescoring stage to the pipeline
    p.add_stages(s1)

    return p
Example no. 25
def generate_pipeline(name, stages):  # generate the pipeline of prediction and blob detection

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s' % s_cnt

        if s_cnt == 1:
            # Create Task 1, training
            t = Task()
            t.name = 'my-task1'
            t.executable = ['sbatch']   # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['/Code/trainbatch.bat']

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
Example no. 26
def test_integration_local():

    """
    **Purpose**: Run an EnTK application on localhost
    """


    def create_single_task():

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = '/bin/echo'
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        return t1


    p1 = Pipeline()
    p1.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 5,
            'cpus': 1,
            'project': ''

    }


    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()
Example no. 27
def test_integration_local():

    """
    **Purpose**: Run an EnTK application on localhost
    """

    def create_single_task():

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        return t1

    p1 = Pipeline()
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 5,
            'cpus': 1,
            'project': ''

    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()
Example no. 28
    def generate_interfacing_task(self): 
        p = Pipeline() 
        p.name = 'interfacing'
        s4 = Stage() 
        s4.name = 'scanning'

        # Scanning for outliers and preparing the next round of MD runs
        t4 = Task() 
        t4.pre_exec = [] 
        t4.pre_exec += ['. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh']
        t4.pre_exec += ['module load cuda/10.1.168']
        t4.pre_exec += ['conda activate %s' % conda_path] 

        t4.pre_exec += ['export PYTHONPATH=%s/CVAE_exps:$PYTHONPATH' % base_path] 
        t4.pre_exec += ['cd %s/Outlier_search' % base_path] 
        t4.executable = ['%s/bin/python' % conda_path] 
        t4.arguments = [
                'outlier_locator.py', 
                '--md',  md_path, 
                '--cvae', cvae_path, 
                '--pdb', pdb_file, 
                '--ref', ref_pdb_file,
                '--n_out', self.num_outliers, 
                '--timeout', self.t_timeout]

        t4.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 12,
                'thread_type': 'OpenMP'
                }
        t4.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
                }
        s4.add_tasks(t4) 
        p.add_stages(s4)
        
        return p
Example no. 29
def test_assignment_exceptions():

    p = Pipeline()

    data_type = [1, 'a', True, [1], set([1])]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                p.name = data

        with pytest.raises(TypeError):
            p.stages = data

        with pytest.raises(TypeError):
            p.add_stages(data)

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                p.remove_stages(data)
Example no. 30
def setup_replicas(replicas, min_temp, max_temp, timesteps, basename):

    writeInputs.writeInputs(max_temp=max_temp, min_temp=min_temp, replicas=replicas, timesteps=timesteps, basename=basename)
    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))
    tar.close()
    for r in range(replicas):
        os.remove('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))



    setup_p = Pipeline()
    setup_p.name = 'untarPipe'

    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir


    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task
        
    untar_tsk = Task()
    untar_tsk.name = 'untarTsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = 1
    untar_tsk.post_exec = []
    untar_stg.add_tasks(untar_tsk)
    setup_p.add_stages(untar_stg)
    global replica_sandbox
    replica_sandbox='$Pipeline_%s_Stage_%s_Task_%s'%(setup_p.name, untar_stg.name, untar_tsk.name)
    print(replica_sandbox)

    return setup_p
Example no. 31
def describe_MD_pipline(t2_senv):
    p = Pipeline()
    p.name = 'MD'

    # Ligand parameterization stage
    #    s1 = Stage()
    #    s1.name = 'parameterization'

    # ligand parameterization task
    #    t1 = Task()
    #    t1.executable = ['sleep']
    #    t1.arguments = ['30']

    # Add the parameterization task to the parameterization stage
    #    s1.add_tasks(t1)

    # Add parameterization stage to the pipeline
    #    p.add_stages(s1)

    # Docking rescoring stage
    s2 = Stage()
    s2.name = 'rescoring'

    # Docking rescoring task
    t2 = Task()
    # t2.pre_exec = t2_senv
    # t2.executable = ['python']
    # t2.arguments = ['1_mmgbsa.py', '-p', '"test"', '-n', '0']
    t2.executable = [
        '/gpfs/alpine/scratch/mturilli1/bip179/bin/mmgbsa_wrapper.sh'
    ]

    # Add the docking rescoring task to the docking rescoring stage
    s2.add_tasks(t2)

    # Add the docking rescoring stage to the pipeline
    p.add_stages(s2)

    return p
Example no. 32
def generate_esmacs(cfg):

    cfg['base_dir'] = cfg['work_dir']+'/'+cfg['proj']
    cfg['run_dir']  = cfg['base_dir']+'/'+cfg['data_dir']

    esmacs_names = glob.glob("{}/input/lig*".format(cfg['run_dir']))
    print("{}/input/lig*".format(cfg['run_dir']))
    print("DEBUG:generate_esmacs:esmacs_names %s" % esmacs_names)

    p = Pipeline()
    p.name = 'S3.ESMACS.%s' % cfg['type_esmacs'].upper()

    s1 = esmacs(cfg, esmacs_names, stage="eq1", outdir="equilibration")
    p.add_stages(s1)

    # if cfg['type_esmacs'] == 'fg':
    #     s2 = esmacs(cfg, esmacs_names, stage="eq2", outdir="equilibration")
    #     p.add_stages(s2)

    s3 = esmacs(cfg, esmacs_names, stage="sim1", outdir="simulation")
    p.add_stages(s3)

    return p
Example no. 33
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task that deliberately fails (/bin/false)
    t1 = Task()
    t1.name = 't1'
    t1.executable = ['/bin/false']
    # t1.arguments = ['"Hello World"','>>','temp.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
Example no. 34
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create 4K tasks to ensure we don't hit any RMQ connection drops
    for _ in range(4096):
        t1 = Task()
        t1.executable = '/bin/echo'
        t1.arguments = ['"Hello World"']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
Example no. 35
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create 4K tasks to ensure we don't hit any RMQ connection drops
    for _ in range(4096):
        t1 = Task()
        t1.executable = ['/bin/echo']
        t1.arguments = ['"Hello World"']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
Example no. 37
def describe_MD_pipline():
    p = Pipeline()
    p.name = 'MD'

    # Docking rescoring stage
    s1 = Stage()
    s1.name = 'rescoring'

    # Docking rescoring task
    t1 = Task()
    t1.executable = ['/gpfs/alpine/scratch/mturilli1/bip179/bin/mmgbsa_wrapper.sh']

    # Alternative approach for later optimization
    # t2.pre_exec = t2_senv
    # t2.executable = ['python']
    # t2.arguments = ['1_mmgbsa.py', '-p', '"test"', '-n', '0']

    # Add the docking rescoring task to the docking rescoring stage
    s1.add_tasks(t1)

    # Add the docking rescoring stage to the pipeline
    p.add_stages(s1)

    return p
Example no. 38
    def InitCycle(self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps): # "Cycle" = 1 MD stage plus the subsequent exchange computation

        """ 
        Initial cycle consists of:
        1) Create tarball of MD input data 
        2) Transfer the tarball to pilot sandbox
        3) Untar the tarball
        4) Run first Cycle
        """    
        
        #Initialize Pipeline
        #self._prof.prof('InitTar', uid=self._uid)
        p = Pipeline()
        p.name = 'initpipeline'

        md_dict    = dict() #Bookkeeping
        tar_dict   = dict() #Bookkeeping

        ##Write the input files

        self._prof.prof('InitWriteInputs', uid=self._uid)

                             

        writeInputs.writeInputs(max_temp=350,min_temp=250,replicas=Replicas,timesteps=timesteps)

        self._prof.prof('EndWriteInputs', uid=self._uid)

        
        self._prof.prof('InitTar', uid=self._uid)
        #Create Tarball of input data

        tar = tarfile.open("Input_Files.tar","w")
        for name in ["prmtop", "inpcrd", "mdin"]:
            tar.add(name)
        for r in range (Replicas):
            tar.add('mdin_{0}'.format(r))
        tar.close()

        #delete all input files outside the tarball

        for r in range (Replicas):
            os.remove('mdin_{0}'.format(r))

        self._prof.prof('EndTar', uid=self._uid)

                
        #Create Untar Stage

        untar_stg = Stage()
        untar_stg.name = 'untarStg'
    
        #Untar Task

        untar_tsk                   = Task()
        untar_tsk.name              = 'untartsk'
        untar_tsk.executable        = ['python']
        
        untar_tsk.upload_input_data = ['untar_input_files.py','Input_Files.tar']
        untar_tsk.arguments         = ['untar_input_files.py','Input_Files.tar']
        untar_tsk.cores             = 1

        untar_stg.add_tasks(untar_tsk)
        p.add_stages(untar_stg)

             
        tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.name,
                                                       untar_stg.name,
                                                       untar_tsk.name)
                 


        # First MD stage: needs to be defined separately since workflow is not built from a predetermined order

        md_stg = Stage()
        md_stg.name = 'mdstg0'
        self._prof.prof('InitMD_0', uid=self._uid)
        
        # MD tasks
               
        for r in range (Replicas):

            
            md_tsk                  = AMBERTask(cores=Replica_Cores, MD_Executable=md_executable)
            md_tsk.name             = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
            md_tsk.link_input_data += [
                                       '%s/inpcrd'%tar_dict[0],
                                       '%s/prmtop'%tar_dict[0],
                                       '%s/mdin_{0}'.format(r)%tar_dict[0]  #Use for full temperature exchange
                                       #'%s/mdin'%tar_dict[0]  #Testing only
                                       ] 
            md_tsk.arguments        = ['-O','-p','prmtop', '-i', 'mdin_{0}'.format(r), # Use this for full Temperature Exchange
                                       '-c','inpcrd','-o','out_{0}'.format(r),
                                       '-inf','mdinfo_{0}'.format(r)]
            md_dict[r]              = '$Pipeline_%s_Stage_%s_Task_%s'%(p.name, md_stg.name, md_tsk.name)

            md_stg.add_tasks(md_tsk)
            self.md_task_list.append(md_tsk)
            #print md_tsk.uid
        p.add_stages(md_stg)
        #stage_uids.append(md_stg.uid)
                                                    

        # First Exchange Stage
        
        ex_stg = Stage()
        ex_stg.name = 'exstg0'
        self._prof.prof('InitEx_0', uid=self._uid)
        #with open('logfile.log', 'a') as logfile:
         #   logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n')
        # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition
        # check and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs. 
        # Said pairs then exchange configurations by linking output configuration files appropriately.

        ex_tsk                      = Task()
        ex_tsk.name                 = 'extsk0'
        ex_tsk.executable           = ['python']
        ex_tsk.upload_input_data    = [ExchangeMethod]  
        for r in range (Replicas):
            ex_tsk.link_input_data     += ['%s/mdinfo_%s'%(md_dict[r],r)]
        ex_tsk.arguments            = ['TempEx.py','{0}'.format(Replicas), '0']
        ex_tsk.cores                = 1
        ex_tsk.mpi                  = False
        ex_tsk.download_output_data = ['exchangePairs_0.dat']
        ex_stg.add_tasks(ex_tsk)
        #task_uids.append(ex_tsk.uid)
        p.add_stages(ex_stg)
        self.ex_task_list.append(ex_tsk)
        #self.ex_task_uids.append(ex_tsk.uid)
        self.Book.append(md_dict)
        return p
Example no. 39
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP ComputeUnitDescription with the complete
    Task description
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 1,
                   'thread_type': 'OpenMP'
                   }
    t1.gpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 2,
                   'thread_type': 'OpenMP'
                   }
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'], t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']
    assert cud.post_exec == t1.post_exec

    assert {'source': 'upload_input.dat', 'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat', 'target': 'download_output.dat'} in cud.output_staging
Example no. 40
def generate_training_pipeline(cfg):
    """
    Function to generate the CVAE_MD pipeline
    """
    CUR_STAGE = cfg['CUR_STAGE']
    MAX_STAGE = cfg['MAX_STAGE']

    def generate_MD_stage(num_MD=1):
        """
        Function to generate MD stage.
        """
        s1 = Stage()
        s1.name = 'MD'

        initial_MD = True
        outlier_filepath = '%s/Outlier_search/restart_points.json' % cfg['base_path']

        if os.path.exists(outlier_filepath):
            initial_MD = False
            outlier_file = open(outlier_filepath, 'r')
            outlier_list = json.load(outlier_file)
            outlier_file.close()

        # MD tasks
        time_stamp = int(time.time())
        for i in range(num_MD):
            t1 = Task()

            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_exps/fs-pep/run_openmm.py
            t1.pre_exec  = ['. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh']
            t1.pre_exec += ['module load cuda/9.1.85']
            t1.pre_exec += ['conda activate %s' % cfg['conda_openmm']]
            t1.pre_exec += ['export PYTHONPATH=%s/MD_exps:%s/MD_exps/MD_utils:$PYTHONPATH' %
                (cfg['base_path'], cfg['base_path'])]
            t1.pre_exec += ['cd %s/MD_exps/%s' % (cfg['base_path'], cfg['system_name'])]
            t1.pre_exec += ['mkdir -p omm_runs_%d && cd omm_runs_%d' % (time_stamp+i, time_stamp+i)]

            t1.executable = ['%s/bin/python' % cfg['conda_openmm']]  # run_openmm.py
            t1.arguments = ['%s/MD_exps/%s/run_openmm.py' % (cfg['base_path'], cfg['system_name'])]
            #t1.arguments += ['--topol', '%s/MD_exps/fs-pep/pdb/topol.top' % cfg['base_path']]

            if 'top_file' in cfg:
                t1.arguments += ['--topol', cfg['top_file']]

            # pick initial point of simulation
            if initial_MD or i >= len(outlier_list):
                t1.arguments += ['--pdb_file', cfg['pdb_file'] ]
            elif outlier_list[i].endswith('pdb'):
                t1.arguments += ['--pdb_file', outlier_list[i]]
                t1.pre_exec += ['cp %s ./' % outlier_list[i]]
            elif outlier_list[i].endswith('chk'):
                t1.arguments += ['--pdb_file', cfg['pdb_file'],
                        '-c', outlier_list[i]]
                t1.pre_exec += ['cp %s ./' % outlier_list[i]]

            # how long to run the simulation
            if initial_MD:
                t1.arguments += ['--length', cfg['LEN_initial']]
            else:
                t1.arguments += ['--length', cfg['LEN_iter']]

            # assign hardware to the task
            t1.cpu_reqs = {'processes'          : 1,
                           'process_type'       : None,
                           'threads_per_process': 4,
                           'thread_type'        : 'OpenMP'}
            t1.gpu_reqs = {'processes'          : 1,
                           'process_type'       : None,
                           'threads_per_process': 1,
                           'thread_type'        : 'CUDA'}

            # Add the MD task to the simulating stage
            s1.add_tasks(t1)
        return s1


    def generate_aggregating_stage():
        """
        Function to concatenate the MD trajectory (h5 contact map)
        """
        s2 = Stage()
        s2.name = 'aggregating'

        # Aggregation task
        t2 = Task()

        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
        t2.pre_exec = [
                '. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh',
                'conda activate %s' % cfg['conda_pytorch'],
                'export LANG=en_US.utf-8',
                'export LC_ALL=en_US.utf-8']
        # Preprocessing for the molecules script, which needs all files in a
        # single directory. The following pre-exec steps:
        # 1) find all .dcd files from the OpenMM results
        # 2) create a temp directory
        # 3) symlink the .dcd files into the temp directory
        t2.pre_exec += [
                'export dcd_list=(`ls %s/MD_exps/%s/omm_runs_*/*dcd`)' % (cfg['base_path'], cfg['system_name']),
                'export tmp_path=`mktemp -p %s/MD_to_CVAE/ -d`' % cfg['base_path'],
                'for dcd in ${dcd_list[@]}; do tmp=$(basename $(dirname $dcd)); ln -s $dcd $tmp_path/$tmp.dcd; done',
                'ln -s %s $tmp_path/prot.pdb' % cfg['pdb_file'],
                'ls ${tmp_path}']

        t2.pre_exec += ['unset CUDA_VISIBLE_DEVICES', 'export OMP_NUM_THREADS=4']

        node_cnt_constraint = cfg['md_counts'] * max(1, CUR_STAGE) // 12
        cmd_cat    = 'cat /dev/null'
        cmd_jsrun  = 'jsrun -n %s -r 1 -a 6 -c 7 -d packed' % min(cfg['node_counts'], node_cnt_constraint)

        t2.executable = ['%s; %s %s/bin/python' % (cmd_cat, cmd_jsrun, cfg['conda_pytorch'])]  # MD_to_CVAE.py
        t2.arguments = [
                '%s/scripts/traj_to_dset.py' % cfg['molecules_path'],
                '-t', '$tmp_path',
                '-p', '%s/Parameters/input_protein/prot.pdb' % cfg['base_path'],
                '-r', '%s/Parameters/input_protein/prot.pdb' % cfg['base_path'],
                '-o', '%s/MD_to_CVAE/cvae_input.h5' % cfg['base_path'],
                '--contact_maps_parameters',
                "kernel_type=threshold,threshold=%s" % cfg['cutoff'],
                '-s', cfg['selection'],
                '--rmsd',
                '--fnc',
                '--contact_map',
                '--point_cloud',
                '--num_workers', 2,
                '--distributed',
                '--verbose']

        # Add the aggregation task to the aggregating stage
        t2.cpu_reqs = {'processes'          : 1,
                       'process_type'       : None,
                       'threads_per_process': 164,
                       'thread_type'        : 'OpenMP'}

        s2.add_tasks(t2)
        return s2


    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        # learn task
        time_stamp = int(time.time())
        stages=[]
        for i in range(num_ML):
            s3 = Stage()
            s3.name = 'learning'


            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec  = ['. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh']
            t3.pre_exec += ['module load gcc/7.4.0',
                            'module load cuda/10.1.243',
                            'module load hdf5/1.10.4',
                            'export LANG=en_US.utf-8',
                            'export LC_ALL=en_US.utf-8']
            t3.pre_exec += ['conda activate %s' % cfg['conda_pytorch']]
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp+i)
            t3.pre_exec += ['cd %s/CVAE_exps' % cfg['base_path']]
            t3.pre_exec += ['export LD_LIBRARY_PATH=/gpfs/alpine/proj-shared/med110/atrifan/scripts/cuda/targets/ppc64le-linux/lib/:$LD_LIBRARY_PATH']
            #t3.pre_exec += ['mkdir -p %s && cd %s' % (cvae_dir, cvae_dir)] # model_id creates sub-dir
            # this is for ddp, distributed
            t3.pre_exec += ['unset CUDA_VISIBLE_DEVICES', 'export OMP_NUM_THREADS=4']
            #pnodes = cfg['node_counts'] // num_ML # partition
            pnodes = 1  # max(1, pnodes)

            hp = cfg['ml_hpo'][i]
            cmd_cat    = 'cat /dev/null'
            cmd_jsrun  = 'jsrun -n %s -r 1 -g 6 -a 6 -c 42 -d packed' % pnodes

            # VAE config
            # cmd_vae    = '%s/examples/run_vae_dist_summit_entk.sh' % cfg['molecules_path']
            # cmd_sparse = ' '.join(['%s/MD_to_CVAE/cvae_input.h5' % cfg["base_path"],
            #                        "./", cvae_dir, 'sparse-concat', 'resnet',
            #                        str(cfg['residues']), str(cfg['residues']),
            #                        str(hp['latent_dim']), 'amp', 'non-distributed',
            #                        str(hp['batch_size']), str(cfg['epoch']),
            #                        str(cfg['sample_interval']),
            #                        hp['optimizer'], cfg['init_weights']])

            # AAE config
            cmd_vae    = '%s/examples/bin/summit/run_aae_dist_summit_entk.sh' % cfg['molecules_path']
            t3.executable = ['%s; %s %s' % (cmd_cat, cmd_jsrun, cmd_vae)]
            t3.arguments = ['%s/MD_to_CVAE/cvae_input.h5' % cfg["base_path"],
                                   "./",
                                   cvae_dir,
                                   str(cfg['residues']),
                                   str(hp['latent_dim']),
                                   'non-amp',
                                   'distributed',
                                   str(hp['batch_size']),
                                   str(cfg['epoch']),
                                   str(cfg['sample_interval']),
                                   hp['optimizer'],
                                   hp['loss_weights'],
                                   cfg['init_weights']]

            #+ f'{cfg['molecules_path']}/examples/run_vae_dist_summit.sh -i {sparse_matrix_path} -o ./ --model_id {cvae_dir} -f sparse-concat -t resnet --dim1 168 --dim2 168 -d 21 --amp --distributed -b {batch_size} -e {epoch} -S 3']
        #     ,
        #             '-i', sparse_matrix_path,
        #             '-o', './',
        #             '--model_id', cvae_dir,
        #             '-f', 'sparse-concat',
        #             '-t', 'resnet',
        #             # fs-pep
        #             '--dim1', 168,
        #             '--dim2', 168,
        #             '-d', 21,
        #             '--amp',      # sparse matrix
        #             '--distributed',
        #             '-b', batch_size, # batch size
        #             '-e', epoch,# epoch
        #             '-S', 3
        #             ]

            t3.cpu_reqs = {'processes'          : 1,
                           'process_type'       : 'MPI',
                           'threads_per_process': 4,
                           'thread_type'        : 'OpenMP'}
            t3.gpu_reqs = {'processes'          : 1,
                           'process_type'       : None,
                           'threads_per_process': 1,
                           'thread_type'        : 'CUDA'}

            # Add the learn task to the learning stage
            s3.add_tasks(t3)
            stages.append(s3)
        return stages


    def generate_interfacing_stage():
        s4 = Stage()
        s4.name = 'scanning'

        # Scanning for outliers and preparing the next round of MD runs
        t4 = Task()

        t4.pre_exec  = ['. /sw/summit/python/3.6/anaconda3/5.3.0/etc/profile.d/conda.sh']
        t4.pre_exec += ['conda activate %s' % cfg['conda_pytorch']]
        t4.pre_exec += ['mkdir -p %s/Outlier_search/outlier_pdbs' % cfg['base_path']]
        t4.pre_exec += ['export models=""; for i in `ls -d %s/CVAE_exps/model-cvae_runs*/`; do if [ "$models" != "" ]; then    models=$models","$i; else models=$i; fi; done;cat /dev/null' % cfg['base_path']]
        t4.pre_exec += ['export LANG=en_US.utf-8', 'export LC_ALL=en_US.utf-8']
        t4.pre_exec += ['unset CUDA_VISIBLE_DEVICES', 'export OMP_NUM_THREADS=4']

        cmd_cat = 'cat /dev/null'
        cmd_jsrun = 'jsrun -n %s -a 6 -g 6 -r 1 -c 7' % cfg['node_counts']

        #molecules_path = '/gpfs/alpine/world-shared/ven201/tkurth/molecules/'
        t4.executable = [' %s; %s %s/examples/outlier_detection/run_optics_dist_summit_entk.sh' % (cmd_cat, cmd_jsrun, cfg['molecules_path'])]
        t4.arguments = ['%s/bin/python' % cfg['conda_pytorch']]
        t4.arguments += ['%s/examples/outlier_detection/optics.py' % cfg['molecules_path'],
                        '--sim_path', '%s/MD_exps/%s' % (cfg['base_path'], cfg['system_name']),
                        '--pdb_out_path', '%s/Outlier_search/outlier_pdbs' % cfg['base_path'],
                        '--restart_points_path',
                        '%s/Outlier_search/restart_points.json' % cfg['base_path'],
                        '--data_path', '%s/MD_to_CVAE/cvae_input.h5' % cfg['base_path'],
                        '--model_paths', '$models',
                        '--model_type', cfg['model_type'],
                        '--min_samples', 10,
                        '--n_outliers', 500,
                        '--dim1', cfg['residues'],
                        '--dim2', cfg['residues'],
                        '--cm_format', 'sparse-concat',
                        '--batch_size', cfg['batch_size'],
                        '--distributed',
                        '-iw', cfg['init_weights']]

        t4.cpu_reqs = {'processes'          : 1,
                       'process_type'       : None,
                       'threads_per_process': 12,
                       'thread_type'        : 'OpenMP'}
        t4.gpu_reqs = {'processes'          : 1,
                       'process_type'       : None,
                       'threads_per_process': 1,
                       'thread_type'        : 'CUDA'}

        s4.add_tasks(t4)
        s4.post_exec = func_condition
        return s4


    def func_condition():
        nonlocal CUR_STAGE
        nonlocal MAX_STAGE
        if CUR_STAGE < MAX_STAGE:
            func_on_true()
        else:
            func_on_false()

    def func_on_true():
        nonlocal CUR_STAGE
        nonlocal MAX_STAGE
        print('finishing stage %d of %d' % (CUR_STAGE, MAX_STAGE))

        # --------------------------
        # MD stage
        s1 = generate_MD_stage(num_MD=cfg['md_counts'])
        # Add simulating stage to the training pipeline
        p.add_stages(s1)

        # --------------------------
        # Aggregate stage
        s2 = generate_aggregating_stage()
        p.add_stages(s2)

        if CUR_STAGE % cfg['RETRAIN_FREQ'] == 0:
            # --------------------------
            # Learning stage
            s3 = generate_ML_stage(num_ML=cfg['ml_counts'])
            # Add the learning stage to the pipeline
            p.add_stages(s3)

        # --------------------------
        # Outlier identification stage
        s4 = generate_interfacing_stage()
        p.add_stages(s4)

        CUR_STAGE += 1

    def func_on_false():
        print ('Done')

    p = Pipeline()
    p.name = 'MD_ML'

    # --------------------------
    # MD stage
    s1 = generate_MD_stage(num_MD=cfg['md_counts'])
    # Add simulating stage to the training pipeline
    p.add_stages(s1)

    # --------------------------
    # Aggregate stage
    s2 = generate_aggregating_stage()
    # Add the aggregating stage to the training pipeline
    p.add_stages(s2)

    # --------------------------
    # Learning stage
    s3 = generate_ML_stage(num_ML=cfg['ml_counts'])
    # Add the learning stage to the pipeline
    p.add_stages(s3)

    # --------------------------
    # Outlier identification stage
    s4 = generate_interfacing_stage()
    p.add_stages(s4)

    CUR_STAGE += 1

    return p
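
The loop above works because EnTK re-evaluates a stage's post_exec callback
when that stage completes, so func_on_true() can append new stages to a
pipeline that is already running. A minimal, self-contained sketch of the
same adaptive pattern (all names and values here are illustrative, not part
of the code above):

from radical.entk import Pipeline, Stage, Task

CUR_STAGE = 0
MAX_STAGE = 3
p = Pipeline()

def add_stage():
    global CUR_STAGE
    s = Stage()
    t = Task()
    t.executable = '/bin/echo'
    t.arguments  = ['adaptive stage %d' % CUR_STAGE]
    s.add_tasks(t)
    s.post_exec = condition       # re-evaluated once this stage finishes
    p.add_stages(s)
    CUR_STAGE += 1

def condition():
    # Keep growing the pipeline until MAX_STAGE stages have been added
    if CUR_STAGE < MAX_STAGE:
        add_stage()

add_stage()                       # seed the first stage; the rest are adaptive
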
def test_rp_da_scheduler_bw():
    """
    **Purpose**: Run an EnTK application with the data-aware scheduler
    on NCSA Blue Waters (ncsa.bw_aprun)
    """

    p1 = Pipeline()
    p1.name = 'p1'

    n = 10

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x
        t.executable = ['/bin/hostname']
        t.arguments = ['>','hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 10
        t.download_output_data = ['hostname.txt > s1_t%s_hostname.txt'%(x)]

        s1.add_tasks(t)

    p1.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['/bin/hostname']
        t.arguments = ['>','hostname.txt']
        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 16
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.download_output_data = ['hostname.txt > s2_t%s_hostname.txt'%(x)]
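        # 'tag' presumably points the data-aware scheduler at the same-named
        # stage-1 task so both run on the same node, which is what the
        # hostname comparison at the end of the test verifies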
        t.tag = 't%s'%x

        s2.add_tasks(t)


    p1.add_stages(s2)

    res_dict = {
                'resource'      : 'ncsa.bw_aprun',
                'walltime'      : 10,
                'cpus'          : 128,
                'project'       : 'gk4',
                'queue'         : 'high'
            }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()

    # Tagged task pairs must have run on the same node
    for i in range(n):
        with open('s1_t%s_hostname.txt' % i) as f1, \
             open('s2_t%s_hostname.txt' % i) as f2:
            assert f1.readline().strip() == f2.readline().strip()


    txts = glob('%s/*.txt' % os.getcwd())
    for f in txts:
        os.remove(f)
Example No. 42
    def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):

        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """


        self._prof.prof('InitcreateMDworkflow_{0}'.format(Cycle), uid=self._uid)
        # Read the exchange partner of each replica from exchangePairs_<Cycle>.dat
        with open('exchangePairs_{0}.dat'.format(Cycle), 'r') as f:
            ExchangeArray = []
            for line in f:
                ExchangeArray.append(int(line.split()[1]))

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(Cycle)
        # Bookkeeping
        stage_uids = list()
        task_uids = list()
        md_dict = dict()


        # Create the MD stage for this cycle
        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(Cycle)

        self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)
    
        for r in range(Replicas):
            md_tsk                 = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
            md_tsk.name            = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=Cycle)
            # Link the exchange partner's restart file from the previous
            # cycle, plus the topology and per-replica input file
            md_tsk.link_input_data = ['%s/restrt > inpcrd' % (self.Book[Cycle-1][ExchangeArray[r]]),
                                      '%s/prmtop' % (self.Book[0][r]),
                                      '%s/mdin_{0}'.format(r) % (self.Book[0][r])]

            md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop',
                                '-c', 'inpcrd', '-o', 'out_{0}'.format(r),
                                '-inf', 'mdinfo_{0}'.format(r)]
            md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)

        q.add_stages(md_stg)

        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(Cycle+1)

        #Create Exchange Task
        ex_tsk                      = Task()
        ex_tsk.name                 = 'extsk{0}'.format(Cycle+1)
        ex_tsk.executable           = ['python']
        ex_tsk.upload_input_data    = [ExchangeMethod]
        for r in range (Replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[r],r)]

        # Assumes the uploaded ExchangeMethod script is named TempEx.py
        ex_tsk.arguments            = ['TempEx.py','{0}'.format(Replicas), '{0}'.format(Cycle+1)]
        ex_tsk.cores                = 1
        ex_tsk.mpi                  = False
        ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(Cycle+1)]  # finds exchange partners and generates the exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.Book.append(md_dict)
        return q
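
AMBERTask is a helper defined elsewhere in this class. A plausible minimal
sketch of such a factory, assuming it only pre-configures the executable and
resource requirements using the same old-style Task attributes (cores, mpi)
seen above:

from radical.entk import Task

def AMBERTask(cores, MD_Executable):
    # Hypothetical factory: returns a Task pre-configured for an AMBER MD
    # run; the caller fills in name, input data and arguments
    t = Task()
    t.executable = [MD_Executable]
    t.cores      = cores
    t.mpi        = True
    return t
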
Example No. 43
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()
    p.name = 'p1'

    # Create a Stage object
    s1 = Stage()
    s1.name = 's1'

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.name = 't1'
    t1.executable = '/bin/bash'
    t1.arguments = ['-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()
    s2.name = 's2'
    s2_task_uids = []

    for cnt in range(30):

        # Create a Task object
        t2 = Task()
        t2.name = 't%s' % (cnt + 1)
        t2.executable = '/bin/bash'
        t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']
        # Copy data from the task in the first stage to the current task's location
        t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.name, s1.name, t1.name)]

        # Add the Task to the Stage
        s2.add_tasks(t2)
        s2_task_uids.append(t2.name)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create another Stage object to hold checksum tasks
    s3 = Stage()
    s3.name = 's3'

    for cnt in range(30):

        # Create a Task object
        t3 = Task()
        t3.name = 't%s' % (cnt + 1)
        t3.executable = '/bin/bash'
        t3.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
        # Copy data from the task in the first stage to the current task's location
        t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/ccount.txt' % (p.name, s2.name, s2_task_uids[cnt])]
        # Download the output of the current task to the current location
        t3.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]

        # Add the Task to the Stage
        s3.add_tasks(t3)

    # Add Stage to the Pipeline
    p.add_stages(s3)

    return p
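
A typical driver for the pipeline above, assuming a local RabbitMQ instance
for EnTK (hostname, port and the resource description are illustrative):

from radical.entk import AppManager

if __name__ == '__main__':

    appman = AppManager(hostname='localhost', port=5672)
    appman.resource_desc = {'resource': 'local.localhost',
                            'walltime': 10,
                            'cpus'    : 1}
    appman.workflow = [generate_pipeline()]
    appman.run()
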
Example No. 44
    def general_cycle(self, replicas, replica_cores, cycle, python_path, md_executable, exchange_method, pre_exec):
        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """

        self._prof.prof('InitcreateMDworkflow_{0}'.format(cycle), uid=self._uid)
        with open('exchangePairs_{0}.dat'.format(cycle),
                  'r') as f:  # Read exchangePairs.dat
            exchange_array = []
            for line in f:
                exchange_array.append(int(line.split()[1]))

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(cycle)
        # Bookkeeping
        stage_uids = list()
        task_uids = list()
        md_dict = dict()

        #Create MD stage

        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(cycle)

        self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

        for r in range(replicas):
            md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(
                replica=r, cycle=cycle)
            md_tsk.link_input_data = [
                '%s/restrt > inpcrd' %
                (self.book[cycle - 1][exchange_array[r]]),
                '%s/prmtop' % (self.book[0][r]),
                '%s/mdin_{0}'.format(r) % (self.book[0][r])
            ]

            ### The following softlinking scheme should be used ONLY if the
            ### node-local file system is used (not fully supported yet):
            #md_tsk.link_input_data = ['$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=exchange_array[r],cycle=cycle-1) > '$NODE_LFS_PATH/inpcrd',
            #                          #'%s/restrt > inpcrd'%(self.book[cycle-1][exchange_array[r]]),
            #                          '%s/prmtop'%(self.book[0][r]),
            #                          '%s/mdin_{0}'.format(r)%(self.book[0][r])]

            md_tsk.arguments = [
                '-O',
                '-i',
                'mdin_{0}'.format(r),
                '-p',
                'prmtop',
                '-c',
                'inpcrd',
                #'-c', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle-1),
                '-o',
                'out-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                '-r',
                'restrt',
                #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle),
                '-x',
                'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            #md_tsk.tag              = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)

        q.add_stages(md_stg)

        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(cycle + 1)

        #Create Exchange Task
        ex_tsk = Task()
        ex_tsk.name = 'extsk{0}'.format(cycle + 1)
        ex_tsk.executable = [python_path]
        ex_tsk.upload_input_data = [exchange_method]
        for r in range(replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.pre_exec = ['mv *.py exchange_method.py']
        ex_tsk.arguments = [
            'exchange_method.py', '{0}'.format(replicas), '{0}'.format(cycle + 1)
        ]
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = [
            'exchangePairs_{0}.dat'.format(cycle + 1)
        ]  # finds exchange partners and generates the exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.book.append(md_dict)
        return q
Example No. 45
    def init_cycle(self, replicas, replica_cores, python_path, md_executable, exchange_method, min_temp, max_temp, timesteps, basename, pre_exec):  # "cycle" = 1 MD stage plus the subsequent exchange computation
        """ 
        Initial cycle consists of:
        1) Create tarball of MD input data 
        2) Transfer the tarball to pilot sandbox
        3) Untar the tarball
        4) Run first cycle
        """

        # Initialize the pipeline
        self._prof.prof('InitPipeline', uid=self._uid)
        p = Pipeline()
        p.name = 'initpipeline'

        md_dict = dict()  #bookkeeping
        tar_dict = dict()  #bookkeeping

        #Write the input files

        self._prof.prof('InitWriteInputs', uid=self._uid)

        writeInputs.writeInputs(
            max_temp=max_temp,
            min_temp=min_temp,
            replicas=replicas,
            timesteps=timesteps,
            basename=basename)

        self._prof.prof('EndWriteInputs', uid=self._uid)

        self._prof.prof('InitTar', uid=self._uid)
        #Create Tarball of input data

        tar = tarfile.open("input_files.tar", "w")
        for name in [
                basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"
        ]:
            tar.add(name)
        for r in range(replicas):
            tar.add('mdin_{0}'.format(r))
        tar.close()

        #delete all input files outside the tarball

        for r in range(replicas):
            os.remove('mdin_{0}'.format(r))

        self._prof.prof('EndTar', uid=self._uid)

        #Create Untar Stage

        repo = git.Repo('.', search_parent_directories=True)
        aux_function_path = repo.working_tree_dir


        untar_stg = Stage()
        untar_stg.name = 'untarStg'

        # Untar task
        untar_tsk = Task()
        untar_tsk.name = 'untartsk'
        untar_tsk.executable = ['python']

        untar_tsk.upload_input_data = [
            str(aux_function_path)+'/repex/untar_input_files.py', 'input_files.tar'
        ]
        untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
        untar_tsk.cores = 1
        untar_stg.add_tasks(untar_tsk)
        p.add_stages(untar_stg)

        tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, untar_stg.name, untar_tsk.name)

        # First MD stage: defined separately since the workflow is not built
        # in a predetermined order, and equilibration must happen first.

        md_stg = Stage()
        md_stg.name = 'mdstg0'
        self._prof.prof('InitMD_0', uid=self._uid)

        # MD tasks

        for r in range(replicas):

            md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
            md_tsk.link_input_data += [
                '%s/inpcrd' % tar_dict[0],
                '%s/prmtop' % tar_dict[0],
                '%s/mdin_{0}'.format(r) %
                tar_dict[0]  #Use for full temperature exchange
            ]
            md_tsk.arguments = [
                '-O',
                '-p',
                'prmtop',
                '-i',
                'mdin_{0}'.format(r),
                '-c',
                'inpcrd',
                '-o',
                'out-{replica}-{cycle}'.format(replica=r, cycle=0),
                '-r',
                'restrt',
                #'-r',  'rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                '-x',
                'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
                #'-o',  '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r,cycle=0),
                #'-r',  '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                #'-x',  '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r,cycle=0),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                p.name, md_stg.name, md_tsk.name)

            md_stg.add_tasks(md_tsk)
            self.md_task_list.append(md_tsk)
        p.add_stages(md_stg)
        #stage_uids.append(md_stg.uid)

        # First Exchange Stage

        ex_stg = Stage()
        ex_stg.name = 'exstg0'
        self._prof.prof('InitEx_0', uid=self._uid)

        # Create Exchange Task

        ex_tsk = Task()
        ex_tsk.name = 'extsk0'
        #ex_tsk.pre_exec             = ['module load python/2.7.10']
        ex_tsk.executable = [python_path]
        ex_tsk.upload_input_data = [exchange_method]
        for r in range(replicas):
            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.pre_exec = ['mv *.py exchange_method.py']
        ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = ['exchangePairs_0.dat']
        ex_stg.add_tasks(ex_tsk)
        #task_uids.append(ex_tsk.uid)
        p.add_stages(ex_stg)
        self.ex_task_list.append(ex_tsk)
        #self.ex_task_uids.append(ex_tsk.uid)
        self.book.append(md_dict)
        return p
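
Taken together with general_cycle() above, a driver would run the initial
pipeline first and only then build each subsequent cycle, because
general_cycle() parses the exchangePairs_<n>.dat file downloaded by the
previous cycle's exchange task. A hedged sketch; 'rex' (the class instance),
'appman', 'n_cycles' and all configuration values are assumed to be supplied
by the surrounding application, and the exact cycle indexing depends on how
the exchange script numbers its output files:

appman.workflow = [rex.init_cycle(replicas, replica_cores, python_path,
                                  md_executable, exchange_method,
                                  min_temp, max_temp, timesteps,
                                  basename, pre_exec)]
appman.run()                    # downloads exchangePairs_0.dat

for cycle in range(n_cycles):   # hypothetical cycle indexing
    # Each pipeline must finish before the next is constructed, since
    # general_cycle() reads the previous cycle's exchange pairs at build time
    appman.workflow = [rex.general_cycle(replicas, replica_cores, cycle,
                                         python_path, md_executable,
                                         exchange_method, pre_exec)]
    appman.run()
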