Example #1
def get_pipeline(tasks):

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage 1
    s1 = Stage()

    # Create a Task object according to the app_name
    # ('app_coll' is assumed to be a module-level dict describing each application)
    t1 = Task()
    t1.pre_exec = ['module load gromacs/5.0/INTEL-140-MVAPICH2-2.0']
    t1.executable = app_coll['grompp']['executable']
    t1.arguments = app_coll['grompp']['arguments']
    t1.cores = app_coll['grompp']['cores']
    t1.link_input_data = [
        '$SHARED/grompp.mdp > grompp.mdp', '$SHARED/input.gro > input.gro',
        '$SHARED/topol.top > topol.top'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create a Stage 2
    s2 = Stage()

    for cnt in range(tasks):

        # Create a Task object according to the app_name
        t2 = Task()
        t2.pre_exec = [
            'module load gromacs/5.0/INTEL-140-MVAPICH2-2.0',
            'export OMP_NUM_THREADS=%s' % num_cores
        ]
        t2.executable = app_coll['mdrun']['executable']
        t2.arguments = app_coll['mdrun']['arguments']
        #t2.cores = app_coll['mdrun']['cores']
        t2.cores = num_cores  # 'num_cores' is assumed to be defined at module level
        t2.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/topol.tpr' % (p.uid, s1.uid, t1.uid)
        ]

        # Add the Task to the Stage
        s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    return p
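
A minimal submission sketch (not part of the example above; the hostname, port and resource values are illustrative assumptions): a pipeline built by get_pipeline() would typically be handed to an EnTK AppManager for execution.

from radical.entk import AppManager

amgr = AppManager(hostname='localhost', port=5672)   # RabbitMQ endpoint (assumed)
amgr.resource_desc = {
    'resource': 'local.localhost',                   # illustrative resource label
    'walltime': 30,
    'cpus'    : 16,
}
amgr.workflow = [get_pipeline(tasks=16)]
amgr.run()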
Example #2
def test_assignment_exceptions():

    t = Task()

    data_type = [1, 'a', True, list()]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data
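
The type checks exercised above fire at assignment time. A small interactive sketch (assuming radical.entk is importable):

from radical.entk import Task

t = Task()
t.name = 'md.0001'                      # strings are accepted
try:
    t.pre_exec = 'module load gromacs'  # not a list, so the setter raises
except TypeError as e:
    print('rejected: %s' % e)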
Example #3
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
                 ComputeUnitDescription with the complete Task description.
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = 'grompp'
    t1.arguments = ['hello']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    t1.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP'
    }
    t1.post_exec = ['echo test']
    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    cud = create_cud_from_task(t1, placeholders)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (
        t1.uid, t1.name, t1.parent_stage['uid'], t1.parent_stage['name'],
        t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.post_exec == t1.post_exec
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']

    assert {
        'source': 'upload_input.dat',
        'target': 'upload_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_input.dat',
        'action': rp.COPY,
        'target': 'copy_input.dat'
    } in cud.input_staging
    assert {
        'source': 'link_input.dat',
        'action': rp.LINK,
        'target': 'link_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_output.dat',
        'action': rp.COPY,
        'target': 'copy_output.dat'
    } in cud.output_staging
    assert {
        'source': 'download_output.dat',
        'target': 'download_output.dat'
    } in cud.output_staging
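
As the first assert above shows, the CUD name packs the task, stage and pipeline identifiers into a single comma-separated string. The hypothetical helper below simply restates how a consumer could unpack it:

def unpack_cud_name(cud_name):
    # field order matches the assert on cud.name above
    (task_uid, task_name,
     stage_uid, stage_name,
     pipe_uid, pipe_name) = cud_name.split(',')
    return task_uid, task_name, stage_uid, stage_name, pipe_uid, pipe_name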
Example #4
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the
                 correct RP input transfer directives when given a Task.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(t, placeholders)

    # Test link input data
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0])

    t = Task()
    t.link_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.link_input_data[0].split('>')[1].strip())

    # Test copy input data
    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0])

    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.copy_input_data[0].split('>')[1].strip())

    # Test move input data
    t = Task()
    t.move_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0])

    t = Task()
    t.move_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.move_input_data[0].split('>')[1].strip())

    # Test upload input data

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0])

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.upload_input_data[0].split('>')[1].strip())
Example #5
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the correct RP input transfer directives
    when given a Task
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(t, placeholder_dict)

    # Test link input data
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.link_input_data[0]
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0])

    t = Task()
    t.link_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.link_input_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0].split('>')[1].strip())

    # Test copy input data
    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.copy_input_data[0]
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0])

    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.copy_input_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0].split('>')[1].strip())


    # Test move input data
    t = Task()
    t.move_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.move_input_data[0]
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0])

    t = Task()
    t.move_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.move_input_data[0].split('>')[0].strip()
    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0].split('>')[1].strip())

    # Test upload input data

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.upload_input_data[0]
    assert 'action' not in ip_list[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0])

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholder_dict)

    assert ip_list[0]['source'] == t.upload_input_data[0].split('>')[0].strip()
    assert 'action' not in ip_list[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0].split('>')[1].strip())
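
Both versions of the test rely on the same 'source > target' convention for staging entries. The hypothetical helper below restates that convention outside of EnTK, matching the assertions above:

import os

def split_staging_entry(entry):
    # 'a/b.dat > c.dat' -> source 'a/b.dat', target 'c.dat'
    # 'a/b.dat'         -> source 'a/b.dat', target 'b.dat'
    if '>' in entry:
        source, target = [x.strip() for x in entry.split('>')]
        return source, os.path.basename(target)
    return entry, os.path.basename(entry)

assert split_staging_entry('/home/vivek/test.dat > new_test.dat') == ('/home/vivek/test.dat', 'new_test.dat')
assert split_staging_entry('/home/vivek/test.dat') == ('/home/vivek/test.dat', 'test.dat')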
Example #6
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  # 0
    # for a non-zero start iteration it is assumed that the files are already in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory)  # e.g. '/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    #if cur_iter==0:
    #	restart_iter=0
    #else:
    #	restart_iter=cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file)
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        # ------------------------------------------------------------------------------------------------------------------

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        def_rep_per_thread = int(num_replicas / num_parallel) + 1
        num_allocated_rep = 0
        num_used_threads = 0
        while (num_allocated_rep < num_replicas):
            if (num_used_threads == num_parallel):
                print("ALLERT tried use more gpus than allocated")
            if ((num_replicas - num_allocated_rep) > def_rep_per_thread):
                use_replicas = def_rep_per_thread
            else:
                use_replicas = (num_replicas - num_allocated_rep)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module load bwpy',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=md',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'run_openmm.py', '--trajstride', '10', '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'True', '>', 'md.log'
            ]
            link_arr = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            if str(Kconfig.strategy) == 'extend':
                copy_out = []
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                        (combined_path, cur_iter, idx, combined_path,
                         (cur_iter + 1), idx)
                    ]

                sim_task.copy_output_data = copy_out
                #if Kconfig.ndx_file is not None:
                #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module load bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=tica_msm_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                'run-tica-msm.py', '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '>', 'analyse.log'
            ]

            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }

            ana_task.link_input_data = [
                '$SHARED/run-tica-msm.py > run-tica-msm.py',
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]

            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]

            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                   'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                   'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                   'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                   '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                   ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                           'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                           'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)

        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
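
The inner while loop of the simulation stage partitions num_replicas replicas over at most num_parallel tasks; each sim_task receives one index range via --idxstart/--idxend. A standalone sketch of that partitioning with illustrative values:

num_replicas = 10                 # illustrative
num_parallel = 4                  # illustrative
def_rep_per_thread = int(num_replicas / num_parallel) + 1

chunks = []
num_allocated_rep = 0
while num_allocated_rep < num_replicas:
    use_replicas = min(def_rep_per_thread, num_replicas - num_allocated_rep)
    chunks.append((num_allocated_rep, num_allocated_rep + use_replicas))
    num_allocated_rep += use_replicas

print(chunks)                     # [(0, 3), (3, 6), (6, 9), (9, 10)]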
Example #7
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a
    dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': [],
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        },
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {
            'uid': None,
            'name': None
        },
        'parent_pipeline': {
            'uid': None,
            'name': None
        }
    }

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': ['sleep'],
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 10,
            'process_type': None,
            'threads_per_process': 2,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 5,
            'process_type': None,
            'threads_per_process': 3,
            'thread_type': None
        },
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {
            'uid': 's1',
            'name': 'stage1'
        },
        'parent_pipeline': {
            'uid': 'p1',
            'name': 'pipeline1'
        }
    }
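
A minimal round-trip sketch; it assumes the Task class also exposes a from_dict() method (EnTK uses one for (de)serialization), so treat it as illustrative rather than part of the test above:

t1 = Task()
t1.name = 'roundtrip'
t1.executable = ['sleep']
t1.arguments = ['10']

t2 = Task()
t2.from_dict(t1.to_dict())      # assumed API: populate a Task from its dict form

assert t2.to_dict() == t1.to_dict()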
Example #8
def get_pipeline(instance, iterations):

    # Create a Pipeline object
    p = Pipeline()

    # Create Stage 1
    s1 = Stage()

    # Create a Task
    t1 = Task()
    t1.pre_exec = ['module load python/2.7.7-anaconda']
    t1.executable = ['python']
    t1.arguments = [
        'analysis_1.py', '--template', 'CB7G3_template.mdp', '--newname',
        'CB7G3_run.mdp', '--wldelta', '2', '--equilibrated', 'False',
        '--lambda_state', '0', '--seed',
        '%s' % SEED
    ]
    t1.cores = 1
    t1.copy_input_data = [
        '$SHARED/CB7G3_template.mdp', '$SHARED/analysis_1.py'
    ]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    for it in range(1, iterations + 1):

        # Create Stage 2
        s2 = Stage()

        # Create a Task
        t2 = Task()
        t2.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t2.executable = ['gmx grompp']
        t2.arguments = [
            '-f', 'CB7G3_run.mdp', '-c', 'CB7G3.gro', '-p', 'CB7G3.top', '-n',
            'CB7G3.ndx', '-o', 'CB7G3.tpr', '-maxwarn', '10'
        ]
        t2.cores = 1
        t2.copy_input_data = [
            '$SHARED/CB7G3.ndx', '$SHARED/CB7G3.top', '$SHARED/3atomtypes.itp',
            '$SHARED/3_GMX.itp', '$SHARED/cucurbit_7_uril_GMX.itp'
        ]

        if it == 1:  # first iteration: take CB7G3_run.mdp from Stage 1 and CB7G3.gro from $SHARED
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s1.uid, t1.uid), '$SHARED/CB7G3.gro'
            ]
        else:
            t2.copy_input_data += [
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3_run.mdp' %
                (p.uid, s4.uid, t4.uid),
                '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.gro' %
                (p.uid, s3.uid, t3.uid)
            ]

        # Add the Task to the Stage
        s2.add_tasks(t2)

        # Add Stage to the Pipeline
        p.add_stages(s2)

        # Create Stage 3
        s3 = Stage()

        # Create a Task
        t3 = Task()
        t3.pre_exec = [
            'source /home/trje3733/pkgs/gromacs/5.1.3.wlmod/bin/GMXRC.bash'
        ]
        t3.executable = ['gmx mdrun']
        t3.arguments = [
            '-nt',
            20,
            '-deffnm',
            'CB7G3',
            '-dhdl',
            'CB7G3_dhdl.xvg',
        ]
        t3.cores = 20
        # t3.mpi = True
        t3.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/CB7G3.tpr' % (p.uid, s2.uid, t2.uid)
        ]
        t3.copy_output_data = [
            'CB7G3_dhdl.xvg > $SHARED/CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > $SHARED/CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.log > $SHARED/CB7G3_run{1}_gen{0}.log'.format(it, instance)
        ]
        t3.download_output_data = [
            'CB7G3.xtc > CB7G3_run{1}_gen{0}.xtc'.format(it, instance),
            'CB7G3.log > CB7G3_run{1}_gen{0}.log'.format(it, instance),
            'CB7G3_dhdl.xvg > CB7G3_run{1}_gen{0}_dhdl.xvg'.format(
                it, instance),
            'CB7G3_pullf.xvg > CB7G3_run{1}_gen{0}_pullf.xvg'.format(
                it, instance),
            'CB7G3_pullx.xvg > CB7G3_run{1}_gen{0}_pullx.xvg'.format(
                it, instance),
            'CB7G3.gro > CB7G3_run{1}_gen{0}.gro'.format(it, instance)
        ]

        # Add the Task to the Stage
        s3.add_tasks(t3)

        # Add Stage to the Pipeline
        p.add_stages(s3)

        # Create Stage 4
        s4 = Stage()

        # Create a Task
        t4 = Task()
        t4.pre_exec = [
            'module load python',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis/alchemical_analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/modules/alchemical-analysis:$PYTHONPATH',
            'export PYTHONPATH=/home/vivek91/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'ln -s ../staging_area data'
        ]
        t4.executable = ['python']
        t4.arguments = [
            '--newname=CB7G3_run.mdp',
            '--template=CB7G3_template.mdp',
            '--dir=./data',
            #'--prev_data=%s'%DATA_LOC
            '--gen={0}'.format(it, instance),
            '--run={1}'.format(it, instance)
        ]
        t4.cores = 1
        t4.link_input_data = [
            '$SHARED/analysis_2.py',
            '$SHARED/alchemical_analysis.py',
            '$SHARED/CB7G3_template.mdp',
        ]
        t4.download_output_data = [
            'analyze_1/results.txt > results_run{1}_gen{0}.txt'.format(
                it, instance),
            'STDOUT > stdout_run{1}_gen{0}'.format(it, instance),
            'STDERR > stderr_run{1}_gen{0}'.format(it, instance),
            'CB7G3_run.mdp > CB7G3_run{1}_gen{0}.mdp'.format(it, instance),
            'results_average.txt > results_average_run{1}_gen{0}.txt'.format(
                it, instance)
        ]

        # Add the Task to the Stage
        s4.add_tasks(t4)

        # Add Stage to the Pipeline
        p.add_stages(s4)

    return p
Example #9
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):

    """
    All cycles after the initial cycle
    """

    with open("exchangePairs.dat","r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray
                

    q = Pipeline()
    #Bookkeeping
    stage_uids = list()
    task_uids = list() ## = dict()
    md_dict = dict()


    #Create initial MD stage


    md_stg = Stage()
    for r in range (Replicas):
        md_tsk                 = Task()
        md_tsk.executable      = [MD_Executable]  #MD Engine, Blue Waters
        # 'Book' is a module-level bookkeeping list of per-cycle task-path dicts;
        # index it with the 'Cycles' argument (the current cycle number), not the function name.
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[Cycles-1][ExchangeArray[r]]),
                                  '%s/prmtop'%(Book[Cycles-1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
                                  '%s/mdin'%(Book[Cycles-1][r])]

        md_tsk.pre_exec        = ['export AMBERHOME=$HOME/amber/amber14/'] # Should be abstracted from user?
        #md_tsk.pre_exec       = ['module load amber']
        #md_tsk.arguments      = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores           = Replica_Cores
        md_tsk.mpi             = True
        md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.uid, md_stg.uid, md_tsk.uid)  # the pipeline here is 'q', not 'p'
        md_stg.add_tasks(md_tsk)

        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    ex_stg = Stage()
    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print md_dict[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[n1],n1)]  # md_dict holds the MD task references

    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
        #print d
        #print Book
    return q
Example #10
def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']  # runs for this campaign

    # setup S1 workload
    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create cfg with requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1.RAPTOR'
    s = Stage()

    # create cfg
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d = rs.filesystem.Directory(rurl)
    ls = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, n_workers, runtime in runs:

        print('%30s  %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt = '%s.%s.gz' % (name, workload.output)

        cpw = cfg.cpw
        gpw = cfg.gpw
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles = smiles
        cfg.workload.name = name
        cfg.runtime = runtime
        cfg.n_workers = n_workers
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()

            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]

            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_reqs = {
                'processes': 1,
                'threads_per_process': 4,
                'thread_type': None,
                'process_type': None
            }
            t.upload_input_data = [
                'wf0_master.py', 'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            #t.download_output_data = ['%s.%s.gz > results/%s.%s.gz' %
            #    (name, workload.output, name, workload.output)]

            s.add_tasks(t)

    p.add_stages(s)

    return p
Example #11
def test_task_exceptions(s,l,i,b):

    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s,l,i,b]

    for data in data_type:

        if not isinstance(data,str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data,list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, list):

            with pytest.raises(TypeError):
                t.executable = data

        if not isinstance(data, str):  # the original check also covered 'unicode' on Python 2

            # note: only the first offending assignment below is actually exercised, since it raises
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': data,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': data
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': data,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': data
                            }

        if not isinstance(data, int):

            # note: only the first offending assignment below is actually exercised, since it raises
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                                'processes': data,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.cpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': data,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': data,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                            }
                t.gpu_reqs = {
                                'processes': 1,
                                'process_type': None,
                                'threads_per_process': data,
                                'thread_type': None
                            }
Example #12
def test_task_to_dict():

    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a
    dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {   'uid': None,
                    'name': None,
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': [],
                    'executable': str(),
                    'arguments': [],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 0,
                                'process_type': None,
                                'threads_per_process': 0,
                                'thread_type': None
                                },
                    'lfs_per_process': 0,
                    'upload_input_data': [],
                    'copy_input_data': [],
                    'link_input_data': [],
                    'move_input_data': [],
                    'copy_output_data': [],
                    'move_output_data': [],
                    'download_output_data': [],
                    'stdout': None,
                    'stderr': None,
                    'exit_code': None,
                    'path': None,
                    'tag': None,
                    'parent_stage': {'uid':None, 'name': None},
                    'parent_pipeline': {'uid':None, 'name': None}}


    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {   'uid': 'test.0000',
                    'name': 'new',
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': ['module load abc'],
                    'executable': 'sleep',
                    'arguments': ['10'],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 10,
                                'process_type': None,
                                'threads_per_process': 2,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 5,
                                'process_type': None,
                                'threads_per_process': 3,
                                'thread_type': None
                                },
                    'lfs_per_process': 1024,
                    'upload_input_data': ['test1'],
                    'copy_input_data': ['test2'],
                    'link_input_data': ['test3'],
                    'move_input_data': ['test4'],
                    'copy_output_data': ['test5'],
                    'move_output_data': ['test6'],
                    'download_output_data': ['test7'],
                    'stdout': 'out',
                    'stderr': 'err',
                    'exit_code': 1,
                    'path': 'a/b/c',
                    'tag': 'task.0010',
                    'parent_stage': {'uid': 's1', 'name': 'stage1'},
                    'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}


    t.executable = 'sleep'
    d = t.to_dict()

    assert d == {   'uid': 'test.0000',
                    'name': 'new',
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': ['module load abc'],
                    'executable': 'sleep',
                    'arguments': ['10'],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 10,
                                'process_type': None,
                                'threads_per_process': 2,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 5,
                                'process_type': None,
                                'threads_per_process': 3,
                                'thread_type': None
                                },
                    'lfs_per_process': 1024,
                    'upload_input_data': ['test1'],
                    'copy_input_data': ['test2'],
                    'link_input_data': ['test3'],
                    'move_input_data': ['test4'],
                    'copy_output_data': ['test5'],
                    'move_output_data': ['test6'],
                    'download_output_data': ['test7'],
                    'stdout': 'out',
                    'stderr': 'err',
                    'exit_code': 1,
                    'path': 'a/b/c',
                    'tag': 'task.0010',
                    'parent_stage': {'uid': 's1', 'name': 'stage1'},
                    'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}
Example #13
def create_workflow(Kconfig):

    # User settings
    ENSEMBLE_SIZE = int(Kconfig.num_CUs)  # Number of ensemble members
    TOTAL_ITERS = int(
        Kconfig.num_iterations)  # Number of iterations to run current trial

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    '''
    pre_proc_stage :

        Purpose : Transfer files and split the input file into smaller files, one per
            gromacs instance in the first iteration.

        Arguments :
            inputfile = file to be split
            numCUs    = number of simulation instances / number of smaller files
    '''
    pre_proc_stage = Stage()
    pre_proc_task = Task()
    pre_proc_task.pre_exec = ['module load bwpy']
    pre_proc_task.executable = ['python']
    pre_proc_task.arguments = [
        'spliter.py', Kconfig.num_CUs,
        os.path.basename(Kconfig.md_input_file)
    ]
    pre_proc_task.copy_input_data = [
        '$SHARED/%s' % os.path.basename(Kconfig.md_input_file),
        '$SHARED/spliter.py', '$SHARED/gro.py'
    ]
    pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
        wf.uid, pre_proc_stage.uid, pre_proc_task.uid)

    pre_proc_stage.add_tasks(pre_proc_task)
    wf.add_stages(pre_proc_stage)
    # ------------------------------------------------------------------------------------------------------------------

    cur_iter = 0
    while (cur_iter < TOTAL_ITERS):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' number of instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(ENSEMBLE_SIZE):

            sim_task = Task()
            sim_task.pre_exec = [
                'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
                'module load bwpy', 'module load platform-mpi',
                'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
                'export PATH=/u/sciteam/balasubr/.local/bin:$PATH'
            ]
            sim_task.executable = ['python']
            sim_task.cores = 16
            sim_task.arguments = [
                'run.py', '--mdp',
                os.path.basename(Kconfig.mdp_file), '--top',
                os.path.basename(Kconfig.top_file), '--gro', 'start.gro',
                '--out', 'out.gro'
            ]
            sim_task.link_input_data = [
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.mdp_file)),
                '$SHARED/{0} > {0}'.format(os.path.basename(Kconfig.top_file)),
                '$SHARED/run.py > run.py'
            ]

            if Kconfig.ndx_file is not None:
                sim_task.link_input_data.append('$SHARED/{0}'.format(
                    os.path.basename(Kconfig.ndx_file)))

            if (cur_iter == 0):
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))

            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'source /u/sciteam/balasubr/modules/gromacs/build-cpu-serial/bin/GMXRC.bash',
            'module load bwpy'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = [
            'pre_analyze.py', Kconfig.num_CUs, 'tmp.gro', '.'
        ]

        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze.py > pre_analyze.py'
        ]
        for sim_num in range(ENSEMBLE_SIZE):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > $SHARED/iter_%s/tmpha.gro' % cur_iter,
            'tmp.gro > $SHARED/iter_%s/tmp.gro' % cur_iter
        ]

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load bwpy', 'module load platform-mpi',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        ana_task.executable = ['lsdmap']
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]

        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '$SHARED/iter_%s/tmpha.gro > tmpha.gro' % cur_iter
        ]
        ana_task.copy_output_data = [
            'tmpha.ev > $SHARED/iter_%s/tmpha.ev' % cur_iter,
            'out.nn > $SHARED/iter_%s/out.nn' % cur_iter
        ]
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/weight.w > weight.w' % ana_task_ref
            ]
            ana_task.copy_output_data += [
                'weight.w > $SHARED/iter_%s/weight.w' % cur_iter
            ]

        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > output/iter%s/lsdmap.log' % cur_iter
            ]

        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)

        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #                 to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #     Arguments:
        #             num_runs              = number of configurations to be generated in the new coordinate file
        #             out                   = output filename
        #             cycle                 = iteration number
        #             max_dead_neighbors    = max dead neighbors to be considered
        #             max_alive_neighbors   = max alive neighbors to be considered
        #             numCUs                = number of simulation instances/ number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task.pre_exec = [
            'module load bwpy',
            'export PYTHONPATH=/u/sciteam/balasubr/.local/lib/python2.7/site-packages:$PYTHONPATH',
            'export PATH=/u/sciteam/balasubr/.local/bin:$PATH',
            'source /u/sciteam/balasubr/ve-extasy/bin/activate'
        ]
        post_ana_task.executable = ['python']
        post_ana_task.arguments = [
            'post_analyze.py', Kconfig.num_runs, 'tmpha.ev', 'ncopies.nc',
            'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
            Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
            'input.gro', cur_iter, Kconfig.num_CUs
        ]

        post_ana_task.link_input_data = [
            '$SHARED/post_analyze.py > post_analyze.py',
            '$SHARED/selection.py > selection.py',
            '$SHARED/reweighting.py > reweighting.py',
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
            '$SHARED/iter_%s/tmp.gro > tmp.gro' % cur_iter,
            '$SHARED/iter_%s/tmpha.ev > tmpha.ev' % cur_iter,
            '$SHARED/iter_%s/out.nn > out.nn' % cur_iter,
            '$SHARED/input.gro > input.gro'
        ]

        if cur_iter > 0:
            post_ana_task.link_input_data += [
                '%s/weight.w > weight_new.w' % ana_task_ref
            ]

        if (cur_iter % Kconfig.nsave == 0):
            post_ana_task.download_output_data = [
                'out.gro > output/iter%s/out.gro' % cur_iter,
                'weight.w > output/iter%s/weight.w' % cur_iter
            ]

        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)

        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1

    return wf
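
# Note on the staging syntax used above: link_input_data, copy_input_data, copy_output_data and
# download_output_data take strings of the form 'source > target'; the examples also pass bare paths,
# which are staged under their basename. The helper below is purely illustrative and is not part of
# the original script.
import os

def staging_directive(source, target=None):
    """Build an EnTK-style 'source > target' staging string (illustrative only)."""
    if target is None:
        target = os.path.basename(source)
    return '%s > %s' % (source, target)

# e.g. staging_directive('$SHARED/iter_0/tmpha.gro') == '$SHARED/iter_0/tmpha.gro > tmpha.gro'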
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP ComputeUnitDescription with the complete
    Task description
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 1,
                   'thread_type': 'OpenMP'
                   }
    t1.gpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 2,
                   'thread_type': 'OpenMP'
                   }
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'], t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']
    assert cud.post_exec == t1.post_exec

    assert {'source': 'upload_input.dat', 'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat', 'target': 'download_output.dat'} in cud.output_staging
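
# The '$Pipeline_<p>_Stage_<s>_Task_<t>' references used throughout these examples are resolved
# against a nested dict like placeholder_dict above. The sketch below only illustrates that lookup;
# it is not the library's implementation (the real logic lives in create_cud_from_task) and it
# assumes the pipeline/stage/task components contain no underscores.
def resolve_placeholder(path, placeholders):
    if not path.startswith('$Pipeline_'):
        return path
    ref, _, rest = path.partition('/')
    _, pipe, _, stage, _, task = ref.split('_', 5)   # '$Pipeline', p, 'Stage', s, 'Task', t
    base = placeholders[pipe][stage][task]
    return '%s/%s' % (base, rest) if rest else base

# e.g. resolve_placeholder('$Pipeline_p1_Stage_s1_Task_t1/out.txt',
#                          {'p1': {'s1': {'t1': '/tmp/sandbox/t1'}}})
# returns '/tmp/sandbox/t1/out.txt'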
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #for non-zero iterations, files are assumed to already be in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory
                            )  #'/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  #run-tica-msm4.py
    vpy4_settings = [
        'module swap PrgEnv-cray PrgEnv-gnu', 'module load bwpy/1.2.4',
        'module add bwpy-mpi', 'module add fftw/3.3.4.10',
        'module add cray-netcdf',
        'module add cudatoolkit/9.1.85_3.10-1.0502.df1cc54.3.1',
        'module add cmake/3.1.3', 'module unload darshan xalt',
        'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
        'export FC=ftn',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy4_2_settings = [
        'module unload bwpy', 'module load bwpy',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LIBRARY_PATH="${BWPY_LIBRARY_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enabled',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugin',
        'source /projects/sciteam/bamm/hruska/vpy4/bin/activate',
        'bwpy-environ', 'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy8_settings = [
        'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
        'module unload gcc', 'module load gcc/5.3.0', 'module unload bwpy',
        'module load bwpy/2.0.0-pre0', 'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'module load craype-ml-plugin-py3/1.1.0',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy8/bin/activate',
        'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]
    vpy9_settings = [
        'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
        'module unload gcc', 'module load gcc/5.3.0', 'module unload bwpy',
        'module load bwpy/2.0.0-pre1', 'module load bwpy-mpi',
        'module add cudatoolkit',
        'export MODULEPATH="/sw/bw/bwpy/modulefiles/:${MODULEPATH}"',
        'export CPATH="${BWPY_INCLUDE_PATH}"',
        'export LDFLAGS="${LDFLAGS} -Wl,--rpath=${BWPY_LIBRARY_PATH}"',
        'export MPICH_GNI_MALLOC_FALLBACK=enable',
        'export MPICH_GNI_MAX_VSHORT_MSG_SIZE=64',
        'export MPICH_MAX_THREAD_SAFETY=multiple',
        'export MPICH_RMA_OVER_DMAPP=1',
        'export OPENMM_PLUGIN_DIR=/mnt/bwpy/single/usr/lib/plugins',
        'source /projects/sciteam/bamm/hruska/vpy9/bin/activate',
        'printenv > env.log', 'export OMP_NUM_THREADS=1',
        'export PYEMMA_NJOBS=1'
    ]  #'module load craype-ml-plugin-py3/1.1.0'
    if Kconfig.md_env == 'vpy4':
        md_settings = vpy4_settings
    if Kconfig.md_env == 'vpy8':
        md_settings = vpy8_settings
    if Kconfig.ana_env == 'vpy8':
        ana_settings = vpy8_settings
    if Kconfig.md_env == 'vpy9':
        md_settings = vpy9_settings
    if Kconfig.ana_env == 'vpy9':
        ana_settings = vpy9_settings

    #if cur_iter==0:
    #	restart_iter=0
    #else:
    #	restart_iter=cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['mv']
        pre_proc_task.arguments = [
            combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M")
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_reference, combined_path, Kconfig.md_reference),
            '$SHARED/%s > %s/%s' %
            ('analyze3.py', combined_path, 'analyze3.py')
        ]

        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

        # ------------------------------------------------------------------------------------------------------------------

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        def_rep_per_thread = int(np.ceil(num_replicas / num_parallel))
        num_allocated_rep = 0
        num_used_threads = 0
        while (num_allocated_rep < num_replicas):
            if (num_used_threads == num_parallel):
                print("ALLERT tried use more gpus than allocated")
            if ((num_replicas - num_allocated_rep) > def_rep_per_thread):
                use_replicas = def_rep_per_thread
            else:
                use_replicas = (num_replicas - num_allocated_rep)
            sim_task = Task()
            sim_task.executable = ['bwpy-environ']  #'python']

            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': None
            }
            sim_task.cpu_reqs = {
                'processes': 0,
                'process_type': None,
                'threads_per_process': 0,
                'thread_type': None
            }
            sim_task.arguments = [
                'python', 'run_openmm.py', '--trajstride',
                str(Kconfig.trajstride), '--Kconfig',
                str(args.Kconfig), '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'True', '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            if str(Kconfig.strategy) == 'extend':
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                    copy_out = copy_out + [
                        '%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' %
                        (combined_path, cur_iter, idx, combined_path,
                         (cur_iter + 1), idx)
                    ]

            for idx in range(num_allocated_rep,
                             num_allocated_rep + use_replicas):
                #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
                copy_out = copy_out + [
                    'md.log > %s/md_logs/iter%s_md%s.log' %
                    (combined_path, cur_iter, idx)
                ]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['bwpy-environ']
            ana_task.arguments = [
                'python', script_ana, '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '--ref',
                str(Kconfig.md_reference), '>', 'analyse.log'
            ]

            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }

            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]

            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]

            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                   'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                   'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                   'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                   '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                   ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                           'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                           'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)

        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
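
# create_workflow(Kconfig, args) above only builds a Pipeline. A minimal driver sketch using EnTK's
# AppManager is shown below; the RabbitMQ endpoint and the resource-description values are
# placeholders (assumptions), not values from the original script, and Kconfig/args are assumed to
# come from the original driver's option parsing (not shown in this excerpt).
from radical.entk import AppManager

def run_workflow(Kconfig, args):
    amgr = AppManager(hostname='localhost', port=5672)   # RabbitMQ endpoint (assumed)
    amgr.resource_desc = {
        'resource': 'ncsa.bw_aprun',   # placeholder resource label
        'walltime': 60,                # minutes (placeholder)
        'cpus'    : 32,                # placeholder
        'gpus'    : 16,                # placeholder
        'project' : 'XYZ123'           # placeholder allocation
    }
    # Anything referenced as $SHARED/... by the tasks must be listed here.
    amgr.shared_data = [Kconfig.md_run_file, args.Kconfig]
    amgr.workflow = [create_workflow(Kconfig, args)]
    amgr.run()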
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #for non-zero iterations, files are assumed to already be in combined_path
    if str(socket.gethostname()) == 'giotto.rice.edu':
        combined_path = str(Kconfig.remote_output_directory) + '-giotto'
    else:
        combined_path = str(Kconfig.remote_output_directory
                            )  #'/u/sciteam/hruska/scratch/extasy-tica'
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)  #run-tica-msm4.py

    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)
    if cur_iter == 0:
        #pre_proc_stage = Stage()
        #pre_proc_task = Task()
        #pre_proc_task.pre_exec = ['export tasks=pre_proc_task','export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
        #pre_proc_task.executable = ['mv']
        #pre_proc_task.arguments = [ combined_path, combined_path + time.strftime("%Y-%m-%d-%H-%M") ]
        #pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        #pre_proc_stage.add_tasks(pre_proc_task)
        #wf.add_stages(pre_proc_stage)
        pre_proc_stage2 = Stage()
        pre_proc_task2 = Task()
        pre_proc_task2.pre_exec = [
            'export tasks=pre_proc_task',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task2.executable = ['ls']
        pre_proc_task2.arguments = ['-l']
        pre_proc_task2.copy_input_data = [
            '$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig),
            '$SHARED/%s > %s/%s' % (script_ana, combined_path, script_ana),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_run_file, combined_path, Kconfig.md_run_file),
            '$SHARED/%s > %s/%s' %
            (Kconfig.md_reference, combined_path, Kconfig.md_reference)
        ]  # '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py') ]
        pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
        pre_proc_stage2.add_tasks(pre_proc_task2)
        wf.add_stages(pre_proc_stage2)

        # ------------------------------------------------------------------------------------------------------------------
    start_iter = cur_iter
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        #num_used_threads=0
        #print(def_rep_per_thread)
        while (num_allocated_rep < num_replicas):
            #if (num_used_threads>=num_parallel):
            #   print("ALLERT tried use more gpus than allocated")
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            #if ((num_replicas-num_allocated_rep)>def_rep_per_thread):  # check whether all threads are used
            #   use_replicas=def_rep_per_thread
            #else:  # use only part of the threads
            #   use_replicas=(num_replicas-num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 20,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py', '--trajstride',
                str(Kconfig.trajstride), '--Kconfig',
                str(args.Kconfig), '--idxstart',
                str(num_allocated_rep), '--idxend',
                str((num_allocated_rep + use_replicas)), '--path',
                combined_path, '--iter',
                str(cur_iter), '--md_steps',
                str(Kconfig.md_steps), '--save_traj',
                str(Kconfig.save_alltraj), '>', 'md.log'
            ]
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/system-5.xml > system-5.xml',
                    '$SHARED/integrator-5.xml > integrator-5.xml',
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #  for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##     #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #     copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated
        if str(Kconfig.strategy) != 'extend':
            ana_stage = Stage()
            ana_task = Task()
            ana_task.pre_exec = ana_settings + [
                'export tasks=tica_msm_ana',
                'export iter=%s' % cur_iter
            ]
            ana_task.executable = ['python']
            ana_task.arguments = [
                script_ana, '--path', combined_path, '--n_select',
                str(num_replicas), '--cur_iter',
                str(cur_iter), '--Kconfig',
                str(args.Kconfig), '--ref',
                str(Kconfig.md_reference), '>', 'analyse.log'
            ]

            ana_task.cpu_reqs = {
                'processes': 1,
                'process_type': 'MPI',
                'threads_per_process': 16,
                'thread_type': None
            }

            ana_task.link_input_data = [
                '$SHARED/%s > %s' % (script_ana, script_ana),
                '$SHARED/%s > %s' % (args.Kconfig, args.Kconfig)
            ]

            #for sim_num in range(min(int(Kconfig.num_parallel_MD_sim),int(Kconfig.num_replicas))):
            ana_task.copy_output_data = [
                'analyse.log > %s/iter%s_analyse.log' %
                (combined_path, cur_iter)
            ]

            #ana_task.copy_output_data = ['tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path,cur_iter),
            #                              'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path,cur_iter)]
            #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

            ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                wf.uid, ana_stage.uid, ana_task.uid)
            ana_stage.add_tasks(ana_task)
            wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        #if(cur_iter % Kconfig.nsave == 0):
        #     post_ana_task.download_output_data = ['out.gro > output/iter_%s/out.gro' % cur_iter,
        #                                   'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
        #                                   'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png' % (cur_iter),
        #                                   'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
        #                                   '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' % (combined_path,cur_iter,cur_iter)
        #                                   ]

        #post_ana_task.copy_output_data = ['ncopies.nc > %s/iter_%s/ncopies.nc' % (combined_path,cur_iter),
        #                           'weight_out.w > %s/iter_%s/weight_out.w' % (combined_path,cur_iter),
        #                           'out.gro > %s/iter_%s/out.gro' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png' % (combined_path,cur_iter),
        #                           'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png' % (combined_path,cur_iter)]

        #post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s'%(wf.uid, post_ana_stage.uid, post_ana_task.uid)

        #post_ana_stage.add_tasks(post_ana_task)
        #wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
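
# For reference, the inner while-loop above spreads num_replicas over num_parallel GPU slots by
# recomputing a ceiling share on each pass. The standalone sketch below reproduces that
# partitioning; the function name is illustrative and is not part of the original script.
import math

def partition_replicas(num_replicas, num_parallel):
    """Yield (idxstart, idxend) index ranges, one per simulation task."""
    num_allocated_rep, num_used_parallel = 0, 0
    while num_allocated_rep < num_replicas:
        def_rep_per_thread = int(math.ceil(float(num_replicas - num_allocated_rep) /
                                           float(num_parallel - num_used_parallel)))
        use_replicas = min(def_rep_per_thread, num_replicas - num_allocated_rep)
        yield num_allocated_rep, num_allocated_rep + use_replicas
        num_allocated_rep += use_replicas
        num_used_parallel += 1

# e.g. list(partition_replicas(10, 4)) == [(0, 3), (3, 6), (6, 8), (8, 10)]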
Exemple #17
0
def one_cycle(p, workflow_cfg, resource):

    ## Simulation related parameters
    sim_pre_exec = workflow_cfg[resource]['simulation']['pre_exec']
    sim_cpus = workflow_cfg[resource]['simulation']['cpus']

    ## Analysis related parameters
    ana_pre_exec = workflow_cfg[resource]['analysis']['pre_exec']
    ana_cpus = workflow_cfg[resource]['analysis']['cpus']

    task1_output = ['4ake-target_autopsf.situs']
    task2_output = ['4ake-target_autopsf-grid.dx']
    task3_output = ['1ake-docked-noh_autopsf-grid.pdb']
    task4_output = ['1ake-extrabonds.txt']
    task5_output = [
        '1ake-extrabonds-cispeptide.txt', '1ake-extrabonds-chirality.txt'
    ]

    first_stage = Stage()
    # We use names of pipelines, stages, tasks to refer to data of a
    # particular task
    first_stage.name = 'Generating a simulated density map'

    # Create tasks and add them to stage
    task1 = Task()
    task1.name = 'Starting to load the target PDB'
    task1.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task1.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task1_tcl_cmds = ['mol new 4ake-target.pdb']
    task1_tcl_cmds += ['package require autopsf']
    task1_tcl_cmds += ['autopsf 4ake-target.pdb']
    task1_tcl_cmds += ['set sel [atomselect top all]']
    task1_tcl_cmds += ['package require mdff']
    task1_tcl_cmds += ['mdff sim $sel -res 5 -o {}'.format(task1_output[0])]
    task1_tcl_cmds += ['mol new {}'.format(task1_output[0])]

    set_vmd_run(task1, task1_tcl_cmds, "first_stage.tcl")
    #task.copy_input_data = ["first_stage.tcl"]
    task1.link_input_data = [
        "$SHARED/%s" % x for x in workflow_cfg[resource]['shared_data']
    ]
    first_stage.add_tasks(task1)
    # Add sim_stage to Pipeline
    p.add_stages(first_stage)

    second_stage = Stage()
    second_stage.name = 'Converting the density map to an MDFF potential'

    task2 = Task()
    task2.name = 'generate dx file'
    task2.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task2.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task2_tcl_cmds = ['package require mdff']
    task2_tcl_cmds += [
        'mdff griddx -i {} -o {}'.format(task1_output[0], task2_output[0])
    ]
    task2.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, first_stage.name,
                                                  task1.name, task1_output[0])
    ]
    task2.link_input_data = [ ("$SHARED/%s" % x) for x in \
            workflow_cfg[resource]['shared_data'] ]

    set_vmd_run(task2, task2_tcl_cmds, "second_stage.tcl")
    second_stage.add_tasks(task2)
    p.add_stages(second_stage)

    third_stage = Stage()
    third_stage.name = 'Preparing the initial structure'

    task3 = Task()
    task3.name = 'Starting to load the initial structure'
    task3.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task3.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task3_tcl_cmds = ['mol new 1ake-docked-noh.pdb']
    task3_tcl_cmds += ['package require autopsf']
    task3_tcl_cmds += ['autopsf 1ake-docked-noh.pdb']
    task3_tcl_cmds += ['package require mdff']
    task3_tcl_cmds += [
        'mdff gridpdb -psf 1ake-docked-noh_autopsf.psf -pdb 1ake-docked-noh_autopsf.pdb -o {}'
        .format(task3_output[0])
    ]
    task3.link_input_data = [
        "$SHARED/%s" % x for x in workflow_cfg[resource]['shared_data']
    ]

    set_vmd_run(task3, task3_tcl_cmds, "third_stage.tcl")
    third_stage.add_tasks(task3)
    p.add_stages(third_stage)

    fourth_stage = Stage()
    fourth_stage.name = 'Defining secondary structure restraints'

    task4 = Task()
    task4.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task4.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task4_tcl_cmds = [
        'package require ssrestraints', 'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile 1ake-docked-noh_autopsf.pdb',
        'ssrestraints -psf 1ake-docked-noh_autopsf.psf -pdb 1ake-docked-noh_autopsf.pdb -o {} -hbonds'
        .format(task4_output[0])
    ]

    task4.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf')
    ]

    set_vmd_run(task4, task4_tcl_cmds, "fourth_stage.tcl")
    fourth_stage.add_tasks(task4)
    p.add_stages(fourth_stage)

    fifth_stage = Stage()
    fifth_stage.name = 'cispeptide and chirality restraints'

    task5 = Task()
    task5.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task5.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task5_tcl_cmds = [
        'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile 1ake-docked-noh_autopsf.pdb',
        'package require cispeptide', 'package require chirality',
        'cispeptide restrain -o {}'.format(task5_output[0]),
        'chirality restrain -o {}'.format(task5_output[1])
    ]

    task5.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf')
    ]

    set_vmd_run(task5, task5_tcl_cmds, 'fifth_stage.tcl')
    fifth_stage.add_tasks(task5)
    p.add_stages(fifth_stage)

    sixth_stage = Stage()
    sixth_stage.name = 'Running the MDFF simulation with NAMD'
    task6 = Task()
    task6.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task6.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task6_tcl_cmds = ['package require mdff']
    task6_tcl_cmds += [ 'mdff setup -o adk -psf 1ake-docked-noh_autopsf.psf ' \
            + '-pdb 1ake-docked-noh_autopsf.pdb ' \
            + '-griddx 4ake-target_autopsf-grid.dx ' \
            + '-gridpdb 1ake-docked-noh_autopsf-grid.pdb ' \
            + '-extrab {1ake-extrabonds.txt 1ake-extrabonds-cispeptide.txt 1ake-extrabonds-chirality.txt} ' \
            + '-gscale 0.3 -numsteps 50000' ]

    task6.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, second_stage.name, task2.name,
            '4ake-target_autopsf-grid.dx'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf-grid.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, fourth_stage.name,
                                                  task4.name, task4_output[0]),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, fifth_stage.name,
                                                  task5.name, task5_output[0]),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, fifth_stage.name,
                                                  task5.name, task5_output[1])
    ]

    set_vmd_run(task6, task6_tcl_cmds, "sixth_stage.tcl")
    sixth_stage.add_tasks(task6)
    p.add_stages(sixth_stage)

    seventh_stage = Stage()
    seventh_stage.name = "NAMD simulation"
    task7 = Task()
    task7.cpu_reqs['processes'] = int(sim_cpus) // summit_hw_thread_cnt
    task7.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task7.cpu_reqs['process_type'] = 'MPI'
    task7.cpu_reqs['thread_type'] = 'OpenMP'
    task7.pre_exec = sim_pre_exec
    task7.executable = [namd_path]
    task7.arguments = ['+ppn', summit_hw_thread_cnt, 'adk-step1.namd']
    task7.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  'adk-step1.namd'),
        #'$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name, task6.name, 'adk-step2.namd'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-docked-noh_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-docked-noh_autopsf-grid.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '4ake-target_autopsf-grid.dx'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-extrabonds-chirality.txt'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, sixth_stage.name, task6.name,
            '1ake-extrabonds-cispeptide.txt'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  '1ake-extrabonds.txt'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  'mdff_template.namd'),
        #'$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name, task6.name, 'par_all27_prot_lipid_na.inp')
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, sixth_stage.name,
                                                  task6.name,
                                                  'par_all36_prot.prm')
    ]
    task7.download_output_data = ['adk-step1.dcd']
    seventh_stage.add_tasks(task7)
    #task7_2 = Task()
    #task7_2.cpu_reqs['threads_per_process'] = sim_cpus
    #task7_2.executable = [ 'namd2' ]
    #task7_2.arguments = ['+ppn', sim_cpus, 'adk-step2.namd']
    #seventh_stage.add_tasks(task7_2)
    p.add_stages(seventh_stage)

    # Visualizing the MDFF trajectory
    #
    # mol new 4ake-target_autopsf.psf
    # mol addfile 4ake-target_autopsf.pdb
    # mol new 1ake-docked-noh_autopsf.psf
    # mol addfile 1ake-docked-noh_autopsf-docked.pdb
    # mol addfile adk-step1.dcd
    # mol addfile adk-step2.dcd

    eighth_stage = Stage()
    eighth_stage.name = 'Calculating the root mean square deviation'
    task8 = Task()
    task8.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task8.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task8_tcl_cmds = [
        'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile adk-step1.dcd waitfor all',
        'mol new 4ake-target_autopsf.pdb', 'package require mdff',
        'mdff check -rmsd -refpdb 4ake-target_autopsf.pdb'
    ]

    task8.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, first_stage.name,
                                                  task1.name,
                                                  '4ake-target_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, seventh_stage.name,
                                                  task7.name, 'adk-step1.dcd')
    ]

    set_vmd_run(task8, task8_tcl_cmds, "eighth_stage.tcl")
    eighth_stage.add_tasks(task8)
    p.add_stages(eighth_stage)

    ninth_stage = Stage()
    ninth_stage.name = 'Calculating the root mean square deviation for backbone atoms'
    task9 = Task()
    task9.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task9.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task9_tcl_cmds = [
        'mol new 1ake-docked-noh_autopsf.psf',
        'mol addfile adk-step1.dcd waitfor all',
        'mol new 4ake-target_autopsf.pdb', 'package require mdff',
        'set selbb [atomselect 0 "backbone"]',
        'set selbbref [atomselect 1 "backbone"]', '$selbb frame 0',
        'measure rmsd $selbb $selbbref', '$selbb frame last',
        'measure rmsd $selbb $selbbref'
    ]

    task9.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, first_stage.name,
                                                  task1.name,
                                                  '4ake-target_autopsf.pdb'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, seventh_stage.name,
                                                  task7.name, 'adk-step1.dcd')
    ]

    set_vmd_run(task9, task9_tcl_cmds, "ninth_stage.tcl")
    ninth_stage.add_tasks(task9)
    p.add_stages(ninth_stage)

    tenth_stage = Stage()
    tenth_stage.name = 'Calculating the cross-correlation coefficient'
    task10 = Task()
    task10.cpu_reqs['threads_per_process'] = summit_hw_thread_cnt
    task10.cpu_reqs['processes'] = ana_cpus // summit_hw_thread_cnt
    task10_tcl_cmds = ['mol new 1ake-docked-noh_autopsf.psf']
    task10_tcl_cmds += ['mol addfile adk-step1.dcd waitfor all'
                        ]  # load the full mdff trajectory
    #task10_tcl_cmds += [ 'mol new 4ake-target_autopsf.situs' ]        # load target EM density
    task10_tcl_cmds += [
        'package require mdff', 'set selall [atomselect 0 "all"]',
        '$selall frame 0', 'mdff ccc $selall -i target-density-5A.dx -res 5',
        '$selall frame last', 'mdff ccc $selall -i target-density-5A.dx -res 5'
    ]

    task10.copy_input_data = [
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, second_stage.name, task2.name,
            '4ake-target_autopsf-grid.dx'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(
            p.name, third_stage.name, task3.name,
            '1ake-docked-noh_autopsf.psf'),
        '$Pipeline_{}_Stage_{}_Task_{}/{}'.format(p.name, seventh_stage.name,
                                                  task7.name, 'adk-step1.dcd')
    ]
    task10.link_input_data = [
        "$SHARED/%s" % x for x in workflow_cfg[resource]['shared_data']
    ]

    set_vmd_run(task10, task10_tcl_cmds, "tenth_stage.tcl")
    tenth_stage.add_tasks(task10)
    p.add_stages(tenth_stage)
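
# set_vmd_run is not included in this excerpt. A plausible sketch of what it might do (an
# assumption, not the original helper) is to write the Tcl commands to a script, stage the script
# with the task, and run VMD over it in text mode.
def set_vmd_run(task, tcl_cmds, script_name):
    # Assumed behaviour -- the original helper is not shown in this excerpt.
    with open(script_name, 'w') as fh:            # write the Tcl commands to a local script
        fh.write('\n'.join(tcl_cmds) + '\nquit\n')
    task.upload_input_data = task.upload_input_data + [script_name]
    task.executable = ['vmd']                     # assumes VMD is available on the compute node
    task.arguments = ['-dispdev', 'text', '-e', script_name]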
Exemple #18
0
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):

    """
    All cycles after the initial cycle
    """

    with open("exchangePairs.dat","r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray
                

    q = Pipeline()
    #Bookkeeping
    stage_uids = list()
    task_uids = list() ## = dict()
    md_dict = dict()


    #Create initial MD stage


    md_stg = Stage()
    for r in range (Replicas):
        md_tsk                 = Task()
        md_tsk.executable      = [MD_Executable]  #MD Engine, Blue Waters
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[Cycles-1][ExchangeArray[r]]),
                                  '%s/prmtop'%(Book[Cycles-1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[Cycles-1][r])]
                                  '%s/mdin'%(Book[Cycles-1][r])]

        md_tsk.pre_exec        = ['export AMBERHOME=$HOME/amber/amber14/'] # Should be abstracted from user?
        #md_tsk.pre_exec       = ['module load amber']
        #md_tsk.arguments      = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores           = Replica_Cores
        md_tsk.mpi             = True
        md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)

        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)
             
                                                                                         
                                                                                          
    ex_stg= Stage()
    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range (Replicas):
        #print d[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[n1],n1)]

    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
        #print d
        #print Book
    return q

#p = InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod)
#q = Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod)

#return (p, q)
                                                                            
def cycle(k):


    #read exchangePairs.dat
    #
    with open("exchangePairs.dat","r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
        #print ExchangeArray    

    
    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list() ## = dict()
    d = dict() 

    #Create initial MD stage

    md_stg = Stage()

    #Create MD task
    for n0 in range (Replicas):
        md_tsk = Task()
        md_tsk.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI'] #MD Engine, SuperMIC 
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[k-1][ExchangeArray[n0]]),
                                  '%s/prmtop'%(Book[k-1][n0]),
                                  #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
                                  '%s/mdin'%(Book[k-1][n0])]   
                                  ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct
                                   
                              
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'] #Preexec, BLue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid, md_stg.uid, md_tsk.uid)
        #print d
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    #Create exchange stage 

    ex_stg= Stage()
    
    #Create Exchange Task

    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range (Replicas):
        #print d[n1]
        
        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(d[n1],n1)]
    
    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    #print d
    #print Book
    return p
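
# The exchange task downloads exchangePairs.dat, and the next call reads its second
# whitespace-separated column as the partner replica index. A minimal standalone version of that
# parsing (same logic as the open(...) loop above; the function name is illustrative) is:
def read_exchange_pairs(path='exchangePairs.dat'):
    """Return the second column of exchangePairs.dat as a list of ints."""
    with open(path) as f:
        return [int(line.split()[1]) for line in f if line.strip()]

# e.g. a file containing "0 2\n1 3\n2 0\n3 1\n" yields [2, 3, 0, 1]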
Exemple #20
0
def create_workflow(Kconfig, args):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    #for non-zero iterations, files are assumed to already be in combined_path
    combined_path = str(Kconfig.remote_output_directory)
    num_parallel = int(Kconfig.NODESIZE) * int(Kconfig.GPUs_per_NODE)
    num_replicas = int(Kconfig.num_replicas)
    script_ana = str(Kconfig.script_ana)
    config_file = str(args.Kconfig).rsplit('/', 1)[-1]
    try:
        systemxml = str(Kconfig.systemxml)
    except:
        systemxml = 'system-5.xml'
    try:
        integratorxml = str(Kconfig.integratorxml)
    except:
        integratorxml = 'integrator-5.xml'
    md_settings = Kconfig.md_env
    if Kconfig.env_ana_same == 'True':
        ana_settings = md_settings
    else:
        ana_settings = Kconfig.ana_env
    print("set", num_parallel, md_settings)
    iter_found = 0
    while len(glob.glob('%s/iter%s_input*.pdb' %
                        (combined_path, iter_found))) >= num_replicas:
        iter_found += 1
    cur_iter = max(0, iter_found - 1)
    print("cur_iter", cur_iter)
    #if cur_iter==0:
    #  pre_proc_stage2 = Stage()
    #  pre_proc_task2 = Task()
    #  pre_proc_task2.pre_exec = ['export tasks=pre_proc_task','export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1']
    #  pre_proc_task2.executable = ['ls']
    #  pre_proc_task2.arguments = ['-l']
    #  pre_proc_task2.copy_input_data = ['$SHARED/%s > %s/%s' % (config_file,combined_path, config_file),
    #                                 '$SHARED/%s > %s/%s' % (script_ana,combined_path,script_ana),
    #                                 '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file),
    #                                   '$SHARED/%s > %s/%s' % (Kconfig.md_reference, combined_path, Kconfig.md_reference)]# '$SHARED/%s > %s/%s' % ('analyze3.py', combined_path, 'analyze3.py') ]
    #  pre_proc_task_ref2 = '$Pipeline_%s_Stage_%s_Task_%s' % (wf.uid, pre_proc_stage2.uid, pre_proc_task2.uid)
    #  pre_proc_stage2.add_tasks(pre_proc_task2)
    #  wf.add_stages(pre_proc_stage2)

    # ------------------------------------------------------------------------------------------------------------------
    start_iter = cur_iter
    while (cur_iter < int(Kconfig.num_iterations)
           and cur_iter < start_iter + 1):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        num_allocated_rep = 0
        num_used_parallel = 0
        while (num_allocated_rep < num_replicas):
            def_rep_per_thread = int(
                math.ceil(
                    float(num_replicas - num_allocated_rep) /
                    float(num_parallel - num_used_parallel)))
            use_replicas = min(def_rep_per_thread,
                               num_replicas - num_allocated_rep)
            print("u", cur_iter, use_replicas, num_replicas, num_parallel,
                  def_rep_per_thread, num_allocated_rep, num_used_parallel)
            sim_task = Task()
            sim_task.executable = ['python']

            pre_exec_arr = md_settings + [
                'export tasks=md',
                'export iter=%s' % cur_iter
            ]
            #if cur_iter==0 and num_allocated_rep==0:
            #  pre_exec_arr = pre_exec_arr + [ 'mv %s']
            sim_task.pre_exec = pre_exec_arr
            sim_task.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }
            sim_task.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 10,
                'thread_type': 'OpenMP'
            }
            sim_task.arguments = [
                'run_openmm.py', '--Kconfig', config_file, '--idxstart',
                str(num_allocated_rep), '--idxend',
                str(num_allocated_rep + use_replicas), '--path', combined_path,
                '>', 'md.log'
            ]
            #'--trajstride', str(Kconfig.trajstride),'--Kconfig', str(args.Kconfig),
            #'--idxstart',str(num_allocated_rep), '--idxend',str((num_allocated_rep+use_replicas)),
            #'--path',combined_path,'--iter',str(cur_iter),
            #'--md_steps',str(Kconfig.md_steps), '--save_traj', 'True','>', 'md.log']
            if Kconfig.md_use_xml == 'yes':
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (systemxml, systemxml),
                    '$SHARED/%s > %s' % (integratorxml, integratorxml),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            else:
                link_arr = [
                    '$SHARED/%s > run_openmm.py' %
                    (os.path.basename(Kconfig.md_run_file)),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
            copy_arr = []
            if cur_iter == 0:
                for idx in range(num_allocated_rep,
                                 num_allocated_rep + use_replicas):
                    copy_arr = copy_arr + [
                        '$SHARED/%s > %s/iter0_input%s.pdb' %
                        (Kconfig.md_input_file, combined_path, idx)
                    ]
                #if num_allocated_rep==0:
                #  copy_arr=copy_arr + ['$SHARED/%s > %s/%s' % (args.Kconfig,combined_path, args.Kconfig),
                #                         '$SHARED/run-tica-msm.py > %s/run-tica-msm.py' % combined_path,
                #                         '$SHARED/%s > %s/%s' % (Kconfig.md_run_file,combined_path,Kconfig.md_run_file)
                #                           ]

            #if cur_iter==0 and num_allocated_rep==0:
            #   copy_arr = copy_arr +['$SHARED/%s > %s/%s' % (args.Kconfig, combined_path, args.Kconfig)]
            sim_task.link_input_data = link_arr  #+ copy_arr
            sim_task.copy_input_data = copy_arr
            copy_out = []
            #if str(Kconfig.strategy)=='extend':
            #  for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #    copy_out=copy_out+['%s/iter%s_out%s.pdb > %s/iter%s_input%s.pdb' % (combined_path, cur_iter, idx, combined_path, (cur_iter+1), idx)]

            #for idx in range(num_allocated_rep, num_allocated_rep+use_replicas):
            ##     #copy_arr=copy_arr+['$SHARED/%s > iter0_input%s.pdb' % (Kconfig.md_input_file, idx)]
            #     copy_out=copy_out+['md.log > %s/md_logs/iter%s_md%s.log' % (combined_path, cur_iter, idx)]

            sim_task.copy_output_data = copy_out
            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))

            num_allocated_rep = num_allocated_rep + use_replicas
            num_used_parallel = num_used_parallel + 1
            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)
        if str(Kconfig.strategy) != 'extend':
            for anatask in range(1):
                print("analysis task", anatask)
                ana_task = Task()
                ana_task.executable = ['python']
                pre_exec_arr = ana_settings
                ana_task.pre_exec = pre_exec_arr
                ana_task.link_input_data = [
                    '$SHARED/%s > %s' % (script_ana, script_ana),
                    '$SHARED/%s > %s' % (config_file, config_file)
                ]
                ana_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': 'CUDA'
                }
                ana_task.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 10,
                    'thread_type': 'OpenMP'
                }
                ana_task.arguments = [
                    script_ana, '--Kconfig', config_file, '>', "analysis.log"
                ]
                ana_task.copy_output_data = [
                    'analysis.log > %s/analysis_iter%s_r%s.log' %
                    (combined_path, cur_iter, anatask)
                ]
                ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
                    wf.uid, sim_stage.uid, ana_task.uid)
                sim_stage.add_tasks(ana_task)
        wf.add_stages(sim_stage)
        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
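create_workflow expects a Kconfig object exposing the attributes read above (start_iter, num_iterations, num_replicas, NODESIZE, GPUs_per_NODE, remote_output_directory, script_ana, md_run_file, md_env, ...) plus an args object whose Kconfig attribute holds the path to that configuration file. The driver sketch below is hypothetical; every value is a placeholder, and a real run would load them from the configuration file instead.

# Hypothetical driver sketch for create_workflow (all values are placeholders):
import argparse
from types import SimpleNamespace

parser = argparse.ArgumentParser()
parser.add_argument('--Kconfig', required=True, help='path to the run configuration file')
args = parser.parse_args()

Kconfig = SimpleNamespace(
    start_iter=0, num_iterations=1, num_replicas=8,
    NODESIZE=1, GPUs_per_NODE=1,
    remote_output_directory='/scratch/extasy-output',
    script_ana='run-tica-msm.py',
    md_run_file='run_openmm.py', md_input_file='input.pdb',
    md_use_xml='no', md_env=['export OMP_NUM_THREADS=1'],
    env_ana_same='True', strategy='extend')

wf = create_workflow(Kconfig, args)
print('pipeline with %d stage(s)' % len(wf.stages))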
Exemple #21
0
def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']  # runs for this campaign

    # setup S1 workload
    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create cfg with requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1-RAPTOR'
    s = Stage()

    # create cfg
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d = rs.filesystem.Directory(rurl)
    ls = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, nodes, runtime in runs:

        print('%30s  %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt = '%s.%s.gz' % (name, workload.output)
        # rec  = False

        # if tgt in ls:
        #     if workload.recompute:
        #         rec += 1
        #         d.move(tgt, tgt + '.bak')
        #     else:
        #         print('skip      1 %s' % name)
        #         continue

        # if smiles in ls:
        #     if smiles not in subs:
        #         subs[smiles] = [str(u).split('/')[-1]  for u in d.list('%s/*' % smiles)]
        #     if tgt in subs[smiles]:
        #         if workload.recompute:
        #             rec += 2
        #             d.move('%s/%s'     % (smiles, tgt),
        #                     '%s/%s.bak' % (smiles, tgt))
        #         else:
        #             print('skip      2 %s' % name)
        #             continue

        ## if os.path.exists('results/%s.%s.gz' % (name, wofkload.output)):
        ##     print('skip      3 %s' % name)
        ##     continue

        #if rec: print('recompute %d %s' % (rec, name))
        #else  : print('compute   2 %s'  %       name)

        cpn = cfg.cpn
        gpn = cfg.gpn
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles = smiles
        cfg.workload.name = name
        cfg.nodes = nodes
        cfg.runtime = runtime
        cfg.n_workers = int(nodes / n_masters - 1)
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()

            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]

            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_threads = cpn
            t.upload_input_data = [
                'wf0_master.py', 'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            t.download_output_data = [
                '%s.%s.gz > results/%s.%s.gz' %
                (name, workload.output, name, workload.output)
            ]
            # t.input_staging  = [{'source': 'wf0_master.py',
            #                         'target': 'wf0_master.py',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': 'wf0_worker.py',
            #                         'target': 'wf0_worker.py',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': 'configs/wf0.%s.cfg' % name,
            #                         'target': 'wf0.cfg',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': workload.input_dir,
            #                         'target': 'input_dir',
            #                         'action': rp.LINK,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': workload.impress_dir,
            #                         'target': 'impress_md',
            #                         'action': rp.LINK,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': 'read_ligand_dict.py',
            #                         'target': 'read_ligand_dict.py',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                     ]
            # t.output_staging = [{'source': '%s.%s.gz'         % (name, workload.output),
            #                      'target': 'results/%s.%s.gz' % (name, workload.output),
            #                      'action': rp.TRANSFER,
            #                      'flags' : rp.DEFAULT_FLAGS}]
            s.add_tasks(t)

    p.add_stages(s)

    return p
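generate_pipeline takes a plain dict pointing at two configuration files and relies on radical.utils (ru), radical.saga (rs) and a check_runs helper being importable at module scope. A minimal invocation sketch, with placeholder file names:

# Hypothetical invocation (file names are placeholders):
import radical.utils as ru
import radical.saga  as rs   # generate_pipeline uses rs.filesystem.Directory internally

cfg = {
    'run_cfg_file': 'configs/wf0.cfg.json',   # resource and workload config
    'run_file'    : 'configs/runs.dat'        # runs for this campaign
}

p = generate_pipeline(cfg)
if p:
    print('pipeline %s: %d stage(s)' % (p.name, len(p.stages)))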
Exemple #22
0
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s, l, i, b]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str):

            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }

            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }

            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }

            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }

            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
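The parameters s, l, i and b stand for a string, a list, an integer and a boolean; upstream, such tests are typically driven with hypothesis. A sketch of that wiring, assuming hypothesis is installed:

# Hypothetical hypothesis-based driver for the test above:
from hypothesis import given, settings
import hypothesis.strategies as st

@settings(max_examples=10)
@given(s=st.text(), l=st.lists(st.text()), i=st.integers(), b=st.booleans())
def test_task_exceptions_hypothesis(s, l, i, b):
    test_task_exceptions(s, l, i, b)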
Exemple #23
0
def cycle(k):

    #read exchangePairs.dat
    #
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
        #print ExchangeArray

    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    #Create initial MD stage

    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [
            '/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'
        ]  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI'] #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
            '%s/prmtop' % (Book[k - 1][n0]),
            #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
            '%s/mdin' % (Book[k - 1][n0])
        ]
        ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct

        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']  # Pre-exec, Blue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o',
            'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                   md_tsk.uid)
        #print d
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    #Create exchange stage

    ex_stg = Stage()

    #Create Exchange Task

    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]

    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    #print d
    #print Book
    return p
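Each call to cycle(k) links restart files from the tasks recorded in Book during the previous cycle and reads the exchangePairs.dat downloaded by that cycle's exchange task, so cycles must be built and executed one at a time. A hedged driver loop, assuming an AppManager configured as in the earlier submission sketch and an initial cycle that has already populated Book:

# Hypothetical sequential driver (assumes 'appman' is an AppManager configured as in the earlier sketch):
Cycles = 3                         # placeholder number of exchange cycles
for k in range(1, Cycles):
    appman.workflow = [cycle(k)]   # build the next cycle from the latest Book entry
    appman.run()                   # blocks until exchangePairs.dat has been downloaded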
Exemple #24
0
def create_workflow(Kconfig):

    wf = Pipeline()

    # ------------------------------------------------------------------------------------------------------------------
    cur_iter = int(Kconfig.start_iter)  #0
    # for non-zero iterations, it is assumed that the input files are already in combined_path
    combined_path = str(Kconfig.remote_output_directory)  # e.g. '/u/sciteam/hruska/scratch/extasy-grlsd'
    if cur_iter == 0:
        restart_iter = 0
    else:
        restart_iter = cur_iter

    if cur_iter == 0:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro', '--clone',
            str(Kconfig.num_replicas)
        ]
        pre_proc_task.copy_input_data = [
            '$SHARED/%s > %s/iter_%s/input.gro' %
            (os.path.basename(Kconfig.md_input_file), combined_path, cur_iter),
            '$SHARED/%s > input.gro' % os.path.basename(Kconfig.md_input_file),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]

        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)
        # ------------------------------------------------------------------------------------------------------------------
    else:
        pre_proc_stage = Stage()
        pre_proc_task = Task()
        pre_proc_task.pre_exec = [
            'module load bwpy', 'export tasks=pre_proc', 'export iter=-1',
            'export OMP_NUM_THREADS=1'
        ]
        pre_proc_task.executable = ['python']
        pre_proc_task.arguments = [
            'spliter.py', '-n', Kconfig.num_parallel_MD_sim, '-gro',
            'input.gro'
        ]
        pre_proc_task.copy_input_data = [
            '%s/iter_%s/out.gro > input.gro' % (combined_path, cur_iter - 1),
            '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py'
        ]
        pre_proc_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, pre_proc_stage.uid, pre_proc_task.uid)
        pre_proc_stage.add_tasks(pre_proc_task)
        wf.add_stages(pre_proc_stage)

    while (cur_iter < int(Kconfig.num_iterations)):

        # --------------------------------------------------------------------------------------------------------------
        # sim_stage:
        #     Purpose:  In iter=1, use the input files from pre_loop, else use the outputs of the analysis stage in the
        #               previous iteration. Run gromacs on each of the smaller files. Parameter files and executables
        #                are input from pre_loop. There are 'numCUs' instances of gromacs per iteration.
        #     Arguments :
        #           grompp    = gromacs parameters filename
        #           topol     = topology filename

        sim_stage = Stage()
        sim_task_ref = list()
        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):

            sim_task = Task()
            if Kconfig.use_gpus == 'False':
                sim_task.executable = [
                    '/sw/bw/bwpy/0.3.0/python-single/usr/bin/python'
                ]
                sim_task.pre_exec = [
                    'module load bwpy',
                    'export PYTHONPATH="/u/sciteam/hruska/local/lib/python2.7/site-packages:/u/sciteam/hruska/local:/u/sciteam/hruska/local/lib/python:$PYTHONPATH"',
                    'export PATH=/u/sciteam/hruska/local/bin:$PATH',
                    'export iter=%s' % cur_iter
                ]
                # on Blue Waters, tasks on one node are executed concurrently
                sim_task.cores = int(Kconfig.num_CUs_per_MD_replica)
            else:
                sim_task.executable = ['python']
                sim_task.pre_exec = [
                    'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
                    'module add bwpy-mpi', 'module add fftw',
                    'module add cray-netcdf',
                    'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                    'module add cmake', 'module unload darshan xalt',
                    'export CRAYPE_LINK_TYPE=dynamic',
                    'export CRAY_ADD_RPATH=yes', 'export FC=ftn',
                    'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                    'export tasks=md',
                    'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
                ]
                sim_task.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
                sim_task.cpu_reqs = {
                    'processes': 0,
                    'process_type': None,
                    'threads_per_process': 0,
                    'thread_type': None
                }
            sim_task.arguments = [
                'run_openmm.py', '--gro', 'start.gro', '--out', 'out.gro',
                '--md_steps',
                str(Kconfig.md_steps), '--save_traj', 'False', '>', 'md.log'
            ]
            sim_task.link_input_data = [
                '$SHARED/%s > run_openmm.py' %
                (os.path.basename(Kconfig.md_run_file))
            ]

            #if Kconfig.ndx_file is not None:
            #    sim_task.link_input_data.append('$SHARED/{0}'.format(os.path.basename(Kconfig.ndx_file)))
            if restart_iter == cur_iter:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (pre_proc_task_ref, sim_num))
            else:
                sim_task.link_input_data.append(
                    '%s/temp/start%s.gro > start.gro' %
                    (post_ana_task_ref, sim_num))

            sim_task_ref.append('$Pipeline_%s_Stage_%s_Task_%s' %
                                (wf.uid, sim_stage.uid, sim_task.uid))
            sim_stage.add_tasks(sim_task)

        wf.add_stages(sim_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # pre_ana_task:
        #     Purpose:   The output of each gromacs instance in the simulation stage is a small coordinate file.
        #                 Concatenate such files from each of the gromacs instances to form a larger file.
        #     Arguments:
        #             numCUs = number of simulation instances / number of small files to be concatenated

        pre_ana_stage = Stage()
        pre_ana_task = Task()
        pre_ana_task.pre_exec = [
            'module swap PrgEnv-cray PrgEnv-gnu', 'module add bwpy',
            'module add bwpy-mpi', 'module add fftw', 'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=pre_ana',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        pre_ana_task.executable = ['python']
        pre_ana_task.arguments = ['pre_analyze_openmm.py']

        pre_ana_task.link_input_data = [
            '$SHARED/pre_analyze_openmm.py > pre_analyze_openmm.py'
        ]

        for sim_num in range(
                min(int(Kconfig.num_parallel_MD_sim),
                    int(Kconfig.num_replicas))):
            pre_ana_task.link_input_data += [
                '%s/out.gro > out%s.gro' % (sim_task_ref[sim_num], sim_num)
            ]

        pre_ana_task.copy_output_data = [
            'tmpha.gro > %s/iter_%s/tmpha.gro' % (combined_path, cur_iter),
            'tmp.gro > %s/iter_%s/tmp.gro' % (combined_path, cur_iter)
        ]
        #'tmp.gro > resource://iter_%s/tmp.gro' % cur_iter

        pre_ana_stage.add_tasks(pre_ana_task)
        wf.add_stages(pre_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # lsdmap:
        #     Purpose: Perform LSDMap on the large coordinate file to generate weights and eigen values.
        #     Arguments:
        #             config = name of the config file to be used during LSDMap

        ana_stage = Stage()
        ana_task = Task()
        ana_task.pre_exec = [
            'module load PrgEnv-gnu', 'module unload bwpy',
            'module load bwpy/0.3.0', 'module add bwpy-mpi', 'module add fftw',
            'module add cray-netcdf',
            'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
            'module add cmake', 'module unload darshan xalt',
            'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
            'export FC=ftn',
            'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
            'export tasks=lsdmap',
            'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
        ]
        ana_task.executable = ['lsdmap']  #/u/sciteam/hruska/local/bin/lsdmap
        ana_task.arguments = [
            '-f',
            os.path.basename(Kconfig.lsdm_config_file), '-c', 'tmpha.gro',
            '-n', 'out.nn', '-w', 'weight.w'
        ]

        ana_task.cores = 1
        ana_task.link_input_data = [
            '$SHARED/{0} > {0}'.format(
                os.path.basename(Kconfig.lsdm_config_file)),
            '%s/iter_%s/tmpha.gro > tmpha.gro' % (combined_path, cur_iter)
        ]
        ana_task.copy_output_data = [
            'lsdmap.log > $SHARED/results/iter_%s_lsdmap.log' % cur_iter,
            'tmpha.eg > $SHARED/results/iter_%s_tmpha.eg' % cur_iter,
            #'lsdmap.log > output/iter_%s/lsdmap.log'%cur_iter,
            'tmpha.ev > %s/iter_%s/tmpha.ev' % (combined_path, cur_iter),
            'tmpha.eps > %s/iter_%s/tmpha.eps' % (combined_path, cur_iter),
            'tmpha.eg > %s/iter_%s/tmpha.eg' % (combined_path, cur_iter),
            'out.nn > %s/iter_%s/out.nn' % (combined_path, cur_iter),
            'lsdmap.log > %s/iter_%s/lsdmap.log' % (combined_path, cur_iter)
        ]
        if cur_iter > 0:
            ana_task.link_input_data += [
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1)
            ]

        if (cur_iter % Kconfig.nsave == 0):
            ana_task.download_output_data = [
                'lsdmap.log > ./results/iter_%s_lsdmap.log' % cur_iter
            ]

        ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, ana_stage.uid, ana_task.uid)

        ana_stage.add_tasks(ana_task)
        wf.add_stages(ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        # --------------------------------------------------------------------------------------------------------------
        # post_lsdmap:
        #     Purpose:   Use the weights, eigen values generated in lsdmap along with other parameter files from pre_loop
        #                 to generate the new coordinate file to be used by the simulation_step in the next iteration.
        #     Arguments:
        #             num_replicas              = number of configurations to be generated in the new coordinate file
        #             out                   = output filename
        #             cycle                 = iteration number
        #             max_dead_neighbors    = max dead neighbors to be considered
        #             max_alive_neighbors   = max alive neighbors to be considered
        #             numCUs                = number of simulation instances/ number of smaller files

        post_ana_stage = Stage()
        post_ana_task = Task()
        post_ana_task.name = 'post_ana_task'
        if Kconfig.restarts == 'clustering':
            post_ana_task.pre_exec = [
                'module unload PrgEnv-cray', 'module load PrgEnv-gnu',
                'module unload bwpy', 'module add bwpy/0.3.0',
                'module add bwpy-mpi', 'module add fftw',
                'module add cray-netcdf',
                'module add cudatoolkit/7.5.18-1.0502.10743.2.1',
                'module add cmake', 'module unload darshan xalt',
                'export CRAYPE_LINK_TYPE=dynamic', 'export CRAY_ADD_RPATH=yes',
                'export FC=ftn',
                'source /projects/sciteam/bamm/hruska/vpy2/bin/activate',
                'export tasks=post_ana', 'export PYEMMA_NJOBS=1',
                'export iter=%s' % cur_iter, 'export OMP_NUM_THREADS=1'
            ]
            post_ana_task.executable = ['python']
            post_ana_task.arguments = [
                'post_analyze.py', Kconfig.num_replicas, 'tmpha.ev',
                'ncopies.nc', 'tmp.gro', 'out.nn', 'weight.w', 'out.gro',
                Kconfig.max_alive_neighbors, Kconfig.max_dead_neighbors,
                'input.gro', cur_iter, Kconfig.num_parallel_MD_sim,
                'weight_out.w', 'tmpha.eg'
            ]

            post_ana_task.link_input_data = [
                '$SHARED/post_analyze.py > post_analyze.py',
                '$SHARED/selection.py > selection.py',
                '$SHARED/selection-cluster.py > selection-cluster.py',
                '$SHARED/reweighting.py > reweighting.py',
                '$SHARED/spliter.py > spliter.py', '$SHARED/gro.py > gro.py',
                '%s/iter_%s/weight_out.w > weight.w' %
                (combined_path, cur_iter - 1),
                '%s/iter_%s/tmp.gro > tmp.gro' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.ev > tmpha.ev' % (combined_path, cur_iter),
                '%s/iter_%s/tmpha.eg > tmpha.eg' % (combined_path, cur_iter),
                '%s/iter_%s/out.nn > out.nn' % (combined_path, cur_iter)
            ]

            if (cur_iter % Kconfig.nsave == 0):
                post_ana_task.download_output_data = [
                    'out.gro > output/iter_%s/out.gro' % cur_iter,
                    'weight_out.w > output/iter_%s/weight_out.w' % cur_iter,
                    'plot-scatter-cluster-10d.png > output/iter_%s/plot-scatter-cluster-10d.png'
                    % (cur_iter),
                    'ncopies.nc > output/iter_%s/ncopies.nc' % (cur_iter),
                    '%s/iter_%s/tmp.gro > output/iter_%s/tmp.gro' %
                    (combined_path, cur_iter, cur_iter)
                ]

            post_ana_task.copy_output_data = [
                'ncopies.nc > %s/iter_%s/ncopies.nc' %
                (combined_path, cur_iter),
                'weight_out.w > %s/iter_%s/weight_out.w' %
                (combined_path, cur_iter),
                'out.gro > %s/iter_%s/out.gro' % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > %s/iter_%s/plot-scatter-cluster-10d.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-counts.png > %s/iter_%s/plot-scatter-cluster-10d-counts.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d-ncopiess.png > %s/iter_%s/plot-scatter-cluster-10d-ncopiess.png'
                % (combined_path, cur_iter),
                'plot-scatter-cluster-10d.png > ./results/iter_%s_plot-scatter-cluster-10d.png'
                % cur_iter,
                'plot-scatter-cluster-10d-counts.png > ./results/iter_%s_plot-scatter-cluster-10d-counts.png'
                % cur_iter,
                'plot-scatter-cluster-10d-ncopiess.png > ./results/iter_%s_plot-scatter-cluster-10d-ncopiess.png'
                % cur_iter
            ]

        post_ana_task_ref = '$Pipeline_%s_Stage_%s_Task_%s' % (
            wf.uid, post_ana_stage.uid, post_ana_task.uid)

        post_ana_stage.add_tasks(post_ana_task)
        wf.add_stages(post_ana_stage)
        # --------------------------------------------------------------------------------------------------------------

        cur_iter += 1
        Kconfig.start_iter = str(cur_iter)

    return wf
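This variant reads everything from a single Kconfig object; in ExTASY-style runs that object is usually a small Python configuration module. The sketch below lists only the attributes this function actually accesses, with placeholder values, and is not a complete or authoritative configuration:

# settings.wcfg -- hypothetical configuration module (placeholder values):
start_iter              = 0
num_iterations          = 2
num_replicas            = 16
num_parallel_MD_sim     = 16
num_CUs_per_MD_replica  = 1
nsave                   = 1
use_gpus                = 'False'
restarts                = 'clustering'
remote_output_directory = '/scratch/extasy-grlsd'
md_input_file           = 'input.gro'
md_run_file             = 'run_openmm.py'
md_steps                = 10000
lsdm_config_file        = 'lsdmap.ini'
max_alive_neighbors     = 10
max_dead_neighbors      = 1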
Exemple #25
0
def generate_pipeline(name, stages):  #generate the pipeline of prediction and blob detection

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):

        if s_cnt == 0:
            # Create a Stage object
            s0 = Stage()
            s0.name = 'Stage %s' % s_cnt
            # Create Task 1, training
            t1 = Task()
            t1.name = 'Predictor'
            t1.pre_exec = ['module load psc_path/1.1',
                           'module load slurm/default',
                           'module load intel/17.4',
                           'module load python3',
                           'module load cuda',
                           'mkdir -p classified_images/crabeater',
                           'mkdir -p classified_images/weddel',
                           'mkdir -p classified_images/pack-ice',
                           'mkdir -p classified_images/other',
                           'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate'
                          ]
            t1.executable = 'python3'   # Assign executable to the task
            # Assign arguments for the task executable
            t1.arguments = ['pt_predict.py', '-class_names', 'crabeater', 'weddel', 'pack-ice', 'other']
            t1.link_input_data = ['/pylon5/mc3bggp/paraskev/seal_test/nn_model.pth.tar',
                                  '/pylon5/mc3bggp/paraskev/nn_images',
                                  '/pylon5/mc3bggp/paraskev/seal_test/test_images'
                                  ]
            t1.upload_input_data = ['pt_predict.py','sealnet_nas_scalable.py']
            t1.cpu_reqs = {'processes': 1,'threads_per_process': 1, 'thread_type': 'OpenMP'}
            t1.gpu_reqs = {'processes': 1,'threads_per_process': 1, 'thread_type': 'OpenMP'}
        
            s0.add_tasks(t1)    
            # Add Stage to the Pipeline
            p.add_stages(s0)
        else:
            # Create a Stage object
            s1 = Stage()
            s1.name = 'Stage %s' % s_cnt
            # Create Task 2,
            t2 = Task()
            t2.pre_exec = ['module load psc_path/1.1',
                           'module load slurm/default',
                           'module load intel/17.4',
                           'module load python3',
                           'module load cuda',
                           'module load opencv',
                           'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate',
                           'mkdir -p blob_detected'
                         ]
            t2.name = 'Blob_detector'         
            t2.executable = ['python3']   # Assign executable to the task   
            # Assign arguments for the task executable
            t2.arguments = ['blob_detector.py']
            t2.upload_input_data = ['blob_detector.py']
            t2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/classified_images' % (p.uid, s0.uid, t1.uid)]
            t2.download_output_data = ['blob_detected/']  # Download resulting images
            t2.cpu_reqs = {'processes': 1,'threads_per_process': 1, 'thread_type': 'OpenMP'}
            t2.gpu_reqs = {'processes': 1, 'threads_per_process': 1, 'thread_type': 'OpenMP'}
            s1.add_tasks(t2)
            # Add Stage to the Pipeline
            p.add_stages(s1)

    return p
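The prediction stage and the blob-detection stage are chained through the classified_images directory, so this generator is meant to be called with stages=2. A minimal, hypothetical call sketch; submitting the pipeline would still require an AppManager and resource description as in the earlier examples.

# Hypothetical usage of the generator above:
p = generate_pipeline(name='seal-detection', stages=2)
for stage in p.stages:
    print(stage.name, [t.name for t in stage.tasks])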