def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which moves the uploaded file 'temp' into /tmp/
    t1 = Task()
    t1.executable = 'mv'
    t1.arguments = ['temp','/tmp/']
    t1.upload_input_data = ['%s/temp' % cur_dir]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
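For context, a minimal sketch of how such a pipeline is typically submitted through EnTK's AppManager; the resource description values below are illustrative, and depending on the EnTK version the AppManager may also require RabbitMQ connection details.

from radical.entk import AppManager

appman = AppManager()
appman.resource_desc = {
    'resource': 'local.localhost',  # illustrative resource label
    'walltime': 10,                 # minutes
    'cpus': 1
}
appman.workflow = set([generate_pipeline()])
appman.run()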
Example #3
def setup_replicas(replicas, min_temp, max_temp, timesteps, basename):

    writeInputs.writeInputs(max_temp=max_temp, min_temp=min_temp, replicas=replicas, timesteps=timesteps, basename=basename)
    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))
    tar.close()
    for r in range(replicas):
        os.remove('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))



    setup_p = Pipeline()
    setup_p.name = 'untarPipe'

    repo = git.Repo('.', search_parent_directories=True)
    aux_function_path = repo.working_tree_dir


    untar_stg = Stage()
    untar_stg.name = 'untarStg'

    #Untar Task

    untar_tsk = Task()
    untar_tsk.name = 'untarTsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = {'processes': 1,
                          'process_type': None,
                          'threads_per_process': 1,
                          'thread_type': None}
    untar_tsk.post_exec = []
    untar_stg.add_tasks(untar_tsk)
    setup_p.add_stages(untar_stg)
    global replica_sandbox
    replica_sandbox = '$Pipeline_%s_Stage_%s_Task_%s' % (setup_p.name, untar_stg.name, untar_tsk.name)
    print(replica_sandbox)

    return setup_p
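The 'untar_input_files.py' helper uploaded above is not part of this listing; given how it is invoked ('python untar_input_files.py <tarball>'), a minimal sketch could be:

import sys
import tarfile

# extract the tarball named on the command line into the task sandbox
with tarfile.open(sys.argv[1], 'r') as tar:
    tar.extractall()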
Example #4
def test_assignment_exceptions():

    t = Task()

    data_type = [1, 'a', True, list()]

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data
Example #5
    def generate_task(self):

        task = Task()
        task.name = self.name
        task.pre_exec = [
            'env > env.log', 'export PATH=/home/dakka/miniconda3/bin:$PATH',
            'export LD_LIBRARY_PATH=/home/dakka/miniconda3/lib:$LD_LIBRARY_PATH',
            'source activate ve_hyperspace'
        ]
        task.executable = ['python']
        task.arguments = [
            'optimize.py', '--data_path', self.data_path, '--results_dir',
            self.results_dir
        ]
        task.cpu_reqs = {
            'processes': self.hyperparameters**2,
            'process_type': None,
            'threads_per_process': 32,
            'thread_type': 'MPI'
        }

        task.upload_input_data = [self.optimization_file]

        return task
    def InitCycle(
        self, Replicas, Replica_Cores, md_executable, ExchangeMethod, timesteps
    ):  # "Cycle" = 1 MD stage plus the subsequent exchange computation
        """ 
        Initial cycle consists of:
        1) Create tarball of MD input data 
        2) Transfer the tarball to pilot sandbox
        3) Untar the tarball
        4) Run first Cycle
        """

        #Initialize Pipeline
        #self._prof.prof('InitTar', uid=self._uid)
        p = Pipeline()
        p.name = 'initpipeline'

        md_dict = dict()  #Bookkeeping
        tar_dict = dict()  #Bookkeeping

        ##Write the input files

        self._prof.prof('InitWriteInputs', uid=self._uid)

        writeInputs.writeInputs(max_temp=350,
                                min_temp=250,
                                replicas=Replicas,
                                timesteps=timesteps)

        self._prof.prof('EndWriteInputs', uid=self._uid)

        self._prof.prof('InitTar', uid=self._uid)
        #Create Tarball of input data

        tar = tarfile.open("Input_Files.tar", "w")
        for name in ["prmtop", "inpcrd", "mdin"]:
            tar.add(name)
        for r in range(Replicas):
            tar.add('mdin_{0}'.format(r))
        tar.close()

        #delete all input files outside the tarball

        for r in range(Replicas):
            os.remove('mdin_{0}'.format(r))

        self._prof.prof('EndTar', uid=self._uid)

        #Create Untar Stage

        untar_stg = Stage()
        untar_stg.name = 'untarStg'

        #Untar Task

        untar_tsk = Task()
        untar_tsk.name = 'untartsk'
        untar_tsk.executable = ['python']

        untar_tsk.upload_input_data = [
            'untar_input_files.py', 'Input_Files.tar'
        ]
        untar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
        untar_tsk.cores = 1

        untar_stg.add_tasks(untar_tsk)
        p.add_stages(untar_stg)

        tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, untar_stg.name, untar_tsk.name)
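        # EnTK resolves this '$Pipeline_*_Stage_*_Task_*' placeholder to the
        # untar task's working directory at runtime, so later stages can link
        # their inputs out of that sandbox.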

        # First MD stage: needs to be defined separately since workflow is not built from a predetermined order

        md_stg = Stage()
        md_stg.name = 'mdstg0'
        self._prof.prof('InitMD_0', uid=self._uid)

        # MD tasks

        for r in range(Replicas):

            md_tsk = AMBERTask(cores=Replica_Cores,
                               MD_Executable=md_executable)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
            md_tsk.link_input_data += [
                '%s/inpcrd' % tar_dict[0],
                '%s/prmtop' % tar_dict[0],
                '%s/mdin_{0}'.format(r) %
                tar_dict[0]  #Use for full temperature exchange
                #'%s/mdin'%tar_dict[0]  #Testing only
            ]
            md_tsk.arguments = [
                '-O',
                '-p',
                'prmtop',
                '-i',
                'mdin_{0}'.format(r),  # Use this for full Temperature Exchange
                '-c',
                'inpcrd',
                '-o',
                'out_{0}'.format(r),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                p.name, md_stg.name, md_tsk.name)

            md_stg.add_tasks(md_tsk)
            self.md_task_list.append(md_tsk)
            #print md_tsk.uid
        p.add_stages(md_stg)
        #stage_uids.append(md_stg.uid)

        # First Exchange Stage

        ex_stg = Stage()
        ex_stg.name = 'exstg0'
        self._prof.prof('InitEx_0', uid=self._uid)
        #with open('logfile.log', 'a') as logfile:
        #   logfile.write( '%.5f' %time.time() + ',' + 'InitEx0' + '\n')
        # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition
        # check and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs.
        # Said pairs then exchange configurations by linking output configuration files appropriately.

        ex_tsk = Task()
        ex_tsk.name = 'extsk0'
        ex_tsk.executable = ['python']
        ex_tsk.upload_input_data = [ExchangeMethod]
        for r in range(Replicas):
            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas), '0']
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = ['exchangePairs_0.dat']
        ex_stg.add_tasks(ex_tsk)
        #task_uids.append(ex_tsk.uid)
        p.add_stages(ex_stg)
        self.ex_task_list.append(ex_tsk)
        #self.ex_task_uids.append(ex_tsk.uid)
        self.Book.append(md_dict)
        return p
def test_task_exceptions(s,l,i,b):

    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s,l,i,b]

    for data in data_type:

        if not isinstance(data,str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data,list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str) and not isinstance(data, list):

            with pytest.raises(TypeError):
                t.executable = data

        if not isinstance(data, str):

            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1,
                              'process_type': data,
                              'threads_per_process': 1,
                              'thread_type': None}

            with pytest.raises(ValueError):
                t.cpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': data}

            with pytest.raises(ValueError):
                t.gpu_reqs = {'processes': 1,
                              'process_type': data,
                              'threads_per_process': 1,
                              'thread_type': None}

            with pytest.raises(ValueError):
                t.gpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': data}

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': data,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None}

            with pytest.raises(TypeError):
                t.cpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': data,
                              'thread_type': None}

            with pytest.raises(TypeError):
                t.gpu_reqs = {'processes': data,
                              'process_type': None,
                              'threads_per_process': 1,
                              'thread_type': None}

            with pytest.raises(TypeError):
                t.gpu_reqs = {'processes': 1,
                              'process_type': None,
                              'threads_per_process': data,
                              'thread_type': None}
    def general_cycle(self, replicas, replica_cores, cycle, python_path, md_executable, exchange_method, pre_exec):
        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """

        self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid)
        with open('exchangePairs_{0}.dat'.format(cycle),
                  'r') as f:  # Read exchangePairs.dat
            exchange_array = []
            for line in f:
                exchange_array.append(int(line.split()[1]))
                #exchange_array.append(line)
                #print exchange_array

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(cycle)
        #bookkeeping
        stage_uids = list()
        task_uids = list()  ## = dict()
        md_dict = dict()

        #Create MD stage

        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(cycle)

        self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

        for r in range(replicas):
            md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(
                replica=r, cycle=cycle)
            md_tsk.link_input_data = [
                '%s/restrt > inpcrd' %
                (self.book[cycle - 1][exchange_array[r]]),
                '%s/prmtop' % (self.book[0][r]),
                '%s/mdin_{0}'.format(r) % (self.book[0][r])
            ]

            ### The Following softlinking scheme is to be used ONLY if node local file system is to be used: not fully supported yet.
            #md_tsk.link_input_data = ['$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=exchange_array[r],cycle=cycle-1) > '$NODE_LFS_PATH/inpcrd',
            #                          #'%s/restrt > inpcrd'%(self.book[cycle-1][exchange_array[r]]),
            #                          '%s/prmtop'%(self.book[0][r]),
            #                          '%s/mdin_{0}'.format(r)%(self.Book[0][r])]

            md_tsk.arguments = [
                '-O',
                '-i',
                'mdin_{0}'.format(r),
                '-p',
                'prmtop',
                '-c',
                'inpcrd',
                #'-c', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle-1),
                '-o',
                'out-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                '-r',
                'restrt',
                #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle),
                '-x',
                'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            #md_tsk.tag              = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)

        q.add_stages(md_stg)

        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(cycle + 1)

        #Create Exchange Task
        ex_tsk = Task()
        ex_tsk.name = 'extsk{0}'.format(cycle + 1)
        ex_tsk.executable = [python_path]  # e.g. ['/usr/bin/python'] or ['/opt/python/bin/python']
        ex_tsk.upload_input_data = [exchange_method]
        for r in range(replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.pre_exec = ['mv *.py exchange_method.py']
        ex_tsk.arguments = [
            'exchange_method.py', '{0}'.format(replicas), '{0}'.format(cycle + 1)
        ]
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = [
            'exchangePairs_{0}.dat'.format(cycle + 1)
        ]  # Finds exchange partners, also  Generates exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.book.append(md_dict)
        #self._prof.prof('EndEx_{0}'.format(cycle), uid=self._uid)
        #print d
        #print self.book
        return q
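A hedged sketch of how the two methods above would be driven; 'rex' (an instance of the surrounding class), 'appman', 'n_cycles', and all argument values are assumptions, only the method signatures come from this listing:

# run the initial cycle once, then one general_cycle per subsequent cycle
appman.workflow = set([rex.InitCycle(replicas, replica_cores, md_executable,
                                     exchange_method, timesteps)])
appman.run()

for cycle in range(n_cycles):
    q = rex.general_cycle(replicas, replica_cores, cycle, python_path,
                          md_executable, exchange_method, pre_exec)
    appman.workflow = set([q])
    appman.run()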
Example #11
def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):

    # Initialize a task object
    t = Task()

    # Define magic variable dictionary
    mvar_dict = {"PIPELINE_ID": pipe_name}

    # Give this task object a name
    t.name = task_name

    # Pre exec let you load modules, set environment before executing the workload
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    # Executable to use for the task
    t.executable = tcfg['executable']

    # If there's a user-defined input file (likely for genmod modules), add it to the
    # options list and upload file list if needed
    if "input_data_file" in tcfg['options']:
        tcfg['upload_input_data'].append(
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file']))

    # List of arguments for the executable
    t.arguments = [tcfg['script']] + match_options(tcfg['options'],
                                                   ecfg['options'])

    # CPU requirements for this task
    t.cpu_threads = {
        'processes': tcfg['cpu']['processes'],
        'process-type': tcfg['cpu']['process-type'],
        'threads-per-process': tcfg['cpu']['threads-per-process'],
        'thread-type': tcfg['cpu']['thread-type'],
    }

    # Upload data from your local machine to the remote machine
    # Note: Remote machine can be the local machine
    t.upload_input_data = tcfg['upload_input_data']

    # Copy data from other stages/tasks for use in this task
    copy_list = []
    if "copy_input_data" in tcfg.keys():
        for copy_stage in tcfg['copy_input_data'].keys():
            for copy_task in tcfg['copy_input_data'][copy_stage].keys():
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(
                    pipe_name, copy_stage, copy_task)
                copy_list.extend([
                    '{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                    for x in tcfg['copy_input_data'][copy_stage][copy_task]
                ])

    # Append the copy list (if any) to the task object
    t.copy_input_data = copy_list

    # Set the download data for the task
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg.keys():
        download_list.extend([
            '{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
            for x in tcfg['download_output_data']
        ])

    # Append the download list to this task
    t.download_output_data = download_list

    # Return the task object
    return (t)
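For reference, illustrative shapes for the 'tcfg' and 'ecfg' dictionaries consumed above; every value here is an assumption, only the key names are taken from the function body, and 'match_options' / 'mvar_replace_dict' are helpers defined elsewhere in that module:

tcfg = {
    'pre_exec': 'module load python',
    'executable': 'python3',
    'script': 'run_module.py',
    'options': ['pipeline_id'],
    'upload_input_data': ['run_module.py'],
    'cpu': {'processes': 1, 'process-type': None,
            'threads-per-process': 1, 'thread-type': None},
}
ecfg = {
    'exp_dir': '/path/to/experiment',
    'options': {'pipeline_id': None},
}
t = GenerateTask(tcfg, ecfg, 'pipe0', 'stage0', 'task0')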
Example #12
def init_cycle():

    # Create Pipeline Obj

    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()
    dict_tarball = dict()

    #Create Tarball stage
    tar_stg = Stage()
    #Create Tar/untar task
    tar_tsk = Task()
    tar_tsk.executable = ['python']
    tar_tsk.upload_input_data = ['Input_Files.tar', 'untar_input_files.py']
    tar_tsk.arguments = ['untar_input_files.py', 'Input_Files.tar']
    tar_tsk.cores = 1
    tar_stg.add_tasks(tar_tsk)
    #task_uids.append(tar_tsk.uid)
    p.add_stages(tar_stg)
    #stage_uids.append(tar_stg.uid)
    dict_tarball[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, tar_stg.uid,
                                                         tar_tsk.uid)
    #Create initial MD stage

    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [
            '/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'
        ]  #MD Engine, BW
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI'] #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        #md_tsk.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)]
        #md_tsk.upload_input_data = ['inpcrd','prmtop','mdin']
        md_tsk.link_input_data += [
            '%s/inpcrd' % dict_tarball[0],
            '%s/prmtop' % dict_tarball[0],
            '%s/mdin' % dict_tarball[0]
        ]
        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/']
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o',
            'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                   md_tsk.uid)

        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)
    #print d
    #Create Exchange Stage

    ex_stg = Stage()

    #Create Exchange Task

    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]

    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    #print Book
    return p
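These replica-exchange examples rely on module-level state rather than arguments; a hedged sketch of the globals they assume (values illustrative):

Replicas = 4         # number of replicas
Replica_Cores = 16   # cores per MD task
Book = []            # bookkeeping: one {replica: sandbox placeholder} dict per cycle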
Example #13
def Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod):

    """
    All cycles after the initial cycle
    """

    with open("exchangePairs.dat","r") as f:  # Read exchangePairs.dat
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray
                

    q = Pipeline()
    #Bookkeeping
    stage_uids = list()
    task_uids = list() ## = dict()
    md_dict = dict()


    #Create initial MD stage


    md_stg = Stage()
    for r in range (Replicas):
        md_tsk                 = Task()
        md_tsk.executable      = [MD_Executable]  #MD Engine, Blue Waters
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[Cycles-1][ExchangeArray[r]]),
                                  '%s/prmtop'%(Book[Cycles-1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[Cycles-1][r])]
                                  '%s/mdin'%(Book[Cycles-1][r])]

        md_tsk.pre_exec        = ['export AMBERHOME=$HOME/amber/amber14/'] # Should be abstracted from user?
        #md_tsk.pre_exec       = ['module load amber']
        #md_tsk.arguments      = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores           = Replica_Cores
        md_tsk.mpi             = True
        md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)

        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)

    ex_stg = Stage()
    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range (Replicas):
        #print md_dict[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[n1],n1)]

    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
        #print d
        #print Book
    return q

#p = InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod)
#q = Cycle(Replicas, Replica_Cores, Cycles, MD_Executable, ExchangeMethod)

#return (p, q)
                                                                            
Example #14
def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']  # runs for this campaign

    # setup S1 workload
    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create cfg with requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1-RAPTOR'
    s = Stage()

    # create cfg
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d = rs.filesystem.Directory(rurl)
    ls = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, nodes, runtime in runs:

        print('%30s  %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt = '%s.%s.gz' % (name, workload.output)
        # rec  = False

        # if tgt in ls:
        #     if workload.recompute:
        #         rec += 1
        #         d.move(tgt, tgt + '.bak')
        #     else:
        #         print('skip      1 %s' % name)
        #         continue

        # if smiles in ls:
        #     if smiles not in subs:
        #         subs[smiles] = [str(u).split('/')[-1]  for u in d.list('%s/*' % smiles)]
        #     if tgt in subs[smiles]:
        #         if workload.recompute:
        #             rec += 2
        #             d.move('%s/%s'     % (smiles, tgt),
        #                     '%s/%s.bak' % (smiles, tgt))
        #         else:
        #             print('skip      2 %s' % name)
        #             continue

        ## if os.path.exists('results/%s.%s.gz' % (name, wofkload.output)):
        ##     print('skip      3 %s' % name)
        ##     continue

        #if rec: print('recompute %d %s' % (rec, name))
        #else  : print('compute   2 %s'  %       name)

        cpn = cfg.cpn
        gpn = cfg.gpn
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles = smiles
        cfg.workload.name = name
        cfg.nodes = nodes
        cfg.runtime = runtime
        cfg.n_workers = int(nodes / n_masters - 1)
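        # e.g. nodes=10, n_masters=2 -> int(10/2 - 1) = 4 workers per master;
        # presumably one worker slot per master is reserved for the master rank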
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()

            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]

            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_threads = cpn
            t.upload_input_data = [
                'wf0_master.py', 'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            t.download_output_data = [
                '%s.%s.gz > results/%s.%s.gz' %
                (name, workload.output, name, workload.output)
            ]
            # t.input_staging  = [{'source': 'wf0_master.py',
            #                         'target': 'wf0_master.py',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': 'wf0_worker.py',
            #                         'target': 'wf0_worker.py',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': 'configs/wf0.%s.cfg' % name,
            #                         'target': 'wf0.cfg',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': workload.input_dir,
            #                         'target': 'input_dir',
            #                         'action': rp.LINK,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': workload.impress_dir,
            #                         'target': 'impress_md',
            #                         'action': rp.LINK,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                         {'source': 'read_ligand_dict.py',
            #                         'target': 'read_ligand_dict.py',
            #                         'action': rp.TRANSFER,
            #                         'flags' : rp.DEFAULT_FLAGS},
            #                     ]
            # t.output_staging = [{'source': '%s.%s.gz'         % (name, workload.output),
            #                      'target': 'results/%s.%s.gz' % (name, workload.output),
            #                      'action': rp.TRANSFER,
            #                      'flags' : rp.DEFAULT_FLAGS}]
            s.add_tasks(t)

    p.add_stages(s)

    return p
Example #15
def cycle(k):

    #read exchangePairs.dat
    #
    with open("exchangePairs.dat", "r") as f:
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
        #print ExchangeArray

    p = Pipeline()

    #Bookkeeping
    stage_uids = list()
    task_uids = list()  ## = dict()
    d = dict()

    #Create initial MD stage

    md_stg = Stage()

    #Create MD task
    for n0 in range(Replicas):
        md_tsk = Task()
        md_tsk.executable = [
            '/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI'
        ]  #MD Engine, Blue Waters
        #md_tsk.executable = ['/usr/local/packages/amber/16/INTEL-140-MVAPICH2-2.0/bin/pmemd.MPI'] #MD Engine, SuperMIC
        #md_tsk.executable = ['/opt/amber/bin/pmemd.MPI']
        md_tsk.link_input_data = [
            '%s/restrt > inpcrd' % (Book[k - 1][ExchangeArray[n0]]),
            '%s/prmtop' % (Book[k - 1][n0]),
            #'%s/mdin_{0}'.format(n0)%(Book[k-1][n0])]
            '%s/mdin' % (Book[k - 1][n0])
        ]
        ##Above: Copy from previous PIPELINE, make sure bookkeeping is correct

        md_tsk.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'
                           ]  #Preexec, BLue Waters
        #md_tsk.pre_exec = ['module load amber']
        #md_tsk.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments = [
            '-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o',
            'out_{0}'.format(n0), '-inf', 'mdinfo_{0}'.format(n0)
        ]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        d[n0] = '$Pipeline_%s_Stage_%s_Task_%s' % (p.uid, md_stg.uid,
                                                   md_tsk.uid)
        #print d
        md_stg.add_tasks(md_tsk)
        task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    stage_uids.append(md_stg.uid)

    #Create exchange stage

    ex_stg = Stage()

    #Create Exchange Task

    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print d[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s' % (d[n1], n1)]

    ex_tsk.arguments = ['TempEx.py', '{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    stage_uids.append(ex_stg.uid)
    Book.append(d)
    #print d
    #print Book
    return p
def test_task_to_dict():

    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a
    dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {   'uid': None,
                    'name': None,
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': [],
                    'executable': str(),
                    'arguments': [],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 1,
                                'process_type': None,
                                'threads_per_process': 1,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 0,
                                'process_type': None,
                                'threads_per_process': 0,
                                'thread_type': None
                                },
                    'lfs_per_process': 0,
                    'upload_input_data': [],
                    'copy_input_data': [],
                    'link_input_data': [],
                    'move_input_data': [],
                    'copy_output_data': [],
                    'move_output_data': [],
                    'download_output_data': [],
                    'stdout': None,
                    'stderr': None,
                    'exit_code': None,
                    'path': None,
                    'tag': None,
                    'parent_stage': {'uid':None, 'name': None},
                    'parent_pipeline': {'uid':None, 'name': None}}


    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {   'uid': 'test.0000',
                    'name': 'new',
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': ['module load abc'],
                    'executable': 'sleep',
                    'arguments': ['10'],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 10,
                                'process_type': None,
                                'threads_per_process': 2,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 5,
                                'process_type': None,
                                'threads_per_process': 3,
                                'thread_type': None
                                },
                    'lfs_per_process': 1024,
                    'upload_input_data': ['test1'],
                    'copy_input_data': ['test2'],
                    'link_input_data': ['test3'],
                    'move_input_data': ['test4'],
                    'copy_output_data': ['test5'],
                    'move_output_data': ['test6'],
                    'download_output_data': ['test7'],
                    'stdout': 'out',
                    'stderr': 'err',
                    'exit_code': 1,
                    'path': 'a/b/c',
                    'tag': 'task.0010',
                    'parent_stage': {'uid': 's1', 'name': 'stage1'},
                    'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}


    t.executable = 'sleep'
    d = t.to_dict()

    assert d == {   'uid': 'test.0000',
                    'name': 'new',
                    'state': states.INITIAL,
                    'state_history': [states.INITIAL],
                    'pre_exec': ['module load abc'],
                    'executable': 'sleep',
                    'arguments': ['10'],
                    'post_exec': [],
                    'cpu_reqs': { 'processes': 10,
                                'process_type': None,
                                'threads_per_process': 2,
                                'thread_type': None
                                },
                    'gpu_reqs': { 'processes': 5,
                                'process_type': None,
                                'threads_per_process': 3,
                                'thread_type': None
                                },
                    'lfs_per_process': 1024,
                    'upload_input_data': ['test1'],
                    'copy_input_data': ['test2'],
                    'link_input_data': ['test3'],
                    'move_input_data': ['test4'],
                    'copy_output_data': ['test5'],
                    'move_output_data': ['test6'],
                    'download_output_data': ['test7'],
                    'stdout': 'out',
                    'stderr': 'err',
                    'exit_code': 1,
                    'path': 'a/b/c',
                    'tag': 'task.0010',
                    'parent_stage': {'uid': 's1', 'name': 'stage1'},
                    'parent_pipeline': {'uid': 'p1', 'name': 'pipeline1'}}
Example #19
def InitCycle(Replicas, Replica_Cores, MD_Executable, ExchangeMethod):     # "Cycle" = 1 MD stage plus the subsequent exchange computation

    #Initialize Pipeline
    p = Pipeline()

    md_dict    = dict() #Bookkeeping
    tar_dict   = dict() #Bookkeeping


    #Create Untar Stage
    untar_stg = Stage()
    #Untar Task
    untar_tsk                   = Task()
    untar_tsk.executable        = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py','../../Input_Files.tar']
    untar_tsk.arguments         = ['untar_input_files.py','Input_Files.tar']
    untar_tsk.cores             = 1

    untar_stg.add_tasks(untar_tsk)
    p.add_stages(untar_stg)


    tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid,
                                                   untar_stg.uid,
                                                   untar_tsk.uid)
    print(tar_dict[0])
    # First MD stage: needs to be defined separately since workflow is not built from a predetermined order
    md_stg = Stage()


    # MD tasks

    for r in range (Replicas):
        md_tsk                  = Task()
        md_tsk.executable       = [MD_Executable]
        md_tsk.link_input_data += ['%s/inpcrd'%tar_dict[0],
                                   '%s/prmtop'%tar_dict[0],
                                   #'%s/mdin_{0}'.format(r)%tar_dict[0]
                                   '%s/mdin'%tar_dict[0] 
                                   ] 
        md_tsk.pre_exec         = ['export AMBERHOME=$HOME/amber/amber14/'] #Should be abstracted from the user?
        md_tsk.arguments        = ['-O','-p','prmtop', '-i', 'mdin',               #'mdin_{0}'.format(r), # Use this for full Temperature Exchange
                                   '-c','inpcrd','-o','out_{0}'.format(r),
                                   '-inf','mdinfo_{0}'.format(r)]
        md_tsk.cores = Replica_Cores
        md_tsk.mpi = True
        md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s'%(p.uid, md_stg.uid, md_tsk.uid)

        md_stg.add_tasks(md_tsk)
        #task_uids.append(md_tsk.uid)
    p.add_stages(md_stg)
    #stage_uids.append(md_stg.uid)
                                                

    # First Exchange Stage
    ex_stg = Stage()

    # Create Exchange Task. Exchange task performs a Metropolis Hastings thermodynamic balance condition
    # and spits out the exchangePairs.dat file that contains a sorted list of ordered pairs. 
    # Said pairs then exchange configurations by linking output configuration files appropriately.

    ex_tsk                      = Task()
    ex_tsk.executable           = ['python']
    #ex_tsk.upload_input_data    = ['exchangeMethods/TempEx.py']
    ex_tsk.upload_input_data    = [ExchangeMethod]  
    for r in range (Replicas):
        ex_tsk.link_input_data     += ['%s/mdinfo_%s'%(md_dict[r],r)]
    ex_tsk.arguments            = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores                = 1
    ex_tsk.mpi                  = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    p.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
    #print Book
    return p
Example #20
def test_input_list_from_task():
    """
    **Purpose**: Test if the 'get_input_list_from_task' function generates the
                 correct RP input transfer directives when given a Task.
    """

    pipeline = str(ru.generate_id('pipeline'))
    stage = str(ru.generate_id('stage'))
    task = str(ru.generate_id('task'))

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    for t in [1, 'a', list(), dict(), True]:
        with pytest.raises(TypeError):
            get_input_list_from_task(t, placeholders)

    # Test link input data
    t = Task()
    t.link_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.link_input_data[0])

    t = Task()
    t.link_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.LINK
    assert ip_list[0]['source'] == t.link_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.link_input_data[0].split('>')[1].strip())

    # Test copy input data
    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.copy_input_data[0])

    t = Task()
    t.copy_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.COPY
    assert ip_list[0]['source'] == t.copy_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.copy_input_data[0].split('>')[1].strip())

    # Test move input data
    t = Task()
    t.move_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.move_input_data[0])

    t = Task()
    t.move_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert ip_list[0]['action'] == rp.MOVE
    assert ip_list[0]['source'] == t.move_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.move_input_data[0].split('>')[1].strip())

    # Test upload input data

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0]
    assert ip_list[0]['target'] == os.path.basename(t.upload_input_data[0])

    t = Task()
    t.upload_input_data = ['/home/vivek/test.dat > new_test.dat']
    ip_list = get_input_list_from_task(t, placeholders)

    assert 'action' not in ip_list[0]
    assert ip_list[0]['source'] == t.upload_input_data[0].split('>')[0].strip()
    assert ip_list[0]['target'] == os.path.basename(
        t.upload_input_data[0].split('>')[1].strip())
def Cycle(Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):

    """
    All cycles after the initial cycle
    """

    with open("exchangePairs.dat","r") as f:  # Read exchangePairs.dat
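        # exchangePairs.dat is assumed to hold one line per replica, with the
        # exchange partner's replica index in the second whitespace-separated
        # column (hence the int(line.split()[1]) below).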
        ExchangeArray = []
        for line in f:
            ExchangeArray.append(int(line.split()[1]))
            #ExchangeArray.append(line)
            #print ExchangeArray
                

    q = Pipeline()
    #Bookkeeping
    stage_uids = list()
    task_uids = list() ## = dict()
    md_dict = dict()


    #Create initial MD stage


    md_stg = Stage()
    for r in range(Replicas):
        md_tsk                 = Task()
        md_tsk.executable      = [MD_Executable]  #MD Engine, Blue Waters
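        # The '>' in a staging directive renames on staging: the source file on
        # the left is placed into the task sandbox under the name on the right
        # (see the link_input_data tests elsewhere in this document).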
        md_tsk.link_input_data = ['%s/restrt > inpcrd'%(Book[Cycle-1][ExchangeArray[r]]),
                                  '%s/prmtop'%(Book[Cycle-1][r]),
                                  #'%s/mdin_{0}'.format(r)%(Book[k-1][r])]
                                  '%s/mdin'%(Book[Cycle-1][r])]

        md_tsk.pre_exec        = ['export AMBERHOME=$HOME/amber/amber14/'] # Should be abstracted from user?
        #md_tsk.pre_exec       = ['module load amber']
        #md_tsk.arguments      = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(n0),'-inf', 'mdinfo_{0}'.format(n0)]
        md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
        md_tsk.cores           = Replica_Cores
        md_tsk.mpi             = True
        md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.uid, md_stg.uid, md_tsk.uid)
        md_stg.add_tasks(md_tsk)

        #task_uids.append(md_tsk.uid)
    q.add_stages(md_stg)
             
                                                                                         
                                                                                          
    ex_stg = Stage()
    #Create Exchange Task
    ex_tsk = Task()
    ex_tsk.executable = ['python']
    ex_tsk.upload_input_data = ['exchangeMethods/TempEx.py']
    for n1 in range(Replicas):
        #print md_dict[n1]

        ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[n1],n1)]

    ex_tsk.arguments = ['TempEx.py','{0}'.format(Replicas)]
    ex_tsk.cores = 1
    ex_tsk.mpi = False
    ex_tsk.download_output_data = ['exchangePairs.dat']
    ex_stg.add_tasks(ex_tsk)
    #task_uids.append(ex_tsk.uid)
    q.add_stages(ex_stg)
    #stage_uids.append(ex_stg.uid)
    Book.append(md_dict)
        #print md_dict
        #print Book
    return q
Example #22
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP
                 ComputeUnitDescription with the complete Task description.
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholders = {pipeline: {stage: {task: '/home/vivek/some_file.txt'}}}

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = 'grompp'
    t1.arguments = ['hello']
    t1.cpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 1,
        'thread_type': 'OpenMP'
    }
    t1.gpu_reqs = {
        'processes': 4,
        'process_type': 'MPI',
        'threads_per_process': 2,
        'thread_type': 'OpenMP'
    }
    t1.post_exec = ['echo test']
    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    cud = create_cud_from_task(t1, placeholders)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (
        t1.uid, t1.name, t1.parent_stage['uid'], t1.parent_stage['name'],
        t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.post_exec == t1.post_exec
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']

    assert {
        'source': 'upload_input.dat',
        'target': 'upload_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_input.dat',
        'action': rp.COPY,
        'target': 'copy_input.dat'
    } in cud.input_staging
    assert {
        'source': 'link_input.dat',
        'action': rp.LINK,
        'target': 'link_input.dat'
    } in cud.input_staging
    assert {
        'source': 'copy_output.dat',
        'action': rp.COPY,
        'target': 'copy_output.dat'
    } in cud.output_staging
    assert {
        'source': 'download_output.dat',
        'target': 'download_output.dat'
    } in cud.output_staging
def test_create_cud_from_task():
    """
    **Purpose**: Test if the 'create_cud_from_task' function generates a RP ComputeUnitDescription with the complete
    Task description
    """

    pipeline = 'p1'
    stage = 's1'
    task = 't1'

    placeholder_dict = {
        pipeline: {
            stage: {
                task: '/home/vivek/some_file.txt'
            }
        }
    }

    t1 = Task()
    t1.name = 't1'
    t1.pre_exec = ['module load gromacs']
    t1.executable = ['grompp']
    t1.arguments = ['hello']
    t1.cpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 1,
                   'thread_type': 'OpenMP'
                   }
    t1.gpu_reqs = {'processes': 4,
                   'process_type': 'MPI',
                   'threads_per_process': 2,
                   'thread_type': 'OpenMP'
                   }
    t1.post_exec = ['echo test']

    t1.upload_input_data = ['upload_input.dat']
    t1.copy_input_data = ['copy_input.dat']
    t1.link_input_data = ['link_input.dat']
    t1.copy_output_data = ['copy_output.dat']
    t1.download_output_data = ['download_output.dat']

    p = Pipeline()
    p.name = 'p1'
    s = Stage()
    s.name = 's1'
    s.tasks = t1
    p.stages = s

    p._assign_uid('test')

    cud = create_cud_from_task(t1, placeholder_dict)

    assert cud.name == '%s,%s,%s,%s,%s,%s' % (t1.uid, t1.name,
                                              t1.parent_stage['uid'], t1.parent_stage['name'],
                                              t1.parent_pipeline['uid'], t1.parent_pipeline['name'])
    assert cud.pre_exec == t1.pre_exec

    # rp returns executable as a string regardless of whether assignment was using string or list
    assert cud.executable == t1.executable
    assert cud.arguments == t1.arguments
    assert cud.cpu_processes == t1.cpu_reqs['processes']
    assert cud.cpu_threads == t1.cpu_reqs['threads_per_process']
    assert cud.cpu_process_type == t1.cpu_reqs['process_type']
    assert cud.cpu_thread_type == t1.cpu_reqs['thread_type']
    assert cud.gpu_processes == t1.gpu_reqs['processes']
    assert cud.gpu_threads == t1.gpu_reqs['threads_per_process']
    assert cud.gpu_process_type == t1.gpu_reqs['process_type']
    assert cud.gpu_thread_type == t1.gpu_reqs['thread_type']
    assert cud.post_exec == t1.post_exec

    assert {'source': 'upload_input.dat', 'target': 'upload_input.dat'} in cud.input_staging
    assert {'source': 'copy_input.dat', 'action': rp.COPY, 'target': 'copy_input.dat'} in cud.input_staging
    assert {'source': 'link_input.dat', 'action': rp.LINK, 'target': 'link_input.dat'} in cud.input_staging
    assert {'source': 'copy_output.dat', 'action': rp.COPY, 'target': 'copy_output.dat'} in cud.output_staging
    assert {'source': 'download_output.dat', 'target': 'download_output.dat'} in cud.output_staging
    p = Pipeline()
    # Bookkeeping
    stage_uids = list()
    task_uids = dict()
    Stages = 1
    Replicas = 2


    for N_Stg in range(Stages):
        stg =  Stage() ## initialization
        task_uids['Stage_%s'%N_Stg] = list()
        if N_Stg == 0:
            for n0 in range(Replicas):
                t = Task()
                t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine  
                t.upload_input_data = ['inpcrd', 'prmtop', 'mdin'] 
                t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'] 
                t.arguments = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out']
                t.cores = 32
                t.mpi = True
                stg.add_tasks(t)
                task_uids['Stage_%s'%N_Stg].append(t.uid)
            p.add_stages(stg)
            stage_uids.append(stg.uid) 


        else:
        
            for n0 in range(Replicas):
                t = Task()
                t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine 
    def init_cycle(self, replicas, replica_cores, python_path, md_executable, exchange_method, min_temp, max_temp, timesteps, basename, pre_exec):  # "cycle" = 1 MD stage plus the subsequent exchange computation
        """ 
        Initial cycle consists of:
        1) Create tarball of MD input data 
        2) Transfer the tarball to pilot sandbox
        3) Untar the tarball
        4) Run first cycle
        """

        #Initialize Pipeline
        self._prof.prof('InitTar', uid=self._uid)
        p = Pipeline()
        p.name = 'initpipeline'

        md_dict = dict()  #bookkeeping
        tar_dict = dict()  #bookkeeping

        #Write the input files

        self._prof.prof('InitWriteInputs', uid=self._uid)

        writeInputs.writeInputs(
            max_temp=max_temp,
            min_temp=min_temp,
            replicas=replicas,
            timesteps=timesteps,
            basename=basename)

        self._prof.prof('EndWriteInputs', uid=self._uid)

        self._prof.prof('InitTar', uid=self._uid)
        #Create Tarball of input data

        tar = tarfile.open("input_files.tar", "w")
        for name in [
                basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"
        ]:
            tar.add(name)
        for r in range(replicas):
            tar.add('mdin_{0}'.format(r))
        tar.close()

        #delete all input files outside the tarball

        for r in range(replicas):
            os.remove('mdin_{0}'.format(r))

        self._prof.prof('EndTar', uid=self._uid)

        #Create Untar Stage

        repo = git.Repo('.', search_parent_directories=True)
        aux_function_path = repo.working_tree_dir


        untar_stg = Stage()
        untar_stg.name = 'untarStg'

        #Untar Task
        
        untar_tsk = Task()
        untar_tsk.name = 'untartsk'
        untar_tsk.executable = ['python']

        untar_tsk.upload_input_data = [
            str(aux_function_path)+'/repex/untar_input_files.py', 'input_files.tar'
        ]
        untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
        untar_tsk.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
        #untar_tsk.post_exec         = ['']
        untar_stg.add_tasks(untar_tsk)
        p.add_stages(untar_stg)

        tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, untar_stg.name, untar_tsk.name)

        # First MD stage: defined separately since the workflow is not built from a predetermined order, and equilibration needs to happen first.

        md_stg = Stage()
        md_stg.name = 'mdstg0'
        self._prof.prof('InitMD_0', uid=self._uid)

        # MD tasks

        for r in range(replicas):

            md_tsk = AMBERTask(cores=replica_cores, md_executable=md_executable, pre_exec=pre_exec)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
            md_tsk.link_input_data += [
                '%s/inpcrd' % tar_dict[0],
                '%s/prmtop' % tar_dict[0],
                '%s/mdin_{0}'.format(r) %
                tar_dict[0]  #Use for full temperature exchange
            ]
            md_tsk.arguments = [
                '-O',
                '-p',
                'prmtop',
                '-i',
                'mdin_{0}'.format(r),
                '-c',
                'inpcrd',
                '-o',
                'out-{replica}-{cycle}'.format(replica=r, cycle=0),
                '-r',
                'restrt',
                #'-r',  'rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                '-x',
                'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
                #'-o',  '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r,cycle=0),
                #'-r',  '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                #'-x',  '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r,cycle=0),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                p.name, md_stg.name, md_tsk.name)

            md_stg.add_tasks(md_tsk)
            self.md_task_list.append(md_tsk)
            #print md_tsk.uid
        p.add_stages(md_stg)
        #stage_uids.append(md_stg.uid)

        # First Exchange Stage

        ex_stg = Stage()
        ex_stg.name = 'exstg0'
        self._prof.prof('InitEx_0', uid=self._uid)

        # Create Exchange Task

        ex_tsk = Task()
        ex_tsk.name = 'extsk0'
        #ex_tsk.pre_exec             = ['module load python/2.7.10']
        ex_tsk.executable = [python_path]
        ex_tsk.upload_input_data = [exchange_method]
        for r in range(replicas):
            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.pre_exec = ['mv *.py exchange_method.py']
        ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = ['exchangePairs_0.dat']
        ex_stg.add_tasks(ex_tsk)
        #task_uids.append(ex_tsk.uid)
        p.add_stages(ex_stg)
        self.ex_task_list.append(ex_tsk)
        #self.ex_task_uids.append(ex_tsk.uid)
        self.book.append(md_dict)
        return p
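# A minimal sketch of running the pipeline returned by init_cycle (assumed
# usage, not part of the excerpt; the hostname/port and resource description
# are illustrative):
#
#   from radical.entk import AppManager
#
#   amgr               = AppManager(hostname='localhost', port=5672)
#   amgr.resource_desc = {'resource': 'local.localhost',
#                         'walltime': 60,
#                         'cpus'    : replicas * replica_cores}
#   amgr.workflow      = [rex.init_cycle(...)]  # 'rex' is a hypothetical instance
#   amgr.run()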
Example #26
def test_task_exceptions(s, l, i, b):
    """
    **Purpose**: Test if all attribute assignments raise exceptions for invalid values
    """

    t = Task()

    data_type = [s, l, i, b]
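    # s, l, i, b are assumed to be sample values of types str, list, int and
    # bool injected by the test harness (e.g. a hypothesis @given decorator).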

    for data in data_type:

        if not isinstance(data, str):
            with pytest.raises(TypeError):
                t.name = data

            with pytest.raises(TypeError):
                t.path = data

            with pytest.raises(TypeError):
                t.parent_stage = data

            with pytest.raises(TypeError):
                t.parent_pipeline = data

            with pytest.raises(TypeError):
                t.stdout = data

            with pytest.raises(TypeError):
                t.stderr = data

        if not isinstance(data, list):

            with pytest.raises(TypeError):
                t.pre_exec = data

            with pytest.raises(TypeError):
                t.executable = data

            with pytest.raises(TypeError):
                t.arguments = data

            with pytest.raises(TypeError):
                t.post_exec = data

            with pytest.raises(TypeError):
                t.upload_input_data = data

            with pytest.raises(TypeError):
                t.copy_input_data = data

            with pytest.raises(TypeError):
                t.link_input_data = data

            with pytest.raises(TypeError):
                t.move_input_data = data

            with pytest.raises(TypeError):
                t.copy_output_data = data

            with pytest.raises(TypeError):
                t.download_output_data = data

            with pytest.raises(TypeError):
                t.move_output_data = data

        if not isinstance(data, str):

            # each invalid assignment must fail on its own, so each needs its
            # own pytest.raises block
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(ValueError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }
            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': data,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(ValueError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': data
                }

        if not isinstance(data, int):

            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.cpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': data,
                    'process_type': None,
                    'threads_per_process': 1,
                    'thread_type': None
                }
            with pytest.raises(TypeError):
                t.gpu_reqs = {
                    'processes': 1,
                    'process_type': None,
                    'threads_per_process': data,
                    'thread_type': None
                }
    def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable,
                     ExchangeMethod):
        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """

        self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)
        with open('exchangePairs_{0}.dat'.format(Cycle),
                  'r') as f:  # Read exchangePairs.dat
            ExchangeArray = []
            for line in f:
                ExchangeArray.append(int(line.split()[1]))
                #ExchangeArray.append(line)
                #print ExchangeArray

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(Cycle)
        #Bookkeeping
        stage_uids = list()
        task_uids = list()  ## = dict()
        md_dict = dict()

        #Create initial MD stage

        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(Cycle)

        self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)

        for r in range(Replicas):
            md_tsk = AMBERTask(cores=Replica_Cores,
                               MD_Executable=MD_Executable)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r,
                                                           cycle=Cycle)
            md_tsk.link_input_data = [
                '%s/restrt > inpcrd' %
                (self.Book[Cycle - 1][ExchangeArray[r]]),
                '%s/prmtop' % (self.Book[0][r]),
                #'%s/prmtop'%(self.Tarball_path[0]),
                '%s/mdin_{0}'.format(r) % (self.Book[0][r])
            ]

            #'%s/mdin'%(self.Book[0][r])]
            #'%s/mdin'%(self.Tarball_path[0])]

            md_tsk.arguments = [
                '-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c',
                'inpcrd', '-o', 'out_{0}'.format(r), '-inf',
                'mdinfo_{0}'.format(r)
            ]
            #md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)

        q.add_stages(md_stg)

        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(Cycle + 1)

        #Create Exchange Task
        ex_tsk = Task()
        ex_tsk.name = 'extsk{0}'.format(Cycle + 1)
        ex_tsk.executable = ['python']
        ex_tsk.upload_input_data = [ExchangeMethod]
        for r in range(Replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]

        ex_tsk.arguments = [
            'TempEx.py', '{0}'.format(Replicas), '{0}'.format(Cycle + 1)
        ]
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = [
            'exchangePairs_{0}.dat'.format(Cycle + 1)
        ]  # Finds exchange partners, also  Generates exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.Book.append(md_dict)
        #self._prof.prof('EndEx_{0}'.format(Cycle), uid=self._uid)
        #print d
        #print self.Book
        return q
Example #28
    # Bookkeeping
    stage_uids = list()
    task_uids = dict()
    Stages = 3
    Replicas = 4
    for N_Stg in range(Stages):
        stg = Stage()  ## initialization
        task_uids['Stage_%s' % N_Stg] = list()
        if N_Stg == 0:
            for n0 in range(Replicas):
                t = Task()
                t.executable = [
                    '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d'
                ]  #MD Engine
                t.upload_input_data = [
                    'in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp'
                ]
                t.pre_exec = [
                    'module load gromacs',
                    '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'
                ]
                t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']
                t.cores = 32
                stg.add_tasks(t)
                task_uids['Stage_%s' % N_Stg].append(t.uid)
            p.add_stages(stg)
            stage_uids.append(stg.uid)

        else:

            for n0 in range(Replicas):
Example #29
    def general_cycle(self, replicas, replica_cores, cycle, python_path,
                      md_executable, exchange_method, pre_exec):
        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """

        self._prof.prof('InitcreateMDwokflow_{0}'.format(cycle), uid=self._uid)
        with open('exchangePairs_{0}.dat'.format(cycle),
                  'r') as f:  # Read exchangePairs.dat
            exchange_array = []
            for line in f:
                exchange_array.append(int(line.split()[1]))
                #exchange_array.append(line)
                #print exchange_array

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(cycle)
        #bookkeeping
        stage_uids = list()
        task_uids = list()  ## = dict()
        md_dict = dict()

        #Create MD stage

        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(cycle)

        self._prof.prof('InitMD_{0}'.format(cycle), uid=self._uid)

        for r in range(replicas):
            md_tsk = AMBERTask(cores=replica_cores,
                               md_executable=md_executable,
                               pre_exec=pre_exec)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r,
                                                           cycle=cycle)
            md_tsk.link_input_data = [
                '%s/restrt > inpcrd' %
                (self.book[cycle - 1][exchange_array[r]]),
                '%s/prmtop' % (self.book[0][r]),
                '%s/mdin_{0}'.format(r) % (self.book[0][r])
            ]

            ### The Following softlinking scheme is to be used ONLY if node local file system is to be used: not fully supported yet.
            #md_tsk.link_input_data = ['$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=exchange_array[r],cycle=cycle-1) > '$NODE_LFS_PATH/inpcrd',
            #                          #'%s/restrt > inpcrd'%(self.book[cycle-1][exchange_array[r]]),
            #                          '%s/prmtop'%(self.book[0][r]),
            #                          '%s/mdin_{0}'.format(r)%(self.Book[0][r])]

            md_tsk.arguments = [
                '-O',
                '-i',
                'mdin_{0}'.format(r),
                '-p',
                'prmtop',
                '-c',
                'inpcrd',
                #'-c', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle-1),
                '-o',
                'out-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                '-r',
                'restrt',
                #'-r', 'rstrt-{replica}-{cycle}'.format(replica=r,cycle=cycle),
                '-x',
                'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=cycle),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            #md_tsk.tag              = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=0)
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)

        q.add_stages(md_stg)

        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(cycle + 1)

        #Create Exchange Task
        ex_tsk = Task()
        ex_tsk.name = 'extsk{0}'.format(cycle + 1)
        ex_tsk.executable = [
            python_path
        ]  #['/usr/bin/python']  #['/opt/python/bin/python']
        ex_tsk.upload_input_data = [exchange_method]
        for r in range(replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.pre_exec = ['mv *.py exchange_method.py']
        ex_tsk.arguments = [
            'exchange_method.py', '{0}'.format(replicas),
            '{0}'.format(cycle + 1)
        ]
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = [
            'exchangePairs_{0}.dat'.format(cycle + 1)
        ]  # Finds exchange partners, also  Generates exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.book.append(md_dict)
        #self._prof.prof('EndEx_{0}'.format(cycle), uid=self._uid)
        #print d
        #print self.book
        return q
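# 'AMBERTask' is not defined in these excerpts. A minimal sketch consistent
# with the calls above would be a factory that presets the common sander
# fields on a Task (names and defaults here are assumptions):

from radical.entk import Task

def AMBERTask(cores, md_executable, pre_exec=None):

    t            = Task()
    t.executable = [md_executable]
    t.pre_exec   = pre_exec or []
    t.cores      = cores   # older EnTK attribute style used in these examples
    t.mpi        = True    # sander.MPI runs under MPI
    return t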
def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']  # runs for this campaign

    # setup S1 workload
    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create cfg with requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1.RAPTOR'
    s = Stage()

    # create cfg
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d = rs.filesystem.Directory(rurl)
    ls = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, n_workers, runtime in runs:

        print('%30s  %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt = '%s.%s.gz' % (name, workload.output)

        cpw = cfg.cpw
        gpw = cfg.gpw
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles = smiles
        cfg.workload.name = name
        cfg.runtime = runtime
        cfg.n_workers = n_workers
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()

            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]

            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_reqs = {
                'processes': 1,
                'threads_per_process': 4,
                'thread_type': None,
                'process_type': None
            }
            t.upload_input_data = [
                'wf0_master.py', 'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            #t.download_output_data = ['%s.%s.gz > results/%s.%s.gz' %
            #    (name, workload.output, name, workload.output)]

            s.add_tasks(t)

    p.add_stages(s)

    return p
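# A minimal sketch of invoking the generator above (assumed usage; both file
# names are hypothetical placeholders):
#
#   pipeline = generate_pipeline({'run_cfg_file': 'wf0.resource.json',
#                                 'run_file'    : 'wf0.runs.json'})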
Example #31
def test_task_to_dict():
    """
    **Purpose**: Test if the 'to_dict' function of Task class converts all expected attributes of the Task into a
    dictionary
    """

    t = Task()
    d = t.to_dict()

    assert d == {
        'uid': None,
        'name': None,
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': [],
        'executable': [],
        'arguments': [],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 1,
            'process_type': None,
            'threads_per_process': 1,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 0,
            'process_type': None,
            'threads_per_process': 0,
            'thread_type': None
        },
        'lfs_per_process': 0,
        'upload_input_data': [],
        'copy_input_data': [],
        'link_input_data': [],
        'move_input_data': [],
        'copy_output_data': [],
        'move_output_data': [],
        'download_output_data': [],
        'stdout': None,
        'stderr': None,
        'exit_code': None,
        'path': None,
        'tag': None,
        'parent_stage': {
            'uid': None,
            'name': None
        },
        'parent_pipeline': {
            'uid': None,
            'name': None
        }
    }

    t = Task()
    t.uid = 'test.0000'
    t.name = 'new'
    t.pre_exec = ['module load abc']
    t.executable = ['sleep']
    t.arguments = ['10']
    t.cpu_reqs['processes'] = 10
    t.cpu_reqs['threads_per_process'] = 2
    t.gpu_reqs['processes'] = 5
    t.gpu_reqs['threads_per_process'] = 3
    t.lfs_per_process = 1024
    t.upload_input_data = ['test1']
    t.copy_input_data = ['test2']
    t.link_input_data = ['test3']
    t.move_input_data = ['test4']
    t.copy_output_data = ['test5']
    t.move_output_data = ['test6']
    t.download_output_data = ['test7']
    t.stdout = 'out'
    t.stderr = 'err'
    t.exit_code = 1
    t.path = 'a/b/c'
    t.tag = 'task.0010'
    t.parent_stage = {'uid': 's1', 'name': 'stage1'}
    t.parent_pipeline = {'uid': 'p1', 'name': 'pipeline1'}

    d = t.to_dict()

    assert d == {
        'uid': 'test.0000',
        'name': 'new',
        'state': states.INITIAL,
        'state_history': [states.INITIAL],
        'pre_exec': ['module load abc'],
        'executable': ['sleep'],
        'arguments': ['10'],
        'post_exec': [],
        'cpu_reqs': {
            'processes': 10,
            'process_type': None,
            'threads_per_process': 2,
            'thread_type': None
        },
        'gpu_reqs': {
            'processes': 5,
            'process_type': None,
            'threads_per_process': 3,
            'thread_type': None
        },
        'lfs_per_process': 1024,
        'upload_input_data': ['test1'],
        'copy_input_data': ['test2'],
        'link_input_data': ['test3'],
        'move_input_data': ['test4'],
        'copy_output_data': ['test5'],
        'move_output_data': ['test6'],
        'download_output_data': ['test7'],
        'stdout': 'out',
        'stderr': 'err',
        'exit_code': 1,
        'path': 'a/b/c',
        'tag': 'task.0010',
        'parent_stage': {
            'uid': 's1',
            'name': 'stage1'
        },
        'parent_pipeline': {
            'uid': 'p1',
            'name': 'pipeline1'
        }
    }
Example #32
    def init_cycle(
        self, replicas, replica_cores, python_path, md_executable,
        exchange_method, min_temp, max_temp, timesteps, basename, pre_exec
    ):  # "cycle" = 1 MD stage plus the subsequent exchange computation
        """ 
        Initial cycle consists of:
        1) Create tarball of MD input data 
        2) Transfer the tarball to pilot sandbox
        3) Untar the tarball
        4) Run first cycle
        """

        #Initialize Pipeline
        self._prof.prof('InitTar', uid=self._uid)
        p = Pipeline()
        p.name = 'initpipeline'

        md_dict = dict()  #bookkeeping
        tar_dict = dict()  #bookkeeping

        #Write the input files

        self._prof.prof('InitWriteInputs', uid=self._uid)

        writeInputs.writeInputs(max_temp=max_temp,
                                min_temp=min_temp,
                                replicas=replicas,
                                timesteps=timesteps,
                                basename=basename)

        self._prof.prof('EndWriteInputs', uid=self._uid)

        self._prof.prof('InitTar', uid=self._uid)
        #Create Tarball of input data

        tar = tarfile.open("input_files.tar", "w")
        for name in [
                basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"
        ]:
            tar.add(name)
        for r in range(replicas):
            tar.add('mdin_{0}'.format(r))
        tar.close()

        #delete all input files outside the tarball

        for r in range(replicas):
            os.remove('mdin_{0}'.format(r))

        self._prof.prof('EndTar', uid=self._uid)

        #Create Untar Stage

        repo = git.Repo('.', search_parent_directories=True)
        aux_function_path = repo.working_tree_dir

        untar_stg = Stage()
        untar_stg.name = 'untarStg'

        #Untar Task

        untar_tsk = Task()
        untar_tsk.name = 'untartsk'
        untar_tsk.executable = ['python']

        untar_tsk.upload_input_data = [
            str(aux_function_path) + '/repex/untar_input_files.py',
            'input_files.tar'
        ]
        untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
        untar_tsk.cpu_reqs = {'processes': 1, 'process_type': None,
                              'threads_per_process': 1, 'thread_type': None}
        #untar_tsk.post_exec         = ['']
        untar_stg.add_tasks(untar_tsk)
        p.add_stages(untar_stg)

        tar_dict[0] = '$Pipeline_%s_Stage_%s_Task_%s' % (
            p.name, untar_stg.name, untar_tsk.name)

        # First MD stage: defined separately since the workflow is not built from a predetermined order, and equilibration needs to happen first.

        md_stg = Stage()
        md_stg.name = 'mdstg0'
        self._prof.prof('InitMD_0', uid=self._uid)

        # MD tasks

        for r in range(replicas):

            md_tsk = AMBERTask(cores=replica_cores,
                               md_executable=md_executable,
                               pre_exec=pre_exec)
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=r, cycle=0)
            md_tsk.link_input_data += [
                '%s/inpcrd' % tar_dict[0],
                '%s/prmtop' % tar_dict[0],
                '%s/mdin_{0}'.format(r) %
                tar_dict[0]  #Use for full temperature exchange
            ]
            md_tsk.arguments = [
                '-O',
                '-p',
                'prmtop',
                '-i',
                'mdin_{0}'.format(r),
                '-c',
                'inpcrd',
                '-o',
                'out-{replica}-{cycle}'.format(replica=r, cycle=0),
                '-r',
                'restrt',
                #'-r',  'rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                '-x',
                'mdcrd-{replica}-{cycle}'.format(replica=r, cycle=0),
                #'-o',  '$NODE_LFS_PATH/out-{replica}-{cycle}'.format(replica=r,cycle=0),
                #'-r',  '$NODE_LFS_PATH/rstrt-{replica}-{cycle}'.format(replica=r,cycle=0),
                #'-x',  '$NODE_LFS_PATH/mdcrd-{replica}-{cycle}'.format(replica=r,cycle=0),
                '-inf',
                'mdinfo_{0}'.format(r)
            ]
            md_dict[r] = '$Pipeline_%s_Stage_%s_Task_%s' % (
                p.name, md_stg.name, md_tsk.name)

            md_stg.add_tasks(md_tsk)
            self.md_task_list.append(md_tsk)
            #print md_tsk.uid
        p.add_stages(md_stg)
        #stage_uids.append(md_stg.uid)

        # First Exchange Stage

        ex_stg = Stage()
        ex_stg.name = 'exstg0'
        self._prof.prof('InitEx_0', uid=self._uid)

        # Create Exchange Task

        ex_tsk = Task()
        ex_tsk.name = 'extsk0'
        #ex_tsk.pre_exec             = ['module load python/2.7.10']
        ex_tsk.executable = [python_path]
        ex_tsk.upload_input_data = [exchange_method]
        for r in range(replicas):
            ex_tsk.link_input_data += ['%s/mdinfo_%s' % (md_dict[r], r)]
        ex_tsk.pre_exec = ['mv *.py exchange_method.py']
        ex_tsk.arguments = ['exchange_method.py', '{0}'.format(replicas), '0']
        ex_tsk.cores = 1
        ex_tsk.mpi = False
        ex_tsk.download_output_data = ['exchangePairs_0.dat']
        ex_stg.add_tasks(ex_tsk)
        #task_uids.append(ex_tsk.uid)
        p.add_stages(ex_stg)
        self.ex_task_list.append(ex_tsk)
        #self.ex_task_uids.append(ex_tsk.uid)
        self.book.append(md_dict)
        return p
Example #33
    Replica_Cores = 1

    Pilot_Cores = Replicas * Replica_Cores

    
    for N_Stg in range(Stages):
        stg =  Stage() ## initialization
        task_uids['Stage_%s'%N_Stg] = list()

        #####Initial MD stage  

        if N_Stg == 0:
            for n0 in range(Replicas):
                t = Task()
                t.executable = ['/u/sciteam/mushnoor/amber/amber14/bin/sander.MPI']  #MD Engine  
                t.upload_input_data = ['inpcrd', 'prmtop', 'mdin_{0}'.format(n0)] 
                t.pre_exec = ['export AMBERHOME=$HOME/amber/amber14/'] 
                t.arguments = ['-O', '-i', 'mdin_{0}'.format(n0), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out']
                t.cores = Replica_Cores
                stg.add_tasks(t)
                task_uids['Stage_%s'%N_Stg].append(t.uid)
            p.add_stages(stg)
            stage_uids.append(stg.uid) 



        #####Exchange Stages    
        elif N_Stg != 0 and N_Stg % 2 == 1:
            t = Task()
            t.executable = ['python']
            t.upload_input_data = ['exchangeMethods/RandEx.py']
    def GeneralCycle(self, Replicas, Replica_Cores, Cycle, MD_Executable, ExchangeMethod):

        """
        All cycles after the initial cycle
        Pulls up exchange pairs file and generates the new workflow
        """


        self._prof.prof('InitcreateMDwokflow_{0}'.format(Cycle), uid=self._uid)
        with open('exchangePairs_{0}.dat'.format(Cycle),'r') as f:  # Read exchangePairs.dat
            ExchangeArray = []
            for line in f:
                ExchangeArray.append(int(line.split()[1]))
                #ExchangeArray.append(line)
                #print ExchangeArray
                    

        q = Pipeline()
        q.name = 'genpipeline{0}'.format(Cycle)
        #Bookkeeping
        stage_uids = list()
        task_uids = list() ## = dict()
        md_dict = dict()


        #Create initial MD stage


        md_stg = Stage()
        md_stg.name = 'mdstage{0}'.format(Cycle)

        self._prof.prof('InitMD_{0}'.format(Cycle), uid=self._uid)
    
        for r in range(Replicas):
            md_tsk                 = AMBERTask(cores=Replica_Cores, MD_Executable=MD_Executable)
            md_tsk.name            = 'mdtsk-{replica}-{cycle}'.format(replica=r,cycle=Cycle)
            md_tsk.link_input_data = ['%s/restrt > inpcrd'%(self.Book[Cycle-1][ExchangeArray[r]]),
                                      '%s/prmtop'%(self.Book[0][r]),
                                      #'%s/prmtop'%(self.Tarball_path[0]),
                                      '%s/mdin_{0}'.format(r)%(self.Book[0][r])]

                                      #'%s/mdin'%(self.Book[0][r])]
                                      #'%s/mdin'%(self.Tarball_path[0])]

            md_tsk.arguments      = ['-O', '-i', 'mdin_{0}'.format(r), '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
            #md_tsk.arguments       = ['-O', '-i', 'mdin', '-p', 'prmtop', '-c', 'inpcrd', '-o', 'out_{0}'.format(r),'-inf', 'mdinfo_{0}'.format(r)]
            md_dict[r]             = '$Pipeline_%s_Stage_%s_Task_%s'%(q.name, md_stg.name, md_tsk.name)
            self.md_task_list.append(md_tsk)
            md_stg.add_tasks(md_tsk)
        

        
        q.add_stages(md_stg)
                 
                                                                                            
                                                                                              
        ex_stg = Stage()
        ex_stg.name = 'exstg{0}'.format(Cycle+1)

        #Create Exchange Task
        ex_tsk                      = Task()
        ex_tsk.name                 = 'extsk{0}'.format(Cycle+1)
        ex_tsk.executable           = ['python']
        ex_tsk.upload_input_data    = [ExchangeMethod]
        for r in range(Replicas):

            ex_tsk.link_input_data += ['%s/mdinfo_%s'%(md_dict[r],r)]

        ex_tsk.arguments            = ['TempEx.py','{0}'.format(Replicas), '{0}'.format(Cycle+1)]
        ex_tsk.cores                = 1
        ex_tsk.mpi                  = False
        ex_tsk.download_output_data = ['exchangePairs_{0}.dat'.format(Cycle+1)] # Finds exchange partners, also  Generates exchange history trace

        ex_stg.add_tasks(ex_tsk)

        #task_uids.append(ex_tsk.uid)
        self.ex_task_list.append(ex_tsk)

        q.add_stages(ex_stg)

        #stage_uids.append(ex_stg.uid)

        self.Book.append(md_dict)
        #self._prof.prof('EndEx_{0}'.format(Cycle), uid=self._uid)
        #print d
        #print self.Book
        return q
    # Create a Pipeline object
    p = Pipeline()
    # Bookkeeping
    stage_uids = list()
    task_uids = dict()
    Stages = 3
    Replicas = 4
    for N_Stg in range(Stages):
        stg =  Stage() ## initialization
        task_uids['Stage_%s'%N_Stg] = list()
        if N_Stg == 0:
            for n0 in range(Replicas):
                t = Task()
                t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d']  #MD Engine  
                t.upload_input_data = ['in.gro', 'in.top', 'FNF.itp', 'martini_v2.2.itp', 'in.mdp'] 
                t.pre_exec = ['module load gromacs', '/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d grompp -f in.mdp -c in.gro -o in.tpr -p in.top'] 
                t.arguments = ['mdrun', '-s', 'in.tpr', '-deffnm', 'out']
                t.cores = 32
                stg.add_tasks(t)
                task_uids['Stage_%s'%N_Stg].append(t.uid)
            p.add_stages(stg)
            stage_uids.append(stg.uid) 



        else:
        
            for n0 in range(Replicas):
                t = Task()
                t.executable = ['/usr/local/packages/gromacs/5.1.4/INTEL-140-MVAPICH2-2.0/bin/gmx_mpi_d']  #MD Engine  
Example #36
def generate_pipeline(name, stages):  # Generate the pipeline of prediction and blob detection

    # Create a Pipeline object
    p = Pipeline()
    p.name = name

    for s_cnt in range(stages):


        if(s_cnt==0):
            # Create a Stage object
            s0 = Stage()
            s0.name = 'Stage %s'%s_cnt
            # Create Task 1, training
            t1 = Task()
            t1.name = 'Predictor'
            t1.pre_exec = ['module load psc_path/1.1',
                           'module load slurm/default',
                           'module load intel/17.4',
                           'module load python3',
                           'module load cuda',
                           'mkdir -p classified_images/crabeater',
                           'mkdir -p classified_images/weddel',
                           'mkdir -p classified_images/pack-ice',
                           'mkdir -p classified_images/other',
                           'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate'
                          ]
            t1.executable = 'python3'   # Assign executable to the task   
            # Assign arguments for the task executable
            t1.arguments = ['pt_predict.py','-class_names','crabeater','weddel','pack-ice','other']
            t1.link_input_data = ['/pylon5/mc3bggp/paraskev/seal_test/nn_model.pth.tar',
                                  '/pylon5/mc3bggp/paraskev/nn_images',
                                  '/pylon5/mc3bggp/paraskev/seal_test/test_images'
                                  ]
            t1.upload_input_data = ['pt_predict.py','sealnet_nas_scalable.py']
            t1.cpu_reqs = {'processes': 1,'threads_per_process': 1, 'thread_type': 'OpenMP'}
            t1.gpu_reqs = {'processes': 1,'threads_per_process': 1, 'thread_type': 'OpenMP'}
        
            s0.add_tasks(t1)    
            # Add Stage to the Pipeline
            p.add_stages(s0)
        else:
            # Create a Stage object
            s1 = Stage()
            s1.name = 'Stage %s'%s_cnt
            # Create Task 2,
            t2 = Task()
            t2.pre_exec = ['module load psc_path/1.1',
                           'module load slurm/default',
                           'module load intel/17.4',
                           'module load python3',
                           'module load cuda',
                           'module load opencv',
                           'source /pylon5/mc3bggp/paraskev/pytorchCuda/bin/activate',
                           'mkdir -p blob_detected'
                         ]
            t2.name = 'Blob_detector'         
            t2.executable = ['python3']   # Assign executable to the task   
            # Assign arguments for the task executable
            t2.arguments = ['blob_detector.py']
            t2.upload_input_data = ['blob_detector.py']
            t2.link_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/classified_images'%(p.uid, s0.uid, t1.uid)]
            t2.download_output_data = ['blob_detected/'] # Download resulting images
            t2.cpu_reqs = {'processes': 1,'threads_per_process': 1, 'thread_type': 'OpenMP'}
            t2.gpu_reqs = {'processes': 1, 'threads_per_process': 1, 'thread_type': 'OpenMP'}
            s1.add_tasks(t2)
            # Add Stage to the Pipeline
            p.add_stages(s1)

    return p
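# A minimal usage sketch (assumed, not part of the excerpt): stage 0 runs the
# predictor once, and every subsequent stage runs the blob detector on the
# classified images the predictor produced.
#
#   p = generate_pipeline(name='seal_detection', stages=2)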