def test__copy_local_data(self):
        """Check that local input files become ``cp`` steps in ``pre_exec``.

        A "TEST" kernel description with three entries in
        ``copy_local_input_data`` is bound to "localhost"; the bound
        ``pre_exec`` list must then hold the kernel's own pre-exec command
        followed by one ``cp <file> .`` command per entry.
        """
        from radical.ensemblemd.mdkernels import MDTaskDescription

        expected_pre_exec = [
            u'/bin/echo -n TEST:localhost',
            'cp file1 .',
            'cp file2 .',
            'cp file3 .',
        ]

        description = MDTaskDescription()
        description.kernel = "TEST"
        description.arguments = ["-f"]
        description.copy_local_input_data = ["file1", "file2", "file3"]

        bound = description.bind(resource="localhost")

        assert bound.pre_exec == expected_pre_exec
    def test__binding(self):
        """Check the binding of an abstract task description to a resource.

        Binds a "TEST" kernel description to "localhost" and verifies the
        pre-exec commands, the executable, the arguments (passed through
        unchanged) and the resource name of the bound description.
        """
        from radical.ensemblemd.mdkernels import MDTaskDescription

        description = MDTaskDescription()
        description.kernel = "TEST"
        description.arguments = ["-f"]

        bound = description.bind(resource="localhost")

        # Kept from the original test: shows the bound pre_exec in the output.
        print(bound.pre_exec)

        assert bound.pre_exec == ["/bin/echo -n TEST:localhost"]
        assert bound.executable == "/bin/hostname"
        assert bound.arguments == description.arguments
        assert bound.resource == "localhost"
Beispiel #3
0
def run_checkenv(config):
    """Run a single-core NAMD job as an environment sanity check.

    Describes a small pilot (4 cores, 5 minutes) on ``config.RESOURCE``,
    binds the "NAMD" kernel to that resource, runs the resulting unit
    through ``BatchRunner`` and prints every stdout line containing "Info:".

    :param config: configuration object. ``RESOURCE`` and ``ALLOCATION`` are
        read here; the object itself is handed to ``BatchRunner``.
    """
    resource = config.RESOURCE
    allocation = config.ALLOCATION

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource   = resource
    pdesc.runtime    = 5 # minutes
    pdesc.cores      = 4
    pdesc.project    = allocation
    pdesc.cleanup    = False

    ############################################################
    # The checkenv task
    mdtd = MDTaskDescription()
    mdtd.kernel = "NAMD"

    mdtd_bound = mdtd.bind(resource=resource)

    task_desc = radical.pilot.ComputeUnitDescription()
    task_desc.environment = mdtd_bound.environment
    task_desc.pre_exec    = mdtd_bound.pre_exec
    task_desc.executable  = mdtd_bound.executable
    task_desc.arguments   = mdtd_bound.arguments
    task_desc.mpi         = mdtd_bound.mpi
    # The check runs on exactly one core; the original author notes that it
    # hangs otherwise.
    task_desc.cores       = 1

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc, cu_descriptions=task_desc)

        print("\nNOTE: Task state 'Failed' is ok, as long as the 'info' output show up under RESULT.")
        print("\nRESULT:\n")
        for line in finished_units.stdout.split('\n'):
            if "Info:" in line:
                print(line)
    finally:
        # Close the runner even when the run or the output parsing fails.
        br.close()
Beispiel #4
0
def run_checkenv(config):
    """Run ``MMPBSA --version`` as an environment sanity check.

    Describes a small pilot (4 cores, 5 minutes, sandbox ``config.WORKDIR``)
    on ``config.RESOURCE``, binds the "MMPBSA" kernel with the ``--version``
    argument to that resource, runs the unit through ``BatchRunner`` and
    prints the unit's stdout.

    :param config: configuration object. ``RESOURCE``, ``ALLOCATION`` and
        ``WORKDIR`` are read here; the object itself is handed to
        ``BatchRunner``.
    """
    resource = config.RESOURCE
    allocation = config.ALLOCATION

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource   = resource
    pdesc.runtime    = 5 # minutes
    pdesc.cores      = 4
    pdesc.project    = allocation
    pdesc.cleanup    = False
    pdesc.sandbox    = config.WORKDIR

    ############################################################
    # The checkenv task
    mdtd = MDTaskDescription()
    mdtd.kernel = "MMPBSA"
    mdtd.arguments = ["--version"]

    mdtd_bound = mdtd.bind(resource=resource)

    task_desc = radical.pilot.ComputeUnitDescription()
    task_desc.environment = mdtd_bound.environment
    task_desc.pre_exec    = mdtd_bound.pre_exec
    task_desc.executable  = mdtd_bound.executable
    task_desc.arguments   = mdtd_bound.arguments
    task_desc.mpi         = mdtd_bound.mpi
    # --version can only run on one core; per the original author it hangs
    # otherwise.
    task_desc.cores       = 1

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc, cu_descriptions=task_desc)

        print("\nRESULT:\n")
        print(finished_units.stdout)
    finally:
        # Close the runner even when the run fails.
        br.close()
Beispiel #5
0
def run_benchmark(config):
    """Run the MMPBSA benchmark for every pilot size / task parallelism pair.

    For each combination of ``FECALC_BENCHMARK_PILOT_SIZES`` (``ps``) and
    ``FECALC_BENCHMARK_TASK_PARALLELISM`` (``tp``) a new session is opened,
    ``ps // tp`` identical MMPBSA units with ``tp`` cores each are submitted
    to a pilot of ``ps`` cores, and the function waits for all units before
    cancelling the pilot and closing the session (without deleting it).

    The five entries of ``FECALC_BENCHMARK_INPUT_DATA`` are used as, in
    order, the ``-i``, ``-cp``, ``-rp``, ``-lp`` and ``-y`` inputs.  With
    ``FECALC_BENCHMARK_INPUT_DATA_LOCATION`` set to "remote" they are handed
    to the kernel as ``copy_local_input_data``; with "local" they are set as
    the unit's ``input_data``.

    The process is terminated with ``sys.exit(0)`` once all combinations ran.

    :param config: configuration object providing the attributes named above
        plus ``SERVER``, ``RESOURCE``, ``USERNAME``, ``ALLOCATION`` and
        ``WORKDIR``.
    """
    server = config.SERVER
    resource = config.RESOURCE
    username = config.USERNAME
    allocation = config.ALLOCATION

    dbname = config.FECALC_BENCHMARK_DBNAME
    pilot_sizes = config.FECALC_BENCHMARK_PILOT_SIZES
    task_parallelism = config.FECALC_BENCHMARK_TASK_PARALLELISM

    for ps in pilot_sizes:
        for tp in task_parallelism:
            # Explicit floor division: the count feeds range() below.
            num_tasks = ps // tp

            # Set up the session:
            session = radical.pilot.Session(database_url=server, database_name=dbname)
            try:
                cred = radical.pilot.Context("ssh")
                cred.user_id = username
                session.add_context(cred)

                print("Pilot size: %3s Task parallelism: %3s Num tasks: %3s. Session ID: %s" % (ps, tp, num_tasks, session.uid))

                # Every task of the benchmark uses the same inputs, so the
                # file names and the argument string are computed once.
                data = config.FECALC_BENCHMARK_INPUT_DATA
                input_files = [data[0], data[1], data[2], data[3], data[4]]
                arguments = "-i {0} -cp {1} -rp {2} -lp {3} -y {4}".format(
                    *[os.path.basename(path) for path in input_files]
                )
                data_location = config.FECALC_BENCHMARK_INPUT_DATA_LOCATION.lower()

                workload = []

                for n in range(num_tasks):
                    mdtd = MDTaskDescription()
                    mdtd.kernel = "MMPBSA"
                    mdtd.arguments = arguments

                    if data_location == "remote":
                        mdtd.copy_local_input_data = list(input_files)

                    mdtd_bound = mdtd.bind(resource=resource)

                    mmpbsa_task = radical.pilot.ComputeUnitDescription()
                    mmpbsa_task.environment = mdtd_bound.environment
                    mmpbsa_task.pre_exec = mdtd_bound.pre_exec
                    mmpbsa_task.executable = mdtd_bound.executable
                    mmpbsa_task.arguments = mdtd_bound.arguments
                    mmpbsa_task.mpi = mdtd_bound.mpi
                    mmpbsa_task.cores = tp
                    mmpbsa_task.name = "task-{0}".format(n)

                    if data_location == "local":
                        # No remote files. All files are local and need to be transferred
                        mmpbsa_task.input_data = list(input_files)

                    workload.append(mmpbsa_task)

                # EXECUTE THE BENCHMARK WORKLOAD
                pmgr = radical.pilot.PilotManager(session=session)

                ############################################################
                # The pilot description
                pdesc = radical.pilot.ComputePilotDescription()
                pdesc.resource = resource
                pdesc.runtime = 30
                pdesc.cores = ps
                pdesc.project = allocation
                pdesc.sandbox = config.WORKDIR
                pdesc.cleanup = True

                pilot = pmgr.submit_pilots(pdesc)

                umgr = radical.pilot.UnitManager(session=session, scheduler=radical.pilot.SCHED_DIRECT_SUBMISSION)
                umgr.add_pilots(pilot)

                umgr.submit_units(workload)
                print(" o STARTED ")
                umgr.wait_units()
                print(" o FINISHED")

                pilot.cancel()
            finally:
                # Close the session (keeping its data) even if this
                # combination failed part-way.
                session.close(delete=False)

    sys.exit(0)
Beispiel #6
0
        print "ERROR - Couldn't download sample data: %s" % str(ex)
        return 1

    ############################################################
    # The pilot description.
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource   = resource
    pdesc.runtime    = 30 # minutes
    pdesc.cores      = 4
    pdesc.project    = allocation
    pdesc.cleanup    = True

    ############################################################
    # The test task description.

    mdtd = MDTaskDescription()
    mdtd.kernel = "MMPBSA"
    mdtd.arguments = "-i nmode.5h.py -cp com.top.2 -rp rec.top.2 -lp lig.top -y rep1.traj"

    mdtd_bound = mdtd.bind(resource=resource)

    mmpbsa_test_task = radical.pilot.ComputeUnitDescription()
    mmpbsa_test_task.environment = mdtd_bound.environment 
    mmpbsa_test_task.pre_exec    = mdtd_bound.pre_exec
    mmpbsa_test_task.executable  = mdtd_bound.executable
    mmpbsa_test_task.arguments   = mdtd_bound.arguments
    mmpbsa_test_task.mpi         = mdtd_bound.mpi
    mmpbsa_test_task.cores       = 4

    mmpbsa_test_task.input_data  = ["/%s/nmode.5h.py" % os.getcwd(),
                                    "/%s/com.top.2" % os.getcwd(),
    def handle_schedule (self, schedule) :
        """Submit scheduled units to their pilots and track the wait queue.

        `schedule['units']` maps each unit to a pilot id, or to `None` when
        the unit could not be placed; `schedule['pilots']` maps pilot ids to
        dicts providing at least 'sandbox' and 'resource'.  Units are grouped
        per pilot and handed to the worker in one bulk call per pilot.  Units
        without a pilot are passed to `unschedule_compute_units`, and a
        WAIT_QUEUE_SIZE callback fires whenever their number differs from
        `self.wait_queue_size` as left by the previous call.

        Units whose description names a kernel get their environment,
        pre_exec, executable and mpi settings from the kernel bound to the
        pilot's resource; if the kernel package cannot be imported, the unit
        is set to FAILED and not submitted.

        An empty schedule is skipped.  Raises RuntimeError if a unit is
        assigned to a pilot id that `self.list_pilots()` does not report.
        """

        # we want to use bulk submission to the pilots, so we collect all units
        # assigned to the same set of pilots.  At the same time, we select
        # unscheduled units for later insertion into the wait queue.

        if  not schedule :
            logger.debug ('skipping empty unit schedule')
            return

        pilot_cu_map = dict()
        unscheduled  = list()

        pilot_ids = self.list_pilots ()

        for unit, pid in schedule['units'].items() :

            if  pid is None :
                unscheduled.append (unit)
                continue

            if  pid not in pilot_ids :
                raise RuntimeError ("schedule points to unknown pilot %s" % pid)

            pilot_cu_map.setdefault (pid, list()).append (unit)


        # submit to all pilots which got something submitted to
        for pid, pilot_units in pilot_cu_map.items() :

            units_to_schedule = list()

            # if a kernel name is in the cu descriptions set, do kernel expansion
            for unit in pilot_units :

                if  pid not in schedule['pilots'] :
                    # lost pilot, do not schedule unit
                    # NOTE(review): such units are neither submitted nor added
                    # to `unscheduled` -- confirm that this is intended.
                    logger.warn ("unschedule unit %s, lost pilot %s" % (unit.uid, pid))
                    continue

                pilot_info   = schedule['pilots'][pid]
                unit.sandbox = pilot_info['sandbox'] + "/" + str(unit.uid)

                ud = unit.description

                if  'kernel' in ud and ud['kernel'] :

                    try :
                        from radical.ensemblemd.mdkernels import MDTaskDescription
                    except Exception :
                        logger.error ("Kernels are not supported in " \
                              "compute unit descriptions -- install " \
                              "radical.ensemblemd.mdkernels!")
                        # FIXME: unit needs a '_set_state() method or something!
                        self._session._dbs.set_compute_unit_state (unit._uid, FAILED,
                                ["kernel expansion failed"])
                        continue

                    # the unit's own arguments are left untouched; only the
                    # resource specific settings come from the bound kernel
                    mdtd           = MDTaskDescription ()
                    mdtd.kernel    = ud.kernel
                    mdtd_bound     = mdtd.bind (resource=pilot_info['resource'])
                    ud.environment = mdtd_bound.environment
                    ud.pre_exec    = mdtd_bound.pre_exec
                    ud.executable  = mdtd_bound.executable
                    ud.mpi         = mdtd_bound.mpi


                units_to_schedule.append (unit)

            if  units_to_schedule :
                self._worker.schedule_compute_units (pilot_uid=pid,
                                                     units=units_to_schedule)


        # report any change in wait_queue_size
        old_wait_queue_size = self.wait_queue_size

        self.wait_queue_size = len(unscheduled)
        if  old_wait_queue_size != self.wait_queue_size :
            self._worker.fire_manager_callback (WAIT_QUEUE_SIZE, self,
                                                self.wait_queue_size)

        if  unscheduled :
            self._worker.unschedule_compute_units (units=unscheduled)

        logger.info ('%s units remain unscheduled' % len(unscheduled))
Beispiel #8
0
    def handle_schedule(self, schedule):
        """Submit scheduled units to their pilots and track the wait queue.

        ``schedule['units']`` maps each unit to a pilot id, or to ``None``
        when the unit could not be placed; ``schedule['pilots']`` maps pilot
        ids to dicts providing at least 'sandbox' and 'resource'.  Units are
        grouped per pilot and handed to the worker in one bulk call per
        pilot.  Units without a pilot are passed to
        ``unschedule_compute_units``, and a WAIT_QUEUE_SIZE callback fires
        whenever their number differs from ``self.wait_queue_size`` as left
        by the previous call.

        Units whose description names a kernel get their environment,
        pre_exec, executable and mpi settings from the kernel bound to the
        pilot's resource; if the kernel package cannot be imported, the unit
        is set to FAILED and not submitted.

        An empty schedule is skipped.  Raises RuntimeError if a unit is
        assigned to a pilot id that ``self.list_pilots()`` does not report.
        """

        # we want to use bulk submission to the pilots, so we collect all units
        # assigned to the same set of pilots.  At the same time, we select
        # unscheduled units for later insertion into the wait queue.

        if not schedule:
            logger.debug('skipping empty unit schedule')
            return

        pilot_cu_map = dict()
        unscheduled = list()

        pilot_ids = self.list_pilots()

        for unit, pid in schedule['units'].items():

            if pid is None:
                unscheduled.append(unit)
                continue

            if pid not in pilot_ids:
                raise RuntimeError("schedule points to unknown pilot %s" %
                                   pid)

            pilot_cu_map.setdefault(pid, list()).append(unit)

        # submit to all pilots which got something submitted to
        for pid, pilot_units in pilot_cu_map.items():

            units_to_schedule = list()

            # if a kernel name is in the cu descriptions set, do kernel expansion
            for unit in pilot_units:

                if pid not in schedule['pilots']:
                    # lost pilot, do not schedule unit
                    # NOTE(review): such units are neither submitted nor added
                    # to `unscheduled` -- confirm that this is intended.
                    self._session.prof.prof('unschedule', uid=unit.uid)
                    logger.warn("unschedule unit %s, lost pilot %s" %
                                (unit.uid, pid))
                    continue

                pilot_info = schedule['pilots'][pid]
                unit.sandbox = pilot_info['sandbox'] + "/" + str(unit.uid)

                ud = unit.description

                if 'kernel' in ud and ud['kernel']:

                    try:
                        from radical.ensemblemd.mdkernels import MDTaskDescription
                    except Exception:
                        logger.error("Kernels are not supported in "
                                     "compute unit descriptions -- install "
                                     "radical.ensemblemd.mdkernels!")
                        # FIXME: unit needs a '_set_state() method or something!
                        self._session._dbs.set_compute_unit_state(
                            unit._uid, FAILED, ["kernel expansion failed"])
                        continue

                    # the unit's own arguments are left untouched; only the
                    # resource specific settings come from the bound kernel
                    mdtd = MDTaskDescription()
                    mdtd.kernel = ud.kernel
                    mdtd_bound = mdtd.bind(resource=pilot_info['resource'])
                    ud.environment = mdtd_bound.environment
                    ud.pre_exec = mdtd_bound.pre_exec
                    ud.executable = mdtd_bound.executable
                    ud.mpi = mdtd_bound.mpi

                units_to_schedule.append(unit)

            if units_to_schedule:
                self._worker.schedule_compute_units(pilot_uid=pid,
                                                    units=units_to_schedule)

        # report any change in wait_queue_size
        old_wait_queue_size = self.wait_queue_size

        self.wait_queue_size = len(unscheduled)
        if old_wait_queue_size != self.wait_queue_size:
            self._worker.fire_manager_callback(WAIT_QUEUE_SIZE, self,
                                               self.wait_queue_size)

        if unscheduled:
            self._worker.unschedule_compute_units(units=unscheduled)

        logger.info('%s units remain unscheduled' % len(unscheduled))
Beispiel #9
0
    def construct_cud(self, task, stage):
        """Build the compute unit description for one task at one stage.

        File name patterns from ``self.io_desc`` are expanded with the TASK
        and STAGE values and turned into staging directives:

        * inputs: first-stage inputs (only when ``stage == 1``), inputs
          shared by all tasks of a stage, the intermediate outputs of the
          previous stage (only when ``stage != 1``) and per-task inputs used
          in all stages;
        * outputs: per-task-per-stage outputs (the labels 'STDOUT' and
          'STDERR' stage the respective stream), intermediate outputs (all
          but the last stage) and final outputs (last stage only).

        The basename of every file is recorded under its label and, together
        with '__TASK__' and '__STAGE__', substituted into
        ``self.task_desc.arguments``.  Environment, pre_exec, executable and
        mpi come from ``self.task_desc.kernel`` bound to
        ``self.resource_desc.resource``.

        :param task: task identifier, used in patterns and the unit name.
        :param stage: stage number; compared against 1 and
            ``self.task_desc.num_stages``.
        :returns: the populated ``rp.ComputeUnitDescription``.
        :raises Exception: if ``self.task_desc.kernel`` is not set.
        """

        self.log('Constructing CUD for task %s stage %s' % (task, stage))
        cud = rp.ComputeUnitDescription()

        # Initialize input and output staging as list so that we can later just append.
        # TODO: Could this be an issue if there are no later appends?
        cud.input_staging = []
        cud.output_staging = []

        # The __TASK__ and __STAGE__ substitutions are arguably not
        # required from an application perspective, but are
        # certainly useful for development/debugging purposes.
        task_substitutions = {'__TASK__': task, '__STAGE__': stage}

        def set_action(directive, url, same_host_action):
            # Files without host or on 'localhost' are transferred; files on
            # the remote file system's host use `same_host_action`.  Any
            # other host is only reported: the directive then carries no
            # 'action' entry (behaviour inherited from the original code).
            if url.host == 'localhost' or url.host is None:
                directive['action'] = rp.TRANSFER
            elif url.host == self.remote_fs.host:
                directive['action'] = same_host_action
            else:
                print("### ERROR: Host not supported for this pilot!")

        def stage_in(filename):
            # Stage `filename` into the unit under its basename.
            url = rp.Url(filename)
            directive = {
                'source': url.path,
                'target': os.path.basename(filename)
            }
            set_action(directive, url, rp.LINK)
            cud.input_staging.append(directive)

        def stage_out(source, filename):
            # Stage `source` (a file name, 'STDOUT' or 'STDERR') out of the
            # unit to the path part of `filename`.
            url = rp.Url(filename)
            directive = {
                'source': source,
                'target': url.path
            }
            set_action(directive, url, rp.COPY)
            cud.output_staging.append(directive)

        if stage == 1:
            for label, pattern in self.io_desc.input_per_task_first_stage.items():
                filename = Template(pattern).substitute(TASK=task, STAGE=stage)
                if self.verbose:
                    print('### Using initial input file %s as %s' % (filename, label))
                task_substitutions[label] = os.path.basename(filename)
                stage_in(filename)

        for label, pattern in self.io_desc.input_all_tasks_per_stage.items():
            filename = Template(pattern).substitute(TASK=task, STAGE=stage)
            if self.verbose:
                print('### Using all task per stage input file %s as %s' % (filename, label))
            task_substitutions[label] = os.path.basename(filename)
            stage_in(filename)

        if stage != 1:
            # The intermediate outputs of the previous stage are this
            # stage's inputs, hence STAGE=stage-1.
            for entry in self.io_desc.intermediate_output_per_task_per_stage:
                filename = Template(entry['pattern']).substitute(TASK=task, STAGE=stage-1)
                label = entry['input_label']
                if self.verbose:
                    print('### Using intermediate per task per stage input file %s as %s' % (filename, label))
                task_substitutions[label] = os.path.basename(filename)
                stage_in(filename)

        for label, pattern in self.io_desc.input_per_task_all_stages.items():
            filename = Template(pattern).substitute(TASK=task, STAGE=stage)
            if self.verbose:
                print('### Using per task all stage input file %s as %s' % (filename, label))
            task_substitutions[label] = os.path.basename(filename)
            stage_in(filename)

        for label, pattern in self.io_desc.output_per_task_per_stage.items():
            filename = Template(pattern).substitute(TASK=task, STAGE=stage)
            if label in ('STDOUT', 'STDERR'):
                # The label doubles as the staging source for the stream and
                # is not added to the argument substitutions.
                if self.verbose:
                    print('### Using per task per stage %s file as %s' % (label, filename))
                stage_out(label, filename)
            else:
                if self.verbose:
                    print('### Using per task per stage output file %s as %s' % (filename, label))
                basename = os.path.basename(filename)
                task_substitutions[label] = basename
                stage_out(basename, filename)

        if stage != self.task_desc.num_stages: # If not the latest stage
            for entry in self.io_desc.intermediate_output_per_task_per_stage:
                filename = Template(entry['pattern']).substitute(TASK=task, STAGE=stage)
                label = entry['output_label']
                if self.verbose:
                    print('### Using intermediate per task per stage output file %s as %s' % (filename, label))
                basename = os.path.basename(filename)
                task_substitutions[label] = basename
                stage_out(basename, filename)

        if stage == self.task_desc.num_stages: # If this is the last step
            for label, pattern in self.io_desc.output_per_task_final_stage.items():
                filename = Template(pattern).substitute(TASK=task, STAGE=stage)
                if self.verbose:
                    print('### Using per task final stage output file %s as %s' % (filename, label))
                basename = os.path.basename(filename)
                task_substitutions[label] = basename
                stage_out(basename, filename)

        # Name
        cud.name = "mtms-task-%s-%s" % (task, stage)

        # Cores
        cud.cores = self.task_desc.cores

        # Build MDTaskDescription from kernel name
        mdtd = MDTaskDescription()

        if not self.task_desc.kernel:
            raise Exception('Kernel not specified.')
        mdtd.kernel = self.task_desc.kernel

        # Bind to resource
        mdtd_bound = mdtd.bind(resource=self.resource_desc.resource)

        # Fill in CUD
        cud.environment = mdtd_bound.environment
        cud.pre_exec = mdtd_bound.pre_exec
        cud.executable = mdtd_bound.executable
        cud.mpi = mdtd_bound.mpi

        # Arguments: only set when the expanded template is non-empty.
        if not self.task_desc.arguments:
            if self.verbose:
                print('### Will execute "%s"' % cud.executable)
        else:
            arguments = Template(self.task_desc.arguments).substitute(task_substitutions)
            if self.verbose:
                print('### Will execute "%s %s"' % (cud.executable, arguments))
            if arguments:
                cud.arguments = arguments

        return cud
Beispiel #10
0
def run_workload(config, workload):
    """Run a NAMD workload on one pilot and print a per-task summary.

    The pilot gets as many cores as the workload requests in total, capped
    at ``config.MAXCPUS``.  When the cap applies, the pilot runtime is the
    runtime of the first task multiplied by the number of "waves" needed to
    push all requested cores through the pilot.

    :param config: configuration object providing ``MAXCPUS``, ``RESOURCE``
        and ``ALLOCATION``; it is also handed to ``BatchRunner``.
    :param workload: non-empty sequence of task dicts with the keys
        "runtime", "cores", "name", "parmfile", "coordinates", "conskfile",
        "input" and "output".
    """
    maxcpus    = config.MAXCPUS
    resource   = config.RESOURCE
    allocation = config.ALLOCATION

    # We cannot allocate more than "maxcpus". If the workload needs fewer
    # cores than that, the pilot gets exactly those cores. Otherwise we use
    # "maxcpus" and adjust the runtime of the pilot.

    # NOTE: currently, we assume (near) homogenous runtime among all tasks.
    task_runtime = workload[0]["runtime"]

    cores = sum(task["cores"] for task in workload)

    if cores < maxcpus:
        pilot_size = cores
        pilot_runtime = task_runtime
    else:
        pilot_size = maxcpus
        # Ceiling division on *cores*: counting tasks instead (as the code
        # did before) underestimates the runtime for multi-core tasks.
        waves = -(-cores // maxcpus)
        pilot_runtime = task_runtime * waves

    print("\n * Number of tasks: %s" % len(workload))
    print(" * Pilot size (# cores): %s" % pilot_size)
    print(" * Pilot runtime: %s\n" % pilot_runtime)

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource   = resource
    pdesc.runtime    = pilot_runtime
    pdesc.cores      = pilot_size
    pdesc.project    = allocation
    pdesc.cleanup    = False

    ############################################################
    # Workload definition
    all_tasks = []

    # Create CU descriptions from workload tasks...
    for task in workload:
        parmfile    = task["parmfile"]
        coordinates = task["coordinates"]
        conskfile   = task["conskfile"]
        input_file  = task["input"]
        output      = task["output"]

        # Only the input file is named on the command line.
        mdtd = MDTaskDescription()
        mdtd.kernel = "NAMD"
        mdtd.arguments = ["{0}".format(os.path.basename(input_file))]

        mdtd_bound = mdtd.bind(resource=resource)

        namd_task = radical.pilot.ComputeUnitDescription()
        namd_task.environment = mdtd_bound.environment
        namd_task.pre_exec    = mdtd_bound.pre_exec
        namd_task.executable  = mdtd_bound.executable
        namd_task.arguments   = mdtd_bound.arguments
        namd_task.mpi         = mdtd_bound.mpi
        namd_task.cores       = task["cores"]
        namd_task.name        = task["name"]

        namd_task.input_data  = [parmfile, coordinates, conskfile, input_file]
        namd_task.output_data = ["STDOUT > %s" % output]

        all_tasks.append(namd_task)

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc, cu_descriptions=all_tasks)
        if not isinstance(finished_units, list):
            finished_units = [finished_units]

        print("\nDONE")
        print("=============================================================================\n")

        for unit in finished_units:
            if unit.state == radical.pilot.DONE:
                t_run = unit.stop_time - unit.start_time
            else:
                t_run = "failed"

            # output_data entries have the form "<source> > <local target>"
            local_output = unit.description.output_data[0].split(" > ")[1]
            print(" o Task {0} RUNTIME {1} OUTPUT: {2}".format(unit.description.name, t_run, local_output))
    finally:
        # Close the runner even when the run or the reporting fails.
        br.close()
Beispiel #11
0
        print "ERROR - Couldn't download sample data: %s" % str(ex)
        return 1

    ############################################################
    # The pilot description.
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource   = resource
    pdesc.runtime    = 30 # minutes
    pdesc.cores      = 16
    pdesc.project    = allocation
    pdesc.cleanup    = True

    ############################################################
    # The test task description.

    mdtd = MDTaskDescription()
    mdtd.kernel = "NAMD"
    mdtd.arguments = ["./eq0.inp"]

    mdtd_bound = mdtd.bind(resource=resource)

    mmpbsa_test_task = radical.pilot.ComputeUnitDescription()
    mmpbsa_test_task.environment = mdtd_bound.environment 
    mmpbsa_test_task.pre_exec    = mdtd_bound.pre_exec
    mmpbsa_test_task.executable  = mdtd_bound.executable
    mmpbsa_test_task.arguments   = mdtd_bound.arguments
    mmpbsa_test_task.mpi         = mdtd_bound.mpi
    mmpbsa_test_task.cores       = 16

    mmpbsa_test_task.input_data  = [ "/%s/complex.pdb" % os.getcwd(),
                                     "/%s/complex.top" % os.getcwd(),
Beispiel #12
0
def run_workload(config, workload):
    """Run an MMPBSA workload on one pilot and print a per-task summary.

    The pilot gets as many cores as the workload requests in total, capped
    at ``config.MAXCPUS``.  When the cap applies, the pilot runtime is the
    runtime of the first task multiplied by the number of "waves" needed to
    push all requested cores through the pilot.

    Per task, "input_data_location" decides how the five input files reach
    the unit: "there" hands them to the kernel as ``copy_local_input_data``,
    "here" sets them as the unit's ``input_data``.

    :param config: configuration object providing ``MAXCPUS``, ``RESOURCE``
        and ``ALLOCATION``; it is also handed to ``BatchRunner``.
    :param workload: non-empty sequence of task dicts with the keys
        "runtime", "cores", "name", "input_data_location",
        "output_data_location", "input", "complex_prmtop",
        "receptor_prmtop", "ligand_prmtop", "trajectory" and "output".
    :raises ValueError: if a data location is neither "here" nor "there"
        (case-insensitive).
    """
    maxcpus    = config.MAXCPUS
    resource   = config.RESOURCE
    allocation = config.ALLOCATION

    def is_remote(task, key):
        # Map "here"/"there" to False/True.  Unknown values used to be only
        # printed, leaving the flag unbound or stale from the previous task.
        location = task[key]
        lowered = location.lower()
        if lowered == "here":
            return False
        if lowered == "there":
            return True
        raise ValueError("Unknown '{0}': {1}".format(key, location))

    # We cannot allocate more than "maxcpus". If the workload needs fewer
    # cores than that, the pilot gets exactly those cores. Otherwise we use
    # "maxcpus" and adjust the runtime of the pilot.

    # NOTE: currently, we assume (near) homogenous runtime among all tasks.
    task_runtime = workload[0]["runtime"]

    cores = sum(task["cores"] for task in workload)

    if cores < maxcpus:
        pilot_size = cores
        pilot_runtime = task_runtime
    else:
        pilot_size = maxcpus
        # Ceiling division on *cores*: counting tasks instead (as the code
        # did before) underestimates the runtime for multi-core tasks.
        waves = -(-cores // maxcpus)
        pilot_runtime = task_runtime * waves

    print("\n * Number of tasks: %s" % len(workload))
    print(" * Pilot size (# cores): %s" % pilot_size)
    print(" * Pilot runtime: %s\n" % pilot_runtime)

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource   = resource
    pdesc.runtime    = pilot_runtime
    pdesc.cores      = pilot_size
    pdesc.project    = allocation
    pdesc.cleanup    = False

    ############################################################
    # Workload definition
    all_tasks = []

    # Create CU descriptions from workload tasks...
    for task in workload:
        # Process data handling.
        idl_remote = is_remote(task, "input_data_location")
        # The output location is validated only; nothing below uses it.
        is_remote(task, "output_data_location")

        input_files = [
            task["input"],            # -i
            task["complex_prmtop"],   # -cp
            task["receptor_prmtop"],  # -rp
            task["ligand_prmtop"],    # -lp
            task["trajectory"],       # -y
        ]
        output = task["output"]

        mdtd = MDTaskDescription()
        mdtd.kernel = "MMPBSA"
        mdtd.arguments = "-i {0} -cp {1} -rp {2} -lp {3} -y {4}".format(
            *[os.path.basename(path) for path in input_files]
        )

        if idl_remote:
            mdtd.copy_local_input_data = list(input_files)

        mdtd_bound = mdtd.bind(resource=resource)

        mmpbsa_task = radical.pilot.ComputeUnitDescription()
        mmpbsa_task.environment = mdtd_bound.environment
        mmpbsa_task.pre_exec    = mdtd_bound.pre_exec
        mmpbsa_task.executable  = mdtd_bound.executable
        mmpbsa_task.arguments   = mdtd_bound.arguments
        mmpbsa_task.mpi         = mdtd_bound.mpi
        mmpbsa_task.cores       = task["cores"]
        mmpbsa_task.name        = task["name"]

        if not idl_remote:
            # No remote files. All files are local and need to be transferred
            mmpbsa_task.input_data = list(input_files)

        mmpbsa_task.output_data = ["FINAL_RESULTS_MMPBSA.dat > %s" % output]

        all_tasks.append(mmpbsa_task)

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc, cu_descriptions=all_tasks)
        if not isinstance(finished_units, list):
            finished_units = [finished_units]

        print("\nDONE")
        print("=============================================================================\n")

        for unit in finished_units:
            # Only units that finished get a runtime computed from their
            # timestamps; everything else is reported as "failed".
            if unit.state == radical.pilot.DONE:
                t_run = unit.stop_time - unit.start_time
            else:
                t_run = "failed"

            # output_data entries have the form "<source> > <local target>"
            local_output = unit.description.output_data[0].split(" > ")[1]
            print(" o Task {0} RUNTIME {1} OUTPUT: {2}".format(unit.description.name, t_run, local_output))
    finally:
        # Close the runner even when the run or the reporting fails.
        br.close()