def test__copy_local_data(self):
    """Check that local input files show up as 'cp' pre-exec commands.

    A "TEST" kernel description carrying three local input files is bound
    to the "localhost" resource.  The bound ``pre_exec`` list must then be
    the echo command followed by one ``cp <file> .`` entry per input file,
    in the order the files were given.
    """
    from radical.ensemblemd.mdkernels import MDTaskDescription

    local_files = ["file1", "file2", "file3"]

    description = MDTaskDescription()
    description.kernel = "TEST"
    description.arguments = ["-f"]
    # Hand over a copy so the expectation below is built from our own list.
    description.copy_local_input_data = list(local_files)

    bound = description.bind(resource="localhost")

    expected_pre_exec = [u'/bin/echo -n TEST:localhost']
    expected_pre_exec.extend('cp %s .' % name for name in local_files)

    assert bound.pre_exec == expected_pre_exec
def test__binding(self):
    """Check the abstract MDTask -> resource binding.

    Binds a "TEST" kernel description to "localhost" and verifies the
    pre-exec list, the executable, the resource name and that the
    arguments are carried over unchanged.
    """
    from radical.ensemblemd.mdkernels import MDTaskDescription

    description = MDTaskDescription()
    description.kernel = "TEST"
    description.arguments = ["-f"]

    bound = description.bind(resource="localhost")

    # Debug aid: show what the binding produced.
    print(bound.pre_exec)

    assert bound.pre_exec == ["/bin/echo -n TEST:localhost"]
    assert bound.executable == "/bin/hostname"
    assert bound.arguments == description.arguments
    assert bound.resource == "localhost"
def run_checkenv(config):
    """Run a single NAMD sanity-check task and print its "Info:" lines.

    One NAMD task, bound to ``config.RESOURCE``, is executed through a
    ``BatchRunner`` on a 4-core, 5-minute pilot.  Every stdout line of the
    finished task that contains "Info:" is printed.

    :param config: configuration object; ``RESOURCE`` and ``ALLOCATION``
        are read here, and the whole object is handed to ``BatchRunner``.
    """
    resource = config.RESOURCE
    allocation = config.ALLOCATION

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource = resource
    pdesc.runtime = 5  # minutes
    pdesc.cores = 4
    pdesc.project = allocation
    pdesc.cleanup = False

    ############################################################
    # The checkenv task
    mdtd = MDTaskDescription()
    mdtd.kernel = "NAMD"
    mdtd_bound = mdtd.bind(resource=resource)

    task_desc = radical.pilot.ComputeUnitDescription()
    task_desc.environment = mdtd_bound.environment
    task_desc.pre_exec = mdtd_bound.pre_exec
    task_desc.executable = mdtd_bound.executable
    task_desc.arguments = mdtd_bound.arguments
    task_desc.mpi = mdtd_bound.mpi
    # Run on a single core (the original note says the check hangs otherwise).
    task_desc.cores = 1

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc,
                                cu_descriptions=task_desc)

        print("\nNOTE: Task state 'Failed' is ok, as long as the 'info' output show up under RESULT.")
        print("\nRESULT:\n")

        for line in finished_units.stdout.split('\n'):
            if "Info:" in line:
                print(line)
    finally:
        # Always release the runner, even if the run or the output
        # processing raised.
        br.close()
def run_checkenv(config):
    """Run a single ``MMPBSA --version`` task and print its stdout.

    The task is bound to ``config.RESOURCE`` and executed through a
    ``BatchRunner`` on a 4-core, 5-minute pilot whose sandbox is
    ``config.WORKDIR``.

    :param config: configuration object; ``RESOURCE``, ``ALLOCATION`` and
        ``WORKDIR`` are read here, and the whole object is handed to
        ``BatchRunner``.
    """
    resource = config.RESOURCE
    allocation = config.ALLOCATION

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource = resource
    pdesc.runtime = 5  # minutes
    pdesc.cores = 4
    pdesc.project = allocation
    pdesc.cleanup = False
    pdesc.sandbox = config.WORKDIR

    ############################################################
    # The checkenv task
    mdtd = MDTaskDescription()
    mdtd.kernel = "MMPBSA"
    mdtd.arguments = ["--version"]
    mdtd_bound = mdtd.bind(resource=resource)

    task_desc = radical.pilot.ComputeUnitDescription()
    task_desc.environment = mdtd_bound.environment
    task_desc.pre_exec = mdtd_bound.pre_exec
    task_desc.executable = mdtd_bound.executable
    task_desc.arguments = mdtd_bound.arguments
    task_desc.mpi = mdtd_bound.mpi
    # --version can only run on one core; it hangs otherwise.
    task_desc.cores = 1

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc,
                                cu_descriptions=task_desc)

        print("\nRESULT:\n")
        print(finished_units.stdout)
    finally:
        # Always release the runner, even if the run or the printing raised.
        br.close()
def run_benchmark(config):
    """Run the MMPBSA benchmark for every pilot size / task parallelism pair.

    For each combination of ``config.FECALC_BENCHMARK_PILOT_SIZES`` and
    ``config.FECALC_BENCHMARK_TASK_PARALLELISM`` a fresh session is opened,
    ``pilot_size // task_parallelism`` identical MMPBSA tasks are created,
    submitted to one pilot of that size, and waited for.  The session is
    closed (without deleting it) after each combination, also on error.

    The process exits with status 0 once the sweep is finished.

    :param config: configuration object providing ``SERVER``, ``RESOURCE``,
        ``USERNAME``, ``ALLOCATION``, ``WORKDIR`` and the
        ``FECALC_BENCHMARK_*`` settings read below.
    """
    server = config.SERVER
    resource = config.RESOURCE
    username = config.USERNAME
    allocation = config.ALLOCATION
    dbname = config.FECALC_BENCHMARK_DBNAME
    pilot_sizes = config.FECALC_BENCHMARK_PILOT_SIZES
    task_parallelism = config.FECALC_BENCHMARK_TASK_PARALLELISM

    # The input files and the resulting command line are the same for every
    # task, so work them out once.  Order: nmode input, complex, receptor,
    # ligand, trajectory.
    input_files = [config.FECALC_BENCHMARK_INPUT_DATA[i] for i in range(5)]
    arguments = "-i {0} -cp {1} -rp {2} -lp {3} -y {4}".format(
        *[os.path.basename(path) for path in input_files])
    data_location = config.FECALC_BENCHMARK_INPUT_DATA_LOCATION.lower()

    for ps in pilot_sizes:
        for tp in task_parallelism:

            # Explicit floor division: the number of tasks is a count.
            num_tasks = ps // tp

            # Set up the session:
            session = radical.pilot.Session(database_url=server,
                                            database_name=dbname)
            try:
                cred = radical.pilot.Context("ssh")
                cred.user_id = username
                session.add_context(cred)

                print("Pilot size: %3s Task parallelism: %3s Num tasks: %3s. Session ID: %s" % (ps, tp, num_tasks, session.uid))

                workload = []
                for n in range(0, num_tasks):
                    mdtd = MDTaskDescription()
                    mdtd.kernel = "MMPBSA"
                    mdtd.arguments = arguments

                    if data_location == "remote":
                        # Presumably the files already live on the target
                        # resource and are copied there -- verify against
                        # MDTaskDescription.
                        mdtd.copy_local_input_data = list(input_files)

                    mdtd_bound = mdtd.bind(resource=resource)

                    mmpbsa_task = radical.pilot.ComputeUnitDescription()
                    mmpbsa_task.environment = mdtd_bound.environment
                    mmpbsa_task.pre_exec = mdtd_bound.pre_exec
                    mmpbsa_task.executable = mdtd_bound.executable
                    mmpbsa_task.arguments = mdtd_bound.arguments
                    mmpbsa_task.mpi = mdtd_bound.mpi
                    mmpbsa_task.cores = tp
                    mmpbsa_task.name = "task-{0}".format(n)

                    if data_location == "local":
                        # No remote files. All files are local and need to
                        # be transferred.
                        mmpbsa_task.input_data = list(input_files)

                    workload.append(mmpbsa_task)

                # EXECUTE THE BENCHMARK WORKLOAD
                pmgr = radical.pilot.PilotManager(session=session)

                ############################################################
                # The pilot description
                pdesc = radical.pilot.ComputePilotDescription()
                pdesc.resource = resource
                pdesc.runtime = 30
                pdesc.cores = ps
                pdesc.project = allocation
                pdesc.cleanup = True
                pdesc.sandbox = config.WORKDIR

                pilot = pmgr.submit_pilots(pdesc)

                umgr = radical.pilot.UnitManager(
                    session=session,
                    scheduler=radical.pilot.SCHED_DIRECT_SUBMISSION)
                umgr.add_pilots(pilot)

                umgr.submit_units(workload)
                print(" o STARTED ")
                umgr.wait_units()
                print(" o FINISHED")

                pilot.cancel()
            finally:
                # Close the session even if this benchmark round failed.
                session.close(delete=False)

    # NOTE(review): the reviewed source had lost its indentation; the exit
    # is assumed to follow the complete sweep -- confirm against history.
    sys.exit(0)
print "ERROR - Couldn't download sample data: %s" % str(ex) return 1 ############################################################ # The pilot description. pdesc = radical.pilot.ComputePilotDescription() pdesc.resource = resource pdesc.runtime = 30 # minutes pdesc.cores = 4 pdesc.project = allocation pdesc.cleanup = True ############################################################ # The test task description. mdtd = MDTaskDescription() mdtd.kernel = "MMPBSA" mdtd.arguments = "-i nmode.5h.py -cp com.top.2 -rp rec.top.2 -lp lig.top -y rep1.traj" mdtd_bound = mdtd.bind(resource=resource) mmpbsa_test_task = radical.pilot.ComputeUnitDescription() mmpbsa_test_task.environment = mdtd_bound.environment mmpbsa_test_task.pre_exec = mdtd_bound.pre_exec mmpbsa_test_task.executable = mdtd_bound.executable mmpbsa_test_task.arguments = mdtd_bound.arguments mmpbsa_test_task.mpi = mdtd_bound.mpi mmpbsa_test_task.cores = 4 mmpbsa_test_task.input_data = ["/%s/nmode.5h.py" % os.getcwd(), "/%s/com.top.2" % os.getcwd(),
def handle_schedule(self, schedule):
    """Submit scheduled units to their pilots and track unscheduled ones.

    Units are grouped per pilot so that each pilot receives one bulk
    submission.  Units without a pilot are handed back to the worker as
    unscheduled, and a change of the wait queue size is reported through
    the manager callback.

    For units whose description names a kernel, the kernel is bound to
    the pilot's resource and the description's environment, pre_exec,
    executable and mpi settings are overwritten with the bound values.
    If the kernel package cannot be imported, the unit is set to FAILED
    and not submitted.

    :param schedule: dict with ``'units'`` (unit -> pilot id, or ``None``
        if the unit could not be placed) and ``'pilots'`` (pilot id ->
        dict with at least ``'sandbox'`` and ``'resource'``).  An empty
        schedule is ignored.
    :raises RuntimeError: if a unit is assigned to a pilot that
        ``self.list_pilots()`` does not know.
    """
    if not schedule:
        logger.debug('skipping empty unit schedule')
        return

    pilot_cu_map = dict()
    unscheduled = list()

    pilot_ids = self.list_pilots()

    for unit, pid in schedule['units'].items():

        if pid is None:
            unscheduled.append(unit)
            continue

        if pid not in pilot_ids:
            raise RuntimeError("schedule points to unknown pilot %s" % pid)

        pilot_cu_map.setdefault(pid, list()).append(unit)

    # submit to all pilots which got something submitted to
    for pid, units in pilot_cu_map.items():

        units_to_schedule = list()
        pilot_known = pid in schedule['pilots']

        # if a kernel name is in the cu descriptions set, do kernel expansion
        for unit in units:

            if not pilot_known:
                # lost pilot, do not schedule unit
                logger.warn("unschedule unit %s, lost pilot %s" % (unit.uid, pid))
                continue

            unit.sandbox = schedule['pilots'][pid]['sandbox'] + "/" + str(unit.uid)

            ud = unit.description

            if 'kernel' in ud and ud['kernel']:

                try:
                    from radical.ensemblemd.mdkernels import MDTaskDescription
                except Exception:
                    # Best effort: any import problem marks the unit failed.
                    logger.error("Kernels are not supported in "
                                 "compute unit descriptions -- install "
                                 "radical.ensemblemd.mdkernels!")
                    # FIXME: unit needs a '_set_state() method or something!
                    self._session._dbs.set_compute_unit_state(
                        unit._uid, FAILED, ["kernel expansion failed"])
                    continue

                pilot_resource = schedule['pilots'][pid]['resource']

                mdtd = MDTaskDescription()
                mdtd.kernel = ud.kernel
                mdtd_bound = mdtd.bind(resource=pilot_resource)
                ud.environment = mdtd_bound.environment
                ud.pre_exec = mdtd_bound.pre_exec
                ud.executable = mdtd_bound.executable
                ud.mpi = mdtd_bound.mpi

            units_to_schedule.append(unit)

        if units_to_schedule:
            self._worker.schedule_compute_units(pilot_uid=pid,
                                                units=units_to_schedule)

    # report any change in wait_queue_size
    old_wait_queue_size = self.wait_queue_size

    self.wait_queue_size = len(unscheduled)
    if old_wait_queue_size != self.wait_queue_size:
        self._worker.fire_manager_callback(WAIT_QUEUE_SIZE, self,
                                           self.wait_queue_size)

    if unscheduled:
        self._worker.unschedule_compute_units(units=unscheduled)

    logger.info('%s units remain unscheduled' % len(unscheduled))
def handle_schedule(self, schedule):
    """Submit scheduled units to their pilots and track unscheduled ones.

    Units are grouped per pilot so that each pilot receives one bulk
    submission.  Units without a pilot are handed back to the worker as
    unscheduled, and a change of the wait queue size is reported through
    the manager callback.  Units assigned to a pilot that is missing from
    ``schedule['pilots']`` are profiled as 'unschedule', logged and
    dropped from the submission.

    For units whose description names a kernel, the kernel is bound to
    the pilot's resource and the description's environment, pre_exec,
    executable and mpi settings are overwritten with the bound values.
    If the kernel package cannot be imported, the unit is set to FAILED
    and not submitted.

    :param schedule: dict with ``'units'`` (unit -> pilot id, or ``None``
        if the unit could not be placed) and ``'pilots'`` (pilot id ->
        dict with at least ``'sandbox'`` and ``'resource'``).  An empty
        schedule is ignored.
    :raises RuntimeError: if a unit is assigned to a pilot that
        ``self.list_pilots()`` does not know.
    """
    if not schedule:
        logger.debug('skipping empty unit schedule')
        return

    pilot_cu_map = dict()
    unscheduled = list()

    pilot_ids = self.list_pilots()

    for unit, pid in schedule['units'].items():

        if pid is None:
            unscheduled.append(unit)
            continue

        if pid not in pilot_ids:
            raise RuntimeError("schedule points to unknown pilot %s" % pid)

        pilot_cu_map.setdefault(pid, list()).append(unit)

    # submit to all pilots which got something submitted to
    for pid, units in pilot_cu_map.items():

        units_to_schedule = list()
        pilot_known = pid in schedule['pilots']

        # if a kernel name is in the cu descriptions set, do kernel expansion
        for unit in units:

            if not pilot_known:
                # lost pilot, do not schedule unit
                self._session.prof.prof('unschedule', uid=unit.uid)
                logger.warn("unschedule unit %s, lost pilot %s" % (unit.uid, pid))
                continue

            unit.sandbox = schedule['pilots'][pid]['sandbox'] + "/" + str(unit.uid)

            ud = unit.description

            if 'kernel' in ud and ud['kernel']:

                try:
                    from radical.ensemblemd.mdkernels import MDTaskDescription
                except Exception:
                    # Best effort: any import problem marks the unit failed.
                    logger.error("Kernels are not supported in "
                                 "compute unit descriptions -- install "
                                 "radical.ensemblemd.mdkernels!")
                    # FIXME: unit needs a '_set_state() method or something!
                    self._session._dbs.set_compute_unit_state(
                        unit._uid, FAILED, ["kernel expansion failed"])
                    continue

                pilot_resource = schedule['pilots'][pid]['resource']

                mdtd = MDTaskDescription()
                mdtd.kernel = ud.kernel
                mdtd_bound = mdtd.bind(resource=pilot_resource)
                ud.environment = mdtd_bound.environment
                ud.pre_exec = mdtd_bound.pre_exec
                ud.executable = mdtd_bound.executable
                ud.mpi = mdtd_bound.mpi

            units_to_schedule.append(unit)

        if units_to_schedule:
            self._worker.schedule_compute_units(pilot_uid=pid,
                                                units=units_to_schedule)

    # report any change in wait_queue_size
    old_wait_queue_size = self.wait_queue_size

    self.wait_queue_size = len(unscheduled)
    if old_wait_queue_size != self.wait_queue_size:
        self._worker.fire_manager_callback(WAIT_QUEUE_SIZE, self,
                                           self.wait_queue_size)

    if unscheduled:
        self._worker.unschedule_compute_units(units=unscheduled)

    logger.info('%s units remain unscheduled' % len(unscheduled))
def construct_cud(self, task, stage):
    """Build the ComputeUnitDescription for one task at one stage.

    File name patterns from ``self.io_desc`` are expanded with the task
    and stage numbers and turned into input/output staging directives.
    The base name of every staged file is recorded under its label so it
    can be substituted into the argument template
    ``self.task_desc.arguments``.  The executable, environment, pre_exec
    and mpi settings come from binding ``self.task_desc.kernel`` to
    ``self.resource_desc.resource``.

    Staging actions are chosen from the host part of each file URL:
    'localhost' or no host means TRANSFER; the host of ``self.remote_fs``
    means LINK for inputs and COPY for outputs.

    :param task: task identifier, substituted for ``TASK`` in patterns.
    :param stage: stage number (the first stage is 1), substituted for
        ``STAGE``; intermediate inputs use ``stage - 1``.
    :returns: the filled-in ``rp.ComputeUnitDescription``.
    :raises Exception: if ``self.task_desc.kernel`` is not set.
    """
    self.log('Constructing CUD for task %s stage %s' % (task, stage))

    cud = rp.ComputeUnitDescription()

    # Initialize input and output staging as list so that we can later just append.
    # TODO: Could this be an issue if there are no later appends?
    cud.input_staging = []
    cud.output_staging = []

    # The __TASK__ and __STAGE__ substitutions are arguably not required
    # from an application perspective, but are certainly useful for
    # development/debugging purposes.
    task_substitutions = {'__TASK__': task, '__STAGE__': stage}

    def _expand(pattern, for_stage):
        """Fill the TASK/STAGE placeholders of a file name pattern."""
        return Template(pattern).substitute(TASK=task, STAGE=for_stage)

    def _set_action(directive, host, remote_action):
        """Pick the staging action for a directive from the URL host."""
        if host == 'localhost' or host is None:
            directive['action'] = rp.TRANSFER
        elif host == self.remote_fs.host:
            directive['action'] = remote_action
        else:
            # Behaviour kept as-is: the problem is only reported and the
            # directive is still staged without an 'action' entry.
            print("### ERROR: Host not supported for this pilot!")

    def _stage_in(label, filename, kind):
        """Register an input file: substitution entry + staging directive."""
        if self.verbose:
            print('### Using %s input file %s as %s' % (kind, filename, label))
        basename = os.path.basename(filename)
        task_substitutions[label] = basename
        url = rp.Url(filename)
        directive = {'source': url.path, 'target': basename}
        _set_action(directive, url.host, rp.LINK)
        cud.input_staging.append(directive)

    def _stage_out(label, filename, kind):
        """Register an output file: substitution entry + staging directive."""
        if self.verbose:
            print('### Using %s output file %s as %s' % (kind, filename, label))
        basename = os.path.basename(filename)
        task_substitutions[label] = basename
        url = rp.Url(filename)
        directive = {'source': basename, 'target': url.path}
        _set_action(directive, url.host, rp.COPY)
        cud.output_staging.append(directive)

    def _stage_stream(stream, filename):
        """Register staging of the unit's STDOUT or STDERR to a file."""
        if self.verbose:
            print('### Using per task per stage %s file as %s' % (stream, filename))
        url = rp.Url(filename)
        directive = {'source': stream, 'target': url.path}
        _set_action(directive, url.host, rp.COPY)
        cud.output_staging.append(directive)

    # ---- inputs ------------------------------------------------------------
    if stage == 1:
        for label, pattern in self.io_desc.input_per_task_first_stage.items():
            _stage_in(label, _expand(pattern, stage), 'initial')

    for label, pattern in self.io_desc.input_all_tasks_per_stage.items():
        _stage_in(label, _expand(pattern, stage), 'all task per stage')

    if stage != 1:
        # Intermediate files were produced by the previous stage.
        for entry in self.io_desc.intermediate_output_per_task_per_stage:
            filename = _expand(entry['pattern'], stage - 1)
            _stage_in(entry['input_label'], filename,
                      'intermediate per task per stage')

    for label, pattern in self.io_desc.input_per_task_all_stages.items():
        _stage_in(label, _expand(pattern, stage), 'per task all stage')

    # ---- outputs -----------------------------------------------------------
    for label, pattern in self.io_desc.output_per_task_per_stage.items():
        filename = _expand(pattern, stage)
        if label in ('STDOUT', 'STDERR'):
            _stage_stream(label, filename)
        else:
            _stage_out(label, filename, 'per task per stage')

    if stage != self.task_desc.num_stages:
        # If not the latest stage
        for entry in self.io_desc.intermediate_output_per_task_per_stage:
            filename = _expand(entry['pattern'], stage)
            _stage_out(entry['output_label'], filename,
                       'intermediate per task per stage')

    if stage == self.task_desc.num_stages:
        # If this is the last step
        for label, pattern in self.io_desc.output_per_task_final_stage.items():
            _stage_out(label, _expand(pattern, stage), 'per task final stage')

    # ---- unit properties ---------------------------------------------------
    # Name
    cud.name = "mtms-task-%s-%s" % (task, stage)

    # Cores
    cud.cores = self.task_desc.cores

    # Build MDTaskDescription from kernel name
    mdtd = MDTaskDescription()
    if not self.task_desc.kernel:
        raise Exception('Kernel not specified.')
    mdtd.kernel = self.task_desc.kernel

    # Bind to resource
    mdtd_bound = mdtd.bind(resource=self.resource_desc.resource)

    # Fill in CUD
    cud.environment = mdtd_bound.environment
    cud.pre_exec = mdtd_bound.pre_exec
    cud.executable = mdtd_bound.executable
    cud.mpi = mdtd_bound.mpi

    # Arguments: only set when the template expands to something non-empty.
    if not self.task_desc.arguments:
        if self.verbose:
            print('### Will execute "%s"' % cud.executable)
    else:
        arguments = Template(self.task_desc.arguments).substitute(task_substitutions)
        if self.verbose:
            print('### Will execute "%s %s"' % (cud.executable, arguments))
        if arguments:
            cud.arguments = arguments

    return cud
def run_workload(config, workload):
    """Run a NAMD workload through a BatchRunner and report per-task results.

    One compute unit is created per workload entry.  Each entry is a dict
    with the keys ``"runtime"``, ``"cores"``, ``"name"``, ``"parmfile"``,
    ``"coordinates"``, ``"conskfile"``, ``"input"`` and ``"output"``.  The
    four input files are staged in, the base name of ``"input"`` is the
    single NAMD argument, and STDOUT is staged to ``"output"``.

    After the run, one line per unit is printed with its runtime (or
    "failed" if the unit did not reach the DONE state) and output file.

    :param config: configuration object; ``MAXCPUS``, ``RESOURCE`` and
        ``ALLOCATION`` are read here, and the whole object is handed to
        ``BatchRunner``.
    :param workload: non-empty list of task dicts as described above.
    """
    maxcpus = config.MAXCPUS
    resource = config.RESOURCE
    allocation = config.ALLOCATION

    # We cannot allocate more than "maxcpus". If the workload needs fewer
    # cores in total, the pilot gets exactly that many cores and runs for
    # one task runtime. Otherwise we use "maxcpus" cores and scale the
    # runtime of the pilot with the number of tasks.
    # NOTE: currently, we assume (near) homogenous runtime among all tasks.
    task_runtime = workload[0]["runtime"]

    cores = sum(task["cores"] for task in workload)

    if cores < maxcpus:
        pilot_size = cores
        pilot_runtime = task_runtime
    else:
        pilot_size = maxcpus
        full_rounds, leftover = divmod(len(workload), maxcpus)
        pilot_runtime = task_runtime * full_rounds
        if leftover > 0:
            pilot_runtime += task_runtime

    print("\n * Number of tasks: %s" % len(workload))
    print(" * Pilot size (# cores): %s" % pilot_size)
    print(" * Pilot runtime: %s\n" % pilot_runtime)

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource = resource
    pdesc.runtime = pilot_runtime
    pdesc.cores = pilot_size
    pdesc.project = allocation
    pdesc.cleanup = False

    ############################################################
    # Workload definition: create CU descriptions from workload tasks.
    all_tasks = []

    for task in workload:
        parmfile = task["parmfile"]
        coordinates = task["coordinates"]
        conskfile = task["conskfile"]
        input_file = task["input"]
        output = task["output"]

        mdtd = MDTaskDescription()
        mdtd.kernel = "NAMD"
        # The input file is staged into the unit's working directory, so
        # only its base name is passed on the command line.
        mdtd.arguments = ["{0}".format(os.path.basename(input_file))]
        mdtd_bound = mdtd.bind(resource=resource)

        namd_task = radical.pilot.ComputeUnitDescription()
        namd_task.environment = mdtd_bound.environment
        namd_task.pre_exec = mdtd_bound.pre_exec
        namd_task.executable = mdtd_bound.executable
        namd_task.arguments = mdtd_bound.arguments
        namd_task.mpi = mdtd_bound.mpi
        namd_task.cores = task["cores"]
        namd_task.name = task["name"]
        namd_task.input_data = [parmfile, coordinates, conskfile, input_file]
        namd_task.output_data = ["STDOUT > %s" % output]

        all_tasks.append(namd_task)

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc,
                                cu_descriptions=all_tasks)
        if not isinstance(finished_units, list):
            finished_units = [finished_units]

        print("\nDONE")
        print("=============================================================================\n")

        for unit in finished_units:
            if unit.state == radical.pilot.DONE:
                t_run = unit.stop_time - unit.start_time
            else:
                t_run = "failed"

            # output_data entries have the form "<source> > <local target>".
            local_output = unit.description.output_data[0].split(" > ")[1]
            print(" o Task {0} RUNTIME {1} OUTPUT: {2}".format(unit.description.name, t_run, local_output))
    finally:
        # Always release the runner, even if the run or the report raised.
        br.close()
print "ERROR - Couldn't download sample data: %s" % str(ex) return 1 ############################################################ # The pilot description. pdesc = radical.pilot.ComputePilotDescription() pdesc.resource = resource pdesc.runtime = 30 # minutes pdesc.cores = 16 pdesc.project = allocation pdesc.cleanup = True ############################################################ # The test task description. mdtd = MDTaskDescription() mdtd.kernel = "NAMD" mdtd.arguments = ["./eq0.inp"] mdtd_bound = mdtd.bind(resource=resource) mmpbsa_test_task = radical.pilot.ComputeUnitDescription() mmpbsa_test_task.environment = mdtd_bound.environment mmpbsa_test_task.pre_exec = mdtd_bound.pre_exec mmpbsa_test_task.executable = mdtd_bound.executable mmpbsa_test_task.arguments = mdtd_bound.arguments mmpbsa_test_task.mpi = mdtd_bound.mpi mmpbsa_test_task.cores = 16 mmpbsa_test_task.input_data = [ "/%s/complex.pdb" % os.getcwd(), "/%s/complex.top" % os.getcwd(),
def run_workload(config, workload):
    """Run an MMPBSA workload through a BatchRunner and report the results.

    One compute unit is created per workload entry.  Each entry is a dict
    with the keys ``"runtime"``, ``"cores"``, ``"name"``, ``"input"``,
    ``"complex_prmtop"``, ``"receptor_prmtop"``, ``"ligand_prmtop"``,
    ``"trajectory"``, ``"output"``, ``"input_data_location"`` and
    ``"output_data_location"``.  Both locations must be "here" or "there"
    (case-insensitive).

    With input location "there" the input files are set as
    ``copy_local_input_data`` on the kernel description; with "here" they
    are set as ``input_data`` of the compute unit.  In both cases
    ``FINAL_RESULTS_MMPBSA.dat`` is staged to the task's ``"output"``.

    After the run, one line per unit is printed with its runtime (or
    "failed" if the unit did not reach the DONE state) and output file.

    :param config: configuration object; ``MAXCPUS``, ``RESOURCE`` and
        ``ALLOCATION`` are read here, and the whole object is handed to
        ``BatchRunner``.
    :param workload: non-empty list of task dicts as described above.
    :raises ValueError: if a data location is neither "here" nor "there".
    """
    maxcpus = config.MAXCPUS
    resource = config.RESOURCE
    allocation = config.ALLOCATION

    def _is_remote(task, key):
        """Map a "here"/"there" data location entry to local/remote."""
        value = task[key]
        location = value.lower()
        if location == "here":
            return False
        if location == "there":
            return True
        raise ValueError("Unknown '{0}': {1}".format(key, value))

    # We cannot allocate more than "maxcpus". If the workload needs fewer
    # cores in total, the pilot gets exactly that many cores and runs for
    # one task runtime. Otherwise we use "maxcpus" cores and scale the
    # runtime of the pilot with the number of tasks.
    # NOTE: currently, we assume (near) homogenous runtime among all tasks.
    task_runtime = workload[0]["runtime"]

    cores = sum(task["cores"] for task in workload)

    if cores < maxcpus:
        pilot_size = cores
        pilot_runtime = task_runtime
    else:
        pilot_size = maxcpus
        full_rounds, leftover = divmod(len(workload), maxcpus)
        pilot_runtime = task_runtime * full_rounds
        if leftover > 0:
            pilot_runtime += task_runtime

    print("\n * Number of tasks: %s" % len(workload))
    print(" * Pilot size (# cores): %s" % pilot_size)
    print(" * Pilot runtime: %s\n" % pilot_runtime)

    ############################################################
    # The pilot description
    pdesc = radical.pilot.ComputePilotDescription()
    pdesc.resource = resource
    pdesc.runtime = pilot_runtime
    pdesc.cores = pilot_size
    pdesc.project = allocation
    pdesc.cleanup = False

    ############################################################
    # Workload definition: create CU descriptions from workload tasks.
    all_tasks = []

    for task in workload:
        # Process data handling.
        idl_remote = _is_remote(task, "input_data_location")
        # The output location is only validated; nothing below depends on it.
        _is_remote(task, "output_data_location")

        # Order: nmode input, complex, receptor, ligand, trajectory.
        input_files = [task["input"],
                       task["complex_prmtop"],
                       task["receptor_prmtop"],
                       task["ligand_prmtop"],
                       task["trajectory"]]
        output = task["output"]

        mdtd = MDTaskDescription()
        mdtd.kernel = "MMPBSA"
        mdtd.arguments = "-i {0} -cp {1} -rp {2} -lp {3} -y {4}".format(
            *[os.path.basename(path) for path in input_files])

        if idl_remote:
            mdtd.copy_local_input_data = list(input_files)

        mdtd_bound = mdtd.bind(resource=resource)

        mmpbsa_task = radical.pilot.ComputeUnitDescription()
        mmpbsa_task.environment = mdtd_bound.environment
        mmpbsa_task.pre_exec = mdtd_bound.pre_exec
        mmpbsa_task.executable = mdtd_bound.executable
        mmpbsa_task.arguments = mdtd_bound.arguments
        mmpbsa_task.mpi = mdtd_bound.mpi
        mmpbsa_task.cores = task["cores"]
        mmpbsa_task.name = task["name"]

        if not idl_remote:
            # No remote files. All files are local and need to be transferred.
            mmpbsa_task.input_data = list(input_files)

        mmpbsa_task.output_data = ["FINAL_RESULTS_MMPBSA.dat > %s" % output]

        all_tasks.append(mmpbsa_task)

    ############################################################
    # Call the batch runner
    br = BatchRunner(config=config)
    try:
        finished_units = br.run(pilot_description=pdesc,
                                cu_descriptions=all_tasks)
        if not isinstance(finished_units, list):
            finished_units = [finished_units]

        print("\nDONE")
        print("=============================================================================\n")

        for unit in finished_units:
            # Only units that finished have usable timestamps.
            if unit.state == radical.pilot.DONE:
                t_run = unit.stop_time - unit.start_time
            else:
                t_run = "failed"

            # output_data entries have the form "<source> > <local target>".
            local_output = unit.description.output_data[0].split(" > ")[1]
            print(" o Task {0} RUNTIME {1} OUTPUT: {2}".format(unit.description.name, t_run, local_output))
    finally:
        # Always release the runner, even if the run or the report raised.
        br.close()