def test__add_resource_config_2(self):
    """ Test if a pilot can be submitted to a resource added via a ResourceConfig.

    A resource configuration named "mylocalhost" is registered with the
    session, then a one-core pilot is submitted against that resource,
    waited for and finally cancelled.
    """
    session = radical.pilot.Session(database_url=DBURL)

    try:
        # Describe the custom resource the pilot will be submitted to.
        rc = radical.pilot.ResourceConfig("mylocalhost")
        rc.task_launch_method = "LOCAL"
        rc.mpi_launch_method = "MPIRUN"
        rc.job_manager_endpoint = "fork://localhost"
        rc.filesystem_endpoint = "file://localhost/"
        rc.bootstrapper = "default_bootstrapper.sh"
        rc.pilot_agent = "radical-pilot-agent-multicore.py"

        pm = radical.pilot.PilotManager(session=session)
        session.add_resource_config(rc)

        # The pilot refers to the resource by the name given to the
        # ResourceConfig above.
        pd = radical.pilot.ComputePilotDescription()
        pd.resource = "mylocalhost"
        pd.cores = 1
        pd.runtime = 1  # minutes
        pd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
        pd.cleanup = True

        pilot = pm.submit_pilots(pd)
        pilot.wait(timeout=5 * 60)
        pilot.cancel()
    finally:
        # Close the session even when a step above raises, so a failing
        # test does not leave the session open.
        session.close()
def test__add_resource_config_2(self):
    """ Test if a pilot runs on a resource registered through add_resource_config.

    Registers a ResourceConfig called "mylocalhost" with the session,
    submits a single-core pilot that targets it, waits on the pilot and
    cancels it.
    """
    session = radical.pilot.Session(database_url=DBURL)

    try:
        rc = radical.pilot.ResourceConfig("mylocalhost")
        rc.task_launch_method = "LOCAL"
        rc.mpi_launch_method = "MPIRUN"
        rc.job_manager_endpoint = "fork://localhost"
        rc.filesystem_endpoint = "file://localhost/"
        rc.bootstrapper = "default_bootstrapper.sh"
        rc.pilot_agent = "radical-pilot-agent-multicore.py"

        pm = radical.pilot.PilotManager(session=session)
        session.add_resource_config(rc)

        pd = radical.pilot.ComputePilotDescription()
        # Must match the name passed to ResourceConfig above.
        pd.resource = "mylocalhost"
        pd.cores = 1
        pd.runtime = 1  # minutes
        pd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
        pd.cleanup = True

        pilot = pm.submit_pilots(pd)
        pilot.wait(timeout=5 * 60)
        pilot.cancel()
    finally:
        # Always release the session, including when submission, wait or
        # cancel raises.
        session.close()
def test__pilot_cancel(self):
    """ Test if we can cancel a pilot.

    Submits a one-core pilot on "local.localhost", waits until it is
    PMGR_ACTIVE (or FAILED), cancels it and checks that it ends up
    CANCELED with its timestamps set.
    """
    session = radical.pilot.Session(database_url=DBURL, database_name=DBNAME)

    try:
        pm = radical.pilot.PilotManager(session=session)

        cpd = radical.pilot.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 1
        cpd.runtime = 1
        cpd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
        cpd.cleanup = True

        pilot = pm.submit_pilots(pilot_descriptions=cpd)

        # Right after submission the pilot has neither started nor stopped.
        assert pilot is not None
        assert pilot.start_time is None
        assert pilot.stop_time is None

        # FAILED is included in the wait so a failed pilot ends the wait
        # too; the state assertion below then reports the failure.
        pilot.wait(state=[radical.pilot.PMGR_ACTIVE, radical.pilot.FAILED],
                   timeout=5 * 60)
        assert pilot.submission_time is not None
        assert pilot.state == radical.pilot.PMGR_ACTIVE
        assert pilot.start_time is not None

        # Cancel the active pilot explicitly and wait for it to settle.
        pilot.cancel()
        pilot.wait(timeout=5 * 60)
        assert pilot.state == radical.pilot.CANCELED
        assert pilot.stop_time is not None
    finally:
        # A failing assertion must not leave the session open.
        session.close()
def test__pilot_cancel(self):
    """ Test if we can cancel a pilot.

    Submits a one-core pilot on "local.localhost", waits until it is
    ACTIVE (or FAILED), cancels it and checks that it ends up CANCELED
    with its timestamps set.
    """
    session = radical.pilot.Session(database_url=DBURL, database_name=DBNAME)

    try:
        pm = radical.pilot.PilotManager(session=session)

        cpd = radical.pilot.ComputePilotDescription()
        cpd.resource = "local.localhost"
        cpd.cores = 1
        cpd.runtime = 1
        cpd.sandbox = "/tmp/radical.pilot.sandbox.unittests"
        cpd.cleanup = True

        pilot = pm.submit_pilots(pilot_descriptions=cpd)

        # Freshly submitted: no start or stop timestamps yet.
        assert pilot is not None
        assert pilot.start_time is None
        assert pilot.stop_time is None

        # Waiting on FAILED as well ends the wait if the pilot fails; the
        # state assertion below then reports it.
        pilot.wait(state=[radical.pilot.ACTIVE, radical.pilot.FAILED],
                   timeout=5 * 60)
        assert pilot.submission_time is not None
        assert pilot.state == radical.pilot.ACTIVE
        assert pilot.start_time is not None

        # Cancel the running pilot and wait for it to settle.
        pilot.cancel()
        pilot.wait(timeout=5 * 60)
        assert pilot.state == radical.pilot.CANCELED
        assert pilot.stop_time is not None
    finally:
        # Close the session whether or not the assertions passed.
        session.close()
def test__remote_pilot_cancel(self):
    """ Test if we can cancel a pilot on a remote resource.

    Uses the ssh user id / key, resource, core count and working
    directory stored on the test instance (``self.test_*``). The pilot is
    submitted, awaited until ACTIVE, cancelled, and then expected to be
    CANCELED with a stop time.
    """
    session = radical.pilot.Session(database_url=DBURL, database_name=DBNAME)

    try:
        # Register the ssh context with the session.
        c = radical.pilot.Context('ssh')
        c.user_id = self.test_ssh_uid
        c.user_key = self.test_ssh_key
        session.add_context(c)

        pm = radical.pilot.PilotManager(session=session)

        cpd = radical.pilot.ComputePilotDescription()
        cpd.resource = self.test_resource
        cpd.cores = self.test_cores
        cpd.runtime = 2
        cpd.sandbox = self.test_workdir

        pilot = pm.submit_pilots(pilot_descriptions=cpd)
        assert pilot is not None

        pilot.wait(state=radical.pilot.ACTIVE, timeout=5 * 60)
        assert pilot.state == radical.pilot.ACTIVE, "Expected state 'ACTIVE' but got %s" % pilot.state
        assert pilot.submission_time is not None
        assert pilot.start_time is not None

        # Cancel the active pilot explicitly and wait for it to settle.
        pilot.cancel()
        pilot.wait(timeout=5 * 60)
        assert pilot.state == radical.pilot.CANCELED
        assert pilot.stop_time is not None
    finally:
        # A failing assertion must not leave the session open.
        session.close()
def test__remote_pilot_cancel(self):
    """ Test if we can cancel a pilot on a remote resource.

    The ssh credentials, target resource, core count and sandbox come
    from attributes on the test instance (``self.test_*``). After
    submission the pilot must become ACTIVE, and after ``cancel()`` it
    must be CANCELED and carry a stop time.
    """
    session = radical.pilot.Session(database_url=DBURL, database_name=DBNAME)

    try:
        # Add the ssh context built from the test's uid and key.
        c = radical.pilot.Context('ssh')
        c.user_id = self.test_ssh_uid
        c.user_key = self.test_ssh_key
        session.add_context(c)

        pm = radical.pilot.PilotManager(session=session)

        cpd = radical.pilot.ComputePilotDescription()
        cpd.resource = self.test_resource
        cpd.cores = self.test_cores
        cpd.runtime = 2
        cpd.sandbox = self.test_workdir

        pilot = pm.submit_pilots(pilot_descriptions=cpd)
        assert pilot is not None

        pilot.wait(state=radical.pilot.ACTIVE, timeout=5 * 60)
        assert pilot.state == radical.pilot.ACTIVE, "Expected state 'ACTIVE' but got %s" % pilot.state
        assert pilot.submission_time is not None
        assert pilot.start_time is not None

        # Cancel the running pilot and wait for it to settle.
        pilot.cancel()
        pilot.wait(timeout=5 * 60)
        assert pilot.state == radical.pilot.CANCELED
        assert pilot.stop_time is not None
    finally:
        # Close the session whether or not the assertions passed.
        session.close()
def _fecalc_build_workload(config, resource, num_tasks, cores_per_task):
    """Build the MMPBSA compute unit descriptions for one benchmark run.

    Returns a list with ``num_tasks`` descriptions, each requesting
    ``cores_per_task`` cores and named ``task-<index>``.
    """
    # The input files, basenames and argument string are the same for every
    # task, so they are computed once instead of once per task.
    data = config.FECALC_BENCHMARK_INPUT_DATA
    input_files = [data[0], data[1], data[2], data[3], data[4]]
    basenames = [os.path.basename(path) for path in input_files]
    arguments = "-i {0} -cp {1} -rp {2} -lp {3} -y {4}".format(*basenames)
    data_location = config.FECALC_BENCHMARK_INPUT_DATA_LOCATION.lower()

    workload = []
    for n in range(0, num_tasks):
        mdtd = MDTaskDescription()
        mdtd.kernel = "MMPBSA"
        mdtd.arguments = arguments

        if data_location == "remote":
            # Must be set before bind() so the bound description sees it.
            mdtd.copy_local_input_data = list(input_files)

        mdtd_bound = mdtd.bind(resource=resource)

        mmpbsa_task = radical.pilot.ComputeUnitDescription()
        mmpbsa_task.environment = mdtd_bound.environment
        mmpbsa_task.pre_exec = mdtd_bound.pre_exec
        mmpbsa_task.executable = mdtd_bound.executable
        mmpbsa_task.arguments = mdtd_bound.arguments
        mmpbsa_task.mpi = mdtd_bound.mpi
        mmpbsa_task.cores = cores_per_task
        mmpbsa_task.name = "task-{0}".format(n)

        if data_location == "local":
            # No remote files. All files are local and need to be
            # transferred.
            mmpbsa_task.input_data = list(input_files)

        workload.append(mmpbsa_task)

    return workload


def run_benchmark(config):
    """Runs the free-energy calculation benchmark workload.

    For every combination of pilot size (``FECALC_BENCHMARK_PILOT_SIZES``)
    and task parallelism (``FECALC_BENCHMARK_TASK_PARALLELISM``) from
    ``config``, a new session is opened, ``pilot_size // parallelism``
    MMPBSA tasks are submitted to one pilot of that size, and the function
    blocks until the units have been waited for.

    Raises SystemExit with code 0 via ``sys.exit(0)`` when done.
    """
    server = config.SERVER
    resource = config.RESOURCE
    username = config.USERNAME
    allocation = config.ALLOCATION
    dbname = config.FECALC_BENCHMARK_DBNAME

    pilot_sizes = config.FECALC_BENCHMARK_PILOT_SIZES
    task_parallelism = config.FECALC_BENCHMARK_TASK_PARALLELISM

    for ps in pilot_sizes:
        for tp in task_parallelism:
            # Floor division: the task count feeds range(), which needs an
            # int. Same result as "/" on ints under Python 2.
            num_tasks = ps // tp

            # Set up the session:
            session = radical.pilot.Session(database_url=server,
                                            database_name=dbname)
            try:
                cred = radical.pilot.Context("ssh")
                cred.user_id = username
                session.add_context(cred)

                print("Pilot size: %3s Task parallelism: %3s Num tasks: %3s. Session ID: %s" % (ps, tp, num_tasks, session.uid))

                workload = _fecalc_build_workload(config, resource,
                                                  num_tasks, tp)

                # EXECUTE THE BENCHMARK WORKLOAD
                pmgr = radical.pilot.PilotManager(session=session)

                # The pilot description
                pdesc = radical.pilot.ComputePilotDescription()
                pdesc.resource = resource
                pdesc.runtime = 30
                pdesc.cores = ps
                pdesc.project = allocation
                pdesc.cleanup = True
                pdesc.sandbox = config.WORKDIR

                pilot = pmgr.submit_pilots(pdesc)

                umgr = radical.pilot.UnitManager(
                    session=session,
                    scheduler=radical.pilot.SCHED_DIRECT_SUBMISSION)
                umgr.add_pilots(pilot)

                umgr.submit_units(workload)

                print(" o STARTED ")
                umgr.wait_units()
                print(" o FINISHED")

                pilot.cancel()
            finally:
                # Close the session even if this run failed part-way.
                session.close(delete=False)

    # NOTE(review): the exit is placed after all combinations have run;
    # confirm it was not meant to sit inside the loops.
    sys.exit(0)