def main():
    # TODO: Test both with and without a provided config file.
    kwargs = {}
    if len(sys.argv) > 1:
        cfg = ru.Config(cfg=ru.read_json(sys.argv[1]))
        kwargs['cfg'] = cfg
        descr = cfg.worker_descr
        count = cfg.n_workers
        cores = cfg.cpn
        gpus = cfg.gpn
    else:
        descr = rp.TaskDescription({
            'uid': 'raptor.worker',
            'executable': 'scalems_rp_worker',
            'arguments': []
        })
        count = 1
        cores = 1
        gpus = 0

    master = ScaleMSMaster(**kwargs)

    master.submit(descr=descr, count=count, cores=cores, gpus=gpus)

    master.start()
    master.join()
    master.stop()
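# A minimal sketch of a config file that would exercise the first branch of
# main() above. The field names (worker_descr, n_workers, cpn, gpn) are taken
# from the attributes read off `cfg`; the authoritative schema belongs to the
# master/worker scripts, so treat this as illustrative only.
import json

example_cfg = {
    'worker_descr': {'uid': 'raptor.worker',
                     'executable': 'scalems_rp_worker',
                     'arguments': []},
    'n_workers': 1,
    'cpn': 1,  # presumably cores per node for each worker
    'gpn': 0   # presumably gpus per node for each worker
}
with open('raptor_master_cfg.json', 'w') as fh:
    json.dump(example_cfg, fh, indent=2)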
def test_rp_basic_task_remote(rp_task_manager, pilot_description):
    import radical.pilot as rp

    if pilot_description.access_schema and pilot_description.access_schema == 'local':
        pytest.skip('This test is only for remote execution.')

    tmgr = rp_task_manager
    session = tmgr.session
    assert not session.closed

    td = rp.TaskDescription({
        'executable': '/usr/bin/hostname',
        'cpu_processes': 1
    })

    task = tmgr.submit_tasks(td)

    tmgr.wait_tasks(uids=[task.uid])

    assert task.state == rp.states.DONE
    assert task.exit_code == 0

    localname = subprocess.run(['/usr/bin/hostname'],
                               stdout=subprocess.PIPE,
                               encoding='utf-8').stdout.rstrip()
    remotename = task.stdout.rstrip()
    assert len(remotename) > 0
    assert remotename != localname
def _describe_raptor_task(item: scalems.workflow.Task,
                          scheduler: str,
                          pre_exec: list) -> rp.TaskDescription:
    """Derive a RADICAL Pilot TaskDescription from a scalems workflow item.

    The TaskDescription will be submitted to the named *scheduler*,
    where *scheduler* is the UID of a task managing the life of a
    rp.raptor.Master instance.

    Caller is responsible for ensuring that *scheduler* is valid.
    """
    # Warning: TaskDescription class does not have a strongly defined interface.
    # Check docs for schema.
    # Ref: scalems_rp_master._RaptorTaskDescription
    task_description = rp.TaskDescription(
        from_dict=dict(
            executable='scalems',  # This value is currently ignored, but must be set.
            pre_exec=pre_exec))
    task_description.uid = item.uid()
    task_description.scheduler = str(scheduler)
    # Example work would be the JSON serialized form of the following dictionary.
    # {'mode': 'call',
    #  'cores': 1,
    #  'timeout': 10,
    #  'data': {'method': 'hello',
    #           'kwargs': {'world': uid}}}
    #
    # Maybe something like this:
    # work_dict = {
    #     'mode': 'scalems',
    #     'cores': 1,
    #     'timeout': 10,
    #     'data': item.serialize()
    # }
    work_dict = {
        'mode': 'exec',
        'cores': 1,
        'timeout': None,
        'data': {
            'exe': item.input['argv'][0],
            'args': item.input['argv'][1:]
        }
    }
    task_description.arguments = [json.dumps(work_dict)]

    # TODO: Check for and activate an appropriate venv
    # using
    #     task_description.pre_exec = ...
    # or
    #     task_description.named_env = ...

    # TODO: Interpret item details and derive appropriate staging directives.
    task_description.input_staging = []
    task_description.output_staging = []

    return task_description
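# Hypothetical call-site sketch for _describe_raptor_task(): submit the derived
# description to a TaskManager, addressed to a running master task. The names
# `item`, `tmgr`, `master_task`, and `pre_exec` are assumptions supplied by the
# caller, not part of the function above.
def example_submit_raptor_task(item, tmgr, master_task, pre_exec):
    td = _describe_raptor_task(item, scheduler=master_task.uid, pre_exec=pre_exec)
    task = tmgr.submit_tasks(td)
    tmgr.wait_tasks(uids=[task.uid])
    return task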
def _describe_legacy_task(item: scalems.workflow.Task,
                          pre_exec: list) -> rp.TaskDescription:
    """Derive a RADICAL Pilot TaskDescription from a scalems workflow item.

    For a "raptor" style task, see _describe_raptor_task()
    """
    subprocess_type = TypeIdentifier(('scalems', 'subprocess', 'SubprocessTask'))
    assert item.description().type() == subprocess_type
    input_data = item.input
    task_input = scalems.subprocess.SubprocessInput(**input_data)
    args = list(task_input.argv)
    # Warning: TaskDescription class does not have a strongly defined interface.
    # Check docs for schema.
    task_description = rp.TaskDescription(
        from_dict=dict(executable=args[0],
                       arguments=args[1:],
                       stdout=str(task_input.stdout),
                       stderr=str(task_input.stderr),
                       pre_exec=pre_exec))
    uid: str = item.uid().hex()
    task_description.uid = uid

    # TODO: Check for and activate an appropriate venv
    # using
    #     task_description.pre_exec = ...
    # or
    #     task_description.named_env = ...

    # TODO: Interpret item details and derive appropriate staging directives.
    task_description.input_staging = list(task_input.inputs.values())
    task_description.output_staging = [{
        'source': str(task_input.stdout),
        'target': os.path.join(uid, pathlib.Path(task_input.stdout).name),
        'action': rp.TRANSFER
    }, {
        'source': str(task_input.stderr),
        'target': os.path.join(uid, pathlib.Path(task_input.stderr).name),
        'action': rp.TRANSFER
    }]
    task_description.output_staging += task_input.outputs.values()

    return task_description
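# A similarly hedged sketch for the non-raptor path: derive and submit a
# TaskDescription for a scalems subprocess item. `item`, `task_manager`, and
# `venv_path` are hypothetical caller-provided names.
def example_submit_legacy_task(item, task_manager, venv_path):
    pre_exec = ['. {}/bin/activate'.format(venv_path)]
    td = _describe_legacy_task(item, pre_exec=pre_exec)
    return task_manager.submit_tasks(td)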
def _get_scheduler(name: str,
                   pre_exec: typing.Iterable[str],
                   task_manager: rp.TaskManager):
    """Establish the radical.pilot.raptor.Master task.

    Create a master rp.Task (running the scalems_rp_master script) with the
    provided *name* to be referenced as the *scheduler* for raptor tasks.

    Returns the rp.Task for the master script once the Master is ready to
    receive submissions.

    Raises:
        DispatchError if the master task could not be launched successfully.

    Note:
        Currently there is no completion condition for the master script.
        Caller is responsible for canceling the Task returned by this function.
    """
    # This is the name that should be resolvable in an active venv for the script we
    # install as
    # pkg_resources.get_entry_info('scalems', 'console_scripts', 'scalems_rp_master').name
    master_script = 'scalems_rp_master'

    # We can probably make the config file a permanent part of the local metadata,
    # but we don't really have a scheme for managing local metadata right now.
    # with tempfile.TemporaryDirectory() as dir:
    #     config_file_name = 'raptor_scheduler_config.json'
    #     config_file_path = os.path.join(dir, config_file_name)
    #     with open(config_file_path, 'w') as fh:
    #         encoded = scalems_rp_master.encode_as_dict(scheduler_config)
    #         json.dump(encoded, fh, indent=2)

    # Define a raptor.scalems master and launch it within the pilot.
    td = rp.TaskDescription({'uid': name,
                             'executable': master_script})
    td.arguments = []
    td.pre_exec = pre_exec
    # td.named_env = 'scalems_env'
    logger.debug('Launching RP scheduler.')
    scheduler = task_manager.submit_tasks(td)
    # WARNING: rp.Task.wait() *state* parameter does not handle tuples,
    # but does not check type.
    scheduler.wait(state=[rp.states.AGENT_EXECUTING] + rp.FINAL)
    if scheduler.state in {rp.states.CANCELED, rp.states.FAILED}:
        raise DispatchError('Could not get Master task for dispatching.')
    return scheduler
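# Hypothetical usage sketch for _get_scheduler(), honoring the docstring
# contract that the caller must cancel the returned master task. `tmgr` and
# `venv_path` are assumptions, not part of the function above.
def example_with_master(tmgr, venv_path):
    pre_exec = ['. {}/bin/activate'.format(venv_path)]
    master = _get_scheduler('raptor.scalems', pre_exec=pre_exec, task_manager=tmgr)
    try:
        # Submit raptor TaskDescriptions with td.scheduler = master.uid here.
        pass
    finally:
        # No completion condition exists for the master script; cancel explicitly.
        tmgr.cancel_tasks(uids=master.uid)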
def _job_2_descr(self, job):
    # FIXME: RP does not support STDIN. Should we raise if STDIN is
    #        specified?
    from_dict = dict()
    # TODO: use metadata for jpsi uid
    from_dict['name'] = job.uid
    from_dict['executable'] = job.spec.executable
    from_dict['arguments'] = job.spec.arguments
    from_dict['environment'] = job.spec.environment
    # from_dict['stdin'] = job.spec.stdin_path
    from_dict['stdout'] = job.spec.stdout_path
    from_dict['stderr'] = job.spec.stderr_path
    from_dict['sandbox'] = job.spec.directory

    return rp.TaskDescription(from_dict=from_dict)
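# Illustrative sketch with hypothetical values: for a job spec like the one the
# adapter above consumes, the result is equivalent to constructing a
# TaskDescription directly from a dict such as this.
import radical.pilot as rp

example_td = rp.TaskDescription(from_dict={
    'name': 'job.0001',             # from job.uid
    'executable': '/bin/echo',      # from job.spec.executable
    'arguments': ['hello'],         # from job.spec.arguments
    'environment': {},              # from job.spec.environment
    'stdout': '/tmp/job.0001.out',  # from job.spec.stdout_path
    'stderr': '/tmp/job.0001.err',  # from job.spec.stderr_path
    'sandbox': '/tmp/job.0001'      # from job.spec.directory
})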
def test_prepare_venv(rp_task_manager, sdist):
    """Bootstrap the scalems package in a RP target environment using pilot.prepare_env.

    This test function specifically tests the local.localhost resource.

    Note that we cannot wait on the environment preparation directly, but we can
    define a task with ``named_env`` matching the *prepare_env* key to implicitly
    depend on successful creation.
    """
    # NOTE: *sdist* is the path of an sdist archive that we could stage for the venv
    #     installation.
    # QUESTION: Can't we use the radical.pilot package archive that was already placed
    #     for bootstrapping the pilot?

    # TODO: Merge with test_rp_raptor_local, but use the installed scalems_rp_master
    #     and scalems_rp_worker files.

    import radical.pilot as rp
    import radical.saga as rs
    import radical.utils as ru

    # We only expect one pilot.
    pilot: rp.Pilot = rp_task_manager.get_pilots()[0]
    # We get a dictionary...
    # assert isinstance(pilot, rp.Pilot)
    # But it looks like it has the pilot id in it.
    pilot_uid = typing.cast(dict, pilot)['uid']
    pmgr_uid = typing.cast(dict, pilot)['pmgr']
    session: rp.Session = rp_task_manager.session
    pmgr: rp.PilotManager = session.get_pilot_managers(pmgr_uids=pmgr_uid)
    assert isinstance(pmgr, rp.PilotManager)
    pilot = pmgr.get_pilots(uids=pilot_uid)
    assert isinstance(pilot, rp.Pilot)
    # It looks like either the pytest fixture should deliver something other than the
    # TaskManager, or the prepare_venv part should be moved to a separate function,
    # such as in conftest...

    sdist_names = {
        'scalems': os.path.basename(sdist),
        'rp': rp.sdist_name,
        'ru': ru.sdist_name,
        'rs': rs.sdist_name
    }
    sdist_local_paths = {
        'scalems': sdist,
        'rp': rp.sdist_path,
        'rs': rs.sdist_path,
        'ru': ru.sdist_path
    }
    logger.debug('Checking paths: ' + ', '.join(sdist_local_paths.values()))
    for path in sdist_local_paths.values():
        assert os.path.exists(path)

    sandbox_path = urllib.parse.urlparse(pilot.pilot_sandbox).path

    sdist_session_paths = {
        name: os.path.join(sandbox_path, sdist_names[name])
        for name in sdist_names.keys()
    }

    logger.debug('Staging ' + ', '.join(sdist_session_paths.values()))

    input_staging = []
    for name in sdist_names.keys():
        input_staging.append({
            'source': sdist_local_paths[name],
            'target': sdist_session_paths[name],
            'action': rp.TRANSFER
        })
    pilot.stage_in(input_staging)

    tmgr = rp_task_manager

    pilot.prepare_env({
        'scalems_env': {
            'type': 'virtualenv',
            'version': '3.8',
            'setup': list(sdist_session_paths.values())
        }
    })

    td = rp.TaskDescription({
        'executable': 'python3',
        'arguments': [
            '-c',
            'import radical.pilot as rp;'
            'import scalems;'
            'print(rp.version_detail);'
            'print(scalems.__file__)'
        ],
        'named_env': 'scalems_env'
    })
    task = tmgr.submit_tasks(td)
    tmgr.wait_tasks()
    logger.debug(f'RP version details and scalems location: {task.stdout}')
    assert task.exit_code == 0
def test_rp_raptor_staging(pilot_description, rp_venv):
    """Test file staging for raptor Master and Worker tasks.

    - upon pilot startup, transfer a file to the pilot sandbox
    - upon master startup, create a link to that file for each master
    - for each task, copy the file into the task sandbox
    - upon task completion, transfer the files to the client (and rename them)
    """
    import time
    import radical.pilot as rp

    # Note: we need to install the current scalems package to test remotely.
    # If this is problematic, we can add a check like the following.
    # if pilot_description.resource != 'local.localhost' \
    #         and pilot_description.access_schema \
    #         and pilot_description.access_schema != 'local':
    #     pytest.skip('This test is only for local execution.')

    # Note: radical.pilot.Session creation causes several deprecation warnings.
    # Ref https://github.com/radical-cybertools/radical.pilot/issues/2185
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        session = rp.Session()
    fname = '%d.dat' % os.getpid()
    fpath = os.path.join('/tmp', fname)
    data: str = time.asctime()

    # Hopefully, this requirement is temporary.
    if rp_venv is None:
        pytest.skip('This test requires a user-provided static RP venv.')

    if rp_venv:
        pre_exec = ['. {}/bin/activate'.format(rp_venv)]
    else:
        pre_exec = None

    try:
        pmgr = rp.PilotManager(session=session)
        tmgr = rp.TaskManager(session=session)

        # Illustrate data staging as part of the Pilot launch.
        # By default, the file is copied to the root of the Pilot sandbox,
        # where it can be referenced as 'pilot:///filename'.
        # Alternatively: pilot.stage_in() and pilot.stage_output() (blocking calls).
        pilot_description.exit_on_error = True
        pilot_description.input_staging = [fpath]
        with open(fpath, 'w') as fh:
            fh.writelines([data])
        try:
            pilot = pmgr.submit_pilots(pilot_description)
            # Confirm that the input file has been staged by waiting for the pilot state.
            pilot.wait(state=[rp.states.PMGR_ACTIVE] + rp.FINAL)
        finally:
            os.unlink(fpath)

        tmgr.add_pilots(pilot)

        uid = 'scalems.master.001'
        # Illustrate another mode of data staging with the Master task submission.
        td = rp.TaskDescription({
            'uid': uid,
            'executable': 'scalems_rp_master',
            'input_staging': [{
                'source': 'pilot:///%s' % fname,
                'target': 'pilot:///%s.%s.lnk' % (fname, uid),
                'action': rp.LINK
            }],
            'pre_exec': pre_exec
            # 'named_env': 'scalems_env'
        })

        master = tmgr.submit_tasks(td)

        # Illustrate availability of the scheduler and of data staged with the Master
        # task. When the task enters AGENT_SCHEDULING_PENDING it has passed all input
        # staging, and the files will be available.
        # (see https://docs.google.com/drawings/d/1q5ehxIVdln5tXEn34mJyWAmxBk_DqZ5wwkl3En-t5jo/)

        # Confirm that the Master script is running (and ready to receive raptor tasks).
        # WARNING: rp.Task.wait() *state* parameter does not handle tuples, but does not
        # check type.
        master.wait(state=[rp.states.AGENT_EXECUTING] + rp.FINAL)
        assert master.state not in {rp.CANCELED, rp.FAILED}

        # Define raptor tasks and submit them to the master.
        tds = list()
        # Illustrate data staging as part of raptor task submission.
        # Note that tasks submitted by the client get a sandboxed task directory,
        # whereas those submitted by the Master (through Master.request(),
        # through the wrapper script or the Master.create_initial_tasks() hook)
        # do not, and do not have a data staging phase.
        for i in range(3):
            uid = 'scalems.%06d' % i
            work = {
                'mode': 'call',
                'cores': 1,
                'timeout': 10,  # seconds
                'data': {
                    'method': 'hello',
                    'kwargs': {
                        'world': uid
                    }
                }
            }
            tds.append(
                rp.TaskDescription({
                    'uid': uid,
                    'executable': '-',
                    'input_staging': [{
                        'source': 'pilot:///%s.%s.lnk' % (fname, master.uid),
                        'target': 'task:///%s' % fname,
                        'action': rp.COPY
                    }],
                    'output_staging': [{
                        'source': 'task:///%s' % fname,
                        'target': 'client:///%s.%s.out' % (fname, uid),
                        'action': rp.TRANSFER
                    }],
                    'scheduler': master.uid,
                    'arguments': [json.dumps(work)],
                    'pre_exec': pre_exec
                }))
        # TODO: Organize client-side data with managed hierarchical paths.
        # Question: RP maintains a filesystem hierarchy on the client side, correct?
        # Answer: only for profiling and such: do not use for data or user-facing stuff.
        tasks = tmgr.submit_tasks(tds)

        # TODO: Clarify the points at which the data exists or is accessed.
        # * When the (client-submitted) task enters AGENT_STAGING_OUTPUT_PENDING,
        #   it has finished executing and output data should be accessible as
        #   'task:///outfile'.
        # * When the (client-submitted) task reaches one of the rp.FINAL stages, it has
        #   finished output staging and files are accessible at the location specified
        #   in 'output_staging'.
        # * Tasks submitted directly by the Master (example?) do not perform output
        #   staging; data is written before entering Master.result_cb().
        # RP Issue: client-submitted Tasks need to be accessible through a path that is
        # common with the Master-submitted (`request()`) tasks. (SCALE-MS #108)

        assert len(tasks) == len(tds)
        # 'arguments' (element 0) gets wrapped in a Request at the Master by
        # _receive_tasks, then the list of requests is passed to Master.request(),
        # which is presumably an extension point for derived Master implementations.
        # The base class method converts requests to dictionaries and adds them to a
        # request queue, from which they are picked up by the Worker in _request_cb,
        # then handled in the forked interpreter by Worker._dispatch, which checks the
        # *mode* of the Request and dispatches according to native or registered mode
        # implementations (e.g. 'call' (native) or 'scalems'). The task process is
        # launched with the Python multiprocessing (native) module and added to
        # self._pool. When the task runs, its result triggers _result_cb.

        # Wait for *those* tasks to complete and report results.
        tmgr.wait_tasks(uids=[t.uid for t in tasks])

        # Cancel the master.
        tmgr.cancel_tasks(uids=master.uid)
        # Cancel blocks until the task is done, so the following wait is currently
        # redundant, but there is a ticket open to change this behavior.
        # See https://github.com/radical-cybertools/radical.pilot/issues/2336
        tmgr.wait_tasks()

        # Note that these map as follows:
        #   * 'client:///' == $PWD
        #   * 'task:///' == urllib.parse.urlparse(task.sandbox).path
        #   * 'pilot:///' == urllib.parse.urlparse(pilot.pilot_sandbox).path
        for t in tasks:
            print(t)
            outfile = './%s.%s.out' % (fname, t.uid)
            assert os.path.exists(outfile)
            with open(outfile, 'r') as outfh:
                assert outfh.readline().rstrip() == data
            os.unlink(outfile)

        pilot.cancel()
        tmgr.close()
        pmgr.close()

    finally:
        session.close(download=False)
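# Hedged helper sketch distilling the per-task staging pattern exercised in the
# test above. The 'pilot://', 'task://', and 'client://' URL schemes are the RP
# staging conventions the test relies on; the helper name itself is hypothetical.
def example_staging_directives(fname, master_uid, task_uid):
    import radical.pilot as rp
    input_staging = [{'source': 'pilot:///%s.%s.lnk' % (fname, master_uid),
                      'target': 'task:///%s' % fname,
                      'action': rp.COPY}]
    output_staging = [{'source': 'task:///%s' % fname,
                       'target': 'client:///%s.%s.out' % (fname, task_uid),
                       'action': rp.TRANSFER}]
    return input_staging, output_staging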
async def test_rp_future(rp_task_manager):
    """Check our Future implementation.

    Fulfill the asyncio.Future protocol for a rp.Task wrapper object. The wrapper
    should appropriately yield when the rp.Task is not finished.
    """
    import radical.pilot as rp

    tmgr = rp_task_manager

    td = rp.TaskDescription({
        'executable': '/bin/bash',
        'arguments': ['-c', '/bin/sleep 5 && /bin/echo success'],
        'cpu_processes': 1
    })

    # Test propagation of RP cancellation behavior
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Future = await scalems.radical.rp_task(task, future)

    task.cancel()
    try:
        # TODO: With Python 3.9, check the cancellation message for how the
        #     cancellation propagated.
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(wrapper, timeout=120)
    except asyncio.TimeoutError as e:
        # Useful point to insert an easy debugging break point
        raise e

    assert future.cancelled()
    assert wrapper.cancelled()
    assert task.state == rp.states.CANCELED

    # Test propagation of asyncio watcher task cancellation.
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Task = await scalems.radical.rp_task(task, future)

    assert isinstance(wrapper, asyncio.Task)
    wrapper.cancel()
    try:
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(wrapper, timeout=5)
    except asyncio.TimeoutError as e:
        # Useful point to insert an easy debugging break point
        raise e
    assert wrapper.cancelled()
    assert future.cancelled()

    # WARNING: rp.Task.wait() never completes with no arguments.
    # WARNING: This blocks. Don't do it in the event loop thread.
    task.wait(state=rp.states.CANCELED, timeout=120)
    # Note that if the test is paused by a debugger, the rp task may have a chance
    # to complete before being canceled.
    # Ordinarily, that will not happen in this test.
    # assert task.state in (rp.states.CANCELED, rp.states.DONE)
    assert task.state in (rp.states.CANCELED,)

    # Test propagation of asyncio future cancellation.
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Task = await scalems.radical.rp_task(task, future)

    assert isinstance(wrapper, asyncio.Task)
    future.cancel()
    try:
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(future, timeout=5)
        await asyncio.wait_for(wrapper, timeout=1)
    except asyncio.TimeoutError as e:
        # Useful point to insert an easy debugging break point
        raise e
    assert not wrapper.cancelled()
    assert future.cancelled()

    # WARNING: rp.Task.wait() never completes with no arguments.
    # WARNING: This blocks. Don't do it in the event loop thread.
    task.wait(state=rp.states.CANCELED, timeout=120)
    # Note that if the test is paused by a debugger, the rp task may have a chance
    # to complete before being canceled.
    # Ordinarily, that will not happen in this test.
    # assert task.state in (rp.states.CANCELED, rp.states.DONE)
    assert task.state in (rp.states.CANCELED,)

    # Test run to completion
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Task = await scalems.radical.rp_task(task, future)

    timeout = 120
    try:
        result = await asyncio.wait_for(future, timeout=timeout)
    except asyncio.TimeoutError as e:
        logger.debug(f'Waited more than {timeout} for {future}: {e}')
        result = None
    assert task.exit_code == 0
    assert 'success' in task.stdout

    assert 'stdout' in result
    assert 'success' in result['stdout']
    assert wrapper.done()
def _connect_rp(config: Configuration) -> Runtime:
    """Establish the RP Session.

    Acquire as many re-usable resources as possible. The scope established by
    this function is as broad as it can be within the life of this instance.

    Once instance._connect_rp() succeeds, instance._disconnect_rp() must be
    called to clean up resources. Use the async context manager behavior of the
    instance to automatically follow this protocol. I.e. instead of calling
    ``instance._connect_rp(); ...; instance._disconnect_rp()``, use::

        async with instance:
            ...

    Raises:
        DispatchError if task dispatching could not be set up.

        CancelledError if the parent asyncio.Task is cancelled while executing.
    """
    # TODO: Consider inlining this into __aenter__().
    # A non-async method is potentially useful for debugging, but causes the event loop
    # to block while waiting for the RP tasks included here. If this continues to be a
    # slow function, we can wrap the remaining RP calls and let this function be
    # inlined, or stick the whole function in a separate thread with
    # loop.run_in_executor().

    # TODO: RP triggers SIGINT in various failure modes.
    #     We should use loop.add_signal_handler() to convert to an exception
    #     that we can raise in an appropriate task.
    # Note that PilotDescription can use `'exit_on_error': False` to suppress the
    # SIGINT, but we have not explored the consequences of doing so.

    try:
        #
        # Start the Session.
        #

        # Note that we cannot resolve the full _resource config until we have a
        # Session object.
        # We cannot get the default session config until after creating the Session,
        # so we don't have a template for allowed, required, or default values.
        # Question: does the provided *cfg* need to be complete? Or will it be merged
        # with default values from some internal definition, such as by dict.update()?
        # I don't remember what the use cases are for overriding the default session
        # config.
        session_config = None
        # At some point soon, we need to track the Session ID for the workflow
        # metadata. We may also want the Session ID to be deterministic (or to be
        # re-used?).
        session_id = None

        # Note: the current implementation implies that only one Task for the
        # dispatcher will exist at a time. We are further assuming that there will
        # probably only be one Task per the lifetime of the dispatcher object.
        # We could choose another approach and change our assumptions, if appropriate.
        logger.debug('Entering RP dispatching context. Waiting for rp.Session.')

        # Note: radical.pilot.Session creation causes several deprecation warnings.
        # Ref https://github.com/radical-cybertools/radical.pilot/issues/2185
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            # This would be a good time to `await`, if an event-loop friendly
            # Session creation function becomes available.
            runtime = Runtime(session=rp.Session(uid=session_id, cfg=session_config))
        session_id = runtime.session.uid
        # Do we want to log this somewhere?
        # session_config = copy.deepcopy(self.session.cfg.as_dict())
        logger.debug('RP dispatcher acquired session {}'.format(session_id))

        # We can launch an initial Pilot, but we may have to run further Pilots
        # during self._queue_runner_task (or while servicing scalems.wait() within the
        # with block) to handle dynamic work load requirements.
        # Optionally, we could refrain from launching the pilot here, at all,
        # but it seems like a good chance to start bootstrapping the agent environment.
        logger.debug('Launching PilotManager.')
        pilot_manager = rp.PilotManager(session=runtime.session)
        logger.debug('Got PilotManager {}.'.format(pilot_manager.uid))
        runtime.pilot_manager(pilot_manager)

        logger.debug('Launching TaskManager.')
        task_manager = rp.TaskManager(session=runtime.session)
        logger.debug('Got TaskManager {}.'.format(task_manager.uid))
        runtime.task_manager(task_manager)

        #
        # Get a Pilot
        #

        # # TODO: #94 Describe (link to) configuration points.
        # resource_config['local.localhost'].update({
        #     'project': None,
        #     'queue': None,
        #     'schema': None,
        #     'cores': 1,
        #     'gpus': 0
        # })

        # _pilot_description = dict(_resource=_resource,
        #                           runtime=30,
        #                           exit_on_error=True,
        #                           project=resource_config[_resource]['project'],
        #                           queue=resource_config[_resource]['queue'],
        #                           cores=resource_config[_resource]['cores'],
        #                           gpus=resource_config[_resource]['gpus'])

        # TODO: How to specify PilotDescription? (see also #121)
        # Where should this actually be coming from?
        # We need to inspect both the HPC allocation and the work load, I think,
        # and combine with user-provided preferences.
        pilot_description = {}
        pilot_description.update(config.rp_resource_params.get('PilotDescription', {}))
        pilot_description.update({'resource': config.execution_target})
        # TODO: Pilot venv (#90, #94).
        # Currently, the Pilot venv must be specified in the JSON file for resource
        # definitions.
        pilot_description = rp.PilotDescription(pilot_description)

        # How and when should we update the pilot description?
        logger.debug('Submitting PilotDescription {}'.format(repr(pilot_description)))
        pilot = pilot_manager.submit_pilots(pilot_description)
        logger.debug('Got Pilot {}'.format(pilot.uid))
        runtime.pilot(pilot)

        # Note that the task description for the master (and worker) can specify a
        # *named_env* attribute to use a venv prepared via Pilot.prepare_env.
        # E.g.
        #     pilot.prepare_env({'numpy_env': {'type': 'virtualenv',
        #                                      'version': '3.6',
        #                                      'setup': ['numpy']}})
        #     td.named_env = 'numpy_env'
        # Note that td.named_env MUST be a key that is given to pilot.prepare_env(arg:
        # dict) or the task will wait indefinitely to be scheduled.
        # Alternatively, we could use a pre-installed venv by putting
        # `. path/to/ve/bin/activate`
        # in the TaskDescription.pre_exec list.

        # TODO: Use archives generated from (acquired through) the local installations.
        # # Could we stage in archive distributions directly?
        # # self.pilot.stage_in()
        # pilot.prepare_env(
        #     {
        #         'scalems_env': {
        #             'type': 'virtualenv',
        #             'version': '3.8',
        #             'setup': [
        #                 # TODO: Generalize scalems dependency resolution.
        #                 # Ideally, we would check the current API version
        #                 # requirement, map that to a package version,
        #                 # and specify >=min_version, allowing cached archives to
        #                 # satisfy the dependency.
        #                 rp_spec,
        #                 scalems_spec
        #             ]}})

        # Question: when should we remove the pilot from the task manager?
        task_manager.add_pilots(pilot)
        logger.debug('Added Pilot {} to task manager {}.'.format(
            pilot.uid, task_manager.uid))

        pre_exec = get_pre_exec(config)
        assert isinstance(pre_exec, tuple)
        assert len(pre_exec) > 0
        # Verify usable SCALEMS RP connector.
        # TODO: Fetch a profile of the venv for client-side analysis
        #     (e.g. `pip freeze`).
        # TODO: Check for a compatible installed scalems API version.
        rp_check = task_manager.submit_tasks(
            rp.TaskDescription({
                # 'executable': py_venv,
                'executable': 'python3',
                'arguments': ['-c', 'import radical.pilot as rp; print(rp.version)'],
                'pre_exec': list(pre_exec)
                # 'named_env': 'scalems_env'
            }))
        logger.debug('Checking RP execution environment.')
        states = task_manager.wait_tasks(uids=[rp_check.uid])
        if states[0] != rp.states.DONE or rp_check.exit_code != 0:
            raise DispatchError('Could not verify RP in execution environment.')

        try:
            remote_rp_version = packaging.version.parse(rp_check.stdout.rstrip())
        except Exception as e:
            raise DispatchError('Could not determine remote RP version.') from e
        # TODO: #100 Improve compatibility checking.
        if remote_rp_version < packaging.version.parse('1.6.0'):
            raise DispatchError(f'Incompatible radical.pilot version in execution '
                                f'environment: {str(remote_rp_version)}')

        #
        # Get a scheduler task.
        #

        assert runtime.scheduler is None
        # TODO: #119 Re-enable raptor.
        # runtime.scheduler = _get_scheduler(
        #     'raptor.scalems',
        #     pre_exec=execution_manager._pre_exec,
        #     task_manager=task_manager)
        # Note that we can derive scheduler_name from self.scheduler.uid in later
        # methods.
        # Note: The worker script name only appears in the config file.
        # logger.info('RP scheduler ready.')
        # logger.debug(repr(execution_manager.scheduler))

        return runtime

    except asyncio.CancelledError as e:
        raise e
    except Exception as e:
        logger.exception('Exception while connecting RADICAL Pilot.', exc_info=e)
        raise DispatchError('Failed to launch SCALE-MS master task.') from e
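# Sketch of the call protocol described in the _connect_rp() docstring: prefer
# the async context manager so the matching _disconnect_rp() always runs.
# `instance` stands in for the (hypothetical here) execution-manager object
# that owns these methods.
async def example_dispatch(instance):
    async with instance:
        # Runtime resources (Session, PilotManager, TaskManager, Pilot) are
        # available inside the block and released on exit.
        ...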