# Example 1
def main():
    """Create a ScaleMSMaster and submit a worker description to it.

    When a path is given as the first command-line argument, it is read as a
    JSON config: the config is passed to the master and supplies the worker
    description and the count/cores/gpus values. Without an argument, a single
    default ``scalems_rp_worker`` worker (one core, no GPU) is described.
    """
    # TODO: Test both with and without a provided config file.
    kwargs = {}
    if len(sys.argv) > 1:
        cfg = ru.Config(cfg=ru.read_json(sys.argv[1]))
        kwargs['cfg'] = cfg
        # No trailing commas: they would silently wrap each value in a 1-tuple.
        descr = cfg.worker_descr
        count = cfg.n_workers
        cores = cfg.cpn
        gpus = cfg.gpn
    else:
        descr = rp.TaskDescription({
            'uid': 'raptor.worker',
            'executable': 'scalems_rp_worker',
            'arguments': []
        })
        count = 1
        cores = 1
        gpus = 0
    master = ScaleMSMaster(**kwargs)

    master.submit(descr=descr, count=count, cores=cores, gpus=gpus)

# Example 2
def test_rp_basic_task_remote(rp_task_manager, pilot_description):
    """Run ``/usr/bin/hostname`` as an RP task and compare with the local host name.

    The test is skipped when the pilot description uses the 'local' access
    schema, since the remote and local host names would then be the same.
    """
    import subprocess

    import radical.pilot as rp

    if pilot_description.access_schema and pilot_description.access_schema == 'local':
        pytest.skip('This test is only for remote execution.')

    tmgr = rp_task_manager
    session = tmgr.session
    assert not session.closed

    td = rp.TaskDescription({
        'executable': '/usr/bin/hostname',
        'cpu_processes': 1
    })

    task = tmgr.submit_tasks(td)

    # State, exit code and stdout are only meaningful once the task has finished.
    tmgr.wait_tasks(uids=[task.uid])

    assert task.state == rp.states.DONE
    assert task.exit_code == 0

    # Run the same executable locally so that the comparison below is between
    # two host name strings (not between a string and a tuple).
    localname = subprocess.run(
        ['/usr/bin/hostname'],
        stdout=subprocess.PIPE,
        encoding='utf-8',
        check=True,
    ).stdout.rstrip()
    remotename = task.stdout.rstrip()
    assert len(remotename) > 0
    assert remotename != localname
# Example 3
def _describe_raptor_task(item: scalems.workflow.Task, scheduler: str,
                          pre_exec: list) -> rp.TaskDescription:
    """Derive a RADICAL Pilot TaskDescription from a scalems workflow item.

    The TaskDescription will be submitted to the named *scheduler*,
    where *scheduler* is the UID of a task managing the life of a rp.raptor.Master.

    Caller is responsible for ensuring that *scheduler* is valid.

    Args:
        item: workflow item; its ``input['argv']`` provides the command line.
        scheduler: UID of the master task that will receive the work.
        pre_exec: shell lines to run before the task.

    Returns:
        A TaskDescription whose single argument is the JSON-encoded work
        dictionary, with empty input and output staging lists.
    """
    # Warning: TaskDescription class does not have a strongly defined interface.
    # Check docs for schema.
    # Ref: scalems_rp_master._RaptorTaskDescription
    task_description = rp.TaskDescription(from_dict=dict(
        executable='scalems',  # This value is currently ignored, but must be set.
        # NOTE(review): *pre_exec* was otherwise unused in this function;
        # presumably it belongs here -- confirm against the original source.
        pre_exec=pre_exec
    ))
    # NOTE(review): _describe_legacy_task() uses item.uid().hex(); confirm which
    # form the uid should take here.
    task_description.uid = item.uid()
    task_description.scheduler = str(scheduler)
    # Example work would be the JSON serialized form of the following dictionary.
    # {'mode': 'call',
    #  'cores': 1,
    #  'timeout': 10,
    #  'data': {'method': 'hello',
    #           'kwargs': {'world': uid}}}
    # Maybe something like this:
    # work_dict = {
    #     'mode': 'scalems',
    #     'cores': 1,
    #     'timeout': 10,
    #     'data': item.serialize()
    # }
    argv = item.input['argv']
    work_dict = {
        'mode': 'exec',
        'cores': 1,
        'timeout': None,
        'data': {
            'exe': argv[0],
            'args': argv[1:]
        }
    }
    task_description.arguments = [json.dumps(work_dict)]

    # TODO: Check for and activate an appropriate venv
    # using
    #     task_description.pre_exec = ...
    # or
    #     task_description.named_env = ...

    # TODO: Interpret item details and derive appropriate staging directives.
    task_description.input_staging = []
    task_description.output_staging = []

    return task_description
# Example 4
def _describe_legacy_task(item: scalems.workflow.Task,
                          pre_exec: list) -> rp.TaskDescription:
    """Derive a RADICAL Pilot TaskDescription from a scalems workflow item.

    For a "raptor" style task, see _describe_raptor_task()

    Args:
        item: workflow item; must be a ('scalems', 'subprocess', 'SubprocessTask').
        pre_exec: shell lines to run before the task.

    Returns:
        A TaskDescription for running the item's command line as an RP task.
    """
    subprocess_type = TypeIdentifier(
        ('scalems', 'subprocess', 'SubprocessTask'))
    assert item.description().type() == subprocess_type
    input_data = item.input
    task_input = scalems.subprocess.SubprocessInput(**input_data)
    args = list(task_input.argv)
    # Warning: TaskDescription class does not have a strongly defined interface.
    # Check docs for schema.
    # NOTE(review): the original constructor arguments are missing from this copy
    # of the source. The fields below are the ones implied by *args* and
    # *pre_exec*; confirm whether more (e.g. stdout/stderr) are required.
    task_description = rp.TaskDescription(from_dict=dict(
        executable=args[0],
        arguments=args[1:],
        pre_exec=pre_exec
    ))
    uid: str = item.uid().hex()
    task_description.uid = uid

    # TODO: Check for and activate an appropriate venv
    # using
    #     task_description.pre_exec = ...
    # or
    #     task_description.named_env = ...

    # TODO: Interpret item details and derive appropriate staging directives.
    task_description.input_staging = list(task_input.inputs.values())
    # NOTE(review): the original list began with two explicit staging
    # directives whose contents are missing from this copy of the source.
    # Restore them from the original before relying on output staging.
    task_description.output_staging = list(task_input.outputs.values())

    return task_description
# Example 5
def _get_scheduler(name: str, pre_exec: typing.Iterable[str],
                   task_manager: rp.TaskManager):
    """Establish the radical.pilot.raptor.Master task.

    Create a master rp.Task (running the scalems_rp_master script) with the
    provided *name* to be referenced as the *scheduler* for raptor tasks.

    Returns the rp.Task for the master script once the Master is ready to
    receive submissions.

    Raises:
        DispatchError if the master task could not be launched successfully.

    Note:
        Currently there is no completion condition for the master script.
        Caller is responsible for canceling the Task returned by this function.
    """
    # This is the name that should be resolvable in an active venv for the script we
    # install as
    # pkg_resources.get_entry_info('scalems', 'console_scripts', 'scalems_rp_master').name
    master_script = 'scalems_rp_master'

    # We can probably make the config file a permanent part of the local metadata,
    # but we don't really have a scheme for managing local metadata right now.
    # with tempfile.TemporaryDirectory() as dir:
    #     config_file_name = 'raptor_scheduler_config.json'
    #     config_file_path = os.path.join(dir, config_file_name)
    #     with open(config_file_path, 'w') as fh:
    #         encoded = scalems_rp_master.encode_as_dict(scheduler_config)
    #         json.dump(encoded, fh, indent=2)

    # define a raptor.scalems master and launch it within the pilot
    td = rp.TaskDescription({'uid': name, 'executable': master_script})
    td.arguments = []
    # *pre_exec* is only promised to be an iterable; store a concrete list.
    td.pre_exec = list(pre_exec)
    # td.named_env = 'scalems_env'
    logger.debug('Launching RP scheduler.')
    scheduler = task_manager.submit_tasks(td)
    # WARNING: rp.Task.wait() *state* parameter does not handle tuples, but does not
    # check type.
    scheduler.wait(state=[rp.states.AGENT_EXECUTING] + rp.FINAL)
    # The wait also returns for final states, so a canceled or failed master must
    # be rejected here. (The previous `not in` test raised for a healthy master.)
    if scheduler.state in {rp.states.CANCELED, rp.states.FAILED}:
        raise DispatchError('Could not get Master task for dispatching.')
    return scheduler
# Example 6
    def _job_2_descr(self, job):
        """Translate *job* into an rp.TaskDescription.

        The job uid becomes the task name; executable, arguments, environment
        and the stdout/stderr paths are copied from ``job.spec``.
        """

        # FIXME: RP does not support STDIN.  Should we raise if STDIN is
        #        specified?

        spec = job.spec
        from_dict = {
            # TODO: use meta data for jpsi uid
            'name': job.uid,
            'executable': spec.executable,
            'arguments': spec.arguments,
            'environment': spec.environment,
            # 'stdin': spec.stdin_path,
            'stdout': spec.stdout_path,
            'stderr': spec.stderr_path,
            # NOTE(review): the right-hand side of the 'sandbox' assignment is
            # missing from this copy of the source. The job's working directory
            # is the presumed value -- confirm against the original.
            'sandbox': spec.directory,
        }

        return rp.TaskDescription(from_dict=from_dict)
# Example 7
def test_prepare_venv(rp_task_manager, sdist):
    """Bootstrap the scalems package in a RP target environment using pilot.prepare_env.

    This test function specifically tests the local.localhost resource.

    Note that we cannot wait on the environment preparation directly, but we can define
    a task with ``named_env`` matching the *prepare_env* key to implicitly depend on
    successful creation.
    """
    # NOTE: *sdist* is a path of an sdist archive that we could stage for the venv installation.
    # QUESTION: Can't we use the radical.pilot package archive that was already placed for bootstrapping the pilot?

    # TODO: Merge with test_rp_raptor_local but use the installed scalems_rp_master and scalems_rp_worker files

    import radical.pilot as rp
    import radical.saga as rs
    import radical.utils as ru
    # We only expect one pilot
    pilot: rp.Pilot = rp_task_manager.get_pilots()[0]
    # We get a dictionary...
    # assert isinstance(pilot, rp.Pilot)
    # But it looks like it has the pilot id in it.
    pilot_uid = typing.cast(dict, pilot)['uid']
    pmgr_uid = typing.cast(dict, pilot)['pmgr']
    session: rp.Session = rp_task_manager.session
    pmgr: rp.PilotManager = session.get_pilot_managers(pmgr_uids=pmgr_uid)
    assert isinstance(pmgr, rp.PilotManager)
    pilot = pmgr.get_pilots(uids=pilot_uid)
    assert isinstance(pilot, rp.Pilot)
    # It looks like either the pytest fixture should deliver something other than the TaskManager,
    # or the prepare_venv part should be moved to a separate function, such as in conftest...

    sdist_names = {
        'scalems': os.path.basename(sdist),
        'rp': rp.sdist_name,
        'ru': ru.sdist_name,
        'rs': rs.sdist_name
    }
    sdist_local_paths = {
        'scalems': sdist,
        'rp': rp.sdist_path,
        'rs': rs.sdist_path,
        'ru': ru.sdist_path
    }
    logger.debug('Checking paths: ' + ', '.join(sdist_local_paths.values()))
    for path in sdist_local_paths.values():
        assert os.path.exists(path)

    sandbox_path = urllib.parse.urlparse(pilot.pilot_sandbox).path

    sdist_session_paths = {
        name: os.path.join(sandbox_path, archive)
        for name, archive in sdist_names.items()
    }

    logger.debug('Staging ' + ', '.join(sdist_session_paths.values()))

    input_staging = [
        {
            'source': sdist_local_paths[name],
            'target': sdist_session_paths[name],
            'action': rp.TRANSFER
        }
        for name in sdist_names
    ]
    # NOTE(review): the call consuming *input_staging* is missing from this copy
    # of the source; staging through the pilot is presumed -- confirm.
    pilot.stage_in(input_staging)

    tmgr = rp_task_manager

    # The key used here must match the task's ``named_env`` below.
    pilot.prepare_env({
        'scalems_env': {
            'type': 'virtualenv',
            'version': '3.8',
            'setup': list(sdist_session_paths.values())
        }
    })

    # NOTE(review): the executable, the tail of the '-c' script and 'named_env'
    # are missing from this copy of the source. They are reconstructed from the
    # docstring and the debug message below -- confirm against the original.
    td = rp.TaskDescription({
        'executable': 'python3',
        'arguments': [
            '-c', 'import radical.pilot as rp;'
            'import scalems;'
            'print(rp.version_detail);'
            'print(scalems.__file__)'
        ],
        'named_env': 'scalems_env'
    })
    task = tmgr.submit_tasks(td)
    # stdout and exit_code are only meaningful after the task has finished.
    tmgr.wait_tasks(uids=[task.uid])
    logger.debug(f'RP version details and scalems location: {task.stdout}')
    assert task.exit_code == 0
# Example 8
def test_rp_raptor_staging(pilot_description, rp_venv):
    """Test file staging for raptor Master and Worker tasks.

    - upon pilot startup, transfer a file to the pilot sandbox
    - upon master startup, create a link to that file for each master
    - for each task, copy the file into the task sandbox
    - upon task completion, transfer the files to the client (and rename them)
    """
    import time
    import radical.pilot as rp

    # Note: we need to install the current scalems package to test remotely.
    # If this is problematic, we can add a check like the following.
    #     if pilot_description.resource != 'local.localhost' \
    #             and pilot_description.access_schema \
    #             and pilot_description.access_schema != 'local':
    #         pytest.skip('This test is only for local execution.')

    # Hopefully, this requirement is temporary.
    # Checked before the Session is created so that a skip does not leave an
    # open session behind.
    if rp_venv is None:
        pytest.skip('This test requires a user-provided static RP venv.')

    if rp_venv:
        pre_exec = ['. {}/bin/activate'.format(rp_venv)]
    else:
        pre_exec = None

    # Note: radical.pilot.Session creation causes several deprecation warnings.
    # Ref: (link missing in this copy of the source)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        session = rp.Session()
    fname = '%d.dat' % os.getpid()
    fpath = os.path.join('/tmp', fname)
    data: str = time.asctime()

    # NOTE(review): several statements in this function (file write, pilot
    # registration, task description heads and staging keys, master
    # cancellation, session cleanup) are missing from this copy of the source
    # and are reconstructed from the surrounding comments. Confirm each one
    # marked below against the original.
    try:
        pmgr = rp.PilotManager(session=session)
        tmgr = rp.TaskManager(session=session)

        # Illustrate data staging as part of the Pilot launch.
        # By default, file is copied to the root of the Pilot sandbox,
        # where it can be referenced as 'pilot:///filename'
        # Alternatively: pilot.stage_in() and pilot.stage_output() (blocking calls)
        pilot_description.exit_on_error = True
        pilot_description.input_staging = [fpath]
        with open(fpath, 'w') as fh:
            # Reconstructed: the final assertions compare against *data*.
            fh.write(data)
        try:
            pilot = pmgr.submit_pilots(pilot_description)
            # Confirmation that the input file has been staged by waiting for pilot state.
            pilot.wait(state=[rp.states.PMGR_ACTIVE] + rp.FINAL)
        finally:
            # Reconstructed: remove the temporary local file.
            os.unlink(fpath)

        # Reconstructed: the task manager needs a pilot to place tasks on.
        tmgr.add_pilots(pilot)

        uid = 'scalems.master.001'
        # Illustrate another mode of data staging with the Master task submission.
        td = rp.TaskDescription({
            # Reconstructed: 'uid', 'executable' and 'pre_exec'.
            'uid': uid,
            'executable': 'scalems_rp_master',
            'input_staging': [{
                'source': 'pilot:///%s' % fname,
                'target': 'pilot:///%s.%s.lnk' % (fname, uid),
                'action': rp.LINK
            }],
            'pre_exec': pre_exec
            # 'named_env': 'scalems_env'
        })

        master = tmgr.submit_tasks(td)

        # Illustrate availability of scheduler and of data staged with Master task.
        # When the task enters AGENT_SCHEDULING_PENDING it has passed all input staging,
        # and the files will be available.
        # (see: link missing in this copy of the source)

        # Confirm that Master script is running (and ready to receive raptor tasks)
        # WARNING: rp.Task.wait() *state* parameter does not handle tuples, but does not check type.
        master.wait(state=[rp.states.AGENT_EXECUTING] + rp.FINAL)
        assert master.state not in {rp.CANCELED, rp.FAILED}

        # define raptor tasks and submit them to the master
        tds = list()
        # Illustrate data staging as part of raptor task submission.
        # Note that tasks submitted by the client get
        # a sandboxed task directory, whereas those submitted by the Master (through Master.request(),
        # through the wrapper script or the Master.create_initial_tasks() hook) do not,
        # and do not have a data staging phase.
        for i in range(3):
            uid = 'scalems.%06d' % i
            work = {
                'mode': 'call',
                'cores': 1,
                'timeout': 10,  # seconds
                'data': {
                    'method': 'hello',
                    'kwargs': {
                        'world': uid
                    }
                }
            }
            tds.append(rp.TaskDescription({
                # Reconstructed: 'uid', 'executable', 'scheduler', 'pre_exec',
                # and the 'source'/'target'/'action' keys of both directives.
                'uid': uid,
                'executable': 'scalems',  # presumably ignored for raptor tasks, but must be set
                'input_staging': [{
                    'source': 'pilot:///%s.%s.lnk' % (fname, master.uid),
                    'target': 'task:///%s' % fname,
                    'action': rp.COPY
                }],
                'output_staging': [{
                    'source': 'task:///%s' % fname,
                    'target': 'client:///%s.%s.out' % (fname, uid),
                    'action': rp.TRANSFER
                }],
                'scheduler': master.uid,
                'arguments': [json.dumps(work)],
                'pre_exec': pre_exec
            }))
        # TODO: Organize client-side data with managed hierarchical paths.
        # Question: RP maintains a filesystem hierarchy on the client side, correct?
        # Answer: only for profiling and such: do not use for data or user-facing stuff.
        tasks = tmgr.submit_tasks(tds)
        # TODO: Clarify the points at which the data exists or is accessed.
        # * When the (client-submitted) task enters AGENT_STAGING_OUTPUT_PENDING,
        #   it has finished executing and output data should be accessible as 'task:///outfile'.
        # * When the (client-submitted) task reaches one of the rp.FINAL stages, it has finished
        #   output staging and files are accessible at the location specified in 'output_staging'.
        # * Tasks submitted directly by the Master (example?) do not perform output staging;
        #   data is written before entering Master.result_cb().
        # RP Issue: client-submitted Tasks need to be accessible through a path that is common
        # with the Master-submitted (`request()`) tasks. (SCALE-MS #108)

        assert len(tasks) == len(tds)
        # 'arguments' (element 0) gets wrapped in a Request at the Master by _receive_tasks,
        # then the list of requests is passed to Master.request(), which is presumably
        # an extension point for derived Master implementations. The base class method
        # converts requests to dictionaries and adds them to a request queue, from which they are
        # picked up by the Worker in _request_cb. Then picked up in forked interpreter
        # by Worker._dispatch, which checks the *mode* of the Request and dispatches
        # according to native or registered mode implementations. (e.g. 'call' (native) or 'scalems')

        # task process is launched with Python multiprocessing (native) module and added to self._pool.
        # When the task runs, its result triggers _result_cb

        # wait for *those* tasks to complete and report results
        tmgr.wait_tasks(uids=[t.uid for t in tasks])

        # Cancel the master.
        # Cancel blocks until the task is done so the following wait is currently redundant,
        # but there is a ticket open to change this behavior.
        # See: (link missing in this copy of the source)
        tmgr.cancel_tasks(uids=master.uid)
        tmgr.wait_tasks(uids=[master.uid])

        # Note that these map as follows:
        #     * 'client:///' == $PWD
        #     * 'task:///' == urllib.parse.urlparse(task.sandbox).path
        #     * 'pilot:///' == urllib.parse.urlparse(pilot.pilot_sandbox).path

        for t in tasks:
            outfile = './%s.%s.out' % (fname, t.uid)
            assert os.path.exists(outfile)
            with open(outfile, 'r') as outfh:
                assert outfh.readline().rstrip() == data
    finally:
        # Reconstructed: always release the session created above.
        session.close()

# Example 9
async def test_rp_future(rp_task_manager):
    """Check our Future implementation.

    Fulfill the asyncio.Future protocol for a rp.Task wrapper object. The wrapper
    should appropriately yield when the rp.Task is not finished.

    Four scenarios are exercised in turn: cancellation of the rp.Task,
    cancellation of the asyncio watcher task, cancellation of the asyncio
    future, and an uninterrupted run to completion.
    """
    import radical.pilot as rp

    tmgr = rp_task_manager

    # NOTE(review): the 'executable' entry and the three statements that trigger
    # each cancellation (task.cancel(), wrapper.cancel(), future.cancel()) are
    # missing from this copy of the source. They are reconstructed from the
    # section comments and the assertions that follow -- confirm.
    td = rp.TaskDescription({
        'executable': '/bin/bash',
        'arguments': ['-c', '/bin/sleep 5 && /bin/echo success'],
    })

    # Test propagation of RP cancellation behavior
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Future = await scalems.radical.rp_task(task, future)

    task.cancel()
    try:
        # TODO: With Python 3.9, check cancellation message for how the cancellation propagated.
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(wrapper, timeout=120)
    except asyncio.TimeoutError:
        # Useful point to insert an easy debugging break point
        raise

    assert future.cancelled()
    assert wrapper.cancelled()
    assert task.state == rp.states.CANCELED

    # Test propagation of asyncio watcher task cancellation.
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Task = await scalems.radical.rp_task(task, future)

    assert isinstance(wrapper, asyncio.Task)
    wrapper.cancel()
    try:
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(wrapper, timeout=5)
    except asyncio.TimeoutError:
        # Useful point to insert an easy debugging break point
        raise
    assert wrapper.cancelled()
    assert future.cancelled()

    # WARNING: rp.Task.wait() never completes with no arguments.
    # WARNING: This blocks. Don't do it in the event loop thread.
    task.wait(state=rp.states.CANCELED, timeout=120)
    # Note that if the test is paused by a debugger, the rp task may
    # have a chance to complete before being canceled.
    # Ordinarily, that will not happen in this test.
    # assert task.state in (rp.states.CANCELED, rp.states.DONE)
    assert task.state in (rp.states.CANCELED, )

    # Test propagation of asyncio future cancellation.
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Task = await scalems.radical.rp_task(task, future)

    assert isinstance(wrapper, asyncio.Task)
    future.cancel()
    try:
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(future, timeout=5)
        # The watcher is expected to finish on its own rather than be cancelled.
        await asyncio.wait_for(wrapper, timeout=1)
    except asyncio.TimeoutError:
        # Useful point to insert an easy debugging break point
        raise
    assert not wrapper.cancelled()
    assert future.cancelled()

    # WARNING: rp.Task.wait() never completes with no arguments.
    # WARNING: This blocks. Don't do it in the event loop thread.
    task.wait(state=rp.states.CANCELED, timeout=120)
    # Note that if the test is paused by a debugger, the rp task may
    # have a chance to complete before being canceled.
    # Ordinarily, that will not happen in this test.
    # assert task.state in (rp.states.CANCELED, rp.states.DONE)
    assert task.state in (rp.states.CANCELED, )

    # Test run to completion
    task: rp.Task = tmgr.submit_tasks(td)

    future = asyncio.get_running_loop().create_future()
    wrapper: asyncio.Task = await scalems.radical.rp_task(task, future)

    timeout = 120
    try:
        result = await asyncio.wait_for(future, timeout=timeout)
    except asyncio.TimeoutError as e:
        logger.debug(f'Waited more than {timeout} for {future}: {e}')
        result = None
    assert task.exit_code == 0
    assert 'success' in task.stdout

    assert 'stdout' in result
    assert 'success' in result['stdout']
    assert wrapper.done()
# Example 10
def _connect_rp(config: Configuration) -> Runtime:
    """Establish the RP Session.

    Acquire as many re-usable resources as possible. The scope established by
    this function is as broad as it can be within the life of this instance.

    Once instance._connect_rp() succeeds, instance._disconnect_rp() must be called to
    clean up resources. Use the async context manager behavior of the instance to
    automatically follow this protocol. I.e. instead of calling
    ``instance._connect_rp(); ...; instance._disconnect_rp()``,
    use::

        async with instance:
            ...

    Raises:
        DispatchError if task dispatching could not be set up.

        CanceledError if parent asyncio.Task is cancelled while executing.

    """
    # TODO: Consider inlining this into __aenter__().
    # A non-async method is potentially useful for debugging, but causes the event loop
    # to block while waiting for the RP tasks included here. If this continues to be a
    # slow function, we can wrap the remaining RP calls and let this function be
    # inlined, or stick the whole function in a separate thread with
    # loop.run_in_executor().

    # TODO: RP triggers SIGINT in various failure modes.
    #  We should use loop.add_signal_handler() to convert to an exception
    #  that we can raise in an appropriate task.
    # Note that PilotDescription can use `'exit_on_error': False` to suppress the SIGINT,
    # but we have not explored the consequences of doing so.

    # NOTE(review): several call heads and arguments in this function are missing
    # from this copy of the source and are reconstructed from context; each is
    # marked "Reconstructed" below. Confirm against the original.
    try:
        # Start the Session.

        # Note that we cannot resolve the full _resource config until we have a Session
        # object.
        # We cannot get the default session config until after creating the Session,
        # so we don't have a template for allowed, required, or default values.
        # Question: does the provided *cfg* need to be complete? Or will it be merged
        # with default values from some internal definition, such as by dict.update()?
        # I don't remember what the use cases are for overriding the default session
        # config.
        session_config = None
        # At some point soon, we need to track Session ID for the workflow metadata.
        # We may also want Session ID to be deterministic (or to be re-used?).
        session_id = None

        # Note: the current implementation implies that only one Task for the dispatcher
        # will exist at a time. We are further assuming that there will probably only
        # be one Task per the lifetime of the dispatcher object.
        # We could choose another approach and change our assumptions, if appropriate.
        # Reconstructed: the logging call head.
        logger.debug(
            'Entering RP dispatching context. Waiting for rp.Session.')

        # Note: radical.pilot.Session creation causes several deprecation warnings.
        # Ref: (link missing in this copy of the source)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            # This would be a good time to `await`, if an event-loop friendly
            # Session creation function becomes available.
            runtime = Runtime(
                session=rp.Session(uid=session_id, cfg=session_config))
        session_id = runtime.session.uid
        # Do we want to log this somewhere?
        # session_config = copy.deepcopy(self.session.cfg.as_dict())
        logger.debug('RP dispatcher acquired session {}'.format(session_id))

        # We can launch an initial Pilot, but we may have to run further Pilots
        # during self._queue_runner_task (or while servicing scalems.wait() within the
        # with block) to handle dynamic work load requirements.
        # Optionally, we could refrain from launching the pilot here, at all,
        # but it seems like a good chance to start bootstrapping the agent environment.
        # NOTE(review): in the visible code the managers and the pilot are never
        # attached to *runtime*; check whether such statements were lost.
        logger.debug('Launching PilotManager.')
        pilot_manager = rp.PilotManager(session=runtime.session)
        logger.debug('Got PilotManager {}.'.format(pilot_manager.uid))

        logger.debug('Launching TaskManager.')
        task_manager = rp.TaskManager(session=runtime.session)
        logger.debug('Got TaskManager {}'.format(task_manager.uid))

        # Get a Pilot

        # # TODO: #94 Describe (link to) configuration points.
        # resource_config['local.localhost'].update({
        #     'project': None,
        #     'queue': None,
        #     'schema': None,
        #     'cores': 1,
        #     'gpus': 0
        # })

        # _pilot_description = dict(_resource=_resource,
        #                          runtime=30,
        #                          exit_on_error=True,
        #                          project=resource_config[_resource]['project'],
        #                          queue=resource_config[_resource]['queue'],
        #                          cores=resource_config[_resource]['cores'],
        #                          gpus=resource_config[_resource]['gpus'])

        # TODO: How to specify PilotDescription? (see also #121)
        # Where should this actually be coming from?
        # We need to inspect both the HPC allocation and the work load, I think,
        # and combine with user-provided preferences.
        pilot_description = {}
        # Reconstructed: the `update(` call head.
        pilot_description.update(
            config.rp_resource_params.get('PilotDescription', {}))
        pilot_description.update({'resource': config.execution_target})

        # TODO: Pilot venv (#90, #94).
        # Currently, Pilot venv must be specified in the JSON file for resource
        # definitions.
        pilot_description = rp.PilotDescription(pilot_description)

        # How and when should we update pilot description?
        # Reconstructed: the format argument.
        logger.debug('Submitting PilotDescription {}'.format(
            repr(pilot_description)))
        pilot = pilot_manager.submit_pilots(pilot_description)
        logger.debug('Got Pilot {}'.format(pilot.uid))

        # Note that the task description for the master (and worker) can specify a
        # *named_env* attribute to use a venv prepared via Pilot.prepare_env
        # E.g.         pilot.prepare_env({'numpy_env' : {'type'   : 'virtualenv',
        #                                           'version': '3.6',
        #                                           'setup'  : ['numpy']}})
        #   td.named_env = 'numpy_env'
        # Note that td.named_env MUST be a key that is given to pilot.prepare_env(arg:
        # dict) or the task will wait indefinitely to be scheduled.
        # Alternatively, we could use a pre-installed venv by putting
        # `. path/to/ve/bin/activate`
        # in the TaskDescription.pre_exec list.

        # TODO: Use archives generated from (acquired through) the local installations.
        # # Could we stage in archive distributions directly?
        # # self.pilot.stage_in()
        # pilot.prepare_env(
        #     {
        #         'scalems_env': {
        #             'type': 'virtualenv',
        #             'version': '3.8',
        #             'setup': [
        #                 # TODO: Generalize scalems dependency resolution.
        #                 # Ideally, we would check the current API version
        #                 # requirement, map that to a package version,
        #                 # and specify >=min_version, allowing cached archives to
        #                 # satisfy the dependency.
        #                 rp_spec,
        #                 scalems_spec
        #             ]}})

        # Question: when should we remove the pilot from the task manager?
        # Reconstructed: the registration that the following log message reports.
        task_manager.add_pilots(pilot)
        logger.debug('Added Pilot {} to task manager {}.'.format(
            pilot.uid, task_manager.uid))

        pre_exec = get_pre_exec(config)
        assert isinstance(pre_exec, tuple)
        assert len(pre_exec) > 0
        # Verify usable SCALEMS RP connector.
        # TODO: Fetch a profile of the venv for client-side analysis (e.g. `pip freeze`).
        # TODO: Check for compatible installed scalems API version.
        # Reconstructed: the TaskDescription wrapper, 'executable' and 'pre_exec'.
        rp_check = task_manager.submit_tasks(
            rp.TaskDescription({
                # 'executable': py_venv,
                'executable': 'python3',
                'arguments':
                ['-c', 'import radical.pilot as rp; print(rp.version)'],
                'pre_exec': list(pre_exec)
                # 'named_env': 'scalems_env'
            }))
        logger.debug('Checking RP execution environment.')
        states = task_manager.wait_tasks(uids=[rp_check.uid])
        if states[0] != rp.states.DONE or rp_check.exit_code != 0:
            raise DispatchError(
                'Could not verify RP in execution environment.')

        try:
            # Reconstructed: the check task prints rp.version on stdout.
            remote_rp_version = packaging.version.parse(
                rp_check.stdout.rstrip())
        except Exception as e:
            raise DispatchError(
                'Could not determine remote RP version.') from e
        # TODO: #100 Improve compatibility checking.
        if remote_rp_version < packaging.version.parse('1.6.0'):
            raise DispatchError(
                f'Incompatible radical.pilot version in execution '
                f'environment: {str(remote_rp_version)}')

        # Get a scheduler task.

        assert runtime.scheduler is None
        # TODO: #119 Re-enable raptor.
        # runtime.scheduler = _get_scheduler(
        #     'raptor.scalems',
        #     pre_exec=execution_manager._pre_exec,
        #     task_manager=task_manager)
        # Note that we can derive scheduler_name from self.scheduler.uid in later methods.
        # Note: The worker script name only appears in the config file.
        # (log message) 'RP scheduler ready.'
        # logger.debug(repr(execution_manager.scheduler))

        return runtime

    except asyncio.CancelledError:
        # Let cancellation propagate untouched.
        raise
    except Exception as e:
        # Reconstructed: the `exc_info` argument.
        logger.exception('Exception while connecting RADICAL Pilot.',
                         exc_info=e)
        raise DispatchError('Failed to launch SCALE-MS master task.') from e