Python TaskManager Examples

Programming Language: Python

Namespace/Package Name: radical.pilot

Method/Function: TaskManager

Examples at hotexamples.com: 6

Python TaskManager - 6 examples found. These are the top rated real world Python examples of radical.pilot.TaskManager extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def test_runtime_mismatch(pilot_description):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')

        session = rp.Session()

        with session:
            original_pmgr = rp.PilotManager(session=session)
            pilot = original_pmgr.submit_pilots(rp.PilotDescription(pilot_description))
            original_tmgr = rp.TaskManager(session=session)
            original_tmgr.add_pilots(pilot)

        assert session.closed
        # This assertion may not be true:
        # assert pilot.state in rp.FINAL
        # Note that Pilot and other components may still be shutting down, but the
        # intention is that, from this point, pmgr, pilot, and tmgr are now "stale".

        session = rp.Session()

        with session:
            state = Runtime(session=session)

            with pytest.raises(APIError):
                state.task_manager(original_tmgr)
            original_tmgr.close()

            tmgr = rp.TaskManager(session=session)
            state.task_manager(tmgr)

            with pytest.raises(APIError):
                state.pilot_manager(original_pmgr)
            original_pmgr.close()

            pmgr = rp.PilotManager(session=session)
            state.pilot_manager(pmgr)

            # The UID will not resolve in the stored PilotManager.
            with pytest.raises(ValueError):
                state.pilot(pilot.uid)

            # The Pilot is detectably invalid.
            with pytest.raises(APIError):
                state.pilot(pilot)

            # Even here, the old Pilot may still be in 'PMGR_ACTIVE_PENDING'
            if pilot.state not in rp.FINAL:
                pilot.cancel()
            tmgr.close()
            pmgr.close()
        assert session.closed

Example #2

Show file

    def __init__(self, descr: dict, executor: jpsi.JobExecutor,
                 url: str) -> None:

        jpsi.ExecutorAdaptorBase.__init__(self, descr, executor, url)

        self._url = ru.Url(url)
        if self._url.schema != 'rp':
            raise ValueError('handle only rp:// URLs, not %s', self._url)

        try:
            self._jobs = dict()  # {job.uid : [JPSI_JOB, RP_TASK]
            self._lock = mt.Lock()

            self._session = rp.Session()

            self._pmgr = rp.PilotManager(session=self._session)
            self._tmgr = rp.TaskManager(session=self._session)

            self._pmgr.register_callback(self._pilot_state_cb)
            self._tmgr.register_callback(self._task_state_cb)

            # this is layer 0, so we just create a dummy pilot
            pd = rp.PilotDescription({
                'resource': 'local.localhost',
                'cores': 16,
                'runtime': 60
            })
            self._pilot = self._pmgr.submit_pilots(pd)
            self._tmgr.add_pilots(self._pilot)

        except Exception:
            self._log.exception('init failed')
            raise

Example #3

Show file

def test_runtime_bad_uid(pilot_description):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='radical.pilot.session')

        session = rp.Session()

        with session:
            state = Runtime(session=session)

            with pytest.raises(ValueError):
                state.task_manager('spam')

            tmgr = rp.TaskManager(session=session)
            state.task_manager(tmgr)

            with pytest.raises(ValueError):
                state.pilot_manager('spam')

            pmgr = rp.PilotManager(session=session)
            state.pilot_manager(pmgr)

            with pytest.raises(ValueError):
                state.pilot_manager('spam')

            tmgr.close()
            pmgr.close()

        assert session.closed

Example #4

Show file

File: conftest.py Project: SCALE-MS/scale-ms

def _new_taskmanager(session: rp.Session, pilot: rp.Pilot):
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                                category=DeprecationWarning,
                                module='radical.pilot.task_manager')
        warnings.filterwarnings('ignore',
                                category=DeprecationWarning,
                                module='radical.pilot.db.database')
        warnings.filterwarnings('ignore',
                                category=DeprecationWarning,
                                module='radical.pilot.session')

        tmgr = rp.TaskManager(session=session)
        tmgr.add_pilots(pilot)
    return tmgr

Example #5

Show file

File: test_rp_exec.py Project: eirrgang/scale-ms

def test_rp_raptor_staging(pilot_description, rp_venv):
    """Test file staging for raptor Master and Worker tasks.

    - upon pilot startup, transfer a file to the pilot sandbox
    - upon master startup, create a link to that file for each master
    - for each task, copy the file into the task sandbox
    - upon task completion, transfer the files to the client (and rename them)
    """
    import time
    import radical.pilot as rp

    # Note: we need to install the current scalems package to test remotely.
    # If this is problematic, we can add a check like the following.
    #     if pilot_description.resource != 'local.localhost' \
    #             and pilot_description.access_schema \
    #             and pilot_description.access_schema != 'local':
    #         pytest.skip('This test is only for local execution.')

    # Note: radical.pilot.Session creation causes several deprecation warnings.
    # Ref https://github.com/radical-cybertools/radical.pilot/issues/2185
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        session = rp.Session()
    fname = '%d.dat' % os.getpid()
    fpath = os.path.join('/tmp', fname)
    data: str = time.asctime()

    # Hopefully, this requirement is temporary.
    if rp_venv is None:
        pytest.skip('This test requires a user-provided static RP venv.')

    if rp_venv:
        pre_exec = ['. {}/bin/activate'.format(rp_venv)]
    else:
        pre_exec = None

    try:
        pmgr = rp.PilotManager(session=session)
        tmgr = rp.TaskManager(session=session)

        # Illustrate data staging as part of the Pilot launch.
        # By default, file is copied to the root of the Pilot sandbox,
        # where it can be referenced as 'pilot:///filename'
        # Alternatively: pilot.stage_in() and pilot.stage_output() (blocking calls)
        pilot_description.exit_on_error = True
        pilot_description.input_staging = [fpath]
        with open(fpath, 'w') as fh:
            fh.writelines([data])
        try:
            pilot = pmgr.submit_pilots(pilot_description)
            # Confirmation that the input file has been staged by waiting for pilot state.
            pilot.wait(state=[rp.states.PMGR_ACTIVE] + rp.FINAL)
        finally:
            os.unlink(fpath)

        tmgr.add_pilots(pilot)

        uid = 'scalems.master.001'
        # Illustrate another mode of data staging with the Master task submission.
        td = rp.TaskDescription({
            'uid':
            uid,
            'executable':
            'scalems_rp_master',
            'input_staging': [{
                'source': 'pilot:///%s' % fname,
                'target': 'pilot:///%s.%s.lnk' % (fname, uid),
                'action': rp.LINK
            }],
            'pre_exec':
            pre_exec
            # 'named_env': 'scalems_env'
        })

        master = tmgr.submit_tasks(td)

        # Illustrate availability of scheduler and of data staged with Master task.
        # When the task enters AGENT_SCHEDULING_PENDING it has passed all input staging,
        # and the files will be available.
        # (see https://docs.google.com/drawings/d/1q5ehxIVdln5tXEn34mJyWAmxBk_DqZ5wwkl3En-t5jo/)

        # Confirm that Master script is running (and ready to receive raptor tasks)
        # WARNING: rp.Task.wait() *state* parameter does not handle tuples, but does not check type.
        master.wait(state=[rp.states.AGENT_EXECUTING] + rp.FINAL)
        assert master.state not in {rp.CANCELED, rp.FAILED}

        # define raptor tasks and submit them to the master
        tds = list()
        # Illustrate data staging as part of raptor task submission.
        # Note that tasks submitted by the client
        # a sandboxed task directory, whereas those submitted by the Master (through Master.request(),
        # through the wrapper script or the Master.create_initial_tasks() hook) do not,
        # and do not have a data staging phase.
        for i in range(3):
            uid = 'scalems.%06d' % i
            work = {
                'mode': 'call',
                'cores': 1,
                'timeout': 10,  # seconds
                'data': {
                    'method': 'hello',
                    'kwargs': {
                        'world': uid
                    }
                }
            }
            tds.append(
                rp.TaskDescription({
                    'uid':
                    uid,
                    'executable':
                    '-',
                    'input_staging': [{
                        'source':
                        'pilot:///%s.%s.lnk' % (fname, master.uid),
                        'target':
                        'task:///%s' % fname,
                        'action':
                        rp.COPY
                    }],
                    'output_staging': [{
                        'source':
                        'task:///%s' % fname,
                        'target':
                        'client:///%s.%s.out' % (fname, uid),
                        'action':
                        rp.TRANSFER
                    }],
                    'scheduler':
                    master.uid,
                    'arguments': [json.dumps(work)],
                    'pre_exec':
                    pre_exec
                }))
        # TODO: Organize client-side data with managed hierarchical paths.
        # Question: RP maintains a filesystem hierarchy on the client side, correct?
        # Answer: only for profiling and such: do not use for data or user-facing stuff.
        tasks = tmgr.submit_tasks(tds)
        # TODO: Clarify the points at which the data exists or is accessed.
        # * When the (client-submitted) task enters AGENT_STAGING_OUTPUT_PENDING,
        #   it has finished executing and output data should be accessible as 'task:///outfile'.
        # * When the (client-submitted) task reaches one of the rp.FINAL stages, it has finished
        #   output staging and files are accessible at the location specified in 'output_staging'.
        # * Tasks submitted directly by the Master (example?) do not perform output staging;
        #   data is written before entering Master.result_cb().
        # RP Issue: client-submitted Tasks need to be accessible through a path that is common
        # with the Master-submitted (`request()`) tasks. (SCALE-MS #108)

        assert len(tasks) == len(tds)
        # 'arguments' (element 0) gets wrapped in a Request at the Master by _receive_tasks,
        # then the list of requests is passed to Master.request(), which is presumably
        # an extension point for derived Master implementations. The base class method
        # converts requests to dictionaries and adds them to a request queue, from which they are
        # picked up by the Worker in _request_cb. Then picked up in forked interpreter
        # by Worker._dispatch, which checks the *mode* of the Request and dispatches
        # according to native or registered mode implementations. (e.g. 'call' (native) or 'scalems')

        # task process is launched with Python multiprocessing (native) module and added to self._pool.
        # When the task runs, it's result triggers _result_cb

        # wait for *those* tasks to complete and report results
        tmgr.wait_tasks(uids=[t.uid for t in tasks])

        # Cancel the master.
        tmgr.cancel_tasks(uids=master.uid)
        # Cancel blocks until the task is done so the following wait it currently redundant,
        # but there is a ticket open to change this behavior.
        # See https://github.com/radical-cybertools/radical.pilot/issues/2336
        tmgr.wait_tasks()

        # Note that these map as follows:
        #     * 'client:///' == $PWD
        #     * 'task:///' == urllib.parse.urlparse(task.sandbox).path
        #     * 'pilot:///' == urllib.parse.urlparse(pilot.pilot_sandbox).path

        for t in tasks:
            print(t)
            outfile = './%s.%s.out' % (fname, t.uid)
            assert os.path.exists(outfile)
            with open(outfile, 'r') as outfh:
                assert outfh.readline().rstrip() == data
            os.unlink(outfile)

        pilot.cancel()
        tmgr.close()
        pmgr.close()

    finally:
        session.close(download=False)

Example #6

Show file

File: runtime.py Project: eirrgang/scale-ms

def _connect_rp(config: Configuration) -> Runtime:
    """Establish the RP Session.

    Acquire as many re-usable resources as possible. The scope established by
    this function is as broad as it can be within the life of this instance.

    Once instance._connect_rp() succeeds, instance._disconnect_rp() must be called to
    clean up resources. Use the async context manager behavior of the instance to
    automatically follow this protocol. I.e. instead of calling
    ``instance._connect_rp(); ...; instance._disconnect_rp()``,
    use::
        async with instance:
            ...

    Raises:
        DispatchError if task dispatching could not be set up.

        CanceledError if parent asyncio.Task is cancelled while executing.

    """
    # TODO: Consider inlining this into __aenter__().
    # A non-async method is potentially useful for debugging, but causes the event loop
    # to block while waiting for the RP tasks included here. If this continues to be a
    # slow function, we can wrap the remaining RP calls and let this function be
    # inlined, or stick the whole function in a separate thread with
    # loop.run_in_executor().

    # TODO: RP triggers SIGINT in various failure modes.
    #  We should use loop.add_signal_handler() to convert to an exception
    #  that we can raise in an appropriate task.
    # Note that PilotDescription can use `'exit_on_error': False` to suppress the SIGINT,
    # but we have not explored the consequences of doing so.

    try:
        #
        # Start the Session.
        #

        # Note that we cannot resolve the full _resource config until we have a Session
        # object.
        # We cannot get the default session config until after creating the Session,
        # so we don't have a template for allowed, required, or default values.
        # Question: does the provided *cfg* need to be complete? Or will it be merged
        # with default values from some internal definition, such as by dict.update()?
        # I don't remember what the use cases are for overriding the default session
        # config.
        session_config = None
        # At some point soon, we need to track Session ID for the workflow metadata.
        # We may also want Session ID to be deterministic (or to be re-used?).
        session_id = None

        # Note: the current implementation implies that only one Task for the dispatcher
        # will exist at a time. We are further assuming that there will probably only
        # be one Task per the lifetime of the dispatcher object.
        # We could choose another approach and change our assumptions, if appropriate.
        logger.debug(
            'Entering RP dispatching context. Waiting for rp.Session.')

        # Note: radical.pilot.Session creation causes several deprecation warnings.
        # Ref https://github.com/radical-cybertools/radical.pilot/issues/2185
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=DeprecationWarning)
            # This would be a good time to `await`, if an event-loop friendly
            # Session creation function becomes available.
            runtime = Runtime(
                session=rp.Session(uid=session_id, cfg=session_config))
        session_id = runtime.session.uid
        # Do we want to log this somewhere?
        # session_config = copy.deepcopy(self.session.cfg.as_dict())
        logger.debug('RP dispatcher acquired session {}'.format(session_id))

        # We can launch an initial Pilot, but we may have to run further Pilots
        # during self._queue_runner_task (or while servicing scalems.wait() within the
        # with block) to handle dynamic work load requirements.
        # Optionally, we could refrain from launching the pilot here, at all,
        # but it seems like a good chance to start bootstrapping the agent environment.
        logger.debug('Launching PilotManager.')
        pilot_manager = rp.PilotManager(session=runtime.session)
        logger.debug('Got PilotManager {}.'.format(pilot_manager.uid))
        runtime.pilot_manager(pilot_manager)

        logger.debug('Launching TaskManager.')
        task_manager = rp.TaskManager(session=runtime.session)
        logger.debug(('Got TaskManager {}'.format(task_manager.uid)))
        runtime.task_manager(task_manager)

        #
        # Get a Pilot
        #

        # # TODO: #94 Describe (link to) configuration points.
        # resource_config['local.localhost'].update({
        #     'project': None,
        #     'queue': None,
        #     'schema': None,
        #     'cores': 1,
        #     'gpus': 0
        # })

        # _pilot_description = dict(_resource=_resource,
        #                          runtime=30,
        #                          exit_on_error=True,
        #                          project=resource_config[_resource]['project'],
        #                          queue=resource_config[_resource]['queue'],
        #                          cores=resource_config[_resource]['cores'],
        #                          gpus=resource_config[_resource]['gpus'])

        # TODO: How to specify PilotDescription? (see also #121)
        # Where should this actually be coming from?
        # We need to inspect both the HPC allocation and the work load, I think,
        # and combine with user-provided preferences.
        pilot_description = {}
        pilot_description.update(
            config.rp_resource_params.get('PilotDescription', {}))
        pilot_description.update({'resource': config.execution_target})

        # TODO: Pilot venv (#90, #94).
        # Currently, Pilot venv must be specified in the JSON file for resource
        # definitions.
        pilot_description = rp.PilotDescription(pilot_description)

        # How and when should we update pilot description?
        logger.debug('Submitting PilotDescription {}'.format(
            repr(pilot_description)))
        pilot = pilot_manager.submit_pilots(pilot_description)
        logger.debug('Got Pilot {}'.format(pilot.uid))
        runtime.pilot(pilot)

        # Note that the task description for the master (and worker) can specify a
        # *named_env* attribute to use a venv prepared via Pilot.prepare_env
        # E.g.         pilot.prepare_env({'numpy_env' : {'type'   : 'virtualenv',
        #                                           'version': '3.6',
        #                                           'setup'  : ['numpy']}})
        #   td.named_env = 'numpy_env'
        # Note that td.named_env MUST be a key that is given to pilot.prepare_env(arg:
        # dict) or the task will wait indefinitely to be scheduled.
        # Alternatively, we could use a pre-installed venv by putting
        # `. path/to/ve/bin/activate`
        # in the TaskDescription.pre_exec list.

        # TODO: Use archives generated from (acquired through) the local installations.
        # # Could we stage in archive distributions directly?
        # # self.pilot.stage_in()
        # pilot.prepare_env(
        #     {
        #         'scalems_env': {
        #             'type': 'virtualenv',
        #             'version': '3.8',
        #             'setup': [
        #                 # TODO: Generalize scalems dependency resolution.
        #                 # Ideally, we would check the current API version
        #                 # requirement, map that to a package version,
        #                 # and specify >=min_version, allowing cached archives to
        #                 # satisfy the dependency.
        #                 rp_spec,
        #                 scalems_spec
        #             ]}})

        # Question: when should we remove the pilot from the task manager?
        task_manager.add_pilots(pilot)
        logger.debug('Added Pilot {} to task manager {}.'.format(
            pilot.uid, task_manager.uid))

        pre_exec = get_pre_exec(config)
        assert isinstance(pre_exec, tuple)
        assert len(pre_exec) > 0
        # Verify usable SCALEMS RP connector.
        # TODO: Fetch a profile of the venv for client-side analysis (e.g. `pip freeze`).
        # TODO: Check for compatible installed scalems API version.
        rp_check = task_manager.submit_tasks(
            rp.TaskDescription({
                # 'executable': py_venv,
                'executable':
                'python3',
                'arguments':
                ['-c', 'import radical.pilot as rp; print(rp.version)'],
                'pre_exec':
                list(pre_exec)
                # 'named_env': 'scalems_env'
            }))
        logger.debug('Checking RP execution environment.')
        states = task_manager.wait_tasks(uids=[rp_check.uid])
        if states[0] != rp.states.DONE or rp_check.exit_code != 0:
            raise DispatchError(
                'Could not verify RP in execution environment.')

        try:
            remote_rp_version = packaging.version.parse(
                rp_check.stdout.rstrip())
        except Exception as e:
            raise DispatchError(
                'Could not determine remote RP version.') from e
        # TODO: #100 Improve compatibility checking.
        if remote_rp_version < packaging.version.parse('1.6.0'):
            raise DispatchError(
                f'Incompatible radical.pilot version in execution '
                f'environment: {str(remote_rp_version)}')

        #
        # Get a scheduler task.
        #

        assert runtime.scheduler is None
        # TODO: #119 Re-enable raptor.
        # runtime.scheduler = _get_scheduler(
        #     'raptor.scalems',
        #     pre_exec=execution_manager._pre_exec,
        #     task_manager=task_manager)
        # Note that we can derive scheduler_name from self.scheduler.uid in later methods.
        # Note: The worker script name only appears in the config file.
        # logger.info('RP scheduler ready.')
        # logger.debug(repr(execution_manager.scheduler))

        return runtime

    except asyncio.CancelledError as e:
        raise e
    except Exception as e:
        logger.exception('Exception while connecting RADICAL Pilot.',
                         exc_info=e)
        raise DispatchError('Failed to launch SCALE-MS master task.') from e