Exemple #1
0
 def __setitem__(self, k: str, v: str) -> None:
     """Store *v* under *k* after validating the key and the value type.

     Raises:
         APIError: if *k* is not a member of ``self._allowed``, or if *v*
             is not a ``bytes`` instance.
     """
     # NOTE(review): *v* is annotated ``str`` but only ``bytes`` values pass
     # the check below -- confirm which one is intended.
     if k not in self._allowed:
         raise APIError(f'Unsupported command: {repr(k)}')
     if not isinstance(v, bytes):
         raise APIError(f'Unsupported add_item key: {repr(v)}')
     super().__setitem__(k, v)
Exemple #2
0
 def __setitem__(self, k: str, v: str) -> None:
     """Store *v* under *k* if both are permitted by ``self._allowed``.

     ``self._allowed`` is consulted twice: first for membership of the key,
     then ``self._allowed[k]`` for membership of the value.

     Raises:
         APIError: if *k* is not in ``self._allowed``, or if *v* is not in
             ``self._allowed[k]``.
     """
     # Reject unknown keys before looking up the values allowed for the key.
     if k not in self._allowed:
         raise APIError(f'Unsupported command key: {repr(k)}')
     permitted_values = self._allowed[k]
     if v not in permitted_values:
         raise APIError(f'Unsupported command value: {repr(v)}')
     super().__setitem__(k, v)
Exemple #3
0
 def task_manager(self,
                  task_manager=None) -> typing.Union[rp.TaskManager, None]:
     """Get (optionally set) the current TaskManager.

     Args:
         task_manager: ``None`` to read the stored value, an
             ``rp.TaskManager`` instance to store, or any other value, which
             is treated as a UID and looked up through
             ``self.session.get_task_managers()``.

     Returns:
         The stored TaskManager (which may be ``None`` when only reading).

     Raises:
         APIError: if the provided TaskManager belongs to a Session whose
             ``uid`` differs from ``self.session.uid``.
         ValueError: if the UID lookup fails or does not produce a single
             ``rp.TaskManager``.
     """
     if task_manager is None:
         return self._task_manager

     if isinstance(task_manager, rp.TaskManager):
         if task_manager.session.uid != self.session.uid:
             raise APIError(
                 'Cannot accept a TaskManager from a different Session.')
         self._task_manager = task_manager
         return task_manager

     # Anything else is treated as a UID to resolve in the current Session.
     uid = task_manager
     error_message = f'{uid} does not describe a valid TaskManager'
     try:
         tmgr = self.session.get_task_managers(tmgr_uids=uid)
     except (AssertionError, KeyError) as e:
         raise ValueError(error_message) from e
     except Exception as e:
         logger.exception('Unhandled RADICAL Pilot exception.',
                          exc_info=e)
         raise ValueError(error_message) from e

     # Validate explicitly rather than with ``assert``: assertions are removed
     # under ``python -O``, and an unvalidated result would be fed back into
     # the recursive call below (re-entering this lookup branch, or silently
     # returning the stored manager when the result is None).
     if not isinstance(tmgr, rp.TaskManager):
         raise ValueError(error_message)
     return self.task_manager(tmgr)
Exemple #4
0
 def put(self, item: typing.Union[_CommandQueueAddItem,
                                  _CommandQueueControlItem]):
     """Check the shape of *item* and forward it to the dispatcher queue.

     *item* must be a single-entry mapping whose only key is either
     ``'command'`` or ``'add_item'``.

     Raises:
         APIError: if the single key is not one of the recognized names.
     """
     assert len(item) == 1
     recognized_keys = {'command', 'add_item'}
     item_keys = list(item.keys())
     sole_key = item_keys[0]
     if sole_key in recognized_keys:
         self._dispatcher_queue.put(item)
     else:
         raise APIError('Unrecognized queue item representation.')
Exemple #5
0
def _(config: Configuration) -> Configuration:
    """Install *config* as the module configuration and return the stored value.

    Raises:
        APIError: if ``_configuration`` already holds a truthy value.
    """
    # Not thread-safe: the check and the set below are separate steps.
    existing = _configuration.get(None)
    if existing:
        raise APIError(
            f'configuration() cannot accept arguments when {__name__} is '
            f'already configured.')
    _configuration.set(config)
    stored = _configuration.get()
    return stored
Exemple #6
0
 def manager(self) -> 'WorkflowManager':
     """Return the object obtained by calling ``self._workflow_manager()``.

     Raises:
         ScopeError: if the call yields ``None``.
         APIError: if the call yields something that is not a
             ``WorkflowManager``.
     """
     owner = self._workflow_manager()
     if owner is None:
         raise ScopeError('Out of scope. Managing context no longer exists!')
     if isinstance(owner, WorkflowManager):
         return owner
     raise APIError('Bug: ItemView._workflow_manager must weakly reference '
                    'a WorkflowManager.')
Exemple #7
0
    def pilot(self, pilot=None) -> typing.Union[rp.Pilot, None]:
        """Get (optionally set) the current Pilot.

        Args:
            pilot: ``None`` to read the stored value, an ``rp.Pilot`` instance
                to store, or any other value, which is treated as a UID and
                looked up through the configured PilotManager.

        Returns:
            The stored Pilot (which may be ``None`` when only reading).

        Raises:
            APIError: if no PilotManager is configured, if the Pilot has no
                valid Session, if it belongs to a different Session, or if it
                is associated with a different PilotManager.
            ValueError: if the UID lookup fails or does not produce a single
                ``rp.Pilot``.
        """
        if pilot is None:
            return self._pilot

        pmgr = self.pilot_manager()
        if not pmgr:
            raise APIError('Cannot set Pilot before setting PilotManager.')

        if isinstance(pilot, rp.Pilot):
            session = pilot.session
            if not isinstance(session, rp.Session):
                raise APIError(
                    f'Pilot {repr(pilot)} does not have a valid Session.')
            if session.uid != self.session.uid:
                raise APIError(
                    'Cannot accept a Pilot from a different Session.')
            if pilot.pmgr.uid != pmgr.uid:
                raise APIError('Pilot must be associated with a PilotManager '
                               'already configured.')
            self._pilot = pilot
            return pilot

        # Anything else is treated as a UID to resolve via the PilotManager.
        uid = pilot
        error_message = f'{uid} does not describe a valid Pilot'
        try:
            found = pmgr.get_pilots(uids=uid)
        except (AssertionError, KeyError, ValueError) as e:
            raise ValueError(error_message) from e
        except Exception as e:
            # TODO: Track down the expected rp exception.
            logger.exception('Unhandled RADICAL Pilot exception.',
                             exc_info=e)
            raise ValueError(error_message) from e

        # Validate explicitly rather than with ``assert``: assertions are
        # removed under ``python -O``, and an unvalidated result would be fed
        # back into the recursive call below (re-entering this lookup branch,
        # or silently returning the stored pilot when the result is None).
        if not isinstance(found, rp.Pilot):
            raise ValueError(error_message)
        return self.pilot(found)
Exemple #8
0
    async def __aenter__(self):
        """Enter the dispatching context and start processing the queue.

        While holding ``self._dispatcher_lock``, this method

        1. registers *self* in ``_dispatcher`` (refusing if a truthy value is
           already set there),
        2. starts ``self._queue_runner`` as an asyncio task and waits for it to
           signal that it has started,
        3. subscribes ``self.put`` to ``'add_item'`` events from ``self.source``,
        4. enqueues an ``add_item`` command for every task id currently in
           ``self.source.tasks``.

        Returns:
            *self*.

        Raises:
            APIError: if ``_dispatcher`` already holds a truthy value.
            DispatchError: if the command queue is full while the initial
                commands are being enqueued.
            AssertionError: if the dispatcher lock is already held on entry.

        Any exception raised here is also stored in ``self._exception`` before
        being re-raised.
        """
        try:
            # Get a lock while the state is changing.
            # Warning: The dispatching protocol is immature.
            # Initially, we don't expect contention for the lock,
            # and if there is contention, it probably represents
            # an unintended race condition or systematic dead-lock.
            # TODO: Clarify dispatcher state machine and remove/replace assertions.
            assert not self._dispatcher_lock.locked()
            async with self._dispatcher_lock:
                # NOTE(review): `_dispatcher` is used like a contextvars.ContextVar
                # (get-with-default / set) -- confirm against its definition.
                if _dispatcher.get(None):
                    raise APIError(
                        'There is already an active dispatcher in this Context.'
                    )
                _dispatcher.set(self)
                # Launch queue processor (proxy executor).
                # The runner is handed an Event; we wait on it below before
                # recording the task.
                runner_started = asyncio.Event()
                runner_task = asyncio.create_task(
                    self._queue_runner(runner_started))
                await runner_started.wait()
                self._queue_runner_task = runner_task

                # Without yielding,
                # 1. Install a hook for the queuer to catch new calls to add_item.
                # 2. Get snapshot of current workflow state with which to initialize
                #    the executor.
                # Don't forget to unsubscribe later!
                # self.source_context.subscribe('add_item', self._dispatcher_queue.put)
                self.source.subscribe('add_item', self.put)
                # TODO: Topologically sort DAG!
                initial_task_list = list(self.source.tasks.keys())
                try:
                    # put_nowait() does not await, so there is no yield point
                    # between subscribing and enqueueing the snapshot.
                    for _task_id in initial_task_list:
                        self.command_queue.put_nowait(
                            QueueItem({'add_item': _task_id}))
                except asyncio.QueueFull as e:
                    raise DispatchError(
                        'Executor was unable to receive initial '
                        'commands.') from e
                # It is now safe to yield.

                # TODO: Add lock context for WorkflowManager event hooks
                #  rather than assume the UI and event loop are always in the same thread.

            return self
        except Exception as e:
            # Record the failure on the instance before propagating it.
            self._exception = e
            raise e
Exemple #9
0
def _set_configuration(*args, **kwargs) -> Configuration:
    """Build a Configuration from the arguments and install it for the module.

    This module and the RADICAL infrastructure carry several stateful aspects
    that need clearly scoped, module-level configuration. The module
    configuration should be initialized exactly once per Python process.

    The recommended pattern is to derive an ArgumentParser from the *parser()*
    module function, parse the command line, and pass the resulting namespace
    to this function to initialize the module configuration.

    Raises:
        APIError: if ``_configuration`` already holds a truthy value.
    """
    # The caller is expected to have supplied at least one argument.
    assert args or kwargs
    # Not thread-safe: the check and the set below are separate steps.
    if _configuration.get(None):
        raise APIError(
            f'configuration() cannot accept arguments when {__name__} is '
            f'already configured.')
    new_configuration = Configuration(*args, **kwargs)
    _configuration.set(new_configuration)
    return _configuration.get()
Exemple #10
0
async def submit(*,
                 item: scalems.workflow.Task,
                 task_manager: rp.TaskManager,
                 pre_exec: list,
                 scheduler: str = None) -> asyncio.Task:
    """Dispatch a WorkflowItem to be handled by RADICAL Pilot.

    Registers a Future for the task result with *item*.

    Args:
        item: The workflow item to be submitted
        task_manager: A radical.pilot.TaskManager instance
                      through which the task should be submitted.
        pre_exec: Passed through unchanged to the helper that builds the RP
                  task description.
        scheduler (str): The string name of the "scheduler," corresponding to
                         the UID of a Task running a rp.raptor.Master.

    Returns an asyncio.Task for a submitted rp.Task.

    Raises:
        DispatchError: if *scheduler* is given for a subprocess-type item, or
            if the watcher task is already cancelled or failed when this
            coroutine is about to return.
        APIError: if the item is not a subprocess-type item and *scheduler* is
            not the UID of a task known to *task_manager*.

    The caller *must* await the result of the coroutine to obtain an asyncio.Task that
    can be cancelled or awaited as a proxy to direct RP task management. The Task will
    hold a coroutine that is guaranteed to already be running, failed, or canceled. The
    caller should check the status of the task immediately before making assumptions
    about whether a Future has been successfully bound to the managed workflow item.

    The *submitted* (output) event is likely a short-term placeholder and subject to
    change. For instance, the use case for waiting on such an event could be met by
    waiting on the state change of the workflow item to a SUBMITTED state. However,
    note that this function will block for a short time at the
    rp.TaskManager.submit_tasks() call, so it is useful to separate the submission
    event from the completion of this coroutine early in development while we decide
    whether and how to relegate RP calls to threads separated from that of the event
    loop.

    The returned asyncio.Task can be used to cancel the rp.Task (and the Future)
    or to await the RP.Task cleanup.

    To submit tasks as a batch, await an array of submit_rp_task() results in the
    same dispatching context. (TBD)

    Notes:

        workflow manager maintains the workflow state without expensive or stateful
        volatile resources, and can mediate updates to the managed workflow at any
        time. Items enter the graph in an IDLE state. The WorkflowManager can provide
        Futures for the results of the managed items. For IDLE items,
        the WorkflowManager retains a weakref to the issued Futures, which it can use
        to make sure that there is only zero or one Future for a particular result.

        WorkflowManager collaborates with Queuer to transition the graph to an "active"
        or "executing" state. This transition is mediated through the dispatcher_lock.

        Queuer sequences and queues workflow items to be handled, pushing them to a
        dispatch_queue. No state change to the workflow item seems necessary at this
        time.

        The dispatch_queue is read by an ExecutionManager. Items may be processed
        immediately or staged in a command_queue. Workflow items are then either
        SUBMITTED or BLOCKED (awaiting dependencies). Optionally, Items may be marked
        ELIGIBLE and re-queued for batch submission.

        If the ExecutionManager is able to submit a task, the Task has a call-back
        registered for the workflow item. The WorkflowManager needs to convert any
        Future weakrefs to strong references when items are SUBMITTED, and the workflow
        Futures are subscribed to the item. Tasks are wrapped in a scalems object that
        the WorkflowManager is able to take ownership of. BLOCKED items are wrapped in
        Tasks which are subscribed to their dependencies (WorkflowItems should already
        be subscribed to WorkflowItem Futures for any dependencies) and stored by the
        ExecutionManager. When the call-backs for all of the dependencies indicate the
        Item should be processed into an upcoming workload, the Item becomes ELIGIBLE,
        and its wrapper Task (in collaboration with the ExecutionManager) puts it in
        the command_queue.

        As an optimization, and to support co-scheduling, a WorkflowItem call-back can
        provide notification of state changes. For instance, a BLOCKED item may become
        ELIGIBLE once all of its dependencies are SUBMITTED, when the actual Executor
        has some degree of data flow management capabilities.

    """

    # TODO: Optimization: skip tasks that are already done (cached results available).
    def scheduler_is_ready(scheduler):
        # A usable scheduler is a non-empty string UID that *task_manager*
        # resolves to an rp.Task.
        return (isinstance(scheduler, str)
                and len(scheduler) > 0
                and isinstance(task_manager.get_tasks(scheduler), rp.Task))

    subprocess_type = TypeIdentifier(
        ('scalems', 'subprocess', 'SubprocessTask'))
    if item.description().type() == subprocess_type:
        if scheduler is not None:
            raise DispatchError(
                'Raptor not yet supported for scalems.executable.')
        rp_task_description = _describe_legacy_task(item, pre_exec=pre_exec)
    elif scheduler_is_ready(scheduler):
        # We might want a contextvars.Context to hold the current rp.Master instance name.
        rp_task_description = _describe_raptor_task(item,
                                                    scheduler,
                                                    pre_exec=pre_exec)
    else:
        raise APIError(
            'Caller must provide the UID of a submitted *scheduler* task.')

    loop = asyncio.get_running_loop()
    rp_task_result_future = loop.create_future()

    # Warning: in the long run, we should not extend the life of the reference returned
    # by edit_item, and we need to consider the robust way to publish item results.
    # TODO: Translate RP result to item result type.
    rp_task_result_future.add_done_callback(
        functools.partial(scalems_callback, item=item))

    # TODO: Move slow blocking RP calls to a separate RP control thread.
    task = task_manager.submit_tasks(rp_task_description)

    rp_task_watcher = await rp_task(rptask=task, future=rp_task_result_future)

    # The watcher may already have finished; surface an early cancellation or
    # failure to the caller instead of returning a dead task.
    if rp_task_watcher.done():
        if rp_task_watcher.cancelled():
            raise DispatchError(
                f'Task for {item} was unexpectedly canceled during '
                'dispatching.')
        e = rp_task_watcher.exception()
        if e is not None:
            # This message must be an f-string so that *item* is interpolated
            # rather than the literal text "{item}" being reported.
            raise DispatchError(
                f'Task for {item} failed during dispatching.') from e

    return rp_task_watcher