Example 1
def workflow_item_director_factory(
        item,
        *,
        manager: WorkflowManager,
        label: typing.Optional[str] = None) -> typing.Callable[..., ItemView]:
    """Get a workflow item director for a workflow manager and input type.

    When called, the director finalizes the new item and returns a view.
    """
    # TODO: Incorporate into the WorkflowManager interface as a singledispatchmethod.
    # Design note: WorkflowManager classes could cache implementation details for
    # item types, but (since we acknowledge that work may be defined before
    # instantiating the context to which it will be dispatched) we need to allow
    # the WorkflowContext implementation to negotiate/fetch the implementation
    # details at any time. In general, relationships between specific workflow
    # item types and context types should be resolved in terms of context traits,
    # not specific class definitions. In practice, we have some more tightly
    # coupled relationships, at least in early versions, as we handle
    # (a) subprocess-type items,
    # (b) function-based items, and
    # (c) data staging details for (1) local execution and (2) remote
    #     (RADICAL Pilot) execution.
    raise MissingImplementationError(
        'No registered implementation for {} in {}'.format(
            repr(item), repr(manager)))
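
The TODO above suggests incorporating this factory into a singledispatch-style interface. A minimal sketch of that pattern follows, with a hypothetical SubprocessItem type and a stand-in exception class so the sketch is self-contained; the registration shown is illustrative, not the actual scalems implementation.

import functools


class MissingImplementationError(NotImplementedError):
    """Stand-in for the scalems exception type, for a self-contained sketch."""


class SubprocessItem:
    """Hypothetical item type, used only to illustrate registration."""


@functools.singledispatch
def workflow_item_director_factory(item, *, manager, label: str = None):
    # Base case: no overload registered for this item type.
    raise MissingImplementationError(
        'No registered implementation for {} in {}'.format(repr(item), repr(manager)))


@workflow_item_director_factory.register
def _(item: SubprocessItem, *, manager, label: str = None):
    # Overload selected when *item* is a SubprocessItem instance.
    def director(*args, **kwargs):
        # Finalize the new item through the manager and return a view of it.
        return manager.add_item(item)
    return director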
Example 2
    def add_item(self, task_description) -> scalems.context.ItemView:
        # # TODO: Resolve implementation details for *operation*.
        # if operation != 'scalems.executable':
        #     raise MissingImplementationError(
        #         'No implementation for {} in {}'.format(operation, repr(self)))
        # # Copy a static copy of the input.
        # # TODO: Dispatch task addition, allowing negotiation of Context
        # #  capabilities and subscription to resources owned by other Contexts.
        # if not isinstance(bound_input, scalems.subprocess.SubprocessInput):
        #     raise ValueError(
        #         'Only scalems.subprocess.SubprocessInput objects supported as input.')
        if not isinstance(task_description, scalems.subprocess.Subprocess):
            raise MissingImplementationError('Operation not supported.')
        uid = task_description.uid()
        if uid in self.task_map:
            # TODO: Consider decreasing error level to `warning`.
            raise DuplicateKeyError('Task already present in workflow.')
        logger.debug('Adding {} to {}'.format(str(task_description), str(self)))
        record = {
            'uid': task_description.uid().hex(),
            'type': task_description.resource_type().scoped_identifier(),
            'input': {}
        }
        task_input = task_description.input_collection()
        for field in dataclasses.fields(task_input):
            name = field.name
            try:
                # TODO: Need serialization typing.
                record['input'][name] = getattr(task_input, name)
            except AttributeError as e:
                raise InternalError('Unexpected missing field.') from e
        record = json.dumps(record, cls=Encoder)

        # TODO: Make sure there are no artifacts of shallow copies that may
        #  result in a user modifying nested objects unexpectedly.
        item = scalems.context.Task(self, record)
        # TODO: Check for ability to dispatch.

        self.task_map[uid] = item

        # TODO: Register task factory (dependent on executor).
        # TODO: Register input factory (dependent on dispatcher and task factory / executor).
        # TODO: Register results handler (dependent on dispatcher end points).
        task_view = scalems.context.ItemView(context=self, uid=uid)

        # TODO: Use an abstract event hook for `add_item` and other (decorated) methods.
        # Internal functionality can probably explicitly register and unregister, accounting
        # for the current details of thread safety. External access will need to be in
        # terms of a concurrency framework, so we can use a scoped `async with event_subscription`
        # to create an asynchronous iterator (with some means to externally end the subscription,
        # either through the generator protocol directly or through logic in the provider of the iterator)
        dispatcher_queue = self._queue
        # self._queue may be removed by another thread before we add the item to it,
        # but that is fine. There is nothing wrong with abandoning an unneeded queue.
        if dispatcher_queue is not None:
            logger.debug('Running dispatcher detected. Entering live dispatching hook.')
            # Add the AddItem message to the queue.
            assert isinstance(dispatcher_queue, queue.SimpleQueue)
            dispatcher_queue.put({'add_item': task_description})

        return task_view
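
The record serialization above relies on a project-specific Encoder class that the snippet does not show. A minimal sketch of the kind of json.JSONEncoder subclass implied, assuming the main non-JSON-native field type is a bytes UID (the real scalems encoder presumably handles more types):

import json


class Encoder(json.JSONEncoder):
    """Sketch of an encoder for workflow item records (assumed behavior)."""

    def default(self, o):
        # Render byte sequences (such as task UIDs) as hex strings.
        if isinstance(o, (bytes, bytearray)):
            return o.hex()
        # Defer to the base class, which raises TypeError for unknown types.
        return super().default(o)


# For example: json.dumps({'data': b'\xff'}, cls=Encoder) == '{"data": "ff"}'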
Example 3
def workflow_item_director_factory(item, *, context, label: str = None):
    """

    Get a workflow item director for a context and input type.

    When called, the director finalizes the new item and returns a view.
    """
    raise MissingImplementationError(
        'No registered implementation for {} in {}'.format(
            repr(item), repr(context)))
Example 4
    async def submit(self, *, item: scalems.workflow.Task) -> asyncio.Task:
        # TODO: Ensemble handling
        item_shape = item.description().shape()
        if len(item_shape) != 1 or item_shape[0] != 1:
            raise MissingImplementationError(
                'Executor cannot handle multidimensional tasks yet.')

        task: asyncio.Task[rp.Task] = await submit(
            item=item, task_manager=self.task_manager, pre_exec=self._pre_exec)
        return task
Example 5
    def decode(cls,
               obj) -> typing.Union[UnboundObject, BaseDecoded]:  # noqa: C901
        """Create unbound SCALE-MS objects from their basic Python representations.

        We assume this is called in a bottom-up manner as a nested record is deserialized.

        Unrecognized objects are returned unaltered because they may be members
        of an enclosing object with appropriate dispatching.

        .. todo:: Consider where to register transcoders for compatible/virtual types.
                  E.g. infer np.array(..., dtype=int) -> scalems.Integer.
                  This is a small number of cases, since we can lean on the
                  descriptors in the buffer protocol.
        """
        if not isinstance(obj, dict):
            # No special handling for non-dict objects until we know what
            # they are nested in; fall through and return *obj* unaltered.
            ...
        else:
            if 'schema' in obj:
                # We currently have very limited schema processing.
                try:
                    spec = obj['schema']['spec']
                except KeyError:
                    spec = None
                if not isinstance(spec, str) or spec != 'scalems.v0':
                    # Not a schema we handle; pass the object through unaltered.
                    logger.info('Unrecognized *schema* when decoding object.')
                    return obj
                if 'name' not in obj['schema'] or not isinstance(
                        obj['schema']['name'], str):
                    raise InternalError('Invalid schema.')
                else:
                    # schema = obj['schema']['name']
                    ...
                # Dispatch the object...
                ...
                raise MissingImplementationError(
                    'We do not yet support dynamic type registration through the work record.'
                )

            if 'type' in obj:
                # Dispatch the decoding according to the type.
                try:
                    dispatch = cls.get_decoder(obj['type'])
                except TypeError:
                    dispatch = BasicSerializable.decode
                if dispatch is not None:
                    return dispatch(obj)
        # Just return un-recognized objects unaltered.
        return obj
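
cls.get_decoder implies a registry keyed by the record's 'type' field. A sketch of such a registry follows, with hypothetical names (_decoders, register_decoder) that are not taken from the scalems source:

import typing

# Hypothetical registry mapping a type identifier to a decoding callable.
_decoders: typing.MutableMapping[str, typing.Callable] = {}


def register_decoder(typeid: str, decoder: typing.Callable):
    """Associate *decoder* with records whose 'type' field is *typeid*."""
    _decoders[typeid] = decoder


def get_decoder(typeid) -> typing.Optional[typing.Callable]:
    # Non-string keys raise TypeError, which triggers the
    # BasicSerializable.decode fallback in the snippet above.
    if not isinstance(typeid, str):
        raise TypeError('Type identifiers must be strings.')
    # Return None for unregistered types so the caller passes the object through.
    return _decoders.get(typeid)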
Example 6
    async def run(self, task=None):
        """Run the configured workflow.

        TODO:
            Consider whether to use an awaitable argument as a hint to narrow the scope
            of the work graph to execute, or whether to just run everything.

        TODO: Move this function implementation to the executor instance / Session implementation.
        """
        if task is not None:
            raise MissingImplementationError(
                'Semantics for run(task) are not yet defined.')
        # Bypass the need for asyncio.run()
        # if self.event_loop is None:
        #     raise RuntimeError('No event loop!')
        # loop = self.event_loop
        return await asyncio.wait(self.task_map.values())
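
Note that asyncio.wait() no longer accepts bare coroutines (deprecated in Python 3.8, removed in 3.11), so the task_map values must already be Task or Future objects. A defensive sketch, assuming the values might be plain awaitables:

import asyncio


async def run_all(awaitables):
    # Wrap each awaitable in a Task; on Python 3.11+, asyncio.wait() requires
    # Tasks or Futures rather than raw coroutine objects.
    tasks = [asyncio.ensure_future(item) for item in awaitables]
    done, pending = await asyncio.wait(tasks)
    return done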
Example 7
    def item(self, identifier) -> ItemView:
        """Access an item in the managed workflow.
        """
        # Consider providing the consumer context when acquiring access.
        # Consider limiting the scope of access requested.
        if isinstance(identifier, typing.SupportsBytes):
            identifier = bytes(identifier)
        if not isinstance(identifier, bytes):
            raise MissingImplementationError(
                'Item look-up is currently limited to UID byte sequences.')
        logger.debug('Looking for {} in ({})'.format(
            identifier.hex(),
            ', '.join(key.hex() for key in self.tasks.keys())))
        if identifier not in self.tasks:
            raise KeyError(f'WorkflowManager does not have item {identifier}')
        item_view = ItemView(manager=self, uid=identifier)

        return item_view
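
Callers must supply the UID as a byte sequence; a hex string from a log or a serialized record has to be converted first. A hypothetical usage (the UID value and the manager instance are placeholders):

# Recover the bytes key from a record's hex-encoded UID.
uid = bytes.fromhex('0123456789abcdef' * 4)
item_view = manager.item(uid)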
Example 8
    async def _single_iteration_queue(self,
                                      source: _queue.SimpleQueue,
                                      target: asyncio.Queue):
        """Transfer one queue item.

        If a *stop* command is encountered, self-cancel after transferring the command.

        To avoid race conditions while stopping queue processing,
        place a *stop* command in *source* and asyncio.shield() a call
        to this coroutine in a *try: ... except: ...* block.

        Note that the caller will then receive CancelledError after *stop* command has
        been transferred.

        Raises:
            queue.Empty: if *source* is empty.
            asyncio.CancelledError: when cancelled or a *stop* command is received.

        """
        command: QueueItem = source.get_nowait()
        logger.debug(f'Processing command {repr(command)}')

        await target.put(command)

        # TODO: Use formal RPC protocol.
        if 'control' in command:
            # Note that we don't necessarily need to stop managing the dispatcher queue
            # at this point, but the Executor will be directed to shut down,
            # so we must not put anything else onto the command queue until we have a
            # new command queue or a new executor.
            if command['control'] == 'stop':
                raise asyncio.CancelledError()
            else:
                raise ProtocolError('Unknown command: {}'.format(command['control']))
        else:
            if 'add_item' not in command:
                # TODO: We might want a call-back or Event to force errors before the
                #  queue-runner task is awaited.
                raise MissingImplementationError(
                    f'Executor has no implementation for {str(command)}'
                )
        return command
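
The docstring describes a specific shutdown protocol: enqueue a stop command, then shield the transfer so external cancellation cannot lose it. A caller-side sketch of that pattern (names such as *runner* are placeholders):

import asyncio
import queue as _queue


async def drain_until_stop(runner, source: _queue.SimpleQueue, target: asyncio.Queue):
    # Request shutdown, then keep transferring items until the self-cancellation
    # triggered by the *stop* command propagates back to us.
    source.put({'control': 'stop'})
    while True:
        try:
            await asyncio.shield(
                runner._single_iteration_queue(source, target))
        except _queue.Empty:
            # Nothing to transfer yet; yield to the event loop and retry.
            await asyncio.sleep(0.1)
        except asyncio.CancelledError:
            # The *stop* command has been transferred; shutdown is complete.
            break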
Example 9
    def add_item(self, task_description):
        # # TODO: Resolve implementation details for *operation*.
        # if operation != 'scalems.executable':
        #     raise MissingImplementationError(
        #         'No implementation for {} in {}'.format(operation, repr(self)))
        # # Copy a static copy of the input.
        # # TODO: Dispatch task addition, allowing negotiation of Context
        # #  capabilities and subscription to resources owned by other Contexts.
        # if not isinstance(bound_input, scalems.subprocess.SubprocessInput):
        #     raise ValueError(
        #         'Only scalems.subprocess.SubprocessInput objects supported as input.')
        if not isinstance(task_description, scalems.subprocess.Subprocess):
            raise MissingImplementationError('Operation not supported.')
        uid = task_description.uid()
        if uid in self.task_map:
            # TODO: Consider decreasing error level to `warning`.
            raise DuplicateKeyError('Task already present in workflow.')
        # TODO: Use a generic reference to the implementation.
        self.task_map[uid] = operations.executable(context=self,
                                                   task=task_description)
        # TODO: The return value should be a full proxy to a command instance.
        return uid
Example 10
    def add_item(self, task_description):
        """Placeholder for task creation interface.

        TODO: Subscribe to Futures in the task input.
        TODO: Dispatch task configuration according to registered implementations.
        TODO: Own a task instance and return a task view.
        TODO: Accept object types other than Subprocess (e.g. Data, PyFunc, or
            opaque dispatchable types).
        """
        from . import operations
        # TODO: more complete type hinting.
        if not isinstance(task_description, scalems.subprocess.Subprocess):
            raise MissingImplementationError('Operation not supported.')
        uid = task_description.uid()
        if uid in self.task_map:
            # TODO: Consider decreasing error level to `warning`.
            raise DuplicateKeyError('Task already present in workflow.')

        task = operations.executable(self, task_description)

        self.task_map[uid] = task
        return task
Example 11
    def set_running_or_notify_cancel(self) -> bool:
        raise MissingImplementationError()
Example 12
    def add_item(self, task_description) -> ItemView:
        # # TODO: Resolve implementation details for *operation*.
        # if operation != 'scalems.executable':
        #     raise MissingImplementationError(
        #         'No implementation for {} in {}'.format(operation, repr(self)))
        # # Copy a static copy of the input.
        # # TODO: Dispatch task addition, allowing negotiation of Context
        # #  capabilities and subscription to resources owned by other Contexts.
        # if not isinstance(bound_input, scalems.subprocess.SubprocessInput):
        #     raise ValueError(
        #         'Only scalems.subprocess.SubprocessInput objects supported as input.')

        # TODO: Replace with type-based dispatching or some normative interface test.
        from .subprocess import Subprocess
        if not isinstance(task_description, (Subprocess, dict)):
            raise MissingImplementationError('Operation not supported.')

        if hasattr(task_description, 'uid'):
            uid: bytes = task_description.uid()
            if uid in self.tasks:
                # TODO: Consider decreasing error level to `warning`.
                raise DuplicateKeyError('Task already present in workflow.')
            logger.debug('Adding {} to {}'.format(str(task_description),
                                                  str(self)))
            record = {
                'uid': uid.hex(),
                'type': task_description.resource_type().scoped_identifier(),
                'input': {}
            }
            task_input = task_description.input_collection()
            for field in dataclasses.fields(task_input):
                name = field.name
                try:
                    # TODO: Need serialization typing.
                    record['input'][name] = getattr(task_input, name)
                except AttributeError as e:
                    raise InternalError('Unexpected missing field.') from e
        else:
            assert isinstance(task_description, dict)
            assert 'uid' in task_description
            uid = task_description['uid']
            implementation_identifier = task_description.get(
                'implementation', None)
            if not isinstance(implementation_identifier, list):
                raise DispatchError('Bug: bad schema checking?')

            if uid in self.tasks:
                # TODO: Consider decreasing error level to `warning`.
                raise DuplicateKeyError('Task already present in workflow.')
            logger.debug('Adding {} to {}'.format(str(task_description),
                                                  str(self)))
            record = {
                'uid': uid.hex(),
                'type': tuple(implementation_identifier),
                'input': task_description
            }
        serialized_record = json.dumps(record, default=encode)

        # TODO: Make sure there are no artifacts of shallow copies that may result in
        #       a user modifying nested objects unexpectedly.
        item = Task(self, serialized_record)
        # TODO: Check for ability to dispatch.
        #  Note module dependencies, etc. and check in target execution environment
        #  (e.g. https://docs.python.org/3/library/importlib.html#checking-if-a-module
        #  -can-be-imported)

        # TODO: Provide a data descriptor and possibly a more formal Workflow class.
        # We do not yet check that the derived classes actually initialize self.tasks.
        self.tasks[uid] = item

        task_view = ItemView(manager=self, uid=uid)

        # TODO: Register task factory (dependent on executor).
        # TODO: Register input factory (dependent on dispatcher and task factory /
        #  executor).
        # TODO: Register results handler (dependent on dispatcher end points).

        # TODO: Consider an abstract event hook for `add_item` and other (decorated)
        #  methods.
        # Internal functionality can probably explicitly register and unregister,
        # accounting for the current details of thread safety.
        # External access will need to be in terms of a concurrency framework,
        # so we can use a scoped `async with event_subscription`
        # to create an asynchronous iterator that a coroutine can use to receive
        # add_item messages
        # (with some means to externally end the subscription,
        # either through the generator protocol directly or through logic in the
        # provider of the iterator)
        for callback in self._event_hooks['add_item']:
            # TODO: Do we need to provide a contextvars.Context object to the callback?
            logger.debug(f'Running dispatching hook for add_item subscriber '
                         f'{repr(callback)}.')
            callback(_CommandQueueAddItem({'add_item': uid}))

        return task_view
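
The _event_hooks mapping implies subscription bookkeeping that the comments describe but the snippet does not show. A minimal sketch, assuming callbacks are stored in a set per event (the method names here are assumptions, not the scalems API):

class EventHookMixin:
    """Sketch of add_item event subscription management (assumed storage)."""

    def __init__(self):
        self._event_hooks = {'add_item': set()}

    def subscribe(self, event: str, callback):
        # Register *callback* to be invoked synchronously when *event* fires.
        self._event_hooks[event].add(callback)

    def unsubscribe(self, event: str, callback):
        # Remove a previously registered callback; no-op if it is absent.
        self._event_hooks[event].discard(callback)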
Example 13
async def run_executor(source_context: AsyncWorkflowManager, command_queue: asyncio.Queue):
    """Process workflow messages until a stop message is received.

    Initial implementation processes commands serially without regard for possible
    concurrency.

    Towards concurrency:
        We can create all tasks without awaiting any of them.

        Some tasks will be awaiting results from other tasks.

        All tasks will be awaiting an asyncio.Lock or asyncio.Condition for each
        required resource, but must do so indirectly.

        To avoid deadlocks, we can't have a Lock object for each resource unless
        the locks are managed by an intermediary that can serialize requests.
        In other words, we need a Scheduler that tracks the resource pool,
        packages resource locks only when they can all be acquired without race
        conditions or blocking, and then notifies the Condition for each task
        that it is allowed to run.

        It should not do so until the dependencies of the task are known to have
        all of the resources they need to complete (running with any dynamic dependencies
        also running) and, preferably, complete.

        Alternatively, the Scheduler can operate in blocks: allocate all resources,
        offer the locks to tasks, wait for all resources to be released, and repeat.
        We can allow some conditions to "wake up" the scheduler to backfill a block
        of resources, but we should be careful with that.

        (We still need to consider dynamic tasks that
        generate other tasks. I think the only way to distinguish tasks which can't be
        dynamic from those which might be would be with the `def` versus `async def` in
        the implementing function declaration. If we abstract `await` with `scalems.wait`,
        we can throw an exception at execution time after checking a ContextVar.
        It may be better to just let implementers use `await` for dynamically created tasks,
        but we need to make the same check if a function calls `.result()` or otherwise
        tries to create a dependency on an item that was not allocated resources before
        the function started executing. In a conservative first draft, we can simply
        throw an exception if a non-`async def` function attempts to call a scalems workflow
        command like add_item while in an executing context.)

    """
    # Could also accept a "stop" Event object, but we would need some other way to yield
    # on an empty queue.
    while True:
        command = await command_queue.get()
        try:
            logger.debug('Executor is handling {}'.format(repr(command)))

            # TODO: Use formal RPC protocol.
            if 'control' in command:
                if command['control'] == 'stop':
                    return
                else:
                    raise ProtocolError('Unknown command: {}'.format(command['control']))
            if 'add_item' not in command:
                raise MissingImplementationError(
                    'Executor has no implementation for {}'.format(str(command)))
            key = command['add_item']
            item = source_context.item(key)
            if not isinstance(item, scalems.context.Task):
                raise InternalError(
                    'Expected {}.item() to return a scalems.context.Task'.format(
                        repr(source_context)))

            # TODO: Ensemble handling
            item_shape = item.description().shape()
            if len(item_shape) != 1 or item_shape[0] != 1:
                raise MissingImplementationError(
                    'Executor cannot handle multidimensional tasks yet.')

            # TODO: Automatically resolve resource types.
            task_type_identifier = item.description().type().identifier()
            if task_type_identifier != 'scalems.subprocess.SubprocessTask':
                raise MissingImplementationError(
                    'Executor does not have an implementation for {}'.format(
                        str(task_type_identifier)))
            task_type = scalems.subprocess.SubprocessTask()

            # TODO: Use abstract input factory.
            logger.debug('Resolving input for {}'.format(str(item)))
            input_type = task_type.input_type()
            input_record = input_type(**item.input)
            input_resources = operations.input_resource_scope(
                context=source_context, task_input=input_record)

            # We need to provide a scope in which we guarantee the availability of resources,
            # such as temporary files provided for input, or other internally-generated
            # asyncio entities.
            async with input_resources as subprocess_input:
                logger.debug('Creating coroutine for {}'.format(task_type.__class__.__name__))
                # TODO: Use abstract task factory.
                coroutine = operations.subprocessCoroutine(subprocess_input)
                logger.debug('Creating asyncio Task for {}'.format(repr(coroutine)))
                awaitable = asyncio.create_task(coroutine)

                # TODO: Use abstract results handler.
                logger.debug('Waiting for task to complete.')
                result = await awaitable
                subprocess_exception = awaitable.exception()
                if subprocess_exception is not None:
                    logger.exception('subprocess task raised exception {}'.format(
                        str(subprocess_exception)))
                    raise subprocess_exception
                logger.debug('Setting result for {}'.format(str(item)))
                item.set_result(result)
        finally:
            logger.debug('Releasing "{}" from command queue.'.format(str(command)))
            command_queue.task_done()
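
This command protocol is the consumer side of the dispatcher queue seen in Example 2: dict messages carrying either an 'add_item' key or a 'control' key. A minimal driver sketch (manager construction is elided; only the queue protocol is shown):

import asyncio


async def drive(source_context, keys):
    command_queue: asyncio.Queue = asyncio.Queue()
    executor = asyncio.create_task(run_executor(source_context, command_queue))
    for key in keys:
        # One message per workflow item to execute.
        await command_queue.put({'add_item': key})
    # The *stop* control message ends the executor loop.
    await command_queue.put({'control': 'stop'})
    await command_queue.join()  # All messages acknowledged via task_done().
    await executor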
Example 14
    def add_item(self, task_description):
        raise MissingImplementationError(
            'Trivial work graph holder not yet implemented.')
Example 15
    def add_done_callback(self, fn: Callable[[Future], Any]) -> None:
        # TODO: more complete type hinting.
        raise MissingImplementationError()
Example 16
    def running(self) -> bool:
        raise MissingImplementationError()
Example 17
    def cancel(self) -> bool:
        raise MissingImplementationError()
Example 18
def modify_input(*args, **kwargs):
    raise MissingImplementationError()
Example 19
    def item(self, identifier) -> ItemView:
        raise MissingImplementationError(
            'Trivial work graph holder not yet implemented.')
Example 20
    def exception(self,
                  timeout: Optional[float] = ...) -> Optional[BaseException]:
        raise MissingImplementationError()