Esempio n. 1
0
 def test_await_future(self):
     """Check ``self.client.wait`` against a future resolved by another thread.

     A worker thread sets the future's result after a short delay; the test
     asserts that ``wait`` returns a truthy value and that the future is
     then done and carries the expected result.
     """
     pending = Future()

     def resolve_after_delay():
         # Resolve from the worker thread a little after it starts.
         time.sleep(0.1)
         pending.set_result('future')

     worker = Thread(target=resolve_after_delay)
     worker.start()
     assert self.client.wait([pending])
     assert pending.done()
     assert pending.result() == 'future'
Esempio n. 2
0
 def _on_task_done(self, future: Future):
     """Handle completion of the future belonging to the current task.

     Clears the stored task, schedules it for deletion, and forwards either
     the future's exception to ``on_exception`` or its result to ``on_done``.
     """
     # Sanity checks: the finished future must be the one we are tracking.
     assert future.done()
     assert self.__task is not None
     assert self.__task.future is future
     assert self.__task.watcher.future() is future

     # Detach the task from this object before disposing of it.
     finished_task = self.__task
     self.__task = None
     finished_task.deleteLater()

     error = future.exception()
     if error is None:
         self.on_done(future.result())
     else:
         self.on_exception(error)
Esempio n. 3
0
class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.
    """
    labels = Dict({},
                  config=True,
                  help="""
        Labels to reflect onto local cache
        """)

    fields = Dict({},
                  config=True,
                  help="""
        Fields to restrict the reflected objects
        """)

    namespace = Unicode(None,
                        allow_none=True,
                        help="""
        Namespace to watch for resources in
        """)

    resources = Dict({},
                     help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """)

    kind = Unicode('resource',
                   help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """)

    list_method_name = Unicode("",
                               help="""
        Name of function (on apigroup respresented by `api_group_name`) that is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely want something
        of the form list_namespaced_<resource> - for example, `list_namespaced_pod` will
        give you a PodReflector.

        This must be set by a subclass.
        """)

    api_group_name = Unicode('CoreV1Api',
                             help="""
        Name of class that represents the apigroup on which `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group. If you want to watch Ingresses,
        for example, you would have to use ExtensionsV1beta1Api
        """)

    request_timeout = Int(60,
                          config=True,
                          help="""
        Network timeout for kubernetes watch.

        Trigger watch reconnect when a given request is taking too long,
        which can indicate network issues.
        """)

    timeout_seconds = Int(10,
                          config=True,
                          help="""
        Timeout for kubernetes watch.

        Trigger watch reconnect when no watch event has been received.
        This will cause a full reload of the currently existing resources
        from the API server.
        """)

    restart_seconds = Int(30,
                          config=True,
                          help="""
        Maximum time before restarting a watch.

        The watch will be restarted at least this often,
        even if events are still arriving.
        Avoids trusting kubernetes watch to yield all events,
        which seems to not be a safe assumption.
        """)

    on_failure = Any(
        help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        """Load the kubernetes config, build the selectors and start reflecting.

        All arguments are forwarded to the parent constructor. ``start()`` is
        called at the end, so construction performs the initial resource
        listing and launches the watch thread.
        """
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            # Fall back to the kube config when the in-cluster config fails.
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        label_pairs = [
            '{}={}'.format(key, value) for key, value in self.labels.items()
        ]
        self.label_selector = ','.join(label_pairs)
        field_pairs = [
            '{}={}'.format(key, value) for key, value in self.fields.items()
        ]
        self.field_selector = ','.join(field_pairs)

        # Resolved by the watch thread after its first successful listing.
        self.first_load_future = Future()
        self._stop_event = threading.Event()

        self.start()

    def __del__(self):
        """Signal the watch loop to stop when the reflector is finalized.

        ``_stop_event`` is assigned late in ``__init__``; if construction
        failed before that point there is nothing to stop, and calling
        ``stop()`` would raise AttributeError during finalization.
        """
        if getattr(self, '_stop_event', None) is not None:
            self.stop()

    def _list_and_update(self):
        """
        Replace the local resource cache with a freshly listed snapshot.

        Calls the configured list method on ``self.api`` for the watched
        namespace and selectors, parses the raw JSON response and stores the
        items keyed by their ``metadata.name``.

        Returns the ``resourceVersion`` of the listing so a watch can be
        hooked up from that point.
        """
        list_resources = getattr(self.api, self.list_method_name)
        response = list_resources(
            self.namespace,
            label_selector=self.label_selector,
            field_selector=self.field_selector,
            _request_timeout=self.request_timeout,
            _preload_content=False,
        )
        listing = json.loads(response.read())

        by_name = {}
        for item in listing["items"]:
            by_name[item["metadata"]["name"]] = item
        # The cache is swapped in with a single attribute assignment rather
        # than being mutated in place.
        self.resources = by_name

        # return the resource version so we can hook up a watch
        return listing["metadata"]["resourceVersion"]

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is to be run not on the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff: the retry delay starts at 0.1s
        and doubles after every unexpected error, and we give up (calling
        ``on_failure`` if it is set) once the doubled delay exceeds 30s.
        The delay is reset whenever a watch event is received. This should
        protect against network connections dropping and intermittent
        unavailability of the api-server. Every time we recover from an
        exception we also do a full fetch, to pick up changes that might've
        been missed in the time we were not doing a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a 'fetch / modify /
        update' cycle on them), we should be ok!
        """
        selectors = []
        if self.label_selector:
            selectors.append("label selector=%r" % self.label_selector)
        if self.field_selector:
            selectors.append("field selector=%r" % self.field_selector)
        log_selector = ', '.join(selectors)

        cur_delay = 0.1

        self.log.info(
            "watching for %s with %s in namespace %s",
            self.kind,
            log_selector,
            self.namespace,
        )
        while True:
            self.log.debug("Connecting %s watcher", self.kind)
            start = time.monotonic()
            w = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                watch_args = {
                    'namespace': self.namespace,
                    'label_selector': self.label_selector,
                    'field_selector': self.field_selector,
                    'resource_version': resource_version,
                }
                if self.request_timeout:
                    # set network receive timeout
                    watch_args['_request_timeout'] = self.request_timeout
                if self.timeout_seconds:
                    # set watch timeout
                    watch_args['timeout_seconds'] = self.timeout_seconds
                method = partial(getattr(self.api, self.list_method_name),
                                 _preload_content=False)
                # in case of timeout_seconds, the w.stream just exits (no exception thrown)
                # -> we stop the watcher and start a new one
                for watch_event in w.stream(method, **watch_args):
                    # Remember that these events are k8s api related WatchEvents
                    # objects, not k8s Event or Pod representations, they will
                    # reside in the WatchEvent's object field depending on what
                    # kind of resource is watched.
                    #
                    # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.16/#watchevent-v1-meta
                    # ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.16/#event-v1-core
                    cur_delay = 0.1
                    resource = watch_event['object']
                    if watch_event['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource["metadata"]["name"], None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource["metadata"]["name"]] = resource
                    if self._stop_event.is_set():
                        self.log.info("%s watcher stopped", self.kind)
                        break
                    watch_duration = time.monotonic() - start
                    if watch_duration >= self.restart_seconds:
                        self.log.debug(
                            "Restarting %s watcher after %i seconds",
                            self.kind,
                            watch_duration,
                        )
                        break
            except ReadTimeoutError:
                # network read time out, just fall through and restart the watch
                # this could be due to a network problem or just low activity
                self.log.warning("Read timeout watching %s, reconnecting",
                                 self.kind)
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception(
                        "Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception(
                    "Error when watching resources, retrying in %ss",
                    cur_delay)
                time.sleep(cur_delay)
            else:
                # the stream ended without raising (watch timeout, restart
                # interval reached, or stop requested), reconnect
                self.log.debug("%s watcher timeout", self.kind)
            finally:
                # Always release the watch. Control flow is deliberately kept
                # out of ``finally``: a ``break`` here would silently discard
                # any in-flight exception or ``return``.
                w.stop()
            # Checked after every pass, whichever branch above was taken.
            if self._stop_event.is_set():
                self.log.info("%s watcher stopped", self.kind)
                break
        self.log.warning("%s watcher finished", self.kind)

    def start(self):
        """
        Begin reflecting resources.

        A blocking full listing is done first so that code checking the
        local store (such as polls) does not race with the initial load.
        Afterwards the watch loop is started on a daemon thread. This is
        meant to be called exactly once, while the singleton is created.

        Raises ValueError if a watch thread has already been created.
        """
        already_started = hasattr(self, 'watch_thread')
        if already_started:
            raise ValueError(
                'Thread watching for resources is already running')

        self._list_and_update()
        watcher = threading.Thread(target=self._watch_and_update)
        self.watch_thread = watcher
        # If the watch_thread is only thread left alive, exit app
        watcher.daemon = True
        watcher.start()

    def stop(self):
        """Set the stop event that the watch loop checks in order to exit."""
        stop_event = self._stop_event
        stop_event.set()

    def stopped(self):
        """Return True once ``stop()`` has set the stop event."""
        stop_requested = self._stop_event.is_set()
        return stop_requested
Esempio n. 4
0
class KernelManager(ConnectionFileMixin):
    """Manages a single kernel in a subprocess on this host.

    This version starts kernels with Popen.
    """
    def __init__(self, *args, **kwargs):
        """Initialise shutdown bookkeeping and the ``ready`` future.

        Only keyword arguments are forwarded to the parent constructor;
        positional arguments are accepted but not passed on.
        """
        super().__init__(**kwargs)
        self._shutdown_status = _ShutdownStatus.Unset
        try:
            ready_future = Future()
        except RuntimeError:
            # No event loop running, use concurrent future
            ready_future = CFuture()
        self._ready = ready_future

    _created_context: Bool = Bool(False)

    # The PyZMQ Context to use for communication with the kernel.
    context: Instance = Instance(zmq.Context)

    @default("context")
    def _context_default(self) -> zmq.Context:
        """Create the default ZMQ context and flag it as created by this manager."""
        # The flag is consulted during resource cleanup to decide whether the
        # context should be destroyed.
        self._created_context = True
        new_context = zmq.Context()
        return new_context

    # the class to create with our `client` method
    client_class: DottedObjectName = DottedObjectName(
        "jupyter_client.blocking.BlockingKernelClient")
    client_factory: Type = Type(klass="jupyter_client.KernelClient")

    @default("client_factory")
    def _client_factory_default(self) -> Type:
        """Import and return the class named by ``client_class``."""
        factory = import_item(self.client_class)
        return factory

    @observe("client_class")
    def _client_class_changed(self, change: t.Dict[str,
                                                   DottedObjectName]) -> None:
        """Re-import ``client_factory`` whenever ``client_class`` changes."""
        dotted_name = str(change["new"])
        self.client_factory = import_item(dotted_name)

    kernel_id: str = Unicode(None, allow_none=True)

    # The kernel provisioner with which this KernelManager is communicating.
    # This will generally be a LocalProvisioner instance unless the kernelspec
    # indicates otherwise.
    provisioner: t.Optional[KernelProvisionerBase] = None

    kernel_spec_manager: Instance = Instance(kernelspec.KernelSpecManager)

    @default("kernel_spec_manager")
    def _kernel_spec_manager_default(self) -> kernelspec.KernelSpecManager:
        """Build the default kernel spec manager using this manager's ``data_dir``."""
        spec_manager = kernelspec.KernelSpecManager(data_dir=self.data_dir)
        return spec_manager

    @observe("kernel_spec_manager")
    @observe_compat
    def _kernel_spec_manager_changed(self, change: t.Dict[str,
                                                          Instance]) -> None:
        """Drop the cached kernel spec when the spec manager is replaced."""
        # The ``kernel_spec`` property repopulates the cache on next access.
        self._kernel_spec = None

    # Total shutdown grace period in seconds. Each adjacent help-string
    # fragment ends with a space so the concatenated text keeps its word
    # boundaries.
    shutdown_wait_time: Float = Float(
        5.0,
        config=True,
        help="Time to wait for a kernel to terminate before killing it, "
        "in seconds. When a shutdown request is initiated, the kernel "
        "will be immediately sent an interrupt (SIGINT), followed "
        "by a shutdown_request message, after 1/2 of `shutdown_wait_time` "
        "it will be sent a terminate (SIGTERM) request, and finally at "
        "the end of `shutdown_wait_time` will be killed (SIGKILL). terminate "
        "and kill may be equivalent on windows.  Note that this value can be "
        "overridden by the in-use kernel provisioner since shutdown times may "
        "vary by provisioned environment.",
    )

    kernel_name: Unicode = Unicode(kernelspec.NATIVE_KERNEL_NAME)

    @observe("kernel_name")
    def _kernel_name_changed(self, change: t.Dict[str, Unicode]) -> None:
        """Invalidate the cached kernel spec and normalise the ``python`` alias."""
        self._kernel_spec = None
        new_name = change["new"]
        if new_name == "python":
            # Map the generic name onto the native kernel name.
            self.kernel_name = kernelspec.NATIVE_KERNEL_NAME

    _kernel_spec: t.Optional[kernelspec.KernelSpec] = None

    @property
    def kernel_spec(self) -> t.Optional[kernelspec.KernelSpec]:
        """The kernel spec for ``kernel_name``, looked up lazily and cached.

        Returns the cached value unchanged (possibly None) when a spec is
        already cached or when ``kernel_name`` is the empty string.
        """
        cached = self._kernel_spec
        if cached is not None or self.kernel_name == "":
            return cached
        self._kernel_spec = self.kernel_spec_manager.get_kernel_spec(
            self.kernel_name)
        return self._kernel_spec

    kernel_cmd = List(Unicode(),
                      help="""The Popen Command to launch the kernel.""")

    extra_env = Dict(
        help="""Extra environment variables to be set for the kernel.""")

    @property
    def ready(self) -> Future:
        """A future that resolves when the kernel process has started for the first time"""
        ready_future = self._ready
        return ready_future

    @property
    def ipykernel(self) -> bool:
        """Whether ``kernel_name`` is one of the standard Python kernel names."""
        python_kernel_names = {"python", "python2", "python3"}
        return self.kernel_name in python_kernel_names

    # Protected traits
    _launch_args: Any = Any()
    _control_socket: Any = Any()

    _restarter: Any = Any()

    autorestart: Bool = Bool(
        True,
        config=True,
        help="""Should we autorestart the kernel if it dies.""")

    shutting_down: bool = False

    def __del__(self) -> None:
        """Close the control socket and remove the connection file on finalization."""
        # Socket first, then the connection file.
        self._close_control_socket()
        self.cleanup_connection_file()

    # --------------------------------------------------------------------------
    # Kernel restarter
    # --------------------------------------------------------------------------

    def start_restarter(self) -> None:
        """Hook called after the kernel is launched; does nothing in this class."""
        return None

    def stop_restarter(self) -> None:
        """Hook called when shutdown begins; does nothing in this class."""
        return None

    def add_restart_callback(self,
                             callback: t.Callable,
                             event: str = "restart") -> None:
        """register a callback to be called when a kernel is restarted

        Does nothing when no restarter is set.
        """
        if self._restarter is not None:
            self._restarter.add_callback(callback, event)

    def remove_restart_callback(self,
                                callback: t.Callable,
                                event: str = "restart") -> None:
        """unregister a callback to be called when a kernel is restarted

        Does nothing when no restarter is set.
        """
        if self._restarter is not None:
            self._restarter.remove_callback(callback, event)

    # --------------------------------------------------------------------------
    # create a Client connected to our Kernel
    # --------------------------------------------------------------------------

    def client(self, **kwargs) -> KernelClient:
        """Create a client configured to connect to our kernel

        The client is built from this manager's connection info, its
        connection file and ``parent=self``; entries in ``kwargs`` are
        applied last and therefore override any of those.
        """
        settings = {}
        settings.update(self.get_connection_info(session=True))
        settings["connection_file"] = self.connection_file
        settings["parent"] = self

        # add kwargs last, for manual overrides
        settings.update(kwargs)
        return self.client_factory(**settings)

    # --------------------------------------------------------------------------
    # Kernel management
    # --------------------------------------------------------------------------

    def format_kernel_cmd(
            self,
            extra_arguments: t.Optional[t.List[str]] = None) -> t.List[str]:
        """replace templated args (e.g. {connection_file})

        Builds the command from the kernel spec's ``argv`` plus
        ``extra_arguments`` and substitutes every ``{name}`` placeholder
        whose name is known; unknown placeholders are left untouched.
        """
        extra_arguments = extra_arguments or []
        assert self.kernel_spec is not None
        cmd = self.kernel_spec.argv + extra_arguments

        python_aliases = {
            "python",
            "python%i" % sys.version_info[0],
            "python%i.%i" % sys.version_info[:2],
        }
        if cmd and cmd[0] in python_aliases:
            # executable is 'python' or 'python3', use sys.executable.
            # These will typically be the same,
            # but if the current process is in an env
            # and has been launched by abspath without
            # activating the env, python on PATH may not be sys.executable,
            # but it should be.
            cmd[0] = sys.executable

        # Make sure to use the realpath for the connection_file
        # On windows, when running with the store python, the connection_file path
        # is not usable by non python kernels because the path is being rerouted when
        # inside of a store app.
        # See this bug here: https://bugs.python.org/issue41196
        ns = {
            "connection_file": os.path.realpath(self.connection_file),
            "prefix": sys.prefix,
        }

        if self.kernel_spec:
            ns["resource_dir"] = self.kernel_spec.resource_dir

        # Saved launch arguments are applied last, so they win on clashes.
        ns.update(self._launch_args)

        placeholder = re.compile(r"\{([A-Za-z0-9_]+)\}")

        def from_ns(match):
            """Get the key out of ns if it's there, otherwise no change."""
            key = match.group(1)
            return ns.get(key, match.group())

        return [placeholder.sub(from_ns, arg) for arg in cmd]

    async def _async_launch_kernel(self, kernel_cmd: t.List[str],
                                   **kw) -> None:
        """actually launch the kernel

        Delegates the launch to the provisioner and loads the connection
        information it returns into this manager.

        override in a subclass to launch kernel subprocesses differently
        Note that provisioners can now be used to customize kernel
        environments as well.
        """
        assert self.provisioner is not None
        connection_info = await self.provisioner.launch_kernel(
            kernel_cmd,
            **kw,
        )
        # After a launch the provisioner must report a process.
        assert self.provisioner.has_process
        # Provisioner provides the connection information.  Load into kernel manager and write file.
        self._force_connection_info(connection_info)

    _launch_kernel = run_sync(_async_launch_kernel)

    # Control socket used for polite kernel shutdown

    def _connect_control_socket(self) -> None:
        """Create the control socket if there is none yet, with a linger of 100."""
        if self._control_socket is not None:
            return
        self._control_socket = self._create_connected_socket("control")
        self._control_socket.linger = 100

    def _close_control_socket(self) -> None:
        """Close and forget the control socket; a no-op when none is open."""
        if self._control_socket is not None:
            self._control_socket.close()
            self._control_socket = None

    async def _async_pre_start_kernel(
            self, **kw) -> t.Tuple[t.List[str], t.Dict[str, t.Any]]:
        """Prepares a kernel for startup in a separate process.

        If random ports (port=0) are being used, this method must be called
        before the channels are created.

        This only prepares the launch; the caller is responsible for
        launching the kernel with the returned values and for calling
        ``post_start_kernel`` afterwards.

        Parameters
        ----------
        `**kw` : optional
             keyword arguments that are passed down to build the kernel_cmd
             and launching the kernel (e.g. Popen kwargs).

        Returns
        -------
        tuple
            ``(kernel_cmd, kw)``: the formatted launch command and the
            remaining keyword arguments, with ``env`` set to the prepared
            environment.
        """
        self.shutting_down = False
        self.kernel_id = self.kernel_id or kw.pop('kernel_id', str(
            uuid.uuid4()))
        # save kwargs for use in restart
        self._launch_args = kw.copy()
        # build the Popen cmd
        extra_arguments = kw.pop('extra_arguments', [])
        kernel_cmd = self.format_kernel_cmd(extra_arguments=extra_arguments)
        # Work on a copy so neither the caller's mapping nor os.environ is mutated.
        env = kw.pop('env', os.environ).copy()
        # Don't allow PYTHONEXECUTABLE to be passed to kernel process.
        # If set, it can bork all the things.
        env.pop('PYTHONEXECUTABLE', None)
        if not self.kernel_cmd:
            # kernel_cmd has not been set manually, so take the environment
            # variables from the kernel spec.
            env.update(self.kernel_spec.env or {})
        elif self.extra_env:
            env.update(self.extra_env)
        kw['env'] = env

        return kernel_cmd, kw

    pre_start_kernel = run_sync(_async_pre_start_kernel)

    async def _async_post_start_kernel(self, **kw) -> None:
        """Finish startup after the kernel has been launched.

        Starts the restarter, connects the control socket and lets the
        provisioner run its post-launch step with the same keyword arguments.
        """
        self.start_restarter()
        self._connect_control_socket()
        assert self.provisioner is not None
        await self.provisioner.post_launch(**kw)

    post_start_kernel = run_sync(_async_post_start_kernel)

    async def _async_start_kernel(self, **kw):
        """Starts a kernel on this host in a separate process.

        If random ports (port=0) are being used, this method must be called
        before the channels are created.

        On the first successful start the ``ready`` future is resolved; if
        startup fails before that, the exception is stored on the future,
        logged and re-raised.

        Parameters
        ----------
        `**kw` : optional
             keyword arguments that are passed down to build the kernel_cmd
             and launching the kernel (e.g. Popen kwargs).
        """
        # Remember whether ``ready`` was already settled before this start,
        # so it is only ever settled once.
        was_ready = self._ready.done()

        try:
            prepared = await ensure_async(self.pre_start_kernel(**kw))
            kernel_cmd, kw = prepared

            # launch the kernel subprocess
            self.log.debug("Starting kernel: %s", kernel_cmd)
            await ensure_async(self._launch_kernel(kernel_cmd, **kw))
            await ensure_async(self.post_start_kernel(**kw))
            if not was_ready:
                # Add a small sleep to ensure tests can capture the state before done
                await asyncio.sleep(0.01)
                self._ready.set_result(None)

        except Exception as err:
            if not was_ready:
                self._ready.set_exception(err)
                self.log.exception(self._ready.exception())
            raise err

    start_kernel = run_sync(_async_start_kernel)

    async def _async_request_shutdown(self, restart: bool = False) -> None:
        """Send a shutdown request via control channel

        Also notifies the provisioner and records that a shutdown request
        has been issued.
        """
        shutdown_msg = self.session.msg("shutdown_request",
                                        content={"restart": restart})
        # ensure control socket is connected
        self._connect_control_socket()
        self.session.send(self._control_socket, shutdown_msg)
        assert self.provisioner is not None
        await self.provisioner.shutdown_requested(restart=restart)
        self._shutdown_status = _ShutdownStatus.ShutdownRequest

    request_shutdown = run_sync(_async_request_shutdown)

    async def _async_finish_shutdown(
        self,
        waittime: t.Optional[float] = None,
        pollinterval: float = 0.1,
        restart: t.Optional[bool] = False,
    ) -> None:
        """Wait for kernel shutdown, then kill process if it doesn't shutdown.

        The wait time is split in two halves: if the kernel is still alive
        after the first half it is sent a terminate request, and if it is
        still alive after the second half it is killed.

        This does not send shutdown requests - use :meth:`request_shutdown`
        first.
        """
        if waittime is None:
            waittime = max(self.shutdown_wait_time, 0)
        if self.provisioner:  # Allow provisioner to override
            waittime = self.provisioner.get_shutdown_wait_time(
                recommended=waittime)

        half_wait = waittime / 2

        # First half: give the kernel a chance to exit on its own.
        try:
            await asyncio.wait_for(self._async_wait(pollinterval=pollinterval),
                                   timeout=half_wait)
        except asyncio.TimeoutError:
            self.log.debug("Kernel is taking too long to finish, terminating")
            self._shutdown_status = _ShutdownStatus.SigtermRequest
            await ensure_async(self._send_kernel_sigterm())

        # Second half: escalate to a kill if it is still alive.
        try:
            await asyncio.wait_for(self._async_wait(pollinterval=pollinterval),
                                   timeout=half_wait)
        except asyncio.TimeoutError:
            self.log.debug("Kernel is taking too long to finish, killing")
            self._shutdown_status = _ShutdownStatus.SigkillRequest
            await ensure_async(self._kill_kernel(restart=restart))
        else:
            # Process is no longer alive, wait and clear
            if self.has_kernel:
                assert self.provisioner is not None
                await self.provisioner.wait()

    finish_shutdown = run_sync(_async_finish_shutdown)

    async def _async_cleanup_resources(self, restart: bool = False) -> None:
        """Clean up resources when the kernel is shut down

        When ``restart`` is True the connection file and the ZMQ context
        are kept.
        """
        keep_for_restart = restart
        if not keep_for_restart:
            self.cleanup_connection_file()

        self.cleanup_ipc_files()
        self._close_control_socket()
        self.session.parent = None

        # Only destroy a context this manager created itself.
        if self._created_context and not keep_for_restart:
            self.context.destroy(linger=100)

        if self.provisioner:
            await self.provisioner.cleanup(restart=restart)

    cleanup_resources = run_sync(_async_cleanup_resources)

    async def _async_shutdown_kernel(self,
                                     now: bool = False,
                                     restart: bool = False):
        """Attempts to stop the kernel process cleanly.

        This attempts to shutdown the kernels cleanly by:

        1. Sending it a shutdown message over the control channel.
        2. If that fails, the kernel is shutdown forcibly by sending it
           a signal.

        Parameters
        ----------
        now : bool
            Should the kernel be forcible killed *now*. This skips the
            first, nice shutdown attempt.
        restart: bool
            Will this kernel be restarted after it is shutdown. When this
            is True, connection files will not be cleaned up.
        """
        # Shutdown is a no-op for a kernel that had a failed startup.
        # Only ask the future for its exception once it has settled: on an
        # unsettled future ``exception()`` raises InvalidStateError (asyncio)
        # or blocks (concurrent.futures), and on a cancelled one it raises
        # CancelledError.
        ready = self._ready
        if (ready.done() and not ready.cancelled()
                and ready.exception() is not None):
            return

        self.shutting_down = True  # Used by restarter to prevent race condition
        # Stop monitoring for restarting while we shutdown.
        self.stop_restarter()

        await ensure_async(self.interrupt_kernel())

        if now:
            await ensure_async(self._kill_kernel())
        else:
            await ensure_async(self.request_shutdown(restart=restart))
            # Don't send any additional kernel kill messages immediately, to give
            # the kernel a chance to properly execute shutdown actions.
            # finish_shutdown waits and escalates to terminate/kill itself.
            await ensure_async(self.finish_shutdown(restart=restart))

        await ensure_async(self.cleanup_resources(restart=restart))

    shutdown_kernel = run_sync(_async_shutdown_kernel)

    async def _async_restart_kernel(self,
                                    now: bool = False,
                                    newports: bool = False,
                                    **kw) -> None:
        """Restarts a kernel with the arguments that were used to launch it.

        Parameters
        ----------
        now : bool, optional
            If True, the kernel is forcefully restarted *immediately*, without
            having a chance to do any cleanup action.  Otherwise the kernel is
            given a chance to clean up before a forceful restart is issued.

            In all cases the kernel is restarted, the only difference is whether
            it is given a chance to perform a clean shutdown or not.

        newports : bool, optional
            If the old kernel was launched with random ports, this flag decides
            whether the same ports and connection file will be used again.
            If False, the same ports and connection file are used. This is
            the default. If True, new random port numbers are chosen and a
            new connection file is written. It is still possible that the newly
            chosen random port numbers happen to be the same as the old ones.

        `**kw` : optional
            Any options specified here will overwrite those used to launch the
            kernel.

        Raises
        ------
        RuntimeError
            If no launch arguments were saved by an earlier start, or if the
            ``ready`` future has not settled yet.
        """
        launch_args = self._launch_args
        if launch_args is None:
            raise RuntimeError(
                "Cannot restart the kernel. No previous call to 'start_kernel'."
            )

        if not self._ready.done():
            raise RuntimeError(
                "Cannot restart the kernel. Kernel has not fully started.")

        # Stop currently running kernel.
        await ensure_async(self.shutdown_kernel(now=now, restart=True))

        if newports:
            self.cleanup_random_ports()

        # Start new kernel.
        self._launch_args.update(kw)
        await ensure_async(self.start_kernel(**self._launch_args))

    restart_kernel = run_sync(_async_restart_kernel)

    @property
    def has_kernel(self) -> bool:
        """Has a kernel process been started that we are actively managing."""
        if self.provisioner is None:
            return False
        return self.provisioner.has_process

    async def _async_send_kernel_sigterm(self, restart: bool = False) -> None:
        """similar to _kill_kernel, but with sigterm (not sigkill), but do not block"""
        if not self.has_kernel:
            return
        assert self.provisioner is not None
        await self.provisioner.terminate(restart=restart)

    _send_kernel_sigterm = run_sync(_async_send_kernel_sigterm)

    async def _async_kill_kernel(self, restart: bool = False) -> None:
        """Kill the running kernel.

        Does nothing when no kernel is being managed. After the kill, waits
        up to five seconds for the process to go away and only logs a
        warning if it does not.

        This is a private method, callers should use shutdown_kernel(now=True).
        """
        if not self.has_kernel:
            return

        assert self.provisioner is not None
        await self.provisioner.kill(restart=restart)

        # Wait until the kernel terminates.
        try:
            await asyncio.wait_for(self._async_wait(), timeout=5.0)
        except asyncio.TimeoutError:
            # Wait timed out, just log warning but continue - not much more we can do.
            self.log.warning(
                "Wait for final termination of kernel timed out - continuing..."
            )
        else:
            # Process is no longer alive, wait and clear
            if self.has_kernel:
                await self.provisioner.wait()

    _kill_kernel = run_sync(_async_kill_kernel)

    async def _async_interrupt_kernel(self) -> None:
        """Interrupts the kernel by sending it a signal.

        The kernel spec's ``interrupt_mode`` selects the mechanism:
        ``'signal'`` sends SIGINT (or uses the win32 interrupt event on
        Windows) and ``'message'`` sends an ``interrupt_request`` on the
        control channel. Any other mode does nothing.

        Unlike ``signal_kernel``, this operation is well supported on all
        platforms.

        Raises
        ------
        RuntimeError
            If no kernel is running.
        """
        if not self.has_kernel:
            raise RuntimeError(
                "Cannot interrupt kernel. No kernel is running!")

        interrupt_mode = self.kernel_spec.interrupt_mode
        if interrupt_mode == 'signal':
            if sys.platform == 'win32':
                from .win_interrupt import send_interrupt
                # NOTE(review): relies on ``self.kernel`` exposing
                # ``win32_interrupt_event`` - confirm where it is set.
                send_interrupt(self.kernel.win32_interrupt_event)
            else:
                # ``signal_kernel`` may return an awaitable; await it like
                # every other call in this class so the signal is really
                # sent instead of the result being discarded.
                await ensure_async(self.signal_kernel(signal.SIGINT))

        elif interrupt_mode == 'message':
            msg = self.session.msg("interrupt_request", content={})
            self._connect_control_socket()
            self.session.send(self._control_socket, msg)

    interrupt_kernel = run_sync(_async_interrupt_kernel)

    async def _async_signal_kernel(self, signum: int) -> None:
        """Sends a signal to the process group of the kernel (this
        usually includes the kernel and any subprocesses spawned by
        the kernel).

        Note that since only SIGTERM is supported on Windows, this function is
        only useful on Unix systems.

        Raises RuntimeError when no kernel is running.
        """
        if not self.has_kernel:
            raise RuntimeError("Cannot signal kernel. No kernel is running!")
        assert self.provisioner is not None
        await self.provisioner.send_signal(signum)

    signal_kernel = run_sync(_async_signal_kernel)

    async def _async_is_alive(self) -> bool:
        """Is the kernel process still running?"""
        if not self.has_kernel:
            return False
        assert self.provisioner is not None
        # A poll result of None is treated as "still running".
        poll_result = await self.provisioner.poll()
        return poll_result is None

    is_alive = run_sync(_async_is_alive)

    async def _async_wait(self, pollinterval: float = 0.1) -> None:
        """Poll ``is_alive`` every ``pollinterval`` seconds until the kernel is gone."""
        # There is no timeout here; callers are responsible for bounding
        # the wait (see _kill_kernel()).
        while True:
            still_alive = await ensure_async(self.is_alive())
            if not still_alive:
                break
            await asyncio.sleep(pollinterval)
Esempio n. 5
0
class ClientOperation(Operation):
    """
    Shared machinery for every client-side operation.

    An instance owns one stream continuation obtained from the connection
    and tracks two futures: one for the initial response and one that is
    resolved when the continuation closes.

    Nearly everything here is private/protected. Subclasses are expected to
    re-expose a public API documented with their own concrete types.
    """

    def __init__(self, stream_handler: StreamResponseHandler,
                 shape_index: ShapeIndex, connection: Connection):
        # Not meant to be instantiated directly: instances are created by
        # ServiceClient.new_operation(). Every callback that modifies state
        # fires on the same thread, so members need no lock protection.
        self._stream_handler = stream_handler
        self._shape_index = shape_index
        self._message_count = 0

        # Futures are flagged as running up front so they cannot be cancelled.
        closed = Future()
        closed.set_running_or_notify_cancel()
        self._closed_future = closed

        first_response = Future()
        first_response.set_running_or_notify_cancel()
        self._initial_response_future = first_response

        self._protocol_handler = _ProtocolContinuationHandler(self)
        self._continuation = connection._new_stream(self._protocol_handler)

    def _activate(self, request: Shape) -> Future:
        """Serialize ``request`` and send it as the activating message.

        Returns whatever the continuation's ``activate()`` call returns.
        """
        content_type = Header.from_string(CONTENT_TYPE_HEADER,
                                          CONTENT_TYPE_APPLICATION_JSON)
        model_type = Header.from_string(SERVICE_MODEL_TYPE_HEADER,
                                        request._model_name())
        headers = [content_type, model_type]
        payload = self._json_payload_from_shape(request)
        logger.debug("%r sending request APPLICATION_MESSAGE %s %r", self,
                     headers, payload)
        return self._continuation.activate(
            operation=self._model_name(),
            headers=headers,
            payload=payload,
            message_type=protocol.MessageType.APPLICATION_MESSAGE)

    def _send_stream_event(self, event: Shape) -> Future:
        """Serialize ``event`` and send it on the already-open stream.

        Returns whatever the continuation's ``send_message()`` call returns.
        """
        content_type = Header.from_string(CONTENT_TYPE_HEADER,
                                          CONTENT_TYPE_APPLICATION_JSON)
        model_type = Header.from_string(SERVICE_MODEL_TYPE_HEADER,
                                        event._model_name())
        headers = [content_type, model_type]
        payload = self._json_payload_from_shape(event)
        logger.debug("%r sending event APPLICATION_MESSAGE %s %r", self,
                     headers, payload)
        return self._continuation.send_message(
            headers=headers,
            payload=payload,
            message_type=protocol.MessageType.APPLICATION_MESSAGE)

    def _get_response(self) -> Future:
        """Return the future tracking the initial response."""
        return self._initial_response_future

    def close(self) -> Future:
        """Ask for the stream to be terminated.

        Returns the future that is resolved once the continuation has
        actually closed.
        """
        terminate = protocol.MessageFlag.TERMINATE_STREAM
        try:
            # Best effort: an empty APPLICATION_MESSAGE carrying the
            # TERMINATE_STREAM flag. Sending fails when the stream is
            # already closed, which is fine, so errors are ignored.
            self._continuation.send_message(
                message_type=protocol.MessageType.APPLICATION_MESSAGE,
                flags=terminate)
        except Exception:
            pass
        return self._closed_future

    def _find_header(self, headers, name, header_type=HeaderType.STRING):
        """Return the value of the first header matching ``name``
        (case-insensitively) and ``header_type``, or None when none does."""
        wanted = name.lower()
        for candidate in headers:
            if candidate.name.lower() == wanted and candidate.type == header_type:
                return candidate.value
        return None

    def _shape_from_json_payload(self, payload_bytes, shape_type):
        """Decode JSON bytes into an instance of ``shape_type``.

        Any failure along the way is re-raised as DeserializeError.
        """
        try:
            decoded = json.loads(payload_bytes.decode())
            return shape_type._from_payload(decoded)
        except Exception as e:
            raise DeserializeError(
                "Failed to deserialize %s" % shape_type._model_name(), e,
                payload_bytes)

    def _json_payload_from_shape(self, shape):
        """Encode ``shape`` as JSON bytes.

        Any failure along the way is re-raised as SerializeError.
        """
        try:
            return json.dumps(shape._to_payload()).encode()
        except Exception as e:
            raise SerializeError("Failed to serialize", shape, e)

    def _on_continuation_message(self, headers: Sequence[Header],
                                 payload: bytes,
                                 message_type: protocol.MessageType,
                                 flags: int, **kwargs):
        """Validate an incoming message and route it.

        APPLICATION_MESSAGE payloads go to _handle_data(). Anything else is
        treated as an APPLICATION_ERROR and turned into an exception. Every
        exception raised while processing ends up in _handle_error().
        """
        self._message_count += 1
        logger.debug("%r received #%d %s %s %r", self, self._message_count,
                     message_type.name, headers, payload)
        try:
            model_name = self._find_header(headers, SERVICE_MODEL_TYPE_HEADER)
            if model_name is None:
                # An empty TERMINATE_STREAM message is acceptable.
                if flags & protocol.MessageFlag.TERMINATE_STREAM:
                    return
                raise UnmappedDataError(
                    "Missing header: " + SERVICE_MODEL_TYPE_HEADER,
                    headers, payload)

            content_type = self._find_header(headers, CONTENT_TYPE_HEADER)
            if content_type is None:
                raise UnmappedDataError(
                    "Missing header: " + CONTENT_TYPE_HEADER,
                    headers, payload)
            if content_type != CONTENT_TYPE_APPLICATION_JSON:
                raise UnmappedDataError(
                    "Unexpected {}: '{}', expected: '{}'".format(
                        CONTENT_TYPE_HEADER, content_type,
                        CONTENT_TYPE_APPLICATION_JSON),
                    headers, payload)

            if message_type == protocol.MessageType.APPLICATION_MESSAGE:
                self._handle_data(model_name, payload)
                return

            # Anything that is not an APPLICATION_MESSAGE is handled as an
            # APPLICATION_ERROR whose payload models an exception.
            error_type = self._shape_index.find_shape_type(model_name)
            if error_type is None:
                raise UnmappedDataError(
                    "Unknown error type: {}".format(model_name), payload)
            if not issubclass(error_type, Exception):
                raise UnmappedDataError(
                    "Unexpected type: {} sent as APPLICATION_ERROR, expected subclass of Exception".format(
                        model_name),
                    payload)
            raise self._shape_from_json_payload(payload, error_type)
        except Exception as e:
            self._handle_error(e, flags)

    def _handle_data(self, model_name, payload):
        """
        Deliver an APPLICATION_MESSAGE payload: the first message completes
        the initial-response future, later ones are passed to the stream
        handler as stream events. Exceptions raised here are routed to
        _handle_error() by the caller.
        """
        if self._message_count == 1:
            # The first message is the "response".
            response_type = self._response_type()
            response_name = response_type._model_name()
            if model_name != response_name:
                raise UnmappedDataError(
                    "Unexpected response type: {}, expected: {}".format(
                        model_name, response_name),
                    payload)
            response = self._shape_from_json_payload(payload, response_type)
            self._initial_response_future.set_result(response)
            return

        # Every message after the first is a "stream event".
        event_type = self._response_stream_type()
        if event_type is None:
            raise UnmappedDataError(
                "Operation does not support response stream events, received type: {}".format(
                    model_name),
                payload)
        event_name = event_type._model_name()
        if model_name != event_name:
            raise UnmappedDataError(
                "Unexpected response stream event type: {}, expected: {}".format(
                    model_name, event_name),
                payload)
        event = self._shape_from_json_payload(payload, event_type)
        self._stream_handler.on_stream_event(event)

    def _handle_error(self, error, message_flags):
        """
        Report ``error`` (an APPLICATION_ERROR payload, or an exception hit
        while processing an APPLICATION_MESSAGE) either as a failed initial
        response or as a stream error, closing the stream where required.
        """
        already_terminated = message_flags & protocol.MessageFlag.TERMINATE_STREAM
        try:
            if self._message_count == 1:
                # An error on the first message fails the "response", and
                # such errors must terminate the stream.
                self._initial_response_future.set_exception(error)
                should_close = True
            elif self._stream_handler is None:
                # This operation did not expect more than one message.
                raise error
            else:
                # Errors on later messages are "stream errors". The stream
                # is closed when the callback returns something truthy, or
                # returns nothing at all.
                verdict = self._stream_handler.on_stream_error(error)
                should_close = bool(verdict) or verdict is None

            if should_close and not already_terminated:
                self.close()
        except Exception:
            logger.exception("%r unhandled exception while receiving message",
                             self)

    def _on_continuation_closed(self, **kwargs) -> None:
        """Settle both futures and notify the stream handler on close."""
        logger.debug("%r closed", self)

        pending_response = self._initial_response_future
        if not pending_response.done():
            pending_response.set_exception(StreamClosedError())

        self._closed_future.set_result(None)

        if not self._stream_handler:
            return
        try:
            self._stream_handler.on_stream_closed()
        except Exception:
            logger.exception("%r unhandled exception calling callback",
                             self)
Esempio n. 6
0
def test_future_result_now():
    """A future handed to corocc.start() is already resolved with 42 by the
    time start() returns for result_now()."""
    pending = Future()
    assert not pending.done()

    corocc.start(result_now(), future=pending)

    assert pending.done()
    assert pending.result() == 42
Esempio n. 7
0
class NamespacedResourceReflector(LoggingConfigurable):
    """
    Keeps an in-memory, continuously refreshed copy of one kind of
    kubernetes resource.

    This is a base class: create one subclass per kind of resource that
    needs to be watched.
    """
    labels = Dict({}, config=True, help="""
        Labels to reflect onto local cache
        """)

    namespace = Unicode(None, allow_none=True, help="""
        Namespace to watch for resources in
        """)

    resources = Dict({}, help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """)

    kind = Unicode('resource', help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """)

    list_method_name = Unicode("", help="""
        Name of function (on apigroup respresented by `api_group_name`) that is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely want something
        of the form list_namespaced_<resource> - for example, `list_namespaced_pod` will
        give you a PodReflector.

        This must be set by a subclass.
        """)

    api_group_name = Unicode('CoreV1Api', help="""
        Name of class that represents the apigroup on which `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group. If you want to watch Ingresses,
        for example, you would have to use ExtensionsV1beta1Api
        """)

    on_failure = Any(help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The kubernetes configuration is loaded here: this object is a
        # Singleton, so this constructor runs well before anything else.
        # In-cluster configuration is tried first, then the local kube config.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        selector_terms = (
            '{}={}'.format(key, value) for key, value in self.labels.items()
        )
        self.label_selector = ','.join(selector_terms)

        self.first_load_future = Future()

        self.start()

    def _list_and_update(self):
        """
        Replace the local cache with the result of a full listing.

        Returns the resource version reported by that listing, which is
        what a subsequent watch needs as its starting point.
        """
        list_resources = getattr(self.api, self.list_method_name)
        listing = list_resources(
            self.namespace,
            label_selector=self.label_selector,
        )
        # Rebinding the attribute swaps the whole dictionary in one step.
        self.resources = {item.metadata.name: item for item in listing.items}
        return listing.metadata.resource_version

    def _watch_and_update(self):
        """
        Keep the local cache in sync with the cluster, forever.

        Intended to run on a thread other than the main one.

        Each round does a full listing, marks the first-load future as done
        (once), then streams change events and applies them to the cache.
        Starting every round with a full listing also picks up changes that
        were missed while no watch was active.

        Failures are retried with exponential backoff: the delay starts at
        0.1s, doubles on every error, and is reset by any received event.
        Once the doubled delay exceeds 30s the reflector gives up, calls
        `on_failure` if one is set, and returns.

        The cache dictionary is updated from this thread without locks while
        other threads may be reading it. That relies on individual
        dictionary operations being atomic, and on nobody doing a
        'fetch / modify / update' cycle on it.
        """
        cur_delay = 0.1
        while True:
            self.log.info("watching for %s with label selector %s in namespace %s", self.kind, self.label_selector, self.namespace)
            watcher = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # The initial data is now available.
                    self.first_load_future.set_result(None)

                events = watcher.stream(
                    getattr(self.api, self.list_method_name),
                    self.namespace,
                    label_selector=self.label_selector,
                    resource_version=resource_version,
                )
                for event in events:
                    # Receiving anything means we are healthy again.
                    cur_delay = 0.1
                    resource = event['object']
                    deleted = event['type'] == 'DELETED'
                    name = resource.metadata.name
                    if deleted:
                        # Single atomic removal from the dictionary.
                        self.resources.pop(name, None)
                    else:
                        # Single atomic store into the dictionary.
                        self.resources[name] = resource
            except Exception:
                cur_delay *= 2
                if cur_delay > 30:
                    self.log.exception("Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception("Error when watching resources, retrying in %ss", cur_delay)
                time.sleep(cur_delay)
            finally:
                watcher.stop()

    def start(self):
        """
        Begin reflecting resources.

        A blocking full listing is done first so that anything inspecting
        the cache (polls, for instance) does not race with the initial
        load. Meant to be called exactly once, during program
        initialization while the singleton is being created.

        Raises ValueError when a watch thread has already been set up.
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError('Thread watching for resources is already running')

        self._list_and_update()
        # Daemon thread: if it is the only thread left alive, the app exits.
        watch_thread = threading.Thread(target=self._watch_and_update)
        watch_thread.daemon = True
        self.watch_thread = watch_thread
        watch_thread.start()
Esempio n. 8
0
class SystemdJobHandler(object):
    """Helper for waiting until one or more systemd jobs have finished.

    Usage:

    with SystemdJobHandler() as job_handler:
        job_path = job_handler.manager.$do_something_to_create_a_job
        job_handler.register_job(job_path)
        # Further parallel jobs can be registered the same way

        job_results = yield job_handler.all_jobs_done_future()

    job_results is a dictionary; in SYSTEMD_MANAGER_INTERFACE.JobRemoved
    terms, job_results[unit] = result
    """

    def __init__(self):
        self.__future = Future()
        self.__pending_jobs = set()
        self.__job_results = {}
        self.__signal_match = None

        system_bus = slip.dbus.SystemBus()
        self.__manager = dbus.Interface(
            system_bus.get_object(SYSTEMD_MANAGER_NAME, SYSTEMD_MANAGER_PATH),
            SYSTEMD_MANAGER_INTERFACE)

    def __job_removed_handler(self, job_id, job_path, unit, result):
        """Handle the SYSTEMD_MANAGER_INTERFACE.JobRemoved signal."""
        signal_args = (job_id, job_path, unit, result)
        log.debug1("systemd JobRemoved signal: %s" % repr(signal_args))

        if job_path not in self.__pending_jobs:
            return

        self.__job_results[unit] = result
        self.__pending_jobs.remove(job_path)
        # The last pending job just finished: publish the collected results.
        if not self.__pending_jobs:
            self.__future.set_result(self.__job_results)

    # The context manager protocol guarantees that the signal registration
    # is removed again.
    def __enter__(self):
        assert self.__signal_match is None, "Recursive use of SystemdJobProcessor"
        assert not self.__future.done(), "Repeated use of SystemdJobProcessor"
        match = self.__manager.connect_to_signal(
            "JobRemoved", self.__job_removed_handler)
        self.__signal_match = match
        return self  # Enables "with SystemdJobHandler() as job_handler:"

    def __exit__(self, *args):
        self.__signal_match.remove()
        self.__signal_match = None
        return False

    # Not strictly a required part of the API, but both this class and its
    # callers need the interface object, so it is exposed.
    @property
    def manager(self):
        """A dbus.Interface object for SYSTEMD_MANAGER_INTERFACE."""
        return self.__manager

    def register_job(self, job_path):
        """Start tracking a job until it completes.

        :param job_path: A path of the job object.  Pass it in soon after
        receiving it (in particular before allowing any D-Bus signals to
        be processed).
        """
        assert self.__signal_match is not None, \
            "Registering for jobs when not watching for results"
        self.__pending_jobs.add(job_path)

    def all_jobs_done_future(self):
        """Return the future carrying the results of all registered jobs.

        :returns: a future.  Its eventual result is a dictionary of
        unit name -> job result string.
        """
        watching = self.__signal_match is not None
        assert watching and len(self.__pending_jobs) > 0
        return self.__future
Esempio n. 9
0
 def done_callback(self, future: Future):
     """Report the outcome of ``future`` to ``self.callback``.

     ``self.callback`` is invoked as
     ``callback(success, self.url, self.webloc_filepath)``. ``success`` is
     True only when the future finished, was not cancelled, and holds no
     exception.
     """
     # exception() raises CancelledError on a cancelled future, so
     # cancellation is checked first and reported as a plain failure.
     success = (future.done()
                and not future.cancelled()
                and future.exception() is None)
     self.callback(success, self.url, self.webloc_filepath)
Esempio n. 10
0
    def test_futures(self):
        f = Future()
        self.assertEqual(f.done(), False)
        self.assertEqual(f.running(), False)

        self.assertTrue(f.cancel())
        self.assertTrue(f.cancelled())

        with self.assertRaises(CancelledError):
            f.result()

        with self.assertRaises(CancelledError):
            f.exception()

        f = Future()
        f.set_running_or_notify_cancel()

        with self.assertRaises(TimeoutError):
            f.result(0.1)

        with self.assertRaises(TimeoutError):
            f.exception(0.1)

        f = Future()
        f.set_running_or_notify_cancel()
        f.set_result("result")

        self.assertEqual(f.result(), "result")
        self.assertEqual(f.exception(), None)

        f = Future()
        f.set_running_or_notify_cancel()

        f.set_exception(Exception("foo"))

        with self.assertRaises(Exception):
            f.result()

        class Ref():
            def __init__(self, ref):
                self.ref = ref

            def set(self, ref):
                self.ref = ref

        # Test that done callbacks are called.
        called = Ref(False)
        f = Future()
        f.add_done_callback(lambda f: called.set(True))
        f.set_result(None)
        self.assertTrue(called.ref)

        # Test that callbacks are called when cancelled.
        called = Ref(False)
        f = Future()
        f.add_done_callback(lambda f: called.set(True))
        f.cancel()
        self.assertTrue(called.ref)

        # Test that callbacks are called immediately when the future is
        # already done.
        called = Ref(False)
        f = Future()
        f.set_result(None)
        f.add_done_callback(lambda f: called.set(True))
        self.assertTrue(called.ref)

        count = Ref(0)
        f = Future()
        f.add_done_callback(lambda f: count.set(count.ref + 1))
        f.add_done_callback(lambda f: count.set(count.ref + 1))
        f.set_result(None)
        self.assertEqual(count.ref, 2)

        # Test that the callbacks are called with the future as argument.
        done_future = Ref(None)
        f = Future()
        f.add_done_callback(lambda f: done_future.set(f))
        f.set_result(None)
        self.assertIs(f, done_future.ref)
Esempio n. 11
0
from ..utils import get_third_party_modules_from_config
from .pool import create_supervisor_actor_pool, create_worker_actor_pool
from .service import (
    start_supervisor,
    start_worker,
    stop_supervisor,
    stop_worker,
    load_config,
)
from .session import AbstractSession, _new_session, ensure_isolation_created

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Future that is resolved with 0 at interpreter exit, unless something has
# already completed it by then. The done() check keeps the exit handler
# from setting a result twice.
_is_exiting_future = SyncFuture()
atexit.register(lambda: _is_exiting_future.set_result(0)
                if not _is_exiting_future.done() else None)
# NOTE(review): atexit runs handlers in reverse registration order, so
# stop_isolation runs before the handler above resolves the future;
# confirm that ordering is intended.
atexit.register(stop_isolation)


async def new_cluster_in_isolation(
    address: str = "0.0.0.0",
    n_worker: int = 1,
    n_cpu: Union[int, str] = "auto",
    cuda_devices: Union[List[int], str] = "auto",
    subprocess_start_method: str = None,
    backend: str = None,
    config: Union[str, Dict] = None,
    web: bool = True,
    timeout: float = None,
) -> ClientType:
    if subprocess_start_method is None:
Esempio n. 12
0
class RemoteTask(TaskDefinition):
    """Local handle for a task that lives on a cluster.

    The handle copies the given task definition, tracks the task's status,
    error and result, and exposes completion through ``self.future`` (also
    awaitable by awaiting the handle itself).

    Any attribute that is not found normally is treated as the name of a
    remote method: ``await task.some_method(x=1)`` is forwarded as
    ``await task.call('some_method', {'x': 1})``.
    """

    def __init__(self, taskdef: TaskDefinition, cluster):
        """Build the handle from ``taskdef``'s serialized fields.

        :param taskdef: definition whose ``serialize()`` output is passed
            as keyword arguments to the base-class constructor.
        :param cluster: object used by ``destroy()`` to remove the task.
        """
        kwargs = taskdef.serialize()
        super().__init__(**kwargs)
        self.conn = None
        self.nonce = 0
        self.cluster = cluster
        self.future = Future()
        # asyncio-side view of the same future, used by __await__.
        self.awaitable = asyncio.wrap_future(self.future)
        self.status = WAIT
        self.error = None
        self.result = None

    def __await__(self):
        """Await completion of the task (result or TaskError)."""
        return self.awaitable.__await__()

    @property
    def done(self) -> bool:
        """Whether the completion future has been resolved."""
        return self.future.done()

    def destroy(self) -> None:
        """Ask the cluster to destroy this task, identified by ``self.id``."""
        self.cluster.destroy(self.id)

    def set_status(self, status: str) -> None:
        """Record a new status.

        :raises RuntimeError: when moving from FAIL to DONE or from DONE
            to FAIL, since a finished task cannot change its outcome.
        """
        # sanity checks
        if self.status == FAIL and status == DONE:
            raise RuntimeError('Cant complete a failed task')

        if self.status == DONE and status == FAIL:
            raise RuntimeError('Cant fail a completed task')

        # update status
        self.status = status

    def set_error(self, error: str) -> None:
        """Mark the task as failed and fail the completion future.

        The future receives a ``TaskError(error)`` unless it is already
        resolved.
        """
        self.set_status(FAIL)
        self.error = error
        if not self.future.done():
            self.future.set_exception(TaskError(error))

    def set_result(self, result: any, result_type: any = 'any') -> None:
        """Mark the task as done and resolve the completion future.

        :param result: serialized result value.
        :param result_type: type description used to pick the
            deserializer applied to ``result``.
        """
        # unpack type & deserialize result
        result_type = type_from_description(result_type)
        result = result_type.deserialize(result)

        self.set_status(DONE)
        self.result = result
        if not self.future.done():
            self.future.set_result(result)

    async def wait_for_init(self, timeout=30) -> None:
        """Wait until the task reaches the WORK status.

        The status is checked every 0.2 seconds.

        :param timeout: seconds to wait; measured as the sum of the sleep
            intervals rather than wall-clock time.
        :raises RuntimeError: if the task is not in WAIT status when
            called, or if it fails while waiting.
        :raises TimeoutError: if the task does not start within ``timeout``.
        """
        if self.status != WAIT:
            raise RuntimeError(f'Cant await task with status {self.status}')

        slept = 0
        interval = 0.2
        while True:
            if self.status == WORK:
                return
            if self.status == FAIL:
                raise RuntimeError(
                    f'Awaited task failed with error: {self.error}')

            if slept > timeout:
                raise TimeoutError('Task took to long to initialize')

            await asyncio.sleep(interval)
            slept += interval

    async def call(self, method, args=None):
        """Invoke ``method`` on the remote task and return its result.

        :param method: name of the remote method.
        :param args: mapping of arguments; defaults to a fresh empty dict
            for each call so no state is shared between calls.
        """
        if args is None:
            args = {}

        # RPC needs the task to be in WORK status; wait for it to
        # initialize instead of failing right away.
        if self.status != WORK:
            await self.wait_for_init()

        return await self.conn.rpc.call(method, args)

    async def stop(self):
        """Send the 'stop' RPC to the task."""
        # special case RPC - it always causes a send exception
        await self.call('stop')

    def __getattr__(self, method):
        """Turn an unknown attribute into an RPC stub.

        Only called when normal attribute lookup fails. The returned
        coroutine function forwards its keyword arguments to ``call()``.

        :raises AttributeError: for dunder names, so that protocol probes
            (copy, pickle, ``hasattr`` checks, ...) see a missing attribute
            instead of a callable that would perform a remote call.
        """
        if method.startswith('__') and method.endswith('__'):
            raise AttributeError(method)

        async def magic_rpc(**kwargs):
            return await self.call(method, kwargs)

        return magic_rpc
Esempio n. 13
0
class ExpectationBase(object):
    """Common state and behaviour for expectations.

    An expectation wraps a future and records whether it has been
    scheduled, whether it succeeded, and whether it timed out.
    Expectations can be combined with ``|``, ``&`` and ``>>``.
    """

    __metaclass__ = ABCMeta

    def __init__(self):
        self._future = Future()
        self._awaited = False
        self._scheduler = None
        self._success = False
        self._timeout = None
        self._deadline = None
        self._timedout = False
        self._float_tol = DEFAULT_FLOAT_TOL

    def _schedule(self, scheduler):
        # Called when this expectation gets scheduled on `scheduler`.
        # Subclasses may use the scheduler at this point, for instance to
        # schedule another expectation later, or to send the message whose
        # outcome this expectation is waiting for.
        self._awaited = True
        self._scheduler = scheduler
        if self._timeout is None:
            return
        self._deadline = timestamp_now() + self._timeout

    def success(self):
        """Return whether a result has been recorded through set_result()."""
        return self._success

    def wait(self, _timeout=None):
        """Block until the future resolves, then return ``self``.

        Does nothing when the expectation was never scheduled. A timeout
        marks the expectation as timed out; a cancelled future results in
        cancel() being called.
        """
        if not self._awaited:
            return self
        try:
            self._future.result(timeout=_timeout)
        except FutureTimeoutError:
            self.set_timedout()
        except FutureCancelledError:
            self.cancel()
        return self

    def set_result(self):
        """Flag success and resolve the future with the received events."""
        self._success = True
        events = self.received_events()
        return self._future.set_result(events)

    def set_exception(self, exception):
        """Fail the underlying future with ``exception``."""
        return self._future.set_exception(exception)

    def set_timeout(self, _timeout):
        """Store the timeout that _schedule() uses to compute the deadline."""
        self._timeout = _timeout

    def set_timedout(self):
        """Mark as timed out and cancel, unless already successful."""
        if self._success:
            return
        self._timedout = True
        self.cancel()

    def cancel(self):
        """Cancel the underlying future and return its answer."""
        return self._future.cancel()

    def cancelled(self):
        """Return whether the underlying future was cancelled."""
        return self._future.cancelled()

    def timedout(self):
        """Return whether this expectation has timed out.

        When a deadline is set and the expectation has not succeeded, the
        deadline is checked against the current timestamp, and passing it
        also cancels the future.
        """
        if self._timedout:
            return True
        if self._success:
            return False
        if self._deadline is None:
            return self._timedout

        self._timedout = (timestamp_now() > self._deadline)
        if self._timedout:
            self.cancel()
        return self._timedout

    def set_float_tol(self, _float_tol):
        """Store the tolerance kept in ``_float_tol``."""
        self._float_tol = _float_tol

    def base_copy(self, *args, **kwds):
        """Create a new instance of the same class with fresh base state.

        ``args`` and ``kwds`` go to the subclass constructor. Only the
        timeout and the float tolerance are carried over.
        """
        clone = self.__class__(*args, **kwds)
        ExpectationBase.__init__(clone)
        clone._timeout = self._timeout
        clone._float_tol = self._float_tol
        return clone

    @abstractmethod
    def copy(self):
        """
        Every expectation subclass has to provide a shallow copy.
        """
        pass

    def done(self):
        """Return the success flag, unless the expectation is scheduled and
        its future is still pending, in which case return False."""
        if not self._future.done() and self._awaited:
            return False
        return self._success

    def __bool__(self):
        return self.done()

    def __or__(self, other):
        return ArsdkWhenAnyExpectation([self, other])

    def __and__(self, other):
        return ArsdkWhenAllExpectations([self, other])

    def __rshift__(self, other):
        return ArsdkWhenSequenceExpectations([self, other])

    __nonzero__ = __bool__
class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.

    Creating an instance loads the kubernetes configuration, performs one
    blocking list of the resources and starts a daemon thread that keeps
    `resources` current by watching for changes.
    """
    labels = Dict(
        {},
        config=True,
        help="""
        Labels to reflect onto local cache
        """
    )

    fields = Dict(
        {},
        config=True,
        help="""
        Fields to restrict the reflected objects
        """
    )

    namespace = Unicode(
        None,
        allow_none=True,
        help="""
        Namespace to watch for resources in
        """
    )

    resources = Dict(
        {},
        help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """
    )

    kind = Unicode(
        'resource',
        help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """
    )

    list_method_name = Unicode(
        "",
        help="""
        Name of function (on apigroup respresented by `api_group_name`) that is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely want something
        of the form list_namespaced_<resource> - for example, `list_namespaced_pod` will
        give you a PodReflector.

        This must be set by a subclass.
        """
    )

    api_group_name = Unicode(
        'CoreV1Api',
        help="""
        Name of class that represents the apigroup on which `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group. If you want to watch Ingresses,
        for example, you would have to use ExtensionsV1beta1Api
        """
    )

    request_timeout = Int(
        60,
        config=True,
        help="""
        Network timeout for kubernetes watch.

        Trigger watch reconnect when a given request is taking too long,
        which can indicate network issues.
        """
    )

    timeout_seconds = Int(
        10,
        config=True,
        help="""
        Timeout for kubernetes watch.

        Trigger watch reconnect when no watch event has been received.
        This will cause a full reload of the currently existing resources
        from the API server.
        """
    )

    restart_seconds = Int(
        30,
        config=True,
        help="""
        Maximum time before restarting a watch.

        The watch will be restarted at least this often,
        even if events are still arriving.
        Avoids trusting kubernetes watch to yield all events,
        which seems to not be a safe assumption.
        """)

    on_failure = Any(help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        """
        Load the kubernetes configuration, build the selectors and start watching.

        In-cluster configuration is tried first, falling back to the local
        kube config. The constructor ends by calling `start()`, which blocks
        until the first full listing has been fetched.
        """
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        self.label_selector = ','.join(['{}={}'.format(k, v) for k, v in self.labels.items()])
        self.field_selector = ','.join(['{}={}'.format(k, v) for k, v in self.fields.items()])

        # Resolved by the watch thread after its first successful listing
        self.first_load_future = Future()
        self._stop_event = threading.Event()

        self.start()

    def __del__(self):
        """Ask the watch thread to stop when the reflector is garbage collected."""
        # __init__ can fail before _stop_event exists (e.g. while loading the
        # kubernetes config); there is nothing to stop in that case.
        if hasattr(self, '_stop_event'):
            self.stop()

    def _list_and_update(self):
        """
        Update current list of resources by doing a full fetch.

        Overwrites all current resource info.

        Returns the resource version reported with the listing, so that a
        watch can be started from it.
        """
        initial_resources = getattr(self.api, self.list_method_name)(
            self.namespace,
            label_selector=self.label_selector,
            field_selector=self.field_selector,
            _request_timeout=self.request_timeout,
        )
        # This is an atomic operation on the dictionary!
        self.resources = {p.metadata.name: p for p in initial_resources.items}
        # return the resource version so we can hook up a watch
        return initial_resources.metadata.resource_version

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is to be run not on the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff: the retry delay doubles on
        every unexpected error and we give up (calling `on_failure` if it
        is set) once the doubled delay exceeds 30s. This should protect
        against network connections dropping and intermittent
        unavailability of the api-server. Every time we recover from an
        exception we also do a full fetch, to pick up changes that might've
        been missed in the time we were not doing a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a 'fetch / modify /
        update' cycle on them), we should be ok!
        """
        selectors = []
        if self.label_selector:
            selectors.append("label selector=%r" % self.label_selector)
        if self.field_selector:
            selectors.append("field selector=%r" % self.field_selector)
        log_selector = ', '.join(selectors)

        cur_delay = 0.1

        self.log.info(
            "watching for %s with %s in namespace %s",
            self.kind, log_selector, self.namespace,
        )
        while True:
            self.log.debug("Connecting %s watcher", self.kind)
            start = time.monotonic()
            w = watch.Watch()
            # Set when the backoff limit is hit; acted upon after cleanup
            gave_up = False
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                watch_args = {
                    'namespace': self.namespace,
                    'label_selector': self.label_selector,
                    'field_selector': self.field_selector,
                    'resource_version': resource_version,
                }
                if self.request_timeout:
                    # set network receive timeout
                    watch_args['_request_timeout'] = self.request_timeout
                if self.timeout_seconds:
                    # set watch timeout
                    watch_args['timeout_seconds'] = self.timeout_seconds
                # in case of timeout_seconds, the w.stream just exits (no exception thrown)
                # -> we stop the watcher and start a new one
                for ev in w.stream(
                        getattr(self.api, self.list_method_name),
                        **watch_args
                ):
                    # An event arrived, so the connection works: reset backoff
                    cur_delay = 0.1
                    resource = ev['object']
                    if ev['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource.metadata.name, None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource.metadata.name] = resource
                    if self._stop_event.is_set():
                        self.log.info("%s watcher stopped", self.kind)
                        break
                    watch_duration = time.monotonic() - start
                    if watch_duration >= self.restart_seconds:
                        self.log.debug(
                            "Restarting %s watcher after %i seconds",
                            self.kind, watch_duration,
                        )
                        break
            except ReadTimeoutError:
                # network read time out, just continue and restart the watch
                # this could be due to a network problem or just low activity
                self.log.warning("Read timeout watching %s, reconnecting", self.kind)
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception("Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    gave_up = True
                else:
                    self.log.exception("Error when watching resources, retrying in %ss", cur_delay)
                    time.sleep(cur_delay)
            else:
                # the stream ended without raising, reconnect
                self.log.debug("%s watcher timeout", self.kind)
            finally:
                w.stop()
            # The stop check lives after the try statement rather than in its
            # `finally` clause: a `break` inside `finally` would silently
            # discard any exception that is still propagating.
            if self._stop_event.is_set():
                self.log.info("%s watcher stopped", self.kind)
                break
            if gave_up:
                return
        self.log.warning("%s watcher finished", self.kind)

    def start(self):
        """
        Start the reflection process!

        We'll do a blocking read of all resources first, so that we don't
        race with any operations that are checking the state of the pod
        store - such as polls. This should be called only once at the
        start of program initialization (when the singleton is being created),
        and not afterwards!

        Raises ValueError when a watch thread has already been created.
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError('Thread watching for resources is already running')

        self._list_and_update()
        self.watch_thread = threading.Thread(target=self._watch_and_update)
        # If the watch_thread is only thread left alive, exit app
        self.watch_thread.daemon = True
        self.watch_thread.start()

    def stop(self):
        """Signal the watch thread to stop; it checks the flag between events and reconnects."""
        self._stop_event.set()

    def stopped(self):
        """Return True once `stop()` has been requested."""
        return self._stop_event.is_set()
Esempio n. 15
0
def testing_nonfuture():
    """
    Build a DataFuture with no parent future and check its file path.

    The `filepath` attribute must equal the absolute, user-expanded form
    of the path the DataFuture was created with.
    """
    raw_path = '~/shuffled.txt'
    data_future = DataFuture(None, raw_path)
    print(data_future)
    print("Result: ", data_future.filepath)
    expected = os.path.abspath(os.path.expanduser(raw_path))
    assert data_future.filepath == expected


if __name__ == "__main__":
    # Manual smoke test: log to stdout, write a shuffled number file and
    # build a few DataFutures on top of one pending Future.
    # (alternative: logging.basicConfig(filename='futures.testing.log',level=logging.DEBUG))
    import sys
    import random
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    logger.debug("Begin Testing")

    # One number per line, 0..9999 in random order
    with open('shuffled.txt', 'w') as testfile:
        nums = list(range(10000))
        random.shuffle(nums)
        testfile.writelines("{0}\n".format(value) for value in nums)

    foo = Future()  # type: Future[str]
    df = DataFuture(foo, './shuffled.txt')
    dx = DataFuture(foo, '~/shuffled.txt')

    # Print the completion state of the parent future and of a DataFuture
    for pending in (foo, df):
        print(pending.done())

    testing_nonfuture()
Esempio n. 16
0
class SystemdJobHandler(object):
    """Helper that waits for one or more systemd jobs to finish.

    Usage:

    with SystemdJobHandler() as job_handler:
        job_path = job_handler.manager.$do_something_to_create_a_job
        job_handler.register_job(job_path)
        # Further parallel jobs can be registered the same way

        job_results = yield job_handler.all_jobs_done_future()

    job_results is a dictionary filled from the arguments of the
    SYSTEMD_MANAGER_INTERFACE.JobRemoved signal: job_results[unit] = result
    """

    def __init__(self):
        self.__signal_match = None
        self.__job_results = {}
        self.__pending_jobs = set()
        self.__future = Future()

        system_bus = slip.dbus.SystemBus()
        self.__manager = dbus.Interface(
            system_bus.get_object(SYSTEMD_MANAGER_NAME, SYSTEMD_MANAGER_PATH),
            SYSTEMD_MANAGER_INTERFACE)

    def __job_removed_handler(self, job_id, job_path, unit, result):
        """SYSTEMD_MANAGER_INTERFACE.JobRemoved signal handler"""
        log.debug1("systemd JobRemoved signal: %s" %
                   repr((job_id, job_path, unit, result)))
        if job_path not in self.__pending_jobs:
            # Not one of the jobs we were asked to follow
            return
        self.__job_results[unit] = result
        self.__pending_jobs.remove(job_path)
        if not self.__pending_jobs:
            # That was the last outstanding job: publish all results
            self.__future.set_result(self.__job_results)

    # The context manager protocol guarantees that the signal registration
    # is removed again.
    def __enter__(self):
        assert self.__signal_match is None, "Recursive use of SystemdJobProcessor"
        assert not self.__future.done(), "Repeated use of SystemdJobProcessor"
        match = self.__manager.connect_to_signal("JobRemoved", self.__job_removed_handler)
        self.__signal_match = match
        return self  # Enables "with SystemdJobHandler() as job_handler:"

    def __exit__(self, *args):
        match, self.__signal_match = self.__signal_match, None
        match.remove()
        return False

    # Not strictly a necessary part of the API, but both this class and its
    # caller need the interface object, so expose it.
    @property
    def manager(self):
        """A dbus.Interface object for SYSTEMD_MANAGER_INTERFACE."""
        return self.__manager

    def register_job(self, job_path):
        """Register a job to be followed to completion.

        :param job_path: A path of the job object.  Make sure to provide the
        path soon after receiving it (in particular before allowing any D-Bus
        signals to be processed).
        """
        assert self.__signal_match is not None, \
            "Registering for jobs when not watching for results"
        self.__pending_jobs.add(job_path)

    def all_jobs_done_future(self):
        """Return a future for results of registered jobs.

        :returns: a future.  The value eventually set as a result is
        a dictionary of unit name -> job result string.
        """
        watching = self.__signal_match is not None
        assert watching and len(self.__pending_jobs) != 0
        return self.__future
Esempio n. 17
0
class RemoteTask(TaskInstance):
    """
    Handle for a task instance that is managed through a `cluster` object.

    Completion is tracked by a `Future`, which is also wrapped for asyncio
    so the task object itself can be awaited. Public attributes that do
    not exist on the object resolve to RPC coroutines (see `__getattr__`).
    """

    def __init__(self, taskdef: TaskDefinition, cluster):
        """
        Initialise the base class from the serialized task definition.

        :param taskdef: definition whose `serialize()` output is passed as
            keyword arguments to the base class constructor.
        :param cluster: object used by `destroy()` and `logs()`.
        """
        kwargs = taskdef.serialize()
        super().__init__(**kwargs)
        self.conn = None
        self.nonce = 0
        self.cluster = cluster
        self.future = Future()
        self.awaitable = asyncio.wrap_future(self.future)

    def __await__(self):
        """Awaiting the task awaits the asyncio wrapper of its future."""
        return self.awaitable.__await__()

    @property
    def done(self) -> bool:
        """True once the task's future has a result, an exception or is cancelled."""
        return self.future.done()

    def destroy(self) -> None:
        """Ask the cluster to destroy this task, identified by its id."""
        self.cluster.destroy(self.id)

    def set_status(self, status: str) -> None:
        """
        Update the task status.

        A transition to STOP fails a still pending future with StoppedError.

        :raises RuntimeError: when moving from FAIL to DONE or from DONE
            to FAIL.
        """
        # sanity checks
        if self.status == FAIL and status == DONE:
            raise RuntimeError('Cant complete a failed task')

        if self.status == DONE and status == FAIL:
            raise RuntimeError('Cant fail a completed task')

        if status == STOP and not self.future.done():
            self.future.set_exception(
                StoppedError(f'Remote task {self.id} was stopped'))

        # update status
        self.status = status

    def set_error(self, error: str) -> None:
        """Mark the task as failed and fail a pending future with TaskError."""
        self.set_status(FAIL)
        self.error = error
        if not self.future.done():
            self.future.set_exception(TaskError(error))

    def set_result(self, result: any, result_type: any = 'any') -> None:
        """
        Deserialize `result`, mark the task as done and resolve its future.

        :param result: serialized result value.
        :param result_type: type description handed to
            `type_from_description`; the resulting type deserializes `result`.
        """
        # unpack type & deserialize result
        result_type = type_from_description(result_type)
        result = result_type.deserialize(result)

        self.set_status(DONE)
        self.result = result
        if not self.future.done():
            self.future.set_result(result)

    async def wait_for_scheduling(self) -> None:
        """Hook awaited before polling for initialization; does nothing here."""
        pass

    async def wait_for_init(self, timeout=30) -> None:
        """
        Poll the status until the task reaches WORK.

        :param timeout: seconds of accumulated sleep after which to give up.
        :raises RuntimeError: when the status is not WAIT on entry, or when
            the task reaches FAIL while waiting.
        :raises TimeoutError: when more than `timeout` seconds were slept.
        """
        if self.status != WAIT:
            raise RuntimeError(f'Cant await task with status {self.status}')

        await self.wait_for_scheduling()

        slept = 0
        interval = 0.2
        while True:
            if self.status == WORK:
                return
            if self.status == FAIL:
                raise RuntimeError(
                    f'Awaited task failed with error: {self.error}')

            if slept > timeout:
                raise TimeoutError('Task took to long to initialize')

            await asyncio.sleep(interval)
            slept += interval

    async def call(self, method, args=None) -> any:
        """
        Invoke `method` on the task through its RPC connection.

        Waits for initialization first when the status is WAIT.

        :param method: name of the remote method.
        :param args: dictionary of arguments; a fresh empty dictionary is
            used when omitted.
        :raises RuntimeError: when the status is neither WAIT nor WORK.
        """
        # A `{}` default would be one dictionary shared by every call
        if args is None:
            args = {}

        if self.status == WAIT:
            await self.wait_for_init()
        elif self.status != WORK:
            raise RuntimeError(
                f'RPC is only available when status = WORK, was {self.status}. '
                f'Attempted to call {method}')

        return await self.conn.rpc.call(method, args)

    async def stop(self) -> None:
        """Call the remote `stop` method unless the status is already STOP."""
        if self.status == STOP:
            return
        await self.call('stop')

    def logs(self):
        """Return what the cluster reports as logs for this task's id."""
        return self.cluster.logs(self.id)

    def __getattr__(self, method):
        """
        Turn unknown public attributes into RPC coroutine functions.

        Names starting with an underscore are delegated to the base class.
        Any other missing name yields a coroutine function that forwards
        its keyword arguments to `call(method, kwargs)`; positional
        arguments are rejected with TypeError.
        """
        if method[0] == '_':
            return super().__getattr__(method)

        async def magic_rpc(*args, **kwargs):
            if len(args) > 0:
                raise TypeError(
                    'Positional arguments are not supported for RPC methods')
            return await self.call(method, kwargs)

        return magic_rpc

    def __str__(self):
        return f'RemoteTask({self.id}, {self.status}, {self.inputs})'

    def __repr__(self):
        return self.__str__()
Esempio n. 18
0
class Preplanner(object):
    """
    Keeps one `Plan` object per file name and hands out their results.

    Plans are created lazily by `get_plan()`, which first waits for the
    `started` future that `start()` resolves.
    """

    def __init__(self, ctrl, max_plan_time=60 * 60 * 24, max_loop_time=300):
        """
        :param ctrl: controller object; provides the logger (`ctrl.log`)
            and the plan directory / file paths (`ctrl.get_plan`).
        :param max_plan_time: stored as `self.max_plan_time`.
        :param max_loop_time: stored as `self.max_loop_time`.
        """
        self.ctrl = ctrl
        self.log = ctrl.log.get('Preplanner')

        self.max_plan_time = max_plan_time
        self.max_loop_time = max_loop_time

        # Create the plan directory; attempting the mkdir directly avoids
        # the race between an existence check and the creation.
        path = self.ctrl.get_plan()
        try:
            os.mkdir(path)
        except FileExistsError:
            pass

        self.started = Future()
        self.plans = {}


    def start(self):
        """Resolve the `started` future (only the first call has an effect)."""
        if not self.started.done():
            self.log.info('Preplanner started')
            self.started.set_result(True)


    def invalidate(self, filename):
        """Terminate and forget the plan for `filename`, if there is one."""
        if filename in self.plans:
            self.plans[filename].terminate()
            del self.plans[filename]


    def invalidate_all(self):
        """Terminate every known plan and start over with an empty cache."""
        for plan in self.plans.values():
            plan.terminate()
        self.plans = {}


    def delete_all_plans(self):
        """Remove every file matching `ctrl.get_plan('*')` and invalidate all plans."""
        files = glob.glob(self.ctrl.get_plan('*'))
        for path in files:
            safe_remove(path)
        self.invalidate_all()


    def delete_plans(self, filename):
        """Delete the plan for `filename`, if there is one, then invalidate it."""
        if filename in self.plans:
            self.plans[filename].delete()
            self.invalidate(filename)

    @gen.coroutine
    def get_plan(self, filename):
        """
        Coroutine returning the data of the plan for `filename`.

        Waits for `start()` first, then reuses the cached plan or creates a
        new one, and finally waits for that plan's future.

        :raises Exception: when `filename` is None.
        """
        if filename is None:
            raise Exception('Filename cannot be None')

        # Wait until state is fully initialized
        yield self.started

        if filename in self.plans:
            plan = self.plans[filename]
        else:
            plan = Plan(self, self.ctrl, filename)
            self.plans[filename] = plan

        data = yield plan.future
        return data


    def get_plan_progress(self, filename):
        """Return the progress of the plan for `filename`, or 0 when there is none."""
        return self.plans[filename].progress if filename in self.plans else 0