Beispiel #1
0
    def close_learning_instance(self, object_id):
        """Tear down the graphlearn services and server processes of one learning instance.

        Cleanup is best-effort: a failure on one service or process is logged
        and the remaining ones are still handled.

        Args:
            object_id: Key of the learning instance in
                ``self._learning_instance_processes`` and
                ``self._graphlearn_services``. An id that has no entry in
                ``self._learning_instance_processes`` is ignored.
        """
        # Local import so this method stays self-contained even if the module
        # does not already import subprocess at the top.
        import subprocess

        if object_id not in self._learning_instance_processes:
            return

        # An instance without registered services must not prevent its
        # processes from being terminated below.
        try:
            services = self._graphlearn_services[object_id]
        except KeyError:
            services = ()

        # delete the services
        for target in services:
            try:
                delete_kubernetes_object(
                    api_client=self._api_client,
                    target=target,
                    wait=self._waiting_for_delete,
                    timeout_seconds=self._timeout_seconds,
                )
            except Exception as e:
                logger.error("Failed to delete graphlearn service for %s, %s",
                             object_id, e)

        # terminate the processes
        # NOTE(review): assumes each entry is subprocess.Popen-like
        # (terminate/wait/kill) -- confirm against the code that fills the list.
        for proc in self._learning_instance_processes[object_id]:
            try:
                proc.terminate()
                try:
                    proc.wait(1)
                except subprocess.TimeoutExpired:
                    # The polite request was not honoured within one second.
                    # Force the process down so it is not leaked once the
                    # handle is dropped by the clear() below.
                    proc.kill()
                    proc.wait(1)
            except Exception as e:
                logger.error("Failed to terminate graphlearn server: %s", e)
        self._learning_instance_processes[object_id].clear()
Beispiel #2
0
    def stop(self, wait=False):
        """Stop graphscope instance on kubernetes cluster.

        Deletes every object in ``self._resource_object`` in reverse order
        and, when ``self._delete_namespace`` is set, deletes the namespace
        as well. Does nothing if the instance is already closed.

        Args:
            wait: bool, optional
                Block until the namespace is gone (or the timeout elapses)
                even when ``self._waiting_for_delete`` is False. Deletion of
                the individual resources only honours
                ``self._waiting_for_delete``. Defaults to False.

        Raises:
            K8SApiException:
                Polling the namespace failed with a status other than 404.
            TimeoutError:
                Presumably raised by ``delete_kubernetes_object`` when it
                waits for a resource (not visible here -- confirm). A
                timeout while waiting for the namespace is only logged.
        """
        if self._closed:
            return

        # delete resources created by graphscope inside namespace
        # make sure delete permission resources in the end
        for target in reversed(self._resource_object):
            delete_kubernetes_object(
                api_client=self._api_client,
                target=target,
                wait=self._waiting_for_delete,
                timeout_seconds=self._timeout_seconds,
            )
        self._resource_object = []

        if self._delete_namespace:
            api = CoreV1Api(self._api_client)
            try:
                api.delete_namespace(self._namespace)
            except K8SApiException as ex:
                # 404 means the namespace is already deleted; anything else
                # is a real failure that should at least be visible in logs.
                # Stopping stays best-effort, so it is not re-raised.
                if ex.status != 404:
                    logger.warning(
                        "Failed to delete namespace %s: %s", self._namespace, ex
                    )
            else:
                if wait or self._waiting_for_delete:
                    # Monotonic clock: elapsed time must not be affected by
                    # wall-clock adjustments.
                    started = time.monotonic()
                    while True:
                        try:
                            api.read_namespace(self._namespace)
                        except K8SApiException as ex:
                            if ex.status != 404:
                                raise
                            # 404: the namespace has disappeared.
                            break
                        time.sleep(1)
                        # A falsy timeout means "wait indefinitely".
                        if (
                            self._timeout_seconds
                            and time.monotonic() - started > self._timeout_seconds
                        ):
                            logger.warning(
                                "Deleting namespace %s timeout", self._namespace
                            )
                            break
        self._closed = True
Beispiel #3
0
    def stop(self, is_dangling=False):
        """Delete the resources tracked by this launcher and mark it closed.

        Every object in ``self._resource_object`` is deleted first. Does
        nothing if the launcher is already closed.

        Args:
            is_dangling: bool, optional
                When True, additionally clean up a dangling coordinator:
                delete the whole namespace if
                ``self._saved_locals["delete_namespace"]`` is set, otherwise
                call ``self._delete_dangling_coordinator()``.
                Defaults to False.

        Raises:
            K8SApiException:
                Deleting the namespace failed with a status other than 404.
        """
        if self._closed:
            return

        for target in self._resource_object:
            delete_kubernetes_object(
                api_client=self._api_client,
                target=target,
                wait=self._saved_locals["waiting_for_delete"],
                timeout_seconds=self._saved_locals["timeout_seconds"],
            )
        self._resource_object = []

        if is_dangling:
            logger.info("Dangling coordinator detected, cleaning up...")
            # delete everything inside namespace of graphscope instance
            if self._saved_locals["delete_namespace"]:
                namespace = self._saved_locals["namespace"]
                try:
                    # delete namespace created by graphscope
                    self._core_api.delete_namespace(namespace)
                except K8SApiException as ex:
                    # 404: the namespace is already gone, which is the goal
                    # of this cleanup. Other failures still propagate.
                    if ex.status != 404:
                        raise
                else:
                    if self._saved_locals["waiting_for_delete"]:
                        timeout_seconds = self._saved_locals["timeout_seconds"]
                        # Monotonic clock: elapsed time must not be affected
                        # by wall-clock adjustments.
                        started = time.monotonic()
                        while True:
                            try:
                                self._core_api.read_namespace(namespace)
                            except K8SApiException as ex:
                                if ex.status != 404:
                                    logger.error(
                                        "Deleting dangling namespace %s failed: %s",
                                        namespace,
                                        ex,
                                    )
                                break
                            time.sleep(1)
                            # A falsy timeout means "wait indefinitely".
                            if (
                                timeout_seconds
                                and time.monotonic() - started > timeout_seconds
                            ):
                                logger.error(
                                    "Deleting namespace %s timeout", namespace
                                )
                                # Stop polling; without this the loop would
                                # keep running after the timeout.
                                break
            else:
                # delete coordinator deployment and service
                self._delete_dangling_coordinator()
        self._closed = True