Example #1
0
 def wait_for_ingress(self, title, test_path=None, method="GET"):
     """Probe the ingress URL for ``title`` and translate failures.

     The probe URL is ``test_path`` joined onto the URL returned by
     ``self.get_ingress_url(title)``; a single request is issued with the
     given HTTP ``method``. Returns ``None`` when the request succeeds.

     Raises:
         deployment_status.StillDeploying: the probe failed while
             ``self.status`` is ``STATUS_DEPLOYING`` (for HTTP errors only
             when the status code is 404, 502, 503 or 504).
         deployment_status.DeploymentError: any other probe failure.
     """
     base_url = self.get_ingress_url(title)
     test_url = urllib.parse.urljoin(base_url, test_path)
     request = urllib.request.Request(test_url, method=method)
     try:
         # Only reachability matters here; the context manager closes the
         # response so the underlying connection is not leaked.
         with urllib.request.urlopen(request):
             pass
     except http.client.RemoteDisconnected as e:
         self.logger.warning(
             "wait_for_ingress (%s): RemoteDisconnected: %s" %
             (test_url, e))
         if self.status == deployment_status.STATUS_DEPLOYING:
             raise deployment_status.StillDeploying(
                 "Waiting for %s ingress" % title)
         raise deployment_status.DeploymentError("Could not connect: %s" %
                                                 e)
     except urllib.error.HTTPError as e:
         # HTTPError is a subclass of URLError, so it must be handled first.
         self.logger.warning("wait_for_ingress (%s): HTTPError: %s" %
                             (test_url, e))
         if self.status == deployment_status.STATUS_DEPLOYING:
             # These codes are treated as "backend not reachable yet".
             if e.code in (404, 502, 503, 504):
                 raise deployment_status.StillDeploying(
                     "Waiting for %s ingress" % title)
         raise deployment_status.DeploymentError("HTTP response: %s" % e)
     except urllib.error.URLError as e:
         self.logger.warning("wait_for_ingress (%s): URLError: %s" %
                             (test_url, e))
         if self.status == deployment_status.STATUS_DEPLOYING:
             raise deployment_status.StillDeploying(
                 "Waiting for %s ingress" % title)
         raise deployment_status.DeploymentError("URL error: %s" % e)
Example #2
0
 def sync_source_code(self, url):
     """Synchronise notebook source between the container and the algorithm.

     Downloads the notebook from ``url`` joined with ``"notebook"`` and
     compares the version from the ``X-Notebook-Version`` response header
     (treated as ``-1`` when the header is absent) against
     ``self.algorithm.source_code_version``. A newer remote version is
     stored via ``self.algorithm.update_source_code``; an older remote
     version is overwritten by uploading the local source with ``PUT``.

     Raises:
         deployment_status.StillDeploying: the connection was dropped while
             ``self.status`` is ``STATUS_DEPLOYING``, or the container
             answered with 404, 502, 503 or 504.
         deployment_status.DeploymentError: the connection was dropped in
             any other status.
         Exception: the container answered with any other HTTP error.
     """
     # Status codes treated as "container not reachable yet".
     retry_codes = (404, 502, 503, 504)
     url = urllib.parse.urljoin(url, "notebook")
     self.logger.warning("notebookurl: %s" % url)
     try:
         download_request = urllib.request.Request(url, method="GET")
         # The context manager closes the response (and its socket) even
         # when parsing the header or reading the body fails.
         with urllib.request.urlopen(download_request,
                                     timeout=7) as download_response:
             version_header = download_response.getheader(
                 "X-Notebook-Version")
             if version_header is None:
                 remote_notebook_version = -1
             else:
                 remote_notebook_version = int(version_header)
             remote_notebook_code = download_response.read().decode()
     except http.client.RemoteDisconnected as e:
         if self.status == deployment_status.STATUS_DEPLOYING:
             raise deployment_status.StillDeploying(
                 "Waiting for connection")
         raise deployment_status.DeploymentError("Could not connect: %s" %
                                                 e)
     except urllib.error.HTTPError as e:
         if e.code not in retry_codes:
             raise Exception(
                 "downloading notebook source failed with code %s" % e.code)
         raise deployment_status.StillDeploying(
             "Waiting for connection to container")
     if remote_notebook_version > self.algorithm.source_code_version:
         self.algorithm.update_source_code(
             remote_notebook_code,
             remote_notebook_version,
         )
         self.logger.info(
             "Received and stored updated source code (version %s)" %
             remote_notebook_version)
     if remote_notebook_version < self.algorithm.source_code_version:
         local_notebook_data = self.algorithm.source_code.encode()
         upload_request = urllib.request.Request(
             url,
             data=local_notebook_data,
             method="PUT",
             headers={
                 # Header values are text on the wire; convert explicitly.
                 "X-Notebook-Version":
                 str(self.algorithm.source_code_version),
                 "Content-Type": "application/octet-stream",
             })
         try:
             with urllib.request.urlopen(upload_request) as upload_response:
                 self.logger.info("Sent source code (version %s)" %
                                  self.algorithm.source_code_version)
                 # Consume the reply body before the connection is closed;
                 # its content is not used.
                 upload_response.read()
         except urllib.error.HTTPError as e:
             if e.code not in retry_codes:
                 raise Exception("Error uploading sourcecode: %s" % e.code)
             raise deployment_status.StillDeploying(
                 "Waiting for connection to container")
Example #3
0
 def wait_for_endpoints_pod_ip(self, endpoints_name):
     """Return a pod IP listed in the named Endpoints object.

     Reads the Endpoints object ``endpoints_name`` from
     ``self.environment.namespace`` and returns the IP of the last address
     found across all subsets.

     Raises:
         deployment_status.StillDeploying: the Endpoints object is empty,
             has no subsets, or none of its subsets lists an address yet.
     """
     endpoints = self.core_api.read_namespaced_endpoints(
         endpoints_name,
         self.environment.namespace,
     )
     if not endpoints or not endpoints.subsets:
         raise deployment_status.StillDeploying("Waiting for %s endpoints" %
                                                endpoints_name)
     pod_ip = None
     for subset in endpoints.subsets:
         # ``addresses`` is an optional field and may be None (e.g. a
         # subset that lists no ready addresses); treat that as "no
         # addresses" instead of failing with a TypeError.
         for address in subset.addresses or []:
             pod_ip = address.ip
     if not pod_ip:
         raise deployment_status.StillDeploying(
             "Waiting for %s pod endpoint address" % endpoints_name)
     return pod_ip
Example #4
0
 def sync_source_code(self):
     """Synchronise notebook source between the editor and the algorithm.

     Downloads the notebook from ``_dltk/notebook`` joined onto the
     "editor" ingress URL and compares the version from the
     ``X-Notebook-Version`` response header against
     ``self.algorithm.source_code_version``. A 404 response is treated as
     "no remote notebook" (version ``-1``, empty code). A newer remote
     version is stored via ``self.algorithm.update_source_code``; an older
     remote version is overwritten by uploading the local source with
     ``PUT``.

     Raises:
         deployment_status.StillDeploying: the connection was dropped while
             ``self.status`` is ``STATUS_DEPLOYING``.
         deployment_status.DeploymentError: the connection was dropped in
             any other status.
         UserFriendlyError: the download failed with an HTTP error other
             than 404, or the upload failed with any HTTP error.
         Exception: the response carried no ``X-Notebook-Version`` header.
     """
     notebook_url = urllib.parse.urljoin(
         self.get_ingress_url("editor"),
         "_dltk/notebook"
     )
     try:
         download_request = urllib.request.Request(notebook_url, method="GET")
         # The context manager closes the response (and its socket) even
         # when the header is missing or reading the body fails.
         with urllib.request.urlopen(download_request) as download_response:
             version_header = download_response.getheader("X-Notebook-Version")
             # Check for the missing header before converting: int(None)
             # would raise TypeError and hide the intended error message.
             if version_header is None:
                 raise Exception("Did not receive notebook version")
             notebook_version = int(version_header)
             notebook_code = download_response.read().decode()
     except http.client.RemoteDisconnected as e:
         if self.status == deployment_status.STATUS_DEPLOYING:
             raise deployment_status.StillDeploying("Waiting for connection")
         raise deployment_status.DeploymentError("Could not connect: %s" % e)
     except urllib.error.HTTPError as e:
         if e.code != 404:
             raise UserFriendlyError("failed downloading notebook source: %s" % e.code)
         # 404: no notebook on the remote side yet, so any local version wins.
         notebook_version = -1
         notebook_code = ""
     if notebook_version > self.algorithm.source_code_version:
         self.algorithm.update_source_code(
             notebook_code,
             notebook_version,
         )
         logging.info("Received and stored updated source code (version %s)" % notebook_version)
     if notebook_version < self.algorithm.source_code_version:
         notebook_data = self.algorithm.source_code.encode()
         upload_request = urllib.request.Request(
             notebook_url,
             data=notebook_data,
             method="PUT",
             headers={
                 # Header values are text on the wire; convert explicitly.
                 "X-Notebook-Version": str(self.algorithm.source_code_version),
             }
         )
         try:
             # The reply body is not needed; just make sure it is closed.
             with urllib.request.urlopen(upload_request):
                 pass
         except urllib.error.HTTPError as e:
             raise UserFriendlyError("error sending new source code to runtime: %s" % e)
         logging.info("Sent source code (version %s)" % self.algorithm.source_code_version)
Example #5
0
 def deploy_stateful_set(
     self,
     component_name,
     headless_service,
     cpu_count,  # deprecated
     memory_mb,
     image,
     replicas,
     stateful_set_labels,
     pod_labels,
     ports=None,
     env=None,
     cpu_request=1,
     cpu_limit=None,
 ):
     """Create the component's StatefulSet or reconcile an existing one.

     When ``self.get_stateful_set(stateful_set_labels)`` finds nothing, a
     new StatefulSet is created and the API result is returned. Otherwise
     the replica count and, for the container named ``component_name``,
     the image and CPU/memory requests and limits are compared with the
     desired values; any difference is written back and patched.

     A non-None ``cpu_count`` (deprecated) overrides both ``cpu_request``
     and ``cpu_limit``; otherwise ``cpu_limit`` defaults to
     ``cpu_request``. Memory request and limit are both ``memory_mb`` Mi.

     Returns:
         The existing (unchanged) or newly created StatefulSet.

     Raises:
         deployment_status.StillDeploying: an existing StatefulSet had to
             be patched.
     """
     if cpu_count is None:
         if cpu_limit is None:
             cpu_limit = cpu_request
         cpu_request_resources = "%s" % cpu_request
         cpu_limit_resources = "%s" % cpu_limit
     else:
         # Deprecated single value drives both request and limit.
         cpu_request_resources = cpu_limit_resources = "%s" % cpu_count
     memory_resources = "%sMi" % memory_mb

     existing = self.get_stateful_set(stateful_set_labels)
     if not existing:
         self.logger.info("creating %s stateful_set..." % component_name)
         namespace = self.environment.namespace
         object_meta = kubernetes_client.V1ObjectMeta(
             name=self.generate_object_name(component_name),
             namespace=self.environment.namespace,
             labels=self.generate_object_labels(stateful_set_labels),
         )
         selector = kubernetes_client.V1LabelSelector(
             match_labels=self.generate_object_labels(pod_labels))
         pod_meta = kubernetes_client.V1ObjectMeta(
             labels=self.generate_object_labels(pod_labels))
         main_container = kubernetes_client.V1Container(
             name=component_name,
             image=image,
             image_pull_policy=self.environment.image_pull_policy,
             resources=kubernetes_client.V1ResourceRequirements(
                 requests={
                     "cpu": cpu_request_resources,
                     "memory": memory_resources,
                 },
                 limits={
                     "cpu": cpu_limit_resources,
                     "memory": memory_resources,
                 },
             ),
             env=env,
             ports=ports,
         )
         pod_template = kubernetes_client.V1PodTemplateSpec(
             metadata=pod_meta,
             spec=kubernetes_client.V1PodSpec(containers=[main_container]),
         )
         body = kubernetes_client.V1StatefulSet(
             api_version="apps/v1",
             kind="StatefulSet",
             metadata=object_meta,
             spec=kubernetes_client.V1StatefulSetSpec(
                 service_name=headless_service.metadata.name,
                 replicas=replicas,
                 selector=selector,
                 template=pod_template,
             ),
         )
         return self.apps_api.create_namespaced_stateful_set(
             namespace=namespace,
             body=body,
         )

     needs_patch = False
     if existing.spec.replicas != replicas:
         self.logger.info("replicas changed from %s to %s" %
                          (existing.spec.replicas, replicas))
         existing.spec.replicas = replicas
         needs_patch = True
     for container in existing.spec.template.spec.containers:
         if container.name != component_name:
             continue
         if container.image != image:
             container.image = image
             needs_patch = True
             self.logger.info("image changed")
         if container.resources.requests is None:
             container.resources.requests = {}
         if container.resources.limits is None:
             container.resources.limits = {}
         requests = container.resources.requests
         limits = container.resources.limits
         # (section, key, desired value, log template), checked in order.
         desired = (
             (requests, "cpu", cpu_request_resources,
              "cpu requests changed to %s"),
             (limits, "cpu", cpu_limit_resources,
              "cpu limit changed to %s"),
             (requests, "memory", memory_resources,
              "memory request changed to %s"),
             (limits, "memory", memory_resources,
              "memory limit changed to %s"),
         )
         for section, key, value, message in desired:
             if key in section and section[key] == value:
                 continue
             section[key] = value
             needs_patch = True
             self.logger.info(message % value)
     if needs_patch:
         self.logger.info("patching stateful_set...")
         self.apps_api.patch_namespaced_stateful_set(
             name=existing.metadata.name,
             namespace=self.environment.namespace,
             body=existing,
         )
         raise deployment_status.StillDeploying(
             "Waiting for %s stateful set being patched" %
             component_name)
     return existing
Example #6
0
 def deploy_deployment(
     self,
     image,
     memory_mb=50,
     cpu_count=None,  # deprecated
     cpu_request=1,
     cpu_limit=None,
     gpu_request=None,
     replicas=1,
     deployment_labels=None,
     pod_labels=None,
     name_suffix=None,
     container_name=None,
     # NOTE(review): the list defaults below are shared between calls.
     # This method never mutates them, but they are handed to the
     # Kubernetes model objects as-is; kept for interface compatibility.
     ports=[],
     env=None,
     volumes=[],
     volume_mounts=[],
     run_as_user=None,
     fs_group=None,
 ):
     """Create the Deployment or reconcile an existing one.

     When ``self.get_deployment(deployment_labels)`` finds nothing, a new
     Deployment is created and the API result is returned. Otherwise the
     replica count and, for the container named ``container_name``, the
     image and CPU/memory/GPU requests and limits are compared with the
     desired values; any difference is written back and patched.

     ``container_name`` defaults to ``name_suffix`` or, failing that,
     ``self.algorithm.runtime.name``, with dots replaced by dashes. A
     non-None ``cpu_count`` (deprecated) overrides both ``cpu_request`` and
     ``cpu_limit``; otherwise ``cpu_limit`` defaults to ``cpu_request``.
     Memory request and limit are both ``memory_mb`` Mi. ``gpu_request``
     sets the ``nvidia.com/gpu`` request and limit; ``None`` removes them
     from an existing container.

     Returns:
         The existing (unchanged) or newly created Deployment.

     Raises:
         deployment_status.StillDeploying: an existing Deployment had to be
             patched.
     """
     if not container_name:
         if name_suffix:
             container_name = name_suffix
         else:
             container_name = self.algorithm.runtime.name
         container_name = container_name.replace(".", "-")
     if cpu_count is not None:
         # Deprecated single value drives both request and limit.
         cpu_request_resources = "%s" % cpu_count
         cpu_limit_resources = "%s" % cpu_count
     else:
         if cpu_limit is None:
             cpu_limit = cpu_request
         cpu_request_resources = "%s" % cpu_request
         cpu_limit_resources = "%s" % cpu_limit
     memory_resources = "%sMi" % memory_mb
     if gpu_request is not None:
         gpu_request_resources = "%s" % gpu_request
         gpu_limit_resources = gpu_request_resources
     else:
         gpu_request_resources = None
         gpu_limit_resources = None
     deployment = self.get_deployment(deployment_labels)
     if deployment:
         changed = False
         if deployment.spec.replicas != replicas:
             self.logger.info("replicas changed from %s to %s" %
                              (deployment.spec.replicas, replicas))
             deployment.spec.replicas = replicas
             changed = True
         for container in deployment.spec.template.spec.containers:
             if container.name != container_name:
                 continue
             if container.image != image:
                 container.image = image
                 changed = True
                 self.logger.info("image changed")
             if container.resources.requests is None:
                 container.resources.requests = {}
             if container.resources.limits is None:
                 container.resources.limits = {}
             requests = container.resources.requests
             limits = container.resources.limits

             # --- CPU ---------------------------------------------------
             if "cpu" not in requests or requests[
                     "cpu"] != cpu_request_resources:
                 requests["cpu"] = cpu_request_resources
                 changed = True
                 self.logger.info(
                     "cpu_requests_resources requests changed to %s" %
                     cpu_request_resources)
             if "cpu" not in limits or limits["cpu"] != cpu_limit_resources:
                 limits["cpu"] = cpu_limit_resources
                 changed = True
                 self.logger.info(
                     "cpu_limit_resources limits changed to %s" %
                     cpu_limit_resources)

             # --- Memory (compared by parsed value, not by spelling) ----
             if "memory" not in requests:
                 requests["memory"] = memory_resources
                 changed = True
                 self.logger.info(
                     "memory_resources was not set. Now set to '%s'" %
                     (memory_resources, ))
             elif resources.parse_memory(
                     requests["memory"]) != resources.parse_memory(
                         memory_resources):
                 # Remember the old value before overwriting it so the
                 # log line reports the real transition.
                 previous = requests["memory"]
                 requests["memory"] = memory_resources
                 changed = True
                 self.logger.info(
                     "memory_resources requests changed from '%s' to '%s'"
                     % (
                         previous,
                         memory_resources,
                     ))
             if "memory" not in limits:
                 limits["memory"] = memory_resources
                 changed = True
                 self.logger.info(
                     "memory_resources limits was not set. Now set to %s"
                     % memory_resources)
             elif resources.parse_memory(
                     limits["memory"]) != resources.parse_memory(
                         memory_resources):
                 previous = limits["memory"]
                 limits["memory"] = memory_resources
                 changed = True
                 self.logger.info(
                     "memory_resources limits changed from %s to %s" % (
                         previous,
                         memory_resources,
                     ))

             # --- GPU limits --------------------------------------------
             if "nvidia.com/gpu" not in limits:
                 if gpu_limit_resources is not None:
                     limits["nvidia.com/gpu"] = gpu_limit_resources
                     changed = True
                     self.logger.info(
                         "gpu_resources limits was not set. Now set to %s"
                         % gpu_limit_resources)
             elif limits["nvidia.com/gpu"] != gpu_limit_resources:
                 previous = limits["nvidia.com/gpu"]
                 if gpu_limit_resources is not None:
                     limits["nvidia.com/gpu"] = gpu_limit_resources
                     changed = True
                     self.logger.info(
                         "gpu_resources limits changed from %s to %s" % (
                             previous,
                             gpu_limit_resources,
                         ))
                 else:
                     # No GPU wanted any more: drop the entry entirely.
                     self.logger.info(
                         "gpu_resources limits was set to %s but not required anymore"
                         % (previous, ))
                     del limits["nvidia.com/gpu"]
                     changed = True

             # --- GPU requests ------------------------------------------
             if "nvidia.com/gpu" not in requests:
                 if gpu_request_resources is not None:
                     requests["nvidia.com/gpu"] = gpu_request_resources
                     changed = True
                     self.logger.info(
                         "gpu_resources requests was not set. Now set to %s"
                         % gpu_request_resources)
             elif requests["nvidia.com/gpu"] != gpu_request_resources:
                 previous = requests["nvidia.com/gpu"]
                 if gpu_request_resources is not None:
                     requests["nvidia.com/gpu"] = gpu_request_resources
                     changed = True
                     self.logger.info(
                         "gpu_resources requests changed from %s to %s"
                         % (
                             previous,
                             gpu_request_resources,
                         ))
                 else:
                     self.logger.info(
                         "gpu_resources requests was set to %s but not required anymore"
                         % (previous, ))
                     del requests["nvidia.com/gpu"]
                     changed = True
         if changed:
             self.logger.info("patching deployment...")
             self.apps_api.patch_namespaced_deployment(
                 name=deployment.metadata.name,
                 namespace=self.environment.namespace,
                 body=deployment,
             )
             raise deployment_status.StillDeploying(
                 "Waiting for %s deployment being patched" % container_name)
     else:
         resource_requirements = kubernetes_client.V1ResourceRequirements(
             requests={
                 "cpu": cpu_request_resources,
                 "memory": memory_resources,
             },
             limits={
                 "cpu": cpu_limit_resources,
                 "memory": memory_resources,
             },
         )
         if gpu_request_resources:
             resource_requirements.requests[
                 "nvidia.com/gpu"] = gpu_request_resources
         if gpu_limit_resources:
             resource_requirements.limits[
                 "nvidia.com/gpu"] = gpu_limit_resources
         self.logger.info("creating %s deployment..." % container_name)
         deployment = self.apps_api.create_namespaced_deployment(
             namespace=self.environment.namespace,
             body=kubernetes_client.V1Deployment(
                 api_version="apps/v1",
                 kind="Deployment",
                 metadata=kubernetes_client.V1ObjectMeta(
                     name=self.generate_object_name(name_suffix),
                     namespace=self.environment.namespace,
                     labels=self.generate_object_labels(deployment_labels)),
                 spec=kubernetes_client.V1DeploymentSpec(
                     replicas=replicas,
                     selector=kubernetes_client.V1LabelSelector(
                         match_labels=self.generate_object_labels(
                             pod_labels)),
                     template=kubernetes_client.V1PodTemplateSpec(
                         metadata=kubernetes_client.V1ObjectMeta(
                             labels=self.generate_object_labels(
                                 pod_labels)),
                         spec=kubernetes_client.V1PodSpec(
                             containers=[
                                 kubernetes_client.V1Container(
                                     name=container_name,
                                     image=image,
                                     image_pull_policy=self.environment.
                                     image_pull_policy,
                                     resources=resource_requirements,
                                     env=env,
                                     ports=ports,
                                     volume_mounts=volume_mounts,
                                 ),
                             ],
                             volumes=volumes,
                             security_context=kubernetes_client.
                             V1PodSecurityContext(
                                 run_as_user=run_as_user,
                                 fs_group=fs_group,
                             ),
                         ),
                     ),
                 ),
             ),
         )
     return deployment