def default_cluster_agent_deployment():
    """
    Build the default cluster agent deployment.

    Returns a single-replica ``apps/v1`` Deployment named ``cluster-agent``
    in the ``epsagon-monitoring`` namespace, running one container with a
    fixed set of test environment variables.
    """
    pod_labels = {'app': 'epsagon-cluster-agent'}

    env_vars = [
        client.V1EnvVar(name=var_name, value=var_value)
        for var_name, var_value in (
            ('EPSAGON_TOKEN', '123'),
            ('EPSAGON_CLUSTER_NAME', 'test'),
            ('EPSAGON_DEBUG', 'false'),
            ('EPSAGON_COLLECTOR_URL', 'http://localhost:5000'),
        )
    ]

    agent_container = client.V1Container(
        name='cluster-agent',
        image='epsagon/cluster-agent:test',
        # 'Never' makes the image come from the locally loaded docker
        # images instead of being pulled from the Epsagon remote hub.
        image_pull_policy='Never',
        env=env_vars,
    )

    pod_template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=dict(pod_labels)),
        spec=client.V1PodSpec(
            service_account_name='cluster-agent',
            containers=[agent_container],
        ),
    )

    deployment_spec = client.V1DeploymentSpec(
        # Selector and pod template each receive their own copy of the labels.
        selector=client.V1LabelSelector(match_labels=dict(pod_labels)),
        replicas=1,
        template=pod_template,
    )

    return client.V1Deployment(
        api_version='apps/v1',
        kind='Deployment',
        metadata=client.V1ObjectMeta(
            name='cluster-agent',
            namespace='epsagon-monitoring',
        ),
        spec=deployment_spec,
    )
async def deploy_resource(
    self, resource_attributes: AttributeDict
) -> AttributeDict:
    """
    Create a Kubernetes deployment for a drone and, when the machine type
    configuration enables ``hpa``, a horizontal pod autoscaler targeting it.

    :param resource_attributes: Attributes of the drone; ``drone_uuid`` and
        ``obs_machine_meta_data_translation_mapping`` are read here.
    :return: Result of ``self.handle_response`` applied to a dict holding
        the created deployment's ``uid`` and ``name`` and the type
        ``"Booting"``.
    """
    drone_uuid = resource_attributes.drone_uuid
    environment = self.drone_environment(
        drone_uuid,
        resource_attributes.obs_machine_meta_data_translation_mapping,
    )

    # Every drone environment entry is exported as a prefixed,
    # stringified environment variable of the container.
    env_vars = [
        k8s_client.V1EnvVar(name=f"TardisDrone{env_key}", value=str(env_value))
        for env_key, env_value in environment.items()
    ]
    resource_requests = {
        "cpu": self.machine_meta_data.Cores,
        # NOTE(review): Memory is scaled by 1e09, presumably GB -> bytes;
        # confirm against the machine meta data definition.
        "memory": convert_to(self.machine_meta_data.Memory * 1e09, int),
    }
    drone_container = k8s_client.V1Container(
        image=self.machine_type_configuration.image,
        args=self.machine_type_configuration.args,
        name=drone_uuid,
        resources=k8s_client.V1ResourceRequirements(requests=resource_requests),
        env=env_vars,
    )

    pod_template = k8s_client.V1PodTemplateSpec(
        metadata=k8s_client.V1ObjectMeta(
            name=drone_uuid,
            labels={"app": drone_uuid},
        ),
        spec=k8s_client.V1PodSpec(containers=[drone_container]),
    )
    deployment = k8s_client.V1Deployment(
        metadata=k8s_client.V1ObjectMeta(name=drone_uuid),
        spec=k8s_client.V1DeploymentSpec(
            replicas=1,
            selector=k8s_client.V1LabelSelector(
                match_labels={"app": drone_uuid}
            ),
            template=pod_template,
        ),
    )

    created = await self.client.create_namespaced_deployment(
        namespace=self.machine_type_configuration.namespace, body=deployment
    )
    response = {
        "uid": created.metadata.uid,
        "name": created.metadata.name,
        "type": "Booting",
    }

    if self.machine_type_configuration.hpa:
        # The autoscaler shares the deployment's name and scales it by
        # CPU utilization within the configured replica bounds.
        target_ref = k8s_client.V1CrossVersionObjectReference(
            api_version="apps/v1",
            kind="Deployment",
            name=drone_uuid,
        )
        autoscaler = k8s_client.V1HorizontalPodAutoscaler(
            metadata=k8s_client.V1ObjectMeta(name=drone_uuid),
            spec=k8s_client.V1HorizontalPodAutoscalerSpec(
                max_replicas=self.machine_type_configuration.max_replicas,
                min_replicas=self.machine_type_configuration.min_replicas,
                target_cpu_utilization_percentage=(
                    self.machine_type_configuration.cpu_utilization
                ),
                scale_target_ref=target_ref,
            ),
        )
        await self.hpa_client.create_namespaced_horizontal_pod_autoscaler(
            namespace=self.machine_type_configuration.namespace,
            body=autoscaler,
        )

    return self.handle_response(response)
async def create_deployment(
    self,
    container: str,
    num_replicas: int,
    cpus: float = 1.0,
    memory: float = 1.0,
) -> Tuple[str, str]:
    """
    Create a deployment plus a LoadBalancer service exposing it, then wait
    until the service reports an ingress IP.

    Args:
        container: Image to run in the deployment's single container.
        num_replicas: Number of replicas requested for the deployment.
        cpus: CPU request, passed to Kubernetes as ``str(cpus)``.
        memory: Memory request; multiplied by 1024, truncated to an int
            and suffixed with ``M``.

    Returns:
        A ``(deployment_id, service_url)`` tuple, where ``service_url`` is
        ``http://<ingress ip>:<service port>``.

    Note:
        The wait for an ingress IP has no timeout; it polls every
        ``POLL_INTERVAL`` until an IP is reported.
    """
    assert self.auth_client
    assert self.cluster_endpoint

    token = await self.auth_client.get()
    cfg = client.Configuration(
        host=f"https://{self.cluster_endpoint}:443",
        api_key={"authorization": f"Bearer {token}"},
    )
    # TLS certificate verification is switched off for this connection.
    cfg.verify_ssl = False

    async with ApiClient(configuration=cfg) as kube_api:
        apps_api = client.AppsV1Api(kube_api)
        core_api = client.CoreV1Api(kube_api)

        # --- Deployment -------------------------------------------------
        deployment_id = f"dep-{uuid.uuid4()}"
        pod_labels = {"dep": deployment_id}
        worker = client.V1Container(
            name=deployment_id,
            env=[client.V1EnvVar(name="PORT", value=str(INTERNAL_PORT))],
            image=container,
            resources=client.V1ResourceRequirements(
                requests={
                    "cpu": str(cpus),
                    "memory": f"{int(memory * 1024)}M",
                }
            ),
            ports=[client.V1ContainerPort(container_port=INTERNAL_PORT)],
        )
        pod_template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels=pod_labels),
            spec=client.V1PodSpec(containers=[worker]),
        )
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=deployment_id),
            spec=client.V1DeploymentSpec(
                replicas=num_replicas,
                selector={"matchLabels": {"dep": deployment_id}},
                template=pod_template,
            ),
        )
        await apps_api.create_namespaced_deployment(
            namespace=KUBE_NAMESPACE, body=deployment
        )

        # --- Service ----------------------------------------------------
        service_id = f"{deployment_id}-svc"
        service_port = self.get_unassigned_port()
        exposed_port = client.V1ServicePort(
            protocol="TCP",
            port=service_port,
            target_port=INTERNAL_PORT,
        )
        service = client.V1Service(
            api_version="v1",
            kind="Service",
            # No annotations are set; the previously considered
            # "cloud.google.com/load-balancer-type": "Internal" annotation
            # remains disabled.
            metadata=client.V1ObjectMeta(name=service_id),
            spec=client.V1ServiceSpec(
                selector={"dep": deployment_id},
                ports=[exposed_port],
                type="LoadBalancer",
            ),
        )
        await core_api.create_namespaced_service(
            namespace=KUBE_NAMESPACE, body=service
        )

        # --- Wait for the external address ------------------------------
        while True:
            await asyncio.sleep(POLL_INTERVAL)
            current = await core_api.read_namespaced_service(
                name=service_id, namespace=KUBE_NAMESPACE
            )
            ingress = current.status.load_balancer.ingress
            service_ip = ingress[0].ip if ingress else None
            if service_ip:
                break

        service_url = f"http://{service_ip}:{service_port}"
        print(f"Started deployment {deployment_id} at {service_url}")
        return deployment_id, service_url
def make_pod_spec(
    image,
    labels=None,
    threads_per_worker=1,
    env=None,
    extra_container_config=None,
    extra_pod_config=None,
    memory_limit=None,
    memory_request=None,
    cpu_limit=None,
    cpu_request=None,
):
    """
    Create generic pod template from input parameters

    Parameters
    ----------
    image : str
        Container image for the ``dask-worker`` container.
    labels : dict, optional
        Labels stored on the pod metadata. Defaults to a fresh empty dict.
    threads_per_worker : int
        Value passed to the worker's ``--nthreads`` argument.
    env : dict, optional
        Environment variables for the container, as name -> value.
    extra_container_config : dict, optional
        Attributes applied to the container via ``_set_k8s_attribute``.
    extra_pod_config : dict, optional
        Attributes applied to the pod spec via ``_set_k8s_attribute``.
    memory_limit, memory_request, cpu_limit, cpu_request : optional
        Resource limits/requests; only truthy values are set. A truthy
        ``memory_limit`` is also passed to the worker as ``--memory-limit``.

    Returns
    -------
    pod : kubernetes ``V1Pod``
        Pod with ``restart_policy="Never"`` and a single container.

    Examples
    --------
    >>> make_pod_spec(image='daskdev/dask:latest', memory_limit='4G', memory_request='4G')
    """
    # Dict defaults are created per call. A shared ``{}`` default would be
    # stored on the returned pod (metadata.labels), so mutating one pod's
    # labels would silently change the default for every later call.
    labels = {} if labels is None else labels
    env = {} if env is None else env
    extra_container_config = (
        {} if extra_container_config is None else extra_container_config
    )
    extra_pod_config = {} if extra_pod_config is None else extra_pod_config

    args = [
        "dask-worker",
        "$(DASK_SCHEDULER_ADDRESS)",
        "--nthreads",
        str(threads_per_worker),
        "--death-timeout",
        "60",
    ]
    if memory_limit:
        args.extend(["--memory-limit", str(memory_limit)])

    pod = client.V1Pod(
        metadata=client.V1ObjectMeta(labels=labels),
        spec=client.V1PodSpec(
            restart_policy="Never",
            containers=[
                client.V1Container(
                    name="dask-worker",
                    image=image,
                    args=args,
                    env=[
                        client.V1EnvVar(name=k, value=v)
                        for k, v in env.items()
                    ],
                )
            ],
        ),
    )

    # Only resources that were actually provided end up in the spec.
    resources = client.V1ResourceRequirements(limits={}, requests={})
    if cpu_request:
        resources.requests["cpu"] = cpu_request
    if memory_request:
        resources.requests["memory"] = memory_request
    if cpu_limit:
        resources.limits["cpu"] = cpu_limit
    if memory_limit:
        resources.limits["memory"] = memory_limit

    worker_container = pod.spec.containers[0]
    worker_container.resources = resources

    # Extra configuration is applied last so it can override the above.
    for key, value in extra_container_config.items():
        _set_k8s_attribute(worker_container, key, value)
    for key, value in extra_pod_config.items():
        _set_k8s_attribute(pod.spec, key, value)

    return pod
def setUp(self):
    """
    Prepare the mocked site configuration, the canned responses of the
    mocked Kubernetes APIs and the KubernetesAdapter under test.
    """
    drone_uuid = "testsite-089123"

    test_site_config = self.mock_config.return_value.TestSite
    # Endpoint of the Kubernetes cluster
    test_site_config.host = "https://127.0.0.1:443"
    # Bearer token used for authentication
    test_site_config.token = "31ada4fd-adec-460c-809a-9e56ceb75269"
    test_site_config.MachineTypeConfiguration = AttributeDict(
        test2large=AttributeDict(
            namespace="default",
            image="busybox:1.26.1",
            args=["sleep", "3600"],
            hpa="True",
            min_replicas="1",
            max_replicas="2",
            cpu_utilization="50",
        )
    )
    test_site_config.MachineMetaData = AttributeDict(
        test2large=AttributeDict(Cores=2, Memory=4)
    )

    kubernetes_api = self.mock_kubernetes_api.return_value
    kubernetes_hpa = self.mock_kubernetes_hpa.return_value

    # Expected deployment body, assembled inside-out.
    machine_meta_data = test_site_config.MachineMetaData.test2large
    container = client.V1Container(
        image="busybox:1.26.1",
        args=["sleep", "3600"],
        name=drone_uuid,
        resources=client.V1ResourceRequirements(
            requests={
                "cpu": machine_meta_data.Cores,
                "memory": machine_meta_data.Memory * 1e9,
            }
        ),
        env=[
            client.V1EnvVar(name="TardisDroneCores", value="2"),
            client.V1EnvVar(name="TardisDroneMemory", value="4096"),
            client.V1EnvVar(name="TardisDroneUuid", value=drone_uuid),
        ],
    )
    spec = client.V1DeploymentSpec(
        replicas=1,
        selector=client.V1LabelSelector(match_labels={"app": drone_uuid}),
        template=client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                name=drone_uuid,
                labels={"app": drone_uuid},
            ),
            spec=client.V1PodSpec(containers=[container]),
        ),
    )

    # The same spec object backs the request body and both canned replies.
    self.body = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=drone_uuid),
        spec=spec,
    )
    self.create_return_value = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=drone_uuid, uid="123456"),
        spec=spec,
    )
    self.read_return_value = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=drone_uuid, uid="123456"),
        spec=spec,
        status=client.V1DeploymentStatus(
            conditions=[
                client.V1DeploymentCondition(
                    status="True",
                    type="Progressing",
                )
            ]
        ),
    )

    kubernetes_api.create_namespaced_deployment.return_value = async_return(
        return_value=self.create_return_value
    )
    kubernetes_api.read_namespaced_deployment.return_value = async_return(
        return_value=self.read_return_value
    )
    # The remaining mocked calls resolve to None; each one gets its own
    # awaitable.
    for mocked_call in (
        kubernetes_api.replace_namespaced_deployment,
        kubernetes_api.delete_namespaced_deployment,
        kubernetes_hpa.create_namespaced_horizontal_pod_autoscaler,
        kubernetes_hpa.delete_namespaced_horizontal_pod_autoscaler,
    ):
        mocked_call.return_value = async_return(return_value=None)

    self.kubernetes_adapter = KubernetesAdapter(
        machine_type="test2large", site_name="TestSite"
    )