def test_tainted_scale_set(self):
    region = 'test'
    mock_client, monitor_client, resource_client = _default_mock_clients(region)
    instance_type = 'Standard_NC24'
    resource_group = 'test-resource-group'

    scale_set = AzureScaleSet(region, resource_group, 'test-scale-set', instance_type, 0, 'Succeeded',
                              no_schedule_taints={'gpu': 'yes'})
    virtual_scale_set = AzureVirtualScaleSet(
        region, resource_group,
        AzureWrapper(mock_client, monitor_client, resource_client),
        instance_type, True, [scale_set], [])

    dir_path = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(dir_path, 'data/busybox.yaml'), 'r') as f:
        # safe_load avoids the deprecated no-Loader form of yaml.load
        dummy_pod = yaml.safe_load(f.read())
    pod = KubePod(pykube.Pod(None, dummy_pod))

    self.assertFalse(virtual_scale_set.is_taints_tolerated(pod))

    dummy_pod['spec']['tolerations'] = [{'key': 'gpu', 'operator': 'Exists'}]
    pod = KubePod(pykube.Pod(None, dummy_pod))
    self.assertTrue(virtual_scale_set.is_taints_tolerated(pod))
def test_scale_up_notification(self):
    big_pod_spec = copy.deepcopy(self.dummy_pod)
    for container in big_pod_spec['spec']['containers']:
        container['resources']['requests']['cpu'] = '100'
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    big_pod = KubePod(pykube.Pod(self.api, big_pod_spec))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod, big_pod])

    self.cluster.notifier.notify_scale.assert_called_with(mock.ANY, mock.ANY, [pod])
def test_scale_down_under_utilized_undrainable(self):
    """
    kube node with daemonset and pod/rc-pod --> noop
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if node.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    # create some undrainable pods
    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    for container in self.dummy_pod['spec']['containers']:
        container.pop('resources', None)
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    self.dummy_rc_pod['metadata']['labels']['openai/do-not-drain'] = 'true'
    for container in self.dummy_rc_pod['spec']['containers']:
        container.pop('resources', None)
    rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))

    pod_scenarios = [
        # kube node with daemonset and pod with no resource ask --> noop
        [ds_pod, pod],
        # kube node with daemonset and critical rc pod --> noop
        [ds_pod, rc_pod]
    ]

    # make sure we're not on grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.LAUNCH_HOUR_THRESHOLD = -1

    for pods in pod_scenarios:
        state = self.cluster.get_node_state(
            node, asgs[0], pods, pods_to_schedule,
            running_insts_map, collections.Counter())
        self.assertEqual(state, ClusterNodeState.UNDER_UTILIZED_UNDRAINABLE)

        self.cluster.maintain(
            managed_nodes, running_insts_map,
            pods_to_schedule, pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
        node.cordon.assert_not_called()
def test_get_pending_pods(self):
    dummy_node = copy.deepcopy(self.dummy_node)
    dummy_node['metadata']['name'] = 'k8s-agentpool1-16334397-0'
    node = KubeNode(pykube.Node(self.api, dummy_node))
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))

    act = self.cluster.get_pending_pods([pod], [node])
    self.assertEqual(len(act), 0)

    node = KubeNode(pykube.Node(self.api, dummy_node))
    pod2 = KubePod(pykube.Pod(self.api, self.dummy_pod))
    pod3 = KubePod(pykube.Pod(self.api, self.dummy_pod))

    act = self.cluster.get_pending_pods([pod, pod2, pod3], [node])
    # only one should fit
    self.assertEqual(len(act), 2)
def test_pod_exists(api, requests_mock):
    with requests_mock as rsps:
        obj = {
            "apiVersion": "v1",
            "kind": "Pod",
            "metadata": {"name": "my-pod"},
            "spec": {"containers": []},
        }
        pod = pykube.Pod(api, obj)
        rsps.add(
            responses.GET,
            "https://localhost:9443/api/v1/namespaces/default/pods/my-pod",
            status=404,
        )
        assert not pod.exists()
        with pytest.raises(ObjectDoesNotExist):
            pod.exists(ensure=True)

        rsps.replace(
            responses.GET,
            "https://localhost:9443/api/v1/namespaces/default/pods/my-pod",
            json={},
        )
        assert pod.exists()
def shell(self):
    podSpec = self._get_shell_pod()
    if self._find_obj(pykube.Pod, self.shell_label_selector) is None:
        pykube.Pod(self.api, podSpec).create()
    if not self._wait_for_pod(self.shell_label_selector):
        print("Pod is not ready in time")
        self._delete_obj(pykube.Pod, self.shell_label_selector)
        sys.exit(1)

    kubectl = subprocess.Popen([
        '/usr/bin/kubectl', 'attach', '--tty', '--stdin', 'interactive'
    ])
    kubectl.wait()

    # kubectl attach prints the following when done: Session ended, resume
    # using 'kubectl attach interactive -c interactive -i -t' command when
    # the pod is running
    # This isn't true, since we actually kill the pod when done
    print("Pod stopped. Session cannot be resumed.")
    self._delete_obj(pykube.Pod, self.shell_label_selector)
def test_scale_down(self):
    """
    kube node with daemonset and no pod --> cordon
    """
    node = self._spin_up_node()
    node.cordon = mock.Mock(return_value="mocked stuff")
    node.drain = mock.Mock(return_value="mocked stuff")
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if node.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    running_or_pending_assigned_pods = [ds_pod]

    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.maintain(
        managed_nodes, running_insts_map,
        pods_to_schedule, running_or_pending_assigned_pods, asgs)

    response = self.asg_client.describe_auto_scaling_groups()
    assert len(response['AutoScalingGroups']) == 1
    assert response['AutoScalingGroups'][0]['DesiredCapacity'] == 1
    node.cordon.assert_called_once_with()
def terminate(self, name):
    """Terminate a service: delete the Service, then the ReplicationController and any Pods that carry the same labels."""
    del_obj = {
        'apiVersion': 'v1',
        'kind': '',
        'metadata': {
            'name': name,
            'namespace': get_conf().kube_namespace
        }
    }
    try:
        del_obj['kind'] = 'Service'
        pykube.Service(self.api, del_obj).delete()

        del_obj['kind'] = 'ReplicationController'
        pykube.ReplicationController(self.api, del_obj).delete()

        del_obj['kind'] = 'Pod'
        # copy the shared label dict so it is not mutated across calls
        pod_selector = dict(ZOE_LABELS)
        pod_selector['service_name'] = name
        pods = pykube.Pod.objects(self.api).filter(
            namespace=get_conf().kube_namespace,
            selector=pod_selector).iterator()
        for pod in pods:
            del_obj['metadata']['name'] = str(pod)
            pykube.Pod(self.api, del_obj).delete()

        log.info('Service deleted on Kubernetes cluster')
    except Exception as ex:
        log.error(ex)
def test_scale_down_launch_grace_period(self):
    """
    kube node with daemonset and no pod + launch grace period --> noop
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if node.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(managed_nodes, [])
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    running_or_pending_assigned_pods = [ds_pod]

    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.LAUNCH_HOUR_THRESHOLD['aws'] = 60 * 30
    self.cluster.maintain(
        managed_nodes, running_insts_map,
        pods_to_schedule, running_or_pending_assigned_pods, asgs)

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
    node.cordon.assert_not_called()
def destroy(api, spec):
    if spec["kind"] == 'Deployment':
        pykube.Deployment(api, spec).delete()
    if spec["kind"] == 'Service':
        pykube.Service(api, spec).delete()
    if spec["kind"] == 'Pod':
        pykube.Pod(api, spec).delete()
def construct(api, spec):
    if spec["kind"] == 'Deployment':
        pykube.Deployment(api, spec).create()
    if spec["kind"] == 'Service':
        pykube.Service(api, spec).create()
    if spec["kind"] == 'Pod':
        pykube.Pod(api, spec).create()
def run_on_node(fn, node):
    name = f"task-{uuid.uuid4()}"
    ctx = {
        "name": name,
        "namespace": "kube-system",  # FIXME
        "nodeSelector": json.dumps({"kubernetes.io/hostname": node}),
        "cmd": json.dumps(fn),
        "image_repository": CONFIG["image_repository"],
        "image_tag": CONFIG["image_tag"],
    }

    template = Path("./templates/task.yaml").read_bytes().decode()
    manifest = template.format(**ctx)
    obj = yaml.safe_load(manifest)
    task_pod = pykube.Pod(api, obj)
    task_pod.create()

    def is_finished():
        task_pod.reload()
        status = task_pod.obj["status"]
        if status["phase"] in ["Succeeded", "Failed"]:
            return True
        return False

    wait_for(is_finished, "task to finish")

    if task_pod.obj["status"]["phase"] != "Succeeded":
        exit_code = task_pod.obj["status"]["containerStatuses"][0]["state"]["terminated"]["exitCode"]
        raise CalledProcessError(returncode=exit_code, cmd=f"Task: {name}")

    task_pod.delete()
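# run_on_node above polls the task pod through a helper called wait_for, which is not part of
# this snippet. The following is a minimal sketch of what such a helper might look like,
# assuming a simple timeout-based poll loop; the timeout and interval values are assumptions,
# not taken from the source.
import time


def wait_for(condition, description, timeout=300, interval=5):
    """Poll condition() until it returns True or the timeout elapses."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if condition():
            return
        time.sleep(interval)
    raise TimeoutError(f"Timed out waiting for {description}")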
def create_fn(spec, **kwargs):
    # Render the pod yaml with some spec fields used in the template.
    doc = yaml.safe_load(f"""
        apiVersion: v1
        kind: Pod
        spec:
          containers:
          - name: the-only-one
            image: busybox
            command: ["sh", "-x", "-c"]
            args:
            - |
              echo "FIELD=$FIELD"
              sleep {spec.get('duration', 0)}
            env:
            - name: FIELD
              value: {spec.get('field', 'default-value')}
    """)

    # Make it our child: assign the namespace, name, labels, owner references, etc.
    kopf.adopt(doc)

    # Actually create an object by requesting the Kubernetes API.
    api = pykube.HTTPClient(pykube.KubeConfig.from_env())
    pod = pykube.Pod(api, doc)
    pod.create()
    api.session.close()

    # Update the parent's status.
    return {'children': [pod.metadata['uid']]}
def test_impossible(self):
    self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 't2.micro'}
    print(repr(self.dummy_pod['metadata']['creationTimestamp']))
    from dateutil.parser import parse as dateutil_parse
    print(dateutil_parse(self.dummy_pod['metadata']['creationTimestamp']))
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    assert not capacity.is_possible(pod)
def test_scale_up(self):
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertGreater(response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
def setup_module(_):
    extra_kwargs = {}
    if os.environ.get("API_URL", False):
        extra_kwargs.update({
            "override_api_url": os.environ.get("API_URL")
        })

    def _raise():
        raise Exception("No subject storage provider overridden")

    subject_storage_factory.override(
        providers.Factory(
            lambda name, event_info, event_data:
                name.startswith("k8s:") and k8s_storage_impl.SubjectsK8sStorage(
                    resource_path=name[4:],
                    resource_body=event_data
                ) or _raise()
        )
    )

    global POD_NAME, NAMESPACE
    POD_NAME = "pytest-temp-{0}".format(
        ''.join(random.choice(string.ascii_lowercase) for i in range(8))
    )

    if os.environ.get("API_URL", False):
        config = pykube.KubeConfig.from_url(os.environ.get("API_URL"))
    else:
        config = pykube.KubeConfig.from_env()

    if os.environ.get("NAMESPACE", False):
        NAMESPACE = os.environ.get("NAMESPACE")
    else:
        NAMESPACE = config.namespace

    # create a test pod
    obj = {
        "apiVersion": "v1",
        "kind": "Pod",
        "metadata": {
            "name": POD_NAME,
            "namespace": NAMESPACE,
            "labels": {
                "app": "pytest-temp"
            }
        },
        "spec": {
            "containers": [{
                "name": "hello",
                "image": "karthequian/helloworld"
            }]
        }
    }
    api = pykube.HTTPClient(config)
    pykube.Pod(api, obj).create()
def test_scale_down_busy(self):
    """
    kube node with daemonset and pod/rc-pod --> noop
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if node.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))
    pod_scenarios = [
        # kube node with daemonset and pod --> noop
        [ds_pod, pod],
        # kube node with daemonset and rc pod --> noop
        [ds_pod, rc_pod]
    ]

    # make sure we're not on grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1

    for pods in pod_scenarios:
        state = self.cluster.get_node_state(
            node, asgs[0], pods, pods_to_schedule,
            running_insts_map, collections.Counter())
        self.assertEqual(state, ClusterNodeState.BUSY)

        self.cluster.maintain(
            managed_nodes, running_insts_map,
            pods_to_schedule, pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
        node.cordon.assert_not_called()
def deploy(self, obj):
    if obj['kind'] == 'Deployment':
        logging.info('Starting Deployment... ' + obj['metadata']['name'])
        pykube.Deployment(self.api, obj).create()
    elif obj['kind'] == 'Service':
        logging.info('Starting Service... ' + obj['metadata']['name'])
        pykube.Service(self.api, obj).create()
    elif obj['kind'] == 'Pod':
        logging.info('Starting Pod... ' + obj['metadata']['name'] +
                     ' (' + obj['spec']['containers'][0]['image'] + ')')
        pykube.Pod(self.api, obj).create()
def delete_pod(self, name):
    obj = {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {
            'name': name,
            'namespace': 'default'
        }
    }
    logging.info('Deleting... {}'.format(name))
    pykube.Pod(self.api, obj).delete()
def get_pod(kube_api, namespace, name):
    """
    Retrieve Pod from cluster
    :param kube_api: Pykube API Object
    :param namespace: Namespace of pod
    :param name: Name of pod
    :return: Pykube Pod object from cluster
    """
    pod = pykube.Pod(kube_api, base_obj("Pod", namespace, name))
    pod.reload()
    return pod
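# get_pod above builds a stub manifest with a helper called base_obj before reloading the object
# from the cluster. base_obj is not included in this snippet; a minimal sketch consistent with
# the call site (kind, namespace, name) might look like this, though the exact fields it sets
# are an assumption.
def base_obj(kind, namespace, name):
    """Return the minimal manifest pykube needs to address an existing object."""
    return {
        "apiVersion": "v1",
        "kind": kind,
        "metadata": {
            "namespace": namespace,
            "name": name,
        },
    }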
def test_scale_up_selector(self):
    self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 'm4.large'}
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
def test_validate_rogue_unrelated(self):
    pod1 = pykube.Pod(self.api, TestData.pod_spec)
    pod2 = pykube.Pod(self.api, TestData.pod_spec_noapp)
    pod1.create()
    pod2.create()
    while not pod1.ready:
        pod1.reload()
        sleep(1)
    while not pod2.ready:
        pod2.reload()
        sleep(1)

    ogo = self.extraSetUp(TestData.kot, TestData.kog)
    self.assertFalse(ogo.is_pod_expected())
    self.kw.setdefault('status', {})['pod'] = pod1.name
    self.kw.setdefault('status', {})['currently_running'] = 'itemname'
    self.assertIsNone(ogo.validate_no_rogue_pods_are_running())

    pod1.obj['metadata']['labels'] = None
    pod2.obj['metadata']['labels'] = None
    pod1.update()
    pod2.update()
    pod1.delete()
    pod2.delete()
def test_scale_down_under_utilized_drainable(self):
    """
    kube node with daemonset and rc-pod --> cordon+drain
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if node.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    # create some drainable pods
    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    for container in self.dummy_rc_pod['spec']['containers']:
        container.pop('resources', None)
    rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))
    pods = [ds_pod, rc_pod]

    # make sure we're not on grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.LAUNCH_HOUR_THRESHOLD = -1

    state = self.cluster.get_node_state(
        node, asgs[0], pods, pods_to_schedule,
        running_insts_map, collections.Counter())
    self.assertEqual(state, ClusterNodeState.UNDER_UTILIZED_DRAINABLE)

    self.cluster.maintain(
        managed_nodes, running_insts_map,
        pods_to_schedule, pods, asgs)

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
    node.cordon.assert_called_once_with()
    node.drain.assert_called_once_with(pods, notifier=mock.ANY)
def test_fulfill_pending(self):
    nodes = self.create_nodes(2, 1)
    scaler = self.create_scaler(nodes)
    scaler.scale_pools = MagicMock()

    # Should add one node to pool 1 (2, 1)
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    scaler.fulfill_pending([pod])
    scaler.scale_pools.assert_called_with({'agentpool0': 2, 'agentpool1': 1})

    # The two pods should fit in the same new node
    dummy_pod_2 = copy.deepcopy(self.dummy_pod)
    dummy_pod_2['spec']['containers'][0]['resources']['requests']['cpu'] = '400m'
    dummy_pod_2['spec']['containers'][0]['resources']['limits']['cpu'] = '400m'
    dummy_pod_2['metadata']['uid'] = 'fake'
    pod_2 = KubePod(pykube.Pod(self.api, dummy_pod_2))
    scaler.fulfill_pending([pod, pod_2])
    scaler.scale_pools.assert_called_with({'agentpool0': 2, 'agentpool1': 1})

    # pod_2 shouldn't fit anymore, and so it should add 2 new VMs
    dummy_pod_2['spec']['containers'][0]['resources']['requests']['cpu'] = '600m'
    dummy_pod_2['spec']['containers'][0]['resources']['limits']['cpu'] = '600m'
    pod_2 = KubePod(pykube.Pod(self.api, dummy_pod_2))
    scaler.fulfill_pending([pod, pod_2])
    scaler.scale_pools.assert_called_with({'agentpool0': 3, 'agentpool1': 1})
def _create_pod():
    api = pykube.HTTPClient(pykube.KubeConfig.from_file())
    pod = pykube.Pod(api, {
        'apiVersion': 'v1',
        'kind': 'Pod',
        'metadata': {'generateName': 'kopf-pod-', 'namespace': 'default'},
        'spec': {
            'containers': [{
                'name': 'the-only-one',
                'image': 'busybox',
                'command': ["sh", "-x", "-c", "sleep 1"],
            }]},
    })
    pod.create()
    return pod.name
def test_validate_expected_pod_negative(self):
    pod1 = pykube.Pod(self.api, TestData.pod_spec)
    pod1.create()
    while not pod1.ready:
        pod1.reload()
        sleep(1)

    ogo = self.extraSetUp(TestData.kot, TestData.kog)
    self.assertFalse(ogo.is_pod_expected())
    self.kw.setdefault('status', {})['pod'] = f'not-{pod1.name}'
    self.kw.setdefault('status', {})['currently_running'] = 'itemname'
    with self.assertRaisesRegex(ProcessingComplete, 'item.*failed during validation'):
        ogo.validate_expected_pod_is_running()

    pod1.obj['metadata']['labels'] = None
    pod1.update()
    pod1.delete()
def test_timed_out_group(self):
    with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.is_timed_out') as is_timed_out:
        with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.scale') as scale:
            is_timed_out.return_value = True
            scale.return_value = utils.CompletedFuture(None)

            pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
            selectors_hash = utils.selectors_to_hash(pod.selectors)
            asgs = self.cluster.autoscaling_groups.get_all_groups([])
            self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

            scale.assert_not_called()

            response = self.asg_client.describe_auto_scaling_groups()
            self.assertEqual(len(response['AutoScalingGroups']), 1)
            self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
def test_validate_expected_pod(self):
    pod1 = pykube.Pod(self.api, TestData.pod_spec)
    pod1.create()
    while not pod1.ready:
        pod1.reload()
        sleep(1)

    ogo = self.extraSetUp(TestData.kot, TestData.kog)
    self.assertFalse(ogo.is_pod_expected())
    self.kw.setdefault('status', {})['pod'] = pod1.name
    self.kw.setdefault('status', {})['currently_running'] = 'itemname'
    self.assertEqual(ogo.get_status('pod'), pod1.name)
    with self.assertRaisesRegex(ProcessingComplete, 'Pod.*exists and is in state'):
        ogo.validate_expected_pod_is_running()

    pod1.obj['metadata']['labels'] = None
    pod1.update()
    pod1.delete()
def test_scale_up(self):
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 2)

    big_gpu_asg, small_gpu_asg = {}, {}
    if (response['AutoScalingGroups'][0]['AutoScalingGroupName'] ==
            'dummy-asg-small-gpu'):
        small_gpu_asg = response['AutoScalingGroups'][0]
        big_gpu_asg = response['AutoScalingGroups'][1]
    else:
        small_gpu_asg = response['AutoScalingGroups'][1]
        big_gpu_asg = response['AutoScalingGroups'][0]

    self.assertGreater(big_gpu_asg['DesiredCapacity'], 0)
    self.assertEqual(small_gpu_asg['DesiredCapacity'], 0)
def create_pod(body, **kwargs):
    # Render the pod yaml with some spec fields used in the template.
    pod_data = yaml.safe_load(f"""
        apiVersion: v1
        kind: Pod
        spec:
          containers:
          - name: the-only-one
            image: busybox
            command: ["sh", "-x", "-c", "sleep 1"]
    """)

    # Make it our child: assign the namespace, name, labels, owner references, etc.
    kopf.adopt(pod_data, owner=body)
    kopf.label(pod_data, {'application': 'kopf-example-10'})

    # Actually create an object by requesting the Kubernetes API.
    pod = pykube.Pod(api, pod_data)
    pod.create()