Example 1
    def test_scale_up_notification(self):
        big_pod_spec = copy.deepcopy(self.dummy_pod)
        for container in big_pod_spec['spec']['containers']:
            container['resources']['requests']['cpu'] = '100'
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        big_pod = KubePod(pykube.Pod(self.api, big_pod_spec))
        selectors_hash = utils.selectors_to_hash(pod.selectors)
        asgs = self.cluster.autoscaling_groups.get_all_groups([])
        self.cluster.fulfill_pending(asgs, selectors_hash, [pod, big_pod])
        self.cluster.notifier.notify_scale.assert_called_with(mock.ANY, mock.ANY, [pod])
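
The scale-up tests key pending pods by utils.selectors_to_hash(pod.selectors). The helper's implementation isn't shown in these examples; a minimal sketch of the idea, assuming selectors are a flat dict of label strings (hypothetical implementation, for illustration only):

import json

def selectors_to_hash(selectors):
    # Serialize the node-selector dict deterministically so pods with
    # identical selectors map to the same autoscaling-group key.
    # Hypothetical sketch; the autoscaler's own helper may differ.
    return json.dumps(selectors or {}, sort_keys=True)
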
Example 2
    def test_scale_down_under_utilized_undrainable(self):
        """
        kube node with daemonset and pod/rc-pod --> noop
        """
        node = self._spin_up_node()
        all_nodes = [node]
        managed_nodes = [n for n in all_nodes if n.is_managed()]
        running_insts_map = self.cluster.get_running_instances_map(
            managed_nodes)
        pods_to_schedule = {}
        asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

        # create some undrainable pods
        ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
        for container in self.dummy_pod['spec']['containers']:
            container.pop('resources', None)
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        self.dummy_rc_pod['metadata']['labels']['openai/do-not-drain'] = 'true'
        for container in self.dummy_rc_pod['spec']['containers']:
            container.pop('resources', None)
        rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))

        pod_scenarios = [
            # kube node with daemonset and pod with no resource ask --> noop
            [ds_pod, pod],
            # kube node with daemonset and critical rc pod --> noop
            [ds_pod, rc_pod]
        ]

        # make sure we're not in a grace period
        self.cluster.idle_threshold = -1
        self.cluster.type_idle_threshold = -1
        self.cluster.LAUNCH_HOUR_THRESHOLD = -1

        for pods in pod_scenarios:
            state = self.cluster.get_node_state(node, asgs[0], pods,
                                                pods_to_schedule,
                                                running_insts_map,
                                                collections.Counter())
            self.assertEqual(state,
                             ClusterNodeState.UNDER_UTILIZED_UNDRAINABLE)

            self.cluster.maintain(managed_nodes, running_insts_map,
                                  pods_to_schedule, pods, asgs)

            response = self.asg_client.describe_auto_scaling_groups()
            self.assertEqual(len(response['AutoScalingGroups']), 1)
            self.assertEqual(
                response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
            node.cordon.assert_not_called()
Example 3
    def test_scale_down(self):
        """
        kube node with daemonset and no pod --> cordon
        """
        node = self._spin_up_node()
        node.cordon = mock.Mock(return_value="mocked stuff")
        node.drain = mock.Mock(return_value="mocked stuff")
        all_nodes = [node]
        managed_nodes = [n for n in all_nodes if n.is_managed()]
        running_insts_map = self.cluster.get_running_instances_map(managed_nodes)
        pods_to_schedule = {}
        asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

        ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
        running_or_pending_assigned_pods = [ds_pod]

        self.cluster.idle_threshold = -1
        self.cluster.type_idle_threshold = -1
        self.cluster.maintain(
            managed_nodes, running_insts_map,
            pods_to_schedule, running_or_pending_assigned_pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        assert len(response['AutoScalingGroups']) == 1
        assert response['AutoScalingGroups'][0]['DesiredCapacity'] == 1
        node.cordon.assert_called_once_with()
Example 4
    def test_scale_down_launch_grace_period(self):
        """
        kube node with daemonset and no pod + launch grace period --> noop
        """
        node = self._spin_up_node()
        all_nodes = [node]
        managed_nodes = [n for n in all_nodes if n.is_managed()]
        running_insts_map = self.cluster.get_running_instances_map(managed_nodes, [])
        pods_to_schedule = {}
        asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

        ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
        running_or_pending_assigned_pods = [ds_pod]

        self.cluster.idle_threshold = -1
        self.cluster.type_idle_threshold = -1
        self.cluster.LAUNCH_HOUR_THRESHOLD['aws'] = 60*30
        self.cluster.maintain(
            managed_nodes, running_insts_map,
            pods_to_schedule, running_or_pending_assigned_pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
        node.cordon.assert_not_called()
Example 5
    def test_get_pending_pods(self):
        dummy_node = copy.deepcopy(self.dummy_node)
        dummy_node['metadata']['name'] = 'k8s-agentpool1-16334397-0'
        node = KubeNode(pykube.Node(self.api, dummy_node))
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))

        act = self.cluster.get_pending_pods([pod], [node])
        self.assertEqual(len(act), 0)

        node = KubeNode(pykube.Node(self.api, dummy_node))
        pod2 = KubePod(pykube.Pod(self.api, self.dummy_pod))
        pod3 = KubePod(pykube.Pod(self.api, self.dummy_pod))

        act = self.cluster.get_pending_pods([pod, pod2, pod3], [node])
        # only one should fit
        self.assertEqual(len(act), 2)
Example 6
    def test_impossible(self):
        self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 't2.micro'}

        print(repr(self.dummy_pod['metadata']['creationTimestamp']))
        from dateutil.parser import parse as dateutil_parse
        print(dateutil_parse(self.dummy_pod['metadata']['creationTimestamp']))
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        assert not capacity.is_possible(pod)
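
capacity.is_possible(pod) is expected to be False here because no instance type selected by aws/type: t2.micro can satisfy the pod's resource requests. A rough sketch of that kind of feasibility check, with hypothetical instance figures and treating pod.resources as a plain dict (both assumptions, not the autoscaler's actual API):

# Hypothetical feasibility check, for illustration only.
INSTANCE_CAPACITY = {
    't2.micro': {'cpu': 1.0, 'memory': 1 * 1024**3},   # assumed sizes
    'm4.large': {'cpu': 2.0, 'memory': 8 * 1024**3},
}

def is_possible(pod):
    # Restrict to the instance type named by the pod's node selector, if any.
    wanted = (pod.selectors or {}).get('aws/type')
    types = [wanted] if wanted in INSTANCE_CAPACITY else list(INSTANCE_CAPACITY)
    requests = pod.resources  # assumed shape: {'cpu': cores, 'memory': bytes}
    return any(
        requests.get('cpu', 0) <= INSTANCE_CAPACITY[t]['cpu']
        and requests.get('memory', 0) <= INSTANCE_CAPACITY[t]['memory']
        for t in types
    )
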
Example 7
    def test_scale_up(self):
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        selectors_hash = utils.selectors_to_hash(pod.selectors)
        asgs = self.cluster.autoscaling_groups.get_all_groups([])
        self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertGreater(response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
Example 8
    def test_scale_down_busy(self):
        """
        kube node with daemonset and pod/rc-pod --> noop
        """
        node = self._spin_up_node()
        all_nodes = [node]
        managed_nodes = [n for n in all_nodes if n.is_managed()]
        running_insts_map = self.cluster.get_running_instances_map(
            managed_nodes)
        pods_to_schedule = {}
        asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

        # kube node with daemonset and pod --> noop
        ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))

        pod_scenarios = [
            # kube node with daemonset and pod --> noop
            [ds_pod, pod],
            # kube node with daemonset and rc pod --> noop
            [ds_pod, rc_pod]
        ]

        # make sure we're not in a grace period
        self.cluster.idle_threshold = -1
        self.cluster.type_idle_threshold = -1

        for pods in pod_scenarios:
            state = self.cluster.get_node_state(node, asgs[0], pods,
                                                pods_to_schedule,
                                                running_insts_map,
                                                collections.Counter())
            self.assertEqual(state, ClusterNodeState.BUSY)

            self.cluster.maintain(managed_nodes, running_insts_map,
                                  pods_to_schedule, pods, asgs)

            response = self.asg_client.describe_auto_scaling_groups()
            self.assertEqual(len(response['AutoScalingGroups']), 1)
            self.assertEqual(
                response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
            node.cordon.assert_not_called()
Example 9
    def test_scale_up_selector(self):
        self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 'm4.large'}
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        selectors_hash = utils.selectors_to_hash(pod.selectors)
        asgs = self.cluster.autoscaling_groups.get_all_groups([])
        self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'],
                         0)
Example 10
    def test_scale_down_under_utilized_drainable(self):
        """
        kube node with daemonset and rc-pod --> cordon+drain
        """
        node = self._spin_up_node()
        all_nodes = [node]
        managed_nodes = [n for n in all_nodes if n.is_managed()]
        running_insts_map = self.cluster.get_running_instances_map(
            managed_nodes)
        pods_to_schedule = {}
        asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

        # create some drainable, under-utilized pods
        ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
        for container in self.dummy_rc_pod['spec']['containers']:
            container.pop('resources', None)
        rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))
        pods = [ds_pod, rc_pod]

        # make sure we're not in a grace period
        self.cluster.idle_threshold = -1
        self.cluster.type_idle_threshold = -1
        self.cluster.LAUNCH_HOUR_THRESHOLD = -1

        state = self.cluster.get_node_state(node, asgs[0], pods,
                                            pods_to_schedule,
                                            running_insts_map,
                                            collections.Counter())
        self.assertEqual(state, ClusterNodeState.UNDER_UTILIZED_DRAINABLE)

        self.cluster.maintain(managed_nodes, running_insts_map,
                              pods_to_schedule, pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'],
                         1)
        node.cordon.assert_called_once_with()
        node.drain.assert_called_once_with(pods, notifier=mock.ANY)
Example 11
    def test_fulfill_pending(self):
        nodes = self.create_nodes(2, 1)
        scaler = self.create_scaler(nodes)
        scaler.scale_pools = MagicMock()

        # Should add one node to pool 1 (2, 1)
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        scaler.fulfill_pending([pod])
        scaler.scale_pools.assert_called_with({
            'agentpool0': 2,
            'agentpool1': 1
        })

        # The two pods should fit in the same new node
        dummy_pod_2 = copy.deepcopy(self.dummy_pod)
        dummy_pod_2['spec']['containers'][0]['resources']['requests'][
            'cpu'] = '400m'
        dummy_pod_2['spec']['containers'][0]['resources']['limits'][
            'cpu'] = '400m'
        dummy_pod_2['metadata']['uid'] = 'fake'
        pod_2 = KubePod(pykube.Pod(self.api, dummy_pod_2))
        scaler.fulfill_pending([pod, pod_2])
        scaler.scale_pools.assert_called_with({
            'agentpool0': 2,
            'agentpool1': 1
        })

        # pod_2 shouldn't fit anymore, so it should add 2 new VMs
        dummy_pod_2['spec']['containers'][0]['resources']['requests'][
            'cpu'] = '600m'
        dummy_pod_2['spec']['containers'][0]['resources']['limits'][
            'cpu'] = '600m'
        pod_2 = KubePod(pykube.Pod(self.api, dummy_pod_2))
        scaler.fulfill_pending([pod, pod_2])
        scaler.scale_pools.assert_called_with({
            'agentpool0': 3,
            'agentpool1': 1
        })
Example 12
    def test_timed_out_group(self):
        with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.is_timed_out') as is_timed_out:
            with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.scale') as scale:
                is_timed_out.return_value = True
                scale.return_value = utils.CompletedFuture(None)

                pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
                selectors_hash = utils.selectors_to_hash(pod.selectors)
                asgs = self.cluster.autoscaling_groups.get_all_groups([])
                self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

                scale.assert_not_called()

                response = self.asg_client.describe_auto_scaling_groups()
                self.assertEqual(len(response['AutoScalingGroups']), 1)
                self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
Example 13
    def test_scale_up(self):
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        selectors_hash = utils.selectors_to_hash(pod.selectors)
        asgs = self.cluster.autoscaling_groups.get_all_groups([])
        self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 2)
        if (response['AutoScalingGroups'][0]['AutoScalingGroupName'] ==
                'dummy-asg-small-gpu'):
            small_gpu_asg = response['AutoScalingGroups'][0]
            big_gpu_asg = response['AutoScalingGroups'][1]
        else:
            small_gpu_asg = response['AutoScalingGroups'][1]
            big_gpu_asg = response['AutoScalingGroups'][0]

        self.assertGreater(big_gpu_asg['DesiredCapacity'], 0)
        self.assertEqual(small_gpu_asg['DesiredCapacity'], 0)
Example 14
    def test_possible(self):
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        assert capacity.is_possible(pod)
Example 15
    def test_can_fit(self):
        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        node = KubeNode(pykube.Node(self.api, self.dummy_node))
        assert node.can_fit(pod.resources)
Example 16
    def build_kube_pod(self, pykube_pod):
        return KubePod(pykube_pod, self.drainable_labels)
    def test_impossible(self):
        self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 't2.micro'}

        pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
        assert not capacity.is_possible(pod)