Example #1
    def test_add_less_gpu(self):
        node_id = 2
        gpu_count = 3
        GPUManager.define_node_gpus(node_id, gpu_count)
        self.assertEqual(GPUManager.get_gpu_count_for_node(node_id), 3)

        gpu_count = 1
        GPUManager.define_node_gpus(node_id, gpu_count)
        # Redefining the node with fewer GPUs should not reduce the stored count
        self.assertEqual(GPUManager.get_gpu_count_for_node(node_id), 3)
Example #2
    def test_calls_where_node_has_no_gpus(self):
        node_id = 7
        job_id = 10
        gpu_count = 2
        required_gpus = 2
        GPUManager.define_node_gpus(node_id, gpu_count)
        node_id = 8  # switch to a node that has no GPUs defined
        self.assertFalse(
            GPUManager.reserve_gpus_for_job(node_id, required_gpus))
        self.assertFalse(
            GPUManager.assign_gpus_for_job(node_id, job_id, required_gpus))
        nvidia_label = GPUManager.get_nvidia_docker_label(node_id, job_id)
        self.assertEqual(nvidia_label, "")
Example #3
    def test_reserve_gpu(self):
        node_id = 4
        gpu_count = 2
        required_gpus = 2
        GPUManager.define_node_gpus(node_id, gpu_count)
        self.assertTrue(GPUManager.reserve_gpus_for_job(
            node_id, required_gpus))

        job_id = 11
        self.assertFalse(
            GPUManager.reserve_gpus_for_job(node_id, required_gpus))

        gpu_count = 4
        GPUManager.define_node_gpus(node_id, gpu_count)
        self.assertTrue(GPUManager.reserve_gpus_for_job(
            node_id, required_gpus))
Example #4
    def __init__(self, agent_id, node, tasks, running_job_exes, resource_set):
        """Constructor

        :param agent_id: The agent ID
        :type agent_id: string
        :param node: The node
        :type node: :class:`scheduler.node.node_class.Node`
        :param tasks: The current tasks running on the node
        :type tasks: :func:`list`
        :param running_job_exes: The current job executions running on the node
        :type running_job_exes: :func:`list`
        :param resource_set: The set of resources for the node
        :type resource_set: :class:`scheduler.resources.agent.ResourceSet`
        """

        self.agent_id = agent_id  # Set agent ID separately from node since it can change during scheduling
        self.hostname = node.hostname
        self.node_id = node.id
        self.is_ready_for_new_job = node.is_ready_for_new_job()  # Cache this for consistency
        self.is_ready_for_next_job_task = node.is_ready_for_next_job_task()  # Cache this for consistency
        self.is_ready_for_system_task = node.is_ready_for_system_task()  # Cache this for consistency
        self.allocated_offers = []
        self.allocated_resources = NodeResources()
        self.allocated_tasks = []  # Tasks that have been allocated resources from this node

        self._node = node
        self._allocated_queued_job_exes = []  # New queued job executions that have been allocated resources
        self._allocated_running_job_exes = []  # Running job executions that have been allocated resources
        self._running_job_exes = running_job_exes
        self._running_tasks = tasks

        self._offered_resources = NodeResources()  # The amount of resources that were originally offered
        self._offered_resources.add(resource_set.offered_resources)
        self._remaining_resources = NodeResources()
        self._remaining_resources.add(self._offered_resources)
        self._task_resources = resource_set.task_resources
        self._watermark_resources = resource_set.watermark_resources
        if int(resource_set.offered_resources.gpus) > 0:
            GPUManager.define_node_gpus(
                self.node_id, int(resource_set.offered_resources.gpus))
Example #5
    def test_get_nvidia_label(self):
        node_id = 6
        job_id = 10
        gpu_count = 2
        required_gpus = 2
        GPUManager.define_node_gpus(node_id, gpu_count)
        GPUManager.reserve_gpus_for_job(node_id, required_gpus)
        GPUManager.assign_gpus_for_job(node_id, job_id, required_gpus)
        nvidia_label = GPUManager.get_nvidia_docker_label(node_id, job_id)
        self.assertEqual(nvidia_label, "0,1")

        gpu_count = 4
        job_id = 11
        GPUManager.define_node_gpus(node_id, gpu_count)
        GPUManager.reserve_gpus_for_job(node_id, required_gpus)
        GPUManager.assign_gpus_for_job(node_id, job_id, required_gpus)
        nvidia_label = GPUManager.get_nvidia_docker_label(node_id, job_id)
        self.assertEqual(nvidia_label, "2,3")
Example #6
    def test_assign_gpus(self):
        node_id = 5
        job_id = 10
        gpu_count = 2
        required_gpus = 2
        GPUManager.define_node_gpus(node_id, gpu_count)
        GPUManager.reserve_gpus_for_job(node_id, required_gpus)
        self.assertTrue(
            GPUManager.assign_gpus_for_job(node_id, job_id, required_gpus))

        job_id = 11
        self.assertFalse(
            GPUManager.reserve_gpus_for_job(
                node_id, required_gpus))  # shouldn't have enough GPUs

        gpu_count = 4
        GPUManager.define_node_gpus(node_id, gpu_count)
        GPUManager.reserve_gpus_for_job(node_id, required_gpus)
        self.assertTrue(
            GPUManager.assign_gpus_for_job(node_id, job_id, required_gpus))
Example #7
    def test_release_gpu(self):
        node_id = 7
        job_id = 10
        gpu_count = 2
        required_gpus = 2
        GPUManager.define_node_gpus(node_id, gpu_count)
        GPUManager.reserve_gpus_for_job(node_id, required_gpus)
        self.assertTrue(
            GPUManager.assign_gpus_for_job(node_id, job_id, required_gpus))

        job_id = 11

        self.assertFalse(
            GPUManager.reserve_gpus_for_job(
                node_id, required_gpus))  # shouldn't have enough GPUs

        GPUManager.release_gpus(node_id, 10)  # release the GPUs held by job 10
        self.assertTrue(GPUManager.reserve_gpus_for_job(
            node_id, required_gpus))  # GPUs should be available again
        self.assertTrue(
            GPUManager.assign_gpus_for_job(
                node_id, job_id, required_gpus))  # and assignable to the new job
        nvidia_label = GPUManager.get_nvidia_docker_label(node_id, job_id)
        self.assertEqual(nvidia_label, "0,1")
Example #8
    def test_add_additional_GPU(self):
        node_id = 3
        gpu_count = 4
        GPUManager.define_node_gpus(node_id, gpu_count)
        self.assertEqual(GPUManager.get_gpu_count_for_node(node_id), 4)
Example #9
    def test_add_new_node_gpus(self):
        node_id = 1
        gpu_count = 3
        GPUManager.define_node_gpus(node_id, gpu_count)
        self.assertEqual(GPUManager.get_gpu_count_for_node(node_id), gpu_count)
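
Usage note: taken together, the examples above exercise the full GPU lifecycle. The sketch below is a minimal, hypothetical walk-through assuming only the GPUManager calls shown in these examples (define_node_gpus, reserve_gpus_for_job, assign_gpus_for_job, get_nvidia_docker_label, release_gpus); the import path and the node/job IDs are illustrative assumptions, not part of the original code.

# Hypothetical lifecycle sketch; the import path below is an assumption.
from scheduler.resources.gpu_manager import GPUManager

node_id = 100  # illustrative node ID
job_id = 42    # illustrative job ID

# 1. Advertise the GPUs offered by the node (as the scheduling node does in Example #4).
GPUManager.define_node_gpus(node_id, 2)

# 2. Reserve GPUs during scheduling, then pin them to a specific job.
if GPUManager.reserve_gpus_for_job(node_id, 2):
    GPUManager.assign_gpus_for_job(node_id, job_id, 2)

# 3. The label lists the assigned GPU indices (e.g. "0,1") for NVIDIA Docker.
print(GPUManager.get_nvidia_docker_label(node_id, job_id))

# 4. Release the GPUs when the job finishes so they can be reserved again.
GPUManager.release_gpus(node_id, job_id)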