Beispiel #1
0
def test_bin_pack():
    """Check what get_bin_pack_residual returns for several node lists.

    Every case passes a different list of node resources together with the
    same two ``{"GPU": 2}`` demands and compares the return value against
    the expected list.
    """
    cases = [
        # (node resources, expected return value)
        ([], [{"GPU": 2}, {"GPU": 2}]),
        ([{"GPU": 2}], [{"GPU": 2}]),
        ([{"GPU": 4}], []),
        # Extra CPU on a node does not prevent the GPU demand from fitting.
        ([{"GPU": 2}, {"GPU": 2, "CPU": 2}], []),
        # A CPU-only node cannot absorb a GPU demand.
        ([{"CPU": 2}, {"GPU": 2}], [{"GPU": 2}]),
    ]
    for node_resources, expected in cases:
        # Build fresh demand dicts for every call, as each case is independent.
        demands = [{"GPU": 2}, {"GPU": 2}]
        assert get_bin_pack_residual(node_resources, demands) == expected
def test_bin_pack():
    """Check get_bin_pack_residual, whose result is indexable/tuple-like.

    The first group of cases only inspects element ``[0]`` of the result
    for two ``{"GPU": 2}`` demands. The last two checks exercise the
    ``strict_spread`` keyword, comparing the full returned pair when it is
    enabled.
    """
    first_element_cases = [
        # (node resources, expected result[0])
        ([], [{"GPU": 2}, {"GPU": 2}]),
        ([{"GPU": 2}], [{"GPU": 2}]),
        ([{"GPU": 4}], []),
        ([{"GPU": 2}, {"GPU": 2, "CPU": 2}], []),
        ([{"CPU": 2}, {"GPU": 2}], [{"GPU": 2}]),
    ]
    for node_resources, expected_first in first_element_cases:
        result = get_bin_pack_residual(node_resources,
                                       [{"GPU": 2}, {"GPU": 2}])
        assert result[0] == expected_first

    # The same node list object is deliberately reused for both calls below.
    single_node = [{"GPU": 3}]

    # Without strict spreading both 1-GPU demands are satisfied.
    relaxed = get_bin_pack_residual(
        single_node, [{"GPU": 1}, {"GPU": 1}], strict_spread=False)
    assert relaxed[0] == []

    # With strict spreading one demand is left over and the node keeps 2 GPUs.
    strict = get_bin_pack_residual(
        single_node, [{"GPU": 1}, {"GPU": 1}], strict_spread=True)
    assert strict == ([{"GPU": 1}], [{"GPU": 2}])
Beispiel #3
0
    def _get_nodes_allowed_to_terminate(
            self, sorted_node_ids: List[NodeID]) -> Dict[NodeID, bool]:
        """Report which worker nodes request_resources() does not depend on.

        The head node's resources followed by each typed worker's resources
        are bin-packed against the current resource requests. A worker may
        terminate when its resources come back from the packing unchanged
        and are non-empty.

        Args:
            sorted_node_ids: the node ids sorted based on last used
                (LRU last).

        Returns:
            A mapping from every node id that carries a user node type tag
            to True if that node is allowed to terminate, False otherwise.
            Nodes without the tag are omitted.
        """
        # TODO(ameer): try merging this with resource_demand_scheduler
        # code responsible for adding nodes for request_resources().
        head_resources = copy.deepcopy(self.available_node_types[
            self.config["head_node_type"]]["resources"])
        if not head_resources:
            # Legacy yaml might include {} in the resources field.
            # TODO(ameer): this is somewhat duplicated in
            # resource_demand_scheduler.py.
            head_ids = self.provider.non_terminated_nodes(
                {TAG_RAY_NODE_KIND: NODE_KIND_HEAD})
            head_resources = {}
            if head_ids:
                head_ip = self.provider.internal_ip(head_ids[0])
                static_by_ip = \
                    self.load_metrics.get_static_node_resources_by_ip()
                head_resources = static_by_ip.get(head_ip, {})

        # Index 0 is the head node; workers follow in the caller's order.
        node_capacities = [head_resources]
        worker_ids = []
        for node_id in sorted_node_ids:
            tags = self.provider.node_tags(node_id)
            if TAG_RAY_USER_NODE_TYPE not in tags:
                # Nodes without a user node type are not considered.
                continue
            node_type = tags[TAG_RAY_USER_NODE_TYPE]
            capacity = copy.deepcopy(
                self.available_node_types[node_type]["resources"])
            if not capacity:
                # Legacy yaml might include {} in the resources field, so
                # fall back to the statically reported resources by ip.
                static_by_ip = \
                    self.load_metrics.get_static_node_resources_by_ip()
                worker_ip = self.provider.internal_ip(node_id)
                capacity = static_by_ip.get(worker_ip, {})
            node_capacities.append(capacity)
            worker_ids.append(node_id)

        # The input is sorted by last use, so packing in the given order
        # "keeps" the most recently used nodes. This assumes
        # get_bin_pack_residual honours the order of the list it is given.
        _, remaining_after_packing = get_bin_pack_residual(
            node_capacities, self.load_metrics.get_resource_requests())

        # Drop the head node's entry from both lists so that index i lines
        # up with worker_ids[i].
        node_capacities.pop(0)
        remaining_after_packing.pop(0)

        # A node is free to go when nothing was taken from it. An empty
        # capacity dict (legacy yaml, node not connected yet) never
        # qualifies.
        return {
            node_id: bool(remaining_after_packing[i] == node_capacities[i]
                          and node_capacities[i])
            for i, node_id in enumerate(worker_ids)
        }