def do_allocate(
    self,
    node_mgr: nodemanager.NodeManager,
    allow_existing: bool,
    all_or_nothing: bool,
) -> AllocationResult:
    if self.__node_count > 0:
        return node_mgr.allocate(
            self._constraints,
            node_count=self.__node_count,
            allow_existing=allow_existing,
            all_or_nothing=self.__colocated,
            assignment_id=self.name,
        )

    assert self.iterations_remaining > 0
    result = node_mgr.allocate(
        self._constraints,
        slot_count=self.iterations_remaining,
        allow_existing=allow_existing,
        # colocated is always all or nothing
        all_or_nothing=all_or_nothing or self.colocated,
        assignment_id=self.name,
    )

    if result:
        self.iterations_remaining -= result.total_slots
    return result
def add_ccnodeid_default_resource(node_mgr: NodeManager) -> None:
    """
    In order for this to function properly, ccnodeid must be defined
    """

    def get_node_id(n: Node) -> Optional[str]:
        return n.delayed_node_id.node_id

    node_mgr.add_default_resource({}, "ccnodeid", get_node_id)
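# Hedged usage sketch, not from the original module: example_ccnodeid_usage is a
# hypothetical helper, and the "htc" nodearray constraint is borrowed from the test
# fixture below. Once the default resource is registered, every allocated node exposes
# its CycleCloud node id as the "ccnodeid" resource (None until an id is assigned).
def example_ccnodeid_usage(node_mgr: NodeManager) -> None:
    add_ccnodeid_default_resource(node_mgr)
    result = node_mgr.allocate({"node.nodearray": "htc"}, node_count=1)
    if result:
        print(result.nodes[0].resources.get("ccnodeid"))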
def test_top_level_limits(node_mgr: NodeManager) -> None:
    assert node_mgr.cluster_max_core_count == 10_000
    assert node_mgr.cluster_consumed_core_count == 0
    assert ["westus2"] == node_mgr.get_locations()

    assert node_mgr.get_regional_consumed_core_count("westus2") == 0
    assert node_mgr.get_regional_max_core_count("westus2") == 80

    assert node_mgr.allocate({"node.vcpu_count": 4}, node_count=1)
    assert node_mgr.cluster_consumed_core_count == 4
    assert node_mgr.get_regional_consumed_core_count("westus2") == 4
def test_over_allocate(node_mgr: NodeManager) -> None:
    assert node_mgr.allocate({"node.nodearray": "htc"}, node_count=1)

    # can't allocate 10, because there are only 9 left
    assert not node_mgr.allocate(
        {"node.nodearray": "htc"}, node_count=10, all_or_nothing=True
    )

    result = node_mgr.allocate(
        {"node.nodearray": "htc"}, node_count=10, all_or_nothing=False
    )
    assert result and len(result.nodes) == 9
    assert result.nodes[0].nodearray == "htc"
def make_requests(node_mgr: NodeManager) -> None:
    for n, mag in enumerate(magnitudes):
        node_count = None if slots_or_nodes[n] else mag
        slot_count = None if node_count else mag

        node_mgr.allocate(
            {"ncpus": ncpus_per_job[n], "exclusive": exclusivity[n]},
            node_count=node_count,
            slot_count=slot_count,
        )
def test_no_buckets():
    node_mgr = NodeManager(MockClusterBinding(), [])
    dc = DemandCalculator(
        node_mgr, NullNodeHistory(), singleton_lock=util.NullSingletonLock()
    )
    result = dc._add_job(Job("1", {"ncpus": 2}))
    assert not result
    assert "NoBucketsDefined" == result.status
def preprocess_node_mgr(self, config: Dict, node_mgr: NodeManager) -> None:
    """
    We add a default resource to map group_id to node.placement_group
    """
    super().preprocess_node_mgr(config, node_mgr)

    def group_id(node: Node) -> str:
        return node.placement_group if node.placement_group else "_none_"

    node_mgr.add_default_resource({}, "group_id", group_id, allow_none=False)

    def ungrouped(node: Node) -> str:
        return str(not bool(node.placement_group)).lower()

    node_mgr.add_default_resource({}, "ungrouped", ungrouped)
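# Hedged usage sketch, not part of the original driver: example_request_ungrouped is
# a hypothetical helper. Because "ungrouped" is registered above as a default resource
# that yields the strings "true"/"false", a request can constrain itself to nodes
# outside any placement group by matching that string value.
def example_request_ungrouped(node_mgr: NodeManager) -> None:
    result = node_mgr.allocate({"ungrouped": "true"}, slot_count=1)
    if result:
        # every matched node should have no placement group assigned
        assert all(not n.placement_group for n in result.nodes)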
def validate_hg_intersections(
    ge_env: GridEngineEnvironment, node_mgr: NodeManager, warn_function: WarnFunction
) -> bool:
    bucket_to_hgs: Dict[str, Set[str]] = {}
    for bucket in node_mgr.get_buckets():
        # key by str(bucket), matching how the dict is populated and read below
        if str(bucket) not in bucket_to_hgs:
            bucket_to_hgs[str(bucket)] = set()

    by_str = partition_single(node_mgr.get_buckets(), str)

    for queue in ge_env.queues.values():
        if not queue.autoscale_enabled:
            continue

        for hostgroup in queue.bound_hostgroups.values():
            for bucket in node_mgr.get_buckets():
                is_satisfied = True
                for constraint in hostgroup.constraints:
                    result = constraint.satisfied_by_bucket(bucket)
                    if not result:
                        is_satisfied = False
                        break
                if is_satisfied:
                    bucket_to_hgs[str(bucket)].add(hostgroup.name)

    # warnings only: unmatched or overlapping hostgroups never fail validation
    failure = False
    for bkey, matches in bucket_to_hgs.items():
        bucket = by_str[bkey]
        if not matches:
            warn_function(
                "%s is not matched by any hostgroup. This is not an error.",
                bucket,
            )
        elif len(matches) > 1:
            # seq_no will be used to determine ties
            if not ge_env.scheduler.sort_by_seqno:
                warn_function(
                    "%s is matched by more than one hostgroup %s. This is not an error.",
                    bucket,
                    ",".join(matches),
                )
    return failure
def add_default_placement_groups(config: Dict, node_mgr: NodeManager) -> None:
    nas = config.get("nodearrays", {})
    for name, child in nas.items():
        if child.get("placement_groups"):
            return

    by_pg = partition(
        node_mgr.get_buckets(), lambda b: (b.nodearray, b.placement_group)
    )
    by_na_vm = partition(node_mgr.get_buckets(), lambda b: (b.nodearray, b.vm_size))

    for key, buckets in by_na_vm.items():
        nodearray, vm_size = key
        non_pg_buckets = [b for b in buckets if not b.placement_group]
        if not non_pg_buckets:
            # hardcoded PlacementGroupId
            logging.debug(
                "Nodearray %s defines PlacementGroupId, so no additional "
                + "placement groups will be created automatically.",
                nodearray,
            )
            continue

        bucket = non_pg_buckets[0]
        if not bucket.supports_colocation:
            continue

        buf_size = int(
            nas.get(nodearray, {}).get("generated_placement_group_buffer", 2)
        )
        buf_remaining = buf_size
        pgi = 0
        while buf_remaining > 0:
            pg_name = ht.PlacementGroup("{}_pg{}".format(vm_size, pgi))
            pg_key = (nodearray, pg_name)
            if pg_key not in by_pg:
                logging.fine("Adding placement group %s", pg_name)
                node_mgr.add_placement_group(pg_name, bucket)
                buf_remaining -= 1
            pgi += 1
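# Hedged illustration, not from the original source: a sketch of the nodearray config
# this function reads. With a generated_placement_group_buffer of 3, up to three
# placement groups named "<vm_size>_pg0" through "<vm_size>_pg2" are added for each
# colocation-capable nodearray/VM-size pair; if any nodearray already declares
# placement_groups explicitly, the function returns without generating anything.
def example_placement_group_config() -> Dict:
    return {
        "nodearrays": {
            "hpc": {"generated_placement_group_buffer": 3},
        }
    }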
def test_packing(node_mgr: NodeManager) -> None:
    # htc node can fit 4 ncpus, so only allocate one node
    result = node_mgr.allocate({"node.nodearray": "htc", "ncpus": 1}, slot_count=2)
    assert result, str(result)
    assert len(result.nodes) == 1, result.nodes
    assert result.nodes[0].name == "htc-1"
    assert result.nodes[0].resources["ncpus"] == 4
    assert result.nodes[0].available["ncpus"] == 2, result.nodes[0].available["ncpus"]
    assert len(node_mgr.new_nodes) == 1, len(node_mgr.new_nodes)

    # htc node can fit 4 ncpus, but 2 are used up on the first node, so allocate a second
    result = node_mgr.allocate({"node.nodearray": "htc", "ncpus": 1}, slot_count=4)
    assert result
    assert len(result.nodes) == 2, result.nodes
    assert result.nodes[0].name == "htc-1"
    assert result.nodes[1].name == "htc-2"
    assert len(node_mgr.new_nodes) == 2, [n.name for n in node_mgr.new_nodes]
    assert len(set([n.name for n in node_mgr.new_nodes])) == 2

    result = node_mgr.allocate({"node.nodearray": "htc", "ncpus": 1}, slot_count=2)
    assert len(result.nodes) == 1
    assert result.nodes[0].name == "htc-2"
    assert len(node_mgr.new_nodes) == 2
def test_single_alloc(node_mgr: NodeManager) -> None:
    result = node_mgr.allocate({"node.nodearray": "htc"}, node_count=1)
    assert result and len(result.nodes) == 1
    assert result.nodes[0].nodearray == "htc"
def test_unmanaged_nodes(node_mgr: NodeManager) -> None:
    assert len(node_mgr.get_buckets()) == 2

    tux = SchedulerNode("tux", bucket_id=ht.BucketId("tuxid"))
    node_mgr.add_unmanaged_nodes([tux])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux]

    tux2 = SchedulerNode("tux2", bucket_id=tux.bucket_id)
    node_mgr.add_unmanaged_nodes([tux2])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux, tux2]

    # re-adding nodes that are already tracked is a no-op
    node_mgr.add_unmanaged_nodes([tux, tux2])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux, tux2]
def test_node_software_configuration_alias(node_mgr: NodeManager) -> None:
    node_mgr.add_default_resource(
        {}, "int_alias", "node.software_configuration.custom_int"
    )
    b = node_mgr.get_buckets()[0]
    assert b.resources["int_alias"] == b.software_configuration["custom_int"]
def test_node_resources_alias(node_mgr: NodeManager) -> None:
    node_mgr.add_default_resource({}, "memgb_alias", "node.resources.memgb")
    b = node_mgr.get_buckets()[0]
    assert b.resources["memgb_alias"] == b.resources["memgb"]