def test_config_based_default_resources(bindings) -> None:
    """Default resources listed in the config must show up on every bucket.

    First builds a node manager with an empty ``default_resources`` list and
    checks that "blah" is undefined, then appends a definition for "blah"
    and checks that a freshly built node manager exposes it.
    """
    default_resources: list = []
    config = {"_mock_bindings": bindings, "default_resources": default_resources}

    # Nothing configured yet, so no bucket may define "blah".
    manager_without = new_node_manager(config)
    for bucket in manager_without.get_buckets():
        assert "blah" not in bucket.resources

    # Same list object as config["default_resources"], so the config sees it.
    blah_definition = {"select": {}, "name": "blah", "value": "node.pcpu_count"}
    config["default_resources"].append(blah_definition)

    manager_with = new_node_manager(config)
    for bucket in manager_with.get_buckets():
        assert bucket.resources["blah"] == bucket.pcpu_count
def onprem_burst_node_mgr() -> None:
    """Example: register two on-prem nodes and burst for "nodetype: A".

    The on-prem nodes are handed to the node manager as existing nodes, the
    "htc" nodearray is given the same nodetype via a default resource, and
    five nodes are then requested.
    """
    # Unlike the SchedulerNode above, here we can define the vcpu_count and memory
    # for the onprem nodes.
    onprem_res = {"onprem": True, "nodetype": "A"}
    node_specs = (
        ("onprem001", 16, 128),
        ("onprem002", 32, 256),
    )
    onprem_nodes = [
        UnmanagedNode(
            hostname,
            vcpu_count=vcpus,
            memory=Memory(mem_gb, "g"),
            resources=onprem_res,
        )
        for hostname, vcpus, mem_gb in node_specs
    ]

    node_mgr = new_node_manager(CONFIG, existing_nodes=onprem_nodes)
    node_mgr.add_default_resource({"node.nodearray": "htc"}, "nodetype", "A")

    result = node_mgr.allocate({"nodetype": "A"}, node_count=5)
    assert result

    if not result:
        print("Failed! {}".format(result))
    else:
        allocated = len(result.nodes)
        created = len(node_mgr.new_nodes)
        print("Allocated {} nodes, {} are new".format(allocated, created))

    if DRY_RUN:
        return
    node_mgr.bootup()
def target_counts_node_mgr() -> None:
    """
    Example: allocate by node count, then by slot count, through the
    NodeManager and print the outcome of each request.
    """
    node_mgr = new_node_manager(CONFIG)

    # First request: a fixed number of nodes from the htc nodearray.
    by_nodes = node_mgr.allocate({"node.nodearray": "htc"}, node_count=2)
    if not by_nodes:
        print("Failed! {}".format(by_nodes))
    else:
        print("Allocated {} nodes.".format(len(by_nodes.nodes)))

    # Second request: enough nodes to provide 128 slots with memgb >= 1.
    slot_constraints = {"node.nodearray": "htc", "memgb": 1}
    by_slots = node_mgr.allocate(slot_constraints, slot_count=128)
    if not by_slots:
        print("Failed! {}".format(by_slots))
    else:
        print("Allocated {} nodes".format(len(by_slots.nodes)))

    # you can also do Memory.value_of("100g")
    # or even (Memory.value_of("1g") * 100), as the memory object is supposed
    # to be used as a number
    total_new = len(node_mgr.new_nodes)
    print("Allocated {} nodes in total".format(total_new))

    if DRY_RUN:
        return
    node_mgr.bootup()
def new_demand_calculator(
    config: Union[str, dict],
    existing_nodes: Optional[List[SchedulerNode]] = None,
    node_mgr: Optional[NodeManager] = None,
    node_history: Optional[NodeHistory] = None,
    disable_default_resources: bool = False,
    node_queue: Optional[NodeQueue] = None,
    singleton_lock: Optional[SingletonLock] = NullSingletonLock(),
) -> DemandCalculator:
    """Build a DemandCalculator from a config and optional collaborators.

    Args:
        config: passed to ``load_config`` to obtain the config dict.
        existing_nodes: scheduler nodes handed to
            ``update_scheduler_nodes``; a falsy value becomes ``[]``.
        node_mgr: reused when given (logging is initialized and, unless
            disabled, system default resources are set on it); otherwise
            one is created with ``new_node_manager``.
        node_history: a falsy value is replaced by ``SQLiteNodeHistory()``.
        disable_default_resources: skip the system default resources.
        node_queue: forwarded to ``DemandCalculator`` as is.
        singleton_lock: an explicit ``None`` is replaced by
            ``new_singleton_lock(config_dict)``. The default is a single
            ``NullSingletonLock`` instance created when the function is
            defined.
    """
    config_dict = load_config(config)

    if node_mgr is not None:
        # A caller-supplied manager has not been through new_node_manager,
        # so logging and default resources are set up here.
        logging.initialize_logging(config_dict)
        if not disable_default_resources:
            node_mgr.set_system_default_resources()
    else:
        node_mgr = new_node_manager(
            config_dict,
            disable_default_resources=disable_default_resources,
        )

    if not node_history:
        node_history = SQLiteNodeHistory()

    if singleton_lock is None:
        singleton_lock = new_singleton_lock(config_dict)

    calculator = DemandCalculator(node_mgr, node_history, node_queue, singleton_lock)
    calculator.update_scheduler_nodes(existing_nodes if existing_nodes else [])
    return calculator
def _setup_shell_locals(self, config: Dict) -> Dict:
    """
    Build the namespace for the read-only interactive shell.

    The returned dict exposes the config, this CLI object, a context
    handler, a NodeManager built from the config, and the ``hpcpackhelp``
    function that prints a summary of those names.
    """
    ctx = DefaultContextHandler("[interactive-readonly]")

    help_lines = (
        "config - dict representing autoscale configuration.",
        "cli - object representing the CLI commands",
        "node_mgr - ScaleLib NodeManager - interacts with CycleCloud for all node related"
        + " activities - creation, deletion, limits, buckets etc.",
        "hpcpackhelp - This help function",
    )

    def hpcpackhelp() -> None:
        for help_line in help_lines:
            print(help_line)

    node_mgr = new_node_manager(config)
    return {
        "config": config,
        "cli": self,
        "ctx": ctx,
        "node_mgr": node_mgr,
        "hpcpackhelp": hpcpackhelp,
    }
def default_resources() -> None:
    """
    Example: define "ngpus" and a GPU-based "pcpus" by hand, fall back to
    the system defaults for everything else, and print the result for one
    GPU and one non-GPU VM size.
    """
    # now we will disable the default resources, ncpus/pcpus/gpus etc
    # and define them ourselves.
    node_mgr = new_node_manager(CONFIG, disable_default_resources=True)

    # let's define gpus for every node
    # then, for nodes that actually have a gpu, let's set the pcpus
    # to equal the number of gpus * 2

    # define ngpus
    node_mgr.add_default_resource({}, "ngpus", "node.gpu_count")
    # also could have just passed in a lambda/function
    # node_mgr.add_default_resource({}, "gpus", lambda node: node.gpu_count)

    def two_pcpus_per_gpu(node):
        return node.resources["ngpus"] * 2

    # now that ngpus is defined, we can use ngpus: 1 here to filter out nodes that
    # have at least one ngpu. Let's set pcpus to 2 * ngpus
    node_mgr.add_default_resource({"ngpus": 1}, "pcpus", two_pcpus_per_gpu)

    # and lastly, for all other nodes, we will apply the system defaults
    node_mgr.set_system_default_resources()

    # Create both example nodes before printing either of them.
    examples = [
        node_mgr.example_node("southcentralus", vm_size)
        for vm_size in ("Standard_NV24", "Standard_F16s")
    ]
    for example in examples:
        print(example.vm_size, " -> %(ngpus)s ngpus %(pcpus)s pcpus" % example.resources)
def target_counts_node_mgr() -> None:
    """
    Example: request two htc nodes, then 128 slots, and report how many
    nodes each allocation matched and how many new nodes exist in total.
    """
    node_mgr = new_node_manager(CONFIG)

    def report(outcome) -> None:
        # A falsy result is printed as the failure itself.
        if not outcome:
            print("Failed! {}".format(outcome))
        else:
            print("Allocated {} nodes".format(len(outcome.nodes)))

    report(node_mgr.allocate({"node.nodearray": "htc"}, node_count=2))

    slot_constraints = {"node.nodearray": "htc", "memgb": 1}
    report(node_mgr.allocate(slot_constraints, slot_count=128))

    # you can also do Memory.value_of("100g")
    # or even (Memory.value_of("1g") * 100), as the memory object is supposed
    # to be used as a number
    total_new = len(node_mgr.new_nodes)
    print("Allocated {} nodes in total".format(total_new))

    if DRY_RUN:
        return
    node_mgr.bootup()
def test_custom_node_attrs_and_node_config() -> None:
    """Exercise node_attribute_overrides / software_configuration mutability.

    The test checks, in order:
      * overrides on an already existing node raise TypeError;
      * on a newly allocated node, setting the "Configuration" override is
        visible through ``software_configuration``;
      * an override for a key replaces the nodearray value outright
        (no dict merge);
      * once ``node.exists`` is set to True, writing to
        ``software_configuration`` raises TypeError.
    """
    b = MockClusterBinding()
    b.add_nodearray("htc", {}, software_configuration={"myscheduler": { "A": 1 }})
    b.add_bucket("htc", "Standard_F2", 10, 10)
    b.add_node("htc-1", "htc")
    node_mgr = new_node_manager({"_mock_bindings": b})
    # exactly one node exists at this point: htc-1
    (existing_node, ) = node_mgr.get_nodes()

    # overrides of an existing node are expected to be read-only
    try:
        existing_node.node_attribute_overrides["willfail"] = 123
        assert False
    except TypeError:
        pass

    result = node_mgr.allocate({"exclusive": True}, node_count=2)
    assert result
    # exactly one of the allocated nodes is expected to be new
    (node, ) = [n for n in result.nodes if not n.exists]

    assert node.software_configuration.get("test_thing") is None
    node.node_attribute_overrides["Configuration"] = {"test_thing": "is set"}
    assert node.software_configuration.get("test_thing") == "is set"

    # either the write is rejected with TypeError, or it must not be visible
    try:
        node.software_configuration["willfail"] = 123
        assert not node.software_configuration.get("willfail")
    except TypeError:
        pass

    # we won't handle dict merges here.
    assert node.software_configuration.get("myscheduler") == {"A": 1}

    node.node_attribute_overrides["Configuration"] = {"myscheduler": {"B": 2}}
    assert node.software_configuration.get("myscheduler") == {"B": 2}

    # if you want to add to the existing software_configuration, use
    # the node.software_configuration
    # NOTE(review): the key below is spelled "myscsheduler" - this looks like a
    # typo for "myscheduler"; confirm the intent before changing it.
    node.node_attribute_overrides["Configuration"][
        "myscsheduler"] = node.software_configuration.get("myscheduler", {})
    node.node_attribute_overrides["Configuration"]["myscheduler"]["B"] = 2

    node.node_attribute_overrides["Configuration"] = {
        "myscheduler": {
            "A": 1,
            "B": 2
        }
    }

    # still writable while the node does not exist...
    node.software_configuration["willsucceed"] = 123
    node.exists = True
    # ...and read-only once it does
    try:
        node.software_configuration["willfail"] = 123
        assert False
    except TypeError:
        pass
def resources(config: Dict, constraint_expr: str) -> None:
    """Collect the resource names of the buckets matching ``constraint_expr``.

    Args:
        config: autoscale configuration; must contain "output_columns"
            (the final lookup raises KeyError otherwise).
        constraint_expr: constraint expression forwarded to
            ``_query_with_constraints``.
    """
    ge_env = environment.from_qconf(config)
    ge_driver = autoscaler.new_driver(config, ge_env)
    # NOTE(review): the driver object itself is passed as existing_nodes;
    # confirm this is intended rather than a list of scheduler nodes.
    node_mgr = new_node_manager(config, existing_nodes=ge_driver)

    filtered = _query_with_constraints(config, constraint_expr, node_mgr.get_buckets())

    columns = set()
    for node in filtered:
        # The original repeated this update twice; a set union is idempotent,
        # so a single call is sufficient.
        columns.update(node.resources.keys())

    # NOTE(review): `columns` is never consumed and this lookup discards its
    # result - the function looks unfinished. The lookup is kept because it
    # still raises KeyError when "output_columns" is missing.
    config["output_columns"]
def manual_node_mgmt() -> None:
    """Example: allocate two nodes, boot them, then delete the first one."""
    node_mgr = new_node_manager(CONFIG)

    allocation = node_mgr.allocate({}, node_count=2)
    assert allocation

    if node_mgr.new_nodes:
        node_mgr.bootup()

    # Exactly two nodes are expected; anything else fails the unpacking.
    first_node, _second_node = node_mgr.get_nodes()
    assert first_node in node_mgr.get_nodes()

    delete_result = node_mgr.delete([first_node])
    print(delete_result)

    assert first_node not in node_mgr.get_nodes(), node_mgr.get_nodes()
def default_resources() -> None:
    """
    An example of how to programmatically create default resources.

    Disables the built-in defaults through CONFIG (note: this mutates the
    module-level CONFIG dict), defines "ngpus" for every node and "pcpus"
    separately for GPU and non-GPU nodes, then prints the resulting
    resources for one VM size of each kind.
    """
    # now we will disable the default resources, ncpus/pcpus/gpus etc
    # and define them ourselves.
    CONFIG["disable_default_resources"] = True
    node_mgr = new_node_manager(CONFIG)

    # let's define gpus for every node
    # then, for nodes that actually have a gpu, let's set the pcpus
    # to equal the number of gpus * 2

    # define ngpus
    node_mgr.add_default_resource({}, "ngpus", "node.gpu_count")
    # also could have just passed in a lambda/function
    # node_mgr.add_default_resource({}, "gpus", lambda node: node.gpu_count)

    # now that ngpus is defined, we can use ngpus: 1 here to filter out nodes that
    # have at least one ngpu. Let's set pcpus to 2 * ngpus
    node_mgr.add_default_resource(
        selection={"ngpus": 1},
        resource_name="pcpus",
        default_value=lambda node: node.resources["ngpus"] * 2,
    )
    # and for nodes without GPUs, let's just use the actual pcpu_count
    node_mgr.add_default_resource(
        selection={"ngpus": 0},
        resource_name="pcpus",
        default_value=lambda node: node.pcpu_count,
    )
    # note that a blank selection would have accomplished the same thing, as default resources
    # are processed in order.
    # node_mgr.add_default_resource({}, "pcpus", lambda node: node.pcpu_count)

    has_gpu = node_mgr.example_node("southcentralus", "Standard_NV24")
    no_gpu = node_mgr.example_node("southcentralus", "Standard_F16s")

    print(
        has_gpu.vm_size,
        "(pcpu_count=%s)" % has_gpu.pcpu_count,
        "-> %(ngpus)s ngpus %(pcpus)s pcpus" % has_gpu.resources,
    )
    # Fixed: this row previously reported has_gpu.pcpu_count for the
    # non-GPU node (copy/paste slip).
    print(
        no_gpu.vm_size,
        "(pcpu_count=%s)" % no_gpu.pcpu_count,
        "-> %(ngpus)s ngpus %(pcpus)s pcpus" % no_gpu.resources,
    )
def buckets(
    config: Dict,
    constraint_expr: str,
    output_columns: Optional[List[str]] = None,
    output_format: Optional[str] = None,
) -> None:
    """Print autoscale bucket information (limits etc.) as a demand table.

    When ``output_columns`` is None, the default columns are extended with
    every resource name found on the buckets, and the integer "count"
    attributes of each bucket's limits are copied into the bucket and
    example-node resources. The final column list is also written back to
    ``config["output_columns"]``.
    """
    ge_env = environment.from_qconf(config)
    ge_driver = autoscaler.new_driver(config, ge_env)
    config = ge_driver.preprocess_config(config)
    node_mgr = new_node_manager(config)

    use_discovered_columns = output_columns is None
    if not output_columns:
        output_columns = [
            "nodearray",
            "placement_group",
            "vm_size",
            "vcpu_count",
            "pcpu_count",
            "memory",
            "available_count",
        ]

    if use_discovered_columns:
        for bucket in node_mgr.get_buckets():
            for resource_name in bucket.resources:
                if resource_name in output_columns:
                    continue
                output_columns.append(resource_name)

            limits = bucket.limits
            for attr in dir(limits):
                # Only public "*count*" attributes are of interest.
                if not (attr[0].isalpha() and "count" in attr):
                    continue
                value = getattr(limits, attr)
                if not isinstance(value, int):
                    continue
                bucket.resources[attr] = value
                bucket.example_node._resources[attr] = value

    filtered = _query_with_constraints(config, constraint_expr, node_mgr.get_buckets())
    example_nodes = [matched.example_node for matched in filtered]
    demand_result = DemandResult([], example_nodes, [], [])

    # "all" anywhere in the list overrides every other column.
    if "all" in output_columns:
        output_columns = ["all"]

    config["output_columns"] = output_columns
    autoscaler.print_demand(config, demand_result, output_columns, output_format)
def new_demand_calculator(
    config: Dict,
    pbs_env: Optional[PBSProEnvironment] = None,
    pbs_driver: Optional["PBSProDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    singleton_lock: Optional[SingletonLock] = None,
) -> DemandCalculator:
    """Create a DemandCalculator wired up for PBSPro.

    Missing collaborators (driver, environment, node history, singleton
    lock) are created from the driver. After the calculator is built, a
    "ccnodeid" default resource is added once if any bucket lacks it.
    ``ctx_handler`` is accepted but not used here.
    """
    if pbs_driver is None:
        pbs_driver = PBSProDriver(config)

    if pbs_env is None:
        pbs_env = envlib.from_driver(config, pbs_driver)

    if node_history is None:
        node_history = pbs_driver.new_node_history(config)

    # keep it as a config
    scheduler_nodes = pbs_env.scheduler_nodes
    node_mgr = new_node_manager(config, existing_nodes=scheduler_nodes)
    pbs_driver.preprocess_node_mgr(config, node_mgr)

    if not singleton_lock:
        singleton_lock = pbs_driver.new_singleton_lock(config)
    assert singleton_lock

    node_queue = pbs_driver.new_node_queue(config)
    demand_calculator = dcalclib.new_demand_calculator(
        config,
        node_mgr=node_mgr,
        node_history=node_history,
        node_queue=node_queue,
        singleton_lock=singleton_lock,  # it will handle the none case,
        existing_nodes=pbs_env.scheduler_nodes,
    )

    # ccnodeid will almost certainly not be defined. It only needs to be
    # defined once, so a default for all nodes is added as soon as one
    # bucket is found without it.
    buckets = demand_calculator.node_mgr.get_buckets()
    if any("ccnodeid" not in bucket.resources for bucket in buckets):
        hpc.autoscale.job.driver.add_ccnodeid_default_resource(
            demand_calculator.node_mgr
        )

    return demand_calculator
def test_basic() -> None:
    """Check decrement / rollback / commit bookkeeping on a bucket.

    A rollback returns ``available_count`` to the last committed value;
    a commit makes the decremented value the new baseline.
    """
    binding = MockClusterBinding()
    binding.add_nodearray("hpc", {"ncpus": "node.vcpu_count"})
    binding.add_bucket("hpc", "Standard_F4", max_count=100, available_count=100)
    node_mgr = new_node_manager({"_mock_bindings": binding})
    bucket = node_mgr.get_buckets()[0]

    assert 100 == bucket.available_count

    # (operation, expected available_count afterwards)
    steps = (
        ("decrement", 95),
        ("rollback", 100),
        ("decrement", 95),
        ("commit", 95),
        ("decrement", 90),
        ("rollback", 95),
    )
    for operation, expected in steps:
        if operation == "decrement":
            bucket.decrement(5)
        elif operation == "rollback":
            bucket.rollback()
        else:
            bucket.commit()
        assert expected == bucket.available_count
def autoscale_grid_engine(
    config: Dict[str, Any],
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    """Run one Grid Engine autoscale iteration and return the demand result.

    Steps, in order: clean up hosts reported in an "a"+"u" state, calculate
    demand, let the driver join matched existing nodes, boot new nodes,
    and delete nodes that timed out while booting or were unmatched for
    longer than the idle timeout.

    Args:
        config: autoscale configuration. Must not already be read-only.
            In dry-run mode "lock_file" and "read_only" are overwritten on
            this dict.
        ge_env: Grid Engine environment; built from qconf when None.
        ge_driver: driver used to talk to Grid Engine; created via
            ``new_driver`` when None.
        ctx_handler: optional context handler whose context is switched
            to "[joining]" and "[scaling]" as the iteration progresses.
        node_history: forwarded to ``calculate_demand``.
        dry_run: when True, no nodes are booted and history is not updated.

    Returns:
        The DemandResult produced by ``demand_calculator.finish()``.

    Side effects:
        Sets the module-level ``_exit_code`` to 1 when node deletion fails.
    """
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running gridengine autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    # interface to GE, generally by cli
    if ge_driver is None:
        # allow tests to pass in a mock
        ge_driver = new_driver(config, ge_env)

    ge_driver.initialize_environment()

    config = ge_driver.preprocess_config(config)

    logging.fine("Driver = %s", ge_driver)

    invalid_nodes = []

    # we need an instance without any scheduler nodes, so don't
    # pass in the existing nodes.
    tmp_node_mgr = new_node_manager(config)

    by_hostname = partition_single(tmp_node_mgr.get_nodes(), lambda n: n.hostname_or_uuid)

    for node in ge_env.nodes:
        # many combinations of a u and other states. However,
        # as long as a and u are in there it is down
        state = node.metadata.get("state", "")
        cc_node = by_hostname.get(node.hostname)
        ccnodeid = node.resources.get("ccnodeid")
        if cc_node:
            # skip nodes that are still coming up, provided the scheduler
            # node either has no ccnodeid or it matches the cloud node
            if not ccnodeid or ccnodeid == cc_node.delayed_node_id.node_id:
                if cc_node.state in ["Preparing", "Acquiring"]:
                    continue
        if "a" in state and "u" in state:
            invalid_nodes.append(node)

    # nodes in error state must also be deleted
    nodes_to_delete = ge_driver.clean_hosts(invalid_nodes)
    for node in nodes_to_delete:
        ge_env.delete_node(node)

    demand_calculator = calculate_demand(config, ge_env, ge_driver, ctx_handler, node_history)

    ge_driver.handle_failed_nodes(
        demand_calculator.node_mgr.get_failed_nodes())

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # details here are that we pass in nodes that matter (matched) and the driver figures out
    # which ones are new and need to be added via qconf
    joined = ge_driver.handle_join_cluster(
        [x for x in demand_result.compute_nodes if x.exists])

    ge_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # bootup all nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them out
    # and returns a list of ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    # despite the name, the threshold is idle_timeout seconds, not 5 minutes
    unmatched_for_5_mins = demand_calculator.find_unmatched_for(
        at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # I don't care about nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info("The following nodes have timed out while booting: %s", timed_out_booting)
        timed_out_to_deleted = ge_driver.handle_boot_timeout(
            timed_out_booting) or []

    if unmatched_for_5_mins:
        node_expr = ", ".join([str(x) for x in unmatched_for_5_mins])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout, node_expr)
        unmatched_nodes_to_delete = (
            ge_driver.handle_draining(unmatched_for_5_mins) or [])

    # never delete a node that still has jobs assigned to it
    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node)
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case it has anything to do after a node is deleted (usually just remove it from the cluster)
                ge_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            # deletion is best-effort: record the failure in the exit code
            # and let the next iteration retry
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
def test_family_and_spots() -> None:
    """Check family / regional / nodearray limits for on-demand and spot buckets.

    On-demand buckets report the family quota, while the spot bucket
    reports the regional quota in its place. After allocating one
    Standard_F4s node in "htc", the family, nodearray and regional counts
    are expected to drop accordingly.
    """
    bindings = MockClusterBinding("clusty")
    bindings.add_nodearray("htc", {}, spot=False, max_count=10, max_core_count=400)
    bindings.add_nodearray("hpc", {}, spot=False, max_placement_group_size=7)

    # Every on-demand bucket in this test shares the same quota numbers.
    ondemand_quota = {
        "max_count": 20,
        "available_count": 10,
        "family_consumed_core_count": 40,
        "family_quota_core_count": 80,
        "family_quota_count": 20,
        "regional_consumed_core_count": 45,
        "regional_quota_core_count": 100,
        "regional_quota_count": 25,
    }
    bindings.add_bucket("htc", "Standard_F4s", **ondemand_quota)
    bindings.add_bucket("htc", "Standard_D4s_v3", **ondemand_quota)
    bindings.add_bucket("hpc", "Standard_D4s_v3", **ondemand_quota)
    bindings.add_bucket(
        "hpc", "Standard_D4s_v3", placement_groups=["123"], **ondemand_quota
    )

    # The spot bucket has no family quota at all.
    spot_quota = dict(
        ondemand_quota,
        family_consumed_core_count=0,
        family_quota_core_count=0,
        family_quota_count=0,
    )
    bindings.add_nodearray("htcspot", {}, spot=True)
    bindings.add_bucket("htcspot", "Standard_F4s", **spot_quota)

    node_mgr = new_node_manager({"_mock_bindings": bindings})

    def bucket_key(b):
        return (b.nodearray, b.vm_size)

    by_key: Dict[str, NodeBucket] = partition(node_mgr.get_buckets(), bucket_key)
    htc = by_key[("htc", "Standard_F4s")][0]
    htc2 = by_key[("htc", "Standard_D4s_v3")][0]
    htcspot = by_key[("htcspot", "Standard_F4s")][0]
    hpcs = by_key[("hpc", "Standard_D4s_v3")]
    hpc_pg = [x for x in hpcs if x.placement_group][0]

    # ondemand instances use actual family quota
    for ondemand_bucket in (htc, htc2):
        assert ondemand_bucket.limits.family_max_count == 20
    for ondemand_bucket in (htc, htc2):
        assert ondemand_bucket.limits.family_available_count == 10

    # spot instances replace family with regional
    assert htcspot.limits.family_max_count == 25
    assert htcspot.limits.family_available_count == 13

    f4s_in_htc = {"node.nodearray": "htc", "node.vm_size": "Standard_F4s"}
    assert node_mgr.allocate(f4s_in_htc, node_count=1)

    # ondemand instances use actual family quota
    assert htc.limits.family_max_count == 20
    assert htc2.limits.family_max_count == 20
    assert htc.limits.family_available_count == 9
    assert htc2.limits.family_available_count == 10
    assert htc.limits.nodearray_available_count == 9
    assert htc2.limits.nodearray_available_count == 9
    assert htc.available_count == 9
    # nodearray limit affects htc2 since max_count=10
    assert htc2.available_count == 9

    # now the regional is affected by our allocation
    assert htcspot.limits.family_max_count == 25
    assert htcspot.limits.family_available_count == 13 - 1

    assert hpc_pg.available_count == 7
def autoscale_hpcpack(
    config: Dict[str, Any],
    ctx_handler: Optional[DefaultContextHandler] = None,
    hpcpack_rest_client: Optional[HpcRestClient] = None,
    dry_run: bool = False,
) -> None:
    """Run one HPC Pack autoscale iteration.

    The iteration has three phases, reflected in the context handler tags:

    * "[Sync-Status]": load node history, fetch CycleCloud and HPC Pack
      nodes, create/populate HPC node groups, remove stale HPC nodes and
      assign the default node template.
    * "[scale-up]": translate the HPC Pack grow decisions into
      ``node_mgr.allocate`` calls and boot the new nodes.
    * "[scale-down]": find idle nodes, shut down those idle for longer
      than the idle timeout and terminate nodes that timed out while
      provisioning or have been stopped longer than the retention period.

    Args:
        config: autoscale configuration; the optional "autoscale" section
            supplies idle_timeout, boot_timeout, statefile, archivefile
            and vm_retention_days.
        ctx_handler: optional context handler updated at each phase.
        hpcpack_rest_client: REST client; created from ``config`` when
            not supplied.
        dry_run: when True, only log the actions; nothing is changed in
            HPC Pack or CycleCloud and the node history is not saved.
    """
    if not hpcpack_rest_client:
        hpcpack_rest_client = new_rest_client(config)

    if ctx_handler:
        ctx_handler.set_context("[Sync-Status]")
    autoscale_config = config.get("autoscale") or {}
    # Load history info
    idle_timeout_seconds: int = autoscale_config.get("idle_timeout") or 600
    provisioning_timeout_seconds = autoscale_config.get("boot_timeout") or 1500
    statefile = autoscale_config.get(
        "statefile") or "C:\\cycle\\jetpack\\config\\autoscaler_state.txt"
    archivefile = autoscale_config.get(
        "archivefile") or "C:\\cycle\\jetpack\\config\\autoscaler_archive.txt"
    node_history = HpcNodeHistory(
        statefile=statefile,
        archivefile=archivefile,
        provisioning_timeout=provisioning_timeout_seconds,
        idle_timeout=idle_timeout_seconds)

    logging.info("Synchronizing the nodes between Cycle cloud and HPC Pack")

    # Initialize data of History info, cc nodes, HPC Pack nodes, HPC grow decisions
    # Get node list from Cycle Cloud
    def nodes_state_key(n: Node) -> Tuple[int, str, int]:
        # Sort key: state priority first, then name prefix and numeric suffix.
        try:
            state_pri = 1
            if n.state == 'Deallocated':
                state_pri = 2
            elif n.state == 'Stopping':
                state_pri = 3
            elif n.state == 'Terminating':
                state_pri = 4
            name, index = n.name.rsplit("-", 1)
            return (state_pri, name, int(index))
        except Exception:
            # Names without a numeric "-<index>" suffix sort with index 0.
            return (state_pri, n.name, 0)

    node_mgr: NodeManager = new_node_manager(config)
    for b in node_mgr.get_buckets():
        b.nodes.sort(key=nodes_state_key)
    cc_nodes: List[Node] = node_mgr.get_nodes()
    cc_nodes_by_id = partition_single(cc_nodes, func=lambda n: n.delayed_node_id.node_id)
    # Get compute node list and grow decision from HPC Pack
    hpc_node_groups = hpcpack_rest_client.list_node_groups()
    grow_decisions = hpcpack_rest_client.get_grow_decision()
    logging.info("grow decision: {}".format(grow_decisions))
    hpc_cn_nodes: List[HpcNode] = hpcpack_rest_client.list_computenodes()
    hpc_cn_nodes = [n for n in hpc_cn_nodes if n.active]

    # This function will link node history items, cc nodes and hpc nodes
    node_history.synchronize(cc_nodes, hpc_cn_nodes)

    cc_nodearrays = set([b.nodearray for b in node_mgr.get_buckets()])
    logging.info("Current node arrays in cyclecloud: {}".format(cc_nodearrays))

    # Create HPC node groups for CC node arrays
    cc_map_hpc_groups = ["CycleCloudNodes"] + list(cc_nodearrays)
    for cc_grp in cc_map_hpc_groups:
        if ci_notin(cc_grp, hpc_node_groups):
            logging.info("Create HPC node group: {}".format(cc_grp))
            hpcpack_rest_client.add_node_group(cc_grp, "Cycle Cloud Node group")

    # Add HPC nodes into corresponding node groups
    add_cc_tag_nodes = [
        n.name for n in hpc_cn_nodes if n.shall_addcyclecloudtag
    ]
    if len(add_cc_tag_nodes) > 0:
        logging.info(
            "Adding HPC nodes to node group CycleCloudNodes: {}".format(
                add_cc_tag_nodes))
        hpcpack_rest_client.add_node_to_node_group("CycleCloudNodes", add_cc_tag_nodes)
    for cc_grp in list(cc_nodearrays):
        add_array_tag_nodes = [
            n.name for n in hpc_cn_nodes
            if n.shall_addnodearraytag and ci_equals(n.cc_nodearray, cc_grp)
        ]
        if len(add_array_tag_nodes) > 0:
            logging.info("Adding HPC nodes to node group {}: {}".format(
                cc_grp, add_array_tag_nodes))
            hpcpack_rest_client.add_node_to_node_group(cc_grp, add_array_tag_nodes)

    # Possible values for HPC NodeState (states marked with * shall not occur for CC nodes):
    #   Unknown, Provisioning, Offline, Starting, Online, Draining, Rejected(*), Removing, NotDeployed(*), Stopping(*)
    # Remove the following HPC Pack nodes:
    #   1. The corresponding CC node already removed
    #   2. The corresponding CC node is stopped and HPC node is not assigned a node template
    # Take offline the following HPC Pack nodes:
    #   1. The corresponding CC node is stopped or is going to stop
    hpc_nodes_to_remove = [
        n.name for n in hpc_cn_nodes
        if n.removed_cc_node or (n.stopped_cc_node and not n.template_assigned)
    ]
    hpc_nodes_to_take_offline = [
        n.name for n in hpc_cn_nodes
        if n.stopped_cc_node and ci_equals(n.state, "Online")
    ]
    if len(hpc_nodes_to_remove) > 0:
        logging.info("Removing the HPC nodes: {}".format(hpc_nodes_to_remove))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.remove_nodes(hpc_nodes_to_remove)
    hpc_cn_nodes = [
        n for n in hpc_cn_nodes
        if not (n.stopped_cc_node or n.removed_cc_node)
    ]

    # Assign default node template for unapproved CC node
    hpc_nodes_to_assign_template = [
        n.name for n in hpc_cn_nodes
        if n.bound_cc_node and not n.template_assigned
    ]
    if len(hpc_nodes_to_assign_template) > 0:
        logging.info(
            "Assigning default node template for the HPC nodes: {}".format(
                hpc_nodes_to_assign_template))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.assign_default_compute_node_template(
                hpc_nodes_to_assign_template)

    ### Start scale up checking:
    logging.info("Start scale up checking ...")
    if ctx_handler:
        ctx_handler.set_context("[scale-up]")

    hpc_nodes_with_active_cc = [
        n for n in hpc_cn_nodes if n.template_assigned and n.bound_cc_node
    ]
    # Exclude the already online healthy HPC nodes before calling node_mgr.allocate
    for hpc_node in hpc_nodes_with_active_cc:
        if hpc_node.ready_for_job:
            hpc_node.bound_cc_node.closed = True

    # Terminate the provisioning timeout CC nodes
    cc_node_to_terminate: List[Node] = []
    for cc_node in cc_nodes:
        if ci_equals(cc_node.target_state, 'Deallocated') or ci_equals(
                cc_node.target_state,
                'Terminated') or cc_node.create_time_remaining:
            continue
        # NOTE(review): find() is treated as possibly returning None further
        # down (after bootup); here it is dereferenced unconditionally.
        # Presumably synchronize() guarantees an item for every cc node -
        # confirm.
        nhi = node_history.find(cc_id=cc_node.delayed_node_id.node_id)
        if not nhi.hpc_id:
            cc_node.closed = True
            cc_node_to_terminate.append(cc_node)
        else:
            hpc_node = ci_find_one(hpc_nodes_with_active_cc, nhi.hpc_id,
                                   lambda n: n.id)
            if hpc_node and hpc_node.error:
                cc_node.closed = True
                cc_node_to_terminate.append(cc_node)

    # "ComputeNodes", "CycleCloudNodes", "AzureIaaSNodes" are all treated as default
    # grow_by_socket not supported yet, treat as grow_by_node
    defaultGroups = [
        "Default", "ComputeNodes", "AzureIaaSNodes", "CycleCloudNodes"
    ]
    default_cores_to_grow = default_nodes_to_grow = 0.0

    # If the current CC nodes in the node array cannot satisfy the grow decision, the group is hungry
    # For a hungry group, no idle check is required if the node health is OK
    group_hungry: Dict[str, bool] = {}
    nbrNewNodes: int = 0
    grow_groups = list(grow_decisions.keys())
    for grp in grow_groups:
        # The decision is removed from the dict here; from now on it is only
        # reachable through `tmp`.
        tmp = grow_decisions.pop(grp)
        if not (tmp.cores_to_grow + tmp.nodes_to_grow + tmp.sockets_to_grow):
            continue
        if ci_in(grp, defaultGroups):
            default_cores_to_grow += tmp.cores_to_grow
            default_nodes_to_grow += tmp.nodes_to_grow + tmp.sockets_to_grow
            continue
        if ci_notin(grp, cc_nodearrays):
            # Fixed: this used to read grow_decisions[grp] and pop it a second
            # time, which raised KeyError because the entry was already popped
            # at the top of the loop.
            logging.warning(
                "No mapping node array for the grow requirement {}:{}".format(
                    grp, tmp))
            continue
        group_hungry[grp] = False
        array = ci_lookup(grp, cc_nodearrays)
        selector = {'ncpus': 1, 'node.nodearray': [array]}
        target_cores = math.ceil(tmp.cores_to_grow)
        target_nodes = math.ceil(tmp.nodes_to_grow + tmp.sockets_to_grow)
        if target_nodes:
            logging.info("Allocate: {} Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate(selector, node_count=target_nodes)
            logging.info(result)
            if not result or result.total_slots < target_nodes:
                group_hungry[grp] = True
        if target_cores:
            logging.info("Allocate: {} Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate(selector, slot_count=target_cores)
            logging.info(result)
            if not result or result.total_slots < target_cores:
                group_hungry[grp] = True
        # Any new node created for this group also marks it hungry.
        if len(node_mgr.new_nodes) > nbrNewNodes:
            group_hungry[grp] = True
        nbrNewNodes = len(node_mgr.new_nodes)

    # We then check the grow decision for the default node groups:
    checkShrinkNeeded = True
    growForDefaultGroup = True if default_nodes_to_grow or default_cores_to_grow else False
    if growForDefaultGroup:
        selector = {'ncpus': 1}
        if default_nodes_to_grow:
            target_nodes = math.ceil(default_nodes_to_grow)
            logging.info("Allocate: {} Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate({'ncpus': 1}, node_count=target_nodes)
            if not result or result.total_slots < target_nodes:
                checkShrinkNeeded = False
        if default_cores_to_grow:
            target_cores = math.ceil(default_cores_to_grow)
            logging.info("Allocate: {} Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate({'ncpus': 1}, slot_count=target_cores)
            if not result or result.total_slots < target_cores:
                checkShrinkNeeded = False
        if len(node_mgr.new_nodes) > nbrNewNodes:
            checkShrinkNeeded = False
        nbrNewNodes = len(node_mgr.new_nodes)

    if nbrNewNodes > 0:
        logging.info("Need to Allocate {} nodes in total".format(nbrNewNodes))
        if dry_run:
            logging.info("Dry-run: skipping node bootup...")
        else:
            logging.info("Allocating {} nodes in total".format(
                len(node_mgr.new_nodes)))
            bootup_result: BootupResult = node_mgr.bootup()
            logging.info(bootup_result)
            if bootup_result and bootup_result.nodes:
                # Track every booted node in the history, restarting the
                # entry of a node that is already known.
                for cc_node in bootup_result.nodes:
                    nhi = node_history.find(
                        cc_id=cc_node.delayed_node_id.node_id)
                    if nhi is None:
                        nhi = node_history.insert(
                            NodeHistoryItem(cc_node.delayed_node_id.node_id))
                    else:
                        nhi.restart()
    else:
        logging.info("No need to allocate new nodes ...")

    ### Start the shrink checking
    if ctx_handler:
        ctx_handler.set_context("[scale-down]")

    cc_node_to_shutdown: List[Node] = []
    if not checkShrinkNeeded:
        logging.info("No shrink check at this round ...")
        if not dry_run:
            # Reset the idle clock of every running node known to HPC Pack.
            for nhi in node_history.items:
                if not nhi.stopped and nhi.hpc_id:
                    nhi.idle_from = None
    else:
        logging.info("Start scale down checking ...")
        # By default, we check idle for active CC nodes in HPC Pack with 'Offline', 'Starting', 'Online', 'Draining' state
        candidate_idle_check_nodes = [
            n for n in hpc_nodes_with_active_cc
            if (not n.bound_cc_node.keep_alive)
            and ci_in(n.state, ["Offline", "Starting", "Online", "Draining"])
        ]

        # We can exclude some nodes from idle checking:
        # 1. If HPC Pack ask for grow in default node group(s), all healthy ONLINE nodes are considered as busy
        # 2. If HPC Pack ask for grow in certain node group, all healthy ONLINE nodes in that node group are considered as busy
        # 3. If a node group is hungry (new CC required or grow request not satisfied), no idle check needed for all nodes in that node array
        if growForDefaultGroup:
            candidate_idle_check_nodes = [
                n for n in candidate_idle_check_nodes if not n.ready_for_job
            ]
        for grp, hungry in group_hungry.items():
            if hungry:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not ci_equals(grp, n.cc_nodearray)
                ]
            elif not growForDefaultGroup:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not (ci_equals(grp, n.cc_nodearray) and n.ready_for_job)
                ]

        curtime = datetime.utcnow()
        # Offline node must be idle
        idle_node_names = [
            n.name for n in candidate_idle_check_nodes
            if ci_equals(n.state, 'Offline')
        ]
        # Only ask HPC Pack about the candidates that are not offline.
        if len(candidate_idle_check_nodes) > len(idle_node_names):
            idle_nodes = hpcpack_rest_client.check_nodes_idle([
                n.name for n in candidate_idle_check_nodes
                if not ci_equals(n.state, 'Offline')
            ])
            if len(idle_nodes) > 0:
                idle_node_names.extend([n.node_name for n in idle_nodes])

        if len(idle_node_names) > 0:
            logging.info(
                "The following node is idle: {}".format(idle_node_names))
        else:
            logging.info("No idle node found in this round.")

        retention_days = autoscale_config.get("vm_retention_days") or 7
        for nhi in node_history.items:
            if nhi.stopped:
                # Stopped VMs are kept for retention_days, then terminated.
                if nhi.stop_time + timedelta(
                        days=retention_days) < datetime.utcnow():
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_terminate.append(cc_node)
                continue
            if ci_in(nhi.hostname, idle_node_names):
                if nhi.idle_from is None:
                    nhi.idle_from = curtime
                elif nhi.idle_timeout(idle_timeout_seconds):
                    nhi.stop_time = curtime
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_shutdown.append(cc_node)
            else:
                nhi.idle_from = None

    shrinking_cc_node_ids = [
        n.delayed_node_id.node_id for n in cc_node_to_terminate
    ]
    shrinking_cc_node_ids.extend(
        [n.delayed_node_id.node_id for n in cc_node_to_shutdown])
    hpc_nodes_to_bring_online = [
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Offline') and not n.error
        and ci_notin(n.cc_node_id, shrinking_cc_node_ids)
    ]
    hpc_nodes_to_take_offline.extend([
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Online')
        and ci_in(n.cc_node_id, shrinking_cc_node_ids)
    ])
    if len(hpc_nodes_to_bring_online) > 0:
        logging.info("Bringing the HPC nodes online: {}".format(
            hpc_nodes_to_bring_online))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.bring_nodes_online(hpc_nodes_to_bring_online)

    if len(hpc_nodes_to_take_offline) > 0:
        logging.info("Taking the HPC nodes offline: {}".format(
            hpc_nodes_to_take_offline))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.take_nodes_offline(hpc_nodes_to_take_offline)

    if len(cc_node_to_shutdown) > 0:
        logging.info("Shut down the following Cycle cloud node: {}".format(
            [cn.name for cn in cc_node_to_shutdown]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.shutdown_nodes(cc_node_to_shutdown)

    if len(cc_node_to_terminate) > 0:
        logging.info(
            "Terminating the following provisioning-timeout Cycle cloud nodes: {}"
            .format([cn.name for cn in cc_node_to_terminate]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.terminate_nodes(cc_node_to_terminate)

    if not dry_run:
        logging.info("Save node history: {}".format(node_history))
        node_history.save()
def _node_mgr(bindings: MockClusterBinding) -> NodeManager:
    """Create a NodeManager whose config holds only the given mock bindings."""
    mock_config = {"_mock_bindings": bindings}
    return new_node_manager(mock_config)
def shutdown_nodes_node_mgr() -> None:
    """Example: look up nodes by name and delete the ones that were found."""
    node_names = ["htc-1"]
    node_mgr = new_node_manager(CONFIG)

    to_shutdown = []
    for candidate in node_mgr.get_nodes():
        if candidate.name in node_names:
            to_shutdown.append(candidate)

    # Nothing matched: leave the cluster untouched.
    if not to_shutdown:
        return
    node_mgr.delete(to_shutdown)