def parse_queue_complex_values(
    expr: str, complexes: Dict[str, Complex], qname: str
) -> Dict[str, Dict[str, Dict]]:
    raw: Dict[str, List[str]] = parse_hostgroup_mapping(expr)
    ret: Dict[str, Dict] = {}
    for hostgroup, sub_exprs in raw.items():
        if hostgroup not in ret:
            ret[hostgroup] = {}
        d = ret[hostgroup]
        for sub_expr in sub_exprs:
            if sub_expr is None:
                sub_expr = "NONE"
            if "=" not in sub_expr:
                continue
            sub_expr = sub_expr.strip()
            complex_name, value_expr = sub_expr.split("=", 1)
            c = complexes.get(complex_name)
            if not c:
                logging.debug(
                    "Could not find complex %s defined in queue %s",
                    complex_name,
                    qname,
                )
                continue
            d[complex_name] = c.parse(value_expr)
    return ret
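
# Hedged usage sketch (illustrative values, not taken from this repo): for a
# queue whose qconf definition contains something like
#     complex_values    NONE,[@hg1=slots=8,mem_free=4g]
# this returns roughly {"@hg1": {"slots": ..., "mem_free": ...}}; the value
# types depend entirely on each Complex.parse() implementation.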
def __init__(
    self,
    node_mgr: NodeManager,
    node_history: NodeHistory = NullNodeHistory(),
    node_queue: Optional[NodeQueue] = None,
    singleton_lock: Optional[SingletonLock] = None,
) -> None:
    assert isinstance(node_mgr, NodeManager)
    self.node_mgr = node_mgr
    self.node_history = node_history

    if node_queue is None:
        node_queue = NodeQueue()
    self.__scheduler_nodes_queue: NodeQueue = node_queue

    for node in self.node_mgr.get_non_failed_nodes():
        self.__scheduler_nodes_queue.push(node)

    self.__set_buffer_delayed_invocations: List[Tuple[Any, ...]] = []
    self.node_history.decorate(list(self.__scheduler_nodes_queue))

    if not singleton_lock:
        singleton_lock = new_singleton_lock({})
    self.__singleton_lock = singleton_lock

    logging.debug(
        "Calculating demand using the following pre-existing nodes: %s",
        [n.name for n in self.__scheduler_nodes_queue],
    )
def autoscale(
    self,
    config: Dict,
    output_columns: Optional[List[str]],
    output_format: OutputFormat,
    dry_run: bool = False,
    long: bool = False,
) -> None:
    """End-to-end autoscale process, including creation, deletion and joining of nodes."""
    output_columns = output_columns or self._get_default_output_columns(config)

    ctx_handler = self._ctx_handler(config)
    register_result_handler(ctx_handler)

    driver = self._driver(config)
    driver.initialize()
    config = driver.preprocess_config(config)
    logging.debug("Driver = %s", driver)

    return autoscale_hpcpack(config, ctx_handler=ctx_handler, dry_run=dry_run)
def update_scheduler_nodes(self, scheduler_nodes: List[SchedulerNode]) -> None:
    by_hostname: Dict[str, Node] = partition_single(
        self.__scheduler_nodes_queue, lambda n: n.hostname_or_uuid  # type: ignore
    )

    for new_snode in scheduler_nodes:
        if new_snode.hostname not in by_hostname:
            logging.debug(
                "Found new node[hostname=%s] that does not exist in CycleCloud",
                new_snode.hostname,
            )
            by_hostname[new_snode.hostname] = new_snode
            self.__scheduler_nodes_queue.push(new_snode)
            self.node_mgr.add_unmanaged_nodes([new_snode])
            # TODO inform bucket catalog?
        else:
            old_snode = by_hostname[new_snode.hostname_or_uuid]
            logging.fine(
                "Found existing CycleCloud node[hostname=%s]",
                new_snode.hostname,
            )
            old_snode.update(new_snode)
def read_queues(
    autoscale_config: Dict,
    scheduler: "GridEngineScheduler",
    pes: Dict[str, "ParallelEnvironment"],
    hostgroups: List[Hostgroup],
    complexes: Dict[str, "Complex"],
    qbin: QBin,
) -> Dict[str, GridEngineQueue]:
    queues = {}
    qnames = qbin.qconf(["-sql"]).split()
    logging.debug("Found %d queues: %s", len(qnames), " ".join(qnames))

    autoscale_queues_config = autoscale_config.get("gridengine", {}).get("queues", {})

    unbound_hostgroups = partition_single(hostgroups, lambda h: h.name)

    for qname in qnames:
        lines = qbin.qconf(["-sq", qname]).splitlines()
        queue_config = parse_ge_config(lines)
        autoscale_enabled = autoscale_queues_config.get(queue_config["qname"], {}).get(
            "autoscale_enabled", True
        )
        expr = queue_config.get("complex_values", "NONE")
        complex_values = parse_queue_complex_values(expr, complexes, qname)
        queues[qname] = GridEngineQueue(
            queue_config,
            scheduler,
            pes,
            unbound_hostgroups,
            complex_values,
            autoscale_enabled,
        )

    return queues
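
# For reference, "qconf -sql" prints one queue name per line (e.g. "all.q"),
# and "qconf -sq <qname>" dumps the queue as "key   value" lines that
# parse_ge_config() turns into the queue_config dict, e.g.:
#     qname             all.q
#     complex_values    NONE
# (illustrative output; the actual fields vary by installation).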
def bootup(self, nodes: Optional[List[Node]] = None) -> BootupResult:
    nodes = nodes if nodes is not None else self.get_demand().new_nodes
    if not nodes:
        logging.info("No nodes to bootup.")
        return BootupResult("success", OperationId(""), None)
    logging.debug("booting up %s", [n.name for n in nodes])
    return self.node_mgr.bootup(nodes)
def delete(self, nodes: Optional[List[Node]] = None) -> DeleteResult:
    nodes = nodes if nodes is not None else self.get_demand().unmatched_nodes
    if not nodes:
        logging.info("No nodes to delete.")
        return DeleteResult("success", OperationId(""), None)
    logging.debug("deleting %s", [n.name for n in nodes])
    return self.node_mgr.delete(nodes)
def add_node_to_node_group(
    self, group_name: str, node_names: Iterable[str]
) -> List[str]:
    assert len(node_names) > 0 and group_name
    logging.debug("Adding nodes %s to nodegroup %s", node_names, group_name)
    res = self._post(
        self.add_node_to_node_group.__name__,
        self.ADD_NODES_TO_NODE_GROUP_ROUTE.format(group_name=group_name),
        json.dumps(node_names),
    )
    return json.loads(res.content)
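
# ADD_NODES_TO_NODE_GROUP_ROUTE is defined elsewhere in this client; a
# hypothetical value would look something like
#     ADD_NODES_TO_NODE_GROUP_ROUTE = "/nodegroups/{group_name}/addnodes"
# (illustrative only -- consult the HPC Pack REST API docs for the real route).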
def __call__(self, result: Result) -> None:
    logging.debug("%s: %s", self.ctx, result)
    self.by_context[self.ctx].append(result)

    if hasattr(result, "nodes") and getattr(result, "nodes"):
        for result_node in getattr(result, "nodes"):
            if "contexts" not in result_node.metadata:
                result_node.metadata["contexts"] = set()
            result_node.metadata["contexts"].add(self.ctx)
def autoscale(
    config: Dict,
    output_columns: Optional[List[str]] = None,
    output_format: Optional[str] = None,
) -> None:
    """Runs the actual autoscale process"""
    logging.debug("Begin autoscale")
    ctx_handler = register_result_handler(DefaultContextHandler("[initialization]"))
    if output_columns:
        config["output_columns"] = output_columns
    if output_format:
        config["output_format"] = output_format
    autoscaler.autoscale_grid_engine(config, ctx_handler=ctx_handler)
    logging.debug("End autoscale")
def handle_draining(self, nodes: List[Node]) -> List[Node]:
    # TODO batch these up, but keep it underneath the max arg limit
    ret = []
    for node in nodes:
        if not node.hostname:
            logging.info("Node %s has no hostname.", node)
            continue

        # TODO implement after we have resources added back in
        # what about deleting partially initialized nodes? I think we
        # just need to skip non-managed nodes
        # if not node.resources.get("ccnodeid"):
        #     continue
        if not node.managed and not node.resources.get("ccnodeid"):
            logging.debug("Ignoring attempt to drain unmanaged %s", node)
            continue

        if "offline" in node.metadata.get("pbs_state", ""):
            if node.assignments:
                # node is already 'offline', i.e. draining, but a job is
                # still running on it
                logging.info("Node %s has jobs still running on it.", node)
                continue
            # ok - it is offline _and_ no jobs are running on it
            ret.append(node)
        else:
            try:
                self.pbscmd.pbsnodes("-o", node.hostname)
                # Due to a delay between when 'pbsnodes -o' exits and when
                # 'pbsnodes -a' actually reports an offline state, we will
                # just optimistically set it to offline here; otherwise
                # ~50% of the time you get the old state (free) back:
                # response = self.pbscmd.pbsnodes_parsed("-a", node.hostname)
                # if response:
                #     node.metadata["pbs_state"] = response[0]["state"]
                node.metadata["pbs_state"] = "offline"
            except CalledProcessError as e:
                if node.private_ip:
                    logging.error(
                        "'pbsnodes -o %s' failed and this node will not be scaled down: %s",
                        node.hostname,
                        e,
                    )

    return ret
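
# Note: "pbsnodes -o <host>" is the standard PBS command to mark a host
# offline so it drains (no new jobs are placed there); the pbscmd wrapper
# above is assumed to shell out to that binary.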
def _down_long_enough(self, now: datetime.datetime, node: Node) -> bool:
    last_state_change_time_str = node.metadata.get("last_state_change_time")

    if last_state_change_time_str:
        last_state_change_time = datetime.datetime.strptime(
            last_state_change_time_str, "%a %b %d %H:%M:%S %Y"
        )
        delta = now - last_state_change_time
        if delta > self.down_timeout_td:
            return True
        else:
            # note: timeout - delta, not delta - timeout; .seconds on a
            # negative timedelta is normalized and would be misleading
            seconds_remaining = (self.down_timeout_td - delta).seconds
            logging.debug(
                "Down node %s still has %s seconds before being set to offline",
                node,
                seconds_remaining,
            )

    return False
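
# The strptime format used above matches PBS's ctime-style timestamps, e.g.
#     datetime.datetime.strptime("Mon Feb  5 10:38:07 2024", "%a %b %d %H:%M:%S %Y")
# (illustrative value; the real string comes from the node's PBS attributes).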
def demand(
    config: Dict,
    jobs: Optional[str] = None,
    scheduler_nodes: Optional[str] = None,
    output_columns: Optional[List[str]] = None,
    output_format: Optional[str] = None,
) -> None:
    """Runs autoscale in dry run mode to see the demand for new nodes"""
    logging.debug("Begin demand")
    ctx = DefaultContextHandler("[demand-cli]")
    register_result_handler(ctx)
    ge_env = environment.from_qconf(config)
    ge_driver = autoscaler.new_driver(config, ge_env)
    config = ge_driver.preprocess_config(config)
    demand_calc = autoscaler.calculate_demand(config, ge_env, ge_driver, ctx)
    demand_result = demand_calc.finish()
    autoscaler.print_demand(config, demand_result, output_columns, output_format)
    logging.debug("End demand")
def update_scheduler_nodes(self, scheduler_nodes: List[SchedulerNode]) -> None:
    by_hostname: Dict[str, Node] = partition_single(
        self.__scheduler_nodes_queue, lambda n: n.hostname_or_uuid  # type: ignore
    )

    for new_snode in scheduler_nodes:
        if new_snode.hostname not in by_hostname:
            by_hostname[new_snode.hostname] = new_snode
            self.__scheduler_nodes_queue.push(new_snode)
            self.node_mgr.add_unmanaged_nodes([new_snode])
            if new_snode.resources.get("ccnodeid"):
                logging.warning(
                    "%s has ccnodeid defined, but no longer exists in CycleCloud",
                    new_snode,
                )
            else:
                logging.debug(
                    "Found new node[hostname=%s] that does not exist in CycleCloud",
                    new_snode.hostname,
                )
            # TODO inform bucket catalog?
        elif new_snode.metadata.get("override_resources", True):
            old_snode = by_hostname[new_snode.hostname_or_uuid]
            logging.fine(
                "Found existing CycleCloud node[hostname=%s]",
                new_snode.hostname,
            )
            old_snode.update(new_snode)
        else:
            logging.fine(
                "Found existing CycleCloud node[hostname=%s], but "
                "node.metadata.override_resources=false, so ignoring the "
                "reported resources and only copying metadata",
                new_snode.hostname,
            )
            old_snode = by_hostname[new_snode.hostname_or_uuid]
            old_snode.metadata.update(new_snode.metadata)
def _log_response(self, s: ResponseStatus, r: Any) -> None:
    if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
        return

    import inspect

    current_frame = inspect.currentframe()
    caller_frame = inspect.getouterframes(current_frame, 2)
    caller = "[{}]".format(caller_frame[1].function)

    as_json = json.dumps(r.to_dict())
    logging.debug(
        "[%s] Response: Status=%s -> %s",
        caller,
        s.status_code,
        as_json[:100],
    )

    # logging.FINE is a custom level below DEBUG supplied by this project's
    # logging wrapper; only at that verbosity do we log the full body
    if logging.getLogger().getEffectiveLevel() > logging.FINE:
        return
    logging.fine(
        "[%s] Full response: Status=%s -> %s",
        caller,
        s.status_code,
        as_json,
    )
def add_default_placement_groups(config: Dict, node_mgr: NodeManager) -> None:
    nas = config.get("nodearrays", {})
    for name, child in nas.items():
        if child.get("placement_groups"):
            return

    by_pg = partition(
        node_mgr.get_buckets(), lambda b: (b.nodearray, b.placement_group)
    )
    by_na_vm = partition(node_mgr.get_buckets(), lambda b: (b.nodearray, b.vm_size))

    for key, buckets in by_na_vm.items():
        nodearray, vm_size = key
        non_pg_buckets = [b for b in buckets if not b.placement_group]
        if not non_pg_buckets:
            # hardcoded PlacementGroupId
            logging.debug(
                "Nodearray %s defines PlacementGroupId, so no additional "
                "placement groups will be created automatically.",
                nodearray,
            )
            continue
        bucket = non_pg_buckets[0]
        if not bucket.supports_colocation:
            continue

        buf_size = int(
            nas.get(nodearray, {}).get("generated_placement_group_buffer", 2)
        )
        buf_remaining = buf_size
        pgi = 0
        while buf_remaining > 0:
            pg_name = ht.PlacementGroup("{}_pg{}".format(vm_size, pgi))
            pg_key = (nodearray, pg_name)
            if pg_key not in by_pg:
                logging.fine("Adding placement group %s", pg_name)
                node_mgr.add_placement_group(pg_name, bucket)
                buf_remaining -= 1
            pgi += 1
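
# For example (illustrative names only): with the default buffer of 2 and a
# colocation-capable bucket of vm_size "Standard_F72s_v2" in nodearray "hpc",
# this registers placement groups "Standard_F72s_v2_pg0" and
# "Standard_F72s_v2_pg1", skipping any that already exist in by_pg.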
def autoscale_grid_engine(
    config: Dict[str, Any],
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running gridengine autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    # interface to GE, generally via cli
    if ge_driver is None:
        # allow tests to pass in a mock
        ge_driver = new_driver(config, ge_env)

    ge_driver.initialize_environment()

    config = ge_driver.preprocess_config(config)

    logging.debug("Driver = %s", ge_driver)

    invalid_nodes = []

    for node in ge_env.nodes:
        # many combinations of a, u and other states exist; however, as long
        # as both a and u are present, the node is down
        state = node.metadata.get("state", "")
        if "a" in state and "u" in state:
            invalid_nodes.append(node)

    # nodes in error state must also be deleted
    nodes_to_delete = ge_driver.clean_hosts(invalid_nodes)
    for node in nodes_to_delete:
        ge_env.delete_node(node)

    demand_calculator = calculate_demand(
        config, ge_env, ge_driver, ctx_handler, node_history
    )

    ge_driver.handle_failed_nodes(demand_calculator.node_mgr.get_failed_nodes())

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # details here are that we pass in nodes that matter (matched) and the
    # driver figures out which ones are new and need to be added via qconf
    joined = ge_driver.handle_join_cluster(
        [x for x in demand_result.compute_nodes if x.exists]
    )

    ge_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # boot up all nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them
    # out and returns a list of ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # I don't care about nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info(
            "The following nodes have timed out while booting: %s", timed_out_booting
        )
        timed_out_to_deleted = ge_driver.handle_boot_timeout(timed_out_booting) or []

    if unmatched_for_5_mins:
        node_expr = ", ".join([str(x) for x in unmatched_for_5_mins])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout, node_expr)
        unmatched_nodes_to_delete = (
            ge_driver.handle_draining(unmatched_for_5_mins) or []
        )

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node
            )
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case the driver has anything to do after a node is
                # deleted (usually just removing it from the cluster)
                ge_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning("Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
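
# Hedged config sketch: the idle_timeout / boot_timeout values read in
# autoscale_grid_engine come straight from the autoscale config dict, e.g. an
# autoscale.json containing
#     {"idle_timeout": 300, "boot_timeout": 3600}
# (keys taken from the code above; the surrounding file contents will vary).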
def main(argv: Optional[Iterable[str]] = None) -> None:
    default_install_dir = os.path.join("/", "opt", "cycle", "gridengine")

    parser = ArgumentParser()
    sub_parsers = parser.add_subparsers()

    def csv_list(x: str) -> List[str]:
        return [x.strip() for x in x.split(",")]

    help_msg = io.StringIO()

    def add_parser(
        name: str, func: Callable, read_only: bool = True, skip_config: bool = False
    ) -> ArgumentParser:
        doc_str = (func.__doc__ or "").strip()
        doc_str = " ".join([x.strip() for x in doc_str.splitlines()])
        help_msg.write("\n {:20} - {}".format(name, doc_str))

        default_config: Optional[str]
        default_config = os.path.join(default_install_dir, "autoscale.json")
        if not os.path.exists(default_config):
            default_config = None

        new_parser = sub_parsers.add_parser(name)
        new_parser.set_defaults(func=func, read_only=read_only)

        if skip_config:
            return new_parser

        new_parser.add_argument(
            "--config",
            "-c",
            default=default_config,
            required=not bool(default_config),
            action="append",
        )
        return new_parser

    def str_list(c: str) -> List[str]:
        return c.split(",")

    def add_parser_with_columns(
        name: str, func: Callable, read_only: bool = True
    ) -> ArgumentParser:
        parser = add_parser(name, func, read_only)

        def parse_format(c: str) -> str:
            c = c.lower()
            if c in ["json", "table", "table_headerless"]:
                return c
            print("Expected json, table or table_headerless - got", c, file=sys.stderr)
            sys.exit(1)

        parser.add_argument("--output-columns", "-o", type=str_list)
        parser.add_argument("--output-format", "-F", type=parse_format)
        return parser

    add_parser_with_columns("autoscale", autoscale, read_only=False)

    add_parser_with_columns("buckets", buckets).add_argument(
        "--constraint-expr", "-C", default="[]"
    )

    add_parser("complexes", complexes).add_argument(
        "-a", "--include-irrelevant", action="store_true", default=False
    )

    delete_parser = add_parser("delete_nodes", delete_nodes, read_only=False)
    delete_parser.add_argument("-H", "--hostnames", type=str_list, default=[])
    delete_parser.add_argument("-N", "--node-names", type=str_list, default=[])
    delete_parser.add_argument("--force", action="store_true", default=False)

    remove_parser = add_parser("remove_nodes", remove_nodes, read_only=False)
    remove_parser.add_argument("-H", "--hostnames", type=str_list, default=[])
    remove_parser.add_argument("-N", "--node-names", type=str_list, default=[])
    remove_parser.add_argument("--force", action="store_true", default=False)

    add_parser_with_columns("demand", demand).add_argument(
        "--jobs", "-j", default=None, required=False
    )

    add_parser("drain_node", drain_node, read_only=False).add_argument(
        "-H", "--hostname", required=True
    )

    initconfig_parser = add_parser(
        "initconfig", initconfig, read_only=False, skip_config=True
    )

    initconfig_parser.add_argument("--cluster-name", required=True)
    initconfig_parser.add_argument("--username", required=True)
    initconfig_parser.add_argument("--password")
    initconfig_parser.add_argument("--url", required=True)

    initconfig_parser.add_argument(
        "--log-config",
        default=os.path.join(default_install_dir, "logging.conf"),
        dest="logging__config_file",
    )
    initconfig_parser.add_argument(
        "--lock-file", default=os.path.join(default_install_dir, "scalelib.lock")
    )
    initconfig_parser.add_argument(
        "--default-resource",
        type=json.loads,
        action="append",
        default=[],
        dest="default_resources",
    )
    initconfig_parser.add_argument(
        "--relevant-complexes",
        default=["slots", "slot_type", "exclusive"],
        type=csv_list,
        dest="gridengine__relevant_complexes",
    )
    initconfig_parser.add_argument(
        "--idle-timeout", default=300, type=int, dest="idle_timeout"
    )
    initconfig_parser.add_argument(
        "--boot-timeout", default=1800, type=int, dest="boot_timeout"
    )
    initconfig_parser.add_argument(
        "--disable-pgs-for-pe",
        default=[],
        type=str,
        action="append",
        help="Disable creation of placement groups for a parallel environment. "
        "This can be invoked more than once.",
        dest="disable_pgs_for_pe",
    )
    initconfig_parser.add_argument(
        "--hostgroup-constraint",
        default=[],
        action="append",
        dest="hostgroup_constraints",
    )

    add_parser("jobs", jobs)
    add_parser("jobs_and_nodes", jobs_and_nodes)

    add_parser("join_cluster", join_cluster).add_argument(
        "-H", "--hostname", type=str_list, required=True
    )

    add_parser_with_columns("nodes", nodes).add_argument(
        "--constraint-expr", "-C", default="[]"
    )

    add_parser("scheduler_nodes", scheduler_nodes)

    help_msg.write("\nadvanced usage:")
    add_parser("validate", validate_func, read_only=True)
    add_parser("queues", queues, read_only=True)
    add_parser("shell", shell)

    analyze_parser = add_parser("analyze", analyze)
    analyze_parser.add_argument("--job-id", "-j", required=True)
    analyze_parser.add_argument("--wide", "-w", action="store_true", default=False)

    parser.usage = help_msg.getvalue()

    args = parser.parse_args()
    if not hasattr(args, "func"):
        parser.print_help()
        sys.exit(1)

    # parse list of config paths to a single config
    if hasattr(args, "config"):
        args.config = load_config(*args.config)
        logging.initialize_logging(args.config)

    if args.read_only:
        args.config["read_only"] = True
        args.config["lock_file"] = None

    kwargs = {}
    for k in dir(args):
        if k[0].islower() and k not in ["read_only", "func"]:
            kwargs[k] = getattr(args, k)

    try:
        args.func(**kwargs)
    except Exception as e:
        print(str(e), file=sys.stderr)
        if hasattr(e, "message"):
            print(getattr(e, "message"), file=sys.stderr)
        logging.debug("Full stacktrace", exc_info=sys.exc_info())
        sys.exit(1)
def autoscale_pbspro(
    config: Dict[str, Any],
    pbs_env: Optional[PBSProEnvironment] = None,
    pbs_driver: Optional[PBSProDriver] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running pbs autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    # interface to PBSPro, generally via cli
    if pbs_driver is None:
        # allow tests to pass in a mock
        pbs_driver = PBSProDriver(config)

    if pbs_env is None:
        pbs_env = envlib.from_driver(config, pbs_driver)

    pbs_driver.initialize()

    config = pbs_driver.preprocess_config(config)

    logging.debug("Driver = %s", pbs_driver)

    demand_calculator = calculate_demand(config, pbs_env, ctx_handler, node_history)

    failed_nodes = demand_calculator.node_mgr.get_failed_nodes()
    for node in pbs_env.scheduler_nodes:
        if "down" in node.metadata.get("pbs_state", ""):
            failed_nodes.append(node)
    pbs_driver.handle_failed_nodes(failed_nodes)

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # details here are that we pass in nodes that matter (matched) and the
    # driver figures out which ones are new and need to be added
    joined = pbs_driver.add_nodes_to_cluster(
        [x for x in demand_result.compute_nodes if x.exists]
    )

    pbs_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # boot up all nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them
    # out and returns a list of ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # I don't care about nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info(
            "The following nodes have timed out while booting: %s", timed_out_booting
        )
        timed_out_to_deleted = pbs_driver.handle_boot_timeout(timed_out_booting) or []

    if unmatched_for_5_mins:
        logging.info(
            "Unmatched for at least %s seconds: %s", idle_timeout, unmatched_for_5_mins
        )
        unmatched_nodes_to_delete = (
            pbs_driver.handle_draining(unmatched_for_5_mins) or []
        )

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node
            )
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case the driver has anything to do after a node is
                # deleted (usually just removing it from the cluster)
                pbs_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning("Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
def _execute(self, stmt: str) -> sqlite3.Cursor:
    logging.debug(stmt)
    return self.conn.execute(stmt)
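
# Note: the statement is logged and executed verbatim -- no parameters are
# bound here, so callers are expected to pass fully formed, trusted SQL.
# A hedged usage sketch (hypothetical table name):
#     cursor = self._execute("SELECT * FROM nodes")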