def new_node_history(self, config: Dict) -> NodeHistory:
    db_path = config.get("nodehistorydb")
    if not db_path:
        db_path = os.path.join(self.autoscale_home, "nodehistory.db")

    read_only = config.get("read_only", False)
    node_history = SQLiteNodeHistory(db_path, read_only)

    node_history.create_timeout = config.get("boot_timeout", 3600)
    node_history.last_match_timeout = config.get("idle_timeout", 300)

    return node_history
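# A minimal sketch of the config keys that new_node_history() reads above.
# The key names come straight from the code; the values are illustrative
# assumptions, not required settings.
example_config = {
    "nodehistorydb": "/opt/cycle/scalelib/nodehistory.db",  # omit to default under autoscale_home
    "read_only": False,    # open the SQLite node history read-only
    "boot_timeout": 3600,  # seconds a node may take to boot before timing out
    "idle_timeout": 300,   # seconds a node may go unmatched before it is reclaimable
}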
def new_demand_calculator(
    config: Union[str, dict],
    existing_nodes: Optional[List[SchedulerNode]] = None,
    node_mgr: Optional[NodeManager] = None,
    node_history: Optional[NodeHistory] = None,
    disable_default_resources: bool = False,
    node_queue: Optional[NodeQueue] = None,
    singleton_lock: Optional[SingletonLock] = NullSingletonLock(),
) -> DemandCalculator:
    config_dict = load_config(config)
    existing_nodes = existing_nodes or []

    if node_mgr is None:
        node_mgr = new_node_manager(
            config_dict,
            disable_default_resources=disable_default_resources,
        )
    else:
        # A caller-supplied node_mgr has not been through new_node_manager,
        # so initialize logging and defaults for it here.
        logging.initialize_logging(config_dict)
        if not disable_default_resources:
            node_mgr.set_system_default_resources()

    node_history = node_history or SQLiteNodeHistory()

    if singleton_lock is None:
        singleton_lock = new_singleton_lock(config_dict)

    dc = DemandCalculator(node_mgr, node_history, node_queue, singleton_lock)
    dc.update_scheduler_nodes(existing_nodes)

    return dc
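# A minimal usage sketch for the factory above, assuming an autoscale.json
# path and a caller-supplied list of Job objects; both are illustrative.
dcalc = new_demand_calculator("/opt/cycle/scalelib/autoscale.json")
dcalc.add_jobs(pending_jobs)    # pending_jobs: List[Job], hypothetical
demand_result = dcalc.finish()  # match jobs against existing and new nodes
dcalc.bootup()                  # request any new nodes that were matched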
def auto():
    CONFIG = json_load("/opt/cycle/scalelib/autoscale.json")
    MIN_CORE_COUNT = 4
    WARM_BUFFER = 2

    # Get hosts / tasks
    celery_d = celery_status()

    dcalc = demandcalculator.new_demand_calculator(
        CONFIG,
        existing_nodes=celery_d.scheduler_nodes,
        node_history=SQLiteNodeHistory(),
    )
    dcalc.add_jobs(celery_d.jobs)

    n_jobs = len(celery_d.jobs)
    # RIGHT-SIZE based on the minimum core count and the warm buffer;
    # effectively max(n_jobs + WARM_BUFFER, MIN_CORE_COUNT).
    n_add_jobs = max(n_jobs + WARM_BUFFER, max(n_jobs, MIN_CORE_COUNT))
    if n_add_jobs > 0:
        # The padded jobs may float around and extend the idle timer, but they
        # appear to be placed in a stable order that is preserved across
        # autoscale runs.
        print("add padding of %d jobs, to existing %d" % (n_add_jobs, n_jobs))
        dcalc.add_jobs(job_buffer(n_add_jobs))

    demand_result = dcalc.finish()

    output_columns = [
        "name",
        "hostname",
        "job_ids",
        "required",
        "slots",
        "vm_size",
        "vcpu_count",
        "state",
    ]
    print_demand(output_columns, demand_result)

    dcalc.bootup()

    delete_result = dcalc.find_unmatched_for(at_least=180)
    if delete_result:
        try:
            dcalc.delete(delete_result)
        except Exception as e:
            _exit_code = 1
            logging.warning("Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))
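# job_buffer() is not defined in this excerpt. Below is a hypothetical sketch
# of what it might look like, assuming scalelib's Job class and one slot per
# placeholder job; the "padding-" name prefix is an illustrative choice.
from typing import List
from hpc.autoscale.job.job import Job

def job_buffer(count: int) -> List[Job]:
    # One single-slot placeholder job per padded slot; these keep a warm pool
    # of nodes matched so that find_unmatched_for() does not reap them.
    return [Job("padding-%d" % i, {"slots": 1}) for i in range(count)]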
def new_demand_calculator(
    config: Dict,
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    singleton_lock: Optional[SingletonLock] = None,
) -> DemandCalculator:
    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    if ge_driver is None:
        ge_driver = new_driver(config, ge_env)

    if node_history is None:
        db_path = config.get("nodehistorydb")
        if not db_path:
            db_dir = "/opt/cycle/jetpack/system/bootstrap/gridengine"
            if not os.path.exists(db_dir):
                db_dir = os.getcwd()
            db_path = os.path.join(db_dir, "nodehistory.db")

        read_only = config.get("read_only", False)
        node_history = SQLiteNodeHistory(db_path, read_only)

        node_history.create_timeout = config.get("boot_timeout", 3600)
        node_history.last_match_timeout = config.get("idle_timeout", 300)

    demand_calculator = dcalclib.new_demand_calculator(
        config,
        existing_nodes=ge_env.nodes,
        node_history=node_history,
        node_queue=ge_driver.new_node_queue(),
        singleton_lock=singleton_lock,  # dcalclib handles the None case
    )

    for name, default_complex in ge_env.complexes.items():
        if name == "slots":
            continue

        if default_complex.default is None:
            continue

        if not default_complex.requestable:
            continue

        logging.trace("Adding default resource %s=%s", name, default_complex.default)
        demand_calculator.node_mgr.add_default_resource(
            {}, name, default_complex.default
        )

    ccnode_id_added = False
    slots_added: Set[str] = set()

    for bucket in demand_calculator.node_mgr.get_buckets():
        if "slots" not in bucket.resources and bucket.nodearray not in slots_added:
            default = (
                '"default_resources": [{"select": {"node.nodearray": "%s"},'
                ' "name": "slots", "value": "node.vcpu_count"}]' % bucket.nodearray
            )
            demand_calculator.node_mgr.add_default_resource(
                selection={"node.nodearray": bucket.nodearray},
                resource_name="slots",
                default_value="node.vcpu_count",
            )

            logging.warning(
                "slots is not defined for bucket {}. Using the default,"
                " which you can add to your config: {}".format(bucket, default)
            )
            slots_added.add(bucket.nodearray)

        # ccnodeid will almost certainly not be defined. It just needs to be
        # defined once, so we will add a default for all nodes the first time
        # we see it is missing.
        if "ccnodeid" not in bucket.resources and not ccnode_id_added:
            demand_calculator.node_mgr.add_default_resource(
                selection={},  # applies to all nodes
                resource_name="ccnodeid",
                default_value=lambda n: n.delayed_node_id.node_id,
            )
            ccnode_id_added = True

    return demand_calculator
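# add_default_resource() usage, mirroring the calls above: `selection` filters
# which buckets the default applies to, and `default_value` may be a literal,
# a node property reference such as "node.vcpu_count", or a callable taking
# the node. The "execute" nodearray name here is hypothetical.
demand_calculator.node_mgr.add_default_resource(
    selection={"node.nodearray": "execute"},  # only buckets in this nodearray
    resource_name="slots",
    default_value="node.vcpu_count",          # resolved per node at match time
)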