def process_scaleset(scaleset: Scaleset) -> None:
    """Drive a scaleset one step through its state machine.

    Halted scalesets are torn down immediately; otherwise cleanup runs
    first, and only an untouched scaleset in a needs-work state has its
    state handler dispatched.
    """
    # A halted scaleset is torn down right away; nothing else applies.
    if scaleset.state == ScalesetState.halt:
        scaleset.halt()
        return

    # if the scaleset is touched during cleanup, don't continue to process it
    if scaleset.cleanup_nodes():
        return

    if scaleset.state not in ScalesetState.needs_work():
        return

    logging.info(
        "executing scaleset state: %s - %s",
        scaleset.scaleset_id,
        scaleset.state.name,
    )
    # The handler method shares its name with the state (e.g. `init`, `setup`).
    getattr(scaleset, scaleset.state.name)()
def autoscale_pool(pool: Pool) -> None:
    """Grow or shrink a pool's scalesets to match its current task load."""
    logging.info("autoscale: %s", pool.autoscale)
    if not pool.autoscale:
        return

    # get all the tasks (count not stopped) for the pool
    pool_tasks = Task.get_tasks_by_pool_name(pool.name)
    logging.info("Pool: %s, #Tasks %d", pool.name, len(pool_tasks))

    # Target node count: enough for the tasks, clamped to the autoscale bounds.
    target = max(get_vm_count(pool_tasks), pool.autoscale.min_size)
    if pool.autoscale.max_size:
        target = min(target, pool.autoscale.max_size)

    # Subtract capacity already provided by the pool's scalesets.
    scalesets = Scaleset.search_by_pool(pool.name)
    for scaleset in scalesets:
        if scaleset.state in ScalesetState.modifying():
            # A scaleset is mid-modification; defer sizing until it settles.
            return
        target -= scaleset.size

    logging.info("Pool: %s, #Nodes Needed: %d", pool.name, target)
    if target > 0:
        # resizing scaleset or creating new scaleset.
        scale_up(pool, scalesets, target)
    elif target < 0:
        # Nodes already flagged for deletion count toward the shrink.
        target += sum(
            1
            for scaleset in scalesets
            for node in Node.search_states(scaleset_id=scaleset.scaleset_id)
            if node.delete_requested
        )
        if target < 0:
            scale_down(scalesets, abs(target))
def process_scaleset(scaleset: Scaleset) -> None:
    """Check a scaleset for pending updates and dispatch its state handler.

    Resize is handled inline (and excluded from generic dispatch below);
    cleanup short-circuits further processing when it touched the scaleset.
    """
    logging.debug("checking scaleset for updates: %s", scaleset.scaleset_id)

    if scaleset.state == ScalesetState.resize:
        scaleset.resize()

    # if the scaleset is touched during cleanup, don't continue to process it
    if scaleset.cleanup_nodes():
        logging.debug("scaleset needed cleanup: %s", scaleset.scaleset_id)
        return

    if scaleset.state not in ScalesetState.needs_work():
        return
    if scaleset.state == ScalesetState.resize:
        # resize was already executed above; don't dispatch it twice.
        return

    logging.info(
        "exec scaleset state: %s - %s",
        scaleset.scaleset_id,
        scaleset.state,
    )
    # States without a matching handler method are logged but not dispatched.
    handler = getattr(scaleset, scaleset.state.name, None)
    if handler is not None:
        handler()
def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None:  # noqa: F841
    """Timer entrypoint: requeue work for every entity that needs attention.

    Walks each entity type (scalesets, proxies, repro VMs, tasks, jobs,
    pools, nodes) queueing updates for those in a needs-work state, queues
    stops for expired tasks and jobs, health-checks proxies (stopping dead
    ones so they are recreated on demand), and publishes any pending
    dashboard event.
    """
    scalesets = Scaleset.search()
    # Hoisted: the needs-work state set is invariant across the loop.
    scalesets_needs_work = ScalesetState.needs_work()
    for scaleset in scalesets:
        logging.info("queueing scaleset updates: %s", scaleset.scaleset_id)
        scaleset.queue(method=scaleset.update_configs)
        if scaleset.state in scalesets_needs_work:
            scaleset.queue()

    proxies = Proxy.search_states(states=VmState.needs_work())
    for proxy in proxies:
        logging.info("requeueing update proxy vm: %s", proxy.region)
        proxy.queue()

    vms = Repro.search_states(states=VmState.needs_work())
    for vm in vms:
        logging.info("requeueing update vm: %s", vm.vm_id)
        vm.queue()

    tasks = Task.search_states(states=TaskState.needs_work())
    for task in tasks:
        logging.info("requeueing update task: %s", task.task_id)
        task.queue()

    jobs = Job.search_states(states=JobState.needs_work())
    for job in jobs:
        logging.info("requeueing update job: %s", job.job_id)
        job.queue()

    pools = Pool.search_states(states=PoolState.needs_work())
    for pool in pools:
        logging.info("queuing update pool: %s (%s)", pool.pool_id, pool.name)
        pool.queue()

    nodes = Node.search_states(states=NodeState.needs_work())
    for node in nodes:
        logging.info("queuing update node: %s", node.machine_id)
        node.queue()

    expired_tasks = Task.search_expired()
    for task in expired_tasks:
        # BUGFIX: previously logged task.job_id; log the task's own id,
        # consistent with the "requeueing update task" message above.
        logging.info("queuing stop for task: %s", task.task_id)
        task.queue_stop()

    expired_jobs = Job.search_expired()
    for job in expired_jobs:
        logging.info("queuing stop for job: %s", job.job_id)
        job.queue_stop()

    # Reminder, proxies are created on-demand.  If something is "wrong" with
    # a proxy, the plan is: delete and recreate it.
    for proxy in Proxy.search():
        if not proxy.is_alive():
            logging.error("proxy alive check failed, stopping: %s", proxy.region)
            proxy.state = VmState.stopping
            proxy.save()
        else:
            proxy.save_proxy_config()

    event = get_event()
    if event:
        dashboard.set(event)
def can_process_new_work(self) -> bool:
    """Return True when this node may accept new work.

    Every disqualifying condition is logged with a
    ``can_process_new_work`` prefix; several conditions additionally stop
    or halt the node as a side effect before returning False.
    """
    from .pools import Pool
    from .scalesets import Scaleset

    # Outdated agent: stop the node unless the escape-hatch env var is set.
    if (
        self.is_outdated()
        and os.environ.get("ONEFUZZ_ALLOW_OUTDATED_AGENT") != "true"
    ):
        logging.info(
            "can_process_new_work agent and service versions differ, "
            "stopping node. "
            "machine_id:%s agent_version:%s service_version: %s",
            self.machine_id,
            self.version,
            __version__,
        )
        self.stop(done=True)
        return False

    if self.is_too_old():
        logging.info(
            "can_process_new_work node is too old. machine_id:%s",
            self.machine_id,
        )
        self.stop(done=True)
        return False

    if self.state not in NodeState.can_process_new_work():
        # BUGFIX: the two adjacent literals previously concatenated to
        # "...for new workmachine_id:%s" — separator added.
        logging.info(
            "can_process_new_work node not in appropriate state for new work. "
            "machine_id:%s state:%s",
            self.machine_id,
            self.state.name,
        )
        return False

    if self.state in NodeState.ready_for_reset():
        logging.info(
            "can_process_new_work node is set for reset. machine_id:%s",
            self.machine_id,
        )
        return False

    if self.delete_requested:
        logging.info(
            "can_process_new_work is set to be deleted. machine_id:%s",
            self.machine_id,
        )
        self.stop(done=True)
        return False

    if self.reimage_requested:
        logging.info(
            "can_process_new_work is set to be reimaged. machine_id:%s",
            self.machine_id,
        )
        self.stop(done=True)
        return False

    if self.could_shrink_scaleset():
        logging.info(
            "can_process_new_work node scheduled to shrink. machine_id:%s",
            self.machine_id,
        )
        self.set_halt()
        return False

    if self.scaleset_id:
        scaleset = Scaleset.get_by_id(self.scaleset_id)
        if isinstance(scaleset, Error):
            logging.info(
                "can_process_new_work invalid scaleset. "
                "scaleset_id:%s machine_id:%s",
                self.scaleset_id,
                self.machine_id,
            )
            return False

        if scaleset.state not in ScalesetState.available():
            logging.info(
                "can_process_new_work scaleset not available for work. "
                "scaleset_id:%s machine_id:%s",
                self.scaleset_id,
                self.machine_id,
            )
            return False

    pool = Pool.get_by_name(self.pool_name)
    if isinstance(pool, Error):
        # CONSISTENCY: message prefix aligned with the rest of this method
        # (was "can_schedule - ...", a leftover from the old method name);
        # the literal was also broken mid-string by extraction and has been
        # reconstructed.
        logging.info(
            "can_process_new_work invalid pool. "
            "pool_name:%s machine_id:%s",
            self.pool_name,
            self.machine_id,
        )
        return False

    if pool.state not in PoolState.available():
        logging.info(
            "can_process_new_work pool is not available for work. "
            "pool_name:%s machine_id:%s",
            self.pool_name,
            self.machine_id,
        )
        return False

    return True
def can_process_new_work(self) -> bool:
    """Return True when this node may accept new work.

    Each disqualifying condition is logged; outdated, delete-requested,
    and reimage-requested nodes are also stopped, and nodes scheduled to
    shrink are halted, before returning False.
    """
    from .pools import Pool
    from .scalesets import Scaleset

    # Agent/service version mismatch: stop the node so it gets replaced.
    if self.is_outdated():
        logging.info(
            "can_schedule agent and service versions differ, stopping node. "
            "machine_id:%s agent_version:%s service_version: %s",
            self.machine_id,
            self.version,
            __version__,
        )
        self.stop()
        return False

    if self.state in NodeState.ready_for_reset():
        logging.info(
            "can_schedule node is set for reset. machine_id:%s", self.machine_id
        )
        return False

    if self.delete_requested:
        logging.info(
            "can_schedule is set to be deleted. machine_id:%s", self.machine_id
        )
        self.stop()
        return False

    if self.reimage_requested:
        logging.info(
            "can_schedule is set to be reimaged. machine_id:%s", self.machine_id
        )
        self.stop()
        return False

    if self.could_shrink_scaleset():
        # Halt first, then log — preserves the original event ordering.
        self.set_halt()
        logging.info("node scheduled to shrink. machine_id:%s", self.machine_id)
        return False

    # Scaleset-backed nodes: the owning scaleset must exist and be available.
    if self.scaleset_id:
        node_scaleset = Scaleset.get_by_id(self.scaleset_id)
        if isinstance(node_scaleset, Error):
            logging.info(
                "can_schedule - invalid scaleset. scaleset_id:%s machine_id:%s",
                self.scaleset_id,
                self.machine_id,
            )
            return False
        if node_scaleset.state not in ScalesetState.available():
            logging.info(
                "can_schedule - scaleset not available for work. "
                "scaleset_id:%s machine_id:%s",
                self.scaleset_id,
                self.machine_id,
            )
            return False

    # The node's pool must exist and be available regardless of scaleset.
    node_pool = Pool.get_by_name(self.pool_name)
    if isinstance(node_pool, Error):
        logging.info(
            "can_schedule - invalid pool. pool_name:%s machine_id:%s",
            self.pool_name,
            self.machine_id,
        )
        return False
    if node_pool.state not in PoolState.available():
        logging.info(
            "can_schedule - pool is not available for work. "
            "pool_name:%s machine_id:%s",
            self.pool_name,
            self.machine_id,
        )
        return False

    return True