def maintain(self, cached_managed_nodes, running_insts_map,
             pods_to_schedule, running_or_pending_assigned_pods, asgs):
    """
    maintains running instances:
    - determines if idle nodes should be drained and terminated
    - determines if there are bad nodes in ASGs (did not spin up under
      `instance_init_time` seconds)
    """
    logger.info("++++++++++++++ Maintaining Managed Nodes ++++++++++++++++")

    # for each type of instance, we keep one around for longer
    # in order to speed up job start up time
    idle_selector_hash = collections.Counter()

    pods_by_node = {}
    for p in running_or_pending_assigned_pods:
        pods_by_node.setdefault(p.node_name, []).append(p)

    stats_time = time.time()

    for node in cached_managed_nodes:
        asg = utils.get_group_for_node(asgs, node)
        state = self.get_node_state(
            node, asg, pods_by_node.get(node.name, []),
            pods_to_schedule, running_insts_map, idle_selector_hash)

        logger.info("node: %-*s state: %s" % (75, node, state))
        self.stats.increment(
            'kubernetes.custom.node.state.{}'.format(
                state.replace('-', '_')),
            timestamp=stats_time)

        # state machine & why doesn't python have case?
        if state in (ClusterNodeState.POD_PENDING, ClusterNodeState.BUSY,
                     ClusterNodeState.GRACE_PERIOD,
                     ClusterNodeState.TYPE_GRACE_PERIOD,
                     ClusterNodeState.ASG_MIN_SIZE,
                     ClusterNodeState.LAUNCH_HR_GRACE_PERIOD):
            # do nothing
            pass
        elif state == ClusterNodeState.UNDER_UTILIZED_DRAINABLE:
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not cordoned.', node)
                else:
                    node.cordon()
                    node.drain(pods_by_node.get(node.name, []),
                               notifier=self.notifier)
            else:
                logger.info(
                    '[Dry run] Would have drained and cordoned %s', node)
        elif state == ClusterNodeState.IDLE_SCHEDULABLE:
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not cordoned.', node)
                else:
                    node.cordon()
            else:
                logger.info('[Dry run] Would have cordoned %s', node)
        elif state == ClusterNodeState.BUSY_UNSCHEDULABLE:
            # this is duplicated in original scale logic
            if not self.dry_run:
                node.uncordon()
            else:
                logger.info('[Dry run] Would have uncordoned %s', node)
        elif state == ClusterNodeState.IDLE_UNSCHEDULABLE:
            # remove it from asg
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not terminated.', node)
                else:
                    asg.scale_node_in(node)
            else:
                logger.info('[Dry run] Would have scaled in %s', node)
        elif state == ClusterNodeState.INSTANCE_TERMINATED:
            if not self.dry_run:
                node.delete()
            else:
                logger.info('[Dry run] Would have deleted %s', node)
        elif state == ClusterNodeState.UNDER_UTILIZED_UNDRAINABLE:
            # noop for now
            pass
        else:
            raise Exception("Unhandled state: {}".format(state))

    logger.info(
        "++++++++++++++ Maintaining Unmanaged Instances ++++++++++++++++")

    # these are instances that have been running for a while but are not
    # properly managed, i.e. they have not registered with kube or do not
    # have the proper metadata set
    managed_instance_ids = set(
        node.instance_id for node in cached_managed_nodes)
    for asg in asgs:
        unmanaged_instance_ids = list(
            asg.instance_ids - managed_instance_ids)
        if len(unmanaged_instance_ids) != 0:
            unmanaged_running_insts = self.get_running_instances_in_region(
                asg.region, unmanaged_instance_ids)
            for inst in unmanaged_running_insts:
                if (datetime.datetime.now(inst.launch_time.tzinfo) -
                        inst.launch_time).seconds >= self.instance_init_time:
                    if not self.dry_run:
                        asg.client.terminate_instance_in_auto_scaling_group(
                            InstanceId=inst.id,
                            ShouldDecrementDesiredCapacity=False)
                        logger.info("terminating unmanaged %s" % inst)
                        self.stats.increment(
                            'kubernetes.custom.node.state.unmanaged',
                            timestamp=stats_time)
                        # TODO: try to delete node from kube as well
                        # in the case where kubelet may have registered but
                        # node labels have not been applied yet, so it
                        # appears unmanaged
                    else:
                        logger.info(
                            '[Dry run] Would have terminated unmanaged %s',
                            inst)

def maintain(self, cached_managed_nodes, running_insts_map,
             pods_to_schedule, running_or_pending_assigned_pods, asgs):
    """
    maintains running instances:
    - determines if idle nodes should be drained and terminated
    - determines if there are bad nodes in ASGs (did not spin up under
      `instance_init_time` seconds)
    """
    logger.info("++++++++++++++ Maintaining Nodes & Instances ++++++++++++++++")

    # for each type of instance, we keep one around for longer
    # in order to speed up job start up time
    idle_selector_hash = collections.Counter()

    pods_by_node = {}
    for p in running_or_pending_assigned_pods:
        pods_by_node.setdefault(p.node_name, []).append(p)

    stats_time = time.time()

    # scale-in actions are collected here and applied in one batch below,
    # after the max_scale_in_fraction safety check
    nodes_to_scale_in = {}
    nodes_to_delete = []

    for node in cached_managed_nodes:
        asg = utils.get_group_for_node(asgs, node)
        state = self.get_node_state(
            node, asg, pods_by_node.get(node.name, []),
            pods_to_schedule, running_insts_map, idle_selector_hash)

        logger.info("node: %-*s state: %s" % (75, node, state))
        self.stats.increment(
            'kubernetes.custom.node.state.{}'.format(
                state.replace('-', '_')),
            timestamp=stats_time)

        # state machine & why doesn't python have case?
        if state in (ClusterNodeState.POD_PENDING, ClusterNodeState.BUSY,
                     ClusterNodeState.GRACE_PERIOD,
                     ClusterNodeState.TYPE_GRACE_PERIOD,
                     ClusterNodeState.ASG_MIN_SIZE,
                     ClusterNodeState.LAUNCH_HR_GRACE_PERIOD,
                     ClusterNodeState.DETACHED):
            # do nothing
            pass
        elif state == ClusterNodeState.UNDER_UTILIZED_DRAINABLE:
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not cordoned.', node)
                else:
                    node.cordon()
                    node.drain(pods_by_node.get(node.name, []),
                               notifier=self.notifier)
            else:
                logger.info(
                    '[Dry run] Would have drained and cordoned %s', node)
        elif state == ClusterNodeState.IDLE_SCHEDULABLE:
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not cordoned.', node)
                else:
                    node.cordon()
            else:
                logger.info('[Dry run] Would have cordoned %s', node)
        elif state == ClusterNodeState.BUSY_UNSCHEDULABLE:
            # this is duplicated in original scale logic
            if not self.dry_run:
                node.uncordon()
            else:
                logger.info('[Dry run] Would have uncordoned %s', node)
        elif state == ClusterNodeState.IDLE_UNSCHEDULABLE:
            # remove it from asg
            if not self.dry_run:
                nodes_to_delete.append(node)
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not terminated.', node)
                else:
                    nodes_to_scale_in.setdefault(asg, []).append(node)
            else:
                logger.info('[Dry run] Would have scaled in %s', node)
        elif state == ClusterNodeState.INSTANCE_TERMINATED:
            if not self.dry_run:
                nodes_to_delete.append(node)
            else:
                logger.info('[Dry run] Would have deleted %s', node)
        elif state == ClusterNodeState.DEAD:
            if not self.dry_run:
                nodes_to_delete.append(node)
                if asg:
                    nodes_to_scale_in.setdefault(asg, []).append(node)
            else:
                logger.info('[Dry run] Would have reaped dead node %s', node)
        elif state == ClusterNodeState.UNDER_UTILIZED_UNDRAINABLE:
            # noop for now
            pass
        else:
            raise Exception("Unhandled state: {}".format(state))

    # these are instances that have been running for a while but are not
    # properly managed, i.e. they have not registered with kube or do not
    # have the proper metadata set
    managed_instance_ids = set(
        node.instance_id for node in cached_managed_nodes)
    instances_to_terminate = {}
    for asg in asgs:
        unmanaged_instance_ids = (asg.instance_ids - managed_instance_ids)
        if len(unmanaged_instance_ids) > 0:
            if asg.provider == 'azure':
                for inst_id in unmanaged_instance_ids:
                    inst = asg.instances[inst_id]
                    if (datetime.datetime.now(inst.launch_time.tzinfo) -
                            inst.launch_time).seconds >= self.instance_init_time:
                        if not self.dry_run:
                            logger.info("terminating unmanaged %s" % inst)
                            instances_to_terminate.setdefault(
                                asg, []).append(inst_id)
                            self.stats.increment(
                                'kubernetes.custom.node.state.unmanaged',
                                timestamp=stats_time)
                            # TODO: try to delete node from kube as well
                            # in the case where kubelet may have registered
                            # but node labels have not been applied yet, so
                            # it appears unmanaged
                        else:
                            logger.info(
                                '[Dry run] Would have terminated unmanaged %s',
                                inst)
            else:
                unmanaged_running_insts = self.get_running_instances_in_region(
                    asg.region, list(unmanaged_instance_ids))
                for inst in unmanaged_running_insts:
                    if (datetime.datetime.now(inst.launch_time.tzinfo) -
                            inst.launch_time).seconds >= self.instance_init_time:
                        if not self.dry_run:
                            asg.client.terminate_instance_in_auto_scaling_group(
                                InstanceId=inst.id,
                                ShouldDecrementDesiredCapacity=False)
                            logger.info("terminating unmanaged %s" % inst)
                            self.stats.increment(
                                'kubernetes.custom.node.state.unmanaged',
                                timestamp=stats_time)
                            # TODO: try to delete node from kube as well
                            # in the case where kubelet may have registered
                            # but node labels have not been applied yet, so
                            # it appears unmanaged
                        else:
                            logger.info(
                                '[Dry run] Would have terminated unmanaged %s [%s]',
                                inst, asg.region)

    async_operations = []
    total_instances = max(sum(len(asg.instance_ids) for asg in asgs),
                          len(cached_managed_nodes))
    max_allowed_scale_in = int(
        math.ceil(self.max_scale_in_fraction * total_instances))
    to_scale_in = sum(len(nodes) for nodes in nodes_to_scale_in.values()) + \
        sum(len(instance_ids)
            for instance_ids in instances_to_terminate.values())
    to_scale_in = max(to_scale_in, len(nodes_to_delete))
    if to_scale_in > max_allowed_scale_in:
        logger.error("TOO MANY NODES TO SCALE IN: {}, max allowed is {}".format(
            to_scale_in, max_allowed_scale_in))
    elif not self.dry_run:
        for asg, nodes in nodes_to_scale_in.items():
            async_operations.append(asg.scale_nodes_in(nodes))
        for asg, instance_ids in instances_to_terminate.items():
            async_operations.append(asg.terminate_instances(instance_ids))
        for node in nodes_to_delete:
            node.delete()

    # Wait for all background scale-in operations to complete
    for operation in async_operations:
        try:
            operation.result()
        except CloudError as e:
            logger.warn("Error while deleting Azure node: {}".format(e.message))
        except TimeoutError:
            logger.warn("Timeout while deleting Azure node")

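# Worked example for the scale-in cap in the maintain() variant above
# (illustrative only; max_scale_in_fraction = 0.1 and 25 instances are
# assumed values, not values from this codebase):
#   max_allowed_scale_in = ceil(0.1 * 25) = 3
# so a single pass that queues more than 3 nodes/instances for scale-in
# logs the "TOO MANY NODES TO SCALE IN" error and performs no scale-in
# on that pass.
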
def maintain(self, cached_managed_nodes, running_insts_map,
             pods_to_schedule, running_or_pending_assigned_pods, asgs):
    """
    maintains running instances:
    - determines if idle nodes should be drained and terminated
    - determines if there are bad nodes in ASGs (did not spin up under
      `instance_init_time` seconds)
    """
    logger.info("++++++++++++++ Maintaining Managed Nodes ++++++++++++++++")

    # for each type of instance, we keep one around for longer
    # in order to speed up job start up time
    idle_selector_hash = collections.Counter()

    pods_by_node = {}
    for p in running_or_pending_assigned_pods:
        pods_by_node.setdefault(p.node_name, []).append(p)

    stats_time = time.time()

    for node in cached_managed_nodes:
        asg = utils.get_group_for_node(asgs, node)
        state = self.get_node_state(
            node, asg, pods_by_node.get(node.name, []),
            pods_to_schedule, running_insts_map, idle_selector_hash)

        logger.info("node: %-*s state: %s" % (75, node, state))
        self.stats.increment(
            'kubernetes.custom.node.state.{}'.format(
                state.replace('-', '_')),
            timestamp=stats_time)

        # state machine & why doesn't python have case?
        if state in (ClusterNodeState.POD_PENDING, ClusterNodeState.BUSY,
                     ClusterNodeState.GRACE_PERIOD,
                     ClusterNodeState.TYPE_GRACE_PERIOD,
                     ClusterNodeState.ASG_MIN_SIZE):
            # do nothing
            pass
        elif state == ClusterNodeState.UNDER_UTILIZED_DRAINABLE:
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not cordoned.', node)
                else:
                    node.cordon()
                    node.drain(pods_by_node.get(node.name, []))
            else:
                logger.info(
                    '[Dry run] Would have drained and cordoned %s', node)
        elif state == ClusterNodeState.IDLE_SCHEDULABLE:
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not cordoned.', node)
                else:
                    node.cordon()
            else:
                logger.info('[Dry run] Would have cordoned %s', node)
        elif state == ClusterNodeState.BUSY_UNSCHEDULABLE:
            # this is duplicated in original scale logic
            if not self.dry_run:
                node.uncordon()
            else:
                logger.info('[Dry run] Would have uncordoned %s', node)
        elif state == ClusterNodeState.IDLE_UNSCHEDULABLE:
            # remove it from asg
            if not self.dry_run:
                if not asg:
                    logger.warn(
                        'Cannot find ASG for node %s. Not terminated.', node)
                else:
                    asg.scale_node_in(node)
            else:
                logger.info('[Dry run] Would have scaled in %s', node)
        elif state == ClusterNodeState.INSTANCE_TERMINATED:
            if not self.dry_run:
                node.delete()
            else:
                logger.info('[Dry run] Would have deleted %s', node)
        elif state == ClusterNodeState.UNDER_UTILIZED_UNDRAINABLE:
            # noop for now
            pass
        else:
            raise Exception("Unhandled state: {}".format(state))

    logger.info("++++++++++++++ Maintaining Unmanaged Instances ++++++++++++++++")

    # these are instances that have been running for a while but are not
    # properly managed, i.e. they have not registered with kube or do not
    # have the proper metadata set
    managed_instance_ids = set(
        node.instance_id for node in cached_managed_nodes)
    for asg in asgs:
        unmanaged_instance_ids = list(asg.instance_ids - managed_instance_ids)
        if len(unmanaged_instance_ids) != 0:
            unmanaged_running_insts = self.get_running_instances_in_region(
                asg.region, unmanaged_instance_ids)
            for inst in unmanaged_running_insts:
                if (datetime.datetime.now(inst.launch_time.tzinfo) -
                        inst.launch_time).seconds >= self.instance_init_time:
                    if not self.dry_run:
                        asg.client.terminate_instance_in_auto_scaling_group(
                            InstanceId=inst.id,
                            ShouldDecrementDesiredCapacity=False)
                        logger.info("terminating unmanaged %s" % inst)
                        self.stats.increment(
                            'kubernetes.custom.node.state.unmanaged',
                            timestamp=stats_time)
                        # TODO: try to delete node from kube as well
                        # in the case where kubelet may have registered but
                        # node labels have not been applied yet, so it
                        # appears unmanaged
                    else:
                        logger.info(
                            '[Dry run] Would have terminated unmanaged %s',
                            inst)