def filter_poe_rebootable(self, requester, nodes,
                          warn_unknown_connectivity, warn_poe_forbidden):
    """Sort nodes according to whether they may be rebooted through PoE.

    Each node is looked up in the topology by its mac address:
    - no switch information found: the node goes to the "unknown" list;
    - switch found and its `poe_reboot_nodes` attribute equals True:
      the node goes to the "ok" list;
    - switch found otherwise: the node is recorded as "forbidden",
      grouped under the name of that switch.

    When the matching warn_* flag is set and the category is not empty,
    a sentence naming the nodes is written to requester.stderr (one
    sentence per switch for forbidden nodes).

    Returns a tuple (nodes_ok, nodes_unknown, nodes_forbidden): two lists
    of nodes and a mapping {switch name: list of nodes}.
    """
    rebootable, unlocated = [], []
    forbidden_per_switch = defaultdict(list)
    for node in nodes:
        # the second item (switch port) is of no use here
        switch, _ = self.topology.get_connectivity_info(node.mac)
        if not switch:
            unlocated.append(node)
        elif switch.poe_reboot_nodes == True:
            # the equality test (rather than truthiness) is kept on purpose,
            # to leave the classification strictly unchanged
            rebootable.append(node)
        else:
            forbidden_per_switch[switch.name].append(node)
    if unlocated and warn_unknown_connectivity:
        names = [n.name for n in unlocated]
        requester.stderr.write(
            format_sentence_about_nodes(MSG_CONNECTIVITY_UNKNOWN, names))
    if forbidden_per_switch and warn_poe_forbidden:
        for sw_name, sw_nodes in forbidden_per_switch.items():
            explain = MSG_POE_REBOOT_UNABLE_EXPLAIN % dict(sw_name=sw_name)
            names = [n.name for n in sw_nodes]
            requester.stderr.write(
                format_sentence_about_nodes(explain, names))
    return rebootable, unlocated, forbidden_per_switch
def filter_poe_rebootable(self, requester, nodes,
                          warn_unknown_connectivity, warn_poe_forbidden):
    """Classify nodes as PoE-rebootable, of unknown location, or forbidden.

    The topology is queried with the mac address of each node. A node
    for which no switch information is returned is "unknown"; a node
    whose switch has `poe_reboot_nodes` equal to True is "ok"; any other
    node is "forbidden" and filed under the name of its switch.

    If warn_unknown_connectivity is set, unknown nodes are reported on
    requester.stderr; if warn_poe_forbidden is set, forbidden nodes are
    reported there too, switch by switch.

    Returns (nodes_ok, nodes_unknown, nodes_forbidden), the last item
    being a mapping from switch name to a list of nodes.
    """
    def warn(sentence, about_nodes):
        # write one sentence naming the given nodes on the error stream
        requester.stderr.write(format_sentence_about_nodes(
            sentence, [n.name for n in about_nodes]))

    ok_nodes = []
    unknown_nodes = []
    forbidden_nodes = defaultdict(list)
    for node in nodes:
        sw_info, _port = self.topology.get_connectivity_info(node.mac)
        # select the list this node belongs to, then file it
        if sw_info:
            if sw_info.poe_reboot_nodes == True:
                target = ok_nodes
            else:
                target = forbidden_nodes[sw_info.name]
        else:
            target = unknown_nodes
        target.append(node)

    if len(unknown_nodes) > 0 and warn_unknown_connectivity:
        warn(MSG_CONNECTIVITY_UNKNOWN, unknown_nodes)
    if len(forbidden_nodes) > 0 and warn_poe_forbidden:
        for sw_name, sw_nodes in forbidden_nodes.items():
            warn(MSG_POE_REBOOT_UNABLE_EXPLAIN % dict(sw_name=sw_name),
                 sw_nodes)
    return ok_nodes, unknown_nodes, forbidden_nodes
def reply_requester(requester, task_callback, hard_only,
                    vmrebooted, softrebooted, poerebooted,
                    softreboot_errors, poereboot_errors, **env):
    """Report the outcome of a reboot operation, then unblock the client.

    requester         -- object whose stdout / stderr streams get the report
    task_callback     -- called with None once the report has been written
    hard_only         -- when true, no soft-reboot was attempted, so only
                         PoE-reboot errors are reported
    vmrebooted, softrebooted, poerebooted
                      -- names of the nodes rebooted by each method
    softreboot_errors, poereboot_errors
                      -- mappings {node name: error} for each method
    env               -- extra keyword arguments, accepted and ignored

    When poereboot_errors is empty the report is just 'Done.'. Otherwise
    the rebooted nodes are listed on stdout and the failing nodes are
    listed on stderr, grouped by identical error(s).
    """
    if len(poereboot_errors) == 0:
        # every node could be rebooted: keep it short
        requester.stdout.write('Done.\n')
        task_callback(None)     # unblock client
        return

    # some nodes could not be rebooted
    rebooted = tuple(vmrebooted) + tuple(softrebooted) + tuple(poerebooted)
    if rebooted:
        requester.stdout.write(
            format_sentence_about_nodes('%s: OK.\n', rebooted))

    # group failing nodes by the error(s) they got, so that nodes sharing
    # the same failure are reported in one sentence
    failures = defaultdict(list)
    for node_name in poereboot_errors:
        if hard_only:
            key = (poereboot_errors[node_name],)
        else:
            # soft-reboot was tried too, so its error is part of the key
            key = (poereboot_errors[node_name], softreboot_errors[node_name])
        failures[key].append(node_name)

    for key, node_names in failures.items():
        if hard_only:
            explain = 'failed PoE-reboot (%s)' % key[0]
        else:
            poe_error, soft_error = key
            explain = 'failed soft-reboot (%s) and PoE-reboot (%s)' % (
                soft_error, poe_error)
        requester.stderr.write(
            format_sentence_about_nodes('%s: ' + explain + '\n', node_names))

    if not hard_only:
        # soft-reboot errors were reported: give a hint about them
        requester.stdout.write(
            'note: soft-reboot only works when node is fully booted.\n')
    task_callback(None)         # unblock client
def softreboot(self, requester, node_set, hide_issues):
    """Send a 'REBOOT' request to each node of node_set.

    requester   -- object whose stdout / stderr streams get the messages
    node_set    -- node set specification, resolved by parse_node_set()
    hide_issues -- when true, failed requests are not reported on
                   requester.stderr (the flag is also passed to connect())

    Returns None if node_set could not be parsed, otherwise a pair
    (nodes_ok, nodes_ko) of device sets built by
    self.devices.as_device_set(): the nodes that accepted the request
    and those that could not be reached or rejected it.
    """
    nodes = self.parse_node_set(requester, node_set)
    if nodes is None:
        return None     # error already reported
    # first, we pass the 'booted' flag of all nodes to false
    # (if we manage to reboot them, they will be unreachable
    # for a little time; if we do not manage to reboot them,
    # this probably means they are down, thus not booted...)
    for node in nodes:
        self.db.update('nodes', 'mac', mac=node.mac, booted=False)
    self.db.commit()
    nodes_ko, nodes_ok = [], []
    for node in nodes:
        link = self.connect(requester, node.name, hide_issues)
        if link is None:
            nodes_ko.append(node.name)
            continue
        # res[0] is the success flag, res[1] the failure description
        res = link.request('REBOOT')
        # drop our reference to the link as soon as the request is done
        del link
        if not res[0]:
            if not hide_issues:
                requester.stderr.write(
                    'Soft-reboot request to %s failed: %s\n' %
                    (node.name, res[1]))
            nodes_ko.append(node.name)
            continue
        nodes_ok.append(node.name)
    if nodes_ok:
        requester.stdout.write(
            format_sentence_about_nodes('%s was(were) rebooted.', nodes_ok)
            + '\n')
    # return nodes OK and KO in node_set form
    return (self.devices.as_device_set(nodes_ok),
            self.devices.as_device_set(nodes_ko))
def softreboot(self, requester, node_set, hide_issues):
    """Ask every node of node_set to reboot itself ('REBOOT' request).

    requester   -- object whose stdout / stderr streams get the messages
    node_set    -- node set specification, resolved by parse_node_set()
    hide_issues -- passed to connect(); when true, a failed request is
                   not reported on requester.stderr

    Returns None when node_set cannot be parsed. Otherwise returns the
    pair (nodes_ok, nodes_ko), both converted with
    self.devices.as_device_set(): nodes_ok are the nodes whose request
    succeeded, nodes_ko those with no link or a failed request.
    """
    nodes = self.parse_node_set(requester, node_set)
    if nodes is None:
        return None     # error already reported
    # first, we pass the 'booted' flag of all nodes to false
    # (if we manage to reboot them, they will be unreachable
    # for a little time; if we do not manage to reboot them,
    # this probably means they are down, thus not booted...)
    for node in nodes:
        self.db.update('nodes', 'mac', mac=node.mac, booted=False)
    self.db.commit()
    nodes_ko, nodes_ok = [], []
    for node in nodes:
        link = self.connect(requester, node.name, hide_issues)
        if link is None:
            # no way to talk to this node
            nodes_ko.append(node.name)
            continue
        res = link.request('REBOOT')
        # the link is not needed anymore, release our reference
        del link
        if res[0]:
            nodes_ok.append(node.name)
            continue
        # the request failed: res[1] describes why
        if not hide_issues:
            requester.stderr.write(
                'Soft-reboot request to %s failed: %s\n' %
                (node.name, res[1]))
        nodes_ko.append(node.name)
    if nodes_ok:
        requester.stdout.write(format_sentence_about_nodes(
            '%s was(were) rebooted.', nodes_ok) + '\n')
    # return nodes OK and KO in node_set form
    return (self.devices.as_device_set(nodes_ok),
            self.devices.as_device_set(nodes_ko))
def hard_reboot_vnodes(self, requester, node_set):
    """Restart the virtual nodes of node_set.

    requester -- object whose stdout / stderr streams get the messages
    node_set  -- node set specification, resolved by parse_node_set()

    Nodes that are not virtual are reported on requester.stderr and
    skipped. The names of the restarted nodes are listed on
    requester.stdout. Always returns None.
    """
    nodes = self.parse_node_set(requester, node_set)
    if nodes is None:
        return None     # error already reported
    nodes_ok = []
    for node in nodes:
        if not node.virtual:
            requester.stderr.write(MSG_NOT_VIRTUAL % node.name)
            continue
        # terminate VM by quitting screen session
        self.try_kill_vnode(node.name)
        # restart VM
        self.start_vnode(node)
        nodes_ok.append(node.name)
    if nodes_ok:
        requester.stdout.write(
            format_sentence_about_nodes('%s was(were) rebooted.', nodes_ok)
            + '\n')
def hard_reboot_vnodes(self, requester, node_set):
    """Kill and start again each virtual node of node_set.

    requester -- object whose stdout / stderr streams get the messages
    node_set  -- node set specification, resolved by parse_node_set()

    A node which is not virtual only gets an error message on
    requester.stderr. Once all nodes have been processed, the restarted
    ones are listed in a sentence on requester.stdout.
    The function has no return value (None in all cases).
    """
    nodes = self.parse_node_set(requester, node_set)
    if nodes is None:
        return None     # error already reported
    restarted = []
    for node in nodes:
        if node.virtual:
            # terminate VM by quitting screen session
            self.try_kill_vnode(node.name)
            # restart VM
            self.start_vnode(node)
            restarted.append(node.name)
        else:
            requester.stderr.write(MSG_NOT_VIRTUAL % node.name)
    if restarted:
        requester.stdout.write(format_sentence_about_nodes(
            '%s was(were) rebooted.', restarted) + '\n')
def set_image(self, requester, nodes, image_name):
    """Associate an image to the given nodes and tell the requester.

    requester  -- object whose stdout / stderr streams get the messages
    nodes      -- the nodes to update (their mac, model and name are used)
    image_name -- either the keyword 'default' (each node then gets the
                  default image of its model, as given by the store) or
                  a name resolved to an image of the requester by the
                  store

    Returns False, before anything is modified, if the store returns no
    image for image_name or if the image does not support the model of
    some of the nodes (an explanation is then written to
    requester.stderr). Otherwise the database is updated,
    the image mounts, tftp and dhcpd are refreshed, a confirmation is
    written to requester.stdout and True is returned.
    """
    # if image tag is specified, let's get its fullname
    if image_name != 'default':
        image = self.store.get_user_image_from_name(requester, image_name)
        if image is None:
            return False
        image_compatible_models = set(image.get_node_models())
        node_models = {node.model for node in nodes}
        incompatible_models = node_models - image_compatible_models
        if incompatible_models:
            sentence = format_sentence(MSG_INCOMPATIBLE_MODELS,
                                       incompatible_models, None,
                                       'node model', 'node models')
            requester.stderr.write(sentence)
            return False
        image_fullnames = {node.mac: image.fullname for node in nodes}
    else:
        # since the 'default' keyword was specified, we might have to associate
        # different images depending on the type of each WalT node.
        # we compute the appropriate image fullname here.
        image_fullnames = {
            node.mac: self.store.get_default_image_fullname(node.model)
            for node in nodes
        }
    # let's update the database about which node is mounting what
    for node_mac, image_fullname in image_fullnames.items():
        self.db.update('nodes', 'mac', mac=node_mac, image=image_fullname)
    self.store.update_image_mounts(requester=requester)
    tftp.update(self.db, self.store)
    self.db.commit()
    self.dhcpd.update()
    # inform requester
    if image_name == 'default':
        sentence = MSG_BOOT_DEFAULT_IMAGE
    else:
        sentence = '%s will now boot ' + image_name + '.'
    requester.stdout.write(
        format_sentence_about_nodes(sentence, [n.name for n in nodes])
        + '\n')
    return True
def setpower(self, requester, node_set, poweron, warn_poe_issues):
    """Power nodes on or off through the PoE port of their switch.

    requester       -- object whose stdout / stderr streams get the messages
    node_set        -- node set specification, resolved by parse_node_set()
    poweron         -- true to power the nodes on, false to power them off
    warn_poe_issues -- when true, nodes that cannot be handled (unknown
                       connectivity, or PoE control not allowed on their
                       switch) are reported by filter_poe_rebootable()

    Returns the nodes whose power state was actually changed, as given
    by self.devices.as_device_set(), or None if there is none (or if
    node_set could not be parsed).
    """
    # we have to verify that:
    # - we know where each node is connected (PoE switch port)
    # - PoE remote control is allowed on this switch
    nodes = self.parse_node_set(requester, node_set)
    if nodes is None:
        return None     # error already reported
    nodes_ok, _unknown, _forbidden = self.filter_poe_rebootable(
        requester, nodes, warn_poe_issues, warn_poe_issues)
    if not nodes_ok:
        return None
    # otherwise, at least one node can be reached, so do it.
    s_state = 'on' if poweron else 'off'
    nodes_really_ok = []
    for node in nodes_ok:
        try:
            self.topology.setpower(node.mac, poweron)
        except snmp.SNMPException:
            # name the switch involved in the error message
            sw_info, _port = self.topology.get_connectivity_info(node.mac)
            requester.stderr.write(MSG_POE_REBOOT_FAILED % dict(
                node_name=node.name,
                state=s_state,
                sw_name=sw_info.name,
                sw_ip=sw_info.ip))
        else:
            nodes_really_ok.append(node)
    if not nodes_really_ok:
        return None
    names = [n.name for n in nodes_really_ok]
    requester.stdout.write(
        format_sentence_about_nodes(
            '%s was(were) powered ' + s_state + '.', names)
        + '\n')
    # return successful nodes as a node_set
    return self.devices.as_device_set(names)
def set_image(self, requester, nodes, image_name):
    """Make the given nodes boot another image and notify the requester.

    requester  -- object whose stdout / stderr streams get the messages
    nodes      -- the nodes to update (their mac, model and name are used)
    image_name -- 'default' to give each node the default image of its
                  model (as computed by the store), or the name of an
                  image of the requester, resolved by the store

    Returns False without modifying anything when the store returns no
    image for image_name, or when the image is not compatible with the
    model of at least one node (this case is explained on
    requester.stderr). Returns True after the database, the image
    mounts, tftp and dhcpd have been updated and a confirmation was
    written to requester.stdout.
    """
    use_default = (image_name == 'default')
    if use_default:
        # since the 'default' keyword was specified, we might have to associate
        # different images depending on the type of each WalT node.
        # we compute the appropriate image fullname here.
        image_fullnames = {
            node.mac: self.store.get_default_image_fullname(node.model)
            for node in nodes
        }
    else:
        # an image tag is specified, let's get its fullname
        image = self.store.get_user_image_from_name(requester, image_name)
        if image is None:
            return False
        incompatible_models = (
            {node.model for node in nodes} - set(image.get_node_models()))
        if incompatible_models:
            requester.stderr.write(format_sentence(
                MSG_INCOMPATIBLE_MODELS, incompatible_models, None,
                'node model', 'node models'))
            return False
        image_fullnames = {node.mac: image.fullname for node in nodes}
    # let's update the database about which node is mounting what
    for node_mac, image_fullname in image_fullnames.items():
        self.db.update('nodes', 'mac', mac=node_mac, image=image_fullname)
    self.store.update_image_mounts(requester=requester)
    tftp.update(self.db)
    self.db.commit()
    self.dhcpd.update()
    # inform requester
    if use_default:
        sentence = MSG_BOOT_DEFAULT_IMAGE
    else:
        sentence = '%s will now boot ' + image_name + '.'
    requester.stdout.write(format_sentence_about_nodes(
        sentence, [n.name for n in nodes]) + '\n')
    return True
def setpower(self, requester, node_set, poweron, warn_poe_issues):
    """Switch the PoE power of nodes on or off.

    requester       -- object whose stdout / stderr streams get the messages
    node_set        -- node set specification, resolved by parse_node_set()
    poweron         -- requested state: true for 'on', false for 'off'
    warn_poe_issues -- passed twice to filter_poe_rebootable(), to enable
                       both of its warnings (unknown connectivity and
                       PoE control forbidden on the switch)

    Each node accepted by filter_poe_rebootable() is powered on/off
    through self.topology.setpower(); an SNMP failure is reported on
    requester.stderr and the node is left out of the result.

    Returns the successfully handled nodes in the form given by
    self.devices.as_device_set(), or None when there is no such node
    (including the case where node_set could not be parsed).
    """
    # we have to verify that:
    # - we know where each node is connected (PoE switch port)
    # - PoE remote control is allowed on this switch
    nodes = self.parse_node_set(requester, node_set)
    if nodes is None:
        return None     # error already reported
    candidates, _, _ = self.filter_poe_rebootable(
        requester, nodes, warn_poe_issues, warn_poe_issues)
    if not candidates:
        return None
    # otherwise, at least one node can be reached, so do it.
    s_state = 'on' if poweron else 'off'
    done_names = []
    for node in candidates:
        try:
            self.topology.setpower(node.mac, poweron)
        except snmp.SNMPException:
            # look the switch up again, in order to name it in the message
            sw_info, _ = self.topology.get_connectivity_info(node.mac)
            requester.stderr.write(MSG_POE_REBOOT_FAILED % dict(
                node_name=node.name,
                state=s_state,
                sw_name=sw_info.name,
                sw_ip=sw_info.ip))
            continue
        done_names.append(node.name)
    if not done_names:
        return None
    requester.stdout.write(format_sentence_about_nodes(
        '%s was(were) powered ' + s_state + '.', done_names) + '\n')
    # return successful nodes as a node_set
    return self.devices.as_device_set(done_names)
def reboot_nodes(server, node_set, hard=False):
    """Reboot the nodes of node_set, softly first, then harder if allowed.

    server   -- server API object (set_busy_label, softreboot,
                virtual_or_physical, hard_reboot_vnodes are called on it)
    node_set -- the set of nodes to reboot
    hard     -- when false, nodes are waited for first and a failed
                soft-reboot is only reported; when true, nodes that
                failed to soft-reboot are hard-rebooted (virtual nodes
                are restarted, physical nodes are handled through
                WalTNode.PoETemporarilyOff). This flag is also passed as
                the second argument of server.softreboot().

    Returns None.
    """
    if not hard:
        WalTNode.wait_for_nodes(server, node_set)
    server.set_busy_label('Trying soft-reboot')
    result = server.softreboot(node_set, hard)
    if result is None:
        # softreboot gave no (ok, ko) pair; presumably the node set was
        # invalid and the server already reported it, so stop here
        # instead of failing on the unpacking below
        return
    _, nodes_ko = result
    if len(nodes_ko) == 0:
        return
    if not hard:
        print(format_sentence_about_nodes(
            MSG_SOFT_REBOOT_FAILED, nodes_ko.split(',')))
        print(MSG_SOFT_REBOOT_FAILED_TIP % dict(nodes_ko=nodes_ko))
        return
    # soft-reboot failed and --hard was specified:
    # try to power-cycle physical nodes using PoE and restart VM of
    # virtual nodes
    virtnodes, physnodes = server.virtual_or_physical(nodes_ko)
    if len(virtnodes) > 0:
        server.set_busy_label('Hard-rebooting virtual nodes')
        server.hard_reboot_vnodes(virtnodes)
    if len(physnodes) > 0:
        server.set_busy_label('Trying hard-reboot (PoE)')
        with WalTNode.PoETemporarilyOff(server, physnodes) as really_off:
            if really_off:
                # wait inside the context manager before leaving it
                time.sleep(POE_REBOOT_DELAY)
def reboot_nodes(server, node_set, hard=False):
    """Try to soft-reboot nodes; fall back to a hard reboot if requested.

    server   -- server API object (set_busy_label, softreboot,
                virtual_or_physical, hard_reboot_vnodes are called on it)
    node_set -- the set of nodes to reboot
    hard     -- false: wait for the nodes first, and only print an
                explanation for those that failed to soft-reboot;
                true: do not wait, and hard-reboot the nodes that failed
                to soft-reboot. The flag is also given as second
                argument to server.softreboot().

    Returns None.
    """
    if not hard:
        WalTNode.wait_for_nodes(server, node_set)
    server.set_busy_label('Trying soft-reboot')
    outcome = server.softreboot(node_set, hard)
    if outcome is None:
        # no (ok, ko) pair came back (presumably an invalid node set,
        # already reported by the server): unpacking it would raise
        return
    nodes_ko = outcome[1]
    # if it fails and --hard was specified,
    # try to power-cycle physical nodes using PoE and restart VM of
    # virtual nodes
    if len(nodes_ko) > 0:
        if hard:
            virtnodes, physnodes = server.virtual_or_physical(nodes_ko)
            if len(virtnodes) > 0:
                server.set_busy_label('Hard-rebooting virtual nodes')
                server.hard_reboot_vnodes(virtnodes)
            if len(physnodes) > 0:
                server.set_busy_label('Trying hard-reboot (PoE)')
                with WalTNode.PoETemporarilyOff(
                        server, physnodes) as really_off:
                    if really_off:
                        # pause before the context manager exits
                        time.sleep(POE_REBOOT_DELAY)
        else:
            failed_names = nodes_ko.split(',')
            print(format_sentence_about_nodes(
                MSG_SOFT_REBOOT_FAILED, failed_names))
            print(MSG_SOFT_REBOOT_FAILED_TIP % dict(nodes_ko=nodes_ko))