def stale_servers_in_db(D, G):
    """Return the set of server IDs that exist in the DB but not in Ganeti.

    D maps server IDs to their DB operstate and G is keyed by the server IDs
    known to Ganeti. A server missing from G is stale unless it is in the
    'BUILD' state; a building server is only stale when it needs
    reconciliation and Ganeti raises GanetiApiError while its creation job
    or its instance is being looked up.
    """
    stale = set()
    missing_from_ganeti = set(D.keys()) - set(G.keys())

    for server_id in missing_from_ganeti:
        if D[server_id] != 'BUILD':
            stale.add(server_id)
            continue

        vm = VirtualMachine.objects.get(id=server_id)
        if not needs_reconciliation(vm):
            continue

        with pooled_rapi_client(vm) as client:
            try:
                status = client.GetJobStatus(vm.backendjobid)['status']
                if status not in ('queued', 'waiting', 'running'):
                    # The job is over: succeeding here means the server has
                    # just been created in Ganeti, so it is not stale.
                    client.GetInstance(utils.id_to_instance_name(server_id))
                # Otherwise the server is still building in Ganeti.
            except GanetiApiError:
                stale.add(server_id)

    return stale
def reconcile_orphan_servers(self):
    """Log servers found in Ganeti but not in the DB, optionally removing them.

    Orphans are the IDs in self.gnt_servers_keys that are absent from
    self.db_servers_keys. When the "fix_orphans" option is set, a
    DeleteInstance call is issued on self.client for every orphan.
    """
    orphans = self.gnt_servers_keys - self.db_servers_keys

    if not orphans:
        self.log.debug("No orphan servers at backend %s", self.backend)
        return

    orphan_list = ", ".join(str(server_id) for server_id in orphans)
    self.log.info("Found orphan servers %s at backend %s",
                  orphan_list, self.backend)

    if not self.options["fix_orphans"]:
        return

    for server_id in orphans:
        self.client.DeleteInstance(utils.id_to_instance_name(server_id))
    self.log.debug("Issued OP_INSTANCE_REMOVE for orphan servers.")
def detect_conflicting_ips(network):
    """Detect NIC's that have the same IP in the same network.

    Reads the ('ipv4', 'machine') pairs of every NIC of `network` and, for
    each IPv4 value held by more than one NIC, writes a single
    'D: Conflicting IP' line listing the instance names of all the machines
    involved. Nothing is written when every IP is unique. Returns None.
    """
    machine_ips = network.nics.all().values_list('ipv4', 'machine')

    # Group the machines by IP in one pass. An IP is queued for reporting
    # exactly once, at the moment its second owner shows up, so an IP shared
    # by three or more NICs is not reported repeatedly.
    machines_by_ip = {}
    conflicting_ips = []
    for ip, machine_id in machine_ips:
        owners = machines_by_ip.setdefault(ip, [])
        owners.append(machine_id)
        if len(owners) == 2:
            conflicting_ips.append(ip)

    for ip in conflicting_ips:
        machines = [utils.id_to_instance_name(machine_id)
                    for machine_id in machines_by_ip[ip]]
        write('D: Conflicting IP:%s Machines: %s\n' %
              (ip, ', '.join(machines)))
def detect_conflicting_ips(network):
    """Detect NIC's that have the same IP in the same network.

    Reads the ('ipv4', 'machine') pairs of every NIC of `network` and, for
    each IPv4 value held by more than one NIC, writes a single
    'D: Conflicting IP' line listing the instance names of all the machines
    involved. Nothing is written when every IP is unique. Returns None.
    """
    machine_ips = network.nics.all().values_list('ipv4', 'machine')

    # Group the machines by IP in one pass. An IP is queued for reporting
    # exactly once, at the moment its second owner shows up, so an IP shared
    # by three or more NICs is not reported repeatedly.
    machines_by_ip = {}
    conflicting_ips = []
    for ip, machine_id in machine_ips:
        owners = machines_by_ip.setdefault(ip, [])
        owners.append(machine_id)
        if len(owners) == 2:
            conflicting_ips.append(ip)

    for ip in conflicting_ips:
        machines = [utils.id_to_instance_name(machine_id)
                    for machine_id in machines_by_ip[ip]]
        write('D: Conflicting IP:%s Machines: %s\n' %
              (ip, ', '.join(machines)))
def handle(self, **options): verbosity = int(options['verbosity']) self._process_args(options) backend_id = options['backend-id'] backend = get_backend(backend_id) if backend_id else None G, GNics = reconciliation.get_instances_from_ganeti(backend) D = reconciliation.get_servers_from_db(backend) DBNics = reconciliation.get_nics_from_db(backend) # # Detect problems # if options['detect_stale']: stale = reconciliation.stale_servers_in_db(D, G) if len(stale) > 0: print >> sys.stderr, "Found the following stale server IDs: " print " " + "\n ".join([str(x) for x in stale]) elif verbosity == 2: print >> sys.stderr, "Found no stale server IDs in DB." if options['detect_orphans']: orphans = reconciliation.orphan_instances_in_ganeti(D, G) if len(orphans) > 0: print >> sys.stderr, "Found orphan Ganeti instances with IDs: " print " " + "\n ".join([str(x) for x in orphans]) elif verbosity == 2: print >> sys.stderr, "Found no orphan Ganeti instances." if options['detect_unsynced']: unsynced = reconciliation.unsynced_operstate(D, G) if len(unsynced) > 0: print >> sys.stderr, "The operstate of the following server" \ " IDs is out-of-sync:" print " " + "\n ".join([ "%d is %s in DB, %s in Ganeti" % (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced ]) elif verbosity == 2: print >> sys.stderr, "The operstate of all servers is in sync." if options['detect_build_errors']: build_errors = reconciliation.instances_with_build_errors(D, G) if len(build_errors) > 0: msg = "The os for the following server IDs was not build"\ " successfully:" print >> sys.stderr, msg print " " + "\n ".join(["%d" % x for x in build_errors]) elif verbosity == 2: print >> sys.stderr, "Found no instances with build errors." 
if options['detect_unsynced_nics']: def pretty_print_nics(nics): if not nics: print ''.ljust(18) + 'None' for index, info in nics.items(): print ''.ljust(18) + 'nic/' + str(index) +\ ': MAC: %s, IP: %s, Network: %s' % \ (info['mac'], info['ipv4'], info['network']) unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) if len(unsynced_nics) > 0: msg = "The NICs of the servers with the following IDs are"\ " unsynced:" print >> sys.stderr, msg for id, nics in unsynced_nics.items(): print ''.ljust(2) + '%6d:' % id print ''.ljust(8) + '%8s:' % 'DB' pretty_print_nics(nics[0]) print ''.ljust(8) + '%8s:' % 'Ganeti' pretty_print_nics(nics[1]) elif verbosity == 2: print >> sys.stderr, "All instance nics are synced." # # Then fix them # if options['fix_stale'] and len(stale) > 0: print >> sys.stderr, \ "Simulating successful Ganeti removal for %d " \ "servers in the DB:" % len(stale) for vm in VirtualMachine.objects.filter(pk__in=stale): event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode='OP_INSTANCE_REMOVE', status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_orphans'] and len(orphans) > 0: print >> sys.stderr, \ "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ len(orphans) for id in orphans: try: vm = VirtualMachine.objects.get(pk=id) with pooled_rapi_client(vm) as client: client.DeleteInstance(utils.id_to_instance_name(id)) except VirtualMachine.DoesNotExist: print >> sys.stderr, "No entry for VM %d in DB !!" 
% id print >> sys.stderr, " ...done" if options['fix_unsynced'] and len(unsynced) > 0: print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ len(unsynced) for id, db_state, ganeti_up in unsynced: vm = VirtualMachine.objects.get(pk=id) opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ else "OP_INSTANCE_SHUTDOWN" event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode=opcode, status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_build_errors'] and len(build_errors) > 0: print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ len(build_errors) for id in build_errors: vm = VirtualMachine.objects.get(pk=id) event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode="OP_INSTANCE_CREATE", status='error', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ len(unsynced_nics) for id, nics in unsynced_nics.items(): vm = VirtualMachine.objects.get(pk=id) nics = nics[1] # Ganeti nics if nics == {}: # No nics vm.nics.all.delete() continue for index, nic in nics.items(): net_id = utils.id_from_network_name(nic['network']) subnet6 = Network.objects.get(id=net_id).subnet6 # Produce ipv6 ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None nic['ipv6'] = ipv6 # Rename ipv4 to ip nic['ip'] = nic['ipv4'] # Dict to sorted list final_nics = [] nics_keys = nics.keys() nics_keys.sort() for i in nics_keys: if nics[i]['network']: final_nics.append(nics[i]) else: print 'Network of nic %d of vm %s is None. ' \ 'Can not reconcile' % (i, vm.backend_vm_id) event_time = datetime.datetime.now() backend_mod.process_net_status(vm=vm, etime=event_time, nics=final_nics) print >> sys.stderr, " ...done"
def test_iname_from_id(self):
    # Server ID 42 is expected to map to the instance name 'snf-42'.
    expected_name = 'snf-42'
    actual_name = utils.id_to_instance_name(42)
    self.assertEqual(actual_name, expected_name)
def handle(self, **options): verbosity = int(options['verbosity']) self._process_args(options) backend_id = options['backend-id'] backend = get_backend(backend_id) if backend_id else None G, GNics = reconciliation.get_instances_from_ganeti(backend) D = reconciliation.get_servers_from_db(backend) DBNics = reconciliation.get_nics_from_db(backend) # # Detect problems # if options['detect_stale']: stale = reconciliation.stale_servers_in_db(D, G) if len(stale) > 0: print >> sys.stderr, "Found the following stale server IDs: " print " " + "\n ".join( [str(x) for x in stale]) elif verbosity == 2: print >> sys.stderr, "Found no stale server IDs in DB." if options['detect_orphans']: orphans = reconciliation.orphan_instances_in_ganeti(D, G) if len(orphans) > 0: print >> sys.stderr, "Found orphan Ganeti instances with IDs: " print " " + "\n ".join( [str(x) for x in orphans]) elif verbosity == 2: print >> sys.stderr, "Found no orphan Ganeti instances." if options['detect_unsynced']: unsynced = reconciliation.unsynced_operstate(D, G) if len(unsynced) > 0: print >> sys.stderr, "The operstate of the following server" \ " IDs is out-of-sync:" print " " + "\n ".join( ["%d is %s in DB, %s in Ganeti" % (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced]) elif verbosity == 2: print >> sys.stderr, "The operstate of all servers is in sync." if options['detect_build_errors']: build_errors = reconciliation.instances_with_build_errors(D, G) if len(build_errors) > 0: msg = "The os for the following server IDs was not build"\ " successfully:" print >> sys.stderr, msg print " " + "\n ".join( ["%d" % x for x in build_errors]) elif verbosity == 2: print >> sys.stderr, "Found no instances with build errors." 
if options['detect_unsynced_nics']: def pretty_print_nics(nics): if not nics: print ''.ljust(18) + 'None' for index, info in nics.items(): print ''.ljust(18) + 'nic/' + str(index) +\ ': MAC: %s, IP: %s, Network: %s' % \ (info['mac'], info['ipv4'], info['network']) unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) if len(unsynced_nics) > 0: msg = "The NICs of the servers with the following IDs are"\ " unsynced:" print >> sys.stderr, msg for id, nics in unsynced_nics.items(): print ''.ljust(2) + '%6d:' % id print ''.ljust(8) + '%8s:' % 'DB' pretty_print_nics(nics[0]) print ''.ljust(8) + '%8s:' % 'Ganeti' pretty_print_nics(nics[1]) elif verbosity == 2: print >> sys.stderr, "All instance nics are synced." # # Then fix them # if options['fix_stale'] and len(stale) > 0: print >> sys.stderr, \ "Simulating successful Ganeti removal for %d " \ "servers in the DB:" % len(stale) for vm in VirtualMachine.objects.filter(pk__in=stale): event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode='OP_INSTANCE_REMOVE', status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_orphans'] and len(orphans) > 0: print >> sys.stderr, \ "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ len(orphans) for id in orphans: try: vm = VirtualMachine.objects.get(pk=id) with pooled_rapi_client(vm) as client: client.DeleteInstance(utils.id_to_instance_name(id)) except VirtualMachine.DoesNotExist: print >> sys.stderr, "No entry for VM %d in DB !!" 
% id print >> sys.stderr, " ...done" if options['fix_unsynced'] and len(unsynced) > 0: print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ len(unsynced) for id, db_state, ganeti_up in unsynced: vm = VirtualMachine.objects.get(pk=id) opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ else "OP_INSTANCE_SHUTDOWN" event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode=opcode, status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_build_errors'] and len(build_errors) > 0: print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ len(build_errors) for id in build_errors: vm = VirtualMachine.objects.get(pk=id) event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode="OP_INSTANCE_CREATE", status='error', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ len(unsynced_nics) for id, nics in unsynced_nics.items(): vm = VirtualMachine.objects.get(pk=id) nics = nics[1] # Ganeti nics if nics == {}: # No nics vm.nics.all.delete() continue for index, nic in nics.items(): net_id = utils.id_from_network_name(nic['network']) subnet6 = Network.objects.get(id=net_id).subnet6 # Produce ipv6 ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None nic['ipv6'] = ipv6 # Rename ipv4 to ip nic['ip'] = nic['ipv4'] # Dict to sorted list final_nics = [] nics_keys = nics.keys() nics_keys.sort() for i in nics_keys: if nics[i]['network']: final_nics.append(nics[i]) else: print 'Network of nic %d of vm %s is None. ' \ 'Can not reconcile' % (i, vm.backend_vm_id) event_time = datetime.datetime.now() backend_mod.process_net_status(vm=vm, etime=event_time, nics=final_nics) print >> sys.stderr, " ...done"