def reconcile_unsynced_nics(self, server_id, db_server, gnt_server):
    """Compare the NICs of a server in the DB with the ones Ganeti reports.

    DB NICs that are in state "BUILD" and were created at or before
    ``self.event_time - BUILDING_NIC_TIMEOUT`` are left out of the
    comparison. The remaining DB NICs (ordered by id) are paired with the
    parsed Ganeti NICs (sorted by key); the two sides count as unsynced
    when their sizes differ, or when any pair has different ids or fails
    ``backend_mod.nics_are_equal``.

    When a difference is found it is logged, and, if the
    ``fix_unsynced_nics`` option is set, the raw Ganeti NICs are handed to
    ``backend_mod.process_net_status`` for the locked server.
    """
    cutoff = self.event_time - BUILDING_NIC_TIMEOUT
    stale_builds = dict(state="BUILD", created__lte=cutoff)
    db_nics = db_server.nics.exclude(**stale_builds).order_by("id")

    gnt_nics = gnt_server["nics"]
    gnt_nics_parsed = backend_mod.process_ganeti_nics(gnt_nics)

    # Note: the size check uses the raw Ganeti NICs, not the parsed ones.
    nics_changed = len(db_nics) != len(gnt_nics)
    gnt_items = sorted(gnt_nics_parsed.items())

    # Walk both sides in lockstep and stop at the first mismatching pair.
    for db_nic, (gnt_nic_id, gnt_nic) in zip(db_nics, gnt_items):
        if db_nic.id != gnt_nic_id or \
                not backend_mod.nics_are_equal(db_nic, gnt_nic):
            nics_changed = True
            break

    if not nics_changed:
        return

    msg = "Found unsynced NICs for server '%s'.\n"\
          "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s"
    separator = "\n\t\t"
    db_nics_str = separator.join(map(format_db_nic, db_nics))
    gnt_nics_str = separator.join(map(format_gnt_nic, gnt_items))
    self.log.info(msg, server_id, db_nics_str, gnt_nics_str)

    if self.options["fix_unsynced_nics"]:
        vm = get_locked_server(server_id)
        backend_mod.process_net_status(vm=vm, etime=self.event_time,
                                       nics=gnt_nics)
def reconcile_unsynced_nics(self, server_id, db_server, gnt_server):
    """Detect (and optionally repair) NIC drift between the DB and Ganeti.

    The DB side is ``db_server.nics`` ordered by id, without the NICs that
    are in state "BUILD" and were created at or before
    ``self.event_time - BUILDING_NIC_TIMEOUT``. The Ganeti side is
    ``gnt_server["nics"]`` run through ``backend_mod.process_ganeti_nics``
    and sorted by key.

    A difference in size, in ids, or a pair rejected by
    ``backend_mod.nics_are_equal`` marks the server as unsynced: both NIC
    lists are logged and, when the ``fix_unsynced_nics`` option is on,
    ``backend_mod.process_net_status`` is called with the raw Ganeti NICs.
    """
    building_deadline = self.event_time - BUILDING_NIC_TIMEOUT
    db_nics = db_server.nics.exclude(state="BUILD",
                                     created__lte=building_deadline)
    db_nics = db_nics.order_by("id")

    gnt_nics = gnt_server["nics"]
    gnt_nics_parsed = backend_mod.process_ganeti_nics(gnt_nics)

    # The size comparison is done against the raw Ganeti NICs.
    nics_changed = len(db_nics) != len(gnt_nics)
    sorted_gnt_nics = sorted(gnt_nics_parsed.items())

    # Pairwise comparison; the first differing pair settles the question.
    for db_nic, gnt_entry in zip(db_nics, sorted_gnt_nics):
        gnt_nic_id, gnt_nic = gnt_entry
        same_id = db_nic.id == gnt_nic_id
        if not (same_id and backend_mod.nics_are_equal(db_nic, gnt_nic)):
            nics_changed = True
            break

    if nics_changed:
        msg = "Found unsynced NICs for server '%s'.\n"\
              "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s"
        db_nics_str = "\n\t\t".join(map(format_db_nic, db_nics))
        gnt_nics_str = "\n\t\t".join(map(format_gnt_nic, sorted_gnt_nics))
        self.log.info(msg, server_id, db_nics_str, gnt_nics_str)

        if self.options["fix_unsynced_nics"]:
            vm = get_locked_server(server_id)
            backend_mod.process_net_status(vm=vm, etime=self.event_time,
                                           nics=gnt_nics)
def handle(self, **options): verbosity = int(options['verbosity']) self._process_args(options) backend_id = options['backend-id'] backend = get_backend(backend_id) if backend_id else None G, GNics = reconciliation.get_instances_from_ganeti(backend) D = reconciliation.get_servers_from_db(backend) DBNics = reconciliation.get_nics_from_db(backend) # # Detect problems # if options['detect_stale']: stale = reconciliation.stale_servers_in_db(D, G) if len(stale) > 0: print >> sys.stderr, "Found the following stale server IDs: " print " " + "\n ".join([str(x) for x in stale]) elif verbosity == 2: print >> sys.stderr, "Found no stale server IDs in DB." if options['detect_orphans']: orphans = reconciliation.orphan_instances_in_ganeti(D, G) if len(orphans) > 0: print >> sys.stderr, "Found orphan Ganeti instances with IDs: " print " " + "\n ".join([str(x) for x in orphans]) elif verbosity == 2: print >> sys.stderr, "Found no orphan Ganeti instances." if options['detect_unsynced']: unsynced = reconciliation.unsynced_operstate(D, G) if len(unsynced) > 0: print >> sys.stderr, "The operstate of the following server" \ " IDs is out-of-sync:" print " " + "\n ".join([ "%d is %s in DB, %s in Ganeti" % (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced ]) elif verbosity == 2: print >> sys.stderr, "The operstate of all servers is in sync." if options['detect_build_errors']: build_errors = reconciliation.instances_with_build_errors(D, G) if len(build_errors) > 0: msg = "The os for the following server IDs was not build"\ " successfully:" print >> sys.stderr, msg print " " + "\n ".join(["%d" % x for x in build_errors]) elif verbosity == 2: print >> sys.stderr, "Found no instances with build errors." 
if options['detect_unsynced_nics']: def pretty_print_nics(nics): if not nics: print ''.ljust(18) + 'None' for index, info in nics.items(): print ''.ljust(18) + 'nic/' + str(index) +\ ': MAC: %s, IP: %s, Network: %s' % \ (info['mac'], info['ipv4'], info['network']) unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) if len(unsynced_nics) > 0: msg = "The NICs of the servers with the following IDs are"\ " unsynced:" print >> sys.stderr, msg for id, nics in unsynced_nics.items(): print ''.ljust(2) + '%6d:' % id print ''.ljust(8) + '%8s:' % 'DB' pretty_print_nics(nics[0]) print ''.ljust(8) + '%8s:' % 'Ganeti' pretty_print_nics(nics[1]) elif verbosity == 2: print >> sys.stderr, "All instance nics are synced." # # Then fix them # if options['fix_stale'] and len(stale) > 0: print >> sys.stderr, \ "Simulating successful Ganeti removal for %d " \ "servers in the DB:" % len(stale) for vm in VirtualMachine.objects.filter(pk__in=stale): event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode='OP_INSTANCE_REMOVE', status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_orphans'] and len(orphans) > 0: print >> sys.stderr, \ "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ len(orphans) for id in orphans: try: vm = VirtualMachine.objects.get(pk=id) with pooled_rapi_client(vm) as client: client.DeleteInstance(utils.id_to_instance_name(id)) except VirtualMachine.DoesNotExist: print >> sys.stderr, "No entry for VM %d in DB !!" 
% id print >> sys.stderr, " ...done" if options['fix_unsynced'] and len(unsynced) > 0: print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ len(unsynced) for id, db_state, ganeti_up in unsynced: vm = VirtualMachine.objects.get(pk=id) opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ else "OP_INSTANCE_SHUTDOWN" event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode=opcode, status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_build_errors'] and len(build_errors) > 0: print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ len(build_errors) for id in build_errors: vm = VirtualMachine.objects.get(pk=id) event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode="OP_INSTANCE_CREATE", status='error', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ len(unsynced_nics) for id, nics in unsynced_nics.items(): vm = VirtualMachine.objects.get(pk=id) nics = nics[1] # Ganeti nics if nics == {}: # No nics vm.nics.all.delete() continue for index, nic in nics.items(): net_id = utils.id_from_network_name(nic['network']) subnet6 = Network.objects.get(id=net_id).subnet6 # Produce ipv6 ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None nic['ipv6'] = ipv6 # Rename ipv4 to ip nic['ip'] = nic['ipv4'] # Dict to sorted list final_nics = [] nics_keys = nics.keys() nics_keys.sort() for i in nics_keys: if nics[i]['network']: final_nics.append(nics[i]) else: print 'Network of nic %d of vm %s is None. ' \ 'Can not reconcile' % (i, vm.backend_vm_id) event_time = datetime.datetime.now() backend_mod.process_net_status(vm=vm, etime=event_time, nics=final_nics) print >> sys.stderr, " ...done"
def handle(self, **options): verbosity = int(options['verbosity']) self._process_args(options) backend_id = options['backend-id'] backend = get_backend(backend_id) if backend_id else None G, GNics = reconciliation.get_instances_from_ganeti(backend) D = reconciliation.get_servers_from_db(backend) DBNics = reconciliation.get_nics_from_db(backend) # # Detect problems # if options['detect_stale']: stale = reconciliation.stale_servers_in_db(D, G) if len(stale) > 0: print >> sys.stderr, "Found the following stale server IDs: " print " " + "\n ".join( [str(x) for x in stale]) elif verbosity == 2: print >> sys.stderr, "Found no stale server IDs in DB." if options['detect_orphans']: orphans = reconciliation.orphan_instances_in_ganeti(D, G) if len(orphans) > 0: print >> sys.stderr, "Found orphan Ganeti instances with IDs: " print " " + "\n ".join( [str(x) for x in orphans]) elif verbosity == 2: print >> sys.stderr, "Found no orphan Ganeti instances." if options['detect_unsynced']: unsynced = reconciliation.unsynced_operstate(D, G) if len(unsynced) > 0: print >> sys.stderr, "The operstate of the following server" \ " IDs is out-of-sync:" print " " + "\n ".join( ["%d is %s in DB, %s in Ganeti" % (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced]) elif verbosity == 2: print >> sys.stderr, "The operstate of all servers is in sync." if options['detect_build_errors']: build_errors = reconciliation.instances_with_build_errors(D, G) if len(build_errors) > 0: msg = "The os for the following server IDs was not build"\ " successfully:" print >> sys.stderr, msg print " " + "\n ".join( ["%d" % x for x in build_errors]) elif verbosity == 2: print >> sys.stderr, "Found no instances with build errors." 
if options['detect_unsynced_nics']: def pretty_print_nics(nics): if not nics: print ''.ljust(18) + 'None' for index, info in nics.items(): print ''.ljust(18) + 'nic/' + str(index) +\ ': MAC: %s, IP: %s, Network: %s' % \ (info['mac'], info['ipv4'], info['network']) unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) if len(unsynced_nics) > 0: msg = "The NICs of the servers with the following IDs are"\ " unsynced:" print >> sys.stderr, msg for id, nics in unsynced_nics.items(): print ''.ljust(2) + '%6d:' % id print ''.ljust(8) + '%8s:' % 'DB' pretty_print_nics(nics[0]) print ''.ljust(8) + '%8s:' % 'Ganeti' pretty_print_nics(nics[1]) elif verbosity == 2: print >> sys.stderr, "All instance nics are synced." # # Then fix them # if options['fix_stale'] and len(stale) > 0: print >> sys.stderr, \ "Simulating successful Ganeti removal for %d " \ "servers in the DB:" % len(stale) for vm in VirtualMachine.objects.filter(pk__in=stale): event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode='OP_INSTANCE_REMOVE', status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_orphans'] and len(orphans) > 0: print >> sys.stderr, \ "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ len(orphans) for id in orphans: try: vm = VirtualMachine.objects.get(pk=id) with pooled_rapi_client(vm) as client: client.DeleteInstance(utils.id_to_instance_name(id)) except VirtualMachine.DoesNotExist: print >> sys.stderr, "No entry for VM %d in DB !!" 
% id print >> sys.stderr, " ...done" if options['fix_unsynced'] and len(unsynced) > 0: print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ len(unsynced) for id, db_state, ganeti_up in unsynced: vm = VirtualMachine.objects.get(pk=id) opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ else "OP_INSTANCE_SHUTDOWN" event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode=opcode, status='success', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_build_errors'] and len(build_errors) > 0: print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ len(build_errors) for id in build_errors: vm = VirtualMachine.objects.get(pk=id) event_time = datetime.datetime.now() backend_mod.process_op_status( vm=vm, etime=event_time, jobid=-0, opcode="OP_INSTANCE_CREATE", status='error', logmsg='Reconciliation: simulated Ganeti event') print >> sys.stderr, " ...done" if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ len(unsynced_nics) for id, nics in unsynced_nics.items(): vm = VirtualMachine.objects.get(pk=id) nics = nics[1] # Ganeti nics if nics == {}: # No nics vm.nics.all.delete() continue for index, nic in nics.items(): net_id = utils.id_from_network_name(nic['network']) subnet6 = Network.objects.get(id=net_id).subnet6 # Produce ipv6 ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None nic['ipv6'] = ipv6 # Rename ipv4 to ip nic['ip'] = nic['ipv4'] # Dict to sorted list final_nics = [] nics_keys = nics.keys() nics_keys.sort() for i in nics_keys: if nics[i]['network']: final_nics.append(nics[i]) else: print 'Network of nic %d of vm %s is None. ' \ 'Can not reconcile' % (i, vm.backend_vm_id) event_time = datetime.datetime.now() backend_mod.process_net_status(vm=vm, etime=event_time, nics=final_nics) print >> sys.stderr, " ...done"