Beispiel #1
0
 def reconcile_unsynced_nics(self, server_id, db_server, gnt_server):
     """Report, and optionally repair, NIC differences between DB and Ganeti.

     The server's DB NICs (ordered by id) are compared pairwise with the
     Ganeti NICs as parsed by ``backend_mod.process_ganeti_nics`` (sorted
     by key). DB NICs that are in state "BUILD" and were created at or
     before ``self.event_time - BUILDING_NIC_TIMEOUT`` are left out of the
     comparison.

     On a mismatch both NIC listings are logged. If the
     "fix_unsynced_nics" option is set, the raw Ganeti NICs are then passed
     to ``backend_mod.process_net_status`` for the server returned by
     ``get_locked_server(server_id)``.
     """
     cutoff = self.event_time - BUILDING_NIC_TIMEOUT
     db_nics = db_server.nics.exclude(state="BUILD", created__lte=cutoff)
     db_nics = db_nics.order_by("id")
     gnt_nics = gnt_server["nics"]
     gnt_nics_parsed = backend_mod.process_ganeti_nics(gnt_nics)

     # A differing NIC count is a mismatch by itself; the pairwise scan
     # additionally stops at the first pair whose id or contents differ.
     nics_changed = len(db_nics) != len(gnt_nics)
     gnt_pairs = sorted(gnt_nics_parsed.items())
     for db_nic, (gnt_nic_id, gnt_nic) in zip(db_nics, gnt_pairs):
         if db_nic.id != gnt_nic_id or \
                 not backend_mod.nics_are_equal(db_nic, gnt_nic):
             nics_changed = True
             break

     if not nics_changed:
         return

     db_lines = "\n\t\t".join(map(format_db_nic, db_nics))
     gnt_lines = "\n\t\t".join(map(format_gnt_nic, gnt_pairs))
     self.log.info("Found unsynced NICs for server '%s'.\n"
                   "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s",
                   server_id, db_lines, gnt_lines)
     if self.options["fix_unsynced_nics"]:
         backend_mod.process_net_status(vm=get_locked_server(server_id),
                                        etime=self.event_time,
                                        nics=gnt_nics)
Beispiel #2
0
 def reconcile_unsynced_nics(self, server_id, db_server, gnt_server):
     """Check one server's NICs in the DB against Ganeti's view of them.

     DB NICs are taken from ``db_server.nics`` ordered by id, skipping
     those that are in state "BUILD" and were created at or before
     ``self.event_time - BUILDING_NIC_TIMEOUT``. Ganeti NICs come from
     ``gnt_server["nics"]`` and are parsed with
     ``backend_mod.process_ganeti_nics``.

     When the NIC counts differ, or any DB/Ganeti pair differs in id or
     fails ``backend_mod.nics_are_equal``, both listings are logged and,
     if the "fix_unsynced_nics" option is set, the raw Ganeti NICs are
     applied through ``backend_mod.process_net_status``.
     """
     oldest_build = self.event_time - BUILDING_NIC_TIMEOUT
     db_nics = db_server.nics \
         .exclude(state="BUILD", created__lte=oldest_build) \
         .order_by("id")
     gnt_nics = gnt_server["nics"]
     gnt_nics_parsed = backend_mod.process_ganeti_nics(gnt_nics)

     count_differs = len(db_nics) != len(gnt_nics)
     gnt_items = sorted(gnt_nics_parsed.items())
     # any() stops at the first differing pair, like an explicit break.
     pair_differs = any(
         db_nic.id != gnt_id or
         not backend_mod.nics_are_equal(db_nic, gnt_nic)
         for db_nic, (gnt_id, gnt_nic) in zip(db_nics, gnt_items))
     if not (count_differs or pair_differs):
         return

     separator = "\n\t\t"
     db_nics_str = separator.join(map(format_db_nic, db_nics))
     gnt_nics_str = separator.join(map(format_gnt_nic, gnt_items))
     msg = ("Found unsynced NICs for server '%s'.\n"
            "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s")
     self.log.info(msg, server_id, db_nics_str, gnt_nics_str)

     if not self.options["fix_unsynced_nics"]:
         return
     vm = get_locked_server(server_id)
     backend_mod.process_net_status(vm=vm, etime=self.event_time,
                                    nics=gnt_nics)
    def handle(self, **options):
        verbosity = int(options['verbosity'])
        self._process_args(options)
        backend_id = options['backend-id']
        backend = get_backend(backend_id) if backend_id else None

        G, GNics = reconciliation.get_instances_from_ganeti(backend)
        D = reconciliation.get_servers_from_db(backend)

        DBNics = reconciliation.get_nics_from_db(backend)

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                print >> sys.stderr, "Found the following stale server IDs: "
                print "    " + "\n    ".join([str(x) for x in stale])
            elif verbosity == 2:
                print >> sys.stderr, "Found no stale server IDs in DB."

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
                print "    " + "\n    ".join([str(x) for x in orphans])
            elif verbosity == 2:
                print >> sys.stderr, "Found no orphan Ganeti instances."

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                print >> sys.stderr, "The operstate of the following server" \
                                     " IDs is out-of-sync:"
                print "    " + "\n    ".join([
                    "%d is %s in DB, %s in Ganeti" %
                    (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced
                ])
            elif verbosity == 2:
                print >> sys.stderr, "The operstate of all servers is in sync."

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            if len(build_errors) > 0:
                msg = "The os for the following server IDs was not build"\
                      " successfully:"
                print >> sys.stderr, msg
                print "    " + "\n    ".join(["%d" % x for x in build_errors])
            elif verbosity == 2:
                print >> sys.stderr, "Found no instances with build errors."

        if options['detect_unsynced_nics']:

            def pretty_print_nics(nics):
                if not nics:
                    print ''.ljust(18) + 'None'
                for index, info in nics.items():
                    print ''.ljust(18) + 'nic/' + str(index) +\
                          ': MAC: %s, IP: %s, Network: %s' % \
                          (info['mac'], info['ipv4'], info['network'])

            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
            if len(unsynced_nics) > 0:
                msg = "The NICs of the servers with the following IDs are"\
                      " unsynced:"
                print >> sys.stderr, msg
                for id, nics in unsynced_nics.items():
                    print ''.ljust(2) + '%6d:' % id
                    print ''.ljust(8) + '%8s:' % 'DB'
                    pretty_print_nics(nics[0])
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
                    pretty_print_nics(nics[1])
            elif verbosity == 2:
                print >> sys.stderr, "All instance nics are synced."

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            print >> sys.stderr, \
                "Simulating successful Ganeti removal for %d " \
                "servers in the DB:" % len(stale)
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode='OP_INSTANCE_REMOVE',
                    status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_orphans'] and len(orphans) > 0:
            print >> sys.stderr, \
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
                len(orphans)
            for id in orphans:
                try:
                    vm = VirtualMachine.objects.get(pk=id)
                    with pooled_rapi_client(vm) as client:
                        client.DeleteInstance(utils.id_to_instance_name(id))
                except VirtualMachine.DoesNotExist:
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced'] and len(unsynced) > 0:
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
                len(unsynced)
            for id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=id)
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
                         else "OP_INSTANCE_SHUTDOWN"
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode=opcode,
                    status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_build_errors'] and len(build_errors) > 0:
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\
                                 len(build_errors)
            for id in build_errors:
                vm = VirtualMachine.objects.get(pk=id)
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode="OP_INSTANCE_CREATE",
                    status='error',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
                                 len(unsynced_nics)
            for id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=id)
                nics = nics[1]  # Ganeti nics
                if nics == {}:  # No nics
                    vm.nics.all.delete()
                    continue
                for index, nic in nics.items():
                    net_id = utils.id_from_network_name(nic['network'])
                    subnet6 = Network.objects.get(id=net_id).subnet6
                    # Produce ipv6
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
                    nic['ipv6'] = ipv6
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to sorted list
                final_nics = []
                nics_keys = nics.keys()
                nics_keys.sort()
                for i in nics_keys:
                    if nics[i]['network']:
                        final_nics.append(nics[i])
                    else:
                        print 'Network of nic %d of vm %s is None. ' \
                              'Can not reconcile' % (i, vm.backend_vm_id)
                event_time = datetime.datetime.now()
                backend_mod.process_net_status(vm=vm,
                                               etime=event_time,
                                               nics=final_nics)
            print >> sys.stderr, "    ...done"
Beispiel #4
0
    def handle(self, **options):
        verbosity = int(options['verbosity'])
        self._process_args(options)
        backend_id = options['backend-id']
        backend = get_backend(backend_id) if backend_id else None

        G, GNics = reconciliation.get_instances_from_ganeti(backend)
        D = reconciliation.get_servers_from_db(backend)

        DBNics = reconciliation.get_nics_from_db(backend)

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                print >> sys.stderr, "Found the following stale server IDs: "
                print "    " + "\n    ".join(
                    [str(x) for x in stale])
            elif verbosity == 2:
                print >> sys.stderr, "Found no stale server IDs in DB."

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
                print "    " + "\n    ".join(
                    [str(x) for x in orphans])
            elif verbosity == 2:
                print >> sys.stderr, "Found no orphan Ganeti instances."

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                print >> sys.stderr, "The operstate of the following server" \
                                     " IDs is out-of-sync:"
                print "    " + "\n    ".join(
                    ["%d is %s in DB, %s in Ganeti" %
                     (x[0], x[1], ('UP' if x[2] else 'DOWN'))
                     for x in unsynced])
            elif verbosity == 2:
                print >> sys.stderr, "The operstate of all servers is in sync."

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            if len(build_errors) > 0:
                msg = "The os for the following server IDs was not build"\
                      " successfully:"
                print >> sys.stderr, msg
                print "    " + "\n    ".join(
                    ["%d" % x for x in build_errors])
            elif verbosity == 2:
                print >> sys.stderr, "Found no instances with build errors."

        if options['detect_unsynced_nics']:
            def pretty_print_nics(nics):
                if not nics:
                    print ''.ljust(18) + 'None'
                for index, info in nics.items():
                    print ''.ljust(18) + 'nic/' + str(index) +\
                          ': MAC: %s, IP: %s, Network: %s' % \
                          (info['mac'], info['ipv4'], info['network'])

            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
            if len(unsynced_nics) > 0:
                msg = "The NICs of the servers with the following IDs are"\
                      " unsynced:"
                print >> sys.stderr, msg
                for id, nics in unsynced_nics.items():
                    print ''.ljust(2) + '%6d:' % id
                    print ''.ljust(8) + '%8s:' % 'DB'
                    pretty_print_nics(nics[0])
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
                    pretty_print_nics(nics[1])
            elif verbosity == 2:
                print >> sys.stderr, "All instance nics are synced."

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            print >> sys.stderr, \
                "Simulating successful Ganeti removal for %d " \
                "servers in the DB:" % len(stale)
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode='OP_INSTANCE_REMOVE', status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_orphans'] and len(orphans) > 0:
            print >> sys.stderr, \
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
                len(orphans)
            for id in orphans:
                try:
                    vm = VirtualMachine.objects.get(pk=id)
                    with pooled_rapi_client(vm) as client:
                        client.DeleteInstance(utils.id_to_instance_name(id))
                except VirtualMachine.DoesNotExist:
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced'] and len(unsynced) > 0:
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
                len(unsynced)
            for id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=id)
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
                         else "OP_INSTANCE_SHUTDOWN"
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm, etime=event_time, jobid=-0,
                    opcode=opcode, status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_build_errors'] and len(build_errors) > 0:
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\
                                 len(build_errors)
            for id in build_errors:
                vm = VirtualMachine.objects.get(pk=id)
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm, etime=event_time, jobid=-0,
                    opcode="OP_INSTANCE_CREATE", status='error',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
                                 len(unsynced_nics)
            for id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=id)
                nics = nics[1]  # Ganeti nics
                if nics == {}:  # No nics
                    vm.nics.all.delete()
                    continue
                for index, nic in nics.items():
                    net_id = utils.id_from_network_name(nic['network'])
                    subnet6 = Network.objects.get(id=net_id).subnet6
                    # Produce ipv6
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
                    nic['ipv6'] = ipv6
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to sorted list
                final_nics = []
                nics_keys = nics.keys()
                nics_keys.sort()
                for i in nics_keys:
                    if nics[i]['network']:
                        final_nics.append(nics[i])
                    else:
                        print 'Network of nic %d of vm %s is None. ' \
                              'Can not reconcile' % (i, vm.backend_vm_id)
                event_time = datetime.datetime.now()
                backend_mod.process_net_status(vm=vm, etime=event_time,
                                               nics=final_nics)
            print >> sys.stderr, "    ...done"