Beispiel #1
0
def stale_servers_in_db(D, G):
    """Return the set of server IDs present in the DB but not in Ganeti.

    ``D`` maps server IDs to their DB operstate and ``G`` maps server IDs
    to whatever was collected from Ganeti; only the keys of ``G`` are used.

    A server missing from ``G`` is considered stale unless its DB state is
    'BUILD'.  A 'BUILD' server is only examined when
    ``needs_reconciliation(vm)`` is true, and is then reported as stale only
    if querying Ganeti for its job / instance raises ``GanetiApiError``.
    """
    only_in_db = set(D.keys()) - set(G.keys())

    result = set()
    for server_id in only_in_db:
        if D[server_id] != 'BUILD':
            result.add(server_id)
            continue

        vm = VirtualMachine.objects.get(id=server_id)
        if not needs_reconciliation(vm):
            continue

        with pooled_rapi_client(vm) as client:
            try:
                status = client.GetJobStatus(vm.backendjobid)['status']
                if status not in ('queued', 'waiting', 'running'):
                    # The job is over: the instance must be retrievable,
                    # otherwise GetInstance raises and the server is stale.
                    client.GetInstance(utils.id_to_instance_name(server_id))
                # Otherwise the server is still building in Ganeti.
            except GanetiApiError:
                result.add(server_id)

    return result
Beispiel #2
0
    def reconcile_orphan_servers(self):
        """Log servers that exist in Ganeti but not in the DB and, when the
        "fix_orphans" option is set, issue a DeleteInstance for each one.
        """
        orphan_ids = self.gnt_servers_keys - self.db_servers_keys

        if not orphan_ids:
            self.log.debug("No orphan servers at backend %s", self.backend)
            return

        self.log.info("Found orphan servers %s at backend %s",
                      ", ".join(map(str, orphan_ids)), self.backend)

        if not self.options["fix_orphans"]:
            return

        for orphan_id in orphan_ids:
            instance_name = utils.id_to_instance_name(orphan_id)
            self.client.DeleteInstance(instance_name)
        self.log.debug("Issued OP_INSTANCE_REMOVE for orphan servers.")
Beispiel #3
0
    def reconcile_orphan_servers(self):
        """Report Ganeti servers that have no DB counterpart.

        When any are found and the "fix_orphans" option is enabled, a
        DeleteInstance call is issued through ``self.client`` for each.
        """
        ganeti_only = self.gnt_servers_keys - self.db_servers_keys

        if ganeti_only:
            listing = ", ".join(map(str, ganeti_only))
            self.log.info("Found orphan servers %s at backend %s",
                          listing, self.backend)
        else:
            self.log.debug("No orphan servers at backend %s", self.backend)
            return

        if self.options["fix_orphans"]:
            for ganeti_id in ganeti_only:
                self.client.DeleteInstance(
                    utils.id_to_instance_name(ganeti_id))
            self.log.debug("Issued OP_INSTANCE_REMOVE for orphan servers.")
def detect_conflicting_ips(network):
    """Detect NICs that have the same IP in the same network.

    Reads the ('ipv4', 'machine') pairs of all NICs of ``network`` and, for
    every IPv4 value held by more than one NIC, emits a single
    'D: Conflicting IP' line through ``write`` listing the instance names
    of all machines that hold it.

    Each conflicting IP is reported exactly once, no matter how many NICs
    share it.
    """
    machine_ips = network.nics.all().values_list('ipv4', 'machine')

    # Group machine ids per IP in a single pass. The separate order list
    # keeps the report in first-seen order without relying on dict ordering.
    machines_by_ip = {}
    ip_order = []
    for ip, machine in machine_ips:
        if ip not in machines_by_ip:
            machines_by_ip[ip] = []
            ip_order.append(ip)
        machines_by_ip[ip].append(machine)

    for ip in ip_order:
        holders = machines_by_ip[ip]
        if len(holders) < 2:
            continue
        machines = [utils.id_to_instance_name(m) for m in holders]
        write('D: Conflicting IP:%s Machines: %s\n' %
              (ip, ', '.join(machines)))
Beispiel #5
0
def detect_conflicting_ips(network):
    """Detect NICs that have the same IP in the same network.

    Collects the ('ipv4', 'machine') pairs of every NIC of ``network`` and
    writes one 'D: Conflicting IP' line (via ``write``) per IPv4 value that
    is used by two or more NICs, naming all machines involved.

    A given conflicting IP is reported once, even when three or more NICs
    share it.
    """
    machine_ips = network.nics.all().values_list('ipv4', 'machine')

    # One pass: bucket machine ids by IP, remembering the order in which
    # each IP was first seen so the output order is deterministic.
    machines_by_ip = {}
    ip_order = []
    for ip, machine in machine_ips:
        if ip not in machines_by_ip:
            machines_by_ip[ip] = []
            ip_order.append(ip)
        machines_by_ip[ip].append(machine)

    for ip in ip_order:
        holders = machines_by_ip[ip]
        if len(holders) < 2:
            continue
        machines = [
            utils.id_to_instance_name(m) for m in holders
        ]
        write('D: Conflicting IP:%s Machines: %s\n' %
              (ip, ', '.join(machines)))
    def handle(self, **options):
        verbosity = int(options['verbosity'])
        self._process_args(options)
        backend_id = options['backend-id']
        backend = get_backend(backend_id) if backend_id else None

        G, GNics = reconciliation.get_instances_from_ganeti(backend)
        D = reconciliation.get_servers_from_db(backend)

        DBNics = reconciliation.get_nics_from_db(backend)

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                print >> sys.stderr, "Found the following stale server IDs: "
                print "    " + "\n    ".join([str(x) for x in stale])
            elif verbosity == 2:
                print >> sys.stderr, "Found no stale server IDs in DB."

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
                print "    " + "\n    ".join([str(x) for x in orphans])
            elif verbosity == 2:
                print >> sys.stderr, "Found no orphan Ganeti instances."

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                print >> sys.stderr, "The operstate of the following server" \
                                     " IDs is out-of-sync:"
                print "    " + "\n    ".join([
                    "%d is %s in DB, %s in Ganeti" %
                    (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced
                ])
            elif verbosity == 2:
                print >> sys.stderr, "The operstate of all servers is in sync."

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            if len(build_errors) > 0:
                msg = "The os for the following server IDs was not build"\
                      " successfully:"
                print >> sys.stderr, msg
                print "    " + "\n    ".join(["%d" % x for x in build_errors])
            elif verbosity == 2:
                print >> sys.stderr, "Found no instances with build errors."

        if options['detect_unsynced_nics']:

            def pretty_print_nics(nics):
                if not nics:
                    print ''.ljust(18) + 'None'
                for index, info in nics.items():
                    print ''.ljust(18) + 'nic/' + str(index) +\
                          ': MAC: %s, IP: %s, Network: %s' % \
                          (info['mac'], info['ipv4'], info['network'])

            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
            if len(unsynced_nics) > 0:
                msg = "The NICs of the servers with the following IDs are"\
                      " unsynced:"
                print >> sys.stderr, msg
                for id, nics in unsynced_nics.items():
                    print ''.ljust(2) + '%6d:' % id
                    print ''.ljust(8) + '%8s:' % 'DB'
                    pretty_print_nics(nics[0])
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
                    pretty_print_nics(nics[1])
            elif verbosity == 2:
                print >> sys.stderr, "All instance nics are synced."

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            print >> sys.stderr, \
                "Simulating successful Ganeti removal for %d " \
                "servers in the DB:" % len(stale)
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode='OP_INSTANCE_REMOVE',
                    status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_orphans'] and len(orphans) > 0:
            print >> sys.stderr, \
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
                len(orphans)
            for id in orphans:
                try:
                    vm = VirtualMachine.objects.get(pk=id)
                    with pooled_rapi_client(vm) as client:
                        client.DeleteInstance(utils.id_to_instance_name(id))
                except VirtualMachine.DoesNotExist:
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced'] and len(unsynced) > 0:
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
                len(unsynced)
            for id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=id)
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
                         else "OP_INSTANCE_SHUTDOWN"
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode=opcode,
                    status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_build_errors'] and len(build_errors) > 0:
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\
                                 len(build_errors)
            for id in build_errors:
                vm = VirtualMachine.objects.get(pk=id)
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode="OP_INSTANCE_CREATE",
                    status='error',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
                                 len(unsynced_nics)
            for id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=id)
                nics = nics[1]  # Ganeti nics
                if nics == {}:  # No nics
                    vm.nics.all.delete()
                    continue
                for index, nic in nics.items():
                    net_id = utils.id_from_network_name(nic['network'])
                    subnet6 = Network.objects.get(id=net_id).subnet6
                    # Produce ipv6
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
                    nic['ipv6'] = ipv6
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to sorted list
                final_nics = []
                nics_keys = nics.keys()
                nics_keys.sort()
                for i in nics_keys:
                    if nics[i]['network']:
                        final_nics.append(nics[i])
                    else:
                        print 'Network of nic %d of vm %s is None. ' \
                              'Can not reconcile' % (i, vm.backend_vm_id)
                event_time = datetime.datetime.now()
                backend_mod.process_net_status(vm=vm,
                                               etime=event_time,
                                               nics=final_nics)
            print >> sys.stderr, "    ...done"
Beispiel #7
0
 def test_iname_from_id(self):
     """id_to_instance_name(42) must yield 'snf-42'."""
     instance_name = utils.id_to_instance_name(42)
     self.assertEqual(instance_name, 'snf-42')
Beispiel #8
0
 def test_iname_from_id(self):
     """Check the instance name produced for server id 42."""
     server_id = 42
     self.assertEqual(utils.id_to_instance_name(server_id), 'snf-42')
Beispiel #9
0
    def handle(self, **options):
        verbosity = int(options['verbosity'])
        self._process_args(options)
        backend_id = options['backend-id']
        backend = get_backend(backend_id) if backend_id else None

        G, GNics = reconciliation.get_instances_from_ganeti(backend)
        D = reconciliation.get_servers_from_db(backend)

        DBNics = reconciliation.get_nics_from_db(backend)

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                print >> sys.stderr, "Found the following stale server IDs: "
                print "    " + "\n    ".join(
                    [str(x) for x in stale])
            elif verbosity == 2:
                print >> sys.stderr, "Found no stale server IDs in DB."

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
                print "    " + "\n    ".join(
                    [str(x) for x in orphans])
            elif verbosity == 2:
                print >> sys.stderr, "Found no orphan Ganeti instances."

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                print >> sys.stderr, "The operstate of the following server" \
                                     " IDs is out-of-sync:"
                print "    " + "\n    ".join(
                    ["%d is %s in DB, %s in Ganeti" %
                     (x[0], x[1], ('UP' if x[2] else 'DOWN'))
                     for x in unsynced])
            elif verbosity == 2:
                print >> sys.stderr, "The operstate of all servers is in sync."

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            if len(build_errors) > 0:
                msg = "The os for the following server IDs was not build"\
                      " successfully:"
                print >> sys.stderr, msg
                print "    " + "\n    ".join(
                    ["%d" % x for x in build_errors])
            elif verbosity == 2:
                print >> sys.stderr, "Found no instances with build errors."

        if options['detect_unsynced_nics']:
            def pretty_print_nics(nics):
                if not nics:
                    print ''.ljust(18) + 'None'
                for index, info in nics.items():
                    print ''.ljust(18) + 'nic/' + str(index) +\
                          ': MAC: %s, IP: %s, Network: %s' % \
                          (info['mac'], info['ipv4'], info['network'])

            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
            if len(unsynced_nics) > 0:
                msg = "The NICs of the servers with the following IDs are"\
                      " unsynced:"
                print >> sys.stderr, msg
                for id, nics in unsynced_nics.items():
                    print ''.ljust(2) + '%6d:' % id
                    print ''.ljust(8) + '%8s:' % 'DB'
                    pretty_print_nics(nics[0])
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
                    pretty_print_nics(nics[1])
            elif verbosity == 2:
                print >> sys.stderr, "All instance nics are synced."

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            print >> sys.stderr, \
                "Simulating successful Ganeti removal for %d " \
                "servers in the DB:" % len(stale)
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode='OP_INSTANCE_REMOVE', status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_orphans'] and len(orphans) > 0:
            print >> sys.stderr, \
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
                len(orphans)
            for id in orphans:
                try:
                    vm = VirtualMachine.objects.get(pk=id)
                    with pooled_rapi_client(vm) as client:
                        client.DeleteInstance(utils.id_to_instance_name(id))
                except VirtualMachine.DoesNotExist:
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced'] and len(unsynced) > 0:
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
                len(unsynced)
            for id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=id)
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
                         else "OP_INSTANCE_SHUTDOWN"
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm, etime=event_time, jobid=-0,
                    opcode=opcode, status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_build_errors'] and len(build_errors) > 0:
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\
                                 len(build_errors)
            for id in build_errors:
                vm = VirtualMachine.objects.get(pk=id)
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm, etime=event_time, jobid=-0,
                    opcode="OP_INSTANCE_CREATE", status='error',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
                                 len(unsynced_nics)
            for id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=id)
                nics = nics[1]  # Ganeti nics
                if nics == {}:  # No nics
                    vm.nics.all.delete()
                    continue
                for index, nic in nics.items():
                    net_id = utils.id_from_network_name(nic['network'])
                    subnet6 = Network.objects.get(id=net_id).subnet6
                    # Produce ipv6
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
                    nic['ipv6'] = ipv6
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to sorted list
                final_nics = []
                nics_keys = nics.keys()
                nics_keys.sort()
                for i in nics_keys:
                    if nics[i]['network']:
                        final_nics.append(nics[i])
                    else:
                        print 'Network of nic %d of vm %s is None. ' \
                              'Can not reconcile' % (i, vm.backend_vm_id)
                event_time = datetime.datetime.now()
                backend_mod.process_net_status(vm=vm, etime=event_time,
                                               nics=final_nics)
            print >> sys.stderr, "    ...done"