Ejemplo n.º 1
0
 def reconcile_unsynced_disks(self, server_id, db_server, gnt_server):
     """Detect, report and optionally repair disk drift for one server.

     The non-deleted volumes of ``db_server`` (ordered by id) are compared
     pairwise with the parsed ``gnt_server["disks"]`` (sorted by key).
     Volumes that are "CREATING" and were created at or before
     ``event_time - BUILDING_NIC_TIMEOUT`` are left out of the comparison.
     A differing count, id or content is logged; when the
     "fix_unsynced_disks" option is set, a successful
     OP_INSTANCE_SET_PARAMS event carrying the Ganeti disks is simulated.
     """
     cutoff = self.event_time - BUILDING_NIC_TIMEOUT
     db_disks = db_server.volumes.exclude(status="CREATING",
                                          created__lte=cutoff)
     db_disks = db_disks.filter(deleted=False).order_by("id")
     gnt_disks = gnt_server["disks"]
     parsed_disks = backend_mod.parse_instance_disks(gnt_disks)

     count_differs = len(db_disks) != len(gnt_disks)
     gnt_sorted = sorted(parsed_disks.items())
     # Stop at the first pair whose id or content does not match.
     content_differs = any(
         db_disk.id != gnt_id or
         not backend_mod.disks_are_equal(db_disk, gnt_disk)
         for db_disk, (gnt_id, gnt_disk) in zip(db_disks, gnt_sorted))
     if not (count_differs or content_differs):
         return

     report = "Found unsynced disks for server '%s'.\n"\
              "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s"
     separator = "\n\t\t"
     db_lines = separator.join(format_db_disk(disk) for disk in db_disks)
     gnt_lines = separator.join(format_gnt_disk(item)
                                for item in gnt_sorted)
     self.log.info(report, server_id, db_lines, gnt_lines)

     if not self.options["fix_unsynced_disks"]:
         return
     locked_vm = get_locked_server(server_id)
     backend_mod.process_op_status(
         vm=locked_vm,
         etime=self.event_time,
         jobid=0,
         opcode="OP_INSTANCE_SET_PARAMS",
         status='success',
         logmsg="Reconciliation: simulated Ganeti event",
         disks=gnt_disks)
Ejemplo n.º 2
0
 def reconcile_unsynced_disks(self, server_id, db_server, gnt_server,
                              atomic_context=None):
     """Detect, report and optionally repair disk drift for one server.

     The non-deleted volumes of ``db_server`` (ordered by id) are compared
     pairwise with the parsed ``gnt_server["disks"]`` (sorted by key).
     Volumes that are "CREATING" and were created at or before
     ``event_time - BUILDING_NIC_TIMEOUT`` are left out of the comparison.
     A differing count, id or content is logged; when the
     "fix_unsynced_disks" option is set, a successful
     OP_INSTANCE_SET_PARAMS event carrying the Ganeti disks is simulated,
     forwarding ``atomic_context`` to ``process_op_status``.
     """
     cutoff = self.event_time - BUILDING_NIC_TIMEOUT
     db_disks = db_server.volumes.exclude(status="CREATING",
                                          created__lte=cutoff)
     db_disks = db_disks.filter(deleted=False).order_by("id")
     gnt_disks = gnt_server["disks"]
     parsed_disks = backend_mod.parse_instance_disks(gnt_disks)

     count_differs = len(db_disks) != len(gnt_disks)
     gnt_sorted = sorted(parsed_disks.items())
     # Stop at the first pair whose id or content does not match.
     content_differs = any(
         db_disk.id != gnt_id or
         not backend_mod.disks_are_equal(db_disk, gnt_disk)
         for db_disk, (gnt_id, gnt_disk) in zip(db_disks, gnt_sorted))
     if not (count_differs or content_differs):
         return

     report = "Found unsynced disks for server '%s'.\n"\
              "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s"
     separator = "\n\t\t"
     db_lines = separator.join(format_db_disk(disk) for disk in db_disks)
     gnt_lines = separator.join(format_gnt_disk(item)
                                for item in gnt_sorted)
     self.log.info(report, server_id, db_lines, gnt_lines)

     if not self.options["fix_unsynced_disks"]:
         return
     locked_vm = get_locked_server(server_id)
     backend_mod.process_op_status(
         vm=locked_vm,
         etime=self.event_time,
         jobid=0,
         opcode="OP_INSTANCE_SET_PARAMS",
         status='success',
         logmsg="Reconciliation: simulated Ganeti event",
         disks=gnt_disks,
         atomic_context=atomic_context)
Ejemplo n.º 3
0
def update_db(vm, msg, event_time):
    """Process a notification of type 'ganeti-op-status'.

    Messages of any other type are logged as errors and ignored.

    A failed OP_INSTANCE_CREATE that was submitted with opportunistic
    locking and whose error code is ``rapi.ECODE_TEMP_NORES`` is resubmitted
    without opportunistic locking; the VM's ``backendjobid`` and
    ``task_job_id`` are then pointed at the new job and the function
    returns without further processing. Every other message is handed to
    ``backend_mod.process_op_status``.

    :param vm: the VM object the notification refers to
    :param msg: dict with at least the keys 'type', 'operation', 'status',
                'jobId', 'logmsg' and 'instance'; 'instance_nics',
                'instance_disks', 'job_fields' and 'result' are optional
    :param event_time: passed unchanged to ``process_op_status``
    """
    log.debug("Processing ganeti-op-status msg: %s", msg)

    if msg['type'] != "ganeti-op-status":
        log.error("Message is of unknown type %s.", msg['type'])
        return

    operation = msg["operation"]
    status = msg["status"]
    jobID = msg["jobId"]
    logmsg = msg["logmsg"]
    nics = msg.get("instance_nics", None)
    disks = msg.get("instance_disks", None)
    job_fields = msg.get("job_fields", {})
    result = msg.get("result", [])

    # Special case: OP_INSTANCE_CREATE with opportunistic locking may fail
    # if all Ganeti nodes are already locked. Retry the job without
    # opportunistic locking.
    if (operation == "OP_INSTANCE_CREATE" and status == "error" and
            job_fields.get("opportunistic_locking", False)):
        try:
            error_code = result[1][1]
        except (IndexError, TypeError):
            # The result is too short, or it (or its second element) is not
            # subscriptable, e.g. an explicit None: there is no error code.
            error_code = None
        if error_code == rapi.ECODE_TEMP_NORES:
            if vm.backendjobid != jobID:  # The job has already been retried!
                return
            # Remove extra fields
            for field in ("OP_ID", "reason"):
                job_fields.pop(field, None)
            # Remove 'pnode' and 'snode' if they were set by Ganeti iallocator.
            # Ganeti will fail if both allocator and nodes are specified.
            # NOTE(review): 'iallocator' itself is popped here and is not
            # passed to CreateInstance below -- confirm this is intended.
            allocator = job_fields.pop("iallocator", None)
            if allocator is not None:
                for field in ("pnode", "snode"):
                    job_fields.pop(field, None)
            # Both 'instance_name' and 'name' are removed from the fields;
            # 'name' wins when both are present.
            name = job_fields.pop("name",
                                  job_fields.pop("instance_name", None))
            # Turn off opportunistic locking before retrying the job
            job_fields["opportunistic_locking"] = False
            with pooled_rapi_client(vm) as c:
                jobID = c.CreateInstance(name=name, **job_fields)
            # Update the VM fields
            vm.backendjobid = jobID
            # Update the task_job_id for commissions
            vm.task_job_id = jobID
            vm.backendjobstatus = None
            vm.save()
            log.info("Retrying failed creation of instance '%s' without"
                     " opportunistic locking. New job ID: '%s'", name, jobID)
            return

    backend_mod.process_op_status(vm, event_time, jobID,
                                  operation, status,
                                  logmsg, nics=nics,
                                  disks=disks,
                                  job_fields=job_fields)

    log.debug("Done processing ganeti-op-status msg for vm %s.",
              msg['instance'])
Ejemplo n.º 4
0
 def reconcile_unsynced_operstate(self, server_id, db_server, gnt_server,
                                  atomic_context=None):
     """Align the DB operstate of a server with the state Ganeti reports.

     A mismatch is always logged. When the "fix_unsynced" option is set,
     successful Ganeti jobs are simulated through
     ``backend_mod.process_op_status``: first an OP_INSTANCE_CREATE if the
     DB has the server in BUILD, then an OP_INSTANCE_STARTUP or
     OP_INSTANCE_SHUTDOWN depending on whether Ganeti reports "STARTED".
     ``atomic_context`` is forwarded to every simulated event.
     """
     db_state = db_server.operstate
     gnt_state = gnt_server["state"]
     if db_state == gnt_state:
         return

     self.log.info("Server '%s' is '%s' in DB and '%s' in Ganeti.",
                   server_id, db_state, gnt_state)
     if not self.options["fix_unsynced"]:
         return

     locked_vm = get_locked_server(server_id)
     # Arguments shared by every simulated event below.
     event = dict(vm=locked_vm, etime=self.event_time, jobid=0,
                  status='success',
                  logmsg='Reconciliation: simulated Ganeti event',
                  atomic_context=atomic_context)
     if db_state == "BUILD":
         # A server in building state must have its creation reconciled
         # first, to avoid wrong quotas.
         backend_mod.process_op_status(opcode="OP_INSTANCE_CREATE", **event)

     if gnt_state == "STARTED":
         fix_opcode = "OP_INSTANCE_STARTUP"
     else:
         fix_opcode = "OP_INSTANCE_SHUTDOWN"
     backend_mod.process_op_status(opcode=fix_opcode, **event)
     self.log.debug("Simulated Ganeti state event for server '%s'",
                    server_id)
Ejemplo n.º 5
0
 def reconcile_unsynced_operstate(self, server_id, db_server, gnt_server):
     """Align the DB operstate of a server with the state Ganeti reports.

     A mismatch is always logged. When the "fix_unsynced" option is set,
     successful Ganeti jobs are simulated through
     ``backend_mod.process_op_status``: first an OP_INSTANCE_CREATE if the
     DB has the server in BUILD, then an OP_INSTANCE_STARTUP or
     OP_INSTANCE_SHUTDOWN depending on whether Ganeti reports "STARTED".
     """
     db_state = db_server.operstate
     gnt_state = gnt_server["state"]
     if db_state == gnt_state:
         return

     self.log.info("Server '%s' is '%s' in DB and '%s' in Ganeti.",
                   server_id, db_state, gnt_state)
     if not self.options["fix_unsynced"]:
         return

     locked_vm = get_locked_server(server_id)
     # Arguments shared by every simulated event below.
     event = dict(vm=locked_vm, etime=self.event_time, jobid=0,
                  status='success',
                  logmsg='Reconciliation: simulated Ganeti event')
     if db_state == "BUILD":
         # A server in building state must have its creation reconciled
         # first, to avoid wrong quotas.
         backend_mod.process_op_status(opcode="OP_INSTANCE_CREATE", **event)

     if gnt_state == "STARTED":
         fix_opcode = "OP_INSTANCE_STARTUP"
     else:
         fix_opcode = "OP_INSTANCE_SHUTDOWN"
     backend_mod.process_op_status(opcode=fix_opcode, **event)
     self.log.debug("Simulated Ganeti state event for server '%s'",
                    server_id)
Ejemplo n.º 6
0
 def reconcile_unsynced_nics(self, server_id, db_server, gnt_server):
     """Detect, report and optionally repair NIC drift for one server.

     The NICs of ``db_server`` (ordered by id) are compared pairwise with
     the parsed ``gnt_server["nics"]`` (sorted by key). NICs that are in
     "BUILD" state and were created at or before
     ``event_time - BUILDING_NIC_TIMEOUT`` are left out of the comparison.
     If the Ganeti NICs cannot be parsed because of an unknown network, a
     warning is logged and nothing else happens. Otherwise a differing
     count, id or content is logged; when the "fix_unsynced_nics" option is
     set, a successful OP_INSTANCE_SET_PARAMS event carrying the Ganeti
     NICs is simulated.
     """
     cutoff = self.event_time - BUILDING_NIC_TIMEOUT
     db_nics = db_server.nics.exclude(state="BUILD", created__lte=cutoff)
     db_nics = db_nics.order_by("id")
     gnt_nics = gnt_server["nics"]
     try:
         parsed_nics = backend_mod.parse_instance_nics(gnt_nics)
     except Network.InvalidBackendIdError as err:
         self.log.warning("Server %s is connected to unknown network %s"
                          " Cannot reconcile server." % (server_id, str(err)))
         return

     count_differs = len(db_nics) != len(gnt_nics)
     gnt_sorted = sorted(parsed_nics.items())
     # Stop at the first pair whose id or content does not match.
     content_differs = any(
         db_nic.id != gnt_id or
         not backend_mod.nics_are_equal(db_nic, gnt_nic)
         for db_nic, (gnt_id, gnt_nic) in zip(db_nics, gnt_sorted))
     if not (count_differs or content_differs):
         return

     report = "Found unsynced NICs for server '%s'.\n"\
              "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s"
     separator = "\n\t\t"
     db_lines = separator.join(format_db_nic(nic) for nic in db_nics)
     gnt_lines = separator.join(format_gnt_nic(item) for item in gnt_sorted)
     self.log.info(report, server_id, db_lines, gnt_lines)

     if not self.options["fix_unsynced_nics"]:
         return
     locked_vm = get_locked_server(server_id)
     backend_mod.process_op_status(
         vm=locked_vm,
         etime=self.event_time,
         jobid=0,
         opcode="OP_INSTANCE_SET_PARAMS",
         status='success',
         logmsg="Reconciliation: simulated Ganeti event",
         nics=gnt_nics)
Ejemplo n.º 7
0
 def _reconcile(server_id, atomic_context=None):
     """Simulate a successful OP_INSTANCE_REMOVE job for ``server_id``.

     ``self`` is not a parameter: it is looked up in a surrounding scope
     that is not visible here (presumably the enclosing method).
     """
     simulated_event = {
         "vm": get_locked_server(server_id),
         "etime": self.event_time,
         "jobid": 0,
         "opcode": 'OP_INSTANCE_REMOVE',
         "status": 'success',
         "logmsg": 'Reconciliation: simulated Ganeti event',
         "atomic_context": atomic_context,
     }
     backend_mod.process_op_status(**simulated_event)
Ejemplo n.º 8
0
    def reconcile_unsynced_rescue(self, server_id, gnt_server, db_server):
        """Report and optionally fix rescue-mode drift between DB and Ganeti.

        Ganeti is considered to have the server in rescue mode when its
        ``boot_order`` hvparam starts with 'cdrom' and ``cdrom_image_path``
        is non-empty. The state is unsynced when exactly one side is in
        rescue mode, or when both are but with a different image location.
        Nothing is done if ``gnt_server`` carries no "hvparams". An unsynced
        state is logged and, with the "fix_unsynced_rescue" option set, a
        successful OP_INSTANCE_SET_PARAMS event with the Ganeti hvparams is
        simulated.
        """
        gnt_hvparams = gnt_server.get("hvparams")
        if gnt_hvparams is None:
            return

        cdrom_image_path = gnt_hvparams.get('cdrom_image_path', '')
        boot_order = gnt_hvparams.get('boot_order', '')
        rescue_in_ganeti = (boot_order.startswith('cdrom') and
                            len(cdrom_image_path) > 0)

        if not rescue_in_ganeti:
            # Not in rescue mode in Ganeti: any rescue trace in cyclades
            # means the two are out of sync.
            changed = bool(db_server.rescue or
                           db_server.rescue_image is not None)
        elif not db_server.rescue or db_server.rescue_image is None:
            # In rescue mode in Ganeti, but not in cyclades.
            changed = True
        else:
            # In rescue mode on both sides: compare the image locations.
            db_image = db_server.rescue_image
            location = db_image.location
            if db_image.location_type == RescueImage.FILETYPE_FILE:
                # Ganeti always holds a full path, while cyclades stores
                # only the filename.
                location = os.path.join(settings.RESCUE_IMAGE_PATH,
                                        location)
            # Unsynced when Ganeti uses a different image than cyclades.
            changed = location != cdrom_image_path

        if not changed:
            return

        if db_server.rescue_image:
            db_location = db_server.rescue_image.location
        else:
            db_location = None
        self.log.info(
            "Found unsynced rescue state for server %s: "
            "VM rescue: %s, VM rescue image location: %s "
            "boot_order: %s, cdrom_image_path: %s",
            db_server.id, db_server.rescue, db_location,
            boot_order, cdrom_image_path)

        if not self.options["fix_unsynced_rescue"]:
            return
        fix_hvparams = {
            'boot_order': boot_order,
            'cdrom_image_path': cdrom_image_path
        }
        locked_vm = get_locked_server(server_id)
        backend_mod.process_op_status(
            vm=locked_vm, etime=self.event_time, jobid=0,
            opcode="OP_INSTANCE_SET_PARAMS", status='success',
            logmsg="Reconciliation: simulated Ganeti event",
            hvparams=fix_hvparams)
Ejemplo n.º 9
0
 def _reconcile(server_id, atomic_context=None):
     """Simulate a successful OP_INSTANCE_REMOVE job for ``server_id``.

     ``self`` is not a parameter: it is looked up in a surrounding scope
     that is not visible here (presumably the enclosing method).
     """
     simulated_event = {
         "vm": get_locked_server(server_id),
         "etime": self.event_time,
         "jobid": 0,
         "opcode": 'OP_INSTANCE_REMOVE',
         "status": 'success',
         "logmsg": 'Reconciliation: simulated Ganeti event',
         "atomic_context": atomic_context,
     }
     backend_mod.process_op_status(**simulated_event)
Ejemplo n.º 10
0
    def reconcile_unsynced_rescue(self, server_id, gnt_server, db_server):
        """Report and optionally fix rescue-mode drift between DB and Ganeti.

        Ganeti is considered to have the server in rescue mode when its
        ``boot_order`` hvparam starts with 'cdrom' and ``cdrom_image_path``
        is non-empty. The state is unsynced when exactly one side is in
        rescue mode, or when both are but with a different image location.
        Nothing is done if ``gnt_server`` carries no "hvparams". An unsynced
        state is logged and, with the "fix_unsynced_rescue" option set, a
        successful OP_INSTANCE_SET_PARAMS event with the Ganeti hvparams is
        simulated.
        """
        gnt_hvparams = gnt_server.get("hvparams")
        if gnt_hvparams is None:
            return

        cdrom_image_path = gnt_hvparams.get('cdrom_image_path', '')
        boot_order = gnt_hvparams.get('boot_order', '')
        rescue_in_ganeti = (boot_order.startswith('cdrom') and
                            len(cdrom_image_path) > 0)

        if not rescue_in_ganeti:
            # Not in rescue mode in Ganeti: any rescue trace in cyclades
            # means the two are out of sync.
            changed = bool(db_server.rescue or
                           db_server.rescue_image is not None)
        elif not db_server.rescue or db_server.rescue_image is None:
            # In rescue mode in Ganeti, but not in cyclades.
            changed = True
        else:
            # In rescue mode on both sides: compare the image locations.
            db_image = db_server.rescue_image
            location = db_image.location
            if db_image.location_type == RescueImage.FILETYPE_FILE:
                # Ganeti always holds a full path, while cyclades stores
                # only the filename.
                location = os.path.join(settings.RESCUE_IMAGE_PATH,
                                        location)
            # Unsynced when Ganeti uses a different image than cyclades.
            changed = location != cdrom_image_path

        if not changed:
            return

        if db_server.rescue_image:
            db_location = db_server.rescue_image.location
        else:
            db_location = None
        self.log.info(
            "Found unsynced rescue state for server %s: "
            "VM rescue: %s, VM rescue image location: %s "
            "boot_order: %s, cdrom_image_path: %s",
            db_server.id, db_server.rescue, db_location,
            boot_order, cdrom_image_path)

        if not self.options["fix_unsynced_rescue"]:
            return
        fix_hvparams = {
            'boot_order': boot_order,
            'cdrom_image_path': cdrom_image_path
        }
        locked_vm = get_locked_server(server_id)
        backend_mod.process_op_status(
            vm=locked_vm,
            etime=self.event_time,
            jobid=0,
            opcode="OP_INSTANCE_SET_PARAMS",
            status='success',
            logmsg="Reconciliation: simulated Ganeti event",
            hvparams=fix_hvparams)
Ejemplo n.º 11
0
 def reconcile_building_server(self, db_server):
     """Handle a server that is BUILD in the DB but 'ERROR' in Ganeti.

     The mismatch is always logged. When the "fix_unsynced" option is set,
     a failed OP_INSTANCE_CREATE job is simulated for the server through
     ``backend_mod.process_op_status``.
     """
     server_id = db_server.id
     self.log.info("Server '%s' is BUILD in DB, but 'ERROR' in Ganeti.",
                   server_id)
     if not self.options["fix_unsynced"]:
         return

     locked_vm = get_locked_server(server_id)
     backend_mod.process_op_status(
         vm=locked_vm, etime=self.event_time, jobid=0,
         opcode="OP_INSTANCE_CREATE", status='error',
         logmsg='Reconciliation: simulated Ganeti event')
     self.log.debug("Simulated Ganeti error build event for server '%s'",
                    server_id)
Ejemplo n.º 12
0
def update_db(vm, msg, event_time):
    """Handle a 'ganeti-op-status' notification for ``vm``.

    Messages of any other type are logged as errors and dropped. Otherwise
    the job id, operation, status, log message and the optional "nics"
    entry of ``msg`` are handed to ``backend.process_op_status`` together
    with ``event_time``.
    """
    log.debug("Processing ganeti-op-status msg: %s", msg)

    msg_type = msg['type']
    if msg_type != "ganeti-op-status":
        log.error("Message is of unknown type %s.", msg_type)
        return

    # "nics" is optional; the remaining fields are required.
    nics = msg.get("nics", None)
    job_id = msg['jobId']
    opcode = msg['operation']
    status = msg['status']
    logmsg = msg['logmsg']
    backend.process_op_status(vm, event_time, job_id, opcode, status,
                              logmsg, nics)

    log.debug("Done processing ganeti-op-status msg for vm %s.",
              msg['instance'])
Ejemplo n.º 13
0
def update_db(vm, msg, event_time):
    """Handle a 'ganeti-op-status' notification for ``vm``.

    Messages of any other type are logged as errors and dropped. Otherwise
    the job id, operation, status, log message and the optional "nics"
    entry of ``msg`` are handed to ``backend.process_op_status`` together
    with ``event_time``.
    """
    log.debug("Processing ganeti-op-status msg: %s", msg)

    msg_type = msg['type']
    if msg_type != "ganeti-op-status":
        log.error("Message is of unknown type %s.", msg_type)
        return

    # "nics" is optional; the remaining fields are required.
    nics = msg.get("nics", None)
    job_id = msg['jobId']
    opcode = msg['operation']
    status = msg['status']
    logmsg = msg['logmsg']
    backend.process_op_status(vm, event_time, job_id, opcode, status,
                              logmsg, nics)

    log.debug("Done processing ganeti-op-status msg for vm %s.",
              msg['instance'])
Ejemplo n.º 14
0
    def reconcile_unsynced_flavor(self, server_id, db_server, gnt_server,
                                  atomic_context=None):
        """Report and optionally fix a flavor mismatch between DB and Ganeti.

        The RAM, CPU and disk of the DB flavor are compared with
        ``gnt_server["flavor"]``. On a mismatch, the matching ``Flavor``
        (same volume type as the DB flavor) is looked up; if none exists a
        warning is logged and nothing else happens. Otherwise the mismatch
        is logged and, with the "fix_unsynced_flavors" option set, a
        successful OP_INSTANCE_SET_PARAMS event with the new beparams is
        simulated, after which the VM's previous operstate is restored.
        """
        db_flavor = db_server.flavor
        reported = gnt_server["flavor"]
        specs_differ = (db_flavor.ram != reported["ram"] or
                        db_flavor.cpu != reported["vcpus"] or
                        db_flavor.disk != reported["disk"])
        if not specs_differ:
            return

        try:
            gnt_flavor = Flavor.objects.get(
                ram=reported["ram"], cpu=reported["vcpus"],
                disk=reported["disk"],
                volume_type_id=db_flavor.volume_type_id)
        except Flavor.DoesNotExist:
            self.log.warning("Server '%s' has unknown flavor.", server_id)
            return

        self.log.info("Server '%s' has flavor '%s' in DB and '%s' in"
                      " Ganeti", server_id, db_flavor, gnt_flavor)
        if not self.options["fix_unsynced_flavors"]:
            return

        locked_vm = get_locked_server(server_id)
        previous_state = locked_vm.operstate
        new_beparams = {"vcpus": gnt_flavor.cpu,
                        "minmem": gnt_flavor.ram,
                        "maxmem": gnt_flavor.ram}
        backend_mod.process_op_status(
            vm=locked_vm, etime=self.event_time, jobid=0,
            opcode="OP_INSTANCE_SET_PARAMS", status='success',
            job_fields={"beparams": new_beparams},
            logmsg='Reconciliation: simulated Ganeti event',
            atomic_context=atomic_context)
        # process_op_status with beparams will set the vmstate to
        # shutdown. Undo that by re-reading the VM and restoring the state
        # it had before the simulated event.
        refreshed_vm = VirtualMachine.objects.get(pk=server_id)
        refreshed_vm.operstate = previous_state
        refreshed_vm.save()
        self.log.debug("Simulated Ganeti flavor event for server '%s'",
                       server_id)
Ejemplo n.º 15
0
 def reconcile_building_server(self, db_server):
     """Handle a server that is BUILD in the DB but 'ERROR' in Ganeti.

     The mismatch is always logged. When the "fix_unsynced" option is set,
     a failed OP_INSTANCE_CREATE job is simulated for the server through
     ``backend_mod.process_op_status``.
     """
     server_id = db_server.id
     self.log.info("Server '%s' is BUILD in DB, but 'ERROR' in Ganeti.",
                   server_id)
     if not self.options["fix_unsynced"]:
         return

     locked_vm = get_locked_server(server_id)
     backend_mod.process_op_status(
         vm=locked_vm, etime=self.event_time, jobid=0,
         opcode="OP_INSTANCE_CREATE", status='error',
         logmsg='Reconciliation: simulated Ganeti event')
     self.log.debug("Simulated Ganeti error build event for server '%s'",
                    server_id)
Ejemplo n.º 16
0
 def reconcile_unsynced_nics(self, server_id, db_server, gnt_server,
                             atomic_context=None):
     """Detect, report and optionally repair NIC drift for one server.

     The NICs of ``db_server`` (ordered by id) are compared pairwise with
     the parsed ``gnt_server["nics"]`` (sorted by key). NICs that are in
     "BUILD" state and were created at or before
     ``event_time - BUILDING_NIC_TIMEOUT`` are left out of the comparison.
     If the Ganeti NICs cannot be parsed because of an unknown network, a
     warning is logged and nothing else happens. Otherwise a differing
     count, id or content is logged; when the "fix_unsynced_nics" option is
     set, a successful OP_INSTANCE_SET_PARAMS event carrying the Ganeti
     NICs is simulated, forwarding ``atomic_context``.
     """
     cutoff = self.event_time - BUILDING_NIC_TIMEOUT
     db_nics = db_server.nics.exclude(state="BUILD", created__lte=cutoff)
     db_nics = db_nics.order_by("id")
     gnt_nics = gnt_server["nics"]
     try:
         parsed_nics = backend_mod.parse_instance_nics(gnt_nics)
     except Network.InvalidBackendIdError as err:
         self.log.warning("Server %s is connected to unknown network %s"
                          " Cannot reconcile server." % (server_id, str(err)))
         return

     count_differs = len(db_nics) != len(gnt_nics)
     gnt_sorted = sorted(parsed_nics.items())
     # Stop at the first pair whose id or content does not match.
     content_differs = any(
         db_nic.id != gnt_id or
         not backend_mod.nics_are_equal(db_nic, gnt_nic)
         for db_nic, (gnt_id, gnt_nic) in zip(db_nics, gnt_sorted))
     if not (count_differs or content_differs):
         return

     report = "Found unsynced NICs for server '%s'.\n"\
              "\tDB:\n\t\t%s\n\tGaneti:\n\t\t%s"
     separator = "\n\t\t"
     db_lines = separator.join(format_db_nic(nic) for nic in db_nics)
     gnt_lines = separator.join(format_gnt_nic(item) for item in gnt_sorted)
     self.log.info(report, server_id, db_lines, gnt_lines)

     if not self.options["fix_unsynced_nics"]:
         return
     locked_vm = get_locked_server(server_id)
     backend_mod.process_op_status(
         vm=locked_vm, etime=self.event_time, jobid=0,
         opcode="OP_INSTANCE_SET_PARAMS", status='success',
         logmsg="Reconciliation: simulated Ganeti event",
         nics=gnt_nics, atomic_context=atomic_context)
Ejemplo n.º 17
0
    def reconcile_stale_servers(self):
        """Find servers present in the DB but not in Ganeti and handle them.

        For every such server:

        * in BUILD state, the build status decides: "ERROR" is delegated to
          ``reconcile_building_server``, "RUNNING" is skipped, anything
          else marks the server as stale;
        * in ERROR state with an action other than "DESTROY", it is skipped;
        * otherwise it is stale.

        Stale servers are logged and, with the "fix_stale" option set, a
        successful OP_INSTANCE_REMOVE job is simulated for each of them.
        """
        stale = []
        for server_id in self.db_servers_keys - self.gnt_servers_keys:
            db_server = self.db_servers[server_id]
            state = db_server.operstate
            if state == "BUILD":
                build_status, _ = self.get_build_status(db_server)
                if build_status == "ERROR":
                    # Build errors get their own special handling.
                    self.reconcile_building_server(db_server)
                    continue
                if build_status == "RUNNING":
                    continue
            elif state == "ERROR" and db_server.action != "DESTROY":
                # Servers at building ERROR are stale only if the user has
                # asked to destroy them.
                continue
            stale.append(server_id)

        # Report them
        if not stale:
            self.log.debug("No stale servers at backend %s", self.backend)
            return
        self.log.info("Found stale servers %s at backend %s",
                      ", ".join(str(server_id) for server_id in stale),
                      self.backend)

        # Fix them
        if not self.options["fix_stale"]:
            return
        for server_id in stale:
            locked_vm = get_locked_server(server_id)
            backend_mod.process_op_status(
                vm=locked_vm, etime=self.event_time, jobid=0,
                opcode='OP_INSTANCE_REMOVE', status='success',
                logmsg='Reconciliation: simulated Ganeti event')
        self.log.debug("Simulated Ganeti removal for stale servers.")
Ejemplo n.º 18
0
    def reconcile_unsynced_flavor(self, server_id, db_server, gnt_server,
                                  atomic_context=None):
        """Report and optionally fix a flavor mismatch between DB and Ganeti.

        The RAM, CPU and disk of the DB flavor are compared with
        ``gnt_server["flavor"]``. On a mismatch, the matching ``Flavor``
        (same volume type as the DB flavor) is looked up; if none exists a
        warning is logged and nothing else happens. Otherwise the mismatch
        is logged and, with the "fix_unsynced_flavors" option set, a
        successful OP_INSTANCE_SET_PARAMS event with the new beparams is
        simulated, after which the VM's previous operstate is restored.
        """
        db_flavor = db_server.flavor
        reported = gnt_server["flavor"]
        specs_differ = (db_flavor.ram != reported["ram"] or
                        db_flavor.cpu != reported["vcpus"] or
                        db_flavor.disk != reported["disk"])
        if not specs_differ:
            return

        try:
            gnt_flavor = Flavor.objects.get(
                ram=reported["ram"],
                cpu=reported["vcpus"],
                disk=reported["disk"],
                volume_type_id=db_flavor.volume_type_id)
        except Flavor.DoesNotExist:
            self.log.warning("Server '%s' has unknown flavor.", server_id)
            return

        self.log.info(
            "Server '%s' has flavor '%s' in DB and '%s' in"
            " Ganeti", server_id, db_flavor, gnt_flavor)
        if not self.options["fix_unsynced_flavors"]:
            return

        locked_vm = get_locked_server(server_id)
        previous_state = locked_vm.operstate
        new_beparams = {
            "vcpus": gnt_flavor.cpu,
            "minmem": gnt_flavor.ram,
            "maxmem": gnt_flavor.ram,
        }
        backend_mod.process_op_status(
            vm=locked_vm,
            etime=self.event_time,
            jobid=0,
            opcode="OP_INSTANCE_SET_PARAMS",
            status='success',
            job_fields={"beparams": new_beparams},
            logmsg='Reconciliation: simulated Ganeti event',
            atomic_context=atomic_context)
        # process_op_status with beparams will set the vmstate to
        # shutdown. Undo that by re-reading the VM and restoring the state
        # it had before the simulated event.
        refreshed_vm = VirtualMachine.objects.get(pk=server_id)
        refreshed_vm.operstate = previous_state
        refreshed_vm.save()
        self.log.debug("Simulated Ganeti flavor event for server '%s'",
                       server_id)
Ejemplo n.º 19
0
    def reconcile_stale_servers(self):
        """Find servers present in the DB but not in Ganeti and handle them.

        For every such server:

        * in BUILD state, the build status decides: "ERROR" is delegated to
          ``reconcile_building_server``, "RUNNING" is skipped, anything
          else marks the server as stale;
        * in ERROR state with an action other than "DESTROY", it is skipped;
        * otherwise it is stale.

        Stale servers are logged and, with the "fix_stale" option set, a
        successful OP_INSTANCE_REMOVE job is simulated for each of them.
        """
        stale = []
        for server_id in self.db_servers_keys - self.gnt_servers_keys:
            db_server = self.db_servers[server_id]
            state = db_server.operstate
            if state == "BUILD":
                build_status, _ = self.get_build_status(db_server)
                if build_status == "ERROR":
                    # Build errors get their own special handling.
                    self.reconcile_building_server(db_server)
                    continue
                if build_status == "RUNNING":
                    continue
            elif state == "ERROR" and db_server.action != "DESTROY":
                # Servers at building ERROR are stale only if the user has
                # asked to destroy them.
                continue
            stale.append(server_id)

        # Report them
        if not stale:
            self.log.debug("No stale servers at backend %s", self.backend)
            return
        self.log.info("Found stale servers %s at backend %s",
                      ", ".join(str(server_id) for server_id in stale),
                      self.backend)

        # Fix them
        if not self.options["fix_stale"]:
            return
        for server_id in stale:
            locked_vm = get_locked_server(server_id)
            backend_mod.process_op_status(
                vm=locked_vm,
                etime=self.event_time,
                jobid=0,
                opcode='OP_INSTANCE_REMOVE',
                status='success',
                logmsg='Reconciliation: simulated Ganeti event')
        self.log.debug("Simulated Ganeti removal for stale servers.")
Ejemplo n.º 20
0
def console(vm, console_type):
    """Set up an out-of-band console of the given type for ``vm``.

    The VM must be in "STARTED" operstate and actually running in Ganeti;
    otherwise ``faults.BadRequest`` is raised (after simulating a shutdown
    job if the DB and Ganeti disagree). VNC forwarding is requested from
    vncauthproxy with a freshly generated password, and a dict with the
    keys 'type', 'host', 'port' and 'password' describing the forwarded
    endpoint is returned. ``faults.ServiceUnavailable`` is raised if
    vncauthproxy reports an error or if the instance's VNC endpoint changed
    while the forwarding was being set up.
    """
    log.info("Get console  VM %s, type %s", vm, console_type)

    if vm.operstate != "STARTED":
        raise faults.BadRequest('Server not in ACTIVE state.')

    # RAPI GetInstanceConsole() returns endpoints to the vnc_bind_address,
    # which is a cluster-wide setting, either 0.0.0.0 or 127.0.0.1, and
    # pretty useless (see #783). Until this is fixed on the Ganeti side,
    # the console info is built directly from the instance info.
    #
    # WARNING: This assumes that VNC runs on port network_port on the
    #          instance's primary node, and is probably hypervisor-specific.
    def get_console_data(instance):
        return {
            "kind": "vnc",
            "host": instance["pnode"],
            "port": instance["network_port"],
        }

    with pooled_rapi_client(vm) as client:
        instance = client.GetInstance(vm.backend_vm_id)
    console_data = get_console_data(instance)

    serial_console = (vm.backend.hypervisor == "kvm" and
                      instance['hvparams']['serial_console'])
    if serial_console:
        raise Exception("hv parameter serial_console cannot be true")

    # Check that the instance is really running
    if not instance["oper_state"]:
        log.warning("VM '%s' is marked as '%s' in DB while DOWN in Ganeti",
                    vm.id, vm.operstate)
        # The instance is down: mock a shutdown job so the DB catches up.
        backend.process_op_status(
            vm, etime=datetime.now(), jobid=0,
            opcode="OP_INSTANCE_SHUTDOWN", status="success",
            logmsg="Reconciliation simulated event")
        raise faults.BadRequest('Server not in ACTIVE state.')

    # A source port of 0 lets vncauthproxy decide on the source port.
    # The alternative would be static allocation, e.g.
    # console_data['port'] - 1000
    source_port = 0
    dest_address = console_data['host']
    dest_port = console_data['port']
    password = util.random_password()

    # The setting is either a single mapping of options or a list of such
    # mappings; with a list, one entry is picked via choice().
    proxy_opts = settings.CYCLADES_VNCAUTHPROXY_OPTS
    if isinstance(proxy_opts, list):
        proxy_opts = choice(proxy_opts)

    fwd = request_vnc_forwarding(source_port, dest_address, dest_port,
                                 password, console_type=console_type,
                                 **proxy_opts)

    if fwd['status'] != "OK":
        log.error("vncauthproxy returned error status: '%s'" % fwd)
        raise faults.ServiceUnavailable('vncauthproxy returned error status')

    # Verify that the VNC server settings haven't changed
    with pooled_rapi_client(vm) as client:
        instance = client.GetInstance(vm.backend_vm_id)
    if get_console_data(instance) != console_data:
        raise faults.ServiceUnavailable('VNC Server settings changed.')

    # Fall back to this host's FQDN when the proxy gives no address.
    try:
        proxy_host = fwd['proxy_address']
    except KeyError:
        proxy_host = getfqdn()

    return {
        'type': console_type,
        'host': proxy_host,
        'port': fwd['source_port'],
        'password': password,
    }
Ejemplo n.º 21
0
def console(vm, console_type):
    """Set up an out-of-band console for a VM and return how to reach it.

    The instance's primary node and network port are read from Ganeti over
    RAPI, a forwarding protected by a freshly generated password is
    requested from vncauthproxy, and the resulting connection details are
    handed back to the caller.

    Returns a dict with the keys 'type', 'host', 'port' and 'password'.

    Raises faults.BadRequest when the VM is not STARTED in the DB or is
    reported as not running by Ganeti, faults.ServiceUnavailable when
    vncauthproxy answers with a non-OK status or when the instance's
    console endpoint changed while the forwarding was being set up, and a
    plain Exception when a kvm instance has the serial_console hvparam set.

    """
    log.info("Get console  VM %s, type %s", vm, console_type)

    if vm.operstate != "STARTED":
        raise faults.BadRequest('Server not in ACTIVE state.')

    # RAPI's GetInstanceConsole() only reports the cluster-wide
    # vnc_bind_address (0.0.0.0 or 127.0.0.1), which is of no use here
    # (see #783). Until that is fixed on the Ganeti side, the console
    # endpoint is assembled by hand from the instance info.
    #
    # WARNING: This assumes that VNC runs on port network_port on
    #          the instance's primary node, and is probably
    #          hypervisor-specific.
    def vnc_endpoint(instance):
        endpoint = {"kind": "vnc"}
        endpoint["host"] = instance["pnode"]
        endpoint["port"] = instance["network_port"]
        return endpoint

    with pooled_rapi_client(vm) as client:
        instance = client.GetInstance(vm.backend_vm_id)
    console_data = vnc_endpoint(instance)

    if vm.backend.hypervisor == "kvm":
        if instance['hvparams']['serial_console']:
            raise Exception("hv parameter serial_console cannot be true")

    # The DB says STARTED; make sure Ganeti agrees before going further.
    if not instance["oper_state"]:
        log.warning("VM '%s' is marked as '%s' in DB while DOWN in Ganeti",
                    vm.id, vm.operstate)
        # Bring the DB in line by replaying a successful shutdown job.
        shutdown_event = {
            "etime": datetime.now(),
            "jobid": 0,
            "opcode": "OP_INSTANCE_SHUTDOWN",
            "status": "success",
            "logmsg": "Reconciliation simulated event",
        }
        backend.process_op_status(vm, **shutdown_event)
        raise faults.BadRequest('Server not in ACTIVE state.')

    password = util.random_password()

    # The setting may be a single dict of options or a list of such dicts;
    # when it is a list, one entry is picked with choice().
    proxy_opts = settings.CYCLADES_VNCAUTHPROXY_OPTS
    if isinstance(proxy_opts, list):
        proxy_opts = choice(proxy_opts)

    # Source port 0 leaves the choice of port to vncauthproxy. The
    # alternative would be a static allocation such as
    # console_data['port'] - 1000.
    fwd = request_vnc_forwarding(0, console_data['host'],
                                 console_data['port'], password,
                                 console_type=console_type, **proxy_opts)

    if fwd['status'] != "OK":
        log.error("vncauthproxy returned error status: '%s'" % fwd)
        raise faults.ServiceUnavailable('vncauthproxy returned error status')

    # Re-read the instance: the forwarding is only valid if the endpoint it
    # targets is still the one Ganeti reports now.
    with pooled_rapi_client(vm) as client:
        instance = client.GetInstance(vm.backend_vm_id)
    if vnc_endpoint(instance) != console_data:
        raise faults.ServiceUnavailable('VNC Server settings changed.')

    # Fall back to this host's FQDN when the proxy did not name its address.
    try:
        proxy_host = fwd['proxy_address']
    except KeyError:
        proxy_host = getfqdn()

    return {
        'type': console_type,
        'host': proxy_host,
        'port': fwd['source_port'],
        'password': password,
    }
Ejemplo n.º 22
0
def do_create_server(userid, name, password, flavor, image, metadata=None,
                     personality=None, network=None, backend=None):
    """Create the DB records for a new VM and enqueue its creation job.

    The work is done in two steps, each ending in an explicit commit:

    1. The VirtualMachine row, its first NIC, its metadata rows and the
       quota commission are created. Any exception rolls this step back
       and is re-raised.
    2. The VM is re-read with select_for_update(), the server_created
       signal is sent, the creation job is enqueued with create_instance()
       and the job id is stored on the VM. If anything fails here, a
       successful OP_INSTANCE_REMOVE is simulated through
       process_op_status() and the exception is re-raised.

    Arguments:
        userid: owner of the VM; also handed to the backend allocator.
        name: name stored on the VM.
        password: sent to the signal receivers as 'img_passwd'.
        flavor: flavor object; its disk_template, disk_provider and
            disk_origin attributes are overwritten here.
        image: mapping read for 'id', 'checksum', 'backend_id', 'format'
            and 'metadata'. Its 'backend_id' is overwritten with 'null'
            when the flavor has a disk provider.
        metadata: mapping of metadata key to value, one
            VirtualMachineMetadata row each. None means no metadata.
        personality: JSON-serialisable value sent as 'img_personality'.
            None is treated as an empty list.
        network: network of the first NIC. When None, a network and
            address are obtained from util.get_public_ip(backend).
        backend: backend to host the VM. When None, one is allocated.

    Returns the VirtualMachine object.

    Raises faults.ServiceUnavailable when no backend can be allocated;
    every other exception raised along the way is propagated.
    """
    # None sentinels instead of mutable default arguments, so no dict/list
    # object is shared between calls.
    if metadata is None:
        metadata = {}
    if personality is None:
        personality = []

    # Fix flavor for archipelago
    disk_template, provider = util.get_flavor_provider(flavor)
    if provider:
        flavor.disk_template = disk_template
        flavor.disk_provider = provider
        flavor.disk_origin = image['checksum']
        image['backend_id'] = 'null'
    else:
        flavor.disk_provider = None
        flavor.disk_origin = None

    try:
        if backend is None:
            # Allocate backend to host the server.
            backend_allocator = BackendAllocator()
            backend = backend_allocator.allocate(userid, flavor)
            if backend is None:
                log.error("No available backend for VM with flavor %s", flavor)
                raise faults.ServiceUnavailable("No available backends")

        if network is None:
            # Allocate IP from public network
            (network, address) = util.get_public_ip(backend)
        else:
            address = util.get_network_free_address(network)

        # We must save the VM instance now, so that it gets a valid
        # vm.backend_vm_id.
        vm = VirtualMachine.objects.create(
            name=name,
            backend=backend,
            userid=userid,
            imageid=image["id"],
            flavor=flavor,
            action="CREATE")

        log.info("Created entry in DB for VM '%s'", vm)

        # Create VM's public NIC. Do not wait for a notification from the
        # Ganeti hooks to create this NIC, because if the hooks never run
        # (e.g. building error) the VM's public IP address will never be
        # released!
        nic = NetworkInterface.objects.create(machine=vm, network=network,
                                              index=0, ipv4=address,
                                              state="BUILDING")

        # Also we must create the VM metadata in the same transaction.
        for key, val in metadata.items():
            VirtualMachineMetadata.objects.create(
                meta_key=key,
                meta_value=val,
                vm=vm)
        # Issue commission to Quotaholder and accept it since at the end of
        # this transaction the VirtualMachine object will be created in the DB.
        # Note: the following call does a commit!
        quotas.issue_and_accept_commission(vm)
    except:
        # Bare except on purpose: whatever went wrong, undo the DB work and
        # let the original exception propagate.
        transaction.rollback()
        raise
    else:
        transaction.commit()

    try:
        vm = VirtualMachine.objects.select_for_update().get(id=vm.id)
        # dispatch server created signal needed to trigger the 'vmapi', which
        # enriches the vm object with the 'config_url' attribute which must be
        # passed to the Ganeti job.
        server_created.send(sender=vm, created_vm_params={
            'img_id': image['backend_id'],
            'img_passwd': password,
            'img_format': str(image['format']),
            'img_personality': json.dumps(personality),
            'img_properties': json.dumps(image['metadata']),
        })

        jobID = create_instance(vm, nic, flavor, image)
        # At this point the job is enqueued in the Ganeti backend
        vm.backendopcode = "OP_INSTANCE_CREATE"
        vm.backendjobid = jobID
        vm.save()
        transaction.commit()
        log.info("User %s created VM %s, NIC %s, Backend %s, JobID %s",
                 userid, vm, nic, backend, str(jobID))
    except:
        # If an exception is raised, then the user will never get the VM id.
        # In order to delete it from DB and release its resources, we
        # mock a successful OP_INSTANCE_REMOVE job.
        process_op_status(vm=vm,
                          etime=datetime.datetime.now(),
                          jobid=-0,
                          opcode="OP_INSTANCE_REMOVE",
                          status="success",
                          logmsg="Reconciled eventd: VM creation failed.")
        raise

    return vm
Ejemplo n.º 23
0
    def handle(self, **options):
        verbosity = int(options['verbosity'])
        self._process_args(options)
        backend_id = options['backend-id']
        backend = get_backend(backend_id) if backend_id else None

        G, GNics = reconciliation.get_instances_from_ganeti(backend)
        D = reconciliation.get_servers_from_db(backend)

        DBNics = reconciliation.get_nics_from_db(backend)

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                print >> sys.stderr, "Found the following stale server IDs: "
                print "    " + "\n    ".join([str(x) for x in stale])
            elif verbosity == 2:
                print >> sys.stderr, "Found no stale server IDs in DB."

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
                print "    " + "\n    ".join([str(x) for x in orphans])
            elif verbosity == 2:
                print >> sys.stderr, "Found no orphan Ganeti instances."

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                print >> sys.stderr, "The operstate of the following server" \
                                     " IDs is out-of-sync:"
                print "    " + "\n    ".join([
                    "%d is %s in DB, %s in Ganeti" %
                    (x[0], x[1], ('UP' if x[2] else 'DOWN')) for x in unsynced
                ])
            elif verbosity == 2:
                print >> sys.stderr, "The operstate of all servers is in sync."

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            if len(build_errors) > 0:
                msg = "The os for the following server IDs was not build"\
                      " successfully:"
                print >> sys.stderr, msg
                print "    " + "\n    ".join(["%d" % x for x in build_errors])
            elif verbosity == 2:
                print >> sys.stderr, "Found no instances with build errors."

        if options['detect_unsynced_nics']:

            def pretty_print_nics(nics):
                if not nics:
                    print ''.ljust(18) + 'None'
                for index, info in nics.items():
                    print ''.ljust(18) + 'nic/' + str(index) +\
                          ': MAC: %s, IP: %s, Network: %s' % \
                          (info['mac'], info['ipv4'], info['network'])

            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
            if len(unsynced_nics) > 0:
                msg = "The NICs of the servers with the following IDs are"\
                      " unsynced:"
                print >> sys.stderr, msg
                for id, nics in unsynced_nics.items():
                    print ''.ljust(2) + '%6d:' % id
                    print ''.ljust(8) + '%8s:' % 'DB'
                    pretty_print_nics(nics[0])
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
                    pretty_print_nics(nics[1])
            elif verbosity == 2:
                print >> sys.stderr, "All instance nics are synced."

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            print >> sys.stderr, \
                "Simulating successful Ganeti removal for %d " \
                "servers in the DB:" % len(stale)
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode='OP_INSTANCE_REMOVE',
                    status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_orphans'] and len(orphans) > 0:
            print >> sys.stderr, \
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
                len(orphans)
            for id in orphans:
                try:
                    vm = VirtualMachine.objects.get(pk=id)
                    with pooled_rapi_client(vm) as client:
                        client.DeleteInstance(utils.id_to_instance_name(id))
                except VirtualMachine.DoesNotExist:
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced'] and len(unsynced) > 0:
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
                len(unsynced)
            for id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=id)
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
                         else "OP_INSTANCE_SHUTDOWN"
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode=opcode,
                    status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_build_errors'] and len(build_errors) > 0:
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\
                                 len(build_errors)
            for id in build_errors:
                vm = VirtualMachine.objects.get(pk=id)
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode="OP_INSTANCE_CREATE",
                    status='error',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
                                 len(unsynced_nics)
            for id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=id)
                nics = nics[1]  # Ganeti nics
                if nics == {}:  # No nics
                    vm.nics.all.delete()
                    continue
                for index, nic in nics.items():
                    net_id = utils.id_from_network_name(nic['network'])
                    subnet6 = Network.objects.get(id=net_id).subnet6
                    # Produce ipv6
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
                    nic['ipv6'] = ipv6
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to sorted list
                final_nics = []
                nics_keys = nics.keys()
                nics_keys.sort()
                for i in nics_keys:
                    if nics[i]['network']:
                        final_nics.append(nics[i])
                    else:
                        print 'Network of nic %d of vm %s is None. ' \
                              'Can not reconcile' % (i, vm.backend_vm_id)
                event_time = datetime.datetime.now()
                backend_mod.process_net_status(vm=vm,
                                               etime=event_time,
                                               nics=final_nics)
            print >> sys.stderr, "    ...done"
Ejemplo n.º 24
0
    def handle(self, **options):
        verbosity = int(options['verbosity'])
        self._process_args(options)
        backend_id = options['backend-id']
        backend = get_backend(backend_id) if backend_id else None

        G, GNics = reconciliation.get_instances_from_ganeti(backend)
        D = reconciliation.get_servers_from_db(backend)

        DBNics = reconciliation.get_nics_from_db(backend)

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                print >> sys.stderr, "Found the following stale server IDs: "
                print "    " + "\n    ".join(
                    [str(x) for x in stale])
            elif verbosity == 2:
                print >> sys.stderr, "Found no stale server IDs in DB."

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
                print "    " + "\n    ".join(
                    [str(x) for x in orphans])
            elif verbosity == 2:
                print >> sys.stderr, "Found no orphan Ganeti instances."

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                print >> sys.stderr, "The operstate of the following server" \
                                     " IDs is out-of-sync:"
                print "    " + "\n    ".join(
                    ["%d is %s in DB, %s in Ganeti" %
                     (x[0], x[1], ('UP' if x[2] else 'DOWN'))
                     for x in unsynced])
            elif verbosity == 2:
                print >> sys.stderr, "The operstate of all servers is in sync."

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            if len(build_errors) > 0:
                msg = "The os for the following server IDs was not build"\
                      " successfully:"
                print >> sys.stderr, msg
                print "    " + "\n    ".join(
                    ["%d" % x for x in build_errors])
            elif verbosity == 2:
                print >> sys.stderr, "Found no instances with build errors."

        if options['detect_unsynced_nics']:
            def pretty_print_nics(nics):
                if not nics:
                    print ''.ljust(18) + 'None'
                for index, info in nics.items():
                    print ''.ljust(18) + 'nic/' + str(index) +\
                          ': MAC: %s, IP: %s, Network: %s' % \
                          (info['mac'], info['ipv4'], info['network'])

            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
            if len(unsynced_nics) > 0:
                msg = "The NICs of the servers with the following IDs are"\
                      " unsynced:"
                print >> sys.stderr, msg
                for id, nics in unsynced_nics.items():
                    print ''.ljust(2) + '%6d:' % id
                    print ''.ljust(8) + '%8s:' % 'DB'
                    pretty_print_nics(nics[0])
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
                    pretty_print_nics(nics[1])
            elif verbosity == 2:
                print >> sys.stderr, "All instance nics are synced."

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            print >> sys.stderr, \
                "Simulating successful Ganeti removal for %d " \
                "servers in the DB:" % len(stale)
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm,
                    etime=event_time,
                    jobid=-0,
                    opcode='OP_INSTANCE_REMOVE', status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_orphans'] and len(orphans) > 0:
            print >> sys.stderr, \
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
                len(orphans)
            for id in orphans:
                try:
                    vm = VirtualMachine.objects.get(pk=id)
                    with pooled_rapi_client(vm) as client:
                        client.DeleteInstance(utils.id_to_instance_name(id))
                except VirtualMachine.DoesNotExist:
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced'] and len(unsynced) > 0:
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
                len(unsynced)
            for id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=id)
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
                         else "OP_INSTANCE_SHUTDOWN"
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm, etime=event_time, jobid=-0,
                    opcode=opcode, status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_build_errors'] and len(build_errors) > 0:
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\
                                 len(build_errors)
            for id in build_errors:
                vm = VirtualMachine.objects.get(pk=id)
                event_time = datetime.datetime.now()
                backend_mod.process_op_status(
                    vm=vm, etime=event_time, jobid=-0,
                    opcode="OP_INSTANCE_CREATE", status='error',
                    logmsg='Reconciliation: simulated Ganeti event')
            print >> sys.stderr, "    ...done"

        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
                                 len(unsynced_nics)
            for id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=id)
                nics = nics[1]  # Ganeti nics
                if nics == {}:  # No nics
                    vm.nics.all.delete()
                    continue
                for index, nic in nics.items():
                    net_id = utils.id_from_network_name(nic['network'])
                    subnet6 = Network.objects.get(id=net_id).subnet6
                    # Produce ipv6
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
                    nic['ipv6'] = ipv6
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to sorted list
                final_nics = []
                nics_keys = nics.keys()
                nics_keys.sort()
                for i in nics_keys:
                    if nics[i]['network']:
                        final_nics.append(nics[i])
                    else:
                        print 'Network of nic %d of vm %s is None. ' \
                              'Can not reconcile' % (i, vm.backend_vm_id)
                event_time = datetime.datetime.now()
                backend_mod.process_net_status(vm=vm, etime=event_time,
                                               nics=final_nics)
            print >> sys.stderr, "    ...done"