Example #1
    def deploy_goldimage_image_service(self, goldimages_directory,
                                       goldimage_name):
        """
    Deploy a gold image to the image service.

    Args:
      goldimage_name (str): Name of the gold image to deploy.

    Returns:
      str: ID of the created disk image.
    """
        arch = self.get_cluster_architecture()
        # Select a vdisk format to use. Currently PPC64LE goldimages are built
        # only in qcow2 format and x86_64 goldimages only in VMDK. We could
        # have the manager perform a conversion, but Acropolis can already do
        # the image conversion for us.
        if arch == GoldImageManager.ARCH_PPC64LE:
            disk_format = GoldImageManager.FORMAT_QCOW2
        else:
            disk_format = GoldImageManager.FORMAT_VMDK

        # Use the GoldImage manager to get a path to our appropriate goldimage
        goldimage_manager = GoldImageManager(goldimages_directory)
        goldimage_path = goldimage_manager.get_goldimage_path(
            goldimage_name, format_str=disk_format, arch=arch)
        log.debug("Deploying %s to cluster", goldimage_path)

        # Deploy the image to the image service
        disk_name = os.path.splitext(os.path.basename(goldimage_path))[0]
        img_uuid, tid, _ = self._prism_client.images_create(
            NameUtil.goldimage_vmdisk_name(disk_name, "os"), goldimage_path,
            self._container_id)
        TaskPoller.execute_parallel_tasks(
            tasks=PrismTask.from_task_id(self._prism_client, tid),
            timeout_secs=3600)

        # NB: Required due to possible AHV bug. See XRAY-225.
        num_images_get_retries = 5
        for attempt_num in xrange(num_images_get_retries):
            images_get_data = self._prism_client.images_get(image_id=img_uuid)
            image_state = images_get_data["image_state"]
            if image_state.lower() == "active":
                # Return the disk image
                return images_get_data["vm_disk_id"]
            else:
                log.info(
                    "Waiting for created image to become active "
                    "(imageState: %s, retry %d of %d)", image_state,
                    attempt_num + 1, num_images_get_retries)
                log.debug(images_get_data)
                time.sleep(1)
        else:
            raise CurieException(
                CurieError.kInternalError,
                "Created image failed to become active within "
                "%d attempts" % num_images_get_retries)
Example #2
    def create_vm(self,
                  goldimages_directory,
                  goldimage_name,
                  vm_name,
                  vcpus=1,
                  ram_mb=1024,
                  node_id=None,
                  datastore_name=None,
                  data_disks=()):
        """
    See 'Cluster.create_vm' for documentation.
    """
        log.info(
            "Creating VM %s based on %s with %d vCPUs, %d MB RAM and %s "
            "disks on node %s in datastore %s ", vm_name, goldimage_name,
            vcpus, ram_mb, str(data_disks), str(node_id), datastore_name)
        image_uuid = self.deploy_goldimage_image_service(
            goldimages_directory, goldimage_name)

        # This namedtuple hackery is needed to satisfy vm.py, which expects
        # information directly parsed from an OVF file.
        Units = namedtuple("Units", ["multiplier"])
        Disk = namedtuple("Disk", ["capacity", "units"])
        attach_disks = [
            Disk(gb, Units(1024 * 1024 * 1024)) for gb in data_disks
        ]

        vm_desc = VmDescriptor(name=vm_name,
                               memory_mb=ram_mb,
                               num_vcpus=vcpus,
                               vmdisk_uuid_list=[image_uuid],
                               attached_disks=attach_disks,
                               container_uuid=self._container_id)
        # Create the VM
        log.info("Creating VM '%s' with %s MB RAM and %s vCPUs", vm_desc.name,
                 vm_desc.memory_mb, vm_desc.num_vcpus)
        nic_specs = \
          [vm_desc.to_ahv_vm_nic_create_spec(self._network_id)["specList"][0]]
        resp = self._prism_client.vms_create(vm_desc, nic_specs)
        tid = resp.get("taskUuid")
        if not tid:
            raise CurieException(CurieError.kManagementServerApiError,
                                 "Failed to deploy VM: %s" % resp)

        TaskPoller.execute_parallel_tasks(
            tasks=PrismTask.from_task_id(self._prism_client, tid),
            timeout_secs=60)

        task_json = self._prism_client.tasks_get_by_id(tid)
        vm_uuid = task_json["entityList"][0]["uuid"]

        # Make a Curie VM descriptor and assign it to the requested node
        vm = self.__vm_json_to_curie_vm(
            self._prism_client.vms_get_by_id(vm_uuid))
        vm._node_id = node_id
        return vm
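The `Units`/`Disk` namedtuples above exist only to mimic the objects vm.py builds when parsing an OVF file. A standalone sketch of the same trick, runnable on its own (the disk sizes are illustrative):

from collections import namedtuple

# Duck-typed stand-ins for OVF-parsed disk descriptions: a capacity plus a
# unit multiplier that converts the capacity to bytes.
Units = namedtuple("Units", ["multiplier"])
Disk = namedtuple("Disk", ["capacity", "units"])

data_disks = (16, 16, 32)  # sizes in GiB (assumed values)
attach_disks = [Disk(gb, Units(1024 * 1024 * 1024)) for gb in data_disks]

for disk in attach_disks:
    print("%d GiB -> %d bytes" % (disk.capacity,
                                  disk.capacity * disk.units.multiplier))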
Example #3
  def test_prism_task_from_id(self, mock_tasks_get_by_id):
    dummy_task = DummyPrismTask()
    mock_tasks_get_by_id.side_effect = MockTasksGetById()

    poller = TaskPoller(10, poll_interval_secs=0)

    prism_task = PrismTask.from_task_id(self.prism, dummy_task.id())
    mock_tasks_get_by_id.side_effect.add_task(dummy_task)
    poller.add_task(prism_task)
    poller.start()

    ret = poller.wait_for()
    self.assertNotEqual(ret, None)
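The test drives the poller manually with `add_task`/`start`/`wait_for`; the other examples use the `TaskPoller.execute_parallel_tasks` convenience wrapper for the same job. A hedged sketch of that flow, assuming `prism_client` and `task_ids` come from earlier Prism API calls:

# Sketch only: task_ids would be the taskUuids returned by Prism calls such
# as images_create or vms_delete in the surrounding examples.
tasks = [PrismTask.from_task_id(prism_client, tid) for tid in task_ids]
TaskPoller.execute_parallel_tasks(tasks=tasks, timeout_secs=300)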
Example #4
    def cleanup_images(self):
        """
        Cleans up the image service, removing any images associated with curie.
        """
        images = self._prism_client.images_get().get("entities", {})
        to_delete_image_uuids = []
        for image in images:
            if image["name"].startswith(CURIE_GOLDIMAGE_VM_DISK_PREFIX):
                to_delete_image_uuids.append(image["uuid"])
        log.info("Deleting images %s", ", ".join(to_delete_image_uuids))
        task_map = self._prism_client.images_delete(to_delete_image_uuids)
        image_id_tid_map = {}
        for image_id, tid in task_map.iteritems():
            image_id_tid_map[image_id] = PrismTask.from_task_id(
                self._prism_client, tid)
        TaskPoller.execute_parallel_tasks(
            tasks=image_id_tid_map.values(), timeout_secs=300)
Example #5
    def migrate_vms(self, vms, nodes, max_parallel_tasks=None):
        """Move 'vms' to 'nodes'.

    For each VM 'vms[xx]' move to the corresponding Node 'nodes[xx]'.

    Args:
      vms (list<Vm>): List of VMs to migrate.
      nodes (list<Node>):  List of nodes to which 'vms' should be
        migrated. Must be the same length as 'vms'.

        Each VM in 'vms' wll be moved to the corresponding node in 'nodes'.
      max_parallel_tasks (int): The number of VMs to migrate in parallel.
    """
        cutoff = self._prism_client.get_cluster_timestamp_usecs()
        # TODO (jklein): Max parallel tasks won't work unless this is converted
        # to a descriptor.
        log.info("Migrating VMS")
        if len(vms) != len(nodes):
            raise CurieException(
                CurieError.kInvalidParameter,
                "Must provide a destination node for each VM")

        ret = {}
        for ii, vm in enumerate(vms):
            ret[vm.vm_id()] = self._prism_client.vms_migrate(
                vm.vm_id(), nodes[ii].node_id())

        return PrismTaskPoller.execute_parallel_tasks(
            tasks=[
                PrismTask.from_task_id(self._prism_client, tid)
                for tid in ret.values()
            ],
            max_parallel=self._get_max_parallel_tasks(max_parallel_tasks),
            timeout_secs=len(vms) * 1200,
            prism_client=self._prism_client,
            cutoff_usecs=cutoff)
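A usage sketch for `migrate_vms`; `cluster` is assumed, `vms()` is an assumed accessor mirroring the `nodes()` call used in Example #7, and the pairing must be one destination node per VM:

# Sketch: spread the VMs round-robin across the cluster's nodes.
vms = cluster.vms()      # assumed accessor
nodes = cluster.nodes()
targets = [nodes[ii % len(nodes)] for ii in range(len(vms))]
cluster.migrate_vms(vms, targets)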
Example #6
    def __set_power_state_for_vms(self,
                                  vms,
                                  state,
                                  wait_for_ip=False,
                                  max_parallel_tasks=None,
                                  power_on_retries=10,
                                  timeout_secs=900):
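        """Set the power state of 'vms' to 'state', retrying failed power ops.

        If 'wait_for_ip' is True and 'state' is "on", also block until every
        VM reports at least one IP address. Raises CurieTestException if the
        power ops fail after 'power_on_retries' attempts, or if the VMs do not
        reach the requested state (and, optionally, acquire IPs) within
        'timeout_secs'.
        """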

        max_parallel_tasks = self._get_max_parallel_tasks(max_parallel_tasks)
        t0 = time.time()
        cutoff = self._prism_client.get_cluster_timestamp_usecs()

        vm_host_map = dict((vm.vm_id(), vm.node_id()) for vm in vms)
        power_op_vm_ids = vm_host_map.keys()
        # TODO (jklein): Why are these APIs broken :(
        for ii in xrange(power_on_retries):
            vm_id_task_id_map = self._prism_client.vms_set_power_state_for_vms(
                power_op_vm_ids,
                state,
                host_ids=[vm_host_map[vm] for vm in power_op_vm_ids])
            task_id_vm_id_map = dict(
                (v, k) for k, v in vm_id_task_id_map.iteritems())

            tasks = []
            failed_for_vm_ids = []
            # Filter out tasks which immediately failed.
            for vm_id, tid in vm_id_task_id_map.iteritems():
                # TODO (jklein): Don't use literal True to indicate a state.
                if tid is True:
                    continue
                if tid is None:
                    failed_for_vm_ids.append(vm_id)
                    continue
                tasks.append(PrismTask.from_task_id(self._prism_client, tid))

            PrismTaskPoller.execute_parallel_tasks(
                tasks=tasks,
                max_parallel=max_parallel_tasks,
                timeout_secs=timeout_secs,
                prism_client=self._prism_client,
                cutoff_usecs=cutoff,
                raise_on_failure=False)

            failed_for_vm_ids.extend(
                task_id_vm_id_map[t.id()] for t in tasks
                if TaskStatus.cannot_succeed(t._state.status))
            if not failed_for_vm_ids:
                break

            power_op_vm_ids = failed_for_vm_ids

            log.warning(
                "Failed to perform power op %s on %d VMs (attempt %d of %d)",
                state, len(power_op_vm_ids), ii + 1, power_on_retries)

        else:
            raise CurieTestException("Failed to power %s VMs %s" %
                                     (state, ", ".join(failed_for_vm_ids)))

        # TODO (jklein): Fix the terrible handling of time here when brain is more
        # functional.
        timeout_secs -= time.time() - t0
        t0 = time.time()
        while timeout_secs > 0:
            vm_id_status_map = dict(
                (vm["uuid"], vm)
                for vm in self._prism_client.vms_get()["entities"])
            failed_for_vm_ids = []
            for vm_id in vm_host_map.iterkeys():
                status = vm_id_status_map.get(vm_id)
                if not status or status.get("powerState") != state:
                    failed_for_vm_ids.append(vm_id)

            if failed_for_vm_ids:
                log.info("Waiting for %d of %d VMs to transition to state %s",
                         len(failed_for_vm_ids), len(power_op_vm_ids), state)
                timeout_secs -= time.time() - t0
                t0 = time.time()
                time.sleep(1)
            else:
                break

        if failed_for_vm_ids:
            raise CurieTestException("Failed to power %s VMs %s" %
                                     (state, ", ".join(failed_for_vm_ids)))

        if not wait_for_ip:
            return

        if state != "on":
            raise CurieTestException(
                "Cannot wait for IPs to be assigned to powered off VMs")

        timeout_secs -= time.time() - t0
        t0 = time.time()
        needs_ip_vm_ids = set(vm.vm_id() for vm in vms)
        has_ip_vm_ids = set()
        while timeout_secs > 0:
            vm_id_status_map = dict(
                (vm["uuid"], vm)
                for vm in self._prism_client.vms_get()["entities"])
            for vm_id in (needs_ip_vm_ids - has_ip_vm_ids):
                if vm_id not in vm_id_status_map:
                    # NB: Prism API may temporarily return an incomplete list of VMs.
                    continue
                ip_addr = vm_id_status_map[vm_id].get("ipAddresses", [])
                if ip_addr:
                    log.debug("VM %r has IP addresses: %r", vm_id, ip_addr)
                    has_ip_vm_ids.add(vm_id)
                else:
                    log.debug("VM %r IP addresses are %r, retrying", vm_id,
                              ip_addr)

            timeout_secs -= time.time() - t0
            t0 = time.time()
            if needs_ip_vm_ids - has_ip_vm_ids:
                log.info(
                    "Waiting for %d of %d VMs to acquire IPs (%d seconds remaining)",
                    len(needs_ip_vm_ids - has_ip_vm_ids), len(needs_ip_vm_ids),
                    timeout_secs)
                time.sleep(1)
            else:
                return

        raise CurieTestException(
            "Timed out waiting for %d of %d VMs to acquire IPs" %
            (len(needs_ip_vm_ids), len(power_op_vm_ids)))
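Example #1 and Example #6 both lean on Python's `for`/`else` construct: the `else` clause runs only if the loop was never exited via `break`, which makes it a natural place for the "all retries exhausted" error. A self-contained illustration of the pattern (names are illustrative):

def run_with_retries(operation, attempts=5):
    """Call 'operation' until it returns a truthy result or attempts run out."""
    for attempt in range(attempts):
        result = operation()
        if result:
            break  # Success: the else clause below is skipped.
        print("Attempt %d of %d failed, retrying" % (attempt + 1, attempts))
    else:
        # Reached only when the loop completed without a break, i.e. every
        # attempt failed -- the same structure as the CurieException and
        # CurieTestException raises in the examples above.
        raise RuntimeError("Operation failed after %d attempts" % attempts)
    return result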
Example #7
    def clone_vms(self,
                  vm,
                  vm_names,
                  node_ids=(),
                  datastore_name=None,
                  max_parallel_tasks=None,
                  linked_clone=False):
        """
    Clones 'vm' and creates the VMs with names 'vm_names'.

    Args:
      vm  CurieVM: Base VM that clones will be created from.
      vm_names list of strings: One clone will be created for each name in
        list.
      node_ids list of node ids: If provided, must be the same length
        as 'vm_names', then 'vm_names[xx]' will be cloned to 'node_ids[xx]'.
        Otherwise VMs will be cloned to random nodes on cluster.
      datastore_name: If provided, name of datastore VMs will be cloned to.
        Otherwise the VMs will be created on the datastore associated with the
        curie server's settings for this cluster.
      max_parallel_tasks int: The number of VMs to power on in parallel. The
        default value is FLAGS.prism_max_parallel_tasks.
      linked_clone (bool): Whether or not the clones should be "normal" full
        clones or linked clones.

    Returns:
      List of cloned VMs.
    """
        # TODO (jklein): Max parallel tasks
        if not node_ids:
            nodes = self.nodes()
            node_ids = []
            for _ in range(len(vm_names)):
                node_ids.append(random.choice(nodes).node_id())

        vm_desc = VmDescriptor.from_prism_entity_json(
            self._prism_client.vms_get_by_id(vm.vm_id()))

        if datastore_name is None:
            target_ctr_uuid = self._container_id
        else:
            target_ctr_uuid = self.__identifier_to_container_uuid(
                datastore_name)

        clone_spec = vm_desc.to_ahv_vm_clone_spec(vm_names,
                                                  ctr_uuid=target_ctr_uuid)
        cutoff = self._prism_client.get_cluster_timestamp_usecs()
        task = PrismTask.from_task_id(
            self._prism_client,
            self._prism_client.vms_clone(vm.vm_id(), clone_spec))
        PrismTaskPoller.execute_parallel_tasks(
            tasks=[task],
            timeout_secs=len(vm_names) * 900,
            prism_client=self._prism_client,
            cutoff_usecs=cutoff)
        log.info("Clone task complete")
        task_json = AcropolisTaskInfo(
            **self._prism_client.tasks_get_by_id(task.id(), True))
        created_uuids = set(e.uuid for e in task_json.entity_list
                            if e.entity_type.strip().upper() == "VM")
        # Block until all VMs are found via /vms API.
        vms = self.__wait_for_vms(created_uuids)

        vm_name_map = {vm["vmName"]: vm for vm in vms}
        sorted_vms = [vm_name_map[vm_name] for vm_name in vm_names]
        # Create placement map which controls where VMs are placed when powered on.
        for node_id, vm in zip(node_ids, sorted_vms):
            self.__vm_uuid_host_uuid_map[vm["uuid"]] = node_id
        return sorted_vms
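A usage sketch for `clone_vms`, assuming `cluster` and a `base_vm` such as the one returned by `create_vm` in Example #2; the clone names are illustrative:

# Sketch: create three clones of base_vm on random nodes.
clone_names = ["curie_clone_%02d" % ii for ii in range(3)]
clones = cluster.clone_vms(base_vm, clone_names)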
Example #8
    def delete_vms(self,
                   vms,
                   ignore_errors=False,
                   max_parallel_tasks=None,
                   timeout_secs=None):
        """Delete VMs.

    Acropolis DELETE requests for /vms/{vm_id} are async. This method collects
    all taskUuids and polls until completion.

    Args:
      vms (list<CurieVM>): List of VMs to delete.
      ignore_errors (bool): Optional. Whether to allow individual tasks to
        fail. Default False.
      max_parallel_tasks (int): Max number of requests to have in-flight at
        any given time. (Currently ignored)
      timeout_secs (int): If provided, overall timeout for VM deletion tasks.

    Raises:
      CurieTestException:
        - If any VM is not already powered off.
        - All VMs are not destroyed with in the timeout.
        - Destroy task fails and ignore_errors is False.
    """
        # TODO (jklein): max_parallel_tasks won't work unless this is changed to
        # use task descriptors.
        if timeout_secs is None:
            timeout_secs = len(vms) * 60

        task_t0 = self._prism_client.get_cluster_timestamp_usecs()

        vm_id_task_map = {}
        for vm_id, tid in self._prism_client.vms_delete(
            [vm.vm_id() for vm in vms]).iteritems():
            if tid is None:
                raise CurieTestException("Failed to delete VM %s" % vm_id)
            vm_id_task_map[vm_id] = PrismTask.from_task_id(
                self._prism_client, tid)

        try:
            PrismTaskPoller.execute_parallel_tasks(
                tasks=vm_id_task_map.values(),
                max_parallel=self._get_max_parallel_tasks(max_parallel_tasks),
                timeout_secs=timeout_secs,
                prism_client=self._prism_client,
                cutoff_usecs=task_t0)
        except CurieTestException:
            if not ignore_errors:
                raise
            log.debug("Ignoring exception in delete_vms", exc_info=True)

        failed_to_delete_vm_ids = []
        for vm_id, task in vm_id_task_map.iteritems():
            if task.get_status() != TaskStatus.kSucceeded:
                failed_to_delete_vm_ids.append(vm_id)

        if failed_to_delete_vm_ids:
            msg = "Failed to delete vms: %s" % ", ".join(
                failed_to_delete_vm_ids)
            if ignore_errors:
                log.error(msg)
            else:
                raise CurieTestException(msg)
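A usage sketch for teardown, assuming `cluster` and a list of already powered-off `vms`; `ignore_errors=True` keeps cleanup going even if individual delete tasks fail, and the timeout defaults to 60 seconds per VM when not given:

# Sketch: best-effort deletion of the test VMs.
cluster.delete_vms(vms, ignore_errors=True)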
Example #9
    def create_curie_task_instance(self):
        return PrismTask(
            self._prism, PrismTaskDescriptor(create_task_func=self))