Exemple #1
0
    def get_goldimage_path(self,
                           image_name,
                           format_str=FORMAT_VMDK,
                           arch=ARCH_X86_64,
                           auto_convert=True):
        """
        Find an image file based on the name, format, and architecture.

        If the image does not exist in the requested format but a QCOW2
        version of it does, and 'auto_convert' is set, the QCOW2 image is
        converted to the requested format.

        Args:
          image_name (str): The name of the image to get.
          format_str (str): A format to get the image in. Valid options include
            raw, qcow2, vmdk or vhdx.
          arch (str): The target architecture of the image. Valid options
            include x86_64 or ppc64le.
          auto_convert (bool): Whether or not an image should be automatically
            converted from an existing format to the desired format.

        Returns:
          str: Full path to image.

        Raises:
          CurieException:
            If the image doesn't exist or could not be created.
        """
        filename = self.get_goldimage_filename(image_name, format_str, arch)
        qcow2_filename = self.get_goldimage_filename(image_name,
                                                     self.FORMAT_QCOW2, arch)
        full_path = os.path.join(self.images_root, filename)
        qcow2_filepath = os.path.join(self.images_root, qcow2_filename)

        if os.path.exists(full_path):
            return full_path

        if not os.path.exists(qcow2_filepath):
            raise CurieException(
                CurieError.kInternalError,
                "Goldimage %s does not exist. The QCOW2 image also does not "
                "exist in %s." % (filename, self.images_root))

        if not auto_convert:
            raise CurieException(
                CurieError.kInternalError,
                "Goldimage %s does not exist in %s. The QCOW2 image does exist, "
                "however auto-conversion was not attempted." %
                (filename, self.images_root))

        # Convert directly to the path that is returned. Deriving the
        # destination by splitting the QCOW2 path on "." truncated the path at
        # the first dot anywhere in it (e.g. a dotted directory name), so the
        # converted file could land somewhere other than 'full_path'.
        rv = self.convert_image_format(qcow2_filepath, full_path, format_str)
        # A truthy return value from the conversion is treated as a failure.
        if rv:
            raise CurieException(
                CurieError.kInternalError,
                "Error attempting to convert image %s from QCOW2 to %s"
                % (qcow2_filepath, format_str))
        return full_path
Exemple #2
0
 def __api_cmd_status(self, arg):
     """
     Build the serialized CmdStatusRet for the command named by 'arg.cmd_id'.

     The command status is read from "status.bin" in the command's directory.
     When 'arg.include_output' is set and the command is no longer running,
     the contents of "stdout.txt" and "stderr.txt" are attached if present.

     Args:
       arg: Request proto providing 'cmd_id' and 'include_output'.

     Returns:
       Serialized CmdStatusRet proto.

     Raises:
       CurieException<kInvalidParameter>: If the request body is empty or the
         command ID is unknown.
     """
     if flask.request.data == "":
         raise CurieException(CurieError.kInvalidParameter, "Empty request")
     cmd_id = arg.cmd_id
     cmd_dir = self.cmd_dir(cmd_id)
     ret = charon_agent_interface_pb2.CmdStatusRet()
     with self.__lock:
         cmd_state = self.__cmd_map.get(cmd_id)
         if cmd_state is None:
             raise CurieException(CurieError.kInvalidParameter,
                                  "Command %s not found" % cmd_id)
         status_path = os.path.join(cmd_dir, "status.bin")
         # The status file holds a serialized proto, so read it as binary and
         # close the handle deterministically.
         with open(status_path, "rb") as status_file:
             ret.cmd_status.ParseFromString(status_file.read())
     if arg.include_output and ret.cmd_status.state != CmdStatus.kRunning:
         stdout_path = os.path.join(cmd_dir, "stdout.txt")
         stderr_path = os.path.join(cmd_dir, "stderr.txt")
         if ret.cmd_status.HasField("exit_status"):
             exit_status = ret.cmd_status.exit_status
         else:
             log.warning("No exit status for command %s", cmd_id)
             exit_status = None
         if os.path.exists(stdout_path):
             with open(stdout_path) as stdout_file:
                 ret.stdout = stdout_file.read()
         else:
             log.warning(
                 "No stdout for command %s that exited with status %s",
                 cmd_id, exit_status)
         if os.path.exists(stderr_path):
             with open(stderr_path) as stderr_file:
                 ret.stderr = stderr_file.read()
         else:
             log.warning(
                 "No stderr for command %s that exited with status %s",
                 cmd_id, exit_status)
     return ret.SerializeToString()
Exemple #3
0
    def import_vm(self,
                  goldimages_directory,
                  goldimage_name,
                  vm_name,
                  node_id=None):
        """
        Deploy a VM named 'vm_name' from the gold image 'goldimage_name'.

        The gold image directory must contain exactly one .ovf file. If
        'node_id' is not given, a node is picked at random from 'self.nodes()'.
        The OVF is deployed through the Prism client using this cluster's
        container and network IDs.

        Returns:
          The created VM, with its node ID recorded.

        Raises:
          CurieException<kInternalError>: If no .ovf file, or more than one, is
            found for the gold image.
        """
        if node_id is None:
            node_id = random.choice(self.nodes()).node_id()

        image_dir = os.path.join(goldimages_directory, goldimage_name)
        ovf_paths = glob.glob(os.path.join(image_dir, "*.ovf"))
        if not ovf_paths:
            raise CurieException(
                CurieError.kInternalError,
                "Unable to locate .ovf file in '%s'" % image_dir)
        if len(ovf_paths) > 1:
            raise CurieException(
                CurieError.kInternalError,
                "Unique .ovf file expected. Found: '%s'" % ovf_paths)

        vm_json = self._prism_client.deploy_ovf(
            vm_name,
            node_id,
            self._container_id,
            ovf_abs_path=ovf_paths[0],
            network_uuids=[self._network_id])
        vm = self.__vm_json_to_curie_vm(vm_json)
        # Remember which node hosts the new VM.
        self.__vm_uuid_host_uuid_map[vm.vm_id()] = node_id
        vm._node_id = node_id
        return vm
Exemple #4
0
    def power_off(self, sync_management_state=True):
        """
        Powers off the node using out-of-band management interface specified in
        the cluster's metadata.

        Args:
          sync_management_state (bool): If true, wait until the management
            software detects the power state is off. This is True by default in
            order to prevent other management server methods that require power
            to be on from failing unexpectedly.

        Raises:
          CurieTestException if no suitable metadata exists, CurieException on
          all other errors.
        """
        log.debug("Powering off node '%s'", self._node_id)
        if not self.power_management_util.power_off():
            raise CurieException(
                CurieError.kInternalError,
                "Failed to power off node '%s'" % self._node_id)
        # If 'sync_management_state', wait until the management server state is
        # synced with the hardware's state.
        if sync_management_state:
            timeout_secs = 40 * 60
            powered_off = CurieUtil.wait_for(
                lambda: not self.is_powered_on_soft(sync_with_oob=True),
                "management server power state to sync to off for node: %s" %
                self.node_id(),
                timeout_secs,
                poll_secs=5)
            if not powered_off:
                # Report the timeout that was actually used rather than a
                # hard-coded figure that can drift from it.
                raise CurieException(
                    CurieError.kInternalError,
                    "Failed to sync management server power state after %ds" %
                    timeout_secs)
Exemple #5
0
 def discover_clusters_vcenter(address, username, password, ret):
   """
   Populate 'ret' with the cluster inventory discovered from a vCenter.

   Every discovered cluster is tagged with the vCenter management server type
   and the version reported by the connection. The connection is always
   closed before returning.

   Args:
     address (str): Address of the management server.
     username (str): Name of user of the management server.
     password (str): Password of user of the management server
     ret (DiscoverClustersV2Ret): Return proto to be populated.

   Raises:
     CurieException<kInternalError>: On a socket error or an invalid login.
   """
   conn = None
   try:
     conn = SmartConnectNoSSL(host=address, user=username, pwd=password)
     log.debug("Connected to vCenter %s", address)
     inventory = DiscoveryUtil.compute_vcenter_cluster_inventory(
       conn.content.rootFolder)
     DiscoveryUtil._fill_vcenter_cluster_inventory_v2(inventory, ret)
     for collection in ret.cluster_inventory.cluster_collection_vec:
       for cluster in collection.cluster_vec:
         mgmt_server = cluster.management_server
         mgmt_server.type = mgmt_server.kVcenter
         mgmt_server.version = conn.content.about.version
   except socket.error:
     # A bare socket error does not say which endpoint failed, so name it.
     raise CurieException(CurieError.kInternalError,
       "Could not connect to vCenter at %s" % address)
   except vim.fault.InvalidLogin:
     # Replace the vSphere fault with a message the user can act on.
     raise CurieException(CurieError.kInternalError,
       "Incorrect username or password for vCenter at %s" %address)
   finally:
     if conn is not None:
       Disconnect(conn)
Exemple #6
0
  def _send_rpc_sync_with_retries(self, method_name, arg, initial_timeout_secs,
                                  max_retries):
    """
    Issue an RPC via '_send_rpc_sync', retrying on retryable failures.

    A kRetry error (raised, or reported as the last error code of a returned
    error proto) and a kTimeout error are retried up to 'max_retries' times.
    Any other CurieException propagates immediately. Before retrying after a
    kRetry the current timeout is slept; after a kTimeout the retry is
    immediate. The timeout doubles after each failed attempt, up to a cap.

    Args:
      method_name (str): Name of RPC method to be issued.
      arg (protobuf): Populated argument proto for the RPC 'method_name'.
      initial_timeout_secs (numeric): Timeout for initial RPC attempt.
        Subsequent attempts will scale this value using exponential backoff.
      max_retries (int): Maximum number of retry attempts to allow. Set to 0
        if no retry is desired.

    Returns:
      (ret, err) as returned by '_send_rpc_sync'.

    Raises:
      CurieException<kInternalError>: Once all attempts are exhausted.
    """
    # TODO: In the case of the CurieUnixAgent it's possible this may fail
    # with 'cmd_id already exists'. See if there are reasonable cases where
    # this will occur and handle them appropriately.
    retryable_codes = (CurieError.kRetry, CurieError.kTimeout)
    failures = []
    timeout_secs = initial_timeout_secs
    for attempt in range(max_retries + 1):
      try:
        ret, err = self._send_rpc_sync(method_name, arg, timeout_secs)
        # TODO: See about the convention for multiple error codes.
        if err and err.error_codes[-1] == CurieError.kRetry:
          raise CurieException(err.error_codes[-1], str(err))
        return ret, err
      except CurieException as exc:
        if exc.error_code not in retryable_codes:
          raise
        failures.append("Attempt %s: %s" % (attempt, str(exc)))
        if attempt >= max_retries:
          # That was the final attempt; fall through to the summary error.
          break
        if exc.error_code == CurieError.kRetry:
          log.warning(
            "RPC failed. Retrying after '%s' seconds (%s of %s attempts)",
            timeout_secs, 1 + attempt, max_retries)
          time.sleep(timeout_secs)
        else:
          CHECK_EQ(exc.error_code, CurieError.kTimeout)
          log.warning(
            "RPC timed out. Retrying (%s of %s attempts)",
            1 + attempt, max_retries)
        # Exponential backoff for the next attempt. The cap is
        # CURIE_CLIENT_DEFAULT_RETRY_TIMEOUT_CAP_SECS unless the base RPC
        # timeout is already larger than that.
        timeout_cap_secs = max(CURIE_CLIENT_DEFAULT_RETRY_TIMEOUT_CAP_SECS,
                               initial_timeout_secs)
        timeout_secs = min(2 * timeout_secs, timeout_cap_secs)
    failures.append("Exhausted retry attempts")
    raise CurieException(CurieError.kInternalError,
                          "RPC failed:\n%s" % "\n".join(failures))
Exemple #7
0
    def __is_ready(self):
        """
        See public method 'is_ready' documentation for further details.

        Returns:
          bool: True only if the host reports state "NORMAL", the CVM matching
          the host's service VM IP is powered on, and Genesis reports the node
          as "up". False if any of those checks fails or Genesis cannot be
          queried.

        Raises:
          CurieException<kClusterApiError>: If no CVM entities are returned or
            none matches the host's CVM IP.
        """
        host = self.__api.hosts_get(host_ip=self.__node.node_ip(),
                                    projection="HEALTH")
        state = host.get("state").strip().upper()
        log.debug("Host '%s' in state: %s", self.__node.node_id(), state)
        if state != "NORMAL":
            log.warning("Host '%s' reports abnormal health state: %s",
                        self.__node.node_id(), state)
            return False

        cvm_entities = self.__api.vms_get(cvm_only=True)["entities"]
        if not cvm_entities:
            raise CurieException(CurieError.kClusterApiError,
                                 "No CVM entities found")

        # Locate the CVM entity that owns the host's service VM address.
        cvm_ip = host["serviceVMExternalIP"]
        for cvm in cvm_entities:
            if any(address.startswith(cvm_ip)
                   for address in cvm["ipAddresses"]):
                log.debug("found entity for cvm: %s", cvm_ip)
                break
        else:
            raise CurieException(CurieError.kClusterApiError,
                                 "No entity for CVM: %s" % cvm_ip)

        # Pylint doesn't understand for/else raising if cvm is undefined.
        # pylint: disable=undefined-loop-variable
        power_state = cvm["powerState"]
        log.info("CVM %s power state %s", cvm_ip, power_state)
        if power_state.strip().lower() != "on":
            return False

        try:
            status = self.__api.genesis_node_services_status()
            # Only the overall node status is checked. The state may be a CSV
            # list such as "Up, Zeus Leader", so compare each entry separately.
            node_states = [
                entry.strip()
                for entry in status["state"].strip().lower().split(",")
            ]
            return "up" in node_states
        except Exception:
            # If Prism is not up and running, the query above raises when it
            # fails to connect and issue the RPC.
            log.warning("Failed to query Genesis on node '%s'",
                        self.__node.node_id(),
                        exc_info=True)
            return False
Exemple #8
0
 def cmd_execute_sync(self, arg, timeout_secs, include_output=False):
     """
     Given the CmdExecute request 'arg', simulate synchronous execution by
     sending a CmdExecute RPC to start the command, polling until the command
     reaches a terminal state, then returning an (exit_status, stdout, stderr)
     tuple for the command. 'include_output' specifies whether stdout and
     stderr should be returned or not (both are None if this is set to False).

     Raises:
       CurieException: If either RPC returns an error, if the command is still
         running after 'timeout_secs' (kTimeout), or if the terminal status
         has no exit status (kInternalError).
     """
     # NOTE(review): 'curie_ex' is assigned below but never read within this
     # block; it is kept in case cleanup code elsewhere relies on it.
     curie_ex = None
     try:
         t1 = time.time()
         # Send a CmdExecute RPC to start the command.
         _execute_ret, execute_err = self.CmdExecute(arg)
         if execute_err is not None:
             raise CurieException(execute_err.error_codes[0],
                                  execute_err.error_msgs[0])
         status_arg = charon_agent_interface_pb2.CmdStatusArg()
         status_arg.cmd_id = arg.cmd_id
         status_arg.include_output = include_output
         status_ret = None
         # Poll until the command reaches a terminal state or it times out.
         while True:
             status_ret, status_err = self.CmdStatus(status_arg)
             if status_err is not None:
                 raise CurieException(status_err.error_codes[0],
                                      status_err.error_msgs[0])
             if (status_ret.cmd_status.state !=
                     charon_agent_interface_pb2.CmdStatus.kRunning):
                 # Command is in a terminal state.
                 break
             t2 = time.time()
             if (t2 - t1) > timeout_secs:
                 raise CurieException(
                     CurieError.kTimeout,
                     "Timeout waiting for command %s" % arg.cmd_id)
             time.sleep(1)
         # Check that we have the exit status for the command.
         CHECK(status_ret is not None)
         if not status_ret.cmd_status.HasField("exit_status"):
             raise CurieException(
                 CurieError.kInternalError,
                 "Missing exit status for command %s" % arg.cmd_id)
         # Return an (exit_status, stdout, stderr) tuple for the command.
         exit_status = status_ret.cmd_status.exit_status
         stdout = status_ret.stdout if status_ret.HasField(
             "stdout") else None
         stderr = status_ret.stderr if status_ret.HasField(
             "stderr") else None
         return (exit_status, stdout, stderr)
     # 'except X as name' is valid on Python 2.6+ and Python 3, unlike the
     # comma form.
     except CurieException as ex:
         curie_ex = ex
         raise
Exemple #9
0
    def test_validate_oob_config(self, mock_ping, mock_status):
        """
        Exercise DiscoveryUtil.validate_oob_config for clusters without OOB
        config, with reachable IPMI nodes, with a failing ping, and with a
        failing chassis status query.
        """
        proto_patch_encryption_support(CurieSettings)

        def make_cluster(node_template):
            # Build a four-node cluster proto whose nodes copy 'node_template'.
            cluster = CurieSettings.Cluster()
            for index in range(4):
                node = cluster.cluster_nodes.add()
                node.CopyFrom(node_template)
                node.id = str(index)
            return cluster

        # Nodes without OOB configuration: nothing should be probed.
        cluster_pb = make_cluster(self._no_oob_node_proto)
        DiscoveryUtil.validate_oob_config(cluster_pb)
        self.assertEqual(mock_ping.call_count, 0)
        self.assertEqual(mock_status.call_count, 0)

        # IPMI nodes that all respond: each node is pinged and queried once.
        cluster_pb = make_cluster(self._ipmi_node_proto)
        mock_ping.return_value = True
        DiscoveryUtil.validate_oob_config(cluster_pb)
        self.assertEqual(mock_ping.call_count, len(cluster_pb.cluster_nodes))
        self.assertEqual(mock_status.call_count, len(cluster_pb.cluster_nodes))

        mock_ping.reset_mock()
        mock_status.reset_mock()

        mock_ping.side_effect = [True, False, True, True]
        with self.assertRaises(CurieException):
            DiscoveryUtil.validate_oob_config(cluster_pb)
        # We expect that the first ping succeeds and then the second fails.
        # There should be an exception after the second ping attempt. If ping
        # fails, the expectation is then that the chassis status won't be
        # called.
        self.assertEqual(mock_ping.call_count, 2)
        self.assertEqual(mock_status.call_count, 1)

        mock_ping.reset_mock()
        mock_status.reset_mock()

        # All pings succeed, but the second chassis status query raises.
        mock_ping.return_value = True
        mock_ping.side_effect = None
        mock_status.side_effect = [{},
                                   CurieException(
                                       CurieError.kOobAuthenticationError,
                                       "AuthError"), {},
                                   CurieException(CurieError.kInternalError,
                                                  "SomeOtherError")]
        with self.assertRaises(CurieException):
            DiscoveryUtil.validate_oob_config(cluster_pb)
        self.assertEqual(mock_ping.call_count, 2)
        self.assertEqual(mock_status.call_count, 2)
Exemple #10
0
  def _send_rpc_sync(self, method_name, arg, timeout_secs):
    """
    Synchronously POST the RPC 'method_name' with the serialized 'arg'.

    Args:
      method_name (str): Name of RPC method to be issued.
      arg (protobuf): Populated argument proto for the RPC 'method_name'.
      timeout_secs (float): Desired RPC timeout in seconds. Expected a
        non-negative value coercable to float.

    Returns:
      (ret, err) On an HTTP 200 response, 'ret' is the deserialized return
      proto for 'method_name' and 'err' is None. On any other status code,
      'ret' is None and 'err' is the deserialized ErrorRet proto.

    Raises:
      CurieException<kInternalError> on ValueError or TypeError while issuing
        the request.
      CurieException<kTimeout> on timeout.
      CurieException<kRetry> on any other requests exception.
    """
    rpc_headers = {"Content-Type": "application/x-rpc",
                   "X-Rpc-Method": method_name}
    try:
      resp = requests.post(self.__url,
                           headers=rpc_headers,
                           data=arg.SerializeToString(),
                           timeout=timeout_secs)
    except (ValueError, TypeError) as exc:
      log.exception("Failed to issue RPC")
      raise CurieException(CurieError.kInternalError,
                            "Failed to issue RPC: %s" % exc)
    except (requests.exceptions.Timeout, socket.timeout):
      log.exception("RPC timed out")
      raise CurieException(CurieError.kTimeout,
                            "RPC '%s' timed out after %f seconds" % (
                              method_name, timeout_secs))
    except requests.exceptions.RequestException as exc:
      log.exception("Exception in RPC request")
      raise CurieException(CurieError.kRetry, str(exc))

    # Every handler above raises, so reaching this point means the request
    # completed and 'resp' is bound.
    if resp.status_code != 200:
      # Error, expect serialized CurieError proto.
      err = ErrorRet()
      err.ParseFromString(resp.content)
      return None, err

    # Succeeded, expect appropriate serialized return proto.
    ret = self._service.get_ret_proto(method_name)()
    ret.ParseFromString(resp.content)
    return ret, None
  def test_update_metadata_if_cluster_contains_extra_nodes(
      self, m_NutanixRestApiClient):
    """
    update_metadata must raise CurieTestException when the Curie metadata
    lists a node that cannot be located in the AHV cluster.
    """
    m_prism_client = mock.MagicMock(spec=NutanixRestApiClient)
    m_NutanixRestApiClient.from_proto.return_value = m_prism_client

    def fake_clusters_get(**kwargs):
      # A lookup by cluster_id returns the cluster itself; otherwise the
      # cluster is wrapped in an "entities" listing.
      cluster_data = {"clusterUuid": "fake-cluster-id"}
      if not kwargs.get("cluster_id"):
        return {"entities": [cluster_data]}
      return cluster_data

    node_uuids = ["fake_node_uuid_%d" % index for index in range(4)]

    m_prism_client.clusters_get.side_effect = fake_clusters_get
    m_prism_client.hosts_get.return_value = {
      "entities": [
        {"clusterUuid": "fake-cluster-id", "uuid": node_uuid}
        for node_uuid in node_uuids
      ]
    }

    extra_node = self.cluster_metadata.cluster_nodes.add()
    extra_node.id = "fake_node_extra"

    cluster = AcropolisCluster(self.cluster_metadata)
    with mock.patch.object(cluster, "identifier_to_node_uuid") as m_itnu:
      # The four known nodes resolve; the lookup for the extra node fails.
      m_itnu.side_effect = node_uuids + [
        CurieException(CurieError.kInvalidParameter,
                       "Unable to locate host.")]
      with self.assertRaises(CurieTestException) as ar:
        cluster.update_metadata(False)

    expected_message = (
      "Cause: Node with ID 'fake_node_extra' is in the Curie cluster "
      "metadata, but not found in the AHV cluster.\n"
      "\n"
      "Impact: The cluster configuration is invalid.\n"
      "\n"
      "Corrective Action: Please check that all of the nodes in the Curie "
      "cluster metadata are part of the AHV cluster. For example, if the "
      "cluster configuration has four nodes, please check that all four nodes "
      "are present in the AHV cluster.\n"
      "\n"
      "Traceback (most recent call last):")
    self.assertIn(expected_message, str(ar.exception))
Exemple #12
0
  def _update_cluster_version_info_prism(cluster_pb):
    """
    See 'DiscoveryUtil.update_cluster_version_info' for info.

    Sets the Prism version from the cluster software version and appends the
    hypervisor version of every host belonging to the cluster.

    Raises:
      CurieException<kInvalidParameter>: If a host in the cluster does not
        report hypervisor type "kKvm".
    """
    prism_info = cluster_pb.cluster_management_server_info.prism_info
    nutanix_info = cluster_pb.cluster_software_info.nutanix_info
    ahv_info = cluster_pb.cluster_hypervisor_info.ahv_info

    client = NutanixRestApiClient.from_proto(prism_info, timeout_secs=10)

    DiscoveryUtil._update_cluster_version_info_nos(client, cluster_pb)
    prism_info.prism_version = nutanix_info.version

    hosts = client.hosts_get().get("entities", [])
    for host in hosts:
      # Only hosts belonging to this cluster are considered.
      if host["clusterUuid"] == nutanix_info.cluster_uuid:
        # We only support homogeneous AHV clusters via Prism.
        if host.get("hypervisorType") != "kKvm":
          raise CurieException(CurieError.kInvalidParameter,
                                "Specified cluster is mixed hypervisor")

        # Strip any "Nutanix " prefix from AHV version strings.
        reported_version = DiscoveryUtil._get_hyp_version_for_host(host)
        ahv_info.version.extend([re.sub("^Nutanix ", "", reported_version)])
Exemple #13
0
  def __send_racadm_command_with_retries(self, cmd, max_retries=5):
    """
    Run 'cmd', trying again after a CurieException up to 'max_retries' times.

    The wait between attempts doubles each time, bounded by
    '_MAX_RETRY_INTERVAL_SECS'. After an authentication error a fresh session
    cookie is requested before the next attempt.

    Args:
      cmd (str): Command to execute.
      max_retries (int): Optional. Maximum number of retry attempts.

    Returns:
      (dict) parsed XML response

    Raises:
      CurieException on error.
    """
    retry_interval_secs = 1
    for attempt in range(max_retries + 1):
      try:
        return self.__send_racadm_command(cmd)
      except CurieException as exc:
        log.exception("'%s' failed", cmd)
        if attempt >= max_retries:
          # No retries left; report the failure below.
          break
        log.info("Retrying (%d of %d attempts)", attempt + 1, max_retries)
        if exc.error_code == CurieError.kOobAuthenticationError:
          log.debug("Possible session expiration, reauthenticating")
          self.__get_session_cookie(cached_ok=False)
        retry_interval_secs = min(
          self._MAX_RETRY_INTERVAL_SECS, 2 * retry_interval_secs)
        time.sleep(retry_interval_secs)
    raise CurieException(
      CurieError.kInternalError,
      "Failed to execute '%s' after %d retries" % (cmd, max_retries))
Exemple #14
0
    def identifier_to_node_uuid(cls, rest_client, node_id_name_or_ip):
        """
        Resolve a node identifier to the host UUID reported by 'rest_client'.

        Lookup order: by IPv4 address, by UUID, by Prism host name, and
        finally by the IP address the identifier resolves to as a hostname.

        Raises:
          CurieException<kInvalidParameter>: If the identifier cannot be
            resolved to an IP address as a last resort.
        """
        # A successful lookup is assumed to always carry a 'uuid' key, since
        # the client raises on failure.
        if CurieUtil.is_ipv4_address(node_id_name_or_ip):
            host = rest_client.hosts_get(host_ip=node_id_name_or_ip)
            return host["uuid"]

        if CurieUtil.is_uuid(node_id_name_or_ip):
            try:
                host = rest_client.hosts_get(host_id=node_id_name_or_ip)
                return host["uuid"]
            except Exception:
                log.debug("Failed to lookup node via UUID '%s'",
                          node_id_name_or_ip)

        # Not an IPv4 address, and not found by UUID. The identifier may be a
        # Prism name or an unresolved hostname; try the Prism name first to
        # avoid the potential cost of name resolution.
        try:
            host = rest_client.hosts_get(host_name=node_id_name_or_ip)
            return host["uuid"]
        except Exception:
            log.debug("Failed to lookup node via Prism name '%s'",
                      node_id_name_or_ip)

        try:
            resolved_ip = CurieUtil.resolve_hostname(node_id_name_or_ip)
        except Exception:
            raise CurieException(
                CurieError.kInvalidParameter,
                "Unable to resolve IP address for '%s'" % node_id_name_or_ip)

        # Last resort: let this lookup raise its own exception on failure, as
        # there is nothing further to fall back to.
        return rest_client.hosts_get(host_ip=resolved_ip)["uuid"]
Exemple #15
0
  def __get_session_cookie(self, cached_ok=True, max_retries=5):
    """
    Return a session cookie, authenticating first when necessary.

    A cached session ID for 'self.host' is reused when 'cached_ok' is set and
    one exists. Otherwise authentication is attempted up to 'max_retries'
    times, sleeping 5 seconds after each failure, and the new session ID is
    cached. The whole operation runs while holding 'self.LOCK'.

    Args:
      cached_ok (bool): Optional. If True, use cached session if present.
      max_retries (int): Maximum number of retries on failure.

    Returns:
      (dict) Map "Cookie" -> <formatted session cookie>

    Raises:
      CurieException<kInternalError>: If no session could be established.
    """
    with self.LOCK:
      session_id = None
      if cached_ok:
        session_id = self.HOST_SESSION_ID_MAP.get(self.host)

      if not session_id:
        for attempt in range(1, max_retries + 1):
          try:
            session_id = self.__authenticate()
          except CurieException as exc:
            log.debug("Failed to establish session: %s (%d of %d attempts)",
                      exc, attempt, max_retries)
            time.sleep(5)
          else:
            break
        else:
          # Every attempt failed (or none was allowed).
          err_msg = ("Failed to establish valid iDRAC session within %d attempts"
                     % max_retries)
          log.error(err_msg)
          raise CurieException(CurieError.kInternalError, err_msg)
        self.HOST_SESSION_ID_MAP[self.host] = session_id

      return {"Cookie": "sid=%s; path=/cgi-bin/" % session_id}
Exemple #16
0
  def __parse_response(self, raw_resp):
    """
    Parse 'raw_resp' whose content is expected to be an XML-formatted response.

    The response root tag must match the root tag of this command's request
    node, and the root must contain exactly one child, tagged <RESP>.

    Args:
      raw_resp (requests.Response): response to parse.

    Returns:
      (RacAdmResponse) wrapper around the single <RESP> element.

    Raises:
      CurieException<kInternalError> if the content cannot be parsed or does
        not have the expected structure.
    """
    try:
      root = etree.fromstring(raw_resp.content)
    except etree.Error as exc:
      # Unparseable content: fail here rather than carrying on with no
      # parsed document to inspect.
      raise CurieException(CurieError.kInternalError, str(exc))

    # Checked explicitly rather than with 'assert', which is stripped when
    # Python runs with -O.
    # NOTE(review): the request tag is read via self.node() for both the
    # check and the message; presumably it returns the same element as the
    # private node attribute - confirm.
    request_tag = self.node().getroottree().getroot().tag
    if root.tag != request_tag:
      raise CurieException(
        CurieError.kInternalError,
        "Invalid response, root tags do not match between request and "
        "response. (req: %s, resp: %s)" % (request_tag, root.tag))

    # list(element) is the supported replacement for the deprecated
    # getchildren().
    resp_nodes = list(root)
    if not resp_nodes:
      error = "Response content missing <RESP> body"
    elif len(resp_nodes) > 1:
      error = "Invalid response content returned: '%s'" % raw_resp.content
    elif resp_nodes[0].tag != "RESP":
      error = ("Invalid response. First child is not a <RESP> tag. (found %s)"
               % resp_nodes[0].tag)
    else:
      return RacAdmResponse(resp_nodes[0])

    raise CurieException(CurieError.kInternalError, error)
Exemple #17
0
def _validate(name, val, type_or_types, values, func, err_code,
              *args, **kwargs):
  """
  Helper used by 'validate_parameter' and 'validate_return'. See either for
  more documentation.

  Args:
    name (str): Name of the item being validated; used in messages.
    val: Value to validate.
    type_or_types: If truthy, a type or a collection of types that 'val' must
      be an instance of.
    values: If truthy, a container that 'val' must be a member of.
    func (callable|None): If truthy, called as func(*args, **kwargs); a falsy
      result fails validation.
    err_code: Error code used for the CurieException raised on failure.

  Raises:
    CurieException<err_code> if any of the requested checks fail.
  """
  log.trace("Validating '%s': %s", name, val)
  log.trace("valid_types=%s, valid_values=%s, valid_func=%s",
            type_or_types, values, func)
  # isinstance() only accepts a class or a tuple of classes, so normalize
  # other common collections up front.
  if isinstance(type_or_types, (list, set, frozenset)):
    type_or_types = tuple(type_or_types)
  try:
    if type_or_types and not isinstance(val, type_or_types):
      if isinstance(type_or_types, tuple):
        msg = "Expected one of '%s'" % ", ".join(
          [valid_type.__name__ for valid_type in type_or_types])
      else:
        msg = "Expected '%s'" % type_or_types.__name__
      raise AssertionError("Invalid '%s' type '%s': %s" %
                           (name, val.__class__.__name__, msg))
    if values and val not in values:
      if isinstance(values, (list, tuple)):
        # Format each entry individually so that non-string values (e.g.
        # ints) do not make str.join raise a TypeError.
        msg = "Expected one of '%s'" % ", ".join(
          ["%s" % (valid_value,) for valid_value in values])
      else:
        msg = "Expected '%s'" % values
      raise AssertionError("Invalid '%s' value '%s': %s" % (name, val, msg))
    if func and not func(*args, **kwargs):
      raise AssertionError("Invalid '%s' value '%s': Functional validation "
                           "failed" % (name, val))
  except AssertionError as exc:
    raise CurieException(err_code, str(exc))
Exemple #18
0
  def send(self):
    """
    Posts the XML for this command to the iDRAC at 'self.host' and parses the
    reply.

    The "login" endpoint is used when no cookie is set on this instance,
    otherwise the "exec" endpoint is used.

    Returns:
      Parsed response, as produced by '__parse_response'.

    Raises:
      CurieException<kInternalError> if the HTTP status is not OK.
    """
    is_login = not self._cookie
    url = self.URL.format(idrac_ip=self.host,
                          cmd_type="login" if is_login else "exec")
    if not is_login:
      # Login bodies carry plaintext credentials, so only other requests are
      # traced.
      log.trace("Sending XML-HTTP request to: %s\n"
                "\tHeaders: %s\n"
                "\tBody: %s", url, self.headers(), self.xml())
    response = requests.post(url,
                             data=self.xml(),
                             headers=self.headers(),
                             verify=False)
    # pylint takes issue with the requests.status_codes.codes LookupDict.
    # pylint: disable=no-member
    if response.status_code == requests.status_codes.codes.OK:
      log.trace("Received raw response:\n"
                "Headers: %s\n"
                "Body: %s", response.headers, response.content)
      return self.__parse_response(response)
    raise CurieException(
      CurieError.kInternalError,
      "Error sending XML-HTTP request to iDRAC: %s %s" %
      (response.status_code, response.reason))
Exemple #19
0
 def __api_cmd_remove(self, arg):
     """
     Remove the command identified by 'arg.cmd_id'.

     If the command is tracked and has a PID, its process group is sent
     SIGKILL. The command's directory is then moved into the garbage
     directory and the command is dropped from the command map.

     Args:
       arg: Request argument providing 'cmd_id'.

     Returns:
       Serialized CmdRemoveRet.

     Raises:
       CurieException<kInvalidParameter> if the request body is empty.
     """
     if flask.request.data == "":
         raise CurieException(CurieError.kInvalidParameter, "Empty request")
     cmd_id = arg.cmd_id
     cmd_dir = self.cmd_dir(cmd_id)
     with self.__lock:
         ret = charon_agent_interface_pb2.CmdRemoveRet()
         cmd_state = self.__cmd_map.get(cmd_id)
         if cmd_state is None:
             # It's possible this is a retry of a request that previously succeeded.
             log.warning("Command %s not found", cmd_id)
             return ret.SerializeToString()
         if cmd_state.pid is not None:
             # Kill the command wrapper process group which will kill the
             # curie_cmd_wrapper process and all of its descendants.
             log.info("Killing command %s as part of a remove, PID %d",
                      cmd_id, cmd_state.pid)
             try:
                 os.killpg(cmd_state.pid, signal.SIGKILL)
             except OSError as ex:
                 # Only "no such process" is tolerated here; anything else
                 # fails the check.
                 CHECK_EQ(ex.errno, errno.ESRCH, msg=str(ex))
         # Move the command's directory to the garbage directory to be garbage
         # collected later.
         garbage_cmd_dir = os.path.join(self.__garbage_dir,
                                        os.path.basename(cmd_dir))
         os.rename(cmd_dir, garbage_cmd_dir)
         del self.__cmd_map[cmd_id]
         return ret.SerializeToString()
Exemple #20
0
  def __init__(self, valid_types=None, valid_values=None, valid_func=None,
               err_code=CurieError.kInvalidParameter):
    """
    Stores the criteria used to validate a return value by type, value,
    and/or a custom function.

    Args:
      valid_types (list|None): If not None, verify that the parameter is of one
        of the provided types.
      valid_values (list|None): If not None, verify that the parameter is equal
        to one of the provided values.
      valid_func (callable|None): If not None, a callable accepting the wrapped
        function's arguments and raising an AssertionError if the return value
        is invalid.
      err_code (CurieError.Type): Optional. CurieException error code to use
        on failure.

    Raises CurieException<kInternalError> if 'valid_func' is truthy but not
    callable.
    """
    self.err_code = err_code
    self.valid_func = valid_func
    self.valid_values = valid_values
    self.valid_types = valid_types

    # Nothing further to check when no validator was supplied or when the
    # supplied one can actually be called.
    if not valid_func or callable(valid_func):
      return

    raise CurieException(CurieError.kInternalError,
                          "Skipping uncallable object provided for return "
                          "value validation")
Exemple #21
0
 def __api_cmd_execute(self, arg):
     """
     Start the command described by 'arg' unless its ID is already tracked.

     Raises:
       CurieException<kInvalidParameter> if the request body is empty or
       'arg.user' is not a known user.
     """
     if flask.request.data == "":
         raise CurieException(CurieError.kInvalidParameter, "Empty request")
     try:
         uid = pwd.getpwnam(arg.user).pw_uid
     except KeyError:
         raise CurieException(CurieError.kInvalidParameter,
                              "Invalid user %s" % arg.user)
     response = charon_agent_interface_pb2.CmdExecuteRet()
     with self.__lock:
         if arg.cmd_id in self.__cmd_map:
             # It's possible this is a retry of a request that previously succeeded.
             log.warning("Command %s already exists", arg.cmd_id)
         else:
             self.__execute_cmd(arg.cmd_id, arg.cmd, uid)
     return response.SerializeToString()
Exemple #22
0
  def __execute_command_with_retries(self, cmd, max_retries=5):
    """
    Runs 'cmd', making up to 'max_retries' + 1 attempts until one returns 0.

    The wait before each retry is double the previous one, capped at
    '_MAX_RETRY_INTERVAL_SECS'.

    Returns:
      (tuple): (stdout, stderr) of the first attempt whose return value is 0.

    Raises:
      CurieException<kInternalError> if the final attempt fails as well.
    """
    backoff_secs = 1
    for attempt in range(max_retries + 1):
      try:
        rv, stdout, stderr = self.__execute_command(cmd)
      except CurieException as exc:
        failure = "'%s' failed: '%s'" % (cmd, exc)
      else:
        if rv == 0:
          return stdout, stderr
        failure = (
          "Error executing '%s':\n\trv=%s\n\tstdout=%s\n\tstderr=%s" %
          (cmd, rv, stdout, stderr))

      # Out of retries: surface the most recent failure to the caller.
      if attempt >= max_retries:
        raise CurieException(CurieError.kInternalError, failure)

      log.error(failure)
      log.info("Retrying (%d of %d retries)", attempt + 1, max_retries)
      backoff_secs = min(self._MAX_RETRY_INTERVAL_SECS, 2 * backoff_secs)
      time.sleep(backoff_secs)
Exemple #23
0
 def validate_host_connectivity(cluster_pb):
   """
   Pings every node of the cluster described by 'cluster_pb'.

   Raises:
     CurieException<kInternalError> for the first node whose ping fails.
   """
   cluster = get_cluster_class(cluster_pb)(cluster_pb)
   for node in cluster.nodes():
     if CurieUtil.ping_ip(node.node_ip()):
       continue
     raise CurieException(CurieError.kInternalError,
                           "Host %s - %s not reachable." %
                           (node.node_id(), node.node_ip()))
Exemple #24
0
def _CHECK_BINARY_OP(x, y, op, symbol, msg="", **kwargs):
    if op(x, y):
        return

    output = ["%s %s %s failed," % (x, symbol, y)]
    if msg:
        output.append("%s," % msg)

    raise CurieException(CurieError.kInternalError, output)
Exemple #25
0
    def get_management_software_value_for_attribute(cls, attr):
        """
    Get management software specific value for 'attr'.

    Args:
      attr (str): Name of an attribute on 'NodePropertyNames'.

    Returns:
      The entry for 'attr' in the map returned by
      'get_management_software_property_name_map'.

    Raises:
      CurieException<kInvalidParameter> if 'NodePropertyNames' has no such
      attribute, or the attribute is None.
    """
        # Supply a default so that an unknown name reaches the check below
        # instead of escaping as an AttributeError.
        if getattr(NodePropertyNames, attr, None) is None:
            raise CurieException(CurieError.kInvalidParameter,
                                 "Unknown node property '%s'" % attr)

        return cls.get_management_software_property_name_map()[attr]
Exemple #26
0
 def test_RunCommand_fail_on_error(self):
     """RunCommand raises CurieException when execute_sync raises one."""
     for mock_vm in self.vms:
         execute_sync = mock_vm.execute_sync
         execute_sync.return_value = (1, "stdout", "stderr")
         execute_sync.side_effect = CurieException(
             CurieError.kInternalError, "Message")
     run_ps = steps.vm_group.RunCommand(
         self.scenario, self.vm_group._name, "ps")
     with self.assertRaises(CurieException):
         run_ps()
Exemple #27
0
  def _update_cluster_version_info_vcenter(cluster_pb):
    """
    See 'DiscoveryUtil.update_cluster_version_info' for info.

    Fills in the ESX version/build lists and the vCenter version/build on
    'cluster_pb'. When 'cluster_pb' has Nutanix software info, a CVM with an
    IPv4 address is located and used to reach Prism for the remaining update.
    """
    vcenter_info = cluster_pb.cluster_management_server_info.vcenter_info
    esx_info = cluster_pb.cluster_hypervisor_info.esx_info

    with VsphereVcenter.from_proto(vcenter_info) as vcenter:
      datacenter = vcenter.lookup_datacenter(
        vcenter_info.vcenter_datacenter_name)
      vim_cluster = vcenter.lookup_cluster(datacenter,
                                           vcenter_info.vcenter_cluster_name)
      if vim_cluster is None:
        raise CurieException(CurieError.kInvalidParameter,
                              "Cluster not found in specified vCenter")

      version_build_pairs = vcenter.get_esx_versions(vim_cluster)
      esx_info.version.extend([pair[0] for pair in version_build_pairs])
      esx_info.build.extend([pair[1] for pair in version_build_pairs])

      vc_version, vc_build = vcenter.get_vcenter_version_info()
      vcenter_info.vcenter_version = vc_version
      vcenter_info.vcenter_build = vc_build

      # The remaining work only applies to Nutanix clusters.
      if not cluster_pb.cluster_software_info.HasField("nutanix_info"):
        return

      cvms = [candidate for candidate in vcenter.lookup_vms(vim_cluster)
              if vcenter.vim_vm_is_nutanix_cvm(candidate)]
      if not cvms:
        raise CurieException(
          CurieError.kInvalidParameter,
          "Unable to locate any CVMs on cluster. Is this a Nutanix cluster?")

      # Lazily walk the CVMs in order and stop at the first IPv4 address.
      candidate_ips = (get_optional_vim_attr(cvm.guest, "ipAddress")
                       for cvm in cvms)
      ip = next((addr for addr in candidate_ips
                 if addr and CurieUtil.is_ipv4_address(addr)), None)
      if ip is None:
        raise CurieException(
          CurieError.kInvalidParameter,
          "Unable to locate any CVMs with IPv4 addresses on cluster")

      nutanix_info = cluster_pb.cluster_software_info.nutanix_info
      prism_user = nutanix_info.decrypt_field("prism_user")
      prism_password = nutanix_info.decrypt_field("prism_password")
      cli = NutanixRestApiClient(ip, prism_user, prism_password)
      DiscoveryUtil._update_cluster_version_info_nos(cli, cluster_pb)
Exemple #28
0
    def create_vm(self,
                  goldimages_directory,
                  goldimage_name,
                  vm_name,
                  vcpus=1,
                  ram_mb=1024,
                  node_id=None,
                  datastore_name=None,
                  data_disks=()):
        """
    See 'Cluster.create_vm' for documentation.

    Deploys the gold image to the image service, creates a VM from it through
    Prism, waits for the creation task, and returns the resulting VM with
    'node_id' assigned.
    """
        log.info(
            "Creating VM %s based on %s with %d vCPUs, %d MB RAM and %s "
            "disks on node %s in datastore %s ", vm_name, goldimage_name,
            vcpus, ram_mb, str(data_disks), str(node_id), datastore_name)
        os_disk_uuid = self.deploy_goldimage_image_service(
            goldimages_directory, goldimage_name)

        # This namedtuple hackery is to handle the expectations in vm.py which
        # expects information directly parsed from an OVF file.
        Units = namedtuple("Units", ["multiplier"])
        Disk = namedtuple("Disk", ["capacity", "units"])
        bytes_per_gb = 1024 ** 3
        attach_disks = []
        for disk_gb in data_disks:
            attach_disks.append(Disk(disk_gb, Units(bytes_per_gb)))

        vm_desc = VmDescriptor(name=vm_name,
                               memory_mb=ram_mb,
                               num_vcpus=vcpus,
                               vmdisk_uuid_list=[os_disk_uuid],
                               attached_disks=attach_disks,
                               container_uuid=self._container_id)
        # Create the VM
        log.info("Creating VM '%s' with %s MB RAM and %s vCPUs", vm_desc.name,
                 vm_desc.memory_mb, vm_desc.num_vcpus)
        nic_create_spec = vm_desc.to_ahv_vm_nic_create_spec(self._network_id)
        nic_specs = [nic_create_spec["specList"][0]]
        resp = self._prism_client.vms_create(vm_desc, nic_specs)
        task_id = resp.get("taskUuid")
        if not task_id:
            raise CurieException(CurieError.kManagementServerApiError,
                                 "Failed to deploy VM: %s" % resp)

        create_task = PrismTask.from_task_id(self._prism_client, task_id)
        TaskPoller.execute_parallel_tasks(tasks=create_task, timeout_secs=60)

        task_json = self._prism_client.tasks_get_by_id(task_id)
        vm_uuid = task_json["entityList"][0]["uuid"]

        # Make a Curie VM descriptor and assign it to the requested node
        vm_json = self._prism_client.vms_get_by_id(vm_uuid)
        vm = self.__vm_json_to_curie_vm(vm_json)
        vm._node_id = node_id
        return vm
Exemple #29
0
    def deploy_goldimage_image_service(self, goldimages_directory,
                                       goldimage_name):
        """
    Deploy a gold image to the image service.

    Args:
      goldimages_directory (str): Directory handed to GoldImageManager to
        locate gold images.
      goldimage_name (str): Name of the gold image to deploy.

    Returns:
      str: ID of the created disk image.

    Raises:
      CurieException<kInternalError>: If the created image does not become
        active within the allowed number of checks.
    """
        arch = self.get_cluster_architecture()
        # Select a vdisk format to use. Currently PPC64LE goldimages are only built
        # using qcow2 format and the x86_64 in vmdk. We could have the manager
        # perform a conversion, but acropolis can already do the image conversion
        # for us.
        if arch == GoldImageManager.ARCH_PPC64LE:
            disk_format = GoldImageManager.FORMAT_QCOW2
        else:
            disk_format = GoldImageManager.FORMAT_VMDK

        # Use the GoldImage manager to get a path to our appropriate goldimage
        goldimage_manager = GoldImageManager(goldimages_directory)
        goldimage_path = goldimage_manager.get_goldimage_path(
            goldimage_name, format_str=disk_format, arch=arch)
        log.debug("Deploying %s to cluster", goldimage_path)

        # Deploy the image to service
        disk_name = os.path.splitext(os.path.basename(goldimage_path))[0]
        img_uuid, tid, _ = self._prism_client.images_create(
            NameUtil.goldimage_vmdisk_name(disk_name, "os"), goldimage_path,
            self._container_id)
        TaskPoller.execute_parallel_tasks(tasks=PrismTask.from_task_id(
            self._prism_client, tid),
                                          timeout_secs=3600)

        # NB: Required due to possible AHV bug. See XRAY-225.
        num_images_get_retries = 5
        # 'range' rather than 'xrange' so the loop also runs on Python 3.
        for attempt_num in range(num_images_get_retries):
            images_get_data = self._prism_client.images_get(image_id=img_uuid)
            image_state = images_get_data["image_state"]
            if image_state.lower() == "active":
                # Return the disk image
                return images_get_data["vm_disk_id"]
            log.info(
                "Waiting for created image to become active "
                "(imageState: %s, retry %d of %d)", image_state,
                attempt_num + 1, num_images_get_retries)
            log.debug(images_get_data)
            time.sleep(1)

        # Every check above returned early on success, so reaching this point
        # means the image never became active.
        raise CurieException(
            CurieError.kInternalError,
            "Created image failed to become active within "
            "%d attempts" % num_images_get_retries)
Exemple #30
0
  def is_powered_on(self):
    """
    Checks whether chassis power state is 'on'.

    This implementation never returns a value; it always raises.

    Raises:
      CurieException<kInvalidParameter> unconditionally.
    """
    err_msg = (
      "Attempted to make out-of-band management calls in an environment "
      "which has not been configured to support out-of-band management")
    raise CurieException(CurieError.kInvalidParameter, err_msg)