Example #1
0
class DaosServerConfig(ObjectWithParameters):
    """Defines the daos_server configuration yaml parameters.

    The top-level parameters are read from the "/run/server_config/*" test
    yaml namespace.  The per-server parameters are held in the server_params
    list, which is populated by get_params().
    """

    class SingleServerConfig(ObjectWithParameters):
        """Defines the configuration yaml parameters for a single server."""

        def __init__(self, index=None):
            """Create a SingleServerConfig object.

            Args:
                index (int, optional): index number for the namespace path used
                    when specifying multiple servers per host. Defaults to None.

            Raises:
                ValueError: if the OFI_PORT environment variable is set to a
                    value that cannot be converted to an integer.
            """
            namespace = "/run/server_config/servers/*"
            if isinstance(index, int):
                namespace = "/run/server_config/servers/{}/*".format(index)
            super(DaosServerConfig.SingleServerConfig,
                  self).__init__(namespace)

            # Use environment variables to get default parameters.  Values read
            # from the environment are always strings, so convert the port to
            # an int to keep its type the same as the built-in default.
            default_interface = os.environ.get("OFI_INTERFACE", "eth0")
            default_port = int(os.environ.get("OFI_PORT", 31416))

            # Parameters
            #   targets:                count of VOS targets
            #   first_core:             starting index for targets
            #   nr_xs_helpers:          offload helpers per server
            #   fabric_iface:           map to OFI_INTERFACE=eth0
            #   fabric_iface_port:      map to OFI_PORT=31416
            #   log_mask:               map to D_LOG_MASK env
            #   log_file:               map to D_LOG_FILE env
            #   env_vars:               influences DAOS IO Server behaviour
            #       Add to enable scalable endpoint:
            #           - CRT_CREDIT_EP_CTX=0
            #           - CRT_CTX_SHARE_ADDR=1
            #           - CRT_CTX_NUM=8
            #       nvme options:
            #           - IO_STAT_PERIOD=10
            self.targets = BasicParameter(None, 8)
            self.first_core = BasicParameter(None, 0)
            self.nr_xs_helpers = BasicParameter(None, 16)
            self.fabric_iface = BasicParameter(None, default_interface)
            self.fabric_iface_port = BasicParameter(None, default_port)
            self.pinned_numa_node = BasicParameter(None)
            self.log_mask = BasicParameter(None, "DEBUG")
            self.log_file = BasicParameter(None, "daos_server.log")
            self.env_vars = BasicParameter(None, [
                "ABT_ENV_MAX_NUM_XSTREAMS=100", "ABT_MAX_NUM_XSTREAMS=100",
                "DAOS_MD_CAP=1024", "CRT_CTX_SHARE_ADDR=0", "CRT_TIMEOUT=30",
                "FI_SOCKETS_MAX_CONN_RETRY=1", "FI_SOCKETS_CONN_TIMEOUT=2000",
                "DD_MASK=mgmt,io,md,epc,rebuild"
            ])

            # Storage definition parameters:
            #
            # When scm_class is set to ram, tmpfs will be used to emulate SCM.
            #   scm_mount: /mnt/daos        - map to -s /mnt/daos
            #   scm_class: ram
            #   scm_size: 6                 - size in GB units
            #
            # When scm_class is set to dcpm, scm_list is the list of device
            # paths for AppDirect pmem namespaces (currently only one per
            # server supported).
            #   scm_class: dcpm
            #   scm_list: [/dev/pmem0]
            #
            # If using NVMe SSD (will write /mnt/daos/daos_nvme.conf and start
            # I/O service with -n <path>)
            #   bdev_class: nvme
            #   bdev_list: ["0000:81:00.0"] - generate regular nvme.conf
            #
            # If emulating NVMe SSD with malloc devices
            #   bdev_class: malloc          - map to VOS_BDEV_CLASS=MALLOC
            #   bdev_size: 4                - malloc size of each device in GB.
            #   bdev_number: 1              - generate nvme.conf as follows:
            #       [Malloc]
            #       NumberOfLuns 1
            #       LunSizeInMB 4000
            #
            # If emulating NVMe SSD over kernel block device
            #   bdev_class: kdev            - map to VOS_BDEV_CLASS=AIO
            #   bdev_list: [/dev/sdc]       - generate nvme.conf as follows:
            #       [AIO]
            #       AIO /dev/sdc AIO2
            #
            # If emulating NVMe SSD with backend file
            #   bdev_class: file            - map to VOS_BDEV_CLASS=AIO
            #   bdev_size: 16               - file size in GB. Create file if
            #                                 it does not exist.
            #   bdev_list: [/tmp/daos-bdev] - generate nvme.conf as follows:
            #       [AIO]
            #       AIO /tmp/aiofile AIO1 4096
            self.scm_mount = BasicParameter(None, "/mnt/daos")
            self.scm_class = BasicParameter(None, "ram")
            self.scm_size = BasicParameter(None, 16)
            self.scm_list = BasicParameter(None)
            self.bdev_class = BasicParameter(None)
            self.bdev_list = BasicParameter(None)
            self.bdev_size = BasicParameter(None)
            self.bdev_number = BasicParameter(None)

    def __init__(self):
        """Create a DaosServerConfig object."""
        super(DaosServerConfig, self).__init__("/run/server_config/*")

        # Parameters
        self.name = BasicParameter(None, "daos_server")
        self.access_points = BasicParameter(None)  # e.g. "<host>:<port>"
        self.port = BasicParameter(None, 10001)
        self.provider = BasicParameter(None, "ofi+sockets")
        self.socket_dir = BasicParameter(None)  # /tmp/daos_sockets
        self.nr_hugepages = BasicParameter(None, 4096)
        self.control_log_mask = BasicParameter(None, "DEBUG")
        self.control_log_file = BasicParameter(None, "daos_control.log")
        self.helper_log_file = BasicParameter(None, "daos_admin.log")

        # Used to drop privileges before starting data plane
        # (if started as root to perform hardware provisioning)
        self.user_name = BasicParameter(None)  # e.g. 'daosuser'
        self.group_name = BasicParameter(None)  # e.g. 'daosgroup'

        # Defines the number of single server config parameters to define in
        # the yaml file
        self.servers_per_host = BasicParameter(None)

        # Single server config parameters; populated by get_params()
        self.server_params = []

    def get_params(self, test):
        """Get values for all of the command params from the yaml file.

        If no key matches are found in the yaml file the BasicParameter object
        will be set to its default value.

        One SingleServerConfig is created per requested server when
        servers_per_host is an integer; otherwise a single, un-indexed
        SingleServerConfig is created.

        Args:
            test (Test): avocado Test object
        """
        super(DaosServerConfig, self).get_params(test)

        # Create the requested number of single server parameters
        if isinstance(self.servers_per_host.value, int):
            self.server_params = [
                self.SingleServerConfig(index)
                for index in range(self.servers_per_host.value)
            ]
        else:
            self.server_params = [self.SingleServerConfig()]

        for server_params in self.server_params:
            server_params.get_params(test)

    def update_log_files(self, control_log, helper_log, server_log):
        """Update each log file name for the daos server.

        If there are multiple server configurations defined the server_log value
        will be made unique for each server's log_file parameter by inserting
        "_<index>" before the file extension.

        Any log file name set to None will result in no update to the respective
        log file parameter value.

        Args:
            control_log (str): control log file name
            helper_log (str): helper log file name
            server_log (str): server log file name
        """
        if control_log is not None:
            self.control_log_file.update(
                control_log, "server_config.control_log_file")
        if helper_log is not None:
            self.helper_log_file.update(
                helper_log, "server_config.helper_log_file")
        if server_log is not None:
            # The name split and the uniqueness test do not change per server
            base, extension = os.path.splitext(server_log)
            multiple = len(self.server_params) > 1
            for index, server_params in enumerate(self.server_params):
                suffix = "_{}".format(index) if multiple else ""
                server_params.log_file.update(
                    "".join((base, suffix, extension)),
                    "server_config.server[{}].log_file".format(index))

    def is_nvme(self):
        """Return if NVMe is provided in the configuration.

        Only the last single server configuration is inspected.

        Returns:
            bool: True if the last server's bdev_class is "nvme"; False
                otherwise, including when no server parameters exist yet.
        """
        if not self.server_params:
            return False
        return self.server_params[-1].bdev_class.value == "nvme"

    def is_scm(self):
        """Return if SCM is provided in the configuration.

        Only the last single server configuration is inspected.

        Returns:
            bool: True if the last server's scm_class is "dcpm"; False
                otherwise, including when no server parameters exist yet.
        """
        if not self.server_params:
            return False
        return self.server_params[-1].scm_class.value == "dcpm"

    @staticmethod
    def _get_yaml_values(params, log_dir, skip=()):
        """Convert the assigned parameters of an object into a dictionary.

        Args:
            params (ObjectWithParameters): object whose parameters to convert
            log_dir (str): directory prepended to any "*log_file" value
            skip (tuple, optional): parameter names to leave out.
                Defaults to ().

        Returns:
            dict: parameter name/value pairs for every parameter whose value is
                neither None nor False

        """
        values = {}
        for name in params.get_param_names():
            if name in skip:
                continue
            value = getattr(params, name).value
            if value is None or value is False:
                continue
            if name.endswith("log_file"):
                value = os.path.join(log_dir, value)
            values[name] = value
        return values

    def create_yaml(self, filename):
        """Create a yaml file from the parameter values.

        Parameters whose value is None or False are omitted, log file names are
        placed in the DAOS_TEST_LOG_DIR directory ("/tmp" if undefined), and
        scm_size is omitted for any server whose scm_class is "dcpm".

        Args:
            filename (str): the yaml file to create

        Returns:
            str: the name of the yaml file that was written

        Raises:
            ServerFailed: if there is an error writing the yaml file

        """
        log_dir = os.environ.get("DAOS_TEST_LOG_DIR", "/tmp")

        # Convert the parameters into a dictionary to write a yaml file.
        # servers_per_host only controls how many entries are generated and is
        # not written to the file.
        yaml_data = {"servers": []}
        yaml_data.update(
            self._get_yaml_values(self, log_dir, skip=("servers_per_host",)))
        for server_params in self.server_params:
            srv_cfg = self._get_yaml_values(server_params, log_dir)

            # Don't set scm_size when scm_class is "dcpm"
            if srv_cfg.get("scm_class", "ram") == "dcpm":
                srv_cfg.pop("scm_size", None)
            yaml_data["servers"].append(srv_cfg)

        # Write the configuration to the file that will be passed to
        # daos_server with its -o option.
        try:
            with open(filename, 'w') as write_file:
                yaml.dump(yaml_data, write_file, default_flow_style=False)
        except Exception as error:
            print("<SERVER> Exception occurred: {0}".format(error))
            raise ServerFailed(
                "Error writing daos_server command yaml file {}: {}".format(
                    filename, error))
        return filename
Example #2
0
class TestPool(TestDaosApiBase):
    # pylint: disable=too-many-public-methods
    """A class for functional testing of DaosPools objects."""

    def __init__(self, context, dmg_command, cb_handler=None,
                 label_generator=None):
        # pylint: disable=unused-argument
        """Initialize a TestPool object.

        Pool parameters are read from the "/run/pool/*" test yaml namespace.

        Args:
            context (DaosContext): The daos environment and other info. Use
                self.context when calling from a test.
            dmg_command (DmgCommand): DmgCommand used to call dmg command. This
                value can be obtained by calling self.get_dmg_command() from a
                test. It'll return the object with -l <Access Point host:port>
                and --insecure.
            cb_handler (CallbackHandler, optional): callback object to use with
                the API methods. Defaults to None.
            label_generator (LabelGenerator, optional): Generates label by
                adding number to the end of the prefix set in self.label.
                There's a link between label_generator and label. If the label
                is used as it is, i.e., not None, label_generator must be
                provided in order to call create(). Defaults to None.

        Raises:
            TypeError: if dmg_command is not a DmgCommand object (raised by the
                dmg property setter).
        """
        super().__init__("/run/pool/*", cb_handler)
        self.context = context
        # Effective user and group ids of this process; passed to the dmg pool
        # create command by create()
        self.uid = os.geteuid()
        self.gid = os.getegid()

        self.mode = BasicParameter(None)
        self.name = BasicParameter(None)            # server group name
        self.svcn = BasicParameter(None)
        self.target_list = BasicParameter(None)
        self.nranks = BasicParameter(None)
        self.size = BasicParameter(None)
        self.tier_ratio = BasicParameter(None)
        self.scm_size = BasicParameter(None)
        self.nvme_size = BasicParameter(None)
        self.prop_name = BasicParameter(None)       # name of property to be set
        self.prop_value = BasicParameter(None)      # value of property
        self.properties = BasicParameter(None)      # string of cs name:value
        self.rebuild_timeout = BasicParameter(None)
        self.pool_query_timeout = BasicParameter(None)
        self.acl_file = BasicParameter(None)
        # Label prefix; get_params() replaces it with a unique label obtained
        # from the label_generator
        self.label = BasicParameter(None, "TestLabel")
        self.label_generator = label_generator

        # Optional TestPool parameters used to autosize the dmg pool create
        # 'size', 'scm_size', and/or 'nvme_size' values:
        #   server_index: TestWithServers.server_managers list index
        #   quantity:     number of pools to account for in sizing
        #   min_targets:  minimum number of targets allowed
        self.server_index = BasicParameter(None, 0)
        self.quantity = BasicParameter(None, 1)
        self.min_targets = BasicParameter(None, 1)

        # State of the pool managed by this object; assigned by create() and
        # cleared by destroy()
        self.pool = None
        self.info = None
        self.svc_ranks = None
        self.connected = False
        # Flag to allow the non-create operations to use UUID. e.g., if you want
        # to destroy the pool with UUID, set this to False, then call destroy().
        self.use_label = True

        # Assign through the dmg property so the object type is validated
        self._dmg = None
        self.dmg = dmg_command

        self.query_data = []

    def get_params(self, test):
        """Populate the pool parameters from the test yaml file.

        Any size, scm_size, or nvme_size value ending in "%" triggers an
        autosize request to the server manager selected by server_index, and
        the resulting values replace the matching pool parameters.  When a
        label is defined it is replaced by a unique label obtained from the
        label generator.

        Args:
            test (Test): avocado Test object

        Raises:
            CommandFailure: if a label is defined but label_generator is not a
                LabelGenerator object

        """
        super().get_params(test)

        # Autosize if any of the size parameters is expressed as a percentage
        size_values = (
            self.size.value, self.scm_size.value, self.nvme_size.value)
        if any(str(entry).endswith("%")
               for entry in size_values if entry is not None):
            try:
                manager = test.server_managers[self.server_index.value]
                params = manager.autosize_pool_params(
                    size=self.size.value,
                    tier_ratio=self.tier_ratio.value,
                    scm_size=self.scm_size.value,
                    nvme_size=self.nvme_size.value,
                    min_targets=self.min_targets.value,
                    quantity=self.quantity.value)
            except ServerFailed as error:
                test.fail(
                    "Failure autosizing pool parameters: {}".format(error))
            except AutosizeCancel as error:
                test.cancel(error)

            # Apply each autosized value to its pool parameter
            for name in params:
                autosized = params[name]
                parameter = getattr(self, name)
                parameter.update(autosized, name)

                # Store the autosized value in the test parameter cache so it
                # is not calculated again
                # pylint: disable=protected-access
                test.params._cache[
                    (name, self.namespace, parameter._default)] = autosized

        # Nothing more to do when pool labels are not in use
        if self.label.value is None:
            return

        # A label generator is required to make the pool label unique
        if not isinstance(self.label_generator, LabelGenerator):
            raise CommandFailure(
                "Unable to create a unique pool label; "
                "Undefined label_generator")
        unique_label = self.label_generator.get_label(self.label.value)
        self.label.update(unique_label)

    @property
    def uuid(self):
        """Get the pool UUID.

        Returns:
            str: UUID reported by the pool object, or None if no pool object is
                defined

        """
        return self.pool.get_uuid_str() if self.pool else None

    @uuid.setter
    def uuid(self, value):
        """Set the pool UUID.

        The value is ignored when no pool object is defined.

        Args:
            value (str): pool UUID
        """
        if not self.pool:
            return
        self.pool.set_uuid_str(value)

    @property
    def identifier(self):
        """Get the value used to identify the pool in dmg commands.

        Returns:
            str: the pool label when use_label is set and a label is defined;
                otherwise the pool uuid

        """
        pool_uuid = self.uuid
        prefer_label = self.use_label and self.label.value is not None
        return self.label.value if prefer_label else pool_uuid

    @property
    def dmg(self):
        """Get the DmgCommand object.

        Returns:
            DmgCommand: the dmg command object used by this pool

        """
        return self._dmg

    @dmg.setter
    def dmg(self, value):
        """Set the DmgCommand object.

        Args:
            value (DmgCommand): dmg command object to use with this pool

        Raises:
            TypeError: if value is not a DmgCommand object

        """
        if isinstance(value, DmgCommand):
            self._dmg = value
            return
        raise TypeError("Invalid 'dmg' object type: {}".format(type(value)))

    @fail_on(CommandFailure)
    @fail_on(DaosApiError)
    def create(self):
        """Create a pool with dmg.

        Any pool previously created through this object is destroyed first.
        The dmg command object is the one provided through the constructor,
        e.g.

            self.pool = TestPool(self.context, DmgCommand(self.bin))

        The uid, gid, size, tier_ratio, scm_size, nranks, properties, acl_file,
        and label values are always passed to dmg.pool_create(); target_list,
        svcn, and nvme_size are only passed when they are set.  Refer to the
        dmg_utils.py pool_create method for more details.

        When the dmg command exits with status 0 the service replica ranks and
        the UUID from its output are stored in the DaosPool object, which is
        then marked as attached, and svc_ranks is set.

        To test the negative case on create, the test needs to catch
        CommandFailure.
        """
        self.destroy()
        targets = self.target_list.value
        if targets is None:
            self.log.info("Creating a pool")
        else:
            self.log.info("Creating a pool on targets %s", targets)

        self.pool = DaosPool(self.context)

        # Arguments that are always passed to dmg pool create
        kwargs = {
            "uid": self.uid,
            "gid": self.gid,
            "size": self.size.value,
            "tier_ratio": self.tier_ratio.value,
            "scm_size": self.scm_size.value,
            "nranks": self.nranks.value,
            "properties": self.properties.value,
            "acl_file": self.acl_file.value,
            "label": self.label.value
        }

        # Arguments that are only passed when a value has been assigned
        optional = {
            name: getattr(self, name).value
            for name in ("target_list", "svcn", "nvme_size")}
        kwargs.update(
            {name: value for name, value in optional.items()
             if value is not None})

        # Create a pool with the dmg command and keep its parsed output
        self._log_method("dmg.pool_create", kwargs)
        data = self.dmg.pool_create(**kwargs)

        if self.dmg.result.exit_status == 0:
            # Convert the comma-separated service replicas from the dmg
            # command output into a ctypes array for the DaosPool object
            replicas = list(map(int, data["svc"].split(",")))
            replica_count = len(replicas)
            rank_array = (ctypes.c_uint * replica_count)(*replicas)
            self.pool.svc = daos_cref.RankList(
                ctypes.POINTER(ctypes.c_uint)(rank_array), replica_count)

            # Record the UUID and mark the DaosPool object as attached
            self.uuid = data["uuid"]
            self.pool.attached = 1

        if not self.pool.attached:
            return

        # Set the TestPool attributes for the created pool
        svc = self.pool.svc
        self.svc_ranks = [
            int(svc.rl_ranks[position]) for position in range(svc.rl_nr)]

    @fail_on(DaosApiError)
    def connect(self, permission=2):
        """Connect to the pool.

        Args:
            permission (int, optional): connect permission, passed to the pool
                connect call as its flags. Defaults to 2.

        Returns:
            bool: True if the pool has been connected; False if the pool was
                already connected or the pool is not defined.

        """
        if not self.pool or self.connected:
            return False

        kwargs = {"flags": permission}
        self.log.info(
            "Connecting to pool %s with permission %s (flag: %s)",
            self.uuid, permission, kwargs["flags"])
        self._call_method(self.pool.connect, kwargs)
        self.connected = True
        return True

    @fail_on(DaosApiError)
    def disconnect(self):
        """Disconnect from the connected pool.

        Returns:
            bool: True if the pool has been disconnected; False if the pool was
                already disconnected or the pool is not defined.

        """
        if not (self.pool and self.connected):
            return False

        self.log.info("Disconnecting from pool %s", self.uuid)
        self._call_method(self.pool.disconnect, {})
        self.connected = False
        return True

    @fail_on(CommandFailure)
    @fail_on(DaosApiError)
    def destroy(self, force=1, disconnect=1):
        """Destroy the pool with dmg.

        The pool is identified by its label or UUID, see the identifier
        property.  The pool, info, and svc_ranks attributes are reset whenever
        a pool object is defined, even if it was not attached.

        Args:
            force (int, optional): force flag. Defaults to 1.
            disconnect (int, optional): disconnect flag; when set the pool is
                disconnected before it is destroyed. Defaults to 1.

        Returns:
            bool: True if the pool has been destroyed; False if the pool is not
                defined or not attached.

        """
        if not self.pool:
            return False

        if disconnect:
            self.disconnect()

        destroyed = False
        if self.pool.attached:
            self.log.info("Destroying pool %s", self.identifier)

            # Destroy the pool with the dmg command.
            self.dmg.pool_destroy(pool=self.identifier, force=force)
            destroyed = True

        # Forget everything known about the pool
        self.pool = None
        self.info = None
        self.svc_ranks = None
        return destroyed

    @fail_on(CommandFailure)
    def set_property(self, prop_name=None, prop_value=None):
        """Set a pool property using dmg.

        Nothing is done when no pool object is defined.

        Args:
            prop_name (str, optional): pool property name. Defaults to
                None, which uses the TestPool.prop_name.value
            prop_value (str, optional): value to be set for the property.
                Defaults to None, which uses the TestPool.prop_value.value
        """
        if not self.pool:
            return

        self.log.info("Set-prop for Pool: %s", self.identifier)

        # Fall back to the class values for anything not provided
        name = self.prop_name.value if prop_name is None else prop_name
        value = self.prop_value.value if prop_value is None else prop_value
        self.dmg.pool_set_prop(self.identifier, name, value)

    @fail_on(CommandFailure)
    def get_property(self, prop_name):
        """Get a pool property using dmg.

        The property is only queried when a pool object is defined, the
        control method is USE_DMG, and a dmg command object is assigned;
        otherwise an error is logged where applicable and an empty string is
        returned.

        Args:
            prop_name (str): Name of the pool property.

        Returns:
            str: value of the first entry in the dmg json response, or an empty
                string if the property could not be obtained.

        """
        prop_value = ""
        if not self.pool:
            return prop_value

        self.log.info("Get-prop for Pool: %s", self.identifier)

        method = self.control_method.value
        if method != self.USE_DMG:
            self.log.error("Error: Undefined control_method: %s", method)
            return prop_value

        if not self.dmg:
            self.log.error("Error: Undefined dmg command")
            return prop_value

        self.dmg.pool_get_prop(self.identifier, prop_name)
        if self.dmg.result.exit_status == 0:
            response = json.loads(self.dmg.result.stdout)
            prop_value = response["response"][0]["value"]
        return prop_value

    @fail_on(CommandFailure)
    def evict(self):
        """Evict all pool connections to a DAOS pool using dmg.

        Nothing is done when no pool object is defined.
        """
        if not self.pool:
            return

        pool_id = self.identifier
        self.log.info("Evict all pool connections for pool: %s", pool_id)
        self.dmg.pool_evict(self.identifier)

    @fail_on(DaosApiError)
    def get_info(self):
        """Query the pool for information.

        Connects to the pool if needed, queries it, and stores the result in
        the self.info attribute.  Nothing is done when no pool object is
        defined.
        """
        if not self.pool:
            return

        self.connect()
        self._call_method(self.pool.pool_query, {})
        self.info = self.pool.pool_info

    def check_pool_info(self, pi_uuid=None, pi_ntargets=None, pi_nnodes=None,
                        pi_ndisabled=None, pi_map_ver=None, pi_leader=None,
                        pi_bits=None):
        """Check the pool info attributes.

        The pool is queried first and each argument that is not None is then
        compared against the pool info attribute of the same name.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            pi_uuid (str, optional): pool uuid. Defaults to None.
            pi_ntargets (int, optional): number of targets. Defaults to None.
            pi_nnodes (int, optional): number of nodes. Defaults to None.
            pi_ndisabled (int, optional): number of disabled. Defaults to None.
            pi_map_ver (int, optional): pool map version. Defaults to None.
            pi_leader (int, optional): pool leader. Defaults to None.
            pi_bits (int, optional): pool bits. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()

        # Expected values paired with their pool info attribute names, in
        # argument order
        expected = (
            ("pi_uuid", pi_uuid),
            ("pi_ntargets", pi_ntargets),
            ("pi_nnodes", pi_nnodes),
            ("pi_ndisabled", pi_ndisabled),
            ("pi_map_ver", pi_map_ver),
            ("pi_leader", pi_leader),
            ("pi_bits", pi_bits),
        )
        checks = []
        for key, val in expected:
            if val is None:
                continue
            actual = getattr(self.info, key)
            if key == "pi_uuid":
                # The uuid is converted to a string for the comparison
                actual = c_uuid_to_str(actual)
            checks.append((key, actual, val))
        return self._check_info(checks)

    def check_pool_space(self, ps_free_min=None, ps_free_max=None,
                         ps_free_mean=None, ps_ntargets=None, ps_padding=None):
        """Check the pool info space attributes.

        The pool is queried first.  Each item of a list argument is compared
        against the item at the same index of the pool space attribute of the
        same name; each scalar argument that is not None is compared against
        the pool space attribute of the same name.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            ps_free_min (list, optional): minimum free space per device.
                Defaults to None.
            ps_free_max (list, optional): maximum free space per device.
                Defaults to None.
            ps_free_mean (list, optional): mean free space per device.
                Defaults to None.
            ps_ntargets (int, optional): number of targets. Defaults to None.
            ps_padding (int, optional): space padding. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        checks = []

        # Per-device values: one check for each item in a provided list
        per_device = (
            ("ps_free_min", ps_free_min),
            ("ps_free_max", ps_free_max),
            ("ps_free_mean", ps_free_mean),
        )
        for key, val in per_device:
            if not isinstance(val, list):
                continue
            actual = getattr(self.info.pi_space, key)
            # enumerate() supplies the index used to select the matching item
            for index, item in enumerate(val):
                checks.append(
                    ("{}[{}]".format(key, index), actual[index], item))

        # Scalar values: one check for each provided value
        scalars = (
            ("ps_ntargets", ps_ntargets),
            ("ps_padding", ps_padding),
        )
        for key, val in scalars:
            if val is not None:
                # Each check is a single (name, actual, expected) tuple
                checks.append((key, getattr(self.info.pi_space, key), val))
        return self._check_info(checks)

    def check_pool_daos_space(self, s_total=None, s_free=None):
        """Check the pool info daos space attributes.

        The pool is queried first and each item of a provided argument is then
        compared against the item at the same index of the pool daos space
        attribute of the same name.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            s_total (list, optional): total space per device. Defaults to None.
            s_free (list, optional): free space per device. Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()
        checks = []
        for key, val in (("s_total", s_total), ("s_free", s_free)):
            if val is None:
                continue
            for index, item in enumerate(val):
                actual = getattr(self.info.pi_space.ps_space, key)[index]
                checks.append(("{}_{}".format(key, index), actual, item))
        return self._check_info(checks)

    def check_rebuild_status(self, rs_version=None, rs_seconds=None,
                             rs_errno=None, rs_done=None, rs_padding32=None,
                             rs_fail_rank=None, rs_toberb_obj_nr=None,
                             rs_obj_nr=None, rs_rec_nr=None, rs_size=None):
        # pylint: disable=too-many-arguments
        """Check the pool info rebuild attributes.

        The pool is queried first and each argument that is not None is then
        compared against the rebuild status attribute of the same name.

        Note:
            Arguments may also be provided as a string with a number preceded
            by '<', '<=', '>', or '>=' for other comparisons besides the
            default '=='.

        Args:
            rs_version (int, optional): rebuild version. Defaults to None.
            rs_seconds (int, optional): rebuild seconds. Defaults to None.
            rs_errno (int, optional): rebuild error number. Defaults to None.
            rs_done (int, optional): rebuild done flag. Defaults to None.
            rs_padding32 (int, optional): padding. Defaults to None.
            rs_fail_rank (int, optional): rebuild fail target. Defaults to None.
            rs_toberb_obj_nr (int, optional): number of objects to be rebuilt.
                Defaults to None.
            rs_obj_nr (int, optional): number of rebuilt objects.
                Defaults to None.
            rs_rec_nr (int, optional): number of rebuilt records.
                Defaults to None.
            rs_size (int, optional): size of all rebuilt records.
                Defaults to None.

        Returns:
            bool: True if at least one expected value is specified and all the
                specified values match; False otherwise

        """
        self.get_info()

        # Expected values paired with their rebuild status attribute names, in
        # argument order
        expected = (
            ("rs_version", rs_version),
            ("rs_seconds", rs_seconds),
            ("rs_errno", rs_errno),
            ("rs_done", rs_done),
            ("rs_padding32", rs_padding32),
            ("rs_fail_rank", rs_fail_rank),
            ("rs_toberb_obj_nr", rs_toberb_obj_nr),
            ("rs_obj_nr", rs_obj_nr),
            ("rs_rec_nr", rs_rec_nr),
            ("rs_size", rs_size),
        )
        checks = []
        for key, val in expected:
            if val is not None:
                checks.append(
                    (key, getattr(self.info.pi_rebuild_st, key), val))
        return self._check_info(checks)

    def rebuild_complete(self):
        """Report whether the pool rebuild has finished.

        With the API control method the rebuild status is displayed and the
        rs_done field of self.info.pi_rebuild_st is compared to 1.  With the
        dmg control method the query data is refreshed and logged, and the
        rebuild state in the query response is compared to "done".  Any other
        control method is logged as an error.

        Returns:
            bool: True if pool rebuild is complete; False otherwise (including
                when the control method is not recognized)

        """
        method = self.control_method.value

        if method == self.USE_API:
            self.display_pool_rebuild_status()
            return self.info.pi_rebuild_st.rs_done == 1

        if method == self.USE_DMG:
            self.set_query_data()
            self.log.info(
                "Pool %s query data: %s\n", self.uuid, self.query_data)
            rebuild_info = self.query_data["response"]["rebuild"]
            return rebuild_info["state"] == "done"

        self.log.error("Error: Undefined control_method: %s", method)
        return False

    def wait_for_rebuild(self, to_start, interval=1):
        """Wait for the rebuild to start or end.

        Polls self.rebuild_complete() every 'interval' seconds.  When waiting
        for the start, polling continues while rebuild_complete() is True;
        when waiting for the end, polling continues while it is False.

        Args:
            to_start (bool): whether to wait for rebuild to start or end
            interval (int): number of seconds to wait in between rebuild
                completion checks

        Raises:
            DaosTestError: if self.rebuild_timeout.value is set and that many
                seconds elapse before the awaited state is detected

        """
        timeout = self.rebuild_timeout.value
        action = "start" if to_start else "complete"
        self.log.info(
            "Waiting for rebuild to %s%s ...",
            action,
            " with a {} second timeout".format(timeout)
            if timeout is not None else "")

        start = time()
        while self.rebuild_complete() == to_start:
            self.log.info(
                "  Rebuild %s ...",
                "has not yet started" if to_start else "in progress")
            # The timeout is only enforced when one has been configured
            if timeout is not None and time() - start > timeout:
                raise DaosTestError(
                    "TIMEOUT detected after {} seconds while waiting "
                    "for rebuild to {}.  This timeout can be adjusted via "
                    "the 'pool/rebuild_timeout' test yaml "
                    "parameter.".format(timeout, action))
            sleep(interval)

        self.log.info(
            "Rebuild %s detected", "start" if to_start else "completion")

    @fail_on(CommandFailure)
    def exclude(self, ranks, tgt_idx=None):
        """Exclude ranks, and optionally targets, from this pool.

        Forwards this pool's identifier and the arguments to
        self.dmg.pool_exclude().

        Args:
            ranks (list): a list of daos server ranks (int) to exclude
            tgt_idx (string, optional): str of targets to exclude on ranks
                ex: "1,2". Defaults to None.
        """
        run_exclude = self.dmg.pool_exclude
        run_exclude(self.identifier, ranks, tgt_idx)

    def check_files(self, hosts):
        """Check for this pool's files on each of the given hosts.

        Calls check_pool_files() with this object's logger, the hosts, and the
        lower-cased pool UUID.

        Args:
            hosts (list): list of hosts

        Returns:
            bool: True if the files for this pool exist on each host; False
                otherwise

        """
        logger = self.log
        pool_uuid = self.uuid.lower()
        return check_pool_files(logger, hosts, pool_uuid)

    def write_file(self, orterun, processes, hostfile, size, timeout=60):
        """Write a file to the pool by running write_some_data.py via orterun.

        The write_some_data.py script is looked up in the same directory as
        this module and is launched with DAOS_POOL set to this pool's UUID.

        Args:
            orterun (str): full path to the orterun command
            processes (int): number of processes to launch
            hostfile (str): value passed to the orterun --hostfile option
            size (int): size of the file to create in bytes
            timeout (int, optional): number of seconds before timing out the
                command. Defaults to 60 seconds.

        Raises:
            CommandFailure: if the openmpi module cannot be loaded

        Returns:
            process.CmdResult: command execution result

        """
        self.log.info("Writing %s bytes to pool %s", size, self.uuid)

        # Environment handed to the launched command
        env = {}
        env["DAOS_POOL"] = self.uuid
        env["PYTHONPATH"] = os.getenv("PYTHONPATH", "")

        mpi_loaded = load_mpi("openmpi")
        if not mpi_loaded:
            raise CommandFailure("Failed to load openmpi")

        # The helper script lives next to this module
        module_dir = os.path.dirname(os.path.abspath(__file__))
        script = os.path.join(module_dir, "write_some_data.py")
        command = "{} --np {} --hostfile {} {} {} testfile".format(
            orterun, processes, hostfile, script, size)
        return run_command(command, timeout, True, env=env)

    def get_pool_daos_space(self):
        """Collect the pool's total and free space attributes.

        The pool info is refreshed with get_info() before the s_total and
        s_free attributes are read from self.info.pi_space.ps_space.

        Returns:
            dict: the "s_total" and "s_free" daos space attributes keyed by
                attribute name

        """
        self.get_info()
        space = {}
        for attribute in ("s_total", "s_free"):
            space[attribute] = getattr(self.info.pi_space.ps_space, attribute)
        return space

    def get_pool_free_space(self, device="scm"):
        """Get the free space reported for the SCM or NVMe device type.

        The device name is matched case-insensitively.  Index 0 of the
        "s_free" daos space entry is used for "scm" and index 1 for "nvme".

        Args:
            device (str, optional): device type, e.g. "scm" or "nvme". Defaults
                to "scm".

        Returns:
            the "s_free" entry for the requested device, or the string "0" if
                the device is neither "scm" nor "nvme"

        """
        free_index = {"scm": 0, "nvme": 1}
        kind = device.lower()
        space = self.get_pool_daos_space()
        if kind not in free_index:
            return "0"
        return space["s_free"][free_index[kind]]

    def display_pool_daos_space(self, msg=None):
        """Log the pool's daos space attributes, one entry per line.

        Each element of each attribute returned by get_pool_daos_space() is
        listed as "<attribute>[<index>]=<value>", ordered by attribute name.

        Args:
            msg (str, optional): optional text to include in the output.
                Defaults to None.
        """
        space = self.get_pool_daos_space()

        entries = []
        for attribute in sorted(space):
            for position, amount in enumerate(space[attribute]):
                entries.append(
                    "{}[{}]={}".format(attribute, position, amount))

        # Only string messages are appended to the heading
        suffix = ""
        if isinstance(msg, str):
            suffix = " " + msg

        self.log.info(
            "Pool %s space%s:\n  %s", self.uuid, suffix, "\n  ".join(entries))

    def pool_percentage_used(self):
        """Get the pool storage percentage for SCM and NVMe.

        For each device type the value is computed as
        s_free / s_total * 100, rounded to two decimal places, using index 0
        of the daos space attributes for SCM and index 1 for NVMe.

        NOTE(review): despite the method name, the ratio computed here is
        free space over total space.  The calculation is left unchanged as
        callers may rely on it - confirm the intended meaning.

        A device type whose total size is zero (e.g. a pool created without
        any NVMe storage) is reported as 0.0 instead of raising a
        ZeroDivisionError.

        Returns:
            dict: a dictionary of SCM/NVMe pool space percentages (float)
                keyed by 'scm' and 'nvme'

        """
        daos_space = self.get_pool_daos_space()
        pool_percent = {}
        for index, device in enumerate(("scm", "nvme")):
            total = float(daos_space["s_total"][index])
            free = float(daos_space["s_free"][index])
            # Guard against an unconfigured tier reporting a total of zero
            pool_percent[device] = (
                round(free / total * 100, 2) if total else 0.0)
        return pool_percent

    def get_pool_rebuild_status(self):
        """Collect a subset of the pool rebuild status attributes.

        The pool info is refreshed with get_info() before the attributes are
        read from self.info.pi_rebuild_st.

        Returns:
            dict: the rebuild status attribute values keyed by attribute name

        """
        self.get_info()
        status = {}
        for attribute in ("rs_version", "rs_padding32", "rs_errno", "rs_done",
                          "rs_toberb_obj_nr", "rs_obj_nr", "rs_rec_nr"):
            status[attribute] = getattr(self.info.pi_rebuild_st, attribute)
        return status

    def display_pool_rebuild_status(self):
        """Log the pool rebuild status attributes on a single line.

        The attributes returned by get_pool_rebuild_status() are listed as
        comma-separated "<name>=<value>" pairs ordered by attribute name.
        """
        rebuild_status = self.get_pool_rebuild_status()
        pairs = []
        for name in sorted(rebuild_status):
            pairs.append("{}={}".format(name, rebuild_status[name]))
        self.log.info("Pool rebuild status: %s", ", ".join(pairs))

    def read_data_during_rebuild(self, container):
        """Read the container's written data while the rebuild is running.

        The container is opened and each entry of container.written_data is
        read in order, stopping as soon as rebuild_complete() reports True or
        every entry has been read.  A DaosTestError raised by a read is logged
        and recorded as a failure.

        Args:
            container (TestContainer): container from which to read data

        Returns:
            bool: True if every read attempted was successful (a rebuild that
                completes before all the data is read is currently not
                reported as an error); False if a read failed or there was no
                written data to read

        """
        container.open()
        self.log.info(
            "Reading objects in container %s during rebuild", self.uuid)

        # Walk through the written data for as long as the rebuild is active
        position = 0
        data_remaining = position < len(container.written_data)
        all_reads_ok = data_remaining
        while not self.rebuild_complete() and data_remaining:
            try:
                data = container.written_data[position]
                all_reads_ok &= data.read_object(container)
            except DaosTestError as error:
                self.log.error(str(error))
                all_reads_ok = False
            position += 1
            data_remaining = position < len(container.written_data)

        # Report whether the reads covered all the data and were successful
        if data_remaining:
            # Deliberately not treated as a failure for now
            self.log.info(
                "Rebuild completed before all the written data could be read - "
                "Currently not reporting this as an error.")
        elif not all_reads_ok:
            self.log.error("Errors detected reading data during rebuild")
        return all_reads_ok

    @fail_on(CommandFailure)
    def set_query_data(self):
        """Execute dmg pool query and store the results.

        Only supported with the dmg control method.

        self.query_data is first reset to an empty dictionary.  Nothing else
        is done if self.pool is not set, and an error is logged if self.dmg is
        not set.  Otherwise the result of self.dmg.pool_query() is stored in
        self.query_data.  If self.pool_query_timeout.value is set, a failing
        query is retried until it succeeds or the timeout expires.

        Raises:
            CommandFailure: if the query fails and no timeout is configured,
                or if the query is still failing after the timeout expires
                (the fail_on decorator is applied on top of this method)

        """
        self.query_data = {}
        if not self.pool:
            return
        if not self.dmg:
            self.log.error("Error: Undefined dmg command")
            return

        timeout = self.pool_query_timeout.value
        end_time = None
        if timeout is not None:
            self.log.info(
                "Waiting for pool %s query to be responsive with a %s "
                "second timeout", self.identifier, timeout)
            end_time = time() + timeout

        while True:
            try:
                self.query_data = self.dmg.pool_query(self.identifier)
            except CommandFailure as error:
                if end_time is None:
                    # Without a timeout the first failure is final
                    raise CommandFailure(error) from error
                self.log.info(
                    "Pool %s query still non-responsive: %s",
                    self.identifier, str(error))
                if time() > end_time:
                    # Arguments follow the placeholder order: the number of
                    # seconds first, then the pool identifier
                    raise CommandFailure(
                        "TIMEOUT detected after {} seconds while "
                        "waiting for pool {} query response. This "
                        "timeout can be adjusted via the "
                        "'pool/pool_query_timeout' test yaml "
                        "parameter.".format(
                            timeout, self.identifier)) from error
            else:
                return
    @fail_on(CommandFailure)
    def reintegrate(self, rank, tgt_idx=None):
        """Reintegrate a rank, and optionally targets, into this pool via dmg.

        Forwards this pool's identifier and the arguments to
        self.dmg.pool_reintegrate().  Only supported with the dmg control
        method.

        Args:
            rank (str): daos server rank to reintegrate
            tgt_idx (str, optional): string of targets to reintegrate on ranks
                ex: "1,2". Defaults to None.
        """
        run_reintegrate = self.dmg.pool_reintegrate
        run_reintegrate(self.identifier, rank, tgt_idx)

    @fail_on(CommandFailure)
    def drain(self, rank, tgt_idx=None):
        """Drain a rank, and optionally targets, from this pool via dmg.

        Forwards this pool's identifier and the arguments to
        self.dmg.pool_drain().  Only supported with the dmg control method.

        Args:
            rank (str): daos server rank to drain
            tgt_idx (str, optional): string of targets to drain on ranks
                ex: "1,2". Defaults to None.
        """
        run_drain = self.dmg.pool_drain
        run_drain(self.identifier, rank, tgt_idx)

    def get_acl(self):
        """Retrieve the ACL of this DAOS pool through dmg.

        Returns:
            str: dmg pool get-acl output.

        """
        fetch_acl = self.dmg.pool_get_acl
        return fetch_acl(pool=self.identifier)

    def update_acl(self, use_acl, entry=None):
        """Update the ACL of this DAOS pool through dmg.

        Can't use both ACL file and entry, so use_acl = True and entry != None
        isn't allowed.

        Args:
            use_acl (bool): Whether to use the ACL file during the update.
            entry (str, optional): entry to be updated. Defaults to None.
        """
        # The ACL file is only passed along when explicitly requested
        acl_file = self.acl_file.value if use_acl else None
        self.dmg.pool_update_acl(
            pool=self.identifier, acl_file=acl_file, entry=entry)

    def delete_acl(self, principal):
        """Remove a principal's entry from this DAOS pool's ACL through dmg.

        Args:
            principal (str): principal to be deleted
        """
        remove_acl = self.dmg.pool_delete_acl
        remove_acl(pool=self.identifier, principal=principal)

    def overwrite_acl(self):
        """Overwrite this DAOS pool's ACL with the configured ACL file.

        An error is logged, and dmg is not called, when self.acl_file has no
        value.
        """
        acl_path = self.acl_file.value
        if not acl_path:
            self.log.error("self.acl_file isn't defined!")
            return
        self.dmg.pool_overwrite_acl(pool=self.identifier, acl_file=acl_path)
Example #3
0
class DaosAgentConfig(ObjectWithParameters):
    """Holds the daos_agent configuration yaml parameters.

    The top-level parameters use the "/run/agent_config/*" namespace and the
    transport (security) parameters are kept in a nested AgentSecurityConfig
    object.  create_yaml() writes both sets of values to a yaml file.
    """

    class AgentSecurityConfig(ObjectWithParameters):
        """Holds the agent transport_config (security) yaml parameters."""

        def __init__(self):
            """Create a AgentSecurityConfig object with its default values."""
            namespace = "/run/agent_config/transport_config/*"
            super(DaosAgentConfig.AgentSecurityConfig,
                  self).__init__(namespace)

            # Defaults for the transport_config section:
            #   allow_insecure: true
            #   ca_cert:        .daos/daosCA.crt
            #   cert:           .daos/daos_agent.crt
            #   key:            .daos/daos_agent.key
            #   server_name:    server
            self.allow_insecure = BasicParameter(None, True)
            self.ca_cert = BasicParameter(None, ".daos/daosCA.crt")
            self.cert = BasicParameter(None, ".daos/daos_agent.crt")
            self.key = BasicParameter(None, ".daos/daos_agent.key")
            self.server_name = BasicParameter(None, "server")

    def __init__(self):
        """Create a DaosAgentConfig object with its default values."""
        namespace = "/run/agent_config/*"
        super(DaosAgentConfig, self).__init__(namespace)

        # Top-level agent parameters and their defaults:
        #   name:          daos_server
        #   access_points: no default, e.g. ['server[0]:10001']
        #   port:          10001
        #   hostlist:      no default, e.g. ['host1', 'host2']
        #   runtime_dir:   /var/run/daos_agent
        #   log_file:      daos_agent.log (create_yaml() prefixes the log
        #                  directory)
        self.name = BasicParameter(None, "daos_server")
        self.access_points = BasicParameter(None)
        self.port = BasicParameter(None, 10001)
        self.hostlist = BasicParameter(None)
        self.runtime_dir = BasicParameter(None, "/var/run/daos_agent")
        self.log_file = BasicParameter(None, "daos_agent.log")

        # Parameters for the nested transport_config section
        self.transport_params = self.AgentSecurityConfig()

    def get_params(self, test):
        """Read the agent and transport parameter values for the test.

        Both the top-level parameters and the nested transport parameters are
        updated through their own get_params() calls.

        If no key matches are found in the yaml file the BasicParameter object
        will be set to its default value.

        Args:
            test (Test): avocado Test object
        """
        super(DaosAgentConfig, self).get_params(test)
        self.transport_params.get_params(test)

    def update_log_file(self, name):
        """Set a new log file name for the daos agent.

        The log file parameter is left untouched when the name is None.

        Args:
            name (str): log file name
        """
        if name is None:
            return
        self.log_file.update(name, "agent_config.log_file")

    def create_yaml(self, filename):
        """Write the parameter values to a yaml file.

        Top-level parameters whose value is None or False are omitted, and any
        parameter whose name ends with "log_file" is prefixed with the
        directory named by DAOS_TEST_LOG_DIR ("/tmp" if unset).  Transport
        parameters are written under "transport_config" unless their value is
        None.

        Args:
            filename (str): the yaml file to create

        Raises:
            AgentFailed: if the yaml file cannot be written

        """
        log_dir = os.environ.get("DAOS_TEST_LOG_DIR", "/tmp")

        # Gather the top-level parameters; the transport_config entry is
        # replaced with the real settings further down
        yaml_data = {"transport_config": []}
        for name in self.get_param_names():
            value = getattr(self, name).value
            if value is None or value is False:
                continue
            if name.endswith("log_file"):
                value = os.path.join(log_dir, value)
            yaml_data[name] = value

        # Gather the transport_config parameters
        transport = {}
        for name in self.transport_params.get_param_names():
            setting = getattr(self.transport_params, name).value
            if setting is not None:
                transport[name] = setting
        yaml_data["transport_config"] = transport

        # Write the collected values to the requested yaml file
        print("<AGENT> Agent yaml_data= ", yaml_data)
        try:
            with open(filename, 'w') as write_file:
                yaml.dump(yaml_data, write_file, default_flow_style=False)
        except Exception as error:
            print("<AGENT> Exception occurred: {0}".format(error))
            raise AgentFailed(
                "Error writing daos_agent command yaml file {}: {}".format(
                    filename, error))