Esempio n. 1
0
class NvmeFault(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: To validate IO works fine when NVMe fault generated
                            on single or multiple servers with single drive.
    :avocado: recursive
    """

    def setUp(self):
        """Set up for test case.

        Reads the pool / fault-test settings from the test parameters and
        builds a dmg command object whose "insecure" setting is taken from
        the first server manager's "allow_insecure" config value.
        """
        super(NvmeFault, self).setUp()
        self.no_of_pools = self.params.get("number_of_pools", '/run/pool/*', 1)
        self.capacity = self.params.get("percentage",
                                        '/run/faulttests/pool_capacity/*')
        self.no_of_servers = self.params.get(
            "count", '/run/faulttests/no_of_servers/*/')
        self.no_of_drives = self.params.get("count",
                                            '/run/faulttests/no_of_drives/*/')
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.insecure.update(
            self.server_managers[0].get_config_value("allow_insecure"),
            "dmg.insecure")
        # Set to True to generate the NVMe fault during IO
        self.set_faulty_device = True

    @skipForTicket("DAOS-5497")
    def test_nvme_fault(self):
        """Jira ID: DAOS-4722.

        Test Description: Test NVMe disk fault.
        Use Case: Create the large size of pool and start filling up the pool.
                  while IO is in progress remove single disks from
                  single/multiple servers.

        :avocado: tags=all,hw,medium,nvme,ib2,nvme_fault,full_regression
        """
        # Create the Pool with Maximum NVMe size
        self.create_pool_max_size(nvme=True)

        # Start the IOR Command and generate the NVMe fault.
        # NOTE(review): "precent" is presumably the keyword spelling the
        # inherited start_ior_load() expects; confirm before renaming it.
        self.start_ior_load(precent=self.capacity)

        print("pool_percentage_used -- After -- {}".format(
            self.pool.pool_percentage_used()))

        # Check nvme-health command works on all the servers. Only the dmg
        # call itself is guarded so unrelated errors are not masked.
        self.dmg.hostlist = self.hostlist_servers
        try:
            self.dmg.storage_query_nvme_health()
        except CommandFailure as error:
            # Report the underlying dmg error so the failure is diagnosable.
            self.fail("dmg nvme-health failed {}".format(error))
Esempio n. 2
0
class NvmeHealth(ServerFillUp):
    # pylint: disable=too-many-ancestors
    """
    Test Class Description: To validate NVMe health test cases
    :avocado: recursive
    """

    def test_monitor_for_large_pools(self):
        """Jira ID: DAOS-4722.

        Test Description: Test Health monitor for large number of pools.
        Use Case: This tests will create the 40 number of pools and verify the
                  dmg list-pools, device-health and nvme-health works for all
                  pools.

        :avocado: tags=all,hw,medium,nvme,ib2,full_regression
        :avocado: tags=nvme_health
        """
        # pylint: disable=attribute-defined-outside-init
        # pylint: disable=too-many-branches
        no_of_pools = self.params.get("number_of_pools", '/run/pool/*')

        # The servers are stopped while the maximum NVMe capacity is read
        # (per the original note, the capacity query runs an SPDK tool) and
        # restarted afterwards.
        self.stop_servers()
        storage = self.get_nvme_max_capacity()
        self.start_servers()

        # Split 80% of the available storage space evenly across the pools
        single_pool_nvme_size = int((storage * 0.80) / no_of_pools)

        self.pool = []
        # Create the large number of pools
        for _pool in range(no_of_pools):
            pool = TestPool(self.context, dmg_command=self.get_dmg_command())
            pool.get_params(self)
            # SCM size is 10% of NVMe
            pool.scm_size.update('{}'.format(int(single_pool_nvme_size *
                                                 0.10)))
            pool.nvme_size.update('{}'.format(single_pool_nvme_size))
            pool.create()
            self.pool.append(pool)

        # Initialize the dmg command
        self.dmg = DmgCommand(os.path.join(self.prefix, "bin"))
        self.dmg.get_params(self)
        self.dmg.insecure.update(
            self.server_managers[0].get_config_value("allow_insecure"),
            "dmg.insecure")

        # List all pools: build the "storage query list-pools" sub command
        self.dmg.set_sub_command("storage")
        self.dmg.sub_command_class.set_sub_command("query")
        query_command = self.dmg.sub_command_class.sub_command_class
        query_command.set_sub_command("list-pools")
        for host in self.hostlist_servers:
            self.dmg.hostlist = host
            try:
                result = self.dmg.run()
            except CommandFailure as error:
                self.fail("dmg command failed: {}".format(error))
            # Verify all pools UUID listed as part of query
            for pool in self.pool:
                if pool.uuid.lower() not in result.stdout:
                    self.fail('Pool uuid {} not found in smd query'.format(
                        pool.uuid.lower()))

        # Get the device ID from all the servers.
        device_ids = get_device_ids(self.dmg, self.hostlist_servers)

        # Get the device health of every device on every host
        for host in device_ids:
            self.dmg.hostlist = host
            for _dev in device_ids[host]:
                try:
                    result = self.dmg.storage_query_device_health(_dev)
                except CommandFailure as error:
                    self.fail("dmg get device states failed {}".format(error))
                if 'State:NORMAL' not in result.stdout:
                    self.fail("device {} on host {} is not NORMAL".format(
                        _dev, host))

        # Get the nvme-health from all the servers. The per-host loops above
        # leave dmg.hostlist set to a single host, so point it back at the
        # full server list before issuing the query.
        self.dmg.hostlist = self.hostlist_servers
        try:
            self.dmg.storage_query_nvme_health()
        except CommandFailure as error:
            self.fail("dmg nvme-health failed {}".format(error))