Example no. 1
0
class RbldReadArrayTest(RebuildTestBase):
    # pylint: disable=too-many-ancestors
    """Run rebuild tests with DAOS servers and clients.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # DaosCommand wrapper; created lazily in execute_during_rebuild().
        self.daos_cmd = None

    def execute_during_rebuild(self):
        """Read the objects during rebuild."""

        # Set the container "status" property to "healthy" before reading —
        # presumably to clear a rebuild-related status that would block reads;
        # NOTE(review): confirm against the daos container set-prop semantics.
        self.daos_cmd = DaosCommand(self.bin)
        self.daos_cmd.container_set_prop(pool=self.pool.uuid,
                                         cont=self.container.uuid,
                                         prop="status",
                                         value="healthy")

        # Log to both the avocado test log and the DAOS client log.
        message = "Reading the array objects during rebuild"
        self.log.info(message)
        self.d_log.info(message)
        self.assertTrue(self.pool.read_data_during_rebuild(self.container),
                        "Error reading data during rebuild")

    def test_read_array_during_rebuild(self):
        """Jira ID: DAOS-691.

        Test Description:
            Configure 5 targets with 1 pool with a service leader quantity
            of 2.  Add 1 container to the pool configured with 3 replicas.
            Add 10 objects of 10 records each populated with an array of 5
            values (currently a sufficient amount of data to be read fully
            before rebuild completes) to a specific rank.  Exclude this
            rank and verify that rebuild is initiated.  While rebuild is
            active, confirm that all the objects and records can be read.
            Finally verify that rebuild completes and the pool info indicates
            the correct number of rebuilt objects and records.

        Use Cases:
            Basic rebuild of container objects of array values with sufficient
            numbers of rebuild targets and no available rebuild targets.

        :avocado: tags=all,full_regression
        :avocado: tags=vm,large,rebuild,rebuildreadarray
        """
        # The rebuild workflow (and the execute_during_rebuild() hook above)
        # is driven by the RebuildTestBase class.
        self.execute_rebuild_test()
class ContSecurityTestBase(TestWithServers):
    """Container security test cases.

    Test Class Description:
        Test methods to verify the Container security with acl by
        using daos tool.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a ContSecurityTestBase object."""
        super().__init__(*args, **kwargs)
        # Command wrappers and identity information; populated in setUp().
        self.dmg = None
        self.daos_tool = None
        self.user_uid = None
        self.user_gid = None
        self.current_user = None
        self.current_group = None
        self.co_prop = None
        self.pool_uuid = None
        self.container_uuid = None

    def setUp(self):
        """Set up each test case."""
        super().setUp()
        self.user_uid = os.geteuid()
        self.user_gid = os.getegid()
        self.current_user = pwd.getpwuid(self.user_uid)[0]
        # Bug fix: the group name must be looked up from the effective gid,
        # not the uid (grp.getgrgid expects a group id).
        self.current_group = grp.getgrgid(self.user_gid)[0]
        self.co_prop = self.params.get("container_properties",
                                       "/run/container/*")
        self.dmg = self.get_dmg_command()
        self.daos_tool = DaosCommand(self.bin)

    @fail_on(CommandFailure)
    def create_pool_with_dmg(self):
        """Create a pool with the dmg tool.

        Obtains the pool uuid from the operation's result

        Returns:
            pool_uuid (str): Pool UUID, randomly generated.
        """
        self.prepare_pool()
        pool_uuid = self.pool.pool.get_uuid_str()

        return pool_uuid

    def create_container_with_daos(self, pool, acl_type=None, acl_file=None):
        """Create a container with the daos tool.

        Also, obtains the container uuid from the operation's result.

        Args:
            pool (TestPool): Pool object.
            acl_type (str, optional): valid or invalid. Ignored when acl_file
                is provided. Defaults to None.
            acl_file (str, optional): path to an existing ACL file to use
                directly instead of the acl_type-derived file. Defaults to
                None.

        Returns:
            container_uuid: Container UUID created, or None when an
                intentionally invalid ACL causes the create to fail.

        """
        file_name = None
        get_acl_file = None
        expected_acl_types = [None, "valid", "invalid"]

        if acl_file is None:
            if acl_type not in expected_acl_types:
                self.fail("    Invalid '{}' acl type passed.".format(acl_type))
            if acl_type:
                # Derive the ACL file name from the requested type.
                get_acl_file = "acl_{}.txt".format(acl_type)
                file_name = os.path.join(self.tmp, get_acl_file)
            else:
                get_acl_file = ""
        else:
            file_name = acl_file

        try:
            self.container = TestContainer(pool=pool,
                                           daos_command=self.daos_tool)
            self.container.get_params(self)
            self.container.create(acl_file=file_name)
            container_uuid = self.container.uuid
        except TestFail as error:
            # Only an "invalid" ACL is expected to fail container create.
            if acl_type != "invalid":
                raise DaosTestError(
                    "Could not create expected container ") from error
            container_uuid = None

        return container_uuid

    def get_container_acl_list(self,
                               pool_uuid,
                               container_uuid,
                               verbose=False,
                               outfile=None):
        """Get daos container acl list by daos container get-acl.

        Args:
            pool_uuid (str): Pool uuid.
            container_uuid (str): Container uuid.
            verbose (bool, optional): Verbose mode.
            outfile (str, optional): Write ACL to file

        Return:
            cont_permission_list: daos container acl list.

        """
        if not general_utils.check_uuid_format(pool_uuid):
            self.fail("    Invalid Pool UUID '{}' provided.".format(pool_uuid))

        if not general_utils.check_uuid_format(container_uuid):
            self.fail("    Invalid Container UUID '{}' provided.".format(
                container_uuid))

        result = self.daos_tool.container_get_acl(pool_uuid, container_uuid,
                                                  verbose, outfile)

        # Keep only user ("A::name@:perms") and group ("A:G:name@:perms")
        # access entries from the get-acl output.
        cont_permission_list = []
        for line in result.stdout_text.splitlines():
            if not line.startswith("A:"):
                continue
            if line.startswith("A::"):
                if re.search(r"A::(.+)@:(.*)", line):
                    cont_permission_list.append(line)
            elif line.startswith("A:G:"):
                if re.search(r"A:G:(.+)@:(.*)", line):
                    cont_permission_list.append(line)
        return cont_permission_list

    def overwrite_container_acl(self, acl_file):
        """Overwrite existing container acl-entries with acl_file.

        Args:
            acl_file (str): acl filename.

        Return:
            result (str): daos_tool.container_overwrite_acl.
        """
        # Disable exception raising so the caller can inspect the result.
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_overwrite_acl(
            self.pool_uuid, self.container_uuid, acl_file)
        return result

    def update_container_acl(self, entry):
        """Update container acl entry.

        Args:
            entry (str): acl entry to be updated.

        Return:
            result (str): daos_tool.container_update_acl.
        """
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_update_acl(self.pool_uuid,
                                                     self.container_uuid,
                                                     entry=entry)
        return result

    def test_container_destroy(self, pool_uuid, container_uuid):
        """Test container destroy/delete.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.

        Return:
            result (str): daos_tool.container_destroy result.
        """
        # NOTE(review): the "test_" prefix may cause test-framework
        # collection of this helper — confirm against avocado behavior.
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_destroy(pool_uuid, container_uuid,
                                                  True)
        return result

    def set_container_attribute(self, pool_uuid, container_uuid, attr, value):
        """Write/Set container attribute.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.
            attr (str): container attribute.
            value (str): container attribute value to be set.

        Return:
            result (str): daos_tool.container_set_attr result.
        """
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_set_attr(pool_uuid, container_uuid,
                                                   attr, value)
        return result

    def get_container_attribute(self, pool_uuid, container_uuid, attr):
        """Get container attribute.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.
            attr (str): container attribute.

        Return:
            CmdResult: Object that contains exit status, stdout, and other
                information.
        """
        self.daos_tool.exit_status_exception = False
        self.daos_tool.container_get_attr(pool_uuid, container_uuid, attr)
        # The command result is stored on the DaosCommand object.
        return self.daos_tool.result

    def list_container_attribute(self, pool_uuid, container_uuid):
        """List container attribute.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.

        Return:
            result (str): daos_tool.container_list_attrs result.
        """
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_list_attrs(pool_uuid, container_uuid)
        return result

    def set_container_property(self, pool_uuid, container_uuid, prop, value):
        """Write/Set container property.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.
            prop (str): container property name.
            value (str): container property value to be set.

        Return:
            result (str): daos_tool.container_set_prop result.
        """
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_set_prop(pool_uuid, container_uuid,
                                                   prop, value)
        return result

    def get_container_property(self, pool_uuid, container_uuid):
        """Get container property.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.

        Return:
            result (str): daos_tool.container_get_prop.
        """
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_get_prop(pool_uuid, container_uuid)
        return result

    def set_container_owner(self, pool_uuid, container_uuid, user, group):
        """Set container owner.

        Args:
            pool_uuid (str): pool uuid.
            container_uuid (str): container uuid.
            user (str): container user-name to be set owner to.
            group (str): container group-name to be set owner to.

        Return:
            result (str): daos_tool.container_set_owner.
        """
        self.daos_tool.exit_status_exception = False
        result = self.daos_tool.container_set_owner(pool_uuid, container_uuid,
                                                    user, group)
        return result

    def compare_acl_lists(self, get_acl_list, expected_list):
        """Compare two permission lists.

        Args:
            get_acl_list (str list): list of permissions obtained by get-acl
            expected_list (str list): list of expected permissions

        Returns:
            True or False if both permission lists are identical or not

        """
        self.log.info("    ===> get-acl ACL:  %s", get_acl_list)
        self.log.info("    ===> Expected ACL: %s", expected_list)

        # Order-insensitive, multiplicity-sensitive comparison: remove each
        # matched entry from a copy of the expected list.
        exp_list = expected_list[:]
        if len(get_acl_list) != len(exp_list):
            return False
        for acl in get_acl_list:
            if acl in exp_list:
                exp_list.remove(acl)
            else:
                return False
        return True

    def get_base_acl_entries(self, test_user):
        """Get container acl entries per cont enforcement order for test_user.

        Args:
            test_user (str): test user.

        Returns (list str):
            List of base container acl entries for the test_user.

        """
        # Entries earlier in enforcement order than test_user are blanked ("")
        # so only the entries at or below the tested principal remain.
        if test_user == "OWNER":
            base_acl_entries = [
                secTestBase.acl_entry("user", "OWNER", ""),
                secTestBase.acl_entry("user", self.current_user, ""),
                secTestBase.acl_entry("group", "GROUP", "rwcdtTaAo"),
                secTestBase.acl_entry("group", self.current_group,
                                      "rwcdtTaAo"),
                secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")
            ]
        elif test_user == "user":
            base_acl_entries = [
                "",
                secTestBase.acl_entry("user", self.current_user, ""),
                secTestBase.acl_entry("group", "GROUP", "rwcdtTaAo"),
                secTestBase.acl_entry("group", self.current_group, ""),
                secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")
            ]
        elif test_user == "group":
            base_acl_entries = [
                "", "",
                secTestBase.acl_entry("group", "GROUP", ""),
                secTestBase.acl_entry("group", self.current_group, ""),
                secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")
            ]
        elif test_user == "GROUP":
            base_acl_entries = [
                "", "", "",
                secTestBase.acl_entry("group", self.current_group, ""),
                secTestBase.acl_entry("user", "EVERYONE", "rwcdtTaAo")
            ]
        elif test_user == "EVERYONE":
            base_acl_entries = [
                "", "", "", "",
                secTestBase.acl_entry("user", "EVERYONE", "")
            ]
        else:
            base_acl_entries = ["", "", "", "", ""]
        return base_acl_entries

    def cleanup(self, types):
        """Remove all temporal acl files created during the test.

        Args:
            types (list): types of acl files [valid, invalid]

        """
        for typ in types:
            get_acl_file = "acl_{}.txt".format(typ)
            file_name = os.path.join(self.tmp, get_acl_file)
            # NOTE(review): "rm -r" on a regular file works but "rm -f" would
            # be the conventional choice — confirm before changing.
            cmd = "rm -r {}".format(file_name)
            general_utils.run_command(cmd)

    def error_handling(self, results, err_msg):
        """Handle errors when test fails and when command unexpectedly passes.

        Args:
            results (CmdResult): object containing stdout, stderr and
                exit status.
            err_msg (str): error message string to look for in stderr.

        Returns:
            list: list of test errors encountered.

        """
        test_errs = []
        if results.exit_status == 0:
            test_errs.append("{} passed unexpectedly: {}".format(
                results.command, results.stdout_text))
        elif results.exit_status == 1:
            # REMOVE BELOW IF Once DAOS-5635 is resolved
            if results.stdout_text and err_msg in results.stdout_text:
                self.log.info("Found expected error %s", results.stdout_text)
            # REMOVE ABOVE IF Once DAOS-5635 is resolved
            elif results.stderr_text and err_msg in results.stderr_text:
                self.log.info("Found expected error %s", results.stderr_text)
            else:
                self.fail("{} seems to have failed with \
                    unexpected error: {}".format(results.command, results))
        return test_errs

    def acl_file_diff(self, prev_acl, flag=True):
        """Compare current content of acl-file with helper function.

        If provided  prev_acl file information is different from current acl
        file information test will fail if flag=True. If flag=False, test will
        fail in the case that the acl contents are found to have no difference.

        Args:
            prev_acl (list): list of acl entries within acl-file.
            flag (bool, optional): if True, test will fail when acl-file
                contents are different, else test will fail when acl-file
                contents are same. Defaults to True.

        """
        current_acl = self.get_container_acl_list(self.pool.uuid,
                                                  self.container.uuid)
        if self.compare_acl_lists(prev_acl, current_acl) != flag:
            self.fail("Previous ACL:\n{} \nPost command ACL:\n{}".format(
                prev_acl, current_acl))
Example no. 3
0
File: ec_utils.py Project: liw/daos
class ErasureCodeSingle(TestWithServers):
    # pylint: disable=too-many-ancestors
    # pylint: disable=too-many-instance-attributes
    """
    Class to used for EC testing for single type data.
    """
    def __init__(self, *args, **kwargs):
        """Initialize a TestWithServers object."""
        super().__init__(*args, **kwargs)
        self.server_count = None
        self.set_online_rebuild = False
        self.rank_to_kill = None
        self.daos_cmd = None
        # Declared here (instead of only in setUp) so all attributes are
        # visible in __init__; populated from the test yaml in setUp().
        self.obj_class = None
        self.singledata_set = None
        self.out_queue = None
        self.container = []

    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super().setUp()
        engine_count = self.server_managers[0].get_config_value(
            "engines_per_host")
        self.server_count = len(self.hostlist_servers) * engine_count
        self.obj_class = self.params.get("dfs_oclass_list",
                                         '/run/objectclass/*')
        self.singledata_set = self.params.get("single_data_set",
                                              '/run/container/*')
        self.add_pool()
        # Queue used by the write/read threads to report PASS/FAIL.
        self.out_queue = queue.Queue()

    def ec_container_create(self, index, oclass):
        """Create the container for EC object

        Args:
            index(int): container number
            oclass(str): object class for creating the container.
        """
        self.container.append(TestContainer(self.pool))
        # Get container parameters
        self.container[index].get_params(self)

        # update object class for container create, if supplied explicitly.
        self.container[index].oclass.update(oclass)

        # Get the Parity count for setting the container RF property.
        ec_object = get_data_parity_number(self.log, oclass)
        self.container[index].properties.update("rf:{}".format(
            ec_object['parity']))

        # create container
        self.container[index].create()

    def single_type_param_update(self, index, data):
        """Update the data set content provided from yaml file.

        Args:
            index(int): container number
            data(list): dataset content from test yaml file
                (object qty, record qty, dkey size, akey size, data size).
        """
        self.container[index].object_qty.update(data[0])
        self.container[index].record_qty.update(data[1])
        self.container[index].dkey_size.update(data[2])
        self.container[index].akey_size.update(data[3])
        self.container[index].data_size.update(data[4])

    def write_single_type_dataset(self, results=None):
        """Write single type data set with different EC object and different sizes.

        Args:
            results (queue): queue for returning thread results
        """
        cont_count = 0
        for oclass in self.obj_class:
            for sizes in self.singledata_set:
                # Skip the object type if server count does not meet the minimum EC object server
                # count
                if oclass[1] > self.server_count:
                    continue
                # Create the new container with correct redundancy factor for EC object type
                try:
                    self.ec_container_create(cont_count, oclass[0])
                    self.single_type_param_update(cont_count, sizes)
                    # Write the data
                    self.container[cont_count].write_objects(
                        obj_class=oclass[0])
                    cont_count += 1
                    if results is not None:
                        results.put("PASS")
                except (CommandFailure, DaosApiError, DaosTestError):
                    if results is not None:
                        results.put("FAIL")
                    raise

    def read_single_type_dataset(self, results=None, parity=1):
        """Read single type data and verify for different EC object and different sizes.

        Args:
            results (queue): queue for returning thread results
            parity(int): object parity number for reading; with the default
                of 1 every object class is read, otherwise only classes
                matching "P<parity>".
        """
        cont_count = 0
        self.daos_cmd = DaosCommand(self.bin)
        for oclass in self.obj_class:
            for _sizes in self.singledata_set:
                # Skip the object type if server count does not meet the minimum EC object server
                # count
                if oclass[1] > self.server_count:
                    continue
                parity_set = "P{}".format(parity)
                # Read the requested data+parity data set only
                if parity != 1 and parity_set not in oclass[0]:
                    print("Skipping Read as object type is {}".format(
                        oclass[0]))
                    cont_count += 1
                    continue

                # Mark the container status "healthy" before reading;
                # presumably clears a rebuild-related status — confirm.
                self.daos_cmd.container_set_prop(
                    pool=self.pool.uuid,
                    cont=self.container[cont_count].uuid,
                    prop="status",
                    value="healthy")

                # Read data and verify the content
                try:
                    if not self.container[cont_count].read_objects():
                        if results is not None:
                            results.put("FAIL")
                        self.fail("Data verification Error")
                    cont_count += 1
                    if results is not None:
                        results.put("PASS")
                except (CommandFailure, DaosApiError, DaosTestError):
                    if results is not None:
                        results.put("FAIL")
                    raise

    def start_online_single_operation(self, operation, parity=1):
        """Do Write/Read operation with single data type.

        Args:
            operation (string): Write/Read operation
            parity (int, optional): parity number forwarded to the read
                thread. Defaults to 1.
        """
        # Create the single data Write/Read threads
        if operation == 'WRITE':
            job = threading.Thread(target=self.write_single_type_dataset,
                                   kwargs={"results": self.out_queue})
        elif operation == 'READ':
            job = threading.Thread(target=self.read_single_type_dataset,
                                   kwargs={
                                       "results": self.out_queue,
                                       "parity": parity
                                   })
        else:
            # Bug fix: previously an unknown operation left "job" unbound and
            # raised UnboundLocalError at job.start(); fail explicitly instead.
            self.fail("Unsupported operation '{}'".format(operation))

        # Launch the single data write/read thread
        job.start()

        # Kill the server rank while IO operation in progress
        if self.set_online_rebuild:
            time.sleep(10)
            # Kill the server rank
            if self.rank_to_kill is not None:
                self.server_managers[0].stop_ranks([self.rank_to_kill],
                                                   self.d_log,
                                                   force=True)

        # Wait to finish the thread
        job.join()

        # Verify the queue and make sure no FAIL for any run
        while not self.out_queue.empty():
            if self.out_queue.get() == "FAIL":
                self.fail("FAIL")
Example no. 4
0
class RbldCascadingFailures(RebuildTestBase):
    # pylint: disable=too-many-ancestors
    """Test cascading failures during rebuild.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a CascadingFailures object."""
        super().__init__(*args, **kwargs)
        # Failure mode set by each test method: "simultaneous", "sequential",
        # or "cascading"; drives start_rebuild()/execute_during_rebuild().
        self.mode = None
        # DaosCommand wrapper; created lazily in execute_during_rebuild().
        self.daos_cmd = None

    def create_test_container(self):
        """Create a container and write objects to the first targeted rank."""
        self.container.create()
        self.container.write_objects(
            self.inputs.rank.value[0], self.inputs.object_class.value)

    def verify_rank_has_objects(self):
        """Verify the first rank to be excluded has at least one object."""
        rank_list = self.container.get_target_rank_lists(" before rebuild")
        # Map each targeted rank to the number of objects it holds.
        objects = {
            rank: self.container.get_target_rank_count(rank, rank_list)
            for rank in self.inputs.rank.value
        }
        self.assertGreater(
            objects[self.inputs.rank.value[0]], 0,
            "No objects written to rank {}".format(self.inputs.rank.value[0]))

    def verify_rank_has_no_objects(self):
        """Verify the excluded rank has zero objects."""
        rank_list = self.container.get_target_rank_lists(" after rebuild")
        objects = {
            rank: self.container.get_target_rank_count(rank, rank_list)
            for rank in self.inputs.rank.value
        }
        # After rebuild completes, none of the excluded ranks should still
        # hold objects.
        for rank in self.inputs.rank.value:
            self.assertEqual(
                objects[rank], 0,
                "Excluded rank {} still has objects".format(rank))

    def start_rebuild(self):
        """Start the rebuild process."""
        if self.mode == "simultaneous":
            # Exclude both ranks from the pool to initiate rebuild
            self.server_managers[0].stop_ranks(
                self.inputs.rank.value, self.d_log)
        else:
            # Exclude the first rank from the pool to initiate rebuild
            self.server_managers[0].stop_ranks(
                [self.inputs.rank.value[0]], self.d_log)

        if self.mode == "sequential":
            # Exclude the second rank from the pool
            # (before rebuild is observed to start, unlike "cascading").
            self.server_managers[0].stop_ranks(
                [self.inputs.rank.value[1]], self.d_log)

        # Wait for rebuild to start
        self.pool.wait_for_rebuild(True, 1)

    def execute_during_rebuild(self):
        """Execute test steps during rebuild."""
        self.daos_cmd = DaosCommand(self.bin)
        if self.mode == "cascading":
            # Exclude the second rank from the pool during rebuild
            self.server_managers[0].stop_ranks(
                [self.inputs.rank.value[1]], self.d_log)

        # Set the container status property to "healthy" before writing;
        # presumably clears a rebuild-related status — confirm semantics.
        self.daos_cmd.container_set_prop(
                      pool=self.pool.uuid,
                      cont=self.container.uuid,
                      prop="status",
                      value="healthy")
        # Populate the container with additional data during rebuild
        self.container.write_objects(obj_class=self.inputs.object_class.value)

    def test_simultaneous_failures(self):
        """Jira ID: DAOS-842.

        Test Description:
            Configure a pool with sufficient redundancy to survive and rebuild
            from two target failures.  Trigger two target failures at the same
            time.  User application I/O should continue to succeed throughout
            the rebuild process and after.  Once the rebuild is complete the
            pool should reflect a normal status.

        Use Cases:
            Verify rebuild with multiple server failures.

        :avocado: tags=all,large,full_regression,rebuild
        :avocado: tags=multitarget,simultaneous
        """
        self.mode = "simultaneous"
        self.execute_rebuild_test()

    def test_sequential_failures(self):
        """Jira ID: DAOS-843.

        Test Description:
            Configure a pool with sufficient redundancy to survive and rebuild
            from two target failures.  Trigger a single target failure.  Before
            rebuilding from the first failure, activate a second failure.  User
            application I/O should continue to succeed throughout the rebuild
            process and after.  Once the rebuild is complete the pool should
            reflect a normal status.

        Use Cases:
            Verify rebuild with multiple server failures.

        :avocado: tags=all,large,full_regression,rebuild
        :avocado: tags=multitarget,sequential
        """
        self.mode = "sequential"
        self.execute_rebuild_test()

    def test_cascading_failures(self):
        """Jira ID: DAOS-844.

        Test Description:
            Configure a pool with sufficient redundancy to survive and rebuild
            from two target failures.  Trigger a single target failure.  While
            rebuilding from the first failure, activate a second failure.  User
            application I/O should continue to succeed throughout the rebuild
            process and after.  Once the rebuild is complete the pool should
            reflect a normal status.

        Use Cases:
            Verify rebuild with multiple server failures.

        :avocado: tags=all,large,full_regression,rebuild
        :avocado: tags=multitarget,cascading
        """
        self.mode = "cascading"
        self.execute_rebuild_test()
Example no. 5
0
class RebuildTestBase(TestWithServers):
    """Base rebuild test class.

    Provides the common scaffolding for rebuild tests: pool/container setup,
    pool info verification before and after rebuild, rank exclusion to
    trigger rebuild, and container data verification.  Subclasses typically
    override execute_during_rebuild() to add test steps that run while the
    rebuild is active.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a RebuildTestBase object."""
        super().__init__(*args, **kwargs)
        self.inputs = RebuildTestParams()
        # Number of targets per engine, read from the server config in setUp()
        self.targets = None
        # Number of server hosts, set in setUp()
        self.server_count = 0
        # Expected pool info values used by execute_pool_verify()
        self.info_checks = None
        # Expected rebuild status values used by execute_pool_verify()
        self.rebuild_checks = None
        # DaosCommand wrapper, created in execute_rebuild_test()
        self.daos_cmd = None

    def setUp(self):
        """Set up each test case."""
        # Start the servers and agents
        super().setUp()

        # Get the test parameters
        self.inputs.get_params(self)

        # Get the number of targets per engine for pool info calculations
        self.targets = self.params.get("targets", "/run/server_config/*")

        self.server_count = len(self.hostlist_servers)

    def setup_test_pool(self):
        """Define a TestPool object."""
        self.add_pool(create=False)

    def setup_test_container(self):
        """Define a TestContainer object."""
        self.add_container(self.pool, create=False)

    def setup_pool_verify(self):
        """Set up pool verification initial expected values."""
        self.info_checks = {
            "pi_uuid": self.pool.uuid,
            "pi_nnodes": self.server_count,
            "pi_ntargets": (self.server_count * self.targets),
            "pi_ndisabled": 0,
        }
        # Before rebuild: rebuild is idle and no objects/records rebuilt
        self.rebuild_checks = {
            "rs_done": 1,
            "rs_obj_nr": 0,
            "rs_rec_nr": 0,
            "rs_errno": 0,
        }

    def update_pool_verify(self):
        """Update the pool verification expected values."""
        # After rebuild: targets are disabled and objects/records were rebuilt
        self.info_checks["pi_ndisabled"] = ">0"
        self.rebuild_checks["rs_obj_nr"] = ">0"
        self.rebuild_checks["rs_rec_nr"] = ">0"

    def execute_pool_verify(self, msg=None):
        """Verify the pool info.

        Args:
            msg (str, optional): additional information to include in the error
                message. Defaults to None.
        """
        status = self.pool.check_pool_info(**self.info_checks)
        status &= self.pool.check_rebuild_status(**self.rebuild_checks)
        self.assertTrue(
            status,
            "Error confirming pool info{}".format("" if msg is None else msg))

    def create_test_pool(self):
        """Create the pool and verify its info."""
        # Create a pool
        self.pool.create()

        # Verify the pool information before rebuild
        self.setup_pool_verify()
        self.execute_pool_verify(" before rebuild")

    def create_test_container(self):
        """Create a container and write objects."""
        if self.container is not None:
            self.container.create()
            self.container.write_objects(
                self.inputs.rank.value, self.inputs.object_class.value)

    def verify_rank_has_objects(self):
        """Verify the rank to be excluded has at least one object."""
        if self.container is not None:
            rank = self.inputs.rank.value
            rank_list = self.container.get_target_rank_lists(" before rebuild")
            qty = self.container.get_target_rank_count(rank, rank_list)
            self.assertGreater(
                qty, 0, "No objects written to rank {}".format(rank))

    def verify_rank_has_no_objects(self):
        """Verify the excluded rank has zero objects."""
        if self.container is not None:
            rank = self.inputs.rank.value
            rank_list = self.container.get_target_rank_lists(" after rebuild")
            qty = self.container.get_target_rank_count(rank, rank_list)
            self.assertEqual(
                qty, 0, "Excluded rank {} still has objects".format(rank))

    def start_rebuild(self):
        """Start the rebuild process.

        Stops the rank(s) specified by the test parameters to initiate a
        rebuild, then waits for the rebuild to start.
        """
        # The rank test parameter may be a single rank or a list of ranks;
        # normalize to a list before stopping the rank(s) to trigger rebuild.
        ranks = self.inputs.rank.value
        if not isinstance(ranks, list):
            ranks = [ranks]
        self.server_managers[0].stop_ranks(ranks, self.d_log, force=True)

        # Wait for rebuild to start
        self.pool.wait_for_rebuild(True, 1)

    def execute_during_rebuild(self):
        """Execute test steps during rebuild.

        Default is a no-op; subclasses override this to perform actions (e.g.
        reading or deleting data) while the rebuild is active.
        """

    def verify_container_data(self, txn=None):
        """Verify the container data.

        Args:
            txn (int, optional): transaction timestamp to read. Defaults to None
                which uses the last timestamp written.
        """
        if self.container is not None:
            self.assertTrue(
                self.container.read_objects(txn),
                "Error verifying container data")

    def execute_rebuild_test(self, create_container=True):
        """Execute the rebuild test steps.

        Args:
            create_container (bool, optional): should the test create a
                container. Defaults to True.
        """
        # Get the test params
        self.setup_test_pool()
        self.daos_cmd = DaosCommand(self.bin)
        if create_container:
            self.setup_test_container()

        # Create a pool and verify the pool information before rebuild
        self.create_test_pool()

        # Create a container and write objects
        self.create_test_container()

        # Verify the rank to be excluded has at least one object
        self.verify_rank_has_objects()

        # Start the rebuild process
        self.start_rebuild()

        # Execute the test steps during rebuild
        self.execute_during_rebuild()

        # Confirm rebuild completes
        self.pool.wait_for_rebuild(False, 1)

        # Clear the container status for the RF issue.  Guarded because no
        # container exists when called with create_container=False.
        if self.container is not None:
            self.daos_cmd.container_set_prop(
                pool=self.pool.uuid,
                cont=self.container.uuid,
                prop="status",
                value="healthy")

        # Refresh local pool and container
        self.pool.check_pool_info()
        if self.container is not None:
            self.container.check_container_info()

        # Verify the excluded rank is no longer used with the objects
        self.verify_rank_has_no_objects()

        # Verify the pool information after rebuild
        self.update_pool_verify()
        self.execute_pool_verify(" after rebuild")

        # Verify the container data can still be accessed
        self.verify_container_data()

        self.log.info("Test passed")
# Esempio n. 6 (0)
class RbldBasic(TestWithServers):
    """Test class for rebuild tests.

    Test Class Description:
        This class contains tests for pool rebuild.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a RbldBasic object."""
        super().__init__(*args, **kwargs)
        # DaosCommand wrapper, created in run_rebuild_test()
        self.daos_cmd = None

    def run_rebuild_test(self, pool_quantity):
        """Run the rebuild test for the specified number of pools.

        Creates the pools and containers, writes test data, excludes a rank
        to trigger rebuild in every pool, waits for the rebuilds to complete,
        and verifies both the rebuild statistics and the container data.

        Args:
            pool_quantity (int): number of pools to test
        """
        # Get the test parameters
        self.pool = []
        self.container = []
        self.daos_cmd = DaosCommand(self.bin)
        for _ in range(pool_quantity):
            self.pool.append(self.get_pool(create=False))
            self.container.append(
                self.get_container(self.pool[-1], create=False))
        rank = self.params.get("rank", "/run/testparams/*")
        obj_class = self.params.get("object_class", "/run/testparams/*")

        # Collect server configuration information
        server_count = len(self.hostlist_servers)
        engine_count = self.server_managers[0].get_config_value(
            "engines_per_host")
        # Default to one engine per host when not set in the server config
        engine_count = 1 if engine_count is None else int(engine_count)
        target_count = int(self.server_managers[0].get_config_value("targets"))
        self.log.info(
            "Running with %s servers, %s engines per server, and %s targets "
            "per engine", server_count, engine_count, target_count)

        # Create the pools and confirm their status
        # Before rebuild each pool must report zero disabled targets and an
        # idle rebuild status with no rebuilt objects/records and no errors.
        status = True
        for index in range(pool_quantity):
            self.pool[index].create()
            status &= self.pool[index].check_pool_info(
                pi_nnodes=server_count * engine_count,
                pi_ntargets=server_count * engine_count * target_count,
                pi_ndisabled=0)
            status &= self.pool[index].check_rebuild_status(rs_done=1,
                                                            rs_obj_nr=0,
                                                            rs_rec_nr=0,
                                                            rs_errno=0)
        self.assertTrue(status, "Error confirming pool info before rebuild")

        # Create containers in each pool and fill them with data
        rs_obj_nr = []
        rs_rec_nr = []
        for index in range(pool_quantity):
            self.container[index].create()
            self.container[index].write_objects(rank, obj_class)

        # Determine how many objects will need to be rebuilt
        for index in range(pool_quantity):
            target_rank_lists = self.container[index].get_target_rank_lists(
                " prior to rebuild")
            rebuild_qty = self.container[index].get_target_rank_count(
                rank, target_rank_lists)
            rs_obj_nr.append(rebuild_qty)
            self.log.info(
                "Expecting %s/%s rebuilt objects in container %s after "
                "excluding rank %s", rs_obj_nr[-1], len(target_rank_lists),
                self.container[index], rank)
            # Each rebuilt object contributes record_qty rebuilt records
            rs_rec_nr.append(rs_obj_nr[-1] *
                             self.container[index].record_qty.value)
            self.log.info(
                "Expecting %s/%s rebuilt records in container %s after "
                "excluding rank %s", rs_rec_nr[-1],
                self.container[index].object_qty.value *
                self.container[index].record_qty.value, self.container[index],
                rank)

        # Manually exclude the specified rank
        # The first pool excludes the rank by stopping it on the server; the
        # remaining pools then exclude the (already stopped) rank directly.
        for index in range(pool_quantity):
            if index == 0:
                self.server_managers[0].stop_ranks([rank], self.d_log, True)
            else:
                self.pool[index].exclude(ranks=[rank])

        # Wait for recovery to start for first pool.
        self.pool[0].wait_for_rebuild(True)

        # Wait for recovery to complete
        for index in range(pool_quantity):
            self.pool[index].wait_for_rebuild(False)

        # Check the pool information after the rebuild
        # Each pool should now report the excluded rank's targets as disabled
        # and the rebuilt object/record counts computed above, with no errors.
        status = True
        for index in range(pool_quantity):
            status &= self.pool[index].check_pool_info(
                pi_nnodes=server_count * engine_count,
                pi_ntargets=server_count * engine_count * target_count,
                pi_ndisabled=target_count)
            status &= self.pool[index].check_rebuild_status(
                rs_done=1,
                rs_obj_nr=rs_obj_nr[index],
                rs_rec_nr=rs_rec_nr[index],
                rs_errno=0)
        self.assertTrue(status, "Error confirming pool info after rebuild")

        # Verify the data after rebuild
        for index in range(pool_quantity):
            # Clear the container health status before reading back the data
            self.daos_cmd.container_set_prop(pool=self.pool[index].uuid,
                                             cont=self.container[index].uuid,
                                             prop="status",
                                             value="healthy")
            if self.container[index].object_qty.value != 0:
                self.assertTrue(self.container[index].read_objects(),
                                "Data verification error after rebuild")
        self.log.info("Test Passed")

    def test_simple_rebuild(self):
        """JIRA ID: DAOS-XXXX Rebuild-001.

        Test Description:
            The most basic rebuild test.

        Use Cases:
            single pool rebuild, single client, various record/object counts

        :avocado: tags=all,daily_regression
        :avocado: tags=vm,large
        :avocado: tags=rebuild
        :avocado: tags=pool,rebuild_tests,test_simple_rebuild
        """
        self.run_rebuild_test(1)

    def test_multipool_rebuild(self):
        """JIRA ID: DAOS-XXXX (Rebuild-002).

        Test Description:
            Expand on the basic test by rebuilding 2 pools at once.

        Use Cases:
            multipool rebuild, single client, various object and record counts

        :avocado: tags=all,daily_regression
        :avocado: tags=vm,large
        :avocado: tags=rebuild
        :avocado: tags=pool,rebuild_tests,test_multipool_rebuild
        """
        # The pool quantity comes from the test yaml parameters
        self.run_rebuild_test(self.params.get("quantity", "/run/testparams/*"))
# Esempio n. 7 (0)
class RbldWithIO(TestWithServers):
    """Test class for pool rebuild during I/O.

    Test Class Description:
        This class contains tests for pool rebuild that feature I/O going on
        during the rebuild.

    :avocado: recursive
    """

    def __init__(self, *args, **kwargs):
        """Initialize a RbldWithIO object."""
        super().__init__(*args, **kwargs)
        # DaosCommand wrapper, created in test_rebuild_with_io()
        self.daos_cmd = None

    def test_rebuild_with_io(self):
        """JIRA ID: Rebuild-003.

        Test Description:
            Trigger a rebuild while I/O is ongoing.

        Use Cases:
            single pool, single client performing continuous read/write/verify
            sequence while failure/rebuild is triggered in another process

        :avocado: tags=all,pool,rebuild,daily_regression,medium,rebuildwithio
        """
        # Get the test params
        self.add_pool(create=False)
        self.add_container(self.pool, create=False)
        targets = self.params.get("targets", "/run/server_config/*")
        rank = self.params.get("rank", "/run/testparams/*")
        obj_class = self.params.get("object_class", "/run/testparams/*")
        server_count = len(self.hostlist_servers)

        # Create a pool and verify the pool info before rebuild (also connects)
        self.pool.create()
        checks = {
            "pi_nnodes": server_count,
            "pi_ntargets": server_count * targets,
            "pi_ndisabled": 0,
        }
        self.assertTrue(
            self.pool.check_pool_info(**checks),
            "Invalid pool information detected before rebuild")

        self.assertTrue(
            self.pool.check_rebuild_status(rs_errno=0, rs_done=1,
                                           rs_obj_nr=0, rs_rec_nr=0),
            "Invalid pool rebuild info detected before rebuild")

        # Create and open the container
        self.container.create()

        # Write data to the container for 30 seconds
        self.log.info(
            "Wrote %s bytes to container %s",
            self.container.execute_io(30, rank, obj_class), self.container.uuid)

        # Determine how many objects will need to be rebuilt
        self.container.get_target_rank_lists(" prior to rebuild")

        # Trigger rebuild by stopping the rank to which data was written
        self.server_managers[0].stop_ranks([rank], self.d_log)

        # Wait for recovery to start
        self.pool.wait_for_rebuild(True)

        # Clear the container health status so I/O can continue during rebuild
        self.daos_cmd = DaosCommand(self.bin)
        self.daos_cmd.container_set_prop(
            pool=self.pool.uuid,
            cont=self.container.uuid,
            prop="status",
            value="healthy")

        # Write data to the container for another 30 seconds
        self.log.info(
            "Wrote an additional %s bytes to container %s",
            self.container.execute_io(30), self.container.uuid)

        # Wait for recovery to complete
        self.pool.wait_for_rebuild(False)

        # Check the pool information after the rebuild.
        # Fixed: the original contained a duplicated assignment
        # ("status = status = ...").
        status = self.pool.check_pool_info(
            pi_nnodes=server_count,
            pi_ntargets=(server_count * targets),  # DAOS-2799
            pi_ndisabled=targets,                  # DAOS-2799
        )
        status &= self.pool.check_rebuild_status(
            rs_done=1, rs_obj_nr=">0", rs_rec_nr=">0", rs_errno=0)
        self.assertTrue(status, "Error confirming pool info after rebuild")

        # Verify the data after rebuild
        self.assertTrue(
            self.container.read_objects(),
            "Data verification error after rebuild")
        self.log.info("Test Passed")
# Esempio n. 8 (0)
class RbldDeleteObjects(RebuildTestBase):
    # pylint: disable=too-many-ancestors
    """Test class for deleting objects during pool rebuild.

    Test Class Description:
        Tests that delete objects or records from a container while a pool
        rebuild is in progress, then confirm the expected amount of data was
        removed and the remaining data is still readable.

    :avocado: recursive
    """
    def __init__(self, *args, **kwargs):
        """Initialize a RebuildDeleteObjects object."""
        super().__init__(*args, **kwargs)
        # Indices of the objects/records punched during rebuild
        self.punched_indices = None
        # Number of objects/records actually punched
        self.punched_qty = 0
        # What to punch: "object" or "record", set by each test method
        self.punch_type = None
        self.daos_cmd = None

    def execute_during_rebuild(self):
        """Delete half of the objects from the container during rebuild."""
        self.daos_cmd = DaosCommand(self.bin)
        self.daos_cmd.container_set_prop(
            pool=self.pool.uuid, cont=self.container.uuid,
            prop="status", value="healthy")

        if self.punch_type == "object":
            # Punch every odd-indexed object, i.e. half of the objects
            self.punched_indices = list(
                range(1, self.container.object_qty.value, 2))
            self.punched_qty = self.container.punch_objects(
                self.punched_indices)
        elif self.punch_type == "record":
            # Punch every odd-indexed record in each object
            self.punched_indices = list(
                range(1, self.container.record_qty.value, 2))
            self.punched_qty = self.container.punch_records(
                self.punched_indices)

    def verify_container_data(self, txn=0):
        """Verify the container data.

        Args:
            txn (int, optional): transaction timestamp to read. Defaults to 0.
        """
        # Compute how many objects/records should have been punched
        expected_qty = 0
        if self.punch_type == "record":
            # The same record indices were punched in every object
            expected_qty = (
                len(self.punched_indices) * self.container.object_qty.value)
        elif self.punch_type == "object":
            expected_qty = len(self.punched_indices)
        self.assertEqual(
            expected_qty, self.punched_qty,
            "Error punching {}s during rebuild: {}/{}".format(
                self.punch_type, self.punched_qty, expected_qty))

        # Read objects from the last transaction
        super().verify_container_data(txn)

    def test_rebuild_delete_objects(self):
        """JIRA ID: DAOS-2572.

        Test Description:
            Delete objects while a rebuild is running.  The rebuild should
            complete successfully, only the remaining data should be
            accessible, and it should only exist on the rebuild target and
            the non-excluded original targets.  Data from the deleted objects
            must no longer be accessible.

        Use Cases:
            foo

        :avocado: tags=all,full_regression
        :avocado: tags=large
        :avocado: tags=rebuild,delete_objects,rebuilddeleteobject
        """
        self.punch_type = "object"
        self.execute_rebuild_test()

    def test_rebuild_delete_records(self):
        """JIRA ID: DAOS-2574.

        Test Description:
            Delete records while a rebuild is running.  The rebuild should
            complete successfully, only the remaining data should be
            accessible, and it should only exist on the rebuild target and
            the non-excluded original targets.  Data from the deleted records
            must no longer be accessible.

        Use Cases:
            foo

        :avocado: tags=all,full_regression
        :avocado: tags=large
        :avocado: tags=rebuild,delete_objects,rebuilddeleterecord
        """
        self.punch_type = "record"
        self.execute_rebuild_test()