    def test_replace_brick_when_io_in_progress(self):
        """Test replacing a brick with a brick from existing servers while
        IO is in progress.

        Description:
            - replace_brick
            - wait for heal to complete
            - validate IO
        """
        # Log Volume Info and Status before replacing brick from the volume.
        g.log.info(
            "Logging volume info and Status before replacing brick "
            "from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Replace brick from a sub-volume
        g.log.info("Replace a faulty brick from the volume")
        ret = replace_brick_from_volume(self.mnode, self.volname, self.servers,
                                        self.all_servers_info)
        self.assertTrue(ret, "Failed to replace faulty brick from the volume")
        g.log.info("Successfully replaced faulty brick from the volume")

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))

        # Log Volume Info and Status after replacing the brick
        g.log.info(
            "Logging volume info and Status after replacing brick "
            "from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify all processes of the volume are online
        g.log.info("Verifying all processes of the volume are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All processes are not online", self.volname))
        g.log.info("Volume %s : All processes are online", self.volname)

        # Wait for self-heal to complete
        g.log.info("Wait for self-heal to complete")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(
            ret, "Self-heal did not complete within 20 minutes, which is "
            "ample time for the current test workload")
        g.log.info("self-heal is successful after replace-brick operation")

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_afr_self_heal_add_brick_rebalance(self):
        """
        Test Steps:
        1. Create a replicated/distributed-replicate volume and mount it
        2. Start IO from the clients
        3. Bring down a brick from the subvol and validate it is offline
        4. Bring back the brick online and wait for heal to complete
        5. Once the heal is completed, expand the volume.
        6. Trigger rebalance and wait for rebalance to complete
        7. Validate IO, no errors during the steps performed from step 2
        8. Check arequal of the subvol and all the brick in the same subvol
        should have same checksum
        """
        # Start IO from the clients
        self.all_mounts_procs = []
        for count, mount_obj in enumerate(self.mounts):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 3 --dir-length 5 "
                   "--max-num-of-dirs 5 --num-of-files 30 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Pick one brick from each subvol and bring them offline
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        bricks_to_bring_offline = []
        for subvol in subvols:
            self.assertTrue(subvol, "List is empty")
            bricks_to_bring_offline.extend(sample(subvol, 1))

        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret,
            "Unable to bring bricks {} offline".format(
                bricks_to_bring_offline))

        # Validate the bricks are offline
        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(
            ret, "Bricks {} are still online".format(bricks_to_bring_offline))

        # Wait for 10 seconds for IO to be generated
        sleep(10)

        # Start volume with force to bring all bricks online
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Volume start with force failed")
        g.log.info("Volume: %s started successfully", self.volname)

        # Verify all processes of the volume are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All processes are not online", self.volname))

        # Monitor heal completion
        self.assertTrue(
            monitor_heal_completion(self.mnode,
                                    self.volname,
                                    interval_check=10),
            "Heal failed after 20 mins")

        # Check are there any files in split-brain and heal completion
        self.assertFalse(
            is_volume_in_split_brain(self.mnode, self.volname),
            "Some files are in split brain for "
            "volume: {}".format(self.volname))

        # Expanding volume by adding bricks to the volume when IO in progress
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))

        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the "
                   "volume %s", self.volname)

        # Without a sleep the next step fails with glusterd syncop locking
        sleep(2)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1800)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on "
                   "the volume %s", self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        self.all_mounts_procs = []

        # List all files and dirs created
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")

        # Check that the arequal checksum is the same on all bricks of
        # each subvol
        for subvol in subvols:
            ret, arequal_from_the_bricks = collect_bricks_arequal(subvol)
            self.assertTrue(
                ret, "Failed to collect arequal from the bricks in "
                "the subvol {}".format(subvol))
            checksums = arequal_from_the_bricks
            if (self.volume_type == "arbiter"
                    or self.volume_type == "distributed-arbiter"):
                # The arbiter brick stores only metadata, so compare the
                # checksums of the two data bricks only
                checksums = arequal_from_the_bricks[:2]
            self.assertEqual(
                len(set(checksums)), 1,
                "Arequal is not the same on all the bricks in the "
                "subvol {}".format(subvol))
    def test_rebalance_with_hidden_files(self):
        # pylint: disable=too-many-statements
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_files "
                   "--base-file-name . "
                   "-f 99 %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Verify DHT values across mount points
        for mount_obj in self.mounts:
            g.log.debug("Verifying hash layout values %s:%s",
                        mount_obj.client_system, mount_obj.mountpoint)
            ret = validate_files_in_dir(mount_obj.client_system,
                                        mount_obj.mountpoint,
                                        test_type=FILE_ON_HASHED_BRICKS,
                                        file_type=FILETYPE_FILES)
            self.assertTrue(
                ret, "Expected - Files are created on only "
                "sub-volume according to its hashed value")
            g.log.info("Hash layout values are verified %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)

        # Getting arequal checksum before rebalance
        g.log.info("Getting arequal checksum before rebalance")
        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)
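        # (arequal computes a checksum over the whole directory tree, so
        # matching values before and after rebalance indicate the data
        # survived the layout change intact)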

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify all processes of the volume are online
        g.log.info("Verifying all processes of the volume are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All processes are not online", self.volname))
        g.log.info("Volume %s : All processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Checking if there are any migration failures
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            failed_files_count = int(each_node['failures'])
            self.assertEqual(
                failed_files_count, 0,
                "Rebalance failed to migrate some files on %s" %
                each_node['nodeName'])
        g.log.info("There are no migration failures on any node")

        # Getting arequal checksum after rebalance
        g.log.info("Getting arequal checksum after rebalance")
        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)

        # Comparing arequals checksum before and after rebalance
        g.log.info("Comparing arequals checksum before and after rebalance")
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")

    def test_quota_enable_disable_enable_when_io_in_progress(self):
        """Enable, Disable and Re-enable Quota on the volume when IO is
            in progress.
        """
        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = enable_quota(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Check if quota is enabled
        g.log.info("Validate Quota is enabled on the volume %s", self.volname)
        ret = is_quota_enabled(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Quota is not enabled on the volume %s", self.volname))
        g.log.info("Successfully Validated quota is enabled on volume %s",
                   self.volname)

        # Path to set quota limit
        path = "/"

        # Set Quota limit on the root of the volume
        g.log.info("Set Quota Limit on the path %s of the volume %s", path,
                   self.volname)
        ret, _, _ = set_quota_limit_usage(self.mnode,
                                          self.volname,
                                          path=path,
                                          limit="1GB")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  " the volume %s", path, self.volname))
        g.log.info("Successfully set the Quota limit on %s of the volume %s",
                   path, self.volname)
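        # (quota limit-usage caps the logical size of data under the given
        # path; with path "/" the 1GB cap applies to the whole volume)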

        # get_quota_list
        g.log.info("Get Quota list for path %s of the volume %s", path,
                   self.volname)
        quota_list = get_quota_list(self.mnode, self.volname, path=path)
        self.assertIsNotNone(quota_list,
                             ("Failed to get the quota list for "
                              "path %s of the volume %s", path, self.volname))
        self.assertIn(
            path, quota_list.keys(),
            ("%s not part of the quota list %s even though "
             "it is set on the volume %s", path, quota_list, self.volname))
        g.log.info(
            "Successfully listed path %s in the quota list %s of the "
            "volume %s", path, quota_list, self.volname)

        # Disable quota
        g.log.info("Disable quota on the volume %s", self.volname)
        ret, _, _ = disable_quota(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to disable quota on the volume %s", self.volname))
        g.log.info("Successfully disabled quota on the volume %s",
                   self.volname)

        # Check if quota is still enabled (expected : Disabled)
        g.log.info("Validate Quota is enabled on the volume %s", self.volname)
        ret = is_quota_enabled(self.mnode, self.volname)
        self.assertFalse(ret, ("Quota is still enabled on the volume %s "
                               "(expected: Disabled)", self.volname))
        g.log.info("Successfully Validated quota is disabled on volume %s",
                   self.volname)

        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = enable_quota(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Check if quota is enabled
        g.log.info("Validate Quota is enabled on the volume %s", self.volname)
        ret = is_quota_enabled(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Quota is not enabled on the volume %s", self.volname))
        g.log.info("Successfully Validated quota is enabled on volume %s",
                   self.volname)

        # get_quota_list
        g.log.info("Get Quota list for path %s of the volume %s", path,
                   self.volname)
        quota_list = get_quota_list(self.mnode, self.volname, path=path)
        self.assertIsNotNone(quota_list,
                             ("Failed to get the quota list for "
                              "path %s of the volume %s", path, self.volname))
        self.assertIn(
            path, quota_list.keys(),
            ("%s not part of the quota list %s even though "
             "it is set on the volume %s", path, quota_list, self.volname))
        g.log.info(
            "Successfully listed path %s in the quota list %s of the "
            "volume %s", path, quota_list, self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_dir_change_perm_recursive(self):
        # pylint: disable=too-many-statements
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for index, mount_obj in enumerate(self.mounts, start=1):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 5 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, index + 10, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Wait for IO to complete
        g.log.info("Waiting for IO to complete")
        ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "layout is complete: FAILED")
        g.log.info("layout is complete: PASS")

        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get brick list")
        g.log.info("Successful in getting brick list %s", brick_list)

        mount_obj = self.mounts[0]
        cmd = ("find %s -mindepth 1 -maxdepth 1 -type d | "
               "xargs chown -R test_user1" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("Change user owner successfully for testdir on %s",
                   mount_obj.client_system)

        retval = compare_dir_structure(mount_obj.client_system,
                                       mount_obj.mountpoint, brick_list, 0)
        self.assertTrue(
            retval, "Failed to compare user permission for all"
            " files/dir in mount directory with brick directory")
        g.log.info("User permission is same on mount and brick directory")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1"
               " -type d\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("directory is successfully accessed with different user")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1"
               " -type d | xargs chmod 777\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertNotEqual(rcode, 0, err)
        g.log.info("directory permission are not changed by different user")

        cmd = ("find %s -mindepth 1 -maxdepth 1 -type d | "
               "xargs chgrp -R test_user1" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("Change group owner successfully for testdir on %s",
                   mount_obj.client_system)

        retval = compare_dir_structure(mount_obj.client_system,
                                       mount_obj.mountpoint, brick_list, 1)
        self.assertTrue(
            retval, "Failed to compare group permission for all"
            " files/dir in mount directory with brick directory")
        g.log.info("Group permission is same on mount and brick directory")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1"
               " -type d\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("directory is successfully accessed with different user")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -type d "
               "| xargs chmod 777\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertNotEqual(rcode, 0, err)
        g.log.info("directory permission are not changed by different user")

        cmd = ("find %s -mindepth 1 -maxdepth 1 -type d | xargs chmod -R 777" %
               (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("Change permission 777 successfully for testdir on %s",
                   mount_obj.client_system)

        retval = compare_dir_structure(mount_obj.client_system,
                                       mount_obj.mountpoint, brick_list, 2)
        self.assertTrue(
            retval, "Failed to compare permission for all"
            " files/dir in mount directory with brick directory")
        g.log.info("Permission is same on mount and brick directory")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1"
               " -type d\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("directory is successfully accessed with different user")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1"
               " -type d | xargs chmod 666\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertNotEqual(rcode, 0, err)
        g.log.info("directory permission are not changed by different user")
    def test_volume_status_inode_while_io_in_progress(self):
        '''
        Create any type of volume and mount it. Once the volume is
        mounted successfully on the client, start running IO on the
        mount point, then run "gluster volume status volname inode"
        on random cluster nodes. The command should not hang while
        IO is in progress.
        Then check whether the IO completed successfully on the mount
        point and whether the files on the mount point list properly.
        '''

        # Mounting a volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, "Volume mount failed for %s" % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        # Writes immediately after mounting sometimes fail, so wait for
        # 10 seconds before starting IO
        sleep(10)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 15 "
                "--max-num-of-dirs 5 "
                "--num-of-files 25 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
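            # Offset dirname-start-num per client so each mount creates a
            # disjoint directory range and does not collide with the others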
            self.counter = self.counter + 10
        self.io_validation_complete = False

        # performing  "gluster volume status volname inode" command on
        # all cluster servers randomly while io is in progress,
        # this command should not get hang while io is in progress
        # pylint: disable=unused-variable
        for i in range(20):
            ret, _, _ = g.run(
                random.choice(self.servers),
                "gluster --timeout=12000 volume status %s "
                "inode" % self.volname)
            self.assertEqual(ret, 0, ("Volume status 'inode' failed on "
                                      "volume %s" % self.volname))
            g.log.info(
                "Successful in logging volume status"
                "'inode' of volume %s", self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_quota_single_brick_volume(self):
        """
        Verifying directory quota functionality on a single brick volume.

        * Create a volume with single brick (1x1) start and mount it
        * Enable quota on the volume
        * Set a limit of 1GB on this volume
        * Create some directories and files in the mount point
        * Execute a quota list command

        """

        # Enable Quota
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to enable quota on the "
                                  "volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Path to set quota limit
        path = "/"

        # Set Quota limit on the root of the volume
        g.log.info("Set Quota Limit on the path %s of the volume %s",
                   path, self.volname)
        ret, _, _ = quota_limit_usage(self.mnode, self.volname,
                                      path=path, limit="1GB")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  "the volume %s", path, self.volname))
        g.log.info("Successfully set the Quota limit on %s of the volume %s",
                   path, self.volname)

        # Starting IO on the mounts
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 20 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" % (
                       self.script_upload_path,
                       count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get Quota list on Volume
        g.log.info("Get Quota list for the volume %s", self.volname)
        quota_list1 = quota_fetch_list(self.mnode, self.volname)
        self.assertIsNotNone(quota_list1, ("Failed to get the quota list for "
                                           "the volume %s", self.volname))
        self.assertIn(path, quota_list1.keys(),
                      ("%s not part of the quota list %s even though "
                       "it is set on the volume %s", path,
                       quota_list1, self.volname))
        g.log.info("Successfully listed quota list %s of the "
                   "volume %s", quota_list1, self.volname)

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_volume_checksum_after_changing_network_ping_timeout(self):

        # Create Volume
        # Mount the Volume
        # Create some files on the mount point
        # Calculate the checksum of the mount point
        # Check the default network ping timeout of the volume
        # Change network ping timeout to some other value
        # Calculate the checksum again
        # Checksum should be the same without remounting the volume

        # Mounting volume as glusterfs
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, "volume mount failed for %s" % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        # Checking volume mounted or not
        ret = is_mounted(self.volname, self.mounts[0].mountpoint, self.mnode,
                         self.mounts[0].client_system, self.mount_type)
        self.assertTrue(ret, "Volume not mounted on mount point: %s"
                        % self.mounts[0].mountpoint)
        g.log.info("Volume %s mounted on %s", self.volname,
                   self.mounts[0].mountpoint)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_files -f 10 --base-file-name newfile %s"
                   % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Checksum calculation of mount point before
        # changing network.ping-timeout
        ret, before_checksum = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, "checksum failed to calculate for mount point")
        g.log.info("checksum calculated successfully")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # performing gluster volume get volname all and
        # getting network ping time out value
        volume_options = get_volume_options(self.mnode, self.volname, "all")
        self.assertIsNotNone(volume_options, "gluster volume get %s all "
                                             "command failed" % self.volname)
        g.log.info("gluster volume get %s all command executed "
                   "successfully", self.volname)
        # The default network.ping-timeout is expected to be 42 seconds
        ret = bool(re.search(r'\b42\b',
                             volume_options['network.ping-timeout']))
        self.assertTrue(ret, "network.ping-timeout value is not the "
                             "expected default of 42")
        g.log.info("network.ping-timeout value is correct")

        # Changing network ping time out value to specific volume
        self.networking_ops = {'network.ping-timeout': '12'}
        ret = set_volume_options(self.mnode, self.volname,
                                 self.networking_ops)
        self.assertTrue(ret, "Changing of network.ping-timeout "
                             "failed for :%s" % self.volname)
        g.log.info("Changing of network.ping-timeout "
                   "success for :%s", self.volname)

        # Checksum calculation of mount point after
        # changing network.ping-timeout
        ret, after_checksum = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, "checksum failed to calculate for mount point")
        g.log.info("checksum calculated successfully")

        # comparing list of checksums of mountpoints before and after
        # network.ping-timeout change
        self.assertItemsEqual(before_checksum, after_checksum,
                              "Checksum not same before and after "
                              "network.ping-timeout change")
        g.log.info("checksum same before and after "
                   "changing network.ping-timeout")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_rebalance_start_status_stop(self):
        # pylint: disable=too-many-statements
        # Form brick list for expanding volume
        add_brick_list = form_bricks_list_to_add_brick(self.mnode,
                                                       self.volname,
                                                       self.servers,
                                                       self.all_servers_info,
                                                       distribute_count=1)
        self.assertIsNotNone(add_brick_list,
                             ("Volume %s: Failed to form "
                              "bricks list to expand", self.volname))
        g.log.info("Volume %s: Formed bricks list to expand", self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Volume %s: Expand start")
        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
        self.assertEqual(ret, 0, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand successful", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: one or more volume process are "
                              "not up", self.volname))
        g.log.info("All volume %s processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, "Logging volume info and status failed on "
            "volume %s" % self.volname)
        g.log.info(
            "Successful in logging volume info and status of volume "
            "%s", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Getting arequal checksum before rebalance start
        g.log.info("Getting arequal before rebalance start")
        arequal_checksum_before_rebalance_start = collect_mounts_arequal(
            self.mounts)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Stop on-going rebalance
        g.log.info("Stop rebalance on the volume")
        ret, _, _ = rebalance_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         ("Volume %s: Failed to stop rebalance", self.volname))
        g.log.info("Checking whether the migration is stopped or not")

        # Wait till the on-going file migration completes on all servers
        count = 0
        while count < 80:
            rebalance_count = 0
            for server in self.servers:
                ret, _, _ = g.run(server, "pgrep rebalance")
                if ret != 0:
                    rebalance_count += 1
            if rebalance_count == len(self.servers):
                break
            sleep(2)
            count += 1
        self.assertEqual(rebalance_count, len(self.servers),
                         "Volume %s: Rebalance process is still running on "
                         "some servers" % self.volname)
        g.log.info("Volume %s: Rebalance process is not running on any "
                   "server", self.volname)

        # List all files and dirs from mount point
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Getting arequal checksum after the rebalance is stopped
        g.log.info("Getting arequal checksum after the rebalance is stopped")
        arequal_checksum_after_rebalance_stop = collect_mounts_arequal(
            self.mounts)

        # Comparing arequals checksum before start of rebalance and
        #                       after the rebalance is stopped
        g.log.info("Comparing arequals checksum before start of rebalance and"
                   "after the rebalance is stopped")
        self.assertEqual(arequal_checksum_before_rebalance_start,
                         arequal_checksum_after_rebalance_stop,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")

    def test_brickreset_ec_volume(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """
        - Start resource consumption tool
        - Create IO on dir2 of volume mountpoint
        - Reset brick start
        - Check if brick is offline
        - Reset brick with destination same as source with force running IO's
        - Validating IO's and waiting for it to complete on dir2
        - Remove dir2
        - Create 5 directory and 5 files in dir of mountpoint
        - Rename all files inside dir1 at mountpoint
        - Create softlink and hardlink of files in dir1 of mountpoint
        - Delete op for deleting all file in one of the dirs inside dir1
        - Change chmod, chown, chgrp
        - Create tiny, small, medium and large file
        - Create IO's
        - Validating IO's and waiting for it to complete
        - Calculate arequal before killing brick
        - Get brick from Volume
        - Reset brick
        - Check if brick is offline
        - Reset brick by giving a different source and dst node
        - Reset brick by giving dst and source same without force
        - Obtain hostname
        - Reset brick with dst-source same force using hostname - Successful
        - Monitor heal completion
        - Bring down other bricks to max redundancy
        - Get arequal after bringing down bricks
        - Bring bricks online
        - Reset brick by giving a same source and dst brick
        - Kill brick manually
        - Check if brick is offline
        - Reset brick by giving a same source and dst brick
        - Wait for brick to come online
        - Bring down other bricks to max redundancy
        - Get arequal after bringing down bricks
        - Bring bricks online
        - Remove brick from backend
        - Check if brick is offline
        - Reset brick by giving dst and source same without force - Successful
        - Monitor heal completion
        - Compare the arequal's calculated
        """
        # Starting resource consumption using top
        log_file_mem_monitor = getcwd() + '/mem_usage.log'
        cmd = ('for i in {1..100}; do top -n 1 -b | egrep "RES|gluster"; '
               'free -h; sleep 10; done >> %s 2>&1' % log_file_mem_monitor)
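        # The loop above samples 'top' and 'free' every 10 seconds for about
        # 16 minutes, appending the output to mem_usage.log for later review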
        g.log.info(cmd)
        for mount_obj in self.mounts:
            g.run_async(mount_obj.client_system, cmd)

        # Get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # Creating directory2
        cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory2")
        g.log.info("Directory 2 on %s created successfully", self.mounts[0])

        # Creating files on client side for dir2
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s/dir2" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Reset a brick
        g.log.info('Reset of brick using start')
        brick_reset = choice(bricks_list)
        ret, _, _ = reset_brick(self.mnode, self.volname, brick_reset, "start")
        self.assertEqual(ret, 0, "Failed to start reset of brick %s"
                         % brick_reset)

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        offline_bricks = get_offline_bricks_list(self.mnode, self.volname)
        self.assertTrue(offline_bricks, "Failed to get offline bricks list")
        self.assertIn(brick_reset, offline_bricks, "Brick not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with dest same as source with force while running IO's
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode,
                                self.volname,
                                brick_reset,
                                "commit",
                                brick_reset,
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Validating IO's and waiting to complete
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Deleting dir2
        cmd = ('rm -rf %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory2")
        g.log.info("Directory 2 deleted successfully for %s", self.mounts[0])

        del self.all_mounts_procs[:]

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory1")
        g.log.info("Directory 1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start, end = 1, 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = str(start) + ".." + str(end)
            file_range = str(start) + ".." + str(end)
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            g.run(mount_obj.client_system, cmd)

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            g.run(mount_obj.client_system, cmd)

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1/dir1; all mounts point at the same
        # volume, so run the rename once from the first client (running it
        # from every client would race and double-prefix the files)
        mount_obj = self.mounts[0]
        cmd = ('cd %s/dir1/dir1/; '
               'for FILENAME in *;'
               'do mv $FILENAME Unix_$FILENAME; '
               'done;' % mount_obj.mountpoint)
        g.run(mount_obj.client_system, cmd)

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start + 1)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # Delete op for deleting all file in one of the dirs. start is being
        # used as offset like in previous testcase in dir1
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do rm -f $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start, end = 2, 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            start += 5
            end += 5

        # Create tiny, small, medium and large files at the mountpoint.
        # Offset is used to differ filenames at diff clients.
        offset = 1
        for mount_obj in self.mounts:
            cmd = ('fallocate -l 100 %s/tiny_file%s.txt'
                   % (mount_obj.mountpoint, str(offset)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('fallocate -l 20M %s/small_file%s.txt'
                   % (mount_obj.mountpoint, str(offset)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('fallocate -l 200M %s/medium_file%s.txt'
                   % (mount_obj.mountpoint, str(offset)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('fallocate -l 1G %s/large_file%s.txt'
                   % (mount_obj.mountpoint, str(offset)))
            g.run(mount_obj.client_system, cmd)
            offset += 1

        # Creating files on client side for dir1
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s/dir1" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validating IO's and waiting to complete
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Get arequal before killing the brick
        g.log.info('Getting arequal before killing of brick...')
        ret, result_before_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before killing of brick is successful')

        # Reset a brick
        g.log.info('Reset of brick using start')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "start")
        self.assertEqual(ret, 0, "Failed to start reset of brick %s"
                         % bricks_list[0])

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]])
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick by giving a different source and dst brick
        g.log.info('Reset of brick by giving different src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "commit", bricks_list[1])
        self.assertNotEqual(ret, 0, "Not Expected: Reset brick is successfull")
        g.log.info("Expected : Source and Destination brick must be same for"
                   " reset")

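        # Reset-brick commit to the same brick path without force is expected
        # to fail because the brick still carries the old volume-id; gluster
        # requires 'force' to reuse the path, as exercised below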
        # Reset brick with destination same as source
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "commit", bricks_list[0])
        self.assertNotEqual(ret, 0, "Not Expected : Reset brick is successful")
        g.log.info("Expected : Reset brick failed,Vol id is same use force")

        # Obtain hostname of node
        ret, hostname_node1, _ = g.run(self.mnode, "hostname")
        self.assertEqual(ret, 0,
                         ("Failed to obtain hostname of node %s", self.mnode))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mnode, hostname_node1.strip())

        # Reset brick with destination same as source with force using hostname
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(),
                                self.volname,
                                bricks_list[0],
                                "commit",
                                bricks_list[0],
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        ret = bring_bricks_offline(self.volname, bricks_list[1:3])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')
        sleep(2)

        # Check if the remaining 4 bricks are online
        all_bricks = [
            bricks_list[0], bricks_list[3], bricks_list[4], bricks_list[5]
        ]
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Check mount point
        cmd = 'ls -lrt %s' % self.mounts[0].mountpoint
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down bricks...')
        ret, result_offline_redundant_brick1 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after bringing down bricks '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[1:3]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Reset brick without bringing down brick
        g.log.info('Reset of brick with same src and dst brick without '
                   'bringing it down')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1],
                                "commit", bricks_list[1])
        self.assertNotEqual(ret, 0, "Not Expected: Reset brick passed")
        g.log.info("Expected : Brick reset failed as source brick must be"
                   " stopped")

        # Kill the brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with dest same as source after killing brick manually
        g.log.info('Reset of brick with same src and dst brick after killing '
                   'the brick manually')
        ret, _, _ = reset_brick(self.mnode,
                                self.volname,
                                bricks_list[1],
                                "commit",
                                bricks_list[1],
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[2:4])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Check mount point
        cmd = 'ls -lrt %s' % self.mounts[0].mountpoint
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down redundant bricks...')
        ret, result_offline_redundant_brick2 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after bringing down redundant bricks '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[2:4]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Removing brick from backend
        brick = bricks_list[0].strip().split(":")
        cmd = "rm -rf %s" % brick[1]
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to delete brick %s" % bricks_list[0])
        g.log.info("Removed brick %s sucessfully", bricks_list[0])

        # Check if the brick is offline
        count = 0
        while count <= 20:
            g.log.info("Check the brick status if it is offline")
            ret = are_bricks_offline(self.mnode, self.volname,
                                     [bricks_list[0]])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with destination same as source
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(), self.volname,
                                bricks_list[0], "commit", bricks_list[0])
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Comparing arequals
        self.assertEqual(
            result_before_killing_brick, result_offline_redundant_brick1,
            'Arequals are not equal before killing brick '
            'processes and after offlining redundant bricks')
        g.log.info('Arequals are equal before killing brick '
                   'processes and after offlining redundant bricks')

        # Comparing arequals
        self.assertEqual(
            result_offline_redundant_brick2, result_offline_redundant_brick1,
            'Arequals are not equal after offlining redundant'
            ' bricks')
        g.log.info('Arequals are equal after offlining redundant bricks')

        # Deleting dir1
        cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory1")
        g.log.info("Directory 1 deleted successfully for %s", self.mounts[0])
Example #11
    def test_entry_self_heal_heal_command(self):
        """
        Test Entry-Self-Heal (heal command)

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        - create IO
        - get arequal before getting bricks offline
        - set the volume option
        "self-heal-daemon": "off"
        - bring down all bricks processes from selected set
        - get arequal after getting bricks offline and compare with
        arequal before bringing bricks offline
        - modify the data
        - get arequal before getting bricks online
        - bring bricks online
        - set the volume option
        "self-heal-daemon": "on"
        - check daemons and start healing
        - check if heal is completed
        - check for split-brain
        - get arequal after getting bricks online and compare with
        arequal before bringing bricks online
        """

        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "are set to 'off'")

        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s" %
                       (mount_obj.client_system, mount_obj.mountpoint))
            cmd = (
                "python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-length 2 "
                "--dir-depth 2 "
                "--max-num-of-dirs 2 "
                "--num-of-files 20 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
            g.log.info("IO on %s:%s is started successfully" %
                       (mount_obj.client_system, mount_obj.mountpoint))
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Command list to do different operations with data -
        # create, rename, copy and delete
        cmd_list = [
            "python %s create_files -f 20 %s",
            "python %s mv -i '.trashcan' %s",
            "python %s copy --dest-dir new_dir %s",
            "python %s delete %s",
        ]
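        # Each template above carries two '%s' placeholders (script path and
        # mountpoint); they are substituted per mount in the loop below into
        # a separate variable so the template itself stays intact for the
        # next mount.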

        for cmd in cmd_list:
            # Get arequal before getting bricks offline
            g.log.info('Getting arequal before getting bricks offline...')
            ret, result_before_offline = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal before getting bricks offline '
                       'is successful')

            # Setting options
            g.log.info('Setting options...')
            options = {
                "self-heal-daemon": "off",
            }
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, 'Failed to set options %s' % options)
            g.log.info("Option 'self-heal-daemon' "
                       "is set to 'off' successfully")

            # Select bricks to bring offline
            bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
                self.mnode, self.volname))
            bricks_to_bring_offline = filter(
                None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                       bricks_to_bring_offline_dict['cold_tier_bricks'] +
                       bricks_to_bring_offline_dict['volume_bricks']))
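            # Note: on Python 3 filter() returns a one-shot iterator, so
            # bricks_to_bring_offline would need wrapping in list() before
            # being reused below; on the Python 2 this suite targets (see the
            # im_func usage in setUpClass), filter() already returns a list.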

            # Bring brick offline
            g.log.info('Bringing bricks %s offline...' %
                       bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(
                ret,
                'Failed to bring bricks %s offline' % bricks_to_bring_offline)

            ret = are_bricks_offline(self.mnode, self.volname,
                                     bricks_to_bring_offline)
            self.assertTrue(
                ret, 'Bricks %s are not offline' % bricks_to_bring_offline)
            g.log.info('Bringing bricks %s offline is successful' %
                       bricks_to_bring_offline)

            # Get arequal after getting bricks offline
            g.log.info('Getting arequal after getting bricks offline...')
            ret, result_after_offline = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal after getting bricks offline '
                       'is successful')

            # Checking arequals before bringing bricks offline
            # and after bringing bricks offline
            self.assertEqual(result_before_offline, result_after_offline,
                             'Checksums are not equal')
            g.log.info('Checksums before bringing bricks offline '
                       'and after bringing bricks offline are equal')

            # Modify the data
            g.log.info("Start modifying IO on all mounts...")
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Modifying IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                parsed_cmd = cmd % (self.script_upload_path,
                                    mount_obj.mountpoint)
                proc = g.run_async(mount_obj.client_system,
                                   parsed_cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)
                g.log.info("IO on %s:%s is modified successfully",
                           mount_obj.client_system, mount_obj.mountpoint)
            self.io_validation_complete = False

            # Validate IO
            g.log.info("Wait for IO to complete and validate IO ...")
            ret = validate_io_procs(self.all_mounts_procs, self.mounts)
            self.assertTrue(ret, "IO failed on some of the clients")
            self.io_validation_complete = True
            g.log.info("IO is successful on all mounts")

            # Get arequal before getting bricks online
            g.log.info('Getting arequal before getting bricks online...')
            ret, result_before_online = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal before getting bricks online '
                       'is successful')

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

            # Bring brick online
            g.log.info('Bringing bricks %s online...' %
                       bricks_to_bring_offline)
            ret = bring_bricks_online(self.mnode, self.volname,
                                      bricks_to_bring_offline)
            self.assertTrue(
                ret,
                'Failed to bring bricks %s online' % bricks_to_bring_offline)
            g.log.info('Bringing bricks %s online is successful' %
                       bricks_to_bring_offline)

            # Setting options
            g.log.info('Setting options...')
            options = {
                "self-heal-daemon": "on",
            }
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, 'Failed to set options %s' % options)
            g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online", self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online" % self.volname)

            # Wait for self-heal-daemons to be online
            g.log.info("Waiting for self-heal-daemons to be online")
            ret = is_shd_daemonized(self.all_servers)
            self.assertTrue(ret, "Either No self heal daemon process found")
            g.log.info("All self-heal-daemons are online")

            # Start healing
            ret = trigger_heal(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not started')
            g.log.info('Healing is started')

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal has not yet completed')

            # Check if heal is completed
            ret = is_heal_complete(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not complete')
            g.log.info('Heal is completed successfully')

            # Check for split-brain
            ret = is_volume_in_split_brain(self.mnode, self.volname)
            self.assertFalse(ret, 'Volume is in split-brain state')
            g.log.info('Volume is not in split-brain state')

            # Get arequal after getting bricks online
            g.log.info('Getting arequal after getting bricks online...')
            ret, result_after_online = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal after getting bricks online '
                       'is successful')

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

            # Checking arequals before bringing bricks online
            # and after bringing bricks online
            self.assertEqual(result_before_online, result_after_online,
                             'Checksums are not equal')
            g.log.info('Checksums before bringing bricks online '
                       'and after bringing bricks online are equal')
Example #12
    def test_dir_change_perm(self):
        # pylint: disable=too-many-statements
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        mount_obj = self.mounts[0]
        cmd = ('cd %s ; mkdir testdir; '
               'mkdir -p testdir/dir{1..10} ; '
               'touch testdir/file{1..10}') % (mount_obj.mountpoint)
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("IO is successful on mount %s", self.clients[0])

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(mount_obj)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "layout is complete: FAILED")
        g.log.info("layout is complete: PASS")

        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get brick list")
        g.log.info("Successful in getting brick list %s", brick_list)

        cmd = ("find %s -mindepth 1 -maxdepth 1 -type d | "
               "xargs chown test_user1" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("Change user owner successfully for testdir on %s",
                   mount_obj.client_system)

        retval = compare_dir_structure(mount_obj.client_system,
                                       mount_obj.mountpoint, brick_list, 0)
        self.assertTrue(
            retval, "Failed to compare user permission for all"
            " files/dir in mount directory with brick directory")
        g.log.info("User permission is same on mount and brick directory")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -maxdepth 1"
               " -type d\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("directory is successfully accessed with different user")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -maxdepth 1"
               " -type d | xargs chmod 777\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertNotEqual(rcode, 0, err)
        g.log.info("directory permission are not changed by different user")

        cmd = ("find %s -mindepth 1 -maxdepth 1 -type d | "
               "xargs chgrp test_user1" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("Change group owner successfully for testdir on %s",
                   mount_obj.client_system)

        retval = compare_dir_structure(mount_obj.client_system,
                                       mount_obj.mountpoint, brick_list, 1)
        self.assertTrue(
            retval, "Failed to compare group permission for all"
            " files/dir in mount directory with brick directory")
        g.log.info("Group permission is same on mount and brick directory")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -maxdepth 1"
               " -type d\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("directory is successfully accessed with different user")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -maxdepth 1 -type d "
               "| xargs chmod 777\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertNotEqual(rcode, 0, err)
        g.log.info("directory permission are not changed by different user")

        cmd = ("find %s -mindepth 1 -maxdepth 1 -type d | xargs chmod 777" %
               (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("Change permission 777 successfully for testdir on %s",
                   mount_obj.client_system)

        retval = compare_dir_structure(mount_obj.client_system,
                                       mount_obj.mountpoint, brick_list, 2)
        self.assertTrue(
            retval, "Failed to compare permission for all"
            " files/dir in mount directory with brick directory")
        g.log.info("Permission is same on mount and brick directory")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -maxdepth 1"
               " -type d\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertEqual(rcode, 0, err)
        g.log.info("directory is successfully accessed with different user")

        cmd = ("su -l test_user2 -c \"find %s -mindepth 1 -maxdepth 1"
               " -type d | xargs chmod 666\"" % (mount_obj.mountpoint))
        rcode, _, err = g.run(mount_obj.client_system, cmd)
        self.assertNotEqual(rcode, 0, err)
        g.log.info("directory permission are not changed by different user")
Example #13
    def setUpClass(cls):

        # Calling GlusterBaseClass setUpClass
        GlusterBaseClass.setUpClass.im_func(cls)

        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume and Mount Volume")
        ret = cls.setup_volume_and_mount_volume(mounts=cls.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        # Upload io scripts for running IO on mounts
        g.log.info("Upload io scripts to clients %s for running IO on "
                   "mounts", cls.clients)
        script_local_path = ("/usr/share/glustolibs/io/scripts/"
                             "file_dir_ops.py")
        cls.script_upload_path = ("/usr/share/glustolibs/io/scripts/"
                                  "file_dir_ops.py")
        ret = upload_scripts(cls.clients, script_local_path)
        if not ret:
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

        # The --dirname-start-num argument for
        # file_dir_ops.py create_deep_dirs_with_files is offset per mount
        # (index + 10 in the cmd below), so every mountpoint creates its own
        # distinct set of top-level dirs and testcases do not step on each
        # other's files.
        #
        # If the --dir-length value is increased, make sure the per-mount
        # offset stays larger than it so the sets of dirs remain disjoint.

        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        cls.all_mounts_procs = []
        for index, mount_obj in enumerate(cls.mounts, start=1):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 1 "
                   "--dir-length 2 "
                   "--max-num-of-dirs 2 "
                   "--num-of-files 55 %s" % (cls.script_upload_path,
                                             index + 10,
                                             mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            cls.all_mounts_procs.append(proc)
        cls.io_validation_complete = False

        # Wait for IO to complete
        if not cls.io_validation_complete:
            g.log.info("Wait for IO to complete")
            ret = wait_for_io_to_complete(cls.all_mounts_procs, cls.mounts)
            if not ret:
                raise ExecutionError("IO failed on some of the clients")
            g.log.info("IO is successful on all mounts")

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(cls.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")
Example #14
    def test_quota_file_larger_than_limit(self):
        # pylint: disable=too-many-statements
        """
        Verifying directory Quota functionality with respect to the
        limit-usage option.

        If a limit is set and a file larger than the limit is created,
        the file creation will stop once it reaches the limit.

        Quota list will then show limit-set and size as the same.

        * Enable Quota
        * Create a directory from mount point
        * Set a limit of 10 MB on the directory
        * Set Quota soft-timeout and hard-timeout to 0 seconds
        * Create a file of size larger than the Quota limit
          eg. 20 MB file
        * Perform Quota list operation to check if all the fields are
          appropriate such as hard_limit, available_space, sl_exceeded,
          hl_exceeded, etc.
        """
        # Enable Quota
        g.log.info("Enabling Quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable Quota on the volume %s", self.volname))
        g.log.info("Successfully enabled Quota on the volume %s", self.volname)

        # Path to set the Quota limit
        path = '/foo'

        # Create a directory 'foo' from the mount point
        mount_obj = self.mounts[0]
        mount_dir = mount_obj.mountpoint
        client = mount_obj.client_system

        g.log.info("Creating dir named 'foo' from client %s", client)
        ret = mkdir(client, "%s/foo" % mount_dir)
        self.assertTrue(
            ret, "Failed to create dir under %s-%s" % (client, mount_dir))
        g.log.info("Directory 'foo' created successfully")

        # Set Quota limit of 10 MB on the directory 'foo' of the volume
        g.log.info("Set Quota Limit on the path %s of the volume %s", path,
                   self.volname)
        ret, _, _ = quota_limit_usage(self.mnode,
                                      self.volname,
                                      path=path,
                                      limit="10MB")
        self.assertEqual(ret, 0, ("Failed to set Quota limit on path %s of "
                                  "the volume %s", path, self.volname))
        g.log.info("Successfully set the Quota limit on %s of the volume %s",
                   path, self.volname)

        # Set Quota soft-timeout to 0 seconds
        g.log.info("Set Quota soft timeout:")
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set soft timeout"))
        g.log.info("Quota soft timeout set successful")

        # Set Quota hard-timeout to 0 second
        g.log.info("Set Quota hard timeout:")
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, '0sec')
        self.assertEqual(ret, 0, ("Failed to set hard timeout"))
        g.log.info("Quota hard timeout set successful")

        # Validate if the Quota limit set is appropriate
        g.log.info(
            "Validate if the Quota limit set is correct for the "
            "directory %s of the volume %s", path, self.volname)
        ret = quota_validate(self.mnode,
                             self.volname,
                             path=path,
                             hard_limit=10485760)
        self.assertTrue(
            ret, ("Quota Limit of 10 MB was not set properly on "
                  "the directory %s of the volume %s", path, self.volname))
        g.log.info(
            "Successfully Validated Quota Limit of 10 MB is set on the"
            " directory %s of the volume %s", path, self.volname)

        # Create a single file of size 20 MB
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        cmd = ("cd %s/foo ; "
               "dd if=/dev/zero of=20MBfile "
               "bs=1M "
               "count=20" % mount_dir)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(
            ret, 1, "Unexpected: File creation succeeded even "
            "after exceeding the hard-limit")
        g.log.info("Expected: File creation failed after exceeding "
                   "hard-limit")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Check if the file created above exists
        g.log.info("Checking if the file created exists in the volume %s",
                   self.volname)
        ret = file_exists(client, "%s/foo/20MBfile" % mount_dir)
        self.assertTrue(ret,
                        ("File does not exist in the volume %s", self.volname))
        g.log.info(
            "Successfully validated the presence of file in the "
            "volume %s", self.volname)

        # Validate if the Quota limit set is appropriate
        g.log.info(
            "Validate if the Quota list fields are appropriate for the "
            "directory %s of the volume %s", path, self.volname)
        ret = quota_validate(self.mnode,
                             self.volname,
                             path=path,
                             hard_limit=10485760,
                             avail_space=0,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, ("Failed to validate the Quota limits on "
                              "the volume %s", self.volname))
        g.log.info(
            "Successfully validated the quota list fields for the"
            " directory %s of the volume %s", path, self.volname)
Example #15
    def test_nfs_ganesha_export_with_multiple_volumes(self):
        """
        Testcase to verify multiple volumes get exported when IO is in
        progress.
        """

        for i in range(5):
            self.volume['name'] = "nfsvol" + str(i)
            self.volume['voltype']['type'] = 'distributed'
            self.volume['voltype']['replica_count'] = 1
            self.volume['voltype']['dist_count'] = 2

            # Create volume
            ret = setup_volume(mnode=self.mnode,
                               all_servers_info=self.all_servers_info,
                               volume_config=self.volume,
                               force=True)
            self.assertTrue(ret, "Setup volume %s failed" % self.volume)
            time.sleep(5)

            # Export volume with nfs ganesha, if it is not exported already
            vol_option = get_volume_options(self.mnode,
                                            self.volume['name'],
                                            option='ganesha.enable')
            self.assertIsNotNone(
                vol_option, "Failed to get ganesha.enable "
                "volume option for %s" % self.volume['name'])
            if vol_option['ganesha.enable'] != 'on':
                ret, _, _ = export_nfs_ganesha_volume(
                    mnode=self.mnode, volname=self.volume['name'])
                self.assertEqual(
                    ret, 0, "Failed to export volume %s as NFS "
                    "export" % self.volume['name'])
                time.sleep(5)
            else:
                g.log.info("Volume %s is exported already",
                           self.volume['name'])

            # Wait for the volume to get exported. Max wait time is
            # 120 seconds.
            ret = wait_for_nfs_ganesha_volume_to_get_exported(
                self.mnode, (self.volume['name']))
            self.assertTrue(ret, ("Failed to export volume %s after "
                                  "starting volume when IO is running on "
                                  "another volume" % self.volume['name']))

            # Log Volume Info and Status
            ret = log_volume_info_and_status(self.mnode, self.volume['name'])
            self.assertTrue(
                ret, "Logging volume %s info and status failed" %
                self.volume['name'])

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #16
    def test_self_heal_when_io_in_progress(self):
        """Test self-heal is successful when IO is in progress.

        Description:
            - simulate brick down.
            - bring bricks online
            - wait for heal to complete
            - validate IO
        """
        # pylint: disable=too-many-statements
        # Check if volume type is dispersed. If the volume type is
        # dispersed, set the volume option 'disperse.optimistic-change-log'
        # to 'off'
        # Refer to: https://bugzilla.redhat.com/show_bug.cgi?id=1470938
        # pylint: disable=unsupported-membership-test
        if 'dispersed' in self.volume_type and 'nfs' in self.mount_type:
            g.log.info("Set volume option 'disperse.optimistic-change-log' "
                       "to 'off' on a dispersed volume . "
                       "Refer to bug: "
                       "https://bugzilla.redhat.com/show_bug.cgi?id=1470938")
            ret = set_volume_options(self.mnode, self.volname,
                                     {'disperse.optimistic-change-log': 'off'})
            self.assertTrue(ret, ("Failed to set the volume option %s to "
                                  "off on volume %s",
                                  'disperse.optimistic-change-log',
                                  self.volname))
            g.log.info("Successfully set the volume option "
                       "'disperse.optimistic-change-log' to 'off'")

        # Log Volume Info and Status before simulating brick failure
        g.log.info("Logging volume info and Status before bringing bricks "
                   "offlien from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = filter(None, (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks']))

        # Bring bricks offline
        g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, ("Failed to bring bricks: %s offline",
                              bricks_to_bring_offline))
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Log Volume Info and Status
        g.log.info("Logging volume info and Status after bringing bricks "
                   "offline from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Validate if bricks are offline
        g.log.info("Validating if bricks: %s are offline",
                   bricks_to_bring_offline)
        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, ("Not all the bricks in list: %s are offline",
                              bricks_to_bring_offline))
        g.log.info("Successfully validated that bricks: %s are all offline",
                   bricks_to_bring_offline)

        # Add delay before bringing bricks online
        time.sleep(40)
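        # This pause keeps the bricks down while client IO continues, letting
        # pending-heal entries accumulate so the heal monitored below has
        # real work to do.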

        # Bring bricks online
        g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, ("Failed to bring bricks: %s online",
                              bricks_to_bring_offline))
        g.log.info("Successfully brought all bricks:%s online",
                   bricks_to_bring_offline)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info("Successful in waiting for volume %s processes to be "
                   "online", self.volname)

        # Log Volume Info and Status
        g.log.info("Logging volume info and Status after bringing bricks "
                   "online from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online",
                              self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal to complete
        g.log.info("Wait for self-heal to complete")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "Self heal didn't complete even after waiting "
                        "for 20 minutes. 20 minutes is too much a time for "
                        "current test workload")
        g.log.info("self-heal is successful after replace-brick operation")

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #17
    def test_volume_status_fd(self):

        '''
        -> Create volume
        -> Mount the volume on 2 clients
        -> Run I/O's on mountpoint
        -> While I/O's are in progress
        -> Perform gluster volume status fd repeatedly
        -> List all files and dirs created
        '''

        # checking volume mounted or not
        for mount_obj in self.mounts:
            ret = is_mounted(self.volname, mount_obj.mountpoint, self.mnode,
                             mount_obj.client_system, self.mount_type)
            self.assertTrue(ret, "Not mounted on %s"
                            % mount_obj.client_system)
            g.log.info("Mounted on %s", mount_obj.client_system)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 15 %s" % (self.script_upload_path,
                                             self.counter,
                                             mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
        self.io_validation_complete = False

        # performing  "gluster volume status volname fd" command on
        # all cluster servers randomly while io is in progress,
        # this command should not get hang while io is in progress
        count = 0
        while count < 300:
            ret, _, _ = g.run(random.choice(self.servers),
                              "gluster volume status %s fd" % self.volname)
            self.assertEqual(ret, 0, ("Volume status 'fd' failed on volume %s"
                                      % self.volname))
            g.log.info("Volume status fd is successful for %s", self.volname)
            count += 1
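        # 'gluster volume status <volname> fd' lists the open file
        # descriptors on each brick; running it 300 times during IO checks
        # that the status path neither hangs nor fails under load.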

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #18
    def test_quota_volume_subdir_limits(self):
        """
        Verifying directory quota functionality WRT limit-usage on volume
        as well as sub-directories in volume.

        * Enable quota
        * Set a limit of 1 GB on / of volume
        * Create 10 directories on mount point
        * Set a limit of 100 MB on all the sub-directories created
        * Create data inside the sub-directories on mount point till the limits
          are reached
        * Validate if the hard limit and available space fields inside the
          quota list command are appropriate
        """

        # Enable quota on the volume
        g.log.info("Enabling quota on the volume %s", self.volname)
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on the volume %s", self.volname)

        # Path to set quota limit
        path = "/"

        # Set a limit of 1 GB on the root of the volume
        g.log.info("Set Quota Limit on the path %s of the volume %s", path,
                   self.volname)
        ret, _, _ = quota_limit_usage(self.mnode,
                                      self.volname,
                                      path=path,
                                      limit="1GB")
        self.assertEqual(ret, 0, ("Failed to set quota limit on path %s of "
                                  "the volume %s", path, self.volname))
        g.log.info("Successfully set the Quota limit on %s of the volume %s",
                   path, self.volname)

        # Create 10 directories from the mount point
        mount_obj = self.mounts[0]
        mount_dir = mount_obj.mountpoint
        client = mount_obj.client_system

        g.log.info("Creating directories on %s:%s", client, mount_dir)
        for i in range(1, 11):
            ret = mkdir(client, "%s/foo%s" % (mount_dir, i))
            self.assertTrue(
                ret, ("Failed to create dir under %s-%s", client, mount_dir))
            g.log.info("Directory 'foo%s' created successfully", i)
        g.log.info("Successfully created directories on %s:%s", client,
                   mount_dir)

        # Set a limit of 100 MB on each directory
        g.log.info(
            "Setting a limit of 100 MB on all the directories inside "
            "the volume %s", self.volname)
        for j in range(1, 11):
            dir_name = "/foo" + str(j)
            ret, _, _ = quota_limit_usage(self.mnode,
                                          self.volname,
                                          path=dir_name,
                                          limit="100MB")
            self.assertEqual(ret, 0,
                             ("Failed to set quota limit on path "
                              "%s of the volume %s", dir_name, self.volname))
            g.log.info(
                "Successfully set the Quota limit on /foo%s of "
                "the volume %s", j, self.volname)
        g.log.info(
            "Successfully set the limit of 100 MB on all directories "
            "inside the volume %s", self.volname)

        # Validate if quota limit usage is set properly
        g.log.info("Validate quota limit usage on all directories")
        for k in range(1, 11):
            dir_name = "/foo" + str(k)
            ret = quota_validate(self.mnode,
                                 self.volname,
                                 path=dir_name,
                                 hard_limit=104857600)
            self.assertTrue(ret, ("Failed to validate quota limit usage on the"
                                  "directory %s", dir_name))
            g.log.info(
                "Successfully validated quota limit usage for the "
                "directory %s of volume %s", dir_name, self.volname)

        # Create data inside each directory from mount point
        g.log.info("Creating Files on %s:%s", client, mount_dir)
        for var1 in range(1, 11):
            cmd = ("cd %s/foo%s ; "
                   "for i in `seq 1 100` ; "
                   "do dd if=/dev/zero of=testfile$i "
                   "bs=1M "
                   "count=1 ; "
                   "done" % (mount_dir, var1))
            ret, _, _ = g.run(client, cmd)
            self.assertEqual(ret, 0,
                             ("Failed to create files in /foo%s", var1))
            g.log.info("Files created successfully in /foo%s", var1)
        g.log.info(
            "Files creation is successful on all directories of the "
            "volume %s", self.volname)

        # List the files inside each directory
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Validate the hard limit and available space fields are appropriate
        g.log.info("Validate quota hard limit and available space on all the "
                   "directories are appropriate")
        for var2 in range(1, 11):
            dir_name = "/foo" + str(var2)
            ret = quota_validate(self.mnode,
                                 self.volname,
                                 path=dir_name,
                                 hard_limit=104857600,
                                 avail_space=0,
                                 sl_exceeded=True,
                                 hl_exceeded=True,
                                 used_space=104857600)
            self.assertTrue(ret,
                            ("Failed to validate quota hard limit and "
                             "available space on the directory %s", dir_name))
            g.log.info(
                "Successfully validated quota hard limit and available"
                " space fields inside quota list for directory %s "
                "of volume %s", dir_name, self.volname)
Example #19
    def test_entry_self_heal_heal_command(self):
        """
        Test Entry-Self-Heal (heal command)

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        - create IO
        - get arequal before getting bricks offline
        - set the volume option
        "self-heal-daemon": "off"
        - bring down all bricks processes from selected set
        - get arequal after getting bricks offline and compare with
        arequal before bringing bricks offline
        - modify the data
        - get arequal before getting bricks online
        - bring bricks online
        - set the volume option
        "self-heal-daemon": "on"
        - check daemons and start healing
        - check if heal is completed
        - check for split-brain
        - get arequal after getting bricks online and compare with
        arequal before bringing bricks online
        """
        # pylint: disable=too-many-statements

        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "are set to 'off'")

        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        g.log.info("Starting IO on %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        cmd = ("python %s create_deep_dirs_with_files "
               "--dir-length 2 "
               "--dir-depth 2 "
               "--max-num-of-dirs 2 "
               "--num-of-files 20 %s/files" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, err = g.run(self.mounts[0].client_system,
                            cmd,
                            user=self.mounts[0].user)
        self.assertFalse(
            ret, 'Failed to create the data for %s: %s' %
            (self.mounts[0].mountpoint, err))
        g.log.info('IO created successfully for %s',
                   self.mounts[0].mountpoint)

        # Command list to do different operations with data -
        # create, rename, copy and delete
        cmd_list = [
            "python %s create_files -f 20 %s/files",
            "python %s mv %s/files",
            # 'copy' command works incorrect. disable until fixed
            # "python %s copy --dest-dir %s/new_dir %s/files",
            "python %s delete %s"
        ]

        for cmd in cmd_list:
            # Get arequal before getting bricks offline
            g.log.info('Getting arequal before getting bricks offline...')
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            result_before_offline = arequals[0].splitlines()[-1].split(':')[-1]
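            # The last line of the arequal output carries the total checksum;
            # everything after the final ':' is kept for the comparison.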
            g.log.info('Getting arequal before getting bricks offline '
                       'is successful')

            # Setting options
            g.log.info('Setting options...')
            options = {"self-heal-daemon": "off"}
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, 'Failed to set options %s' % options)
            g.log.info("Option 'self-heal-daemon' "
                       "is set to 'off' successfully")

            # Select bricks to bring offline
            bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
                self.mnode, self.volname))
            bricks_to_bring_offline = filter(
                None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                       bricks_to_bring_offline_dict['cold_tier_bricks'] +
                       bricks_to_bring_offline_dict['volume_bricks']))

            # Bring brick offline
            g.log.info('Bringing bricks %s offline...',
                       bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(
                ret,
                'Failed to bring bricks %s offline' % bricks_to_bring_offline)

            ret = are_bricks_offline(self.mnode, self.volname,
                                     bricks_to_bring_offline)
            self.assertTrue(
                ret, 'Bricks %s are not offline' % bricks_to_bring_offline)
            g.log.info('Bringing bricks %s offline is successful',
                       bricks_to_bring_offline)

            # Get arequal after getting bricks offline
            g.log.info('Getting arequal after getting bricks offline...')
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            result_after_offline = arequals[0].splitlines()[-1].split(':')[-1]
            g.log.info('Getting arequal after getting bricks offline '
                       'is successful')

            # Checking arequals before bringing bricks offline
            # and after bringing bricks offline
            self.assertEqual(result_before_offline, result_after_offline,
                             'Checksums are not equal')
            g.log.info('Checksums before bringing bricks offline '
                       'and after bringing bricks offline are equal')

            # Modify the data
            g.log.info("Start modifying IO on all mounts...")
            g.log.info("Modifying IO on %s:%s", self.mounts[0].client_system,
                       self.mounts[0].mountpoint)
            if 'copy --dest-dir' in cmd:
                parsed_cmd = cmd % (self.script_upload_path,
                                    self.mounts[0].mountpoint,
                                    self.mounts[0].mountpoint)
            else:
                parsed_cmd = cmd % (self.script_upload_path,
                                    self.mounts[0].mountpoint)
            ret, _, err = g.run(self.mounts[0].client_system,
                                parsed_cmd,
                                user=self.mounts[0].user)
            self.assertFalse(
                ret, 'Failed to modify the data for %s: %s' %
                (self.mounts[0].mountpoint, err))
            g.log.info('IO modified successfully for %s',
                       self.mounts[0].mountpoint)

            # Get arequal before getting bricks online
            g.log.info('Getting arequal before getting bricks online...')
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            result_before_online = arequals[0].splitlines()[-1].split(':')[-1]
            g.log.info('Getting arequal before getting bricks online '
                       'is successful')

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

            # Bring brick online
            g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
            ret = bring_bricks_online(self.mnode, self.volname,
                                      bricks_to_bring_offline)
            self.assertTrue(
                ret,
                'Failed to bring bricks %s online' % bricks_to_bring_offline)
            g.log.info('Bringing bricks %s online is successful',
                       bricks_to_bring_offline)

            # Setting options
            g.log.info('Setting options...')
            options = {"self-heal-daemon": "on"}
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, 'Failed to set options %s' % options)
            g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online", self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

            # Wait for self-heal-daemons to be online
            g.log.info("Waiting for self-heal-daemons to be online")
            ret = is_shd_daemonized(self.all_servers)
            self.assertTrue(ret, "Either No self heal daemon process found")
            g.log.info("All self-heal-daemons are online")

            # Start healing
            ret = trigger_heal(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not started')
            g.log.info('Healing is started')

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal has not yet completed')

            # Check if heal is completed
            ret = is_heal_complete(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not complete')
            g.log.info('Heal is completed successfully')

            # Check for split-brain
            ret = is_volume_in_split_brain(self.mnode, self.volname)
            self.assertFalse(ret, 'Volume is in split-brain state')
            g.log.info('Volume is not in split-brain state')

            # Get arequal after getting bricks online
            g.log.info('Getting arequal after getting bricks online...')
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            result_after_online = arequals[0].splitlines()[-1].split(':')[-1]
            g.log.info('Getting arequal after getting bricks online '
                       'is successful')

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

            # Checking arequals before bringing bricks online
            # and after bringing bricks online
            self.assertEqual(result_before_online, result_after_online,
                             'Checksums are not equal')
            g.log.info('Checksums before bringing bricks online '
                       'and after bringing bricks online are equal')
Example #20
    def test_self_heal(self):
        """
        Description:-
        - Create files on mount point
        - Kill one brick from volume
        - rm -rfv on mount point
        - bring bricks online
        - wait for heals
        - list
        """
        # pylint: disable=too-many-statements

        # IO on the mount point
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 35 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" % (
                       self.script_upload_path,
                       self.counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
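
        # The +10 offset keeps each client's --dirname-start-num range
        # distinct, so concurrent writers never collide on directory names.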

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(filter(None, (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks'])))

        # Killing one brick from the volume set
        g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, ("Failed to bring bricks: %s offline",
                              bricks_to_bring_offline))
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Validate if bricks are offline
        g.log.info("Validating if bricks: %s are offline",
                   bricks_to_bring_offline)
        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, "Not all the bricks in list: %s are offline" %
                        bricks_to_bring_offline)
        g.log.info("Successfully validated that bricks: %s are all offline",
                   bricks_to_bring_offline)

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        self.io_validation_complete = True

        # Checking volume status
        g.log.info("Logging volume info and Status after bringing bricks "
                   "offline from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Removing files from the mount point when one brick is down
        g.log.info("Removing files from the mount point")
        mountpoint = self.mounts[0].mountpoint
        client = self.mounts[0].client_system
        cmd = "rm -rfv %s/*" % mountpoint
        ret, _, _ = g.run(client, cmd)
        if ret != 0:
            raise ExecutionError("failed to delete the files")

        # Bringing bricks online
        g.log.info('Bringing bricks %s online', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s online' %
                        bricks_to_bring_offline)
        g.log.info('Bricks %s are online', bricks_to_bring_offline)

        # Check if bricks are online
        g.log.info("Checking bricks are online or not")
        ret = are_bricks_online(self.mnode, self.volname,
                                bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not online' %
                        bricks_to_bring_offline)
        g.log.info('Bricks %s are online', bricks_to_bring_offline)

        # Monitoring heals on the volume
        g.log.info("Wait for heal completion...")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "Self heal didn't complete even after waiting "
                             "for 20 minutes.")
        g.log.info("self-heal is successful after changing the volume type "
                   "from replicated to arbitered volume")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #21
    def test_eagerlock_while_io_in_progress(self):
        '''
        Create a replica volume and mount it. Once the volume is mounted
        successfully on the client, start IO on the mount point, then run
        "gluster volume profile <volname> info" on randomly chosen cluster
        nodes while the IO is running.
        Then check that the IO completed successfully and that the files on
        the mount point are listed properly.
        Finally, check that every OPENDIR value in the profile output is
        less than or equal to 4.
        '''

        status_on = "on"
        validate_profiles = ('cluster.eager-lock',
                             'diagnostics.count-fop-hits',
                             'diagnostics.latency-measurement')

        ret, _, _ = profile_start(random.choice(self.servers), self.volname)
        self.assertEqual(
            ret, 0,
            ("Volume profile failed to start for volume %s" % self.volname))

        for validate_profile in validate_profiles:
            out = get_volume_options(random.choice(self.servers),
                                     self.volname,
                                     option=validate_profile)
            self.assertIsNotNone(
                out, "Volume get failed for volume "
                "%s" % self.volname)
            self.assertEqual(out[validate_profile], status_on, "Failed to "
                             "match profile information")

        # Mounting a volume
        ret = self.mount_volume(self.mounts)
        self.assertTrue(ret, "Volume mount failed for %s" % self.volname)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 15 "
                "--max-num-of-dirs 5 "
                "--num-of-files 25 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
        self.io_validation_complete = False

        # This command should not hang while IO is in progress
        # pylint: disable=unused-variable
        for i in range(20):
            ret, _, _ = profile_info(random.choice(self.servers), self.volname)
            self.assertEqual(ret, 0, ("Volume profile info failed on "
                                      "volume %s" % self.volname))

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        volume_profile_info = "gluster v profile %s info"
        _, out, _ = g.run(
            random.choice(self.servers),
            volume_profile_info % self.volname + " | grep"
            "OPENDIR | awk '{print$8}'")
        self.assertIsNotNone(
            out, "Failed to get volume %s profile info" % self.volname)
        out.strip().split('\n')
        for value in out:
            self.assertLessEqual(
                value, '4', "Failed to Validate profile"
                " on volume %s" % self.volname)
Example #22
    def test_expanding_volume_when_io_in_progress(self):
        # pylint: disable=too-many-statements
        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume when IO in progress
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume while IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume while IO in progress on "
            "volume %s : Success", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info("Waiting for volume %s process to be online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Started rebalance on the volume %s: Success", self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1800)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance status on volume %s: Complete", self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for the "
                                  "volume %s", self.volname))
        g.log.info("Rebalance status on volume %s: Complete", self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO on all mounts: Complete")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("List all files and directories: Success")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Checking if there are any migration failures
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            self.assertEqual(
                0, int(each_node['failures']),
                "Rebalance failed to migrate few files on %s" %
                each_node['nodeName'])
            g.log.info("No migration failures on %s", each_node['nodeName'])
Example #23
    def test_shrinking_volume_when_io_in_progress(self):
        """Test shrinking volume (Decrease distribute count) using existing
        servers bricks when IO is in progress.

        Description:
            - remove brick (start, status, commit)
            - validate IO
        """
        # Log Volume Info and Status before shrinking the volume.
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Temporary code:
        # Additional checks to gather information from all
        # servers for Bug 1810901 and setting log level to debug.
        if self.volume_type == 'distributed-dispersed':
            for brick_path in get_all_bricks(self.mnode, self.volname):
                node, path = brick_path.split(':')
                ret, out, _ = g.run(node, 'find {}/'.format(path))
                g.log.info(out)
                for filedir in out.split('\n'):
                    ret, out, _ = g.run(node, 'ls -l {}'.format(filedir))
                    g.log.info("Return value for ls -l command: %s", ret)
                    g.log.info(out)
                    ret = get_fattr_list(node, filedir, encode_hex=True)
                    g.log.info(ret)

        # Shrinking volume by removing bricks from volume when IO in progress
        ret = shrink_volume(self.mnode, self.volname)

        # Temporary code:
        # Additional checks to gather information from all
        # servers for Bug 1810901.
        if not ret and self.volume_type == 'distributed-dispersed':
            for brick_path in get_all_bricks(self.mnode, self.volname):
                node, path = brick_path.split(':')
                ret, out, _ = g.run(node, 'find {}/'.format(path))
                g.log.info(out)
                for filedir in out.split('\n'):
                    ret, out, _ = g.run(node, 'ls -l {}'.format(filedir))
                    g.log.info("Return value for ls -l command: %s", ret)
                    g.log.info(out)
                    ret = get_fattr_list(node, filedir, encode_hex=True)
                    g.log.info(ret)

        self.assertTrue(ret, ("Failed to shrink the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Shrinking volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))

        # Log Volume Info and Status after shrinking the volume
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
Example #24
    def test_expanding_volume_when_io_in_progress(self):
        """Test expanding volume (Increase distribution) using existing
        servers bricks when IO is in progress.

        Description:
            - add bricks
            - starts rebalance
            - wait for rebalance to complete
            - validate IO
        """
        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Expanding volume by adding bricks to the volume when IO in progress
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Wait for gluster processes to come online
        time.sleep(30)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        _, _, _ = rebalance_status(self.mnode, self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for the "
                                  "volume %s", self.volname))
        g.log.info("Successfully got rebalance status of the volume %s",
                   self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #25
    def test_shrinking_volume_when_io_in_progress(self):
        """Test shrinking volume (Decrease distribute count) using existing
        servers bricks when IO is in progress.

        Description:
            - remove brick (start, status, commit)
            - validate IO
        """
        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Shrinking volume by removing bricks from volume when IO in progress
        g.log.info("Start removing bricks from volume when IO in progress")
        ret = shrink_volume(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to shrink the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Shrinking volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Log Volume Info and Status after shrinking the volume
        g.log.info("Logging volume info and Status after shrinking volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online after "
                   "shrinking volume")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online after shrinking volume",
                   self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #26
    def test_snapshot_basic_commands_when_io_in_progress(self):
        """Create, List, Activate, Enable USS (User Serviceable Snapshot),
            Viewing Snap of the volume from mount, De-Activate
            when IO is in progress.
        """
        snap_name = "snap_cvt"
        # Create Snapshot
        g.log.info("Creating snapshot %s of the volume %s", snap_name,
                   self.volname)
        ret, _, _ = snap_create(self.mnode, self.volname, snap_name)
        self.assertEqual(ret, 0,
                         ("Failed to create snapshot with name %s "
                          " of the volume %s", snap_name, self.volname))
        g.log.info("Successfully created snapshot %s of the volume %s",
                   snap_name, self.volname)

        # List Snapshot
        g.log.info("Listing the snapshot created for the volume %s",
                   self.volname)
        snap_list = get_snap_list(self.mnode)
        self.assertIsNotNone(snap_list, "Unable to get the Snapshot list")
        self.assertIn(snap_name, snap_list,
                      ("snapshot %s not listed in Snapshot list", snap_name))
        g.log.info("Successfully listed snapshot %s in gluster snapshot list",
                   snap_name)

        # Activate the snapshot
        g.log.info("Activating snapshot %s of the volume %s", snap_name,
                   self.volname)
        ret, _, _ = snap_activate(self.mnode, snap_name)
        self.assertEqual(ret, 0,
                         ("Failed to activate snapshot with name %s "
                          " of the volume %s", snap_name, self.volname))
        g.log.info("Successfully activated snapshot %s of the volume %s",
                   snap_name, self.volname)

        # Enable USS on the volume.
        uss_options = ["features.uss"]
        if self.mount_type == "cifs":
            uss_options.append("features.show-snapshot-directory")
        g.log.info("Enable uss options %s on the volume %s", uss_options,
                   self.volname)
        ret = enable_and_validate_volume_options(self.mnode,
                                                 self.volname,
                                                 uss_options,
                                                 time_delay=30)
        self.assertTrue(ret, ("Unable to enable uss options %s on volume %s",
                              uss_options, self.volname))
        g.log.info("Successfully enabled uss options %s on the volume: %s",
                   uss_options, self.volname)

        # Viewing snapshot from mount
        g.log.info("Viewing Snapshot %s from mounts:", snap_name)
        ret = view_snaps_from_mount(self.mounts, snap_name)
        self.assertTrue(ret, ("Failed to View snap %s from mounts", snap_name))
        g.log.info("Successfully viewed snap %s from mounts", snap_name)

        # De-Activate the snapshot
        g.log.info("Deactivating snapshot %s of the volume %s", snap_name,
                   self.volname)
        ret, _, _ = snap_deactivate(self.mnode, snap_name)
        self.assertEqual(ret, 0,
                         ("Failed to deactivate snapshot with name %s "
                          " of the volume %s", snap_name, self.volname))
        g.log.info("Successfully deactivated snapshot %s of the volume %s",
                   snap_name, self.volname)

        # Viewing snapshot from mount (.snaps shouldn't be listed from mount)
        for mount_obj in self.mounts:
            g.log.info("Viewing Snapshot %s from mount %s:%s", snap_name,
                       mount_obj.client_system, mount_obj.mountpoint)
            ret = view_snaps_from_mount(mount_obj, snap_name)
            self.assertFalse(ret, ("Still able to View snap %s from mount "
                                   "%s:%s", snap_name, mount_obj.client_system,
                                   mount_obj.mountpoint))
            g.log.info("%s not listed under .snaps from mount %s:%s",
                       snap_name, mount_obj.client_system,
                       mount_obj.mountpoint)
        g.log.info(
            "%s not listed under .snaps from mounts after "
            "deactivating", snap_name)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #27
    def test_files_on_mount(self):
        """""
        Description:-
        - I/O on the mounts
        - kill brick in cyclic order
        - list the files after healing
        """ ""

        # IO on the mount point
        # Each client will write 2 files each of 1 GB and keep
        # modifying the same file
        g.log.info("Starting IO on all mounts...")
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s "
                   "--file-sizes-list 1G "
                   "--chunk-sizes-list 128 "
                   "--write-time 900 "
                   "--num-of-files 2 "
                   "--base-file-name test_brick_down_from_client_%s.txt "
                   "--dir %s " %
                   (self.script_upload_path, mount_obj.client_system,
                    mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10
        self.io_validation_complete = False

        # Killing bricks in cyclic order
        bricks_list = get_all_bricks(self.mnode, self.volname)

        # Execute three cyclic brick-down cycles
        for _ in range(3):
            for brick in bricks_list:
                # Bring brick offline
                g.log.info('Bringing bricks %s offline', brick)
                ret = bring_bricks_offline(self.volname, [brick])
                self.assertTrue(ret,
                                ("Failed to bring bricks %s offline" % brick))

                ret = are_bricks_offline(self.mnode, self.volname, [brick])
                self.assertTrue(ret, 'Bricks %s are not offline' % brick)
                g.log.info('Bringing bricks %s offline is successful', brick)

                # Introducing 30 second sleep when brick is down
                g.log.info(
                    "Waiting for 30 seconds, with ongoing IO while "
                    "brick %s is offline", brick)
                time.sleep(30)

                # Bring brick online
                g.log.info('Bringing bricks %s online', brick)
                ret = bring_bricks_online(self.mnode, self.volname, [brick])
                self.assertTrue(ret,
                                ("Failed to bring bricks %s online " % brick))
                g.log.info('Bricks %s are online', brick)

                # Check if bricks are online
                ret = are_bricks_online(self.mnode, self.volname, bricks_list)
                self.assertTrue(ret, 'Bricks %s are not online' % bricks_list)
                g.log.info('Bricks %s are online', bricks_list)

                # Check daemons
                g.log.info('Checking daemons...')
                ret = are_all_self_heal_daemons_are_online(
                    self.mnode, self.volname)
                self.assertTrue(ret, ("Some of the self-heal Daemons are "
                                      "offline"))
                g.log.info('All self-heal Daemons are online')

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Checking volume status
        g.log.info(
            "Logging volume info and Status after bringing bricks "
            "offline from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s" % self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Monitoring heals on the volume
        g.log.info("Wait for self-heal to completeon the volume")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, ("Self heal didn't complete even after waiting "
                              "for 20 minutes."))
        g.log.info("self-heal is successful after changing the volume type "
                   "from replicated to arbitered volume")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #28
    def test_self_heal_when_io_in_progress(self):
        """Test self-heal is successful when IO is in progress.

        Description:
            - simulate brick down.
            - bring bricks online
            - wait for heal to complete
            - validate IO
        """
        # Log Volume Info and Status before simulating brick failure
        g.log.info(
            "Logging volume info and Status before bringing bricks "
            "offlien from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        # list() is needed as filter() returns a one-shot iterator on
        # Python 3, which would be exhausted after its first use below
        bricks_to_bring_offline = list(filter(
            None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                   bricks_to_bring_offline_dict['cold_tier_bricks'] +
                   bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring bricks offline
        g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret,
            ("Failed to bring bricks: %s offline", bricks_to_bring_offline))
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Wait for gluster processes to be offline
        time.sleep(10)

        # Log Volume Info and Status
        g.log.info(
            "Logging volume info and Status after bringing bricks "
            "offline from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Validate if bricks are offline
        g.log.info("Validating if bricks: %s are offline",
                   bricks_to_bring_offline)
        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, "Not all the bricks in list:%s are offline")
        g.log.info("Successfully validated that bricks: %s are all offline")

        # Add delay before bringing bricks online
        time.sleep(40)

        # Bring bricks online
        g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret,
            ("Failed to bring bricks: %s online", bricks_to_bring_offline))
        g.log.info("Successfully brought all bricks:%s online",
                   bricks_to_bring_offline)

        # Wait for gluster processes to be online
        time.sleep(10)

        # Log Volume Info and Status
        g.log.info(
            "Logging volume info and Status after bringing bricks "
            "online from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal to complete
        g.log.info("Wait for self-heal to complete")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(
            ret, "Self heal didn't complete even after waiting "
            "for 20 minutes. 20 minutes is too much a time for "
            "current test workload")
        g.log.info("self-heal is successful after replace-brick operation")

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Example #29
    def test_custom_xlator_ops(self):
        '''
        Steps:
        - Perform minimal IO on the mount
        - Enable custom xlator and verify xlator position in the volfile
        - After performing any operation on the custom xlator, set options
          using 'storage.reserve' to validate other xlators aren't affected
        - Add brick to the volume and verify the xlator position in volfile in
          the new brick
        - Replace brick and verify the xlator position in new brick volfile
        - Verify debug xlator is reflected correctly in the volfile when set
        - Validate that supplying a non-existent xlator position fails
        - Reset the volume and verify all the options set above are reset

        For more details refer inline comments
        '''

        # Write IO on the mount
        self._simple_io()

        # Set storage.reserve option, just a baseline that set options are
        # working
        self._set_and_assert_volume_option('storage.reserve', '2%')

        # Test mount is accessible in RW
        self._simple_io()

        # Position custom xlator in the graph
        xlator, parent, xtype = 'ro', 'worm', 'user'
        self._enable_xlator(xlator, parent, xtype)
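
        # This positions a 'user' xlator named 'ro' relative to the 'worm'
        # xlator (its parent) in each brick volfile; _verify_position()
        # asserts that placement later in the test.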

        # Verify mount is accessible as we didn't set any options yet
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, 'Failed to list all files and dirs')

        # Set 'read-only' to 'on'
        self._set_and_assert_volume_option('user.xlator.ro.read-only', 'on')

        # Functional verification that mount should be RO
        self._simple_io(xfail=True)
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, 'Failed to list all files and dirs')

        # Shouldn't affect other xlator options
        self._set_and_assert_volume_option('storage.reserve', '3%')

        # Functional validation that mount should be RW
        self._set_and_assert_volume_option('user.xlator.ro.read-only', 'off')
        self._simple_io()

        # Shouldn't affect other xlator options
        self._set_and_assert_volume_option('storage.reserve', '4%')

        # Add brick to the volume and new brick volfile should have custom
        # xlator
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, 'Unable to expand volume')
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to log volume info and status')
        self._verify_position(xlator, parent, xtype)
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, 'Unable to start rebalance operation post '
            'expanding volume')
        sleep(.5)
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Rebalance on the volume is not completed')

        # Replace on 'pure distribute' isn't recommended
        if self.volume['voltype']['type'] != 'distributed':

            # Replace brick and new brick volfile should have custom xlator
            ret = replace_brick_from_volume(self.mnode, self.volname,
                                            self.servers,
                                            self.all_servers_info)
            self.assertTrue(ret, 'Unable to perform replace brick operation')
            self._verify_position(xlator, parent, xtype)
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(
                ret, 'Heal is not yet completed after performing '
                'replace brick operation')

        # Regression cases
        # Framework should fail when a non-existent xlator position is given
        self._set_and_assert_volume_option('user.xlator.ro',
                                           'unknown',
                                           xfail=True)

        # Any failure in setting xlator option shouldn't result in degraded
        # volume
        self._simple_io()
        self._set_and_assert_volume_option('storage.reserve', '5%')

        # Custom xlator framework touches existing 'debug' xlators and minimal
        # steps to verify no regression
        xlator, parent, xtype = 'delay-gen', 'posix', 'debug'
        self._enable_xlator(xlator, parent, xtype)

        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, 'Failed to list all files and dirs')

        # Volume shouldn't be able to start on using same name for custom
        # xlator and existing xlator
        if self.mount_type != 'nfs':
            xlator, parent, xtype = 'posix', 'posix', 'user'
            self._enable_xlator(xlator, parent, xtype, xsfail=True)

        # Volume reset should remove all the options that are set upto now
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Unable to reset volume')

        # Volume start here is due to earlier failure starting the volume and
        # isn't related to 'volume_reset'
        if self.mount_type != 'nfs':
            ret, _, _ = volume_start(self.mnode, self.volname)
            self.assertEqual(ret, 0, 'Unable to start a stopped volume')
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(
            ret, 'Not all volume processes are online after '
            'starting a stopped volume')
        sleep(self.timeout)
        self._simple_io()

        # Verify options are reset
        vol_info = get_volume_info(self.mnode, self.volname)
        options = vol_info[self.volname]['options']
        negate = ['user.xlator.ro', 'debug.delay-gen', 'storage.reserve']
        for option in negate:
            self.assertNotIn(
                option, options, 'Found {0} in volume info even '
                'after volume reset'.format(option))

        g.log.info(
            'Pass: Validating custom xlator framework for volume %s '
            'is successful', self.volname)