def test_remove_brick_command_force(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create some data on the volume.
        3. Run remove-brick with force.
        4. Check whether the removed bricks are still part of the volume.
        """
        # Create some data on the volume
        self._run_io_on_mount_point()

        # Remove-brick on the volume with force option
        brick_list_to_remove = form_bricks_list_to_remove_brick(self.mnode,
                                                                self.volname)
        self.assertIsNotNone(brick_list_to_remove, "Brick list is empty")

        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 brick_list_to_remove, option="force")
        self.assertFalse(ret, "Failed to run remove-brick with force")
        g.log.info("Successfully run remove-brick with force")

        # Get a list of all bricks
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Brick list is empty")

        # Check if the removed bricks are still present in the brick list
        for brick in brick_list_to_remove:
            self.assertNotIn(brick, brick_list,
                             "Brick still present in brick list even "
                             "after removing")

    def test_rebalance_while_remove_brick_in_progress(self):
        """
        - Create directories and files on the mount point.
        - Now remove one of the bricks from the volume:
            gluster volume remove-brick <vol> <brick> start
        - Immediately start rebalance on the same volume:
            gluster volume rebalance <vol> start
        """
        # pylint: disable=too-many-statements
        # DHT Layout validation
        for mount in self.mounts:
            g.log.debug('Check DHT values %s:%s', mount.client_system,
                        mount.mountpoint)
            ret = validate_files_in_dir(mount.client_system, mount.mountpoint,
                                        test_type=LAYOUT_IS_COMPLETE,
                                        file_type=FILETYPE_DIRS)
            self.assertTrue(ret, "TEST_LAYOUT_IS_COMPLETE: FAILED")
            g.log.info("TEST_LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)
        g.log.info("Successful in logging volume info and status of volume "
                   "%s", self.volname)

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to "
                                                      "form bricks list for "
                                                      "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks with option start
        g.log.info("Start removing bricks for %s", self.volname)
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed",
                                  self.volname))
        g.log.info("Volume %s: Remove-brick start success ", self.volname)

        # Log remove-brick status
        g.log.info("Logging Remove-brick status")
        ret, out, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "status")
        self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed",
                                  self.volname))
        g.log.info("Volume %s: Remove-brick status", self.volname)
        g.log.info(out)

        # Start rebalance while volume shrink in-progress
        g.log.info("Volume %s: Start rebalance while volume shrink is "
                   "in-progress")
        _, _, err = rebalance_start(self.mnode, self.volname)
        self.assertIn("Either commit or stop the remove-brick task.", err,
                      "Rebalance started successfully while volume shrink"
                      " is in-progress")
        g.log.info("Failed to start rebalance while volume shrink is "
                   "in progress <EXPECTED>")
    def setUp(self):

        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume and Mount Volume")
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)

        if not self.remove_brick_list:
            g.log.error("Volume %s: Failed to form bricks list for shrink",
                        self.volname)
            raise ExecutionError("Volume %s: Failed to form bricks list for"
                                 " shrink" % self.volname)
        g.log.info("Volume %s: Formed bricks list for volume shrink",
                   (self.remove_brick_list, self.volname))

        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for index, mount_obj in enumerate(self.mounts, start=1):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 5 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 10 %s" % (
                       self.script_upload_path,
                       index + 10, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Wait for IO to complete
        g.log.info("Wait for IO to complete as IO validation did not "
                   "succeed in test method")
        ret = wait_for_io_to_complete(self.all_mounts_procs, self.mounts)
        if not ret:
            raise ExecutionError("IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        if not ret:
            raise ExecutionError("Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_add_brick_while_remove_brick_is_in_progress(self):
        # DHT Layout and hash validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Form bricks list for volume shrink
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to "
                                                      "form bricks list for "
                                                      "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)
        # Log remove-brick status
        g.log.info("Logging Remove-brick status")
        ret, out, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))
        g.log.info("Remove-brick status %s", self.volname)
        g.log.info(out)

        # Expanding volume while volume shrink is in-progress
        g.log.info("Volume %s: Expand volume while volume shrink in-progress",
                   self.volname)
        _, _, err = add_brick(self.mnode, self.volname, self.add_brick_list)
        self.assertIn(
            "rebalance is in progress", err, "Successfully added "
            "bricks to the volume <NOT EXPECTED>")
        g.log.info(
            "Volume %s: Failed to add-bricks while volume shrink "
            "in-progress <EXPECTED>", self.volname)

        # cleanup add-bricks list
        for brick in self.add_brick_list:
            brick_node, brick_path = brick.split(":")
            ret, _, _ = g.run(brick_node, "rm -rf %s" % brick_path)
            if ret != 0:
                g.log.error("Failed to clean %s:%s", brick_node, brick_path)
        g.log.info("Successfully cleaned backend add-brick bricks list")
    def setUp(self):

        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        if self.volume_type == "distributed-replicated":
            self.volume_configs = []
            # Redefine distributed-replicated volume
            self.volume['voltype'] = {
                'type': 'distributed-replicated',
                'replica_count': 3,
                'dist_count': 4,
                'transport': 'tcp'
            }

        if self.volume_type == "distributed-dispersed":
            self.volume_configs = []
            # Redefine distributed-dispersed volume
            self.volume['voltype'] = {
                'type': 'distributed-dispersed',
                'dist_count': 3,
                'disperse_count': 6,
                'redundancy_count': 2,
                'transport': 'tcp'
            }

        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume and Mount Volume")
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                                  self.volname,
                                                                  subvol_num=1)
        if not self.remove_brick_list:
            g.log.error(
                "Volume %s: Failed to form bricks list "
                "for volume shrink", self.volname)
            raise ExecutionError("Volume %s: Failed to form bricks list "
                                 "for volume shrink" % self.volname)
        g.log.info("Volume %s: Formed bricks list for volume shrink",
                   self.volname)

    def setUp(self):

        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Setup Volume and Mount Volume
        g.log.info("Starting to Setup Volume and Mount Volume")
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                                  self.volname,
                                                                  subvol_num=1)
        if not self.remove_brick_list:
            g.log.error(
                "Volume %s: Failed to form bricks list "
                "for volume shrink", self.volname)
            raise ExecutionError("Volume %s: Failed to form bricks list "
                                 "for volume shrink" % self.volname)
        g.log.info("Volume %s: Formed bricks list for volume shrink",
                   self.volname)

    def test_delete_dir_with_self_pointing_linkto_files(self):
        """
        Test case:
        1. Create a pure distribute volume with 2 bricks, start and mount it.
        2. Create dir dir0/dir1/dir2 inside which create 1000 files and rename
           all the files.
        3. Start remove-brick operation on the volume.
        4. Check remove-brick status till status is completed.
        5. When remove-brick status is completed stop it.
        6. Go to the brick used for remove-brick and perform lookup on the
           files.
        7. Change the linkto xattr value for every file in the brick used for
           remove-brick to point to itself.
        8. Perform rm -rf * from the mount point.
        """
        # Create dir /dir0/dir1/dir2
        self.dir_path = "{}/dir0/dir1/dir2/".format(self.mounts[0].mountpoint)
        ret = mkdir(self.first_client, self.dir_path, parents=True)
        self.assertTrue(ret, "Failed to create /dir0/dir1/dir2/ dir")

        # Create 1000 files inside /dir0/dir1/dir2
        ret, _, _ = g.run(
            self.first_client, 'cd %s;for i in {1..1000}; do echo "Test file" '
            '> tfile-$i; done' % self.dir_path)
        self.assertFalse(ret,
                         "Failed to create 1000 files inside /dir0/dir1/dir2")

        # Rename 1000 files present inside /dir0/dir1/dir2
        ret, _, _ = g.run(
            self.first_client, "cd %s;for i in {1..1000};do mv tfile-$i "
            "ntfile-$i;done" % self.dir_path)
        self.assertFalse(ret,
                         "Failed to rename 1000 files inside /dir0/dir1/dir2")
        g.log.info("I/O successful on mount point.")

        # Start remove-brick operation on the volume
        brick = form_bricks_list_to_remove_brick(self.mnode,
                                                 self.volname,
                                                 subvol_num=1)
        self.assertIsNotNone(brick, "Brick_list is empty")
        ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'start')
        self.assertFalse(ret, "Failed to start remov-brick on volume")

        # Check remove-brick status till status is completed
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                brick)
        self.assertTrue(ret, "Remove-brick didn't complete on volume")

        # When remove-brick status is completed stop it
        ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'stop')
        self.assertFalse(ret, "Failed to start remov-brick on volume")
        g.log.info("Successfully started and stopped remove-brick")

        # Go to brick used for remove brick and perform lookup on the files
        node, path = brick[0].split(":")
        path = "{}/dir0/dir1/dir2/".format(path)
        ret, _, _ = g.run(node, 'ls {}*'.format(path))
        self.assertFalse(ret, "Failed to do lookup on %s" % brick[0])

        # Change the linkto xattr value for every file in brick used for
        # remove brick to point to itself
        ret = get_dir_contents(node, path)
        self.assertIsNotNone(ret,
                             "Unable to get files present in dir0/dir1/dir2")

        ret = get_dht_linkto_xattr(node, "{}{}".format(path, ret[0]))
        self.assertIsNotNone(ret, "Unable to fetch dht linkto xattr")

        # Change trusted.glusterfs.dht.linkto from dist-client-0 to
        # dist-client-1 or vice versa according to the initial value
        dht_linkto_xattr = ret.split("-")
        if int(dht_linkto_xattr[2]):
            dht_linkto_xattr[2] = "0"
        else:
            dht_linkto_xattr[2] = "1"
        linkto_value = "-".join(dht_linkto_xattr)

        # Set xattr trusted.glusterfs.dht.linkto on all the linkto files
        ret = set_fattr(node, '{}*'.format(path),
                        'trusted.glusterfs.dht.linkto', linkto_value)
        self.assertTrue(ret, "Failed to change linkto file to point to itself")

        # Perform rm -rf * from the mount point
        ret, _, _ = g.run(self.first_client,
                          "rm -rf {}/*".format(self.mounts[0].mountpoint))
        self.assertFalse(ret, "Failed to run rm -rf * on mount point")
        g.log.info("rm -rf * successful on mount point")
    def test_remove_brick_no_commit_followed_by_rebalance(self):
        """
        Description: Tests to check that there is no data loss when
                     remove-brick operation is stopped and then new bricks
                     are added to the volume.
        Steps:
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Create files and dirs on the mount-point.
        4) Calculate the arequal-checksum on the mount-point.
        5) Start remove-brick operation on the volume.
        6) While migration is in progress, stop the remove-brick operation.
        7) Add bricks to the volume and trigger rebalance.
        8) Wait for rebalance to complete.
        9) Calculate the arequal-checksum on the mount-point.
        """
        # Start IO on mounts
        m_point = self.mounts[0].mountpoint
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
               "--num-of-files 50 --file-type empty-file %s" %
               (self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate IO
        self.assertTrue(validate_io_procs([proc], self.mounts[0]),
                        "IO failed on some of the clients")

        # Calculate arequal-checksum before starting remove-brick
        ret, arequal_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Form bricks list for volume shrink
        remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                             self.volname,
                                                             subvol_name=1)
        self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to "
                                                 "form bricks list for "
                                                 "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)

        # Log remove-brick status
        ret, out, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                   "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))

        # Check if migration is in progress
        if 'in progress' in out:
            # Stop remove-brick process
            g.log.info("Stop removing bricks from volume")
            ret, out, _ = remove_brick(self.mnode, self.volname,
                                       remove_brick_list, "stop")
            self.assertEqual(ret, 0, "Failed to stop remove-brick process")
            g.log.info("Stopped remove-brick process successfully")
        else:
            g.log.error("Remove-brick migration completed before it could "
                        "be stopped")

        # Sleep for 30 secs so that any running remove-brick process stops
        sleep(30)

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
        g.log.info("Volume %s: Add-brick successful", self.volname)

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance has not completed")
        g.log.info("Rebalance has completed successfully")

        # Calculate arequal-checksum on mount-point
        ret, arequal_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Check if there is any data loss
        self.assertEqual(set(arequal_before), set(arequal_after),
                         ("There is data loss"))
        g.log.info("The checksum before and after rebalance is same."
                   " There is no data loss.")
    def test_quorum_remove_brick(self):
        '''
        -> Create volume
        -> Enabling server quorum
        -> Set server quorum ratio to 95%
        -> Stop the glusterd on any one of the node
        -> Perform remove brick operation
        -> start glusterd
        -> Check gluster vol info, bricks should be same before and after
        performing remove brick operation.
        '''
        # Enabling server quorum
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'server'})
        self.assertTrue(ret, "Failed to set server quorum for volume %s"
                        % self.volname)
        g.log.info("Able to set server quorum successfully for %s",
                   self.volname)

        # Setting server quorum ratio in percentage
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '95%'})
        self.assertTrue(ret, "Failed to set server quorum ratio for %s"
                        % self.servers)
        g.log.info("Able to set server quorum ratio successfully for %s",
                   self.servers)

        # Getting brick list from volume
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get brick list of %s"
                             % self.volname)
        g.log.info("Successful in getting brick list of %s", self.volname)

        # Stopping glusterd
        self.random_server = random.choice(self.servers[1:])
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to stop glusterd on %s"
                        % self.random_server)
        g.log.info("Glusterd stopped successfully on %s", self.random_server)

        # Forming brick list for performing remove brick operation
        remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                             self.volname)
        self.assertIsNotNone(remove_brick_list, "Failed to get brick list for "
                                                "performing remove brick "
                                                "operation")
        g.log.info("Successful in getting brick list for performing remove "
                   "brick operation")

        # Performing remove brick operation
        ret, _, err = remove_brick(self.mnode, self.volname,
                                   remove_brick_list, 'force')
        self.assertNotEqual(ret, 0, "Remove brick should fail when quorum "
                                    "is not met, but bricks were removed "
                                    "successfully for %s" % self.volname)
        g.log.info("Remove brick failed when quorum was not met, as expected "
                   "for %s", self.volname)

        # Expected error message for remove brick operation
        msg = ("volume remove-brick commit force: failed: "
               "Quorum not met. Volume operation not allowed")

        # Checking error message for remove brick operation
        self.assertIn(msg, err, "Error message is not correct for "
                                "remove brick operation when quorum not met")
        g.log.info("Error message is correct for remove brick operation when "
                   "quorum not met")

        # Starting glusterd
        ret = start_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s"
                        % self.random_server)
        g.log.info("Glusted started successfully on %s", self.random_server)

        # Checking glusterd status
        count = 0
        while count < 60:
            ret = is_glusterd_running(self.random_server)
            if not ret:
                break
            sleep(2)
            count += 1
        self.assertEqual(ret, 0, "Glusterd is not running on %s"
                         % self.random_server)
        g.log.info("Glusterd is running on %s", self.random_server)

        # Getting brick list of volume after performing remove brick operation
        new_brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(new_brick_list, "Failed to get brick list of %s"
                             % self.volname)
        g.log.info("Successful in getting brick list of %s", self.volname)

        # Comparing bricks info before and after performing
        # remove brick operation
        self.assertListEqual(brick_list, new_brick_list,
                             "Bricks are not same before and after performing"
                             " remove brick operation")
        g.log.info("Bricks are same before and after "
                   "performing remove brick operation")
    def test_kill_brick_with_remove_brick(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create some data on the volume.
        3. Start remove-brick on the volume.
        4. When remove-brick is in progress, kill the brick process of a
           brick which is being removed.
        5. Remove-brick should complete without any failures.
        """
        # Start I/O from clients on the volume
        counter = 1
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Failed to create datat on volume")
            counter += 10

        # Collect arequal checksum before ops
        ret, arequal_checksum_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Start remove-brick on the volume
        brick_list = form_bricks_list_to_remove_brick(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Brick list is empty")

        ret, _, _ = remove_brick(self.mnode, self.volname, brick_list, 'start')
        self.assertFalse(ret, "Failed to start remove-brick on volume")
        g.log.info("Successfully started remove-brick on volume")

        # Check rebalance is in progress
        ret = get_remove_brick_status(self.mnode, self.volname, brick_list)
        ret = ret['aggregate']['statusStr']
        self.assertEqual(ret, "in progress", ("Rebalance is not in "
                                              "'in progress' state, either "
                                              "rebalance is in completed state"
                                              " or failed to get rebalance "
                                              "status"))

        # kill brick process of a brick which is being removed
        brick = choice(brick_list)
        node, _ = brick.split(":")
        ret = kill_process(node, process_names="glusterfsd")
        self.assertTrue(ret,
                        "Failed to kill brick process of brick %s" % brick)

        # Wait for remove-brick to complete on the volume
        ret = wait_for_remove_brick_to_complete(self.mnode,
                                                self.volname,
                                                brick_list,
                                                timeout=1200)
        self.assertTrue(ret, "Remove-brick didn't complete")
        g.log.info("Remove brick completed successfully")

        # Check for data loss by comparing arequal before and after ops
        ret, arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
    def test_induce_holes_then_lookup(self):
        """
        Test Script to induce holes in layout by using remove-brick force
        and then performing lookup in order to fix the layout.

        Steps :
        1) Create a volume and mount it using FUSE.
        2) Create a directory "testdir" on mount point.
        3) Check if the layout is complete.
        4) Log volume info and status before remove-brick operation.
        5) Form a list of bricks to be removed.
        6) Start remove-brick operation using 'force'.
        7) Let remove-brick complete and check layout.
        8) Mount the volume on a new mount.
        9) Send a lookup on mount point.
        10) Check if the layout is complete.

        """
        # pylint: disable=too-many-statements
        # Create a directory on mount point
        m_point = self.mounts[0].mountpoint
        dirpath = '/testdir'
        command = 'mkdir -p ' + m_point + dirpath
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "mkdir failed")
        g.log.info("mkdir is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    m_point)
        ret = validate_files_in_dir(self.clients[0],
                                    m_point,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                                  self.volname,
                                                                  subvol_num=1)
        self.assertIsNotNone(self.remove_brick_list,
                             ("Volume %s: Failed to form bricks list for "
                              "volume shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for volume shrink",
                   self.volname)

        # Shrinking volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "force")
        self.assertFalse(ret, "Remove-brick with force: FAIL")
        g.log.info("Remove-brick with force: PASS")

        # Check the layout
        ret = is_layout_complete(self.mnode, self.volname, dirpath)
        self.assertFalse(ret, ("Volume %s: Layout is complete", self.volname))
        g.log.info("Volume %s: Layout has some holes", self.volname)

        # Mount the volume on a new mount point
        ret, _, _ = mount_volume(self.volname,
                                 mtype='glusterfs',
                                 mpoint=m_point,
                                 mserver=self.mnode,
                                 mclient=self.clients[1])
        self.assertEqual(ret, 0,
                         ("Failed to do gluster mount of volume %s"
                          " on client node %s", self.volname, self.clients[1]))
        g.log.info("Volume %s mounted successfullly on %s", self.volname,
                   self.clients[1])

        # Send a look up on the directory
        cmd = 'ls %s%s' % (m_point, dirpath)
        ret, _, err = g.run(self.clients[1], cmd)
        self.assertEqual(ret, 0,
                         ("Lookup failed on %s with error %s", dirpath, err))
        g.log.info("Lookup sent successfully on %s", m_point + dirpath)

        # DHT Layout validation
        g.log.info("Checking layout after new mount")
        g.log.debug("Verifying hash layout values %s:%s", self.clients[1],
                    m_point + dirpath)
        ret = validate_files_in_dir(self.clients[1],
                                    m_point + dirpath,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")