def test_rebalance_while_remove_brick_in_progress(self):
        """
        - Create directories and files on the mount point.
        - Now remove one of the bricks from the volume:
            gluster volume remove-brick <vol> <brick> start
        - Immediately start rebalance on the same volume:
            gluster volume rebalance <vol> start
        """
        # pylint: disable=too-many-statements
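        # Context for this scenario (based on the error asserted further
        # below): remove-brick 'start' runs its own data migration, so
        # glusterd is expected to reject a plain 'rebalance start' until the
        # pending remove-brick task is either committed or stopped.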
        # DHT Layout validation
        for mount in self.mounts:
            g.log.debug('Check DHT values %s:%s', mount.client_system,
                        mount.mountpoint)
            ret = validate_files_in_dir(self.clients[0], mount.mountpoint,
                                        test_type=LAYOUT_IS_COMPLETE,
                                        file_type=FILETYPE_DIRS)
            self.assertTrue(ret, "TEST_LAYOUT_IS_COMPLETE: FAILED")
            g.log.info("TEST_LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)
        g.log.info("Successful in logging volume info and status of volume "
                   "%s", self.volname)

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to "
                                                      "form bricks list for "
                                                      "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks with option start
        g.log.info("Start removing bricks for %s", self.volname)
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed",
                                  self.volname))
        g.log.info("Volume %s: Remove-brick start success ", self.volname)

        # Log remove-brick status
        g.log.info("Logging Remove-brick status")
        ret, out, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "status")
        self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed",
                                  self.volname))
        g.log.info("Volume %s: Remove-brick status", self.volname)
        g.log.info(out)

        # Start rebalance while volume shrink in-progress
        g.log.info("Volume %s: Start rebalance while volume shrink is "
                   "in-progress")
        _, _, err = rebalance_start(self.mnode, self.volname)
        self.assertIn("Either commit or stop the remove-brick task.", err,
                      "Rebalance started successfully while volume shrink"
                      " is in-progress")
        g.log.info("Failed to start rebalance while volume shrink is "
                   "in progress <EXPECTED>")
    def test_volume_reduce_replica_count(self):
        """
        Test case:
        1) Create a 2x3 replica volume.
        2) Remove bricks in the volume to make it a 2x2 replica volume.
        3) Remove bricks in the volume to make it a distribute volume.
        """

        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # Getting a list of all the bricks.
        g.log.info("Get all the bricks of the volume")
        self.brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(self.brick_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Converting 2x3 to 2x2 volume.
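        # In a 2x3 volume the bricks are grouped in creation order, so
        # bricks 0-2 form the first replica set and bricks 3-5 the second;
        # dropping brick 0 and brick 3 removes one copy from each set.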
        remove_brick_list = [self.brick_list[0], self.brick_list[3]]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 remove_brick_list,
                                 'force',
                                 replica_count=2)
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove brick operation successfully")

        # Checking if volume is 2x2 or not.
        volume_info = get_volume_info(self.mnode, self.volname)
        brick_count = int(volume_info[self.volname]['brickCount'])
        self.assertEqual(brick_count, 4, "Failed to remove 2 bricks.")
        g.log.info("Successfully removed 2 bricks.")
        type_string = volume_info[self.volname]['typeStr']
        self.assertEqual(type_string, 'Distributed-Replicate',
                         "Conversion to 2x2 failed.")
        g.log.info("Conversion to 2x2 successful.")

        # Converting 2x2 to distribute volume.
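        # self.brick_list still holds the original six bricks; after the
        # previous removal the surviving sets are (1, 2) and (4, 5), so
        # removing bricks 1 and 4 with replica 1 leaves a 2-brick
        # distribute volume.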
        remove_brick_list = [self.brick_list[1], self.brick_list[4]]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 remove_brick_list,
                                 'force',
                                 replica_count=1)
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove brick operation successfully")

        # Checking if volume is pure distribute or not.
        volume_info = get_volume_info(self.mnode, self.volname)
        brick_count = int(volume_info[self.volname]['brickCount'])
        self.assertEqual(brick_count, 2, "Failed to remove 2 bricks.")
        g.log.info("Successfully removed 2 bricks.")
        type_string = volume_info[self.volname]['typeStr']
        self.assertEqual(type_string, 'Distribute',
                         "Conversion to distribute failed.")
        g.log.info("Conversion to distribute successful.")
    def test_add_brick_while_remove_brick_is_in_progress(self):
        # DHT Layout and hash validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Form bricks list for volume shrink
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to "
                                                      "form bricks list for "
                                                      "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)
        # Log remove-brick status
        g.log.info("Logging Remove-brick status")
        ret, out, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))
        g.log.info("Remove-brick status %s", self.volname)
        g.log.info(out)

        # Expanding volume while volume shrink is in-progress
        g.log.info("Volume %s: Expand volume while volume shrink in-progress",
                   self.volname)
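        # The remove-brick migration shows up as a rebalance task, so the
        # add-brick below is expected to be rejected with a
        # "rebalance is in progress" error while the shrink is running.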
        _, _, err = add_brick(self.mnode, self.volname, self.add_brick_list)
        self.assertIn(
            "rebalance is in progress", err, "Successfully added "
            "bricks to the volume <NOT EXPECTED>")
        g.log.info(
            "Volume %s: Failed to add-bricks while volume shrink "
            "in-progress <EXPECTED>", self.volname)

        # cleanup add-bricks list
        for brick in self.add_brick_list:
            brick_node, brick_path = brick.split(":")
            ret, _, _ = g.run(brick_node, "rm -rf %s" % brick_path)
            if ret != 0:
                g.log.error("Failed to clean %s:%s", brick_node, brick_path)
        g.log.info("Successfully cleaned backend add-brick bricks list")
    def test_remove_brick_command_force(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create some data on the volume.
        3. Run remove-brick with force.
        4. Check if bricks are still seen on volume or not
        """
        # Create some data on the volume
        self._run_io_on_mount_point()

        # Remove-brick on the volume with force option
        brick_list_to_remove = form_bricks_list_to_remove_brick(self.mnode,
                                                                self.volname)
        self.assertIsNotNone(brick_list_to_remove, "Brick list is empty")

        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 brick_list_to_remove, option="force")
        self.assertFalse(ret, "Failed to run remove-brick with force")
        g.log.info("Successfully run remove-brick with force")

        # Get a list of all bricks
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Brick list is empty")

        # Check if bricks removed brick are present or not in brick list
        for brick in brick_list_to_remove:
            self.assertNotIn(brick, brick_list,
                             "Brick still present in brick list even "
                             "after removing")
    def test_induce_holes_thenfixlayout(self):

        # pylint: disable=too-many-statements
        m_point = self.mounts[0].mountpoint
        command = 'mkdir -p ' + m_point + '/testdir'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "mkdir failed")
        g.log.info("mkdir is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Shrinking volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "force")
        self.assertFalse(ret, "Remove-brick with force: FAIL")
        g.log.info("Remove-brick with force: PASS")

        # Check the layout
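        # Because the bricks were removed with 'force' (no data migration or
        # layout rebuild), the directory layout is expected to have holes in
        # its DHT hash ranges, which the fix-layout started below should
        # repair.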
        ret = is_layout_complete(self.mnode, self.volname, dirpath='/testdir')
        self.assertFalse(ret, "Volume %s: Layout is complete")
        g.log.info("Volume %s: Layout has some holes")

        # Start Rebalance fix-layout
        g.log.info("Volume %s: Start fix-layout", self.volname)
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, ("Volume %s: fix-layout start failed"
                                  "%s", self.volname))
        g.log.info("Volume %s: fix-layout start success", self.volname)

        # Wait for fix-layout to complete
        g.log.info("Waiting for fix-layout to complete")
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: Fix-layout is either failed or "
                              "in-progress", self.volname))
        g.log.info("Volume %s: Fix-layout completed successfully",
                   self.volname)

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")
    def tearDown(self):

        status_info = get_remove_brick_status(
            self.mnode, self.volname, bricks_list=self.remove_brick_list)
        status = status_info['aggregate']['statusStr']
        if 'in progress' in status:
            # Shrink volume by removing bricks with option start
            g.log.info("Vol %s: Stop remove brick", self.volname)
            ret, _, _ = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "stop")
            g.log.info("Volume %s shrink stopped ", self.volname)

        # Unmount Volume and Cleanup Volume
        g.log.info("Starting to Unmount Volume and Cleanup Volume")
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Unmount Volume and Cleanup Volume")
        g.log.info("Successful in Unmount Volume and Cleanup Volume")

        # Calling GlusterBaseClass tearDown
        GlusterBaseClass.tearDown.im_func(self)
    def test_quorum_remove_brick(self):
        '''
        -> Create volume
        -> Enable server quorum
        -> Set server quorum ratio to 95%
        -> Stop glusterd on any one of the nodes
        -> Perform remove brick operation
        -> Start glusterd
        -> Check gluster vol info; bricks should be the same before and after
        performing the remove brick operation.
        '''
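        # Rough idea of the scenario: with a 95% server-quorum ratio, losing
        # glusterd on even one node of the pool should break quorum, so the
        # remove-brick attempted below is expected to be rejected and the
        # brick list should stay unchanged.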
        # Enabling server quorum
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'server'})
        self.assertTrue(ret, "Failed to set server quorum for volume %s"
                        % self.volname)
        g.log.info("Able to set server quorum successfully for %s",
                   self.volname)

        # Setting server quorum ratio in percentage
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '95%'})
        self.assertTrue(ret, "Failed to set server quorum ratio for %s"
                        % self.servers)
        g.log.info("Able to set server quorum ratio successfully for %s",
                   self.servers)

        # Getting brick list from volume
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get brick list of %s"
                             % self.volname)
        g.log.info("Successful in getting brick list of %s", self.volname)

        # Stopping glusterd
        self.random_server = random.choice(self.servers[1:])
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to stop glusterd on %s"
                        % self.random_server)
        g.log.info("Glusterd stopped successfully on %s", self.random_server)

        # Forming brick list for performing remove brick operation
        remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                             self.volname)
        self.assertIsNotNone(remove_brick_list, "Failed to get brick list for "
                                                "performing remove brick "
                                                "operation")
        g.log.info("Successful in getting brick list for performing remove "
                   "brick operation")

        # Performing remove brick operation
        ret, _, err = remove_brick(self.mnode, self.volname,
                                   remove_brick_list, 'force')
        self.assertNotEqual(ret, 0, "Remove brick should fail when quorum is "
                                    "in not met condition, but brick removed "
                                    "successfully for %s" % self.volname)
        g.log.info("Failed to remove brick when quorum is in not met condition"
                   " as expected for %s", self.volname)

        # Expected error message for remove brick operation
        msg = ("volume remove-brick commit force: failed: "
               "Quorum not met. Volume operation not allowed")

        # Checking error message for remove brick operation
        self.assertIn(msg, err, "Error message is not correct for "
                                "remove brick operation when quorum not met")
        g.log.info("Error message is correct for remove brick operation when "
                   "quorum not met")

        # Starting glusterd
        ret = start_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s"
                        % self.random_server)
        g.log.info("Glusted started successfully on %s", self.random_server)

        # Checking glusterd status
        count = 0
        while count < 60:
            ret = is_glusterd_running(self.random_server)
            if not ret:
                break
            sleep(2)
            count += 1
        self.assertEqual(ret, 0, "Glusterd is not running on %s"
                         % self.random_server)
        g.log.info("Glusterd is running on %s", self.random_server)

        # Getting brick list of volume after performing remove brick operation
        new_brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(new_brick_list, "Failed to get brick list of %s"
                             % self.volname)
        g.log.info("Successful in getting brick list of %s", self.volname)

        # Comparing bricks info before and after performing
        # remove brick operation
        self.assertListEqual(brick_list, new_brick_list,
                             "Bricks are not same before and after performing"
                             " remove brick operation")
        g.log.info("Bricks are same before and after "
                   "performing remove brick operation")
    def test_arb_to_repl_conversion_with_io(self):
        """
        Description: To perform a volume conversion from Arbiter to Replicated
        with background IOs

        Steps:
        - Create, start and mount an arbiter volume in two clients
        - Create two dirs, fill IO in the first dir and take note of arequal
        - Start a continuous IO from second directory
        - Convert arbiter to x2 replicated volume (remove brick)
        - Convert x2 replicated to x3 replicated volume (add brick)
        - Wait for ~5 min for vol file to be updated on all clients
        - Enable client side heal options and issue volume heal
        - Validate heal completes with no errors and arequal of first dir
          matches against initial checksum
        """

        client, m_point = (self.mounts[0].client_system,
                           self.mounts[0].mountpoint)

        # Fill IO in first directory
        cmd = ('/usr/bin/env python {} '
               'create_deep_dirs_with_files --dir-depth 10 '
               '--fixed-file-size 1M --num-of-files 100 '
               '--dirname-start-num 1 {}'.format(self.script_path, m_point))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, 'Not able to fill directory with IO')

        # Take `arequal` checksum on first directory
        ret, exp_arequal = collect_mounts_arequal(self.mounts[0],
                                                  m_point + '/user1')
        self.assertTrue(ret, 'Failed to get arequal checksum on mount')

        # Start continuous IO from second directory
        client = self.mounts[1].client_system
        cmd = ('/usr/bin/env python {} '
               'create_deep_dirs_with_files --dir-depth 10 '
               '--fixed-file-size 1M --num-of-files 250 '
               '--dirname-start-num 2 {}'.format(self.script_path, m_point))
        proc = g.run_async(client, cmd)
        self.all_mounts_procs.append(proc)

        # Wait for IO to fill before volume conversion
        sleep(30)

        # Remove arbiter bricks ( arbiter to x2 replicated )
        kwargs = {'replica_count': 2}
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 self._get_arbiter_bricks(),
                                 option='force',
                                 **kwargs)
        self.assertEqual(ret, 0, 'Not able to convert arbiter to x2 '
                         'replicated volume')
        # Wait for IO to fill after volume conversion
        sleep(30)

        # Add bricks (x2 replicated to x3 replicated)
        kwargs['replica_count'] = 3
        vol_info = get_volume_info(self.mnode, volname=self.volname)
        self.assertIsNotNone(vol_info, 'Not able to get volume info')
        dist_count = vol_info[self.volname]['distCount']
        bricks_list = form_bricks_list(
            self.mnode,
            self.volname,
            number_of_bricks=int(dist_count) * 1,
            servers=self.servers,
            servers_info=self.all_servers_info,
        )
        self.assertTrue(bricks_list, 'Not able to get unused list of bricks')
        ret, _, _ = add_brick(self.mnode,
                              self.volname,
                              bricks_list,
                              force='True',
                              **kwargs)
        self.assertEqual(ret, 0, 'Not able to add-brick to '
                         '{}'.format(self.volname))
        # Wait for IO post x3 replicated volume conversion
        sleep(30)

        # Validate volume info
        vol_info = get_volume_info(self.mnode, volname=self.volname)
        self.assertIsNotNone(vol_info, 'Not able to get volume info')
        vol_info = vol_info[self.volname]
        repl_count, brick_count = (vol_info['replicaCount'],
                                   vol_info['brickCount'])

        # Wait for the volfile to sync up on clients
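        # Assumption behind this check: the FUSE mount exposes the active
        # client graph under <mount>/.meta/graphs/active/, and each
        # <vol>-client-N xlator's 'private' file reports a "connected" line,
        # so the count matching brickCount is used as a proxy for the new
        # volfile being in effect on the client.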
        cmd = ('grep -ir connected {}/.meta/graphs/active/{}-client-*/private '
               '| wc -l')
        wait_time = time() + 300
        in_sync = False
        while time() <= wait_time:
            ret, rout, _ = g.run(client, cmd.format(m_point, self.volname))
            self.assertEqual(ret, 0,
                             'Not able to grep for volfile sync from client')
            if int(rout) == int(brick_count):
                in_sync = True
                break
            sleep(30)
        self.assertTrue(
            in_sync, 'Volfiles from clients are not synced even '
            'after polling for ~5 min')

        self.assertEqual(
            int(repl_count), kwargs['replica_count'], 'Not able '
            'to validate x2 to x3 replicated volume conversion')

        # Enable client side heal options, trigger and monitor heal
        ret = set_volume_options(
            self.mnode, self.volname, {
                'data-self-heal': 'on',
                'entry-self-heal': 'on',
                'metadata-self-heal': 'on'
            })
        self.assertTrue(ret, 'Unable to set client side heal options')
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger heal on volume')
        ret = monitor_heal_completion(self.mnode,
                                      self.volname,
                                      timeout_period=1800)
        self.assertTrue(ret,
                        'Heal is not completed for {}'.format(self.volname))

        # Validate IO
        prev_time = datetime.now().replace(microsecond=0)
        ret = validate_io_procs(self.all_mounts_procs, [self.mounts[1]])
        curr_time = datetime.now().replace(microsecond=0)
        self.assertTrue(ret, 'Not able to validate completion of IO on mount')
        self.all_mounts_procs *= 0

        # To ascertain IO was happening during brick operations
        self.assertGreater(
            curr_time - prev_time, timedelta(seconds=10), 'Unable '
            'to validate IO was happening during brick operations')

        # Take and validate `arequal` checksum on first directory
        ret, act_arequal = collect_mounts_arequal(self.mounts[1],
                                                  m_point + '/user1')
        self.assertTrue(ret, 'Failed to get arequal checksum from mount')
        self.assertEqual(
            exp_arequal, act_arequal, '`arequal` checksum did '
            'not match post arbiter to x3 replicated volume conversion')

        g.log.info('PASS: Arbiter to x3 replicated volume conversion complete')
    def test_remove_brick_operations(self):
        """
        Steps:
        1. Removing data brick count number of bricks from the volume
           should fail
        2. Step 1 with the force option should also fail
        3. Removing redundancy count number of bricks from the volume
           should fail
        4. Step 3 with the force option should also fail
        5. Removing data brick count + 1 number of bricks from the volume
           should fail
        6. Step 5 with the force option should also fail
        7. Removing disperse count number of bricks from the volume with
           one wrong brick path should fail
        8. Step 7 with the force option should also fail
        9. Start remove-brick on the first subvol's bricks
        10. Start remove-brick on the second subvol's bricks, removing whole
            subvols to work towards a pure EC volume
        11. Start remove-brick on the third subvol's bricks
        12. Write files and perform reads on the mount points
        """
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-statements
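        # Background for steps 1-8: in a dispersed volume, bricks can only be
        # removed one complete subvol (disperse_count bricks) at a time, so
        # removing fewer bricks, an extra brick, or a set containing a bogus
        # path is expected to fail and leave the brick count untouched.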

        subvols_list = get_subvols(self.mnode, self.volname)
        volinfo = get_volume_info(self.mnode, self.volname)
        initial_brickcount = volinfo[self.volname]['brickCount']
        data_brick_count = (self.volume['voltype']['disperse_count'] -
                            self.volume['voltype']['redundancy_count'])

        # Try to remove data brick count number of bricks from the volume
        bricks_list_to_remove = (
            subvols_list['volume_subvols'][0][0:data_brick_count])
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Try to remove redundant brick count number of bricks from the volume
        bricks_list_to_remove = (
            subvols_list['volume_subvols'][0]
            [0:self.volume['voltype']['redundancy_count']])
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume"
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Try to remove data brick count+1 number of bricks from the volume
        bricks_list_to_remove = (
            subvols_list['volume_subvols'][0][0:data_brick_count + 1])
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Try to remove disperse count number of bricks from the volume with
        # one wrong brick path
        bricks_list_to_remove = (subvols_list['volume_subvols'][0]
                                 [0:self.volume['voltype']['disperse_count']])
        bricks_list_to_remove[0] = bricks_list_to_remove[0] + "wrong_path"
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Verify that the brick count is intact
        volinfo = get_volume_info(self.mnode, self.volname)
        latest_brickcount = volinfo[self.volname]['brickCount']
        self.assertEqual(initial_brickcount, latest_brickcount,
                         ("Brick count is not expected to "
                          "change, but changed"))

        # Start remove brick on first subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][0]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 0,
                         ("Failed to remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Verify that the brick count is intact
        volinfo = get_volume_info(self.mnode, self.volname)
        latest_brickcount = volinfo[self.volname]['brickCount']
        self.assertEqual(initial_brickcount, latest_brickcount,
                         ("Brick count is not expected to "
                          "change, but changed"))

        # Wait for remove brick to complete
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(ret, ("Remove brick is not yet complete on the volume "
                              "%s" % self.volname))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Commit the remove brick operation
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="commit")
        self.assertEqual(ret, 0,
                         ("Failed to commit remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Remove all the subvols to make a pure EC vol
        # Start remove brick on second subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][1]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 0,
                         ("Failed to remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Wait for remove brick to complete
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(ret, ("Remove brick is not yet complete on the volume "
                              "%s", self.volname))

        # Commit the remove brick operation
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="commit")
        self.assertEqual(ret, 0,
                         ("Failed to commit remove bricks %s from the volume"
                          " %s" % (bricks_list_to_remove, self.volname)))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Start remove brick on third subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][2]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 0, ("Failed to remove bricks %s from "
                                  "the volume %s" %
                                  (bricks_list_to_remove, self.volname)))

        # Wait for remove brick to complete
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(ret, ("Remove brick is not yet complete on the volume "
                              "%s" % self.volname))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Commit the remove brick operation
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="commit")
        self.assertEqual(ret, 0,
                         ("Failed to commit remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Log volume info and status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s" % self.volname))
        g.log.info(
            "Successful in logging volume info and status "
            "of volume %s", self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Write some files on the mount point
        cmd1 = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
                "do touch file$i; done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd1)
        self.assertEqual(ret, 0, ("Write operation failed on client "
                                  "%s " % self.mounts[0].client_system))
        g.log.info("Writes on mount point successful")

        # Perform read operation on mountpoint
        cmd2 = ("cd %s; ls -lRt;" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd2)
        self.assertEqual(ret, 0, ("Read operation failed on client "
                                  "%s " % self.mounts[0].client_system))
        g.log.info("Read on mount point successful")
    def test_remove_brick(self):
        """
        In this test case:
        1. Create a trusted storage pool of 4 nodes
        2. Create a distributed-replicated volume with 4 bricks
        3. Start the volume
        4. Fuse mount the gluster volume on a client node
        5. Create some data files
        6. Start remove-brick operation for one replica pair
        7. Restart glusterd on all nodes
        8. Try to commit the remove-brick operation while rebalance
           is in progress; it should fail
        """

        # pylint: disable=too-many-statements
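        # The commit in step 8 is expected to fail because remove-brick
        # 'start' launches a data-migration task, and glusterd (even after a
        # restart) should refuse to commit while that task has not completed.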
        my_servers = self.servers[0:4]
        my_server_info = {}
        for server in self.servers[0:4]:
            my_server_info[server] = self.all_servers_info[server]
        for index in range(1, 4):
            ret, _, _ = peer_probe(self.servers[0], self.servers[index])
            self.assertEqual(ret, 0, ("peer probe from %s to %s is failed",
                                      self.servers[0], self.servers[index]))
            g.log.info("peer probe is success from %s to "
                       "%s", self.servers[0], self.servers[index])

        # Validating whether the peers are connected or not.
        # In Jenkins this case sometimes fails reporting that peers are not
        # in connected state, so poll for a while until the peers report
        # connected.
        count = 0
        while count < 30:
            ret = is_peer_connected(self.mnode, my_servers)
            if ret:
                g.log.info("Peers are in connected state")
                break
            sleep(3)
            count = count + 1
        self.assertTrue(ret, "Some peers are not in connected state")

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 4, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        kwargs = {}
        kwargs['replica_count'] = 2
        ret = volume_create(self.mnode,
                            self.volname,
                            bricks_list,
                            force=False,
                            **kwargs)
        self.assertEqual(ret[0], 0, ("Unable"
                                     "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        self.all_mounts_procs = []
        # Creating files
        command = ("cd %s/ ; "
                   "for i in `seq 1 10` ; "
                   "do mkdir l1_dir.$i ; "
                   "for j in `seq 1 5` ; "
                   "do mkdir l1_dir.$i/l2_dir.$j ; "
                   "for k in `seq 1 10` ; "
                   "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
                   "bs=128k count=$k ; "
                   "done ; "
                   "done ; "
                   "done ; " % (self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        self.io_validation_complete = False
        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        remove_brick_list = bricks_list[2:4]
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove bricks operation started successfully")
        g.log.info("Restart glusterd on servers %s", self.servers)
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret, ("Failed to restart glusterd on servers %s", self.servers))
        g.log.info("Successfully restarted glusterd on servers %s",
                   self.servers)

        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'commit')
        self.assertNotEqual(ret, 0, "Remove brick commit ops should be fail")
        g.log.info("Remove bricks commit operation failure is expected")
    def test_status_string(self):
        '''
        -> Create Volume
        -> Start rebalance
        -> Check task type in volume status
        -> Check task status string in volume status
        -> Check task type in volume status xml
        -> Check task status string in volume status xml
        -> Start Remove brick operation
        -> Check task type in volume status
        -> Check task status string in volume status
        -> Check task type in volume status xml
        -> Check task status string in volume status xml
        '''
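        # Parsing note: 'gluster volume status <vol>' prints a task section
        # at the end of its output, so the checks below index fixed offsets
        # from the last lines; the dict returned by get_volume_status with
        # options='tasks' exposes the same data via task_status 'type' and
        # 'statusStr'.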

        # Start rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance for volume %s"
                         % self.volname)
        g.log.info("Rebalance started successfully on volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance failed for volume %s" % self.volname)
        g.log.info("Rebalance completed successfully on volume %s",
                   self.volname)

        # Getting volume status after rebalance start
        ret, out, _ = volume_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                         % self.volname)
        g.log.info("Volume status successful on volume %s", self.volname)
        status_list = out.splitlines()

        # Verifying task type from volume status for rebalance
        self.assertIn('Rebalance', status_list[len(status_list) - 4],
                      "Incorrect task type found in volume status for %s"
                      % self.volname)
        g.log.info("Correct task type found in volume status for %s",
                   self.volname)

        # Verifying task status string in volume status for rebalance
        self.assertIn('completed', status_list[len(status_list) - 2],
                      "Incorrect task status found in volume status for %s"
                      % self.volname)
        g.log.info("Correct task status found in volume status for %s",
                   self.volname)

        # Getting volume status --xml after rebalance start
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options='tasks')

        # Verifying task type  from volume status --xml for rebalance
        self.assertEqual('Rebalance',
                         vol_status[self.volname]['task_status'][0]['type'],
                         "Incorrect task type found in volume status xml "
                         "for %s" % self.volname)
        g.log.info("Correct task type found in volume status xml for %s",
                   self.volname)

        # Verifying task status string from volume status --xml for rebalance
        self.assertEqual(
            'completed',
            vol_status[self.volname]['task_status'][0]['statusStr'],
            "Incorrect task status found in volume status "
            "xml for %s" % self.volname)
        g.log.info("Correct task status found in volume status xml %s",
                   self.volname)

        # Getting sub vols
        subvol_dict = get_subvols(self.mnode, self.volname)
        subvol = subvol_dict['volume_subvols'][1]

        # Perform remove brick start
        ret, _, _ = remove_brick(self.mnode, self.volname, subvol,
                                 'start', replica_count=3)
        self.assertEqual(ret, 0, "Failed to start remove brick operation "
                                 "for %s" % self.volname)
        g.log.info("Remove brick operation started successfully on volume %s",
                   self.volname)

        # Getting volume status after remove brick start
        ret, out, _ = volume_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                         % self.volname)
        g.log.info("Volume status successful on volume %s", self.volname)
        status_list = out.splitlines()

        # Verifying task type from volume status after remove brick start
        self.assertIn('Remove brick', status_list[len(status_list) - 8],
                      "Incorrect task type found in volume status for "
                      "%s" % self.volname)
        g.log.info("Correct task type found in volume status task for %s",
                   self.volname)

        # Verifying task status string in volume status after remove
        # brick start
        ret = False
        remove_status = ['completed', 'in progress']
        if (status_list[len(status_list) - 2].split(':')[1].strip() in
                remove_status):
            ret = True
        self.assertTrue(ret, "Incorrect task status found in volume status "
                             "task for %s" % self.volname)
        g.log.info("Correct task status found in volume status task for %s",
                   self.volname)

        # Getting volume status --xml after remove brick start
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options='tasks')

        # Verifying task type  from volume status --xml after
        # remove brick start
        self.assertEqual('Remove brick',
                         vol_status[self.volname]['task_status'][0]['type'],
                         "Incorrect task type found in volume status xml for "
                         "%s" % self.volname)
        g.log.info("Correct task type found in volume status xml for %s",
                   self.volname)

        # Verifying task status string from volume status --xml
        # after remove brick start
        ret = False
        if (vol_status[self.volname]['task_status'][0]['statusStr'] in
                remove_status):
            ret = True
        self.assertTrue(ret, "Incorrect task status found in volume status "
                             "xml for %s" % self.volname)
        g.log.info("Correct task status found in volume status xml %s",
                   self.volname)
    def test_remove_brick_scenarios(self):
        # pylint: disable=too-many-statements
        """
        Test case:
        1. Create a cluster by peer probing and create a volume.
        2. Mount it and write some IO like 100000 files.
        3. Initiate the remove-brick operation on a pair of bricks.
        4. Try to stop the remove-brick operation using another pair of
           bricks; it should fail.
        5. Try to get the remove-brick status using another pair of bricks
           in the volume; it should fail.
        6. Try to stop the remove-brick operation using non-existent bricks;
           it should fail.
        7. Check the remove-brick status using non-existent bricks; it
           should fail.
        8. Stop the remove-brick operation that was started in step 3.
        9. Perform fix-layout on the volume.
        10. Get the rebalance fix-layout status and wait for it to complete.
        11. Create a directory from the mount point.
        12. Check for the 'trusted.glusterfs.dht' extended attribute in the
            newly created directory on the bricks where remove-brick was
            stopped (i.e. the bricks that step 8 stopped removing).
        13. Umount, stop and delete the volume.
        """

        # Getting a list of all the bricks.
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Running IO.
        pool = ThreadPool(5)
        # Build a command per each thread
        # e.g. "seq 1 20000 ... touch" , "seq 20001 40000 ... touch" etc
        cmds = ["seq {} {} | sed 's|^|{}/test_file|' | xargs touch".
                format(i, i + 19999, self.mounts[0].mountpoint)
                for i in range(1, 100000, 20000)]
        # Run all commands in parallel (each thread returns a tuple from g.run)
        ret = pool.map(
            lambda command: g.run(self.mounts[0].client_system, command), cmds)
        # ret -> list of tuples [(return_code, stdout, stderr),...]
        pool.close()
        pool.join()
        # Verify all commands' exit code is 0 (first element of each tuple)
        for thread_return in ret:
            self.assertEqual(thread_return[0], 0, "File creation failed.")
        g.log.info("Files create on mount point.")

        # Removing bricks from volume.
        remove_brick_list_original = bricks_list[3:6]
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_original, 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation.")
        g.log.info("Remove bricks operation started successfully.")

        # Stopping brick remove operation for other pair of bricks.
        remove_brick_list_other_pair = bricks_list[0:3]
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_other_pair, 'stop')
        self.assertEqual(ret, 1, "Successfully stopped remove brick operation "
                                 "on other pair of bricks.")
        g.log.info("Failed to stop remove brick operation on"
                   " other pair of bricks.")

        # Checking status of brick remove operation for other pair of bricks.
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_other_pair, 'status')
        self.assertEqual(ret, 1, "Error: Got status on other pair of bricks.")
        g.log.info("EXPECTED: Failed to get status on other pair of bricks.")

        # Stopping remove operation for non-existent bricks.
        remove_brick_list_non_existent = [bricks_list[0] + 'non-existent',
                                          bricks_list[1] + 'non-existent',
                                          bricks_list[2] + 'non-existent']
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_non_existent, 'stop')
        self.assertEqual(ret, 1, "Error: Successfully stopped remove brick"
                                 " operation on non-existent bricks.")
        g.log.info("EXPECTED: Failed to stop remove brick operation"
                   " on non existent bricks.")

        # Checking status of brick remove operation for non-existent bricks.
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_non_existent, 'status')
        self.assertEqual(ret, 1,
                         "Error: Status on non-existent bricks successful.")
        g.log.info("EXPECTED: Failed to get status on non existent bricks.")

        # Stopping the initial brick remove operation.
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_original, 'stop')
        self.assertEqual(ret, 0, "Failed to stop remove brick operation")
        g.log.info("Remove bricks operation stop successfully")

        # Start rebalance fix layout for volume.
        g.log.info("Starting Fix-layout on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, ("Failed to start rebalance for fix-layout"
                                  "on the volume %s", self.volname))
        g.log.info("Successfully started fix-layout on the volume %s",
                   self.volname)

        # Checking status of rebalance fix layout for the volume.
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to check status of rebalance"
                                  "on the volume %s", self.volname))
        g.log.info("Successfully checked status on the volume %s",
                   self.volname)
        ret = wait_for_fix_layout_to_complete(self.mnode,
                                              self.volname, timeout=30000)
        self.assertTrue(ret, ("Failed to check for rebalance."))
        g.log.info("Rebalance completed.")

        # Creating directory.
        dir_name = ''
        for counter in range(0, 10):
            ret = mkdir(self.mounts[0].client_system,
                        self.mounts[0].mountpoint + "/dir1" + str(counter),
                        parents=True)
            if ret:
                dir_name = "/dir1" + str(counter)
                break
        self.assertTrue(ret, ("Failed to create directory dir1."))
        g.log.info("Directory dir1 created successfully.")

        # Checking value of attribute for dht.
        brick_server, brick_dir = bricks_list[0].split(':')
        dir_name = brick_dir + dir_name
        g.log.info("Check trusted.glusterfs.dht on host  %s for directory %s",
                   brick_server, dir_name)
        ret = get_fattr(brick_server, dir_name, 'trusted.glusterfs.dht')
        self.assertTrue(ret, ("Failed to get trusted.glusterfs.dht for %s"
                              % dir_name))
        g.log.info("Get trusted.glusterfs.dht xattr for %s successfully",
                   dir_name)
    def test_remove_brick_while_rebalance_is_running(self):

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s",
                    self.clients[0], self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0], self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand successful", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: one or more volume process are "
                              "not up", self.volname))
        g.log.info("All volume %s processes are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Verify that all processes of the volume are online
        g.log.info("Volume %s: Verifying that all processes are online",
                   self.volname)
        ret = verify_all_process_of_volume_are_online(self.mnode,
                                                      self.volname)
        self.assertTrue(ret, ("Volume %s: All processes are not online",
                              self.volname))
        g.log.info("Volume %s: All processes are online", self.volname)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                                  self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Check if rebalance is running
        status_info = get_rebalance_status(self.mnode, self.volname)
        status = status_info['aggregate']['statusStr']
        if 'in progress' in status:
            # Shrinking volume by removing bricks
            g.log.info("Start removing bricks from volume")
            _, _, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "start")
            self.assertIn("Rebalance is in progress", err, "Successfully "
                          "removed bricks while volume rebalance is "
                          "in-progress")
            g.log.info("Failed to start remove-brick as rebalance is "
                       "in-progress")
        else:
            g.log.error("Rebalance process is not running")
            raise ExecutionError("Rebalance process is not running")

    def test_remove_brick_when_quorum_not_met(self):
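        """
        Test case:
        1) Create and start a volume.
        2) Set cluster.server-quorum-type to 'server' and
           cluster.server-quorum-ratio to 95%.
        3) Stop glusterd on half of the nodes so that server quorum is
           not met and the bricks on those nodes go offline.
        4) Try remove-brick start; it should fail and the brick should
           remain part of the volume.
        """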

        # create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))

        # set cluster.server-quorum-type as server
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.server-quorum-type': 'server'})
        self.assertTrue(ret, ("Failed to set the quorum type as a server"
                              " on volume %s", self.volname))

        # Setting quorum ratio to 95%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '95%'})
        self.assertTrue(
            ret, "Failed to set server quorum ratio on %s" % self.volname)

        # Bring down glusterd on half of the nodes
        num_of_servers = len(self.servers)
        num_of_nodes_to_bring_down = num_of_servers // 2

        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            ret = stop_glusterd(self.servers[node])
            self.assertTrue(
                ret, ("Failed to stop glusterd on %s" % self.servers[node]))

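        # Wait for glusterd to stop on the nodes that were brought down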
        for node in range(num_of_nodes_to_bring_down, num_of_servers):
            count = 0
            while count < 80:
                ret = is_glusterd_running(self.servers[node])
                if ret:
                    break
                sleep(2)
                count += 1
            self.assertNotEqual(
                ret, 0, "glusterd is still running on %s" % self.servers[node])

        # Verify the node count in volume status after glusterd is stopped
        # on half of the servers. It is not possible to check the brick
        # status in volume status immediately after stopping glusterd.
        count = 0
        while count < 100:
            vol_status = get_volume_status(self.mnode, self.volname)
            servers_count = len(vol_status[self.volname].keys())
            if servers_count == num_of_servers - num_of_nodes_to_bring_down:
                break
            sleep(2)
            count += 1

        # confirm that quorum is not met, brick process should be down
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        bricks_to_check = bricks_list[0:num_of_nodes_to_bring_down]
        ret = are_bricks_offline(self.mnode, self.volname, bricks_to_check)
        self.assertTrue(ret, "Server quorum is not met, Bricks are up")

        # try remove brick operation, which should fail
        brick1 = bricks_list[0]
        ret, _, _ = remove_brick(self.mnode, self.volname, [brick1], "start")
        self.assertNotEqual(ret, 0, ("remove brick is success, when quorum is"
                                     " not met"))

        # Confirm that the removed brick is still part of the volume
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        self.assertIn(brick1, bricks_list,
                      "remove brick is success, when quorum is not met")

    def test_remove_brick_scenarios(self):
        # pylint: disable=too-many-statements
        """
        Test case:
        1. Create a cluster by peer probing and create a volume.
        2. Mount it and write some IO like 100000 files.
        3. Initiate the remove-brick operation on pair of bricks.
        4. Stop the remove-brick operation using other pairs of bricks.
        5. Get the remove-brick status using other pair of bricks in
           the volume.
        6. Stop the remove-brick operation using non-existent bricks.
        7. Check the remove-brick status using non-existent bricks.
        8. Stop the remove-brick operation that was started in step 3.
        9. Perform fix-layout on the volume.
        10. Get the rebalance fix-layout status.
        11. Create a directory from the mount point.
        12. Check for the 'trusted.glusterfs.dht' extended attribute of the
            newly created directory on the bricks whose removal was started
            in step 3 and stopped in step 8.
        13. Unmount, stop and delete the volume.
        """

        # Getting a list of all the bricks.
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Running IO.
        command = ('for number in `seq 1 100000`;do touch ' +
                   self.mounts[0].mountpoint + '/test_file$number; done')
        ret, _, _ = g.run(self.mounts[0].client_system, command)
        self.assertEqual(ret, 0, "File creation: failed.")
        g.log.info("Files create on mount point.")

        # Removing bricks from volume.
        remove_brick_list_original = bricks_list[3:6]
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_original, 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation.")
        g.log.info("Remove bricks operation started successfully.")

        # Stopping brick remove operation for other pair of bricks.
        remove_brick_list_other_pair = bricks_list[0:3]
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_other_pair, 'stop')
        self.assertEqual(
            ret, 1, "Successfully stopped remove brick operation "
            "on other pair of bricks.")
        g.log.info("Failed to stop remove brick operation on"
                   " other pair of bricks.")

        # Checking status of brick remove operation for other pair of bricks.
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_other_pair, 'status')
        self.assertEqual(ret, 1, "Error: Got status on other pair of bricks.")
        g.log.info("EXPECTED: Failed to get status on other pair of bricks.")

        # Stopping remove operation for non-existent bricks.
        remove_brick_list_non_existent = [
            bricks_list[0] + 'non-existent', bricks_list[1] + 'non-existent',
            bricks_list[2] + 'non-existent'
        ]
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_non_existent, 'stop')
        self.assertEqual(
            ret, 1, "Error: Successfully stopped remove brick"
            " operation on non-existent bricks.")
        g.log.info("EXPECTED: Failed to stop remove brick operation"
                   " on non existent bricks.")

        # Checking status of brick remove operation for non-existent bricks.
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_non_existent, 'status')
        self.assertEqual(ret, 1,
                         "Error: Status on non-existent bricks successful.")
        g.log.info("EXPECTED: Failed to get status on non existent bricks.")

        # Stopping the initial brick remove operation.
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 remove_brick_list_original, 'stop')
        self.assertEqual(ret, 0, "Failed to stop remove brick operation")
        g.log.info("Remove bricks operation stopped successfully")

        # Start rebalance fix layout for volume.
        g.log.info("Starting Fix-layout on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, ("Failed to start rebalance for fix-layout"
                                  "on the volume %s", self.volname))
        g.log.info("Successfully started fix-layout on the volume %s",
                   self.volname)

        # Checking status of rebalance fix layout for the volume.
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to check status of rebalance"
                                  "on the volume %s", self.volname))
        g.log.info("Successfully checked status on the volume %s",
                   self.volname)
        ret = wait_for_fix_layout_to_complete(self.mnode,
                                              self.volname,
                                              timeout=30000)
        self.assertTrue(ret, ("Failed to check for rebalance."))
        g.log.info("Rebalance completed.")

        # Creating directory.
        dir_name = ''
        for counter in range(0, 10):
            ret = mkdir(self.mounts[0].client_system,
                        self.mounts[0].mountpoint + "/dir1" + str(counter),
                        parents=True)
            if ret:
                dir_name = "/dir1" + str(counter)
                break
        self.assertTrue(ret, ("Failed to create directory dir1."))
        g.log.info("Directory dir1 created successfully.")

        # Checking value of attribute for dht.
        brick_server, brick_dir = bricks_list[0].split(':')
        folder_name = brick_dir + dir_name
        g.log.info("Check trusted.glusterfs.dht on host  %s for directory %s",
                   brick_server, folder_name)
        ret = get_fattr(brick_server, folder_name, 'trusted.glusterfs.dht')
        self.assertTrue(
            ret, ("Failed to get trusted.glusterfs.dht for %s" % folder_name))
        g.log.info("Get trusted.glusterfs.dht xattr for %s successfully",
                   folder_name)
    def test_kill_brick_with_remove_brick(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create some data on the volume.
        3. Start remove-brick on the volume.
        4. When remove-brick is in progress kill brick process of a brick
           which is being removed.
        5. Remove-brick should complete without any failures.
        """
        # Start I/O from clients on the volume
        counter = 1
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Failed to create datat on volume")
            counter += 10

        # Collect arequal checksum before ops
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        # Start remove-brick on the volume
        brick_list = form_bricks_list_to_remove_brick(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Brick list is empty")

        ret, _, _ = remove_brick(self.mnode, self.volname, brick_list, 'start')
        self.assertFalse(ret, "Failed to start remove-brick on volume")
        g.log.info("Successfully started remove-brick on volume")

        # Check rebalance is in progress
        ret = get_remove_brick_status(self.mnode, self.volname, brick_list)
        ret = ret['aggregate']['statusStr']
        self.assertEqual(ret, "in progress", ("Rebalance is not in "
                                              "'in progress' state, either "
                                              "rebalance is in completed state"
                                              " or failed to get rebalance "
                                              "status"))

        # kill brick process of a brick which is being removed
        brick = choice(brick_list)
        node, _ = brick.split(":")
        ret = kill_process(node, process_names="glusterfsd")
        self.assertTrue(ret,
                        "Failed to kill brick process of brick %s" % brick)

        # Wait for remove-brick to complete on the volume
        ret = wait_for_remove_brick_to_complete(self.mnode,
                                                self.volname,
                                                brick_list,
                                                timeout=1200)
        self.assertTrue(ret, "Remove-brick didn't complete")
        g.log.info("Remove brick completed successfully")

        # Check for data loss by comparing arequal before and after ops
        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")

    def test_spurious_rebalance(self):
        """
        In this test case:
        1. Trusted storage Pool of 3 nodes
        2. Create a distributed volume with 3 bricks
        3. Start the volume
        4. Fuse mount the gluster volume from a node outside the trusted pool
        5. Remove a brick from the volume
        6. Check remove-brick status
        7. Stop the remove-brick process
        8. Perform fix-layout on the volume
        9. Get the rebalance fix-layout status
       10. Create a directory from the mount point
       11. Check the trusted.glusterfs.dht extended attribute for the newly
           created directory on the removed brick
        """

        # pylint: disable=too-many-statements
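        # Form a trusted storage pool of 3 nodes by peer probing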
        my_servers = self.servers[0:3]
        my_server_info = {}
        for server in self.servers[0:3]:
            my_server_info[server] = self.all_servers_info[server]
        for index in range(1, 3):
            ret, _, _ = peer_probe(self.servers[0], self.servers[index])
            self.assertEqual(ret, 0, ("peer probe from %s to %s is failed",
                                      self.servers[0], self.servers[index]))
            g.log.info("peer probe is success from %s to "
                       "%s", self.servers[0], self.servers[index])

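        # Create a distributed volume with 3 bricks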
        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 3, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        ret, _, _ = volume_create(self.mnode,
                                  self.volname,
                                  bricks_list,
                                  force=False)
        self.assertEqual(ret, 0, ("Unable"
                                  "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

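        # Start the volume and fetch its brick list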
        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)
        remove_brick_list = []
        remove_brick_list.append(bricks_list[2])
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove bricks operation started successfully")

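        # Stop the remove-brick operation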
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'stop')
        self.assertEqual(ret, 0, "Failed to stop remove brick operation")
        g.log.info("Remove bricks operation stopped successfully")

        g.log.info("Starting Fix-layoyt on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, True)
        self.assertEqual(ret, 0, ("Failed to start rebalance for fix-layout"
                                  "on the volume %s", self.volname))
        g.log.info("Successfully started fix-layout on the volume %s",
                   self.volname)

        # Wait for fix-layout to complete
        g.log.info("Waiting for fix-layout to complete")
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Fix-layout is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Fix-layout is successfully complete on the volume %s",
                   self.volname)
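
        # Create a directory from the mount point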
        ret = mkdir(self.mounts[0].client_system,
                    "%s/dir1" % self.mounts[0].mountpoint)
        self.assertTrue(ret, ("Failed to create directory dir1"))
        g.log.info("directory dir1 is created successfully")

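        # Check the trusted.glusterfs.dht xattr of the new directory
        # on the removed brick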
        brick_server, brick_dir = remove_brick_list[0].split(':')
        folder_name = brick_dir + "/dir1"
        g.log.info("Check trusted.glusterfs.dht on host  %s for directory %s",
                   brick_server, folder_name)

        ret = get_fattr(brick_server, folder_name, 'trusted.glusterfs.dht')
        self.assertTrue(
            ret, ("Failed to get trusted.glusterfs.dht for %s" % folder_name))
        g.log.info("get trusted.glusterfs.dht xattr for %s successfully",
                   folder_name)
    def test_change_reserve_limit_to_lower_value(self):

        # pylint: disable=too-many-statements
        """
        Test Case:
        1) Create a distributed-replicated volume and start it.
        2) Enable storage.reserve option on the volume using below command:
            gluster volume set <volname> storage.reserve <value>
        3) Mount the volume on a client.
        4) Write some data on the mount points.
        5) Start remove-brick operation.
        6) While remove-brick is in-progress change the reserve limit to a
           lower value.
        """

        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # Setting storage.reserve to 50
        ret = set_volume_options(self.mnode, self.volname,
                                 {'storage.reserve': '50'})
        self.assertTrue(ret,
                        "Failed to set storage reserve on %s" % self.mnode)
        g.log.info("Able to set storage reserve successfully on %s",
                   self.mnode)

        # Mounting the volume.
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        # Run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 5 "
                "--max-num-of-dirs 3 "
                "--num-of-files 10 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Getting a list of all the bricks.
        g.log.info("Get all the bricks of the volume")
        self.brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(self.brick_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Removing bricks from volume.
        remove_brick_list = self.brick_list[3:6]
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation.")
        g.log.info("Remove bricks operation started successfully.")

        # Setting storage.reserve to 33
        ret = set_volume_options(self.mnode, self.volname,
                                 {'storage.reserve': '33'})
        self.assertTrue(ret,
                        "Failed to set storage reserve on %s" % self.mnode)
        g.log.info("Able to set storage reserve successfully on %s",
                   self.mnode)

        # Stopping brick remove operation.
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'stop')
        self.assertEqual(ret, 0, "Failed to stop remove brick operation")
        g.log.info("Remove bricks operation stopped successfully")
    def test_remove_brick_no_commit_followed_by_rebalance(self):
        """
        Description: Tests to check that there is no data loss when
                     remove-brick operation is stopped and then new bricks
                     are added to the volume.
         Steps :
         1) Create a volume.
         2) Mount the volume using FUSE.
         3) Create files and dirs on the mount-point.
         4) Calculate the arequal-checksum on the mount-point
         5) Start remove-brick operation on the volume.
         6) While migration is in progress, stop the remove-brick
            operation.
         7) Add-bricks to the volume and trigger rebalance.
         8) Wait for rebalance to complete.
         9) Calculate the arequal-checksum on the mount-point.
         """
        # Start IO on mounts
        m_point = self.mounts[0].mountpoint
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
               "--num-of-files 50 --file-type empty-file %s" %
               (self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate IO
        self.assertTrue(validate_io_procs([proc], self.mounts[0]),
                        "IO failed on some of the clients")

        # Calculate arequal-checksum before starting remove-brick
        ret, arequal_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Form bricks list for volume shrink
        remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                             self.volname,
                                                             subvol_name=1)
        self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to "
                                                 "form bricks list for "
                                                 "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)

        # Log remove-brick status
        ret, out, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                   "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))

        # Check if migration is in progress
        if r'in progress' in out:
            # Stop remove-brick process
            g.log.info("Stop removing bricks from volume")
            ret, out, _ = remove_brick(self.mnode, self.volname,
                                       remove_brick_list, "stop")
            self.assertEqual(ret, 0, "Failed to stop remove-brick process")
            g.log.info("Stopped remove-brick process successfully")
        else:
            g.log.error("Migration for remove-brick is complete")

        # Sleep for 30 secs so that any running remove-brick process stops
        sleep(30)

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
        g.log.info("Volume %s: Add-brick successful", self.volname)

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance has not completed")
        g.log.info("Rebalance has completed successfully")

        # Calculate arequal-checksum on mount-point
        ret, arequal_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Check if there is any data loss
        self.assertEqual(set(arequal_before), set(arequal_after),
                         ("There is data loss"))
        g.log.info("The checksum before and after rebalance is same."
                   " There is no data loss.")

    def test_induce_holes_then_lookup(self):
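        """
        Test case:
        1) Create a directory on the mount point and validate that the
           DHT layout is complete.
        2) Remove bricks from the volume with 'force' to induce holes
           in the layout.
        3) Mount the volume on a new mount point and send a lookup on
           the directory.
        4) Validate that the layout is complete again after the lookup.
        """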

        # pylint: disable=too-many-statements
        m_point = self.mounts[0].mountpoint
        command = 'mkdir -p ' + m_point + '/testdir'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "mkdir failed")
        g.log.info("mkdir is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Shrinking volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "force")
        self.assertFalse(ret, "Remove-brick with force: FAIL")
        g.log.info("Remove-brick with force: PASS")

        # Check the layout
        dirpath = '/testdir'
        ret = is_layout_complete(self.mnode, self.volname, dirpath)
        self.assertFalse(ret, "Volume %s: Layout is complete")
        g.log.info("Volume %s: Layout has some holes")

        # Mount the volume on a new mount point
        mount_point = tempfile.mkdtemp()
        ret, _, _ = mount_volume(self.volname,
                                 mtype='glusterfs',
                                 mpoint=mount_point,
                                 mserver=self.mnode,
                                 mclient=self.mnode)
        self.assertEqual(
            ret, 0, ("Failed to do gluster mount on volume %s", self.volname))
        g.log.info("Volume %s: mount success", self.mnode)

        # Send a look up on the directory
        cmd = 'ls %s%s' % (mount_point, dirpath)
        ret, _, err = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0,
                         ("Lookup failed on %s with error %s", (dirpath, err)))
        g.log.info("Lookup sent successfully on %s", dirpath)

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

    def test_delete_dir_with_self_pointing_linkto_files(self):
        """
        Test case:
        1. Create a pure distribute volume with 2 bricks, start and mount it.
        2. Create dir dir0/dir1/dir2 inside which create 1000 files and rename
           all the files.
        3. Start remove-brick operation on the volume.
        4. Check remove-brick status till status is completed.
        5. When remove-brick status is completed stop it.
        6. Go to the brick used for remove-brick and perform a lookup on
           the files.
        7. Change the linkto xattr value for every file in the brick used
           for remove-brick to point to itself.
        8. Perform rm -rf * from the mount point.
        """
        # Create dir /dir0/dir1/dir2
        self.dir_path = "{}/dir0/dir1/dir2/".format(self.mounts[0].mountpoint)
        ret = mkdir(self.first_client, self.dir_path, parents=True)
        self.assertTrue(ret, "Failed to create /dir0/dir1/dir2/ dir")

        # Create 1000 files inside /dir0/dir1/dir2
        ret, _, _ = g.run(
            self.first_client, 'cd %s;for i in {1..1000}; do echo "Test file" '
            '> tfile-$i; done' % self.dir_path)
        self.assertFalse(ret,
                         "Failed to create 1000 files inside /dir0/dir1/dir2")

        # Rename 1000 files present inside /dir0/dir1/dir2
        ret, _, _ = g.run(
            self.first_client, "cd %s;for i in {1..1000};do mv tfile-$i "
            "ntfile-$i;done" % self.dir_path)
        self.assertFalse(ret,
                         "Failed to rename 1000 files inside /dir0/dir1/dir2")
        g.log.info("I/O successful on mount point.")

        # Start remove-brick operation on the volume
        brick = form_bricks_list_to_remove_brick(self.mnode,
                                                 self.volname,
                                                 subvol_num=1)
        self.assertIsNotNone(brick, "Brick_list is empty")
        ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'start')
        self.assertFalse(ret, "Failed to start remov-brick on volume")

        # Check remove-brick status till status is completed
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                brick)
        self.assertTrue(ret, "Remove-brick didn't complete on volume")

        # When remove-brick status is completed stop it
        ret, _, _ = remove_brick(self.mnode, self.volname, brick, 'stop')
        self.assertFalse(ret, "Failed to start remov-brick on volume")
        g.log.info("Successfully started and stopped remove-brick")

        # Go to brick used for remove brick and perform lookup on the files
        node, path = brick[0].split(":")
        path = "{}/dir0/dir1/dir2/".format(path)
        ret, _, _ = g.run(node, 'ls {}*'.format(path))
        self.assertFalse(ret, "Failed to do lookup on %s" % brick[0])

        # Change the linkto xattr value for every file in brick used for
        # remove brick to point to itself
        ret = get_dir_contents(node, path)
        self.assertIsNotNone(ret,
                             "Unable to get files present in dir0/dir1/dir2")

        ret = get_dht_linkto_xattr(node, "{}{}".format(path, ret[0]))
        self.assertIsNotNone(ret, "Unable to fetch dht linkto xattr")

        # Change trusted.glusterfs.dht.linkto from dist-client-0 to
        # dist-client-1 or vice versa according to initial value
        dht_linkto_xattr = ret.split("-")
        if int(dht_linkto_xattr[2]):
            dht_linkto_xattr[2] = "0"
        else:
            dht_linkto_xattr[2] = "1"
        linkto_value = "-".join(dht_linkto_xattr)

        # Set xattr trusted.glusterfs.dht.linkto on all the linkto files
        ret = set_fattr(node, '{}*'.format(path),
                        'trusted.glusterfs.dht.linkto', linkto_value)
        self.assertTrue(ret, "Failed to change linkto file to point to itself")

        # Perform rm -rf * from mount point
        ret, _, _ = g.run(self.first_client,
                          "rm -rf {}/*".format(self.mounts[0].mountpoint))
        self.assertFalse(ret, "Failed to run rm -rf * on mount point")
        g.log.info("rm -rf * successful on mount point")
    def test_induce_holes_then_lookup(self):
        """
        Test Script to induce holes in layout by using remove-brick force
        and then performing lookup in order to fix the layout.

        Steps :
        1) Create a volume and mount it using FUSE.
        2) Create a directory "testdir" on mount point.
        3) Check if the layout is complete.
        4) Log volume info and status before remove-brick operation.
        5) Form a list of bricks to be removed.
        6) Start remove-brick operation using 'force'.
        7) Let remove-brick complete and check layout.
        8) Mount the volume on a new mount.
        9) Send a lookup on mount point.
        10) Check if the layout is complete.

        """
        # pylint: disable=too-many-statements
        # Create a directory on mount point
        m_point = self.mounts[0].mountpoint
        dirpath = '/testdir'
        command = 'mkdir -p ' + m_point + dirpath
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "mkdir failed")
        g.log.info("mkdir is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    m_point)
        ret = validate_files_in_dir(self.clients[0],
                                    m_point,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                                  self.volname,
                                                                  subvol_num=1)
        self.assertIsNotNone(self.remove_brick_list,
                             ("Volume %s: Failed to form bricks list for "
                              "volume shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for volume shrink",
                   self.volname)

        # Shrinking volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "force")
        self.assertFalse(ret, "Remove-brick with force: FAIL")
        g.log.info("Remove-brick with force: PASS")

        # Check the layout
        ret = is_layout_complete(self.mnode, self.volname, dirpath)
        self.assertFalse(ret, ("Volume %s: Layout is complete", self.volname))
        g.log.info("Volume %s: Layout has some holes", self.volname)

        # Mount the volume on a new mount point
        ret, _, _ = mount_volume(self.volname,
                                 mtype='glusterfs',
                                 mpoint=m_point,
                                 mserver=self.mnode,
                                 mclient=self.clients[1])
        self.assertEqual(ret, 0,
                         ("Failed to do gluster mount of volume %s"
                          " on client node %s", self.volname, self.clients[1]))
        g.log.info("Volume %s mounted successfullly on %s", self.volname,
                   self.clients[1])

        # Send a look up on the directory
        cmd = 'ls %s%s' % (m_point, dirpath)
        ret, _, err = g.run(self.clients[1], cmd)
        self.assertEqual(ret, 0,
                         ("Lookup failed on %s with error %s", (dirpath, err)))
        g.log.info("Lookup sent successfully on %s", m_point + dirpath)

        # DHT Layout validation
        g.log.info("Checking layout after new mount")
        g.log.debug("Verifying hash layout values %s:%s", self.clients[1],
                    m_point + dirpath)
        ret = validate_files_in_dir(self.clients[1],
                                    m_point + dirpath,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")