def bricks_online_and_volume_reset(cls):
        """
        Bring the bricks of the volume online and reset its options.

        Force-starts the volume whenever the offline-brick list could be
        fetched, waits for all bricks to be online and then resets the
        volume options with force.

        Raises:
            ExecutionError: if the force start, the wait for the bricks
                or the volume reset fails.
        """
        bricks_offline = get_offline_bricks_list(cls.mnode, cls.volname)
        if bricks_offline is not None:
            # volume_start hands back a (ret, out, err) tuple; testing the
            # tuple itself is always truthy, so unpack the return code.
            ret, _, _ = volume_start(cls.mnode, cls.volname, force=True)
            if ret:
                raise ExecutionError("Failed to force start volume "
                                     "%s" % cls.volname)
        ret = wait_for_bricks_to_be_online(cls.mnode, cls.volname)
        if not ret:
            raise ExecutionError("Failed to bring bricks online "
                                 "for volume %s" % cls.volname)

        ret, _, _ = volume_reset(cls.mnode, cls.volname, force=True)
        if ret:
            raise ExecutionError("Failed to reset volume %s" % cls.volname)
        g.log.info("Successful in volume reset %s", cls.volname)
    def test_snap_self_heal(self):
        """
        Verify that a volume cloned from a snapshot heals after brick outage.

        Steps:

        1. create a volume
        2. mount volume
        3. create snapshot of that volume
        4. Activate snapshot
        5. Clone snapshot and Mount
        6. Perform I/O
        7. Bring Down Few bricks from volume without
           affecting the volume or cluster.
        8. Perform I/O
        9. Bring the downed bricks back online
        10. Validate heal is complete with areequal

        """
        # pylint: disable=too-many-statements, too-many-locals
        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(
            ret, 0, ("Failed to create snapshot for volume %s" % self.volname))
        g.log.info("Snapshot %s created successfully for volume %s", self.snap,
                   self.volname)

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to Activate snapshot %s" % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # snapshot list
        ret, _, _ = snap_list(self.mnode)
        self.assertEqual(ret, 0, ("Failed to list all the snapshot"))
        g.log.info("Snapshot list command was successful")

        # Creating a Clone volume from snapshot:
        g.log.info("Starting to Clone volume from Snapshot")
        ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
        self.assertEqual(ret, 0, ("Failed to clone %s from snapshot %s" %
                                  (self.clone, self.snap)))
        g.log.info("%s created successfully", self.clone)

        #  start clone volumes
        g.log.info("start to created clone volumes")
        ret, _, _ = volume_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start clone %s" % self.clone)
        g.log.info("clone volume %s started successfully", self.clone)

        # Mounting a clone volume
        g.log.info("Mounting a clone volume")
        ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                                 self.mnode, self.clients[0])
        self.assertEqual(ret, 0,
                         "Failed to mount clone Volume %s" % self.clone)
        g.log.info("Clone volume %s mounted Successfully", self.clone)

        # Checking cloned volume mounted or not
        ret = is_mounted(self.clone, self.mount1, self.mnode, self.clients[0],
                         self.mount_type)
        self.assertTrue(
            ret,
            "Failed to mount clone volume on mount point: %s" % self.mount1)
        g.log.info("clone Volume %s mounted on %s", self.clone, self.mount1)

        # The same file-creation command is used before and after the
        # bricks are taken down.
        io_cmd = ("python %s create_files "
                  "-f 10 --base-file-name file %s" %
                  (self.script_upload_path, self.mount1))

        # write files on all mounts
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mount1)
        # g.run is synchronous; check its return code instead of only
        # storing the result, so a failed write is not reported as success.
        ret, _, err = g.run(self.clients[0], io_cmd)
        self.assertEqual(ret, 0, "IO failed on %s: %s" % (self.mount1, err))
        g.log.info("Successful in creating I/O on mounts")

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.clone)
        bricks_list = get_all_bricks(self.mnode, self.clone)
        g.log.info("Brick List : %s", bricks_list)

        # Select bricks to bring offline
        # The bricks must come from the clone: every later step (offline
        # validation, bring online, arequal) operates on the clone volume.
        g.log.info("Starting to bring bricks to offline")
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.clone))
        # Build a real list: on Python 3 filter() is a one-shot iterator
        # that would already be exhausted when the bricks are validated
        # and brought back online further down.
        bricks_to_bring_offline = [
            brick for brick in (
                bricks_to_bring_offline_dict['hot_tier_bricks'] +
                bricks_to_bring_offline_dict['cold_tier_bricks'] +
                bricks_to_bring_offline_dict['volume_bricks'])
            if brick]
        g.log.info("Brick to bring offline: %s ", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.clone, bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring the bricks offline")
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Offline Bricks list
        offline_bricks = get_offline_bricks_list(self.mnode, self.clone)
        self.assertIsNotNone(
            offline_bricks, "Failed to get offline bricklist "
            "for volume %s" % self.clone)
        for bricks in offline_bricks:
            self.assertIn(bricks, bricks_to_bring_offline,
                          "Failed to validate "
                          "Bricks offline")
        g.log.info("Bricks Offline: %s", offline_bricks)

        # Online Bricks list
        online_bricks = get_online_bricks_list(self.mnode, self.clone)
        self.assertIsNotNone(
            online_bricks, "Failed to get online bricks"
            " for volume %s" % self.clone)
        g.log.info("Bricks Online: %s", online_bricks)

        # write files mountpoint
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mount1)
        ret, _, err = g.run(self.clients[0], io_cmd)
        self.assertEqual(ret, 0, "IO failed on %s: %s" % (self.mount1, err))
        g.log.info("Successful in creating I/O on mounts")

        # Bring all bricks online
        g.log.info("bring all bricks online")
        ret = bring_bricks_online(self.mnode, self.clone,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring bricks online")
        g.log.info("Successful in bringing all bricks online")

        # Validate Bricks are online
        g.log.info("Validating all bricks are online")
        ret = are_bricks_online(self.mnode, self.clone, bricks_list)
        self.assertTrue(ret, "Failed to bring all the bricks online")
        g.log.info("bricks online: %s", bricks_list)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.clone)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online" % self.clone))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.clone)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.clone)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.clone))
        g.log.info("Volume %s : All process are online", self.clone)

        # wait for the heal process to complete
        # Heal is monitored on the clone, the volume whose bricks were down.
        g.log.info("waiting for heal process to complete")
        ret = monitor_heal_completion(self.mnode, self.clone)
        self.assertTrue(ret, "Failed to complete the heal process")
        g.log.info("Successfully completed heal process")

        # Check areequal
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.clone)
        subvols = get_subvols(self.mnode, self.clone)
        num_subvols = len(subvols['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s: %s", self.clone,
                   num_subvols)

        # Get arequals and compare
        # Every subvolume is checked: the first brick of a subvolume is the
        # reference and each remaining brick must report the same total.
        g.log.info("Starting to Compare areequals")
        arequal_cmd = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan')
        for subvol_brick_list in subvols['volume_subvols']:
            first_brick_total = None
            for brick in subvol_brick_list:
                node, brick_path = brick.split(':')
                ret, brick_arequal, _ = g.run(node, arequal_cmd % brick_path)
                self.assertFalse(
                    ret, 'Failed to get arequal on brick %s' % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                # The total is the value after the last ':' on the last
                # line of the command output.
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
                if first_brick_total is None:
                    first_brick_total = brick_total
                    continue
                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for subvol and %s are not equal' % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for distributed-replicated')
Exemple #3
0
    def test_create_snap_bricks(self):
        """
        Exercise snapshot creation while one brick of the volume is offline.

        1. get brick list
        2. check all bricks are online
        3. Selecting one brick randomly to bring it offline
        4. get brick list
        5. check that not all bricks are online any more
        6. Offline Bricks list
        7. Online Bricks list
        8. Create snapshot of volume
        9. snapshot create should fail
        """

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # check all bricks are online
        g.log.info("Verifying all bricks are online or not.....")
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, ("Not all bricks are online"))
        g.log.info("All bricks are online.")

        # Selecting one brick randomly to bring it offline
        g.log.info("Selecting one brick randomly to bring it offline")
        brick_to_bring_offline = random.choice(bricks_list)
        g.log.info("Brick to bring offline:%s ", brick_to_bring_offline)
        # Pass a one-element list, as the other tests do, instead of a
        # bare brick string.
        ret = bring_bricks_offline(self.volname, [brick_to_bring_offline],
                                   None)
        self.assertTrue(ret, "Failed to bring the bricks offline")
        g.log.info("Randomly Selected brick: %s", brick_to_bring_offline)

        # get brick list
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # One brick was just taken down, so the "all online" check is
        # expected to be false here.
        g.log.info("Verifying all bricks are online or not.....")
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertFalse(ret, ("All bricks are still online"))
        g.log.info("Not all bricks are online, as expected.")

        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # Offline Bricks list
        offbricks = get_offline_bricks_list(self.mnode, self.volname)
        g.log.info("Bricks Offline: %s", offbricks)

        # Online Bricks list
        onbricks = get_online_bricks_list(self.mnode, self.volname)
        g.log.info("Bricks Online: %s", onbricks)

        # Create snapshot of volume
        # NOTE(review): snap_create is unpacked as a (ret, out, err) tuple
        # by other callers, so assertTrue on the whole return value cannot
        # fail. The docstring says the create should fail while the
        # messages below expect success; confirm the intended outcome
        # before turning this into a real return-code check.
        ret = snap_create(self.mnode, self.volname, "snap1", False,
                          "Description with $p3c1al characters!")
        self.assertTrue(ret, ("Failed to create snapshot snap1"))
        g.log.info("Snapshot snap1 of volume %s created Successfully",
                   self.volname)

        # Volume status
        ret = get_volume_info(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to perform gluster volume "
                              "info on volume %s" % self.volname))
        g.log.info("Gluster volume info on volume %s is successful",
                   self.volname)
        # snapshot list
        # snap_list returns (ret, out, err); assert on the return code.
        ret, _, _ = snap_list(self.mnode)
        self.assertEqual(
            ret, 0, ("Failed to list snapshot of volume %s" % self.volname))
        g.log.info("Snapshot list command for volume %s was successful",
                   self.volname)
    def test_ec_all_healtypes(self):
        """
        Check that dir1 is unchanged after bricks go down and are healed.

        Test steps:
        - Create directory dir1
        - Create files inside dir1
        - Rename all file inside dir1
        - Create softlink and hardlink of files in mountpoint
        - Create tiny, small, medium and large file
        - Get arequal of dir1
        - Create directory dir2
        - Creating files on dir2
        - Bring down other bricks to max redundancy
        - Create directory dir3
        - Start pumping IO to dir3
        - Validating IO's on dir2 and waiting to complete
        - Bring bricks online
        - Wait for bricks to come online
        - Check if bricks are online
        - Monitor heal completion
        - Get arequal of dir1
        - Compare arequal of dir1
        """

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        # Get the bricks from the volume
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        mountpoint = self.mounts[0].mountpoint
        client = self.mounts[0].client_system

        # Creating dir1
        ret = mkdir(client, "%s/dir1" % mountpoint)
        self.assertTrue(ret, "Failed to create dir1")
        g.log.info("Directory dir1 on %s created successfully", self.mounts[0])

        # Create files inside dir1
        cmd = ('touch %s/dir1/file{1..5};' % mountpoint)
        ret, _, _ = g.run(client, cmd)
        self.assertFalse(ret, "File creation failed")
        g.log.info("File created successfull")

        # Rename all files inside dir1
        # No "cd ~" inside the loop: it moved the shell out of dir1 after
        # the first file, so the remaining names were resolved in the home
        # directory and the failures were masked by cd's exit status.
        # "&&" keeps the loop from running elsewhere if the cd fails.
        cmd = ('cd %s/dir1/ && '
               'for FILENAME in *; '
               'do mv $FILENAME Unix_$FILENAME; '
               'done;' % mountpoint)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Failed to rename files on " "client")
        g.log.info("Successfully renamed files on client")

        # Create softlink and hardlink of files in mountpoint
        for link_type, ln_mode in (('Softlink', 'ln -s'),
                                   ('Hardlink', 'ln')):
            cmd = ('cd %s/dir1/ && '
                   'for FILENAME in *; '
                   'do %s $FILENAME %s_$FILENAME; '
                   'done;' % (mountpoint, ln_mode, link_type.lower()))
            ret, _, _ = g.run(client, cmd)
            self.assertFalse(ret, "Creating %ss have failed" % link_type)
            g.log.info("%s of files have been changed successfully",
                       link_type)

        # Create tiny, small, medium and large file
        # at mountpoint. Offset to differ filenames
        # at diff clients.
        # The target path is anchored at the mountpoint; a bare file name
        # would be created in the remote shell's working directory.
        for offset, mount_obj in enumerate(self.mounts, start=1):
            for size, kind in (('100', 'tiny'), ('20M', 'small'),
                               ('200M', 'medium'), ('1G', 'large')):
                cmd = ('fallocate -l %s %s/%s_file%d.txt'
                       % (size, mount_obj.mountpoint, kind, offset))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret,
                                 "Fallocate for %s files failed" % kind)
                g.log.info("Fallocate for %s files successfully", kind)

        # Get arequal of dir1
        ret, result_before_brick_down = (collect_mounts_arequal(self.mounts[0],
                                                                path='dir1/'))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal of dir1 ' 'is successful')

        # Creating dir2
        ret = mkdir(client, "%s/dir2" % mountpoint)
        self.assertTrue(ret, "Failed to create dir2")
        g.log.info("Directory dir2 on %s created successfully", self.mounts[0])

        # Creating files on dir2
        # Write IO
        # Started asynchronously so the writes overlap with the brick
        # outage below; they are validated after the dir3 IO.
        all_mounts_procs, count = [], 1
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s/dir2" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        bricks_to_offline = sample(bricks_list, 2)
        ret = bring_bricks_offline(self.volname, bricks_to_offline)
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Creating dir3
        ret = mkdir(client, "%s/dir3" % mountpoint)
        self.assertTrue(ret, "Failed to create dir3")
        g.log.info("Directory dir3 on %s created successfully", self.mounts[0])

        # Start pumping IO to dir3
        cmd = ("cd %s/dir3; for i in `seq 1 100` ;"
               "do dd if=/dev/urandom of=file$i bs=1M "
               "count=5;done" % mountpoint)

        ret, _, err = g.run(client, cmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writing on files while a brick is DOWN')

        appendcmd = ("cd %s/dir3; for i in `seq 1 100` ;"
                     "do dd if=/dev/urandom of=file$i bs=1M "
                     "count=1 oflag=append conv=notrunc;done" % mountpoint)

        readcmd = ("cd %s/dir3; for i in `seq 1 100` ;"
                   "do dd if=file$i of=/dev/null bs=1M "
                   "count=5;done" % mountpoint)

        ret, _, err = g.run(client, appendcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished append on files after redundant bricks offline')

        ret, _, err = g.run(client, readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished read on files after redundant bricks offline')

        # Validating IO's on dir2 and waiting to complete
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all IO's")

        # Bring bricks online
        ret = bring_bricks_online(self.mnode, self.volname, bricks_to_offline)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("EXPECTED : Bricks are online")

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Get arequal of dir1
        ret, result_after_brick_up = (collect_mounts_arequal(self.mounts[0],
                                                             path='dir1/'))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal of dir1 ' 'is successful')

        # Comparing arequals of dir1
        self.assertEqual(
            result_before_brick_down, result_after_brick_up,
            'Arequals are not equals before and after '
            'bringing down redundant bricks')
        g.log.info('Arequals are equals before and after '
                   'bringing down redundant bricks')
    def test_ec_data_integrity(self):
        """
        Check that dir1 reads back identically while bricks are offline.

        Test steps:
        - Create directory dir1
        - Create 5 dir and 5 files in each dir in directory 1
        - Rename all file inside dir1
        - Truncate at any dir in mountpoint inside dir1
        - Create softlink and hardlink of files in mountpoint
        - chmod, chown, chgrp inside dir1
        - Create tiny, small, medium and large file
        - Creating files on client side for dir1
        - Validating IO's and waiting to complete
        - Get arequal of dir1
        - Bring redundant bricks offline
        - Get arequal of dir1 after 1st set of bricks down
        - Bring the bricks back online
        - Bring redundant bricks offline
        - Get arequal of dir1 after 2nd set of bricks down
        """

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals

        # Creating dir1
        ret = mkdir(self.mounts[0].client_system, "%s/dir1"
                    % self.mounts[0].mountpoint)
        self.assertTrue(ret, "Failed to create dir1")
        g.log.info("Directory dir1 on %s created successfully", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start, end = 1, 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = ("%s..%s" % (str(start), str(end)))
            file_range = ("%s..%s" % (str(start), str(end)))
            # Create dir 1-5 at mountpoint.
            ret = mkdir(mount_obj.client_system, "%s/dir1/dir{%s}"
                        % (mount_obj.mountpoint, dir_range))
            self.assertTrue(ret, "Failed to create directory")
            g.log.info("Directory created successfully")

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};'
                   % (mount_obj.mountpoint, dir_range, file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "File creation failed")
            g.log.info("File created successfull")

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1 at mountpoint on dir1
        # The loops below no longer run "cd ~" per iteration: it moved the
        # shell out of the target directory after the first file, so the
        # remaining names were resolved in the home directory and the
        # failures were masked by cd's exit status. "&&" keeps the loop
        # from running elsewhere if the cd fails.
        cmd = ('cd %s/dir1/dir1/ && '
               'for FILENAME in *; '
               'do mv $FILENAME Unix_$FILENAME; '
               'done;'
               % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to rename file on "
                         "client")
        g.log.info("Successfully renamed file on client")

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/ && '
                   'for FILENAME in *; '
                   'do echo > $FILENAME; '
                   'done;'
                   % (mount_obj.mountpoint, str(start)))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Truncate failed")
            g.log.info("Truncate of files successfull")
            # Advance to the next client's first directory, matching the
            # link loop below; without this every client hit dir1.
            start += 5

        # Create softlink and hardlink of files in mountpoint
        start = 1
        for mount_obj in self.mounts:
            for link_type, ln_mode in (('softlink', 'ln -s'),
                                       ('hardlink', 'ln')):
                cmd = ('cd %s/dir1/dir%s && '
                       'for FILENAME in *; '
                       'do %s $FILENAME %s_$FILENAME; '
                       'done;'
                       % (mount_obj.mountpoint, str(start), ln_mode,
                          link_type))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret, "Creating %s have failed" % link_type)
                g.log.info("%s of files created successfully", link_type)
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start, end = 2, 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}'
                   % (mount_obj.mountpoint, dir_file_range, dir_file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Changing mode of files has failed")
            g.log.info("Mode of files have been changed successfully")

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}'
                   % (mount_obj.mountpoint, dir_file_range, dir_file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Changing owner of files has failed")
            g.log.info("Owner of files have been changed successfully")

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}'
                   % (mount_obj.mountpoint, dir_file_range, dir_file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Changing group of files has failed")
            g.log.info("Group of files have been changed successfully")
            start += 5
            end += 5

        # Create tiny, small, medium and large file
        # at mountpoint. Offset to differ filenames
        # at diff clients.
        # The target path is anchored at the mountpoint; a bare file name
        # would be created in the remote shell's working directory.
        offset = 1
        for mount_obj in self.mounts:
            for size, filename in (('100', 'tiny_file'), ('20M', 'small_file'),
                                   ('200M', 'medium_file'),
                                   ('1G', 'large_file')):
                cmd = 'fallocate -l {} {}/{}{}.txt'.format(
                    size, mount_obj.mountpoint, filename, offset)
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret, "Fallocate for files failed")
                g.log.info("Fallocate for files successfully")
            offset += 1

        # Creating files on client side for dir1
        # Write IO
        all_mounts_procs, count = [], 1
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s/dir1" % (
                       self.script_upload_path, count,
                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count += 10

        # Validating IO's and waiting to complete
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all IO's")

        # Get arequal of dir1
        ret, result_before_bricks_down = (
            collect_mounts_arequal(self.mounts[0], path='dir1/'))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal of dir1 '
                   'is successful')

        # Bring redundant bricks offline
        brickset_to_offline = self._bring_redundant_bricks_offline(
            self.mnode, self.volname)

        # Get arequal of dir1 after 1st set of bricks down
        ret, result_after_1st_brickset_down = (
            collect_mounts_arequal(self.mounts[0], path='dir1/'))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal of dir1 '
                   'is successful')

        # Bring bricks online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  brickset_to_offline)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("EXPECTED : Bricks are online")

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring redundant bricks offline
        brickset_to_offline = self._bring_redundant_bricks_offline(
            self.mnode, self.volname)

        # Get arequal of dir1 after 2nd set of bricks down
        ret, result_after_2nd_brickset_down = (
            collect_mounts_arequal(self.mounts[0], path='dir1/'))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal of dir1 '
                   'is successful')

        # Comparing arequals
        self.assertEqual(result_before_bricks_down,
                         result_after_1st_brickset_down,
                         'Arequals are not equals before brickset '
                         'down and after 1st brickset down')
        g.log.info('Arequals are equals before brickset down '
                   'and after brickset down')

        self.assertEqual(result_after_2nd_brickset_down,
                         result_after_1st_brickset_down,
                         'Arequals are not equals before 2nd set '
                         'brick down and after 1st set brick down')
        g.log.info('Arequals are equals for 2nd brickset down '
                   'and 1st brickset down')
    def test_ec_quorumcount_5(self):
        """
        Exercise reads, writes and rebalance with disperse.quorum-count = 5.

        Test Steps:
        - Write IO's when all bricks are online
        - Get subvol from which bricks to be brought down
        - Set volume disperse quorum count to 5
        - Start writing and reading IO's
        - Bring a brick down,say b1
        - Validate write and read is successful
        - Bring a brick down,say b2
        - Validate write has failed and read is successful
        - Start IO's again while quorum is not met on volume
          write should fail and read should pass
        - Add-brick and log
        - Start Rebalance
        - Wait for rebalance,which should fail as quorum is not met
        - Bring brick online
        - Wait for brick to come online
        - Check if bricks are online
        - Start IO's again when all bricks are online
        - IO's should complete successfully
        - Start IO's again and reset volume
        - Bring down other bricks to max redundancy
        - Validating IO's and waiting to complete
        """

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals

        # The first mount drives the synchronous writes; the second mount's
        # client is used for the reads.
        mountpoint = self.mounts[0].mountpoint
        client1 = self.mounts[0].client_system
        client2 = self.mounts[1].client_system

        # Write IO's  when all bricks are online
        writecmd = ("cd %s; for i in `seq 1 100` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writes on files sucessfully')

        # Select a subvol from which bricks to be brought down:
        # one randomly chosen subvolume, then two distinct bricks out of it.
        sub_vols = get_subvols(self.mnode, self.volname)
        bricks_list1 = list(choice(sub_vols['volume_subvols']))
        brick_1, brick_2 = sample(bricks_list1, 2)

        # Set volume disperse quorum count to 5
        ret = set_volume_options(self.mnode, self.volname,
                                 {"disperse.quorum-count": "5"})
        self.assertTrue(
            ret, 'Failed to set volume {}'
            ' options'.format(self.volname))
        g.log.info('Successfully set disperse quorum on %s', self.volname)

        # Start writing and reading IO's.
        # Each mount gets its own --dirname-start-num (1, 11, ...) so the
        # background writers create differently numbered directories.
        procwrite, procread, count = [], [], 1
        for mount_obj in self.mounts:
            writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                        "--dirname-start-num %d --dir-depth 5 "
                        "--dir-length 10 --max-num-of-dirs 2 "
                        "--num-of-files 15 %s" %
                        (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               writecmd,
                               user=mount_obj.user)
            procwrite.append(proc)
            count += 10

        # generate_read_cmd() populates self.readcmd, which is run below.
        self.generate_read_cmd(mountpoint, '1', '10')
        ret = g.run_async(client2, self.readcmd)
        procread.append(ret)

        # Brick 1st brick down
        ret = bring_bricks_offline(self.volname, brick_1)
        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
        g.log.info('Brick %s is offline successfully', brick_1)

        writecmd = ("cd %s; for i in `seq 101 110` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writes on files sucessfully')

        self.generate_read_cmd(mountpoint, '101', '110')
        ret, _, err = g.run(client1, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished reads on files sucessfully')

        # Brick 2nd brick down
        ret = bring_bricks_offline(self.volname, brick_2)
        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2))
        g.log.info('Brick %s is offline successfully', brick_2)

        # Validate write has failed and read is successful
        ret = validate_io_procs(procwrite, self.mounts)
        self.assertFalse(
            ret, 'Write successful even after disperse quorum is '
            'not met')
        g.log.info('EXPECTED - Writes failed as disperse quroum is not met')

        ret = validate_io_procs(procread, self.mounts[1])
        self.assertTrue(ret, 'Read operation failed on the client')
        g.log.info('Reads on files successful')

        # Start IO's again while quorum is not met on volume
        procwrite = []
        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                    "--dirname-start-num 20 --dir-depth 1 "
                    "--dir-length 10 --max-num-of-dirs 1 "
                    "--num-of-files 10 %s" %
                    (self.script_upload_path, mountpoint))
        proc = g.run_async(client1, writecmd)
        procwrite.append(proc)
        ret = validate_io_procs(procwrite, self.mounts[0])
        self.assertFalse(
            ret, 'Write successful even after disperse quorum is '
            'not met')
        g.log.info('EXPECTED - Writes failed as disperse quroum is not met')

        self.generate_read_cmd(mountpoint, '1', '100')
        ret, _, err = g.run(client2, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Reads on files successful')

        # Add brick
        ret = expand_volume(self.mnode,
                            self.volname,
                            self.servers,
                            self.all_servers_info,
                            force=True)
        self.assertTrue(
            ret, ("Failed to expand the volume {}".format(self.volname)))
        g.log.info("Expanding volume %s is successful", self.volname)

        # Log Volume Info and Status after expanding the volume
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume {}".format(self.volname)))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ('Rebalance failed on the volume'
                                  ' {}'.format(self.volname)))
        g.log.info('Rebalance has started on volume %s', self.volname)

        # Wait for rebalance to complete
        # Which should also fail as quorum is not met
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=600)
        self.assertFalse(
            ret, "Rebalance passed though disperse quorum "
            "is not met on volume")
        g.log.info(
            "Expected: Rebalance failed on the volume %s,disperse"
            " quorum is not met", self.volname)

        # Bring brick online
        brick_list = brick_1, brick_2
        ret = bring_bricks_online(self.mnode, self.volname, brick_list)
        self.assertTrue(ret, 'Brick not brought online')
        g.log.info('Brick brought online successfully')

        # Wait for brick to come online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, 'Bricks are not online')
        g.log.info('EXPECTED : Bricks are online')

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Start IO's again when all bricks are online
        writecmd = ("cd %s; for i in `seq 101 200` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)
        self.generate_read_cmd(mountpoint, '101', '120')

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        # '%s' is required here: '% s' would be parsed as the space flag plus
        # an 's' conversion and swallow the first letter of "successful".
        g.log.info('Writes on client %s successful', client1)

        ret, _, err = g.run(client2, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Read on client %s successful', client2)

        # Start IO's again
        all_mounts_procs, count = [], 30
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count += 10

        # Reset volume (issued while the async IO started above is running)
        ret, _, err = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, err)
        g.log.info('Reset of volume %s successful', self.volname)

        # Bring down other bricks to max redundancy
        # Bringing bricks offline: two random bricks of the same subvolume
        # that was selected at the start of the test.
        bricks_to_offline = sample(bricks_list1, 2)
        ret = bring_bricks_offline(self.volname, bricks_to_offline)
        self.assertTrue(ret, 'Redundant bricks not offline')
        g.log.info('Redundant bricks are offline successfully')

        # Validating IO's and waiting to complete
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, 'IO failed on some of the clients')
        g.log.info("Successfully validated all IO's")
    def test_brickreset_ec_volume(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """
        Exercise reset-brick on the volume with and without running IO.

        - Start resource consumption tool
        - Create IO on dir2 of volume mountpoint
        - Reset brick start
        - Check if brick is offline
        - Reset brick with destination same as source with force running IO's
        - Validating IO's and waiting for it to complete on dir2
        - Remove dir2
        - Create 5 directory and 5 files in dir of mountpoint
        - Rename all files inside dir1 at mountpoint
        - Create softlink and hardlink of files in dir1 of mountpoint
        - Delete op for deleting all file in one of the dirs inside dir1
        - Change chmod, chown, chgrp
        - Create tiny, small, medium and large file
        - Create IO's
        - Validating IO's and waiting for it to complete
        - Calculate arequal before killing brick
        - Get brick from Volume
        - Reset brick
        - Check if brick is offline
        - Reset brick by giving a different source and dst node
        - Reset brick by giving dst and source same without force
        - Obtain hostname
        - Reset brick with dst-source same force using hostname - Successful
        - Monitor heal completion
        - Bring down other bricks to max redundancy
        - Get arequal after bringing down bricks
        - Bring bricks online
        - Reset brick by giving a same source and dst brick
        - Kill brick manually
        - Check if brick is offline
        - Reset brick by giving a same source and dst brick
        - Wait for brick to come online
        - Bring down other bricks to max redundancy
        - Get arequal after bringing down bricks
        - Bring bricks online
        - Remove brick from backend
        - Check if brick is offline
        - Reset brick by giving dst and source same without force - Successful
        - Monitor heal completion
        - Compare the arequal's calculated
        """
        # Starting resource consumption using top.
        # The backslash inside the literal is a line continuation, so the
        # leading spaces of the next line are part of the command string.
        log_file_mem_monitor = getcwd() + '/mem_usage.log'
        cmd = 'for i in {1..100};do top -n 1 -b|egrep \
                "RES|gluster" & free -h 2>&1 >> ' + \
            log_file_mem_monitor + ' ;sleep 10;done'
        g.log.info(cmd)
        for mount_obj in self.mounts:
            g.run_async(mount_obj.client_system, cmd)

        # Get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # Creating directory2
        cmd = ('mkdir %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory2")
        g.log.info("Directory 2 on %s created successfully", self.mounts[0])

        # Creating files on client side for dir2
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s/dir2" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Reset a brick (randomly chosen) while the IO above is running
        g.log.info('Reset of brick using start')
        brick_reset = choice(bricks_list)
        ret, _, _ = reset_brick(self.mnode, self.volname, brick_reset, "start")

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        offline_bricks = get_offline_bricks_list(self.mnode, self.volname)
        self.assertEqual(offline_bricks[0], brick_reset, "Brick not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with dest same as source with force while running IO's
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode,
                                self.volname,
                                brick_reset,
                                "commit",
                                brick_reset,
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Validating IO's and waiting to complete
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Deleting dir2
        cmd = ('rm -rf %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory2")
        g.log.info("Directory 2 deleted successfully for %s", self.mounts[0])

        # Empty the shared proc list in place before the next IO round.
        del self.all_mounts_procs[:]

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory1")
        g.log.info("Directory 1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start, end = 1, 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = str(start) + ".." + str(end)
            file_range = str(start) + ".." + str(end)
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            g.run(mount_obj.client_system, cmd)

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            g.run(mount_obj.client_system, cmd)

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1 at mountpoint on dir1
        # NOTE(review): run_parallel is invoked inside the loop with the
        # client list accumulated so far, so earlier clients run the command
        # again on later iterations - confirm this is intended.
        clients = []
        for mount_obj in self.mounts:
            clients.append(mount_obj.client_system)
            cmd = ('cd %s/dir1/dir1/; '
                   'for FILENAME in *;'
                   'do mv $FILENAME Unix_$FILENAME; '
                   'done;' % mount_obj.mountpoint)
            g.run_parallel(clients, cmd)

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        # NOTE(review): start is never advanced in this loop, so every client
        # truncates inside dir1/dir1 - confirm whether an increment is missing.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start + 1)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # Delete op for deleting all file in one of the dirs. start is being
        # used as offset like in previous testcase in dir1
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do rm -f $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start, end = 2, 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            start += 5
            end += 5

        # Create tiny, small, medium nd large file
        # at mountpoint. Offset to differ filenames
        # at diff clients.
        # NOTE(review): the commands below carry no mountpoint path, so the
        # files are presumably created in the client's working directory
        # rather than on the volume - confirm.
        offset = 1
        for mount_obj in self.mounts:
            cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 20M small_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 1G large_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            offset += 1

        # Creating files on client side for dir1
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s/dir1" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validating IO's and waiting to complete
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Get areequal before killing the brick; this is the baseline the
        # later checksums are compared against.
        g.log.info('Getting areequal before killing of brick...')
        ret, result_before_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal before killing of brick ' 'is successful')

        # Reset a brick
        g.log.info('Reset of brick using start')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "start")

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]])
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick by giving a different source and dst brick
        g.log.info('Reset of brick by giving different src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "commit", bricks_list[1])
        self.assertNotEqual(ret, 0, "Not Expected: Reset brick is successfull")
        g.log.info("Expected : Source and Destination brick must be same for"
                   " reset")

        # Reset brick with destination same as source (no force: must fail)
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[0],
                                "commit", bricks_list[0])
        self.assertNotEqual(ret, 0, "Not Expected : Reset brick is successful")
        g.log.info("Expected : Reset brick failed,Vol id is same use force")

        # Obtain hostname of node
        ret, hostname_node1, _ = g.run(self.mnode, "hostname")
        # The failure message is formatted explicitly; passing a
        # (template, value) tuple would print the raw tuple instead.
        self.assertEqual(ret, 0,
                         "Failed to obtain hostname of node %s" % self.mnode)
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.mnode, hostname_node1.strip())

        # Reset brick with destination same as source with force using hostname
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(),
                                self.volname,
                                bricks_list[0],
                                "commit",
                                bricks_list[0],
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        # Get List of bricks to bring offline

        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[1:3])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')
        sleep(2)

        # Check if 4 bricks are online
        # NOTE(review): the hard-coded indices assume the volume has at
        # least six bricks - confirm against the volume configuration.
        all_bricks = [
            bricks_list[0], bricks_list[3], bricks_list[4], bricks_list[5]
        ]
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Check mount point (the return code is not asserted)
        cmd = 'ls -lrt /mnt'
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details ")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down bricks...')
        ret, result_offline_redundant_brick1 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks offline '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[1:3]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Reset brick without bringing down brick (must fail)
        g.log.info('Reset of brick by giving different src and dst brick')
        ret, _, _ = reset_brick(self.mnode, self.volname, bricks_list[1],
                                "commit", bricks_list[1])
        self.assertNotEqual(ret, 0, "Not Expected: Reset brick passed")
        g.log.info("Expected : Brick reset failed as source brick must be"
                   " stopped")

        # Kill the brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Check if the brick is offline
        g.log.info("Check the brick status if it is offline")
        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with dest same as source after killing brick manually
        g.log.info('Reset of brick by giving different src and dst brick')
        ret, _, _ = reset_brick(self.mnode,
                                self.volname,
                                bricks_list[1],
                                "commit",
                                bricks_list[1],
                                force="true")
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("Expected : Bricks are online")

        # Check if bricks are online
        all_bricks = get_all_bricks(self.mnode, self.volname)
        ret = are_bricks_online(self.mnode, self.volname, all_bricks)
        self.assertTrue(ret, 'All bricks are not online')
        g.log.info('All bricks are online')

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[2:4])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Check mount point (the return code is not asserted)
        cmd = 'ls -lrt /mnt'
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        g.log.info("Client mount point details")

        # Get arequal after bringing down bricks
        g.log.info('Getting arequal after bringing down redundant bricks...')
        ret, result_offline_redundant_brick2 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks offline '
                   'is successful')

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[2:4]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Removing brick from backend: a brick entry is "<host>:<path>",
        # so element 1 of the split is the path that gets removed.
        brick = bricks_list[0].strip().split(":")
        cmd = "rm -rf %s" % brick[1]
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to delete brick %s" % bricks_list[0])
        g.log.info("Removed brick %s sucessfully", bricks_list[0])

        # Check if the brick is offline, polling every 2 seconds.
        # The counter must be incremented ("+="); assigning "+1" pinned it
        # at 1 and let the loop spin forever when the brick stayed online.
        count = 0
        while count <= 20:
            g.log.info("Check the brick status if it is offline")
            ret = are_bricks_offline(self.mnode, self.volname,
                                     [bricks_list[0]])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(ret, "Brick is not offline")
        g.log.info("Expected : Brick is offline")

        # Reset brick with destination same as source
        g.log.info('Reset of brick with same src and dst brick')
        ret, _, _ = reset_brick(hostname_node1.strip(), self.volname,
                                bricks_list[0], "commit", bricks_list[0])
        self.assertEqual(ret, 0, "Not Expected: Reset brick failed")
        g.log.info("Expected : Reset brick is successful")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Comparing arequals
        self.assertEqual(
            result_before_killing_brick, result_offline_redundant_brick1,
            'Arequals are not equals before killing brick'
            'processes and after offlining redundant bricks')
        g.log.info('Arequals are equals before killing brick'
                   'processes and after offlining redundant bricks')

        # Comparing arequals
        self.assertEqual(
            result_offline_redundant_brick2, result_offline_redundant_brick1,
            'Arequals are not equals for offlining redundant'
            ' bricks')
        g.log.info('Arequals are equals for offlining redundant bricks')

        # Deleting dir1
        cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory1")
        g.log.info("Directory 1 deleted successfully for %s", self.mounts[0])
# Example #8
    def test_fops_ec_brickdown(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """
        Exercise file operations on the volume while two of its bricks are
        brought offline and then back online, and verify heal completes.

        - 1.Start resource consumption tool
        - 2.Create directory dir1
        - 3.Create 5 dir and 5 files in each dir in directory 1
        - 4.Rename all file inside dir1
        - 5.Truncate at any dir in mountpoint inside dir1
        - 6.Create softlink and hardlink of files in mountpoint
        - 7.chmod, chown, chgrp inside dir1
        - 8.Create tiny, small, medium and large file
        - 9.Creating files on client side for dir1
        - 10.Bring redundant bricks down
        - 11.Validating IO's and waiting to complete
        - 12.Creating dir2
        - 13.Creating files on client side for dir2
        - 14.Bring bricks online
        - 15.Wait for brick to come online
        - 16.Check if bricks are online
        - 17.Monitor heal completion
        - 18.Validating IO's and waiting to complete
        - 19.Check that the memory usage log file exists
        """

        def start_deep_dir_io(target_dir):
            """
            Launch create_deep_dirs_with_files asynchronously on every mount,
            writing under <mountpoint>/<target_dir>, and return the list of
            async process handles.
            """
            procs = []
            # Each client gets its own --dirname-start-num so the clients
            # do not create identically named directories.
            dirname_start = 1
            for mount_obj in self.mounts:
                g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "--dirname-start-num %d "
                       "--dir-depth 2 "
                       "--dir-length 10 "
                       "--max-num-of-dirs 5 "
                       "--num-of-files 5 %s/%s" %
                       (self.script_upload_path, dirname_start,
                        mount_obj.mountpoint, target_dir))
                procs.append(g.run_async(mount_obj.client_system,
                                         cmd,
                                         user=mount_obj.user))
                dirname_start += 10
            return procs

        # Starting resource consumption using top
        # NOTE(review): as written, `top | egrep` is put in the background by
        # the single `&` and its output is not redirected, so only the output
        # of `free -h` is appended to the log file. The `{1..100}` brace
        # expansion also presumably relies on the remote shell being bash.
        # Confirm whether the top output was meant to be logged as well.
        log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log'
        cmd = ('for i in {1..100};do top -n 1 -b|egrep \
              "RES|gluster" & free -h 2>&1 >> %s ; \
              sleep 10;done' % (log_file_mem_monitor))
        g.log.info(cmd)
        # The monitors are fire-and-forget: their handles are not kept.
        for server in self.servers:
            g.run_async(server, cmd)

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Brick list is empty")
        g.log.info("Brick List : %s", bricks_list)

        # The same slice of bricks is taken offline and later brought back.
        redundant_bricks = bricks_list[2:4]

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create dir1")
        g.log.info("dir1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start, end = 1, 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = ("%s..%s" % (str(start), str(end)))
            file_range = ("%s..%s" % (str(start), str(end)))
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Directory creation failed")
            g.log.info("Directory created successfull")

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "File creation failed")
            g.log.info("File created successfull")

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1 at mountpoint on dir1
        cmd = ('cd %s/dir1/dir1/; '
               'for FILENAME in *;'
               'do mv $FILENAME Unix_$FILENAME; '
               'done;' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to rename file on client")
        g.log.info("Successfully renamed file on client")

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Truncate failed")
            g.log.info("Truncate of files successfull")
            # Advance to the first directory created by the next client
            # (dir6, dir11, ...), matching the creation loop above.
            start += 5

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Creating Softlinks have failed")
            g.log.info("Softlink of files have been changed successfully")

            # Hardlinks go into the next directory so they do not pick up
            # the softlinks that were just created.
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start + 1)))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Creating Hardlinks have failed")
            g.log.info("Hardlink of files have been changed successfully")
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        attribute_ops = (
            ('chmod 777', "Changing mode of files has failed",
             "Mode of files have been changed successfully"),
            ('chown root', "Changing owner of files has failed",
             "Owner of files have been changed successfully"),
            ('chgrp root', "Changing group of files has failed",
             "Group of files have been changed successfully"),
        )
        start, end = 2, 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            for operation, failure_msg, success_msg in attribute_ops:
                cmd = ('%s %s/dir1/dir{%s}/file{%s}' %
                       (operation, mount_obj.mountpoint,
                        dir_file_range, dir_file_range))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret, failure_msg)
                g.log.info(success_msg)
            start += 5
            end += 5

        # Create tiny, small, medium and large file
        # at mountpoint. Offset to differ filenames
        # at diff clients.
        # The target path is anchored at the mountpoint; a bare relative
        # filename would be created in the remote shell's working directory
        # instead of on the volume under test.
        fallocate_sizes = (('tiny', '100'), ('small', '20M'),
                           ('medium', '200M'), ('large', '1G'))
        for offset, mount_obj in enumerate(self.mounts, start=1):
            for label, size in fallocate_sizes:
                cmd = ('fallocate -l %s %s/%s_file%d.txt' %
                       (size, mount_obj.mountpoint, label, offset))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret,
                                 "Fallocate for %s files failed" % label)
                g.log.info("Fallocate for %s files successfully", label)

        # Creating files on client side for dir1
        # Write IO
        all_mounts_procs = start_deep_dir_io('dir1')

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, redundant_bricks)
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Creating dir2
        cmd = ('mkdir  %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create dir2 ")
        g.log.info("dir2 created successfully for %s", self.mounts[0])

        # Creating files on client side for dir2
        # Write IO (started while the bricks are still offline)
        all_mounts_procs = start_deep_dir_io('dir2')

        # Bring bricks online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  redundant_bricks)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("EXPECTED : Bricks are online")

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Check file exist for memory log
        # Only self.mnode is checked, although the monitor was started on
        # every server.
        g.log.info("Validating log exists")
        ret = file_exists(self.mnode, log_file_mem_monitor)
        self.assertTrue(ret, "Memory log file does not exist")
        g.log.info("Memory log file exists")