Code example #1
    def test_mkdir_with_subvol_down(self):
        '''
        Test mkdir hashed to a down subvol
        '''
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-branches
        # pylint: disable=too-many-statements
        # pylint: disable=W0212
        mount_obj = self.mounts[0]
        mountpoint = mount_obj.mountpoint

        # directory that needs to be created
        parent_dir = mountpoint + '/parent'
        child_dir = mountpoint + '/parent/child'

        # get hashed subvol for name "parent"
        subvols = (get_subvols(self.mnode, self.volname))['volume_subvols']
        hashed, count = find_hashed_subvol(subvols, "/", "parent")
        self.assertIsNotNone(hashed, "Could not find hashed subvol")

        # bring target_brick offline
        bring_bricks_offline(self.volname, subvols[count])
        ret = are_bricks_offline(self.mnode, self.volname, subvols[count])
        self.assertTrue(
            ret, ('Error in bringing down subvolume %s', subvols[count]))
        g.log.info('target subvol is offline')

        # create parent dir
        ret, _, err = g.run(self.clients[0], ("mkdir %s" % parent_dir))
        self.assertNotEqual(
            ret, 0, ('Expected mkdir of %s to fail with %s', parent_dir, err))
        g.log.info('mkdir of dir %s failed as expected', parent_dir)

        # check that parent_dir does not exist on any bricks and client
        brickobject = create_brickobjectlist(subvols, "/")
        for brickdir in brickobject:
            adp = "%s/parent" % brickdir.path
            bpath = adp.split(":")
            self.assertTrue(
                (file_exists(brickdir._host, bpath[1])) == 0,
                ('Expected dir %s not to exist on servers', parent_dir))

        for client in self.clients:
            self.assertTrue(
                (file_exists(client, parent_dir)) == 0,
                ('Expected dir %s not to exist on clients', parent_dir))

        g.log.info('dir %s does not exist on mount as expected', parent_dir)

        # Bring up the subvols and create parent directory
        bring_bricks_online(self.mnode,
                            self.volname,
                            subvols[count],
                            bring_bricks_online_methods=None)
        ret = are_bricks_online(self.mnode, self.volname, subvols[count])
        self.assertTrue(
            ret, ("Error in bringing back subvol %s online", subvols[count]))
        g.log.info('Subvol is back online')

        ret, _, _ = g.run(self.clients[0], ("mkdir %s" % parent_dir))
        self.assertEqual(ret, 0,
                         ('Expected mkdir of %s to succeed', parent_dir))
        g.log.info('mkdir of dir %s successful', parent_dir)

        # get hash subvol for name "child"
        hashed, count = find_hashed_subvol(subvols, "parent", "child")
        self.assertIsNotNone(hashed, "Could not find hashed subvol")

        # bring target_brick offline
        bring_bricks_offline(self.volname, subvols[count])
        ret = are_bricks_offline(self.mnode, self.volname, subvols[count])
        self.assertTrue(
            ret, ('Error in bringing down subvolume %s', subvols[count]))
        g.log.info('target subvol is offline')

        # create child dir
        ret, _, err = g.run(self.clients[0], ("mkdir %s" % child_dir))
        self.assertNotEqual(
            ret, 0, ('Expected mkdir of %s to fail with %s', child_dir, err))
        g.log.info('mkdir of dir %s failed as expected', child_dir)

        # check if child_dir exists on any bricks
        for brickdir in brickobject:
            adp = "%s/parent/child" % brickdir.path
            bpath = adp.split(":")
            self.assertTrue(
                (file_exists(brickdir._host, bpath[1])) == 0,
                ('Expected dir %s not to exist on servers', child_dir))
        for client in self.clients:
            self.assertTrue(
                (file_exists(client, child_dir)) == 0,
                ('Expected dir %s not to exist on clients', child_dir))

        g.log.info('dir %s does not exist on mount as expected', child_dir)
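The test above leans on find_hashed_subvol to know which subvol the name "parent" lands on. As a rough, standalone illustration of that idea only (not gluster's real DHT hash; pick_hashed_subvol and the hash ranges are invented for the sketch), a name can be mapped to a subvol by checking which hash range contains its hash:

import zlib

def pick_hashed_subvol(hash_ranges, name):
    """hash_ranges: one (start, end) pair per subvol; returns the subvol index."""
    # zlib.crc32 is only a stand-in for gluster's DHT hash function
    name_hash = zlib.crc32(name.encode()) & 0xffffffff
    for index, (start, end) in enumerate(hash_ranges):
        if start <= name_hash <= end:
            return index
    return None

# Example: two subvols splitting the 32-bit hash space in half
ranges = [(0x00000000, 0x7fffffff), (0x80000000, 0xffffffff)]
print(pick_hashed_subvol(ranges, "parent"))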
Code example #2
def select_cold_tier_bricks_to_bring_offline(mnode, volname):
    """Randomly selects bricks to bring offline without affecting the cluster
    from a cold tier.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.

    Returns:
        list: On success, returns a list of bricks that can be brought offline
            from the cold tier. If the volume doesn't exist or is not a tiered
            volume, returns an empty list.
    """
    cold_tier_bricks_to_bring_offline = []

    # Check if volume is tiered
    if not is_tiered_volume(mnode, volname):
        return cold_tier_bricks_to_bring_offline

    # get volume type
    volume_type_info = get_volume_type_info(mnode, volname)
    cold_tier_type = volume_type_info['cold_tier_type_info']['coldBrickType']

    # get subvols
    subvols_dict = get_subvols(mnode, volname)
    cold_tier_subvols = subvols_dict['cold_tier_subvols']

    # select bricks from distribute volume
    if cold_tier_type == 'Distribute':
        cold_tier_bricks_to_bring_offline = []

    # select bricks from replicated, distributed-replicated volume
    elif (cold_tier_type == 'Replicate'
          or cold_tier_type == 'Distributed-Replicate'):
        # Get replica count
        cold_tier_replica_count = (
            volume_type_info['cold_tier_type_info']['coldreplicaCount'])

        # Get quorum info
        quorum_info = get_client_quorum_info(mnode, volname)
        cold_tier_quorum_info = quorum_info['cold_tier_quorum_info']

        # Get list of bricks to bring offline
        cold_tier_bricks_to_bring_offline = (
            get_bricks_to_bring_offline_from_replicated_volume(
                cold_tier_subvols, cold_tier_replica_count,
                cold_tier_quorum_info))

    # select bricks from Disperse, Distributed-Disperse volume
    elif (cold_tier_type == 'Disperse'
          or cold_tier_type == 'Distributed-Disperse'):

        # Get redundancy count
        cold_tier_redundancy_count = (
            volume_type_info['cold_tier_type_info']['coldredundancyCount'])

        # Get list of bricks to bring offline
        cold_tier_bricks_to_bring_offline = (
            get_bricks_to_bring_offline_from_disperse_volume(
                cold_tier_subvols, cold_tier_redundancy_count))

    return cold_tier_bricks_to_bring_offline
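The replicate and disperse branches above delegate to glustolibs helpers to decide how many bricks per set can safely go down. A minimal standalone sketch of that rule, with simplified quorum handling and invented function names (this is not glustolibs API):

from random import sample

def safe_offline_count_replicated(replica_count, quorum_type, quorum_count=None):
    """How many bricks per replica set can go down (simplified rule)."""
    if quorum_type == 'auto':
        # a majority of the replica set must stay up
        return replica_count - (replica_count // 2 + 1)
    if quorum_type == 'fixed' and quorum_count is not None:
        return replica_count - quorum_count
    # quorum disabled: keep at least one brick up
    return replica_count - 1

def pick_bricks_to_bring_offline(subvols, count_per_subvol):
    """Randomly pick count_per_subvol bricks from every replica set."""
    return [brick for subvol in subvols
            for brick in sample(subvol, min(count_per_subvol, len(subvol)))]

subvols = [['s1:/b1', 's2:/b2', 's3:/b3'], ['s4:/b4', 's5:/b5', 's6:/b6']]
print(pick_bricks_to_bring_offline(
    subvols, safe_offline_count_replicated(3, 'auto')))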
Code example #3
    def test_access_file_with_stale_linkto_xattr(self):
        """
        Description: Checks if the files are accessible as non-root user if
                     the files have stale linkto xattr.
        Steps:
        1) Create a volume and start it.
        2) Mount the volume on client node using FUSE.
        3) Create a file.
        4) Enable performance.parallel-readdir and
           performance.readdir-ahead on the volume.
        5) Rename the file in order to create
           a linkto file.
        6) Force the linkto xattr values to become stale by changing the dht
           subvols in the graph
        7) Log in as a non-root user and access the file.
        """
        # pylint: disable=protected-access

        # Set permissions on the mount-point
        m_point = self.mounts[0].mountpoint
        ret = set_file_permissions(self.clients[0], m_point, "-R 777")
        self.assertTrue(ret, "Failed to set file permissions")
        g.log.info("Successfully set file permissions on mount-point")

        # Creating a file on the mount-point
        cmd = 'dd if=/dev/urandom of={}/FILE-1 count=1 bs=16k'.format(m_point)
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "File to create file")

        # Enable performance.parallel-readdir and
        # performance.readdir-ahead on the volume
        options = {
            "performance.parallel-readdir": "enable",
            "performance.readdir-ahead": "enable"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Failed to set volume options")
        g.log.info("Successfully set volume options")

        # Finding a file name such that renaming source file to it will form a
        # linkto file
        subvols = (get_subvols(self.mnode, self.volname))['volume_subvols']
        newhash = find_new_hashed(subvols, "/", "FILE-1")
        new_name = str(newhash.newname)
        new_host = str(newhash.hashedbrickobject._host)
        new_name_path = str(newhash.hashedbrickobject._fqpath)[:-1]

        # Move file such that it hashes to some other subvol and forms linkto
        # file
        ret = move_file(self.clients[0], "{}/FILE-1".format(m_point),
                        "{}/{}".format(m_point, new_name))
        self.assertTrue(ret, "Rename failed")
        g.log.info('Renamed file %s to %s', "{}/FILE-1".format(m_point),
                   "{}/{}".format(m_point, new_name))

        # Check if "dst_file" is linkto file
        ret = is_linkto_file(new_host, '{}{}'.format(new_name_path, new_name))
        self.assertTrue(ret, "File is not a linkto file")
        g.log.info("File is linkto file")

        # Force the linkto xattr values to become stale by changing the dht
        # subvols in the graph; for that:
        # disable performance.parallel-readdir and
        # performance.readdir-ahead on the volume
        options = {
            "performance.parallel-readdir": "disable",
            "performance.readdir-ahead": "disable"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Failed to disable volume options")
        g.log.info("Successfully disabled volume options")

        # Access the file as non-root user
        cmd = "ls -lR {}".format(m_point)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd, user="******")
        self.assertEqual(ret, 0, "Lookup failed ")
        g.log.info("Lookup successful")
Code example #4
    def test_self_heal_with_diff_algorithm(self):
        """
        Test Steps:
        1. Create a replicated/distributed-replicate volume and mount it
        2. Set data/metadata/entry-self-heal to off and
           data-self-heal-algorithm to diff
        3. Create few files inside a directory with some data
        4. Check arequal of the subvol and all the bricks in the subvol should
           have same checksum
        5. Bring down a brick from the subvol and validate it is offline
        6. Modify the data of existing files under the directory
        7. Bring back the brick online and wait for heal to complete
        8. Check arequal of the subvol and all the brick in the same subvol
           should have same checksum
        """

        # Setting options
        for key, value in (("data-self-heal",
                            "off"), ("metadata-self-heal",
                                     "off"), ("entry-self-heal", "off"),
                           ("data-self-heal-algorithm", "diff")):
            ret = set_volume_options(self.mnode, self.volname, {key: value})
            self.assertTrue(ret, 'Failed to set %s to %s.' % (key, value))
            g.log.info("%s set to %s successfully", key, value)

        # Create few files under a directory with data
        mountpoint = self.mounts[0].mountpoint
        client = self.mounts[0].client_system

        cmd = ("mkdir %s/test_diff_self_heal ; cd %s/test_diff_self_heal ;"
               "for i in `seq 1 100` ; do dd if=/dev/urandom of=file.$i "
               " bs=1M count=1; done;" % (mountpoint, mountpoint))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Failed to create file on mountpoint")
        g.log.info("Successfully created files on mountpoint")

        # Check arequal checksum of all the bricks is same
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for subvol in subvols:
            ret, arequal_from_the_bricks = collect_bricks_arequal(subvol)
            self.assertTrue(
                ret, "Failed to collect arequal across the bricks "
                "in the subvol {}".format(subvol))
            num_checksums = len(set(arequal_from_the_bricks))
            if (self.volume_type == "arbiter"
                    or self.volume_type == "distributed-arbiter"):
                num_checksums = len(set(arequal_from_the_bricks[:2]))
            self.assertEqual(
                num_checksums, 1, "Arequal is not the same on all the "
                "bricks in the subvol")

        # List a brick in each subvol and bring them offline
        brick_to_bring_offline = []
        for subvol in subvols:
            self.assertTrue(subvol, "List is empty")
            brick_to_bring_offline.extend(sample(subvol, 1))

        ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
        self.assertTrue(
            ret,
            "Unable to bring brick: {} offline".format(brick_to_bring_offline))

        # Validate the brick is offline
        ret = are_bricks_offline(self.mnode, self.volname,
                                 brick_to_bring_offline)
        self.assertTrue(
            ret, "Brick:{} is still online".format(brick_to_bring_offline))

        # Modify files under test_diff_self_heal directory
        cmd = ("for i in `seq 1 100` ; do truncate -s 0 file.$i ; "
               "truncate -s 2M file.$i ; done;")
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Failed to modify the files")
        g.log.info("Successfully modified files")

        # Start volume with force to bring all bricks online
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Volume start with force failed")
        g.log.info("Volume: %s started successfully", self.volname)

        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))

        # Monitor heal completion
        self.assertTrue(
            monitor_heal_completion(self.mnode,
                                    self.volname,
                                    interval_check=10),
            "Heal failed after 20 mins")

        # Check are there any files in split-brain
        self.assertFalse(
            is_volume_in_split_brain(self.mnode, self.volname),
            "Some files are in split brain for "
            "volume: {}".format(self.volname))

        # Check arequal checksum of all the bricks is same
        for subvol in subvols:
            ret, arequal_from_the_bricks = collect_bricks_arequal(subvol)
            self.assertTrue(
                ret, "Failed to collect arequal across the bricks "
                "in the subvol {}".format(subvol))
            num_checksums = len(set(arequal_from_the_bricks))
            if (self.volume_type == "arbiter"
                    or self.volume_type == "distributed-arbiter"):
                num_checksums = len(set(arequal_from_the_bricks[:2]))
            self.assertEqual(
                num_checksums, 1, "Arequal is not the same on all the "
                "bricks in the subvol")
Code example #5
    def test_mem_leak_on_gluster_procs_after_ssl_enabled(self):
        """
        Steps:
        Scenario 1:
        1) Enable management encryption on the cluster.
        2) Create a 2X3 volume.
        3) Mount the volume using FUSE on a client node.
        4) Start doing IO on the mount (ran IO till the volume is ~88% full)
        5) Simultaneously start collecting the memory usage for
           'glusterfsd' process.
        6) Issue the command "# gluster v heal <volname> info" continuously
           in a loop.
        """

        # Fill the vol approx 88%
        bricks = get_all_bricks(self.mnode, self.volname)
        usable_size = int(get_usable_size_per_disk(bricks[0]) * 0.88)

        procs = []
        counter = 1
        for _ in get_subvols(self.mnode, self.volname)['volume_subvols']:
            filename = "{}/test_file_{}".format(self.mounts[0].mountpoint,
                                                str(counter))
            proc = g.run_async(
                self.mounts[0].client_system,
                "fallocate -l {}G {}".format(usable_size, filename))
            procs.append(proc)
            counter += 1

        # Start monitoring resource usage on servers and clients
        # default interval = 60 sec
        # count = 780 => 780 samples at 60 sec intervals, covering the 12 hr run
        monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
            self.test_id, count=780)
        self.assertIsNotNone(
            monitor_proc_dict, "Failed to start monitoring on servers and "
            "clients")

        ret = validate_io_procs(procs, self.mounts)
        self.assertTrue(ret, "IO Failed")

        # Perform gluster heal info for 12 hours
        end_time = datetime.now() + timedelta(hours=12)
        while True:
            curr_time = datetime.now()
            cmd = "gluster volume heal %s info" % self.volname
            ret, _, _ = g.run(self.mnode, cmd)
            self.assertEqual(ret, 0, "Failed to execute heal info cmd")
            if curr_time > end_time:
                g.log.info("Successfully ran for 12 hours. Checking for "
                           "memory leaks")
                break

        # Wait for monitoring processes to complete
        ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
                                                 cluster=True)
        self.assertTrue(ret, "ERROR: Failed to stop monitoring processes")

        # Check if there are any memory leaks and OOM killers
        ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
            self.test_id)
        self.assertFalse(ret,
                         "Memory leak and OOM kills check failed on servers")

        ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
            self.test_id)
        self.assertFalse(ret,
                         "Memory leak and OOM kills check failed on clients")
        g.log.info("No memory leaks/OOM kills found on serves and clients")
Code example #6
    def test_remove_brick_operations(self):
        """
        Steps:
        1. Remove data brick count number of bricks from the volume
           should fail
        2. step 1 with force option should fail
        3. Remove redundant brick count number of bricks from the volume
           should fail
        4. step 3 with force option should fail
        5. Remove data brick count+1 number of bricks from the volume
           should fail
        6. step 5 with force option should fail
        7. Remove disperse count number of bricks from the volume with
           one wrong brick path should fail
        8. step 7 with force option should fail
        9. Start remove brick on first subvol bricks
        10. Remove all the subvols to make a pure EC vol
            by start remove brick on second subvol bricks
        11. Start remove brick on third subvol bricks
        12. Write files and perform read on mountpoints
        """
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-statements

        subvols_list = get_subvols(self.mnode, self.volname)
        volinfo = get_volume_info(self.mnode, self.volname)
        initial_brickcount = volinfo[self.volname]['brickCount']
        data_brick_count = (self.volume['voltype']['disperse_count'] -
                            self.volume['voltype']['redundancy_count'])

        # Try to remove data brick count number of bricks from the volume
        bricks_list_to_remove = (
            subvols_list['volume_subvols'][0][0:data_brick_count])
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Try to remove redundant brick count number of bricks from the volume
        bricks_list_to_remove = (
            subvols_list['volume_subvols'][0]
            [0:self.volume['voltype']['redundancy_count']])
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume"
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Try to remove data brick count+1 number of bricks from the volume
        bricks_list_to_remove = (
            subvols_list['volume_subvols'][0][0:data_brick_count + 1])
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Try to remove disperse count number of bricks from the volume with
        # one wrong brick path
        bricks_list_to_remove = (subvols_list['volume_subvols'][0]
                                 [0:self.volume['voltype']['disperse_count']])
        bricks_list_to_remove[0] = bricks_list_to_remove[0] + "wrong_path"
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Trying with force option
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="force")
        self.assertEqual(ret, 1,
                         ("ERROR: Removed bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Verify that the brick count is intact
        volinfo = get_volume_info(self.mnode, self.volname)
        latest_brickcount = volinfo[self.volname]['brickCount']
        self.assertEqual(initial_brickcount, latest_brickcount,
                         ("Brick count is not expected to "
                          "change, but changed"))

        # Start remove brick on first subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][0]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 0,
                         ("Failed to remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Verify that the brick count is intact
        volinfo = get_volume_info(self.mnode, self.volname)
        latest_brickcount = volinfo[self.volname]['brickCount']
        self.assertEqual(initial_brickcount, latest_brickcount,
                         ("Brick count is not expected to "
                          "change, but changed"))

        # Wait for remove brick to complete
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(ret, ("Remove brick is not yet complete on the volume "
                              "%s" % self.volname))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Commit the remove brick operation
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="commit")
        self.assertEqual(ret, 0,
                         ("Failed to commit remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Remove all the subvols to make a pure EC vol
        # Start remove brick on second subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][1]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 0,
                         ("Failed to remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))

        # Wait for remove brick to complete
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(ret, ("Remove brick is not yet complete on the volume "
                              "%s", self.volname))

        # Commit the remove brick operation
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="commit")
        self.assertEqual(ret, 0,
                         ("Failed to commit remove bricks %s from the volume"
                          " %s" % (bricks_list_to_remove, self.volname)))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Start remove brick on third subvol bricks
        bricks_list_to_remove = subvols_list['volume_subvols'][2]
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="start")
        self.assertEqual(ret, 0, ("Failed to remove bricks %s from "
                                  "the volume %s" %
                                  (bricks_list_to_remove, self.volname)))

        # Wait for remove brick to complete
        ret = wait_for_remove_brick_to_complete(self.mnode, self.volname,
                                                bricks_list_to_remove)
        self.assertTrue(ret, ("Remove brick is not yet complete on the volume "
                              "%s" % self.volname))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Commit the remove brick operation
        ret, _, _ = remove_brick(self.mnode,
                                 self.volname,
                                 bricks_list_to_remove,
                                 option="commit")
        self.assertEqual(ret, 0,
                         ("Failed to commit remove bricks %s from the volume "
                          "%s" % (bricks_list_to_remove, self.volname)))
        g.log.info("Remove brick is successfully complete on the volume %s",
                   self.volname)

        # Log volume info and status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s" % self.volname))
        g.log.info(
            "Successful in logging volume info and status "
            "of volume %s", self.volname)

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Write some files on the mount point
        cmd1 = ("cd %s; mkdir test; cd test; for i in `seq 1 100` ;"
                "do touch file$i; done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd1)
        self.assertEqual(ret, 0, ("Write operation failed on client "
                                  "%s " % self.mounts[0].client_system))
        g.log.info("Writes on mount point successful")

        # Perform read operation on mountpoint
        cmd2 = ("cd %s; ls -lRt;" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd2)
        self.assertEqual(ret, 0, ("Read operation failed on client "
                                  "%s " % self.mounts[0].client_system))
        g.log.info("Read on mount point successful")
Code example #7
    def test_ec_open_fd(self):
        """
        Test Steps:
        - disable server side heal
        - Create a file
        - Set volume option to implement open FD on file
        - Bring a brick down,say b1
        - Open FD on file
        - Bring brick b1 up
        - write to open FD file
        - Monitor heal
        - Check xattr , ec.version and ec.size of file
        - Check stat of file
        """

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals

        mountpoint = self.mounts[0].mountpoint

        # Disable server side heal
        ret = disable_heal(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to disable server side heal"))
        g.log.info("Successfully disabled server side heal")

        # Log Volume Info and Status after disabling server side heal
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s", self.volname))

        # Create a file
        cmd = ("cd %s; touch 'file_openfd';" % mountpoint)
        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished creating a file while all the bricks are UP')

        # Set volume options
        ret = set_volume_options(self.mnode, self.volname,
                                 {"performance.read-after-open": "yes"})
        self.assertTrue(ret, 'Failed to set volume {}'
                        ' options'.format(self.volname))
        g.log.info('Successfully set %s volume options', self.volname,)

        # Bringing brick b1 offline
        sub_vols = get_subvols(self.mnode, self.volname)
        subvols_list = sub_vols['volume_subvols']
        bricks_list1 = subvols_list[0]
        brick_b1_down = choice(bricks_list1)
        ret = bring_bricks_offline(self.volname,
                                   brick_b1_down)
        self.assertTrue(ret, 'Brick %s is not offline' % brick_b1_down)
        g.log.info('Brick %s is offline successfully', brick_b1_down)

        node = self.mounts[0].client_system
        # Open FD
        proc = open_file_fd(mountpoint, time=100,
                            client=node)

        # Bring brick b1 online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  [brick_b1_down],
                                  'glusterd_restart')
        self.assertTrue(ret, 'Brick {} is not brought '
                        'online'.format(brick_b1_down))
        g.log.info('Brick %s is online successfully', brick_b1_down)

        # Validate peers are connected
        ret = self.validate_peers_are_connected()
        self.assertTrue(ret, "Peers are not in connected state after bringing"
                        " an offline brick to online via `glusterd restart`")
        g.log.info("Successfully validated peers are in connected state")

        # Check if write to FD is successful
        g.log.info('Open FD on file successful')
        ret, _, _ = proc.async_communicate()
        self.assertEqual(ret, 0, "Write to FD is successful")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        file_openfd = os.path.join(mountpoint, 'file_openfd')

        # Check if data exists on file
        ret = check_if_pattern_in_file(node, 'xyz', file_openfd)
        self.assertEqual(ret, 0, 'xyz does not exist in file')
        g.log.info('xyz exists in file')

        file_fd = 'file_openfd'

        # Check if EC version is same on all bricks which are up
        ret = validate_xattr_on_all_bricks(bricks_list1, file_fd,
                                           'trusted.ec.version')
        self.assertTrue(ret, "Healing not completed and EC version is "
                        "not updated")
        g.log.info("Healing is completed and EC version is updated")

        # Check if EC size is same on all bricks which are up
        ret = validate_xattr_on_all_bricks(bricks_list1, file_fd,
                                           'trusted.ec.size')
        self.assertTrue(ret, "Healing not completed and EC size is "
                        "not updated")
        g.log.info("Healing is completed and EC size is updated")

        # Check stat of file
        cmd = "cd %s; du -kh file_openfd" % mountpoint
        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, err)
        g.log.info('File %s is accessible', file_fd)
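open_file_fd is a glustolibs helper not shown in this snippet. As a rough standalone illustration of the pattern the test relies on, keeping an fd open across the window in which a brick is cycled and only writing through it afterwards (the function name and local path handling are illustrative):

import os
import time

def write_through_held_fd(path, delay_seconds, data=b"xyz\n"):
    """Open a file, hold the fd across `delay_seconds`, then write and close."""
    fd = os.open(path, os.O_CREAT | os.O_WRONLY, 0o644)
    try:
        time.sleep(delay_seconds)   # window in which a brick is cycled
        os.write(fd, data)
        os.fsync(fd)
    finally:
        os.close(fd)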
Code example #8
    def test_metadata_self_heal_on_open_fd(self):
        """
        Description: Pro-active metadata self heal on open fd

        Steps :
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Create test executable on volume mount.
        4) While test execution is in progress, bring down brick1.
        5) From mount point, change ownership, permission, group id of
           the test file.
        6) While test execution is in progress, bring back brick1 online.
        7) Do stat on the test file to check ownership, permission,
           group id on mount point and on bricks
        8) Stop test execution.
        9) Do stat on the test file to check ownership, permission,
           group id on mount point and on bricks.
        10) There should be no pending heals in the heal info command.
        11) There should be no split-brain.
        12) Calculate arequal of the bricks and mount point and it
            should be same.
        """
        # pylint: disable=too-many-statements,too-many-locals
        # pylint: disable=too-many-branches
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, 'Brick list is None')
        client = self.clients[0]

        # Create test executable file on mount point
        m_point = self.mounts[0].mountpoint
        test_file = "testfile.sh"
        cmd = ("echo 'while true; do echo 'Press CTRL+C to stop execution';"
               " done' >> {}/{}".format(m_point, test_file))
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "Failed to create test file")

        # Execute the test file
        cmd = "cd {}; sh {}".format(m_point, test_file)
        g.run_async(client, cmd)

        # Get pid of the test file
        _cmd = "ps -aux | grep -v grep | grep testfile.sh | awk '{print $2}'"
        ret, out, _ = g.run(client, _cmd)
        self.assertEqual(ret, 0, "Failed to get pid of test file execution")

        # Bring brick1 offline
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(
            ret, 'Failed to bring bricks {} '
            'offline'.format(bricks_list[1]))

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Bricks {} are not '
                        'offline'.format(bricks_list[1]))

        # change uid, gid and permission from client
        cmd = "chown {} {}/{}".format(self.user, m_point, test_file)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "chown failed")

        cmd = "chgrp {} {}/{}".format(self.user, m_point, test_file)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "chgrp failed")

        cmd = "chmod 777 {}/{}".format(m_point, test_file)
        ret, _, _ = g.run(client, cmd)
        self.assertEqual(ret, 0, "chown failed")

        # Bring brick1 online
        ret = bring_bricks_online(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(
            ret, 'Failed to bring bricks {} online'.format(bricks_list[1]))

        ret = get_pathinfo(client, "{}/{}".format(m_point, test_file))
        self.assertIsNotNone(
            ret, "Unable to get "
            "trusted.glusterfs.pathinfo  of file")
        nodes_to_check = {}
        bricks_list = []
        for brick in ret['brickdir_paths']:
            node, brick_path = brick.split(':')
            if node[0:2].isdigit():
                nodes_to_check[node] = os.path.dirname(brick_path)
                path = node + ":" + os.path.dirname(brick_path)
            else:
                nodes_to_check[gethostbyname(node)] = (
                    os.path.dirname(brick_path))
                path = gethostbyname(node) + ":" + os.path.dirname(brick_path)
            bricks_list.append(path)
        nodes_to_check[client] = m_point

        # Verify that the changes are successful on bricks and client
        self._verify_stat_info(nodes_to_check, test_file)

        # Kill the test executable file
        for pid in out.split('\n')[:-1]:
            cmd = "kill -s 9 {}".format(pid)
            ret, _, _ = g.run(client, cmd)
            self.assertEqual(ret, 0, "Failed to kill test file execution")

        # Verify that the changes are successful on bricks and client
        self._verify_stat_info(nodes_to_check, test_file)

        # Verify there are no pending heals
        heal_info = get_heal_info_summary(self.mnode, self.volname)
        self.assertIsNotNone(heal_info, 'Unable to get heal info')
        for brick in bricks_list:
            self.assertEqual(int(heal_info[brick]['numberOfEntries']), 0,
                             ("Pending heal on brick {} ".format(brick)))

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal for mount
        ret, arequals = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

        # Collecting data bricks
        vol_info = get_volume_info(self.mnode, self.volname)
        self.assertIsNotNone(vol_info, 'Unable to get volume info')
        data_brick_list = []
        for brick in bricks_list:
            for brick_info in vol_info[self.volname]["bricks"]["brick"]:
                if brick_info["name"] == brick:
                    if brick_info["isArbiter"] == "0":
                        data_brick_list.append(brick)
        bricks_list = data_brick_list

        # Get arequal on bricks and compare with mount_point_total
        # It should be the same
        arbiter = self.volume_type.find('arbiter') >= 0
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])
        for subvol in subvols:
            subvol = [i for i in subvol if i in bricks_list]
            if subvol:
                ret, arequal = collect_bricks_arequal(subvol[0:stop])
                self.assertTrue(
                    ret, 'Unable to get arequal checksum '
                    'on {}'.format(subvol[0:stop]))
                self.assertEqual(
                    len(set(arequal)), 1, 'Mismatch of arequal '
                    'checksum among {} is '
                    'identified'.format(subvol[0:stop]))
                brick_total = arequal[-1].splitlines()[-1].split(':')[-1]
                self.assertEqual(
                    brick_total, mount_point_total,
                    "Arequals for mountpoint and {} "
                    "are not equal".format(subvol[0:stop]))
Code example #9
def is_layout_complete(mnode, volname, dirpath):
    """This function reads the subvols in the given volume and checks whether
       layout is complete or not.
       Layout starts at zero, ends at 32-bits high, and has no holes or
       overlaps.

    Args:
        volname (str): volume name
        mnode (str): Node on which cmd has to be executed.
        dirpath (str): directory path; starting from root of mount point.

    Returns (bool): True if layout is complete
                    False if layout has any holes or overlaps

    Example:
        is_layout_complete("abc.xyz.com", "testvol", "/")
        is_layout_complete("abc.xyz.com", "testvol", "/dir1/dir2/dir3")
    """

    subvols_list = get_subvols(mnode, volname)['volume_subvols']
    trim_subvols_list = [y for x in subvols_list for y in x]

    # append the dirpath to the elements in the list
    final_subvols_list = [x + dirpath for x in trim_subvols_list]

    complete_hash_list = []
    for fqpath in final_subvols_list:
        hash_list = BrickDir(fqpath).hashrange
        complete_hash_list.append(hash_list)
    joined_hashranges = [y for x in complete_hash_list for y in x]
    g.log.debug("joined range list: %s" % joined_hashranges)

    # remove duplicate hashes
    collapsed_ranges = list(set(joined_hashranges))

    # sort the range list for good measure
    collapsed_ranges.sort()

    # first hash in the list is 0?
    if collapsed_ranges[0] != 0:
        g.log.error('First hash in range (%d) is not zero' %
                    collapsed_ranges[0])
        return False

    # last hash in the list is 32-bits high?
    if collapsed_ranges[-1] != int(0xffffffff):
        g.log.error('Last hash in ranges (%s) is not 0xffffffff' %
                    hex(collapsed_ranges[-1]))
        return False

    # remove the first and last hashes
    clipped_ranges = collapsed_ranges[1:-1]
    g.log.debug('clipped: %s' % clipped_ranges)

    # walk through the list in pairs and look for diff == 1
    iter_ranges = iter(clipped_ranges)
    for first in iter_ranges:
        second = next(iter_ranges)
        hash_difference = second - first
        g.log.debug('%d - %d = %d' % (second, first, hash_difference))
        if hash_difference > 1:
            g.log.error("Layout has holes")

            return False
        elif hash_difference < 1:
            g.log.error("Layout has overlaps")

            return False

    return True
Code example #10
    def test_split_brain(self):
        """
        Description: Create split-brain on files and check if IO's fail
        - Disable self-heal and cluster-quorum-type
        - Get the bricks from the volume
        - Write IO and validate IO
        - Bring 1st set of brick offline(1 Data brick and arbiter brick)
        - Write IO and validate IO
        - Bring 2nd set of bricks offline(1 Data brick and arbiter brick)
        - Write IO and validate IO
        - Check volume is in split-brain
        - Write IO and validate IO - should fail
        - Enable self-heal and cluster-quorum-type
        - Write IO and validate IO - should fail
        """
        # Disable self-heal and cluster-quorum-type
        options = {"self-heal-daemon": "off", "cluster.quorum-type": "none"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))

        # Get the bricks from the volume
        sub_vols = get_subvols(self.mnode, self.volname)
        self.bricks_to_bring_offline = list(sub_vols['volume_subvols'][0])

        # Write IO's
        write_cmd = ("/usr/bin/env python %s create_files -f 1 "
                     "--base-file-name test_file --fixed-file-size 1k %s" %
                     (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, write_cmd)
        self.assertEqual(ret, 0, "Failed to write IO on the mount point")

        # Bring 1st set of brick offline(1 Data brick and arbiter brick)
        for bricks in ((0, -1), (1, -1)):
            down_bricks = []
            for brick in bricks:
                down_bricks.append(self.bricks_to_bring_offline[brick])
            ret = bring_bricks_offline(self.volname, down_bricks)
            self.assertTrue(
                ret, 'Failed to bring bricks {} offline'.format(down_bricks))
            proc = g.run_async(self.mounts[0].client_system, write_cmd)

            # Validate I/O
            self.assertTrue(validate_io_procs([proc], self.mounts),
                            "IO failed on some of the clients")

            # Bring bricks online
            self._bring_bricks_online()

        # Check volume is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "unable to create split-brain scenario")
        g.log.info("Successfully created split brain scenario")

        # Write IO's
        proc2 = g.run_async(self.mounts[0].client_system, write_cmd)

        # Validate I/O
        self.assertFalse(validate_io_procs([proc2], self.mounts),
                         "IO passed on split-brain")
        g.log.info("Expected - IO's failed due to split-brain")

        # Enable self-heal and cluster-quorum-type
        options = {"self-heal-daemon": "on", "cluster.quorum-type": "auto"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))

        # Write IO's
        proc3 = g.run_async(self.mounts[0].client_system, write_cmd)

        # Validate I/O
        self.assertFalse(validate_io_procs([proc3], self.mounts),
                         "IO passed on split-brain")
        g.log.info("Expected - IO's failed due to split-brain")
Code example #11
    def test_brick_full_add_brick_rebalance(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create a data set on the client node such that all the available
           space is used and "No space left on device" error is generated.
        3. Set cluster.min-free-disk to 30%.
        4. Add bricks to the volume, trigger rebalance and wait for rebalance
           to complete.
        """
        # Create a data set on the client node such that all the available
        # space is used and "No space left on device" error is generated
        bricks = get_all_bricks(self.mnode, self.volname)

        # Calculate the usable size and fill till it reaches
        # min free limit
        usable_size = get_usable_size_per_disk(bricks[0])
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        filename = "abc"
        for subvol in subvols:
            while (subvols[find_hashed_subvol(subvols, "/", filename)[1]] ==
                   subvol):
                filename = self._get_random_string()
            ret, _, err = g.run(self.mounts[0].client_system,
                                "fallocate -l {}G {}/{}".format(
                                    usable_size, self.mounts[0].mountpoint,
                                    filename))
            err_msg = 'No space left on device'
            if ret and err_msg in err:
                ret = 0
            self.assertFalse(ret, "Failed to fill disk to min free limit")
        g.log.info("Disk filled up to min free limit")

        # Try to perform I/O from mount point (this should fail)
        ret, _, _ = g.run(self.mounts[0].client_system,
                          "fallocate -l 5G {}/mfile".format(
                              self.mounts[0].mountpoint))
        self.assertTrue(ret,
                        "Unexpected: Able to do I/O even when disks are "
                        "filled to min free limit")
        g.log.info("Expected: Unable to perfrom I/O as min free disk is hit")

        # Set cluster.min-free-disk to 30%
        ret = set_volume_options(self.mnode, self.volname,
                                 {'cluster.min-free-disk': '30%'})
        self.assertTrue(ret, "Failed to set cluster.min-free-disk to 30%")

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=1200)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
Code example #12
    def test_gfid_split_brain_resolution(self):
        """
        - Create gfid split-brain on files and resolve them using the
          source-brick option of the CLI.
        """

        # pylint: disable=too-many-statements
        # pylint: disable=too-many-locals

        # Disable all self-heals and client-quorum
        options = {
            "self-heal-daemon": "off",
            "data-self-heal": "off",
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "cluster.quorum-type": "none"
        }
        g.log.info("setting volume options %s", options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Create dir inside which I/O will be performed.
        ret = mkdir(self.mounts[0].client_system,
                    "%s/test_gfid_split_brain" % self.mounts[0].mountpoint)
        self.assertTrue(ret, "mkdir failed")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # Toggle bricks and perform I/O
        file_list = [
            "file1.txt", "file2.txt", "file3.txt", "file4.txt", "file5.txt",
            "file6.txt", "file7.txt", "file8.txt", "file9.txt", "file10.txt"
        ]
        brick_index = 0
        offline_bricks = []
        for _ in range(0, 3):
            for i in range(0, num_subvols):
                subvol_brick_list = subvols_dict['volume_subvols'][i]
                offline_bricks.append(subvol_brick_list[brick_index % 3])
                offline_bricks.append(subvol_brick_list[(brick_index + 1) % 3])
            self.toggle_bricks_and_perform_io(file_list, offline_bricks)
            brick_index += 1
            offline_bricks[:] = []

        # Enable shd
        g.log.info("enabling the self heal daemon")
        ret = enable_self_heal_daemon(self.mnode, self.volname)
        self.assertTrue(ret, "failed to enable self heal daemon")
        g.log.info("Successfully enabled the self heal daemon")

        # Wait for self heal processes to come online
        g.log.info("Wait for selfheal process to come online")
        timeout = 300
        ret = wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname,
                                                      timeout)
        self.assertTrue(ret, "Self-heal process are not online")
        g.log.info("All self heal process are online")

        # Trigger heal
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Starting heal failed')
        g.log.info('Index heal launched')

        # checking if file is in split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertTrue(ret, "Files are not in split-brain as expected.")
        g.log.info("Files are still in split-brain")

        # First brick of each replica will be used as source-brick
        first_brick_list = []
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            brick = subvol_brick_list[0]
            first_brick_list.append(brick)

        # Find which dht subvols the 10 files are present in and trigger heal
        for filename in file_list:
            fpath = self.mounts[0].mountpoint + "/test_gfid_split_brain/" + \
                    filename
            gfile = GlusterFile(self.clients[0], fpath)
            for brick in first_brick_list:
                _, brick_path = brick.split(':')
                match = [
                    brick for item in gfile.hashed_bricks if brick_path in item
                ]
                if match:
                    self.resolve_gfid_split_brain(
                        "/test_gfid_split_brain/" + filename, brick)

        # Trigger heal to complete pending data/metadata heals
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Starting heal failed')
        g.log.info('Index heal launched')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Get arequals and compare
        for i in range(0, num_subvols):
            # Get arequal for first brick
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s'
                             % subvol_brick_list[0])
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with first brick
            for brick in subvol_brick_list[1:]:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan' %
                           brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s' % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]

                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for subvol and %s are not equal' % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
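resolve_gfid_split_brain is a class helper not shown in this snippet; it presumably wraps the standard CLI resolution command `gluster volume heal <volname> split-brain source-brick <brick> <file>`. A hedged sketch of such a wrapper in the style of the test (the helper body is an assumption, not the author's implementation):

    def resolve_gfid_split_brain(self, filepath, source_brick):
        """Resolve split-brain of `filepath` using `source_brick` as source."""
        cmd = ("gluster volume heal %s split-brain source-brick %s %s"
               % (self.volname, source_brick, filepath))
        ret, _, err = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to resolve split-brain for %s: %s"
                         % (filepath, err))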
Code example #13
    def test_heal_info_shouldnot_list_files_being_accessed(self):
        """
        - bring brick 1 offline
        - create files and validate IO
        - get entries before accessing file
        - get first filename from active subvol without offline bricks
        - access and modify the file
        - while accessing - get entries
        - Compare entries before accessing and while accessing
        - validate IO
        """

        # Bring the first brick offline
        brick_to_bring_offline = [self.bricks_list[0]]
        g.log.info('Bringing bricks %s offline...' % brick_to_bring_offline)
        ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline'
                        % brick_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 brick_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % brick_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful'
                   % brick_to_bring_offline)

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s"
                       % (mount_obj.client_system, mount_obj.mountpoint))

            # Creating files
            cmd = ("python %s create_files -f 100 %s"
                   % (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Get entries before accessing file
        g.log.info("Getting entries_before_accessing file...")
        entries_before_accessing = get_heal_info_summary(
            self.mnode, self.volname)
        self.assertNotEqual(entries_before_accessing, None,
                            'Can`t get heal info summary')
        g.log.info(
            "Getting entries_before_accessing file finished successfully")

        # Get filename to access from active subvol without offline bricks
        # Get last subvol
        subvols = get_subvols(self.mnode, self.volname)
        subvol_without_offline_brick = subvols['volume_subvols'][-1]

        # Get first brick server and brick path
        # and get first file from filelist
        subvol_mnode, mnode_brick = subvol_without_offline_brick[0].split(':')
        ret, file_list, err = g.run(subvol_mnode, 'ls %s' % mnode_brick)
        self.assertFalse(ret, err)
        file_to_edit = file_list.splitlines()[0]
        # Access and modify the file
        g.log.info("Start modifying IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Modifying IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            cmd = ("cd %s/ ; "
                   "dd if=/dev/zero of=%s bs=1G count=1"
                   % (mount_obj.mountpoint, file_to_edit))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            g.log.info("IO on %s:%s is modified successfully"
                       % (mount_obj.client_system,
                          mount_obj.mountpoint))
        self.io_validation_complete = False

        # Get entries while accessing file
        g.log.info("Getting entries while accessing file...")
        entries_while_accessing = get_heal_info_summary(
            self.mnode, self.volname)
        self.assertNotEqual(entries_while_accessing, None,
                            'Can`t get heal info summary')
        g.log.info("Getting entries while accessing file "
                   "finished successfully")

        # Compare entries before accessing and while accessing
        g.log.info('Comparing entries before accessing and while accessing...')
        self.assertEqual(entries_before_accessing, entries_while_accessing,
                         'Entries before accessing and while accessing '
                         'are not equal')
        g.log.info('Comparison of entries before accessing and while '
                   'accessing finished successfully.')

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")
Code Example #14
0
    def test_one_brick_full_add_brick_rebalance(self):
        """
        Test case:
        1. Create a pure distribute volume with 3 bricks.
        2. Start it and mount it on client.
        3. Fill one disk of the volume till it's full
        4. Add brick to volume, start rebalance and wait for it to complete.
        5. Check arequal checksum before and after add brick should be same.
        6. Check if link files are present on bricks or not.
        """
        # Fill one brick till it is full
        bricks = get_all_bricks(self.mnode, self.volname)

        # Calculate the usable size and fill till it reaches
        # min free limit
        usable_size = get_usable_size_per_disk(bricks[0])
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        fname = "abc"

        # Create directories in hierarchy
        dirp = "/dir1/dir2/"
        path = "{}{}".format(self.mounts[0].mountpoint, dirp)
        ret = mkdir(self.mounts[0].client_system, path, parents=True)
        self.assertTrue(ret, "Failed to create dir hierarchy")

        for _ in range(0, usable_size):

            # Create files inside directories
            while (subvols[find_hashed_subvol(subvols, dirp, fname)[1]][0] !=
                   subvols[0][0]):
                fname = self._get_random_string()
            ret, _, _ = g.run(self.mounts[0].client_system,
                              "fallocate -l 1G {}{}".format(path, fname))
            self.assertFalse(ret, "Failed to fill disk to min free limit")
            fname = self._get_random_string()
        g.log.info("Disk filled up to min free limit")

        # Collect arequal checksum before ops
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=1800)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")

        # Check for data loss by comparing arequal before and after ops
        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")

        # Check if linkto files exist or not as rebalance is already
        # completed we shouldn't be seeing any linkto files
        for brick in bricks:
            node, path = brick.split(":")
            path += dirp
            list_of_files = get_dir_contents(node, path)
            self.assertIsNotNone(list_of_files, "Unable to get files")
            for filename in list_of_files:
                ret = get_dht_linkto_xattr(node, "{}{}".format(path,
                                                               filename))
                self.assertIsNone(ret, "Unable to fetch dht linkto xattr")
    def test_impact_of_replace_brick_for_glustershd(self):
        # pylint: disable=too-many-statements,too-many-branches,too-many-locals
        nodes = self.volume['servers']
        replaced_bricks = []

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process "
                              "found : %s" % pids))
        g.log.info(
            "Successful in getting Single self heal daemon process"
            " on all nodes %s", nodes)
        glustershd_pids = pids

        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # validate the bricks present in volume info with
        # glustershd server volume file
        g.log.info("Starting parsing file %s on "
                   "node %s", self.glustershd, self.mnode)
        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
                                             bricks_list)
        self.assertTrue(ret, ("Brick List from volume info is different "
                              "from glustershd server volume file. "
                              "Please check log file for details"))
        g.log.info("Successfully parsed %s file", self.glustershd)

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # replace brick from each sub-vol
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            brick_to_replace = subvol_brick_list[-1]
            new_brick = brick_to_replace + 'new'
            g.log.info("Replacing the brick %s for the volume : %s",
                       brick_to_replace, self.volname)
            ret, _, err = replace_brick(self.mnode, self.volname,
                                        brick_to_replace, new_brick)
            self.assertFalse(ret, err)
            g.log.info('Replaced brick %s to %s successfully',
                       brick_to_replace, new_brick)
            replaced_bricks.append(brick_to_replace)

        # Verify volume's all process are online for 60 sec
        g.log.info("Verifying volume's all process are online")
        ret = wait_for_volume_process_to_be_online(self.mnode,
                                                   self.volname,
                                                   timeout=60)
        self.assertTrue(ret, ("Volume %s : All process are not "
                              "online", self.volname))
        g.log.info("Successfully Verified volume %s processes are online",
                   self.volname)

        # Verify glustershd process releases its parent process
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on nodes "
                   "%s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or"
                              " more than One self heal daemon process"
                              " found : %s" % pids))
        g.log.info(
            "Successful in getting Single self heal daemon process"
            " on all nodes %s", nodes)
        glustershd_pids_after_replacement = pids

        # Compare pids before and after replacing
        self.assertNotEqual(
            glustershd_pids, glustershd_pids_after_replacement,
            "Self Daemon process is same before and"
            " after replacing bricks")
        g.log.info("Self Heal Daemon Process is different before and "
                   "after replacing bricks")

        # get the bricks for the volume after replacing
        bricks_list_after_replacing = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List after expanding "
                   "volume: %s", bricks_list_after_replacing)

        # validate the bricks present in volume info
        # with glustershd server volume file after replacing bricks
        g.log.info("Starting parsing file %s", self.glustershd)
        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
                                             bricks_list_after_replacing)

        self.assertTrue(ret, ("Brick List from volume info is different "
                              "from glustershd server volume file after "
                              "replacing bricks. Please check log file "
                              "for details"))
        g.log.info("Successfully parsed %s file", self.glustershd)
        g.log.info("Starting to delete replaced brick dir's")

        # Remove brick directories of the replaced bricks as this is not
        # handled by tearDown class
        for bricks in replaced_bricks:
            node, brick_path = bricks.split(r':')
            cmd = "rm -rf " + brick_path
            ret, _, _ = g.run(node, cmd)
            if ret:
                raise ExecutionError("Failed to delete the brick dir's for"
                                     " %s and brick %s" % (node, brick_path))
            g.log.info("Successfully deleted brick dir's for replaced bricks")
Code Example #16
0
    def test_client_side_quorum_with_auto_option_cross2(self):
        """
        Test Script to verify the Client Side Quorum with auto option

        * set cluster.quorum-type to auto.
        * start I/O from the mount point.
        * kill the 2nd brick process from each and every replica set
        * perform ops

        """
        # pylint: disable=too-many-branches,too-many-statements
        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
        g.log.info("setting cluster.quorum-type to auto on "
                   "volume %s", self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for"
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Start IO on mounts
        g.log.info("Starting IO .....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # bring the 2nd brick offline for all the subvolumes
        offline_bricks = []
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            bricks_to_bring_offline = subvol_brick_list[1]
            g.log.info("Going to bring down the brick process "
                       "for %s", bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                                  "check the log file for more details."))
            g.log.info("Brought down the brick process "
                       "for %s successfully", bricks_to_bring_offline)
            offline_bricks.append(bricks_to_bring_offline)

        # create new file named newfile0.txt
        g.log.info("Start creating new file on all mounts...")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 1 --base-file-name newfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # create directory user1
        g.log.info("Start creating directory on all mounts...")
        all_mounts_procs = []
        cmd = ("python %s create_deep_dir %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # create hard link to file
        g.log.info("Start creating hard link for file0.txt on mount")
        cmd = ("ln %s/file0.txt %s/file0.txt_hwlink" %
               (self.mounts[0].mountpoint, self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertFalse(
            ret, 'Failed to create hard link '
            'for file0.txt on %s' % self.mounts[0].mountpoint)
        g.log.info("Hard link for file0.txt on %s is created successfully",
                   self.mounts[0].mountpoint)

        # create soft link
        g.log.info("Start creating soft link for file1.txt on mount")
        cmd = ("ln -s %s/file1.txt %s/file1.txt_swlink" %
               (self.mounts[0].mountpoint, self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertFalse(
            ret, 'Failed to create soft link '
            'for file1.txt on %s' % self.mounts[0].mountpoint)
        g.log.info("Soft link for file1.txt on %s is created successfully",
                   self.mounts[0].mountpoint)

        # append to file
        g.log.info("Appending to file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = ("cat %s/file0.txt >> %s/file1.txt" %
                   (mount_obj.mountpoint, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(
                ret, 'Failed to append file1.txt on %s' % mount_obj.mountpoint)
            g.log.info("Appending for file1.txt on %s is successful",
                       mount_obj.mountpoint)

        # modify the file
        g.log.info("Modifying file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = ("echo 'Modify Contents' > %s/file1.txt" %
                   mount_obj.mountpoint)
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(
                ret, 'Failed to modify file1.txt on %s' % mount_obj.mountpoint)
            g.log.info("Modifying for file1.txt on %s is successful",
                       mount_obj.mountpoint)

        # truncate the file
        g.log.info("Truncating file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "truncate -s 0 %s/file1.txt" % mount_obj.mountpoint
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(
                ret,
                'Failed to truncate file1.txt on %s' % mount_obj.mountpoint)
            g.log.info("Truncating for file1.txt on %s is successful",
                       mount_obj.mountpoint)

        # read the file
        g.log.info("Starting reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # stat on file
        g.log.info("stat on file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "stat %s/file1.txt" % mount_obj.mountpoint
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(
                ret, 'Failed to stat file1.txt on %s' % mount_obj.mountpoint)
            g.log.info("Stat for file1.txt on %s is successful",
                       mount_obj.mountpoint)

        # stat on dir
        g.log.info("stat on directory on all mounts")
        for mount_obj in self.mounts:
            cmd = ("python %s stat %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(
                ret, 'Failed to stat directory on %s' % mount_obj.mountpoint)
            g.log.info("Stat for directory on %s is successful",
                       mount_obj.mountpoint)

        # ls on mount point
        g.log.info("ls on mount point on all mounts")
        for mount_obj in self.mounts:
            cmd = ("python %s ls %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, 'Failed to ls on %s' % mount_obj.mountpoint)
            g.log.info("ls for %s is successful", mount_obj.mountpoint)

        # bring back the bricks online for all subvolumes
        g.log.info("bringing up the brick : %s online", offline_bricks)
        ret = bring_bricks_online(self.mnode, self.volname, offline_bricks)
        self.assertTrue(
            ret, ("Failed to brought the brick %s online" % offline_bricks))
        g.log.info("Successfully brought the bricks")
    def test_replace_brick_self_heal_io_in_progress(self):
        """
        - Create directory on mount point and write files/dirs
        - Create another set of files (1K files)
        - While creation of files/dirs is in progress, kill one brick
        - Remove the contents of the killed brick(simulating disk replacement)
        - When the IO's are still in progress, restart glusterd on the nodes
          where we simulated disk replacement to bring back bricks online
        - Start volume heal
        - Wait for IO's to complete
        - Verify whether the files are self-healed
        - Calculate arequals of the mount point and all the bricks
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        # Create dirs with files
        g.log.info('Creating dirs with file...')
        command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "-d 2 -l 2 -n 2 -f 10 %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, err = g.run(self.mounts[0].client_system,
                            command,
                            user=self.mounts[0].user)
        self.assertFalse(ret, err)
        g.log.info("IO is successful")

        # Creating another set of files (1500 files of 10k each)
        self.all_mounts_procs = []

        # Create files
        g.log.info('Creating 1500 files of 10k each...')
        command = ("/usr/bin/env python %s create_files "
                   "-f 1500 --fixed-file-size 10k %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts[0])
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']

        # Bring brick offline
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Remove the content of the killed bricks
        for brick in bricks_to_bring_offline:
            brick_node, brick_path = brick.split(':')

            # Removing files
            command = ('cd %s ; rm -rf *' % brick_path)
            ret, _, err = g.run(brick_node, command)
            self.assertFalse(ret, err)
            g.log.info('Files are deleted on brick %s', brick)

        # Bring brick online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal daemons are online")

        # Start healing
        ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Check arequals for "replicated"
        all_bricks = get_all_bricks(self.mnode, self.volname)
        if self.volume_type == "replicated":

            # Get arequal after bricks are online
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal after successfully bringing '
                       'bricks online.')
            mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

            # Get arequal on bricks and compare with mount_point_total
            ret, arequals = collect_bricks_arequal(all_bricks)
            self.assertTrue(ret, 'Failed to get arequal on bricks')
            for arequal in arequals:
                brick_total = arequal.splitlines()[-1].split(':')[-1]
                self.assertEqual(
                    mount_point_total, brick_total,
                    'Arequals for mountpoint and brick '
                    'are not equal')
                g.log.info('Arequals for mountpoint and brick are equal')

        # Check arequals for "distributed-replicated"
        if self.volume_type == "distributed-replicated":

            # Get the subvolumes
            subvols_dict = get_subvols(self.mnode, self.volname)
            num_subvols = len(subvols_dict['volume_subvols'])
            g.log.info("Number of subvolumes in volume %s:", num_subvols)

            # Get arequals and compare
            for i in range(0, num_subvols):

                # Get arequal for first brick
                subvol_brick_list = subvols_dict['volume_subvols'][i]
                ret, arequal = collect_bricks_arequal(subvol_brick_list[0])
                self.assertTrue(ret, 'Failed to get arequal on first brick')
                first_brick_total = arequal[0].splitlines()[-1].split(':')[-1]

                # Get arequal for every brick and compare with first brick
                ret, arequals = collect_bricks_arequal(subvol_brick_list)
                self.assertTrue(ret, 'Failed to get arequal on bricks')
                for arequal in arequals:
                    brick_total = arequal.splitlines()[-1].split(':')[-1]
                    self.assertEqual(
                        first_brick_total, brick_total,
                        'Arequals for subvol and brick are '
                        'not equal')
                    g.log.info('Arequals for subvol and brick are equal')
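The distributed-replicated branch above checks, per subvolume, that every brick reports the same arequal total. The sketch below captures that grouping logic on plain data structures; the brick strings and totals are illustrative only.

# Minimal sketch: per-subvolume consistency check on plain data.
# Given the subvol layout and a {brick: arequal-total} mapping,
# return the indices of replica sets whose bricks disagree.

def inconsistent_subvols(subvols, totals):
    """Return indices of subvols whose bricks report different totals."""
    return [index for index, brick_list in enumerate(subvols)
            if len({totals[brick] for brick in brick_list}) > 1]

if __name__ == '__main__':
    subvols = [['s1:/b1', 's2:/b2'], ['s3:/b3', 's4:/b4']]
    totals = {'s1:/b1': 'aa', 's2:/b2': 'aa', 's3:/b3': 'bb', 's4:/b4': 'bb'}
    assert not inconsistent_subvols(subvols, totals)
    print('every replica set is internally consistent')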
Code Example #18
0
    def test_client_side_quorum_with_fixed_for_cross2(self):
        """
        Test Script to verify the Client Side Quorum with fixed
        for cross 2 volume

        * Disable self heal daemon
        * set cluster.quorum-type to fixed.
        * start I/O ( write and read ) from the mount point - must succeed
        * Bring down brick1
        * start I/O ( write and read ) - must succeed
        * set the cluster.quorum-count to 1
        * start I/O ( write and read ) - must succeed
        * set the cluster.quorum-count to 2
        * start I/O ( write and read ) - read must pass, write will fail
        * bring back the brick1 online
        * start I/O ( write and read ) - must succeed
        * Bring down brick2
        * start I/O ( write and read ) - read must pass, write will fail
        * set the cluster.quorum-count to 1
        * start I/O ( write and read ) - must succeed
        * set cluster.quorum-count back to 2 and cluster.quorum-type to auto
        * start I/O ( write and read ) - must succeed
        * Bring back brick2 online
        * Bring down brick1
        * start I/O ( write and read ) - read must pass, write will fail
        * set the quorum-type to none
        * start I/O ( write and read ) - must succeed

        """
        # pylint: disable=too-many-branches,too-many-statements
        # Disable self heal daemon
        options = {"cluster.self-heal-daemon": "off"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # set cluster.quorum-type to fixed
        options = {"cluster.quorum-type": "fixed"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O( write ) - must succeed
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mounts)
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # Bring down brick1 for all the subvolumes
        subvolumes_first_brick_list = []
        subvolumes_second_brick_list = []
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            subvolumes_first_brick_list.append(subvol_brick_list[0])
            subvolumes_second_brick_list.append(subvol_brick_list[1])

        g.log.info("Going to bring down the brick process "
                   "for %s", subvolumes_first_brick_list)
        ret = bring_bricks_offline(self.volname, subvolumes_first_brick_list)
        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                              "check the log file for more details."))
        g.log.info("Brought down the brick process "
                   "for %s successfully", subvolumes_first_brick_list)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mounts)
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name second_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # set the cluster.quorum-count to 1
        options = {"cluster.quorum-count": "1"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mount.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name third_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # set the cluster.quorum-count to 2
        options = {"cluster.quorum-count": "2"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/0 ( write and read ) - read must pass, write will fail
        g.log.info("Starting IO on mount......")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name fourth_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with Read Only File System")
        ret, _ = is_io_procs_fail_with_rofs(self, all_mounts_procs,
                                            self.mounts)
        self.assertTrue(ret, ("Unexpected Error and IO successful"
                              " on Read-Only File System"))
        g.log.info("EXPECTED Read-only file system in IO while creating file")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # bring back the brick1 online for all subvolumes
        g.log.info("bringing up the bricks : %s online",
                   subvolumes_first_brick_list)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  subvolumes_first_brick_list)
        self.assertTrue(ret, ("Failed to brought the bricks %s online" %
                              subvolumes_first_brick_list))
        g.log.info("Successfully brought the bricks %s online",
                   subvolumes_first_brick_list)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mount.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name fifth_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # Bring down brick2 for all the subvolumes
        g.log.info("Going to bring down the brick process "
                   "for %s", subvolumes_second_brick_list)
        ret = bring_bricks_offline(self.volname, subvolumes_second_brick_list)
        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                              "check the log file for more details."))
        g.log.info("Brought down the brick process "
                   "for %s successfully", subvolumes_second_brick_list)

        # start I/0 ( write and read ) - read must pass, write will fail
        g.log.info("Start creating files on mounts.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name sixth_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with Read Only File System")
        ret, _ = is_io_procs_fail_with_rofs(self, all_mounts_procs,
                                            self.mounts)
        self.assertTrue(ret, ("Unexpected Error and IO successful"
                              " on Read-Only File System"))
        g.log.info("EXPECTED Read-only file system in IO while creating file")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # set the cluster.quorum-count to 1
        options = {"cluster.quorum-count": "1"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mount.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name seventh_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # set cluster.quorum-type to auto and cluster.quorum-count back to 2
        options = {"cluster.quorum-type": "auto", "cluster.quorum-count": "2"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mount.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name eigth_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # Bring back brick2 online for all the subvolumes
        g.log.info("bringing up the bricks : %s online",
                   subvolumes_second_brick_list)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  subvolumes_second_brick_list)
        self.assertTrue(ret, ("Failed to brought the brick %s online" %
                              subvolumes_second_brick_list))
        g.log.info("Successfully brought the brick %s online",
                   subvolumes_second_brick_list)

        # Bring down brick1 again for all the subvolumes
        g.log.info("Going to bring down the brick process "
                   "for %s", subvolumes_first_brick_list)
        ret = bring_bricks_offline(self.volname, subvolumes_first_brick_list)
        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                              "check the log file for more details."))
        g.log.info("Brought down the brick process "
                   "for %s successfully", subvolumes_first_brick_list)

        # start I/0 ( write and read ) - read must pass, write will fail
        g.log.info("Start creating files on mounts.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name ninth_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with Read Only File System")
        ret, _ = is_io_procs_fail_with_rofs(self, all_mounts_procs,
                                            self.mounts)
        self.assertTrue(ret, ("Unexpected Error and IO successful"
                              " on Read-Only File System"))
        g.log.info("EXPECTED Read-only file system in IO while creating file")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # set the quorum-type to none
        options = {"cluster.quorum-type": "none"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mount.....")
        all_mounts_procs = []
        cmd = ("python %s create_files "
               "-f 10 --base-file-name tenth_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # read the file
        g.log.info("Start reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # bring back the bricks online for all subvolumes
        g.log.info("bringing up the brick : %s online",
                   subvolumes_first_brick_list)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  subvolumes_first_brick_list)
        self.assertTrue(ret, ("Failed to brought the brick %s online" %
                              subvolumes_first_brick_list))
        g.log.info("Successfully brought the bricks")
Code Example #19
0
    def test_heal_full_after_deleting_files(self):
        """
        - Create IO
        - Calculate arequal from mount
        - Delete data from backend from the EC volume
        - Trigger heal full
        - Check if heal is completed
        - Check for split-brain
        - Calculate arequal checksum and compare it
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Get arequal before deleting the files from the bricks
        g.log.info('Getting arequal before deleting files from bricks...')
        ret, result_before_killing_procs = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before deleting files from bricks '
                   'is successful')

        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']

        # Delete data from the backend of one brick per subvolume
        for subvol in subvols:
            erasure = subvol[-1]
            g.log.info('Clearing ec brick %s', erasure)
            node, brick_path = erasure.split(':')
            ret, _, err = g.run(node, 'cd %s/ ; rm -rf *' % brick_path)
            self.assertFalse(ret, 'Clearing ec brick %s failed: %s'
                             % (erasure, err))
        g.log.info('Clearing data from bricks is successful')

        # Trigger heal full
        ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger full heal')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after healing
        g.log.info('Getting arequal after healing...')
        ret, result_after_healing = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after healing is successful')

        # Comparing arequals
        self.assertEqual(
            result_before_killing_procs, result_after_healing,
            'Arequals before deleting the files and after healing '
            'are not equal')
        g.log.info('Arequals before deleting the files and after healing '
                   'are equal')
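The heal steps above go through the trigger_heal_full() and monitor_heal_completion() helpers. As a rough equivalent, the sketch below drives the same operations with the gluster CLI via subprocess; it assumes the CLI is available on this node, uses a placeholder volume name, and the sleep is only a crude stand-in for real completion monitoring.

# Minimal sketch: trigger a full heal and dump heal info through the
# gluster CLI.  'testvol' is a placeholder volume name.
import subprocess
import time

def trigger_full_heal(volname):
    """Run 'gluster volume heal <vol> full'; True on exit status 0."""
    return subprocess.call(['gluster', 'volume', 'heal', volname, 'full']) == 0

def heal_info(volname):
    """Return raw 'gluster volume heal <vol> info' output."""
    return subprocess.check_output(
        ['gluster', 'volume', 'heal', volname, 'info']).decode()

if __name__ == '__main__':
    vol = 'testvol'   # placeholder
    try:
        if trigger_full_heal(vol):
            time.sleep(5)   # crude wait; the test uses a real monitor
            print(heal_info(vol))
    except OSError:
        print('gluster CLI not available on this node')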
Code Example #20
0
    def test_client_side_quorum_with_auto_option_overwrite_fixed(self):
        """
        Test Script to verify the Client Side Quorum with auto option

        * check the default value of cluster.quorum-type
        * try to set any junk value to cluster.quorum-type
          other than {none,auto,fixed}
        * check the default value of cluster.quorum-count
        * set cluster.quorum-type to fixed and cluster.quorum-count to 1
        * start I/O from the mount point
        * kill 2 of the brick processes from each replica set.
        * set cluster.quorum-type to auto

        """
        # pylint: disable=too-many-locals,too-many-statements
        # check the default value of cluster.quorum-type
        option = "cluster.quorum-type"
        g.log.info("Getting %s for the volume %s", option, self.volname)
        option_dict = get_volume_options(self.mnode, self.volname, option)
        self.assertIsNotNone(option_dict,
                             ("Failed to get %s volume option"
                              " for volume %s" % (option, self.volname)))
        self.assertEqual(option_dict['cluster.quorum-type'], 'auto',
                         ("Default value for %s is not auto"
                          " for volume %s" % (option, self.volname)))
        g.log.info("Succesfully verified default value of %s for volume %s",
                   option, self.volname)

        # set the junk value to cluster.quorum-type
        junk_values = ["123", "abcd", "fixxed", "Aauto"]
        for each_junk_value in junk_values:
            options = {"cluster.quorum-type": "%s" % each_junk_value}
            g.log.info("setting %s for the volume "
                       "%s", options, self.volname)
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertFalse(ret, ("Able to set junk value %s for "
                                   "volume %s" % (options, self.volname)))
            g.log.info(
                "Expected: Unable to set junk value %s "
                "for volume %s", options, self.volname)

        # check the default value of cluster.quorum-count
        option = "cluster.quorum-count"
        g.log.info("Getting %s for the volume %s", option, self.volname)
        option_dict = get_volume_options(self.mnode, self.volname, option)
        self.assertIsNotNone(option_dict,
                             ("Failed to get %s volume option"
                              " for volume %s" % (option, self.volname)))
        self.assertEqual(option_dict['cluster.quorum-count'], '(null)',
                         ("Default value for %s is not null"
                          " for volume %s" % (option, self.volname)))
        g.log.info("Successful in getting %s for the volume %s", option,
                   self.volname)

        # set cluster.quorum-type to fixed and cluster.quorum-count to 1
        options = {"cluster.quorum-type": "fixed", "cluster.quorum-count": "1"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # create files
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mounts)
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertFalse(
            ret,
            "IO failed on %s with '%s'" % (self.mounts[0].client_system, err))

        # get the subvolumes
        g.log.info("starting to get subvolumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s is %s", self.volname,
                   num_subvols)

        # bring 2 bricks offline for each of the subvolumes
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            bricks_to_bring_offline = subvol_brick_list[0:2]
            g.log.info("Going to bring down the brick process "
                       "for %s", bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                                  "check the log file for more details."))
            g.log.info("Brought down the brick process "
                       "for %s successfully", bricks_to_bring_offline)

        # create files
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mounts)
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name second_file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertFalse(
            ret,
            "IO failed on %s with '%s'" % (self.mounts[0].client_system, err))

        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
        g.log.info("setting %s for volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # create files
        all_mounts_procs = []
        g.log.info("Starting IO on mountpount...")
        g.log.info("mounts: %s", self.mounts)
        cmd = ("mkdir %s/newdir && touch %s/newdir/myfile{1..3}.txt" %
               (self.mounts[0].mountpoint, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while creating files")
Code example #21
    def test_metadata_self_heal_client_side_heal(self):
        """
        Testcase steps:
        1.Turn off the option self-heal-daemon
        2.Create IO
        3.Calculate arequal of the bricks and mount point
        4.Bring down "brick1" process
        5.Change the permissions of the directories and files
        6.Change the ownership of the directories and files
        7.Change the group of the directories and files
        8.Bring back the brick "brick1" process
        9.Execute "find . | xargs stat" from the mount point to trigger heal
        10.Verify the changes in permissions are not self healed on brick1
        11.Verify the changes in permissions on all bricks but brick1
        12.Verify the changes in ownership are not self healed on brick1
        13.Verify the changes in ownership on all the bricks but brick1
        14.Verify the changes in group are not successfully self-healed
           on brick1
        15.Verify the changes in group on all the bricks but brick1
        16.Turn on the option metadata-self-heal
        17.Execute "find . | xargs md5sum" from the mount point to trigger heal
        18.Wait for heal to complete
        19.Verify the changes in permissions are self-healed on brick1
        20.Verify the changes in ownership are successfully self-healed
           on brick1
        21.Verify the changes in group are successfully self-healed on brick1
        22.Calculate arequal check on all the bricks and mount point
        """
        # Setting options
        ret = set_volume_options(self.mnode, self.volname,
                                 {"self-heal-daemon": "off"})
        self.assertTrue(ret, 'Failed to set option self-heal-daemon to OFF')
        g.log.info("Options are set successfully")

        # Creating files on client side
        self.test_meta_data_self_heal_folder = 'test_meta_data_self_heal'
        for mount_object in self.mounts:
            command = ("cd {0}/ ; mkdir {1} ; cd {1}/ ;"
                       "for i in `seq 1 100` ; "
                       "do mkdir dir.$i ; "
                       "for j in `seq 1 5` ; "
                       "do dd if=/dev/urandom of=dir.$i/file.$j "
                       "bs=1K count=$j ; done ; done ;".format
                       (mount_object.mountpoint,
                        self.test_meta_data_self_heal_folder))
            proc = g.run_async(mount_object.client_system, command,
                               user=mount_object.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        self.validate_io_on_clients()

        # Calculate and check arequal of the bricks and mount point
        self.check_arequal_from_mount_point_and_bricks()

        # Select bricks to bring offline from a replica set
        subvols_dict = get_subvols(self.mnode, self.volname)
        subvols = subvols_dict['volume_subvols']
        bricks_to_bring_offline = []
        bricks_to_be_online = []
        for subvol in subvols:
            bricks_to_bring_offline.append(subvol[0])
            for brick in subvol[1:]:
                bricks_to_be_online.append(brick)

        # Bring bricks offline
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
                        bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Change the permissions of the directories and files
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            command = ('cd {}/{}; '
                       'for i in `seq 1 100` ; '
                       'do chmod 555 dir.$i ; done ; '
                       'for i in `seq 1 50` ; '
                       'do for j in `seq 1 5` ; '
                       'do chmod 666 dir.$i/file.$j ; done ; done ; '
                       'for i in `seq 51 100` ; '
                       'do for j in `seq 1 5` ; '
                       'do chmod 444 dir.$i/file.$j ; done ; done ;'
                       .format(mount_obj.mountpoint,
                               self.test_meta_data_self_heal_folder))

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.validate_io_on_clients()

        # Change the ownership of the directories and files
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            command = ('cd {}/{} ; '
                       'for i in `seq 1 35` ; '
                       'do chown -R qa_func dir.$i ; done ; '
                       'for i in `seq 36 70` ; '
                       'do chown -R qa_system dir.$i ; done ; '
                       'for i in `seq 71 100` ; '
                       'do chown -R qa_perf dir.$i ; done ;'
                       .format(mount_obj.mountpoint,
                               self.test_meta_data_self_heal_folder))
            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.validate_io_on_clients()

        # Change the group of the directories and files
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            command = ('cd {}/{}; '
                       'for i in `seq 1 100` ; '
                       'do chgrp -R qa_all dir.$i ; done ;'
                       .format(mount_obj.mountpoint,
                               self.test_meta_data_self_heal_folder))

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.validate_io_on_clients()

        # Bring brick online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s online' %
                        bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Trigger heal from mount point
        self.trigger_heal_from_mount_point()

        # Verify the changes are not self healed on brick1 for each subvol
        for brick in bricks_to_bring_offline:
            node, brick_path = brick.split(':')

            dir_list = get_dir_contents(node, "{}/{}".format(
                brick_path, self.test_meta_data_self_heal_folder))
            self.assertIsNotNone(dir_list, "Dir list from "
                                 "brick is empty")
            g.log.info("Successfully got dir list from bick")

            # Verify changes for dirs
            for folder in dir_list:

                ret = get_file_stat(node, "{}/{}/{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    folder))

                self.assertEqual('755', ret['access'],
                                 "Permissions mismatch on node {}"
                                 .format(node))

                self.assertEqual('root', ret['username'],
                                 "User id mismatch on node {}"
                                 .format(node))

                self.assertEqual('root', ret['groupname'],
                                 "Group id mismatch on node {}"
                                 .format(node))

                # Get list of files for each dir
                file_list = get_dir_contents(node, "{}/{}/{}".format(
                    brick_path, self.test_meta_data_self_heal_folder,
                    folder))
                self.assertIsNotNone(file_list, "File list from "
                                     "brick is empty.")
                g.log.info("Successfully got file list from bick.")

                if file_list:
                    for file_name in file_list:

                        ret = get_file_stat(node, "{}/{}/{}/{}".format(
                            brick_path, self.test_meta_data_self_heal_folder,
                            folder, file_name))

                        self.assertEqual('644', ret['access'],
                                         "Permissions mismatch on node"
                                         " {} for file {}".format(node,
                                                                  file_name))

                        self.assertEqual('root', ret['username'],
                                         "User id mismatch on node"
                                         " {} for file {}".format(node,
                                                                  file_name))

                        self.assertEqual('root', ret['groupname'],
                                         "Group id mismatch on node"
                                         " {} for file {}".format(node,
                                                                  file_name))

        # Verify the changes are self healed on all bricks except brick1
        # for each subvol
        self.check_permssions_on_bricks(bricks_to_be_online)

        # Setting options
        ret = set_volume_options(self.mnode, self.volname,
                                 {"metadata-self-heal": "on"})
        self.assertTrue(ret, 'Failed to set option metadata-self-heal to ON')
        g.log.info("Options are set successfully")

        # Trigger heal from mount point
        self.trigger_heal_from_mount_point()

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Verify the changes are self healed on brick1 for each subvol
        self.check_permssions_on_bricks(bricks_to_bring_offline)

        # Calculate and check arequal of the bricks and mount point
        self.check_arequal_from_mount_point_and_bricks()
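
The per-brick permission, owner and group checks above repeat the same three assertions. A compact helper built on get_file_stat (used with the same signature as in the test) is sketched below; the glustolibs.gluster.glusterfile import path and the helper name are assumptions.

from glustolibs.gluster.glusterfile import get_file_stat


def metadata_matches(node, path, access='644', user='root', group='root'):
    """Check mode/owner/group of a path directly on a brick backend."""
    stat = get_file_stat(node, path)
    if stat is None:
        raise RuntimeError('stat failed for %s on %s' % (path, node))
    return (stat['access'] == access and
            stat['username'] == user and
            stat['groupname'] == group)
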
Code example #22
    def test_afr_dir_entry_creation_with_subvol_down(self):
        """
        1. Create a distributed-replicated(3X3)/distributed-arbiter(3X(2+1))
           and mount it on one client
        2. Kill 3 bricks corresponding to the 1st subvol
        3. Unmount and remount the volume on the same client
        4. Create deep dir from mount point
           mkdir -p dir1/subdir1/deepdir1
        5. Create files under dir1/subdir1/deepdir1; touch <filename>
        6. Now bring all sub-vols up by volume start force
        7. Validate backend bricks for dir creation, the subvol which is
           offline will have no dirs created, whereas other subvols will have
           dirs created from step 4
        8. Trigger heal from client by "#find . | xargs stat"
        9. Verify that the directory entries are created on all back-end bricks
        10. Create new dir (dir2) on location dir1/subdir1/deepdir1
        11. Trigger rebalance and wait for the completion
        12. Check backend bricks for all entries of dirs
        13. Check if files are getting created on the subvol which was offline
        """
        # Bring down first subvol of bricks offline
        self.subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        first_subvol = self.subvols[0]
        ret = bring_bricks_offline(self.volname, first_subvol)
        self.assertTrue(
            ret, "Unable to bring {} bricks offline".format(first_subvol))

        # Check bricks are offline or not
        ret = are_bricks_offline(self.mnode, self.volname, first_subvol)
        self.assertTrue(ret, "Bricks {} are still online".format(first_subvol))

        # Unmount and remount the volume
        ret, _, _ = umount_volume(self.mounts[0].client_system,
                                  self.mounts[0].mountpoint)
        self.assertFalse(ret, "Failed to unmount volume.")
        ret, _, _ = mount_volume(self.volname, self.mount_type,
                                 self.mounts[0].mountpoint, self.mnode,
                                 self.mounts[0].client_system)
        self.assertFalse(ret, "Failed to remount volume.")
        g.log.info('Successfully umounted and remounted volume.')

        # A short sleep is required here, otherwise file creation may fail
        sleep(2)

        # Create dir `dir1/subdir1/deepdir1` on the mount point
        directory1 = "dir1/subdir1/deepdir1"
        path = self.mounts[0].mountpoint + "/" + directory1
        ret = mkdir(self.mounts[0].client_system, path, parents=True)
        self.assertTrue(ret, "Directory {} creation failed".format(path))

        # Create files on the 2nd and 3rd subvols which are online
        brickobject = create_brickobjectlist(self.subvols, directory1)
        self.assertIsNotNone(brickobject, "Failed to get brick object list")
        self._create_number_of_files_on_the_subvol(brickobject[1],
                                                   directory1,
                                                   5,
                                                   mountpath=path)
        self._create_number_of_files_on_the_subvol(brickobject[2],
                                                   directory1,
                                                   5,
                                                   mountpath=path)

        # Bring bricks online using volume start force
        ret, _, err = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, err)
        g.log.info("Volume: %s started successfully", self.volname)

        # Check all bricks are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Some processes are still offline after volume start for "
            "volume: {}".format(self.volname))

        # Validate Directory is not created on the bricks of the subvol which
        # is offline
        for subvol in self.subvols:
            self._check_file_exists(subvol,
                                    "/" + directory1,
                                    exists=(subvol != first_subvol))

        # Trigger heal from the client
        cmd = "cd {}; find . | xargs stat".format(self.mounts[0].mountpoint)
        ret, _, err = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, err)

        # Validate the directory1 is present on all the bricks
        for subvol in self.subvols:
            self._check_file_exists(subvol, "/" + directory1, exists=True)

        # Create new dir (dir2) on location dir1/subdir1/deepdir1
        directory2 = "/" + directory1 + '/dir2'
        path = self.mounts[0].mountpoint + directory2
        ret = mkdir(self.mounts[0].client_system, path, parents=True)
        self.assertTrue(ret, "Directory {} creation failed".format(path))

        # Trigger rebalance and validate the completion
        ret, _, err = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, err)
        g.log.info("Rebalance on volume %s started successfully", self.volname)
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(
            ret, "Rebalance didn't complete on the volume: "
            "{}".format(self.volname))

        # Validate all dirs are present on all bricks in each subvols
        for subvol in self.subvols:
            for each_dir in ("/" + directory1, directory2):
                self._check_file_exists(subvol, each_dir, exists=True)

        # Validate if files are getting created on the subvol which was
        # offline
        self._create_number_of_files_on_the_subvol(brickobject[0],
                                                   directory1,
                                                   5,
                                                   mountpath=path)
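
Step 8 of the example triggers heal from the client by crawling the mount with stat. A minimal standalone version of that crawl is below; it only uses g.run, as the test does, and the function name is illustrative.

from glusto.core import Glusto as g


def trigger_lookup_heal(client, mountpoint):
    """Crawl the mount with stat() so lookups trigger self-heal on access."""
    cmd = 'cd %s; find . | xargs stat > /dev/null' % mountpoint
    ret, _, err = g.run(client, cmd)
    if ret:
        raise RuntimeError('lookup crawl failed on %s: %s' % (client, err))
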
Code example #23
    def test_file_access(self):
        """
        Test file access.
        """
        # pylint: disable=protected-access
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-statements
        mount_obj = self.mounts[0]
        mountpoint = mount_obj.mountpoint

        # get subvol list
        subvols = (get_subvols(self.mnode, self.volname))['volume_subvols']
        self.assertIsNotNone(subvols, "failed to get subvols")

        # create a file
        srcfile = mountpoint + '/testfile'
        ret, _, err = g.run(self.clients[0], ("touch %s" % srcfile))
        self.assertEqual(ret, 0, ("File creation failed for %s err %s",
                                  srcfile, err))
        g.log.info("testfile creation successful")

        # find hashed subvol
        srchashed, scount = find_hashed_subvol(subvols, "/", "testfile")
        self.assertIsNotNone(srchashed, "could not find srchashed")
        g.log.info("hashed subvol for srcfile %s subvol count %s",
                   srchashed._host, str(scount))

        # rename the file such that the new name hashes to a new subvol
        tmp = find_new_hashed(subvols, "/", "testfile")
        self.assertIsNotNone(tmp, "could not find new hashed for dstfile")
        g.log.info("dst file name : %s dst hashed_subvol : %s "
                   "subvol count : %s", tmp.newname,
                   tmp.hashedbrickobject._host, str(tmp.subvol_count))

        dstname = str(tmp.newname)
        dstfile = mountpoint + "/" + dstname
        dsthashed = tmp.hashedbrickobject
        dcount = tmp.subvol_count
        ret, _, err = g.run(self.clients[0], ("mv %s %s" %
                                              (srcfile, dstfile)))
        self.assertEqual(ret, 0, ("rename failed for %s err %s",
                                  srcfile, err))
        g.log.info("cmd: mv srcfile dstfile successful")

        # check that on dsthash_subvol the file is a linkto file
        filepath = dsthashed._fqpath + "/" + dstname
        file_stat = get_file_stat(dsthashed._host, filepath)
        self.assertEqual(file_stat['access'], "1000", ("Expected file "
                                                       "permission to be 1000"
                                                       " on subvol %s",
                                                       dsthashed._host))
        g.log.info("dsthash_subvol has the expected linkto file")

        # check on srchashed the file is a data file
        filepath = srchashed._fqpath + "/" + dstname
        file_stat = get_file_stat(srchashed._host, filepath)
        self.assertNotEqual(file_stat['access'], "1000",
                            ("Expected file permission not to be 1000 "
                             "on subvol %s", srchashed._host))

        # Bring down the hashed subvol of dstfile(linkto file)
        ret = bring_bricks_offline(self.volname, subvols[dcount])
        self.assertTrue(ret, ('Error in bringing down subvolume %s',
                              subvols[dcount]))
        g.log.info('dst subvol %s is offline', subvols[dcount])

        # Need to access the file through a fresh lookup through a new mount
        # create a new dir on the server node to use as a temporary mount point
        ret, _, _ = g.run(self.mnode, ("mkdir -p /mnt"))
        self.assertEqual(ret, 0, ('mkdir of mount dir failed'))
        g.log.info("mkdir of mount dir succeeded")

        # do a temp mount
        ret, _, _ = mount_volume(self.volname, self.mount_type, "/mnt",
                                 self.mnode, self.mnode)
        self.assertEqual(ret, 0, 'temporary mount failed')
        g.log.info("temporary mount succeeded")

        # check that file is accessible (stat)
        ret, _, _ = g.run(self.mnode, ("stat /mnt/%s" % dstname))
        self.assertEqual(ret, 0, ('stat failed for dst file %s', dstname))
        g.log.info("stat on /mnt/%s successful", dstname)

        # cleanup temporary mount
        ret, _, _ = umount_volume(self.mnode, "/mnt")
        self.assertEqual(ret, 0, 'temporary unmount failed')
        g.log.info("umount successful")

        # Bring up the hashed subvol
        ret = bring_bricks_online(self.mnode, self.volname, subvols[dcount],
                                  bring_bricks_online_methods=None)
        self.assertTrue(ret, "Error in bringing back subvol online")
        g.log.info('Subvol is back online')

        # now bring down the cached subvol
        ret = bring_bricks_offline(self.volname, subvols[scount])
        self.assertTrue(ret, ('Error in bringing down subvolume %s',
                              subvols[scount]))
        g.log.info('target subvol %s is offline', subvols[scount])

        # file access should fail
        ret, _, _ = g.run(self.clients[0], ("stat %s" % dstfile))
        self.assertEqual(ret, 1,
                         ('Expected stat to fail for file %s', dstfile))
        g.log.info("dstfile access failed as expected")
Code example #24
    def test_snap_self_heal(self):
        """
        Steps:

        1. create a volume
        2. mount volume
        3. create snapshot of that volume
        4. Activate snapshot
        5. Clone snapshot and Mount
        6. Perform I/O
        7. Bring Down Few bricks from volume without
           affecting the volume or cluster.
        8. Perform I/O
        9. Bring the down bricks back online
        10. Validate heal is complete with arequal

        """
        # pylint: disable=too-many-statements, too-many-locals
        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(
            ret, 0, ("Failed to create snapshot for volume %s" % self.volname))
        g.log.info("Snapshot %s created successfully for volume %s", self.snap,
                   self.volname)

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to Activate snapshot %s" % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # snapshot list
        ret, _, _ = snap_list(self.mnode)
        self.assertEqual(ret, 0, ("Failed to list all the snapshot"))
        g.log.info("Snapshot list command was successful")

        # Creating a Clone volume from snapshot:
        g.log.info("Starting to Clone volume from Snapshot")
        ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
        self.assertEqual(ret, 0, ("Failed to clone %s from snapshot %s" %
                                  (self.clone, self.snap)))
        g.log.info("%s created successfully", self.clone)

        # Start the cloned volume
        g.log.info("Starting the cloned volume")
        ret, _, _ = volume_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start clone %s" % self.clone)
        g.log.info("clone volume %s started successfully", self.clone)

        # Mounting a clone volume
        g.log.info("Mounting a clone volume")
        ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                                 self.mnode, self.clients[0])
        self.assertEqual(ret, 0,
                         "Failed to mount clone Volume %s" % self.clone)
        g.log.info("Clone volume %s mounted Successfully", self.clone)

        # Checking cloned volume mounted or not
        ret = is_mounted(self.clone, self.mount1, self.mnode, self.clients[0],
                         self.mount_type)
        self.assertTrue(
            ret,
            "Failed to mount clone volume on mount point: %s" % self.mount1)
        g.log.info("clone Volume %s mounted on %s", self.clone, self.mount1)

        # write files on all mounts
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mount1)
        cmd = ("python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mount1))
        ret, _, err = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "IO failed on %s with error %s"
                         % (self.clients[0], err))
        g.log.info("Successful in creating I/O on mounts")

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.clone)
        bricks_list = get_all_bricks(self.mnode, self.clone)
        g.log.info("Brick List : %s", bricks_list)

        # Select bricks to bring offline
        g.log.info("Starting to bring bricks to offline")
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(filter(
            None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                   bricks_to_bring_offline_dict['cold_tier_bricks'] +
                   bricks_to_bring_offline_dict['volume_bricks'])))
        g.log.info("Brick to bring offline: %s ", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.clone, bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring the bricks offline")
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Offline Bricks list
        offline_bricks = get_offline_bricks_list(self.mnode, self.clone)
        self.assertIsNotNone(
            offline_bricks, "Failed to get offline brick list "
            "for volume %s" % self.clone)
        for bricks in offline_bricks:
            self.assertIn(bricks, bricks_to_bring_offline,
                          "Failed to validate "
                          "Bricks offline")
        g.log.info("Bricks Offline: %s", offline_bricks)

        # Online Bricks list
        online_bricks = get_online_bricks_list(self.mnode, self.clone)
        self.assertIsNotNone(
            online_bricks, "Failed to get online bricks"
            " for volume %s" % self.clone)
        g.log.info("Bricks Online: %s", online_bricks)

        # write files mountpoint
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s", self.mount1)
        cmd = ("python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mount1))
        ret, _, err = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "IO failed on %s with error %s"
                         % (self.clients[0], err))
        g.log.info("Successful in creating I/O on mounts")

        # Bring all bricks online
        g.log.info("bring all bricks online")
        ret = bring_bricks_online(self.mnode, self.clone,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, "Failed to bring bricks online")
        g.log.info("Successful in bringing all bricks online")

        # Validate Bricks are online
        g.log.info("Validating all bricks are online")
        ret = are_bricks_online(self.mnode, self.clone, bricks_list)
        self.assertTrue(ret, "Failed to bring all the bricks online")
        g.log.info("bricks online: %s", bricks_list)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.clone)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online" % self.clone))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.clone)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.clone)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.clone))
        g.log.info("Volume %s : All process are online", self.clone)

        # wait for the heal process to complete
        g.log.info("waiting for heal process to complete")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "Failed to complete the heal process")
        g.log.info("Successfully completed heal process")

        # Check areequal
        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.clone)
        subvols = get_subvols(self.mnode, self.clone)
        num_subvols = len(subvols['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # Get arequals and compare
        g.log.info("Starting to Compare areequals")
        for i in range(0, num_subvols):
            # Get arequal for the first brick of the subvol
            subvol_brick_list = subvols['volume_subvols'][i]
            node, brick_path = subvol_brick_list[0].split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s'
                             % subvol_brick_list[0])
            first_brick_total = arequal.splitlines()[-1].split(':')[-1]

            # Get arequal for every brick and compare with the first brick
            for brick in subvol_brick_list:
                node, brick_path = brick.split(':')
                command = ('arequal-checksum -p %s '
                           '-i .glusterfs -i .landfill -i .trashcan'
                           % brick_path)
                ret, brick_arequal, _ = g.run(node, command)
                self.assertFalse(ret,
                                 'Failed to get arequal on brick %s' % brick)
                g.log.info('Getting arequal for %s is successful', brick)
                brick_total = brick_arequal.splitlines()[-1].split(':')[-1]
                self.assertEqual(
                    first_brick_total, brick_total,
                    'Arequals for subvol and %s are not equal' % brick)
                g.log.info('Arequals for subvol and %s are equal', brick)
        g.log.info('All arequals are equal for the volume %s', self.clone)
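
The per-brick checksum logic above can be isolated into one function that runs arequal-checksum on a brick and returns the total line, parsing the output the same way the test does. It assumes the arequal-checksum binary is installed on the brick nodes; the helper name is illustrative.

from glusto.core import Glusto as g


def brick_arequal_total(node, brick_path):
    """Run arequal-checksum on one brick and return its total checksum."""
    cmd = ('arequal-checksum -p %s -i .glusterfs -i .landfill -i .trashcan'
           % brick_path)
    ret, out, err = g.run(node, cmd)
    if ret:
        raise RuntimeError('arequal-checksum failed on %s: %s' % (node, err))
    # As in the example above, the last output line carries the total checksum
    return out.splitlines()[-1].split(':')[-1].strip()
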
Code example #25
    def test_resolving_meta_data(self):
        """
        - Create a file test_file.txt
        - Find out which brick the file resides on and kill arbiter brick
        in the replica pair
        - Modify the permissions of the file
        - Bring back the killed brick
        - Kill the other brick in the replica pair
        - Modify the permissions of the file
        - Bring back the killed brick
        - Trigger heal
        - Check if heal is completed
        - Check for split-brain
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Creating files on client side
        file_to_create = 'test_file.txt'
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create file
            g.log.info('Creating file...')
            command = ("cd %s ; "
                       "touch %s" % (mount_obj.mountpoint, file_to_create))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # get bricks with file
        g.log.info('Getting bricks with file...')
        subvols_dict = get_subvols(self.mnode, self.volname)
        brick_list_with_file = []
        for subvol in subvols_dict['volume_subvols']:
            for brick in subvol:
                node, brick_path = brick.split(':')
                ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
                if 'test_file.txt' in brick_file_list:
                    brick_list_with_file.append(brick)
        g.log.info('Bricks with file: %s', brick_list_with_file)

        # Bring arbiter brick offline
        bricks_to_bring_offline = [brick_list_with_file[-1]]
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify the data
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Modifying data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Modify the permissions
            g.log.info('Modifying the permissions of the file...')
            command = ("cd %s ; "
                       "chmod 600 %s" % (mount_obj.mountpoint, file_to_create))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring arbiter brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Bring the first data brick offline
        bricks_to_bring_offline = [brick_list_with_file[0]]
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify the data
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Modifying data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Modify the permissions
            g.log.info('Modifying the permissions of the file...')
            command = ("cd %s ; "
                       "chmod 644 %s" % (mount_obj.mountpoint, file_to_create))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring the first data brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')
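
The closing sequence of this example — wait for heal, confirm completion, and rule out split-brain — recurs in most of these tests. A sketch of it as one reusable check is below, using the same heal_libs calls and signatures as the test; the import path and the function name are assumptions.

from glustolibs.gluster.heal_libs import (is_heal_complete,
                                          is_volume_in_split_brain,
                                          monitor_heal_completion)


def wait_for_clean_heal(mnode, volname):
    """Wait for heal to finish and confirm the volume is not in split-brain."""
    if not monitor_heal_completion(mnode, volname):
        return False
    if not is_heal_complete(mnode, volname):
        return False
    return not is_volume_in_split_brain(mnode, volname)
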
Code example #26
    def test_custom_xattr_with_subvol_down_dir_exists(self):
        """
        Steps:
        1) Create directories from mount point.
        2) Bring one or more(not all) dht sub-volume(s) down by killing
           processes on that server
        3) Create a custom xattr for dir hashed to down sub-volume and also for
           another dir not hashing to down sub-volumes
           # setfattr -n user.foo -v bar2 <dir>
        4) Verify that custom xattr for directory is displayed on mount point
           and bricks for both directories
           # getfattr -n user.foo <dir>
           # getfattr -n user.foo <brick_path>/<dir>
        5) Modify custom xattr value and verify that custom xattr for directory
           is displayed on mount point and all up bricks
           # setfattr -n user.foo -v ABC <dir>
        6) Verify that custom xattr is not displayed once you remove it on
           mount point and all up bricks
        7) Verify that mount point shows pathinfo xattr for dir hashed to down
           sub-volume and also for dir not hashed to down sub-volumes
           # getfattr -n trusted.glusterfs.pathinfo <dir>
        8) Again create a custom xattr for dir not hashing to down sub-volumes
           # setfattr -n user.foo -v star1 <dir>
        9) Bring up the sub-volumes
        10) Execute lookup on parent directory of both <dir> from mount point
        11) Verify Custom extended attributes for dir1 on all bricks
        """
        # pylint: disable=protected-access
        # Create dir1 on client0
        self._create_dir(dir_name="dir1")

        # Get subvol list
        subvols = (get_subvols(self.mnode, self.volname))['volume_subvols']
        self.assertIsNotNone(subvols, "Failed to get subvols")

        # Finding a dir name such that it hashes to a different subvol
        newhash = find_new_hashed(subvols, "/", "dir1")
        new_name = str(newhash.newname)
        new_subvol_count = newhash.subvol_count

        # Create a dir with the new name
        self._create_dir(dir_name=new_name)

        # Kill the brick/subvol to which the new dir hashes
        ret = bring_bricks_offline(
            self.volname, subvols[new_subvol_count])
        self.assertTrue(ret, ('Error in bringing down subvolume %s',
                              subvols[new_subvol_count]))
        g.log.info('DHT subvol %s is offline', subvols[new_subvol_count])

        # Set the xattr on dir hashing to down subvol
        ret = set_fattr(self.client, '{}/{}'.format(self.m_point, new_name),
                        'user.foo', 'bar2')
        self.assertFalse(ret, "Unexpected: custom xattr set successfully"
                              " for dir hashing to down subvol")
        g.log.info("Expected: Failed to set xattr on dir:%s"
                   " which hashes to down subvol due to error: Transport"
                   " endpoint not connected", new_name)

        # Check if the trusted.glusterfs.pathinfo is displayed
        # for dir hashing to down subvol on mointpoint
        ret = get_fattr(self.client, '{}/{}'.format(
            self.m_point, new_name), 'trusted.glusterfs.pathinfo')
        self.assertIsNotNone(ret, "Failed to get the xattr"
                             " on:{}".format(self.client))
        g.log.info("The xattr trusted.glusterfs.pathinfo"
                   " is displayed on mointpoint for %s", new_name)

        # Set the xattr on dir hashing to down subvol
        ret = set_fattr(self.client, '{}/{}'.format(self.m_point, new_name),
                        'user.foo', 'star1')
        self.assertFalse(ret, "Unexpected: custom xattr set successfully"
                              " for dir hashing to down subvol")
        g.log.info("Expected: Tansport endpoint not connected")

        # Calling the local function
        self._create_xattr_check_self_heal()
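
The xattr handling in this example reduces to a set/read round trip with set_fattr and get_fattr, called with the same signatures as above. A minimal sketch follows; the import path and the user.foo key mirror the test, while the helper name is illustrative.

from glustolibs.gluster.glusterfile import get_fattr, set_fattr


def xattr_round_trip(client, path, value='bar2'):
    """Set user.foo on a path from the mount and read it back."""
    if not set_fattr(client, path, 'user.foo', value):
        return False
    return get_fattr(client, path, 'user.foo') is not None
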
Code example #27
    def test_client_side_quorum_with_fixed_for_cross3(self):
        """
        Test Script to verify the Client Side Quorum with fixed
        for cross 3 volume

        * Disable self heal daemon
        * set cluster.quorum-type to fixed
        * start I/O (write and read) from the mount point - must succeed
        * Bring down brick1
        * start I/O (write and read) - must succeed
        * Bring down brick2
        * start I/O (write and read) - must succeed
        * set the cluster.quorum-count to 1
        * start I/O (write and read) - must succeed
        * set the cluster.quorum-count to 2
        * start I/O (write and read) - read and write will fail
        * bring back brick1 online
        * start I/O (write and read) - must succeed
        * bring back brick2 online
        * start I/O (write and read) - must succeed
        * set cluster.quorum-type to auto
        * start I/O (write and read) - must succeed
        * Bring down brick1 and brick2
        * start I/O (write and read) - read and write will fail
        * set the cluster.quorum-count to 1
        * start I/O (write and read) - read and write will fail
        * set the cluster.quorum-count to 3
        * start I/O (write and read) - read and write will fail
        * set the quorum-type to none
        * start I/O (write and read) - must succeed

        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        # Disable self heal daemon
        options = {"cluster.self-heal-daemon": "off"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # set cluster.quorum-type to fixed
        options = {"cluster.quorum-type": "fixed"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O( write ) - must succeed
        all_mounts_procs = []
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "Reads failed on some of the clients")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s", self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:", num_subvols)

        # bring down brick1 for all the subvolumes
        offline_brick1_from_replicasets = []
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            brick_to_bring_offline1 = subvol_brick_list[0]
            g.log.info("Going to bring down the brick process "
                       "for %s", brick_to_bring_offline1)
            ret = bring_bricks_offline(self.volname, brick_to_bring_offline1)
            self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                                  "check the log file for more details."))
            g.log.info("Brought down the brick process "
                       "for %s successfully", brick_to_bring_offline1)
            offline_brick1_from_replicasets.append(brick_to_bring_offline1)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name testfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)

        # bring down brick2 for all the subvolumes
        offline_brick2_from_replicasets = []
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            brick_to_bring_offline2 = subvol_brick_list[1]
            g.log.info("Going to bring down the brick process "
                       "for %s", brick_to_bring_offline2)
            ret = bring_bricks_offline(self.volname, brick_to_bring_offline2)
            self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                                  "check the log file for more details."))
            g.log.info("Brought down the brick process "
                       "for %s successfully", brick_to_bring_offline2)
            offline_brick2_from_replicasets.append(brick_to_bring_offline2)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name newfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)

        # set the cluster.quorum-count to 1
        options = {"cluster.quorum-count": "1"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(
            ret, "Unable to set %s for volume %s" % (options, self.volname))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/0 ( write and read ) - must succeed
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name filename %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)

        # set the cluster.quorum-count to 2
        options = {"cluster.quorum-count": "2"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O (write and read) - read and write will fail
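        # Note: at this point only one brick per replica set is still online
        # (brick1 and brick2 are both down) while cluster.quorum-count is 2,
        # so client quorum is not met and the write and read below are
        # expected to fail with "Transport endpoint is not connected".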
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("dd if=/dev/urandom of=%s/test_file bs=1M count=1" %
               self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected Error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while creating file")

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("cat %s/file1.txt" % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while reading file")

        # bring back the brick1 online for all subvolumes
        g.log.info("bringing up the brick : %s online",
                   offline_brick1_from_replicasets)
        ret = bring_bricks_online(
            self.mnode,
            self.volname,
            offline_brick1_from_replicasets,
            bring_bricks_online_methods='glusterd_restart')
        self.assertTrue(ret, ("Failed to brought the brick %s online" %
                              offline_brick1_from_replicasets))
        g.log.info("Successfully brought the brick %s online",
                   offline_brick1_from_replicasets)

        # start I/O (write and read) - must succeed
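        # With brick1 back online, two bricks per replica set are up again,
        # so cluster.quorum-count of 2 is satisfied and the write and read
        # below are expected to succeed.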
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name newfilename %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)

        # Bring back brick2 online
        g.log.info("bringing up the brick : %s online",
                   offline_brick2_from_replicasets)
        ret = bring_bricks_online(
            self.mnode,
            self.volname,
            offline_brick2_from_replicasets,
            bring_bricks_online_methods='glusterd_restart')
        self.assertTrue(ret, ("Failed to brought the brick %s online" %
                              offline_brick2_from_replicasets))
        g.log.info("Successfully brought the brick %s online",
                   offline_brick2_from_replicasets)

        # start I/O (write and read) - must succeed
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name textfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)

        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set %s for volume %s" %
                              (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O (write and read) - must succeed
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name newtextfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)

        # bring down brick1 and brick2 for all the subvolumes
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s", i, subvol_brick_list)
            bricks_to_bring_offline = subvol_brick_list[0:2]
            g.log.info("Going to bring down the brick process for %s",
                       bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(
                ret, "Failed to bring down the bricks. Please "
                "check the log file for more details.")
            g.log.info("Brought down the brick process "
                       "for %s successfully", bricks_to_bring_offline)

        # start I/O (write and read) - read and write will fail
        all_mounts_procs = []
        g.log.info("Start creating file on mountpoint %s",
                   self.mounts[0].mountpoint)
        cmd = ("dd if=/dev/urandom of=%s/new_test_file bs=1M count=1" %
               self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while creating files")

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        g.log.info("Starting reading file")
        cmd = ("cat %s/file1.txt" % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while reading file")

        # set the cluster.quorum-count to 1
        options = {"cluster.quorum-count": "1"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(
            ret, "Unable to set %s for volume %s" % (options, self.volname))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O (write and read) - read and write will fail
        g.log.info("Start creating files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("dd if=/dev/urandom of=%s/new_test_file bs=1M count=1" %
               self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while creating files")

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("cat %s/file1.txt" % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while reading file")

        # set the cluster.quorum-count to 3
        options = {"cluster.quorum-count": "3"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(
            ret, "Unable to set %s for volume %s" % (options, self.volname))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O (write and read) - read and write will fail
        g.log.info("Start creating files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("dd if=/dev/urandom of=%s/new_test_file bs=1M count=1" %
               self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while creating files")

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("cat %s/file1.txt" % self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with "
                   "Transport endpoint is not connected")
        ret, _ = is_io_procs_fail_with_error(self, all_mounts_procs,
                                             self.mounts, self.mount_type)
        self.assertTrue(ret, ("Unexpected error and IO successful"
                              " on not connected transport endpoint"))
        g.log.info("EXPECTED: Transport endpoint is not connected"
                   " while reading file")

        # set the quorum-type to none
        options = {"cluster.quorum-type": "none"}
        g.log.info("setting %s for the volume %s", options, self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(
            ret, "Unable to set %s for volume %s" % (options, self.volname))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # start I/O (write and read) - must succeed
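        # With cluster.quorum-type set to none, client-side quorum is no
        # longer enforced, so the write and read below succeed even though
        # two bricks per replica set are still offline.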
        g.log.info("Starting IO on mountpoint %s", self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name lastfile %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "IO failed on mountpoint %s" % self.mounts[0].mountpoint)

        # read the file
        g.log.info("Start reading files on mountpoint %s",
                   self.mounts[0].mountpoint)
        all_mounts_procs = []
        cmd = "/usr/bin/env python %s read %s" % (self.script_upload_path,
                                                  self.mounts[0].mountpoint)
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(all_mounts_procs, self.mounts),
            "Reads failed on mountpoint %s" % self.mounts[0].mountpoint)
Code Example #28
    def test_client_side_quorum_with_auto_option(self):
        """
        Test Script to verify the Client Side Quorum with auto option

        * set cluster.quorum-type to auto.
        * start I/O from the mount point.
        * kill 2 of the brick processes from each and every replica set
        * perform ops

        """
        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
        g.log.info("setting cluster.quorum-type to auto on "
                   "volume %s" % self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for"
                              "volume %s" % (options, self.volname)))
        g.log.info("Sucessfully set %s for volume %s" %
                   (options, self.volname))

        # write files on all mounts
        g.log.info("Starting IO on all mounts...")
        g.log.info("mounts: %s" % self.mounts)
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 10 --base-file-name file %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating IO on mounts")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # get the subvolumes
        g.log.info("Starting to get sub-volumes for volume %s" % self.volname)
        subvols_dict = get_subvols(self.mnode, self.volname)
        num_subvols = len(subvols_dict['volume_subvols'])
        g.log.info("Number of subvolumes in volume %s:" % num_subvols)

        # bring bricks offline( 2 bricks ) for all the subvolumes
        for i in range(0, num_subvols):
            subvol_brick_list = subvols_dict['volume_subvols'][i]
            g.log.info("sub-volume %s brick list : %s" %
                       (i, subvol_brick_list))
            # For volume type: 1 * 2, bring 1 brick offline
            if len(subvol_brick_list) == 2:
                bricks_to_bring_offline = subvol_brick_list[0:1]
            else:
                bricks_to_bring_offline = subvol_brick_list[0:2]
            g.log.info("Going to bring down the brick process "
                       "for %s" % bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                                  "check the log file for more details."))
            g.log.info("Brought down the brick process "
                       "for %s succesfully" % bricks_to_bring_offline)

        # create 2 files named newfile0.txt and newfile1.txt
        g.log.info("Start creating 2 files on all mounts...")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_files "
                   "-f 2 --base-file-name newfile %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with read-only filesystem")
        ret = is_io_procs_fail_with_rofs(self, all_mounts_procs, self.mounts)
        self.assertTrue(ret, ("Unexpected error and IO successfull"
                              " on read-only filesystem"))
        g.log.info("EXPECTED: Read-only file system in IO while creating file")

        # create directory user1
        g.log.info("Start creating directory on all mounts...")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s create_deep_dir "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("Validating whether IO failed with read-only filesystem")
        ret = is_io_procs_fail_with_rofs(self, all_mounts_procs, self.mounts)
        self.assertTrue(ret, ("Unexpected error and IO successfull"
                              " on read-only filesystem"))
        g.log.info("EXPECTED: Read-only file system in IO while"
                   " creating directory")

        # create h/w link to file
        g.log.info("Start creating hard link for file0.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "ln %s/file0.txt %s/file0.txt_hwlink" \
                  % (mount_obj.mountpoint, mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and creating hard link"
                                  " successful on read-only filesystem"))
            self.assertIn(
                "Read-only file system", err,
                "Read-only filesystem not found in "
                "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # create s/w link
        g.log.info("Start creating soft link for file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "ln -s %s/file1.txt %s/file1.txt_swlink" %\
                  (mount_obj.mountpoint, mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and creating soft link"
                                  " successful on read-only filesystem"))
            self.assertIn(
                "Read-only file system", err,
                "Read-only filesystem not found in "
                "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # append to file
        g.log.info("Appending to file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "cat %s/file0.txt >> %s/file1.txt" %\
                  (mount_obj.mountpoint, mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and append successful"
                                  " on read-only filesystem"))
            self.assertIn(
                "Read-only file system", err,
                "Read-only filesystem not found in "
                "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # modify the file
        g.log.info("Modifying file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "echo 'Modify Contents' > %s/file1.txt"\
                  % (mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and modifying successful"
                                  " on read-only filesystem"))
            self.assertIn(
                "Read-only file system", err,
                "Read-only filesystem not found in "
                "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # truncate the file
        g.log.info("Truncating file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "truncate -s 0 %s/file1.txt" % (mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertTrue(ret, ("Unexpected error and truncating file"
                                  " successful on read-only filesystem"))
            self.assertIn(
                "Read-only file system", err,
                "Read-only filesystem not found in "
                "IO while truncating file")
            g.log.info("EXPECTED: Read-only file system in IO")

        # read the file
        g.log.info("Starting reading files on all mounts")
        all_mounts_procs = []
        for mount_obj in self.mounts:
            cmd = ("python %s read "
                   "%s" % (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)

        # Validate IO
        g.log.info("validating IO on all mounts")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "Reads failed on some of the clients")
        g.log.info("Reads successful on all mounts")

        # stat on file
        g.log.info("stat on file1.txt on all mounts")
        for mount_obj in self.mounts:
            cmd = "stat %s/file1.txt" % (mount_obj.mountpoint)
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, ("Unexpected error and stat on file fails"
                                   " on read-only filesystem"))
            g.log.info("stat on file is successfull on read-only filesystem")

        # stat on dir
        g.log.info("stat on directory on all mounts")
        for mount_obj in self.mounts:
            cmd = ("python %s stat %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, ("Unexpected error and stat on directory"
                                   " fails on read-only filesystem"))
            g.log.info("stat on dir is successfull on read-only filesystem")

        # ls on mount point
        g.log.info("ls on mount point on all mounts")
        for mount_obj in self.mounts:
            cmd = ("python %s ls %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            ret, out, err = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, ("Unexpected error and listing file fails"
                                   " on read-only filesystem"))
            g.log.info("listing files is successfull on read-only filesystem")
Code Example #29
    def test_status_string(self):
        '''
        -> Create Volume
        -> Start rebalance
        -> Check task type in volume status
        -> Check task status string in volume status
        -> Check task type in volume status xml
        -> Check task status string in volume status xml
        -> Start Remove brick operation
        -> Check task type in volume status
        -> Check task status string in volume status
        -> Check task type in volume status xml
        -> Check task status string in volume status xml
        '''

        # Start rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance for volume %s"
                         % self.volname)
        g.log.info("Rebalance started successfully on volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance failed for volume %s" % self.volname)
        g.log.info("Rebalance completed successfully on volume %s",
                   self.volname)

        # Getting volume status after rebalance start
        ret, out, _ = volume_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                         % self.volname)
        g.log.info("Volume status successful on volume %s", self.volname)
        status_list = out.splitlines()

        # Verifying task type from volume status for rebalance
        self.assertIn('Rebalance', status_list[len(status_list) - 4],
                      "Incorrect task type found in volume status for %s"
                      % self.volname)
        g.log.info("Correct task type found in volume status for %s",
                   self.volname)

        # Verifying task status string in volume status for rebalance
        self.assertIn('completed', status_list[len(status_list) - 2],
                      "Incorrect task status found in volume status for %s"
                      % self.volname)
        g.log.info("Correct task status found in volume status for %s",
                   self.volname)

        # Getting volume status --xml after rebalance start
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options='tasks')
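        # Based on how the parsed status is indexed below, the returned
        # dictionary is assumed to look roughly like:
        #   {<volname>: {'task_status': [{'type': 'Rebalance',
        #                                 'statusStr': 'completed', ...}]}}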

        # Verifying task type  from volume status --xml for rebalance
        self.assertEqual('Rebalance',
                         vol_status[self.volname]['task_status'][0]['type'],
                         "Incorrect task type found in volume status xml "
                         "for %s" % self.volname)
        g.log.info("Correct task type found in volume status xml for %s",
                   self.volname)

        # Verifying task status string from volume status --xml for rebalance
        self.assertEqual(
            'completed',
            vol_status[self.volname]['task_status'][0]['statusStr'],
            "Incorrect task status found in volume status "
            "xml for %s" % self.volname)
        g.log.info("Correct task status found in volume status xml %s",
                   self.volname)

        # Getting sub vols
        subvol_dict = get_subvols(self.mnode, self.volname)
        subvol = subvol_dict['volume_subvols'][1]

        # Perform remove brick start
        ret, _, _ = remove_brick(self.mnode, self.volname, subvol,
                                 'start', replica_count=3)
        self.assertEqual(ret, 0, "Failed to start remove brick operation "
                                 "for %s" % self.volname)
        g.log.info("Remove brick operation started successfully on volume %s",
                   self.volname)

        # Getting volume status after remove brick start
        ret, out, _ = volume_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                         % self.volname)
        g.log.info("Volume status successful on volume %s", self.volname)
        status_list = out.splitlines()

        # Verifying task type from volume status after remove brick start
        self.assertIn('Remove brick', status_list[len(status_list) - 8],
                      "Incorrect task type found in volume status for "
                      "%s" % self.volname)
        g.log.info("Correct task type found in volume status task for %s",
                   self.volname)

        # Verifying task status string in volume status after remove
        # brick start
        ret = False
        remove_status = ['completed', 'in progress']
        if (status_list[len(status_list) - 2].split(':')[1].strip() in
                remove_status):
            ret = True
        self.assertTrue(ret, "Incorrect task status found in volume status "
                             "task for %s" % self.volname)
        g.log.info("Correct task status found in volume status task for %s",
                   self.volname)

        # Getting volume status --xml after remove brick start
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options='tasks')

        # Verifying task type  from volume status --xml after
        # remove brick start
        self.assertEqual('Remove brick',
                         vol_status[self.volname]['task_status'][0]['type'],
                         "Incorrect task type found in volume status xml for "
                         "%s" % self.volname)
        g.log.info("Correct task type found in volume status xml for %s",
                   self.volname)

        # Verifying task status string from volume status --xml
        # after remove brick start
        ret = False
        if (vol_status[self.volname]['task_status'][0]['statusStr'] in
                remove_status):
            ret = True
        self.assertTrue(ret, "Incorrect task status found in volume status "
                             "xml for %s" % self.volname)
        g.log.info("Correct task status found in volume status xml %s",
                   self.volname)
    def test_replacing_all_arbiters(self):
        """
        - Create an arbiter volume 4 x (2+1) distributed-replicate
        - Start writing IO
        - While the I/Os are going on, replace all the arbiter bricks
        - Check that the new bricks are attached successfully
        - Check for heals
        - Validate IO
        """
        # pylint: disable=too-many-locals,too-many-statements
        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick list: %s", bricks_list)

        # Clear all brick folders. This is needed to prevent healing with old files
        for brick in bricks_list:
            g.log.info('Clearing brick %s', brick)
            node, brick_path = brick.split(':')
            ret, _, err = g.run(node, 'cd %s/ ; rm -rf *' % brick_path)
            self.assertFalse(ret, err)
            g.log.info('Clearing brick %s is successful', brick)
        g.log.info('Clearing all bricks is successful')

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 3 -l 3 -n 3 -f 20 %s" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # replace bricks
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for subvol in subvols:
            g.log.info('Replacing arbiter brick for %s', subvol)
            brick_to_replace = subvol[-1]
            self.bricks_to_clean.append(brick_to_replace)
            new_brick = brick_to_replace + 'new'
            g.log.info("Replacing the brick %s for the volume: %s",
                       brick_to_replace, self.volname)
            ret, _, err = replace_brick(self.mnode, self.volname,
                                        brick_to_replace, new_brick)
            self.assertFalse(ret, err)
            g.log.info('Replaced brick %s to %s successfully',
                       brick_to_replace, new_brick)

        # check replaced bricks
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        index = 0
        for subvol in subvols:
            expected_brick_path = self.bricks_to_clean[index] + 'new'
            brick_to_check = subvol[-1]
            self.assertEqual(expected_brick_path, brick_to_check,
                             'Brick %s is not the replaced brick' % brick_to_check)
            index += 1

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s: All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True