Example 1
def select_volume_bricks_to_bring_offline(mnode, volname):
    """Randomly selects bricks to bring offline without affecting the cluster
    from a non-tiered volume.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.

    Returns:
        list: On success, returns a list of bricks that can be brought offline.
            If the volume doesn't exist or is a tiered volume, returns an
            empty list.
    """
    volume_bricks_to_bring_offline = []

    # Check if volume is tiered
    if is_tiered_volume(mnode, volname):
        return volume_bricks_to_bring_offline

    # get volume type
    volume_type_info = get_volume_type_info(mnode, volname)
    volume_type = volume_type_info['volume_type_info']['typeStr']

    # get subvols
    subvols_dict = get_subvols(mnode, volname)
    volume_subvols = subvols_dict['volume_subvols']

    # a pure distribute volume has no redundancy, so no brick can be
    # brought offline without affecting the cluster
    if volume_type == 'Distribute':
        volume_bricks_to_bring_offline = []

    # select bricks from replicated, distributed-replicated volume
    elif (volume_type == 'Replicate'
          or volume_type == 'Distributed-Replicate'):
        # Get replica count
        volume_replica_count = (
            volume_type_info['volume_type_info']['replicaCount'])

        # Get quorum info
        quorum_info = get_client_quorum_info(mnode, volname)
        volume_quorum_info = quorum_info['volume_quorum_info']

        # Get list of bricks to bring offline
        volume_bricks_to_bring_offline = (
            get_bricks_to_bring_offline_from_replicated_volume(
                volume_subvols, volume_replica_count, volume_quorum_info))

    # select bricks from Disperse, Distributed-Disperse volume
    elif (volume_type == 'Disperse' or volume_type == 'Distributed-Disperse'):

        # Get redundancy count
        volume_redundancy_count = (
            volume_type_info['volume_type_info']['redundancyCount'])

        # Get list of bricks to bring offline
        volume_bricks_to_bring_offline = (
            get_bricks_to_bring_offline_from_disperse_volume(
                volume_subvols, volume_redundancy_count))

    return volume_bricks_to_bring_offline
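
A minimal usage sketch (not part of the original library code): it assumes that
select_volume_bricks_to_bring_offline and a bring_bricks_offline helper are
importable from glustolibs.gluster.brick_libs, and it uses placeholder node and
volume names.

from glustolibs.gluster.brick_libs import (
    bring_bricks_offline, select_volume_bricks_to_bring_offline)

mnode = 'gluster-server1.example.com'   # placeholder management node
volname = 'testvol'                     # placeholder non-tiered volume

bricks_to_kill = select_volume_bricks_to_bring_offline(mnode, volname)
if bricks_to_kill:
    # bring_bricks_offline is assumed to accept the volume name and a brick list
    assert bring_bricks_offline(volname, bricks_to_kill), (
        "Failed to bring bricks %s offline" % bricks_to_kill)
else:
    print("No bricks can be brought offline safely (missing, tiered or "
          "pure distribute volume)")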
Example 2
def select_hot_tier_bricks_to_bring_offline(mnode, volname):
    """Randomly selects bricks to bring offline without affecting the cluster
    from a hot tier.

    Args:
        mnode (str): Node on which commands will be executed.
        volname (str): Name of the volume.

    Returns:
        list: On success, returns a list of bricks from the hot tier that can
            be brought offline. If the volume doesn't exist or is a non-tiered
            volume, returns an empty list.
    """
    hot_tier_bricks_to_bring_offline = []

    # Check if volume is tiered
    if not is_tiered_volume(mnode, volname):
        return hot_tier_bricks_to_bring_offline

    # get volume type
    volume_type_info = get_volume_type_info(mnode, volname)
    hot_tier_type = volume_type_info['hot_tier_type_info']['hotBrickType']

    # get subvols
    subvols_dict = get_subvols(mnode, volname)
    hot_tier_subvols = subvols_dict['hot_tier_subvols']

    # a distribute hot tier has no redundancy, so no brick can be
    # brought offline without affecting the cluster
    if hot_tier_type == 'Distribute':
        hot_tier_bricks_to_bring_offline = []

    # select bricks from replicated, distributed-replicated volume
    elif (hot_tier_type == 'Replicate'
          or hot_tier_type == 'Distributed-Replicate'):
        # Get replica count
        hot_tier_replica_count = (
            volume_type_info['hot_tier_type_info']['hotreplicaCount'])

        # Get quorum info
        quorum_info = get_client_quorum_info(mnode, volname)
        hot_tier_quorum_info = quorum_info['hot_tier_quorum_info']

        # Get list of bricks to bring offline
        hot_tier_bricks_to_bring_offline = (
            get_bricks_to_bring_offline_from_replicated_volume(
                hot_tier_subvols, hot_tier_replica_count,
                hot_tier_quorum_info))

    return hot_tier_bricks_to_bring_offline
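
The hot-tier variant is called the same way; a minimal sketch under the same
assumptions (placeholder node name and a tiered volume):

from glustolibs.gluster.brick_libs import (
    bring_bricks_offline, select_hot_tier_bricks_to_bring_offline)

mnode = 'gluster-server1.example.com'   # placeholder management node
volname = 'tiered-testvol'              # placeholder tiered volume

hot_tier_bricks = select_hot_tier_bricks_to_bring_offline(mnode, volname)
if hot_tier_bricks:
    # bring_bricks_offline is assumed to accept the volume name and a brick list
    assert bring_bricks_offline(volname, hot_tier_bricks), (
        "Failed to bring hot tier bricks %s offline" % hot_tier_bricks)
else:
    print("Empty list: volume is missing, not tiered, or its hot tier is "
          "a pure distribute layer")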
Example 3
    def test_manual_heal_should_trigger_heal(self):
        """
        - create a single brick volume
        - add some files and directories
        - get arequal from mountpoint
        - add a brick so that the volume becomes a 1x2 replicate volume
        - start heal
        - make sure heal is completed
        - get arequals from all bricks and compare with arequal from mountpoint
        """
        # pylint: disable=too-many-statements,too-many-locals
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dir-length 1 "
                   "--dir-depth 1 "
                   "--max-num-of-dirs 1 "
                   "--num-of-files 10 %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            g.log.info("IO on %s:%s is started successfully",
                       mount_obj.client_system, mount_obj.mountpoint)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Get arequal for mount before adding bricks
        g.log.info('Getting arequal before adding bricks...')
        ret, arequals = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before adding bricks is successful')
        mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

        # Form brick list to add
        g.log.info('Forming brick list to add...')
        bricks_to_add = form_bricks_list(self.mnode, self.volname, 1,
                                         self.servers, self.all_servers_info)
        g.log.info('Brick list to add: %s', bricks_to_add)

        # Add bricks
        g.log.info("Start adding bricks to volume...")
        ret, _, _ = add_brick(self.mnode,
                              self.volname,
                              bricks_to_add,
                              force=True,
                              replica_count=2)
        self.assertFalse(ret, "Failed to add bricks %s" % bricks_to_add)
        g.log.info("Adding bricks is successful on volume %s", self.volname)

        # Make sure the newly added bricks are available in the volume
        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick list: %s", bricks_list)
        for brick in bricks_to_add:
            self.assertIn(brick, bricks_list,
                          'Brick %s is not in brick list' % brick)
        g.log.info('New bricks are present in the volume')

        # Make sure the volume changed from Distribute to Replicate
        vol_info_dict = get_volume_type_info(self.mnode, self.volname)
        vol_type = vol_info_dict['volume_type_info']['typeStr']
        self.assertEqual(
            'Replicate', vol_type, 'Volume type is not converted to Replicate '
            'after adding bricks')
        g.log.info('Volume type is successfully converted to Replicate '
                   'after adding bricks')

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal on bricks and compare with mount_point_total
        # It should be the same
        g.log.info('Getting arequal on bricks...')
        arequals_after_heal = {}
        for brick in bricks_list:
            g.log.info('Getting arequal on bricks %s...', brick)
            node, brick_path = brick.split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s' % brick)
            g.log.info('Getting arequal for %s is successful', brick)
            brick_total = arequal.splitlines()[-1].split(':')[-1]
            arequals_after_heal[brick] = brick_total
            self.assertEqual(
                mount_point_total, brick_total,
                'Arequals for mountpoint and %s are not equal' % brick)
            g.log.info('Arequals for mountpoint and %s are equal', brick)
        g.log.info('All arequals are equal for the replicated volume')
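
    # Illustrative helper (not part of the original test): the arequal totals
    # compared above are taken from the last line of the arequal-checksum
    # output, which ends in "...: <checksum>". A hypothetical static method
    # capturing that parsing step:
    @staticmethod
    def _parse_arequal_total(arequal_output):
        """Return the trailing checksum field from arequal-checksum output."""
        return arequal_output.splitlines()[-1].split(':')[-1]
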
    def test_glustershd_on_all_volume_types(self):
        """
        Test Script to verify the glustershd server vol file
        has only entries for replicate volumes

        * Create multiple volumes and start all volumes
        * Check the glustershd processes - only one glustershd should be listed
        * Check the glustershd server vol file - it should contain entries
          only for the replicate volumes involved
        * Add bricks to the replicate volume - it should convert to
          distributed-replicate
        * Check the glustershd server vol file - the newly added bricks
          should be present
        * Check the glustershd processes - only one glustershd should be listed

        """
        # pylint: disable=too-many-statements
        nodes = self.servers

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, glustershd_pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process "
                              "found : %s" % glustershd_pids))
        g.log.info(
            "Successful in getting Single self heal daemon process"
            " on all nodes %s", nodes)

        # For all the volumes, check whether bricks present in
        # glustershd server vol file
        volume_list = get_volume_list(self.mnode)
        for volume in volume_list:
            g.log.info("Volume Name: %s", volume)
            volume_type_info = get_volume_type_info(self.mnode, volume)
            volume_type = (volume_type_info['volume_type_info']['typeStr'])

            # get the bricks for the volume
            g.log.info("Fetching bricks for the volume : %s", volume)
            bricks_list = get_all_bricks(self.mnode, volume)
            g.log.info("Brick List : %s", bricks_list)

            # validate the bricks present in volume info with
            # glustershd server volume file
            g.log.info("Start parsing file %s on "
                       "node %s", self.GLUSTERSHD, self.mnode)
            ret = do_bricks_exist_in_shd_volfile(self.mnode, volume,
                                                 bricks_list)
            if volume_type == 'Distribute':
                self.assertFalse(ret,
                                 ("Bricks exist in glustershd server "
                                  "volume file for %s Volume" % volume_type))
                g.log.info(
                    "EXPECTED: Bricks do not exist in glustershd "
                    "server volume file for %s Volume", volume_type)
            else:
                self.assertTrue(ret, ("Brick List from volume info is "
                                      "different from glustershd server "
                                      "volume file. Please check log "
                                      "file for details"))
                g.log.info(
                    "Bricks exist in glustershd server volume file "
                    "for %s Volume", volume_type)

        # expanding volume for Replicate
        for volume in volume_list:
            volume_type_info = get_volume_type_info(self.mnode, volume)
            volume_type = (volume_type_info['volume_type_info']['typeStr'])
            if volume_type == 'Replicate':
                g.log.info("Start adding bricks to volume %s", volume)
                ret = expand_volume(self.mnode, volume, self.servers,
                                    self.all_servers_info)
                self.assertTrue(ret, ("Failed to add bricks to "
                                      "volume %s " % volume))
                g.log.info("Add brick successful")

                # Log Volume Info and Status after expanding the volume
                g.log.info("Logging volume info and Status after "
                           "expanding volume")
                ret = log_volume_info_and_status(self.mnode, volume)
                self.assertTrue(ret, ("Logging volume info and status failed "
                                      "on volume %s", volume))
                g.log.info(
                    "Successful in logging volume info and status "
                    "of volume %s", volume)

                # Verify all of the volume's processes come online within 60 sec
                g.log.info("Verifying that all volume processes are online")
                ret = wait_for_volume_process_to_be_online(
                    self.mnode, volume, 60)
                self.assertTrue(ret, ("Volume %s : All process are not "
                                      "online", volume))
                g.log.info(
                    "Successfully verified volume %s processes "
                    "are online", volume)

                # check the type for the replicate volume
                volume_type_info_for_replicate_after_adding_bricks = \
                    get_volume_type_info(self.mnode, volume)
                volume_type_for_replicate_after_adding_bricks = \
                    (volume_type_info_for_replicate_after_adding_bricks
                     ['volume_type_info']['typeStr'])

                self.assertEqual(volume_type_for_replicate_after_adding_bricks,
                                 'Distributed-Replicate',
                                 ("Replicate volume type is not converted to "
                                  "Distributed-Replicate after adding bricks"))
                g.log.info("Replicate Volume is successfully converted to"
                           " Distributed-Replicate after adding bricks")

                # get the bricks for the volume after expanding
                bricks_list_after_expanding = get_all_bricks(
                    self.mnode, volume)
                g.log.info("Brick List after expanding "
                           "volume: %s", bricks_list_after_expanding)

                # validate the bricks present in volume info
                # with glustershd server volume file after adding bricks
                g.log.info("Starting parsing file %s", self.GLUSTERSHD)
                ret = do_bricks_exist_in_shd_volfile(
                    self.mnode, volume, bricks_list_after_expanding)

                self.assertTrue(ret, ("Brick List from volume info is "
                                      "different from glustershd server "
                                      "volume file after expanding bricks. "
                                      "Please check log file for details"))
                g.log.info("Brick List from volume info is same as from "
                           "glustershd server volume file after "
                           "expanding bricks.")

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, glustershd_pids_after_adding_bricks = \
            get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret,
                        ("Either No self heal daemon process found or "
                         "more than One self heal daemon process "
                         "found : %s" % glustershd_pids_after_adding_bricks))
        g.log.info(
            "Successful in getting Single self heal daemon process"
            " on all nodes %s", nodes)

        self.assertNotEqual(
            glustershd_pids, glustershd_pids_after_adding_bricks,
            "Self Daemon process is same before and"
            " after adding bricks")
        g.log.info("Self Heal Daemon Process is different before and "
                   "after adding bricks")
    def test_no_glustershd_with_distribute(self):
        """
        Test Script to verify the glustershd server vol file
        has only entries for replicate volumes

        * Create multiple volumes and start all volumes
        * Check the glustershd processes - Only 1 glustershd should be listed
        * Stop all volumes
        * Check the glustershd processes - No glustershd should be running
        * Start the distribute volume only
        * Check the glustershd processes - No glustershd should be running

        """

        nodes = self.servers

        # check the self-heal daemon process
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either no self heal daemon process found or "
                              "more than One self heal daemon process "
                              "found : %s" % pids))
        g.log.info(
            "Successful in getting single self heal daemon process"
            " on all nodes %s", nodes)

        # stop all the volumes
        g.log.info("Going to stop all the volumes")
        volume_list = get_volume_list(self.mnode)
        for volume in volume_list:
            g.log.info("Stopping Volume : %s", volume)
            ret = volume_stop(self.mnode, volume)
            self.assertTrue(ret, ("Failed to stop volume %s" % volume))
            g.log.info("Successfully stopped volume %s", volume)
        g.log.info("Successfully stopped all the volumes")

        # check the self-heal daemon process after stopping all volumes
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertFalse(ret, ("Self heal daemon process is still running "
                               "after stopping all volumes "))
        for node in pids:
            self.assertEqual(pids[node][0], -1, ("Self heal daemon is still "
                                                 "running on node %s even "
                                                 "after stoppong all "
                                                 "volumes" % node))
        g.log.info("EXPECTED: No self heal daemon process is "
                   "running after stopping all volumes")

        # start the distribute volume only
        for volume in volume_list:
            volume_type_info = get_volume_type_info(self.mnode, volume)
            volume_type = (volume_type_info['volume_type_info']['typeStr'])
            if volume_type == 'Distribute':
                g.log.info("starting to start distribute volume: %s", volume)
                ret = volume_start(self.mnode, volume)
                self.assertTrue(ret, ("Failed to start volume %s" % volume))
                g.log.info("Successfully started volume %s", volume)
                break

        # check the self-heal daemon process after starting distribute volume
        g.log.info("Starting to get self-heal daemon process on "
                   "nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertFalse(ret, ("Self heal daemon process is still running "
                               "after stopping all volumes "))
        for node in pids:
            self.assertEqual(pids[node][0], -1, ("Self heal daemon is still "
                                                 "running on node %s even "
                                                 "after stopping all "
                                                 "volumes" % node))
        g.log.info("EXPECTED: No self heal daemon process is running "
                   "after stopping all volumes")
    def validate_xattr_values(self, dirname, ctime=True):
        """Validate existence and consistency of a specific
           xattr value across replica set

        Args:
            dirname (str): parent directory name
        Kwargs:
            ctime(bool): ctime feature enablement
        """
        # pylint: disable=too-many-branches
        # Fetch all replica sets(subvols) in the volume
        ret = get_subvols(self.mnode, self.volname)
        # Iterating through each subvol(replicaset)
        for subvol in ret['volume_subvols']:
            brick_host_list = {}  # Dict for storing host,brickpath pairs
            for each in subvol:  # Fetching each replica in replica set
                # Splitting into hostname, brick path pairs
                host, brick_path = each.split(':')
                brick_host_list[host] = brick_path
            # Fetch Complete parent directory path
            directory = brick_path + '/' + dirname
            # Fetching all entries recursively in a replicaset
            entry_list = get_dir_contents(host, directory, recursive=True)
            for each in entry_list:
                xattr_value = []  # list to store xattr value
                # Logic to get xattr values
                for host, brickpath in brick_host_list.items():
                    # Remove the prefix brick_path from entry-name
                    each = sub(brick_path, '', each)
                    # Adding the right brickpath name for fetching xattrval
                    brick_entry_path = brickpath + each
                    ret = get_extended_attributes_info(host,
                                                       [brick_entry_path],
                                                       encoding='hex',
                                                       attr_name='trusted'
                                                       '.glusterfs.'
                                                       'mdata')
                    if ret:
                        ret = ret[brick_entry_path]['trusted.glusterfs.mdata']
                        g.log.info("mdata xattr value of %s is %s",
                                   brick_entry_path, ret)
                    if ctime:
                        self.assertIsNotNone(
                            ret, "glusterfs.mdata not set on"
                            " {}".format(brick_entry_path))
                        g.log.info(
                            "mdata xattr %s is set on the back-end"
                            " bricks", ret)
                    else:
                        self.assertIsNone(
                            ret, "trusted.glusterfs.mdata seen"
                            " on {}".format(brick_entry_path))
                        g.log.info(
                            "mdata xattr %s is not set on the back-end"
                            " bricks", ret)
                    xattr_value.append(ret)
                voltype = get_volume_type_info(self.mnode, self.volname)
                if voltype['volume_type_info']['arbiterCount'] == '0':
                    ret = bool(
                        xattr_value.count(xattr_value[0]) == len(xattr_value))
                elif voltype['volume_type_info']['arbiterCount'] == '1':
                    ret = bool((xattr_value.count(xattr_value[0]) > 1)
                               or (xattr_value.count(xattr_value[1]) > 1))
                else:
                    g.log.error("Arbiter value is neither 0 nor 1")
                if ctime:
                    self.assertTrue(
                        ret, 'trusted.glusterfs.mdata' +
                        ' value not same across bricks for '
                        'entry ' + each)
                else:
                    self.assertTrue(
                        ret, 'trusted.glusterfs.mdata' +
                        ' seems to be set on some bricks for ' + each)
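
    # Illustrative helper (not part of the original code): the per-entry
    # consistency rule applied above can be read as follows. For a plain
    # replica (arbiter count 0) every brick must report the same mdata value;
    # with one arbiter brick it is enough that at least two bricks agree.
    # A hypothetical standalone restatement of that rule:
    @staticmethod
    def _mdata_values_consistent(xattr_values, arbiter_count):
        """Return True if the collected mdata values satisfy the
        replica/arbiter consistency rule described above."""
        if arbiter_count == 0:
            return xattr_values.count(xattr_values[0]) == len(xattr_values)
        if arbiter_count == 1:
            return any(xattr_values.count(value) > 1 for value in xattr_values)
        raise ValueError("Arbiter count is neither 0 nor 1")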