Example #1
    def setUp(self):
        """
        setUp method for every test
        """

        bricks = get_servers_bricks_dict(self.servers,
                                         self.all_servers_info)

        # Check the brick dirs and clean them if they are not empty
        for server in self.servers:
            for brick in bricks[server]:
                if get_dir_contents(server, brick):
                    cmd = "rm -rf " + brick + "/*"
                    ret, _, _ = g.run(server, cmd)
                    if ret:
                        raise ExecutionError("Failed to delete the brick "
                                             "dirs of deleted volume.")

        # Creating Volume
        ret = self.setup_volume_and_mount_volume(self.mounts)
        if not ret:
            raise ExecutionError("Volume creation or mount failed: %s"
                                 % self.volname)
        g.log.info("Volme created and mounted successfully : %s",
                   self.volname)

        # calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

from itertools import cycle, zip_longest


def get_all_bricks_from_servers_multivol(servers, servers_info):
    """
    Form list of all the bricks to create/add-brick from the given
    servers and servers_info

    Args:
        servers (list): List of servers in the storage pool.
        servers_info (dict): Information about all servers.

    Returns:
        brickCount (int): Number of bricks available from the servers.
        bricks_list (list): List of all bricks from the servers provided.

    Example:
            servers_info = {
                'abc.lab.eng.xyz.com': {
                    'host': 'abc.lab.eng.xyz.com',
                    'brick_root': '/bricks',
                    'devices': ['/dev/vdb', '/dev/vdc', '/dev/vdd', '/dev/vde']
                    },
                'def.lab.eng.xyz.com':{
                    'host': 'def.lab.eng.xyz.com',
                    'brick_root': '/bricks',
                    'devices': ['/dev/vdb', '/dev/vdc', '/dev/vdd', '/dev/vde']
                    }
                }
    """
    if not isinstance(servers, list):
        servers = [servers]

    brickCount, bricks_list = 0, []

    servers_bricks = get_servers_bricks_dict(servers, servers_info)
    server_ip = cycle(servers_bricks.keys())

    for item in zip_longest(*servers_bricks.values()):
        for brick in item:
            # cycle() never ends, so next() simply moves to the next
            # server in round-robin order.
            server = next(server_ip)
            if brick:
                bricks_list.append(server + ":" + brick)
                brickCount += 1
    return brickCount, bricks_list
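
A minimal, self-contained sketch (Python 3) of the round-robin interleaving
performed by the loop above; the hostnames and brick roots are illustrative,
not from the original code:

from itertools import cycle, zip_longest

servers_bricks = {
    'abc.lab.eng.xyz.com': ['/bricks/brick1', '/bricks/brick2'],
    'def.lab.eng.xyz.com': ['/bricks/brick1'],
}

server_ip = cycle(servers_bricks.keys())
bricks_list = []
# zip_longest pads the shorter brick lists with None, so every brick is
# consumed even when servers have different brick counts.
for item in zip_longest(*servers_bricks.values()):
    for brick in item:
        server = next(server_ip)
        if brick:
            bricks_list.append(server + ":" + brick)

print(bricks_list)
# ['abc.lab.eng.xyz.com:/bricks/brick1',
#  'def.lab.eng.xyz.com:/bricks/brick1',
#  'abc.lab.eng.xyz.com:/bricks/brick2']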

    def tearDown(self):

        # UnMount Volume
        g.log.info("Starting to Unmount Volume %s", self.volname)
        ret = umount_volume(self.mounts[0].client_system,
                            self.mounts[0].mountpoint,
                            mtype=self.mount_type)
        self.assertTrue(ret, ("Failed to Unmount Volume %s" % self.volname))
        g.log.info("Successfully Unmounted Volume %s", self.volname)

        # Clean up all volumes and peer probe to form cluster
        vol_list = get_volume_list(self.mnode)
        if vol_list is not None:
            for volume in vol_list:
                ret = cleanup_volume(self.mnode, volume)
                if not ret:
                    raise ExecutionError("Failed to cleanup volume")
                g.log.info("Volume deleted successfully : %s", volume)

        # Detach all peers, then re-probe the servers to re-form the cluster
        pool = nodes_from_pool_list(self.mnode)
        for node in pool:
            peer_detach(self.mnode, node)
        ret = peer_probe_servers(self.mnode, self.servers)
        if not ret:
            raise ExecutionError("Failed to probe detached "
                                 "servers %s" % self.servers)
        g.log.info("Peer probe success for detached "
                   "servers %s", self.servers)

        bricks = get_servers_bricks_dict(self.servers, self.all_servers_info)

        # Check the brick dirs and clean them if they are not empty
        for server in self.servers:
            for brick in bricks[server]:
                if get_dir_contents(server, brick):
                    cmd = "rm -rf " + brick + "/*"
                    ret, _, _ = g.run(server, cmd)
                    if ret:
                        raise ExecutionError("Failed to delete the brick "
                                             "dirs of deleted volume.")

        self.get_super_method(self, 'tearDown')()

    def test_impact_of_replace_brick_on_glustershd(self):
        """
        Test Script to verify the glustershd server vol file
        has only entries for replicate volumes
        1. Create multiple volumes and start all volumes
        2. Check the glustershd processes - only one glustershd should be
           listed
        3. Do replace brick on the replicate volume
        4. Confirm that the brick is replaced
        5. Check the glustershd processes - only one glustershd should be
           listed and the pid should be different
        6. glustershd server vol file should be updated with the new bricks
        """
        # Check the self-heal daemon process
        ret, glustershd_pids = get_self_heal_daemon_pid(self.servers)
        self.assertTrue(ret, ("Either no self heal daemon process found or "
                              "more than one self heal daemon process "
                              "found : %s" % glustershd_pids))
        g.log.info(
            "Successful in getting single self heal daemon process"
            " on all nodes %s", self.servers)

        volume_list = get_volume_list(self.mnode)
        for volume in volume_list:

            # Log Volume Info and Status before replacing brick
            ret = log_volume_info_and_status(self.mnode, volume)
            self.assertTrue(ret, ("Logging volume info and status "
                                  "failed on volume %s", volume))
            g.log.info(
                "Successful in logging volume info and status "
                "of volume %s", volume)

            # Selecting a random source brick to replace
            src_brick = choice(get_all_bricks(self.mnode, volume))
            src_node, original_brick = src_brick.split(":")

            # Pick a random destination brick root on the same node,
            # always choosing one different from the root that hosts
            # the original brick
            list_of_bricks = [
                brick for brick in get_servers_bricks_dict(
                    src_node, self.all_servers_info)[src_node]
                if brick not in original_brick
            ]
            dst_brick = ('{}:{}/{}_replaced'.format(
                src_node, choice(list_of_bricks),
                original_brick.split('/')[-1]))

            # Replace brick for the volume
            ret, _, _ = replace_brick(self.mnode, volume, src_brick, dst_brick)
            self.assertFalse(
                ret, "Failed to replace brick "
                "from the volume %s" % volume)
            g.log.info(
                "Successfully replaced faulty brick from "
                "the volume %s", volume)

            # Verify all volume process are online
            ret = wait_for_volume_process_to_be_online(self.mnode, volume)
            self.assertTrue(ret,
                            "Volume %s : All process are not online" % volume)
            g.log.info("Volume %s : All process are online", volume)

            # Check the self-heal daemon process after replacing brick
            ret, pid_after_replace = get_self_heal_daemon_pid(self.servers)
            self.assertTrue(
                ret, "Either no self heal daemon process "
                "found or more than one self heal "
                "daemon process found : %s" % pid_after_replace)
            g.log.info(
                "Successful in getting single self heal daemon "
                "process on all nodes %s", self.servers)

            # Compare the glustershd pids
            self.assertNotEqual(
                glustershd_pids, pid_after_replace,
                "Self heal daemon process should be different "
                "after replacing bricks in %s volume" % volume)
            g.log.info("EXPECTED: Self heal daemon process should be different"
                       " after replacing bricks in replicate volume")

            # Get the bricks for the volume
            bricks_list = get_all_bricks(self.mnode, volume)
            g.log.info("Brick List : %s", bricks_list)

            # Validate the bricks present in volume info with
            # glustershd server volume file
            ret = do_bricks_exist_in_shd_volfile(self.mnode, volume,
                                                 bricks_list)
            self.assertTrue(ret, ("Brick List from volume info is "
                                  "different from glustershd server "
                                  "volume file. Please check log file "
                                  "for details"))
            g.log.info(
                "Bricks in volume %s exists in glustershd server "
                "volume file", volume)
Example #5
    def test_new_volume_while_io_in_progress(self):
        """
        Create, export and mount new volume while IO running on mount of
        another volume
        Steps:
        1. Start IO on mount points
        2. Create another volume 'volume_new'
        3. Export volume_new through nfs-ganesha
        4. Mount the volume on clients
        """
        # pylint: disable=too-many-statements, too-many-locals
        # Start IO on all mount points
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        self.volname_new = '%s_new' % self.volname
        kwargs = {}
        dict_index = 0

        # Creating mounts list for mounting new volume
        self.mounts_new = []
        for mount_obj in self.mounts:
            self.mounts_new.append(deepcopy(mount_obj))
        for mount_obj in self.mounts_new:
            mount_obj.volname = self.volname_new
            mount_obj.mountpoint = '%s_new' % mount_obj.mountpoint

        # Fetch details for creating a replicate volume.
        replica_count = (
            self.default_volume_type_config['replicated']['replica_count'])
        servers_bricks_dict = get_servers_bricks_dict(self.all_servers,
                                                      self.all_servers_info)
        bricks_list = []
        kwargs['replica_count'] = replica_count
        kwargs['transport_type'] = (
            self.default_volume_type_config['replicated']['transport'])

        for num in range(0, replica_count):
            # Current_server is the server on which brick path will be created
            current_server = list(servers_bricks_dict.keys())[dict_index]
            current_server_unused_bricks_list = (list(
                servers_bricks_dict.values())[dict_index])
            if current_server_unused_bricks_list:
                brick_path = (
                    "%s:%s/%s_brick%s" %
                    (current_server, current_server_unused_bricks_list[0],
                     self.volname_new, num))
                bricks_list.append(brick_path)

                # Remove the added brick from the list
                list(servers_bricks_dict.values())[dict_index].pop(0)

            if dict_index < len(servers_bricks_dict) - 1:
                dict_index = dict_index + 1
            else:
                dict_index = 0

        # Create volume 'volume_new'
        ret, _, _ = volume_create(mnode=self.mnode,
                                  volname=self.volname_new,
                                  bricks_list=bricks_list,
                                  force=False,
                                  **kwargs)
        self.assertEqual(ret, 0,
                         "Unable to create volume %s" % self.volname_new)
        g.log.info("Successfully created volume %s", self.volname_new)

        ret, _, _ = volume_start(self.mnode, self.volname_new)
        self.assertEqual(ret, 0,
                         "Unable to start volume %s" % self.volname_new)

        # Wait for volume processes to be online
        g.log.info("Wait for volume %s processes to be online",
                   self.volname_new)
        ret = wait_for_volume_process_to_be_online(self.mnode,
                                                   self.volname_new)
        self.assertTrue(
            ret, "Wait timeout: Processes of volume %s are "
            "not online." % self.volname_new)
        g.log.info("Volume processes of volume %s are now online",
                   self.volname_new)

        # Export volume as nfs-ganesha export
        ret, _, _ = export_nfs_ganesha_volume(self.mnode, self.volname_new)
        self.assertEqual(
            ret, 0, "Failed to set ganesha.enable 'on' on "
            "volume %s" % self.volname_new)
        g.log.info(
            "Successful in setting ganesha.enable to 'on' on "
            "volume %s", self.volname_new)

        # Verify volume export
        ret = wait_for_nfs_ganesha_volume_to_get_exported(
            self.mnode, self.volname_new)
        self.assertTrue(
            ret, "Failed to export volume %s as nfs-ganesha "
            "export" % self.volname_new)
        g.log.info("Successfully exported volume %s", self.volname_new)

        # Mount the new volume
        for mount_obj in self.mounts_new:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted %s on client %s",
                       mount_obj.volname, mount_obj.client_system)

        # Verify mounts
        for mount_obj in self.mounts_new:
            ret = mount_obj.is_mounted()
            self.assertTrue(
                ret, ("Volume %s is not mounted on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Verified: Volume %s is mounted on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Export and mount of new volume %s is success.",
                   self.volname_new)

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all IO")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

    def test_validate_optimized_glusterd_handshake(self):
        """
        Test Case:
        1) Create a 3 node cluster
        2) Enable brick-multiplex in the cluster
        3) Create and start 2000 volumes
        4) Stop one of the node in the cluster
        5) Set an option for around 850 volumes in the cluster
        6) Restart glusterd on the previous node
        7) Check the value of the option set earlier, in the restarted node
        """
        # pylint: disable=too-many-locals
        # Enable brick-multiplex
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(ret, "Failed to enable brick mux on cluster")

        server_info_frm_three_node = {}
        for server in self.servers[:3]:
            server_info_frm_three_node[server] = self.all_servers_info[server]

        # Fetch the available bricks dict
        bricks_dict = get_servers_bricks_dict(self.servers[:3],
                                              server_info_frm_three_node)
        self.assertIsNotNone(bricks_dict, "Failed to get the bricks dict")

        # Using a custom method here because bulk_volume_creation
        # produces a huge amount of logging and makes unwanted calls,
        # which would slow down the test case and use more memory
        # Create and start 2000 volumes
        for i in range(2000):
            self.volname = "volume-%d" % i
            bricks_list = []
            j = 0
            for key, value in bricks_dict.items():
                j += 1
                brick = choice(value)
                brick = "{}:{}/{}_brick-{}".format(key, brick,
                                                   self.volname, j)
                bricks_list.append(brick)

            kwargs = {'replica_count': 3}

            ret, _, _ = volume_create(self.mnode, self.volname,
                                      bricks_list, False, **kwargs)
            self.assertEqual(ret, 0, "Failed to create volume: %s"
                             % self.volname)

            ret, _, _ = volume_start(self.mnode, self.volname)
            self.assertEqual(ret, 0, "Failed to start volume: %s"
                             % self.volname)

        g.log.info("Successfully created and started all the volumes")

        # Stop glusterd on one node
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on node :%s"
                        % self.servers[1])

        self.glusterd_is_stopped = True

        # Set a volume option for 850 volumes
        option_value = {'network.ping-timeout': 45}
        for i in range(850):
            vol_name = "volume-" + str(i)
            ret = set_volume_options(self.mnode, vol_name, option_value)
            self.assertTrue(ret, "Failed to set volume option")

        # Start glusterd on the previous node
        ret = restart_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to start glusterd on node: %s"
                        % self.servers[1])

        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertTrue(ret, "Glusterd is not yet started on the node :%s"
                        % self.servers[1])

        # It might take some time for the peers to reach connected state
        # because of the huge number of volumes to sync
        while True:
            ret = is_peer_connected(self.mnode, self.servers[1:3])
            if ret:
                break
            sleep(1)

        self.assertTrue(ret, "Peers are not in connected state")

        self.glusterd_is_stopped = False

        # Check the volume option set earlier is synced on restarted node
        for i in range(850):
            vol_name = "volume-" + str(i)
            # Loop until the option is visible; there might be a race,
            # and the node can take time to sync the data initially
            while True:
                ret = get_volume_options(self.servers[1], vol_name,
                                         'network.ping-timeout')
                self.assertTrue(ret, "Failed to get volume option")
                g.log.info("Ret: %s", ret['network.ping-timeout'])
                if ret['network.ping-timeout'] == '45':
                    break
            self.assertEqual(ret['network.ping-timeout'], '45',
                             "Option value not updated in the restarted node")

    def test_reserved_port_range_for_gluster(self):
        """
        Test Case:
        1) Set the max-port option in glusterd.vol file to 49200
        2) Restart glusterd on one of the node
        3) Create 50 volumes in a loop
        4) Try to start the 50 volumes in a loop
        5) Confirm that the 50th volume failed to start
        6) Confirm the error message, due to which volume failed to start
        7) Set the max-port option in glusterd.vol file back to default value
        8) Restart glusterd on the same node
        9) Starting the 50th volume should succeed now
        """
        # Set max port number as 49200 in glusterd.vol file
        cmd = "sed -i 's/60999/49200/' /etc/glusterfs/glusterd.vol"
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(
            ret, 0, "Failed to set the max-port to 49200 in"
            " glusterd.vol file")

        self.port_range_changed = True

        # Restart glusterd
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart glusterd")
        g.log.info("Successfully restarted glusterd on node: %s", self.mnode)

        # Check node on which glusterd was restarted is back to 'Connected'
        # state from any other peer
        ret = wait_for_peers_to_connect(self.servers[1], self.servers)
        self.assertTrue(ret, "All the peers are not in connected state")

        # Fetch the available bricks dict
        bricks_dict = get_servers_bricks_dict(self.servers,
                                              self.all_servers_info)
        self.assertIsNotNone(bricks_dict, "Failed to get the bricks dict")

        # Create 50 volumes in a loop
        for i in range(1, 51):
            self.volname = "volume-%d" % i
            bricks_list = []
            j = 0
            for key, value in bricks_dict.items():
                j += 1
                brick = choice(value)
                brick = "{}:{}/{}_brick-{}".format(key, brick, self.volname, j)
                bricks_list.append(brick)

            ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
            self.assertEqual(ret, 0,
                             "Failed to create volume: %s" % self.volname)
            g.log.info("Successfully created volume: %s", self.volname)

        # Try to start 50 volumes in loop
        for i in range(1, 51):
            self.volname = "volume-%d" % i
            ret, _, err = volume_start(self.mnode, self.volname)
            if ret:
                break
        g.log.info("Successfully started all the volumes until volume: %s",
                   self.volname)

        # Confirm if the 50th volume failed to start
        self.assertEqual(
            i, 50, "Failed to start the volumes volume-1 to"
            " volume-49 in a loop")

        # Confirm the error message on volume start fail
        err_msg = ("volume start: volume-50: failed: Commit failed on"
                   " localhost. Please check log file for details.")
        self.assertEqual(
            err.strip(), err_msg, "Volume start failed with"
            " a different error message")

        # Confirm the error message from the log file
        cmd = ("cat /var/log/glusterfs/glusterd.log | %s" %
               "grep -i 'All the ports in the range are exhausted' | wc -l")
        ret, out, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to 'grep' the glusterd.log file")
        self.assertNotEqual(
            out.strip(), "0", "Volume start didn't fail with the expected"
            " error message")

        # Set max port number back to default value in glusterd.vol file
        cmd = "sed -i 's/49200/60999/' /etc/glusterfs/glusterd.vol"
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(
            ret, 0, "Failed to set the max-port back to 60999 in"
            " glusterd.vol file")

        self.port_range_changed = False

        # Restart glusterd on the same node
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart glusterd")
        g.log.info("Successfully restarted glusterd on node: %s", self.mnode)

        # Starting the 50th volume should succeed now
        self.volname = "volume-%d" % i
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume: %s" % self.volname)