Code example #1
    def test_offline_brick_status_when_quorum_not_met(self):
        """
        Test Brick status when Quorum is not met after glusterd restart.
        1. Create a volume and mount it.
        2. Set the quorum type to 'server'.
        3. Bring some nodes down such that quorum won't be met.
        4. Brick status should be offline on the node which is up.
        5. Restart glusterd on this node.
        6. The brick status should still be offline as quorum isn't met.
        """
        # Set the quorum type to server and validate it.
        vol_option = {'cluster.server-quorum-type': 'server'}
        ret = set_volume_options(self.mnode, self.volname, vol_option)
        self.assertTrue(ret, "gluster volume option set of %s to %s failed"
                        % ('cluster.server-quorum-type', 'server'))
        g.log.info("Cluster quorum set to type server.")

        # Get the brick list and make sure it was fetched successfully.
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get the brick list.")

        # Stop glusterd processes.
        ret = stop_glusterd(self.servers[1:])
        self.assertTrue(ret, "Failed to stop glusterd on specified nodes.")
        g.log.info("Glusterd processes stopped in the desired servers.")

        # Get the brick status in a node where glusterd is up.
        ret = are_bricks_offline(self.mnode, self.volname, brick_list[0:1])
        self.assertTrue(ret, "Bricks are online")
        g.log.info("Bricks are offline as expected.")

        # Restart glusterd on one of the nodes which is up.
        ret = restart_glusterd(self.servers[0])
        self.assertTrue(ret, ("Failed to restart glusterd on desired node."))
        g.log.info("Glusterd restarted on the desired node.")

        # Wait for glusterd to be online and validate it's running.
        self.assertTrue(wait_for_glusterd_to_start(self.servers[0]),
                        "Glusterd not up on the desired server.")
        g.log.info("Glusterd is up in the desired server.")

        # Get the brick status from the restarted node.
        ret = are_bricks_offline(self.mnode, self.volname, brick_list[0:1])
        self.assertTrue(ret, "Bricks are online")
        g.log.info("Bricks are offline as expected.")

        # Start glusterd on all servers.
        ret = start_glusterd(self.servers)
        self.assertTrue(ret, "Failed to start glusterd on the specified nodes")
        g.log.info("Initiated start of glusterd on all nodes.")

        # Wait for glusterd to start.
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd not up on all nodes.")
        g.log.info("Glusterd is up and running on all nodes.")

        # Wait for all volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   timeout=600)
        self.assertTrue(ret, ("All volume processes not up."))
        g.log.info("All volume processes are up.")
Code example #2
    def test_glusterd_start_stop_consistent_info(self):
        """
        Test Case:
        1. Stop and Start glusterd and store the output returned.
        2. Check if consistent output is returned if glusterd is started and
        stopped 5 times.
        """
        glusterd_start_cmd = 'systemctl start glusterd'
        glusterd_stop_cmd = 'systemctl stop glusterd'

        # Stop glusterd to get the info for further comparison.
        stop_output = self._run_command_in_servers(glusterd_stop_cmd)

        # Start glusterd to get the info for further comparison.
        start_output = self._run_command_in_servers(glusterd_start_cmd)

        # Loop over the stop-start cycle 5 times and validate the output.
        for _ in range(5):
            # Validate stop output is the same.
            ret_value = self._run_command_in_servers(glusterd_stop_cmd)
            self.assertEqual(ret_value, stop_output,
                             "%s is not consistent." % (glusterd_stop_cmd))
            sleep(2)

            # Validate start output is the same.
            ret_value = self._run_command_in_servers(glusterd_start_cmd)
            self.assertEqual(ret_value, start_output,
                             "%s is not consistent." % (glusterd_start_cmd))
            ret = wait_for_glusterd_to_start(self.servers[0])
            self.assertTrue(ret, "Glusterd has not yet started in servers.")
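The `_run_command_in_servers` helper used above is not part of this snippet. A minimal sketch of what it might look like, assuming it simply runs the command on every server through `g.run` and returns the collected (rc, out, err) tuples for comparison; the early assertion and the return shape are assumptions, not the library's actual code:

    def _run_command_in_servers(self, cmd):
        """Hypothetical sketch: run `cmd` on every server and collect the
        (rc, out, err) tuples so callers can compare runs for consistency."""
        results = []
        for server in self.servers:
            ret, out, err = g.run(server, cmd)
            # systemctl start/stop should return 0 when the unit exists.
            self.assertEqual(ret, 0, "Failed to execute '%s' on %s"
                             % (cmd, server))
            results.append((ret, out, err))
        return results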
Code example #3
    def tearDown(self):

        # Start glusterd on the node where it was stopped.
        ret = start_glusterd(self.servers[self.random_server])
        if not ret:
            raise ExecutionError("Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        ret = wait_for_glusterd_to_start(self.servers)
        if not ret:
            raise ExecutionError("glusterd is not running on %s"
                                 % self.servers)
        g.log.info("Glusterd start on the nodes succeeded.")

        # Check if peers are connected.
        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        if not ret:
            raise ExecutionError("Peers are not in connected state.")
        g.log.info("Peers are in connected state.")

        # Stopping and deleting volume.
        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Code example #4
    def scratch_cleanup(cls, error_or_failure_exists):
        """
        This scratch_cleanup script will run only when the code
        currently running goes into execution or assertion error.

        Args:
            error_or_failure_exists (bool): If set True will cleanup setup
                atlast of testcase only if exectution or assertion error in
                teststeps. False will skip this scratch cleanup step.

        Returns (bool): True if setup cleanup is successful.
            False otherwise.
        """
        if error_or_failure_exists:
            ret = stop_glusterd(cls.servers)
            if not ret:
                g.log.error("Failed to stop glusterd")
                cmd_list = ("pkill pidof glusterd",
                            "rm /var/run/glusterd.socket")
                for server in cls.servers:
                    for cmd in cmd_list:
                        ret, _, _ = g.run(server, cmd, "root")
                        if ret:
                            g.log.error("Failed to stop glusterd")
                            return False
            for server in cls.servers:
                cmd_list = ("rm -rf /var/lib/glusterd/vols/*",
                            "rm -rf /var/lib/glusterd/snaps/*",
                            "rm -rf /var/lib/glusterd/peers/*",
                            "rm -rf {}/*/*".format(
                                cls.all_servers_info[server]['brick_root']))
                for cmd in cmd_list:
                    ret, _, _ = g.run(server, cmd, "root")
                    if ret:
                        g.log.error(
                            "failed to cleanup server {}".format(server))
                        return False
            ret = restart_glusterd(cls.servers)
            if not ret:
                g.log.error("Failed to start glusterd")
                return False
            sleep(2)
            ret = wait_for_glusterd_to_start(cls.servers)
            if not ret:
                g.log.error("Failed to bring glusterd up")
                return False
            ret = peer_probe_servers(cls.mnode, cls.servers)
            if not ret:
                g.log.error("Failed to peer probe servers")
                return False
            for client in cls.clients:
                cmd_list = ("umount /mnt/*", "rm -rf /mnt/*")
                for cmd in cmd_list:
                    ret, _, _ = g.run(client, cmd, "root")
                    if ret:
                        g.log.error(
                            "failed to unmount/already unmounted {}".format(
                                client))
            return True
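For context, a hedged sketch of how a test class might trigger `scratch_cleanup` from its tearDown; the `error_or_failure_exists` flag and the hook shown here are assumptions about the wiring, not the actual glusto-tests base class:

    def tearDown(self):
        """Hypothetical wrapper (assumed, not the real base class): run
        scratch_cleanup only when a test step raised or an assert failed."""
        # error_or_failure_exists is assumed to be set elsewhere when a
        # test step hit an execution or assertion error.
        if getattr(self, 'error_or_failure_exists', False):
            if not self.scratch_cleanup(self.error_or_failure_exists):
                g.log.error("Scratch cleanup of the setup failed")
        self.get_super_method(self, 'tearDown')()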
Code example #5
    def test_snap_info_glusterd_restart(self):
        """
        Verify snapshot info before and after glusterd restart

        * Create multiple snapshots
        * Check snapshot info
          - Without using snapname or volname
          - Using snapname
          - Using volname
        * Restart glusterd on all servers
        * Repeat the snapshot info step for all the three scenarios
          mentioned above
        """

        # pylint: disable=too-many-statements
        # Create snapshots with description
        for snap in self.snapshots:
            ret, _, _ = snap_create(self.mnode,
                                    self.volname,
                                    snap,
                                    description='$p3C!@l C#@R@cT#R$')
            self.assertEqual(
                ret, 0,
                ("Failed to create snapshot for volume %s" % self.volname))
            g.log.info("Snapshot %s created successfully for volume %s", snap,
                       self.volname)

        # Perform the snapshot info tests before glusterd restart
        self.snapshot_info()

        # Restart Glusterd on all servers
        for server in self.servers:
            ret = restart_glusterd(server)
            self.assertTrue(ret,
                            ("Failed to restart glusterd on node %s" % server))
            g.log.info("Successfully restarted glusterd on node %s", server)

        # Wait for glusterd to be online and validate glusterd running on all
        # server nodes
        self.assertTrue(
            wait_for_glusterd_to_start(self.servers),
            "Unexpected: glusterd not up on one or more of the nodes")
        g.log.info("Glusterd is up and running on all nodes")

        # Check if peers are connected
        self.assertTrue(wait_for_peers_to_connect(self.mnode, self.servers),
                        "Unexpected: Peers are not in connected state")
        g.log.info("Successful: All peers are in connected state")

        # perform the snapshot info tests after glusterd restart
        self.snapshot_info()
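The `snapshot_info` helper called before and after the restart is not shown in this snippet. A rough sketch of the three checks it is expected to cover, assuming the glustolibs snap_ops getters `get_snap_info`, `get_snap_info_by_snapname` and `get_snap_info_by_volname` are available; the helper body itself is an assumption:

    def snapshot_info(self):
        """Hypothetical sketch: check snapshot info without arguments,
        by snapname and by volname (assumed implementation)."""
        # Snapshot info without using snapname or volname
        snap_info_list = get_snap_info(self.mnode)
        self.assertIsNotNone(snap_info_list, "Failed to get snapshot info")

        # Snapshot info using snapname
        for snap in self.snapshots:
            snap_info = get_snap_info_by_snapname(self.mnode, snap)
            self.assertIsNotNone(snap_info, "Failed to get snapshot info "
                                 "for snapshot %s" % snap)

        # Snapshot info using volname
        vol_snap_info = get_snap_info_by_volname(self.mnode, self.volname)
        self.assertIsNotNone(vol_snap_info, "Failed to get snapshot info "
                             "for volume %s" % self.volname)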
Code example #6
    def test_daemons_after_reboot(self):
        '''
        Create a volume and FUSE mount it, enable quota on the volume
        and set a quota limit on it, then reboot a node and check
        whether the self-heal daemon and quota daemon are running
        after the reboot
        '''

        # Enabling quota to volume
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to enable quota on volume : "
            "%s" % self.volname)
        g.log.info("quota enabled successfully on volume: %s", self.volname)

        # Setting quota limit to volume
        ret, _, _ = quota_limit_usage(self.mnode,
                                      self.volname,
                                      path='/',
                                      limit='1GB',
                                      soft_limit='')
        self.assertEqual(
            ret, 0, "Quota limit set failed "
            "on volume : %s" % self.volname)

        ret, _ = reboot_nodes_and_wait_to_come_online(self.servers[1])
        self.assertTrue(ret, "Failed to reboot the node %s" % self.servers[1])
        g.log.info("Node %s rebooted successfully", self.servers[1])

        # Checking glusterd status and peer status after reboot of the server
        self.assertTrue(wait_for_glusterd_to_start(self.servers[1]),
                        "Failed to start glusterd on %s" % self.servers[1])
        self.assertTrue(wait_for_peers_to_connect(self.mnode, self.servers),
                        "some peers are not in connected state")
        g.log.info("glusterd is running and all peers are in "
                   "connected state")

        # Checks self heal daemon and quota daemon process running or not
        ret = self.is_daemon_process_running()
        self.assertTrue(
            ret, "failed to run self-heal and quota daemon "
            "processs on all hosts")
        g.log.info("self-heal and quota daemons are running on all "
                   "hosts successfully")
Code example #7
    def test_glusterd_default_vol_behavior_and_quorum_options(self):
        """
        Test default volume behavior and quorum options
        1. Create a volume and start it.
        2. Check that no quorum options are found in vol info.
        3. Kill two glusterd processes.
        4. The running glusterfsd processes shouldn't be affected.
        """
        # Check the default quorum options are correct.
        self._validate_vol_options('cluster.server-quorum-type', 'off')
        self._validate_vol_options('cluster.server-quorum-ratio', '51', True)

        # Get the count of number of glusterfsd processes running.
        count_before_glusterd_kill = self._get_total_brick_processes_count()

        # Kill two glusterd processes.
        server_list = [self.servers[1], self.servers[2]]
        ret = stop_glusterd(server_list)
        self.assertTrue(ret, "Failed to stop glusterd on the specified nodes.")
        ret = is_glusterd_running(server_list)
        self.assertNotEqual(ret, 0, ("Glusterd is not stopped on the servers"
                                     " where it was desired to be stopped."))
        g.log.info("Glusterd processes stopped in the desired servers.")

        # Get the count of number of glusterfsd processes running.
        count_after_glusterd_kill = self._get_total_brick_processes_count()

        # The count of glusterfsd processes should match
        self.assertEqual(count_before_glusterd_kill, count_after_glusterd_kill,
                         ("Glusterfsd processes are affected."))
        g.log.info("Glusterd processes are not affected.")

        # Start glusterd on all servers.
        ret = start_glusterd(self.servers)
        self.assertTrue(ret, "Failed to Start glusterd on the specified"
                        " nodes")
        g.log.info("Started glusterd on all nodes.")

        # Wait for glusterd to restart.
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd not up on all nodes.")
        g.log.info("Glusterd is up and running on all nodes.")
Code example #8
    def _wait_for_gluster_process_online_state(self):
        """
        Function which waits for the glusterfs processes to come up
        """
        # Wait for glusterd to be online and validate it's running.
        self.assertTrue(wait_for_glusterd_to_start(self.servers),
                        "glusterd not up on the desired nodes.")
        g.log.info("Glusterd is up and running on desired nodes.")

        # Wait for peers to connect
        ret = wait_for_peers_to_connect(self.mnode, self.servers, 50)
        self.assertTrue(ret, "Peers not in connected state.")
        g.log.info("Peers in connected state.")

        # Wait for all volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode,
                                                   self.volname,
                                                   timeout=600)
        self.assertTrue(ret, ("All volume processes not up."))
        g.log.info("All volume processes are up.")
Code example #9
    def test_setting_vol_option_with_max_characters(self):

        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        auth_list = []
        for ip_addr in range(256):
            auth_list.append('192.168.122.%d' % ip_addr)
        for ip_addr in range(7):
            auth_list.append('192.168.123.%d' % ip_addr)
        ip_list = ','.join(auth_list)

        # set auth.allow with <4096 characters and restart the glusterd
        g.log.info("Setting auth.allow with string of length %d for %s",
                   len(ip_list), self.volname)
        self.options = {"auth.allow": ip_list}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set auth.allow with string of length"
                              " %d for %s" % (len(ip_list), self.volname)))
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart the glusterd on %s"
                        % self.mnode)

        # set auth.allow with >4096 characters and restart the glusterd
        ip_list = ip_list + ",192.168.123.7"
        self.options = {"auth.allow": ip_list}
        g.log.info("Setting auth.allow with string of length %d for %s",
                   len(ip_list), self.volname)
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set auth.allow with string of length"
                              " %d for %s" % (len(ip_list), self.volname)))
        ret = restart_glusterd(self.mnode)
        self.assertTrue(ret, "Failed to restart the glusterd on %s"
                        % self.mnode)

        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.servers)
        g.log.info("Glusterd start on the nodes : %s "
                   "succeeded", self.servers)
Code example #10
    def tearDown(self):
        """ Cleanup the volumes """
        if self.glusterd_is_stopped:
            ret = restart_glusterd(self.servers[1])
            if not ret:
                raise ExecutionError("Failed to start glusterd on node: %s"
                                     % self.servers[1])

            ret = wait_for_glusterd_to_start(self.servers[1])
            if not ret:
                raise ExecutionError("Glusterd is not yet started on node: %s"
                                     % self.servers[1])

        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume %s" % volume)

        # Disable multiplex
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'disable'})
        if not ret:
            raise ExecutionError("Failed to disable brick mux in cluster")

        # Peer probe detached servers
        pool = nodes_from_pool_list(self.mnode)
        for node in pool:
            peer_detach(self.mnode, node)
        ret = peer_probe_servers(self.mnode, self.servers)
        if not ret:
            raise ExecutionError("Failed to probe detached "
                                 "servers %s" % self.servers)

        # Calling baseclass tearDown method
        self.get_super_method(self, 'tearDown')()
Code example #11
    def tearDown(self):
        # Check if a node is still down
        if self.glusterd_is_stopped:
            ret = start_glusterd(self.random_server)
            self.assertTrue(
                ret, "Failed to start glusterd on %s" % self.random_server)
            g.log.info("Successfully started glusterd on node: %s",
                       self.random_server)

            # Waiting for glusterd to start completely
            ret = wait_for_glusterd_to_start(self.random_server)
            self.assertTrue(
                ret, "glusterd is not running on %s" % self.random_server)
            g.log.info("glusterd is started and running on %s",
                       self.random_server)

        # Unmounting and cleaning volume.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Code example #12
    def tearDown(self):
        # Restart glusterd on nodes for which it was stopped
        ret = restart_glusterd(self.servers[3:5])
        if not ret:
            raise ExecutionError("Failed to restart glusterd on nodes: %s" %
                                 self.servers[3:5])

        # Wait for glusterd to be online and validate it's running.
        ret = wait_for_glusterd_to_start(self.servers[3:5])
        if not ret:
            raise ExecutionError("Glusterd not up on the servers: %s" %
                                 self.servers[3:5])

        # clean up all volumes
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume %s" % volume)
            g.log.info("Volume deleted successfully : %s", volume)

        # Peer probe detached servers
        ret = peer_probe_servers(self.mnode, self.servers[1:3])
        if not ret:
            raise ExecutionError("Failed to probe detached "
                                 "servers %s" % self.servers[1:3])

        # Remove all the statedump files created in the test
        cmd = "rm -rf /var/run/gluster/glusterdump.*"
        ret, _, _ = g.run(self.mnode, cmd)
        if ret:
            raise ExecutionError("Failed to clear out the statedump files")

        self.get_super_method(self, 'tearDown')()
Code example #13
    def test_gfind_when_node_down(self):
        """
        Verifying the glusterfind functionality when node is down.

        1. Create a volume
        2. Create a session on the volume
        3. Create various files from mount point
        4. Bring down glusterd on one of the node
        5. Perform glusterfind pre
        6. Perform glusterfind post
        7. Check the contents of outfile
        8. Create more files from mountpoint
        9. Reboot one of the nodes
        10. Perform glusterfind pre
        11. Perform glusterfind post
        12. Check the contents of outfile
        """

        # pylint: disable=too-many-statements
        # Create a session for the volume
        ret, _, _ = gfind_create(self.mnode, self.volname, self.session)
        self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the "
                                  "volume %s failed" % self.volname))
        g.log.info("Successfully created a session for the volume %s",
                   self.volname)

        # Perform glusterfind list to check if session exists
        _, out, _ = gfind_list(self.mnode, volname=self.volname,
                               sessname=self.session)
        self.assertNotEqual(out, "No sessions found.",
                            "Failed to list the glusterfind session")
        g.log.info("Successfully listed the glusterfind session")

        self._perform_io_and_validate_presence_of_files()

        # Wait for changelog to get updated
        sleep(2)

        # Bring one of the node down.
        self.random_server = choice(self.servers[1:])
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Succesfully stopped glusterd on one node.")

        # Wait till glusterd is completely down.
        while is_glusterd_running(self.random_server) != 1:
            sleep(2)

        self._perform_glusterfind_pre_and_validate_outfile()

        # Perform glusterfind post for the session
        ret, _, _ = gfind_post(self.mnode, self.volname, self.session)
        self.assertEqual(ret, 0, ("Failed to perform glusterfind post"))
        g.log.info("Successfully performed glusterfind post")

        # Bring glusterd which was downed on a random node, up.
        ret = start_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s"
                        % self.random_server)
        g.log.info("Successfully started glusterd on node : %s",
                   self.random_server)

        # Waiting for glusterd to start completely.
        ret = wait_for_glusterd_to_start(self.random_server)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.random_server)
        g.log.info("glusterd is started and running on %s",
                   self.random_server)

        self._perform_io_and_validate_presence_of_files()

        # Perform IO
        self._perform_io_and_validate_presence_of_files()

        # Wait for changelog to get updated
        sleep(2)

        # Reboot one of the nodes.
        self.random_server = choice(self.servers[1:])
        ret = reboot_nodes(self.random_server)
        self.assertTrue(ret, "Failed to reboot the said node.")
        g.log.info("Successfully started reboot process on one node.")

        self._perform_glusterfind_pre_and_validate_outfile()

        # Perform glusterfind post for the session
        ret, _, _ = gfind_post(self.mnode, self.volname, self.session)
        self.assertEqual(ret, 0, ("Failed to perform glusterfind post"))
        g.log.info("Successfully performed glusterfind post")

        # Gradual sleep backoff till the node has rebooted.
        counter = 0
        timeout = 300
        ret = False
        while counter < timeout:
            ret, _ = are_nodes_online(self.random_server)
            if not ret:
                g.log.info("Node's offline, Retrying after 5 seconds ...")
                sleep(5)
                counter += 5
            else:
                ret = True
                break
        self.assertTrue(ret, "Node is still offline.")
        g.log.info("Rebooted node is online")

        # Wait for glusterd to start completely
        ret = wait_for_glusterd_to_start(self.random_server)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.random_server)
        g.log.info("glusterd is started and running on %s",
                   self.random_server)
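The IO helper `_perform_io_and_validate_presence_of_files` used in this test is not shown. A minimal sketch, assuming it creates a fresh batch of files from the first mount point on each call and then confirms they are visible; the file names, the running counter and the use of `self.mounts` are assumptions:

    def _perform_io_and_validate_presence_of_files(self):
        """Hypothetical sketch: create files from the mount point and
        verify that they are present (assumed implementation)."""
        # A running counter so every call creates a new batch of files.
        start = getattr(self, 'file_counter', 0)
        self.file_counter = start + 10
        mount_obj = self.mounts[0]

        cmd = ("cd %s; for i in $(seq %d %d); do touch file$i; done"
               % (mount_obj.mountpoint, start, self.file_counter - 1))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create files on the mount point")

        # Validate that every file just created is visible on the mount.
        cmd = ("cd %s; for i in $(seq %d %d); do ls file$i || exit 1; done"
               % (mount_obj.mountpoint, start, self.file_counter - 1))
        ret, _, _ = g.run(mount_obj.client_system, cmd)
        self.assertEqual(ret, 0, "Created files are not present on the "
                         "mount point")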
Code example #14
    def test_sync_functinality(self):

        # create a 2x3 volume
        num_of_servers = len(self.servers)
        servers_info_from_cluster = {}
        for server in self.servers[0:num_of_servers - 1]:
            servers_info_from_cluster[server] = self.all_servers_info[server]

        self.volume['servers'] = self.servers[0:num_of_servers - 1]
        self.volume['voltype']['replica_count'] = 3
        self.volume['voltype']['dist_count'] = 2
        ret = setup_volume(self.mnode, servers_info_from_cluster, self.volume)
        self.assertTrue(ret, ("Failed to create "
                              "and start volume %s" % self.volname))
        g.log.info("Successfully created and started the volume %s",
                   self.volname)

        # stop glusterd on a random node of the cluster
        random_server_index = random.randint(1, num_of_servers - 2)
        random_server = self.servers[random_server_index]
        cmd = "systemctl stop glusterd"
        ret = g.run_async(random_server, cmd)
        g.log.info("Stopping glusterd on %s", random_server)

        # set a option on volume, stat-prefetch on
        self.options = {"stat-prefetch": "on"}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set option stat-prefetch to on"
                              "for the volume %s" % self.volname))
        g.log.info(
            "Succeeded in setting stat-prefetch option to on"
            "for the volume %s", self.volname)

        # start glusterd on the node where glusterd is stopped
        ret = start_glusterd(random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s" % random_server)

        ret = wait_for_glusterd_to_start(random_server)
        self.assertTrue(ret, "glusterd is not running on %s" % random_server)
        g.log.info("glusterd is started and running on %s", random_server)

        # volume info should be synced across the cluster
        out1 = get_volume_info(self.mnode, self.volname)
        self.assertIsNotNone(
            out1, "Failed to get the volume info from %s" % self.mnode)
        g.log.info("Getting volume info from %s is success", self.mnode)

        count = 0
        while count < 60:
            out2 = get_volume_info(random_server, self.volname)
            self.assertIsNotNone(
                out2, "Failed to get the volume info from %s" % random_server)
            if out1 == out2:
                break
            sleep(2)
            count += 1

        g.log.info("Getting volume info from %s is success", random_server)
        self.assertDictEqual(out1, out2, "volume info is not synced")

        # stop glusterd on a random server from cluster
        random_server_index = random.randint(1, num_of_servers - 2)
        random_server = self.servers[random_server_index]
        cmd = "systemctl stop glusterd"
        ret = g.run_async(random_server, cmd)
        g.log.info("Stopping glusterd on node %s", random_server)

        # peer probe a new node
        ret = peer_probe_servers(self.mnode, self.servers[num_of_servers - 1])
        self.assertTrue(
            ret, "Failed to peer probe %s from %s" %
            (self.servers[num_of_servers - 1], self.mnode))
        g.log.info("Peer probe from %s to %s is success", self.mnode,
                   self.servers[num_of_servers - 1])

        # start glusterd on the node where glusterd is stopped
        ret = start_glusterd(random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s" % random_server)

        ret = wait_for_glusterd_to_start(random_server)
        self.assertTrue(ret, "glusterd is not running on %s" % random_server)
        g.log.info("glusterd is started and running on %s", random_server)

        # peer status should be synced across the cluster
        list1 = nodes_from_pool_list(self.mnode)
        self.assertIsNotNone(
            list1, "Failed to get nodes list in the cluster"
            "from %s" % self.mnode)
        g.log.info("Successfully got the nodes list in the cluster from %s",
                   self.mnode)

        # replacing ip with FQDN
        i = 0
        for node in list1:
            list1[i] = socket.getfqdn(node)
            i += 1
        list1 = sorted(list1)

        count = 0
        while count < 60:
            list2 = nodes_from_pool_list(random_server)
            self.assertIsNotNone(
                list2, "Failed to get nodes list in the "
                "cluster from %s" % random_server)
            # replacing ip with FQDN
            i = 0
            for node in list2:
                list2[i] = socket.getfqdn(node)
                i += 1

            list2 = sorted(list2)
            if list2 == list1:
                break
            sleep(2)
            count += 1

        g.log.info("Successfully got the nodes list in the cluster from %s",
                   random_server)

        self.assertListEqual(list1, list2, "Peer status is "
                             "not synced across the cluster")
        g.log.info("Peer status is synced across the cluster")
Code example #15
    def test_glusterd_rebalance(self):

        '''
        -> Create Volume
        -> Fuse mount the volume
        -> Perform I/O on fuse mount
        -> Add bricks to the volume
        -> Perform rebalance on the volume
        -> While rebalance is in progress,
        -> restart glusterd on all the nodes in the cluster
        '''

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 25 %s" % (self.script_upload_path,
                                             self.counter,
                                             mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )

        # Forming brick list
        self.brick_list = form_bricks_list_to_add_brick(
            self.mnode, self.volname, self.servers, self.all_servers_info)

        # Adding Bricks
        ret, _, _ = add_brick(self.mnode, self.volname, self.brick_list)
        self.assertEqual(ret, 0, "Failed to add brick to the volume %s"
                         % self.volname)
        g.log.info("Brick added successfully to the volume %s", self.volname)

        # Performing rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Failed to start rebalance on volume %s'
                         % self.volname)
        g.log.info("Rebalance started successfully on volume %s",
                   self.volname)

        # Checking Rebalance is in progress or not
        rebalance_status = get_rebalance_status(self.mnode, self.volname)
        if rebalance_status['aggregate']['statusStr'] != 'in progress':
            raise ExecutionError("Rebalance is not in 'in progress' state, "
                                 "either rebalance is in compeleted state or"
                                 " failed to get rebalance status")

        # Restart glusterd
        ret = restart_glusterd(self.servers)
        self.assertTrue(ret, "Failed to restart glusterd on servers")
        g.log.info("Glusterd restarted successfully on %s", self.servers)

        # Checking glusterd status
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd is not running on some of the "
                        "servers")
        g.log.info("Glusterd is running on all servers %s", self.servers)
Code example #16
    def test_glustershd_with_restarting_glusterd(self):
        """
        Test Script to verify the self heal daemon process with restarting
        glusterd and rebooting the server

        * stop all volumes
        * restart glusterd - should not run self heal daemon process
        * start replicated involved volumes
        * single self heal daemon process running
        * restart glusterd
        * self heal daemon pid will change
        * bring down brick and restart glusterd
        * self heal daemon pid will change and its different from previous
        * bring the brick back up

        """
        # pylint: disable=too-many-statements
        nodes = self.volume['servers']

        # stop the volume
        g.log.info("Stopping the volume %s", self.volname)
        ret = volume_stop(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to stop volume %s" % self.volname))
        g.log.info("Successfully stopped volume %s", self.volname)

        # check the self heal daemon process after stopping the volume
        g.log.info("Verifying the self heal daemon process for "
                   "volume %s", self.volname)
        ret = are_all_self_heal_daemons_are_online(self.mnode, self.volname)
        self.assertFalse(ret, ("Self Heal Daemon process is still running "
                               "even after stopping volume %s" % self.volname))
        g.log.info("Self Heal Daemon is not running after stopping  "
                   "volume %s", self.volname)

        # restart glusterd service on all the servers
        g.log.info("Restarting glusterd on all servers %s", nodes)
        ret = restart_glusterd(nodes)
        self.assertTrue(ret, ("Failed to restart glusterd on all nodes %s",
                              nodes))
        g.log.info("Successfully restarted glusterd on all nodes %s",
                   nodes)

        self.assertTrue(
            wait_for_glusterd_to_start(self.servers),
            "Failed to start glusterd on %s" % self.servers)

        # check the self heal daemon process after restarting glusterd process
        g.log.info("Starting to get self-heal daemon process on"
                   " nodes %s", nodes)
        ret = are_all_self_heal_daemons_are_online(self.mnode, self.volname)
        self.assertFalse(ret, ("Self Heal Daemon process is running after "
                               "glusterd restart with volume %s in "
                               "stop state" % self.volname))
        g.log.info("Self Heal Daemon is not running after stopping  "
                   "volume and restarting glusterd %s", self.volname)

        # start the volume
        g.log.info("Starting the volume %s", self.volname)
        ret = volume_start(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to start volume %s" % self.volname))
        g.log.info("Volume %s started successfully", self.volname)

        # Verify glustershd process releases its parent process
        g.log.info("Checking whether glustershd process is daemonized or not")
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))
        g.log.info("Single self heal daemon process on all nodes %s", nodes)

        # get the self heal daemon pids after starting volume
        g.log.info("Starting to get self-heal daemon process "
                   "on nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))
        g.log.info("Successful in getting self heal daemon pids")
        glustershd_pids = pids

        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # validate the bricks present in volume info
        # with glustershd server volume file
        g.log.info("Starting parsing file %s on "
                   "node %s", self.glustershd, self.mnode)
        ret = do_bricks_exist_in_shd_volfile(self.mnode, self.volname,
                                             bricks_list)
        self.assertTrue(ret, ("Brick List from volume info is different from "
                              "glustershd server volume file. "
                              "Please check log file for details."))
        g.log.info("Successfully parsed %s file", self.glustershd)

        # restart glusterd service on all the servers
        g.log.info("Restarting glusterd on all servers %s", nodes)
        ret = restart_glusterd(nodes)
        self.assertTrue(ret, ("Failed to restart glusterd on all nodes %s",
                              nodes))
        g.log.info("Successfully restarted glusterd on all nodes %s",
                   nodes)

        # Verify volume's all process are online for 60 sec
        g.log.info("Verifying volume's all process are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All process are not "
                              "online", self.volname))
        g.log.info("Successfully Verified volume %s processes are online",
                   self.volname)

        # Verify glustershd process releases its parent process
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))

        # check the self heal daemon process after starting volume and
        # restarting glusterd process
        g.log.info("Starting to get self-heal daemon process "
                   "on nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))
        glustershd_pids_after_glusterd_restart = pids

        self.assertNotEqual(glustershd_pids,
                            glustershd_pids_after_glusterd_restart,
                            ("Self Heal Daemon pids are same after "
                             "restarting glusterd process"))
        g.log.info("Self Heal Daemon process are different before and "
                   "after restarting glusterd process")

        # select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(filter(None, (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks'])))

        # bring bricks offline
        g.log.info("Going to bring down the brick process "
                   "for %s", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(ret, ("Failed to bring down the bricks. Please "
                              "check the log file for more details."))
        g.log.info("Brought down the brick process "
                   "for %s successfully", bricks_to_bring_offline)

        # restart glusterd after brought down the brick
        g.log.info("Restart glusterd on all servers %s", nodes)
        ret = restart_glusterd(nodes)
        self.assertTrue(ret, ("Failed to restart glusterd on all nodes %s",
                              nodes))
        g.log.info("Successfully restarted glusterd on all nodes %s",
                   nodes)

        # Verify volume's all process are online for 60 sec
        g.log.info("Verifying volume's all process are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All process are not "
                              "online", self.volname))
        g.log.info("Successfully Verified volume %s processes are online",
                   self.volname)

        # Verify glustershd process releases its parent process
        ret = is_shd_daemonized(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))

        # check the self heal daemon process after killing brick and
        # restarting glusterd process
        g.log.info("Starting to get self-heal daemon process "
                   "on nodes %s", nodes)
        ret, pids = get_self_heal_daemon_pid(nodes)
        self.assertTrue(ret, ("Either No self heal daemon process found or "
                              "more than One self heal daemon process found"))
        glustershd_pids_after_killing_brick = pids

        self.assertNotEqual(glustershd_pids_after_glusterd_restart,
                            glustershd_pids_after_killing_brick,
                            ("Self Heal Daemon process are same from before "
                             "killing the brick,restarting glusterd process"))
        g.log.info("Self Heal Daemon process are different after killing the "
                   "brick, restarting the glusterd process")

        # bring the bricks back online
        g.log.info("Bringing up the bricks %s online",
                   bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(ret, ("Failed to brought the bricks online"))
        g.log.info("Successfully brought the bricks online")

        # check all bricks are online
        g.log.info("Verifying all bricka are online or not.....")
        ret = are_bricks_online(self.mnode, self.volname,
                                bricks_to_bring_offline)
        self.assertTrue(ret, ("Not all bricks are online"))
        g.log.info("All bricks are online.")
Code example #17
    def test_volume_set_when_glusterd_stopped_on_one_node(self):
        """
        Test Case:
        1) Setup and mount a volume on client.
        2) Stop glusterd on a random server.
        3) Start IO on mount points
        4) Set an option on the volume
        5) Start glusterd on the stopped node.
        6) Verify all the bricks are online after starting glusterd.
        7) Check if the volume info is synced across the cluster.
        """
        # Fetching the bricks list and storing it for later use
        list1 = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(
            list1, "Failed to get the list of online bricks "
            "for volume: %s" % self.volname)

        # Fetching a random server from list.
        self.random_server = choice(self.servers[1:])

        # Stopping glusterd on one node.
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Successfully stopped glusterd on one node.")

        self.glusterd_is_stopped = True

        # Start IO on mount points.
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--dirname-start-num %d "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")

        # set a option on volume, stat-prefetch on
        self.options = {"stat-prefetch": "on"}
        ret = set_volume_options(self.mnode, self.volname, self.options)
        self.assertTrue(ret, ("Failed to set option stat-prefetch to on"
                              "for the volume %s" % self.volname))
        g.log.info(
            "Succeeded in setting stat-prefetch option to on"
            "for the volume %s", self.volname)

        # start glusterd on the node where glusterd is stopped
        ret = start_glusterd(self.random_server)
        self.assertTrue(ret,
                        "Failed to start glusterd on %s" % self.random_server)
        g.log.info("Successfully started glusterd on node: %s",
                   self.random_server)

        # Waiting for glusterd to start completely
        ret = wait_for_glusterd_to_start(self.random_server)
        self.assertTrue(ret,
                        "glusterd is not running on %s" % self.random_server)
        g.log.info("glusterd is started and running on %s", self.random_server)

        self.glusterd_is_stopped = False

        # Confirm if all the bricks are online or not
        count = 0
        while count < 10:
            list2 = get_online_bricks_list(self.mnode, self.volname)
            if list1 == list2:
                break
            sleep(2)
            count += 1

        self.assertListEqual(
            list1, list2, "Unexpected: All the bricks in the"
            "volume are not online")
        g.log.info("All the bricks in the volume are back online")

        # volume info should be synced across the cluster
        out1 = get_volume_info(self.mnode, self.volname)
        self.assertIsNotNone(
            out1, "Failed to get the volume info from %s" % self.mnode)
        g.log.info("Getting volume info from %s is success", self.mnode)

        count = 0
        while count < 60:
            out2 = get_volume_info(self.random_server, self.volname)
            self.assertIsNotNone(
                out2,
                "Failed to get the volume info from %s" % self.random_server)
            if out1 == out2:
                break
            sleep(2)
            count += 1

        self.assertDictEqual(
            out1, out2, "Volume info is not synced in the"
            "restarted node")
        g.log.info("Volume info is successfully synced across the cluster")
Code example #18
    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        '''
        Test script to verify the behavior when we try to peer
        probe a valid node whose glusterd is down
        Also post validate to make sure no core files are created
        under "/", /var/log/core and /tmp  directory

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1. Check the current peer status
        2. Detach one of the valid nodes which is already part of the cluster
        3. Stop glusterd on that node
        4. Try to attach the above node to the cluster, which must fail with
           a Transport endpoint error
        5. Recheck the test using the hostname, expecting the same result
        6. Start glusterd on that node
        7. Halt/reboot the node
        8. Try to peer probe the halted node, which must fail again
        9. The only error accepted is
           "peer probe: failed: Probe returned with Transport endpoint is not
           connected"
        10. Check peer status and make sure no other nodes are in peer
            reject state
        '''

        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # detach one of the nodes which is part of the cluster
        g.log.info("detaching server %s ", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        msg = 'peer detach: failed: %s is not part of cluster\n' \
              % self.servers[1]
        if ret:
            self.assertEqual(err, msg, "Failed to detach %s "
                             % (self.servers[1]))

        # bring down glusterd of the server which has been detached
        g.log.info("Stopping glusterd on %s ", self.servers[1])
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1])

        # trying to peer probe the node whose glusterd was stopped using its IP
        g.log.info("Peer probing %s when glusterd down ", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # trying to peer probe the same node with hostname
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   self.servers[1])
        hostname = g.run(self.servers[1], "hostname")
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with"
                              " Transport endpoint is not connected\n")

        # start glusterd again for the next set of test steps
        g.log.info("starting glusterd on %s ", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "glusterd couldn't start successfully on %s"
                        % self.servers[1])

        # reboot a server and then trying to peer probe at the time of reboot
        g.log.info("Rebooting %s and checking peer probe", self.servers[1])
        reboot = g.run_async(self.servers[1], "reboot")

        # Mandatory sleep for 3 seconds to make sure node is in halted state
        sleep(3)

        # Peer probing the node using IP when it is still not online
        g.log.info("Peer probing node %s which has been issued a reboot ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to"
                                    " fail")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # Peer probing the node using hostname when it is still not online
        g.log.info("Peer probing node %s using hostname which is still "
                   "not online ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when node "
                                    "has not come online")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        ret, _, _ = reboot.async_communicate()
        self.assertEqual(ret, 255, "reboot failed")

        # Validate if rebooted node is online or not
        count = 0
        while count < 40:
            sleep(15)
            ret, _ = are_nodes_online(self.servers[1])
            if ret:
                g.log.info("Node %s is online", self.servers[1])
                break
            count += 1
        self.assertTrue(ret, "Node in test not yet online")

        # check if glusterd is running post reboot
        ret = wait_for_glusterd_to_start(self.servers[1],
                                         glusterd_start_wait_timeout=120)
        self.assertTrue(ret, "Glusterd service is not running post reboot")

        # peer probe the node must pass
        g.log.info("peer probing node %s", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with "
                                 "%s " % err)

        # checking if core file created in "/", "/tmp" and "/var/log/core"
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "core file found")
Code example #19
    def test_snap_glusterd_down(self):
        # pylint: disable=too-many-statements
        """
        Steps:

        1. create a volume
        2. mount volume
        3. create snapshot of that volume
        4. validate using snapshot info
        5. Activate snapshot
        6. List all snapshots present
        7. validate using snapshot info
        8. Stop glusterd on one node
        9. Check glusterd status
       10. deactivate created snapshot
       11. Start glusterd on that node
       12. Check glusterd status
       13. validate using snapshot info
       14. Check all peers are connected

        """
        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to create snapshot %s for volume %s" %
                          (self.snap, self.volname)))
        g.log.info("Snapshot %s created successfully "
                   "for volume %s", self.snap, self.volname)

        # Check snapshot info
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        self.assertIsNotNone(
            snap_info, "Failed to get snap information"
            "for snapshot %s" % self.snap)
        status = snap_info['snapVolume']['status']
        self.assertNotEqual(status, 'Started', "snapshot %s "
                            "not started" % self.snap)
        g.log.info("Successfully checked snapshot info")

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to Activate snapshot %s" % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # snapshot list
        g.log.info("Starting to validate list of snapshots")
        snap_list1 = get_snap_list(self.mnode)
        self.assertIsNotNone(snap_list1, "Failed to list all the snapshot")
        self.assertEqual(len(snap_list1), 1, "Failed to validate snap list")
        g.log.info("Snapshot list successfully validated")

        # Check snapshot info
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        status = snap_info['snapVolume']['status']
        self.assertEqual(status, 'Started', "Failed to" "start snapshot info")
        g.log.info("Successfully checked snapshot info")

        # Stop Glusterd on one node
        g.log.info("Stopping Glusterd on one node")
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on node %s"
                        % self.servers[1])

        # Check Glusterd status
        g.log.info("Check glusterd running or not")
        self.assertFalse(wait_for_glusterd_to_start(self.servers[1]),
                         "glusterd is still running on %s" % self.servers[1])
        g.log.info("Expected: Glusterd not running on node %s",
                   self.servers[1])

        # de-activating snapshot
        g.log.info("Starting to de-activate Snapshot")
        ret, _, _ = snap_deactivate(self.mnode, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to deactivate snapshot %s" % self.snap))
        g.log.info("Snapshot %s deactivated successfully", self.snap)

        # validate snapshot info
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        status = snap_info['snapVolume']['status']
        self.assertNotEqual(status, 'Started',
                            "Snapshot %s is still in Started state after "
                            "deactivation" % self.snap)
        g.log.info("Successfully validated snapshot info")

        # Start Glusterd on node
        g.log.info("Starting Glusterd on node %s", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(
            ret, "Failed to start glusterd on %s node" % self.servers[1])
        g.log.info("Successfully started glusterd on "
                   "%s node", self.servers[1])

        # Check Glusterd status
        g.log.info("Check glusterd running or not")
        self.assertTrue(wait_for_glusterd_to_start(self.servers[1]),
                        "glusterd is not running on %s" % self.servers[1])
        g.log.info("glusterd is running on %s node", self.servers[1])

        # validate snapshot info
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        self.assertIsNotNone(
            snap_info, "Failed to get snap info for"
            " snapshot %s" % self.snap)
        status = snap_info['snapVolume']['status']
        self.assertNotEqual(
            status, 'Started',
            "Unexpected: snapshot %s is in Started state" % self.snap)
        g.log.info("Successfully validated snapshot info")

        # Check all the peers are in connected state
        g.log.info("Validating all the peers are in connected state")
        self.assertTrue(wait_for_peers_to_connect(self.mnode, self.servers),
                        "Peers are not in connected state: %s" % self.servers)
        g.log.info("Successfully validated all the peers")
Code example #20
    def test_snap_list_glusterd_restart(self):
        """
        Verify snapshot list before and after glusterd restart

        * Create 3 snapshots of the volume
        * Delete one snapshot
        * List all snapshots created
        * Restart glusterd on all nodes
        * List all snapshots
          All snapshots must be listed except the one that was deleted
        """

        # pylint: disable=too-many-statements
        # Create snapshots
        for snap in self.snapshots:
            ret, _, _ = snap_create(self.mnode, self.volname, snap)
            self.assertEqual(ret, 0, ("Failed to create snapshot %s for "
                                      "volume %s" % (snap, self.volname)))
            g.log.info("Snapshot %s created successfully "
                       "for volume %s", snap, self.volname)

        # List the snapshots and validate with snapname
        snap_list = get_snap_list(self.mnode)
        self.assertIsNotNone(snap_list, "Failed to list all snapshots")
        self.assertEqual(len(snap_list), 3, "Failed to validate snap list")
        g.log.info("Successfully validated snap list")
        for snap in self.snapshots:
            self.assertIn(
                snap, snap_list, "Failed to validate the snapshot "
                "%s in the snapshot list" % snap)
        g.log.info("Successfully validated the presence of snapshots using "
                   "snapname")

        # Delete one snapshot
        ret, _, _ = snap_delete(self.mnode, self.snapshots[0])
        self.assertEqual(ret, 0,
                         ("Failed to delete snapshot %s" % self.snapshots[0]))
        g.log.info("Snapshots %s deleted Successfully", self.snapshots[0])

        # List the snapshots and validate with snapname
        snap_list = get_snap_list(self.mnode)
        self.assertIsNotNone(snap_list, "Failed to list all snapshots")
        self.assertEqual(len(snap_list), 2, "Failed to validate snap list")
        g.log.info("Successfully validated snap list")
        for snap in self.snapshots[1:]:
            self.assertIn(
                snap, snap_list, "Failed to validate the snapshot "
                "%s in the snapshot list" % snap)
        g.log.info("Successfully validated the presence of snapshots using "
                   "snapname")

        # Restart glusterd on all the servers
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret, ("Failed to restart glusterd on nodes %s" % self.servers))
        g.log.info("Successfully restarted glusterd on nodes %s", self.servers)

        # Wait for glusterd to be online and validate glusterd running on all
        # server nodes
        self.assertTrue(
            wait_for_glusterd_to_start(self.servers),
            "Unexpected: glusterd not up on one or more of the nodes")
        g.log.info("Glusterd is up and running on all nodes")

        # Check if peers are connected
        self.assertTrue(is_peer_connected(self.mnode, self.servers),
                        "Unexpected: Peers are not in connected state")
        g.log.info("Successful: All peers are in connected state")

        # List the snapshots after glusterd restart
        # All snapshots must be listed except the one deleted
        for server in self.servers:
            snap_list = get_snap_list(server)
            self.assertIsNotNone(
                snap_list,
                "Failed to get the list of snapshots in node %s" % server)
            self.assertEqual(
                len(snap_list), 2,
                "Unexpected: Number of snapshots not consistent in the node %s"
                % server)
            g.log.info("Successfully validated snap list for node %s", server)
            for snap in self.snapshots[1:]:
                self.assertIn(
                    snap, snap_list, "Failed to validate the snapshot "
                    "%s in the snapshot list" % snap)
            g.log.info(
                "Successfully validated the presence of snapshots "
                "using snapname for node %s", server)
Code example #21
    def test_brick_port(self):
        # pylint: disable=too-many-statements, too-many-branches
        """
        In this test case:
        1. Create a trusted storage pool of 2 nodes
        2. Create a distributed volume with 2 bricks
        3. Start the volume
        4. Stop glusterd on node 2
        5. Modify any volume option on node 1
        6. Start glusterd on node 2
        7. Check the volume status; each brick should get a port
        """
        my_server_info = {
            self.servers[0]: self.all_servers_info[self.servers[0]]
        }
        my_servers = self.servers[0:2]
        index = 1
        ret, _, _ = peer_probe(self.servers[0], self.servers[index])
        self.assertEqual(ret, 0, "peer probe from %s to %s failed"
                         % (self.servers[0], self.servers[index]))
        g.log.info("peer probe succeeded from %s to %s",
                   self.servers[0], self.servers[index])
        key = self.servers[index]
        my_server_info[key] = self.all_servers_info[key]

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 2,
                                       my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        ret = volume_create(self.mnode, self.volname,
                            bricks_list, force=False)
        self.assertEqual(ret[0], 0, ("Unable"
                                     "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")

        g.log.info("Successfully got the list of bricks of volume")

        vol_status = get_volume_status(self.mnode, self.volname)
        self.assertIsNotNone(vol_status, "Failed to get volume "
                             "status for %s" % self.volname)
        totport = 0
        for _, value in vol_status.items():
            for _, val in value.items():
                for _, value1 in val.items():
                    if int(value1["port"]) > 0:
                        totport += 1

        self.assertEqual(totport, 2, "Volume %s is not started successfully "
                                     "because the number of brick ports is "
                                     "not equal to 2" % self.volname)

        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on one of the node")
        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertFalse(ret, "glusterd is still running on %s"
                         % self.servers[1])
        g.log.info("Glusterd stop on the nodes : %s "
                   "succeeded", self.servers[1])

        option = {'performance.readdir-ahead': 'on'}
        ret = set_volume_options(self.servers[0], self.volname, option)
        self.assertTrue(ret, "gluster volume set %s performance.readdir-ahead"
                             "on is failed on server %s"
                        % (self.volname, self.servers[0]))
        g.log.info("gluster volume set %s performance.readdir-ahead on"
                   "successfully on :%s", self.volname, self.servers[0])

        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to start glusterd on one of the nodes")
        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.servers[1])
        g.log.info("Glusterd start on node %s succeeded", self.servers[1])

        ret = wait_for_peers_to_connect(self.servers[0], self.servers[1])
        self.assertTrue(ret, "Peer %s is not in connected state with %s"
                        % (self.servers[1], self.servers[0]))

        vol_status = get_volume_status(self.mnode, self.volname)
        self.assertIsNotNone(vol_status, "Failed to get volume "
                             "status for %s" % self.volname)
        totport = 0
        for _, value in vol_status.items():
            for _, val in value.items():
                for _, value1 in val.items():
                    if int(value1["port"]) > 0:
                        totport += 1

        self.assertEqual(totport, 2, "Volume %s is not started successfully "
                                     "because the number of brick ports is "
                                     "not equal to 2" % self.volname)
Code example #22
    def test_rebalance_hang(self):
        """
        In this test case:
        1. Create a trusted storage pool of 2 nodes
        2. Create a distributed volume with 2 bricks
        3. Start the volume
        4. Mount the volume
        5. Add some data files on the mount
        6. Start rebalance with force
        7. Kill glusterd on the 2nd node
        8. Issue volume related commands
        """

        # pylint: disable=too-many-statements
        my_server_info = {
            self.servers[0]: self.all_servers_info[self.servers[0]]
        }
        my_servers = self.servers[0:2]
        index = 1
        ret, _, _ = peer_probe(self.servers[0], self.servers[index])
        self.assertEqual(ret, 0, "peer probe from %s to %s failed"
                         % (self.servers[0], self.servers[index]))
        g.log.info("peer probe succeeded from %s to %s",
                   self.servers[0], self.servers[index])
        key = self.servers[index]
        my_server_info[key] = self.all_servers_info[key]

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 2, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        ret, _, _ = volume_create(self.mnode,
                                  self.volname,
                                  bricks_list,
                                  force=False)
        self.assertEqual(ret, 0, ("Unable"
                                  "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        self.all_mounts_procs = []
        # Creating files
        command = ("cd %s/ ; "
                   "for i in `seq 1 10` ; "
                   "do mkdir l1_dir.$i ; "
                   "for j in `seq 1 5` ; "
                   "do mkdir l1_dir.$i/l2_dir.$j ; "
                   "for k in `seq 1 10` ; "
                   "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
                   "bs=128k count=$k ; done ; done ; done ; " %
                   (self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        g.log.info("Starting rebalance with force on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, False, True)
        self.assertEqual(
            ret, 0, "Failed to start rebalance for volume %s" % self.volname)
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on one of the node")
        ret = is_glusterd_running(self.servers[1])
        self.assertNotEqual(
            ret, 0, ("Glusterd is not stopped on servers %s", self.servers[1]))
        g.log.info("Glusterd stop on the nodes : %s succeeded",
                   self.servers[1])

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                        "%s" % self.volname)
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        vol_status = get_volume_status(self.mnode, self.volname)
        self.assertIsNotNone(
            vol_status, "Failed to get volume "
            "status for %s" % self.volname)

        # Start glusterd on the node where it is stopped
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "glusterd start on the node failed")
        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertTrue(ret, "glusterd is not running on %s" % self.servers[1])
        g.log.info("Glusterd start on the nodes : %s "
                   "succeeded", self.servers[1])
Code example #23
    def test_glusterd_memory_consumption_increase(self):
        """
        Test Case:
        1) Enable brick-multiplex and set max-bricks-per-process to 3 in
           the cluster
        2) Get the glusterd memory consumption
        3) Perform create,start,stop,delete operation for 100 volumes
        4) Check glusterd memory consumption, it should not increase by
           more than 50MB
        5) Repeat steps 3-4 two more times
        6) Check glusterd memory consumption it should not increase by
           more than 10MB
        """
        # pylint: disable=too-many-locals
        # Restarting glusterd to refresh its memory consumption
        ret = restart_glusterd(self.servers)
        self.assertTrue(ret, "Restarting glusterd failed")

        # Check if glusterd is running post restart
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd service is not running post restart")

        # Enable brick-multiplex, set max-bricks-per-process to 3 in cluster
        for key, value in (('cluster.brick-multiplex', 'enable'),
                           ('cluster.max-bricks-per-process', '3')):
            ret = set_volume_options(self.mnode, 'all', {key: value})
            self.assertTrue(
                ret, "Failed to set {} to {} for the cluster".format(
                    key, value))

        # Get the PID of the glusterd process on each server
        pid_list = []
        for server in self.servers:
            cmd = "pidof glusterd"
            ret, pid, _ = g.run(server, cmd)
            self.assertEqual(ret, 0, "Failed to get the pid of glusterd")
            pid = int(pid)
            pid_list.append(pid)

        # Fetch the list of memory consumed in all the nodes
        mem_consumed_list = self._memory_consumption_for_all_nodes(pid_list)

        # Perform volume operations for 100 volumes for first time
        self._volume_operations_in_loop()

        # Fetch the list of memory consumed in all the nodes after 1 iteration
        mem_consumed_list_1 = self._memory_consumption_for_all_nodes(pid_list)

        for i, mem in enumerate(mem_consumed_list_1):
            self.assertLessEqual(
                mem - mem_consumed_list[i], 50,
                "Unexpected: Memory consumption of glusterd increased by "
                "more than 50 MB after the first iteration")

        # Perform volume operations for 100 volumes for second time
        self._volume_operations_in_loop()

        # Fetch the list of memory consumed in all the nodes after 2 iterations
        mem_consumed_list_2 = self._memory_consumption_for_all_nodes(pid_list)

        for i, mem in enumerate(mem_consumed_list_2):
            self.assertLessEqual(
                mem - mem_consumed_list_1[i], 10,
                "Unexpected: Memory consumption of glusterd increased by "
                "more than 10 MB after the second iteration")

        # Perform volume operations for 100 volumes for third time
        self._volume_operations_in_loop()

        # Fetch the list of memory consumed in all the nodes after 3 iterations
        mem_consumed_list_3 = self._memory_consumption_for_all_nodes(pid_list)

        for i, mem in enumerate(mem_consumed_list_3):
            self.assertLessEqual(
                mem - mem_consumed_list_2[i], 10,
                "Unexpected: Memory consumption of glusterd increased by "
                "more than 10 MB after the third iteration")
Code example #24
    def test_validate_optimized_glusterd_handshake(self):
        """
        Test Case:
        1) Create a 3 node cluster
        2) Enable brick-multiplex in the cluster
        3) Create and start 2000 volumes
        4) Stop one of the node in the cluster
        5) Set an option for around 850 volumes in the cluster
        6) Restart glusterd on the previous node
        7) Check the value of the option set earlier, in the restarted node
        """
        # pylint: disable=too-many-locals
        # Enable brick-multiplex
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(ret, "Failed to enable brick mux on cluster")

        server_info_frm_three_node = {}
        for server in self.servers[:3]:
            server_info_frm_three_node[server] = self.all_servers_info[server]

        # Fetch the available bricks dict
        bricks_dict = get_servers_bricks_dict(self.servers[:3],
                                              server_info_frm_three_node)
        self.assertIsNotNone(bricks_dict, "Failed to get the bricks dict")

        # Using a custom loop instead of bulk_volume_creation, because that
        # helper produces a huge amount of logging and makes unwanted calls,
        # which would slow down the test case and use more memory
        # Create and start 2000 volumes
        for i in range(2000):
            self.volname = "volume-%d" % i
            bricks_list = []
            j = 0
            for key, value in bricks_dict.items():
                j += 1
                brick = choice(value)
                brick = "{}:{}/{}_brick-{}".format(key, brick,
                                                   self.volname, j)
                bricks_list.append(brick)

            kwargs = {'replica_count': 3}

            ret, _, _ = volume_create(self.mnode, self.volname,
                                      bricks_list, False, **kwargs)
            self.assertEqual(ret, 0, "Failed to create volume: %s"
                             % self.volname)

            ret, _, _ = volume_start(self.mnode, self.volname)
            self.assertEqual(ret, 0, "Failed to start volume: %s"
                             % self.volname)

        g.log.info("Successfully created and started all the volumes")

        # Stop glusterd on one node
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on node :%s"
                        % self.servers[1])

        self.glusterd_is_stopped = True

        # Set a volume option for 850 volumes
        option_value = {'network.ping-timeout': 45}
        for i in range(850):
            vol_name = "volume-" + str(i)
            ret = set_volume_options(self.mnode, vol_name, option_value)
            self.assertTrue(ret, "Failed to set volume option")

        # Start glusterd on the previous node
        ret = restart_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to start glusterd on node: %s"
                        % self.servers[1])

        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertTrue(ret, "Glusterd is not yet started on the node :%s"
                        % self.servers[1])

        # It might take some time for the peers to reach connected state
        # because of the huge number of volumes to sync
        while True:
            ret = is_peer_connected(self.mnode, self.servers[1:3])
            if ret:
                break
            sleep(1)

        self.assertTrue(ret, "Peers are not in connected state")

        self.glusterd_is_stopped = False

        # Check the volume option set earlier is synced on restarted node
        for i in range(850):
            vol_name = "volume-" + str(i)
            # Using a while True loop because there might be a race condition
            # and it might take time for the node to sync the data initially
            while True:
                ret = get_volume_options(self.servers[1], vol_name,
                                         'network.ping-timeout')
                self.assertTrue(ret, "Failed to get volume option")
                g.log.info("Ret: %s", ret['network.ping-timeout'])
                if ret['network.ping-timeout'] == '45':
                    break
            self.assertEqual(ret['network.ping-timeout'], '45',
                             "Option value not updated in the restarted node")
Code example #25
    def test_profile_operations_with_one_node_down(self):

        # pylint: disable=too-many-statements
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile info on the volume.
        4) Stop glusterd on one node.
        5) Run profile info with different parameters
           and see if all bricks are present or not.
        6) Stop profile on the volume.
        """

        # Start IO on mount points.
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dirname-start-num %d "
                   "--dir-length 6 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" % (
                       self.script_upload_path,
                       counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start profile on volume: %s"
                         % self.volname)
        g.log.info("Successfully started profile on volume: %s",
                   self.volname)

        # Pick a random server index, excluding the first node (index 0)
        self.random_server = randint(1, len(self.servers)-1)

        # Stopping glusterd on one node.
        ret = stop_glusterd(self.servers[self.random_server])
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Successfully stopped glusterd on one node.")
        ret = wait_for_glusterd_to_start(self.servers[self.random_server])
        self.assertFalse(ret, "glusterd is still running on %s"
                         % self.servers[self.random_server])
        g.log.info("Glusterd stop on the nodes : %s "
                   "succeeded", self.servers[self.random_server])

        # Getting and checking output of profile info.
        ret, out, _ = profile_info(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to run profile info on volume: %s"
                         % self.volname)
        g.log.info("Successfully executed profile info on volume: %s",
                   self.volname)

        # Checking if all bricks are present in profile info.
        brick_list = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get online bricks list")
        for brick in brick_list:
            self.assertTrue(brick in out,
                            "Brick %s not a part of profile info output."
                            % brick)
            g.log.info("Brick %s showing in profile info output.",
                       brick)

        # Running profile info with different profile options.
        profile_options = ['peek', 'incremental', 'clear', 'incremental peek',
                           'cumulative']
        for option in profile_options:

            # Getting and checking output of profile info.
            ret, out, _ = profile_info(self.mnode, self.volname,
                                       options=option)
            self.assertEqual(ret, 0,
                             "Failed to run profile info %s on volume: %s"
                             % (option, self.volname))
            g.log.info("Successfully executed profile info %s on volume: %s",
                       option, self.volname)

            # Checking if all bricks are present in profile info peek.
            for brick in brick_list:
                self.assertTrue(brick in out,
                                "Brick %s not a part of profile"
                                " info %s output."
                                % (brick, option))
                g.log.info("Brick %s showing in profile info %s output.",
                           brick, option)

        # Starting glusterd on node where stopped.
        ret = start_glusterd(self.servers[self.random_server])
        self.assertTrue(ret, "Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        # Checking if peer is connected
        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        self.assertTrue(ret, "Peers are not in connected state.")
        g.log.info("Peers are in connected state.")

        # Stop profile on volume.
        ret, _, _ = profile_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop profile on volume: %s"
                         % self.volname)
        g.log.info("Successfully stopped profile on volume: %s", self.volname)

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        g.log.info("IO validation complete.")
Code example #26
    def test_glusterd_friend_update_on_peer_rejoin(self):
        """
        Test Steps:
        1. Restart glusterd on one of the nodes
        2. Check the friend updates that happened between the nodes where
           glusterd was running
        3. Check the friend updates between the rejoined node and each other
           node
        """
        # Restart glusterd on one of the node
        ret = restart_glusterd(self.servers[1])
        self.assertTrue(
            ret, "Failed to restart glusterd on server: %s" % self.servers[1])

        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertTrue(
            ret, "Unexpected: Glusterd not yet started on:"
            " server: %s" % self.servers[1])

        # Save the current UTC time, subtracting 1 second to allow for race
        # conditions in log timestamps
        curr_time = datetime.utcnow() - timedelta(seconds=1)
        curr_time = curr_time.strftime("%H:%M:%S")

        # Minimum cluster size
        min_clstr_sz = 2

        # Friend updates for a minimum cluster
        min_updt = 4

        # Current cluster size
        crnt_clstr_size = len(self.servers)

        # Wait until all the updates between the cluster nodes finish
        sleep(2 * crnt_clstr_size)

        # Intentionally using the command line here to leverage grep/cut
        # filtering
        cmd = "gluster peer status | grep 'Uuid:' | cut -d ':' -f 2"
        ret, peer_lst, _ = g.run(self.servers[1], cmd)
        self.assertEqual(ret, 0, "Failed to execute the peer status command")
        peer_lst = peer_lst.splitlines()
        peer_lst = [p_uuid.strip() for p_uuid in peer_lst]

        # Check if there are any friend update between other nodes
        # and the restarted node
        for server in self.servers:
            # Don't check on the restarted node
            if server != self.servers[1]:
                for uuid in peer_lst:
                    cmd = ("awk '/%s/,0' /var/log/glusterfs/glusterd.log |"
                           " grep '_handle_friend_update' | grep %s | wc -l" %
                           (curr_time, uuid))
                    ret, out, _ = g.run(server, cmd)
                    self.assertEqual(
                        ret, 0, "Failed to get count of friend"
                        " updates")
                    out = int(out)
                    self.assertEqual(
                        out, 0, "Unexpected: Found friend updates"
                        " between other nodes")

        g.log.info("Expected: No friend updates between other peer nodes")

        # Check friend updates between rejoined node and other nodes
        cmd = ("awk '/%s/,0' /var/log/glusterfs/glusterd.log "
               "| grep '_handle_friend_update' | wc -l" % curr_time)
        ret, count, _ = g.run(self.servers[1], cmd)
        self.assertEqual(ret, 0, "Failed to fetch the count of friend updates")
        count = int(count)

        # Calculate the expected friend updates for a given cluster size
        expected_frnd_updts = min_updt * (crnt_clstr_size - min_clstr_sz + 1)

        self.assertEqual(
            count, expected_frnd_updts, "Count of friend updates"
            " is not equal to the expected value")