Example 1
    def test_offline_brick_status_when_quorum_not_met(self):
        """
        Test Brick status when Quorum is not met after glusterd restart.
        1. Create a volume and mount it.
        2. Set the quorum type to 'server'.
        3. Bring some nodes down such that quorum won't be met.
        4. Brick status should be offline in the node which is up.
        5. Restart glusterd in this node.
        6. The brick status still should be offline as quorum isn't met.
        """
        # Set the quorum type to server and validate it.
        vol_option = {'cluster.server-quorum-type': 'server'}
        ret = set_volume_options(self.mnode, self.volname, vol_option)
        self.assertTrue(ret, "gluster volume option set of %s to %s failed"
                        % ('cluster.server-quorum-type', 'server'))
        g.log.info("Cluster quorum set to type server.")

        # Get the brick list.
        brick_list = get_all_bricks(self.mnode, self.volname)

        # Stop glusterd processes.
        ret = stop_glusterd(self.servers[1:])
        self.assertTrue(ret, "Failed to stop glusterd on specified nodes.")
        g.log.info("Glusterd processes stopped in the desired servers.")

        # Get the brick status in a node where glusterd is up.
        ret = are_bricks_offline(self.mnode, self.volname, brick_list[0:1])
        self.assertTrue(ret, "Bricks are online")
        g.log.info("Bricks are offline as expected.")

        # Restart one of the node which is up.
        ret = restart_glusterd(self.servers[0])
        self.assertTrue(ret, ("Failed to restart glusterd on desired node."))
        g.log.info("Glusterd restarted on the desired node.")

        # Wait for glusterd to be online and validate it's running.
        self.assertTrue(wait_for_glusterd_to_start(self.servers[0]),
                        "Glusterd not up on the desired server.")
        g.log.info("Glusterd is up in the desired server.")

        # Get the brick status from the restarted node.
        ret = are_bricks_offline(self.mnode, self.volname, brick_list[0:1])
        self.assertTrue(ret, "Bricks are online")
        g.log.info("Bricks are offline as expected.")

        # Start glusterd on all servers.
        ret = start_glusterd(self.servers)
        self.assertTrue(ret, "Failed to start glusterd on the specified nodes")
        g.log.info("Initiated start of glusterd on all nodes.")

        # Wait for glusterd to start.
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd not up on all nodes.")
        g.log.info("Glusterd is up and running on all nodes.")

        # Wait for all volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   timeout=600)
        self.assertTrue(ret, ("All volume processes not up."))
        g.log.info("All volume processes are up.")
Example 2
    def tearDown(self):

        # start the volume, it should succeed
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume stop failed")

        # start glusterd on all servers
        ret = start_glusterd(self.servers)
        if not ret:
            raise ExecutionError("Failed to start glusterd on all servers")

        for server in self.servers:
            ret = wait_for_peers_to_connect(server, self.servers)
            if not ret:
                ret = peer_probe_servers(server, self.servers)
                if not ret:
                    raise ExecutionError("Failed to peer probe all "
                                         "the servers")

        # clean up all volumes
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume % s" % volume)
            g.log.info("Volume deleted successfully : %s", volume)

        self.get_super_method(self, 'tearDown')()
Example 3
def add_extra_servers_to_cluster(mnode, extra_servers):
    """Adds the given extra servers to cluster

    Args:
        mnode (str): Node on which cmd has to be executed.
        extra_servers (str|list) : A server|list of extra servers to be
            attached to cluster

    Returns:
        bool: True, if extra servers are attached to cluster
              False, otherwise

    Example:
        add_extra_servers_to_cluster("abc.com", ['peer_node1','peer_node2'])
    """

    if not isinstance(extra_servers, list):
        extra_servers = [extra_servers]

    ret = start_glusterd(servers=extra_servers)
    if not ret:
        g.log.error("glusterd did not start in peer nodes")
        return False

    ret = peer_probe_servers(mnode, servers=extra_servers)
    if not ret:
        g.log.error("Unable to do peer probe on extra server machines")
        return False

    return True
Example 4
    def tearDown(self):

        # start glusterd on all servers
        ret = start_glusterd(self.servers)
        if not ret:
            raise ExecutionError("Failed to start glusterd on all servers")

        for server in self.servers:
            ret = is_peer_connected(server, self.servers)
            if not ret:
                ret = peer_probe_servers(server, self.servers)
                if not ret:
                    raise ExecutionError("Failed to peer probe all "
                                         "the servers")

        # clean up all volumes
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume % s" % volume)
            g.log.info("Volume deleted successfully : %s" % volume)

        self.get_super_method(self, 'tearDown')()
Example 5
    def tearDown(self):
        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s" %
                                     self.servers)
        # Give glusterd a few seconds to reach the peer connected state
        sleep(5)
        g.log.info("Glusterd started successfully on %s", self.servers)

        # checking for peer status from every node
        ret = is_peer_connected(self.mnode, self.servers)
        if not ret:
            ret = peer_probe_servers(self.mnode, self.servers)
            if not ret:
                raise ExecutionError("Failed to peer probe failed in "
                                     "servers %s" % self.servers)
        g.log.info("All peers are in connected state")
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Failed Cleanup the Volume")
        g.log.info("Volume deleted successfully")
Example 6
    def tearDown(self):

        # Starting glusterd on node where stopped.
        ret = start_glusterd(self.servers[self.random_server])
        if not ret:
            raise ExecutionError("Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        ret = wait_for_glusterd_to_start(self.servers)
        if not ret:
            ExecutionError("glusterd is not running on %s" % self.servers)
        g.log.info("Glusterd start on the nodes succeeded")

        # Checking if peer is connected.
        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        if not ret:
            ExecutionError("Peer is not in connected state.")
        g.log.info("Peers is in connected state.")

        # Stopping and deleting volume.
        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 7
    def tearDown(self):
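        # Start glusterd if it is not already running on all the servers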
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s" %
                                     self.servers)
        g.log.info("Glusterd started successfully on %s", self.servers)

        # Checking for peer status from every node
        for server in self.servers:
            ret = wait_for_peers_to_connect(server, self.servers)
            if not ret:
                raise ExecutionError("Servers are not in peer probed state")

        ret = cleanup_volume(self.mnode, self.volname_2)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" %
                                 self.volname_2)
        # Unmount and cleanup original volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to umount the vol & cleanup Volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 8
    def tearDown(self):

        # Check and start if glusterd isn't running.
        if not self.validate_peers_are_connected():

            # Starting glusterd on node where stopped.
            ret = start_glusterd(self.node_on_glusterd_to_stop)
            if not ret:
                raise ExecutionError("Failed to start glusterd.")
            g.log.info("Successfully started glusterd.")

            # Checking if peer is connected.
            ret = wait_for_peers_to_connect(self.mnode, self.servers)
            if not ret:
                raise ExecutionError("Peers are not in connected state on "
                                     "%s" % self.servers)
            g.log.info("Peers are in connected state.")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 9
    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s" %
                                     self.servers)
        # Give glusterd a few seconds to reach the peer connected state
        sleep(5)
        g.log.info("Glusterd started successfully on %s", self.servers)

        # checking for peer status from every node
        ret = is_peer_connected(self.mnode, self.servers)
        if not ret:
            ret = peer_probe_servers(self.mnode, self.servers)
            if not ret:
                raise ExecutionError("Failed to peer probe failed in "
                                     "servers %s" % self.servers)

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 10
    def tearDown(self):

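        # Start glusterd on the servers where it is not running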
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on servers")

        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in connected state")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s" %
                                 self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 11
    def tearDown(self):
        """
        tearDown for every test
        """

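        # Start glusterd on the random server if it is not running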
        ret = is_glusterd_running(self.random_server)
        if ret:
            ret = start_glusterd(self.random_server)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s"
                                     % self.random_server)

        # checking for peer status from every node
        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        self.assertTrue(ret, "glusterd is not connected %s with peer %s"
                        % (self.mnode, self.servers))

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s"
                                 % self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 12
    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.random_server)
        if ret:
            ret = start_glusterd(self.random_server)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s" %
                                     self.random_server)
        g.log.info("Glusterd started successfully on %s", self.random_server)

        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in peer probed state")
        g.log.info("All peers are in connected state")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 13
    def tearDown(self):

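        # Start glusterd if it is not already running on all the servers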
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on servers")

        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in connected state")

        # Setting quorum ratio to 51%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '51%'})
        if not ret:
            raise ExecutionError("Failed to set server quorum ratio on %s" %
                                 self.volname)

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s" %
                                 self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 14
    def tearDown(self):

        # Starting glusterd on node where stopped.
        ret = start_glusterd(self.servers[self.random_server])
        if not ret:
            raise ExecutionError("Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        # Checking if peer is connected.
        counter = 0
        while counter < 30:
            ret = is_peer_connected(self.mnode, self.servers)
            counter += 1
            if ret:
                break
            sleep(3)
        if not ret:
            raise ExecutionError("Peers are not in connected state.")
        g.log.info("Peers are in connected state.")

        # Stopping and deleting volume.
        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 15
    def tearDown(self):
        """
        tearDown for every test
        """
        if not self.glusterd_service:
            ret = start_glusterd(self.servers[1])
            if not ret:
                raise ExecutionError("Failed to start glusterd services "
                                     "for : %s" % self.servers[1])

        # Checking glusterd service running or not
        ret = is_glusterd_running(self.servers[1])
        if ret == 0:
            g.log.info("glusterd running on :%s", self.servers[1])
        else:
            raise ExecutionError("glusterd not running on :%s" %
                                 self.servers[1])

        # This test performs quorum operations. Volume deletions attempted
        # immediately after glusterd starts fail with "quorum not met", so
        # verify that the peers are connected before deleting the volumes.
        peers_not_connected = True
        count = 0
        while count < 10:
            ret = self.validate_peers_are_connected()
            if ret:
                peers_not_connected = False
                break
            count += 1
            sleep(5)
        if peers_not_connected:
            raise ExecutionError("Servers are not in peer probed state")

        # Revert the quorum ratio to 51%
        self.quorum_percent = {'cluster.server-quorum-ratio': '51%'}
        ret = set_volume_options(self.mnode, 'all', self.quorum_percent)
        if not ret:
            raise ExecutionError("Failed to set cluster.server-quorum-"
                                 "ratio to 51%% on %s" % self.servers)
        g.log.info("Successfully set cluster.server-quorum-ratio to 51%% "
                   "on %s", self.servers)

        # stopping the volume and Cleaning up the volume
        for volume in self.volume_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Failed to Cleanup the "
                                     "Volume %s" % volume)
            g.log.info("Volume deleted successfully : %s", volume)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 16
    def test_glusterd_services(self):
        """Test restart, stop, start of glusterd
        """
        # restart glusterd on all servers
        g.log.info("Restart glusterd on all servers %s", self.servers)
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to restart glusterd on all servers %s"
            % self.servers)
        g.log.info("Successfully restarted glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: active)
        g.log.info(
            "Check if glusterd is running on all servers %s"
            "(expected: active)", self.servers)
        ret = is_glusterd_running(self.servers)
        self.assertEqual(
            ret, 0, "Glusterd is not running on all servers %s"
            % self.servers)
        g.log.info("Glusterd is running on all the servers %s", self.servers)

        # Stop glusterd on all servers
        g.log.info("Stop glusterd on all servers %s", self.servers)
        ret = stop_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to stop glusterd on all servers %s" % self.servers)
        g.log.info("Successfully stopped glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: not running)
        g.log.info(
            "Check if glusterd is running on all servers %s"
            "(expected: not running)", self.servers)
        ret = is_glusterd_running(self.servers)
        self.assertNotEqual(ret, 0, ("Glusterd is still running on some "
                                     "servers %s", self.servers))
        g.log.info("Glusterd not running on any servers %s as expected.",
                   self.servers)

        # Start glusterd on all servers
        g.log.info("Start glusterd on all servers %s", self.servers)
        ret = start_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to start glusterd on all servers %s" % self.servers)
        g.log.info("Successfully started glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: active)
        g.log.info(
            "Check if glusterd is running on all servers %s"
            "(expected: active)", self.servers)
        ret = is_glusterd_running(self.servers)
        self.assertEqual(
            ret, 0, "Glusterd is not running on all servers %s"
            % self.servers)
        g.log.info("Glusterd is running on all the servers %s", self.servers)
Example 17
    def test_glusterd_restart_stop_start(self):
        """Tests glusterd stop, start, restart and validate if all
        peers are in connected state after glusterd restarts.
        """
        # restart glusterd on all servers
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to restart glusterd on all servers %s"
            % self.servers)
        g.log.info("Successfully restarted glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: active)
        ret = is_glusterd_running(self.servers)
        self.assertEqual(
            ret, 0, "Glusterd is not running on all servers %s"
            % self.servers)
        g.log.info("Glusterd is running on all the servers %s", self.servers)

        # Stop glusterd on all servers
        ret = stop_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to stop glusterd on all servers %s" % self.servers)
        g.log.info("Successfully stopped glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: not running)
        ret = is_glusterd_running(self.servers)
        self.assertNotEqual(ret, 0, ("Glusterd is still running on some "
                                     "servers %s", self.servers))
        g.log.info("Glusterd not running on any servers %s as expected.",
                   self.servers)

        # Start glusterd on all servers
        ret = start_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to start glusterd on all servers %s" % self.servers)
        g.log.info("Successfully started glusterd on all servers %s",
                   self.servers)

        # Check if glusterd is running on all servers(expected: active)
        ret = is_glusterd_running(self.servers)
        self.assertEqual(
            ret, 0, "Glusterd is not running on all servers %s"
            % self.servers)
        g.log.info("Glusterd is running on all the servers %s", self.servers)

        # Wait for all the glusterd's to establish communication.
        time.sleep(30)

        # Validate all the peers are in connected state
        ret = self.validate_peers_are_connected()
        self.assertTrue(ret, "Validating Peers to be in Cluster Failed")

        self.test_method_complete = True
Example 18
    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Glusterd not started on some of "
                                     "the servers")
        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in peer probed state")

        # Setting Quorum ratio to 51%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '51%'})
        if not ret:
            raise ExecutionError("Failed to set server quorum ratio on %s" %
                                 self.servers)
        g.log.info("Able to set server quorum ratio successfully on %s",
                   self.servers)

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Removing brick directories
        if not self.replace_brick_failed:
            for brick in self.brick_list:
                node, brick_path = brick.split(r':')
                cmd = "rm -rf " + brick_path
                ret, _, _ = g.run(node, cmd)
                if ret:
                    raise ExecutionError("Failed to delete the brick "
                                         "dir's of deleted volume")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 19
    def test_glusterd_start_stop_cycle(self):
        """
        Test Glusterd stop-start cycle of gluster processes.
        1. Create a gluster volume.
        2. Kill all gluster related processes.
        3. Start glusterd service.
        4. Verify that all gluster processes are up.
        5. Repeat the above steps 5 times.
        """
        # Create and start a volume
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        for _ in range(5):
            killed_gluster_process_count = []
            # Kill gluster processes in all servers
            for server in self.servers:
                cmd = ('pkill --signal 9 -c -e "(glusterd|glusterfsd|glusterfs'
                       ')"|tail -1')
                ret, out, err = g.run(server, cmd)
                self.assertEqual(ret, 0, err)
                killed_gluster_process_count.append(int(out))

            # Start glusterd on all servers.
            ret = start_glusterd(self.servers)
            self.assertTrue(ret, ("Failed to restart glusterd on desired"
                                  " nodes."))
            g.log.info("Glusterd started on desired nodes.")

            # Wait for gluster processes to come up.
            self._wait_for_gluster_process_online_state()

            spawned_gluster_process_count = []
            # Get the number of gluster processes spawned on all servers
            for server in self.servers:
                cmd = ('pgrep -c "(glusterd|glusterfsd|glusterfs)"')
                ret, out, err = g.run(server, cmd)
                self.assertEqual(ret, 0, err)
                spawned_gluster_process_count.append(int(out))

            # Compare process count in each server.
            for index, server in enumerate(self.servers):
                self.assertEqual(
                    killed_gluster_process_count[index],
                    spawned_gluster_process_count[index],
                    "All processes are not up and running on %s" % server)
Example 20
    def tearDown(self):
        """
        tearDown for every test
        """
        if not self.glusterd_service:
            ret = start_glusterd(self.servers[1])
            if not ret:
                raise ExecutionError("Failed to start glusterd services "
                                     "for : %s" % self.servers[1])

        # Checking glusterd service running or not
        ret = is_glusterd_running(self.servers[1])
        if ret == 0:
            g.log.info("glusterd running on :%s", self.servers[1])
        else:
            raise ExecutionError("glusterd not running on :%s" %
                                 self.servers[1])

        # This test performs quorum operations. Volume deletions attempted
        # immediately after glusterd starts fail with "quorum not met", so
        # verify that the peers are connected before deleting the volumes.
        peers_not_connected = True
        count = 0
        while count < 10:
            ret = self.validate_peers_are_connected()
            if ret:
                peers_not_connected = False
                break
            count += 1
            sleep(5)
        if peers_not_connected:
            raise ExecutionError("Servers are not in peer probed state")

        # stopping the volume and Cleaning up the volume
        for volume in self.volume_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Failed to Cleanup the "
                                     "Volume %s" % volume)
            g.log.info("Volume deleted successfully : %s", volume)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 21
    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Glusterd not started on some of "
                                     "the servers")
        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in peer probed state")

        # reset quorum ratio to default
        g.log.info("resetting quorum ratio")
        ret, _, _ = volume_reset(self.mnode, 'all')
        self.assertEqual(ret, 0, "Failed to reset quorum ratio")
        g.log.info("Successfully resetted quorum ratio")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Removing brick directories
        if not self.replace_brick_failed:
            node, brick_path = self.random_brick.split(r':')
            cmd = "rm -rf " + brick_path
            ret, _, _ = g.run(node, cmd)
            if ret:
                raise ExecutionError("Failed to delete the brick dir's")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 22
    def test_glusterd_default_vol_behavior_and_quorum_options(self):
        """
        Test default volume behavior and quorum options
        1. Create a volume and start it.
        2. Check that no quorum options are found in vol info.
        3. Kill two glusterd processes.
        4. There shouldn't be any effect to the running glusterfsd
        processes.
        """
        # Check the default quorum options are correct.
        self._validate_vol_options('cluster.server-quorum-type', 'off')
        self._validate_vol_options('cluster.server-quorum-ratio', '51', True)

        # Get the count of number of glusterfsd processes running.
        count_before_glusterd_kill = self._get_total_brick_processes_count()

        # Kill two glusterd processes.
        server_list = [self.servers[1], self.servers[2]]
        ret = stop_glusterd(server_list)
        self.assertTrue(ret, "Failed to stop glusterd on the specified nodes.")
        ret = is_glusterd_running(server_list)
        self.assertNotEqual(ret, 0, ("Glusterd is not stopped on the servers"
                                     " where it was desired to be stopped."))
        g.log.info("Glusterd processes stopped in the desired servers.")

        # Get the count of number of glusterfsd processes running.
        count_after_glusterd_kill = self._get_total_brick_processes_count()

        # The count of glusterfsd processes should match
        self.assertEqual(count_before_glusterd_kill, count_after_glusterd_kill,
                         ("Glusterfsd processes are affected."))
        g.log.info("Glusterd processes are not affected.")

        # Start glusterd on all servers.
        ret = start_glusterd(self.servers)
        self.assertTrue(ret, "Failed to Start glusterd on the specified"
                        " nodes")
        g.log.info("Started glusterd on all nodes.")

        # Wait for glusterd to restart.
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd not up on all nodes.")
        g.log.info("Glusterd is up and running on all nodes.")
Example 23
    def tearDown(self):
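        # Start glusterd on the servers where it is not running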
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s" %
                                     self.servers)
        g.log.info("Glusterd started successfully on %s", self.servers)

        # checking for peer status from every node
        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        if not ret:
            raise ExecutionError("Peers are not in connected state")

        # stopping the volume and Cleaning up the volume
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s" %
                                 self.volname)
        g.log.info("Volume deleted successfully %s", self.volname)
Example 24
    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Glusterd not started on some of "
                                     "the servers")
        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in peer probed state")

        # Setting quorum ratio to 51%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '51%'})
        if not ret:
            raise ExecutionError("Failed to set the server quorum ratio "
                                 "to 51%% on servers %s" % self.servers)
        g.log.info("Successfully set the server quorum ratio to 51%% on "
                   "servers %s", self.servers)

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Example 25
    def tearDown(self):

        # Starting glusterd on node where stopped.
        ret = start_glusterd(self.servers[self.random_server])
        if not ret:
            raise ExecutionError("Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        # Checking if peer is connected
        for server in self.servers:
            ret = wait_for_peers_to_connect(self.mnode, server)
            if not ret:
                raise ExecutionError("Peers are not in connected state.")
            g.log.info("Peers are in connected state.")

        # Unmounting and cleaning volume.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 26
    def tearDown(self):

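        # Start glusterd on the servers where it is not running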
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s"
                                     % self.servers)
        g.log.info("Glusterd started successfully on %s", self.servers)

        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        self.assertTrue(ret, "glusterd is not connected %s with peer %s"
                        % (self.servers, self.servers))

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s"
                                 % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 27
    def tearDown(self):

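        # Start glusterd if it is not already running on all the servers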
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on servers")

        g.log.info("glusterd is running on all the nodes")

        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in connected state")

        g.log.info("Peers are in connected state")

        # reset quorum ratio to default
        g.log.info("resetting quorum ratio")
        ret, _, _ = volume_reset(self.mnode, 'all')
        self.assertEqual(ret, 0, "Failed to reset quorum ratio")
        g.log.info("Successfully resetted quorum ratio")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s" %
                                 self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 28
    def tearDown(self):
        # Check if a node is still down
        if self.glusterd_is_stopped:
            ret = start_glusterd(self.random_server)
            if not ret:
                raise ExecutionError("Failed to start glusterd on %s"
                                     % self.random_server)
            g.log.info("Successfully started glusterd on node: %s",
                       self.random_server)

            # Waiting for glusterd to start completely
            ret = wait_for_glusterd_to_start(self.random_server)
            if not ret:
                raise ExecutionError("glusterd is not running on %s"
                                     % self.random_server)
            g.log.info("glusterd is started and running on %s",
                       self.random_server)

        # Unmounting and cleaning volume.
        ret = self.unmount_volume_and_cleanup_volume(self.mounts)
        if not ret:
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
Example 29
    def test_profile_operations_with_one_node_down(self):

        # pylint: disable=too-many-statements
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile info on the volume.
        4) Stop glusterd on one node.
        5) Run profile info with different parameters
           and see if all bricks are present or not.
        6) Stop profile on the volume.
        """

        # Start IO on mount points.
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dirname-start-num %d "
                   "--dir-length 6 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" % (
                       self.script_upload_path,
                       counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start profile on volume: %s"
                         % self.volname)
        g.log.info("Successfully started profile on volume: %s",
                   self.volname)

        # Fetching a random server from list.
        self.random_server = randint(1, len(self.servers)-1)

        # Stopping glusterd on one node.
        ret = stop_glusterd(self.servers[self.random_server])
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Successfully stopped glusterd on one node.")
        ret = wait_for_glusterd_to_start(self.servers[self.random_server])
        self.assertFalse(ret, "glusterd is still running on %s"
                         % self.servers[self.random_server])
        g.log.info("Glusterd stop on the nodes : %s "
                   "succeeded", self.servers[self.random_server])

        # Getting and checking output of profile info.
        ret, out, _ = profile_info(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to run profile info on volume: %s"
                         % self.volname)
        g.log.info("Successfully executed profile info on volume: %s",
                   self.volname)

        # Checking if all bricks are present in profile info.
        brick_list = get_online_bricks_list(self.mnode, self.volname)
        for brick in brick_list:
            self.assertTrue(brick in out,
                            "Brick %s not a part of profile info output."
                            % brick)
            g.log.info("Brick %s showing in profile info output.",
                       brick)

        # Running profile info with different profile options.
        profile_options = ['peek', 'incremental', 'clear', 'incremental peek',
                           'cumulative']
        for option in profile_options:

            # Getting and checking output of profile info.
            ret, out, _ = profile_info(self.mnode, self.volname,
                                       options=option)
            self.assertEqual(ret, 0,
                             "Failed to run profile info %s on volume: %s"
                             % (option, self.volname))
            g.log.info("Successfully executed profile info %s on volume: %s",
                       option, self.volname)

            # Checking if all bricks are present in profile info peek.
            for brick in brick_list:
                self.assertTrue(brick in out,
                                "Brick %s not a part of profile"
                                " info %s output."
                                % (brick, option))
                g.log.info("Brick %s showing in profile info %s output.",
                           brick, option)

        # Starting glusterd on node where stopped.
        ret = start_glusterd(self.servers[self.random_server])
        self.assertTrue(ret, "Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        # Checking if peer is connected
        ret = wait_for_peers_to_connect(self.mnode, self.servers)
        self.assertTrue(ret, "Peers are not in connected state.")
        g.log.info("Peers are in connected state.")

        # Stop profile on volume.
        ret, _, _ = profile_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop profile on volume: %s"
                         % self.volname)
        g.log.info("Successfully stopped profile on volume: %s", self.volname)

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        g.log.info("IO validation complete.")
Example 30
    def test_peer_probe_when_glusterd_down(self):
        # pylint: disable=too-many-statements
        '''
        Test script to verify the behavior when we try to peer
        probe a valid node whose glusterd is down
        Also post validate to make sure no core files are created
        under "/", /var/log/core and /tmp  directory

        Ref: BZ#1257394 Provide meaningful error on peer probe and peer detach
        Test Steps:
        1 check the current peer status
        2 detach one of the valid nodes which is already part of cluster
        3 stop glusterd on that node
        4 try to attach above node to cluster, which must fail with
          Transport End point error
        5 Recheck the test using hostname, expected to see same result
        6 start glusterd on that node
        7 halt/reboot the node
        8 try to peer probe the halted node, which must fail again.
        9 The only error accepted is
          "peer probe: failed: Probe returned with Transport endpoint is not
          connected"
        10 Check peer status and make sure no other nodes in peer reject state
        '''

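        # Record the test start time; it is used later to check for new
        # core files created during this test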
        ret, test_timestamp, _ = g.run_local('date +%s')
        test_timestamp = test_timestamp.strip()

        # Detach one of the nodes which is part of the cluster
        g.log.info("detaching server %s ", self.servers[1])
        ret, _, err = peer_detach(self.mnode, self.servers[1])
        msg = 'peer detach: failed: %s is not part of cluster\n' \
              % self.servers[1]
        if ret:
            self.assertEqual(err, msg, "Failed to detach %s "
                             % (self.servers[1]))

        # Bring down glusterd of the server which has been detached
        g.log.info("Stopping glusterd on %s ", self.servers[1])
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Fail to stop glusterd on %s " % self.servers[1])

        # Trying to peer probe the node whose glusterd was stopped using IP
        g.log.info("Peer probing %s when glusterd down ", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with "
                              "Transport endpoint is not connected\n")

        # Trying to peer probe the same node with hostname
        g.log.info("Peer probing node %s using hostname with glusterd down ",
                   self.servers[1])
        hostname = g.run(self.servers[1], "hostname")
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when "
                                    "glusterd is down")
        self.assertEqual(err, "peer probe: failed: Probe returned with"
                              " Transport endpoint is not connected\n")

        # Start glusterd again for the next set of test steps
        g.log.info("starting glusterd on %s ", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "glusterd couldn't start successfully on %s"
                        % self.servers[1])

        # Bring down the network for sometime
        network_status = bring_down_network_interface(self.servers[1], 150)

        # Peer probing the node using IP when it is still not online
        g.log.info("Peer probing node %s when network is down",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertNotEqual(ret, 0, "Peer probe passed when it was expected to"
                                    " fail")
        self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe "
                                             "returned with Transport endpoint"
                                             " is not connected")

        # Peer probing the node using hostname when it is still not online
        g.log.info("Peer probing node %s using hostname which is still "
                   "not online ",
                   self.servers[1])
        ret, _, err = peer_probe(self.mnode, hostname[1].strip())
        self.assertNotEqual(ret, 0, "Peer probe should not pass when node "
                                    "has not come online")
        self.assertEqual(err.split("\n")[0], "peer probe: failed: Probe "
                                             "returned with Transport endpoint"
                                             " is not connected")

        ret, _, _ = network_status.async_communicate()
        if ret != 0:
            g.log.error("Failed to perform network interface ops")

        # Peer probe the node must pass
        g.log.info("peer probing node %s", self.servers[1])
        ret, _, err = peer_probe(self.mnode, self.servers[1])
        self.assertEqual(ret, 0, "Peer probe has failed unexpectedly with "
                                 "%s " % err)

        # Checking if core file created in "/", "/tmp" and "/var/log/core"
        ret = is_core_file_created(self.servers, test_timestamp)
        self.assertTrue(ret, "core file found")