Esempio n. 1
0
    def test_is_peer_connected(self):
        """Check peer connectivity with the is_peer_connected function.

        Validates that all servers in ``self.servers`` are reported as
        connected from ``self.mnode``, then repeats the same validation
        from every server in ``self.servers``. The test fails at the first
        node for which ``is_peer_connected`` returns a falsy value.
        """
        # Validate that all nodes in self.servers are in 'Connected'
        # state as seen from self.mnode.
        g.log.info("Validating servers %s are in connected state from node %s",
                   self.servers, self.mnode)
        ret = is_peer_connected(self.mnode, self.servers)
        # The failure message is interpolated here: a (format, args...)
        # tuple would be displayed by unittest as a raw tuple.
        self.assertTrue(ret, "Some or all servers %s are not in connected "
                        "state from node %s" % (self.servers, self.mnode))
        g.log.info(
            "Successfully validated servers %s are all in connected "
            "state from node %s", self.servers, self.mnode)

        # Validate if peers are connected from each server
        g.log.info(
            "Validating if servers %s are connected from other servers "
            "in the cluster", self.servers)
        for server in self.servers:
            ret = is_peer_connected(server, self.servers)
            # Report the node the check was actually issued from (server),
            # not the management node.
            self.assertTrue(
                ret,
                "Some or all servers %s are not "
                "in connected state from node %s" % (self.servers, server))
            g.log.info(
                "Successfully validated servers %s are all "
                "in connected state from node %s", self.servers, server)
        g.log.info(
            "Successfully validated all servers %s are in connected "
            "state from other servers in the cluster", self.servers)
    def test_setting_volume_level_option_to_cluster(self):
        # pylint: disable=too-many-statements
        """
        Scenario:
        1) Start from an existing cluster.
        2) Attempt to apply volume level options at cluster level ('all').
           Both attempts are expected to fail, e.g.:
             gluster v set all transport.listen-backlog 128
             gluster v set all performance.parallel-readdir on
        3) After each attempt confirm glusterd is still running and that
           the peers are still connected.
        """

        # One entry per attempt: the option passed to 'all', the assertion
        # message used when the set unexpectedly succeeds, and the log line
        # emitted for the expected failure.
        attempts = (
            ({'transport.listen-backlog': '128'},
             "Error: Able to set transport.listen-backlog "
             "to 128 for all volumes.",
             "EXPECTED: Failed to set transport.listen-backlog to 128"
             " for all volumes."),
            ({'performance.parallel-readdir': 'on'},
             "Error: Able to set performance.parallel"
             "-readdir to ON for all volumes.",
             "EXPECTED: Failed to set parallel-readdir to"
             " ON for all volumes."),
        )

        for option, unexpected_msg, expected_log in attempts:
            # Setting the option on 'all' must not succeed
            was_set = set_volume_options(self.mnode, 'all', option)
            self.assertFalse(was_set, unexpected_msg)
            g.log.info(expected_log)

            # glusterd must still be running on every node
            glusterd_state = is_glusterd_running(self.servers)
            self.assertEqual(glusterd_state, 0, "glusterd has crashed.")
            g.log.info("glusterd is running on all servers.")

            # Every peer must still be in connected state
            peers_connected = is_peer_connected(self.mnode, self.servers)
            self.assertTrue(peers_connected,
                            "All peers are not in connected state.")
            g.log.info("All peers are in connected state.")
    def test_peer_detach_check_warning_message(self):
        # pylint: disable=too-many-statements
        """
        Scenario:
        1) Start from an existing cluster.
        2) Issue 'gluster peer detach' for a node without answering 'y'.
        3) Verify the warning message that is printed.
        4) Verify peer status: the node must still be connected.
        5) Detach the node again, this time through peer_detach.
        6) Verify peer status: the node must no longer be connected.
        """
        target = self.servers[1]

        # Raw CLI detach of the target node
        rcode, cli_out, _ = g.run(self.mnode,
                                  "gluster peer detach %s" % target)
        self.assertEqual(rcode, 0,
                         "ERROR: Peer detach successful %s" % target)
        g.log.info("EXPECTED: Failed to detach %s", target)

        # The text printed before the '?' must contain the warning
        expected_msg = (
            'All clients mounted through the peer which is getting '
            'detached need to be remounted using one of the other '
            'active peers in the trusted storage pool to ensure '
            'client gets notification on any changes done on the '
            'gluster configuration and if the same has been done '
            'do you want to proceed')
        before_question_mark = msg_head = cli_out.partition('?')[0]
        self.assertIn(expected_msg,
                      before_question_mark,
                      "Incorrect warning message for peer detach.")
        g.log.info("Correct warning message for peer detach.")

        # The node must still be part of the cluster
        still_connected = is_peer_connected(self.mnode, target)
        self.assertTrue(still_connected, "Peer is not in connected state.")
        g.log.info("Peers is in connected state.")

        # Detach through the library helper
        rcode, _, _ = peer_detach(self.mnode, target)
        self.assertEqual(rcode, 0, "Failed to detach %s" % target)
        g.log.info("Peer detach successful %s", target)

        # The node must now be gone
        still_connected = is_peer_connected(self.mnode, target)
        self.assertFalse(still_connected, "Peer is in connected state.")
        g.log.info("Peer is not in connected state.")
    def tearDown(self):
        """Clean up every volume, set the quorum ratio and re-probe peers.

        Raises ExecutionError when the volume list cannot be fetched, a
        volume cannot be cleaned up, the quorum ratio cannot be set or a
        peer probe fails. Finishes by invoking the parent tearDown.
        """
        volumes = get_volume_list(self.mnode)
        if volumes is None:
            raise ExecutionError("Failed to get volume list")

        for vol in volumes:
            if not cleanup_volume(self.mnode, vol):
                raise ExecutionError("Failed Cleanup the volume")
            g.log.info("Volume deleted successfully %s", vol)

        # Cluster-wide server quorum ratio is set to 51%
        quorum_option = {'cluster.server-quorum-ratio': '51%'}
        if not set_volume_options(self.mnode, 'all', quorum_option):
            raise ExecutionError("Failed to set server quorum ratio on %s" %
                                 self.volname)

        # Probe again from every node that does not see all peers connected
        for node in self.servers:
            if is_peer_connected(node, self.servers):
                continue
            if not peer_probe_servers(node, self.servers):
                raise ExecutionError(
                    "Peer probe failed to one of the node")
            g.log.info("Peer probe successful")

        self.get_super_method(self, 'tearDown')()
Esempio n. 5
0
    def tearDown(self):
        """Run the parent tearDown, then bring glusterd and peers back and
        clean up every volume listed on mnode.

        Raises ExecutionError when glusterd cannot be started, the peer
        probe fails, the volume list is unavailable or a cleanup fails.
        """
        self.get_super_method(self, 'tearDown')()

        # A truthy status from is_glusterd_running triggers a start attempt
        if (is_glusterd_running(self.servers)
                and not start_glusterd(self.servers)):
            raise ExecutionError("Failed to start glusterd on %s" %
                                 self.servers)
        # Give glusterd 5 seconds to get back into peer connected state
        sleep(5)
        g.log.info("Glusterd started successfully on %s", self.servers)

        # Peer status as seen from mnode; probe again when not connected
        if (not is_peer_connected(self.mnode, self.servers)
                and not peer_probe_servers(self.mnode, self.servers)):
            raise ExecutionError("Failed to peer probe failed in "
                                 "servers %s" % self.servers)
        g.log.info("All peers are in connected state")

        volumes = get_volume_list(self.mnode)
        if volumes is None:
            raise ExecutionError("Failed to get the volume list")

        for vol in volumes:
            if not cleanup_volume(self.mnode, vol):
                raise ExecutionError("Failed Cleanup the Volume")
        g.log.info("Volume deleted successfully")
Esempio n. 6
0
    def setUp(self):
        """Peer probe all servers and verify they are connected.

        Runs the GlusterBaseClass setUp, probes ``self.servers`` from
        ``self.mnode`` and then checks from every server that all servers
        are in connected state.

        Raises:
            ExecutionError: if the peer probe fails or if any server
                reports that some servers are not connected.
        """
        # Plain class-attribute call works on both Python 2 and Python 3;
        # the previous ``im_func`` lookup exists only on Python 2.
        GlusterBaseClass.setUp(self)

        # Peer probe servers
        g.log.info("Peer Probe servers '%s'", self.servers)
        ret = peer_probe_servers(self.mnode, self.servers)
        if not ret:
            # Exceptions do not interpolate extra args the way the logger
            # does, so the message is formatted explicitly.
            raise ExecutionError(
                "Failed to peer probe some or all servers %s "
                "into the cluster" % self.servers)
        g.log.info("Successfully peer probed all servers '%s' to the cluster",
                   self.servers)

        # Validate if peers are connected from each server
        g.log.info(
            "Validating if servers %s are connected from other servers "
            "in the cluster", self.servers)
        for server in self.servers:
            ret = is_peer_connected(server, self.servers)
            if not ret:
                # Name the node the check was issued from (server).
                raise ExecutionError(
                    "Some or all servers %s are not "
                    "in connected state from node %s"
                    % (self.servers, server))
            g.log.info(
                "Successfully validated servers %s are all "
                "in connected state from node %s", self.servers, server)
        g.log.info(
            "Successfully validated all servers %s are in connected "
            "state from other servers in the cluster", self.servers)
Esempio n. 7
0
    def tearDown(self):
        """Start glusterd everywhere, re-probe peers and delete all volumes.

        Raises:
            ExecutionError: if glusterd cannot be started, a peer probe
                fails, the volume list cannot be fetched or a volume
                cannot be cleaned up.
        """
        # start glusterd on all servers
        ret = start_glusterd(self.servers)
        if not ret:
            raise ExecutionError("Failed to start glusterd on all servers")

        # Probe again from every node that does not see all peers connected
        for server in self.servers:
            ret = is_peer_connected(server, self.servers)
            if not ret:
                ret = peer_probe_servers(server, self.servers)
                if not ret:
                    raise ExecutionError("Failed to peer probe all "
                                         "the servers")

        # clean up all volumes
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume %s" % volume)
            # Let the logger interpolate lazily instead of formatting here
            g.log.info("Volume deleted successfully : %s", volume)

        # Plain class-attribute call works on both Python 2 and Python 3;
        # the previous ``im_func`` lookup exists only on Python 2.
        GlusterBaseClass.tearDown(self)
    def tearDown(self):
        """
        Per-test cleanup: make sure glusterd is started, peers are
        connected, the volume is cleaned up and the parent tearDown runs.
        """
        # A truthy status from is_glusterd_running triggers a start attempt
        if (is_glusterd_running(self.servers)
                and not start_glusterd(self.servers)):
            raise ExecutionError("Failed to start glusterd on %s" %
                                 self.servers)
        # Give glusterd 5 seconds to get back into peer connected state
        sleep(5)
        g.log.info("Glusterd started successfully on %s", self.servers)

        # Peer status as seen from mnode; probe again when not connected
        if (not is_peer_connected(self.mnode, self.servers)
                and not peer_probe_servers(self.mnode, self.servers)):
            raise ExecutionError("Failed to peer probe failed in "
                                 "servers %s" % self.servers)

        # Stop and remove the volume
        if not self.cleanup_volume():
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Hand over to the GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
    def validate_peers_are_connected(cls):
        """Check from every server that all servers of the cluster are
        connected.

        Returns (bool): True when the check passes from every server in
            cls.servers; False as soon as the check fails from one server.
        """
        g.log.info(
            "Validating if servers %s are connected from other servers "
            "in the cluster", cls.servers)

        for node in cls.servers:
            g.log.info("Validate servers %s are in connected from  node %s",
                       cls.servers, node)
            if is_peer_connected(node, cls.servers):
                g.log.info(
                    "Successfully validated servers %s are all in "
                    "connected state from node %s", cls.servers, node)
                continue

            # First failing node ends the validation
            g.log.error(
                "Some or all servers %s are not in connected "
                "state from node %s", cls.servers, node)
            return False

        g.log.info(
            "Successfully validated all servers %s are in connected "
            "state from other servers in the cluster", cls.servers)

        # Run peer status on mnode once everything validated fine
        peer_status(cls.mnode)

        return True
    def tearDown(self):
        """Restart glusterd on the stopped node, wait for the peers and
        delete the volume.

        Starts glusterd on ``self.servers[self.random_server]``, polls
        ``is_peer_connected`` up to 30 times (3 seconds apart), cleans up
        ``self.volname`` and finally runs the GlusterBaseClass tearDown.

        Raises:
            ExecutionError: if glusterd cannot be started, the peers do not
                reach connected state, or the volume cannot be deleted.
        """
        # Starting glusterd on node where stopped.
        # start_glusterd returns a truthy value on success, so the failure
        # branch is the falsy one; the exception must actually be raised.
        ret = start_glusterd(self.servers[self.random_server])
        if not ret:
            raise ExecutionError("Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        # Checking if peer is connected.
        counter = 0
        while counter < 30:
            ret = is_peer_connected(self.mnode, self.servers)
            counter += 1
            if ret:
                break
            sleep(3)
        if not ret:
            raise ExecutionError("Peer is not in connected state.")
        g.log.info("Peers is in connected state.")

        # Stopping and deleting volume.
        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Plain class-attribute call works on both Python 2 and Python 3;
        # the previous ``im_func`` lookup exists only on Python 2.
        GlusterBaseClass.tearDown(self)
 def tearDown(self):
     """Probe from mnode every server that mnode does not report as
     connected; raise ExecutionError when a probe returns non-zero.
     """
     g.log.info("Peering any nodes which are not part of cluster as "
                "part of cleanup")
     for node in self.servers:
         if is_peer_connected(self.mnode, node):
             continue
         rcode, _, probe_err = peer_probe(self.mnode, node)
         if rcode:
             raise ExecutionError("Peer probe failed with %s " % probe_err)
    def are_peers_in_connected_state(self):
        """Report whether all peers are connected as seen from every server.

        The check is issued from each server in turn (no early exit), then
        peer status is run on mnode. Returns True only when every check
        passed, False otherwise.
        """
        # Query every server; the list keeps one result per server
        per_server = [
            is_peer_connected(node, self.servers) for node in self.servers
        ]

        # Peer Status from mnode
        peer_status(self.mnode)

        return all(per_server)
    def tearDown(self):
        """
        Per-test cleanup: when mnode does not see all peers connected,
        probe self.random_server again, then run the parent tearDown.
        """
        peers_connected = is_peer_connected(self.mnode, self.servers)
        if (not peers_connected
                and not peer_probe_servers(self.mnode, self.random_server)):
            raise ExecutionError("Failed to peer probe failed in "
                                 "servers %s" % self.random_server)
        g.log.info("All peers are in connected state")

        # Hand over to the GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
Esempio n. 14
0
    def tearDown(self):
        """Wait for the peers to reconnect, then delete the volume.

        Polls ``is_peer_connected`` from mnode at most 60 times with a
        3 second pause between attempts, cleans up ``self.volname`` and
        runs the GlusterBaseClass tearDown.

        Raises:
            ExecutionError: if the volume cannot be deleted.
        """
        # wait till peers are in connected state
        count = 0
        while count < 60:
            ret = is_peer_connected(self.mnode, self.servers)
            if ret:
                break
            sleep(3)
            # Without this increment the loop never terminates when the
            # peers stay disconnected.
            count += 1

        # clean up volumes
        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Plain class-attribute call works on both Python 2 and Python 3;
        # the previous ``im_func`` lookup exists only on Python 2.
        GlusterBaseClass.tearDown(self)
Esempio n. 15
0
    def tearDown(self):
        """Give the peers up to 60 polls (3 seconds apart) to reconnect,
        then delete the volume and run the parent tearDown.
        """
        # Poll until mnode reports all peers connected or attempts run out
        for _ in range(60):
            if is_peer_connected(self.mnode, self.servers):
                break
            sleep(3)

        # Remove the volume
        if not cleanup_volume(self.mnode, self.volname):
            raise ExecutionError("Unable to delete volume % s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        self.get_super_method(self, 'tearDown')()
    def setUpClass(cls):
        """Class level setup: remove a pre-existing volume, detach the
        peers and require at least four servers.

        Raises ExecutionError when the volume cannot be deleted, a detach
        fails or fewer than four servers are configured.
        """
        # Parent setUpClass first
        cls.get_super_method(cls, 'setUpClass')()

        # Drop the volume when it already exists
        if volume_exists(cls.mnode, cls.volname):
            if not cleanup_volume(cls.mnode, cls.volname):
                raise ExecutionError("Unable to delete volume")
            g.log.info("Successfully deleted volume % s", cls.volname)

        # Detach from every node that currently sees all peers connected
        for node in cls.servers:
            if not is_peer_connected(node, cls.servers):
                continue
            if not peer_detach_servers(node, cls.servers):
                raise ExecutionError(
                    "Detach failed from all the servers from the node.")
            g.log.info("Peer detach SUCCESSFUL.")

        # The testcase needs a minimum of 4 nodes
        if len(cls.servers) < 4:
            raise ExecutionError("Minimun four nodes required for this "
                                 " testcase to execute")
    def tearDown(self):
        """Stop rebalance, re-probe the detached node and remove the volume.

        Stops rebalance on ``self.volname``, probes ``self.servers[4]``
        from mnode, polls ``is_peer_connected`` at most 60 times (3 seconds
        apart), unmounts the first mount, cleans up the volume and runs the
        GlusterBaseClass tearDown.

        Raises:
            ExecutionError: if rebalance stop, peer probe, unmount or
                volume cleanup fails.
        """
        # Stop rebalance for volume.
        g.log.info("Stopping rebalance on the volume")
        ret, _, _ = rebalance_stop(self.mnode, self.volname)
        if ret:
            raise ExecutionError("Failed to stop rebalance " "on the volume .")
        g.log.info("Successfully stopped rebalance on the volume %s",
                   self.volname)

        # Peer probe node which was detached
        ret, _, _ = peer_probe(self.mnode, self.servers[4])
        if ret:
            raise ExecutionError("Failed to probe %s" % self.servers[4])
        g.log.info("Peer probe successful %s", self.servers[4])

        # Wait till peers are in connected state
        count = 0
        while count < 60:
            ret = is_peer_connected(self.mnode, self.servers)
            if ret:
                break
            sleep(3)
            # Without this increment the loop never terminates when the
            # peers stay disconnected.
            count += 1

        # Unmounting and cleaning volume
        ret, _, _ = umount_volume(mclient=self.mounts[0].client_system,
                                  mpoint=self.mounts[0].mountpoint)
        if ret:
            raise ExecutionError("Unable to unmount volume %s" % self.volname)
        g.log.info("Volume unmounted successfully  %s", self.volname)

        ret = cleanup_volume(self.mnode, self.volname)
        if not ret:
            raise ExecutionError("Unable to delete volume %s" % self.volname)
        g.log.info("Volume deleted successfully  %s", self.volname)

        # Plain class-attribute call works on both Python 2 and Python 3;
        # the previous ``im_func`` lookup exists only on Python 2.
        GlusterBaseClass.tearDown(self)
    def test_snap_list_glusterd_restart(self):
        """
        Check the snapshot list across a glusterd restart

        * Take 3 snapshots of the volume
        * Remove one of them
        * List the snapshots
        * Restart glusterd on every node
        * List the snapshots from every node
          Everything except the removed snapshot must still be listed
        """

        # pylint: disable=too-many-statements
        def check_listed(expected_snaps, listing):
            """Assert that each expected snapshot name is in the listing."""
            for name in expected_snaps:
                self.assertIn(
                    name, listing, "Failed to validate the snapshot "
                    "%s in the snapshot list" % name)

        # Take the snapshots
        for snap_name in self.snapshots:
            rcode, _, _ = snap_create(self.mnode, self.volname, snap_name)
            create_err = ("Failed to create snapshot %s for "
                          "volume %s" % (snap_name, self.volname))
            self.assertEqual(rcode, 0, create_err)
            g.log.info("Snapshot %s created successfully "
                       "for volume %s", snap_name, self.volname)

        # All three snapshots must be listed
        listing = get_snap_list(self.mnode)
        self.assertIsNotNone(listing, "Failed to list all snapshots")
        self.assertEqual(len(listing), 3, "Failed to validate snap list")
        g.log.info("Successfully validated snap list")
        check_listed(self.snapshots, listing)
        g.log.info("Successfully validated the presence of snapshots using "
                   "snapname")

        # Remove the first snapshot
        removed = self.snapshots[0]
        rcode, _, _ = snap_delete(self.mnode, removed)
        self.assertEqual(rcode, 0,
                         ("Failed to delete snapshot %s" % removed))
        g.log.info("Snapshots %s deleted Successfully", removed)

        # Only the two remaining snapshots must be listed
        remaining = self.snapshots[1:]
        listing = get_snap_list(self.mnode)
        self.assertIsNotNone(listing, "Failed to list all snapshots")
        self.assertEqual(len(listing), 2, "Failed to validate snap list")
        g.log.info("Successfully validated snap list")
        check_listed(remaining, listing)
        g.log.info("Successfully validated the presence of snapshots using "
                   "snapname")

        # Restart glusterd everywhere
        restarted = restart_glusterd(self.servers)
        self.assertTrue(
            restarted,
            ("Failed to restart glusterd on nodes %s" % self.servers))
        g.log.info("Successfully restarted glusterd on nodes %s", self.servers)

        # glusterd must come back on all server nodes
        glusterd_up = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(
            glusterd_up,
            "Unexpected: glusterd not up on one or more of the nodes")
        g.log.info("Glusterd is up and running on all nodes")

        # Peers must be connected again
        peers_connected = is_peer_connected(self.mnode, self.servers)
        self.assertTrue(peers_connected,
                        "Unexpected: Peers are not in connected state")
        g.log.info("Successful: All peers are in connected state")

        # After the restart every node must list exactly the two
        # snapshots that were not removed
        for node in self.servers:
            listing = get_snap_list(node)
            self.assertIsNotNone(
                listing,
                "Failed to get the list of snapshots in node %s" % node)
            self.assertEqual(
                len(listing), 2,
                "Unexpected: Number of snapshots not consistent in the node %s"
                % node)
            g.log.info("Successfully validated snap list for node %s", node)
            check_listed(remaining, listing)
            g.log.info(
                "Successfully validated the presence of snapshots "
                "using snapname for node %s", node)
    def test_profile_operations_with_one_node_down(self):

        # pylint: disable=too-many-statements
        """
        Test Case:
        1) Create a volume and start it.
        2) Mount volume on client and start IO.
        3) Start profile info on the volume.
        4) Stop glusterd on one node.
        5) Run profile info with different parameters
           and see if all bricks are present or not.
        6) Stop profile on the volume.

        The index of the node whose glusterd is stopped is stored in
        ``self.random_server`` and the async IO handles in
        ``self.all_mounts_procs``.
        """

        # Start IO on mount points.
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        counter = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dir-depth 4 "
                   "--dirname-start-num %d "
                   "--dir-length 6 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            counter += 1

        # Start profile on volume.
        ret, _, _ = profile_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start profile on volume: %s" % self.volname)
        g.log.info("Successfully started profile on volume: %s", self.volname)

        # Fetching a random server from list.
        # Index 0 is never picked, so self.servers[0] keeps running.
        self.random_server = randint(1, len(self.servers) - 1)

        # Stopping glusterd on one node.
        ret = stop_glusterd(self.servers[self.random_server])
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Successfully stopped glusterd on one node.")

        # Wait (at most 20 polls, 3 seconds apart) until
        # is_glusterd_running reports a non-zero status for the node.
        # The condition used to be ``counter > 20``, which never entered
        # the loop because counter starts at 0.
        counter = 0
        while counter < 20:
            ret = is_glusterd_running(self.servers[self.random_server])
            if ret:
                break
            counter += 1
            sleep(3)

        # Getting and checking output of profile info.
        ret, out, _ = profile_info(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to run profile info on volume: %s" % self.volname)
        g.log.info("Successfully executed profile info on volume: %s",
                   self.volname)

        # Checking if all bricks are present in profile info.
        brick_list = get_online_bricks_list(self.mnode, self.volname)
        for brick in brick_list:
            self.assertTrue(
                brick in out,
                "Brick %s not a part of profile info output." % brick)
            g.log.info("Brick %s showing in profile info output.", brick)

        # Running profile info with different profile options.
        profile_options = [
            'peek', 'incremental', 'clear', 'incremental peek', 'cumulative'
        ]
        for option in profile_options:

            # Getting and checking output of profile info.
            ret, out, _ = profile_info(self.mnode,
                                       self.volname,
                                       options=option)
            self.assertEqual(
                ret, 0, "Failed to run profile info %s on volume: %s" %
                (option, self.volname))
            g.log.info("Successfully executed profile info %s on volume: %s",
                       option, self.volname)

            # Checking if all bricks are present in profile info output
            # for this option.
            for brick in brick_list:
                self.assertTrue(
                    brick in out, "Brick %s not a part of profile"
                    " info %s output." % (brick, option))
                g.log.info("Brick %s showing in profile info %s output.",
                           brick, option)

        # Starting glusterd on node where stopped.
        ret = start_glusterd(self.servers[self.random_server])
        self.assertTrue(ret, "Failed to start glusterd.")
        g.log.info("Successfully started glusterd.")

        # Checking if peer is connected (at most 30 polls, 3 seconds apart)
        counter = 0
        while counter < 30:
            ret = is_peer_connected(self.mnode, self.servers)
            counter += 1
            if ret:
                break
            sleep(3)
        self.assertTrue(ret, "Peers are not in connected state.")
        g.log.info("Peers are in connected state.")

        # Stop profile on volume.
        ret, _, _ = profile_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         "Failed to stop profile on volume: %s" % self.volname)
        g.log.info("Successfully stopped profile on volume: %s", self.volname)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info("IO validation complete.")
Esempio n. 20
0
    def test_remove_brick(self):
        """
        In this test case:
        1. Trusted storage Pool of 4 nodes
        2. Create a distributed-replicated volumes with 4 bricks
        3. Start the volume
        4. Fuse mount the gluster volume on out of trusted nodes
        5. Create some data file
        6. Start remove-brick operation for one replica pair
        7. Restart glusterd on all nodes
        8. Try to commit the remove-brick operation while rebalance
           is in progress, it should fail

        Sets ``self.volname``, ``self.all_mounts_procs`` and
        ``self.io_validation_complete`` along the way.
        """

        # pylint: disable=too-many-statements
        my_servers = self.servers[0:4]
        my_server_info = {
            server: self.all_servers_info[server] for server in my_servers
        }
        for index in range(1, 4):
            ret, _, _ = peer_probe(self.servers[0], self.servers[index])
            # Assertion messages are interpolated explicitly; a
            # (format, args...) tuple would be printed as a raw tuple.
            self.assertEqual(ret, 0, "peer probe from %s to %s is failed"
                             % (self.servers[0], self.servers[index]))
            g.log.info("peer probe is success from %s to "
                       "%s", self.servers[0], self.servers[index])

        # Validating whether the peer are connected or not
        # In jenkins This case is failing saying peers are not in connected
        # state, that is reason adding a check whether peers are connected
        # or not
        count = 0
        while count < 30:
            ret = is_peer_connected(self.mnode, my_servers)
            if ret:
                g.log.info("Peers are in connected state")
                break
            sleep(3)
            count = count + 1
        self.assertTrue(ret, "Some peers are not in connected state")

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 4, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        kwargs = {}
        kwargs['replica_count'] = 2
        ret = volume_create(self.mnode,
                            self.volname,
                            bricks_list,
                            force=False,
                            **kwargs)
        # volume_create's result is indexed here; element 0 is compared
        # with 0 as the success code.
        self.assertEqual(ret[0], 0, ("Unable "
                                     "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s" % self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        self.all_mounts_procs = []
        # Creating files
        command = ("cd %s/ ; "
                   "for i in `seq 1 10` ; "
                   "do mkdir l1_dir.$i ; "
                   "for j in `seq 1 5` ; "
                   "do mkdir l1_dir.$i/l2_dir.$j ; "
                   "for k in `seq 1 10` ; "
                   "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
                   "bs=128k count=$k ; "
                   "done ; "
                   "done ; "
                   "done ; " % (self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        self.io_validation_complete = False
        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        # Bricks at index 2 and 3 are the ones handed to remove-brick
        remove_brick_list = bricks_list[2:4]
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove bricks operation started successfully")
        g.log.info("Restart glusterd on servers %s", self.servers)
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret, "Failed to restart glusterd on servers %s" % self.servers)
        g.log.info("Successfully restarted glusterd on servers %s",
                   self.servers)

        # The commit is expected to fail (non-zero return code)
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'commit')
        self.assertNotEqual(ret, 0, "Remove brick commit ops should be fail")
        g.log.info("Remove bricks commit operation failure is expected")
    def test_validate_optimized_glusterd_handshake(self):
        """Validate the optimized glusterd handshake with many volumes.

        Test Case:
        1) Create a 3 node cluster
        2) Enable brick-multiplex in the cluster
        3) Create and start 2000 volumes
        4) Stop one of the node in the cluster
        5) Set an option for around 850 volumes in the cluster
        6) Restart glusterd on the previous node
        7) Check the value of the option set earlier, in the restarted node

        Both wait loops below are bounded, so an unhealthy cluster fails
        the test through an assertion instead of hanging the run forever.
        """
        # pylint: disable=too-many-locals
        # Retry budgets for the two wait loops (one attempt per second).
        # NOTE(review): these are deliberately generous guesses; tune them
        # to the time a 2000-volume sync needs on the target setup.
        peer_connect_retries = 1800
        option_sync_retries = 300

        # Enable brick-multiplex
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(ret, "Failed to enable brick mux on cluster")

        server_info_frm_three_node = {}
        for server in self.servers[:3]:
            server_info_frm_three_node[server] = self.all_servers_info[server]

        # Fetch the available bricks dict
        bricks_dict = get_servers_bricks_dict(self.servers[:3],
                                              server_info_frm_three_node)
        self.assertIsNotNone(bricks_dict, "Failed to get the bricks dict")

        # Using a custom loop because bulk_volume_creation produces a huge
        # amount of logging and does unwanted calls, which would slow down
        # the test case and use more memory
        # Create and start 2000 volumes
        kwargs = {'replica_count': 3}
        for i in range(2000):
            self.volname = "volume-%d" % i
            # One randomly chosen brick directory per server
            bricks_list = []
            for j, (key, value) in enumerate(bricks_dict.items(), 1):
                brick = "{}:{}/{}_brick-{}".format(key, choice(value),
                                                   self.volname, j)
                bricks_list.append(brick)

            ret, _, _ = volume_create(self.mnode, self.volname,
                                      bricks_list, False, **kwargs)
            self.assertEqual(ret, 0, "Failed to create volume: %s"
                             % self.volname)

            ret, _, _ = volume_start(self.mnode, self.volname)
            self.assertEqual(ret, 0, "Failed to start volume: %s"
                             % self.volname)

        g.log.info("Successfully created and started all the volumes")

        # Stop glusterd on one node
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on node :%s"
                        % self.servers[1])

        self.glusterd_is_stopped = True

        # Set a volume option for 850 volumes
        option_value = {'network.ping-timeout': 45}
        for i in range(850):
            vol_name = "volume-" + str(i)
            ret = set_volume_options(self.mnode, vol_name, option_value)
            self.assertTrue(ret, "Failed to set volume option")

        # Start glusterd on the previous node
        ret = restart_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to start glusterd on node: %s"
                        % self.servers[1])

        ret = wait_for_glusterd_to_start(self.servers[1])
        self.assertTrue(ret, "Glusterd is not yet started on the node :%s"
                        % self.servers[1])

        # It might take some time to get the peers to connected state,
        # because of the huge number of volumes to sync. Poll once per
        # second, but give up after the retry budget is exhausted.
        for _ in range(peer_connect_retries):
            ret = is_peer_connected(self.mnode, self.servers[1:3])
            if ret:
                break
            sleep(1)

        self.assertTrue(ret, "Peers are not in connected state")

        self.glusterd_is_stopped = False

        # Check the volume option set earlier is synced on restarted node
        for i in range(850):
            vol_name = "volume-" + str(i)
            # Retry because there might be a race condition and it might
            # take time for the node to sync the data initially. The loop
            # is bounded and sleeps between attempts to avoid busy-polling.
            for _ in range(option_sync_retries):
                ret = get_volume_options(self.servers[1], vol_name,
                                         'network.ping-timeout')
                self.assertTrue(ret, "Failed to get volume option")
                g.log.info("Ret: %s", ret['network.ping-timeout'])
                if ret['network.ping-timeout'] == '45':
                    break
                sleep(1)
            # Fails here when the retry budget ran out without a sync
            self.assertEqual(ret['network.ping-timeout'], '45',
                             "Option value not updated in the restarted node")
Esempio n. 22
0
    def test_glusterd_split_brain_with_quorum(self):
        """
        Check that peers reconnect after a volume reset performed while
        two nodes were down.

        - Requires a cluster of at least 6 nodes
        - Set a batch of options (server quorum included) on self.volname
          (the volume itself is expected to exist already)
        - Stop glusterd on the last two nodes
        - Reset the volume while those nodes are down
        - Start glusterd again on the two nodes
        - Poll peer status until all the nodes are in connected state
        """
        # Proceed only when the cluster has a minimum of 6 nodes
        self.assertGreaterEqual(len(self.servers), 6,
                                "Not enough servers to run this test")

        # Apply the volume options in one call
        volume_options = {
            'nfs.disable': 'off',
            'auth.allow': '1.1.1.1',
            'nfs.rpc-auth-allow': '1.1.1.1',
            'nfs.addr-namelookup': 'on',
            'cluster.server-quorum-type': 'server',
            'network.ping-timeout': '20',
            'nfs.port': '2049',
            'performance.nfs.write-behind': 'on',
        }
        options_set = set_volume_options(self.mnode, self.volname,
                                         volume_options)
        self.assertTrue(options_set, "Unable to set the volume options")
        g.log.info("All the volume_options set succeeded")

        # Take glusterd down on the last two nodes of the cluster
        stopped = stop_glusterd(self.servers[-2:])
        self.assertTrue(stopped, "Failed to stop glusterd on one of the node")
        g.log.info("Glusterd stop on the nodes : %s "
                   "succeeded", self.servers[-2:])

        # Confirm glusterd really is down on those nodes
        running_state = is_glusterd_running(self.servers[-2:])
        self.assertEqual(running_state, 1, "Glusterd is running on nodes")
        g.log.info("Expected: Glusterd stopped on nodes %s", self.servers[-2:])

        # Reset the volume, dropping all the options set earlier
        reset_rc, _, reset_err = volume_reset(self.mnode, self.volname)
        self.assertEqual(reset_rc, 0,
                         "Volume reset failed with below error %s" % reset_err)
        g.log.info("Volume reset on the volume %s succeeded", self.volname)

        # Bring glusterd back on the nodes where it was stopped
        started = start_glusterd(self.servers[-2:])
        self.assertTrue(started, "Failed to start glusterd on the nodes")
        g.log.info("Glusterd start on the nodes : %s "
                   "succeeded", self.servers[-2:])

        # Every peer must end up connected; none may stay rejected.
        # Poll every 2 seconds for at most 20 seconds in total.
        g.log.info("Wait for some seconds, right after glusterd start it "
                   "will create two daemon process it need few seconds "
                   "(like 3-5) to initialize the glusterd")
        for _ in range(0, 20, 2):
            if is_peer_connected(self.mnode, self.servers):
                g.log.info("Peers are in connected state in the cluster")
                break
            g.log.info("Peers are not connected state,"
                       " Retry after 2 seconds .......")
            sleep(2)
        else:
            # Retry budget exhausted without the peers reconnecting
            raise ExecutionError("Peers are not connected state after "
                                 "bringing back glusterd online on the "
                                 "nodes in which previously glusterd "
                                 "had been stopped")
    def setUpClass(cls, mount_vol=True):
        """Setup volume, shares/exports volume for cifs/nfs protocols,
            mounts the volume.

        Args:
            mount_vol (bool): when True (default) every mount object in
                cls.mounts is mounted after the volume is set up; when
                False the mount step is skipped.

        Raises:
            ExecutionError: if peers are not connected, or if volume
                setup, export/share, option setting, volume logging or
                mounting fails.
        """
        # `__func__` exists on bound methods in Python 2.6+ and Python 3,
        # whereas `im_func` is Python 2 only.
        GlusterBaseClass.setUpClass.__func__(cls)

        # Validate if peer is connected from all the servers
        for server in cls.servers:
            ret = is_peer_connected(server, cls.servers)
            if not ret:
                raise ExecutionError("Validating Peers to be in Cluster "
                                     "Failed")
        g.log.info("All peers are in connected state")

        # Peer Status from mnode
        peer_status(cls.mnode)

        # Setup Volume
        ret = setup_volume(mnode=cls.mnode,
                           all_servers_info=cls.all_servers_info,
                           volume_config=cls.volume, force=True)
        if not ret:
            raise ExecutionError("Setup volume %s failed" % cls.volname)
        time.sleep(10)

        # Export/Share the volume based on mount_type
        if cls.mount_type != "glusterfs":
            if "nfs" in cls.mount_type:
                ret = export_volume_through_nfs(
                    mnode=cls.mnode, volname=cls.volname,
                    enable_ganesha=cls.enable_nfs_ganesha)
                if not ret:
                    raise ExecutionError("Failed to export volume %s "
                                         "as NFS export" % cls.volname)

                # Set NFS-Ganesha specific volume options
                if cls.enable_nfs_ganesha and cls.nfs_ganesha_export_options:
                    g.log.info("Setting NFS-Ganesha export specific "
                               "volume options")
                    ret = set_volume_options(
                        mnode=cls.mnode, volname=cls.volname,
                        options=cls.nfs_ganesha_export_options)
                    if not ret:
                        raise ExecutionError("Failed to set NFS-Ganesha "
                                             "export specific options on "
                                             "volume %s" % cls.volname)
                    g.log.info("Successful in setting NFS-Ganesha export "
                               "specific volume options")

            if "smb" in cls.mount_type or "cifs" in cls.mount_type:
                ret = share_volume_over_smb(mnode=cls.mnode,
                                            volname=cls.volname,
                                            smb_users_info=cls.smb_users_info)
                if not ret:
                    raise ExecutionError("Failed to export volume %s "
                                         "as SMB Share" % cls.volname)

                # Set SMB share specific volume options
                if cls.smb_share_options:
                    g.log.info("Setting SMB share specific volume options")
                    ret = set_volume_options(mnode=cls.mnode,
                                             volname=cls.volname,
                                             options=cls.smb_share_options)
                    if not ret:
                        raise ExecutionError("Failed to set SMB share "
                                             "specific options "
                                             "on volume %s" % cls.volname)
                    g.log.info("Successful in setting SMB share specific "
                               "volume options")

        # Log Volume Info and Status
        ret = log_volume_info_and_status(cls.mnode, cls.volname)
        if not ret:
            raise ExecutionError("Logging volume %s info and status failed"
                                 % cls.volname)

        # Create Mounts
        if mount_vol:
            _rc = True
            g.log.info("Starting to mount volume")
            # Try every mount before failing so that all broken mounts
            # get logged, not just the first one.
            for mount_obj in cls.mounts:
                ret = mount_obj.mount()
                if not ret:
                    g.log.error("Unable to mount volume '%s:%s' on '%s:%s'",
                                mount_obj.server_system, mount_obj.volname,
                                mount_obj.client_system, mount_obj.mountpoint)
                    _rc = False
            if not _rc:
                raise ExecutionError("Mounting volume %s on few clients "
                                     "failed" % cls.volname)
            g.log.info("Successful in mounting volume on all clients")

            # Get info of mount before the IO
            g.log.info("Get mounts Info:")
            log_mounts_info(cls.mounts)
        else:
            g.log.info("Not Mounting the volume as 'mount_vol' option is "
                       "set to %s", mount_vol)
Esempio n. 24
0
    def test_volume_create_with_glusterd_restarts(self):
        # pylint: disable=too-many-statements
        """
        Test case:
        1) Create a cluster.
        2) Create volume using the first three nodes say N1, N2 and N3.
        3) While the create is happening restart the fourth node N4.
        4) Check if glusterd has crashed on any node.
        5) While the volume start is happening restart N4.
        6) Check if glusterd has crashed on any node.

        The peer-status polls are limited to 60 attempts, 3 seconds apart,
        so the test fails instead of hanging when peers never reconnect.
        """

        # Fetching all the parameters for volume_create
        list_of_three_servers = []
        server_info_for_three_nodes = {}

        for server in self.servers[0:3]:
            list_of_three_servers.append(server)
            server_info_for_three_nodes[server] = self.all_servers_info[server]

        bricks_list = form_bricks_list(self.mnode, self.volname, 3,
                                       list_of_three_servers,
                                       server_info_for_three_nodes)
        # Restarting glusterd in a loop on the fourth node
        restart_cmd = ("for i in `seq 1 5`; do "
                       "systemctl restart glusterd; done")
        proc1 = g.run_async(self.servers[3], restart_cmd)

        # Creating volumes using 3 servers
        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertEqual(ret, 0, "Volume creation failed")
        g.log.info("Volume %s created successfully", self.volname)

        ret, _, _ = proc1.async_communicate()
        self.assertEqual(ret, 0, "Glusterd restart not working.")

        # Checking if peers are connected or not.
        # A bounded loop: the previous counter was never incremented,
        # which made this an endless loop on a broken cluster.
        for _ in range(60):
            ret = is_peer_connected(self.mnode, self.servers)
            if ret:
                break
            sleep(3)
        self.assertTrue(ret, "Peers are not in connected state.")
        g.log.info("Peers are in connected state.")

        # Restarting glusterd in a loop on the fourth node
        proc1 = g.run_async(self.servers[3], restart_cmd)

        # Start the volume created.
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume start failed")
        g.log.info("Volume %s started successfully", self.volname)

        ret, _, _ = proc1.async_communicate()
        self.assertEqual(ret, 0, "Glusterd restart not working.")

        # Checking if peers are connected or not (same bounded poll).
        for _ in range(60):
            ret = is_peer_connected(self.mnode, self.servers)
            if ret:
                break
            sleep(3)
        self.assertTrue(ret, "Peers are not in connected state.")
        g.log.info("Peers are in connected state.")
Esempio n. 25
0
    def test_peer_probe(self):
        """
        In this test case:
        1. Create Dist Volume on Node 1
        2. Create Replica Volume on Node 2
        3. Peer Probe N2 from N1(should fail)
        4. Clean All Volumes
        5. Peer Probe N1 to N2(should success)
           Peer Probe N3 to N2(should fail)
        6. Create replica Volume on N1 and N2
        7. Peer probe from N3 to N1(should fail)
        8. Peer probe from N1 to N3(should succeed)
        9. Create replica Volume on N1, N2 and N3
        10.Start Volume
        11. delete volume (should fail)
        12. Stop volume
        13. Clean up all volumes
        """

        # pylint: disable=too-many-statements
        # Create a distributed volume on Node1
        number_of_brick = 1
        servers_info_from_single_node = {}
        servers_info_from_single_node[self.servers[0]] = self.all_servers_info[
            self.servers[0]]
        self.volname = "testvol"
        bricks_list = form_bricks_list(self.servers[0], self.volname,
                                       number_of_brick, self.servers[0],
                                       servers_info_from_single_node)
        ret, _, _ = volume_create(self.servers[0], self.volname, bricks_list,
                                  True)
        self.assertEqual(ret, 0, "Volume create failed")
        g.log.info("Volume %s created successfully", self.volname)

        # Create a replicate volume on Node2 without force
        number_of_brick = 2
        servers_info_from_single_node = {}
        servers_info_from_single_node[self.servers[1]] = self.all_servers_info[
            self.servers[1]]
        kwargs = {'replica_count': 2}
        self.volname = "new-volume"
        bricks_list = form_bricks_list(self.servers[1], self.volname,
                                       number_of_brick, self.servers[1],
                                       servers_info_from_single_node)

        # creation of replicate volume without force should fail
        ret, _, _ = volume_create(self.servers[1], self.volname, bricks_list,
                                  False, **kwargs)
        self.assertNotEqual(ret, 0, ("Unexpected: Successfully created "
                                     "the replicate volume on node2 "
                                     "without force"))
        g.log.info(
            "Failed to create the replicate volume %s as "
            " expected without force", self.volname)

        # Create a replica volume on Node2 with force
        number_of_brick = 3
        servers_info_from_single_node = {}
        servers_info_from_single_node[self.servers[1]] = self.all_servers_info[
            self.servers[1]]
        kwargs = {'replica_count': 3}
        self.volname = "new-volume"
        bricks_list = form_bricks_list(self.servers[1], self.volname,
                                       number_of_brick, self.servers[1],
                                       servers_info_from_single_node)

        # creation of replicate volume with force should succeed
        ret, _, _ = volume_create(self.servers[1], self.volname, bricks_list,
                                  True, **kwargs)
        self.assertEqual(ret, 0, "Volume create failed")
        g.log.info("Volume %s created", self.volname)

        # Perform peer probe from N1 to N2
        # Assertion messages are %-formatted here; passing a tuple as msg
        # would print the raw tuple instead of the interpolated text.
        ret, _, _ = peer_probe(self.servers[0], self.servers[1])
        self.assertNotEqual(
            ret, 0,
            ("peer probe is success from %s to %s even if %s "
             " is a part of another cluster or having volumes "
             " configured"
             % (self.servers[0], self.servers[1], self.servers[1])))
        g.log.info("peer probe failed from %s to "
                   "%s as expected", self.servers[0], self.servers[1])

        # clean up all volumes
        for server in self.servers[0:2]:
            # Listing all the volumes
            vol_list = get_volume_list(server)
            self.assertIsNotNone(vol_list, "Unable to get volumes list")
            # Log the node that was actually queried
            g.log.info("Getting the volume list from %s", server)
            for vol in vol_list:
                g.log.info("deleting volume : %s", vol)
                ret = cleanup_volume(server, vol)
                self.assertTrue(ret, "Failed to Cleanup the Volume %s" % vol)
                g.log.info("Volume deleted successfully : %s", vol)

        # Perform peer probe from N1 to N2 should success
        ret, _, _ = peer_probe(self.servers[0], self.servers[1])
        self.assertEqual(ret, 0, ("peer probe from %s to %s is failed"
                                  % (self.servers[0], self.servers[1])))
        g.log.info("peer probe is success from %s to "
                   "%s", self.servers[0], self.servers[1])

        # Checking if peer is connected (at most 30 attempts, 3s apart)
        for _ in range(30):
            ret = is_peer_connected(self.servers[0], self.servers[1])
            if ret:
                break
            sleep(3)
        self.assertTrue(ret, "Peer is not in connected state.")
        g.log.info("Peers is in connected state.")

        # Perform peer probe from N3 to N2 should fail
        ret, _, _ = peer_probe(self.servers[2], self.servers[1])
        self.assertNotEqual(
            ret, 0,
            ("peer probe is success from %s to %s even if %s "
             "is a part of another cluster or having volumes "
             "configured"
             % (self.servers[2], self.servers[1], self.servers[1])))
        g.log.info("peer probe failed from %s to "
                   "%s as expected", self.servers[2], self.servers[1])

        # Create a replica volume on N1 and N2 with force
        number_of_brick = 2
        servers_info_from_two_node = {}
        for server in self.servers[0:2]:
            servers_info_from_two_node[server] = self.all_servers_info[server]
        kwargs = {'replica_count': 2}
        self.volname = "new-volume"
        bricks_list = form_bricks_list(self.servers[0], self.volname,
                                       number_of_brick, self.servers[0:2],
                                       servers_info_from_two_node)
        ret, _, _ = volume_create(self.servers[1], self.volname, bricks_list,
                                  True, **kwargs)
        self.assertEqual(ret, 0, "Volume create failed")
        g.log.info("Volume %s created succssfully", self.volname)

        # Perform peer probe from N3 to N1 should fail
        ret, _, _ = peer_probe(self.servers[2], self.servers[0])
        self.assertNotEqual(
            ret, 0,
            ("peer probe is success from %s to %s even if %s "
             "a part of another cluster or having volumes "
             "configured"
             % (self.servers[2], self.servers[0], self.servers[0])))
        g.log.info("peer probe is failed from %s to "
                   "%s as expected", self.servers[2], self.servers[0])

        # Perform peer probe from N1 to N3 should succeed
        ret, _, _ = peer_probe(self.servers[0], self.servers[2])
        self.assertEqual(ret, 0, ("peer probe from %s to %s is failed"
                                  % (self.servers[0], self.servers[2])))
        g.log.info("peer probe is success from %s to "
                   "%s", self.servers[0], self.servers[2])

        # Checking if peer is connected (at most 30 attempts, 3s apart)
        for _ in range(30):
            ret = is_peer_connected(self.servers[0], self.servers[:3])
            if ret:
                break
            sleep(3)
        self.assertTrue(ret, "Peer is not in connected state.")
        g.log.info("Peers is in connected state.")

        # Create a replica volume on N1, N2 and N3 with force
        number_of_brick = 3
        server_info_from_three_node = {}
        for server in self.servers[0:3]:
            server_info_from_three_node[server] = self.all_servers_info[server]
        kwargs = {'replica_count': 3}
        self.volname = "new-replica-volume"
        bricks_list = form_bricks_list(self.servers[2], self.volname,
                                       number_of_brick, self.servers[0:3],
                                       server_info_from_three_node)
        ret, _, _ = volume_create(self.servers[1], self.volname, bricks_list,
                                  True, **kwargs)
        self.assertEqual(ret, 0, "Volume create failed")
        g.log.info("creation of replica volume should succeed")

        ret, _, _ = volume_start(self.servers[2], self.volname, True)
        self.assertEqual(ret, 0, ("Failed to start the volume %s"
                                  % self.volname))
        g.log.info("Volume %s start with force is success", self.volname)

        # Volume delete should fail without stopping volume
        self.assertTrue(
            volume_delete(self.servers[2], self.volname, xfail=True),
            "Unexpected Error: Volume deleted "
            "successfully without stopping volume")
        g.log.info(
            "Expected: volume delete should fail without "
            "stopping volume: %s", self.volname)

        # Volume stop with force
        ret, _, _ = volume_stop(self.mnode, self.volname, True)
        self.assertEqual(ret, 0, ("Failed to stop the volume %s"
                                  % self.volname))
        g.log.info("Volume stop with force is success")
    def test_gluster_operation_after_removing_firewall(self):
        """
        Exercise peer and volume operations while firewall services are
        added to and removed from two nodes.

        Test steps:
        1. Add firewall services to the zones on 2 nodes
        2. Create a cluster using the 2 nodes
        3. Check peer status on both the nodes
        4. Remove firewall services from both the nodes
        5. Create a distribute volume using both the node bricks, start it
        6. Try mounting the volume on a different node
        7. Cleanup the volume, detach the node and try to probe it again
        8. Add firewall services again and reload the firewall
        9. Probe the node and check peer status
        10. Remove firewall services and reload the firewall
        11. Check peer status
        12. Create a distribute volume using both the node bricks, start it
        13. Try mounting the volume on a different node
        """
        # pylint: disable=too-many-statements
        # The first two servers form the cluster; the second is the peer
        cluster_nodes = self.servers[:2]
        peer = self.servers[1]

        # Phase 1: open the firewall, build the cluster
        services_added = self._add_firewall_services(cluster_nodes)
        self.assertTrue(services_added, "Failed to add services to firewall")
        self.firewall_added = True

        self._probe_peer(peer)

        connected = wait_for_peers_to_connect(self.mnode, cluster_nodes)
        self.assertTrue(connected, "Peer is not connected")

        # Phase 2: close the firewall, then create, start and try to
        # mount a volume
        self._remove_firewall_services(cluster_nodes)
        self.firewall_added = False

        self._create_distribute_volume("distribute_volume")
        self._start_the_volume(self.volname)
        self._try_mounting_volume()

        # Phase 3: tear down the volume and the peer; with the firewall
        # services removed the re-probe is called in its expect-failure
        # mode (second argument True)
        cleaned = cleanup_volume(self.mnode, self.volname)
        self.assertTrue(cleaned, "Failed to cleanup volume")

        detach_rc, _, _ = peer_detach(self.mnode, peer)
        self.assertEqual(detach_rc, 0, "Failed to detach node: %s" % peer)

        self._probe_peer(peer, True)

        # Phase 4: add the services back, reload, and probe again
        services_added = self._add_firewall_services(cluster_nodes)
        self.assertTrue(services_added, "Failed to add services to firewall")
        self.firewall_added = True

        reloaded = self._reload_firewall_service(cluster_nodes)
        self.assertTrue(reloaded, "Failed to reload firewall service")

        self._probe_peer(peer)

        connected = wait_for_peers_to_connect(self.mnode, peer)
        self.assertTrue(connected, "Peer is not connected")

        # Phase 5: remove the services, reload, and verify the peer is
        # still reported as connected
        self._remove_firewall_services(cluster_nodes)
        self.firewall_added = False

        reloaded = self._reload_firewall_service(cluster_nodes)
        self.assertTrue(reloaded, "Failed to reload firewall service")

        connected = is_peer_connected(self.mnode, peer)
        self.assertTrue(connected, "Peer is not connected")

        # Phase 6: create, start and try to mount a second volume
        self._create_distribute_volume("distribute_volume_2")
        self._start_the_volume(self.volname)
        self._try_mounting_volume()
    def test_snap_glusterd_down(self):
        # pylint: disable=too-many-statements
        """
        Verify a snapshot can be deactivated while glusterd is down on a node.

        Steps (the volume is presumably created and mounted by the class
        setup; this method does not do it itself):

        1. Create a snapshot of the volume
        2. Validate using snapshot info that it is not started
        3. Activate the snapshot
        4. List all snapshots present
        5. Validate using snapshot info that it is started
        6. Stop glusterd on one node
        7. Check glusterd status
        8. Deactivate the snapshot
        9. Validate using snapshot info that it is not started
       10. Start glusterd on that node
       11. Check glusterd status
       12. Validate using snapshot info that it is still not started
       13. Check all peers are connected
        """
        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(ret, 0, ("Failed to create snapshot %s for volume %s"
                                  % (self.snap, self.volname)))
        g.log.info("Snapshot %s created successfully "
                   "for volume %s", self.snap, self.volname)

        # Check snapshot info: a freshly created snapshot must not be started
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        self.assertIsNotNone(snap_info, "Failed to get snap information "
                             "for snapshot %s" % self.snap)
        status = snap_info['snapVolume']['status']
        self.assertNotEqual(status, 'Started', "Unexpected: snapshot %s "
                            "is in Started state" % self.snap)
        g.log.info("Successfully checked snapshot info")

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0, ("Failed to Activate snapshot %s"
                                  % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # snapshot list
        g.log.info("Starting to validate list of snapshots")
        snap_list1 = get_snap_list(self.mnode)
        self.assertIsNotNone(snap_list1, "Failed to list all the snapshot")
        self.assertEqual(len(snap_list1), 1, "Failed to validate snap list")
        g.log.info("Snapshot list successfully validated")

        # Check snapshot info: after activation the snapshot must be started
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        self.assertIsNotNone(snap_info, "Failed to get snap information "
                             "for snapshot %s" % self.snap)
        status = snap_info['snapVolume']['status']
        self.assertEqual(status, 'Started', "Snapshot %s is not in "
                         "Started state after activation" % self.snap)
        g.log.info("Successfully checked snapshot info")

        # Stop Glusterd on one node
        g.log.info("Stopping Glusterd on one node")
        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on node %s"
                        % self.servers[1])

        # Check Glusterd status. The assertions in this test treat a return
        # of 1 as "not running" and 0 as "running"; poll for up to ~80s.
        g.log.info("Check glusterd running or not")
        count = 0
        while count < 80:
            ret = is_glusterd_running(self.servers[1])
            if ret == 1:
                break
            time.sleep(2)
            count += 2
        self.assertEqual(ret, 1, "Unexpected: glusterd running on node %s" %
                         self.servers[1])
        g.log.info("Expected: Glusterd not running on node %s",
                   self.servers[1])

        # de-activating snapshot
        g.log.info("Starting to de-activate Snapshot")
        ret, _, _ = snap_deactivate(self.mnode, self.snap)
        self.assertEqual(ret, 0, ("Failed to deactivate snapshot %s"
                                  % self.snap))
        g.log.info("Snapshot %s deactivated successfully", self.snap)

        # validate snapshot info: deactivated snapshot must not be started
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        self.assertIsNotNone(snap_info, "Failed to get snap information "
                             "for snapshot %s" % self.snap)
        status = snap_info['snapVolume']['status']
        self.assertNotEqual(status, 'Started', "Unexpected: snapshot %s is "
                            "still in Started state after deactivation"
                            % self.snap)
        g.log.info("Successfully validated snapshot info")

        # Start Glusterd on node
        g.log.info("Starting Glusterd on node %s", self.servers[1])
        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to start glusterd on %s node"
                        % self.servers[1])
        g.log.info("Successfully started glusterd on "
                   "%s node", self.servers[1])

        # Check Glusterd status: keep polling until it reports running (0).
        # Breaking on a truthy value here would leave the loop on the very
        # first "not running" answer and defeat the wait.
        g.log.info("Check glusterd running or not")
        count = 0
        while count < 80:
            ret = is_glusterd_running(self.servers[1])
            if ret == 0:
                break
            time.sleep(2)
            count += 2
        self.assertEqual(ret, 0, "glusterd not running on node %s "
                         % self.servers[1])
        g.log.info("glusterd is running on %s node",
                   self.servers[1])

        # validate snapshot info: state must survive the glusterd restart
        g.log.info("Checking snapshot info")
        snap_info = get_snap_info_by_snapname(self.mnode, self.snap)
        self.assertIsNotNone(snap_info, "Failed to get snap info for"
                             " snapshot %s" % self.snap)
        status = snap_info['snapVolume']['status']
        self.assertNotEqual(status, 'Started', "snapshot"
                            " %s failed to validate with snap info"
                            % self.snap)
        g.log.info("Successfully validated snapshot info")

        # Check all the peers are in connected state
        g.log.info("Validating all the peers are in connected state")
        for server in self.servers:
            count = 0
            while count < 80:
                ret = is_peer_connected(self.mnode, server)
                if ret:
                    break
                time.sleep(2)
                count += 2
            self.assertTrue(ret, "All the nodes are not in cluster")
        g.log.info("Successfully validated all the peers")
    def test_glusterd_quorum_validation(self):
        """
        Validate glusterd behaviour when server quorum is not met.

        -> Creating two volumes and starting them, stop the second volume
        -> set the server quorum and set the ratio to 90
        -> Stop the glusterd in one of the node, so the quorum won't meet
        -> Peer probing a new node should fail
        -> Volume start will fail
        -> Volume stop will fail
        -> volume delete will fail
        -> volume reset will fail
        -> Start the glusterd on the node where it is stopped
        -> Volume delete, stop, reset and peer probe will succeed once
           quorum is met
        """
        # pylint: disable=too-many-statements, too-many-branches

        # Peer probe first 3 servers
        servers_info_from_three_nodes = {}
        for server in self.servers[0:3]:
            servers_info_from_three_nodes[server] = self.all_servers_info[
                server]

            # Peer probe the first 3 servers
            ret, _, _ = peer_probe(self.mnode, server)
            self.assertEqual(ret, 0,
                             "Peer probe failed to server %s" % server)
        g.log.info("Peer probe to first 3 nodes succeeded")

        self.volume['servers'] = self.servers[0:3]
        # Create a volume using the first 3 nodes
        ret = setup_volume(self.mnode,
                           servers_info_from_three_nodes,
                           self.volume,
                           force=True)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Volume created and started successfully")

        # Creating another volume and stopping it
        second_volume = "second_volume"
        self.volume['name'] = second_volume
        ret = setup_volume(self.mnode,
                           servers_info_from_three_nodes,
                           self.volume,
                           force=True)
        self.assertTrue(ret, "Failed to create and start volume")
        g.log.info("Volume created and started successfully")

        # stopping the second volume
        g.log.info("Stopping the second volume %s", second_volume)
        ret, _, _ = volume_stop(self.mnode, second_volume)
        self.assertEqual(ret, 0, "Failed to stop the volume")
        g.log.info("Successfully stopped second volume %s", second_volume)

        # Setting the server-quorum-type as server
        self.options = {"cluster.server-quorum-type": "server"}
        vol_list = get_volume_list(self.mnode)
        self.assertIsNotNone(vol_list, "Failed to get the volume list")
        g.log.info("Fetched the volume list")
        for volume in vol_list:
            g.log.info(
                "Setting the server-quorum-type as server"
                " on volume %s", volume)
            ret = set_volume_options(self.mnode, volume, self.options)
            self.assertTrue(ret, "Failed to set the quorum type as a server"
                            " on volume %s" % volume)
        g.log.info("Server Quorum type is set as a server")

        # Setting the server quorum ratio to 90
        self.quorum_perecent = {'cluster.server-quorum-ratio': '90%'}
        ret = set_volume_options(self.mnode, 'all', self.quorum_perecent)
        self.assertTrue(ret, ("Failed to set the server quorum ratio "
                              "to 90 on servers"))
        g.log.info("Successfully set server quorum ratio to 90% on servers")

        # Stop glusterd on one of the node
        ret = stop_glusterd(self.servers[2])
        self.assertTrue(ret, "Failed to stop glusterd on "
                        "node %s" % self.servers[2])
        g.log.info("Glusterd stop on the nodes : %s"
                   " succeeded", self.servers[2])

        # Check glusterd is stopped (this test treats 1 as "not running")
        ret = is_glusterd_running(self.servers[2])
        self.assertEqual(ret, 1, "Unexpected: Glusterd is running on node")
        g.log.info("Expected: Glusterd stopped on node %s", self.servers[2])

        # Adding a new peer will fail as quorum not met
        ret, _, _ = peer_probe(self.mnode, self.servers[3])
        self.assertNotEqual(ret, 0,
                            "Unexpected: "
                            "Succeeded to peer probe new node %s when quorum "
                            "is not met" % self.servers[3])
        g.log.info("Failed to peer probe new node as expected"
                   " when quorum not met")

        # Starting the stopped second volume should fail as quorum is not met
        ret, _, _ = volume_start(self.mnode, second_volume)
        self.assertNotEqual(
            ret, 0, "Unexpected: Successfully started "
            "volume even when quorum not met.")
        g.log.info(
            "Volume start %s failed as expected when quorum "
            "is not met", second_volume)

        # Stopping the first (started) volume should fail. Any non-zero
        # return is a failure, matching the other negative checks below.
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Unexpected: Successfully stopped"
            " volume even when quorum is not met")
        g.log.info(
            "volume stop %s failed as expected when quorum "
            "is not met", self.volname)

        # Stopping a volume with force option should fail
        ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
        self.assertNotEqual(
            ret, 0, "Unexpected: Successfully "
            "stopped volume with force. Expected: "
            "Volume stop should fail when quorum is not met")
        g.log.info("volume stop failed as expected when quorum is not met")

        # Deleting a volume should fail. Deleting the second volume.
        ret = volume_delete(self.mnode, second_volume)
        self.assertFalse(
            ret, "Unexpected: Volume delete was "
            "successful even when quorum is not met")
        g.log.info("volume delete failed as expected when quorum is not met")

        # Volume reset should fail when quorum is not met
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Unexpected: Volume reset was "
            "successful even when quorum is not met")
        g.log.info("volume reset failed as expected when quorum is not met")

        # Volume reset should fail even with force when quorum is not met
        ret, _, _ = volume_reset(self.mnode, self.volname, force=True)
        self.assertNotEqual(
            ret, 0, "Unexpected: Volume reset was "
            "successful with force even "
            "when quorum is not met")
        g.log.info("volume reset failed as expected when quorum is not met")

        # Start glusterd on the node where glusterd is stopped
        ret = start_glusterd(self.servers[2])
        self.assertTrue(ret, "Failed to start glusterd on one node")
        g.log.info("Started glusterd on server"
                   " %s successfully", self.servers[2])

        ret = is_glusterd_running(self.servers[2])
        self.assertEqual(ret, 0, "glusterd is not running on "
                         "node %s" % self.servers[2])
        g.log.info("glusterd is running on node" " %s ", self.servers[2])

        # Check peer status whether all peer are in connected state none of the
        # nodes should be in peer rejected state
        halt, counter, _rc = 30, 0, False
        g.log.info("Wait for some seconds, right after glusterd start it "
                   "will create two daemon process it need few seconds "
                   "(like 3-5) to initialize the glusterd")
        while counter < halt:
            ret = is_peer_connected(self.mnode, self.servers[0:3])
            if not ret:
                g.log.info("Peers are not connected state,"
                           " Retry after 2 seconds .......")
                sleep(2)
                counter = counter + 2
            else:
                _rc = True
                g.log.info("Peers are in connected state in the cluster")
                break

        self.assertTrue(_rc, ("Peers are not connected state after "
                              "bringing back glusterd online on the "
                              "nodes in which previously glusterd "
                              "had been stopped"))

        # Check all bricks are online or wait for the bricks to be online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "All bricks are not online")
        g.log.info("All bricks of the volume %s are online", self.volname)

        # Once quorum is met should be able to cleanup the volume
        ret = volume_delete(self.mnode, second_volume)
        self.assertTrue(ret, "Volume delete failed even when quorum is met")
        g.log.info("volume delete succeed without any issues")

        # Volume stop should succeed
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume stop failed")
        g.log.info("succeeded stopping the volume as expected")

        # volume reset should succeed
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume reset failed ")
        g.log.info("volume reset succeeded as expected when quorum is met")

        # Peer probe new node should succeed
        ret, _, _ = peer_probe(self.mnode, self.servers[3])
        self.assertEqual(
            ret, 0, "Failed to peer probe new node even when quorum is met")
        g.log.info("Succeeded to peer probe new node when quorum met")

        # Check peer status whether all peer are in connected state none of the
        # nodes should be in peer rejected state
        # NOTE(review): only the first three servers are checked here even
        # though self.servers[3] was just probed - confirm whether the new
        # peer should be included in this check.
        halt, counter, _rc = 30, 0, False
        g.log.info("Wait for some seconds, right after peer probe")
        while counter < halt:
            ret = is_peer_connected(self.mnode, self.servers[0:3])
            if not ret:
                g.log.info("Peers are not connected state,"
                           " Retry after 2 seconds .......")
                sleep(2)
                counter = counter + 2
            else:
                _rc = True
                g.log.info("Peers are in connected state in the cluster")
                break

        self.assertTrue(_rc, "Peers are not connected state")
    def test_brick_port(self):
        # pylint: disable=too-many-statements, too-many-branches
        """
        Verify bricks still get a port after glusterd restarts on a peer.

        In this test case:
        1. Trusted storage Pool of 2 nodes
        2. Create a distributed volumes with 2 bricks
        3. Start the volume
        4. Stop glusterd on node 2
        5. Modify any of the volume option on node 1
        6. Start glusterd on node 2
        7. Check volume status, brick should get port
        """

        def _count_bricks_with_port(status):
            """Count the innermost status entries whose "port" is > 0.

            The status dict is walked three levels deep (presumably
            volume -> node -> brick; verify against get_volume_status).
            A non-numeric port value is counted as "no port" so that the
            caller's assertion fails cleanly instead of raising ValueError.
            """
            total = 0
            for per_volume in status.values():
                for per_node in per_volume.values():
                    for brick_info in per_node.values():
                        port = str(brick_info["port"]).strip()
                        if port.isdigit() and int(port) > 0:
                            total += 1
            return total

        my_server_info = {
            self.servers[0]: self.all_servers_info[self.servers[0]]
        }
        my_servers = self.servers[0:2]
        index = 1
        ret, _, _ = peer_probe(self.servers[0], self.servers[index])
        self.assertEqual(ret, 0, "peer probe from %s to %s is failed"
                         % (self.servers[0], self.servers[index]))
        g.log.info("peer probe is success from %s to "
                   "%s", self.servers[0], self.servers[index])
        key = self.servers[index]
        my_server_info[key] = self.all_servers_info[key]

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 2, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        ret = volume_create(self.mnode, self.volname, bricks_list, force=False)
        self.assertEqual(ret[0], 0, ("Unable "
                                     "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start the "
                         "volume %s" % self.volname)
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")

        g.log.info("Successfully got the list of bricks of volume")

        vol_status = get_volume_status(self.mnode, self.volname)
        self.assertIsNotNone(
            vol_status, "Failed to get volume "
            "status for %s" % self.volname)
        totport = _count_bricks_with_port(vol_status)

        self.assertEqual(totport, 2, "Volume %s is not started successfully "
                         "because no. of brick port is not equal"
                         " to 2" % self.volname)

        ret = stop_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to stop glusterd on one of the node")
        # Poll until glusterd reports "not running" (non-zero return).
        count = 0
        while count < 60:
            ret = is_glusterd_running(self.servers[1])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertEqual(ret, 1,
                         "glusterd is still running on %s" % self.servers[1])
        g.log.info("Glusterd stop on the nodes : %s "
                   "succeeded", self.servers[1])

        # Change a volume option while the second node's glusterd is down
        option = {'performance.readdir-ahead': 'on'}
        ret = set_volume_options(self.servers[0], self.volname, option)
        self.assertTrue(
            ret, "gluster volume set %s performance.readdir-ahead "
            "on is failed on server %s" % (self.volname, self.servers[0]))
        g.log.info(
            "gluster volume set %s performance.readdir-ahead on "
            "successfully on :%s", self.volname, self.servers[0])

        ret = start_glusterd(self.servers[1])
        self.assertTrue(ret, "Failed to start glusterd on one of the node")
        g.log.info("Glusterd start on the nodes : %s "
                   "succeeded", self.servers[1])
        # Poll until glusterd reports "running" (zero return).
        count = 0
        while count < 60:
            ret = is_glusterd_running(self.servers[1])
            if not ret:
                break
            sleep(2)
            count += 1

        self.assertEqual(ret, 0,
                         "glusterd is not running on %s" % self.servers[1])
        g.log.info("glusterd is running on the node : %s",
                   self.servers[1])

        # Wait for the restarted node to rejoin the cluster
        count = 0
        while count < 60:
            ret = is_peer_connected(self.servers[0], self.servers[1])
            if ret:
                break
            sleep(2)
            count += 1
        self.assertTrue(
            ret, "glusterd is not connected %s with peer %s" %
            (self.servers[0], self.servers[1]))

        vol_status = get_volume_status(self.mnode, self.volname)
        self.assertIsNotNone(
            vol_status, "Failed to get volume "
            "status for %s" % self.volname)
        totport = _count_bricks_with_port(vol_status)

        self.assertEqual(totport, 2, "Volume %s is not started successfully "
                         "because no. of brick port is not equal"
                         " to 2" % self.volname)
    def test_spurious_rebalance(self):
        """
        Verify the dht xattr on a brick whose remove-brick was stopped.

        In this test case:
        1. Trusted storage Pool of 3 nodes
        2. Create a distributed volumes with 3 bricks
        3. Start the volume
        4. Fuse mount the gluster volume on out of trusted nodes
        5. Remove a brick from the volume
        6. Stop the remove brick process
        7. Perform fix-layout on the volume
        8. Wait for the rebalance fix-layout to complete
        9. Create a directory from mount point
       10. Check trusted.glusterfs.dht extended attribute for newly
           created directory on the remove brick
        """

        # pylint: disable=too-many-statements
        my_servers = self.servers[0:3]
        my_server_info = {
            server: self.all_servers_info[server] for server in my_servers
        }
        for index in range(1, 3):
            ret, _, _ = peer_probe(self.servers[0], self.servers[index])
            self.assertEqual(ret, 0, "peer probe from %s to %s is failed"
                             % (self.servers[0], self.servers[index]))
            g.log.info("peer probe is success from %s to "
                       "%s", self.servers[0], self.servers[index])
        # Checking if peer is connected (up to 30 attempts, 3s apart)
        counter = 0
        while counter < 30:
            ret = is_peer_connected(self.mnode, self.servers[:3])
            counter += 1
            if ret:
                break
            sleep(3)
        self.assertTrue(ret, "Peer is not in connected state.")
        g.log.info("Peers is in connected state.")

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 3, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        ret, _, _ = volume_create(self.mnode,
                                  self.volname,
                                  bricks_list,
                                  force=False)
        self.assertEqual(ret, 0, ("Unable "
                                  "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, "Failed to start the "
                         "volume %s" % self.volname)
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        # Start, then immediately stop, removal of the third brick
        remove_brick_list = [bricks_list[2]]
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove bricks operation started successfully")

        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'stop')
        self.assertEqual(ret, 0, "Failed to stop remove brick operation")
        g.log.info("Remove bricks operation stopped successfully")

        g.log.info("Starting fix-layout on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, True)
        self.assertEqual(ret, 0, "Failed to start rebalance for fix-layout "
                         "on the volume %s" % self.volname)
        g.log.info("Successfully started fix-layout on the volume %s",
                   self.volname)

        # Wait for fix-layout to complete
        g.log.info("Waiting for fix-layout to complete")
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Fix-layout is not yet complete on the volume "
                        "%s" % self.volname)
        g.log.info("Fix-layout is successfully complete on the volume %s",
                   self.volname)
        ret = mkdir(self.mounts[0].client_system,
                    "%s/dir1" % self.mounts[0].mountpoint)
        self.assertTrue(ret, "Failed to create directory dir1")
        g.log.info("directory dir1 is created successfully")

        # The brick entry is split on ':' into its host and directory parts
        brick_server, brick_dir = remove_brick_list[0].split(':')
        folder_name = brick_dir + "/dir1"
        g.log.info("Check trusted.glusterfs.dht on host  %s for directory %s",
                   brick_server, folder_name)

        ret = get_fattr(brick_server, folder_name, 'trusted.glusterfs.dht')
        self.assertTrue(
            ret, ("Failed to get trusted.glusterfs.dht for %s" % folder_name))
        g.log.info("get trusted.glusterfs.dht xattr for %s successfully",
                   folder_name)