Example #1
    def _reset_the_volume(self):
        """
        Reset the volume
        """
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to reset volume: %s" % self.volname)
        g.log.info("Resetting volume %s was successful", self.volname)
    def tearDown(self):
        """
        tearDown for every test
        Clean up and unmount the volume
        """
        # calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()

        # Delete the glusterfind sessions
        ret, _, _ = gfind_delete(self.mnode, self.volname, self.session)
        if ret:
            raise ExecutionError("Failed to delete session %s" % self.session)
        g.log.info("Successfully deleted session %s", self.session)

        # Remove the outfiles created during 'glusterfind pre'
        for out in self.outfiles:
            ret = remove_file(self.mnode, out, force=True)
            if not ret:
                raise ExecutionError("Failed to remove the outfile %s" % out)
        g.log.info("Successfully removed the outfiles")

        # Reset the volume
        ret, _, _ = volume_reset(self.mnode, self.volname)
        if ret:
            raise ExecutionError("Failed to reset the volume %s"
                                 % self.volname)
        g.log.info("Successfully reset the volume %s", self.volname)

        # Cleanup the volume
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to Cleanup Volume")
        g.log.info("Successful in Cleanup Volume")
Example #3
    def tearDown(self):
        """tear Down Callback"""
        # Unmount volume and cleanup.
        ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to unmount and cleanup volume")
        g.log.info("Successful in unmount and cleanup of volume")

        # Reset the cluster options.
        ret = volume_reset(self.mnode, "all")
        if not ret:
            raise ExecutionError("Failed to Reset the cluster options.")
        g.log.info("Successfully reset cluster options.")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
    def tearDown(self):

        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on servers")

        g.log.info("glusterd is running on all the nodes")

        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in connected state")

        g.log.info("Peers are in connected state")

        # reset quorum ratio to default
        g.log.info("resetting quorum ratio")
        ret, _, _ = volume_reset(self.mnode, 'all')
        self.assertEqual(ret, 0, "Failed to reset quorum ratio")
        g.log.info("Successfully resetted quorum ratio")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s" %
                                 self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Setting quorum ratio to 51%
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.server-quorum-ratio': '51%'})
        if not ret:
            raise ExecutionError("Failed to set server quorum ratio on %s" %
                                 self.volname)
        g.log.info("Able to set server quorum ratio successfully on %s",
                   self.servers)

        GlusterBaseClass.tearDown.im_func(self)
Example #5
    def tearDown(self):
        """
        tearDown for every test
        """
        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Glusterd not started on some of "
                                     "the servers")
        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in peer probed state")

        # reset quorum ratio to default
        g.log.info("resetting quorum ratio")
        ret, _, _ = volume_reset(self.mnode, 'all')
        self.assertEqual(ret, 0, "Failed to reset quorum ratio")
        g.log.info("Successfully resetted quorum ratio")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed Cleanup the Volume %s" % self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Removing brick directories
        if not self.replace_brick_failed:
            node, brick_path = self.random_brick.split(r':')
            cmd = "rm -rf " + brick_path
            ret, _, _ = g.run(node, cmd)
            if ret:
                raise ExecutionError("Failed to delete the brick dir's")

        # Calling GlusterBaseClass tearDown
        GlusterBaseClass.tearDown.im_func(self)
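Note: GlusterBaseClass.tearDown.im_func(self), used here and near the end of Example #3, relies on the Python 2-only im_func attribute of unbound methods. The newer snippets in this collection call the base class through get_super_method instead; a minimal Python 3-friendly sketch of that same final step:

    def tearDown(self):
        # ... test-specific cleanup ...
        # Invoke the base-class tearDown without the Python 2 im_func idiom
        self.get_super_method(self, 'tearDown')()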
Example #6
    def bricks_online_and_volume_reset(cls):
        """
        reset the volume if any bricks are offline.
        waits for all bricks to be online and resets
        volume options set
        """
        bricks_offline = get_offline_bricks_list(cls.mnode, cls.volname)
        if bricks_offline:
            ret, _, _ = volume_start(cls.mnode, cls.volname, force=True)
            if ret:
                raise ExecutionError("Failed to force start volume "
                                     "%s" % cls.volname)
        ret = wait_for_bricks_to_be_online(cls.mnode, cls.volname)
        if not ret:
            raise ExecutionError("Failed to bring bricks online "
                                 "for volume %s" % cls.volname)

        ret, _, _ = volume_reset(cls.mnode, cls.volname, force=True)
        if ret:
            raise ExecutionError("Failed to reset volume %s" % cls.volname)
        g.log.info("Successful in volume reset %s", cls.volname)
    def tearDown(self):
        """tear Down Callback"""
        # Wait for peers to connect.
        ret = wait_for_peers_to_connect(self.mnode, self.servers, 50)
        if not ret:
            raise ExecutionError("Peers are not in connected state.")

        # Unmount volume and cleanup.
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Unmount and Cleanup volume")
        g.log.info("Successful in unmount and cleanup operations")

        # Reset the cluster options.
        ret = volume_reset(self.mnode, "all")
        if not ret:
            raise ExecutionError("Failed to Reset the cluster options.")
        g.log.info("Successfully reset cluster options.")

        # Calling GlusterBaseClass tearDown
        self.get_super_method(self, 'tearDown')()
    def tearDown(self):
        self.get_super_method(self, 'tearDown')()

        # Unmount the volume
        ret = umount_volume(mclient=self.mounts[0].client_system,
                            mpoint=self.mounts[0].mountpoint)
        if not ret:
            raise ExecutionError("Unable to umount the volume")
        g.log.info("Unmounting of the volume %s succeeded", self.volname)

        # The reason for volume reset is, metadata-cache is enabled
        # by group, can't disable the group in glusterfs.
        ret, _, _ = volume_reset(self.mnode, self.volname)
        if ret:
            raise ExecutionError("Unable to reset the volume {}".
                                 format(self.volname))
        g.log.info("Volume: %s reset successful ", self.volname)

        # Cleanup the volume
        if not self.cleanup_volume():
            raise ExecutionError("Unable to perform volume clenaup")
        g.log.info("Volume cleanup is successfull")
    def tearDown(self):

        ret = is_glusterd_running(self.servers)
        if ret:
            ret = start_glusterd(self.servers)
            if not ret:
                raise ExecutionError("Failed to start glusterd on servers")

        g.log.info("glusterd is running on all the nodes")

        # checking for peer status from every node
        count = 0
        while count < 80:
            ret = self.validate_peers_are_connected()
            if ret:
                break
            sleep(2)
            count += 1

        if not ret:
            raise ExecutionError("Servers are not in connected state")

        g.log.info("Peers are in connected state")

        # stopping the volume and Cleaning up the volume
        ret = self.cleanup_volume()
        if not ret:
            raise ExecutionError("Failed to Cleanup the Volume %s" %
                                 self.volname)
        g.log.info("Volume deleted successfully : %s", self.volname)

        # Reset Cluster options
        ret, _, _ = volume_reset(self.mnode, 'all')
        if ret:
            raise ExecutionError("Failed to reset cluster options on %s" %
                                 self.servers)
        g.log.info("Cluster options reset successfully on %s", self.servers)

        self.get_super_method(self, 'tearDown')()
Example #10
    def test_custom_xlator_ops(self):
        '''
        Steps:
        - Perform minimal IO on the mount
        - Enable custom xlator and verify xlator position in the volfile
        - After performing any operation on the custom xlator, set options
          using 'storage.reserve' to validate other xlators aren't affected
        - Add brick to the volume and verify the xlator position in volfile in
          the new brick
        - Replace brick and verify the xlator position in new brick volfile
        - Verify debug xlator is reflected correctly in the volfile when set
        - Validate that setting a non-existent xlator position fails
        - Reset the volume and verify all the options set above are reset

        For more details refer inline comments
        '''

        # Write IO on the mount
        self._simple_io()

        # Set storage.reserve option, just a baseline that set options are
        # working
        self._set_and_assert_volume_option('storage.reserve', '2%')

        # Test mount is accessible in RW
        self._simple_io()

        # Position custom xlator in the graph
        xlator, parent, xtype = 'ro', 'worm', 'user'
        self._enable_xlator(xlator, parent, xtype)

        # Verify mount is accessible as we didn't set any options yet
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, 'Failed to list all files and dirs')

        # Set 'read-only' to 'on'
        self._set_and_assert_volume_option('user.xlator.ro.read-only', 'on')

        # Functional verification that mount should be RO
        self._simple_io(xfail=True)
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, 'Failed to list all files and dirs')

        # Shouldn't affect other xlator options
        self._set_and_assert_volume_option('storage.reserve', '3%')

        # Functional validation that mount should be RW
        self._set_and_assert_volume_option('user.xlator.ro.read-only', 'off')
        self._simple_io()

        # Shouldn't affect other xlator options
        self._set_and_assert_volume_option('storage.reserve', '4%')

        # Add brick to the volume and new brick volfile should have custom
        # xlator
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, 'Unable to expand volume')
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to log volume info and status')
        self._verify_position(xlator, parent, xtype)
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, 'Unable to start rebalance operation post '
            'expanding volume')
        sleep(.5)
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Rebalance on the volume is not completed')

        # Replace on 'pure distribute' isn't recommended
        if self.volume['voltype']['type'] != 'distributed':

            # Replace brick and new brick volfile should have custom xlator
            ret = replace_brick_from_volume(self.mnode, self.volname,
                                            self.servers,
                                            self.all_servers_info)
            self.assertTrue(ret, 'Unable to perform replace brick operation')
            self._verify_position(xlator, parent, xtype)
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(
                ret, 'Heal is not yet completed after performing '
                'replace brick operation')

        # Regression cases
        # Framework should fail when non existing xlator position is supplied
        self._set_and_assert_volume_option('user.xlator.ro',
                                           'unknown',
                                           xfail=True)

        # Any failure in setting xlator option shouldn't result in degraded
        # volume
        self._simple_io()
        self._set_and_assert_volume_option('storage.reserve', '5%')

        # Custom xlator framework touches existing 'debug' xlators and minimal
        # steps to verify no regression
        xlator, parent, xtype = 'delay-gen', 'posix', 'debug'
        self._enable_xlator(xlator, parent, xtype)

        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, 'Failed to list all files and dirs')

        # Volume shouldn't be able to start on using same name for custom
        # xlator and existing xlator
        if self.mount_type != 'nfs':
            xlator, parent, xtype = 'posix', 'posix', 'user'
            self._enable_xlator(xlator, parent, xtype, xsfail=True)

        # Volume reset should remove all the options that are set upto now
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Unable to reset volume')

        # Volume start here is due to earlier failure starting the volume and
        # isn't related to 'volume_reset'
        if self.mount_type != 'nfs':
            ret, _, _ = volume_start(self.mnode, self.volname)
            self.assertEqual(ret, 0, 'Unable to start a stopped volume')
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(
            ret, 'Not all volume processes are online after '
            'starting a stopped volume')
        sleep(self.timeout)
        self._simple_io()

        # Verify options are reset
        vol_info = get_volume_info(self.mnode, self.volname)
        options = vol_info[self.volname]['options']
        negate = ['user.xlator.ro', 'debug.delay-gen', 'storage.reserve']
        for option in negate:
            self.assertNotIn(
                option, options, 'Found {0} in volume info even '
                'after volume reset'.format(option))

        g.log.info(
            'Pass: Validating custom xlator framework for volume %s '
            'is successful', self.volname)
Example #11
    def test_glusterd_split_brain_with_quorum(self):
        """
        - On a 6 node cluster
        - Create a volume using first four nodes
        - Set the volumes options
        - Stop two gluster nodes
        - Perform gluster vol reset
        - Start the glusterd on the nodes where it stopped
        - Check the peer status, all the nodes should be in connected state

        """
        # Before starting the testcase, proceed only if there are at least 6 nodes
        self.assertGreaterEqual(len(self.servers), 6,
                                "Not enough servers to run this test")

        # Volume options to set on the volume
        volume_options = {
            'nfs.disable': 'off',
            'auth.allow': '1.1.1.1',
            'nfs.rpc-auth-allow': '1.1.1.1',
            'nfs.addr-namelookup': 'on',
            'cluster.server-quorum-type': 'server',
            'network.ping-timeout': '20',
            'nfs.port': '2049',
            'performance.nfs.write-behind': 'on',
        }

        # Set the volume options
        ret = set_volume_options(self.mnode, self.volname, volume_options)
        self.assertTrue(ret, "Unable to set the volume options")
        g.log.info("All the volume_options set succeeded")

        # Stop glusterd on two gluster nodes where bricks aren't present
        ret = stop_glusterd(self.servers[-2:])
        self.assertTrue(ret, "Failed to stop glusterd on one of the node")
        g.log.info("Glusterd stop on the nodes : %s "
                   "succeeded", self.servers[-2:])

        # Check glusterd is stopped
        ret = is_glusterd_running(self.servers[-2:])
        self.assertEqual(ret, 1, "Glusterd is running on nodes")
        g.log.info("Expected: Glusterd stopped on nodes %s", self.servers[-2:])

        # Performing volume reset on the volume to remove all the volume
        # options set earlier
        ret, _, err = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume reset failed with below error "
                         "%s" % err)
        g.log.info("Volume reset on the volume %s succeeded", self.volname)

        # Bring back glusterd online on the nodes where it stopped earlier
        ret = start_glusterd(self.servers[-2:])
        self.assertTrue(ret, "Failed to start glusterd on the nodes")
        g.log.info("Glusterd start on the nodes : %s "
                   "succeeded", self.servers[-2:])

        # Check peer status: all peers should be in connected state and none
        # should be in peer rejected state
        halt = 20
        counter = 0
        _rc = False
        g.log.info("Wait for some seconds, right after glusterd start it "
                   "will create two daemon process it need few seconds "
                   "(like 3-5) to initialize the glusterd")
        while counter < halt:
            ret = is_peer_connected(self.mnode, self.servers)
            if not ret:
                g.log.info("Peers are not connected state,"
                           " Retry after 2 seconds .......")
                sleep(2)
                counter = counter + 2
            else:
                _rc = True
                g.log.info("Peers are in connected state in the cluster")
                break
        if not _rc:
            raise ExecutionError("Peers are not connected state after "
                                 "bringing back glusterd online on the "
                                 "nodes in which previously glusterd "
                                 "had been stopped")
    def test_volume_set_ops_sub_dirs_mounted(self):
        """
        Check volume start/volume stop/volume reset operations while sub-dirs
        are mounted

        Steps:
        1. Create two sub-directories on mounted volume.
        2. Unmount volume from clients.
        3. Mount each sub-directory to two different clients.
        4. Perform IO on mounts.
        5. Perform volume stop operation.
        6. Perform volume start operation.
        7. Perform volume reset operation.
        """
        # Creating two sub directories on mounted volume
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd1' in volume %s "
                  "from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d2" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd2' in volume %s "
                  "from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))

        # Unmounting volumes
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Failed to un mount one or more volumes")
        g.log.info("Successfully un mounted all volumes")

        # Mounting one sub directory on each client.
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/d1" % self.volname
        self.subdir_mounts[1].volname = "%s/d2" % self.volname
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount sub directory %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted sub directory %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted sub directories to clients.")

        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.subdir_mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.subdir_mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Stop volume
        g.log.info("Stopping volume: %s", self.volname)
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop volume: %s" % self.volname)

        # Start volume
        g.log.info("Starting volume again: %s", self.volname)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume: %s" % self.volname)

        # Reset volume
        g.log.info("Resetting volume: %s", self.volname)
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to reset volume: %s" % self.volname)
Example #13
    def test_validate_authreject_vol(self):
        """
        -Set Authentication Reject for client1
        -Check if bricks are online
        -Mounting the vol on client1
        -Check if bricks are online
        -Mounting the vol on client2
        -Reset the Volume
        -Check if bricks are online
        -Mounting the vol on client1
        """
        # pylint: disable=too-many-statements

        # Obtain hostname of clients
        ret, hostname_client1, _ = g.run(self.clients[0], "hostname")
        self.assertEqual(
            ret, 0,
            ("Failed to obtain hostname of client %s" % self.clients[0]))
        g.log.info("Obtained hostname of client. IP- %s, hostname- %s",
                   self.clients[0], hostname_client1.strip())

        # Set Authentication
        option = {"auth.reject": hostname_client1.strip()}
        ret = set_volume_options(self.mnode, self.volname, option)
        self.assertTrue(
            ret, ("Failed to set authentication with option: %s" % option))
        g.log.info("Authentication Set successfully with option: %s", option)

        # Fetching all the bricks
        self.mountpoint = "/mnt/testvol"
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Brick list is empty")
        g.log.info("Brick List : %s", bricks_list)

        # Check are bricks online
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, "All bricks are not online")

        # Using this way to check because of bug 1586036
        # Mounting volume
        ret, _, _ = mount_volume(self.volname, self.mount_type,
                                 self.mountpoint, self.mnode, self.clients[0])

        # Checking if volume is mounted
        out = is_mounted(self.volname,
                         self.mountpoint,
                         self.mnode,
                         self.clients[0],
                         self.mount_type,
                         user='******')
        if (ret == 0) & (not out):
            g.log.error("Mount executed successfully due to bug 1586036")
        elif (ret == 1) & (not out):
            g.log.info("Expected:Mounting has failed successfully")
        else:
            raise ExecutionError(
                "Unexpected Mounting of Volume %s successful" % self.volname)

        # Checking client logs for authentication error
        cmd = ("grep AUTH_FAILED /var/log/glusterfs/mnt-" "testvol.log")
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(
            ret, 0, "Mounting has not failed due to"
            "authentication error")
        g.log.info("Mounting has failed due to authentication error")

        # Mounting the vol on client2
        # Check bricks are online
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, "All bricks are not online")

        # Mounting Volume
        ret, _, _ = mount_volume(self.volname, self.mount_type,
                                 self.mountpoint, self.mnode, self.clients[1])
        self.assertEqual(ret, 0, "Failed to mount volume")
        g.log.info("Mounted Successfully")

        # Checking if volume is mounted
        out = is_mounted(self.volname,
                         self.mountpoint,
                         self.mnode,
                         self.clients[1],
                         self.mount_type,
                         user='******')
        self.assertTrue(out, "Volume %s has failed to mount" % self.volname)

        # Reset Volume
        ret, _, _ = volume_reset(mnode=self.mnode, volname=self.volname)
        self.assertEqual(ret, 0, "Failed to reset volume")
        g.log.info("Volume %s reset operation is successful", self.volname)

        # Checking if bricks are online
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, "All bricks are not online")

        # Mounting Volume
        ret, _, _ = mount_volume(self.volname, self.mount_type,
                                 self.mountpoint, self.mnode, self.clients[0])
        self.assertEqual(ret, 0, "Failed to mount volume")
        g.log.info("Mounted Successfully")

        # Checking if Volume is mounted
        out = is_mounted(self.volname,
                         self.mountpoint,
                         self.servers[0],
                         self.clients[0],
                         self.mount_type,
                         user='******')
        self.assertTrue(out, "Volume %s has failed to mount" % self.volname)
        g.log.info("Volume is mounted successfully %s", self.volname)
Example #14
    def test_validate_snaps_restore(self):
        # pylint: disable=too-many-statements
        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Setting some volume option related to snapshot
        option_before_restore = {
            'volumeConfig': [{
                'softLimit': '100',
                'effectiveHardLimit': '200',
                'hardLimit': '256'
            }],
            'systemConfig': {
                'softLimit': '90%',
                'activateOnCreate': 'disable',
                'hardLimit': '256',
                'autoDelete': 'disable'
            }
        }
        ret = set_snap_config(self.mnode, option_before_restore)
        self.assertTrue(ret,
                        ("Failed to set vol option on  %s" % self.volname))
        g.log.info("Volume options for%s is set successfully", self.volname)

        # Get brick list before taking snap_restore
        bricks_before_snap_restore = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List before snap restore "
                   "volume: %s", bricks_before_snap_restore)

        # Creating snapshot
        ret = snap_create(self.mnode, self.volname, "snap1")
        self.assertTrue(ret,
                        ("Failed to create snapshot for %s" % self.volname))
        g.log.info("Snapshot snap1 created successfully for volume  %s",
                   self.volname)

        # Again start IO on all mounts.
        all_mounts_procs = []
        count = 1000
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Reset volume to make sure volume options will reset
        ret, _, _ = volume_reset(self.mnode, self.volname, force=False)
        self.assertEqual(ret, 0, ("Failed to reset %s" % self.volname))
        g.log.info("Reset of volume %s is successful", self.volname)

        # Removing one brick
        g.log.info("Starting volume shrink")
        ret = shrink_volume(self.mnode, self.volname, force=True)
        self.assertTrue(ret, ("Failed to shrink the volume on "
                              "volume %s", self.volname))
        g.log.info("Shrinking volume is successful on "
                   "volume %s", self.volname)

        # Restore snapshot
        ret = snap_restore_complete(self.mnode, self.volname, "snap1")
        self.assertTrue(ret, ("Failed to restore snap snap1 on the "
                              "volume %s", self.volname))
        g.log.info(
            "Restore of volume is successful from snap1 on "
            "volume  %s", self.volname)

        # Validate volume is up and running
        g.log.info("Verifying volume is up and process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Get volume options post restore
        option_after_restore = get_snap_config(self.mnode)
        # Compare volume options
        self.assertNotEqual(option_before_restore, option_after_restore,
                            "Volume Options are not same after snap restore")

        # Get brick list post restore
        bricks_after_snap_restore = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List after snap restore "
                   "volume: %s", bricks_after_snap_restore)
        # Compare brick_list
        self.assertNotEqual(bricks_before_snap_restore,
                            bricks_after_snap_restore,
                            "Bricks are not same after snap restore")

        # Creating snapshot
        ret = snap_create(self.mnode, self.volname, "snap2")
        self.assertTrue(ret,
                        ("Failed to create snapshot for %s" % self.volname))
        g.log.info("Snapshot snap2 created successfully for volume  %s",
                   self.volname)

        # Again start IO on all mounts after restore
        all_mounts_procs = []
        count = 1000
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")
Example #15
    def test_clone_delete_snap(self):
        """
        clone from snap of one volume
        * Create and Mount the volume
        * Enable some volume options
        * Creating 2 snapshots and activate
        * reset the volume
        * create a clone of snapshots created
        * Mount both the clones
        * Perform I/O on mount point
        * Check volume options of cloned volumes
        * Create snapshot of the cloned snapshot volume
        * cleanup snapshots and volumes
        """

        # pylint: disable=too-many-statements, too-many-locals
        # Enabling Volume options on the volume and validating
        g.log.info("Enabling volume options for volume %s ", self.volname)
        options = {" features.uss": "enable"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(
            ret, ("Failed to set volume options for volume %s" % self.volname))
        g.log.info("Successfully set volume options"
                   "for volume %s", self.volname)

        # Validate feature.uss enabled or not
        g.log.info("Validating feature.uss is enabled")
        option = "features.uss"
        vol_option = get_volume_options(self.mnode, self.volname, option)
        self.assertEqual(vol_option['features.uss'], 'enable', "Failed"
                         " to validate "
                         "volume options")
        g.log.info("Successfully validated volume options"
                   "for volume %s", self.volname)

        # Creating snapshot
        g.log.info("Starting to Create snapshot")
        for snap_count in range(0, 2):
            ret, _, _ = snap_create(self.mnode, self.volname,
                                    "snap%s" % snap_count)
            self.assertEqual(
                ret, 0,
                ("Failed to create snapshot for volume %s" % self.volname))
            g.log.info("Snapshot snap%s created successfully"
                       "for volume %s", snap_count, self.volname)

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        for snap_count in range(0, 2):
            ret, _, _ = snap_activate(self.mnode, "snap%s" % snap_count)
            self.assertEqual(
                ret, 0, ("Failed to Activate snapshot snap%s" % snap_count))
            g.log.info("Snapshot snap%s activated successfully", snap_count)

        # Reset volume:
        g.log.info("Starting to Reset Volume")
        ret, _, _ = volume_reset(self.mnode, self.volname, force=False)
        self.assertEqual(ret, 0, ("Failed to reset volume %s" % self.volname))
        g.log.info("Reset Volume on volume %s is Successful", self.volname)

        # Validate feature.uss enabled or not
        g.log.info("Validating feature.uss is enabled")
        option = "features.uss"
        vol_option = get_volume_options(self.mnode, self.volname, option)
        self.assertEqual(vol_option['features.uss'], 'off', "Failed"
                         " to validate "
                         "volume options")
        g.log.info("Successfully validated volume options"
                   "for volume %s", self.volname)

        # Verify volume's all process are online
        g.log.info("Starting to Verify volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are"
                              "not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Creating and starting a Clone of snapshot
        g.log.info("Starting to Clone Snapshot")
        for clone_count in range(0, 2):
            ret, _, _ = snap_clone(self.mnode, "snap%s" % clone_count,
                                   "clone%s" % clone_count)
            self.assertEqual(ret, 0,
                             ("Failed to clone clone%s volume" % clone_count))
            g.log.info("clone%s volume created successfully", clone_count)

        # Start Cloned volume
        g.log.info("starting to Validate clone volumes are started")
        for clone_count in range(0, 2):
            ret, _, _ = volume_start(self.mnode, "clone%s" % clone_count)
            self.assertEqual(ret, 0, ("Failed to start clone%s" % clone_count))
            g.log.info("clone%s started successfully", clone_count)
        g.log.info("All the clone volumes are started Successfully")

        # Validate Volume start of cloned volume
        g.log.info("Starting to Validate Volume start")
        for clone_count in range(0, 2):
            vol_info = get_volume_info(self.mnode, "clone%s" % clone_count)
            if vol_info["clone%s" % clone_count]['statusStr'] != 'Started':
                raise ExecutionError("Failed to get volume info for clone%s" %
                                     clone_count)
            g.log.info("Volume clone%s is in Started state", clone_count)

        # Validate feature.uss enabled or not
        g.log.info("Validating feature.uss is enabled")
        option = "features.uss"
        for clone_count in range(0, 2):
            vol_option = get_volume_options(self.mnode,
                                            "clone%s" % clone_count, option)
            self.assertEqual(vol_option['features.uss'], 'enable', "Failed"
                             " to validate"
                             "volume options")
            g.log.info(
                "Successfully validated volume options"
                "for volume clone%s", clone_count)

        # Mount both the cloned volumes
        g.log.info("Mounting Cloned Volumes")
        for mount_obj in range(0, 2):
            self.mpoint = "/mnt/clone%s" % mount_obj
            cmd = "mkdir -p  %s" % self.mpoint
            ret, _, _ = g.run(self.clients[0], cmd)
            self.assertEqual(ret, 0, ("Creation of directory %s"
                                      "for mounting"
                                      "volume %s failed: Directory already"
                                      "present" %
                                      (self.mpoint, "clone%s" % mount_obj)))
            g.log.info(
                "Creation of directory %s for mounting volume %s "
                "success", self.mpoint, ("clone%s" % mount_obj))
            ret, _, _ = mount_volume("clone%s" % mount_obj, self.mount_type,
                                     self.mpoint, self.mnode, self.clients[0])
            self.assertEqual(ret, 0, ("clone%s is not mounted" % mount_obj))
            g.log.info("clone%s is mounted Successfully", mount_obj)

        # Perform I/O on mount
        # Start I/O on all mounts
        g.log.info("Starting to Perform I/O on Mountpoint")
        all_mounts_procs = []
        for mount_obj in range(0, 2):
            cmd = ("cd /mnt/clone%s/; for i in {1..10};"
                   "do touch file$i; done; cd;") % mount_obj
            proc = g.run(self.clients[0], cmd)
            all_mounts_procs.append(proc)
        g.log.info("I/O on mountpoint is successful")

        # create snapshot
        g.log.info("Starting to Create snapshot of clone volume")
        ret0, _, _ = snap_create(self.mnode, "clone0", "snap2")
        self.assertEqual(ret0, 0, "Failed to create the snapshot"
                         "snap2 from clone0")
        g.log.info("Snapshots snap2 created successfully from clone0")
        ret1, _, _ = snap_create(self.mnode, "clone1", "snap3")
        self.assertEqual(ret1, 0, "Failed to create the snapshot snap3"
                         "from clone1")
        g.log.info("Snapshots snap3 created successfully from clone1")

        # Listing all Snapshots present
        g.log.info("Starting to list all snapshots")
        ret, _, _ = snap_list(self.mnode)
        self.assertEqual(ret, 0, ("Failed to list snapshots present"))
        g.log.info("Snapshots successfully listed")
    def test_glusterd_quorum_validation(self):
        """
        -> Creating two volumes and starting them, stop the second volume
        -> set the server quorum and set the ratio to 90
        -> Stop the glusterd in one of the node, so the quorum won't meet
        -> Peer probing a new node should fail
        -> Volume stop will fail
        -> volume delete will fail
        -> volume reset will fail
        -> Start the glusterd on the node where it is stopped
        -> Volume stop, start, delete will succeed once quorum is met
        """
        # pylint: disable=too-many-statements, too-many-branches

        # Peer probe first 3 servers
        servers_info_from_three_nodes = {}
        for server in self.servers[0:3]:
            servers_info_from_three_nodes[server] = self.all_servers_info[
                server]

            # Peer probe the first 3 servers
            ret, _, _ = peer_probe(self.mnode, server)
            self.assertEqual(ret, 0,
                             "Peer probe failed to one of the servers")
        g.log.info("Peer probe to first 3 nodes succeeded")

        self.volume['servers'] = self.servers[0:3]
        # Create a volume using the first 3 nodes
        ret = setup_volume(self.mnode,
                           servers_info_from_three_nodes,
                           self.volume,
                           force=True)
        self.assertTrue(ret, ("Failed to create and start volume"))
        g.log.info("Volume created and started successfully")

        # Creating another volume and stopping it
        second_volume = "second_volume"
        self.volume['name'] = second_volume
        ret = setup_volume(self.mnode,
                           servers_info_from_three_nodes,
                           self.volume,
                           force=True)
        self.assertTrue(ret, ("Failed to create and start volume"))
        g.log.info("Volume created and started succssfully")

        # stopping the second volume
        g.log.info("Stopping the second volume %s", second_volume)
        ret, _, _ = volume_stop(self.mnode, second_volume)
        self.assertEqual(ret, 0, ("Failed to stop the volume"))
        g.log.info("Successfully stopped second volume %s", second_volume)

        # Setting the server-quorum-type as server
        self.options = {"cluster.server-quorum-type": "server"}
        vol_list = get_volume_list(self.mnode)
        self.assertIsNotNone(vol_list, "Failed to get the volume list")
        g.log.info("Fetched the volume list")
        for volume in vol_list:
            g.log.info(
                "Setting the server-quorum-type as server"
                " on volume %s", volume)
            ret = set_volume_options(self.mnode, volume, self.options)
            self.assertTrue(ret, ("Failed to set the quorum type as a server"
                                  " on volume %s", volume))
        g.log.info("Server Quorum type is set as a server")

        # Setting the server quorum ratio to 90
        self.quorum_perecent = {'cluster.server-quorum-ratio': '90%'}
        ret = set_volume_options(self.mnode, 'all', self.quorum_perecent)
        self.assertTrue(ret, ("Failed to set the server quorum ratio "
                              "to 90 on servers"))
        g.log.info("Successfully set server quorum ratio to 90% on servers")

        # Stop glusterd on one of the node
        ret = stop_glusterd(self.servers[2])
        self.assertTrue(ret, ("Failed to stop glusterd on "
                              "node %s", self.servers[2]))
        g.log.info("Glusterd stop on the nodes : %s"
                   " succeeded", self.servers[2])

        # Check glusterd is stopped
        ret = is_glusterd_running(self.servers[2])
        self.assertEqual(ret, 1, "Unexpected: Glusterd is running on node")
        g.log.info("Expected: Glusterd stopped on node %s", self.servers[2])

        # Adding a new peer will fail as quorum not met
        ret, _, _ = peer_probe(self.mnode, self.servers[3])
        self.assertNotEqual(ret, 0,
                            ("Unexpected:"
                             "Succeeded to peer probe new node %s when quorum "
                             "is not met", self.servers[3]))
        g.log.info("Failed to peer probe new node as expected"
                   " when quorum not met")

        # Starting an already stopped volume should fail as quorum is not met
        ret, _, _ = volume_start(self.mnode, second_volume)
        self.assertNotEqual(
            ret, 0, "Unexpected: Successfuly started "
            "volume even when quorum not met.")
        g.log.info(
            "Volume start %s failed as expected when quorum "
            "is not met", second_volume)

        # Stopping a volume should fail; try stopping the first volume
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(
            ret, 1, "Unexpected: Successfully stopped"
            " volume even when quourm is not met")
        g.log.info(
            "volume stop %s failed as expected when quorum "
            "is not met", self.volname)

        # Stopping a volume with force option should fail
        ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
        self.assertNotEqual(
            ret, 0, "Unexpected: Successfully "
            "stopped volume with force. Expected: "
            "Volume stop should fail when quourm is not met")
        g.log.info("volume stop failed as expected when quorum is not met")

        # Deleting a volume should fail. Deleting the second volume.
        ret = volume_delete(self.mnode, second_volume)
        self.assertFalse(
            ret, "Unexpected: Volume delete was "
            "successful even when quourm is not met")
        g.log.info("volume delete failed as expected when quorum is not met")

        # Volume reset should fail when quorum is not met
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Unexpected: Volume reset was "
            "successful even when quorum is not met")
        g.log.info("volume reset failed as expected when quorum is not met")

        # Volume reset should fail even with force when quorum is not met
        ret, _, _ = volume_reset(self.mnode, self.volname, force=True)
        self.assertNotEqual(
            ret, 0, "Unexpected: Volume reset was "
            "successful with force even "
            "when quourm is not met")
        g.log.info("volume reset failed as expected when quorum is not met")

        # Start glusterd on the node where glusterd is stopped
        ret = start_glusterd(self.servers[2])
        self.assertTrue(ret, "Failed to start glusterd on one node")
        g.log.info("Started glusterd on server"
                   " %s successfully", self.servers[2])

        ret = is_glusterd_running(self.servers[2])
        self.assertEqual(ret, 0, ("glusterd is not running on "
                                  "node %s", self.servers[2]))
        g.log.info("glusterd is running on node" " %s ", self.servers[2])

        # Check peer status: all peers should be in connected state and none
        # should be in peer rejected state
        halt, counter, _rc = 30, 0, False
        g.log.info("Wait for some seconds, right after glusterd start it "
                   "will create two daemon process it need few seconds "
                   "(like 3-5) to initialize the glusterd")
        while counter < halt:
            ret = is_peer_connected(self.mnode, self.servers[0:3])
            if not ret:
                g.log.info("Peers are not connected state,"
                           " Retry after 2 seconds .......")
                sleep(2)
                counter = counter + 2
            else:
                _rc = True
                g.log.info("Peers are in connected state in the cluster")
                break

        self.assertTrue(_rc, ("Peers are not connected state after "
                              "bringing back glusterd online on the "
                              "nodes in which previously glusterd "
                              "had been stopped"))

        # Check all bricks are online or wait for the bricks to be online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "All bricks are not online")
        g.log.info("All bricks of the volume %s are online", self.volname)

        # Once quorum is met should be able to cleanup the volume
        ret = volume_delete(self.mnode, second_volume)
        self.assertTrue(ret, "Volume delete failed even when quorum is met")
        g.log.info("volume delete succeed without any issues")

        # Volume stop should succeed
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume stop failed")
        g.log.info("succeeded stopping the volume as expected")

        # volume reset should succeed
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume reset failed ")
        g.log.info("volume reset succeeded as expected when quorum is not met")

        # Peer probe new node should succeed
        ret, _, _ = peer_probe(self.mnode, self.servers[3])
        self.assertEqual(
            ret, 0, ("Failed to peer probe new node even when quorum is met"))
        g.log.info("Succeeded to peer probe new node when quorum met")

        # Check peer status: all peers should be in connected state and none
        # should be in peer rejected state
        halt, counter, _rc = 30, 0, False
        g.log.info("Wait for some seconds, right after peer probe")
        while counter < halt:
            ret = is_peer_connected(self.mnode, self.servers[0:3])
            if not ret:
                g.log.info("Peers are not connected state,"
                           " Retry after 2 seconds .......")
                sleep(2)
                counter = counter + 2
            else:
                _rc = True
                g.log.info("Peers are in connected state in the cluster")
                break

        self.assertTrue(_rc, ("Peers are not connected state"))
    def test_ec_quorumcount_5(self):
        """
        Test Steps:
        - Write IO's when all bricks are online
        - Get subvol from which bricks to be brought down
        - Set volume disperse quorum count to 5
        - Start writing and reading IO's
        - Bring a brick down,say b1
        - Validate write and read is successful
        - Bring a brick down,say b2
        - Validate write has failed and read is successful
        - Start IO's again while quorum is not met on volume
          write should fail and read should pass
        - Add-brick and log
        - Start Rebalance
        - Wait for rebalance,which should fail as quorum is not met
        - Bring brick online
        - Wait for brick to come online
        - Check if bricks are online
        - Start IO's again when all bricks are online
        - IO's should complete successfully
        - Start IO's again and reset volume
        - Bring down other bricks to max redundancy
        - Validating IO's and waiting to complete
        """

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals

        mountpoint = self.mounts[0].mountpoint
        client1 = self.mounts[0].client_system
        client2 = self.mounts[1].client_system

        # Write IO's  when all bricks are online
        writecmd = ("cd %s; for i in `seq 1 100` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writes on files successfully')

        # Select a subvol from which bricks to be brought down
        sub_vols = get_subvols(self.mnode, self.volname)
        bricks_list1 = list(choice(sub_vols['volume_subvols']))
        brick_1, brick_2 = sample(bricks_list1, 2)

        # Set volume disperse quorum count to 5
        ret = set_volume_options(self.mnode, self.volname,
                                 {"disperse.quorum-count": "5"})
        self.assertTrue(
            ret, 'Failed to set volume {}'
            ' options'.format(self.volname))
        g.log.info('Successfully set disperse quorum on %s', self.volname)

        # Start writing and reading IO's
        procwrite, procread, count = [], [], 1
        for mount_obj in self.mounts:
            writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                        "--dirname-start-num %d --dir-depth 5 "
                        "--dir-length 10 --max-num-of-dirs 2 "
                        "--num-of-files 15 %s" %
                        (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               writecmd,
                               user=mount_obj.user)
            procwrite.append(proc)
            count += 10

        self.generate_read_cmd(mountpoint, '1', '10')
        ret = g.run_async(client2, self.readcmd)
        procread.append(ret)
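        # generate_read_cmd() is assumed to populate self.readcmd with a loop
        # that reads file1..file10 from the mountpoint; the reads run
        # asynchronously on the second client alongside the writes.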

        # Bring 1st brick down
        ret = bring_bricks_offline(self.volname, brick_1)
        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_1))
        g.log.info('Brick %s is offline successfully', brick_1)

        writecmd = ("cd %s; for i in `seq 101 110` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished writes on files successfully')

        self.generate_read_cmd(mountpoint, '101', '110')
        ret, _, err = g.run(client1, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Finished reads on files successfully')

        # Bring 2nd brick down
        ret = bring_bricks_offline(self.volname, brick_2)
        self.assertTrue(ret, 'Brick {} is not offline'.format(brick_2))
        g.log.info('Brick %s is offline successfully', brick_2)

        # Validate write has failed and read is successful
        ret = validate_io_procs(procwrite, self.mounts)
        self.assertFalse(
            ret, 'Write successful even after disperse quorum is '
            'not met')
        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')

        ret = validate_io_procs(procread, self.mounts[1])
        self.assertTrue(ret, 'Read operation failed on the client')
        g.log.info('Reads on files successful')

        # Start IO's again while quorum is not met on volume
        procwrite = []
        writecmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                    "--dirname-start-num 20 --dir-depth 1 "
                    "--dir-length 10 --max-num-of-dirs 1 "
                    "--num-of-files 10 %s" %
                    (self.script_upload_path, mountpoint))
        proc = g.run_async(client1, writecmd)
        procwrite.append(proc)
        ret = validate_io_procs(procwrite, self.mounts[0])
        self.assertFalse(
            ret, 'Write successful even after disperse quorum is '
            'not met')
        g.log.info('EXPECTED - Writes failed as disperse quorum is not met')

        self.generate_read_cmd(mountpoint, '1', '100')
        ret, _, err = g.run(client2, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Reads on files successful')

        # Add brick
        ret = expand_volume(self.mnode,
                            self.volname,
                            self.servers,
                            self.all_servers_info,
                            force=True)
        self.assertTrue(
            ret, ("Failed to expand the volume {}".format(self.volname)))
        g.log.info("Expanding volume %s is successful", self.volname)

        # Log Volume Info and Status after expanding the volume
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume {}".format(self.volname)))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ('Rebalance failed on the volume'
                                  ' {}'.format(self.volname)))
        g.log.info('Rebalance has started on volume %s', self.volname)

        # Wait for rebalance to complete
        # Which should also fail as quorum is not met
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=600)
        self.assertFalse(
            ret, "Rebalance passed though disperse quorum "
            "is not met on volume")
        g.log.info(
            "Expected: Rebalance failed on the volume %s,disperse"
            " quorum is not met", self.volname)

        # Bring bricks online
        brick_list = [brick_1, brick_2]
        ret = bring_bricks_online(self.mnode, self.volname, brick_list)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks brought online successfully')

        # Wait for bricks to come online
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, 'Bricks are not online')
        g.log.info('EXPECTED: Bricks are online')

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Start IO's again when all bricks are online
        writecmd = ("cd %s; for i in `seq 101 200` ;"
                    "do dd if=/dev/urandom of=file$i bs=1M "
                    "count=5;done" % mountpoint)
        self.generate_read_cmd(mountpoint, '101', '120')

        # IO's should complete successfully
        ret, _, err = g.run(client1, writecmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Writes on client %s successful', client1)

        ret, _, err = g.run(client2, self.readcmd)
        self.assertEqual(ret, 0, err)
        g.log.info('Read on client %s successful', client2)

        # Start IO's again
        all_mounts_procs, count = [], 30
        for mount_obj in self.mounts:
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d --dir-depth 2 "
                   "--dir-length 10 --max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count += 10

        # Reset volume
        ret, _, err = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, err)
        g.log.info('Reset of volume %s successful', self.volname)
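        # Volume reset clears disperse.quorum-count back to its default, so
        # writes should again succeed as long as no more than the redundancy
        # count of bricks is down in any disperse set.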

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        bricks_to_offline = sample(bricks_list1, 2)
        ret = bring_bricks_offline(self.volname, bricks_to_offline)
        self.assertTrue(ret, 'Redundant bricks not offline')
        g.log.info('Redundant bricks are offline successfully')
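        # With the assumed 4+2 layout, two offline bricks equal the redundancy
        # count, so the in-flight IO from both clients should still complete.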

        # Validating IO's and waiting to complete
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, 'IO failed on some of the clients')
        g.log.info("Successfully validated all IO's")
Ejemplo n.º 18
0
    def test_validate_authreject_vol(self):
        """
        - Set authentication
        - For all the clients:
        - Fetch the bricks
        - Check if bricks are online
        - Create directory
        - Mount the volume
        - Check if it is mounted
        - Check authentication logs
        - Reset the volume
        - Check if bricks are online
        - Mount the volume on client1
        """
        # pylint: disable=too-many-statements
        # Set Authentication
        option = {"auth.reject": "\"*\""}
        ret = set_volume_options(self.mnode, self.volname, option)
        self.assertTrue(ret, "Failed to set authentication")
        g.log.info("Authentication set Successfully")

        for client in self.clients:
            self.mountpoint = '/mnt/testvol'

            # Fetching all the bricks
            g.log.info("Fetching bricks for the volume : %s", self.volname)
            bricks_list = get_all_bricks(self.mnode, self.volname)
            self.assertIsNotNone(bricks_list, "Brick list is empty")
            g.log.info("Brick List : %s", bricks_list)

            # Check if bricks are online
            ret = are_bricks_online(self.mnode, self.volname, bricks_list)
            self.assertTrue(ret, "All bricks are not online")
            g.log.info("All bricks are online")

            # Creating directory to mount
            cmd = ("mkdir -p /mnt/testvol")
            ret, _, _ = g.run(client, cmd)
            self.assertEqual(ret, 0, "Failed to create directory")

            # Using this way to check because of bug 1586036
            # Mounting volume
            ret, _, _ = mount_volume(self.volname, self.mount_type,
                                     self.mountpoint, self.mnode, client)

            # Checking if volume is mounted
            out = is_mounted(self.volname,
                             self.mountpoint,
                             self.mnode,
                             client,
                             self.mount_type,
                             user='******')
            if (ret == 0) & (not out):
                g.log.error("Mount executed successfully due to bug 1586036")
            elif (ret == 1) & (not out):
                g.log.info("Expected:Mounting has failed successfully")
            else:
                raise ExecutionError("Unexpected Mounting of Volume %s"
                                     "successful" % self.volname)

            # Checking client logs for authentication error
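            # The client log file name is derived from the mount point
            # (/mnt/testvol -> mnt-testvol.log); AUTH_FAILED entries confirm
            # the server rejected the client rather than some other failure.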
            cmd = ("grep AUTH_FAILED /var/log/glusterfs/mnt-" "testvol.log")
            ret, _, _ = g.run(client, cmd)
            self.assertEqual(
                ret, 0, "Mounting has not failed due to"
                "authentication error")
            g.log.info("Mounting has failed due to authentication error")

        # Reset Volume
        ret, _, _ = volume_reset(mnode=self.mnode, volname=self.volname)
        self.assertEqual(ret, 0, "Failed to reset volume")
        g.log.info("Volume %s reset operation is successful", self.volname)

        # Check if bricks are online and mount the volume on client1
        # Fetching bricks
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Brick list is empty")
        g.log.info("Brick List : %s", bricks_list)

        # Checking if bricks are online
        ret = are_bricks_online(self.mnode, self.volname, bricks_list)
        self.assertTrue(ret, "All bricks are not online")
        g.log.info("All bricks are online")

        # Creating directory to mount
        cmd = ("mkdir -p /mnt/testvol")
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to create directory")

        # Mounting Volume
        ret, _, _ = mount_volume(self.volname, self.mount_type,
                                 self.mountpoint, self.servers[0],
                                 self.clients[0])
        self.assertEqual(ret, 0, "Failed to mount volume")
        g.log.info("Mounted Successfully")

        # Checking if Volume is mounted
        out = is_mounted(self.volname,
                         self.mountpoint,
                         self.servers[0],
                         self.clients[0],
                         self.mount_type,
                         user='******')
        self.assertTrue(out, "Volume %s has failed to mount" % self.volname)
        g.log.info("Volume is mounted successfully %s", self.volname)