def test_dynamic_provisioning_glusterfile_glusterpod_failure(self):
        """Check that app pod IO on a glusterfile PVC survives a gluster
        pod deletion.

        Creates a storage class, a PVC and an app pod mounting that PVC,
        starts a background ``dd`` write into the mounted path, deletes the
        first gluster pod returned for the PVC, waits until a non-terminating
        glusterfs pod is listed for the same host and is ready, and finally
        asserts that the background IO finished with return code 0.

        Raises:
            ExecutionError: if no new gluster pod name is found before the
                waiter is exhausted.
        """

        # Check that we work with containerized Gluster
        if not self.is_containerized_gluster():
            self.skipTest("Only containerized Gluster clusters are supported.")

        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Create secret and storage class
        self.create_storage_class()

        # Create PVC
        pvc_name = self.create_and_wait_for_pvc()

        # Create app POD with attached volume
        pod_name = oc_create_tiny_pod_with_volume(
            self.node, pvc_name, "test-pvc-mount-on-app-pod",
            mount_path=mount_path)
        # unittest runs cleanups in reverse registration order, so the pod
        # is deleted first and only then awaited for absence.
        self.addCleanup(
            wait_for_resource_absence, self.node, 'pod', pod_name)
        self.addCleanup(oc_delete, self.node, 'pod', pod_name)

        # Wait for app POD be up and running
        wait_for_pod_be_ready(
            self.node, pod_name, timeout=60, wait_step=2)

        # Run IO in background
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Pick up one of the hosts which stores PV brick (4+ nodes case)
        gluster_pod_data = get_gluster_pod_names_by_pvc_name(
            self.node, pvc_name)[0]

        # Delete glusterfs POD from chosen host and wait for spawn of new one
        oc_delete(self.node, 'pod', gluster_pod_data["pod_name"])
        cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
               "grep -v Terminating | awk '{print $1}'") % (
                   gluster_pod_data["host_name"])
        # Poll until the listing yields a pod name; an empty name after the
        # loop means the waiter ran out without ever seeing a new pod.
        new_gluster_pod_name = ""
        for _ in Waiter(600, 15):
            out = self.cmd_run(cmd)
            new_gluster_pod_name = out.strip().split("\n")[0].strip()
            if new_gluster_pod_name:
                break
        if not new_gluster_pod_name:
            error_msg = "exceeded timeout, new gluster pod not created"
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
        g.log.info("new gluster pod name is %s", new_gluster_pod_name)
        wait_for_pod_be_ready(self.node, new_gluster_pod_name)

        # Check that async IO was not interrupted
        ret, _, _ = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
Beispiel #2
0
    def test_uss_snap_active_deactive(self):

        # pylint: disable=too-many-statements
        """
        Steps (volume creation and mounting are not performed in this
        method; presumably the test fixture does them -- confirm in setUp):
        * Perform I/O on mounts
        * Enable USS
        * Validate USS is enabled
        * Validate snapd is running
        * Create 2 snapshots snapy1 & snapy2
        * Validate the number of snapshots
        * Activate snapy1 & snapy2
        * List snaps under .snaps directory
          -- snapy1 and snapy2 should be listed under .snaps
        * Deactivate snapy2
        * Check snaps from the mounts
          -- snapy2 must not be visible as it is deactivated
        * Activate snapy2
        * List snaps under .snaps directory
          -- snapy1 and snapy2 should be listed under .snaps
        """

        def _assert_both_snaps_listed():
            """Assert snapy1 and snapy2 are in the uss_list_snaps output
            of every mount."""
            g.log.info("Listing activated snapshots under .snaps")
            for mount_obj in self.mounts:
                ret, out, _ = uss_list_snaps(mount_obj.client_system,
                                             mount_obj.mountpoint)
                self.assertEqual(
                    ret, 0, "Directory Listing Failed for"
                    " Activated Snapshot")
                validate_dir = out.split('\n')
                for snap_name in ("snapy1", "snapy2"):
                    # The message is shown on failure, so it must describe
                    # the failure rather than the success case.
                    self.assertIn(
                        snap_name, validate_dir,
                        "Failed to validate %s under .snaps directory"
                        % snap_name)
                    g.log.info("Activated Snapshot %s listed Successfully",
                               snap_name)

        # Perform I/O
        g.log.info("Starting IO on all mounts...")
        self.counter = 1
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 2 "
                "--max-num-of-dirs 2 "
                "--num-of-files 2 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("I/O successful on clients")

        # Enable USS
        g.log.info("Enable USS on volume")
        ret, _, _ = enable_uss(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to enable USS on volume")
        g.log.info("Successfully enabled USS on volume")

        # Validate USS is enabled
        g.log.info("Validating USS is enabled")
        ret = is_uss_enabled(self.mnode, self.volname)
        self.assertTrue(ret, "USS is disabled on volume %s" % self.volname)
        g.log.info("USS enabled on volume %s", self.volname)

        # Validate snapd running
        for server in self.servers:
            g.log.info("Validating snapd daemon on:%s", server)
            ret = is_snapd_running(server, self.volname)
            self.assertTrue(ret, "Snapd is Not running on %s" % server)
            g.log.info("Snapd Running on node: %s", server)

        # Create 2 snapshot
        g.log.info("Creating 2 snapshots for volume %s", self.volname)
        for i in range(1, 3):
            ret, _, _ = snap_create(self.mnode, self.volname, "snapy%s" % i)
            self.assertEqual(
                ret, 0, ("Failed to create snapshot for %s" % self.volname))
            g.log.info("Snapshot %s created successfully for volume  %s",
                       "snapy%s" % i, self.volname)

        # Check for no of snaps using snap_list it should be 2 now
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            2, len(snap_list), "No of snaps not consistent "
            "for volume %s" % self.volname)
        g.log.info("Successfully validated number of snaps.")

        # Activate snapshot snapy1 & snapy2
        g.log.info("Activating snapshot snapy1 & snapy2")
        for i in range(1, 3):
            ret, _, _ = snap_activate(self.mnode, "snapy%s" % i)
            self.assertEqual(ret, 0, "Failed to activate snapshot snapy%s" % i)
        g.log.info("Both snapshots activated successfully")

        # list activated snapshots directory under .snaps
        _assert_both_snaps_listed()

        # Deactivate snapshot snapy2
        g.log.info("Deactivating snapshot snapy2")
        ret, _, _ = snap_deactivate(self.mnode, "snapy2")
        self.assertEqual(ret, 0, "Failed to deactivate snapshot snapy2")
        g.log.info("Successfully deactivated snapshot snapy2")

        # validate snapy2 should not present in mountpoint
        ret = view_snaps_from_mount(self.mounts, "snapy2")
        self.assertFalse(
            ret, " UnExpected : Still able to View snapy2"
            " from mount ")
        g.log.info("Successfully verified deactivated snapshot "
                   "snapy2 is not listed")

        # Activate snapshot snapy2
        ret, _, _ = snap_activate(self.mnode, "snapy2")
        self.assertEqual(ret, 0, "Failed to activate Snapshot snapy2")
        g.log.info("Snapshot snapy2 activated successfully")

        # list activated snapshots directory under .snaps
        _assert_both_snaps_listed()
Beispiel #3
0
    def test_data_self_heal_daemon_off(self):
        """
        Test Data-Self-Heal (heal command)

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        - create IO
        - Get areequal before getting bricks offline
        - set the volume option
        "self-heal-daemon": "off"
        - bring down all bricks processes from selected set
        - Get areequal after getting bricks offline and compare with
        areequal before getting bricks offline
        - modify the data
        - bring bricks online
        - set the volume option
        "self-heal-daemon": "on"
        - check daemons and start healing
        - check if heal is completed
        - check for split-brain
        - add bricks
        - do rebalance
        - create 1k files
        - while creating files - kill bricks and bring bricks online one by one
        in cycle
        - validate IO
        """

        def _start_io(create_files_args, action):
            """Start the create_files script asynchronously on each mount.

            The spawned processes are appended to self.all_mounts_procs;
            callers decide whether that list is reset beforehand.
            """
            for mount_obj in self.mounts:
                g.log.info("%s data for %s:%s", action,
                           mount_obj.client_system, mount_obj.mountpoint)
                g.log.info('Creating files...')
                command = ("python %s create_files %s %s" %
                           (self.script_upload_path, create_files_args,
                            mount_obj.mountpoint))
                proc = g.run_async(mount_obj.client_system,
                                   command,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)
            self.io_validation_complete = False

        def _validate_io():
            """Wait for the started IO processes and assert they passed."""
            g.log.info("Wait for IO to complete and validate IO ...")
            ret = validate_io_procs(self.all_mounts_procs, self.mounts)
            self.assertTrue(ret, "IO failed on some of the clients")
            self.io_validation_complete = True
            g.log.info("IO is successful on all mounts")

        def _assert_volume_and_shd_online():
            """Assert volume processes and self-heal daemons are online."""
            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online" % self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

            # Wait for self-heal-daemons to be online
            g.log.info("Waiting for self-heal-daemons to be online")
            ret = is_shd_daemonized(self.all_servers)
            self.assertTrue(
                ret, "Either No self heal daemon process found or "
                "more than one self heal daemon process found")
            g.log.info("All self-heal-daemons are online")

        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Successfully set %s for volume %s",
                   options, self.volname)

        # Creating files on client side
        _start_io("-f 100 --fixed-file-size 1k", "Generating")
        _validate_io()

        # Get areequal before getting bricks offline
        g.log.info('Getting areequal before getting bricks offline...')
        ret, result_before_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal before getting bricks offline '
                   'is successful')

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

        # Select bricks to bring offline.
        # Materialize the filter result: it is logged and passed to several
        # helpers below, and a Python 3 filter object can be consumed once.
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(filter(
            None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                   bricks_to_bring_offline_dict['cold_tier_bricks'] +
                   bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Get areequal after getting bricks offline
        g.log.info('Getting areequal after getting bricks offline...')
        ret, result_after_offline = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal after getting bricks offline '
                   'is successful')

        # Checking areequals before bringing bricks offline
        # and after bringing bricks offline
        self.assertEqual(
            result_before_offline, result_after_offline,
            'Checksums before and '
            'after bringing bricks offline are not equal')
        g.log.info('Checksums before and after bringing bricks offline '
                   'are equal')

        # Modify the data
        self.all_mounts_procs = []
        _start_io("-f 100 --fixed-file-size 10k", "Modifying")
        _validate_io()

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        _assert_volume_and_shd_online()

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Add bricks
        g.log.info("Start adding bricks to volume...")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to expand the volume %s" % self.volname)
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Do rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Failed to start rebalance')
        g.log.info('Rebalance is started')

        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Rebalance is not completed')
        g.log.info('Rebalance is completed successfully')

        # Create 1k files; IO keeps running while bricks are cycled below
        self.all_mounts_procs = []
        _start_io("-f 1000", "Modifying")

        # Kill all bricks in cycle
        bricks_list = get_all_bricks(self.mnode, self.volname)
        for brick in bricks_list:
            # Bring brick offline
            g.log.info('Bringing bricks %s offline', brick)
            ret = bring_bricks_offline(self.volname, [brick])
            self.assertTrue(ret, 'Failed to bring bricks %s offline' % brick)

            ret = are_bricks_offline(self.mnode, self.volname, [brick])
            self.assertTrue(ret, 'Bricks %s are not offline' % brick)
            g.log.info('Bringing bricks %s offline is successful', brick)

            # Bring brick online
            g.log.info('Bringing bricks %s online...', brick)
            ret = bring_bricks_online(self.mnode, self.volname, [brick])
            self.assertTrue(
                ret, 'Failed to bring bricks %s online' % brick)
            g.log.info('Bringing bricks %s online is successful', brick)

            _assert_volume_and_shd_online()

        # Validate IO
        _validate_io()
    def test_rebalance_with_quota_enabled_on_subdirectory(self):
        """
        Test rebalance with quota enabled on subdirectory.
        1. Create directory "main" on the first mount
        2. Enable quota, set hard/soft timeouts to 0 and set a 1GB
           limit on /main
        3. Do some IO to reach the Hard limit
        4. After IO ends, validate quota and compute arequal checksum
        5. Add bricks to the volume.
        6. Start rebalance and check it is in progress
        7. After rebalance is completed, validate quota again and compare
           the arequal checksum with the one taken before rebalance
        """
        # Creating main directory.
        ret = mkdir(self.mounts[0].client_system,
                    "{}/main".format(self.mounts[0].mountpoint))
        self.assertTrue(ret, "mkdir of dir main failed")

        # Enable Quota
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to enable quota on the volume %s" % self.volname)
        g.log.info("Successfully enabled quota on volume %s", self.volname)

        # Set the Quota timeouts to 0 for strict accounting
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, "Failed to set hard-timeout to 0 for %s" % self.volname)
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, "Failed to set soft-timeout to 0 for %s" % self.volname)
        g.log.info(
            "Quota soft and hard timeout has been set to 0 for %s",
            self.volname)

        # Set the quota limit of 1 GB on /main dir of the volume
        ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/main",
                                      "1GB")
        self.assertEqual(ret, 0, "Failed to set Quota for dir /main")
        g.log.info("Successfully set quota limit for dir /main")

        # Do some IO until hard limit is reached.
        cmd = (
            "/usr/bin/env python %s create_files "
            "-f 1024 --fixed-file-size 1M --base-file-name file %s/main/"
            % (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(
            self.mounts[0].client_system, cmd, user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete and validate IO
        self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs,
                                                self.mounts[0]),
                        "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Validate quota
        ret = quota_validate(self.mnode, self.volname,
                             path='/main', hard_limit=1073741824,
                             sl_exceeded=True, hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/main'")
        g.log.info("Quota Validated for path '/main'")

        # Compute arequal checksum.
        # The status flag is checked explicitly so that two failed
        # collections cannot compare as equal further below.
        ret, arequal_checksum_before_rebalance = collect_mounts_arequal(
            self.mounts)
        self.assertTrue(ret, "Failed to get arequal before rebalance")

        # Log Volume info and status before expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to expand the volume %s" % self.volname)
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Log volume info and status after expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Perform rebalance start operation.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to  start rebalance on the volume "
                                  "%s" % self.volname))
        g.log.info("Rebalance started.")

        # Check rebalance is in progress
        rebalance_status = get_rebalance_status(self.mnode, self.volname)
        ret = rebalance_status['aggregate']['statusStr']
        self.assertEqual(ret, "in progress", ("Rebalance is not in  "
                                              "'in progress' state, either "
                                              "rebalance is in completed state"
                                              " or failed to get rebalance "
                                              "status"))

        # Wait till rebalance ends.
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s" % self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Validate quota
        ret = quota_validate(self.mnode, self.volname,
                             path='/main', hard_limit=1073741824,
                             sl_exceeded=True, hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/main'")
        g.log.info("Quota Validated for path '/main'")

        # Compute arequal checksum.
        ret, arequal_checksum_after_rebalance = collect_mounts_arequal(
            self.mounts)
        self.assertTrue(ret, "Failed to get arequal after rebalance")

        # Comparing arequals checksum before and after rebalance.
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
    def test_replace_brick_self_heal_io_in_progress(self):
        """
        - Create directory on mount point and write files/dirs
        - Create another set of files (1K files)
        - While creation of files/dirs are in progress Kill one brick
        - Remove the contents of the killed brick(simulating disk replacement)
        - When the IO's are still in progress, restart glusterd on the nodes
          where we simulated disk replacement to bring back bricks online
        - Start volume heal
        - Wait for IO's to complete
        - Verify whether the files are self-healed
        - Calculate arequals of the mount point and all the bricks
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        # Create dirs with files
        g.log.info('Creating dirs with file...')
        command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "-d 2 -l 2 -n 2 -f 10 %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))
        ret, _, err = g.run(self.mounts[0].client_system,
                            command,
                            user=self.mounts[0].user)
        self.assertFalse(ret, err)
        g.log.info("IO is successful")

        # Creating another set of files (1K files)
        self.all_mounts_procs = []

        # Create dirs with files
        g.log.info('Creating 1K files...')
        command = ("/usr/bin/env python %s create_files "
                   "-f 1500 --fixed-file-size 10k %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts[0])
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(
            filter(None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                          bricks_to_bring_offline_dict['cold_tier_bricks'] +
                          bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Remove the content of the killed bricks
        for brick in bricks_to_bring_offline:
            brick_node, brick_path = brick.split(':')

            # Removing files
            command = ('cd %s ; rm -rf *' % brick_path)
            ret, _, err = g.run(brick_node, command)
            self.assertFalse(ret, err)
            g.log.info('Files are deleted on brick %s', brick)

        # Bring brick online
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal daemons are online")

        # Start healing
        ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Check arequals for "replicated"
        all_bricks = get_all_bricks(self.mnode, self.volname)
        if self.volume_type == "replicated":

            # Get arequal after bricks are online
            ret, arequals = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting arequal after successfully bringing'
                       'bricks online.')
            mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

            # Get arequal on bricks and compare with mount_point_total
            ret, arequals = collect_bricks_arequal(all_bricks)
            self.assertTrue(ret, 'Failed to get arequal on bricks')
            for arequal in arequals:
                brick_total = arequal.splitlines()[-1].split(':')[-1]
                self.assertEqual(
                    mount_point_total, brick_total,
                    'Arequals for mountpoint and brick '
                    'are not equal')
                g.log.info('Arequals for mountpoint and brick are equal')

        # Check arequals for "distributed-replicated"
        if self.volume_type == "distributed-replicated":

            # Get the subvolumes
            subvols_dict = get_subvols(self.mnode, self.volname)
            num_subvols = len(subvols_dict['volume_subvols'])
            g.log.info("Number of subvolumes in volume %s:", num_subvols)

            # Get arequals and compare
            for i in range(0, num_subvols):

                # Get arequal for first brick
                subvol_brick_list = subvols_dict['volume_subvols'][i]
                ret, arequal = collect_bricks_arequal(subvol_brick_list[0])
                self.assertTrue(ret, 'Failed to get arequal on first brick')
                first_brick_total = arequal[0].splitlines()[-1].split(':')[-1]

                # Get arequal for every brick and compare with first brick
                ret, arequals = collect_bricks_arequal(subvol_brick_list)
                self.assertTrue(ret, 'Failed to get arequal on bricks')
                for arequal in arequals:
                    brick_total = arequal.splitlines()[-1].split(':')[-1]
                    self.assertEqual(
                        first_brick_total, brick_total,
                        'Arequals for subvol and brick are '
                        'not equal')
                    g.log.info('Arequals for subvol and brick are equal')
Beispiel #6
0
    def setUpClass(cls):
        """Set up and mount the volume, then populate every mount with data.

        Steps performed once for the whole class:
        - run the GlusterBaseClass class-level setup;
        - set up the volume and mount it;
        - upload the ``file_dir_ops.py`` IO script to the clients;
        - start one ``create_deep_dirs_with_files`` run per mount;
        - wait for the IO to finish and list everything that was created.

        Raises:
            ExecutionError: when volume setup/mount, script upload, the IO
                or the final listing reports failure.
        """
        # Class-level setup of the parent test class comes first
        GlusterBaseClass.setUpClass.im_func(cls)

        # Create the volume and mount it on the clients
        g.log.info("Starting to Setup Volume and Mount Volume")
        if not cls.setup_volume_and_mount_volume(mounts=cls.mounts):
            raise ExecutionError("Failed to Setup_Volume and Mount_Volume")
        g.log.info("Successful in Setup Volume and Mount Volume")

        # Ship the IO helper script to the clients. The very same path is
        # also recorded on the class as script_upload_path.
        g.log.info("Upload io scripts to clients %s for running IO on "
                   "mounts", cls.clients)
        io_script = "/usr/share/glustolibs/io/scripts/file_dir_ops.py"
        cls.script_upload_path = io_script
        if not upload_scripts(cls.clients, io_script):
            raise ExecutionError("Failed to upload IO scripts to clients %s" %
                                 cls.clients)
        g.log.info("Successfully uploaded IO scripts to clients %s",
                   cls.clients)

        # Kick off IO on every mount. Each mount gets its own
        # --dirname-start-num: 11 for the first mount, 12 for the second
        # and so on (mount position + 10).
        io_cmd_template = ("python %s create_deep_dirs_with_files "
                           "--dirname-start-num %d "
                           "--dir-depth 1 "
                           "--dir-length 2 "
                           "--max-num-of-dirs 2 "
                           "--num-of-files 55 %s")
        g.log.info("Starting IO on all mounts...")
        cls.all_mounts_procs = []
        for dirname_start, mount_obj in enumerate(cls.mounts, start=11):
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            io_cmd = io_cmd_template % (io_script, dirname_start,
                                        mount_obj.mountpoint)
            cls.all_mounts_procs.append(
                g.run_async(mount_obj.client_system, io_cmd,
                            user=mount_obj.user))
        cls.io_validation_complete = False

        # The flag was cleared just above, so the wait below always runs
        if cls.io_validation_complete:
            return

        # Block until every IO process has finished
        g.log.info("Wait for IO to complete")
        if not wait_for_io_to_complete(cls.all_mounts_procs, cls.mounts):
            raise ExecutionError("IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # Walk the mounts and list what the IO created
        g.log.info("List all files and directories:")
        if not list_all_files_and_dirs_mounts(cls.mounts):
            raise ExecutionError("Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Beispiel #7
0
    def test_fops_ec_volume(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """
        - 1.Start resource consumption tool
        - 2.Create directory dir1
        - 3.Create 5 dir and 5 files in each dir in directory 1
        - 4.Rename all file inside dir1
        - 5.Truncate at any dir in mountpoint inside dir1
        - 6.Create softlink and hardlink of files in mountpoint
        - 7.Delete op for deleting all file in one of the dirs
        - 8.chmod, chown, chgrp inside dir1
        - 9.Create tiny, small, medium and large file
        - 10.Creating files on client side for dir1
        - 11.Validating IO's and waiting to complete
        - 12.Get arequal before killing the brick
        - 13.Killing 1st brick manually
        - 14.Get arequal after killing 1st brick
        - 15.Killing 2nd brick manually
        - 16.Get arequal after killing 2nd brick
        - 17.Getting arequal and comparing the arequals
        - 18.Deleting dir1
        """

        # Starting resource consumption using top.
        # The log path is built from the cwd of the process running this
        # test and is then used verbatim in the command sent to the clients.
        log_file_mem_monitor = getcwd() + '/mem_usage.log'
        cmd = ('for i in {1..100};do top -n 1 -b|egrep "RES|gluster" & '
               'free -h 2>&1 >> ' + log_file_mem_monitor +
               ' ;sleep 10;done')
        g.log.info(cmd)
        for mount_obj in self.mounts:
            # Fire and forget: the monitor keeps running in the background
            g.run_async(mount_obj.client_system, cmd)

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create directory1")
        g.log.info("Directory 1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start = 1
        end = 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = str(start) + ".." + str(end)
            file_range = str(start) + ".." + str(end)
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            g.run(mount_obj.client_system, cmd)

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            g.run(mount_obj.client_system, cmd)

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1 at mountpoint on dir1
        # NOTE(review): `clients` grows on every iteration and the command
        # is re-run on all clients collected so far, using the current
        # mount's path - confirm this is intended.
        clients = []
        for mount_obj in self.mounts:
            clients.append(mount_obj.client_system)
            cmd = ('cd %s/dir1/dir1/; '
                   'for FILENAME in *;'
                   'do mv $FILENAME Unix_$FILENAME; '
                   'done;' % mount_obj.mountpoint)
            g.run_parallel(clients, cmd)

        # Truncate at any dir in mountpoint inside dir1
        # NOTE(review): unlike the loops below, `start` is never advanced
        # here, so every client truncates the files of dir1/dir1 - confirm.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start + 1)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start = 2
        end = 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (str(start), str(end))
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            g.run(mount_obj.client_system, cmd)

            start += 5
            end += 5

        # Create tiny, small, medium and large file. Offset to differ
        # filenames at diff clients.
        # NOTE(review): the commands use relative file names and do not cd
        # to the mountpoint, so the files land in the remote shell's working
        # directory - confirm whether the mountpoint was intended.
        offset = 1
        for mount_obj in self.mounts:
            cmd = 'fallocate -l 100 tiny_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 20M small_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 200M medium_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            cmd = 'fallocate -l 1G large_file%s.txt' % str(offset)
            g.run(mount_obj.client_system, cmd)
            offset += 1

        # Creating 2TB file if volume is greater
        # than equal to 3TB.
        # The last mount is used explicitly instead of relying on the loop
        # variable leaking out of the loop above, and the command is sent to
        # the client host itself (indexing the host string with [0] would
        # only yield its first character).
        last_mount = self.mounts[-1]
        command = ("df %s" % last_mount.mountpoint)
        rcode, rout, rerr = g.run(last_mount.client_system, command)
        if rcode == 0:
            # Second output line, fourth column: the available space
            # (presumably in 1K blocks, the GNU df default - verify)
            avail = rout.split("\n")[1].split()[3]
            if int(avail) >= 3000000000:
                cmd = 'fallocate -l 2TB tiny_file_large.txt'
                g.run(last_mount.client_system, cmd)
        else:
            # Only report a failure when df actually failed
            g.log.error("Get mountpoint failed: %s", rerr)

        # Creating files on client side for dir1
        # Write IO
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            # Next mount starts at a dirname offset matching --dir-length
            count = count + 10

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get areequal before killing the brick
        g.log.info('Getting areequal before killing of brick...')
        ret, result_before_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal before killing of brick is successful')

        # Kill 1st brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Get areequal after killing 1st brick
        g.log.info('Getting areequal after killing of brick...')
        ret, result_after_killing_brick = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal after killing of brick is successful')

        # Kill 2nd brick manually
        ret = bring_bricks_offline(self.volname, [bricks_list[3]])
        self.assertTrue(ret, 'Brick not offline')
        g.log.info('Brick is offline successfully')

        # Get areequal after killing 2nd brick
        g.log.info('Getting areequal after killing of brick...')
        ret, result_after_killing_brick_2 = (collect_mounts_arequal(
            self.mounts[0]))
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting areequal after killing of brick is successful')

        # Comparing areequals: data seen from the mount must not change
        # when the first brick goes offline
        self.assertEqual(
            result_before_killing_brick, result_after_killing_brick,
            'Areequals are not equals before killing brick '
            'processes and after offlining 1 redundant bricks')
        g.log.info('Areequals are equals before killing brick '
                   'processes and after offlining 1 redundant bricks')

        # Comparing areequals: nor when the second brick goes offline
        self.assertEqual(result_after_killing_brick,
                         result_after_killing_brick_2,
                         'Areequals are not equals after killing 2'
                         ' bricks')
        g.log.info('Areequals are equals after offlining 2 redundant bricks')

        # Delete op for deleting all file in one of the dirs. start is being
        # used as offset like in previous testcase in dir1
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do rm -f $FILENAME; '
                   'done;' % (mount_obj.mountpoint, str(start)))
            g.run(mount_obj.client_system, cmd)
            start += 5

        # Deleting dir1
        cmd = ('rm -rf %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to delete directory1")
        g.log.info("Directory 1 deleted successfully for %s", self.mounts[0])
Beispiel #8
0
    def test_brick_log_messages(self):
        '''
        -> Create volume
        -> Mount volume
        -> write files on mount point
        -> delete files from mount point
        -> check for any errors filled in all brick logs
        '''

        # checking volume mounted or not
        for mount_obj in self.mounts:
            ret = is_mounted(self.volname, mount_obj.mountpoint, self.mnode,
                             mount_obj.client_system, self.mount_type)
            self.assertTrue(ret, "Not mounted on %s"
                            % mount_obj.client_system)
            g.log.info("Mounted on %s", mount_obj.client_system)

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 5 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 10 %s" % (self.script_upload_path,
                                             self.counter,
                                             mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )

        # Getting timestamp, used as suffix for the log backup files
        _, timestamp, _ = g.run_local('date +%s')
        timestamp = timestamp.strip()

        # Getting all bricks
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get brick list")
        g.log.info("Successful in getting brick list %s", brick_list)

        # Build one (node, log name) pair per brick. A plain dict keyed by
        # node would keep only the last brick of a node hosting several
        # bricks, leaving the other brick logs unchecked.
        brick_logs = []
        for brick in brick_list:
            node, brick_path = brick.split(':')
            # Log name is the brick path with '/' turned into '-'; the
            # leading '/' yields an empty first element which is dropped
            brick_log_name = '-'.join(brick_path.split('/')[1:])
            brick_logs.append((node, brick_log_name))

        for node, brick_log_name in brick_logs:
            #  Copying brick logs into other file for backup purpose
            ret, _, _ = g.run(node, 'cp /var/log/glusterfs/bricks/%s.log '
                                    '/var/log/glusterfs/bricks/%s_%s.log'
                              % (brick_log_name, brick_log_name, timestamp))
            if ret:
                raise ExecutionError("Failed to copy brick logs of %s" % node)
            g.log.info("Brick logs copied successfully on node %s", node)

            # Clearing the existing brick log file so that only messages
            # produced by the delete below are left to inspect
            ret, _, _ = g.run(node, 'echo > /var/log/glusterfs/bricks/%s.log'
                              % brick_log_name)
            if ret:
                raise ExecutionError("Failed to clear brick log file on %s"
                                     % node)
            g.log.info("Successfully cleared the brick log files on node %s",
                       node)

        # Deleting files from mount point
        ret, _, _ = g.run(self.mounts[0].client_system, 'rm -rf %s/*'
                          % self.mounts[0].mountpoint)
        self.assertEqual(ret, 0, "Failed to delete files from mountpoint %s"
                         % self.mounts[0].mountpoint)
        g.log.info("Files deleted successfully from mountpoint %s",
                   self.mounts[0].mountpoint)

        # Searching for error messages in brick logs after deleting
        # files from mountpoint
        for node, brick_log_name in brick_logs:
            # Count the lines carrying the ' E ' marker in this brick log
            _, out, _ = g.run(
                node, "grep ' E ' /var/log/glusterfs/bricks/%s.log | wc -l" %
                brick_log_name)
            self.assertEqual(int(out), 0, "Found Error messages in brick "
                                          "log %s on node %s"
                             % (brick_log_name, node))
            g.log.info("No error messages found in brick log %s on node %s",
                       brick_log_name, node)
Beispiel #9
0
    def test_conservative_merge_of_files_heal_command(self):
        """
        - set options:
        "metadata-self-heal": "off",
        "entry-self-heal": "off",
        "data-self-heal": "off",
        "self-heal-daemon": "off"
        - Bring brick 0 offline
        - Creating files on client side
        - Bring brick 0 online
        - Bring brick 1 offline
        - Creating files on client side
        - Bring brick 1 online
        - Get arequal on bricks
        - Setting option
        "self-heal-daemon": "on"
        - Start healing
        - Get arequal on bricks and compare with arequals before healing
        and mountpoint
        """
        # pylint: disable=too-many-statements,too-many-locals

        def get_brick_arequal_total(brick):
            """Run arequal-checksum on one brick and return the value found
            after the last ':' of the last output line."""
            g.log.info('Getting arequal on bricks %s...', brick)
            node, brick_path = brick.split(':')
            command = ('arequal-checksum -p %s '
                       '-i .glusterfs -i .landfill -i .trashcan' % brick_path)
            ret, arequal, _ = g.run(node, command)
            self.assertFalse(ret, 'Failed to get arequal on brick %s' % brick)
            g.log.info('Getting arequal for %s is successful', brick)
            return arequal.splitlines()[-1].split(':')[-1]

        # set options
        bricks_list = get_all_bricks(self.mnode, self.volname)
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
            "self-heal-daemon": "off"
        }
        g.log.info("setting options %s", options)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Bring brick 0 offline
        g.log.info('Bringing bricks %s offline', bricks_list[0])
        ret = bring_bricks_offline(self.volname, [bricks_list[0]])
        self.assertTrue(ret,
                        'Failed to bring bricks %s offline' % bricks_list[0])

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[0]])
        self.assertTrue(ret, 'Bricks %s are not offline' % bricks_list[0])
        g.log.info('Bringing bricks %s offline is successful', bricks_list[0])

        # Creating files on client side
        # NOTE(review): self.all_mounts_procs is not reset before this first
        # round (it is before the second one) - presumably setUp
        # initialises it; verify.
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 0 -l 5 -f 10 --dirname-start-num 1 %s" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring brick 0 online
        g.log.info('Bringing bricks %s online...', bricks_list[0])
        ret = bring_bricks_online(self.mnode, self.volname, [bricks_list[0]])
        self.assertTrue(ret,
                        'Failed to bring bricks %s online' % bricks_list[0])
        g.log.info('Bringing bricks %s online is successful', bricks_list[0])

        # Bring brick 1 offline
        g.log.info('Bringing bricks %s offline', bricks_list[1])
        ret = bring_bricks_offline(self.volname, [bricks_list[1]])
        self.assertTrue(ret,
                        'Failed to bring bricks %s offline' % bricks_list[1])

        ret = are_bricks_offline(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret, 'Bricks %s are not offline' % bricks_list[1])
        g.log.info('Bringing bricks %s offline is successful', bricks_list[1])

        # Creating files on client side (different dirname offset, so the
        # second round does not reuse the names of the first one)
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create files
            g.log.info('Creating files...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 0 -l 5 -f 10 --dirname-start-num 6 %s" %
                       (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Bring brick 1 online
        g.log.info('Bringing bricks %s online...', bricks_list[1])
        ret = bring_bricks_online(self.mnode, self.volname, [bricks_list[1]])
        self.assertTrue(ret,
                        'Failed to bring bricks %s online' % bricks_list[1])
        g.log.info('Bringing bricks %s online is successful', bricks_list[1])

        # Get arequal on bricks
        g.log.info('Getting arequal on bricks...')
        arequals_before_heal = {}
        for brick in bricks_list:
            arequals_before_heal[brick] = get_brick_arequal_total(brick)

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        # The message must be %-formatted; passing a (format, arg) tuple
        # would print the raw tuple on failure
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online" % self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequals for mount
        g.log.info('Getting arequal on mountpoint after healing...')
        ret, arequals = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after healing is successful')
        mount_point_total = arequals[0].splitlines()[-1].split(':')[-1]

        # Get arequal on bricks and compare with mount_point_total
        # It should be the same
        g.log.info('Getting arequal on bricks...')
        arequals_after_heal = {}
        for brick in bricks_list:
            brick_total = get_brick_arequal_total(brick)
            arequals_after_heal[brick] = brick_total
            self.assertEqual(
                mount_point_total, brick_total,
                'Arequals for mountpoint and %s are not equal' % brick)
            g.log.info('Arequals for mountpoint and %s are equal', brick)
        g.log.info('All arequals are equal for replicated')

        # Healing must have changed the bricks: each one missed the files
        # written while it was offline
        self.assertNotEqual(
            arequals_before_heal, arequals_after_heal,
            'Arequals are equal for bricks before (%s) and after (%s) '
            'healing' % (arequals_before_heal, arequals_after_heal))
Beispiel #10
0
    def test_validate_snaps_restore(self):
        """Validate restoring a volume from a snapshot.

        Steps:
        - run IO on all mounts, validate it and stat the created entries
        - apply a snapshot configuration and record the brick list
        - create snapshot "snap1", then run and validate more IO
        - reset the volume options and shrink the volume
        - restore the volume from "snap1" and verify that all volume
          processes are online
        - compare the snapshot configuration and the brick list with the
          values recorded before the restore
        - create snapshot "snap2", then run and validate IO again
        """
        # pylint: disable=too-many-statements

        def run_and_validate_io(dirname_start_num):
            """Run the deep-dir IO workload on every mount and check it.

            Starts one asynchronous IO process per mount (the directory
            start number grows by 10 from mount to mount), then asserts
            that validate_io_procs() and get_mounts_stat() report success.
            """
            io_procs = []
            for mount_obj in self.mounts:
                g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "--dirname-start-num %d "
                       "--dir-depth 2 "
                       "--dir-length 10 "
                       "--max-num-of-dirs 5 "
                       "--num-of-files 5 %s" %
                       (self.script_upload_path, dirname_start_num,
                        mount_obj.mountpoint))
                io_procs.append(g.run_async(mount_obj.client_system,
                                            cmd,
                                            user=mount_obj.user))
                dirname_start_num += 10

            # Validate IO
            g.log.info("Validating IO's")
            ret = validate_io_procs(io_procs, self.mounts)
            self.assertTrue(ret, "IO failed on some of the clients")
            g.log.info("Successfully validated all io's")

            # Get stat of all the files/dirs created.
            g.log.info("Get stat of all the files/dirs created.")
            ret = get_mounts_stat(self.mounts)
            self.assertTrue(ret, "Stat failed on some of the clients")
            g.log.info("Successfully got stat of all files/dirs created")

        # Start IO on all mounts.
        run_and_validate_io(1)

        # Setting some volume option related to snapshot
        option_before_restore = {
            'volumeConfig': [{
                'softLimit': '100',
                'effectiveHardLimit': '200',
                'hardLimit': '256'
            }],
            'systemConfig': {
                'softLimit': '90%',
                'activateOnCreate': 'disable',
                'hardLimit': '256',
                'autoDelete': 'disable'
            }
        }
        # NOTE(review): if set_snap_config() returns a (ret, out, err) tuple
        # this assertTrue can never fail - confirm the helper's return type.
        ret = set_snap_config(self.mnode, option_before_restore)
        self.assertTrue(ret,
                        ("Failed to set vol option on  %s" % self.volname))
        g.log.info("Volume options for%s is set successfully", self.volname)

        # Get brick list before taking snap_restore
        bricks_before_snap_restore = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List before snap restore "
                   "volume: %s", bricks_before_snap_restore)

        # Creating snapshot
        # NOTE(review): other callers unpack snap_create() as (ret, out, err)
        # and compare ret with 0; if that holds here, assertTrue on the whole
        # return value is always satisfied - TODO confirm.
        ret = snap_create(self.mnode, self.volname, "snap1")
        self.assertTrue(ret,
                        ("Failed to create snapshot for %s" % self.volname))
        g.log.info("Snapshot snap1 created successfully for volume  %s",
                   self.volname)

        # Again start IO on all mounts.
        run_and_validate_io(1000)

        # Reset volume to make sure volume options will reset
        ret = volume_reset(self.mnode, self.volname, force=False)
        self.assertTrue(ret, ("Failed to reset %s" % self.volname))
        g.log.info("Reset Volume %s is Successful", self.volname)

        # Removing one brick
        g.log.info("Starting volume shrink")
        ret = shrink_volume(self.mnode, self.volname, force=True)
        self.assertTrue(ret, ("Failed to shrink the volume on "
                              "volume %s" % self.volname))
        g.log.info("Shrinking volume is successful on "
                   "volume %s", self.volname)

        # Restore snapshot
        ret = snap_restore_complete(self.mnode, self.volname, "snap1")
        self.assertTrue(ret, ("Failed to restore snap snap1 on the "
                              "volume %s" % self.volname))
        g.log.info(
            "Restore of volume is successful from snap1 on "
            "volume  %s", self.volname)

        # Validate volume is up and running
        g.log.info("Verifying volume is up and process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, "Volume %s : All process are not online" % self.volname)
        g.log.info("Volume %s : All process are online", self.volname)

        # Get volume options post restore
        option_after_restore = get_snap_config(self.mnode)
        # Compare volume options
        # NOTE(review): assertNotEqual fails only when both values are equal,
        # yet the failure message says they "are not same" - confirm which
        # comparison is intended.
        self.assertNotEqual(option_before_restore, option_after_restore,
                            "Volume Options are not same after snap restore")

        # Get brick list post restore
        bricks_after_snap_restore = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List after snap restore "
                   "volume: %s", bricks_after_snap_restore)
        # Compare brick_list (same caveat about the message as above)
        self.assertNotEqual(bricks_before_snap_restore,
                            bricks_after_snap_restore,
                            "Bricks are not same after snap restore")

        # Creating snapshot
        ret = snap_create(self.mnode, self.volname, "snap2")
        self.assertTrue(ret,
                        ("Failed to create snapshot for %s" % self.volname))
        g.log.info("Snapshot snap2 created successfully for volume  %s",
                   self.volname)

        # Again start IO on all mounts after restore
        run_and_validate_io(1000)
Beispiel #11
0
    def test_oom_on_client_heal_in_progress(self):
        """
        Cycle bricks offline/online around client IO and list the files.

        Written for a 1x(2+1) arbiter replicate volume, where the first
        brick in the list is a data brick and the last one is the arbiter.

        Steps:
        - start creating files on every mount
        - bring the first brick down while the IO is running
        - validate the IO, then bring the first brick back up
        - bring the last brick down and back up again
        - list the files on every mountpoint to make sure the client is
          still able to serve them (i.e. it was not hit by OOM)
        """

        def take_offline(bricks):
            """Bring ``bricks`` down and assert that they are offline."""
            g.log.info('Bringing bricks %s offline...', bricks)
            went_down = bring_bricks_offline(self.volname, bricks)
            self.assertTrue(
                went_down, 'Failed to bring bricks %s offline' % bricks)

            confirmed = are_bricks_offline(self.mnode, self.volname, bricks)
            self.assertTrue(confirmed,
                            'Bricks %s are not offline' % bricks)
            g.log.info('Bringing bricks %s offline is successful', bricks)

        def bring_online(bricks):
            """Bring ``bricks`` back up and assert that the call succeeded."""
            g.log.info('Bringing bricks %s online...', bricks)
            came_up = bring_bricks_online(self.mnode, self.volname, bricks)
            self.assertTrue(
                came_up, 'Failed to bring bricks %s online' % bricks)
            g.log.info('Bringing bricks %s online is successful', bricks)

        # Kick off asynchronous file creation on each client
        for mount in self.mounts:
            g.log.info("Generating data for %s:%s", mount.client_system,
                       mount.mountpoint)
            g.log.info('Creating files...')
            create_cmd = (
                "python %s create_files -f 1000 --fixed-file-size 10k %s"
                % (self.script_upload_path, mount.mountpoint))
            io_proc = g.run_async(mount.client_system, create_cmd,
                                  user=mount.user)
            self.all_mounts_procs.append(io_proc)
        # The IO started above has not been validated yet
        self.io_validation_complete = False

        # Look up the bricks of the volume
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick list: %s", bricks_list)

        # First brick goes down while the IO is still in flight
        first_brick = [bricks_list[0]]
        take_offline(first_brick)

        # The IO must complete successfully despite the missing brick
        io_ok = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(io_ok, "IO failed on some of the clients")
        self.io_validation_complete = True

        # First brick comes back
        bring_online(first_brick)

        # Last brick goes down and comes back
        last_brick = [bricks_list[-1]]
        take_offline(last_brick)
        bring_online(last_brick)

        # Listing the files must work on every mountpoint
        g.log.info('Getting file list from mountpoints...')
        for mount in self.mounts:
            g.log.info("Getting file list for %s:%s", mount.client_system,
                       mount.mountpoint)
            g.log.info('Getting file list...')
            listing = list_files(mount.client_system, mount.mountpoint)
            self.assertIsNotNone(listing)
        g.log.info('Getting file list from mountpoints finished successfully')
    def test_data_self_heal_algorithm_diff_heal_command(self):
        """
        Test Volume Option - 'cluster.data-self-heal-algorithm' : 'diff'

        Description:
        - set the volume options
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        "data-self-heal-algorithm": "diff"
        - create IO
        - set the volume option "self-heal-daemon": "off"
        - bring down all bricks processes from selected set
        - modify the data
        - get arequal before getting bricks online
        - bring bricks online
        - expand volume by adding bricks to the volume
        - do rebalance
        - set the volume option "self-heal-daemon": "on" and check for daemons
        - start healing
        - check if heal is completed
        - check for split-brain
        - calculate arequal and compare it with the arequal taken before
        bringing the bricks online
        """
        # pylint: disable=too-many-branches,too-many-statements
        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
            "data-self-heal-algorithm": "diff"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        # 'self-heal-daemon' is not part of this dict; it is switched off
        # separately after the initial IO, so it is not mentioned here.
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal' "
                   "are set to 'off', "
                   "'data-self-heal-algorithm' "
                   "is set to 'diff' successfully")

        # Creating files on client side (first mount only)
        all_mounts_procs = []
        g.log.info("Generating data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        # Creating files
        command = "/usr/bin/env python %s create_files -f 100 %s" % (
            self.script_upload_path, self.mounts[0].mountpoint)

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Switch the self-heal daemon off before bricks are taken down
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify the data: same create_files call, now with 1M fixed size
        all_mounts_procs = []
        g.log.info("Modifying data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        command = ("/usr/bin/env python %s create_files -f 100 "
                   "--fixed-file-size 1M %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Expand volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume...")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s" % self.volname))
        g.log.info("Expanding volume is successful on volume %s", self.volname)

        # Do rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Failed to start rebalance')
        g.log.info('Rebalance is started')

        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Rebalance is not completed')
        g.log.info('Rebalance is completed successfully')

        # Switch the self-heal daemon back on so healing can run
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # The checksums collected before the bricks came back and after the
        # heal must match (compared order-independently).
        self.assertEqual(sorted(result_before_online),
                         sorted(result_after_online),
                         'Checksums are not equal')
        g.log.info('Checksums are equal')
    def test_subdir_with_removebrick(self):

        # pylint: disable=too-many-statements
        """
        Check that subdir mounts survive a remove-brick operation.

        Steps:
        - Create subdir1 and subdir2 on the mounted volume, then unmount it
        - Auth allow - Client1(subdir1,subdir2),Client2(subdir1,subdir2)
        - Mount subdir1 on client 1 and subdir2 on client 2
        - Start IO on both subdir mounts and validate it
        - Perform remove-brick and wait for volume processes to be online
        - Validate on both clients that the subdirs are still mounted
        """
        # Create  directories subdir1 and subdir2 on mount point
        ret = mkdir(self.mounts[0].client_system,
                    "%s/subdir1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'subdir1' in "
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/subdir2" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'subdir2' in "
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        # unmount volume
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Volumes UnMount failed")
        g.log.info("Volumes UnMounted successfully")

        # Set authentication on the subdirectory subdir1
        # and subdir2 to access by 2 clients
        g.log.info(
            'Setting authentication on subdir1 and subdir2 '
            'for client %s and %s', self.clients[0], self.clients[1])
        ret = set_auth_allow(
            self.volname, self.mnode, {
                '/subdir1': [self.clients[0], self.clients[1]],
                '/subdir2': [self.clients[0], self.clients[1]]
            })
        self.assertTrue(
            ret, 'Failed to set Authentication on volume %s' % self.volname)

        # Both clients use the same local path for their subdir mount
        self.mpoint = "/mnt/Mount_Point1"

        # Mount Subdir1 mount on client 1; the result is verified through
        # is_mounted() below rather than through the return value.
        _, _, _ = mount_volume("%s/subdir1" % self.volname, self.mount_type,
                               self.mpoint, self.mnode, self.clients[0])

        # Checking subdir1 is mounted or not
        ret = is_mounted("%s/subdir1" % self.volname, self.mpoint, self.mnode,
                         self.clients[0], self.mount_type)
        self.assertTrue(ret,
                        "Volume not mounted on mount point: %s" % self.mpoint)
        g.log.info("Volume %s mounted on %s/subdir1", self.volname,
                   self.mpoint)

        # Mount Subdir2 mount on client 2
        _, _, _ = mount_volume("%s/subdir2" % self.volname, self.mount_type,
                               self.mpoint, self.mnode, self.clients[1])

        # Checking subdir2 is mounted or not
        ret = is_mounted("%s/subdir2" % self.volname, self.mpoint, self.mnode,
                         self.clients[1], self.mount_type)
        self.assertTrue(ret,
                        "Volume not mounted on mount point: %s" % self.mpoint)
        g.log.info("Volume %s mounted on %s/subdir2", self.volname,
                   self.mpoint)

        # Start IO on all the subdir mounts.
        # NOTE(review): the copied mount objects only get a new volname;
        # their mountpoint attribute is unchanged while the IO below runs on
        # self.mpoint - confirm the validation helpers look at the right path.
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/subdir1" % self.volname
        self.subdir_mounts[1].volname = "%s/subdir2" % self.volname
        all_mounts_procs = []
        count = 1
        for mount_obj in self.subdir_mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       self.mpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, self.mpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.subdir_mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Perform remove brick operation when subdir is mounted on client
        g.log.info("Start removing bricks from volume")
        ret = shrink_volume(self.mnode, self.volname)
        self.assertTrue(ret, ("Remove brick operation failed on "
                              "%s" % self.volname))
        g.log.info("Remove brick operation is successful on "
                   "volume %s", self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("All volume %s processes failed to come up "
                              "online" % self.volname))
        g.log.info("All volume %s processes came up "
                   "online successfully", self.volname)

        # Log Volume Info and Status after performing remove brick
        g.log.info("Logging volume info and Status after shrinking volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s" % self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Again Checking subdir1 is mounted or not on Client 1
        ret = is_mounted("%s/subdir1" % self.volname, self.mpoint, self.mnode,
                         self.clients[0], self.mount_type)
        self.assertTrue(ret,
                        "Volume not mounted on mount point: %s" % self.mpoint)
        g.log.info("Volume %s mounted on %s/subdir1", self.volname,
                   self.mpoint)

        # Again Checking subdir2 is mounted or not on Client 2
        ret = is_mounted("%s/subdir2" % self.volname, self.mpoint, self.mnode,
                         self.clients[1], self.mount_type)
        self.assertTrue(ret,
                        "Volume not mounted on mount point: %s" % self.mpoint)
        g.log.info("Volume %s mounted on %s/subdir2", self.volname,
                   self.mpoint)
    def test_restore_online_vol(self):

        # pylint: disable=too-many-statements
        """
        Restoring a snapshot of a started volume must be rejected.

        Steps:
        1. Create volume
        2. Mount volume
        3. Perform I/O on mounts
        4. Create 1 snapshots snapy1
        5. Validate snap created
        6. Perform some more I/O
        7. Create 1 more snapshot snapy2
        8. Restore volume to snapy2
          -- Restore should fail with message
             "volume needs to be stopped before restore"
        """

        # Directory start number for the IO workload. It is initialised
        # once, outside the loop, so that the bump at the end of the first
        # iteration is actually used by the second one.
        self.counter = 1

        # Performing step 3 to 7 in loop here
        for i in range(1, 3):
            # Perform I/O
            g.log.info("Starting IO on all mounts...")
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "--dirname-start-num %d "
                       "--dir-depth 2 "
                       "--dir-length 2 "
                       "--max-num-of-dirs 2 "
                       "--num-of-files 2 %s" %
                       (self.script_upload_path, self.counter,
                        mount_obj.mountpoint))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)
            self.io_validation_complete = False

            # Validate IO
            self.assertTrue(
                validate_io_procs(self.all_mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True

            # Get stat of all the files/dirs created.
            g.log.info("Get stat of all the files/dirs created.")
            ret = get_mounts_stat(self.mounts)
            self.assertTrue(ret, "Stat failed on some of the clients")
            g.log.info("Successfully got stat of all files/dirs created")

            # Create snapshot
            g.log.info("Creating snapshot for volume %s", self.volname)
            ret, _, _ = snap_create(self.mnode, self.volname, "snapy%s" % i)
            self.assertEqual(
                ret, 0, ("Failed to create snapshot for %s" % self.volname))
            g.log.info("Snapshot created successfully for volume  %s",
                       self.volname)

            # Check for no of snaps using snap_list: one per iteration so far
            snap_list = get_snap_list(self.mnode)
            self.assertEqual(
                i, len(snap_list), "No of snaps not consistent "
                "for volume %s" % self.volname)
            g.log.info("Successfully validated number of snaps.")

            # Increase counter for next iteration
            self.counter = 1000

        # Restore volume to snapshot snapy2, it should fail
        i = 2
        g.log.info("Starting to restore volume to snapy%s", i)
        ret, _, err = snap_restore(self.mnode, "snapy%s" % i)
        errmsg = ("snapshot restore: failed: Volume (%s) has been started. "
                  "Volume needs to be stopped before restoring a snapshot.\n" %
                  self.volname)
        # The restore is expected to be refused with exactly this stderr
        log_msg = "Expected : %s, but Returned : %s" % (errmsg, err)
        self.assertEqual(err, errmsg, log_msg)
        g.log.info("Expected : Failed to restore volume to snapy%s", i)
    def test_validate_snaps_max_limit(self):
        # pylint: disable=too-many-statements
        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" % (
                       self.script_upload_path, count,
                       mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # set config snap-max-hard-limit for 10 snpas
        cmd_str = ("gluster snapshot config snap-max-hard-limit 10"
                   " --mode=script")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, "Failed to set snap-max-hard-limit to 10.")
        g.log.info("snap-max-hard-limit successfully set for 10.")

        # set config snap-max-soft-limit to 50%
        cmd_str = ("gluster snapshot config snap-max-soft-limit 50"
                   " --mode=script")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, "Failed to set snap-max-soft-limit to 50%.")
        g.log.info("snap-max-soft-limit successfully set for 50%.")

        # Create 5 snaps
        for i in range(1, 6):
            cmd_str = "gluster snapshot create %s %s %s" % ("snapy%s" % i,
                                                            self.volname,
                                                            "no-timestamp")
            ret, _, _ = g.run(self.mnode, cmd_str)
            self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                      % self.volname))
            g.log.info("Snapshot snapy%s created successfully"
                       " for volume  %s", i, self.volname)

        # Check for no. of snaps using snap_list it should be 5
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(5, len(snap_list), "Expected 5 snapshots. "
                         "Found %s snapshots" % len(snap_list))
        g.log.info("Successfully validated number of snapshots.")

        # Validate all 5 snap names created during
        for i in range(1, 6):
            self.assertTrue(("snapy%s" % i in snap_list), "%s snap not "
                            "found " % ("snapy%s" % i))
        g.log.info("Successfully validated names of snapshots")

        # create 6th snapshot
        cmd_str = "gluster snapshot create %s %s %s" % ("snapy6", self.volname,
                                                        "no-timestamp")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, ("Failed to create snap6 "
                                  "for %s" % self.volname))
        g.log.info("Snapshot 'snapy6' created as it is 6th snap")

        # set config snap-max-soft-limit to 100%
        cmd_str = ("gluster snapshot config snap-max-soft-limit 100"
                   " --mode=script")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, "Failed to set snap-max-soft-limit to 100%.")
        g.log.info("snap-max-soft-limit successfully set for 100%.")

        # create 7th snapshot
        cmd_str = "gluster snapshot create %s %s %s" % ("snapy7", self.volname,
                                                        "no-timestamp")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, ("Failed to create "
                                  "snap7 for %s" % self.volname))
        g.log.info("Snapshot 'snapy7' created as it is 7th snap")

        # Create 3 snaps
        for i in range(8, 11, 1):
            cmd_str = "gluster snapshot create %s %s %s" % ("snapy%s" % i,
                                                            self.volname,
                                                            "no-timestamp")
            ret, _, _ = g.run(self.mnode, cmd_str)
            self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                      % self.volname))
            g.log.info("Snapshot snapy%s created successfully "
                       "for volume  %s", i, self.volname)

        # Check for no. of snaps using snap_list it should be 10
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(len(snap_list), 10, "Expected 10 snapshots. "
                         "found %s snapshots" % len(snap_list))
        g.log.info("Successfully validated number of snapshots.")

        # Validate all 10 snap names created
        for i in range(1, 11, 1):
            self.assertTrue(("snapy%s" % i in snap_list), "%s snap not "
                            "found " % ("snapy%s" % i))
        g.log.info("Successfully validated names of snapshots")

        # create 11th snapshot
        cmd_str = "gluster snapshot create %s %s %s" % ("snap", self.volname,
                                                        "no-timestamp")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertNotEqual(ret, 0, ("Unexpected: successfully created 'snap' "
                                     "for %s" % self.volname))
        g.log.info("Expected: Snapshot 'snap' not created as it is 11th snap")

        # Check for no. of snaps using snap_list it should be 10
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(len(snap_list), 10, "Expected 10 snapshots. "
                         "found %s snapshots" % len(snap_list))
        g.log.info("Successfully validated number of snapshots.")

        # modify config snap-max-hard-limit for 20 snpas
        cmd_str = ("gluster snapshot config snap-max-hard-limit 20"
                   " --mode=script")
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, "Failed to set snap-max-hard-limit to 20.")
        g.log.info("snap-max-hard-limit successfully set for 20.")

        # Create 10 snaps
        for i in range(11, 21, 1):
            cmd_str = "gluster snapshot create %s %s %s" % ("snapy%s" % i,
                                                            self.volname,
                                                            "no-timestamp")
            ret, _, _ = g.run(self.mnode, cmd_str)
            self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                      % self.volname))
            g.log.info("Snapshot snapy%s created successfully for "
                       "volume  %s", i, self.volname)

        # Check for no. of snaps using snap_list it should be 20
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(len(snap_list), 20, "Expected 20 snapshots. "
                         "found %s snapshots" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")
    def test_heal_info_shouldnot_list_files_being_accessed(self):
        """Heal info must not change just because a file is being accessed.

        Steps:
        - bring brick 1 offline
        - create files and validate IO
        - get heal info summary before accessing a file
        - get first filename from the last subvol (expected to be the
          subvol without offline bricks)
        - access and modify the file
        - while accessing - get heal info summary again
        - compare entries before accessing and while accessing
        - validate IO
        """

        # Bring 1-st brick offline
        brick_to_bring_offline = [self.bricks_list[0]]
        g.log.info('Bringing bricks %s offline...', brick_to_bring_offline)
        ret = bring_bricks_offline(self.volname, brick_to_bring_offline)
        self.assertTrue(ret, 'Failed to bring bricks %s offline'
                        % brick_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 brick_to_bring_offline)
        self.assertTrue(ret, 'Bricks %s are not offline'
                        % brick_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   brick_to_bring_offline)

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)

            # Creating files
            cmd = ("python %s create_files -f 100 %s"
                   % (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        self.io_validation_complete = True

        # Get entries before accessing file
        g.log.info("Getting entries_before_accessing file...")
        entries_before_accessing = get_heal_info_summary(
            self.mnode, self.volname)
        self.assertIsNotNone(entries_before_accessing,
                             'Can`t get heal info summary')
        g.log.info(
            "Getting entries_before_accessing file finished successfully")

        # Get filename to access from active subvol without offline bricks
        # Get last subvol
        subvols = get_subvols(self.mnode, self.volname)
        subvol_without_offline_brick = subvols['volume_subvols'][-1]

        # Get first brick server and brick path
        # and get first file from filelist
        subvol_mnode, mnode_brick = subvol_without_offline_brick[0].split(':')
        ret, file_list, _ = g.run(subvol_mnode, 'ls %s' % mnode_brick)
        # Fail with a clear message instead of an IndexError further down
        # when the listing failed or the brick holds no files.
        self.assertEqual(ret, 0, 'Failed to ls files on %s' % subvol_mnode)
        brick_files = file_list.splitlines()
        self.assertTrue(brick_files,
                        'No files found on %s:%s'
                        % (subvol_mnode, mnode_brick))
        file_to_edit = brick_files[0]

        # Access and modify the file
        g.log.info("Start modifying IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Modifying IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            cmd = ("cd %s/ ; "
                   "dd if=/dev/zero of=%s bs=1G count=1"
                   % (mount_obj.mountpoint, file_to_edit))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            g.log.info("IO on %s:%s is modified successfully",
                       mount_obj.client_system, mount_obj.mountpoint)
        self.io_validation_complete = False

        # Get entries while accessing file
        g.log.info("Getting entries while accessing file...")
        entries_while_accessing = get_heal_info_summary(
            self.mnode, self.volname)
        # Check the summary fetched just now (the original re-checked the
        # "before" summary, so a failure here went unnoticed).
        self.assertIsNotNone(entries_while_accessing,
                             'Can`t get heal info summary')
        g.log.info("Getting entries while accessing file "
                   "finished successfully")

        # Compare dicts before accessing and while accessing.
        # cmp() does not exist on Python 3; a direct equality assertion is
        # equivalent to `cmp(a, b) == 0` and works on both interpreters.
        g.log.info('Comparing entries before modifying and while modifying...')
        self.assertEqual(entries_before_accessing, entries_while_accessing,
                         'Entries before modifying and while modifying '
                         'are not equal')
        g.log.info('Comparison entries before modifying and while modifying '
                   'finished successfully.')

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )
        self.io_validation_complete = True
Beispiel #17
0
    def test_replacing_all_arbiters(self):
        """Replace every arbiter brick while IO is running.

        Steps:
        - Create an arbiter volume 4(2+1) distributed replicate
        - Start writing IO
        - While the I/O's are going on replace all the arbiter bricks
        - check for the new bricks attached successfully
        - Check for heals
        - Validate IO
        """
        # pylint: disable=too-many-locals,too-many-statements
        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume: %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick list: %s", bricks_list)

        # Clear all brick folders. Its need to prevent healing with old files
        for brick in bricks_list:
            g.log.info('Clearing brick %s', brick)
            node, brick_path = brick.split(':')
            # Use `&&` so `rm -rf *` never runs in an unintended directory
            # when the `cd` into the brick path fails.
            ret, _, err = g.run(node, 'cd %s/ && rm -rf *' % brick_path)
            self.assertFalse(ret, err)
            g.log.info('Clearing brick %s is successful', brick)
        g.log.info('Clearing for all brick is successful')

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("python %s create_deep_dirs_with_files "
                       "-d 3 "
                       "-l 3 "
                       "-n 3 "
                       "-f 20 "
                       "%s" % (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # replace bricks: the last brick of each subvol is treated as the
        # arbiter and swapped for a brick at "<old path>new"
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for subvol in subvols:
            g.log.info('Replacing arbiter brick for %s', subvol)
            brick_to_replace = subvol[-1]
            self.bricks_to_clean.append(brick_to_replace)
            new_brick = brick_to_replace + 'new'
            g.log.info("Replacing the brick %s for the volume: %s",
                       brick_to_replace, self.volname)
            ret, _, err = replace_brick(self.mnode, self.volname,
                                        brick_to_replace, new_brick)
            self.assertFalse(ret, err)
            g.log.info('Replaced brick %s to %s successfully',
                       brick_to_replace, new_brick)

        # check replaced bricks
        # NOTE(review): indexes self.bricks_to_clean from 0, so this assumes
        # the list was empty before this test appended to it - confirm.
        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        for index, subvol in enumerate(subvols):
            expected_brick_path = self.bricks_to_clean[index] + 'new'
            brick_to_check = subvol[-1]
            self.assertEqual(expected_brick_path, brick_to_check,
                             'Brick %s is not replaced brick' % brick_to_check)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        # The message must be a formatted string; passing a tuple would
        # print the raw tuple on failure.
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online" % self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s: All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
Beispiel #18
0
    def test_glusterd_rebalance(self):
        """Restart glusterd on all nodes while a rebalance is in progress.

        Steps:
        -> Create Volume
        -> Fuse mount the volume
        -> Perform I/O on fuse mount
        -> Add bricks to the volume
        -> Perform rebalance on the volume
        -> While rebalance is in progress,
        -> restart glusterd on all the nodes in the cluster

        Raises:
            ExecutionError: if the rebalance status cannot be read or the
                rebalance is not 'in progress' right after it was started.
        """

        # run IOs
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 4 "
                   "--dir-length 6 "
                   "--max-num-of-dirs 3 "
                   "--num-of-files 25 %s" % (self.script_upload_path,
                                             self.counter,
                                             mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            # Shift the directory start number so mounts do not collide
            self.counter = self.counter + 10

        # Validate IO
        self.assertTrue(
            validate_io_procs(self.all_mounts_procs, self.mounts),
            "IO failed on some of the clients"
        )

        # Forming brick list
        self.brick_list = form_bricks_list_to_add_brick(
            self.mnode, self.volname, self.servers, self.all_servers_info)

        # Adding Bricks
        ret, _, _ = add_brick(self.mnode, self.volname, self.brick_list)
        self.assertEqual(ret, 0, "Failed to add brick to the volume %s"
                         % self.volname)
        g.log.info("Brick added successfully to the volume %s", self.volname)

        # Performing rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Failed to start rebalance on volume %s'
                         % self.volname)
        g.log.info("Rebalance started successfully on volume %s",
                   self.volname)

        # Checking Rebalance is in progress or not
        rebalance_status = get_rebalance_status(self.mnode, self.volname)
        # NOTE(review): presumably get_rebalance_status returns None (or an
        # empty result) when the status cannot be fetched - the error text
        # below already covers that case, so guard before subscripting to
        # raise ExecutionError rather than a TypeError.
        if (not rebalance_status or
                rebalance_status['aggregate']['statusStr'] != 'in progress'):
            raise ExecutionError("Rebalance is not in 'in progress' state, "
                                 "either rebalance is in compeleted state or"
                                 " failed to get rebalance status")

        # Restart glusterd
        ret = restart_glusterd(self.servers)
        self.assertTrue(ret, "Failed to restart glusterd on servers")
        g.log.info("Glusterd restarted successfully on %s", self.servers)

        # Checking glusterd status
        ret = wait_for_glusterd_to_start(self.servers)
        self.assertTrue(ret, "Glusterd is not running on some of the "
                        "servers")
        g.log.info("Glusterd is running on all servers %s", self.servers)
Beispiel #19
0
    def test_verify_lock_granted_from_2_clients(self):
        """Verify file locks are granted to only one of two clients at a time.

        Steps:
        - Create disperse volume and mount it to 2 clients
        - Create file from 1 client on mount point
        - Take lock from client 1 => Lock is acquired
        - Try taking lock from client 2=> Lock is blocked (as already
          being taken by client 1)
        - Release lock from client1=> Lock is released
        - Take lock from client2
        - Again try taking lock from client 1
        - verify test with once, by disabling eagerlock and other eager lock
          and once by leaving eager and other eagerlock enabled(by default)
        """
        mpoint = self.mounts[0].mountpoint

        # Create a file on client 1
        cmd = "touch {}/test_file".format(mpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create file on client 1")

        # Verifying OCL as ON
        option = "optimistic-change-log"
        option_dict = get_volume_options(self.mnode, self.volname, option)
        self.assertIsNotNone(option_dict,
                             ("Failed to get %s volume option"
                              " for volume %s" % (option, self.volname)))
        self.assertEqual(option_dict['disperse.optimistic-change-log'], 'on',
                         ("%s is not ON for volume %s" %
                          (option, self.volname)))
        g.log.info("Succesfully verified %s value for volume %s", option,
                   self.volname)

        # Repeat the test with eager-lock and other-eager-lock 'on' & 'off'
        for lock_status in ('on', 'off'):
            options = {
                'disperse.eager-lock': lock_status,
                'disperse.other-eager-lock': lock_status
            }
            ret = set_volume_options(self.mnode, self.volname, options)

            self.assertTrue(ret, ("failed to set eagerlock and other "
                                  "eagerlock value as %s " % lock_status))
            g.log.info(
                "Successfully set eagerlock and other eagerlock value"
                " to %s", lock_status)

            # Repeat the test for both the combinations of clients,
            # i.e. (client A, client B) and then (client B, client A)
            clients = [self.mounts[0].client_system,
                       self.mounts[1].client_system]
            for client_1, client_2 in itertools.permutations(clients, r=2):
                # Get lock to file from one client and hold it (-t 30)
                lock_cmd = ("/usr/bin/env python {} -f {}/"
                            "test_file -t 30".format(self.script, mpoint))
                proc = g.run_async(client_1, lock_cmd)
                # Give the background process time to acquire the lock
                time.sleep(5)

                # As the lock is been acquired by one client,
                # try to get lock from the other
                ret, _, _ = g.run(client_2, lock_cmd)
                self.assertEqual(
                    ret, 1,
                    ("Unexpected: {} acquired the lock "
                     "before been released by {}".format(client_2, client_1)))
                g.log.info(
                    "Expected : Lock can't be acquired by %s before "
                    "being released by %s", client_2, client_1)

                # Wait for first client to release the lock.
                ret, _, _ = proc.async_communicate()
                # Format the message; a tuple here would be printed verbatim
                # instead of the interpolated client and mount point.
                self.assertEqual(
                    ret, 0,
                    ("File lock process failed on %s:%s"
                     % (client_1, mpoint)))

                # Try taking the lock from other client and releasing it
                lock_cmd = ("/usr/bin/env python {} -f "
                            "{}/test_file -t 1".format(self.script, mpoint))
                ret, _, _ = g.run(client_2, lock_cmd)
                self.assertEqual(ret, 0,
                                 ("Unexpected:{} Can't acquire the lock even "
                                  "after its been released by {}".format(
                                      client_2, client_1)))
                g.log.info(
                    "Successful, Lock acquired by %s after being "
                    "released by %s", client_2, client_1)
Beispiel #20
0
    def test_write_io_mount_point_resumed_quorum_restored_x3(self):
        """
        - set cluster.quorum-type to auto
        - start I/O from the mount point
        - Do IO and check on subvols with two nodes to reboot
        (do for each subvol)
        - get files to delete/create for nodes to be offline
        - delete files from mountpoint
        - reboot nodes
        - creating files on nodes while rebooting
        - validate for rofs
        - wait for volume processes to be online
        - creating files on nodes after rebooting
        - validate IO
        - Do IO and check on subvols without nodes to reboot
        (do for each subvol)
        - get files to delete/create for nodes to be online
        - delete files from mountpoint
        - reboot nodes
        - creating files on online nodes while rebooting other nodes
        - validate IO
        - Do IO and check and reboot two nodes on all subvols
        - get files to delete/create for nodes to be offline
        - delete files from mountpoint
        - reboot nodes
        - creating files on nodes while rebooting
        - validate for rofs
        - wait for volume processes to be online
        - creating files on nodes after rebooting
        - validate IO
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
        g.log.info("setting cluster.quorum-type to auto on volume %s",
                   self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for"
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = ("python %s create_files -f 30 %s" %
                   (self.script_upload_path, mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        self.io_validation_complete = False
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Do IO and check on subvols with nodes to reboot
        subvols_dict = get_subvols(self.mnode, self.volname)
        for subvol in subvols_dict['volume_subvols']:
            # define nodes to reboot
            brick_list = subvol[0:2]
            nodes_to_reboot = []
            for brick in brick_list:
                node, brick_path = brick.split(':')
                nodes_to_reboot.append(node)

            # get files to delete/create for nodes to be offline
            node, brick_path = brick_list[0].split(':')
            ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
            self.assertFalse(ret, 'Failed to ls files on %s' % node)
            file_list = brick_file_list.splitlines()

            # delete files from mountpoint
            for mount_obj in self.mounts:
                g.log.info("Deleting data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ('cd %s/ ; rm -rf %s' %
                       (mount_obj.mountpoint, ' '.join(file_list)))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(
                    ret, 'Failed to rm file on %s' % mount_obj.client_system)
            g.log.info('Files %s are deleted', file_list)

            # reboot nodes on subvol and wait while rebooting
            g.log.info("Rebooting the nodes %s", nodes_to_reboot)
            ret = reboot_nodes(nodes_to_reboot)
            self.assertTrue(ret,
                            'Failed to reboot nodes %s ' % nodes_to_reboot)

            # Creating files on nodes while rebooting
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Creating data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)

                # Creating files
                cmd = ("cd %s/ ;"
                       "touch %s" %
                       (mount_obj.mountpoint, ' '.join(file_list)))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)

                # Validate IO
                self.io_validation_complete = False
                g.log.info("Validating if IO failed with read-only filesystem")
                ret = is_io_procs_fail_with_rofs(self, self.all_mounts_procs,
                                                 self.mounts)
                self.assertTrue(ret, ("Unexpected error and IO successful"
                                      " on read-only filesystem"))
                self.io_validation_complete = True
                g.log.info("EXPECTED: "
                           "Read-only file system in IO while creating file")

            # check if nodes are online
            counter = 0
            timeout = 300
            _rc = False
            while counter < timeout:
                ret, reboot_results = are_nodes_online(nodes_to_reboot)
                if not ret:
                    g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                    time.sleep(5)
                    counter = counter + 5
                else:
                    _rc = True
                    break

            if not _rc:
                for node in reboot_results:
                    if reboot_results[node]:
                        g.log.info("Node %s is online", node)
                    else:
                        g.log.error(
                            "Node %s is offline even after "
                            "%d minutes", node, timeout / 60.0)
            else:
                g.log.info("All nodes %s are up and running", nodes_to_reboot)

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online", self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

            # Creating files on nodes after rebooting
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Creating data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)

                # Creating files
                cmd = ("cd %s/ ;"
                       "touch %s" %
                       (mount_obj.mountpoint, ' '.join(file_list)))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)

            # Validate IO
            self.io_validation_complete = False
            self.assertTrue(
                validate_io_procs(self.all_mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True

        # Do IO and check on subvols without nodes to reboot
        subvols_dict = get_subvols(self.mnode, self.volname)
        for subvol in subvols_dict['volume_subvols']:
            # define nodes to reboot
            brick_list = subvol[0:2]
            nodes_to_reboot = []
            for brick in brick_list:
                node, brick_path = brick.split(':')
                nodes_to_reboot.append(node)

            # get files to delete/create for nodes to be online
            new_subvols_dict = get_subvols(self.mnode, self.volname)
            subvol_to_operate = new_subvols_dict['volume_subvols']
            subvol_to_operate.remove(subvol)
            brick_list_subvol_online = subvol_to_operate[0]

            node, brick_path_vol_online = \
                brick_list_subvol_online[0].split(':')
            ret, brick_file_list, _ = g.run(node,
                                            'ls %s' % brick_path_vol_online)
            self.assertFalse(ret, 'Failed to ls files on %s' % node)
            file_list = brick_file_list.splitlines()

            # delete files from mountpoint
            for mount_obj in self.mounts:
                g.log.info("Deleting data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ('cd %s/ ; rm -rf %s' %
                       (mount_obj.mountpoint, ' '.join(file_list)))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(
                    ret, 'Failed to rm file on %s' % mount_obj.client_system)
            g.log.info('Files %s are deleted', file_list)

            # reboot nodes on subvol and wait while rebooting
            g.log.info("Rebooting the nodes %s", nodes_to_reboot)
            ret = reboot_nodes(nodes_to_reboot)
            self.assertTrue(ret,
                            'Failed to reboot nodes %s ' % nodes_to_reboot)

            # Creating files on nodes while rebooting
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Creating data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)

                # Creating files
                cmd = ("cd %s/ ;"
                       "touch %s" %
                       (mount_obj.mountpoint, ' '.join(file_list)))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)

                # Validate IO
                self.io_validation_complete = False
                self.assertTrue(
                    validate_io_procs(self.all_mounts_procs, self.mounts),
                    "IO failed on some of the clients")
                self.io_validation_complete = True

            # check if nodes are online
            counter = 0
            timeout = 300
            _rc = False
            while counter < timeout:
                ret, reboot_results = are_nodes_online(nodes_to_reboot)
                if not ret:
                    g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                    time.sleep(5)
                    counter = counter + 5
                else:
                    _rc = True
                    break

            if not _rc:
                for node in reboot_results:
                    if reboot_results[node]:
                        g.log.info("Node %s is online", node)
                    else:
                        g.log.error(
                            "Node %s is offline even after "
                            "%d minutes", node, timeout / 60.0)
            else:
                g.log.info("All nodes %s are up and running", nodes_to_reboot)

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online", self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

        # Do IO and check and reboot nodes on all subvols
        subvols_dict = get_subvols(self.mnode, self.volname)
        nodes_to_reboot = []
        file_list_for_all_subvols = []
        for subvol in subvols_dict['volume_subvols']:
            # define nodes to reboot
            brick_list = subvol[0:2]
            for brick in brick_list:
                node, brick_path = brick.split(':')
                nodes_to_reboot.append(node)

            # get files to delete/create for nodes to be offline
            node, brick_path = brick_list[0].split(':')
            ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
            self.assertFalse(ret, 'Failed to ls files on %s' % node)
            file_list = brick_file_list.splitlines()
            file_list_for_all_subvols.append(file_list)

            # delete files from mountpoint
            for mount_obj in self.mounts:
                g.log.info("Deleting data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ('cd %s/ ; rm -rf %s' %
                       (mount_obj.mountpoint, ' '.join(file_list)))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret, 'Failed to rm file on %s' % node)
            g.log.info('Files %s are deleted', file_list)

        # reboot nodes on subvol and wait while rebooting
        g.log.info("Rebooting the nodes %s", nodes_to_reboot)
        ret = reboot_nodes(nodes_to_reboot)
        self.assertTrue(ret, 'Failed to reboot nodes %s ' % nodes_to_reboot)

        # Creating files on nodes while rebooting
        all_mounts_procs, all_mounts_procs_1, all_mounts_procs_2 = [], [], []
        # Create files for 1-st subvol and get all_mounts_procs_1
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[0])))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs_1.append(proc)
            all_mounts_procs.append(all_mounts_procs_1)

        # Create files for 2-st subvol and get all_mounts_procs_2
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[1])))

            proc2 = g.run_async(mount_obj.client_system,
                                cmd,
                                user=mount_obj.user)
            all_mounts_procs_2.append(proc2)
            all_mounts_procs.append(all_mounts_procs_2)

        for mounts_procs in all_mounts_procs:
            # Validate IO
            self.io_validation_complete = False
            g.log.info("Validating if IO failed with read-only filesystem")
            ret = is_io_procs_fail_with_rofs(self, mounts_procs, self.mounts)
            self.assertTrue(ret, ("Unexpected error and IO successful"
                                  " on read-only filesystem"))
            self.io_validation_complete = True
            g.log.info("EXPECTED: "
                       "Read-only file system in IO while creating file")

        # check if nodes are online
        counter = 0
        timeout = 300
        _rc = False
        while counter < timeout:
            ret, reboot_results = are_nodes_online(nodes_to_reboot)
            if not ret:
                g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                time.sleep(5)
                counter = counter + 5
            else:
                _rc = True
                break

        if not _rc:
            for node in reboot_results:
                if reboot_results[node]:
                    g.log.info("Node %s is online", node)
                else:
                    g.log.error("Node %s is offline even after "
                                "%d minutes", node, timeout / 60.0)
        else:
            g.log.info("All nodes %s are up and running", nodes_to_reboot)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Creating files on nodes after rebooting
        all_mounts_procs, all_mounts_procs_1, all_mounts_procs_2 = [], [], []
        # Create files for 1-st subvol and get all_mounts_procs_1
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[0])))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs_1.append(proc)
            all_mounts_procs.append(all_mounts_procs_1)

        # Create files for 2-st subvol and get all_mounts_procs_2
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[1])))

            proc2 = g.run_async(mount_obj.client_system,
                                cmd,
                                user=mount_obj.user)
            all_mounts_procs_2.append(proc2)
            all_mounts_procs.append(all_mounts_procs_2)

        for mounts_procs in all_mounts_procs:
            # Validate IO
            self.io_validation_complete = False
            self.assertTrue(
                validate_io_procs(self.all_mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True
    def test_dynamic_provisioning_glusterfile_gluster_pod_or_node_failure(
            self):
        """Check that app pod IO survives a gluster pod or node failure.

        Steps:
        - Create a storage class, a PVC and an app pod that mounts the PVC.
        - Start a background ``dd`` write into the mounted volume.
        - Containerized Gluster: delete the first gluster pod reported for
          the PVC, then wait for a new glusterfs pod on the same host to
          show up and become ready.
        - Otherwise: reboot the gluster node reported for the PVC, after
          confirming that heketi knows a node with the same storage IP.
        - Assert that the background IO exited with return code 0.

        Raises:
            AssertionError: if no new gluster pod appears before the waiter
                expires, if no heketi node matches the PVC hosting node, or
                if the background IO fails.
        """
        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Create secret and storage class
        self.create_storage_class()

        # Create PVC
        pvc_name = self.create_and_wait_for_pvc()

        # Create app POD with attached volume
        pod_name = oc_create_tiny_pod_with_volume(
            self.node, pvc_name, "test-pvc-mount-on-app-pod",
            mount_path=mount_path)
        # unittest runs cleanups in reverse registration order, so the pod
        # is deleted first and only then is its absence waited for.
        self.addCleanup(
            wait_for_resource_absence, self.node, 'pod', pod_name)
        self.addCleanup(oc_delete, self.node, 'pod', pod_name)

        # Wait for app POD be up and running
        wait_for_pod_be_ready(
            self.node, pod_name, timeout=60, wait_step=2)

        # Run IO in background
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Check for containerized Gluster
        if self.is_containerized_gluster():
            # Pick up one of the hosts which stores PV brick (4+ nodes case)
            gluster_pod_data = get_gluster_pod_names_by_pvc_name(
                self.node, pvc_name)[0]

            # Delete glusterfs POD from chosen host and wait for
            # spawn of new one
            oc_delete(self.node, 'pod', gluster_pod_data["pod_name"])
            cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
                   "grep -v Terminating | awk '{print $1}'") % (
                       gluster_pod_data["pod_hostname"])
            for w in Waiter(600, 15):
                new_gluster_pod_name = self.cmd_run(cmd)
                if new_gluster_pod_name:
                    break
            if w.expired:
                error_msg = "exceeded timeout, new gluster pod not created"
                g.log.error(error_msg)
                raise AssertionError(error_msg)
            g.log.info("new gluster pod name is %s", new_gluster_pod_name)
            wait_for_pod_be_ready(self.node, new_gluster_pod_name)
        else:
            pvc_hosting_node_ip = get_gluster_host_ips_by_pvc_name(
                self.node, pvc_name)[0]
            heketi_nodes = heketi_node_list(
                self.heketi_client_node, self.heketi_server_url)
            node_ip_for_reboot = None
            # Remember every IP inspected so the failure message is
            # meaningful and never touches an unbound name when heketi
            # reports no nodes at all.
            heketi_node_ips = []
            for heketi_node in heketi_nodes:
                heketi_node_ip = heketi_node_info(
                    self.heketi_client_node, self.heketi_server_url,
                    heketi_node, json=True)["hostnames"]["storage"][0]
                heketi_node_ips.append(heketi_node_ip)
                if heketi_node_ip == pvc_hosting_node_ip:
                    node_ip_for_reboot = heketi_node_ip
                    break

            if not node_ip_for_reboot:
                raise AssertionError(
                    "Gluster node IP %s not matched with any heketi node "
                    "IP %s" % (pvc_hosting_node_ip, heketi_node_ips))

            node_reboot_by_command(node_ip_for_reboot)

        # Check that async IO was not interrupted
        ret, _, _ = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
Beispiel #22
0
    def test_fops_ec_brickdown(self):
        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """Run file operations while bricks are taken down and brought back.

        Steps:
        - 1. Start the resource consumption loop on every server
        - 2. Create directory dir1
        - 3. Create 5 dirs and 5 files in each dir inside dir1 per client
        - 4. Rename all files inside dir1/dir1
        - 5. Truncate the files of one dir inside dir1 per client
        - 6. Create softlinks and hardlinks of files
        - 7. chmod, chown, chgrp inside dir1
        - 8. Create tiny, small, medium and large files with fallocate
        - 9. Start creating deep dirs with files under dir1
        - 10. Bring bricks_list[2:4] offline
        - 11. Validate the IO and wait for it to complete
        - 12. Create dir2
        - 13. Start creating deep dirs with files under dir2
        - 14. Bring the bricks back online
        - 15. Wait for bricks to come online
        - 16. Check that no brick is offline
        - 17. Monitor heal completion
        - 18. Validate the IO and wait for it to complete
        - 19. Check that the memory usage log file exists
        """

        def _start_deep_dirs_io(parent_dir):
            """Start create_deep_dirs_with_files on each mount, return procs.

            The directory start number is 1 for the first mount and grows
            by 10 for every following mount.
            """
            procs = []
            dir_num = 1
            for mount_obj in self.mounts:
                g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                io_cmd = ("/usr/bin/env python %s "
                          "create_deep_dirs_with_files "
                          "--dirname-start-num %d "
                          "--dir-depth 2 "
                          "--dir-length 10 "
                          "--max-num-of-dirs 5 "
                          "--num-of-files 5 %s/%s" %
                          (self.script_upload_path, dir_num,
                           mount_obj.mountpoint, parent_dir))
                procs.append(g.run_async(mount_obj.client_system,
                                         io_cmd,
                                         user=mount_obj.user))
                dir_num += 10
            return procs

        # Starting resource consumption using top
        # NOTE(review): in this shell line the `&` sends the `top | egrep`
        # pipeline to the background, so only the output of `free -h` is
        # appended to the log file - confirm that this is intended.
        log_file_mem_monitor = '/var/log/glusterfs/mem_usage.log'
        cmd = ('for i in {1..100};do top -n 1 -b|egrep \
              "RES|gluster" & free -h 2>&1 >> %s ; \
              sleep 10;done' % (log_file_mem_monitor))
        g.log.info(cmd)
        for server in self.servers:
            g.run_async(server, cmd)

        # get the bricks from the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Brick list is empty")
        g.log.info("Brick List : %s", bricks_list)

        # Creating dir1
        cmd = ('mkdir  %s/dir1' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create dir1")
        g.log.info("dir1 created successfully for %s", self.mounts[0])

        # Create 5 dir and 5 files in each dir at mountpoint on dir1
        start, end = 1, 5
        for mount_obj in self.mounts:
            # Number of dir and files to be created.
            dir_range = file_range = "%s..%s" % (start, end)
            # Create dir 1-5 at mountpoint.
            cmd = ('mkdir %s/dir1/dir{%s};' %
                   (mount_obj.mountpoint, dir_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Directory creation failed")
            g.log.info("Directory created successfull")

            # Create files inside each dir.
            cmd = ('touch %s/dir1/dir{%s}/file{%s};' %
                   (mount_obj.mountpoint, dir_range, file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "File creation failed")
            g.log.info("File created successfull")

            # Increment counter so that at next client dir and files are made
            # with diff offset. Like at next client dir will be named
            # dir6, dir7...dir10. Same with files.
            start += 5
            end += 5

        # Rename all files inside dir1 at mountpoint on dir1
        cmd = ('cd %s/dir1/dir1/; '
               'for FILENAME in *;'
               'do mv $FILENAME Unix_$FILENAME; '
               'done;' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to rename file on client")
        g.log.info("Successfully renamed file on client")

        # Truncate at any dir in mountpoint inside dir1
        # start is an offset to be added to dirname to act on
        # diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s/; '
                   'for FILENAME in *;'
                   'do echo > $FILENAME; '
                   'done;' % (mount_obj.mountpoint, start))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Truncate failed")
            g.log.info("Truncate of files successfull")
            # Move on to the dirs created for the next client (dir6, ...),
            # the same way the link and chmod loops below do; without this
            # every client would truncate dir1 again.
            start += 5

        # Create softlink and hardlink of files in mountpoint. Start is an
        # offset to be added to dirname to act on diff files at diff clients.
        start = 1
        for mount_obj in self.mounts:
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln -s $FILENAME softlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, start))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Creating Softlinks have failed")
            g.log.info("Softlink of files have been changed successfully")

            # Hardlinks go into the directory next to the softlink one.
            cmd = ('cd %s/dir1/dir%s; '
                   'for FILENAME in *; '
                   'do ln $FILENAME hardlink_$FILENAME; '
                   'done;' % (mount_obj.mountpoint, start + 1))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Creating Hardlinks have failed")
            g.log.info("Hardlink of files have been changed successfully")
            start += 5

        # chmod, chown, chgrp inside dir1
        # start and end used as offset to access diff files
        # at diff clients.
        start, end = 2, 5
        for mount_obj in self.mounts:
            dir_file_range = '%s..%s' % (start, end)
            cmd = ('chmod 777 %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Changing mode of files has failed")
            g.log.info("Mode of files have been changed successfully")

            cmd = ('chown root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Changing owner of files has failed")
            g.log.info("Owner of files have been changed successfully")

            cmd = ('chgrp root %s/dir1/dir{%s}/file{%s}' %
                   (mount_obj.mountpoint, dir_file_range, dir_file_range))
            ret, _, _ = g.run(mount_obj.client_system, cmd)
            self.assertFalse(ret, "Changing group of files has failed")
            g.log.info("Group of files have been changed successfully")
            start += 5
            end += 5

        # Create tiny, small, medium and large files. Offset to differ
        # filenames at diff clients.
        # NOTE(review): the file names are relative and no `cd` precedes
        # them, so the files are created wherever g.run starts the command
        # on the client, which is not necessarily the mountpoint - confirm
        # against the intent of this step before changing it.
        file_sizes = (("tiny", "100"), ("small", "20M"),
                      ("medium", "200M"), ("large", "1G"))
        for offset, mount_obj in enumerate(self.mounts, start=1):
            for label, size in file_sizes:
                cmd = 'fallocate -l %s %s_file%s.txt' % (size, label, offset)
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(
                    ret, "Fallocate for %s files failed" % label)
                g.log.info("Fallocate for %s files successfully", label)

        # Creating files on client side for dir1
        # Write IO
        all_mounts_procs = _start_deep_dirs_io("dir1")

        # Bring down other bricks to max redundancy
        # Bringing bricks offline
        ret = bring_bricks_offline(self.volname, bricks_list[2:4])
        self.assertTrue(ret, 'Bricks not offline')
        g.log.info('Bricks are offline successfully')

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Creating dir2
        cmd = ('mkdir  %s/dir2' % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to create dir2 ")
        g.log.info("dir2 created successfully for %s", self.mounts[0])

        # Creating files on client side for dir2
        # Write IO
        all_mounts_procs = _start_deep_dirs_io("dir2")

        # Bring bricks online
        list_of_bricks_to_bring_online = bricks_list[2:4]
        ret = bring_bricks_online(self.mnode, self.volname,
                                  list_of_bricks_to_bring_online)
        self.assertTrue(ret, 'Bricks not brought online')
        g.log.info('Bricks are online successfully')

        # Wait for brick to come online
        g.log.info("Waiting for brick to come online")
        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Bricks are not online")
        g.log.info("EXPECTED : Bricks are online")

        # Check if bricks are online
        ret = get_offline_bricks_list(self.mnode, self.volname)
        self.assertListEqual(ret, [], 'All bricks are not online')
        g.log.info('All bricks are online')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')
        g.log.info('Heal has completed successfully')

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Check file exist for memory log (same path the monitoring loop
        # above appends to)
        g.log.info("Validating log exists")
        ret = file_exists(self.mnode, log_file_mem_monitor)
        self.assertTrue(ret, "Memory log file does not exist")
        g.log.info("Memory log file exists")
    def test_disperse_removebrick(self):

        # pylint: disable=too-many-branches,too-many-statements,too-many-locals
        """Shrink the volume while IO is running and verify IO afterwards.

        Steps:
        - Start IO on all mounts
        - Shrink the volume (remove-brick) through shrink_volume
        - Log volume info and status
        - Wait for the volume processes to be online
        - Validate the IO started before the shrink
        - Start a second round of IO on all mounts and validate it
        """

        def _start_io(first_dir_num):
            """Start create_deep_dirs_with_files on each mount, return procs.

            first_dir_num is the --dirname-start-num used on the first
            mount; every following mount gets a value 10 higher.
            """
            procs = []
            dir_num = first_dir_num
            for mount_obj in self.mounts:
                g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "--dirname-start-num %d "
                       "--dir-depth 2 "
                       "--dir-length 10 "
                       "--max-num-of-dirs 5 "
                       "--num-of-files 5 %s" %
                       (self.script_upload_path, dir_num,
                        mount_obj.mountpoint))
                procs.append(g.run_async(mount_obj.client_system,
                                         cmd,
                                         user=mount_obj.user))
                dir_num += 10
            return procs

        # Write IO
        all_mounts_procs = _start_io(1)

        # Start remove-brick (subvolume-decrease)
        g.log.info("Start removing bricks from volume")
        ret = shrink_volume(self.mnode, self.volname)
        # The assertion messages are formatted with `%` here: passing a
        # (format, arg) tuple as msg would print the raw tuple on failure.
        self.assertTrue(
            ret, "Remove brick operation failed on %s" % self.volname)
        g.log.info("Remove brick operation is successful on "
                   "volume %s", self.volname)

        # Log Volume Info and Status after shrinking the volume
        g.log.info("Logging volume info and Status after shrinking volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s" % self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("All processes for volume %s are not "
                              "online" % self.volname))
        g.log.info("All volume %s processes are now online", self.volname)

        # Validating IO's and waiting to complete
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Start a second round of IO on all mounts after the shrink; the
        # directory numbering starts at 21 instead of 1 this time.
        all_mounts_procs = _start_io(21)

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")
Beispiel #24
0
    def test_data_self_heal_algorithm_diff_default(self):
        """
        Test Volume Option - 'cluster.data-self-heal-algorithm' : 'diff'

        Description:
        - set the volume option "data-self-heal-algorithm" to value "diff"
        - create IO
        - bring down all bricks processes from selected set
        - modify the data
        - calculate arequal
        - bring bricks online
        - start healing
        - calculate arequal and compare with arequal before bringing bricks
        offline and after bringing bricks online
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Setting options
        g.log.info('Setting options "data-self-heal-algorithm": "diff"...')
        options = {"data-self-heal-algorithm": "diff"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'data-self-heal-algorithm' is set to 'diff' "
                   "successfully")

        # Creating files on client side
        all_mounts_procs = []
        g.log.info("Generating data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        # Creating files
        command = "/usr/bin/env python %s create_files -f 100 %s" % (
            self.script_upload_path, self.mounts[0].mountpoint)

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify the data
        all_mounts_procs = []
        g.log.info("Modifying data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        command = ("/usr/bin/env python %s create_files -f 100 "
                   "--fixed-file-size 1M %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # Checking arequals before bringing bricks online
        # and after bringing bricks online
        self.assertEqual(sorted(result_before_online),
                         sorted(result_after_online),
                         'Checksums are not equal')
        g.log.info('Checksums before bringing bricks online '
                   'and after bringing bricks online are equal')
Beispiel #25
0
 def run_async(cmd, hostname, raise_on_error=True):
     """Start ``cmd`` asynchronously on ``hostname`` and remember its handle.

     The handle returned by ``g.run_async`` is appended to the ``async_obj``
     collection taken from the enclosing scope and is also returned to the
     caller.  ``raise_on_error`` is accepted but not used in this body.
     """
     handle = g.run_async(command=cmd, host=hostname)
     async_obj.append(handle)
     return handle
Beispiel #26
0
    def test_self_heal(self):
        """
        Description:-
        - Create files on mount point
        - Kill one brick from volume
        - rm -rfv on mount point
        - bring bricks online
        - wait for heals
        - list

        IO is started asynchronously on every mount and is validated only
        after the selected bricks have been confirmed offline.
        """
        # pylint: disable=too-many-statements

        # IO on the mount point
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = (
                "/usr/bin/env python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-depth 2 "
                "--dir-length 35 "
                "--max-num-of-dirs 5 "
                "--num-of-files 5 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            # Shift the directory numbering for the next mount
            self.counter = self.counter + 10

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = bricks_to_bring_offline_dict['volume_bricks']

        # Killing one brick from the volume set
        g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        # The message must be a formatted string; passing a
        # (format, arg) tuple would print the raw tuple on failure.
        self.assertTrue(
            ret,
            "Failed to bring bricks: %s offline" % bricks_to_bring_offline)
        g.log.info("Successful in bringing bricks: %s offline",
                   bricks_to_bring_offline)

        # Validate if bricks are offline
        g.log.info("Validating if bricks: %s are offline",
                   bricks_to_bring_offline)
        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(
            ret, "Not all the bricks in list: %s are offline" %
            bricks_to_bring_offline)
        g.log.info("Successfully validated that bricks: %s are all offline",
                   bricks_to_bring_offline)

        # Validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Checking volume status
        g.log.info(
            "Logging volume info and Status after bringing bricks "
            "offline from the volume %s", self.volname)
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret,
            "Logging volume info and status failed on "
            "volume %s" % self.volname)
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Removing files from the mount point when one brick is down
        g.log.info("Removing files from the mount point")
        mountpoint = self.mounts[0].mountpoint
        client = self.mounts[0].client_system
        cmd = "rm -rfv %s/*" % mountpoint
        ret, _, _ = g.run(client, cmd)
        if ret != 0:
            raise ExecutionError("failed to delete the files")

        # Bringing bricks online
        g.log.info('Bringing bricks %s online', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bricks %s are online', bricks_to_bring_offline)

        # Check if bricks are online
        g.log.info("Checking bricks are online or not")
        ret = are_bricks_online(self.mnode, self.volname,
                                bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not online' % bricks_to_bring_offline)
        g.log.info('Bricks %s are online', bricks_to_bring_offline)

        # Monitoring heals on the volume
        g.log.info("Wait for heal completion...")
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(
            ret, "Self heal didn't complete even after waiting "
            "for 20 minutes.")
        # NOTE(review): this message mentions a volume-type change that this
        # method itself does not perform -- confirm against the class setup.
        g.log.info("self-heal is successful after changing the volume type "
                   "from replicated to arbitered volume")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
Beispiel #27
0
def cleanup_mounts(mounts):
    """Removes all the data from all the mountpoints

    Mounts whose mountpoint is empty or resolves to '/' are rejected: no
    command is run against them and they make the overall result False.

    Args:
        mounts (list): List of all GlusterMount objs. A single GlusterMount
            object is also accepted.

    Returns:
        bool: True if every mount is valid and no entries (other than the
            ignored directories) remain on any of them. False otherwise.
    """
    if isinstance(mounts, GlusterMount):
        mounts = [mounts]

    g.log.info("Start cleanup mounts")
    all_mounts_procs = []
    valid_mounts = []
    _rc_valid = True
    for mount_obj in mounts:
        g.log.info("Cleaning up data from %s:%s", mount_obj.client_system,
                   mount_obj.mountpoint)
        # Never run 'rm -rf' against an empty path or the filesystem root
        if (not mount_obj.mountpoint or
                os.path.realpath(
                    os.path.abspath(mount_obj.mountpoint)) == '/'):
            g.log.error("%s on %s is not a valid mount point",
                        mount_obj.mountpoint, mount_obj.client_system)
            _rc_valid = False
            continue
        cmd = "rm -rf %s/*" % (mount_obj.mountpoint)
        proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
        all_mounts_procs.append(proc)
        valid_mounts.append(mount_obj)
    # The removals were only started above; they are awaited just below.
    g.log.info("rm -rf on all clients is complete. Validating deletion now...")

    # Get cleanup status
    _rc_rmdir = True
    for mount_obj, proc in zip(valid_mounts, all_mounts_procs):
        ret, out, err = proc.async_communicate()
        # Any output at all from the silent 'rm -rf' is treated as a failure
        if ret != 0 or out or err:
            g.log.error("Deleting files/dirs Failed on %s:%s",
                        mount_obj.client_system,
                        mount_obj.mountpoint)
            _rc_rmdir = False
        else:
            g.log.info("Deleting files/dirs is successful on %s:%s",
                       mount_obj.client_system,
                       mount_obj.mountpoint)
    if _rc_rmdir:
        g.log.info("Successfully deleted files/dirs from all mounts")
    else:
        g.log.error("Deleting files/dirs failed on some of the mounts")

    # Check if mount points are empty. Only the validated mounts are probed
    # so that 'find' is never run against '/' or an empty path.
    ignore_dirs_list = [".trashcan"]
    ignore_dirs = r"\|".join(ignore_dirs_list)
    all_mounts_procs = []
    for mount_obj in valid_mounts:
        cmd = ("find %s -mindepth 1 | grep -ve '%s'" %
               (mount_obj.mountpoint, ignore_dirs))
        proc = g.run_async(mount_obj.client_system, cmd, user=mount_obj.user)
        all_mounts_procs.append(proc)

    # Get cleanup status
    _rc_lookup = True
    for mount_obj, proc in zip(valid_mounts, all_mounts_procs):
        ret, out, _ = proc.async_communicate()
        # The pipeline exits 0 only when grep printed a remaining entry
        if ret == 0:
            g.log.error("Mount %s on %s is still having entries:\n%s",
                        mount_obj.mountpoint, mount_obj.client_system, out)
            _rc_lookup = False
        else:
            g.log.info("Mount %s on %s is cleaned up\n%s",
                       mount_obj.mountpoint, mount_obj.client_system, out)
    if _rc_lookup and _rc_valid:
        g.log.info("All the mounts are successfully cleaned up")
    else:
        g.log.error("Failed to cleanup all mounts")

    # List mounts entries
    g.log.info("Listing mounts entries:")
    list_all_files_and_dirs_mounts(valid_mounts)

    # The result of the 'rm' phase is only logged; the final state of the
    # mounts (and their validity) decides the return value.
    return _rc_lookup and _rc_valid
Beispiel #28
0
    def test_entry_self_heal_heal_command(self):
        """
        Test Entry-Self-Heal (heal command)

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        - create IO
        - get arequal before getting bricks offline
        - set the volume option
        "self-heal-daemon": "off"
        - bring down all bricks processes from selected set
        - get arequal after getting bricks offline and compare with
        arequal before bringing bricks offline
        - modify the data
        - get arequal before getting bricks online
        - bring bricks online
        - set the volume option
        "self-heal-daemon": "on"
        - check daemons and start healing
        - check if heal is completed
        - check for split-brain
        - get arequal after getting bricks online and compare with
        arequal before bringing bricks online

        The offline/modify/online/heal cycle is repeated once for each data
        operation: create, rename, copy and delete.
        """

        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options %s' % options)
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "are set to 'off'")

        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)
            cmd = (
                "python %s create_deep_dirs_with_files "
                "--dirname-start-num %d "
                "--dir-length 2 "
                "--dir-depth 2 "
                "--max-num-of-dirs 2 "
                "--num-of-files 20 %s" %
                (self.script_upload_path, self.counter, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
            # Shift the directory numbering for the next mount
            self.counter = self.counter + 10
            g.log.info("IO on %s:%s is started successfully",
                       mount_obj.client_system, mount_obj.mountpoint)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Command templates to do different operations with data -
        # create, rename, copy and delete. Each takes the script path and
        # the mountpoint.
        cmd_list = [
            "python %s create_files -f 20 %s",
            "python %s mv -i '.trashcan' %s",
            "python %s copy --dest-dir new_dir %s",
            "python %s delete %s",
        ]

        for cmd_template in cmd_list:
            # Get areequal before getting bricks offline
            g.log.info('Getting areequal before getting bricks offline...')
            ret, result_before_offline = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting areequal before getting bricks offline '
                       'is successful')

            # Setting options
            g.log.info('Setting options...')
            options = {
                "self-heal-daemon": "off",
            }
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, 'Failed to set options %s' % options)
            g.log.info("Option 'self-heal-daemon' "
                       "is set to 'off' successfully")

            # Select bricks to bring offline
            bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
                self.mnode, self.volname))
            # Materialise the result: the brick list is reused several times
            # below and a bare filter() is a one-shot iterator on Python 3.
            bricks_to_bring_offline = list(filter(
                None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                       bricks_to_bring_offline_dict['cold_tier_bricks'] +
                       bricks_to_bring_offline_dict['volume_bricks'])))

            # Bring brick offline
            g.log.info('Bringing bricks %s offline...',
                       bricks_to_bring_offline)
            ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
            self.assertTrue(
                ret,
                'Failed to bring bricks %s offline' % bricks_to_bring_offline)

            ret = are_bricks_offline(self.mnode, self.volname,
                                     bricks_to_bring_offline)
            self.assertTrue(
                ret, 'Bricks %s are not offline' % bricks_to_bring_offline)
            g.log.info('Bringing bricks %s offline is successful',
                       bricks_to_bring_offline)

            # Get areequal after getting bricks offline
            g.log.info('Getting areequal after getting bricks offline...')
            ret, result_after_offline = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting areequal after getting bricks offline '
                       'is successful')

            # Checking areequals before bringing bricks offline
            # and after bringing bricks offline
            self.assertEqual(result_before_offline, result_after_offline,
                             'Checksums are not equal')
            g.log.info('Checksums before bringing bricks offline '
                       'and after bringing bricks offline are equal')

            # Modify the data
            g.log.info("Start modifying IO on all mounts...")
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Modifying IO on %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                # Format into a fresh name so the template stays intact for
                # the next mount; re-formatting an already formatted command
                # raises TypeError.
                modify_cmd = cmd_template % (self.script_upload_path,
                                             mount_obj.mountpoint)
                proc = g.run_async(mount_obj.client_system,
                                   modify_cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)
                g.log.info("IO on %s:%s is modified successfully",
                           mount_obj.client_system, mount_obj.mountpoint)
            self.io_validation_complete = False

            # Validate IO
            g.log.info("Wait for IO to complete and validate IO ...")
            ret = validate_io_procs(self.all_mounts_procs, self.mounts)
            self.assertTrue(ret, "IO failed on some of the clients")
            self.io_validation_complete = True
            g.log.info("IO is successful on all mounts")

            # Get areequal before getting bricks online
            g.log.info('Getting areequal before getting bricks online...')
            ret, result_before_online = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting areequal before getting bricks online '
                       'is successful')

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

            # Bring brick online
            g.log.info('Bringing bricks %s online...',
                       bricks_to_bring_offline)
            ret = bring_bricks_online(self.mnode, self.volname,
                                      bricks_to_bring_offline)
            self.assertTrue(
                ret,
                'Failed to bring bricks %s online' % bricks_to_bring_offline)
            g.log.info('Bringing bricks %s online is successful',
                       bricks_to_bring_offline)

            # Setting options
            g.log.info('Setting options...')
            options = {
                "self-heal-daemon": "on",
            }
            ret = set_volume_options(self.mnode, self.volname, options)
            self.assertTrue(ret, 'Failed to set options %s' % options)
            g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            # The message must be a formatted string, not a (format, arg)
            # tuple, so that a failure prints readable text.
            self.assertTrue(
                ret,
                "Failed to wait for volume %s processes to "
                "be online" % self.volname)
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

            # Wait for self-heal-daemons to be online
            g.log.info("Waiting for self-heal-daemons to be online")
            ret = is_shd_daemonized(self.all_servers)
            self.assertTrue(ret, "Either No self heal daemon process found")
            g.log.info("All self-heal-daemons are online")

            # Start healing
            ret = trigger_heal(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not started')
            g.log.info('Healing is started')

            # Monitor heal completion
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal has not yet completed')

            # Check if heal is completed
            ret = is_heal_complete(self.mnode, self.volname)
            self.assertTrue(ret, 'Heal is not complete')
            g.log.info('Heal is completed successfully')

            # Check for split-brain
            ret = is_volume_in_split_brain(self.mnode, self.volname)
            self.assertFalse(ret, 'Volume is in split-brain state')
            g.log.info('Volume is not in split-brain state')

            # Get areequal after getting bricks online
            g.log.info('Getting areequal after getting bricks online...')
            ret, result_after_online = collect_mounts_arequal(self.mounts)
            self.assertTrue(ret, 'Failed to get arequal')
            g.log.info('Getting areequal after getting bricks online '
                       'is successful')

            # List all files and dirs created
            g.log.info("List all files and directories:")
            ret = list_all_files_and_dirs_mounts(self.mounts)
            if not ret:
                raise ExecutionError("Failed to list all files and dirs")
            g.log.info("Listing all files and directories is successful")

            # Checking areequals before bringing bricks online
            # and after bringing bricks online
            self.assertEqual(result_before_online, result_after_online,
                             'Checksums are not equal')
            g.log.info('Checksums before bringing bricks online '
                       'and after bringing bricks online are equal')
Beispiel #29
0
def run_bonnie(servers, directory_to_run, username="******"):
    """Run bonnie test suite on the given servers.

    bonnie++ is installed with yum where it is missing, run on all servers
    in parallel, and afterwards its output files and the package itself are
    removed from every server.

    Args:
        servers (list): servers in which tests to be run.
        directory_to_run (list): directory path where tests will run for
         each server.

    Kwargs:
        username (str): user passed to bonnie++ through its '-u' option.
            NOTE(review): this was documented as defaulting to root, but
            the default literal here is "******" -- confirm the intended
            default.

    Returns:
        bool: True, if test passes in all servers, False otherwise

    Example:
        run_bonnie(["abc.com", "def.com"], ["/mnt/test1", "/mnt/test2"])
    """

    g.log.info("Running bonnie tests on %s", ','.join(servers))
    rt = True
    options_for_each_servers = []

    # Install bonnie test suite if not installed
    results = g.run_parallel(servers, "yum list installed bonnie++")
    for index, server in enumerate(servers):
        if results[server][0] != 0:
            ret, _, _ = g.run(
                server, "yum list installed bonnie++ || "
                "yum -y install bonnie++")
            if ret != 0:
                g.log.error("Failed to install bonnie on %s", server)
                return False

        # Building options for bonnie tests
        freemem_command = "free -g | grep Mem: | awk '{ print $2 }'"
        ret, out, _ = g.run(server, freemem_command)
        # Do not blow up with ValueError when the probe fails or prints
        # nothing; report the failure through the return value instead.
        try:
            memory = int(out)
        except (TypeError, ValueError):
            memory = None
        if ret != 0 or memory is None:
            g.log.error("Failed to get total memory on %s", server)
            return False
        g.log.info("Memory = %i", memory)

        options_list = ["-d %s -u %s" % (directory_to_run[index], username)]
        if memory >= 8:
            options_list.append("-r 16G -s 16G -n 0 -m TEST -f -b")
        options_for_each_servers.append(" ".join(options_list))

    # Start bonnie++ on every server before waiting for any of them
    proc_list = []
    for server, options in zip(servers, options_for_each_servers):
        bonnie_command = "bonnie++ %s" % options
        proc_list.append(g.run_async(server, bonnie_command))

    for server, proc in zip(servers, proc_list):
        results = proc.async_communicate()
        if results[0] != 0:
            g.log.error("Bonnie test failed on server %s", server)
            rt = False

    for index, server in enumerate(servers):
        ret, _, _ = g.run(server,
                          "rm -rf %s/Bonnie.*" % directory_to_run[index])
        if ret != 0:
            g.log.error("Failed to remove files from %s", server)
            rt = False

    # Keep going after a failed uninstall so the remaining servers are
    # still cleaned up; the failure is reported through the return value.
    for server in servers:
        ret, _, _ = g.run(server, "yum -y remove bonnie++")
        if ret != 0:
            g.log.error("Failed to remove bonnie from %s", server)
            rt = False
    return rt
Beispiel #30
0
def run_fio(servers, directory_to_run):
    """Run fio test suite on the given servers.

    fio is installed with yum where it is missing, a job file is written to
    /tmp/fio_job.ini on each server, fio is run on all servers in parallel,
    and afterwards the data files, the job file and the package itself are
    removed from every server.

    Args:
        servers (list): servers in which tests to be run.
        directory_to_run (list): directory path where tests will run for
         each server.

    Returns:
        bool: True, if test passes in all servers, False otherwise

    Example:
        run_fio(["abc.com", "def.com"], ["/mnt/test1", "/mnt/test2"])
    """

    g.log.info("Running fio tests on %s", ','.join(servers))
    rt = True
    # Same job file path on every server; defined once because the later
    # loops use it as well.
    job_file = "/tmp/fio_job.ini"

    # Installing fio if not installed
    results = g.run_parallel(servers, "yum list installed fio")
    for index, server in enumerate(servers):
        if results[server][0] != 0:
            ret, _, _ = g.run(
                server, "yum list installed fio || "
                "yum -y install fio")
            if ret != 0:
                g.log.error("Failed to install fio on %s", server)
                return False

        # building job file for running fio
        # TODO: parametrizing the fio and to get input values from user
        cmd = ("echo -e '[global]\nrw=randrw\nio_size=1g\nfsync_on_close=1\n"
               "size=4g\nbs=64k\nrwmixread=20\nopenfiles=1\nstartdelay=0\n"
               "ioengine=sync\n[write]\ndirectory=%s\nnrfiles=1\n"
               "filename_format=fio_file.$jobnum.$filenum\nnumjobs=8' "
               "> %s" % (directory_to_run[index], job_file))

        ret, _, _ = g.run(server, cmd)
        if ret != 0:
            g.log.error("Failed to create fio job file")
            rt = False

    # Start fio on every server before waiting for any of them
    fio_command = "fio %s" % (job_file)
    proc_list = [g.run_async(server, fio_command) for server in servers]

    for server, proc in zip(servers, proc_list):
        results = proc.async_communicate()
        if results[0] != 0:
            g.log.error("fio test failed on server %s", server)
            rt = False

    for index, server in enumerate(servers):
        ret, _, _ = g.run(server,
                          "rm -rf %s/fio_file.*" % directory_to_run[index])
        if ret != 0:
            g.log.error("Failed to remove files from %s", server)
            rt = False

    for server in servers:
        ret, _, _ = g.run(server, "rm -rf %s" % job_file)
        if ret != 0:
            g.log.error("Failed to remove job file from %s", server)
            rt = False

    # Keep going after a failed uninstall so the remaining servers are
    # still cleaned up; the failure is reported through the return value.
    for server in servers:
        ret, _, _ = g.run(server, "yum -y remove fio")
        if ret != 0:
            g.log.error("Failed to remove fio from %s", server)
            rt = False
    return rt
    def test_validate_snaps_create(self):
        """
        Creating snapshot using gluster snapshot create <snap1> <vol-name>

        Steps:
        - create snap1 with the plain form of the command
        - create snap2 with a quoted description
        - create snap3 with the 'force' keyword
        - create snap4 with the 'no-timestamp' keyword
        - delete all snapshots
        - start IO on all mounts and create snapy0..snapy4 while it runs
        - validate the IO and stat the created files/dirs
        - check that exactly the five snapy* snapshots are listed
        """
        cmd_str = "gluster snapshot create %s %s" % ("snap1", self.volname)
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                  % self.volname))
        g.log.info("Snapshot snap1 created successfully for volume  %s",
                   self.volname)

        # Create snapshot of volume using
        # -- gluster snapshot create <snap2> <vol-name(s)> [description
        # <description with words and quotes>]
        desc = 'description "this is a snap with snap2 name and description"'
        cmd_str = ("gluster snapshot create %s %s %s"
                   % ("snap2", self.volname, desc))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                  % self.volname))
        g.log.info("Snapshot snap2 created successfully for volume  %s",
                   self.volname)

        # Create one more snapshot of volume using force
        cmd_str = ("gluster snapshot create %s %s %s"
                   % ("snap3", self.volname, "force"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                  % self.volname))
        g.log.info("Snapshot snap3 created successfully for volume  %s",
                   self.volname)

        # Create one more snapshot of volume using no-timestamp option
        cmd_str = ("gluster snapshot create %s %s %s"
                   % ("snap4", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                  % self.volname))
        g.log.info("Snapshot snap4 created successfully for volume  %s",
                   self.volname)

        # Delete all snaps
        # (so that only the snapshots created during IO are counted later)
        g.log.info("delete all snapshots present")
        ret, _, _ = snap_delete_all(self.mnode)
        self.assertEqual(ret, 0, "Snapshot delete failed.")
        g.log.info("Successfully deleted all snaps")

        # Start IO on all mounts.
        # The IO runs asynchronously; it is validated after the snapshots
        # below have been created.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" % (self.script_upload_path, count,
                                            mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system, cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            # Shift the directory numbering for the next mount
            count = count + 10

        # Create 5 snaps while IO is in progress
        for i in range(0, 5):
            cmd_str = "gluster snapshot create %s %s %s" % (
                "snapy%s" % i, self.volname, "no-timestamp")
            ret, _, _ = g.run(self.mnode, cmd_str)
            self.assertEqual(ret, 0, ("Failed to create snapshot for %s"
                                      % self.volname))
            g.log.info("Snapshot %s created successfully for volume  %s",
                       "snapy%s" % i, self.volname)

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Check for no of snaps using snap_list it should be 5 now
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(5, len(snap_list), "No of snaps not consistent "
                         "for volume %s" % self.volname)
        g.log.info("Successfully validated number of snaps.")

        # Validate all snaps created during IO
        for i in range(0, 5):
            self.assertIn("snapy%s" % i, snap_list, "%s snap not "
                          "found " % ("snapy%s" % i))
        g.log.info("Successfully validated names of snap")