예제 #1
0
    def test_rebalance_start_when_glusterd_down(self):

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand success", self.volname)

        # Form a new list of servers without mnode in it to prevent mnode
        # from glusterd failure
        nodes = self.servers[:]
        nodes.remove(self.mnode)

        # Stop glusterd on a server
        self.random_server = random.choice(nodes)
        g.log.info("Stop glusterd on server %s", self.random_server)
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, ("Server %s: Failed to stop glusterd",
                              self.random_server))
        g.log.info("Server %s: Stopped glusterd", self.random_server)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                                  self.volname))
        g.log.info("Volume %s: Rebalance start success", self.volname)
        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertFalse(ret, ("Volume %s: Rebalance is completed",
                               self.volname))
        g.log.info("Rebalance failed on one or more nodes. Check rebalance "
                   "status for more details")
예제 #2
0
    def _add_brick_rebalance(self):
        """Create files,Perform Add brick and wait for rebalance to complete"""
        # Create files on mount point using dd command
        cmd = ('cd %s;for i in {1..100000};'
               'do dd if=/dev/urandom bs=1024 count=1 of=file$i;done;'
               % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to createfiles on mountpoint")
        g.log.info("Successfully created files on mountpoint")

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=1200)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
    def test_add_brick_rebalance_files_with_holes(self):
        """
        Test case:
        1. Create a volume, start it and mount it using fuse.
        2. On the volume root, create files with holes.
        3. After the file creation is complete, add bricks to the volume.
        4. Trigger rebalance on the volume.
        5. Wait for rebalance to complete.
        """
        # On the volume root, create files with holes
        cmd = ("cd %s;for i in {1..5000}; do dd if=/dev/urandom"
               " of=file_with_holes$i bs=1M count=1 seek=100M; done" %
               self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files with holes")

        # After the file creation is complete, add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

        # Trigger rebalance on the volume
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=9000)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
예제 #4
0
    def test_induce_holes_thenfixlayout(self):

        # pylint: disable=too-many-statements
        m_point = self.mounts[0].mountpoint
        command = 'mkdir -p ' + m_point + '/testdir'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "mkdir failed")
        g.log.info("mkdir is successful")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Shrinking volume by removing bricks
        g.log.info("Start removing bricks from volume")
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "force")
        self.assertFalse(ret, "Remove-brick with force: FAIL")
        g.log.info("Remove-brick with force: PASS")

        # Check the layout
        ret = is_layout_complete(self.mnode, self.volname, dirpath='/testdir')
        self.assertFalse(ret, "Volume %s: Layout is complete")
        g.log.info("Volume %s: Layout has some holes")

        # Start Rebalance fix-layout
        g.log.info("Volume %s: Start fix-layout", self.volname)
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, ("Volume %s: fix-layout start failed"
                                  "%s", self.volname))
        g.log.info("Volume %s: fix-layout start success", self.volname)

        # Wait for fix-layout to complete
        g.log.info("Waiting for fix-layout to complete")
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s: Fix-layout is either failed or "
                              "in-progress", self.volname))
        g.log.info("Volume %s: Fix-layout completed successfully",
                   self.volname)

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")
    def test_rebalance_while_remove_brick_in_progress(self):
        """
        - Create directories and files on the mount point.
        -  now remove one of the brick from the volume
            gluster volume remove-brick <vol> <brick> start
        - immediately start rebalance on the same volume
            gluster volume rebalance <vol> start
        """
        # pylint: disable=too-many-statements
        # DHT Layout validation
        for mount in self.mounts:
            g.log.debug('Check DHT values %s:%s', mount.client_system,
                        mount.mountpoint)
            ret = validate_files_in_dir(self.clients[0], mount.mountpoint,
                                        test_type=LAYOUT_IS_COMPLETE,
                                        file_type=FILETYPE_DIRS)
            self.assertTrue(ret, "TEST_LAYOUT_IS_COMPLETE: FAILED")
            g.log.info("TEST_LAYOUT_IS_COMPLETE: PASS")

        # Log Volume Info and Status before shrinking the volume.
        g.log.info("Logging volume info and Status before shrinking volume")
        log_volume_info_and_status(self.mnode, self.volname)
        g.log.info("Successful in logging volume info and status of volume "
                   "%s", self.volname)

        # Form bricks list for Shrinking volume
        self.remove_brick_list = form_bricks_list_to_remove_brick(
            self.mnode, self.volname, subvol_name=1)
        self.assertIsNotNone(self.remove_brick_list, ("Volume %s: Failed to "
                                                      "form bricks list for "
                                                      "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks with option start
        g.log.info("Start removing bricks for %s", self.volname)
        ret, _, _ = remove_brick(self.mnode, self.volname,
                                 self.remove_brick_list, "start")
        self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed",
                                  self.volname))
        g.log.info("Volume %s: Remove-brick start success ", self.volname)

        # Log remove-brick status
        g.log.info("Logging Remove-brick status")
        ret, out, err = remove_brick(self.mnode, self.volname,
                                     self.remove_brick_list, "status")
        self.assertEqual(ret, 0, ("Volume %s: Remove-brick status failed",
                                  self.volname))
        g.log.info("Volume %s: Remove-brick status", self.volname)
        g.log.info(out)

        # Start rebalance while volume shrink in-progress
        g.log.info("Volume %s: Start rebalance while volume shrink is "
                   "in-progress")
        _, _, err = rebalance_start(self.mnode, self.volname)
        self.assertIn("Either commit or stop the remove-brick task.", err,
                      "Rebalance started successfully while volume shrink"
                      " is in-progress")
        g.log.info("Failed to start rebalance while volume shrink is "
                   "in progress <EXPECTED>")
예제 #6
0
    def test_rebalance_with_add_brick_and_lookup(self):
        """
        Rebalance with add brick and then lookup on mount
        - Create a Distributed-Replicated volume.
        - Create deep dirs(200) and 100 files on the deepest directory.
        - Expand volume.
        - Initiate rebalance
        - Once rebalance is completed, do a lookup on mount and time it.
        """
        # Create Deep dirs.
        cmd = (
            "cd %s/; for i in {1..200};do mkdir dir${i}; cd dir${i};"
            " if [ ${i} -eq 100 ]; then for j in {1..100}; do touch file${j};"
            " done; fi; done;" % (self.mounts[0].mountpoint))
        ret, _, _ = g.run(self.clients[0], cmd)
        self.assertEqual(ret, 0, "Failed to create the deep dirs and files")
        g.log.info("Deep dirs and files created.")

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Start Rebalance.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=500)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Do a lookup on the mountpoint and note the time taken to run.
        # The time used for comparison is taken as a benchmark on using a
        # RHGS 3.5.2 for this TC. For 3.5.2, the time takes came out to be
        # 4 seconds. Now the condition for subtest to pass is for the lookup
        # should not be more than 10% of this value, i.e. 4.4 seconds.
        cmd = ("ls -R %s/" % (self.mounts[0].mountpoint))
        start_time = time()
        ret, _, _ = g.run(self.clients[0], cmd)
        end_time = time()
        self.assertEqual(ret, 0, "Failed to do a lookup")
        time_taken = end_time - start_time
        # ToDo: Implement a better approach to get benchmark value
        # self.assertTrue(time_taken <= 4.4, "Lookup takes more time "
        #                 "than the previously benchmarked value.")
        g.log.info("Lookup took : %d seconds", time_taken)
    def test_rebalance_start_when_glusterd_down(self):

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Expand failed", self.volname))
        g.log.info("Volume %s: Expand success", self.volname)

        # Get all servers IP addresses which are part of volume
        ret = get_all_bricks(self.mnode, self.volname)
        list_of_servers_used = []
        for brick in ret:
            list_of_servers_used.append(brick.split(":")[0])
        self.assertTrue(ret, ("Failed to get server IP list for volume %s",
                              self.volname))
        g.log.info("Succesfully got server IP list for volume %s",
                   self.volname)

        # Form a new list of servers without mnode in it to prevent mnode
        # from glusterd failure
        for element in list_of_servers_used:
            if element == self.mnode:
                list_of_servers_used.remove(element)

        # Stop glusterd on a server
        self.random_server = choice(list_of_servers_used)
        g.log.info("Stop glusterd on server %s", self.random_server)
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, ("Server %s: Failed to stop glusterd",
                              self.random_server))
        g.log.info("Server %s: Stopped glusterd", self.random_server)

        # Start Rebalance
        g.log.info("Starting rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Volume %s: Failed to start rebalance",
                                  self.volname))
        g.log.info("Volume %s: Rebalance start success", self.volname)
        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertFalse(ret, ("Volume %s: Rebalance is completed",
                               self.volname))
        g.log.info("Expected: Rebalance failed on one or more nodes."
                   " Check rebalance status for more details")
        error_msg1 = "\"fix layout on / failed\""
        error_msg2 = "\"Transport endpoint is not connected\""
        ret, _, _ = g.run(self.mnode, "grep -w %s /var/log/glusterfs/"
                          "%s-rebalance.log| grep -w %s"
                          % (error_msg1, self.volname, error_msg2))
        self.assertEqual(ret, 0, ("Unexpected : Rebalance failed on volume %s"
                                  "not because of glusterd down on a node",
                                  self.volname))
        g.log.info("\n\nRebalance failed on volume %s due to glusterd down on"
                   "one of the nodes\n\n", self.volname)
    def test_add_brick_rebalance_with_acl_set_to_files(self):
        """
        Test case:
        1. Create a volume, start it and mount it to a client.
        2. Create 10 files on the mount point and set acls on the files.
        3. Check the acl value and collect arequal-checksum.
        4. Add bricks to the volume and start rebalance.
        5. Check the value of acl(it should be same as step 3),
           collect and compare arequal-checksum with the one collected
           in step 3
        """
        # Create 10 files on the mount point.
        cmd = ("cd {}; for i in `seq 1 10`;do touch file$i;done".format(
            self.mount_point))
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create files on mount point")

        for number in range(1, 11):
            ret = set_acl(self.first_client, 'u:joker:rwx',
                          '{}/file{}'.format(self.mount_point, str(number)))
            self.assertTrue(ret, "Failed to set acl on files")

        # Collect arequal on mount point and check acl value
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])
        self._check_acl_set_to_files()
        g.log.info("Files created and acl set to files properly")

        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s" % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1200)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")

        # Check acl value if it's same as before rebalance
        self._check_acl_set_to_files()

        # Check for data loss by comparing arequal before and after ops
        arequal_checksum_after = collect_mounts_arequal(self.mounts[0])
        self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum and acl value are SAME")
    def test_rebalance_start_status_stop(self):

        # Getting arequal checksum before rebalance start
        g.log.info("Getting arequal before rebalance start")
        arequal_checksum_before_rebalance_start = collect_mounts_arequal(
            self.mounts)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Stop on-going rebalance
        g.log.info("Stop rebalance on the volume")
        ret, _, _ = rebalance_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         ("Volume %s: Failed to stop rebalance", self.volname))
        g.log.info("Checking whether the migration is stopped or not")
        # Wait till the on-going file migration completes on all servers
        count = 0
        while count < 80:
            rebalance_count = 0
            for server in self.servers:
                ret, _, _ = g.run(server, "pgrep rebalance")
                if ret != 0:
                    rebalance_count += 1
            if rebalance_count == len(self.servers):
                break
            sleep(2)
            count += 1
        g.log.info("Volume %s: Rebalance process is not running on servers",
                   self.volname)

        # List all files and dirs from mount point
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        g.log.info("Listing all files and directories is successful")

        # Getting arequal checksum after the rebalance is stopped
        g.log.info("Getting arequal checksum after the rebalance is stopped")
        arequal_checksum_after_rebalance_stop = collect_mounts_arequal(
            self.mounts)

        # Comparing arequals checksum before start of rebalance and
        #                       after the rebalance is stopped
        g.log.info("Comparing arequals checksum before start of rebalance and"
                   "after the rebalance is stopped")
        self.assertEqual(arequal_checksum_before_rebalance_start,
                         arequal_checksum_after_rebalance_stop,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")
    def test_rebalance_multiple_expansions(self):
        """
        Test case:
        1. Create a volume, start it and mount it
        2. Create some file on mountpoint
        3. Collect arequal checksum on mount point pre-rebalance
        4. Do the following 3 times:
        5. Expand the volume
        6. Start rebalance and wait for it to finish
        7. Collect arequal checksum on mount point post-rebalance
           and compare with value from step 3
        """

        # Create some file on mountpoint
        cmd = ("cd %s; for i in {1..500} ; do "
               "dd if=/dev/urandom of=file$i bs=10M count=1; done" %
               self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertEqual(ret, 0, "IO failed on volume %s" % self.volname)

        # Collect arequal checksum before rebalance
        arequal_checksum_before = collect_mounts_arequal(self.mounts[0])

        for _ in range(3):
            # Add brick to volume
            ret = expand_volume(self.mnode, self.volname, self.servers,
                                self.all_servers_info)
            self.assertTrue(ret,
                            "Failed to add brick on volume %s" % self.volname)

            # Trigger rebalance and wait for it to complete
            ret, _, _ = rebalance_start(self.mnode, self.volname, force=True)
            self.assertEqual(
                ret, 0, "Failed to start rebalance on "
                "volume %s" % self.volname)

            # Wait for rebalance to complete
            ret = wait_for_rebalance_to_complete(self.mnode,
                                                 self.volname,
                                                 timeout=1200)
            self.assertTrue(
                ret, "Rebalance is not yet complete on the volume "
                "%s" % self.volname)
            g.log.info("Rebalance successfully completed")

            # Collect arequal checksum after rebalance
            arequal_checksum_after = collect_mounts_arequal(self.mounts[0])

            # Check for data loss by comparing arequal before and after
            # rebalance
            self.assertEqual(arequal_checksum_before, arequal_checksum_after,
                             "arequal checksum is NOT MATCHNG")
            g.log.info("arequal checksum is SAME")
    def test_add_brick_replace_brick_fix_layout(self):
        """
        Test case:
        1. Create a volume, start it and mount it.
        2. Create files and dirs on the mount point.
        3. Add bricks to the volume.
        4. Replace 2 old bricks to the volume.
        5. Trigger rebalance fix layout and wait for it to complete.
        6. Check layout on all the bricks through trusted.glusterfs.dht.
        """
        # Create directories with some files on mount point
        cmd = ("cd %s; for i in {1..10}; do mkdir dir$i; for j in {1..5};"
               " do dd if=/dev/urandom of=dir$i/file$j bs=1M count=1; done;"
               " done" % self.mounts[0].mountpoint)
        ret, _, _ = g.run(self.first_client, cmd)
        self.assertFalse(ret, "Failed to create dirs and files.")

        # Orginal brick list before add brick
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Empty present brick list")

        # Add bricks to the volume
        add_brick_list = form_bricks_list_to_add_brick(self.mnode,
                                                       self.volname,
                                                       self.servers,
                                                       self.all_servers_info)
        self.assertIsNotNone(add_brick_list, "Empty add brick list")

        ret, _, _ = add_brick(self.mnode, self.volname, add_brick_list)
        self.assertFalse(ret, "Failed to add bricks to the volume")
        g.log.info("Successfully added bricks to the volume")

        # Replace 2 old bricks to the volume
        for _ in range(0, 2):
            brick = choice(brick_list)
            self._replace_a_old_added_brick(brick)
            brick_list.remove(brick)

        # Start rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertFalse(ret, "Failed to start rebalance on volume")

        ret = wait_for_fix_layout_to_complete(self.mnode,
                                              self.volname,
                                              timeout=800)
        self.assertTrue(ret, "Rebalance failed on volume")

        # Check layout on all the bricks through trusted.glusterfs.dht
        self._check_trusted_glusterfs_dht_on_all_bricks()
예제 #12
0
    def _start_rebalance_and_wait(self):
        """Start rebalance and wait"""
        # Start Rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)
예제 #13
0
    def _rebalance_completion(self, volume_name):
        """Rebalance start and completion after expansion."""
        ret, _, err = rebalance_ops.rebalance_start(
            'auto_get_gluster_endpoint', volume_name)
        self.assertFalse(
            ret, "Rebalance for {} volume not started with error {}".format(
                volume_name, err))

        for w in waiter.Waiter(240, 10):
            reb_status = rebalance_ops.get_rebalance_status(
                'auto_get_gluster_endpoint', volume_name)
            if reb_status["aggregate"]["statusStr"] == "completed":
                break
        if w.expired:
            raise AssertionError(
                "Failed to complete the rebalance in 240 seconds")
    def _trigger_rebalance_and_wait(self, rebal_force=False):
        """Start rebalance with or without force and wait"""
        # Trigger rebalance on volume
        ret, _, _ = rebalance_start(self.mnode,
                                    self.volname,
                                    force=rebal_force)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1200)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
    def _expand_volume_and_wait_for_rebalance_to_complete(self):
        """Expand volume and wait for rebalance to complete"""
        # Add brick to volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to add brick on volume %s"
                        % self.volname)

        # Trigger rebalance and wait for it to complete
        ret, _, _ = rebalance_start(self.mnode, self.volname,
                                    force=True)
        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
                         % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
                                             timeout=6000)
        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
                             "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
예제 #16
0
    def _expand_volume_and_verify_rebalance(self):
        """ Expands the volume, trigger rebalance and verify file is copied"""

        # Expand the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to expand the volume")

        # Trigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0,
            "Failed to start rebalance on the volume %s" % self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1200)
        self.assertTrue(
            ret, "Rebalance is not yet complete on the volume "
            "%s" % self.volname)
        g.log.info("Rebalance successfully completed")
    def test_spurious_rebalance(self):
        """
        In this test case:
        1. Trusted storage Pool of 3 nodes
        2. Create a distributed volumes with 3 bricks
        3. Start the volume
        4. Fuse mount the gluster volume on out of trusted nodes
        5. Remove a brick from the volume
        6. Check remove-brick status
        7. Stop the remove brick process
        8. Perform fix-layoyt on the volume
        9. Get the rebalance fix-layout status
       10. Create a directory from mount point
       11. Check trusted.glusterfs.dht extended attribue for newly
           created directory on the remove brick
        """

        # pylint: disable=too-many-statements
        my_servers = self.servers[0:3]
        my_server_info = {}
        for server in self.servers[0:3]:
            my_server_info[server] = self.all_servers_info[server]
        for index in range(1, 3):
            ret, _, _ = peer_probe(self.servers[0], self.servers[index])
            self.assertEqual(ret, 0, ("peer probe from %s to %s is failed",
                                      self.servers[0], self.servers[index]))
            g.log.info("peer probe is success from %s to "
                       "%s", self.servers[0], self.servers[index])

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 3, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        ret, _, _ = volume_create(self.mnode,
                                  self.volname,
                                  bricks_list,
                                  force=False)
        self.assertEqual(ret, 0, ("Unable"
                                  "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)
        remove_brick_list = []
        remove_brick_list.append(bricks_list[2])
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove bricks operation started successfully")

        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'stop')
        self.assertEqual(ret, 0, "Failed to stop remove brick operation")
        g.log.info("Remove bricks operation stopped successfully")

        g.log.info("Starting Fix-layoyt on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname, True)
        self.assertEqual(ret, 0, ("Failed to start rebalance for fix-layout"
                                  "on the volume %s", self.volname))
        g.log.info("Successfully started fix-layout on the volume %s",
                   self.volname)

        # Wait for fix-layout to complete
        g.log.info("Waiting for fix-layout to complete")
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Fix-layout is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Fix-layout is successfully complete on the volume %s",
                   self.volname)
        ret = mkdir(self.mounts[0].client_system,
                    "%s/dir1" % self.mounts[0].mountpoint)
        self.assertTrue(ret, ("Failed to create directory dir1"))
        g.log.info("directory dir1 is created successfully")

        brick_server, brick_dir = remove_brick_list[0].split(':')
        folder_name = brick_dir + "/dir1"
        g.log.info("Check trusted.glusterfs.dht on host  %s for directory %s",
                   brick_server, folder_name)

        ret = get_fattr(brick_server, folder_name, 'trusted.glusterfs.dht')
        self.assertTrue(
            ret, ("Failed to get trusted.glusterfs.dht for %s" % folder_name))
        g.log.info("get trusted.glusterfs.dht xattr for %s successfully",
                   folder_name)
    def test_expanding_volume_when_io_in_progress(self):
        """Test expanding volume (Increase distribution) using existing
        servers bricks when IO is in progress.

        Description:
            - add bricks
            - starts rebalance
            - wait for rebalance to complete
            - validate IO
        """
        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Expanding volume by adding bricks to the volume when IO in progress
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Wait for gluster processes to come online
        time.sleep(30)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        _, _, _ = rebalance_status(self.mnode, self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for the "
                                  "volume %s", self.volname))
        g.log.info("Successfully got rebalance status of the volume %s",
                   self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO is successful on all mounts")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")
    def test_rebalance_with_quota_enabled(self):
        """
        Test rebalance with quota enabled on root.
        1. Create Volume of type distribute
        2. Set Quota limit on the root directory
        3. Do some IO to reach the Hard limit
        4. After IO ends, compute arequal checksum
        5. Add bricks to the volume.
        6. Start rebalance
        7. After rebalance is completed, check arequal checksum
        """
        # Enable Quota
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume %s", self.volname))
        g.log.info("Successfully enabled quota on volume %s", self.volname)

        # Set the Quota timeouts to 0 for strict accounting
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname))
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname))
        g.log.info("Quota soft and hard timeout has been set to 0 for %s",
                   self.volname)

        # Set the quota limit of 1 GB on root dir of the volume
        ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "1GB")
        self.assertEqual(ret, 0, "Failed to set Quota for dir root")
        g.log.info("Successfully set quota limit for dir root")

        # Do some IO until hard limit is reached.
        cmd = ("/usr/bin/env python %s create_files "
               "-f 1024 --fixed-file-size 1M --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete and validate IO
        self.assertTrue(
            wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]),
            "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Validate quota
        ret = quota_validate(self.mnode,
                             self.volname,
                             path='/',
                             hard_limit=1073741824,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/'")
        g.log.info("Quota Validated for path '/'")

        # Compute arequal checksum.
        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)

        # Log Volume info and status before expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Log volume info and status after expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Perform rebalance start operation.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to  start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Rebalance started.")

        # Check rebalance is in progress
        rebalance_status = get_rebalance_status(self.mnode, self.volname)
        ret = rebalance_status['aggregate']['statusStr']
        self.assertEqual(ret, "in progress", ("Rebalance is not in "
                                              "'in progress' state, either "
                                              "rebalance is in completed state"
                                              "  or failed to get rebalance "
                                              "status"))
        g.log.info("Rebalance is 'in progress' state")

        # Wait till rebalance ends.
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Validate quota
        ret = quota_validate(self.mnode,
                             self.volname,
                             path='/',
                             hard_limit=1073741824,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/'")
        g.log.info("Quota Validated for path '/'")

        # Compute arequal checksum.
        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)

        # Comparing arequals checksum before and after rebalance.
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
예제 #20
0
    def test_rebalance_with_hidden_files(self):
        # pylint: disable=too-many-statements
        # Start IO on mounts
        g.log.info("Starting IO on all mounts...")
        self.all_mounts_procs = []
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_files "
                   "--base-file-name . "
                   "-f 99 %s" %
                   (self.script_upload_path, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # validate IO
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("Listing all files and directories is successful")

        # Verify DHT values across mount points
        for mount_obj in self.mounts:
            g.log.debug("Verifying hash layout values %s:%s",
                        mount_obj.client_system, mount_obj.mountpoint)
            ret = validate_files_in_dir(mount_obj.client_system,
                                        mount_obj.mountpoint,
                                        test_type=FILE_ON_HASHED_BRICKS,
                                        file_type=FILETYPE_FILES)
            self.assertTrue(
                ret, "Expected - Files are created on only "
                "sub-volume according to its hashed value")
            g.log.info("Hash layout values are verified %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)

        # Getting areequal checksum before rebalance
        g.log.info("Getting areequal checksum before rebalance")
        arequal_checksum_before_rebalance = collect_mounts_arequal(self.mounts)

        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online ", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Checking if there are any migration failures
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            failed_files_count = int(each_node['failures'])
            self.assertEqual(
                failed_files_count, 0,
                "Rebalance failed to migrate few files on %s" %
                each_node['nodeName'])
            g.log.info("There are no migration failures")

        # Getting areequal checksum after rebalance
        g.log.info("Getting areequal checksum after rebalance")
        arequal_checksum_after_rebalance = collect_mounts_arequal(self.mounts)

        # Comparing arequals checksum before and after rebalance
        g.log.info("Comparing arequals checksum before and after rebalance")
        self.assertEqual(arequal_checksum_before_rebalance,
                         arequal_checksum_after_rebalance,
                         "arequal checksum is NOT MATCHNG")
        g.log.info("arequal checksum is SAME")
    def test_brick_removal_with_quota(self):
        """
        Test Brick removal with quota in place
        1. Create Volume of type distribute
        2. Set Quota limit on the directory
        3. Do some IO to reach the Hard limit
        4. After IO ends, remove bricks
        5. Quota validation should succeed.
        """
        # Enable Quota
        ret, _, _ = quota_enable(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Failed to enable quota on the volume 5s", self.volname))
        g.log.info("Successfully enabled quota on volume %s", self.volname)

        # Set the Quota timeouts to 0 for strict accounting
        ret, _, _ = quota_set_hard_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set hard-timeout to 0 for %s", self.volname))
        ret, _, _ = quota_set_soft_timeout(self.mnode, self.volname, 0)
        self.assertEqual(
            ret, 0, ("Failed to set soft-timeout to 0 for %s", self.volname))
        g.log.info("Quota soft and hard timeout has been set to 0 for %s",
                   self.volname)

        # Set the quota limit of 100 MB on root dir of the volume
        ret, _, _ = quota_limit_usage(self.mnode, self.volname, "/", "100MB")
        self.assertEqual(ret, 0, "Failed to set Quota for dir root")
        g.log.info("Successfully set quota limit for dir root")

        # Do some IO until hard limit is reached.
        cmd = ("/usr/bin/env python %s create_files "
               "-f 100 --fixed-file-size 1M --base-file-name file %s" %
               (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete and validate IO
        self.assertTrue(
            wait_for_io_to_complete(self.all_mounts_procs, self.mounts[0]),
            "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Validate quota
        ret = quota_validate(self.mnode,
                             self.volname,
                             path='/',
                             hard_limit=104857600,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/'")
        g.log.info("Quota Validated for path '/'")

        # Log Volume info and status before shrinking volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Shrink the volume.
        ret = shrink_volume(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to shrink volume on "
                              "volume %s", self.volname))
        g.log.info("Shrinking volume is successful on "
                   "volume %s", self.volname)

        # Log volume info and status after shrinking volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Perform rebalance start operation.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to  start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Rebalance started.")

        # Wait till rebalance ends.
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Validate quota
        ret = quota_validate(self.mnode,
                             self.volname,
                             path='/',
                             hard_limit=104857600,
                             sl_exceeded=True,
                             hl_exceeded=True)
        self.assertTrue(ret, "Quota validate Failed for '/'")
        g.log.info("Quota Validated for path '/'")
    def test_stack_overflow(self):
        """
        Description: Tests to check that there is no stack overflow
                     in readdirp with parallel-readdir enabled.
        Steps :
        1) Create a volume.
        2) Mount the volume using FUSE.
        3) Enable performance.parallel-readdir and
           performance.readdir-ahead on the volume.
        4) Create 10000 files on the mount point.
        5) Add-brick to the volume.
        6) Perform fix-layout on the volume (not rebalance).
        7) From client node, rename all the files, this will result in creation
           of linkto files on the newly added brick.
        8) Do ls -l (lookup) on the mount-point.
        """
        # pylint: disable=too-many-statements
        # Enable performance.parallel-readdir and
        # performance.readdir-ahead on the volume
        options = {"performance.parallel-readdir": "enable",
                   "performance.readdir-ahead": "enable"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, "Failed to set volume options")
        g.log.info("Successfully set volume options")

        # Creating 10000 files on volume root
        m_point = self.mounts[0].mountpoint
        command = 'touch ' + m_point + '/file{1..10000}_0'
        ret, _, _ = g.run(self.clients[0], command)
        self.assertEqual(ret, 0, "File creation failed on %s"
                         % m_point)
        g.log.info("Files successfully created on the mount point")

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s",
                              self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Perform fix-layout on the volume
        ret, _, _ = rebalance_start(self.mnode, self.volname, fix_layout=True)
        self.assertEqual(ret, 0, 'Failed to start rebalance')
        g.log.info('Rebalance is started')

        # Wait for fix-layout to complete
        ret = wait_for_fix_layout_to_complete(self.mnode, self.volname,
                                              timeout=3000)
        self.assertTrue(ret, ("Fix-layout failed on volume %s",
                              self.volname))
        g.log.info("Fix-layout is successful on "
                   "volume %s", self.volname)

        # Rename all files from client node
        for i in range(1, 10000):
            ret = move_file(self.clients[0],
                            '{}/file{}_0'.format(m_point, i),
                            '{}/file{}_1'.format(m_point, i))
            self.assertTrue(ret, "Failed to rename files")
        g.log.info("Files renamed successfully")

        # Perform lookup from the mount-point
        cmd = "ls -lR " + m_point
        ret, _, _ = g.run(self.mounts[0].client_system, cmd)
        self.assertEqual(ret, 0, "Failed to lookup")
        g.log.info("Lookup successful")
예제 #23
0
    def test_data_self_heal_algorithm_diff_heal_command(self):
        """
        Test Volume Option - 'cluster.data-self-heal-algorithm' : 'diff'

        Description:
        - set the volume option
        "metadata-self-heal": "off"
        "entry-self-heal": "off"
        "data-self-heal": "off"
        "data-self-heal-algorithm": "diff"
        "self-heal-daemon": "off"
        - create IO
        - calculate arequal
        - bring down all bricks processes from selected set
        - modify the data
        - get arequal before getting bricks online
        - bring bricks online
        - expand volume by adding bricks to the volume
        - do rebalance
        - set the volume option "self-heal-daemon": "on" and check for daemons
        - start healing
        - check if heal is completed
        - check for split-brain
        - calculate arequal and compare with arequal before bringing bricks
        offline and after bringing bricks online
        """
        # pylint: disable=too-many-branches,too-many-statements
        # Setting options
        g.log.info('Setting options...')
        options = {
            "metadata-self-heal": "off",
            "entry-self-heal": "off",
            "data-self-heal": "off",
            "data-self-heal-algorithm": "diff"
        }
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Options "
                   "'metadata-self-heal', "
                   "'entry-self-heal', "
                   "'data-self-heal', "
                   "'self-heal-daemon' "
                   "are set to 'off',"
                   "'data-self-heal-algorithm' "
                   "is set to 'diff' successfully")

        # Creating files on client side
        all_mounts_procs = []
        g.log.info("Generating data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        # Creating files
        command = "/usr/bin/env python %s create_files -f 100 %s" % (
            self.script_upload_path, self.mounts[0].mountpoint)

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "off"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'off' successfully")

        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = list(
            filter(None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                          bricks_to_bring_offline_dict['cold_tier_bricks'] +
                          bricks_to_bring_offline_dict['volume_bricks'])))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Modify the data
        all_mounts_procs = []
        g.log.info("Modifying data for %s:%s", self.mounts[0].client_system,
                   self.mounts[0].mountpoint)
        command = ("/usr/bin/env python %s create_files -f 100 "
                   "--fixed-file-size 1M %s" %
                   (self.script_upload_path, self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        all_mounts_procs.append(proc)

        # Validate IO
        self.assertTrue(validate_io_procs(all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")

        # Get arequal before getting bricks online
        g.log.info('Getting arequal before getting bricks online...')
        ret, result_before_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before getting bricks online '
                   'is successful')

        # Bring brick online
        g.log.info('Bringing bricks %s online...', bricks_to_bring_offline)
        ret = bring_bricks_online(self.mnode, self.volname,
                                  bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s online' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s online is successful',
                   bricks_to_bring_offline)

        # Expand volume by adding bricks to the volume
        g.log.info("Start adding bricks to volume...")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info("Expanding volume is successful on volume %s", self.volname)

        # Do rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, 'Failed to start rebalance')
        g.log.info('Rebalance is started')

        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Rebalance is not completed')
        g.log.info('Rebalance is completed successfully')

        # Setting options
        g.log.info('Setting options...')
        options = {"self-heal-daemon": "on"}
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, 'Failed to set options')
        g.log.info("Option 'self-heal-daemon' is set to 'on' successfully")

        # Wait for self-heal-daemons to be online
        g.log.info("Waiting for self-heal-daemons to be online")
        ret = is_shd_daemonized(self.all_servers)
        self.assertTrue(ret, "Either No self heal daemon process found")
        g.log.info("All self-heal-daemons are online")

        # Start healing
        ret = trigger_heal(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not started')
        g.log.info('Healing is started')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after getting bricks online
        g.log.info('Getting arequal after getting bricks online...')
        ret, result_after_online = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after getting bricks online '
                   'is successful')

        # Checking arequals before bringing bricks offline
        # and after bringing bricks online
        self.assertItemsEqual(result_before_online, result_after_online,
                              'Checksums are not equal')
        g.log.info('Checksums are equal')
예제 #24
0
    def test_expanding_volume_when_io_in_progress(self):
        # pylint: disable=too-many-statements
        # Log Volume Info and Status before expanding the volume.
        g.log.info("Logging volume info and Status before expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Expanding volume by adding bricks to the volume when IO in progress
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume while IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume while IO in progress on "
            "volume %s : Success", self.volname)

        # Wait for gluster processes to come online
        g.log.info("Wait for gluster processes to come online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info("Waiting for volume %s process to be online", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        log_volume_info_and_status(self.mnode, self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online", self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Started rebalance on the volume %s: Success", self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode,
                                             self.volname,
                                             timeout=1800)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance status on volume %s: Complete", self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for the "
                                  "volume %s", self.volname))
        g.log.info("Rebalance status on volume %s: Complete", self.volname)

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("IO on all mounts: Complete")

        # List all files and dirs created
        g.log.info("List all files and directories:")
        ret = list_all_files_and_dirs_mounts(self.mounts)
        self.assertTrue(ret, "Failed to list all files and dirs")
        g.log.info("List all files and directories: Success")

        # DHT Layout validation
        g.log.debug("Verifying hash layout values %s:%s", self.clients[0],
                    self.mounts[0].mountpoint)
        ret = validate_files_in_dir(self.clients[0],
                                    self.mounts[0].mountpoint,
                                    test_type=LAYOUT_IS_COMPLETE,
                                    file_type=FILETYPE_DIRS)
        self.assertTrue(ret, "LAYOUT_IS_COMPLETE: FAILED")
        g.log.info("LAYOUT_IS_COMPLETE: PASS")

        # Checking if there are any migration failures
        status = get_rebalance_status(self.mnode, self.volname)
        for each_node in status['node']:
            self.assertEqual(
                0, int(each_node['failures']),
                "Rebalance failed to migrate few files on %s" %
                each_node['nodeName'])
            g.log.info("No migration failures on %s", each_node['nodeName'])
예제 #25
0
    def test_rebalance_with_brick_down(self):
        """
        Rebalance with brick down in replica
        - Create a Replica volume.
        - Bring down one of the brick down in the replica pair
        - Do some IO and create files on the mount point
        - Add a pair of bricks to the volume
        - Initiate rebalance
        - Bring back the brick which was down.
        - After self heal happens, all the files should be present.
        """
        # Log the volume info and status before brick is down.
        log_volume_info_and_status(self.mnode, self.volname)

        # Bring one fo the bricks offline
        brick_list = get_all_bricks(self.mnode, self.volname)
        ret = bring_bricks_offline(self.volname, choice(brick_list))

        # Log the volume info and status after brick is down.
        log_volume_info_and_status(self.mnode, self.volname)

        # Create files at mountpoint.
        cmd = (
            "/usr/bin/env python %s create_files "
            "-f 2000 --fixed-file-size 1k --base-file-name file %s"
            % (self.script_upload_path, self.mounts[0].mountpoint))
        proc = g.run_async(
            self.mounts[0].client_system, cmd, user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)

        # Wait for IO to complete.
        self.assertTrue(wait_for_io_to_complete(self.all_mounts_procs,
                                                self.mounts[0]),
                        "IO failed on some of the clients")
        g.log.info("IO completed on the clients")

        # Compute the arequal checksum before bringing all bricks online
        arequal_before_all_bricks_online = collect_mounts_arequal(self.mounts)

        # Log the volume info and status before expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Expand the volume.
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume %s", self.volname))
        g.log.info("Expanding volume is successful on "
                   "volume %s", self.volname)

        # Log the voluem info after expanding volume.
        log_volume_info_and_status(self.mnode, self.volname)

        # Start Rebalance.
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete on the volume "
                              "%s", self.volname))
        g.log.info("Rebalance is successfully complete on the volume %s",
                   self.volname)

        # Log the voluem info and status before bringing all bricks online
        log_volume_info_and_status(self.mnode, self.volname)

        # Bring all bricks online.
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Not able to start volume with force option")
        g.log.info("Volume start with force option successful.")

        # Log the volume info and status after bringing all beicks online
        log_volume_info_and_status(self.mnode, self.volname)

        # Monitor heal completion.
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, "heal has not yet completed")
        g.log.info("Self heal completed")

        # Compute the arequal checksum after all bricks online.
        arequal_after_all_bricks_online = collect_mounts_arequal(self.mounts)

        # Comparing arequal checksum before and after the operations.
        self.assertEqual(arequal_before_all_bricks_online,
                         arequal_after_all_bricks_online,
                         "arequal checksum is NOT MATCHING")
        g.log.info("arequal checksum is SAME")
예제 #26
0
    def test_remove_brick_no_commit_followed_by_rebalance(self):
        """
        Description: Tests to check that there is no data loss when
                     remove-brick operation is stopped and then new bricks
                     are added to the volume.
         Steps :
         1) Create a volume.
         2) Mount the volume using FUSE.
         3) Create files and dirs on the mount-point.
         4) Calculate the arequal-checksum on the mount-point
         5) Start remove-brick operation on the volume.
         6) While migration is in progress, stop the remove-brick
            operation.
         7) Add-bricks to the volume and trigger rebalance.
         8) Wait for rebalance to complete.
         9) Calculate the arequal-checksum on the mount-point.
         """
        # Start IO on mounts
        m_point = self.mounts[0].mountpoint
        cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
               "--dir-length 10 --dir-depth 2 --max-num-of-dirs 1 "
               "--num-of-files 50 --file-type empty-file %s" %
               (self.script_upload_path, m_point))
        proc = g.run_async(self.mounts[0].client_system,
                           cmd,
                           user=self.mounts[0].user)
        g.log.info("IO on %s:%s is started successfully",
                   self.mounts[0].client_system, m_point)

        # Validate IO
        self.assertTrue(validate_io_procs([proc], self.mounts[0]),
                        "IO failed on some of the clients")

        # Calculate arequal-checksum before starting remove-brick
        ret, arequal_before = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Form bricks list for volume shrink
        remove_brick_list = form_bricks_list_to_remove_brick(self.mnode,
                                                             self.volname,
                                                             subvol_name=1)
        self.assertIsNotNone(remove_brick_list, ("Volume %s: Failed to "
                                                 "form bricks list for "
                                                 "shrink", self.volname))
        g.log.info("Volume %s: Formed bricks list for shrink", self.volname)

        # Shrink volume by removing bricks
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 "start")
        self.assertEqual(ret, 0, ("Volume %s shrink failed ", self.volname))
        g.log.info("Volume %s shrink started ", self.volname)

        # Log remove-brick status
        ret, out, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                   "status")
        self.assertEqual(ret, 0,
                         ("Remove-brick status failed on %s ", self.volname))

        # Check if migration is in progress
        if r'in progress' in out:
            # Stop remove-brick process
            g.log.info("Stop removing bricks from volume")
            ret, out, _ = remove_brick(self.mnode, self.volname,
                                       remove_brick_list, "stop")
            self.assertEqual(ret, 0, "Failed to stop remove-brick process")
            g.log.info("Stopped remove-brick process successfully")
        else:
            g.log.error("Migration for remove-brick is complete")

        # Sleep for 30 secs so that any running remove-brick process stops
        sleep(30)

        # Add bricks to the volume
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Volume %s: Add-brick failed", self.volname))
        g.log.info("Volume %s: Add-brick successful", self.volname)

        # Tigger rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, ("Volume %s: Failed to start rebalance", self.volname))
        g.log.info("Volume %s: Rebalance started ", self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance has not completed")
        g.log.info("Rebalance has completed successfully")

        # Calculate arequal-checksum on mount-point
        ret, arequal_after = collect_mounts_arequal(self.mounts[0])
        self.assertTrue(ret, "Collecting arequal-checksum failed")

        # Check if there is any data loss
        self.assertEqual(set(arequal_before), set(arequal_after),
                         ("There is data loss"))
        g.log.info("The checksum before and after rebalance is same."
                   " There is no data loss.")
    def test_subdir_with_addbrick(self):

        # pylint: disable=too-many-statements
        """
        Mount the volume
        Create 2 subdir on mount point, subdir1 and subdir2
        Auth allow - Client1(subdir1,subdir2),Client2(subdir1,subdir2)
        Mount the subdir1 on client 1 and subdir2 on client2
        Start IO's on both subdirs
        Perform add-brick and rebalance
        """

        # Create  directories subdir1 and subdir2 on mount point
        ret = mkdir(self.mounts[0].client_system,
                    "%s/subdir1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'subdir1' on"
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/subdir2" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'subdir2' on"
                  "volume %s from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        # unmount volume
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Volumes Unmount failed")
        g.log.info("Volumes Unmounted successfully")

        # Set authentication on the subdirectory subdir1
        # and subdir2 to access by 2 clients
        g.log.info(
            'Setting authentication on subdir1 and subdir2'
            'for client %s and %s', self.clients[0], self.clients[0])
        ret = set_auth_allow(
            self.volname, self.mnode, {
                '/subdir1': [self.clients[0], self.clients[1]],
                '/subdir2': [self.clients[0], self.clients[1]]
            })
        self.assertTrue(
            ret, 'Failed to set Authentication on volume %s' % self.volume)

        # Creating mount list for subdirectories
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/subdir1" % self.volname
        self.subdir_mounts[1].volname = "%s/subdir2" % self.volname

        # Mount Subdirectory "subdir1" on client 1 and "subdir2" on client 2
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount  %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted subdirectories on client1"
                   "and clients 2")

        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.subdir_mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.subdir_mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Start add-brick (subvolume-increase)
        g.log.info("Start adding bricks to volume when IO in progress")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to expand the volume when IO in "
                              "progress on volume %s", self.volname))
        g.log.info(
            "Expanding volume when IO in progress is successful on "
            "volume %s", self.volname)

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed on "
                              "volume %s", self.volname))
        g.log.info("Successful in logging volume info and status of volume %s",
                   self.volname)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("All process  for volume %s are not"
                              "online", self.volname))
        g.log.info("All volume %s processes are now online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to start rebalance on the volume "
                                  "%s", self.volname))
        g.log.info("Successfully started rebalance on the volume %s",
                   self.volname)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname, 600)
        self.assertTrue(
            ret, "Rebalance did not complete "
            "despite waiting for 10 minutes")
        g.log.info("Rebalance successfully completed on the volume %s",
                   self.volname)

        # Again validate if subdirectories are still mounted post add-brick

        for mount_obj in self.subdir_mounts:
            ret = mount_obj.is_mounted()
            self.assertTrue(
                ret, ("Subdirectory %s is not mounted on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Subdirectory %s is mounted on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully validated that subdirectories are mounted"
                   "on client1 and clients 2 post add-brick operation")
    def test_status_string(self):
        '''
        -> Create Volume
        -> Start rebalance
        -> Check task type in volume status
        -> Check task status string in volume status
        -> Check task type in volume status xml
        -> Check task status string in volume status xml
        -> Start Remove brick operation
        -> Check task type in volume status
        -> Check task status string in volume status
        -> Check task type in volume status xml
        -> Check task status string in volume status xml
        '''

        # Start rebalance
        ret, _, _ = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start rebalance for volume %s"
                         % self.volname)
        g.log.info("Rebalance started successfully on volume %s",
                   self.volname)

        # Wait for rebalance to complete
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, "Rebalance failed for volume %s" % self.volname)
        g.log.info("Rebalance completed successfully on volume %s",
                   self.volname)

        # Getting volume status after rebalance start
        ret, out, _ = volume_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                         % self.volname)
        g.log.info("Volume status successful on volume %s", self.volname)
        status_list = out.splitlines()

        # Verifying task type from volume status for rebalance
        self.assertIn('Rebalance', status_list[len(status_list) - 4],
                      "Incorrect task type found in volume status for %s"
                      % self.volname)
        g.log.info("Correct task type found in volume status for %s",
                   self.volname)

        # Verifying task status string in volume status for rebalance
        self.assertIn('completed', status_list[len(status_list) - 2],
                      "Incorrect task status found in volume status for %s"
                      % self.volname)
        g.log.info("Correct task status found in volume status for %s",
                   self.volname)

        # Getting volume status --xml after rebalance start
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options='tasks')

        # Verifying task type  from volume status --xml for rebalance
        self.assertEqual('Rebalance',
                         vol_status[self.volname]['task_status'][0]['type'],
                         "Incorrect task type found in volume status xml "
                         "for %s" % self.volname)
        g.log.info("Correct task type found in volume status xml for %s",
                   self.volname)

        # Verifying task status string from volume status --xml for rebalance
        self.assertEqual(
            'completed',
            vol_status[self.volname]['task_status'][0]['statusStr'],
            "Incorrect task status found in volume status "
            "xml for %s" % self.volname)
        g.log.info("Correct task status found in volume status xml %s",
                   self.volname)

        # Getting sub vols
        subvol_dict = get_subvols(self.mnode, self.volname)
        subvol = subvol_dict['volume_subvols'][1]

        # Perform remove brick start
        ret, _, _ = remove_brick(self.mnode, self.volname, subvol,
                                 'start', replica_count=3)
        self.assertEqual(ret, 0, "Failed to start remove brick operation "
                                 "for %s" % self.volname)
        g.log.info("Remove brick operation started successfully on volume %s",
                   self.volname)

        # Getting volume status after remove brick start
        ret, out, _ = volume_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to get volume status for volume %s"
                         % self.volname)
        g.log.info("Volume status successful on volume %s", self.volname)
        status_list = out.splitlines()

        # Verifying task type from volume status after remove brick start
        self.assertIn('Remove brick', status_list[len(status_list) - 8],
                      "Incorrect task type found in volume status for "
                      "%s" % self.volname)
        g.log.info("Correct task type found in volume status task for %s",
                   self.volname)

        # Verifying task status string in volume status after remove
        # brick start
        ret = False
        remove_status = ['completed', 'in progress']
        if (status_list[len(status_list) - 2].split(':')[1].strip() in
                remove_status):
            ret = True
        self.assertTrue(ret, "Incorrect task status found in volume status "
                             "task for %s" % self.volname)
        g.log.info("Correct task status found in volume status task for %s",
                   self.volname)

        # Getting volume status --xml after remove brick start
        vol_status = get_volume_status(self.mnode, self.volname,
                                       options='tasks')

        # Verifying task type  from volume status --xml after
        # remove brick start
        self.assertEqual('Remove brick',
                         vol_status[self.volname]['task_status'][0]['type'],
                         "Incorrect task type found in volume status xml for "
                         "%s" % self.volname)
        g.log.info("Correct task type found in volume status xml for %s",
                   self.volname)

        # Verifying task status string from volume status --xml
        # after remove brick start
        ret = False
        if (vol_status[self.volname]['task_status'][0]['statusStr'] in
                remove_status):
            ret = True
        self.assertTrue(ret, "Incorrect task status found in volume status "
                             "xml for %s" % self.volname)
        g.log.info("Correct task status found in volume status xml %s",
                   self.volname)
    def test_snap_rebalance(self):
        # pylint: disable=too-many-statements, too-many-locals
        """

        Snapshot rebalance contains tests which verifies snapshot clone,
        creating snapshot and performing I/O on mountpoints

        Steps:

        1. Create snapshot of a volume
        2. Activate snapshot
        3. Clone snapshot and Activate
        4. Mount Cloned volume
        5. Perform I/O on mount point
        6. Calculate areequal for bricks and mountpoints
        7. Add-brick more brick to cloned volume
        8. Initiate Re-balance
        9. validate areequal of bricks and mountpoints
        """

        # Creating snapshot:
        g.log.info("Starting to Create snapshot")
        ret, _, _ = snap_create(self.mnode, self.volname, self.snap)
        self.assertEqual(
            ret, 0, ("Failed to create snapshot for volume %s" % self.volname))
        g.log.info("Snapshot %s created successfully for volume %s", self.snap,
                   self.volname)

        # Activating snapshot
        g.log.info("Starting to Activate Snapshot")
        ret, _, _ = snap_activate(self.mnode, self.snap)
        self.assertEqual(ret, 0,
                         ("Failed to Activate snapshot %s" % self.snap))
        g.log.info("Snapshot %s activated successfully", self.snap)

        # Creating a Clone of snapshot:
        g.log.info("creating Clone Snapshot")
        ret, _, _ = snap_clone(self.mnode, self.snap, self.clone)
        self.assertEqual(ret, 0, ("Failed to clone volume %s" % self.clone))
        g.log.info("clone volume %s created successfully", self.clone)

        # Starting clone volume
        g.log.info("starting clone volume")
        ret, _, _ = volume_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start %s" % self.clone)
        g.log.info("clone volume %s started successfully", self.clone)

        # Mounting a clone volume
        g.log.info("Mounting created clone volume")
        ret, _, _ = mount_volume(self.clone, self.mount_type, self.mount1,
                                 self.mnode, self.clients[0])
        self.assertEqual(ret, 0,
                         "clone Volume mount failed for %s" % self.clone)
        g.log.info("cloned volume %s mounted Successfully", self.clone)

        # Validate clone volume mounted or not
        g.log.info("Validate clone volume mounted or not")
        ret = is_mounted(self.clone, self.mount1, self.mnode, self.clients[0],
                         self.mount_type)
        self.assertTrue(
            ret, "Cloned Volume not mounted on mount point: %s" % self.mount1)
        g.log.info("Cloned Volume %s mounted on %s", self.clone, self.mount1)

        # write files to mountpoint
        g.log.info("Starting IO on %s mountpoint...", self.mount1)
        all_mounts_procs = []
        cmd = ("/usr/bin/env python %s create_files "
               "-f 10 --base-file-name file %s" %
               (self.script_upload_path, self.mount1))
        proc = g.run(self.clients[0], cmd)
        all_mounts_procs.append(proc)

        self.check_arequal()

        # expanding volume
        g.log.info("Starting to expand volume")
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, "Failed to expand volume %s" % self.clone)
        g.log.info("Expand volume successful")

        ret, _, _ = rebalance_start(self.mnode, self.clone)
        self.assertEqual(ret, 0, "Failed to start rebalance")
        g.log.info("Successfully started rebalance on the "
                   "volume %s", self.clone)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        _, _, _ = rebalance_status(self.mnode, self.clone)

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.clone)
        self.assertTrue(ret, ("Rebalance is not yet complete "
                              "on the volume %s", self.clone))
        g.log.info("Rebalance is successfully complete on "
                   "the volume %s", self.clone)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.clone)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for "
                                  "the volume %s", self.clone))
        g.log.info("Successfully got rebalance status of the "
                   "volume %s", self.clone)

        self.check_arequal()
예제 #30
0
    def test_snapshot_while_rebalance(self):
        # pylint: disable=too-many-statements, missing-docstring
        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Create one snapshot of volume using no-timestamp option
        cmd_str = ("gluster snapshot create %s %s %s" %
                   ("snapy", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0,
                         ("Failed to create snapshot for %s" % self.volname))
        g.log.info("Snapshot snapy created successfully "
                   "for volume %s", self.volname)

        # Check for no of snaps using snap_list it should be 1
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            1, len(snap_list), "Expected 1 snapshot "
            "found %s snapshots" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")

        # validate snap name
        self.assertIn("snapy", snap_list, " snap not found")
        g.log.info("Successfully validated names of snap")

        # get the bricks for the volume
        g.log.info("Fetching bricks for the volume : %s", self.volname)
        bricks_list = get_all_bricks(self.mnode, self.volname)
        g.log.info("Brick List : %s", bricks_list)

        # expanding volume
        g.log.info("Start adding bricks to volume %s", self.volname)
        ret = expand_volume(self.mnode, self.volname, self.servers,
                            self.all_servers_info)
        self.assertTrue(ret, ("Failed to add bricks to "
                              "volume %s " % self.volname))
        g.log.info("Add brick successful")

        # Log Volume Info and Status after expanding the volume
        g.log.info("Logging volume info and Status after expanding volume")
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Logging volume info and status failed "
                              "on volume %s", self.volname))
        g.log.info(
            "Successful in logging volume info and status "
            "of volume %s", self.volname)

        # Verify volume's all process are online for 60 sec
        g.log.info("Verifying volume's all process are online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname,
                                                   60)
        self.assertTrue(ret, ("Volume %s : All process are not "
                              "online", self.volname))
        g.log.info("Successfully Verified volume %s "
                   "processes are online", self.volname)

        # Start Rebalance
        g.log.info("Starting Rebalance on the volume")
        ret, _, err = rebalance_start(self.mnode, self.volname)
        self.assertEqual(ret, 0,
                         ("Failed to start rebalance on "
                          "the volume %s with error %s" % (self.volname, err)))
        g.log.info("Successfully started rebalance on the "
                   "volume %s", self.volname)

        # Log Rebalance status
        g.log.info("Log Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to log rebalance status")
        g.log.info("successfully logged rebalance status")

        # Create one snapshot of volume during rebalance
        cmd_str = ("gluster snapshot create %s %s %s" %
                   ("snapy_rebal", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertNotEqual(ret, 0, ("successfully created 'snapy_rebal'"
                                     " for %s" % self.volname))
        g.log.info("Snapshot 'snapy_rebal' not created as rebalance is in "
                   "progress check log")
        # Check for no of snaps using snap_list it should be 1
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            1, len(snap_list), "Expected 1 snapshot "
            "found %s snapshot" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")

        # Wait for rebalance to complete
        g.log.info("Waiting for rebalance to complete")
        ret = wait_for_rebalance_to_complete(self.mnode, self.volname)
        self.assertTrue(ret, ("Rebalance is not yet complete "
                              "on the volume %s", self.volname))
        g.log.info("Rebalance is successfully complete on "
                   "the volume %s", self.volname)

        # Check Rebalance status after rebalance is complete
        g.log.info("Checking Rebalance status")
        ret, _, _ = rebalance_status(self.mnode, self.volname)
        self.assertEqual(ret, 0, ("Failed to get rebalance status for "
                                  "the volume %s", self.volname))
        g.log.info("Successfully got rebalance status of the "
                   "volume %s", self.volname)

        # Create one snapshot of volume post rebalance with same name
        cmd_str = ("gluster snapshot create %s %s %s" %
                   ("snapy_rebal", self.volname, "no-timestamp"))
        ret, _, _ = g.run(self.mnode, cmd_str)
        self.assertEqual(ret, 0,
                         ("Failed to create snapshot for %s" % self.volname))
        g.log.info(
            "Snapshot snapy_rebal created successfully "
            "for volume  %s", self.volname)

        # Check for no of snaps using snap_list it should be 2
        snap_list = get_snap_list(self.mnode)
        self.assertEqual(
            2, len(snap_list), "Expected 2 snapshots "
            "found %s snapshot" % len(snap_list))
        g.log.info("Successfully validated number of snaps.")

        # validate snap name
        self.assertIn("snapy_rebal", snap_list, " snap not found")
        g.log.info("Successfully validated names of snap")