    def test_gfind_when_node_down(self):
        """
        Verifying the glusterfind functionality when node is down.

        1. Create a volume
        2. Create a session on the volume
        3. Create various files from mount point
        4. Bring down glusterd on one of the node
        5. Perform glusterfind pre
        6. Perform glusterfind post
        7. Check the contents of outfile
        8. Create more files from mountpoint
        9. Reboot one of the nodes
        10. Perform glusterfind pre
        11. Perform glusterfind post
        12. Check the contents of outfile
        """

        # pylint: disable=too-many-statements
        # Create a session for the volume
        ret, _, _ = gfind_create(self.mnode, self.volname, self.session)
        self.assertEqual(ret, 0, ("Unexpected: Creation of a session for the "
                                  "volume %s failed" % self.volname))
        g.log.info("Successfully created a session for the volume %s",
                   self.volname)

        # Perform glusterfind list to check if session exists
        _, out, _ = gfind_list(self.mnode, volname=self.volname,
                               sessname=self.session)
        self.assertNotEqual(out, "No sessions found.",
                            "Failed to list the glusterfind session")
        g.log.info("Successfully listed the glusterfind session")

        self._perform_io_and_validate_presence_of_files()

        # Wait for changelog to get updated
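        # glusterfind works off the changelog, so give it a moment to record
        # the files created above before running 'pre'.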
        sleep(2)

        # Bring one of the node down.
        self.random_server = choice(self.servers[1:])
        ret = stop_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to stop glusterd on one node.")
        g.log.info("Successfully stopped glusterd on one node.")

        # Wait till glusterd is completely down.
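        # is_glusterd_running() is expected to return 1 once glusterd has
        # stopped on the node, so poll until that value is reported.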
        while is_glusterd_running(self.random_server) != 1:
            sleep(2)

        self._perform_glusterfind_pre_and_validate_outfile()

        # Perform glusterfind post for the session
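        # 'post' marks the changes reported by the preceding 'pre' as
        # consumed, so the next 'pre' should only report newer changes.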
        ret, _, _ = gfind_post(self.mnode, self.volname, self.session)
        self.assertEqual(ret, 0, ("Failed to perform glusterfind post"))
        g.log.info("Successfully performed glusterfind post")

        # Bring glusterd which was downed on a random node, up.
        ret = start_glusterd(self.random_server)
        self.assertTrue(ret, "Failed to start glusterd on %s"
                        % self.random_server)
        g.log.info("Successfully started glusterd on node : %s",
                   self.random_server)

        # Waiting for glusterd to start completely.
        ret = wait_for_glusterd_to_start(self.random_server)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.random_server)
        g.log.info("glusterd is started and running on %s",
                   self.random_server)

        # Perform IO
        self._perform_io_and_validate_presence_of_files()

        # Wait for changelog to get updated
        sleep(2)

        # Reboot one of the nodes.
        self.random_server = choice(self.servers[1:])
        ret = reboot_nodes(self.random_server)
        self.assertTrue(ret, "Failed to reboot the said node.")
        g.log.info("Successfully started reboot process on one node.")

        self._perform_glusterfind_pre_and_validate_outfile()

        # Perform glusterfind post for the session
        ret, _, _ = gfind_post(self.mnode, self.volname, self.session)
        self.assertEqual(ret, 0, ("Failed to perform glusterfind post"))
        g.log.info("Successfully performed glusterfind post")

        # Poll in 5 second intervals till the node has rebooted.
        counter = 0
        timeout = 300
        ret = False
        while counter < timeout:
            ret, _ = are_nodes_online(self.random_server)
            if not ret:
                g.log.info("Node's offline, Retrying after 5 seconds ...")
                sleep(5)
                counter += 5
            else:
                ret = True
                break
        self.assertTrue(ret, "Node is still offline.")
        g.log.info("Rebooted node is online")

        # Wait for glusterd to start completely
        ret = wait_for_glusterd_to_start(self.random_server)
        self.assertTrue(ret, "glusterd is not running on %s"
                        % self.random_server)
        g.log.info("glusterd is started and running on %s",
                   self.random_server)

    def test_write_io_mount_point_resumed_quorum_restored_x3(self):
        """
        - set cluster.quorum-type to auto
        - start I/O from the mount point
        - Do IO and check on subvols with two nodes to reboot
        (do for each subvol)
        - get files to delete/create for nodes to be offline
        - delete files from mountpoint
        - reboot nodes
        - creating files on nodes while rebooting
        - validate for rofs
        - wait for volume processes to be online
        - creating files on nodes after rebooting
        - validate IO
        - Do IO and check on subvols without nodes to reboot
        (do for each subvol)
        - get files to delete/create for nodes to be online
        - delete files from mountpoint
        - reboot nodes
        - creating files on online nodes while rebooting other nodes
        - validate IO
        - Do IO and check and reboot two nodes on all subvols
        - get files to delete/create for nodes to be offline
        - delete files from mountpoint
        - reboot nodes
        - creating files on nodes while rebooting
        - validate for rofs
        - wait for volume processes to be online
        - creating files on nodes after rebooting
        - validate IO
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        # set cluster.quorum-type to auto
        options = {"cluster.quorum-type": "auto"}
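        # With quorum-type 'auto' a replica set stays writable only while a
        # majority of its bricks are up; once quorum is lost clients get
        # EROFS, which the read-only checks below depend on.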
        g.log.info("setting cluster.quorum-type to auto on volume %s",
                   self.volname)
        ret = set_volume_options(self.mnode, self.volname, options)
        self.assertTrue(ret, ("Unable to set volume option %s for "
                              "volume %s" % (options, self.volname)))
        g.log.info("Successfully set %s for volume %s", options, self.volname)

        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = "/usr/bin/env python %s create_files -f 30 %s" % (
                self.script_upload_path, mount_obj.mountpoint)

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # Validate IO
        self.io_validation_complete = False
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        self.io_validation_complete = True

        # Do IO and check on subvols with nodes to reboot
        subvols_dict = get_subvols(self.mnode, self.volname)
        for subvol in subvols_dict['volume_subvols']:
            # define nodes to reboot
            brick_list = subvol[0:2]
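            # Rebooting the nodes hosting the first two bricks of a replica-3
            # subvol drops it below quorum, so writes to it should fail.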
            nodes_to_reboot = []
            for brick in brick_list:
                node, brick_path = brick.split(':')
                nodes_to_reboot.append(node)

            # get files to delete/create for nodes to be offline
            node, brick_path = brick_list[0].split(':')
            ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
            self.assertFalse(ret, 'Failed to ls files on %s' % node)
            file_list = brick_file_list.splitlines()

            # delete files from mountpoint
            for mount_obj in self.mounts:
                g.log.info("Deleting data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ('cd %s/ ; rm -rf %s' %
                       (mount_obj.mountpoint, ' '.join(file_list)))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(
                    ret, 'Failed to rm file on %s' % mount_obj.client_system)
            g.log.info('Files %s are deleted', file_list)

            # reboot nodes on subvol and wait while rebooting
            g.log.info("Rebooting the nodes %s", nodes_to_reboot)
            ret = reboot_nodes(nodes_to_reboot)
            self.assertTrue(ret,
                            'Failed to reboot nodes %s ' % nodes_to_reboot)

            # Creating files on nodes while rebooting
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Creating data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)

                # Creating files
                cmd = ("cd %s/ ;"
                       "touch %s" %
                       (mount_obj.mountpoint, ' '.join(file_list)))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)

            # Validate IO
            self.io_validation_complete = False
            g.log.info("Validating if IO failed with read-only filesystem")
            ret = is_io_procs_fail_with_rofs(self, self.all_mounts_procs,
                                             self.mounts)
            self.assertTrue(ret, ("Unexpected error and IO successful"
                                  " on read-only filesystem"))
            self.io_validation_complete = True
            g.log.info("EXPECTED: "
                       "Read-only file system in IO while creating file")

            # check if nodes are online
            counter = 0
            timeout = 300
            _rc = False
            while counter < timeout:
                ret, reboot_results = are_nodes_online(nodes_to_reboot)
                if not ret:
                    g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                    time.sleep(5)
                    counter = counter + 5
                else:
                    _rc = True
                    break

            if not _rc:
                for node in reboot_results:
                    if reboot_results[node]:
                        g.log.info("Node %s is online", node)
                    else:
                        g.log.error(
                            "Node %s is offline even after "
                            "%d minutes", node, timeout / 60.0)
            else:
                g.log.info("All nodes %s are up and running", nodes_to_reboot)

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online", self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

            # Creating files on nodes after rebooting
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Creating data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)

                # Creating files
                cmd = ("cd %s/ ;"
                       "touch %s" %
                       (mount_obj.mountpoint, ' '.join(file_list)))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)

            # Validate IO
            self.io_validation_complete = False
            self.assertTrue(
                validate_io_procs(self.all_mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True

        # Do IO and check on subvols without nodes to reboot
        subvols_dict = get_subvols(self.mnode, self.volname)
        for subvol in subvols_dict['volume_subvols']:
            # define nodes to reboot
            brick_list = subvol[0:2]
            nodes_to_reboot = []
            for brick in brick_list:
                node, brick_path = brick.split(':')
                nodes_to_reboot.append(node)

            # get files to delete/create for nodes to be online
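            # (these bricks belong to a subvol that stays online, so IO on
            # its files is expected to succeed while the other nodes reboot)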
            new_subvols_dict = get_subvols(self.mnode, self.volname)
            subvol_to_operate = new_subvols_dict['volume_subvols']
            subvol_to_operate.remove(subvol)
            brick_list_subvol_online = subvol_to_operate[0]

            node, brick_path_vol_online = \
                brick_list_subvol_online[0].split(':')
            ret, brick_file_list, _ = g.run(node,
                                            'ls %s' % brick_path_vol_online)
            self.assertFalse(ret, 'Failed to ls files on %s' % node)
            file_list = brick_file_list.splitlines()

            # delete files from mountpoint
            for mount_obj in self.mounts:
                g.log.info("Deleting data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ('cd %s/ ; rm -rf %s' %
                       (mount_obj.mountpoint, ' '.join(file_list)))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(
                    ret, 'Failed to rm file on %s' % mount_obj.client_system)
            g.log.info('Files %s are deleted', file_list)

            # reboot nodes on subvol and wait while rebooting
            g.log.info("Rebooting the nodes %s", nodes_to_reboot)
            ret = reboot_nodes(nodes_to_reboot)
            self.assertTrue(ret,
                            'Failed to reboot nodes %s ' % nodes_to_reboot)

            # Creating files on nodes while rebooting
            self.all_mounts_procs = []
            for mount_obj in self.mounts:
                g.log.info("Creating data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)

                # Creating files
                cmd = ("cd %s/ ;"
                       "touch %s" %
                       (mount_obj.mountpoint, ' '.join(file_list)))

                proc = g.run_async(mount_obj.client_system,
                                   cmd,
                                   user=mount_obj.user)
                self.all_mounts_procs.append(proc)

            # Validate IO
            self.io_validation_complete = False
            self.assertTrue(
                validate_io_procs(self.all_mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True

            # check if nodes are online
            counter = 0
            timeout = 300
            _rc = False
            while counter < timeout:
                ret, reboot_results = are_nodes_online(nodes_to_reboot)
                if not ret:
                    g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                    time.sleep(5)
                    counter = counter + 5
                else:
                    _rc = True
                    break

            if not _rc:
                for node in reboot_results:
                    if reboot_results[node]:
                        g.log.info("Node %s is online", node)
                    else:
                        g.log.error(
                            "Node %s is offline even after "
                            "%d minutes", node, timeout / 60.0)
            else:
                g.log.info("All nodes %s are up and running", nodes_to_reboot)

            # Wait for volume processes to be online
            g.log.info("Wait for volume processes to be online")
            ret = wait_for_volume_process_to_be_online(self.mnode,
                                                       self.volname)
            self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                                  "be online", self.volname))
            g.log.info(
                "Successful in waiting for volume %s processes to be "
                "online", self.volname)

            # Verify volume's all process are online
            g.log.info("Verifying volume's all process are online")
            ret = verify_all_process_of_volume_are_online(
                self.mnode, self.volname)
            self.assertTrue(
                ret, ("Volume %s : All process are not online" % self.volname))
            g.log.info("Volume %s : All process are online", self.volname)

        # Do IO and check and reboot nodes on all subvols
        subvols_dict = get_subvols(self.mnode, self.volname)
        nodes_to_reboot = []
        file_list_for_all_subvols = []
        for subvol in subvols_dict['volume_subvols']:
            # define nodes to reboot
            brick_list = subvol[0:2]
            for brick in brick_list:
                node, brick_path = brick.split(':')
                nodes_to_reboot.append(node)

            # get files to delete/create for nodes to be offline
            node, brick_path = brick_list[0].split(':')
            ret, brick_file_list, _ = g.run(node, 'ls %s' % brick_path)
            self.assertFalse(ret, 'Failed to ls files on %s' % node)
            file_list = brick_file_list.splitlines()
            file_list_for_all_subvols.append(file_list)

            # delete files from mountpoint
            for mount_obj in self.mounts:
                g.log.info("Deleting data for %s:%s", mount_obj.client_system,
                           mount_obj.mountpoint)
                cmd = ('cd %s/ ; rm -rf %s' %
                       (mount_obj.mountpoint, ' '.join(file_list)))
                ret, _, _ = g.run(mount_obj.client_system, cmd)
                self.assertFalse(ret, 'Failed to rm file on %s' % node)
            g.log.info('Files %s are deleted', file_list)

        # reboot nodes on subvol and wait while rebooting
        g.log.info("Rebooting the nodes %s", nodes_to_reboot)
        ret = reboot_nodes(nodes_to_reboot)
        self.assertTrue(ret, 'Failed to reboot nodes %s ' % nodes_to_reboot)

        # Creating files on nodes while rebooting
        all_mounts_procs, all_mounts_procs_1, all_mounts_procs_2 = [], [], []
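        # Keep one proc list per subvol so the read-only validation below can
        # be run separately for the files that map to each rebooted subvol.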
        # Create files for 1st subvol and get all_mounts_procs_1
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[0])))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs_1.append(proc)
        all_mounts_procs.append(all_mounts_procs_1)

        # Create files for 2nd subvol and get all_mounts_procs_2
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[1])))

            proc2 = g.run_async(mount_obj.client_system,
                                cmd,
                                user=mount_obj.user)
            all_mounts_procs_2.append(proc2)
        all_mounts_procs.append(all_mounts_procs_2)

        for mounts_procs in all_mounts_procs:
            # Validate IO
            self.io_validation_complete = False
            g.log.info("Validating if IO failed with read-only filesystem")
            ret = is_io_procs_fail_with_rofs(self, mounts_procs, self.mounts)
            self.assertTrue(ret, ("Unexpected error and IO successful"
                                  " on read-only filesystem"))
            self.io_validation_complete = True
            g.log.info("EXPECTED: "
                       "Read-only file system in IO while creating file")

        # check if nodes are online
        counter = 0
        timeout = 300
        _rc = False
        while counter < timeout:
            ret, reboot_results = are_nodes_online(nodes_to_reboot)
            if not ret:
                g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                time.sleep(5)
                counter = counter + 5
            else:
                _rc = True
                break

        if not _rc:
            for node in reboot_results:
                if reboot_results[node]:
                    g.log.info("Node %s is online", node)
                else:
                    g.log.error("Node %s is offline even after "
                                "%d minutes", node, timeout / 60.0)
        else:
            g.log.info("All nodes %s are up and running", nodes_to_reboot)

        # Wait for volume processes to be online
        g.log.info("Wait for volume processes to be online")
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))
        g.log.info(
            "Successful in waiting for volume %s processes to be "
            "online", self.volname)

        # Verify volume's all process are online
        g.log.info("Verifying volume's all process are online")
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))
        g.log.info("Volume %s : All process are online", self.volname)

        # Creating files on nodes after rebooting
        all_mounts_procs, all_mounts_procs_1, all_mounts_procs_2 = [], [], []
        # Create files for 1st subvol and get all_mounts_procs_1
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[0])))

            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs_1.append(proc)
        all_mounts_procs.append(all_mounts_procs_1)

        # Create files for 2nd subvol and get all_mounts_procs_2
        for mount_obj in self.mounts:
            g.log.info("Creating data for %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)

            # Creating files
            cmd = (
                "cd %s/ ;"
                "touch %s" %
                (mount_obj.mountpoint, ' '.join(file_list_for_all_subvols[1])))

            proc2 = g.run_async(mount_obj.client_system,
                                cmd,
                                user=mount_obj.user)
            all_mounts_procs_2.append(proc2)
        all_mounts_procs.append(all_mounts_procs_2)

        for mounts_procs in all_mounts_procs:
            # Validate IO
            self.io_validation_complete = False
            self.assertTrue(
                validate_io_procs(mounts_procs, self.mounts),
                "IO failed on some of the clients")
            self.io_validation_complete = True
    def test_heal_full_node_reboot(self):
        """
        - Create IO from mountpoint.
        - Calculate arequal from mount.
        - Delete data from backend from the EC volume.
        - Trigger heal full.
        - Disable Heal.
        - Again Enable and do Heal full.
        - Reboot a Node.
        - Calculate arequal checksum and compare it.
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Creating files on client side
        for mount_obj in self.mounts:
            g.log.info("Generating data for %s:%s",
                       mount_obj.client_system, mount_obj.mountpoint)

            # Create dirs with file
            g.log.info('Creating dirs with file...')
            command = ("/usr/bin/env python %s create_deep_dirs_with_files "
                       "-d 2 -l 2 -n 2 -f 20 %s" % (
                           self.script_upload_path,
                           mount_obj.mountpoint))

            proc = g.run_async(mount_obj.client_system, command,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)
        self.io_validation_complete = False

        # Validate IO
        g.log.info("Wait for IO to complete and validate IO ...")
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        self.io_validation_complete = True
        g.log.info("IO is successful on all mounts")

        # Get arequal before deleting the files from brick
        g.log.info('Getting arequal before deleting data from the bricks...')
        ret, result_before_killing_procs = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal before deleting data from the bricks '
                   'is successful')

        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']

        # Delete data from backend from the erasure node
        for subvol in subvols:
            erasure = subvol[-1]
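            # Wipe the last brick of each disperse set to simulate data loss
            # that the subsequent heal full should reconstruct.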
            g.log.info('Clearing ec brick %s', erasure)
            node, brick_path = erasure.split(':')
            ret, _, err = g.run(node, 'cd %s/ ; rm -rf *' % brick_path)
            self.assertFalse(ret, err)
            g.log.info('Clearing ec brick %s is successful', erasure)
        g.log.info('Clearing data from all the bricks is successful')

        # Trigger heal full
        ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger full heal.')

        # Disable Heal and Enable Heal Full Again
        g.log.info("Disabling Heal on the Servers")
        ret = disable_heal(self.mnode, self.volname)
        self.assertTrue(ret, "Disabling Failed")
        g.log.info("Healing is Now Disabled")

        g.log.info("Enabling Heal Now")
        ret = enable_heal(self.mnode, self.volname)
        self.assertTrue(ret, "Enabling Heal failed")
        g.log.info("Healing is now enabled")
        ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger full heal.')

        # Reboot A Node
        g.log.info("Rebooting Node from the Cluster")
        subvols_dict = get_subvols(self.mnode, self.volname)
        nodes_to_reboot = []
        for subvol in subvols_dict['volume_subvols']:
            # Define nodes to reboot
            brick_list = subvol[1:2]
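            # Reboot only the node hosting the second brick of each subvol so
            # the remaining bricks keep the volume available during the heal.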
            for brick in brick_list:
                node, brick_path = brick.split(':')
                if node not in nodes_to_reboot:
                    nodes_to_reboot.append(node)

        # Reboot nodes on subvol and wait while rebooting
        g.log.info("Rebooting the nodes %s", nodes_to_reboot)
        ret = reboot_nodes(nodes_to_reboot)
        self.assertTrue(ret, 'Failed to reboot nodes %s '
                        % nodes_to_reboot)

        # Check if nodes are online
        counter = 0
        timeout = 700
        _rc = False
        while counter < timeout:
            ret, reboot_results = are_nodes_online(nodes_to_reboot)
            if not ret:
                g.log.info("Nodes are offline, Retry after 5 seconds ... ")
                sleep(5)
                counter = counter + 5
            else:
                _rc = True
                break

        if not _rc:
            for node in reboot_results:
                if not reboot_results[node]:
                    g.log.error("Node %s is offline even after "
                                "%d minutes", node, timeout / 60.0)
        else:
            g.log.info("All nodes %s are up and running", nodes_to_reboot)

        # Trigger Heal Full
        ret = trigger_heal_full(self.mnode, self.volname)
        if not ret:
            sleep(10)
            ret = trigger_heal_full(self.mnode, self.volname)
        self.assertTrue(ret, 'Unable to trigger full heal.')

        # Monitor heal completion
        ret = monitor_heal_completion(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal has not yet completed')

        # Check if heal is completed
        ret = is_heal_complete(self.mnode, self.volname)
        self.assertTrue(ret, 'Heal is not complete')
        g.log.info('Heal is completed successfully')

        # Check for split-brain
        ret = is_volume_in_split_brain(self.mnode, self.volname)
        self.assertFalse(ret, 'Volume is in split-brain state')
        g.log.info('Volume is not in split-brain state')

        # Get arequal after healing
        g.log.info('Getting arequal after healing...')
        ret, result_after_healing = collect_mounts_arequal(self.mounts)
        self.assertTrue(ret, 'Failed to get arequal')
        g.log.info('Getting arequal after healing is successful')

        # Comparing arequals
        self.assertEqual(result_before_killing_procs, result_after_healing,
                         'Arequals before deleting data from the bricks '
                         'and after healing are not equal')
        g.log.info('Arequals before deleting data from the bricks '
                   'and after healing are equal')