Beispiel #1
0
    def test_volume_start_force(self):

        # get the brick list and create a volume
        num_of_bricks = len(self.servers)
        bricks_list = form_bricks_list(self.mnode, self.volname, num_of_bricks,
                                       self.servers, self.all_servers_info)

        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertEqual(ret, 0, "Failed to create volume")

        # remove brick path in one node and try to start the volume with force
        # and without force
        index_of_node = random.randint(0, len(bricks_list) - 1)
        brick_node = bricks_list[index_of_node]
        node = brick_node.split(":")[0]
        brick_path = brick_node.split(":")[1]
        cmd = "rm -rf %s" % brick_path
        ret, _, _ = g.run(node, cmd)
        self.assertEqual(ret, 0, "Failed to delete the brick")
        g.log.info("Deleted the brick successfully")

        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertNotEqual(ret, 0, "Volume start succeeded")

        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Volume start with force failed")

        # volume start force should not bring the brick online
        ret = are_bricks_online(self.mnode, self.volname,
                                [bricks_list[index_of_node]])
        self.assertFalse(ret, "Volume start force brought the bricks online")
        g.log.info("Volume start force didn't bring the brick online")
Beispiel #2
0
    def _validate_brick_down_scenario(self,
                                      validate_heal=False,
                                      monitor_heal=False):
        """
        Refactor of common steps across volume type for validating brick down
        scenario
        """
        if validate_heal:
            # Wait for ample amount of IO to be written to file
            sleep(180)

            # Validate heal info shows o/p and exit in <8s
            self._validate_heal()

        # Force start volume and verify all process are online
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, 'Unable to force start volume')

        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(
            ret, 'Not able to confirm all process of volume are online')

        if monitor_heal:
            # Wait for IO to be written to file
            sleep(30)

            # Monitor heal and validate data was appended successfully to file
            ret = monitor_heal_completion(self.mnode, self.volname)
            self.assertTrue(ret,
                            'Self heal is not completed post brick online')
    def test_ec_quota_errors_on_brick_down(self):
        """
        Steps:
        - Create and mount EC volume on two clients
        - Create two dirs on the mount and perform parallel IO from clients
        - Simulate disk full to validate EIO errors when no space is left
        - Remove simulation and apply different quota limits on two dirs
        - Bring down redundant bricks from the volume
        - Validate EDQUOTE error on reaching quota limit and extend quota to
          validate absence of EDQUOTE error
        - Reduce the quota limit and validate EDQUOTE error upon reaching quota
        - Remove quota limits, unmount and cleanup the volume
        """
        self._perform_quota_ops_before_brick_down()

        # Bring redundant bricks offline
        subvols = get_subvols(self.mnode, self.volname)
        self.assertTrue(subvols.get('volume_subvols'), 'Not able to get '
                        'subvols of the volume')
        self.offline_bricks = []
        for subvol in subvols['volume_subvols']:
            self.offline_bricks.extend(
                sample(subvol,
                       self.volume.get('voltype')['redundancy_count']))
        ret = bring_bricks_offline(self.volname, self.offline_bricks)
        self.assertTrue(ret, 'Not able to bring redundant bricks offline')

        self._perform_quota_ops_after_brick_down()

        # Bring offline bricks online
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, 'Not able to bring offline bricks online')
        self.offline_bricks *= 0

        g.log.info('Pass: Validating quota errors on brick down is successful')
    def test_nfs_ganesha_export_after_vol_restart(self):
        """
        Tests script to check nfs-ganesha volume gets exported after
        multiple volume restarts.
        """
        for i in range(1, 6):
            g.log.info(
                "Testing nfs ganesha export after volume stop/start."
                "Count : %s", str(i))

            # Stopping volume
            ret = volume_stop(self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to stop volume %s" % self.volname))

            # Waiting for few seconds for volume unexport. Max wait time is
            # 120 seconds.
            ret = wait_for_nfs_ganesha_volume_to_get_unexported(
                self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to unexport volume %s after "
                                  "stopping volume" % self.volname))

            # Starting volume
            ret = volume_start(self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to start volume %s" % self.volname))

            # Waiting for few seconds for volume export. Max wait time is
            # 120 seconds.
            ret = wait_for_nfs_ganesha_volume_to_get_exported(
                self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to export volume %s after "
                                  "starting volume" % self.volname))
Beispiel #5
0
def snap_restore_complete(mnode, volname, snapname):
    """stops the volume restore the snapshot and starts the volume

    Args:
        mnode (str): Node on which cmd has to be executed.
        volname (str): volume name
        snapname (str): snapshot name

    Returns:
        bool: True on success, False on failure

    Example:
        snap_restore_complete(mnode, testvol, testsnap)

    """

    # Stopping volume before snap restore
    ret = volume_stop(mnode, volname)
    if not ret:
        g.log.error("Failed to stop volume %s before restoring snapshot "
                    "%s in node %s" % (volname, snapname, mnode))
        return False
    ret, _, _ = snap_restore(mnode, snapname)
    if ret != 0:
        g.log.error("snapshot restore cli execution failed")
        return False

    # Starting volume after snap restore
    ret = volume_start(mnode, volname)
    if not ret:
        g.log.error("Failed to start volume %s after restoring snapshot "
                    "%s in node %s" % (volname, snapname, mnode))
        return False
    return True
    def test_volume_status_xml(self):

        # create a two node cluster
        ret = peer_probe_servers(self.servers[0], self.servers[1])
        self.assertTrue(
            ret,
            "Peer probe failed to %s from %s" % (self.mnode, self.servers[1]))

        # create a distributed volume with single node
        number_of_bricks = 1
        servers_info_from_single_node = {}
        servers_info_from_single_node[self.servers[0]] = self.all_servers_info[
            self.servers[0]]

        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       number_of_bricks, self.servers[0],
                                       servers_info_from_single_node)
        ret, _, _ = volume_create(self.servers[0], self.volname, bricks_list)
        self.assertEqual(ret, 0, "Volume creation failed")
        g.log.info("Volume %s created successfully", self.volname)

        # Get volume status
        ret, _, err = volume_status(self.servers[1], self.volname)
        self.assertNotEqual(ret, 0, ("Unexpected: volume status is success for"
                                     " %s, even though volume is not started "
                                     "yet" % self.volname))
        self.assertIn("is not started", err, ("volume status exited with"
                                              " incorrect error message"))

        # Get volume status with --xml
        vol_status = get_volume_status(self.servers[1], self.volname)
        self.assertIsNone(vol_status, ("Unexpected: volume status --xml for %s"
                                       " is success even though the volume is"
                                       " not stared yet" % self.volname))

        # start the volume
        ret, _, _ = volume_start(self.servers[1], self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)

        # Get volume status
        ret, _, _ = volume_status(self.servers[1], self.volname)
        self.assertEqual(ret, 0,
                         ("Failed to get volume status for %s" % self.volname))

        # Get volume status with --xml
        vol_status = get_volume_status(self.servers[1], self.volname)
        self.assertIsNotNone(vol_status,
                             ("Failed to get volume "
                              "status --xml for %s" % self.volname))

        # Verify there are no crashes while executing gluster volume status
        status = True
        glusterd_log = (self._get_test_specific_glusterd_log(
            self.mnode).split("\n"))
        for line in glusterd_log:
            if ' E ' in glusterd_log:
                status = False
                g.log.info("Unexpected! Error found %s", line)

        self.assertTrue(status, "Error found in glusterd logs")
    def tearDown(self):

        # start the volume, it should succeed
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Volume stop failed")

        # start glusterd on all servers
        ret = start_glusterd(self.servers)
        if not ret:
            raise ExecutionError("Failed to start glusterd on all servers")

        for server in self.servers:
            ret = wait_for_peers_to_connect(server, self.servers)
            if not ret:
                ret = peer_probe_servers(server, self.servers)
                if not ret:
                    raise ExecutionError("Failed to peer probe all "
                                         "the servers")

        # clean up all volumes
        vol_list = get_volume_list(self.mnode)
        if vol_list is None:
            raise ExecutionError("Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            if not ret:
                raise ExecutionError("Unable to delete volume % s" % volume)
            g.log.info("Volume deleted successfully : %s", volume)

        self.get_super_method(self, 'tearDown')()
    def setUp(self):
        """Setup Volume"""
        # Calling GlusterBaseClass setUp
        self.get_super_method(self, 'setUp')()

        # Fetching all the parameters for volume_create
        list_of_three_servers = []
        server_info_for_three_nodes = {}

        for server in self.servers[0:3]:
            list_of_three_servers.append(server)
            server_info_for_three_nodes[server] = self.all_servers_info[server]

        bricks_list = form_bricks_list(self.mnode, self.volname, 3,
                                       list_of_three_servers,
                                       server_info_for_three_nodes)
        # Creating 2nd volume
        self.volname_2 = "test_volume"
        ret, _, _ = volume_create(self.mnode, self.volname_2, bricks_list)
        self.assertFalse(ret, "Volume creation failed")
        g.log.info("Volume %s created successfully", self.volname_2)
        ret, _, _ = volume_start(self.mnode, self.volname_2)
        if ret:
            raise ExecutionError("Failed to start volume {}".format(
                self.volname_2))
        # Setup and mount the volume
        ret = self.setup_volume_and_mount_volume(mounts=self.mounts)
        if not ret:
            raise ExecutionError("Failed to setup volume and mount it")
    def test_nfs_ganesha_exportID_after_vol_restart(self):
        """
        Tests script to check nfs-ganesha volume gets exported with same
        Export ID after multiple volume restarts.
        Steps:
        1. Create and Export the Volume
        2. Stop and Start the volume multiple times
        3. Check for export ID
           Export ID should not change
        """
        for i in range(1, 4):
            g.log.info(
                "Testing nfs ganesha exportID after volume stop and "
                "start.\n Count : %s", str(i))

            # Stopping volume
            ret = volume_stop(self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to stop volume %s" % self.volname))
            g.log.info("Volume is stopped")

            # Waiting for few seconds for volume unexport. Max wait time is
            # 120 seconds.
            ret = wait_for_nfs_ganesha_volume_to_get_unexported(
                self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to unexport volume %s after "
                                  "stopping volume" % self.volname))
            g.log.info("Volume is unexported via ganesha")

            # Starting volume
            ret = volume_start(self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to start volume %s" % self.volname))
            g.log.info("Volume is started")

            # Waiting for few seconds for volume export. Max wait time is
            # 120 seconds.
            ret = wait_for_nfs_ganesha_volume_to_get_exported(
                self.mnode, self.volname)
            self.assertTrue(ret, ("Failed to export volume %s after "
                                  "starting volume" % self.volname))
            g.log.info("Volume is exported via ganesha")

            # Check for Export ID
            cmd = ("cat /run/gluster/shared_storage/nfs-ganesha/exports/"
                   "export.*.conf | grep Export_Id | grep -Eo '[0-9]'")
            ret, out, _ = g.run(self.mnode, cmd)
            self.assertEqual(
                ret, 0,
                "Unable to get export ID of the volume %s" % self.volname)
            g.log.info("Successful in getting volume export ID: %s " % out)
            self.assertEqual(
                out.strip("\n"), "2",
                "Export ID changed after export and unexport "
                "of volume: %s" % out)
            g.log.info("Export ID of volume is same after export "
                       "and export: %s" % out)
Beispiel #10
0
    def test_default_log_level_of_cli(self):
        """
        Test Case:
        1) Create and start a volume
        2) Run volume info command
        3) Run volume status command
        4) Run volume stop command
        5) Run volume start command
        6) Check the default log level of cli.log
        """
        # Check volume info operation
        ret, _, _ = volume_info(self.mnode)
        self.assertEqual(
            ret, 0, "Failed to execute volume info"
            " command on node: %s" % self.mnode)
        g.log.info(
            "Successfully executed the volume info command on"
            " node: %s", self.mnode)

        # Check volume status operation
        ret, _, _ = volume_status(self.mnode)
        self.assertEqual(
            ret, 0, "Failed to execute volume status command"
            " on node: %s" % self.mnode)
        g.log.info(
            "Successfully executed the volume status command"
            " on node: %s", self.mnode)

        # Check volume stop operation
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to stop the volume %s on node: %s" %
            (self.volname, self.mnode))
        g.log.info("Successfully stopped the volume %s on node: %s",
                   self.volname, self.mnode)

        # Check volume start operation
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(
            ret, 0, "Failed to start the volume %s on node: %s" %
            (self.volname, self.mnode))
        g.log.info("Successfully started the volume %s on node: %s",
                   self.volname, self.mnode)

        # Check the default log level of cli.log
        cmd = 'cat /var/log/glusterfs/cli.log | grep -F "] D [" | wc -l'
        ret, out, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, "Failed to execute the command")
        self.assertEqual(
            int(out), 0, "Unexpected: Default log level of "
            "cli.log is not INFO")
        g.log.info("Default log level of cli.log is INFO as expected")
Beispiel #11
0
    def test_volume_create_start_stop_start(self):
        """Tests volume create, start, status, stop, start.
        Also Validates whether all the brick process are running after the
        start of the volume.
        """
        # Verify volume processes are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online" %
                              self.volname))
        g.log.info("Successfully Verified volume %s processes are online",
                   self.volname)

        # Stop Volume
        ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname)
        g.log.info("Successfully stopped volume %s", self.volname)

        # Start Volume
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)
        g.log.info("Successfully started volume %s", self.volname)

        # Wait for volume processes to be online
        ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to wait for volume %s processes to "
                              "be online", self.volname))

        # Log Volume Info and Status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to Log volume %s info and status",
                              self.volname))
        g.log.info("Successfully logged Volume %s Info and Status",
                   self.volname)

        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(ret, ("Volume %s : All process are not online" %
                              self.volname))
        g.log.info("Successfully verified volume %s processes are online",
                   self.volname)

        # Log Volume Info and Status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to Log volume %s info and status",
                              self.volname))
        g.log.info("Successfully logged Volume %s Info and Status",
                   self.volname)

        # Check if glusterd is running on all servers(expected: active)
        ret = is_glusterd_running(self.servers)
        self.assertEqual(ret, 0, "Glusterd is not running on all servers")
        g.log.info("Glusterd is running on all the servers")
Beispiel #12
0
    def test_volume_absent_bricks(self):
        """
        Test Case:
        1) Create Volume
        2) Remove any one Brick directory
        3) Start Volume and compare the failure message
        4) Check the gluster volume status nad compare the status message
        """
        # Fetching the brick list
        brick_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(brick_list, "Failed to get the bricks in"
                             " the volume")

        # Command for removing brick directory
        random_brick = random.choice(brick_list)
        node, brick_path = random_brick.split(r':')
        cmd = 'rm -rf ' + brick_path

        # Removing brick directory of one node
        ret, _, _ = g.run(node, cmd)
        self.assertEqual(ret, 0, "Failed to remove brick dir")
        g.log.info("Brick directory removed successfully")

        # Starting volume
        ret, _, err = volume_start(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Unexpected: Volume started successfully "
            "even though brick directory is removed "
            "for %s" % self.volname)
        g.log.info("Expected: Failed to start volume %s", self.volname)

        # Checking volume start failed message
        msg = "Failed to find brick directory"
        self.assertIn(
            msg, err, "Expected message is %s but volume start "
            "command failed with this "
            "message %s" % (msg, err))
        g.log.info("Volume start failed with correct error message %s", err)

        # Checking Volume status
        ret, _, err = volume_status(self.mnode, self.volname)
        self.assertNotEqual(
            ret, 0, "Success in getting volume status, volume "
            "status should fail when volume is in "
            "not started state ")
        g.log.info("Failed to get volume status which is expected")

        # Checking volume status message
        msg = ' '.join(['Volume', self.volname, 'is not started'])
        self.assertIn(msg, err, 'Incorrect error message for gluster vol '
                      'status')
        g.log.info("Correct error message for volume status")
 def _guster_volume_cleanup(self, vol_name):
     # Check brick status. Restart vol if bricks are offline
     openshift_ops.switch_oc_project(self._master,
                                     self._registry_project_name)
     brick_list = brick_libs.get_all_bricks("auto_get_gluster_endpoint",
                                            vol_name)
     self.assertIsNotNone(brick_list, "Failed to get brick list")
     check_bricks = brick_libs.are_bricks_online(
         "auto_get_gluster_endpoint", vol_name, brick_list)
     if not check_bricks:
         start_vol, _, _ = volume_ops.volume_start(
             "auto_get_gluster_endpoint", vol_name, force=True)
         self.assertFalse(start_vol, "Failed to start volume using force")
 def tearDown(self):
     if self.offline_bricks:
         ret, _, _ = volume_start(self.mnode, self.volname, force=True)
         if ret:
             raise ExecutionError('Not able to force start volume to bring '
                                  'offline bricks online')
     if self.all_mount_procs:
         ret = wait_for_io_to_complete(self.all_mount_procs, self.mounts)
         if not ret:
             raise ExecutionError('Wait for IO completion failed')
     ret = self.unmount_volume_and_cleanup_volume(mounts=self.mounts)
     if not ret:
         raise ExecutionError('Failed to unmount and cleanup volume')
     self.get_super_method(self, 'tearDown')()
Beispiel #15
0
    def test_volume_status_xml(self):

        # create a two node cluster
        ret = peer_probe_servers(self.servers[0], self.servers[1])
        self.assertTrue(
            ret,
            "Peer probe failed to %s from %s" % (self.mnode, self.servers[1]))

        # create a distributed volume with single node
        number_of_bricks = 1
        servers_info_from_single_node = {}
        servers_info_from_single_node[self.servers[0]] = self.all_servers_info[
            self.servers[0]]

        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       number_of_bricks, self.servers[0],
                                       servers_info_from_single_node)
        ret, _, _ = volume_create(self.servers[0], self.volname, bricks_list)
        self.assertEqual(ret, 0, "Volume creation failed")
        g.log.info("Volume %s created successfully", self.volname)

        # Get volume status
        ret, _, err = volume_status(self.servers[1], self.volname)
        self.assertNotEqual(ret, 0, ("Unexpected: volume status is success for"
                                     " %s, even though volume is not started "
                                     "yet" % self.volname))
        self.assertIn("is not started", err, ("volume status exited with"
                                              " incorrect error message"))

        # Get volume status with --xml
        vol_status = get_volume_status(self.servers[1], self.volname)
        self.assertIsNone(vol_status, ("Unexpected: volume status --xml for %s"
                                       " is success even though the volume is"
                                       " not stared yet" % self.volname))

        # start the volume
        ret, _, _ = volume_start(self.servers[1], self.volname)
        self.assertEqual(ret, 0, "Failed to start volume %s" % self.volname)

        # Get volume status
        ret, _, _ = volume_status(self.servers[1], self.volname)
        self.assertEqual(ret, 0,
                         ("Failed to get volume status for %s" % self.volname))

        # Get volume status with --xml
        vol_status = get_volume_status(self.servers[1], self.volname)
        self.assertIsNotNone(vol_status,
                             ("Failed to get volume "
                              "status --xml for %s" % self.volname))
    def test_brickmux_brick_process(self):
        """
        1. Create a 3 node cluster.
        2. Set cluster.brick-multiplex to enable.
        3. Create 15 volumes of type replica 1x3.
        4. Start all the volumes one by one.
        5. While the volumes are starting reboot one node.
        6. check for pifof glusterfsd single process should be visible
        """
        volume_config = {
            'name': 'test',
            'servers': self.all_servers[:3],
            'voltype': {
                'type': 'replicated',
                'replica_count': 3,
                'transport': 'tcp'
            }
        }

        servers = self.all_servers[:3]
        # Volume Creation
        ret = bulk_volume_creation(self.mnode,
                                   14,
                                   self.all_servers_info,
                                   volume_config,
                                   is_create_only=True)
        self.assertTrue(ret, "Volume creation Failed")
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'enable'})
        self.assertTrue(ret)
        vol_list = get_volume_list(self.mnode)
        for volname in vol_list:
            if vol_list.index(volname) == 2:
                g.run(servers[2], "reboot")
            ret, out, _ = volume_start(self.mnode, volname)
            self.assertFalse(ret,
                             "Failed to start volume '{}'".format(volname))

        for _ in range(10):
            sleep(1)
            _, node_result = are_nodes_online(servers[2])
            self.assertTrue(node_result, "Node is not Online")

        for server in servers:
            ret, out, _ = g.run(server, "pgrep glusterfsd")
            out = out.split()
            self.assertFalse(ret, "Failed to get 'glusterfsd' pid")
            self.assertEqual(len(out), 1,
                             "More then 1 brick process  seen in glusterfsd")
Beispiel #17
0
    def _volume_operations_in_loop(self):
        """ Create, start, stop and delete 100 volumes in a loop """
        # Create and start 100 volumes in a loop
        self.volume_config = {
            'name': 'volume-',
            'servers': self.servers,
            'voltype': {
                'type': 'distributed-replicated',
                'dist_count': 2,
                'replica_count': 3
            },
        }

        ret = bulk_volume_creation(self.mnode, 100, self.all_servers_info,
                                   self.volume_config, "", False, True)
        self.assertTrue(ret, "Failed to create volumes")

        self.volume_present = True

        g.log.info("Successfully created all the volumes")

        # Start 100 volumes in loop
        for i in range(100):
            self.volname = "volume-%d" % i
            ret, _, _ = volume_start(self.mnode, self.volname)
            self.assertEqual(ret, 0,
                             "Failed to start volume: %s" % self.volname)

        g.log.info("Successfully started all the volumes")

        # Stop 100 volumes in loop
        for i in range(100):
            self.volname = "volume-%d" % i
            ret, _, _ = volume_stop(self.mnode, self.volname)
            self.assertEqual(ret, 0,
                             "Failed to stop volume: %s" % self.volname)

        g.log.info("Successfully stopped all the volumes")

        # Delete 100 volumes in loop
        for i in range(100):
            self.volname = "volume-%d" % i
            ret = volume_delete(self.mnode, self.volname)
            self.assertTrue(ret, "Failed to delete volume: %s" % self.volname)

        self.volume_present = False

        g.log.info("Successfully deleted all the volumes")
Beispiel #18
0
        def create_snap(value, volname, snap, clone, counter):
            # Creating snapshots
            g.log.info("Starting to Create snapshot")
            for snap_count in value:
                ret, _, _ = snap_create(self.mnode, volname,
                                        "snap%s" % snap_count)
                self.assertEqual(ret, 0, ("Failed to create "
                                          "snapshot for volume %s" % volname))
                g.log.info(
                    "Snapshot snap%s created successfully"
                    " for volume %s", snap_count, volname)

            # Validate snapshot list
            g.log.info("Starting to list all snapshots")
            ret, out, _ = snap_list(self.mnode)
            self.assertEqual(
                ret, 0, ("Failed to list snapshot of volume %s" % volname))
            v_list = out.strip().split('\n')
            self.assertEqual(len(v_list), counter, "Failed to validate "
                             "all snapshots")
            g.log.info(
                "Snapshot listed and  Validated for volume %s"
                " successfully", volname)
            if counter == 40:
                return 0

            # Creating a Clone of snapshot:
            g.log.info("Starting to Clone Snapshot")
            ret, _, _ = snap_clone(self.mnode, snap, clone)
            self.assertEqual(ret, 0, "Failed to clone %s" % clone)
            g.log.info("Clone volume %s created successfully", clone)

            # Start cloned volumes
            g.log.info("starting to Validate clone volumes are started")
            ret, _, _ = volume_start(self.mnode, clone)
            self.assertEqual(ret, 0, "Failed to start %s" % clone)
            g.log.info("%s started successfully", clone)

            # log Cloned Volume information
            g.log.info("Logging Volume info and Volume status")
            ret = log_volume_info_and_status(self.mnode, clone)
            self.assertTrue("Failed to Log Info and Status of Volume %s" %
                            clone)
            g.log.info("Successfully Logged Info and Status")
            return counter + 10
def restart_block_hosting_volume(gluster_pod,
                                 block_hosting_vol,
                                 sleep_time=120,
                                 hostname=None):
    """restars block hosting volume service

    Args:
        hostname (str): hostname on which gluster pod exists
        gluster_pod (podcmd | str): gluster pod class object has gluster
                                    pod and ocp master node or gluster
                                    pod name
        block_hosting_vol (str): name of block hosting volume
    """
    gluster_pod = _get_gluster_pod(gluster_pod, hostname)

    gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol)
    if not gluster_volume_status:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " %
               (block_hosting_vol, gluster_volume_status))

    ret, out, err = volume_stop(gluster_pod, block_hosting_vol)
    if ret != 0:
        err_msg = "failed to stop gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    # Explicit wait to stop ios and pvc creation for 2 mins
    time.sleep(sleep_time)
    ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    ret, out, err = volume_status(gluster_pod, block_hosting_vol)
    if ret != 0:
        err_msg = ("failed to get status for gluster volume %s on pod %s "
                   "error: %s" % (block_hosting_vol, gluster_pod, err))
        g.log.error(err_msg)
        raise AssertionError(err_msg)
Beispiel #20
0
 def _enable_xlator(self, xlator, parent, xtype, xsfail=False):
     self.verified_bricks = []
     option = '{0}{1}.{2}'.format(xtype,
                                  '.xlator' if xtype == 'user' else '',
                                  xlator)
     self._set_and_assert_volume_option(option, parent)
     ret, _, _ = volume_stop(self.mnode, self.volname)
     self.assertEqual(ret, 0, 'Unable to stop volume')
     sleep(self.timeout)
     ret, _, _ = volume_start(self.mnode, self.volname)
     if xsfail:
         self.assertNotEqual(ret, 0, 'Expected volume start to fail')
         return
     self.assertEqual(ret, 0, 'Unable to start a stopped volume')
     self._verify_position(xlator, parent, xtype)
     ret = wait_for_volume_process_to_be_online(self.mnode, self.volname)
     self.assertTrue(
         ret, 'Not all volume processes are online after '
         'starting a stopped volume')
     sleep(self.timeout)
def restart_brick_process(hostname, gluster_pod, block_hosting_vol):
    """restarts brick process of block hosting volumes

    Args:
        hostname (str): hostname on which gluster pod exists
        gluster_pod (podcmd | str): gluster pod class object has gluster
                                    pod and ocp master node or gluster
                                    pod name
        block_hosting_vol (str): block hosting volume name
    """
    pids = get_brick_pids(gluster_pod, block_hosting_vol, hostname)

    # using count variable to limit the max pod process kill to 2
    count = 0
    killed_process = {}
    pid_keys = pids.keys()
    oc_pods = oc_get_pods(hostname)
    for pod in oc_pods.keys():
        if not (oc_pods[pod]["ip"] in pid_keys and count <= 1):
            continue

        ret, out, err = oc_rsh(hostname, pod,
                               "kill -9 %s" % pids[oc_pods[pod]["ip"]])
        if ret != 0:
            err_msg = "failed to kill process id %s error: %s" % (
                pids[oc_pods[pod]["ip"]], err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        killed_process[pod] = pids[oc_pods[pod]["ip"]]
        count += 1

    for pod, pid in killed_process.items():
        wait_for_process_to_kill_on_pod(pod, pid, hostname)

    ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s on pod %s error: %s" % (
            block_hosting_vol, gluster_pod, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)
    def bricks_online_and_volume_reset(cls):
        """
        reset the volume if any bricks are offline.
        waits for all bricks to be online and resets
        volume options set
        """
        bricks_offline = get_offline_bricks_list(cls.mnode, cls.volname)
        if bricks_offline is not None:
            ret = volume_start(cls.mnode, cls.volname, force=True)
            if not ret:
                raise ExecutionError("Failed to force start volume"
                                     "%s" % cls.volname)
        ret = wait_for_bricks_to_be_online(cls.mnode, cls.volname)
        if not ret:
            raise ExecutionError("Failed to bring bricks online"
                                 "for volume %s" % cls.volname)

        ret, _, _ = volume_reset(cls.mnode, cls.volname, force=True)
        if ret:
            raise ExecutionError("Failed to reset volume %s" % cls.volname)
        g.log.info("Successful in volume reset %s", cls.volname)
    def test_volume_create_start_stop_start(self):
        """Tests volume create, start, status, stop, start.
        Also Validates whether all the brick process are running after the
        start of the volume.
        """
        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))

        # Stop Volume
        ret, _, _ = volume_stop(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Failed to stop volume %s" % self.volname)

        # Start Volume
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Unable to start volume %s" % self.volname)

        time.sleep(15)

        # Log Volume Info and Status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Logging volume %s info and status failed" % self.volname))

        # Verify volume's all process are online
        ret = verify_all_process_of_volume_are_online(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Volume %s : All process are not online" % self.volname))

        # Log Volume Info and Status
        ret = log_volume_info_and_status(self.mnode, self.volname)
        self.assertTrue(
            ret, ("Logging volume %s info and status failed" % self.volname))

        # Verify all glusterd's are running
        ret = is_glusterd_running(self.servers)
        self.assertEqual(
            ret, 0, ("glusterd not running on all servers: %s" % self.servers))
def restart_file_volume(file_vol, sleep_time=120):
    """Restart file volume (stop and start volume).

    Args:
        file_vol (str): name of a file volume
    """
    gluster_volume_status = get_volume_status(
        "auto_get_gluster_endpoint", file_vol)
    if not gluster_volume_status:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " % (
        file_vol, gluster_volume_status)
    )

    ret, out, err = volume_stop("auto_get_gluster_endpoint", file_vol)
    if ret != 0:
        err_msg = "Failed to stop gluster volume %s. error: %s" % (
            file_vol, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    # Explicit wait to stop ios and pvc creation for 2 mins
    time.sleep(sleep_time)

    ret, out, err = volume_start(
        "auto_get_gluster_endpoint", file_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s error: %s" % (
            file_vol, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    ret, out, err = volume_status("auto_get_gluster_endpoint", file_vol)
    if ret != 0:
        err_msg = ("Failed to get status for gluster volume %s error: %s" % (
            file_vol, err))
        g.log.error(err_msg)
        raise AssertionError(err_msg)
def restart_file_volume(file_vol, sleep_time=120):
    """Restars file volume service.

    Args:
        file_vol (str): name of a file volume
    """
    gluster_volume_status = get_volume_status(
        "auto_get_gluster_endpoint", file_vol)
    if not gluster_volume_status:
        raise AssertionError("failed to get gluster volume status")

    g.log.info("Gluster volume %s status\n%s : " % (
        file_vol, gluster_volume_status)
    )

    ret, out, err = volume_stop("auto_get_gluster_endpoint", file_vol)
    if ret != 0:
        err_msg = "Failed to stop gluster volume %s. error: %s" % (
            file_vol, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    # Explicit wait to stop ios and pvc creation for 2 mins
    time.sleep(sleep_time)

    ret, out, err = volume_start(
        "auto_get_gluster_endpoint", file_vol, force=True)
    if ret != 0:
        err_msg = "failed to start gluster volume %s error: %s" % (
            file_vol, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)

    ret, out, err = volume_status("auto_get_gluster_endpoint", file_vol)
    if ret != 0:
        err_msg = ("Failed to get status for gluster volume %s error: %s" % (
            file_vol, err))
        g.log.error(err_msg)
        raise AssertionError(err_msg)
    def tearDown(self):

        for number in range(1, 4):

            # Starting volumes.
            self.volume['name'] = ("test_volume_%s" % number)
            self.volname = ("test_volume_%s" % number)
            ret, _, _ = volume_start(self.mnode, self.volname)
            g.log.info("Volume %s started was successfully", self.volname)

            # Cleaning up volumes.
            ret = cleanup_volume(self.mnode, self.volname)
            if not ret:
                raise ExecutionError("Failed to cleanup %s" % self.volname)
            g.log.info("Successfully cleaned volume: %s", self.volname)

        # Setting cluster.brick-multiplex to disable.
        ret = set_volume_options(self.mnode, 'all',
                                 {'cluster.brick-multiplex': 'disable'})
        if not ret:
            raise ExecutionError("Failed to disable cluster.brick-multiplex")
        g.log.info("Successfully set cluster.brick-multiplex to disable.")

        self.get_super_method(self, 'tearDown')()
Beispiel #27
0
    def test_brick_process_not_started_on_read_only_node_disks(self):
        """
        * create volume and start
        * kill one brick
        * start IO
        * unmount the brick directory from node
        * remount the brick directory with read-only option
        * start the volume with "force" option
        * check for error 'posix: initializing translator failed' in log file
        * remount the brick directory with read-write option
        * start the volume with "force" option
        * validate IO
        """
        # pylint: disable=too-many-locals,too-many-statements
        # Select bricks to bring offline
        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
            self.mnode, self.volname))
        bricks_to_bring_offline = filter(
            None, (bricks_to_bring_offline_dict['hot_tier_bricks'] +
                   bricks_to_bring_offline_dict['cold_tier_bricks'] +
                   bricks_to_bring_offline_dict['volume_bricks']))

        # Bring brick offline
        g.log.info('Bringing bricks %s offline...', bricks_to_bring_offline)
        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
        self.assertTrue(
            ret, 'Failed to bring bricks %s offline' % bricks_to_bring_offline)

        ret = are_bricks_offline(self.mnode, self.volname,
                                 bricks_to_bring_offline)
        self.assertTrue(ret,
                        'Bricks %s are not offline' % bricks_to_bring_offline)
        g.log.info('Bringing bricks %s offline is successful',
                   bricks_to_bring_offline)

        # Creating files for all volumes
        for mount_obj in self.mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("python %s create_files -f 100 %s/%s/test_dir" %
                   (self.script_upload_path, mount_obj.mountpoint,
                    mount_obj.client_system))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            self.all_mounts_procs.append(proc)

        # umount brick
        brick_node, volume_brick = bricks_to_bring_offline[0].split(':')
        node_brick = '/'.join(volume_brick.split('/')[0:3])
        g.log.info('Start umount brick %s...', node_brick)
        ret, _, _ = g.run(brick_node, 'umount -l %s' % node_brick)
        self.assertFalse(ret, 'Failed to umount brick %s' % node_brick)
        g.log.info('Successfully umounted %s', node_brick)

        # get time before remount the directory and checking logs for error
        g.log.info('Getting time before remount the directory and '
                   'checking logs for error...')
        _, time_before_checking_logs, _ = g.run(brick_node, 'date -u +%s')
        g.log.info('Time before remount the directory and checking logs - %s',
                   time_before_checking_logs)

        # remount the directory with read-only option
        g.log.info('Start remount brick %s with read-only option...',
                   node_brick)
        ret, _, _ = g.run(brick_node, 'mount -o ro %s' % node_brick)
        self.assertFalse(ret, 'Failed to remount brick %s' % node_brick)
        g.log.info('Successfully remounted %s with read-only option',
                   node_brick)

        # start volume with "force" option
        g.log.info('starting volume with "force" option...')
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertFalse(
            ret,
            'Failed to start volume %s with "force" option' % self.volname)
        g.log.info('Successfully started volume %s with "force" option',
                   self.volname)

        # check logs for an 'initializing translator failed' error
        g.log.info(
            "Checking logs for an 'initializing translator failed' "
            "error for %s brick...", node_brick)
        error_msg = 'posix: initializing translator failed'
        cmd = ("cat /var/log/glusterfs/bricks/%s-%s-%s.log | "
               "grep '%s'" %
               (volume_brick.split('/')[-3], volume_brick.split('/')[-2],
                volume_brick.split('/')[-1], error_msg))
        ret, log_msgs, _ = g.run(brick_node, cmd)
        log_msg = log_msgs.rstrip().split('\n')[-1]

        self.assertTrue(error_msg in log_msg, 'No errors in logs')
        g.log.info('EXPECTED: %s', error_msg)

        # get time from log message
        log_time_msg = log_msg.split('E')[0][1:-2].split('.')[0]
        log_time_msg_converted = calendar.timegm(
            time.strptime(log_time_msg, '%Y-%m-%d %H:%M:%S'))
        g.log.info('Time_msg from logs - %s ', log_time_msg)
        g.log.info('Time from logs - %s ', log_time_msg_converted)

        # get time after remount the directory checking logs for error
        g.log.info('Getting time after remount the directory and '
                   'checking logs for error...')
        _, time_after_checking_logs, _ = g.run(brick_node, 'date -u +%s')
        g.log.info('Time after remount the directory and checking logs - %s',
                   time_after_checking_logs)

        # check time periods
        g.log.info('Checking if an error is in right time period...')
        self.assertTrue(
            int(time_before_checking_logs) <= int(log_time_msg_converted) <=
            int(time_after_checking_logs),
            'Expected error is not in right time period')
        g.log.info('Expected error is in right time period')

        # umount brick
        g.log.info('Start umount brick %s...', node_brick)
        ret, _, _ = g.run(brick_node, 'umount -l %s' % node_brick)
        self.assertFalse(ret, 'Failed to umount brick %s' % node_brick)
        g.log.info('Successfully umounted %s', node_brick)

        # remount the directory with read-write option
        g.log.info('Start remount brick %s with read-write option...',
                   node_brick)
        ret, _, _ = g.run(brick_node, 'mount %s' % node_brick)
        self.assertFalse(ret, 'Failed to remount brick %s' % node_brick)
        g.log.info('Successfully remounted %s with read-write option',
                   node_brick)

        # start volume with "force" option
        g.log.info('starting volume with "force" option...')
        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertFalse(
            ret,
            'Failed to start volume %s with "force" option' % self.volname)
        g.log.info('Successfully started volume %s with "force" option',
                   self.volname)

        # Validate IO
        g.log.info('Validating IO on all mounts')
        self.assertTrue(validate_io_procs(self.all_mounts_procs, self.mounts),
                        "IO failed on some of the clients")
        g.log.info('Successfully Validated IO on all mounts')
        self.io_validation_complete = True
Beispiel #28
0
    def test_remove_brick(self):
        """
        In this test case:
        1. Trusted storage Pool of 4 nodes
        2. Create a distributed-replicated volumes with 4 bricks
        3. Start the volume
        4. Fuse mount the gluster volume on out of trusted nodes
        5. Create some data file
        6. Start remove-brick operation for one replica pair
        7. Restart glusterd on all nodes
        8. Try to commit the remove-brick operation while rebalance
           is in progress, it should fail
        """

        # pylint: disable=too-many-statements
        my_servers = self.servers[0:4]
        my_server_info = {}
        for server in self.servers[0:4]:
            my_server_info[server] = self.all_servers_info[server]
        for index in range(1, 4):
            ret, _, _ = peer_probe(self.servers[0], self.servers[index])
            self.assertEqual(ret, 0, ("peer probe from %s to %s is failed",
                                      self.servers[0], self.servers[index]))
            g.log.info("peer probe is success from %s to "
                       "%s", self.servers[0], self.servers[index])

        # Validating whether the peer are connected or not
        # In jenkins This case is failing saying peers are not in connected
        # state, that is reason adding a check whether peers are connected
        # or not
        count = 0
        while count < 30:
            ret = is_peer_connected(self.mnode, my_servers)
            if ret:
                g.log.info("Peers are in connected state")
                break
            sleep(3)
            count = count + 1
        self.assertTrue(ret, "Some peers are not in connected state")

        self.volname = "testvol"
        bricks_list = form_bricks_list(self.mnode, self.volname, 4, my_servers,
                                       my_server_info)
        g.log.info("Creating a volume %s ", self.volname)
        kwargs = {}
        kwargs['replica_count'] = 2
        ret = volume_create(self.mnode,
                            self.volname,
                            bricks_list,
                            force=False,
                            **kwargs)
        self.assertEqual(ret[0], 0, ("Unable"
                                     "to create volume %s" % self.volname))
        g.log.info("Volume created successfully %s", self.volname)

        ret, _, _ = volume_start(self.mnode, self.volname, False)
        self.assertEqual(ret, 0, ("Failed to start the "
                                  "volume %s", self.volname))
        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        # Mounting a volume
        ret, _, _ = mount_volume(self.volname,
                                 mtype=self.mount_type,
                                 mpoint=self.mounts[0].mountpoint,
                                 mserver=self.mnode,
                                 mclient=self.mounts[0].client_system)
        self.assertEqual(ret, 0, ("Volume %s is not mounted") % self.volname)
        g.log.info("Volume mounted successfully : %s", self.volname)

        self.all_mounts_procs = []
        # Creating files
        command = ("cd %s/ ; "
                   "for i in `seq 1 10` ; "
                   "do mkdir l1_dir.$i ; "
                   "for j in `seq 1 5` ; "
                   "do mkdir l1_dir.$i/l2_dir.$j ; "
                   "for k in `seq 1 10` ; "
                   "do dd if=/dev/urandom of=l1_dir.$i/l2_dir.$j/test.$k "
                   "bs=128k count=$k ; "
                   "done ; "
                   "done ; "
                   "done ; " % (self.mounts[0].mountpoint))

        proc = g.run_async(self.mounts[0].client_system,
                           command,
                           user=self.mounts[0].user)
        self.all_mounts_procs.append(proc)
        self.io_validation_complete = False
        # Validate IO
        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
        self.io_validation_complete = True
        self.assertTrue(ret, "IO failed on some of the clients")

        remove_brick_list = bricks_list[2:4]
        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'start')
        self.assertEqual(ret, 0, "Failed to start remove brick operation")
        g.log.info("Remove bricks operation started successfully")
        g.log.info("Restart glusterd on servers %s", self.servers)
        ret = restart_glusterd(self.servers)
        self.assertTrue(
            ret, ("Failed to restart glusterd on servers %s", self.servers))
        g.log.info("Successfully restarted glusterd on servers %s",
                   self.servers)

        ret, _, _ = remove_brick(self.mnode, self.volname, remove_brick_list,
                                 'commit')
        self.assertNotEqual(ret, 0, "Remove brick commit ops should be fail")
        g.log.info("Remove bricks commit operation failure is expected")
def restart_gluster_vol_brick_processes(ocp_client_node, file_vol,
                                        gluster_nodes):
    """Restarts brick process of a file volume.

    Args:
        ocp_client_node (str): Node to execute OCP commands on.
        file_vol (str): file volume name.
        gluster_nodes (str/list): One or several IPv4 addresses of Gluster
            nodes, where 'file_vol' brick processes must be recreated.
    """
    if not isinstance(gluster_nodes, (list, set, tuple)):
        gluster_nodes = [gluster_nodes]

    # Get Gluster vol brick PIDs
    gluster_volume_status = get_gluster_vol_status(file_vol)
    pids = ()
    for gluster_node in gluster_nodes:
        pid = None
        for g_node, g_node_data in gluster_volume_status.items():
            if g_node != gluster_node:
                continue
            for process_name, process_data in g_node_data.items():
                if not process_name.startswith("/var"):
                    continue
                pid = process_data["pid"]
                # When birck is down, pid of the brick is returned as -1.
                # Which is unexepeted situation. So, add appropriate assertion.
                assert pid != "-1", (
                    "Got unexpected PID (-1) for '%s' gluster vol on '%s' "
                    "node." % file_vol, gluster_node)
        assert pid, ("Could not find 'pid' in Gluster vol data for '%s' "
                     "Gluster node. Data: %s" % (
                         gluster_node, gluster_volume_status))
        pids.append((gluster_node, pid))

    # Restart Gluster vol brick processes using found PIDs
    for gluster_node, pid in pids:
        cmd = "kill -9 %s" % pid
        cmd_run_on_gluster_pod_or_node(ocp_client_node, cmd, gluster_node)

    # Wait for Gluster vol brick processes to be recreated
    for gluster_node, pid in pids:
        killed_pid_cmd = "ps -eaf | grep %s | grep -v grep | awk '{print $2}'"
        _waiter = waiter.Waiter(timeout=60, interval=2)
        for w in _waiter:
            result = cmd_run_on_gluster_pod_or_node(
                ocp_client_node, killed_pid_cmd, gluster_node)
            if result.strip() == pid:
                continue
            g.log.info("Brick process '%s' was killed successfully on '%s'" % (
                pid, gluster_node))
            break
        if w.expired:
            error_msg = ("Process ID '%s' still exists on '%s' after waiting "
                         "for it 60 seconds to get killed." % (
                            pid, gluster_node))
            g.log.error(error_msg)
            raise exceptions.ExecutionError(error_msg)

    # Start volume after gluster vol brick processes recreation
    ret, out, err = volume_start(
        "auto_get_gluster_endpoint", file_vol, force=True)
    if ret != 0:
        err_msg = "Failed to start gluster volume %s on %s. error: %s" % (
            file_vol, gluster_node, err)
        g.log.error(err_msg)
        raise AssertionError(err_msg)
    def test_nfs_ganesha_subdirectory_mount_from_server_side(self):
        """
        Tests script to verify nfs ganesha subdirectory mount from server
        side succeeds and able to write IOs.
        """
        subdir_to_mount = self.subdir_path.replace(self.mounts[0].mountpoint,
                                                   '')
        if not subdir_to_mount.startswith(os.path.sep):
            subdir_to_mount = os.path.sep + subdir_to_mount

        subdir = self.volname + subdir_to_mount

        for mount_obj in self.sub_dir_mounts:
            mount_obj.volname = subdir

        export_file = ("/var/run/gluster/shared_storage/nfs-ganesha/exports/"
                       "export.%s.conf" % self.volname)
        cmd = (r"sed -i  s/'Path = .*'/'Path = \"\/%s\";'/g %s" %
               (re.escape(subdir), export_file))
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, ("Unable to change Path info to %s in %s" %
                                  ("/" + subdir, export_file)))

        cmd = ("sed -i  's/volume=.*/& \\n volpath=\"%s\";/g' %s" %
               (re.escape(subdir_to_mount), export_file))
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, ("Unable to add volpath info to %s in %s" %
                                  ("/" + subdir, export_file)))

        cmd = (r"sed -i  s/'Pseudo=.*'/'Pseudo=\"\/%s\";'/g %s" %
               (re.escape(subdir), export_file))
        ret, _, _ = g.run(self.mnode, cmd)
        self.assertEqual(ret, 0, ("Unable to change pseudo Path info to "
                                  "%s in %s" % ("/" + subdir, export_file)))

        # Stop and start volume to take the modified export file to effect.
        # Stopping volume
        ret = volume_stop(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to stop volume %s" % self.volname))

        # Waiting for few seconds for volume unexport. Max wait time is
        # 120 seconds.
        ret = wait_for_nfs_ganesha_volume_to_get_unexported(
            self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to unexport volume %s after "
                              "stopping volume" % self.volname))

        # Starting volume
        ret = volume_start(self.mnode, self.volname)
        self.assertTrue(ret, ("Failed to start volume %s" % self.volname))

        # Waiting for few seconds for volume export. Max wait time is
        # 120 seconds.
        ret = wait_for_nfs_ganesha_volume_to_get_exported(self.mnode, subdir)
        self.assertTrue(ret, ("Failed to export sub directory %s after "
                              "starting volume" % subdir))

        for mount_obj in self.sub_dir_mounts:
            if not mount_obj.is_mounted():
                ret = mount_obj.mount()
                self.assertTrue(
                    ret, ("Unable to mount volume '%s:%s' "
                          "on '%s:%s'" %
                          (mount_obj.server_system, mount_obj.volname,
                           mount_obj.client_system, mount_obj.mountpoint)))

        ret = self.start_and_wait_for_io_to_complete(self.sub_dir_mounts)
        self.assertTrue(ret, ("Failed to write IOs when sub directory is"
                              " mounted from server side"))
        g.log.info("IO successful on clients")
    def test_volume_set_ops_sub_dirs_mounted(self):
        """
        Check volume start/volume stop/volume reset operations while sub-dirs
        are mounted

        Steps:
        1. Create two sub-directories on mounted volume.
        2. Unmount volume from clients.
        3. Mount each sub-directory to two different clients.
        4. Perform IO on mounts.
        5. Perform volume stop operation.
        6. Perform volume start operation.
        7. Perform volume reset operation.
        """
        # Creating two sub directories on mounted volume
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d1" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd1' in volume %s "
                  "from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))
        ret = mkdir(self.mounts[0].client_system,
                    "%s/d2" % self.mounts[0].mountpoint)
        self.assertTrue(
            ret, ("Failed to create directory 'd2' in volume %s "
                  "from client %s" %
                  (self.mounts[0].volname, self.mounts[0].client_system)))

        # Unmounting volumes
        ret = self.unmount_volume(self.mounts)
        self.assertTrue(ret, "Failed to un mount one or more volumes")
        g.log.info("Successfully un mounted all volumes")

        # Mounting one sub directory on each client.
        self.subdir_mounts = [
            copy.deepcopy(self.mounts[0]),
            copy.deepcopy(self.mounts[1])
        ]
        self.subdir_mounts[0].volname = "%s/d1" % self.volname
        self.subdir_mounts[1].volname = "%s/d2" % self.volname
        for mount_obj in self.subdir_mounts:
            ret = mount_obj.mount()
            self.assertTrue(
                ret, ("Failed to mount sub directory %s on client"
                      " %s" % (mount_obj.volname, mount_obj.client_system)))
            g.log.info("Successfully mounted sub directory %s on client %s",
                       mount_obj.volname, mount_obj.client_system)
        g.log.info("Successfully mounted sub directories to clients.")

        # Start IO on all mounts.
        all_mounts_procs = []
        count = 1
        for mount_obj in self.subdir_mounts:
            g.log.info("Starting IO on %s:%s", mount_obj.client_system,
                       mount_obj.mountpoint)
            cmd = ("/usr/bin/env python %s create_deep_dirs_with_files "
                   "--dirname-start-num %d "
                   "--dir-depth 2 "
                   "--dir-length 10 "
                   "--max-num-of-dirs 5 "
                   "--num-of-files 5 %s" %
                   (self.script_upload_path, count, mount_obj.mountpoint))
            proc = g.run_async(mount_obj.client_system,
                               cmd,
                               user=mount_obj.user)
            all_mounts_procs.append(proc)
            count = count + 10

        # Validate IO
        g.log.info("Validating IO's")
        ret = validate_io_procs(all_mounts_procs, self.subdir_mounts)
        self.assertTrue(ret, "IO failed on some of the clients")
        g.log.info("Successfully validated all io's")

        # Get stat of all the files/dirs created.
        g.log.info("Get stat of all the files/dirs created.")
        ret = get_mounts_stat(self.subdir_mounts)
        self.assertTrue(ret, "Stat failed on some of the clients")
        g.log.info("Successfully got stat of all files/dirs created")

        # Stop volume
        g.log.info("Stopping volume: %s", self.volname)
        ret, _, _ = volume_stop(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to stop volume: %s" % self.volname)

        # Start volume
        g.log.info("Starting volume again: %s", self.volname)
        ret, _, _ = volume_start(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to start volume: %s" % self.volname)

        # Reset volume
        g.log.info("Resetting volume: %s", self.volname)
        ret, _, _ = volume_reset(self.mnode, self.volname)
        self.assertEqual(ret, 0, "Failed to reset volume: %s" % self.volname)
    def test_volume_create(self):

        # create and start a volume
        self.volume['name'] = "first_volume"
        self.volname = "first_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(ret, "Failed to create and start volume")

        # bring a brick down and volume start force should bring it to online

        g.log.info("Get all the bricks of the volume")
        bricks_list = get_all_bricks(self.mnode, self.volname)
        self.assertIsNotNone(bricks_list, "Failed to get the brick list")
        g.log.info("Successfully got the list of bricks of volume")

        ret = bring_bricks_offline(self.volname, bricks_list[0:2])
        self.assertTrue(ret, "Failed to bring down the bricks")
        g.log.info("Successfully brought the bricks down")

        ret, _, _ = volume_start(self.mnode, self.volname, force=True)
        self.assertEqual(ret, 0, "Failed to start the volume")
        g.log.info("Volume start with force is success")

        ret = wait_for_bricks_to_be_online(self.mnode, self.volname)
        self.assertTrue(ret, "Failed to bring the bricks online")
        g.log.info("Volume start with force successfully brought all the "
                   "bricks online")

        # create volume with previously used bricks and different volume name
        self.volname = "second_volume"
        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertNotEqual(
            ret, 0, "Expected: It should fail to create a "
            "volume with previously used bricks. Actual:"
            "Successfully created the volume with previously"
            " used bricks")
        g.log.info("Failed to create the volume with previously used bricks")

        # create a volume with already existing volume name
        self.volume['name'] = "first_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertTrue(
            ret, "Expected: It should fail to create a volume"
            " with already existing volume name. Actual: "
            "Successfully created the volume with "
            "already existing volname")
        g.log.info("Failed to create the volume with already existing volname")

        # creating a volume with non existing brick path should fail

        self.volname = "second_volume"
        bricks_list = form_bricks_list(self.mnode, self.volname,
                                       len(self.servers), self.servers,
                                       self.all_servers_info)
        nonexisting_brick_index = random.randint(0, len(bricks_list) - 1)
        non_existing_brick = bricks_list[nonexisting_brick_index].split(":")[0]
        non_existing_path = ":/brick/non_existing_path"
        non_existing_brick = non_existing_brick + non_existing_path
        bricks_list[nonexisting_brick_index] = non_existing_brick

        ret, _, _ = volume_create(self.mnode, self.volname, bricks_list)
        self.assertNotEqual(
            ret, 0, "Expected: Creating a volume with non "
            "existing brick path should fail. Actual: "
            "Successfully created the volume with "
            "non existing brick path")
        g.log.info("Failed to create the volume with non existing brick path")

        # cleanup the volume and peer detach all servers. form two clusters,try
        # to create a volume with bricks whose nodes are in different clusters

        # cleanup volumes
        vol_list = get_volume_list(self.mnode)
        self.assertIsNotNone(vol_list, "Failed to get the volume list")

        for volume in vol_list:
            ret = cleanup_volume(self.mnode, volume)
            self.assertTrue(ret, "Unable to delete volume % s" % volume)

        # peer detach all servers
        ret = peer_detach_servers(self.mnode, self.servers)
        self.assertTrue(ret, "Peer detach to all servers is failed")
        g.log.info("Peer detach to all the servers is success")

        # form cluster 1
        ret, _, _ = peer_probe(self.servers[0], self.servers[1])
        self.assertEqual(
            ret, 0, "Peer probe from %s to %s is failed" %
            (self.servers[0], self.servers[1]))
        g.log.info("Peer probe is success from %s to %s" %
                   (self.servers[0], self.servers[1]))

        # form cluster 2
        ret, _, _ = peer_probe(self.servers[2], self.servers[3])
        self.assertEqual(
            ret, 0, "Peer probe from %s to %s is failed" %
            (self.servers[2], self.servers[3]))
        g.log.info("Peer probe is success from %s to %s" %
                   (self.servers[2], self.servers[3]))

        # Creating a volume with bricks which are part of another
        # cluster should fail
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertFalse(
            ret, "Expected: Creating a volume with bricks"
            " which are part of another cluster should fail."
            " Actual: Successfully created the volume with "
            "bricks which are part of another cluster")
        g.log.info("Failed to create the volume with bricks which are "
                   "part of another cluster")

        # form a cluster, bring a node down. try to create a volume when one of
        # the brick node is down
        ret, _, _ = peer_detach(self.servers[2], self.servers[3])
        self.assertEqual(ret, 0, "Peer detach is failed")
        g.log.info("Peer detach is success")

        ret = peer_probe_servers(self.mnode, self.servers)
        self.assertTrue(ret, "Peer probe is failed")
        g.log.info("Peer probe to all the servers is success")

        random_server = self.servers[random.randint(1, len(self.servers) - 1)]
        ret = stop_glusterd(random_server)
        self.assertTrue(ret, "Glusterd is stopped successfully")

        self.volume['name'] = "third_volume"
        ret = setup_volume(self.mnode, self.all_servers_info, self.volume)
        self.assertFalse(
            ret, "Expected: It should fail to create a volume "
            "when one of the node is down. Actual: Successfully "
            "created the volume with bbrick whose node is down")

        g.log.info("Failed to create the volume with brick whose node is down")